#!/usr/bin/env python3

"""
This mirror status checker determines whether the CSC mirror is up-to-date
with upstream.

For every project listed in ``data.json`` the script compares a marker file
on the CSC mirror with the same file on the upstream mirror(s), prints a
``Success`` / ``Failure`` / ``Error`` line, and records in ``data.json``
since when a project has been out of date.
"""

import json  # read/write the distro info stored in data.json
import os
import re  # regular expressions to remove stray numbers that might interfere with date finding
import subprocess
import tempfile
import time

import datefinder  # another date finding library
import requests
# this library seems to be super slow but the other library: dateutil.parser gets some errors
# http://theautomatic.net/2018/12/18/2-packages-for-extracting-dates-from-a-string-of-text-in-python/
from dateparser.search import search_dates

from arch import Arch
from ceph import Ceph
from debian import Debian
from eclipse import Eclipse
from gnu import GNU
from kernel import Kernel
from openbsd import OpenBSD

CSC_MIRROR = "http://mirror.csclub.uwaterloo.ca/"

# Seconds to wait for an HTTP server before giving up. Without a timeout
# requests.get() may block forever on a stalled connection.
REQUEST_TIMEOUT = 30

# Seconds to wait for a single rsync transfer before treating it as failed.
RSYNC_TIMEOUT = 120


def _fetch_text(url):
    """Return the body of ``url`` as text.

    Raises:
        requests.exceptions.RequestException: on connection errors or when
            the server does not answer within ``REQUEST_TIMEOUT`` seconds.
    """
    return requests.get(url, timeout=REQUEST_TIMEOUT).text


def checker(directory_URL, file_name):
    """Return the first date listed after ``file_name`` in a directory index.

    Args:
        directory_URL: URL of the directory listing page to download.
        file_name: name of the file whose date should be extracted.

    Returns:
        The date formatted as ``"%m/%d/%Y, %H:%M:%S"``, or the string
        ``'No dates found'`` when the file is not listed or no date could
        be parsed after it.

    Raises:
        requests.exceptions.RequestException: if the page cannot be fetched.
    """
    page = _fetch_text(directory_URL)
    index_of_file = page.find(file_name)
    if index_of_file == -1:
        # str.find() returns -1 when the name is absent; slicing with -1
        # would silently parse only the last character of the page.
        return 'No dates found'

    # Remove stray numbers (file sizes in particular, with or without a unit
    # such as "50kb") that might interfere with date finding. This single
    # pattern also covers bare numbers, because \w* may match nothing.
    segment_clean = re.sub(r'\s\d+\w*\s', ' ', page[index_of_file:])

    # implementation using dateparser.search.search_dates
    # notes: some dates don't parse correctly with this tool
    # print(search_dates(page[index_of_file:], languages=['en']))
    # print(search_dates(page[index_of_file:])[0])

    # Find the dates in the segment after the file name. datefinder returns
    # a generator, so it is materialised to look at the first match.
    matches = list(datefinder.find_dates(segment_clean))
    if not matches:
        return 'No dates found'

    date = matches[0]  # date is of type datetime.datetime
    return date.strftime("%m/%d/%Y, %H:%M:%S")


def _rsync_fetch(source, destination):
    """Download ``source`` to ``destination`` with rsync.

    Returns:
        The downloaded file's bytes, or ``None`` if rsync is unavailable,
        timed out, exited with a non-zero status, or the file is unreadable.
    """
    try:
        # Argument list (no shell): values from data.json are never
        # interpreted by a shell.
        completed = subprocess.run(
            ["rsync", "-q", source, destination],
            check=False,
            timeout=RSYNC_TIMEOUT,
        )
    except (OSError, subprocess.TimeoutExpired):
        return None
    if completed.returncode != 0:
        return None
    try:
        with open(destination, "rb") as manifest:
            return manifest.read()
    except OSError:
        return None


def gentoo_portage_checker(data_json, distro_name):
    """GentooPortage checker

    Fetches the manifest file from the CSC mirror and from both upstream
    mirrors over rsync and compares them.

    Args:
        data_json: parsed ``data.json``; the entry for ``distro_name`` must
            provide the keys ``csc``, ``upstream1``, ``upstream2`` and
            ``file``.
        distro_name: key of the project inside ``data_json``.

    Returns:
        True if the CSC manifest is identical to at least one upstream
        manifest. False if it differs from both, or if the CSC manifest
        could not be downloaded (a failed download must not be reported as
        "in sync").
    """
    entry = data_json[distro_name]
    csc_source = entry["csc"] + entry["file"]
    upstream_sources = [
        entry["upstream1"] + entry["file"],
        entry["upstream2"] + entry["file"],
    ]

    # A private temporary directory avoids clobbering files in the current
    # directory and is removed even if something goes wrong.
    with tempfile.TemporaryDirectory() as workdir:
        csc_manifest = _rsync_fetch(
            csc_source, os.path.join(workdir, "csc_manifest"))
        if csc_manifest is None:
            return False

        for number, source in enumerate(upstream_sources, start=1):
            upstream_manifest = _rsync_fetch(
                source, os.path.join(workdir, f"upstream_manifest{number}"))
            if upstream_manifest is not None and upstream_manifest == csc_manifest:
                return True
    return False


def gnome_checker(data_json, distro_name):
    """GNOME checker

    Returns:
        True if the marker file on the CSC mirror has the same content as
        the file on at least one of the two upstream mirrors.

    Raises:
        requests.exceptions.RequestException: if a page cannot be fetched.
    """
    entry = data_json[distro_name]
    csc_url = CSC_MIRROR + entry["file"]
    upstream_urls = [
        entry["upstream1"] + entry["file"],
        entry["upstream2"] + entry["file"],
    ]
    csc_gnome_text = _fetch_text(csc_url)
    # Generator: the second upstream is only contacted if the first differs.
    return any(csc_gnome_text == _fetch_text(url) for url in upstream_urls)


def ipfire_checker():
    """IPFire checker

    Returns:
        True if the IPFire mirror status page for the CSC mirror contains
        the text "The mirror is up".

    Raises:
        requests.exceptions.RequestException: if the page cannot be fetched.
    """
    ipfire_text = _fetch_text(
        "https://mirrors.ipfire.org/mirrors/mirror.csclub.uwaterloo.ca")
    return "The mirror is up" in ipfire_text


def general_checker(data_json, distro_name):
    """General distro checker

    Returns:
        True if the marker file on the CSC mirror has the same content as
        the file on the upstream mirror.

    Raises:
        requests.exceptions.RequestException: if a page cannot be fetched.
    """
    entry = data_json[distro_name]
    csc_url = CSC_MIRROR + entry["csc"] + entry["file"]
    upstream_url = entry["upstream"] + entry["file"]
    return _fetch_text(csc_url) == _fetch_text(upstream_url)


def main():
    """Check every project in ``data.json`` and persist the updated state."""
    # Legacy implementation based on the per-distro classes (kept for
    # reference):
    #
    # for distro in [Arch, Ceph, Debian, Eclipse, GNU, Kernel, OpenBSD]:
    #     try:
    #         distro.print_output(distro.check())
    #     except requests.exceptions.RequestException as err:
    #         print(f"Error: {distro.name()}\n{err}")
    #
    # Legacy implementation based on date extraction (kept for reference):
    #
    # distros = json.load(open('distros.json',))
    # print(distros)
    # for distro in distros:
    #     print(distro[0] + ":")
    #     print("CSC mirror: " + checker(distro[1], distro[3]))
    #     print("Official distro: " + checker(distro[2], distro[3]))

    with open("data.json", "r", encoding="utf-8") as file:
        data = json.load(file)

    current_time = int(time.time())
    for distro in data:
        try:
            if distro == "CPAN":
                # CPAN publishes the state of every mirror itself; "age" is
                # compared against the current time in seconds.
                res_json = requests.get(
                    "http://mirrors.cpan.org/cpan-json.txt",
                    timeout=REQUEST_TIMEOUT,
                ).json()
                for mirror in res_json:
                    if mirror["url"] == f"{CSC_MIRROR}CPAN/":
                        if current_time - int(mirror["age"]) \
                                > data[distro]["out_of_date_interval"]:
                            print(f"Failure: {distro} out-of-sync")
                        else:
                            print(f"Success: {distro} up-to-date")
                        break
                # NOTE(review): nothing is printed if the CSC mirror is
                # missing from the CPAN list - confirm this is intended.
                continue

            if distro == "GentooPortage":
                checker_result = gentoo_portage_checker(data, distro)
            elif distro == "GNOME":
                gnome_text = _fetch_text("https://download.gnome.org/core/")
                line_count = len(gnome_text.split('\n'))
                # Latest version is currently 41, which has line count of 49
                if line_count == 49:
                    checker_result = gnome_checker(data, distro)
                else:
                    data[distro]["out_of_date_since"] = None
                    print(f"Failure: {distro} should check for latest version")
                    continue
            elif distro == "IPFire":
                checker_result = ipfire_checker()
            else:
                checker_result = general_checker(data, distro)

            if checker_result:
                data[distro]["out_of_date_since"] = None
            elif data[distro]["out_of_date_since"] is None:
                # First time the mirror is seen out of date: start the
                # grace period and still report success for now.
                data[distro]["out_of_date_since"] = current_time
            elif current_time - data[distro]["out_of_date_since"] \
                    > data[distro]["out_of_date_interval"]:
                print(f"Failure: {distro} out-of-sync")
                continue
            print(f"Success: {distro} up-to-date")
        except requests.exceptions.RequestException as err:
            print(f"Error: {distro}\n{err}")

    with open("data.json", "w", encoding="utf-8") as file:
        json.dump(data, file, indent=4)


if __name__ == "__main__":
    main()