#!/usr/bin/env python3

"""
This mirror status checker determines whether CSC mirror is up-to-date with upstream
"""

import json  # distro info and sync state are stored in data.json
import re  # strips stray numbers that might interfere with date finding
import sys
import time

import datefinder  # date finding library used by checker()
import requests
# NOTE: dateparser's search_dates seemed to be very slow, and the alternative
# dateutil.parser raised errors on some inputs, so checker() uses datefinder.
# http://theautomatic.net/2018/12/18/2-packages-for-extracting-dates-from-a-string-of-text-in-python/
from dateparser.search import search_dates

from almalinux import AlmaLinux
from alpine import Alpine
from apache import Apache
from arch import Arch
from centos import CentOS
from ceph import Ceph
from cpan import CPAN
from cygwin import Cygwin
from debian import Debian
from debiancd import DebianCD
from debianmultimedia import DebianMultimedia
from debianports import DebianPorts
from debiansecurity import DebianSecurity
from eclipse import Eclipse
from fedora import Fedora
from freebsd import FreeBSD
from gentoodistfiles import GentooDistfiles
from gentooportage import GentooPortage
from gnome import GNOME
from gnu import GNU
from gutenberg import Gutenberg
from ipfire import IPFire
from kde import KDE
from kdeapplicationdata import KDEApplicationData
from kernel import Kernel
from openbsd import OpenBSD
from shared import CSC_MIRROR

# Whitespace-delimited tokens that start with digits (bare file sizes such as
# " 1234 " as well as sizes with a unit such as " 50kb "). They are blanked out
# before date extraction so they are not mistaken for part of a date.
_SIZE_TOKEN_RE = re.compile(r'\s\d+\w*\s')


def checker(directory_URL: str, file_name: str, *, timeout: float = 30.0) -> str:
    """Return the first date found after ``file_name`` in a directory listing.

    The page at ``directory_URL`` is downloaded, everything before the first
    occurrence of ``file_name`` is discarded, size-like numeric tokens are
    removed, and the first date that ``datefinder`` recognises in the remaining
    text is returned.

    Args:
        directory_URL: URL of the directory listing page to download.
        file_name: Text to locate in the page; the date search starts there.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely on a stalled server.

    Returns:
        The date formatted as ``"%m/%d/%Y, %H:%M:%S"``, or the string
        ``'No dates found'`` when ``file_name`` is not on the page or no date
        follows it.

    Raises:
        requests.exceptions.RequestException: If the download fails or times
            out.
    """
    page = requests.get(directory_URL, timeout=timeout).text

    file_index = page.find(file_name)
    if file_index == -1:
        # str.find signals "not found" with -1; slicing with it would silently
        # inspect only the last character of the page.
        return 'No dates found'

    segment_clean = _SIZE_TOKEN_RE.sub(' ', page[file_index:])

    # datefinder.find_dates yields datetime.datetime objects lazily; only the
    # first one is needed, so the rest of the page is never parsed.
    first_date = next(iter(datefinder.find_dates(segment_clean)), None)
    if first_date is None:
        return 'No dates found'
    return first_date.strftime("%m/%d/%Y, %H:%M:%S")


def main() -> None:
    """Check the requested distros and record their sync state in data.json.

    When stdin is a terminal every distro listed in ``data.json`` is checked;
    otherwise one distro name per line is read from stdin. One ``Success``,
    ``Failure`` or ``Error`` line is printed per distro, and the possibly
    updated ``data.json`` is written back at the end.
    """
    with open("data.json", "r", encoding="utf-8") as file:
        data = json.load(file)

    if sys.stdin.isatty():
        # Snapshot the keys: the check() methods receive ``data`` itself, so
        # iterate over a copy rather than the live dict.
        distros = list(data)
    else:
        # One name per line; blank lines are skipped instead of being reported
        # as a distro that does not exist.
        stripped = (line.rstrip() for line in sys.stdin)
        distros = [name for name in stripped if name]

    current_time = int(time.time())
    # The checker classes are looked up by name among this module's imports.
    this_module = sys.modules[__name__]

    for distro in distros:
        try:
            if distro not in data:
                print(f"Failure: {distro} does not exist")
                continue

            distro_class = getattr(this_module, distro)

            if distro == "CPAN":
                # CPAN's check() is given the current time and its result is
                # reported directly, without the grace-period bookkeeping
                # below (presumably it tracks staleness itself; see cpan.py).
                if distro_class.check(data, distro, current_time):
                    print(f"Success: {distro} up-to-date")
                else:
                    print(f"Failure: {distro} out-of-sync")
                continue

            in_sync = distro_class.check(data, distro)
            entry = data[distro]
            if in_sync:
                entry["out_of_sync_since"] = None
            elif entry["out_of_sync_since"] is None:
                # First failed check: start the clock but still report
                # success until the allowed interval has elapsed.
                entry["out_of_sync_since"] = current_time
            elif (current_time - entry["out_of_sync_since"]
                    > entry["out_of_sync_interval"]):
                print(f"Failure: {distro} out-of-sync")
                continue
            print(f"Success: {distro} up-to-date")
        except requests.exceptions.RequestException as err:
            print(f"Error: {distro}\n{err}")

    with open("data.json", "w", encoding="utf-8") as file:
        json.dump(data, file, indent=4)


if __name__ == "__main__":
    main()