#!/usr/bin/env python3
"""
This mirror status checker determines whether CSC mirror is up-to-date with upstream
"""

import time
import sys
import re    # remove stray numbers in string that might interfere with date finding
import json  # read distro info stored in json file

import requests
import datefinder  # another date finding library
# dateparser seems to be super slow but the other library (dateutil.parser) gets some errors
# http://theautomatic.net/2018/12/18/2-packages-for-extracting-dates-from-a-string-of-text-in-python/
from dateparser.search import search_dates

from projects.almalinux import AlmaLinux
from projects.alpine import Alpine
from projects.apache import Apache
from projects.arch import Arch
from projects.centos import CentOS
from projects.ceph import Ceph
from projects.cpan import CPAN
from projects.cygwin import Cygwin
from projects.debian import Debian
from projects.debiancd import DebianCD
from projects.debianmultimedia import DebianMultimedia
from projects.debianports import DebianPorts
from projects.debiansecurity import DebianSecurity
from projects.eclipse import Eclipse
from projects.fedora import Fedora
from projects.freebsd import FreeBSD
from projects.gentoodistfiles import GentooDistfiles
from projects.gentooportage import GentooPortage
from projects.gnome import GNOME
from projects.gnu import GNU
from projects.gutenberg import Gutenberg
from projects.ipfire import IPFire
from projects.kde import KDE
from projects.kdeapplicationdata import KDEApplicationData
from projects.kernel import Kernel
from projects.openbsd import OpenBSD
from shared import CSC_MIRROR


def checker(directory_URL, file_name):
    """Return the first date found after *file_name* in the directory listing.

    Fetches the HTML directory listing at *directory_URL*, locates
    *file_name* in it, strips out stray numbers (file sizes) that would
    confuse date detection, and returns the first date found formatted as
    "%m/%d/%Y, %H:%M:%S" — or the string 'No dates found' if none is found.

    NOTE(review): if file_name is absent, page.find() returns -1 and the
    whole page (minus its last character) is scanned — presumably the
    callers guarantee the file exists; confirm before tightening.
    """
    page = requests.get(directory_URL).text
    indexOfFile = page.find(file_name)

    # Remove stray numbers (file size numbers in particular) that might
    # interfere with date finding.
    segment_clean = re.sub(r'\s\d+\s', ' ', page[indexOfFile:])  # removes numbers for size
    # BUG FIX: chain the second substitution on segment_clean — the original
    # re-applied it to page[indexOfFile:], discarding the first pass entirely.
    segment_clean = re.sub(r'\s\d+\w*\s', ' ', segment_clean)  # removes numbers + size unit, e.g. 50kb

    # datefinder returns a generator; typecast it to a list so it can be
    # indexed and its length checked.
    matches = list(datefinder.find_dates(segment_clean))
    if matches:
        # matches[0] is a datetime.datetime
        return matches[0].strftime("%m/%d/%Y, %H:%M:%S")
    return 'No dates found'


if __name__ == "__main__":
    # data.json holds per-distro state: URLs plus out_of_sync_since /
    # out_of_sync_interval used for the grace-period logic below.
    with open("data.json", "r", encoding="utf-8") as file:
        data = json.load(file)

    # Distro names are read from stdin when piped in; otherwise every
    # distro known to data.json is checked.
    if sys.stdin.isatty():
        distros = data
    else:
        distros = [distro.rstrip() for distro in sys.stdin.readlines()]

    current_time = int(time.time())
    for distro in distros:
        try:
            if distro not in data:
                print(f"Failure: {distro} does not exist")
                continue

            # Each distro name maps to a checker class imported above.
            distro_class = getattr(sys.modules[__name__], distro)

            if distro == "CPAN":
                # CPAN's check() takes the current time and reports
                # immediately, with no grace-period bookkeeping.
                checker_result = distro_class.check(data, distro, current_time)
                if checker_result:
                    print(f"Success: {distro} up-to-date")
                else:
                    print(f"Failure: {distro} out-of-sync")
                continue

            checker_result = distro_class.check(data, distro)
            if checker_result:
                # Back in sync: clear the out-of-sync timestamp.
                data[distro]["out_of_sync_since"] = None
            elif data[distro]["out_of_sync_since"] is None:
                # First time we notice it out of sync: start the clock.
                data[distro]["out_of_sync_since"] = current_time
            elif current_time - data[distro]["out_of_sync_since"] \
                    > data[distro]["out_of_sync_interval"]:
                # Out of sync longer than the allowed grace interval.
                print(f"Failure: {distro} out-of-sync")
                continue
            # Up-to-date, or still within the grace interval.
            print(f"Success: {distro} up-to-date")
        except requests.exceptions.RequestException as err:
            print(f"Error: {distro}\n{err}")

    # Persist updated out_of_sync_since timestamps for the next run.
    with open("data.json", "w", encoding="utf-8") as file:
        json.dump(data, file, indent=4)