mirror-checker/main.py

117 lines
4.8 KiB
Python
Raw Normal View History

2021-08-16 18:23:04 -04:00
#!/usr/bin/env python3
"""
This mirror status checker determines whether the CSC mirror is up to date with upstream.
"""
2021-09-01 23:25:06 -04:00
import time
import sys
2021-08-16 18:23:04 -04:00
import requests
from almalinux import AlmaLinux
from alpine import Alpine
from apache import Apache
from arch import Arch
from centos import CentOS
2021-08-24 18:37:27 -04:00
from ceph import Ceph
from cpan import CPAN
from cygwin import Cygwin
from debian import Debian
from debiancd import DebianCD
from debianmultimedia import DebianMultimedia
from debianports import DebianPorts
from debiansecurity import DebianSecurity
2021-08-24 17:00:51 -04:00
from eclipse import Eclipse
from fedora import Fedora
from freebsd import FreeBSD
from gentoodistfiles import GentooDistfiles
from gentooportage import GentooPortage
from gnome import GNOME
2021-08-24 17:34:39 -04:00
from gnu import GNU
from gutenberg import Gutenberg
from ipfire import IPFire
from kde import KDE
from kdeapplicationdata import KDEApplicationData
from kernel import Kernel
from openbsd import OpenBSD
2021-10-03 18:26:43 -04:00
from tdf import tdf
from ubuntu import ubuntu
from vlc import vlc
from shared import CSC_MIRROR
from dateparser.search import search_dates # this library seems to be super slow but the other library: dateutil.parser gets some errors
# http://theautomatic.net/2018/12/18/2-packages-for-extracting-dates-from-a-string-of-text-in-python/
import re # import regular expressions to remove stray numbers in string that might interfere with date finding
import json # import json to read distro info stored in json file
import datefinder # another date finding library
2021-09-12 01:06:55 -04:00
# checker: gets the timestamp of the file inside the directory at the specified URL and returns it as a string
def checker(directory_URL, file_name, timeout=30):
    """Return the timestamp listed next to ``file_name`` on a directory page.

    The page at ``directory_URL`` is downloaded, the text from the first
    occurrence of ``file_name`` onward is scanned, and the first date that
    ``datefinder`` recognises there is returned.

    Args:
        directory_URL: URL of the page to download.
        file_name: Text to locate in the page; the date search starts at its
            first occurrence.
        timeout: Seconds handed to ``requests.get``. Pass ``None`` to wait
            indefinitely, which is what happened before this parameter
            existed.

    Returns:
        The first date found, formatted as ``"%m/%d/%Y, %H:%M:%S"``, or the
        string ``'No dates found'`` when ``file_name`` is not on the page or
        no date follows it.

    Raises:
        requests.exceptions.RequestException: propagated unchanged from
            ``requests.get`` (includes timeouts).
    """
    page = requests.get(directory_URL, timeout=timeout).text

    index_of_file = page.find(file_name)
    if index_of_file == -1:
        # str.find signals "absent" with -1; slicing with it would silently
        # scan only the last character of the page.
        return 'No dates found'

    # Blank out whitespace-delimited tokens that start with digits (plain
    # sizes such as "4096" and sizes with a unit such as "50kb") so they are
    # not mistaken for parts of a date. A separate digits-only substitution
    # used to precede this one, but its result was never used and this
    # pattern already matches bare numbers.
    segment_clean = re.sub(r'\s\d+\w*\s', ' ', page[index_of_file:])

    # Only the first match is needed, so stop consuming matches after it.
    first_date = next(iter(datefinder.find_dates(segment_clean)), None)
    if first_date is None:
        return 'No dates found'
    return first_date.strftime("%m/%d/%Y, %H:%M:%S")
2021-08-19 17:00:55 -04:00
2021-08-16 18:23:04 -04:00
if __name__ == "__main__":
    # Script entry point.
    #
    # data.json maps each project name to its stored state. The keys read
    # and written here are "out_of_sync_since" (epoch seconds or null) and
    # "out_of_sync_interval" (compared against a difference of epoch
    # seconds). One "Success:", "Failure:" or "Error:" line is printed per
    # project, and the updated state is written back to data.json.
    with open("data.json", "r", encoding="utf-8") as file:
        data = json.load(file)

    if sys.stdin.isatty():
        # Interactive run: check every project listed in data.json.
        distros = data
    else:
        # Piped run: one project name per line. Blank lines are dropped so
        # they are not reported as unknown projects.
        names = (line.rstrip() for line in sys.stdin)
        distros = [name for name in names if name]

    current_time = int(time.time())
    try:
        for distro in distros:
            try:
                if distro not in data:
                    print(f"Failure: {distro} does not exist")
                    continue

                # The checker class is looked up by name among this module's
                # globals, i.e. the classes imported at the top of the file.
                distro_class = getattr(sys.modules[__name__], distro)

                if distro in ("CPAN", "ubuntu"):
                    # These two checkers receive the current time and their
                    # verdict is reported directly, without the
                    # out_of_sync_since bookkeeping below.
                    if distro_class.check(data, distro, current_time):
                        print(f"Success: {distro} up-to-date")
                    else:
                        print(f"Failure: {distro} out-of-sync")
                    continue

                checker_result = distro_class.check(data, distro)
                state = data[distro]
                if checker_result:
                    # In sync again: clear the out-of-sync marker.
                    state["out_of_sync_since"] = None
                elif state["out_of_sync_since"] is None:
                    # First failed check: start counting, but do not alert.
                    state["out_of_sync_since"] = current_time
                elif (current_time - state["out_of_sync_since"]
                        > state["out_of_sync_interval"]):
                    # Still out of sync and past the allowed interval.
                    print(f"Failure: {distro} out-of-sync")
                    continue
                print(f"Success: {distro} up-to-date")
            except requests.exceptions.RequestException as err:
                print(f"Error: {distro}\n{err}")
    finally:
        # Persist the state even when a checker raises something other than
        # a RequestException, so updates already made for earlier projects
        # are not lost. The exception itself still propagates.
        with open("data.json", "w", encoding="utf-8") as file:
            json.dump(data, file, indent=4)