#!/usr/bin/env python3
"""
This mirror status checker determines whether the CSC mirror is up-to-date with upstream.
"""
import time
import os
import sys
import requests
from arch import Arch
from ceph import Ceph
from debian import Debian
from eclipse import Eclipse
from gnu import GNU
from kernel import Kernel
from openbsd import OpenBSD
from dateparser.search import search_dates # this library seems to be super slow but the other library: dateutil.parser gets some errors
# http://theautomatic.net/2018/12/18/2-packages-for-extracting-dates-from-a-string-of-text-in-python/
import re # import regular expressions to remove stray numbers in string that might interfere with date finding
import json # import json to read distro info stored in json file
import datefinder # another date finding library
CSC_MIRROR = "http://mirror.csclub.uwaterloo.ca/"
def checker(directory_URL, file_name):
    """Return the first date that follows ``file_name`` on a directory page.

    Downloads ``directory_URL``, finds the first occurrence of ``file_name``
    in the returned text, and searches the text from that point on for dates.

    Args:
        directory_URL: URL of the page to download.
        file_name: Text to locate on the page; the date search starts there.

    Returns:
        The first date found, formatted as ``"%m/%d/%Y, %H:%M:%S"``, or the
        string ``'No dates found'`` when ``file_name`` is not on the page or
        no date follows it.

    Raises:
        requests.exceptions.RequestException: Propagated from
            ``requests.get`` when the page cannot be fetched.
    """
    page = requests.get(directory_URL).text
    file_index = page.find(file_name)
    if file_index == -1:
        # str.find() reports "absent" as -1; slicing from -1 would quietly
        # search only the last character of the page, so say so explicitly.
        return 'No dates found'

    # Blank out standalone numbers, with or without a trailing unit
    # (e.g. "4096" or "50kb"), so file sizes are not mistaken for dates.
    # A single pattern covers both cases because ``\w*`` may match nothing.
    segment_clean = re.sub(r'\s\d+\w*\s', ' ', page[file_index:])

    # find_dates() returns a generator; it is materialised so the first
    # match (a datetime.datetime) can be picked off by index.
    matches = list(datefinder.find_dates(segment_clean))
    if not matches:
        return 'No dates found'
    return matches[0].strftime("%m/%d/%Y, %H:%M:%S")
def gentoo_portage_checker(data_json, distro_name):
    """GentooPortage checker.

    Compares the file ``csc_manifest`` in the current working directory with
    ``upstream_manifest1`` and ``upstream_manifest2``, then deletes all three
    files.

    Args:
        data_json: Not read by this function.
        distro_name: Not read by this function.

    Returns:
        True when ``csc_manifest`` is byte-for-byte identical to at least one
        of the two upstream manifests. False otherwise, including when
        ``csc_manifest`` or both upstream manifests cannot be read: a missing
        manifest must not be reported as "in sync".
    """
    # NOTE(review): nothing in this function creates the manifest files and
    # this rsync template is never used -- presumably the step that downloads
    # the manifests is missing. Confirm and restore it before relying on this
    # checker.
    rsync_command = "rsync -q {}{} {}"

    manifests = ("csc_manifest", "upstream_manifest1", "upstream_manifest2")

    # Read the files directly instead of shelling out to ``diff``: a failed
    # diff writes only to stderr, so its empty stdout used to look like a
    # perfect match.
    contents = {}
    for path in manifests:
        try:
            with open(path, "rb") as handle:
                contents[path] = handle.read()
        except OSError:
            contents[path] = None

    csc_bytes = contents["csc_manifest"]
    in_sync = csc_bytes is not None and csc_bytes in (
        contents["upstream_manifest1"],
        contents["upstream_manifest2"],
    )

    # Best-effort cleanup, as with the previous ``rm`` calls: a file that is
    # already gone (or cannot be removed) must not abort the check.
    for path in manifests:
        try:
            os.remove(path)
        except OSError:
            pass

    return in_sync
def gnome_checker(data_json, distro_name):
    """GNOME checker.

    Downloads the configured file from the CSC mirror and from both upstream
    bases, and returns True when the CSC copy equals either upstream copy.

    Args:
        data_json: Mapping of distro name to its settings; the entry for
            ``distro_name`` supplies ``"file"``, ``"upstream1"`` and
            ``"upstream2"``.
        distro_name: Key of the entry to check.

    Returns:
        True if the CSC text matches the text from upstream1 or upstream2.
    """
    entry = data_json[distro_name]
    tracked_file = entry["file"]
    mirror_url = CSC_MIRROR + tracked_file
    upstream_urls = (
        entry["upstream1"] + tracked_file,
        entry["upstream2"] + tracked_file,
    )

    mirror_text = requests.get(mirror_url).text
    # Both upstream copies are always downloaded before any comparison.
    upstream_texts = [requests.get(url).text for url in upstream_urls]
    return any(mirror_text == text for text in upstream_texts)
def ipfire_checker():
    """IPFire checker.

    Downloads IPFire's status page for the CSC mirror and returns True when
    the page text contains the phrase "The mirror is up".
    """
    status_url = "https://mirrors.ipfire.org/mirrors/mirror.csclub.uwaterloo.ca"
    status_page = requests.get(status_url).text
    return "The mirror is up" in status_page
def general_checker(data_json, distro_name):
    """General distro checker.

    Downloads the configured file from the CSC mirror and from the distro's
    upstream, and returns True when the two response texts are identical.

    Args:
        data_json: Mapping of distro name to its settings; the entry for
            ``distro_name`` supplies ``"csc"``, ``"file"`` and ``"upstream"``.
        distro_name: Key of the entry to check.

    Returns:
        True if the CSC copy equals the upstream copy, False otherwise.
    """
    entry = data_json[distro_name]
    mirror_url = CSC_MIRROR + entry["csc"] + entry["file"]
    upstream_url = entry["upstream"] + entry["file"]

    mirror_copy = requests.get(mirror_url).text
    upstream_copy = requests.get(upstream_url).text
    return mirror_copy == upstream_copy
if __name__ == "__main__":
    # Script entry point.
    #
    # 1. Load per-distro settings and state from data.json.
    # 2. Pick the distros to check: every key of data.json when stdin is a
    #    terminal, otherwise the names piped in on stdin (one per line).
    # 3. Run the matching checker for each distro and print one
    #    "Success:" / "Failure:" / "Error:" line for it.
    # 4. Write the updated "out_of_sync_since" state back to data.json.
    #
    # NOTE(review): the try/else/continue structure below was reconstructed
    # from the surviving statements (an ``except`` with no ``try``, and
    # "Failure" prints immediately followed by "Success" prints); confirm it
    # against the intended behaviour.

    # Disabled legacy driver (per-distro classes), previously kept here as a
    # no-op string literal:
    #   for distro in [Arch, Ceph, Debian, Eclipse, GNU, Kernel, OpenBSD]:
    #   except requests.exceptions.RequestException as err:
    #   print(f"Error: {distro.name()}\n{err}")
    #
    # Disabled legacy driver (date scraping via checker()), likewise:
    #   distros = json.load(open('distros.json',))
    #   for distro in distros:
    #   print(distro[0] + ":")
    #   print("CSC mirror: " + checker(distro[1], distro[3]))
    #   print("Official distro: " + checker(distro[2], distro[3]))

    with open("data.json", "r", encoding="utf-8") as file:
        data = json.load(file)

    if sys.stdin.isatty():
        # Nothing piped in: check everything listed in data.json.
        distros = data
    else:
        distros = [distro.rstrip() for distro in sys.stdin.readlines()]

    current_time = int(time.time())
    for distro in distros:
        # Validate the name before any branch indexes data[distro]; names
        # read from stdin are not guaranteed to exist in data.json.
        if distro not in data:
            print(f"Failure: {distro} does not exist")
            continue
        entry = data[distro]

        try:
            if distro == "CPAN":
                # CPAN publishes its own mirror list; the entry for the CSC
                # mirror carries an "age" value that is compared with the
                # current time.
                res_json = requests.get("http://mirrors.cpan.org/cpan-json.txt").json()
                for mirror in res_json:
                    if mirror["url"] == f"{CSC_MIRROR}CPAN/":
                        if current_time - int(mirror["age"]) \
                                > entry["out_of_sync_interval"]:
                            print(f"Failure: {distro} out-of-sync")
                        else:
                            print(f"Success: {distro} up-to-date")
                # CPAN keeps no "out_of_sync_since" state, so skip the shared
                # bookkeeping below. Nothing is printed if the CSC mirror is
                # not in CPAN's list.
                continue

            if distro == "GentooPortage":
                checker_result = gentoo_portage_checker(data, distro)
            elif distro == "GNOME":
                gnome_text = requests.get("https://download.gnome.org/core/").text
                line_count = len(gnome_text.split('\n'))
                # Latest version is currently 41, which has line count of 49
                if line_count != 49:
                    # The upstream index changed shape, so the configured
                    # file may no longer be the latest release; ask for a
                    # manual check instead of comparing a stale file.
                    entry["out_of_sync_since"] = None
                    print(f"Failure: {distro} should check for latest version")
                    continue
                checker_result = gnome_checker(data, distro)
            elif distro == "IPFire":
                checker_result = ipfire_checker()
            else:
                checker_result = general_checker(data, distro)

            if checker_result:
                entry["out_of_sync_since"] = None
            elif entry["out_of_sync_since"] is None:
                # First time the mirror is seen out of sync: start the clock.
                entry["out_of_sync_since"] = current_time
            elif current_time - entry["out_of_sync_since"] \
                    > entry["out_of_sync_interval"]:
                print(f"Failure: {distro} out-of-sync")
                continue
            # In sync, or out of sync for no longer than the allowed interval.
            print(f"Success: {distro} up-to-date")
        except requests.exceptions.RequestException as err:
            print(f"Error: {distro}\n{err}")

    with open("data.json", "w", encoding="utf-8") as file:
        json.dump(data, file, indent=4)