Compare commits

..

No commits in common. "master" and "master" have entirely different histories.

19 changed files with 468 additions and 697 deletions

View File

@ -48,8 +48,7 @@
"file": "timestamp"
},
"CPAN": {
"out_of_sync_interval": 172800,
"out_of_sync_since": null
"out_of_sync_interval": 172800
},
"cran": {
"out_of_sync_since": null,
@ -87,7 +86,7 @@
"file": "debian-cd/project/trace/cdimage.debian.org"
},
"DebianMultimedia": {
"out_of_sync_since": 1659116719,
"out_of_sync_since": null,
"out_of_sync_interval": 86400,
"csc": "debian-multimedia/",
"upstream": "http://debian-mirrors.sdinet.de/deb-multimedia/",
@ -117,7 +116,7 @@
},
"Fedora": {
"out_of_sync_since": null,
"out_of_sync_interval": 259200,
"out_of_sync_interval": 86400,
"csc": "fedora/",
"upstream": "http://fedora.mirror.iweb.com/",
"file": "linux/development/rawhide/COMPOSE_ID"
@ -174,7 +173,7 @@
"out_of_sync_interval": 172800
},
"KDE": {
"out_of_sync_since": 1659116720,
"out_of_sync_since": null,
"out_of_sync_interval": 86400,
"csc": "kde/",
"upstream": "https://kde.c3sl.ufpr.br/",
@ -209,7 +208,7 @@
"file": "dists/"
},
"macports": {
"out_of_sync_since": 1642827723,
"out_of_sync_since": null,
"out_of_sync_interval": 86400,
"csc": "MacPorts/mpdistfiles/",
"upstream": "https://distfiles.macports.org/",
@ -270,7 +269,7 @@
"exclude": true
},
"opensuse": {
"out_of_sync_since": 1648699331,
"out_of_sync_since": null,
"out_of_sync_interval": 86400,
"csc": "opensuse/update/",
"upstream": "http://opensuse-mirror-gce-us.opensu.se/update/",
@ -321,7 +320,7 @@
"file": "dists/"
},
"raspbian": {
"out_of_sync_since": 1659116721,
"out_of_sync_since": null,
"out_of_sync_interval": 86400,
"csc": "raspbian/",
"upstream": "http://archive.raspbian.org/",
@ -342,7 +341,7 @@
"file": ""
},
"slackware": {
"out_of_sync_since": 1642827723,
"out_of_sync_since": null,
"out_of_sync_interval": 86400,
"csc": "slackware/",
"upstream": "https://mirrors.slackware.com/slackware/",
@ -374,7 +373,7 @@
"file": ""
},
"ubuntu_ports": {
"out_of_sync_since": 1651550528,
"out_of_sync_since": null,
"out_of_sync_interval": 86400,
"csc": "ubuntu-ports/",
"upstream": "http://ports.ubuntu.com/ubuntu-ports/",
@ -391,7 +390,7 @@
"out_of_sync_since": null,
"out_of_sync_interval": 172800,
"csc": "",
"upstream": "https://launchpad.net/ubuntu/+mirror/mirror.csclub.uwaterloo.ca-archive",
"upstream": "https://launchpad.net/ubuntu/+mirror/mirror.csclub.uwaterloo.ca-release",
"file": ""
},
"vlc": {
@ -402,7 +401,7 @@
"file": "trace"
},
"x_org": {
"out_of_sync_since": 1657512131,
"out_of_sync_since": null,
"out_of_sync_interval": 86400,
"csc": "x.org/individual/",
"upstream": "https://www.x.org/releases/individual/",

44
main.py
View File

@ -8,68 +8,46 @@ import time
import sys
import requests
from multiprocessing import Pool, Manager
from typing import Optional
from time import sleep, localtime, strftime
from projects import *
import json
NUM_THREAD = 16
MAX_RETRY = 3
RETRY_TIMEOUT = 30 # In seconds
current_time = int(time.time())
def safe_print(*args, **kwargs):
    """Print like the built-in ``print`` but always flush the stream.

    When run with 'chronic' and 'timeout', stdout gets suppressed
    due to buffering, so every call forces a flush.

    Args:
        *args: Objects to print; forwarded to ``print`` unchanged.
        **kwargs: Keyword arguments forwarded to ``print``. A ``flush``
            value supplied by the caller is overridden with ``True``.
    """
    # Assign rather than pass flush=True next to **kwargs: a caller-supplied
    # ``flush`` would otherwise raise "got multiple values for keyword
    # argument 'flush'".
    kwargs["flush"] = True
    print(*args, **kwargs)
# Return None if no error occurs and a string for error message otherwise
def check_project(args) -> Optional[str]:
current_time = int(time.time())
def check_project(args):
project, data = args
try:
project_class = getattr(sys.modules[__name__], project)
# Skip projects we no longer mirror
if data[project].get('exclude', False):
return None
return True
checker_result = project_class.check(data, project, current_time)
if checker_result:
data[project]["out_of_sync_since"] = None
return None
safe_print(f"Success: {project} up-to-date")
return True
elif (data[project]["out_of_sync_since"] is not None
and current_time - data[project]["out_of_sync_since"] > data[project]["out_of_sync_interval"]):
now_str = strftime("%d %b %Y %H:%M:%S (local time)", localtime())
duration = current_time - data[project]["out_of_sync_since"]
return f"{project} out-of-sync at {now_str} for {duration}s"
safe_print(f"Failure: {project} out-of-sync")
return False
else:
data[project]["out_of_sync_since"] = current_time
return None
return True
except requests.exceptions.RequestException as err:
return f"{project}\n{err}"
def check_project_with_retry(args) -> bool:
project, _ = args
errs = []
for _ in range(MAX_RETRY):
res = check_project(args)
if res == None:
safe_print(f"Success: {project} up-to-date")
return True
else:
errs.append(res)
# Do nothing, try again later
sleep(RETRY_TIMEOUT)
# Max try reached, print errors
safe_print(f"Error: {project}")
for reason in errs:
safe_print(f" {reason}")
safe_print(f"Error: {project}\n{err}")
return False
@ -83,7 +61,7 @@ def main():
sync_data = manager.dict({k: manager.dict(v) for k, v in data.items()})
with Pool(NUM_THREAD) as pool:
all_pass = all(pool.imap(check_project_with_retry, ((k, sync_data) for k in data.keys())))
all_pass = all(pool.imap(check_project, ((k, sync_data) for k in data.keys())))
with open(data_file, "w", encoding="utf-8") as file:
json.dump({k: dict(v) for k, v in sync_data.items()}, file, indent=' ')

View File

@ -3,35 +3,7 @@ Contains Apache class
"""
from project import Project
from shared import CSC_MIRROR
import requests
class Apache(Project):
    """Apache class"""

    # Apache's time file has two segments, so we need a special function
    # Example: 1648323001 rsync-he-fi
    @staticmethod
    def check(data, project, current_time):
        """Check if project packages are up-to-date.

        Downloads the same marker file from the CSC mirror and from the
        upstream and compares them.

        Args:
            data: Mapping of project name to its settings. The keys "csc",
                "upstream", "file" and "out_of_sync_interval" are read.
            project: Key of this project inside ``data``.
            current_time: Not used by this checker; accepted because the
                caller passes it to every checker.

        Returns:
            True when both files are identical, or when the upstream
            timestamp is ahead of the mirror's by less than
            "out_of_sync_interval" seconds. False otherwise, including when
            a timestamp cannot be parsed.

        Raises:
            requests.exceptions.RequestException: If a download fails,
                times out, or returns an HTTP error status.
        """
        settings = data[project]
        csc_url = CSC_MIRROR + settings["csc"] + settings["file"]
        upstream_url = settings["upstream"] + settings["file"]

        # requests has no default timeout; without one a stalled server
        # would block this worker indefinitely.
        req = requests.get(csc_url, timeout=30)
        req.raise_for_status()
        CSC = req.text

        req = requests.get(upstream_url, timeout=30)
        req.raise_for_status()
        upstream = req.text

        if upstream == CSC:
            return True

        try:
            lag = get_timestamp_from_apache(upstream) - get_timestamp_from_apache(CSC)
            return lag < settings["out_of_sync_interval"]
        except ValueError:
            print("failed to parse apache")
            return False
def get_timestamp_from_apache(s: str) -> int:
    """Return the leading integer timestamp of an Apache time file.

    The file content looks like ``1648323001 rsync-he-fi``; only the first
    whitespace-separated field is used.

    Args:
        s: Raw text of the time file.

    Returns:
        The first field converted to ``int``.

    Raises:
        ValueError: If ``s`` is empty/blank or its first field is not an
            integer.
    """
    # split() with no separator tolerates tabs, repeated spaces and leading
    # whitespace, which split(" ") does not.
    fields = s.split(maxsplit=1)
    if not fields:
        # Keep raising ValueError (not IndexError) so callers' handlers apply.
        raise ValueError("empty timestamp file")
    return int(fields[0])

View File

@ -15,8 +15,7 @@ class CPAN(Project):
def check(data, project, current_time):
res_json = requests.get("http://mirrors.cpan.org/cpan-json.txt").json()
for mirror in res_json:
if mirror["url"] == f"{CSC_MIRROR}CPAN/" and mirror["last_status"] == "ok":
# This is an improvised method: report we're good if CPAN think we are good
# Change this to a more precise method if you find a better way to do it
return True
if mirror["url"] == f"{CSC_MIRROR}CPAN/":
data[project]["out_of_sync_since"] = int(mirror["age"])
return current_time - data[project]["out_of_sync_since"] <= data[project]["out_of_sync_interval"]
return False

View File

@ -18,7 +18,7 @@ class cran(Project):
page = requests.get(data[project]["upstream"]).text
indexOfFile = page.find("mirror.csclub.uwaterloo.ca")
m = re.search(r'(\d+ minutes?)|(\d+ hours?)|(\d+(\.)?\d+ days?)', page[indexOfFile:])
m = re.search(r'(\d+ hour)|(\d+ hours)|(\d+(\.)?\d+ days)', page[indexOfFile:]) # solution from: https://stackoverflow.com/questions/21074100/how-to-convert-standard-timedelta-string-to-timedelta-object/21074460
duration = pd.to_timedelta(m.group(0))
data[project]["out_of_sync_since"] = current_time - duration.total_seconds()

View File

@ -7,7 +7,7 @@ from project import Project
from shared import CSC_MIRROR
import requests
import datefinder # another date finding library
from datetime import datetime, timedelta
from datetime import timedelta
import re
import pandas as pd
@ -18,10 +18,9 @@ class ctan(Project):
page = requests.get(data[project]["upstream"]).text
indexOfFile = page.find("mirror.csclub.uwaterloo.ca")
m = re.search(r'(\d+ minutes?)|(\d+ hours?)|(\d+(\.)?\d+ days?)', page[indexOfFile:])
m = re.search(r'(\d+ hour)|(\d+ hours)|(\d+(\.)?\d+ days)', page[indexOfFile:]) # solution from: https://stackoverflow.com/questions/21074100/how-to-convert-standard-timedelta-string-to-timedelta-object/21074460
duration = pd.to_timedelta(m.group(0))
data[project]["out_of_sync_since"] = datetime.now() - duration
data[project]["out_of_sync_since"] = datetime.now() - duration.total_seconds()
return duration <= pd.to_timedelta(data[project]["out_of_sync_interval"], unit='s')

View File

@ -3,37 +3,7 @@ Contains Debian class
"""
from project import Project
from shared import CSC_MIRROR
import requests
from datetime import datetime
import time
class Debian(Project):
    """Debian class"""

    @staticmethod
    def check(data, project, current_time):
        """Check if project packages are up-to-date.

        Downloads the same marker file from the CSC mirror and from the
        upstream and compares the date on the first line of each.

        Args:
            data: Mapping of project name to its settings. The keys "csc",
                "upstream", "file" and "out_of_sync_interval" are read.
            project: Key of this project inside ``data``.
            current_time: Not used by this checker; accepted because the
                caller passes it to every checker.

        Returns:
            True when both files are identical, or when the upstream date is
            ahead of the mirror's by less than "out_of_sync_interval"
            seconds. False otherwise, including when a date cannot be
            parsed.

        Raises:
            requests.exceptions.RequestException: If a download fails,
                times out, or returns an HTTP error status.
        """
        settings = data[project]
        csc_url = CSC_MIRROR + settings["csc"] + settings["file"]
        upstream_url = settings["upstream"] + settings["file"]

        # requests has no default timeout; without one a stalled server
        # would block this worker indefinitely.
        req = requests.get(csc_url, timeout=30)
        req.raise_for_status()
        CSC = req.text

        req = requests.get(upstream_url, timeout=30)
        req.raise_for_status()
        upstream = req.text

        if upstream == CSC:
            return True

        date_format = "%a %b %d %H:%M:%S UTC %Y"
        try:
            # strptime is the call that raises ValueError, so it must sit
            # inside the try for the handler below to have any effect.
            CSC_date = datetime.strptime(CSC.partition('\n')[0], date_format)
            upstream_date = datetime.strptime(upstream.partition('\n')[0], date_format)
        except ValueError:
            return False

        # Both stamps are UTC; subtracting the naive datetimes directly avoids
        # routing them through the local timezone (and its DST jumps).
        return (upstream_date - CSC_date).total_seconds() < settings["out_of_sync_interval"]

View File

@ -3,40 +3,7 @@ Contains DebianCD class
"""
from project import Project
from shared import CSC_MIRROR
import requests
from datetime import datetime
import time
class DebianCD(Project):
    """DebianCD class"""

    @staticmethod
    def check(data, project, current_time):
        """Check if project packages are up-to-date.

        Downloads the same marker file from the CSC mirror and from the
        upstream and compares the dates they contain.

        Args:
            data: Mapping of project name to its settings. The keys "csc",
                "upstream", "file" and "out_of_sync_interval" are read.
            project: Key of this project inside ``data``.
            current_time: Not used by this checker; accepted because the
                caller passes it to every checker.

        Returns:
            True when both files are identical, or when the upstream date is
            ahead of the mirror's by less than "out_of_sync_interval"
            seconds. False otherwise, including when a date cannot be
            parsed.

        Raises:
            requests.exceptions.RequestException: If a download fails,
                times out, or returns an HTTP error status.
        """
        settings = data[project]
        csc_url = CSC_MIRROR + settings["csc"] + settings["file"]
        upstream_url = settings["upstream"] + settings["file"]

        # requests has no default timeout; without one a stalled server
        # would block this worker indefinitely.
        req = requests.get(csc_url, timeout=30)
        req.raise_for_status()
        CSC = req.text

        req = requests.get(upstream_url, timeout=30)
        req.raise_for_status()
        upstream = req.text

        if upstream == CSC:
            return True

        # Date Format Example: Sun 27 Mar 00:20:12 UTC 2022
        # Surrounding whitespace is stripped from the text instead of baking
        # a "\n" into the format, so a file without a final newline parses.
        date_format = "%a %d %b %H:%M:%S UTC %Y"
        try:
            # strptime is the call that raises ValueError, so it must sit
            # inside the try for the handler below to have any effect.
            CSC_date = datetime.strptime(CSC.strip(), date_format)
            upstream_date = datetime.strptime(upstream.strip(), date_format)
        except ValueError:
            return False

        # Both stamps are UTC; subtracting the naive datetimes directly avoids
        # routing them through the local timezone (and its DST jumps).
        return (upstream_date - CSC_date).total_seconds() < settings["out_of_sync_interval"]

View File

@ -3,40 +3,7 @@ Contains Fedora class
"""
from project import Project
from shared import CSC_MIRROR
import requests
from datetime import datetime
import time
class Fedora(Project):
    """Fedora class"""

    @staticmethod
    def check(data, project, current_time):
        """Check if project packages are up-to-date.

        Downloads the same compose-id file from the CSC mirror and from the
        upstream and compares the dates embedded in them.

        Args:
            data: Mapping of project name to its settings. The keys "csc",
                "upstream", "file" and "out_of_sync_interval" are read.
            project: Key of this project inside ``data``.
            current_time: Not used by this checker; accepted because the
                caller passes it to every checker.

        Returns:
            True when both files are identical, or when the upstream date is
            ahead of the mirror's by less than "out_of_sync_interval"
            seconds. False otherwise, including when a date cannot be
            parsed.

        Raises:
            requests.exceptions.RequestException: If a download fails,
                times out, or returns an HTTP error status.
        """
        settings = data[project]
        csc_url = CSC_MIRROR + settings["csc"] + settings["file"]
        upstream_url = settings["upstream"] + settings["file"]

        # requests has no default timeout; without one a stalled server
        # would block this worker indefinitely.
        req = requests.get(csc_url, timeout=30)
        req.raise_for_status()
        CSC = req.text

        req = requests.get(upstream_url, timeout=30)
        req.raise_for_status()
        upstream = req.text

        if upstream == CSC:
            return True

        # Date example: Fedora-Rawhide-20220725.n.1
        # Characters 15..22 are the YYYYMMDD part that follows the
        # 15-character "Fedora-Rawhide-" prefix.
        date_format = "%Y%m%d"
        try:
            # strptime is the call that raises ValueError, so it must sit
            # inside the try for the handler below to have any effect.
            CSC_date = datetime.strptime(CSC[15:23], date_format)
            upstream_date = datetime.strptime(upstream[15:23], date_format)
        except ValueError:
            return False

        # Subtract the naive datetimes directly rather than converting each
        # through the local timezone first.
        return (upstream_date - CSC_date).total_seconds() < settings["out_of_sync_interval"]

View File

@ -3,35 +3,7 @@ Contains FreeBSD class
"""
from project import Project
from shared import CSC_MIRROR
import requests
class FreeBSD(Project):
    """FreeBSD class"""

    # FreeBSD's time file has two segments, so we need a special function
    # Example TIMESTAMP file: 1648308600 2022-03-26T15:30:00 UTC
    @staticmethod
    def check(data, project, current_time):
        """Check if project packages are up-to-date.

        Downloads the same TIMESTAMP file from the CSC mirror and from the
        upstream and compares them.

        Args:
            data: Mapping of project name to its settings. The keys "csc",
                "upstream", "file" and "out_of_sync_interval" are read.
            project: Key of this project inside ``data``.
            current_time: Not used by this checker; accepted because the
                caller passes it to every checker.

        Returns:
            True when both files are identical, or when the upstream
            timestamp is ahead of the mirror's by less than
            "out_of_sync_interval" seconds. False otherwise, including when
            a timestamp cannot be parsed.

        Raises:
            requests.exceptions.RequestException: If a download fails,
                times out, or returns an HTTP error status.
        """
        settings = data[project]
        csc_url = CSC_MIRROR + settings["csc"] + settings["file"]
        upstream_url = settings["upstream"] + settings["file"]

        # requests has no default timeout; without one a stalled server
        # would block this worker indefinitely.
        req = requests.get(csc_url, timeout=30)
        req.raise_for_status()
        CSC = req.text

        req = requests.get(upstream_url, timeout=30)
        req.raise_for_status()
        upstream = req.text

        if upstream == CSC:
            return True

        try:
            lag = get_timestamp_for_freebsd(upstream) - get_timestamp_for_freebsd(CSC)
            return lag < settings["out_of_sync_interval"]
        except ValueError:
            # The message previously said "apache" (copy-paste slip), which
            # pointed at the wrong project in the logs.
            print("failed to parse freebsd")
            return False
def get_timestamp_for_freebsd(s: str) -> int:
    """Return the leading integer timestamp of a FreeBSD TIMESTAMP file.

    The file content looks like ``1648308600 2022-03-26T15:30:00 UTC``; only
    the first whitespace-separated field is used.

    Args:
        s: Raw text of the TIMESTAMP file.

    Returns:
        The first field converted to ``int``.

    Raises:
        ValueError: If ``s`` is empty/blank or its first field is not an
            integer.
    """
    # split() with no separator tolerates tabs, repeated spaces and leading
    # whitespace, which split(" ") does not.
    fields = s.split(maxsplit=1)
    if not fields:
        # Keep raising ValueError (not IndexError) so callers' handlers apply.
        raise ValueError("empty timestamp file")
    return int(fields[0])

View File

@ -3,35 +3,7 @@ Contains GentooDistfiles class
"""
from project import Project
from shared import CSC_MIRROR
import requests
from datetime import datetime
import time
class GentooDistfiles(Project):
    """GentooDistfiles class"""

    @staticmethod
    def check(data, project, current_time):
        """Check if project packages are up-to-date.

        Downloads the same timestamp file from the CSC mirror and from the
        upstream and compares the integers at the start of each.

        Args:
            data: Mapping of project name to its settings. The keys "csc",
                "upstream", "file" and "out_of_sync_interval" are read.
            project: Key of this project inside ``data``.
            current_time: Not used by this checker; accepted because the
                caller passes it to every checker.

        Returns:
            True when both files are identical, or when the upstream
            timestamp is ahead of the mirror's by less than
            "out_of_sync_interval" seconds. False otherwise, including when
            a timestamp cannot be parsed.

        Raises:
            requests.exceptions.RequestException: If a download fails,
                times out, or returns an HTTP error status.
        """
        settings = data[project]
        csc_url = CSC_MIRROR + settings["csc"] + settings["file"]
        upstream_url = settings["upstream"] + settings["file"]

        # requests has no default timeout; without one a stalled server
        # would block this worker indefinitely.
        req = requests.get(csc_url, timeout=30)
        req.raise_for_status()
        CSC = req.text

        req = requests.get(upstream_url, timeout=30)
        req.raise_for_status()
        upstream = req.text

        if upstream == CSC:
            return True

        # Only the first 11 characters are parsed.
        # NOTE(review): presumably a 10-digit epoch plus one separator
        # character - confirm against the actual file.
        CSC_utc_time = CSC[0:11]
        upstream_utc_time = upstream[0:11]

        try:
            return int(upstream_utc_time) - int(CSC_utc_time) < settings["out_of_sync_interval"]
        except ValueError:
            return False

View File

@ -18,7 +18,7 @@ class mxlinux(Project):
page = requests.get(data[project]["upstream"]).text
indexOfFile = page.find("mirror.csclub.uwaterloo.ca")
m = re.search(r'(\d+ minutes?)|(\d+ hours?)|(\d+(\.)?\d+ days?)', page[indexOfFile:])
m = re.search(r'(\d+ hour)|(\d+ hours)|(\d+(\.)?\d+ days)', page[indexOfFile:]) # solution from: https://stackoverflow.com/questions/21074100/how-to-convert-standard-timedelta-string-to-timedelta-object/21074460
duration = pd.to_timedelta(m.group(0))
data[project]["out_of_sync_since"] = current_time - duration.total_seconds()

View File

@ -19,7 +19,7 @@ class mxlinux_iso(Project):
page = requests.get(data[project]["upstream"]).text
indexOfFile = page.find("mirror.csclub.uwaterloo.ca")
m = re.search(r'(\d+ minutes?)|(\d+ hours?)|(\d+(\.)?\d+ days?)', page[indexOfFile:])
m = re.search(r'(\d+ hour)|(\d+ hours)|(\d+(\.)?\d+ days)', page[indexOfFile:]) # solution from: https://stackoverflow.com/questions/21074100/how-to-convert-standard-timedelta-string-to-timedelta-object/21074460
duration = pd.to_timedelta(m.group(0))

View File

@ -3,31 +3,6 @@ Contains nongnu class
"""
from project import Project
from shared import CSC_MIRROR
import requests
from datetime import datetime
import time
class nongnu(Project):
    """nongnu class"""

    @staticmethod
    def check(data, project, current_time):
        """Check if project packages are up-to-date.

        Downloads the same timestamp file from the CSC mirror and from the
        upstream and compares the integers on the first line of each.

        Args:
            data: Mapping of project name to its settings. The keys "csc",
                "upstream", "file" and "out_of_sync_interval" are read.
            project: Key of this project inside ``data``.
            current_time: Not used by this checker; accepted because the
                caller passes it to every checker.

        Returns:
            True when both files are identical, or when the upstream
            timestamp is ahead of the mirror's by less than
            "out_of_sync_interval" seconds. False otherwise, including when
            a first line is not an integer.

        Raises:
            requests.exceptions.RequestException: If a download fails,
                times out, or returns an HTTP error status.
        """
        settings = data[project]
        csc_url = CSC_MIRROR + settings["csc"] + settings["file"]
        upstream_url = settings["upstream"] + settings["file"]

        # requests has no default timeout; without one a stalled server
        # would block this worker indefinitely.
        req = requests.get(csc_url, timeout=30)
        req.raise_for_status()
        CSC = req.text

        req = requests.get(upstream_url, timeout=30)
        req.raise_for_status()
        upstream = req.text

        if upstream == CSC:
            return True

        try:
            lag = int(upstream.partition('\n')[0]) - int(CSC.partition('\n')[0])
            return lag < settings["out_of_sync_interval"]
        except ValueError:
            return False

View File

@ -23,8 +23,7 @@ class raspberrypi(Project):
for i in s.find_all("a"): # for a href directories
href = i.attrs['href']
# The raspberry pi server doesn't use a relative path to the parent directory
if href.endswith("/") and href != "../" and href != "/" and href != "/debian/":
if href.endswith("/") and href != "../" and href != "/":
site_next = site+href+"Release"
if site_next not in urls:
@ -45,7 +44,6 @@ class raspberrypi(Project):
cls.scrape(urls1, csc_url)
cls.scrape(urls2, upstream_url)
if (len(urls1) != len(urls2)):
return False
urls1.sort()

View File

@ -34,7 +34,7 @@ class slackware(Project):
hrefs2 = [i.attrs['href'] for i in s2.find_all("a")]
for href in hrefs1: # for a href directories
if href.endswith("/") and href != "../" and href != "/" and not href.startswith("/") and not re.match(r'slackware-([1-7]|8\.0).*', href) and href != "slackware-iso/" and href != "slackware-current/" and href != "slackware-pre-1.0-beta/" and href != "unsupported/" and not href.startswith("http"):
if href.endswith("/") and href != "../" and href != "/" and not href.startswith("/") and not re.match(r'slackware-([1-7]|8\.0).*', href) and href != "slackware-iso/" and href != "slackware-current/" and href != "slackware-pre-1.0-beta/" and href != "unsupported/":
# print(href)
if href not in hrefs2:
return False

View File

@ -5,7 +5,6 @@ Contains ubuntu class
import os
from project import Project
from shared import CSC_MIRROR
from shared import NUM_UBUNTU_RELEASES
import requests
import datefinder # another date finding library
from datetime import timedelta
@ -18,4 +17,4 @@ class ubuntu(Project):
@staticmethod
def check(data, project, current_time):
page = requests.get(data[project]["upstream"]).text
return page.count("Up to date") == NUM_UBUNTU_RELEASES
return page.count("Up to date") == 21

View File

@ -5,7 +5,6 @@ Contains ubuntu_releases class
import os
from project import Project
from shared import CSC_MIRROR
from shared import NUM_UBUNTU_RELEASES
import requests
import datefinder # another date finding library
from datetime import timedelta
@ -18,4 +17,10 @@ class ubuntu_releases(Project):
@staticmethod
def check(data, project, current_time):
page = requests.get(data[project]["upstream"]).text
return page.count("Up to date") == NUM_UBUNTU_RELEASES
indexOfFile = page.find("last verified")
matches = list(datefinder.find_dates(page[indexOfFile:]))
date = matches[0].replace(tzinfo=None) # date is of type datetime.datetime
data[project]["out_of_sync_since"] = date.timestamp()
return(pd.to_datetime(current_time, unit='s') - date <= pd.to_timedelta(data[project]["out_of_sync_interval"], unit='s'))
# https://launchpad.net/ubuntu/+mirror/mirror.csclub.uwaterloo.ca-release

View File

@ -1,4 +1,3 @@
"""Contains shared constants"""
CSC_MIRROR = "http://mirror.csclub.uwaterloo.ca/"
NUM_UBUNTU_RELEASES = 18