diff --git a/main.py b/main.py index a91cb70..46fb640 100644 --- a/main.py +++ b/main.py @@ -7,59 +7,66 @@ This mirror status checker determines whether CSC mirror is up-to-date with upst import time import sys import requests +from multiprocessing import Pool, Manager from projects import * import json +NUM_THREAD = 16 + +current_time = int(time.time()) def safe_print(*args, **kwargs): # When run with 'chronic' and 'timeout', stdout gets suppressed # due to buffering. Make sure to always flush the output. print(*args, **kwargs, flush=True) +def check_project(args): + project, data = args + try: + project_class = getattr(sys.modules[__name__], project) -if __name__ == "__main__": + # Skip projects we no longer mirror + if data[project].get('exclude', False): + return True - exit_code = 0 + checker_result = project_class.check(data, project, current_time) + if checker_result: + data[project]["out_of_sync_since"] = None + safe_print(f"Success: {project} up-to-date") + return True + + elif (data[project]["out_of_sync_since"] is not None + and current_time - data[project]["out_of_sync_since"] > data[project]["out_of_sync_interval"]): + safe_print(f"Failure: {project} out-of-sync") + return False + + else: + if data[project]["out_of_sync_since"] is None: data[project]["out_of_sync_since"] = current_time + return True + + except requests.exceptions.RequestException as err: + safe_print(f"Error: {project}\n{err}") + + return False + +def main(): data_file = 'data.json' if len(sys.argv) > 1: data_file = sys.argv[1] + + manager = Manager() data = json.load(open(data_file)) + sync_data = manager.dict({k: manager.dict(v) for k, v in data.items()}) - current_time = int(time.time()) - for project in data: - try: - project_class = getattr(sys.modules[__name__], project) + with Pool(NUM_THREAD) as pool: + all_pass = all(pool.map(check_project, ((k, sync_data) for k in data.keys()))) - # Skip projects we no longer mirror - if data[project].get('exclude', False): - continue - checker_result = project_class.check(data, project, 
current_time) - if project in ["CPAN", "ubuntu_releases", "manjaro", "mxlinux", "cran", "ctan", "gentooportage", "Artix"]: - if checker_result: - safe_print(f"Success: {project} up-to-date") - else: - safe_print(f"Failure: {project} out-of-sync") - - # Exit with non-zero status if any of the projects are not up-to-date - exit_code = 1 - continue - if checker_result: - data[project]["out_of_sync_since"] = None - elif data[project]["out_of_sync_since"] is None: - data[project]["out_of_sync_since"] = current_time - elif current_time - data[project]["out_of_sync_since"] \ - > data[project]["out_of_sync_interval"]: - safe_print(f"Failure: {project} out-of-sync") - - # Exit with non-zero status if any of the projects are not up-to-date - exit_code = 1 - continue - safe_print(f"Success: {project} up-to-date") - except requests.exceptions.RequestException as err: - safe_print(f"Error: {project}\n{err}") with open(data_file, "w", encoding="utf-8") as file: - json.dump(data, file, indent='\t') + json.dump({k: dict(v) for k, v in sync_data.items()}, file, indent=' ') - sys.exit(exit_code) + sys.exit(0 if all_pass else 1) + +if __name__ == "__main__": + main() diff --git a/projects/artix.py b/projects/artix.py index c3ace18..d1f1b19 100644 --- a/projects/artix.py +++ b/projects/artix.py @@ -39,6 +39,6 @@ class Artix(Project): if outdated_since is not None: data[project]['out_of_sync_since'] = int(outdated_since.timestamp()) - return (datetime.now() - outdated_since).total_seconds() < data[project]['out_of_sync_interval'] + return current_time - data[project]['out_of_sync_since'] < data[project]['out_of_sync_interval'] return True diff --git a/projects/cpan.py b/projects/cpan.py index 24701e4..478174d 100644 --- a/projects/cpan.py +++ b/projects/cpan.py @@ -16,5 +16,6 @@ class CPAN(Project): res_json = requests.get("http://mirrors.cpan.org/cpan-json.txt").json() for mirror in res_json: if mirror["url"] == f"{CSC_MIRROR}CPAN/": - return current_time - int(mirror["age"]) <= 
data[project]["out_of_sync_interval"] + data[project]["out_of_sync_since"] = int(mirror["age"]) + return current_time - data[project]["out_of_sync_since"] <= data[project]["out_of_sync_interval"] return False diff --git a/projects/cran.py b/projects/cran.py index 185cf46..17bac02 100644 --- a/projects/cran.py +++ b/projects/cran.py @@ -8,7 +8,6 @@ from shared import CSC_MIRROR import requests import datefinder # another date finding library from datetime import timedelta -from datetime import datetime import re import pandas as pd @@ -22,5 +21,6 @@ class cran(Project): m = re.search(r'(\d+ hour)|(\d+ hours)|(\d+(\.)?\d+ days)', page[indexOfFile:]) # solution from: https://stackoverflow.com/questions/21074100/how-to-convert-standard-timedelta-string-to-timedelta-object/21074460 duration = pd.to_timedelta(m.group(0)) + data[project]["out_of_sync_since"] = current_time - duration.total_seconds() - return duration <= pd.to_timedelta(data[project]["out_of_sync_interval"], unit='s') \ No newline at end of file + return duration <= pd.to_timedelta(data[project]["out_of_sync_interval"], unit='s') diff --git a/projects/ctan.py b/projects/ctan.py index 788a93b..4a834d8 100644 --- a/projects/ctan.py +++ b/projects/ctan.py @@ -8,7 +8,6 @@ from shared import CSC_MIRROR import requests import datefinder # another date finding library from datetime import timedelta -from datetime import datetime import re import pandas as pd @@ -22,5 +21,6 @@ class ctan(Project): m = re.search(r'(\d+ hour)|(\d+ hours)|(\d+(\.)?\d+ days)', page[indexOfFile:]) # solution from: https://stackoverflow.com/questions/21074100/how-to-convert-standard-timedelta-string-to-timedelta-object/21074460 duration = pd.to_timedelta(m.group(0)) + data[project]["out_of_sync_since"] = current_time - duration.total_seconds() return duration <= pd.to_timedelta(data[project]["out_of_sync_interval"], unit='s') diff --git a/projects/gentooportage.py b/projects/gentooportage.py index d216774..8ddffe5 100644 --- 
a/projects/gentooportage.py +++ b/projects/gentooportage.py @@ -9,7 +9,6 @@ from project import Project import requests import datefinder # another date finding library from datetime import timedelta -from datetime import datetime import re import pandas as pd @@ -42,8 +41,9 @@ class GentooPortage(Project): page = requests.get(data[project]["upstream"]).text indexOfFile = page.find("rsync4.ca.gentoo.org") - m = re.search(r'(\d+ hour)|(\d+ hours)|(\d+(\.)?\d+ days)', page[indexOfFile:]) + m = re.search(r'(\d+ minutes?)|(\d+ hours?)|(\d+(\.)?\d+ days?)', page[indexOfFile:]) duration = pd.to_timedelta(m.group(0)) + data[project]["out_of_sync_since"] = current_time - duration.total_seconds() return duration <= pd.to_timedelta(data[project]["out_of_sync_interval"], unit='s') diff --git a/projects/manjaro.py b/projects/manjaro.py index 95066fc..b181700 100644 --- a/projects/manjaro.py +++ b/projects/manjaro.py @@ -20,7 +20,8 @@ class manjaro(Project): m = re.search(r'(?P\d+):(?P\d+)', page[indexOfFile:]) # solution from: https://stackoverflow.com/questions/21074100/how-to-convert-standard-timedelta-string-to-timedelta-object/21074460 duration = timedelta(**{key: float(val) for key, val in m.groupdict().items()}) + data[project]["out_of_sync_since"] = current_time - duration.total_seconds() return duration <= pd.to_timedelta(data[project]["out_of_sync_interval"], unit='s') - # https://launchpad.net/ubuntu/+mirror/mirror.csclub.uwaterloo.ca-archive \ No newline at end of file + # https://launchpad.net/ubuntu/+mirror/mirror.csclub.uwaterloo.ca-archive diff --git a/projects/mxlinux.py b/projects/mxlinux.py index 9ebc21f..3529470 100644 --- a/projects/mxlinux.py +++ b/projects/mxlinux.py @@ -8,7 +8,6 @@ from shared import CSC_MIRROR import requests import datefinder # another date finding library from datetime import timedelta -from datetime import datetime import re import pandas as pd @@ -22,5 +21,6 @@ class mxlinux(Project): m = re.search(r'(\d+ hour)|(\d+ 
hours)|(\d+(\.)?\d+ days)', page[indexOfFile:]) # solution from: https://stackoverflow.com/questions/21074100/how-to-convert-standard-timedelta-string-to-timedelta-object/21074460 duration = pd.to_timedelta(m.group(0)) + data[project]["out_of_sync_since"] = current_time - duration.total_seconds() - return duration <= pd.to_timedelta(data[project]["out_of_sync_interval"], unit='s') \ No newline at end of file + return duration <= pd.to_timedelta(data[project]["out_of_sync_interval"], unit='s') diff --git a/projects/mxlinux_iso.py b/projects/mxlinux_iso.py index 59de585..a8e8b74 100644 --- a/projects/mxlinux_iso.py +++ b/projects/mxlinux_iso.py @@ -8,7 +8,6 @@ from shared import CSC_MIRROR import requests import datefinder # another date finding library from datetime import timedelta -from datetime import datetime import re import pandas as pd diff --git a/projects/ubuntu_releases.py b/projects/ubuntu_releases.py index 0cfc172..68e4095 100644 --- a/projects/ubuntu_releases.py +++ b/projects/ubuntu_releases.py @@ -19,7 +19,8 @@ class ubuntu_releases(Project): page = requests.get(data[project]["upstream"]).text indexOfFile = page.find("last verified") matches = list(datefinder.find_dates(page[indexOfFile:])) - date = matches[0] # date is of type datetime.datetime - return(pd.to_datetime(current_time, unit='s') - date.replace(tzinfo=None) <= pd.to_timedelta(data[project]["out_of_sync_interval"], unit='s')) + date = matches[0].replace(tzinfo=None) # date is of type datetime.datetime + data[project]["out_of_sync_since"] = date.timestamp() + return(pd.to_datetime(current_time, unit='s') - date <= pd.to_timedelta(data[project]["out_of_sync_interval"], unit='s')) - # https://launchpad.net/ubuntu/+mirror/mirror.csclub.uwaterloo.ca-release \ No newline at end of file + # https://launchpad.net/ubuntu/+mirror/mirror.csclub.uwaterloo.ca-release