Merge pull request 'parallelize mirror checking and remove special case matching' (#5) from multithread into master

Reviewed-on: #5
This commit is contained in:
Raymond Li 2022-02-16 16:23:17 -05:00
commit 250f358f4e
10 changed files with 58 additions and 49 deletions

77
main.py
View File

@ -7,59 +7,66 @@ This mirror status checker determines whether CSC mirror is up-to-date with upst
import time
import sys
import requests
from multiprocessing import Pool, Manager
from projects import *
import json
# Size of the multiprocessing Pool that main() creates to run the checks.
NUM_THREAD = 16
# Unix timestamp in whole seconds, captured once when the module is loaded;
# check_project() passes it to every checker and compares it against each
# project's "out_of_sync_since" value.
current_time = int(time.time())
def safe_print(*args, **kwargs):
    """Print like the built-in ``print`` but always flush the output stream.

    When run with 'chronic' and 'timeout', stdout gets suppressed due to
    buffering, so every call forces a flush.

    Args:
        *args: Objects to print; forwarded unchanged to ``print``.
        **kwargs: Keyword arguments for ``print`` (``sep``, ``end``, ``file``).
            A caller-supplied ``flush`` is overridden with ``True`` instead of
            raising ``TypeError`` for a duplicated keyword argument.
    """
    kwargs["flush"] = True
    print(*args, **kwargs)
def check_project(args):
    """Check a single mirrored project and report whether it passes.

    Takes one tuple argument so it can be mapped directly over a worker pool.

    Args:
        args: A ``(project, data)`` pair. ``project`` is the name of a checker
            class looked up as an attribute of this module. ``data`` maps
            project names to their mutable state mapping; the keys read here
            are ``exclude``, ``out_of_sync_since`` and
            ``out_of_sync_interval``.

    Returns:
        ``True`` when the project is excluded, up to date, or has been out of
        sync for no longer than its ``out_of_sync_interval``. ``False`` when it
        has been out of sync for longer than that, or when the upstream
        request raised ``requests.exceptions.RequestException``.
    """
    project, data = args
    try:
        project_class = getattr(sys.modules[__name__], project)

        # Skip projects we no longer mirror
        if data[project].get('exclude', False):
            return True

        checker_result = project_class.check(data, project, current_time)
        # Fetched after the check so any timestamp the checker stored is seen.
        state = data[project]

        if checker_result:
            state["out_of_sync_since"] = None
            safe_print(f"Success: {project} up-to-date")
            return True

        out_of_sync_since = state["out_of_sync_since"]
        if out_of_sync_since is None:
            # First failing run: start the grace-period clock.
            state["out_of_sync_since"] = current_time
            return True

        if current_time - out_of_sync_since > state["out_of_sync_interval"]:
            safe_print(f"Failure: {project} out-of-sync")
            return False

        # Still inside the grace period. Keep the recorded start time: resetting
        # it on every run would mean the interval could never elapse.
        return True
    except requests.exceptions.RequestException as err:
        safe_print(f"Error: {project}\n{err}")
        return False
def main():
    """Check every project in the data file in parallel, then exit.

    The data file path comes from ``sys.argv[1]`` when given and defaults to
    ``data.json``. Each project is handed to ``check_project`` in a pool of
    ``NUM_THREAD`` workers. The per-project state, including any updates made
    by the workers, is written back to the same file. The process exits with
    status 0 if every check passed and 1 otherwise.
    """
    data_file = sys.argv[1] if len(sys.argv) > 1 else 'data.json'

    # Read with the same encoding the file is written with, and close the
    # handle deterministically.
    with open(data_file, encoding="utf-8") as file:
        data = json.load(file)

    manager = Manager()
    # Manager-backed dicts so that state changes made inside worker processes
    # are visible here when the file is written back.
    sync_data = manager.dict({k: manager.dict(v) for k, v in data.items()})

    with Pool(NUM_THREAD) as pool:
        # Collect every result before reducing. all() over the lazy imap
        # iterator would stop at the first failure, and leaving the ``with``
        # block terminates the pool, dropping the checks still in flight.
        results = list(pool.imap(check_project, ((name, sync_data) for name in data)))

    with open(data_file, "w", encoding="utf-8") as file:
        json.dump({k: dict(v) for k, v in sync_data.items()}, file, indent=' ')

    # Exit with non-zero status if any of the projects are not up-to-date
    sys.exit(0 if all(results) else 1)


if __name__ == "__main__":
    main()

View File

@ -39,6 +39,6 @@ class Artix(Project):
if outdated_since is not None:
data[project]['out_of_sync_since'] = int(outdated_since.timestamp())
return (datetime.now() - outdated_since).total_seconds() < data[project]['out_of_sync_interval']
return current_time - data[project]['out_of_sync_since'] < data[project]['out_of_sync_interval']
return True

View File

@ -16,5 +16,6 @@ class CPAN(Project):
res_json = requests.get("http://mirrors.cpan.org/cpan-json.txt").json()
for mirror in res_json:
if mirror["url"] == f"{CSC_MIRROR}CPAN/":
return current_time - int(mirror["age"]) <= data[project]["out_of_sync_interval"]
data[project]["out_of_sync_since"] = int(mirror["age"])
return current_time - data[project]["out_of_sync_since"] <= data[project]["out_of_sync_interval"]
return False

View File

@ -8,7 +8,6 @@ from shared import CSC_MIRROR
import requests
import datefinder # another date finding library
from datetime import timedelta
from datetime import datetime
import re
import pandas as pd
@ -22,5 +21,6 @@ class cran(Project):
m = re.search(r'(\d+ hour)|(\d+ hours)|(\d+(\.)?\d+ days)', page[indexOfFile:]) # solution from: https://stackoverflow.com/questions/21074100/how-to-convert-standard-timedelta-string-to-timedelta-object/21074460
duration = pd.to_timedelta(m.group(0))
data[project]["out_of_sync_since"] = current_time - duration.total_seconds()
return duration <= pd.to_timedelta(data[project]["out_of_sync_interval"], unit='s')
return duration <= pd.to_timedelta(data[project]["out_of_sync_interval"], unit='s')

View File

@ -8,7 +8,6 @@ from shared import CSC_MIRROR
import requests
import datefinder # another date finding library
from datetime import timedelta
from datetime import datetime
import re
import pandas as pd
@ -22,5 +21,6 @@ class ctan(Project):
m = re.search(r'(\d+ hour)|(\d+ hours)|(\d+(\.)?\d+ days)', page[indexOfFile:]) # solution from: https://stackoverflow.com/questions/21074100/how-to-convert-standard-timedelta-string-to-timedelta-object/21074460
duration = pd.to_timedelta(m.group(0))
data[project]["out_of_sync_since"] = datetime.now() - duration.total_seconds()
return duration <= pd.to_timedelta(data[project]["out_of_sync_interval"], unit='s')

View File

@ -9,7 +9,6 @@ from project import Project
import requests
import datefinder # another date finding library
from datetime import timedelta
from datetime import datetime
import re
import pandas as pd
@ -42,8 +41,9 @@ class GentooPortage(Project):
page = requests.get(data[project]["upstream"]).text
indexOfFile = page.find("rsync4.ca.gentoo.org")
m = re.search(r'(\d+ hour)|(\d+ hours)|(\d+(\.)?\d+ days)', page[indexOfFile:])
m = re.search(r'(\d+ minutes?)|(\d+ hours?)|(\d+(\.)?\d+ days?)', page[indexOfFile:])
duration = pd.to_timedelta(m.group(0))
data[project]["out_of_sync_since"] = current_time - duration.total_seconds()
return duration <= pd.to_timedelta(data[project]["out_of_sync_interval"], unit='s')

View File

@ -20,7 +20,8 @@ class manjaro(Project):
m = re.search(r'(?P<hours>\d+):(?P<minutes>\d+)', page[indexOfFile:]) # solution from: https://stackoverflow.com/questions/21074100/how-to-convert-standard-timedelta-string-to-timedelta-object/21074460
duration = timedelta(**{key: float(val) for key, val in m.groupdict().items()})
data[project]["out_of_sync_since"] = current_time - duration.total_seconds()
return duration <= pd.to_timedelta(data[project]["out_of_sync_interval"], unit='s')
# https://launchpad.net/ubuntu/+mirror/mirror.csclub.uwaterloo.ca-archive
# https://launchpad.net/ubuntu/+mirror/mirror.csclub.uwaterloo.ca-archive

View File

@ -8,7 +8,6 @@ from shared import CSC_MIRROR
import requests
import datefinder # another date finding library
from datetime import timedelta
from datetime import datetime
import re
import pandas as pd
@ -22,5 +21,6 @@ class mxlinux(Project):
m = re.search(r'(\d+ hour)|(\d+ hours)|(\d+(\.)?\d+ days)', page[indexOfFile:]) # solution from: https://stackoverflow.com/questions/21074100/how-to-convert-standard-timedelta-string-to-timedelta-object/21074460
duration = pd.to_timedelta(m.group(0))
data[project]["out_of_sync_since"] = current_time - duration.total_seconds()
return duration <= pd.to_timedelta(data[project]["out_of_sync_interval"], unit='s')
return duration <= pd.to_timedelta(data[project]["out_of_sync_interval"], unit='s')

View File

@ -8,7 +8,6 @@ from shared import CSC_MIRROR
import requests
import datefinder # another date finding library
from datetime import timedelta
from datetime import datetime
import re
import pandas as pd

View File

@ -19,7 +19,8 @@ class ubuntu_releases(Project):
page = requests.get(data[project]["upstream"]).text
indexOfFile = page.find("last verified")
matches = list(datefinder.find_dates(page[indexOfFile:]))
date = matches[0] # date is of type datetime.datetime
return(pd.to_datetime(current_time, unit='s') - date.replace(tzinfo=None) <= pd.to_timedelta(data[project]["out_of_sync_interval"], unit='s'))
date = matches[0].replace(tzinfo=None) # date is of type datetime.datetime
data[project]["out_of_sync_since"] = date.timestamp()
return(pd.to_datetime(current_time, unit='s') - date <= pd.to_timedelta(data[project]["out_of_sync_interval"], unit='s'))
# https://launchpad.net/ubuntu/+mirror/mirror.csclub.uwaterloo.ca-release
# https://launchpad.net/ubuntu/+mirror/mirror.csclub.uwaterloo.ca-release