Merge pull request 'parallelize mirror checking and remove special case matching' (#5) from multithread into master
Reviewed-on: #5
This commit is contained in:
commit
250f358f4e
77
main.py
77
main.py
|
@ -7,59 +7,66 @@ This mirror status checker determines whether CSC mirror is up-to-date with upst
|
|||
import time
|
||||
import sys
|
||||
import requests
|
||||
from multiprocessing import Pool, Manager
|
||||
|
||||
from projects import *
|
||||
import json
|
||||
|
||||
NUM_THREAD = 16
|
||||
|
||||
current_time = int(time.time())
|
||||
|
||||
def safe_print(*args, **kwargs):
    """Print like the built-in ``print`` but always flush the stream.

    When run with 'chronic' and 'timeout', stdout gets suppressed due to
    buffering, so every call forces a flush.

    Args:
        *args: Positional values forwarded to ``print``.
        **kwargs: Keyword arguments forwarded to ``print`` (for example
            ``sep``, ``end``, ``file``). A caller-supplied ``flush`` is
            overridden with ``True``.
    """
    # Set the key instead of passing ``flush=True`` next to ``**kwargs``:
    # a caller that supplied ``flush`` would otherwise trigger a TypeError
    # for a duplicated keyword argument.
    kwargs["flush"] = True
    print(*args, **kwargs)
|
||||
|
||||
def check_project(args):
|
||||
project, data = args
|
||||
try:
|
||||
project_class = getattr(sys.modules[__name__], project)
|
||||
|
||||
if __name__ == "__main__":
|
||||
# Skip projects we no longer mirror
|
||||
if data[project].get('exclude', False):
|
||||
return True
|
||||
|
||||
exit_code = 0
|
||||
checker_result = project_class.check(data, project, current_time)
|
||||
|
||||
if checker_result:
|
||||
data[project]["out_of_sync_since"] = None
|
||||
safe_print(f"Success: {project} up-to-date")
|
||||
return True
|
||||
|
||||
elif (data[project]["out_of_sync_since"] is not None
|
||||
and current_time - data[project]["out_of_sync_since"] > data[project]["out_of_sync_interval"]):
|
||||
safe_print(f"Failure: {project} out-of-sync")
|
||||
return False
|
||||
|
||||
else:
|
||||
data[project]["out_of_sync_since"] = current_time
|
||||
return True
|
||||
|
||||
except requests.exceptions.RequestException as err:
|
||||
safe_print(f"Error: {project}\n{err}")
|
||||
|
||||
return False
|
||||
|
||||
def main():
|
||||
data_file = 'data.json'
|
||||
if len(sys.argv) > 1:
|
||||
data_file = sys.argv[1]
|
||||
|
||||
manager = Manager()
|
||||
data = json.load(open(data_file))
|
||||
sync_data = manager.dict({k: manager.dict(v) for k, v in data.items()})
|
||||
|
||||
current_time = int(time.time())
|
||||
for project in data:
|
||||
try:
|
||||
project_class = getattr(sys.modules[__name__], project)
|
||||
with Pool(NUM_THREAD) as pool:
|
||||
all_pass = all(pool.imap(check_project, ((k, sync_data) for k in data.keys())))
|
||||
|
||||
# Skip projects we no longer mirror
|
||||
if data[project].get('exclude', False):
|
||||
continue
|
||||
checker_result = project_class.check(data, project, current_time)
|
||||
if project in ["CPAN", "ubuntu_releases", "manjaro", "mxlinux", "cran", "ctan", "gentooportage", "Artix"]:
|
||||
if checker_result:
|
||||
safe_print(f"Success: {project} up-to-date")
|
||||
else:
|
||||
safe_print(f"Failure: {project} out-of-sync")
|
||||
|
||||
# Exit with non-zero status if any of the projects are not up-to-date
|
||||
exit_code = 1
|
||||
continue
|
||||
if checker_result:
|
||||
data[project]["out_of_sync_since"] = None
|
||||
elif data[project]["out_of_sync_since"] is None:
|
||||
data[project]["out_of_sync_since"] = current_time
|
||||
elif current_time - data[project]["out_of_sync_since"] \
|
||||
> data[project]["out_of_sync_interval"]:
|
||||
safe_print(f"Failure: {project} out-of-sync")
|
||||
|
||||
# Exit with non-zero status if any of the projects are not up-to-date
|
||||
exit_code = 1
|
||||
continue
|
||||
safe_print(f"Success: {project} up-to-date")
|
||||
except requests.exceptions.RequestException as err:
|
||||
safe_print(f"Error: {project}\n{err}")
|
||||
with open(data_file, "w", encoding="utf-8") as file:
|
||||
json.dump(data, file, indent='\t')
|
||||
json.dump({k: dict(v) for k, v in sync_data.items()}, file, indent=' ')
|
||||
|
||||
sys.exit(exit_code)
|
||||
sys.exit(0 if all_pass else 1)
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
|
|
@ -39,6 +39,6 @@ class Artix(Project):
|
|||
|
||||
if outdated_since is not None:
|
||||
data[project]['out_of_sync_since'] = int(outdated_since.timestamp())
|
||||
return (datetime.now() - outdated_since).total_seconds() < data[project]['out_of_sync_interval']
|
||||
return current_time - data[project]['out_of_sync_since'] < data[project]['out_of_sync_interval']
|
||||
|
||||
return True
|
||||
|
|
|
@ -16,5 +16,6 @@ class CPAN(Project):
|
|||
res_json = requests.get("http://mirrors.cpan.org/cpan-json.txt").json()
|
||||
for mirror in res_json:
|
||||
if mirror["url"] == f"{CSC_MIRROR}CPAN/":
|
||||
return current_time - int(mirror["age"]) <= data[project]["out_of_sync_interval"]
|
||||
data[project]["out_of_sync_since"] = int(mirror["age"])
|
||||
return current_time - data[project]["out_of_sync_since"] <= data[project]["out_of_sync_interval"]
|
||||
return False
|
||||
|
|
|
@ -8,7 +8,6 @@ from shared import CSC_MIRROR
|
|||
import requests
|
||||
import datefinder # another date finding library
|
||||
from datetime import timedelta
|
||||
from datetime import datetime
|
||||
import re
|
||||
import pandas as pd
|
||||
|
||||
|
@ -22,5 +21,6 @@ class cran(Project):
|
|||
m = re.search(r'(\d+ hour)|(\d+ hours)|(\d+(\.)?\d+ days)', page[indexOfFile:]) # solution from: https://stackoverflow.com/questions/21074100/how-to-convert-standard-timedelta-string-to-timedelta-object/21074460
|
||||
|
||||
duration = pd.to_timedelta(m.group(0))
|
||||
data[project]["out_of_sync_since"] = current_time - duration.total_seconds()
|
||||
|
||||
return duration <= pd.to_timedelta(data[project]["out_of_sync_interval"], unit='s')
|
||||
return duration <= pd.to_timedelta(data[project]["out_of_sync_interval"], unit='s')
|
||||
|
|
|
@ -8,7 +8,6 @@ from shared import CSC_MIRROR
|
|||
import requests
|
||||
import datefinder # another date finding library
|
||||
from datetime import timedelta
|
||||
from datetime import datetime
|
||||
import re
|
||||
import pandas as pd
|
||||
|
||||
|
@ -22,5 +21,6 @@ class ctan(Project):
|
|||
m = re.search(r'(\d+ hour)|(\d+ hours)|(\d+(\.)?\d+ days)', page[indexOfFile:]) # solution from: https://stackoverflow.com/questions/21074100/how-to-convert-standard-timedelta-string-to-timedelta-object/21074460
|
||||
|
||||
duration = pd.to_timedelta(m.group(0))
|
||||
data[project]["out_of_sync_since"] = datetime.now() - duration.total_seconds()
|
||||
|
||||
return duration <= pd.to_timedelta(data[project]["out_of_sync_interval"], unit='s')
|
||||
|
|
|
@ -9,7 +9,6 @@ from project import Project
|
|||
import requests
|
||||
import datefinder # another date finding library
|
||||
from datetime import timedelta
|
||||
from datetime import datetime
|
||||
import re
|
||||
import pandas as pd
|
||||
|
||||
|
@ -42,8 +41,9 @@ class GentooPortage(Project):
|
|||
page = requests.get(data[project]["upstream"]).text
|
||||
indexOfFile = page.find("rsync4.ca.gentoo.org")
|
||||
|
||||
m = re.search(r'(\d+ hour)|(\d+ hours)|(\d+(\.)?\d+ days)', page[indexOfFile:])
|
||||
m = re.search(r'(\d+ minutes?)|(\d+ hours?)|(\d+(\.)?\d+ days?)', page[indexOfFile:])
|
||||
|
||||
duration = pd.to_timedelta(m.group(0))
|
||||
data[project]["out_of_sync_since"] = current_time - duration.total_seconds()
|
||||
|
||||
return duration <= pd.to_timedelta(data[project]["out_of_sync_interval"], unit='s')
|
||||
|
|
|
@ -20,7 +20,8 @@ class manjaro(Project):
|
|||
|
||||
m = re.search(r'(?P<hours>\d+):(?P<minutes>\d+)', page[indexOfFile:]) # solution from: https://stackoverflow.com/questions/21074100/how-to-convert-standard-timedelta-string-to-timedelta-object/21074460
|
||||
duration = timedelta(**{key: float(val) for key, val in m.groupdict().items()})
|
||||
data[project]["out_of_sync_since"] = current_time - duration.total_seconds()
|
||||
|
||||
return duration <= pd.to_timedelta(data[project]["out_of_sync_interval"], unit='s')
|
||||
|
||||
# https://launchpad.net/ubuntu/+mirror/mirror.csclub.uwaterloo.ca-archive
|
||||
# https://launchpad.net/ubuntu/+mirror/mirror.csclub.uwaterloo.ca-archive
|
||||
|
|
|
@ -8,7 +8,6 @@ from shared import CSC_MIRROR
|
|||
import requests
|
||||
import datefinder # another date finding library
|
||||
from datetime import timedelta
|
||||
from datetime import datetime
|
||||
import re
|
||||
import pandas as pd
|
||||
|
||||
|
@ -22,5 +21,6 @@ class mxlinux(Project):
|
|||
m = re.search(r'(\d+ hour)|(\d+ hours)|(\d+(\.)?\d+ days)', page[indexOfFile:]) # solution from: https://stackoverflow.com/questions/21074100/how-to-convert-standard-timedelta-string-to-timedelta-object/21074460
|
||||
|
||||
duration = pd.to_timedelta(m.group(0))
|
||||
data[project]["out_of_sync_since"] = current_time - duration.total_seconds()
|
||||
|
||||
return duration <= pd.to_timedelta(data[project]["out_of_sync_interval"], unit='s')
|
||||
return duration <= pd.to_timedelta(data[project]["out_of_sync_interval"], unit='s')
|
||||
|
|
|
@ -8,7 +8,6 @@ from shared import CSC_MIRROR
|
|||
import requests
|
||||
import datefinder # another date finding library
|
||||
from datetime import timedelta
|
||||
from datetime import datetime
|
||||
import re
|
||||
import pandas as pd
|
||||
|
||||
|
|
|
@ -19,7 +19,8 @@ class ubuntu_releases(Project):
|
|||
page = requests.get(data[project]["upstream"]).text
|
||||
indexOfFile = page.find("last verified")
|
||||
matches = list(datefinder.find_dates(page[indexOfFile:]))
|
||||
date = matches[0] # date is of type datetime.datetime
|
||||
return(pd.to_datetime(current_time, unit='s') - date.replace(tzinfo=None) <= pd.to_timedelta(data[project]["out_of_sync_interval"], unit='s'))
|
||||
date = matches[0].replace(tzinfo=None) # date is of type datetime.datetime
|
||||
data[project]["out_of_sync_since"] = date.timestamp()
|
||||
return(pd.to_datetime(current_time, unit='s') - date <= pd.to_timedelta(data[project]["out_of_sync_interval"], unit='s'))
|
||||
|
||||
# https://launchpad.net/ubuntu/+mirror/mirror.csclub.uwaterloo.ca-release
|
||||
# https://launchpad.net/ubuntu/+mirror/mirror.csclub.uwaterloo.ca-release
|
||||
|
|
Loading…
Reference in New Issue