From 8195cbb04244b02660d65e06299767fc2a435998 Mon Sep 17 00:00:00 2001 From: Tom Date: Sun, 3 Oct 2021 23:47:55 -0700 Subject: [PATCH] added mxlinux, mxlinux-iso --- README.md | 4 ++++ data.json | 16 ++++++++++++++- main.py | 2 +- projects/mxlinux.py | 26 +++++++++++++++++++++++++ projects/mxlinux_iso.py | 26 +++++++++++++++++++++++++ test.py | 43 ++++++++++++++++++++++++----------------- 6 files changed, 97 insertions(+), 20 deletions(-) create mode 100644 projects/mxlinux.py create mode 100644 projects/mxlinux_iso.py diff --git a/README.md b/README.md index e1c1523..5143771 100644 --- a/README.md +++ b/README.md @@ -24,7 +24,9 @@ raspbian mirror: https://mirror.ox.ac.uk/sites/archive.raspbian.org/archive/ mxlinux: https://sourceforge.net/projects/mx-linux/ (scrap the last day?) +linuxmint: no public repo linuxmint-packages pool: http://rsync-packages.linuxmint.com/pool/ +macPorts: only distfiles has public repo, no timestamp, too large to loop through scientific: https://scientificlinux.org/downloads/sl-mirrors/ (CSC not listed) slackware: https://mirrors.slackware.com/mirrorlist/ https://mirrors.slackware.com/slackware/ ubuntu-ports: http://ports.ubuntu.com/ubuntu-ports/ @@ -37,5 +39,7 @@ tdf: https://download.documentfoundation.org/ ubuntu: https://launchpad.net/ubuntu/+mirror/mirror.csclub.uwaterloo.ca-archive vlc: http://download.videolan.org/pub/videolan/ manjaro +mxlinux +mxlinx-iso: this one seems out of sync on the official tracker for 134 days, which is weird trisquel: https://trisquel.info/mirmon/index.html out of date website \ No newline at end of file diff --git a/data.json b/data.json index 7d37ce9..4ecfb6d 100644 --- a/data.json +++ b/data.json @@ -35,7 +35,7 @@ "file": "centos/TIME" }, "Ceph": { - "out_of_sync_since": null, + "out_of_sync_since": 1633329349, "out_of_sync_interval": 86400, "csc": "ceph/", "upstream": "https://download.ceph.com/", @@ -205,5 +205,19 @@ "csc": "", "upstream": "https://repo.manjaro.org/", "file": "" + }, + "mxlinux": { + "out_of_sync_since": null, + "out_of_sync_interval": 86400, + "csc": "", + "upstream": "http://rsync-mxlinux.org/mirmon/packages.html", + "file": "" + }, + "mxlinux_iso": { + "out_of_sync_since": null, + "out_of_sync_interval": 86400, + "csc": "", + "upstream": "http://rsync-mxlinux.org/mirmon/index.html", + "file": "" } } \ No newline at end of file diff --git a/main.py b/main.py index 6cdae85..a6e27dc 100644 --- a/main.py +++ b/main.py @@ -67,7 +67,7 @@ if __name__ == "__main__": print(f"Failure: {project} does not exist") continue project_class = getattr(sys.modules[__name__], project) - if project == "CPAN" or project == "ubuntu" or project == "manjaro": + if project == "CPAN" or project == "ubuntu" or project == "manjaro" or project == "mxlinux" or project == "mxlinux_iso": checker_result = project_class.check(data, project, current_time) if checker_result: print(f"Success: {project} up-to-date") diff --git a/projects/mxlinux.py b/projects/mxlinux.py new file mode 100644 index 0000000..00a7837 --- /dev/null +++ b/projects/mxlinux.py @@ -0,0 +1,26 @@ +""" +Contains mxlinux class +""" + +import os +from project import Project +from shared import CSC_MIRROR +import requests +import datefinder # another date finding library +from datetime import timedelta +from datetime import datetime +import re +import pandas as pd + +class mxlinux(Project): + """mxlinux class""" + @staticmethod + def check(data, project, current_time): + page = requests.get(data[project]["upstream"]).text + indexOfFile = page.find("mirror.csclub.uwaterloo.ca") + + m = re.search(r'(\d+ hours)|(\d+(\.)?\d+ days)', page[indexOfFile:]) # solution from: https://stackoverflow.com/questions/21074100/how-to-convert-standard-timedelta-string-to-timedelta-object/21074460 + + duration = pd.to_timedelta(m.group(0)) + + return duration <= pd.to_timedelta(data[project]["out_of_sync_interval"], unit='s') \ No newline at end of file diff --git a/projects/mxlinux_iso.py b/projects/mxlinux_iso.py new file mode 100644 index 0000000..11cbc2c --- /dev/null +++ b/projects/mxlinux_iso.py @@ -0,0 +1,26 @@ +""" +Contains mxlinux_iso class +""" + +import os +from project import Project +from shared import CSC_MIRROR +import requests +import datefinder # another date finding library +from datetime import timedelta +from datetime import datetime +import re +import pandas as pd + +class mxlinux_iso(Project): + """mxlinux_iso class""" + @staticmethod + def check(data, project, current_time): + page = requests.get(data[project]["upstream"]).text + indexOfFile = page.find("mirror.csclub.uwaterloo.ca") + + m = re.search(r'(\d+ hours)|(\d+(\.)?\d+ days)', page[indexOfFile:]) # solution from: https://stackoverflow.com/questions/21074100/how-to-convert-standard-timedelta-string-to-timedelta-object/21074460 + + duration = pd.to_timedelta(m.group(0)) + + return duration <= pd.to_timedelta(data[project]["out_of_sync_interval"], unit='s') \ No newline at end of file diff --git a/test.py b/test.py index 04bae05..536a855 100644 --- a/test.py +++ b/test.py @@ -6,11 +6,14 @@ from datetime import datetime from datetime import timedelta import time import pandas as pd - + +# this function is brute force looping through the whole directory and checking dates +# it may sound horrible, but for certain distros, i believe it's indeed the best solution + # lists urls=[] -home_site = "http://ports.ubuntu.com" +home_site = "http://ykf.ca.distfiles.macports.org" # function created def scrape(site): @@ -25,8 +28,8 @@ def scrape(site): href = i.attrs['href'] if href.endswith("/") and href != "../" and href != "/": - if home_site+href in urls: # avoids the link to parent directory - continue + """if home_site+href in urls: # avoids the link to parent directory + continue""" site_next = site+href if site_next not in urls: @@ -38,7 +41,7 @@ def scrape(site): def get_latest_date(web_dir): page = requests.get(site).text - str_dates = re.findall(r'\d{4}-\d{2}-\d{2} \d{2}:\d{2}', page) + str_dates = re.findall(r'(\d{2}-\w{3}-\d{4} \d{2}:\d{2})|(\d{4}-\d{2}-\d{2} \d{2}:\d{2})', page) dates = [list(datefinder.find_dates(date))[0] for date in str_dates] # for date in dates: @@ -50,27 +53,31 @@ def get_latest_date(web_dir): if __name__ =="__main__": # website to be scrape - # site="http://ports.ubuntu.com/ubuntu-ports/" + #site="http://ykf.ca.distfiles.macports.org/MacPorts/mpdistfiles/" # works on: https://www.x.org/releases/ + # https://mirror.csclub.uwaterloo.ca/linuxmint/ #works wonders for linuxmint + # unfortunately, linuxmint does not have a public repo, the worldwide mirror LayerOnline on https://linuxmint.com/mirrors.php seems like the best choice # calling function - # scrape(site) + #scrape(site) - # latest_date = get_latest_date(urls[0]) + #latest_date = get_latest_date(urls[0]) # get_latest_date(urls[0]) - # for dir in urls: - # latest_date2 = get_latest_date(dir) - # if (latest_date2 >= latest_date): - # latest_date = latest_date2 + #for dir in urls: + # latest_date2 = get_latest_date(dir) + # if (latest_date2 >= latest_date): + # latest_date = latest_date2 - # print(latest_date) + #print(latest_date) - page = requests.get("https://repo.manjaro.org/").text - indexOfFile = page.find("mirror.csclub.uwaterloo.ca/manjaro") + page = requests.get("http://rsync-mxlinux.org/mirmon/index.html").text + indexOfFile = page.find("mirror.csclub.uwaterloo.ca") - m = re.search(r'(?P\d+):(?P\d+)', page[indexOfFile:]) # solution from: https://stackoverflow.com/questions/21074100/how-to-convert-standard-timedelta-string-to-timedelta-object/21074460 - duration = timedelta(**{key: float(val) for key, val in m.groupdict().items()}) - print(duration) + m = re.search(r'(\d+ hours)|(\d+(\.)?\d+ days)', page[indexOfFile:]) # solution from: https://stackoverflow.com/questions/21074100/how-to-convert-standard-timedelta-string-to-timedelta-object/21074460 + + duration = pd.to_timedelta(m.group(0)) print (duration <= pd.to_timedelta(86400, unit='s')) + + # https://launchpad.net/ubuntu/+mirror/mirror.csclub.uwaterloo.ca-archive \ No newline at end of file