diff --git a/README.md b/README.md index c44288f..ff5343a 100644 --- a/README.md +++ b/README.md @@ -29,12 +29,15 @@ linuxmint-packages pool: http://rsync-packages.linuxmint.com/pool/ macPorts: only distfiles has public repo, no timestamp, too large to loop through NetBSD: http://ftp.netbsd.org/pub/NetBSD/ has public repo, no timestamp, web directory hard to loop through, no mirror tracker opensuse: http://download.opensuse.org/ has public repo, a possible timestamp called latest in history, our mirror doesn't have this file tho, no mirror tracker -scientific: https://scientificlinux.org/downloads/sl-mirrors/ (CSC not listed) -slackware: https://mirrors.slackware.com/mirrorlist/ https://mirrors.slackware.com/slackware/ -ubuntu-ports: http://ports.ubuntu.com/ubuntu-ports/ -ubuntu-ports-releases: https://cdimage.ubuntu.com/releases/ -ubuntu-releases: https://releases.ubuntu.com/ -x.org: https://www.x.org/releases/ +puppylinux: https://distro.ibiblio.org/puppylinux/ has public repo, no timestamp, too hard to loop through, not likely to have a mirror tracker +racket: no public repo, no timestamp, no mirror status tracker +raspberry pi: currently no public repo, no timestamp, no mirror status tracker +sagemath: don't know how to deal with this, it's a website +salt stack: don't know how to deal with this, it's a website +scientific: https://scientificlinux.org/downloads/sl-mirrors/ would be easy to scrape the mirror status page, except that csc is not listed here +ubuntu-ports-releases: https://cdimage.ubuntu.com/releases/ has public repo, no timestamp, no status tracker +x.org: https://www.x.org/releases/ no timestamp, but candidate for brute force looping since it has few folders, no status tracker +Xiph: no timestamp, too big to loop through, no status tracker done: tdf: https://download.documentfoundation.org/ @@ -46,5 +49,10 @@ mxlinux-iso: this one seems out of sync on the official tracker for 134 days, wh mysql: http://mirrors.sunsite.dk/mysql/ nongnu: http://download.savannah.nongnu.org/releases/ https://savannah.gnu.org/maintenance/Mirmon/ http://download.savannah.gnu.org/mirmon/savannah/ parabola: https://repo.parabola.nu/ https://www.parabola.nu/mirrors/status/ - -trisquel: https://trisquel.info/mirmon/index.html out of date website \ No newline at end of file +pkgsrc +qtproject: https://download.qt.io/ +raspbian: http://archive.raspbian.org/raspbian/ snapshotindex.txt is most likely a timestamp, tho i'm not sure. also i think our mirror is completely outdated, it's not listed on official mirror list +slackware: https://mirrors.slackware.com/mirrorlist/ https://mirrors.slackware.com/slackware/ checking using the last updated date here, don't know if it's entirely accurate +trisquel: https://trisquel.info/mirmon/index.html out of date website!? please recheck this!!! +ubuntu-ports: http://ports.ubuntu.com/ubuntu-ports/ checks the file anonster.canonical.com, which appears to be a timestamp (check it to make sure!!!) +ubuntu-releases: https://releases.ubuntu.com/ \ No newline at end of file diff --git a/data.json b/data.json index 08c36e0..3435602 100644 --- a/data.json +++ b/data.json @@ -35,7 +35,7 @@ "file": "centos/TIME" }, "Ceph": { - "out_of_sync_since": 1633329349, + "out_of_sync_since": 1633340186, "out_of_sync_interval": 86400, "csc": "ceph/", "upstream": "https://download.ceph.com/", @@ -52,7 +52,7 @@ "file": "x86/sha512.sum" }, "Debian": { - "out_of_sync_since": null, + "out_of_sync_since": 1633337502, "out_of_sync_interval": 86400, "csc": "", "upstream": "https://ftp-master.debian.org/", @@ -66,7 +66,7 @@ "file": "debian-cd/project/trace/cdimage.debian.org" }, "DebianMultimedia": { - "out_of_sync_since": null, + "out_of_sync_since": 1633340186, "out_of_sync_interval": 86400, "csc": "debian-multimedia/", "upstream": "http://debian-mirrors.sdinet.de/deb-multimedia/", @@ -80,7 +80,7 @@ "file": "debian-ports/project/trace/porta.debian.org" }, "DebianSecurity": { - "out_of_sync_since": null, + "out_of_sync_since": 1633337502, "out_of_sync_interval": 86400, "csc": "", "upstream": "http://debian.mirror.iweb.ca/", @@ -234,11 +234,60 @@ "upstream": "http://download-mirror.savannah.gnu.org/releases/", "file": "00_TIME.txt" }, - "parabola": { - "out_of_sync_since": null, + "parabola": { + "out_of_sync_since": 1633335556, "out_of_sync_interval": 86400, "csc": "parabola/", "upstream": "https://repo.parabola.nu/", "file": "lastsync" + }, + "pkgsrc": { + "out_of_sync_since": 1633335556, + "out_of_sync_interval": 86400, + "csc": "pkgsrc/", + "upstream": "http://ftp.netbsd.org/pub/pkgsrc/", + "file": "MIRROR-TIMESTAMP" + }, + "qtproject": { + "out_of_sync_since": 1633340186, + "out_of_sync_interval": 86400, + "csc": "qtproject/", + "upstream": "https://download.qt.io/", + "file": "timestamp.txt" + }, + "raspbian": { + "out_of_sync_since": 1633337502, + "out_of_sync_interval": 86400, + "csc": "raspbian/", + "upstream": "http://archive.raspbian.org/", + "file": "snapshotindex.txt" + }, + "slackware": { + "out_of_sync_since": null, + "out_of_sync_interval": 86400, + "csc": "", + "upstream": "https://mirrors.slackware.com/mirrorlist/", + "file": "" + }, + "trisquel": { + "out_of_sync_since": null, + "out_of_sync_interval": 86400, + "csc": "", + "upstream": "https://trisquel.info/mirmon/index.html", + "file": "" + }, + "ubuntu_ports": { + "out_of_sync_since": 1633341982, + "out_of_sync_interval": 86400, + "csc": "ubuntu-ports/project/trace/", + "upstream": "http://ports.ubuntu.com/ubuntu-ports/project/trace/", + "file": "anonster.canonical.com" + }, + "ubuntu_releases": { + "out_of_sync_since": null, + "out_of_sync_interval": 86400, + "csc": "", + "upstream": "https://launchpad.net/ubuntu/+mirror/mirror.csclub.uwaterloo.ca-release", + "file": "" } } \ No newline at end of file diff --git a/main.py b/main.py index a6e27dc..d525828 100644 --- a/main.py +++ b/main.py @@ -67,7 +67,7 @@ if __name__ == "__main__": print(f"Failure: {project} does not exist") continue project_class = getattr(sys.modules[__name__], project) - if project == "CPAN" or project == "ubuntu" or project == "manjaro" or project == "mxlinux" or project == "mxlinux_iso": + if project == "CPAN" or project == "ubuntu" or project == "ubuntu_releases" or project == "manjaro" or project == "mxlinux" or project == "mxlinux_iso" or project == "slackware" or project == "trisquel": checker_result = project_class.check(data, project, current_time) if checker_result: print(f"Success: {project} up-to-date") diff --git a/projects/mxlinux.py b/projects/mxlinux.py index 00a7837..9ebc21f 100644 --- a/projects/mxlinux.py +++ b/projects/mxlinux.py @@ -19,8 +19,8 @@ class mxlinux(Project): page = requests.get(data[project]["upstream"]).text indexOfFile = page.find("mirror.csclub.uwaterloo.ca") - m = re.search(r'(\d+ hours)|(\d+(\.)?\d+ days)', page[indexOfFile:]) # solution from: https://stackoverflow.com/questions/21074100/how-to-convert-standard-timedelta-string-to-timedelta-object/21074460 + m = re.search(r'(\d+ hour)|(\d+ hours)|(\d+(\.)?\d+ days)', page[indexOfFile:]) # solution from: https://stackoverflow.com/questions/21074100/how-to-convert-standard-timedelta-string-to-timedelta-object/21074460 duration = pd.to_timedelta(m.group(0)) - + return duration <= pd.to_timedelta(data[project]["out_of_sync_interval"], unit='s') \ No newline at end of file diff --git a/projects/mxlinux_iso.py b/projects/mxlinux_iso.py index 11cbc2c..a2a339c 100644 --- a/projects/mxlinux_iso.py +++ b/projects/mxlinux_iso.py @@ -19,7 +19,7 @@ class mxlinux_iso(Project): page = requests.get(data[project]["upstream"]).text indexOfFile = page.find("mirror.csclub.uwaterloo.ca") - m = re.search(r'(\d+ hours)|(\d+(\.)?\d+ days)', page[indexOfFile:]) # solution from: https://stackoverflow.com/questions/21074100/how-to-convert-standard-timedelta-string-to-timedelta-object/21074460 + m = re.search(r'(\d+ hour)|(\d+ hours)|(\d+(\.)?\d+ days)', page[indexOfFile:]) # solution from: https://stackoverflow.com/questions/21074100/how-to-convert-standard-timedelta-string-to-timedelta-object/21074460 duration = pd.to_timedelta(m.group(0)) diff --git a/projects/pkgsrc.py b/projects/pkgsrc.py new file mode 100644 index 0000000..9832fc1 --- /dev/null +++ b/projects/pkgsrc.py @@ -0,0 +1,8 @@ +""" +Contains pkgsrc class +""" + +from project import Project + +class pkgsrc(Project): + """pkgsrc class""" \ No newline at end of file diff --git a/projects/qtproject.py b/projects/qtproject.py new file mode 100644 index 0000000..be6d194 --- /dev/null +++ b/projects/qtproject.py @@ -0,0 +1,8 @@ +""" +Contains qtproject class +""" + +from project import Project + +class qtproject(Project): + """qtproject class""" \ No newline at end of file diff --git a/projects/raspbian.py b/projects/raspbian.py new file mode 100644 index 0000000..d6776d6 --- /dev/null +++ b/projects/raspbian.py @@ -0,0 +1,8 @@ +""" +Contains raspbian class +""" + +from project import Project + +class raspbian(Project): + """raspbian class""" \ No newline at end of file diff --git a/projects/slackware.py b/projects/slackware.py new file mode 100644 index 0000000..2bd146c --- /dev/null +++ b/projects/slackware.py @@ -0,0 +1,26 @@ +""" +Contains slackware class +""" + +import os +from project import Project +from shared import CSC_MIRROR +import requests +import datefinder # another date finding library +from datetime import timedelta +from datetime import datetime +import re +import pandas as pd + +class slackware(Project): + """slackware class""" + @staticmethod + def check(data, project, current_time): + page = requests.get(data[project]["upstream"]).text + if (page.find("mirror.csclub.uwaterloo.ca/slackware/") != -1): + indexOfFile = page.find("Last Updated:") + matches = list(datefinder.find_dates(page[indexOfFile:])) + date = matches[0] # date is of type datetime.datetime + return(pd.to_datetime(current_time, unit='s') - date.replace(tzinfo=None) <= pd.to_timedelta(data[project]["out_of_sync_interval"], unit='s')) + else: + return False \ No newline at end of file diff --git a/projects/trisquel.py b/projects/trisquel.py new file mode 100644 index 0000000..2cea67d --- /dev/null +++ b/projects/trisquel.py @@ -0,0 +1,26 @@ +""" +Contains trisquel class +""" + +import os +from project import Project +from shared import CSC_MIRROR +import requests +import datefinder # another date finding library +from datetime import timedelta +from datetime import datetime +import re +import pandas as pd + +class trisquel(Project): + """trisquel class""" + @staticmethod + def check(data, project, current_time): + page = requests.get(data[project]["upstream"]).text + indexOfFile = page.find("mirror.csclub.uwaterloo.ca") + + m = re.search(r'(\d+ hour)|(\d+ hours)|(\d+(\.)?\d+ days)', page[indexOfFile:]) # solution from: https://stackoverflow.com/questions/21074100/how-to-convert-standard-timedelta-string-to-timedelta-object/21074460 + + duration = pd.to_timedelta(m.group(0)) + + return duration <= pd.to_timedelta(data[project]["out_of_sync_interval"], unit='s') \ No newline at end of file diff --git a/projects/ubuntu_ports.py b/projects/ubuntu_ports.py new file mode 100644 index 0000000..2730864 --- /dev/null +++ b/projects/ubuntu_ports.py @@ -0,0 +1,8 @@ +""" +Contains ubuntu_ports class +""" + +from project import Project + +class ubuntu_ports(Project): + """ubuntu_ports class""" \ No newline at end of file diff --git a/projects/ubuntu_releases.py b/projects/ubuntu_releases.py new file mode 100644 index 0000000..0cfc172 --- /dev/null +++ b/projects/ubuntu_releases.py @@ -0,0 +1,25 @@ +""" +Contains ubuntu_releases class +""" + +import os +from project import Project +from shared import CSC_MIRROR +import requests +import datefinder # another date finding library +from datetime import timedelta +from datetime import datetime +import re +import pandas as pd + +class ubuntu_releases(Project): + """ubuntu_releases class""" + @staticmethod + def check(data, project, current_time): + page = requests.get(data[project]["upstream"]).text + indexOfFile = page.find("last verified") + matches = list(datefinder.find_dates(page[indexOfFile:])) + date = matches[0] # date is of type datetime.datetime + return(pd.to_datetime(current_time, unit='s') - date.replace(tzinfo=None) <= pd.to_timedelta(data[project]["out_of_sync_interval"], unit='s')) + + # https://launchpad.net/ubuntu/+mirror/mirror.csclub.uwaterloo.ca-release \ No newline at end of file diff --git a/test.py b/test.py index 52cdeff..dc98cdd 100644 --- a/test.py +++ b/test.py @@ -54,7 +54,7 @@ def get_latest_date(web_dir): # main function if __name__ =="__main__": - # website to be scrape + """# website to be scrape site="http://ftp.netbsd.org/pub/NetBSD/" # works on: https://www.x.org/releases/ # https://mirror.csclub.uwaterloo.ca/linuxmint/ #works wonders for linuxmint @@ -70,14 +70,10 @@ if __name__ =="__main__": if (latest_date2 >= latest_date): latest_date = latest_date2 - print(latest_date) + print(latest_date)""" - """page = requests.get("http://rsync-mxlinux.org/mirmon/index.html").text - indexOfFile = page.find("mirror.csclub.uwaterloo.ca") - - m = re.search(r'(\d+ hours)|(\d+(\.)?\d+ days)', page[indexOfFile:]) # solution from: https://stackoverflow.com/questions/21074100/how-to-convert-standard-timedelta-string-to-timedelta-object/21074460 - - duration = pd.to_timedelta(m.group(0)) - - print (duration <= pd.to_timedelta(86400, unit='s'))""" + csc_url = "https://mirror.csclub.uwaterloo.ca/ubuntu-ports/project/trace/anonster.canonical.com" + upstream_url = "http://ports.ubuntu.com/ubuntu-ports/project/trace/anonster.canonical.com" + print(requests.get(upstream_url).text) + print(requests.get(csc_url).text == requests.get(upstream_url).text) \ No newline at end of file