diff --git a/README.md b/README.md index 881cbc2..e1c1523 100644 --- a/README.md +++ b/README.md @@ -36,5 +36,6 @@ done: tdf: https://download.documentfoundation.org/ ubuntu: https://launchpad.net/ubuntu/+mirror/mirror.csclub.uwaterloo.ca-archive vlc: http://download.videolan.org/pub/videolan/ +manjaro trisquel: https://trisquel.info/mirmon/index.html out of date website \ No newline at end of file diff --git a/data.json b/data.json index 750a79e..7d37ce9 100644 --- a/data.json +++ b/data.json @@ -1,202 +1,209 @@ { - "AlmaLinux": { - "out_of_sync_since": 1633294718, - "out_of_sync_interval": 86400, - "csc": "", - "upstream": "https://repo.almalinux.org/", - "file": "almalinux/TIME" - }, - "Alpine": { - "out_of_sync_since": null, - "out_of_sync_interval": 86400, - "csc": "", - "upstream": "https://uk.alpinelinux.org/", - "file": "alpine/last-updated" - }, - "Apache": { - "out_of_sync_since": 1633294718, - "out_of_sync_interval": 86400, - "csc": "apache/", - "upstream": "https://downloads.apache.org/", - "file": "zzz/time.txt" - }, - "Arch": { - "out_of_sync_since": 1633294718, - "out_of_sync_interval": 86400, - "csc": "archlinux/", - "upstream": "http://arch.mirror.constant.com/", - "file": "lastupdate" - }, - "CentOS": { - "out_of_sync_since": 1633294718, - "out_of_sync_interval": 86400, - "csc": "", - "upstream": "https://mirrors.edge.kernel.org/", - "file": "centos/TIME" - }, - "Ceph": { - "out_of_sync_since": 1633294718, - "out_of_sync_interval": 86400, - "csc": "ceph/", - "upstream": "https://download.ceph.com/", - "file": "timestamp" - }, - "CPAN": { - "out_of_sync_interval": 172800 - }, - "Cygwin": { - "out_of_sync_since": null, - "out_of_sync_interval": 86400, - "csc": "cygwin/", - "upstream": "https://cygwin.mirror.globo.tech/", - "file": "x86/sha512.sum" - }, - "Debian": { - "out_of_sync_since": 1633294718, - "out_of_sync_interval": 86400, - "csc": "", - "upstream": "https://ftp-master.debian.org/", - "file": "debian/project/trace/master" - }, - "DebianCD": { - "out_of_sync_since": null, - "out_of_sync_interval": 86400, - "csc": "", - "upstream": "http://debian.mirror.estruxture.net/", - "file": "debian-cd/project/trace/cdimage.debian.org" - }, - "DebianMultimedia": { - "out_of_sync_since": null, - "out_of_sync_interval": 86400, - "csc": "debian-multimedia/", - "upstream": "http://debian-mirrors.sdinet.de/deb-multimedia/", - "file": "project/trace/deb-multimedia.org" - }, - "DebianPorts": { - "out_of_sync_since": 1633294718, - "out_of_sync_interval": 86400, - "csc": "", - "upstream": "https://deb.debian.org/", - "file": "debian-ports/project/trace/porta.debian.org" - }, - "DebianSecurity": { - "out_of_sync_since": null, - "out_of_sync_interval": 86400, - "csc": "", - "upstream": "http://debian.mirror.iweb.ca/", - "file": "debian-security/project/trace/master" - }, - "Eclipse": { - "out_of_sync_since": 1633294718, - "out_of_sync_interval": 172800, - "csc": "eclipse/", - "upstream": "http://download.eclipse.org/", - "file": "TIME" - }, - "Fedora": { - "out_of_sync_since": 1633298732, - "out_of_sync_interval": 86400, - "csc": "fedora/", - "upstream": "http://fedora.mirror.iweb.com/", - "file": "linux/development/rawhide/COMPOSE_ID" - }, - "FreeBSD": { - "out_of_sync_since": 1633294718, - "out_of_sync_interval": 86400, - "csc": "", - "upstream": "http://ftp4.freebsd.org/pub/", - "file": "FreeBSD/TIMESTAMP" - }, - "GentooDistfiles": { - "out_of_sync_since": 1633294718, - "out_of_sync_interval": 86400, - "csc": "gentoo-distfiles", - "upstream": "http://gentoo.mirrors.tera-byte.com/", - "file": "distfiles/timestamp.dev-local" - }, - "GentooPortage": { - "out_of_sync_since": null, - "out_of_sync_interval": 86400, - "csc": "rsync://rsync4.ca.gentoo.org/", - "upstream1": "rsync://rsync1.de.gentoo.org/", - "upstream2": "rsync://rsync8.de.gentoo.org/", - "file": "gentoo-portage/Manifest" - }, - "GNOME": { - "out_of_sync_since": null, - "out_of_sync_interval": 86400, - "csc": "gnome/", - "upstream1": "https://download.gnome.org/", - "upstream2": "https://mirrors.dotsrc.org/gnome/", - "upstream3": "https://muug.ca/mirror/gnome/", - "file1": "core/", - "file2": "cache.json" - }, - "GNU": { - "out_of_sync_since": 1633294718, - "out_of_sync_interval": 86400, - "csc": "", - "upstream": "https://mirrors.kernel.org/", - "file": "gnu/mirror-updated-timestamp.txt" - }, - "Gutenberg": { - "out_of_sync_since": 1633294718, - "out_of_sync_interval": 172800, - "csc": "gutenberg/", - "upstream": "https://gutenberg.pglaf.org/", - "file": "gutenberg.dcs" - }, - "IPFire": { - "out_of_sync_since": null, - "out_of_sync_interval": 172800 - }, - "KDE": { - "out_of_sync_since": null, - "out_of_sync_interval": 86400, - "csc": "kde/", - "upstream": "https://kde.c3sl.ufpr.br/", - "file": "ls-lR" - }, - "KDEApplicationData": { - "out_of_sync_since": 1633294718, - "out_of_sync_interval": 86400, - "csc": "kde-applicationdata/", - "upstream": "https://cdn.files.kde.org/", - "file": "last-updated" - }, - "Kernel": { - "out_of_sync_since": null, - "out_of_sync_interval": 86400, - "csc": "kernel.org/", - "upstream": "https://mirrors.edge.kernel.org/pub/", - "file": "linux/kernel/next/sha256sums.asc" - }, - "OpenBSD": { - "out_of_sync_since": 1633294718, - "out_of_sync_interval": 86400, - "csc": "", - "upstream": "https://ftp.openbsd.org/pub/", - "file": "OpenBSD/timestamp" - }, - "tdf": { - "out_of_sync_since": 1633294718, - "out_of_sync_interval": 86400, - "csc": "", - "upstream": "https://download.documentfoundation.org/TIMESTAMP", - "file": "tdf/TIMESTAMP" - }, - "ubuntu": { - "out_of_sync_since": null, - "out_of_sync_interval": 86400, - "csc": "", - "upstream": "https://launchpad.net/ubuntu/+mirror/mirror.csclub.uwaterloo.ca-archive", - "file": "" - }, - "vlc": { - "out_of_sync_since": 1633298732, - "out_of_sync_interval": 86400, - "csc": "vlc/", - "upstream": "http://download.videolan.org/pub/videolan/", - "file": "trace" - } -} + "AlmaLinux": { + "out_of_sync_since": 1633294718, + "out_of_sync_interval": 86400, + "csc": "", + "upstream": "https://repo.almalinux.org/", + "file": "almalinux/TIME" + }, + "Alpine": { + "out_of_sync_since": null, + "out_of_sync_interval": 86400, + "csc": "", + "upstream": "https://uk.alpinelinux.org/", + "file": "alpine/last-updated" + }, + "Apache": { + "out_of_sync_since": 1633294718, + "out_of_sync_interval": 86400, + "csc": "apache/", + "upstream": "https://downloads.apache.org/", + "file": "zzz/time.txt" + }, + "Arch": { + "out_of_sync_since": null, + "out_of_sync_interval": 86400, + "csc": "archlinux/", + "upstream": "http://arch.mirror.constant.com/", + "file": "lastupdate" + }, + "CentOS": { + "out_of_sync_since": 1633294718, + "out_of_sync_interval": 86400, + "csc": "", + "upstream": "https://mirrors.edge.kernel.org/", + "file": "centos/TIME" + }, + "Ceph": { + "out_of_sync_since": null, + "out_of_sync_interval": 86400, + "csc": "ceph/", + "upstream": "https://download.ceph.com/", + "file": "timestamp" + }, + "CPAN": { + "out_of_sync_interval": 172800 + }, + "Cygwin": { + "out_of_sync_since": null, + "out_of_sync_interval": 86400, + "csc": "cygwin/", + "upstream": "https://cygwin.mirror.globo.tech/", + "file": "x86/sha512.sum" + }, + "Debian": { + "out_of_sync_since": null, + "out_of_sync_interval": 86400, + "csc": "", + "upstream": "https://ftp-master.debian.org/", + "file": "debian/project/trace/master" + }, + "DebianCD": { + "out_of_sync_since": null, + "out_of_sync_interval": 86400, + "csc": "", + "upstream": "http://debian.mirror.estruxture.net/", + "file": "debian-cd/project/trace/cdimage.debian.org" + }, + "DebianMultimedia": { + "out_of_sync_since": null, + "out_of_sync_interval": 86400, + "csc": "debian-multimedia/", + "upstream": "http://debian-mirrors.sdinet.de/deb-multimedia/", + "file": "project/trace/deb-multimedia.org" + }, + "DebianPorts": { + "out_of_sync_since": 1633294718, + "out_of_sync_interval": 86400, + "csc": "", + "upstream": "https://deb.debian.org/", + "file": "debian-ports/project/trace/porta.debian.org" + }, + "DebianSecurity": { + "out_of_sync_since": null, + "out_of_sync_interval": 86400, + "csc": "", + "upstream": "http://debian.mirror.iweb.ca/", + "file": "debian-security/project/trace/master" + }, + "Eclipse": { + "out_of_sync_since": 1633294718, + "out_of_sync_interval": 172800, + "csc": "eclipse/", + "upstream": "http://download.eclipse.org/", + "file": "TIME" + }, + "Fedora": { + "out_of_sync_since": null, + "out_of_sync_interval": 86400, + "csc": "fedora/", + "upstream": "http://fedora.mirror.iweb.com/", + "file": "linux/development/rawhide/COMPOSE_ID" + }, + "FreeBSD": { + "out_of_sync_since": 1633294718, + "out_of_sync_interval": 86400, + "csc": "", + "upstream": "http://ftp4.freebsd.org/pub/", + "file": "FreeBSD/TIMESTAMP" + }, + "GentooDistfiles": { + "out_of_sync_since": 1633294718, + "out_of_sync_interval": 86400, + "csc": "gentoo-distfiles", + "upstream": "http://gentoo.mirrors.tera-byte.com/", + "file": "distfiles/timestamp.dev-local" + }, + "GentooPortage": { + "out_of_sync_since": null, + "out_of_sync_interval": 86400, + "csc": "rsync://rsync4.ca.gentoo.org/", + "upstream1": "rsync://rsync1.de.gentoo.org/", + "upstream2": "rsync://rsync8.de.gentoo.org/", + "file": "gentoo-portage/Manifest" + }, + "GNOME": { + "out_of_sync_since": null, + "out_of_sync_interval": 86400, + "csc": "gnome/", + "upstream1": "https://download.gnome.org/", + "upstream2": "https://mirrors.dotsrc.org/gnome/", + "upstream3": "https://muug.ca/mirror/gnome/", + "file1": "core/", + "file2": "cache.json" + }, + "GNU": { + "out_of_sync_since": 1633294718, + "out_of_sync_interval": 86400, + "csc": "", + "upstream": "https://mirrors.kernel.org/", + "file": "gnu/mirror-updated-timestamp.txt" + }, + "Gutenberg": { + "out_of_sync_since": 1633294718, + "out_of_sync_interval": 172800, + "csc": "gutenberg/", + "upstream": "https://gutenberg.pglaf.org/", + "file": "gutenberg.dcs" + }, + "IPFire": { + "out_of_sync_since": null, + "out_of_sync_interval": 172800 + }, + "KDE": { + "out_of_sync_since": null, + "out_of_sync_interval": 86400, + "csc": "kde/", + "upstream": "https://kde.c3sl.ufpr.br/", + "file": "ls-lR" + }, + "KDEApplicationData": { + "out_of_sync_since": 1633294718, + "out_of_sync_interval": 86400, + "csc": "kde-applicationdata/", + "upstream": "https://cdn.files.kde.org/", + "file": "last-updated" + }, + "Kernel": { + "out_of_sync_since": null, + "out_of_sync_interval": 86400, + "csc": "kernel.org/", + "upstream": "https://mirrors.edge.kernel.org/pub/", + "file": "linux/kernel/next/sha256sums.asc" + }, + "OpenBSD": { + "out_of_sync_since": 1633294718, + "out_of_sync_interval": 86400, + "csc": "", + "upstream": "https://ftp.openbsd.org/pub/", + "file": "OpenBSD/timestamp" + }, + "tdf": { + "out_of_sync_since": 1633294718, + "out_of_sync_interval": 86400, + "csc": "", + "upstream": "https://download.documentfoundation.org/TIMESTAMP", + "file": "tdf/TIMESTAMP" + }, + "ubuntu": { + "out_of_sync_since": null, + "out_of_sync_interval": 86400, + "csc": "", + "upstream": "https://launchpad.net/ubuntu/+mirror/mirror.csclub.uwaterloo.ca-archive", + "file": "" + }, + "vlc": { + "out_of_sync_since": 1633298732, + "out_of_sync_interval": 86400, + "csc": "vlc/", + "upstream": "http://download.videolan.org/pub/videolan/", + "file": "trace" + }, + "manjaro": { + "out_of_sync_since": null, + "out_of_sync_interval": 86400, + "csc": "", + "upstream": "https://repo.manjaro.org/", + "file": "" + } +} \ No newline at end of file diff --git a/main.py b/main.py index 9d2994a..6cdae85 100644 --- a/main.py +++ b/main.py @@ -67,7 +67,7 @@ if __name__ == "__main__": print(f"Failure: {project} does not exist") continue project_class = getattr(sys.modules[__name__], project) - if project == "CPAN": + if project == "CPAN" or project == "ubuntu" or project == "manjaro": checker_result = project_class.check(data, project, current_time) if checker_result: print(f"Success: {project} up-to-date") diff --git a/projects/manjaro.py b/projects/manjaro.py new file mode 100644 index 0000000..95066fc --- /dev/null +++ b/projects/manjaro.py @@ -0,0 +1,26 @@ +""" +Contains manjaro class +""" + +import os +from project import Project +from shared import CSC_MIRROR +import requests +import datefinder # another date finding library +from datetime import timedelta +import re +import pandas as pd + +class manjaro(Project): + """manjaro class""" + @staticmethod + def check(data, project, current_time): + page = requests.get(data[project]["upstream"]).text + indexOfFile = page.find("mirror.csclub.uwaterloo.ca/manjaro") + + m = re.search(r'(?P\d+):(?P\d+)', page[indexOfFile:]) # solution from: https://stackoverflow.com/questions/21074100/how-to-convert-standard-timedelta-string-to-timedelta-object/21074460 + duration = timedelta(**{key: float(val) for key, val in m.groupdict().items()}) + + return duration <= pd.to_timedelta(data[project]["out_of_sync_interval"], unit='s') + + # https://launchpad.net/ubuntu/+mirror/mirror.csclub.uwaterloo.ca-archive \ No newline at end of file diff --git a/projects/tdf.py b/projects/tdf.py index c3f7e50..e372a01 100644 --- a/projects/tdf.py +++ b/projects/tdf.py @@ -2,7 +2,7 @@ Contains tdf class """ -from distro import Distro +from project import Project -class tdf(Distro): +class tdf(Project): """tdf class""" diff --git a/projects/ubuntu.py b/projects/ubuntu.py index b02f05b..1bfc1bc 100644 --- a/projects/ubuntu.py +++ b/projects/ubuntu.py @@ -3,27 +3,23 @@ Contains ubuntu class """ import os -from distro import Distro +from project import Project from shared import CSC_MIRROR import requests import datefinder # another date finding library +from datetime import timedelta +from datetime import datetime +import re +import pandas as pd -class ubuntu(Distro): +class ubuntu(Project): """ubuntu class""" @staticmethod - def check(data, distro, current_time): - page = requests.get(data[distro]["upstream"]).text - indexOfFile = page.find("last verified on") - # segment_clean = re.sub(r'\s\d+\s', ' ', page[indexOfFile:]) # removes numbers for size - # segment_clean = re.sub(r'\s\d+\w*\s', ' ', page[indexOfFile:]) # removes numbers + size unit. e.x. 50kb - + def check(data, project, current_time): + page = requests.get(data[project]["upstream"]).text + indexOfFile = page.find("last verified") matches = list(datefinder.find_dates(page[indexOfFile:])) - # print(matches) - - if len(matches) > 0: - date = matches[0] # date is of type datetime.datetime - return(current_time - date.strftime("%Y/%m/%d, %H:%M:%S")) - else: - return(False) - + date = matches[0] # date is of type datetime.datetime + return(pd.to_datetime(current_time, unit='s') - date.replace(tzinfo=None) <= pd.to_timedelta(data[project]["out_of_sync_interval"], unit='s')) + # https://launchpad.net/ubuntu/+mirror/mirror.csclub.uwaterloo.ca-archive \ No newline at end of file diff --git a/test.py b/test.py new file mode 100644 index 0000000..04bae05 --- /dev/null +++ b/test.py @@ -0,0 +1,76 @@ +from bs4 import BeautifulSoup +import requests +import datefinder # another date finding library +import re +from datetime import datetime +from datetime import timedelta +import time +import pandas as pd + +# lists +urls=[] + +home_site = "http://ports.ubuntu.com" + +# function created +def scrape(site): + + # getting the request from url + r = requests.get(site) + + # converting the text + s = BeautifulSoup(r.text,"html.parser") + + for i in s.find_all("a"): # for a href directories + href = i.attrs['href'] + + if href.endswith("/") and href != "../" and href != "/": + if home_site+href in urls: # avoids the link to parent directory + continue + site_next = site+href + + if site_next not in urls: + urls.append(site_next) + print(site_next) + # calling it self + scrape(site_next) + +def get_latest_date(web_dir): + page = requests.get(site).text + + str_dates = re.findall(r'\d{4}-\d{2}-\d{2} \d{2}:\d{2}', page) + dates = [list(datefinder.find_dates(date))[0] for date in str_dates] + + # for date in dates: + # print(date) + + return(max(dates)) + +# main function +if __name__ =="__main__": + + # website to be scrape + # site="http://ports.ubuntu.com/ubuntu-ports/" + # works on: https://www.x.org/releases/ + + # calling function + # scrape(site) + + # latest_date = get_latest_date(urls[0]) + # get_latest_date(urls[0]) + # for dir in urls: + # latest_date2 = get_latest_date(dir) + # if (latest_date2 >= latest_date): + # latest_date = latest_date2 + + # print(latest_date) + + page = requests.get("https://repo.manjaro.org/").text + indexOfFile = page.find("mirror.csclub.uwaterloo.ca/manjaro") + + m = re.search(r'(?P\d+):(?P\d+)', page[indexOfFile:]) # solution from: https://stackoverflow.com/questions/21074100/how-to-convert-standard-timedelta-string-to-timedelta-object/21074460 + duration = timedelta(**{key: float(val) for key, val in m.groupdict().items()}) + print(duration) + + print (duration <= pd.to_timedelta(86400, unit='s')) + \ No newline at end of file