From 0b3e36a8ffba5425d47236e49b9fa981fcddc8fa Mon Sep 17 00:00:00 2001 From: Tom Date: Thu, 14 Oct 2021 23:17:47 -0700 Subject: [PATCH] added sagemath and saltstack --- README.md | 6 +++--- data.json | 22 ++++++++++++++++++---- projects/sage.py | 42 ++++++++++++++++++++++++++++++++++++++++++ projects/saltstack.py | 25 +++++++++++++++++++++++++ test.py | 4 ++-- 5 files changed, 90 insertions(+), 9 deletions(-) create mode 100644 projects/sage.py create mode 100644 projects/saltstack.py diff --git a/README.md b/README.md index d1162e1..1afa94b 100644 --- a/README.md +++ b/README.md @@ -23,9 +23,7 @@ macPorts: only distfiles has public repo, no timestamp, too large to loop throug NetBSD: http://ftp.netbsd.org/pub/NetBSD/ has public repo, no timestamp, web directory hard to loop through, no mirror tracker opensuse: http://download.opensuse.org/ has public repo, a possible timestamp called latest in history, our mirror doesn't have this file tho, no mirror tracker puppylinux: https://distro.ibiblio.org/puppylinux/ check the ISO files in the folders starting with puppy -racket: no public repo, no timestamp, no mirror status tracker -sagemath: don't know how to deal with this, it's a website -salt stack: don't know how to deal with this, it's a website +racket: https://mirror.racket-lang.org/installers/ no public repo, no timestamp, no mirror status tracker make sure that we have the latest version under racket-installers x.org: https://www.x.org/releases/ no timestamp, but candidate for brute force looping since it has few folders, no status tracker Xiph: no timestamp, too big to loop through, no status tracker @@ -75,6 +73,8 @@ pkgsrc qtproject: https://download.qt.io/ raspberry pi: https://archive.raspberrypi.org/ Checking the timestamp of either the Release file or the Packages file should suffice. raspbian: http://archive.raspbian.org/raspbian/ snapshotindex.txt is most likely a timestamp, tho i'm not sure. 
also i think our mirror is completely outdated, it's not listed on official mirror list +sagemath: same source tarballs as them (the sage-*.tar.gz files under 'Source Code') +salt stack: checking the "Latest release" text under the 'About' header scientific: https://scientificlinux.org/downloads/sl-mirrors/ not checking this one since it's abandoned slackware: https://mirrors.slackware.com/mirrorlist/ https://mirrors.slackware.com/slackware/ checking using the last updated date here, don't know if it's entirely accurate tdf: https://download.documentfoundation.org/ diff --git a/data.json b/data.json index 47fa6de..7a43ebc 100644 --- a/data.json +++ b/data.json @@ -52,7 +52,7 @@ "file": "x86/sha512.sum" }, "Debian": { - "out_of_sync_since": 1633337502, + "out_of_sync_since": null, "out_of_sync_interval": 86400, "csc": "", "upstream": "https://ftp-master.debian.org/", @@ -80,7 +80,7 @@ "file": "debian-ports/project/trace/porta.debian.org" }, "DebianSecurity": { - "out_of_sync_since": null, + "out_of_sync_since": 1634275264, "out_of_sync_interval": 86400, "csc": "", "upstream": "http://debian.mirror.iweb.ca/", @@ -147,7 +147,7 @@ "file": "gutenberg.dcs" }, "IPFire": { - "out_of_sync_since": 1634257890, + "out_of_sync_since": null, "out_of_sync_interval": 172800 }, "KDE": { @@ -228,7 +228,7 @@ "file": "last-updated.txt" }, "nongnu": { - "out_of_sync_since": null, + "out_of_sync_since": 1634275264, "out_of_sync_interval": 86400, "csc": "nongnu/", "upstream": "http://download-mirror.savannah.gnu.org/releases/", @@ -338,5 +338,19 @@ "csc": "linuxmint/", "upstream": "https://mirrors.edge.kernel.org/linuxmint/", "file": "" + }, + "sage": { + "out_of_sync_since": null, + "out_of_sync_interval": 86400, + "csc": "", + "upstream": "", + "file": "sage/src/index.html" + }, + "saltstack": { + "out_of_sync_since": null, + "out_of_sync_interval": 86400, + "csc": "saltstack/", + "upstream": "https://repo.saltproject.io/", + "file": "" } } \ No newline at end of file diff --git 
a/projects/sage.py b/projects/sage.py
new file mode 100644
index 0000000..49be2e8
--- /dev/null
+++ b/projects/sage.py
@@ -0,0 +1,75 @@
+import requests
+import datefinder # another date finding library
+import re
+from datetime import datetime
+from project import Project
+from shared import CSC_MIRROR
+
+# Reference mirrors whose src/index.html listing is compared with ours.
+_REFERENCE_LISTINGS = (
+    "http://mirrors.mit.edu/sage/src/index.html",
+    "https://mirror.rcg.sfu.ca/mirror/sage/src/index.html",
+)
+# "YYYY-MM-DD HH:MM" timestamps as they appear in a listing page.
+_LISTING_DATE = re.compile(r'(\d{4}-\d{2}-\d{2} \d{2}:\d{2})')
+# Seconds to wait on a mirror; requests never times out unless told to.
+_REQUEST_TIMEOUT = 30
+
+
+class sage(Project):
+    """Compare the CSC SageMath source listing with two reference mirrors."""
+
+    @staticmethod
+    def get_latest_date(dates):
+        """Return the most recent datetime found in `dates`.
+
+        dates: iterable of strings, each expected to contain a date.
+        Only the first date datefinder finds in each string is used;
+        strings with no recognisable date are skipped.
+        Raises ValueError when no string yields a date.
+        """
+        parsed = []
+        for text in dates:
+            first = next(iter(datefinder.find_dates(text)), None)
+            if first is not None:
+                parsed.append(first)
+        if not parsed:
+            raise ValueError("no parsable dates to compare")
+        return max(parsed)
+
+    @staticmethod
+    def _listing_dates(url):
+        """Fetch `url` and return every timestamp string found in the page."""
+        page = requests.get(url, timeout=_REQUEST_TIMEOUT).text
+        return _LISTING_DATE.findall(page)
+
+    @classmethod
+    def check(cls, data, project):
+        """Return True when the CSC listing looks at least as new as the references.
+
+        data: project table loaded from data.json; data[project] supplies the
+            "csc" and "file" path pieces appended to CSC_MIRROR.
+        project: key of this project in `data`.
+
+        The listing with more timestamps is taken to be the newer one; on a
+        tie the newest timestamps are compared. Raises ValueError when no
+        listing contains any timestamp, since nothing can be compared then.
+        """
+        csc_url = CSC_MIRROR + data[project]["csc"] + data[project]["file"]
+        csc_dates = cls._listing_dates(csc_url)
+        reference_dates = [cls._listing_dates(url) for url in _REFERENCE_LISTINGS]
+
+        most_entries = max(len(found) for found in reference_dates)
+        if len(csc_dates) != most_entries:
+            # if we have more entries than their mirror, ours must be the new one
+            # since distros only add new versions, and don't delete old versions
+            return len(csc_dates) > most_entries
+        if not csc_dates:
+            raise ValueError("no timestamps found in any sage listing")
+
+        # A reference page that yielded nothing (e.g. an error page) is left
+        # out so it cannot break the comparison against the other reference.
+        newest_reference = max(
+            cls.get_latest_date(found) for found in reference_dates if found
+        )
+        return cls.get_latest_date(csc_dates) >= newest_reference
diff --git a/projects/saltstack.py b/projects/saltstack.py
new file mode 100644
index 0000000..8080938
--- /dev/null
+++ b/projects/saltstack.py
@@ -0,0 +1,25 @@
+import requests
+import datefinder # another date finding library
+import re
+from datetime import datetime
+from project import Project
+from shared import CSC_MIRROR
+
+class saltstack(Project):
+    """Compare the "Latest release" text of the CSC Salt mirror with upstream."""
+
+    # Matches the "Latest release: VERSION (DETAILS)" text on a repo page.
+    # NOTE(review): the "." between the digit groups is unescaped and so
+    # matches any character; kept as written because escaping it would stop
+    # matching a release number that has no "." in it -- confirm against the
+    # live page before tightening.
+    _LATEST_RELEASE = re.compile(r'Latest release: (\d)+.(\d)+ \((.+)\)')
+    # Seconds to wait on a server; requests never times out unless told to.
+    _REQUEST_TIMEOUT = 30
+
+    @staticmethod
+    def _latest_release(url):
+        """Fetch `url` and return its "Latest release" text, or None if absent."""
+        page = requests.get(url, timeout=saltstack._REQUEST_TIMEOUT).text
+        found = saltstack._LATEST_RELEASE.search(page)
+        return found.group(0) if found else None
+
+    @staticmethod
+    def check(data, project):
+        """Return True when both pages show the same "Latest release" text.
+
+        data: project table loaded from data.json; data[project] supplies the
+            "csc", "upstream" and "file" pieces the two URLs are built from.
+        project: key of this project in `data`.
+
+        Returns False when either page lacks the text, because the mirror
+        cannot be confirmed to be in sync in that case.
+        """
+        csc_url = CSC_MIRROR + data[project]["csc"] + data[project]["file"]
+        upstream_url = data[project]["upstream"] + data[project]["file"]
+
+        csc_release = saltstack._latest_release(csc_url)
+        upstream_release = saltstack._latest_release(upstream_url)
+        if csc_release is None or upstream_release is None:
+            return False
+        return csc_release == upstream_release
diff --git a/test.py b/test.py index 2cc72a7..ac4be9b 100644 --- a/test.py +++ b/test.py @@ -7,7 +7,7 @@ from datetime import timedelta import time import pandas as pd import re # for salt stack specifically -from projects import linuxmint +from projects import saltstack import json # import json to read project info stored in json file # this function is brute force looping through the whole directory and checking dates @@ -65,7 +65,7 @@ def get_latest_date(web_dir): if __name__ =="__main__": with open("data.json", "r", encoding="utf-8") as file: data = json.load(file) - print(linuxmint.check(data, "linuxmint")) + print(saltstack.check(data, "saltstack")) """# website to be scrape site="https://cdimage.ubuntu.com/releases/"