From 3d5eee45dbb3d12fea4812eb6c71b6cff13edbce Mon Sep 17 00:00:00 2001 From: Tom Date: Fri, 15 Oct 2021 15:14:07 -0700 Subject: [PATCH] added racket --- README.md | 2 +- data.json | 19 ++++++++---- projects/racket.py | 74 ++++++++++++++++++++++++++++++++++++++++++++++ test.py | 4 +-- 4 files changed, 90 insertions(+), 9 deletions(-) create mode 100644 projects/racket.py diff --git a/README.md b/README.md index 1afa94b..cfd0bcb 100644 --- a/README.md +++ b/README.md @@ -23,7 +23,7 @@ macPorts: only distfiles has public repo, no timestamp, too large to loop throug NetBSD: http://ftp.netbsd.org/pub/NetBSD/ has public repo, no timestamp, web directory hard to loop through, no mirror tracker opensuse: http://download.opensuse.org/ has public repo, a possible timestamp called latest in history, our mirror doesn't have this file tho, no mirror tracker puppylinux: https://distro.ibiblio.org/puppylinux/ check the ISO files in the folders starting with puppy -racket: https://mirror.racket-lang.org/installers/ no public repo, no timestamp, no mirror status tracker make sure that we have the latest version under racket-installers +racket: https://mirror.racket-lang.org/installers/ no public repo, no timestamp, no mirror status tracker make sure that we have the latest version number under racket-installers x.org: https://www.x.org/releases/ no timestamp, but candidate for brute force looping since it has few folders, no status tracker Xiph: no timestamp, too big to loop through, no status tracker diff --git a/data.json b/data.json index 7a43ebc..df4f4b6 100644 --- a/data.json +++ b/data.json @@ -21,7 +21,7 @@ "file": "zzz/time.txt" }, "Arch": { - "out_of_sync_since": null, + "out_of_sync_since": 1634334754, "out_of_sync_interval": 86400, "csc": "archlinux/", "upstream": "http://arch.mirror.constant.com/", @@ -52,7 +52,7 @@ "file": "x86/sha512.sum" }, "Debian": { - "out_of_sync_since": null, + "out_of_sync_since": 1634334754, "out_of_sync_interval": 86400, "csc": "", 
class racket(Project):
    """Mirror check for the Racket installers directory.

    Upstream publishes no timestamp file, so the check compares the highest
    version-numbered directory listed on our mirror with the highest one
    listed upstream.
    """

    @staticmethod
    def _version_key(version):
        """Return a sort key that orders version components numerically.

        Numeric components are compared as integers (so ``12`` ranks above
        ``9``). Any non-numeric component ranks below every numeric one and
        is compared as text, which keeps mixed listings from raising
        ``TypeError`` during comparison.
        """
        return tuple(
            (1, int(part), "") if part.isdecimal() else (0, 0, part)
            for part in version
        )

    @staticmethod
    def max_version(processed_versions):
        """Return the highest version in ``processed_versions``.

        Args:
            processed_versions: non-empty list of versions, each one a list
                of component strings, e.g. ``["8", "2"]`` for ``8.2``.

        Returns:
            The entry of ``processed_versions`` that ranks highest. When one
            version is a prefix of another (``8.2`` vs ``8.2.1``) the longer
            one is considered newer. The first entry wins on ties.

        Raises:
            IndexError: if ``processed_versions`` is empty.
        """
        if not processed_versions:
            # Same exception type the previous implementation raised when it
            # indexed element 0 of an empty list.
            raise IndexError("no versions to compare")
        return max(processed_versions, key=racket._version_key)

    @staticmethod
    def scrape(versions, site):
        """Append the sub-directory links listed at ``site`` to ``versions``.

        Args:
            versions: list that is extended in place with every new href
                ending in ``/``, except ``../``, ``/`` and ``recent/``.
            site: URL of the directory listing to fetch.
        """
        response = requests.get(site)
        soup = BeautifulSoup(response.text, "html.parser")

        skipped = ("../", "/", "recent/")
        # href=True skips anchors without an href attribute, which would
        # otherwise raise KeyError on lookup.
        for anchor in soup.find_all("a", href=True):
            href = anchor["href"]
            if href.endswith("/") and href not in skipped and href not in versions:
                versions.append(href)

    @classmethod
    def get_latest_version(cls, web_dir):
        """Return the highest version directory listed at ``web_dir``.

        The result is a list of component strings, e.g. ``["8", "2"]``.

        Raises:
            IndexError: if the listing contains no candidate directories.
        """
        versions = []
        cls.scrape(versions, web_dir)

        # "8.2/" -> ["8", "2"]
        processed_versions = [
            version.replace("/", "").split(".") for version in versions
        ]
        return cls.max_version(processed_versions)

    @classmethod
    def check(cls, data, project):
        """Check if project packages are up-to-date.

        Args:
            data: project table loaded from data.json; ``data[project]``
                must provide the ``csc``, ``upstream`` and ``file`` keys.
            project: key of the project entry to check.

        Returns:
            True when our mirror and upstream list the same latest version.
        """
        csc_url = CSC_MIRROR + data[project]["csc"] + data[project]["file"]
        upstream_url = data[project]["upstream"] + data[project]["file"]

        return cls.get_latest_version(csc_url) == cls.get_latest_version(upstream_url)
a/test.py +++ b/test.py @@ -7,7 +7,7 @@ from datetime import timedelta import time import pandas as pd import re # for salt stack specifically -from projects import saltstack +from projects import racket import json # import json to read project info stored in json file # this function is brute force looping through the whole directory and checking dates @@ -65,7 +65,7 @@ def get_latest_date(web_dir): if __name__ =="__main__": with open("data.json", "r", encoding="utf-8") as file: data = json.load(file) - print(saltstack.check(data, "saltstack")) + print(racket.check(data, "racket")) """# website to be scrape site="https://cdimage.ubuntu.com/releases/"