From fe7d22e1e548836813aa58bff76e13f3c0c564aa Mon Sep 17 00:00:00 2001 From: Tom Date: Sun, 17 Oct 2021 00:01:06 -0700 Subject: [PATCH] added netBSD --- README.md | 8 ++-- data.json | 19 +++++++--- projects/netbsd.py | 91 ++++++++++++++++++++++++++++++++++++++++++++++ test.py | 4 +- 4 files changed, 109 insertions(+), 13 deletions(-) create mode 100644 projects/netbsd.py diff --git a/README.md b/README.md index d25f0aa..af826f7 100644 --- a/README.md +++ b/README.md @@ -20,11 +20,7 @@ to find repos of the mirrored projects to check, just search "projectName mirror ## checker information -not done: -NetBSD: http://ftp.netbsd.org/pub/NetBSD/ has public repo, no timestamp, web directory hard to loop through, no mirror tracker -puppylinux: https://distro.ibiblio.org/puppylinux/ check the ISO files in the folders starting with puppy - -done: +all done: almalinux alpine apache @@ -64,11 +60,13 @@ manjaro mxlinux mxlinux-iso: this one seems out of sync on the official tracker for 134 days, which is weird mysql: http://mirrors.sunsite.dk/mysql/ +NetBSD: http://ftp.netbsd.org/pub/NetBSD/ checking timestamps of change files in different versions, and SHA512, MD5 files in the isos of different versions nongnu: http://download.savannah.nongnu.org/releases/ https://savannah.gnu.org/maintenance/Mirmon/ http://download.savannah.gnu.org/mirmon/savannah/ openbsd opensuse: http://download.opensuse.org/ check Update.repo files in folders inside the update folder, not checking tumbleweed-non-oss/ and tumbleweed/ temporarily parabola: https://repo.parabola.nu/ https://www.parabola.nu/mirrors/status/ pkgsrc +puppylinux: https://distro.ibiblio.org/puppylinux/ check the ISO files or htm files in the folders starting with puppy qtproject: https://download.qt.io/ racket: https://mirror.racket-lang.org/installers/ no public repo, no timestamp, no mirror status tracker make sure that we have the latest version number under racket-installers raspberry pi: https://archive.raspberrypi.org/ 
Checking the timestamp of either the Release file or the Packages file should suffice. diff --git a/data.json b/data.json index 0a44746..1657f74 100644 --- a/data.json +++ b/data.json @@ -7,21 +7,21 @@ "file": "almalinux/TIME" }, "Alpine": { - "out_of_sync_since": 1633923341, + "out_of_sync_since": null, "out_of_sync_interval": 86400, "csc": "", "upstream": "https://uk.alpinelinux.org/", "file": "alpine/last-updated" }, "Apache": { - "out_of_sync_since": null, + "out_of_sync_since": 1634453333, "out_of_sync_interval": 86400, "csc": "apache/", "upstream": "https://downloads.apache.org/", "file": "zzz/time.txt" }, "Arch": { - "out_of_sync_since": 1634433282, + "out_of_sync_since": null, "out_of_sync_interval": 86400, "csc": "archlinux/", "upstream": "http://arch.mirror.constant.com/", @@ -73,7 +73,7 @@ "file": "project/trace/deb-multimedia.org" }, "DebianPorts": { - "out_of_sync_since": 1633294718, + "out_of_sync_since": null, "out_of_sync_interval": 86400, "csc": "", "upstream": "https://deb.debian.org/", @@ -94,7 +94,7 @@ "file": "TIME" }, "Fedora": { - "out_of_sync_since": null, + "out_of_sync_since": 1634453333, "out_of_sync_interval": 86400, "csc": "fedora/", "upstream": "http://fedora.mirror.iweb.com/", @@ -242,7 +242,7 @@ "file": "lastsync" }, "pkgsrc": { - "out_of_sync_since": 1633335556, + "out_of_sync_since": null, "out_of_sync_interval": 86400, "csc": "pkgsrc/", "upstream": "http://ftp.netbsd.org/pub/pkgsrc/", @@ -394,5 +394,12 @@ "csc": "puppylinux/", "upstream": "https://distro.ibiblio.org/puppylinux/", "file": "" + }, + "netbsd": { + "out_of_sync_since": null, + "out_of_sync_interval": 86400, + "csc": "NetBSD/", + "upstream": "http://ftp.netbsd.org/pub/NetBSD/", + "file": "" } } \ No newline at end of file diff --git a/projects/netbsd.py b/projects/netbsd.py new file mode 100644 index 0000000..d90cd6b --- /dev/null +++ b/projects/netbsd.py @@ -0,0 +1,91 @@ +from bs4 import BeautifulSoup +import requests +import re +import datefinder # another date 
finding library
+from project import Project
+from shared import CSC_MIRROR
+
+# Seconds to wait for any single HTTP response; requests applies no timeout
+# unless one is passed, so a stalled server would otherwise block forever.
+_REQUEST_TIMEOUT = 30
+
+# Timestamp shapes accepted after a file name in a directory listing,
+# e.g. "17-Oct-2021 00:01" or "2021-Oct-17 00:01".
+_LISTING_DATE = re.compile(r'(\d{2}-\w{3}-\d{4} \d{2}:\d{2})|(\d{4}-\w{3}-\d{2} \d{2}:\d{2})')
+
+
+class netbsd(Project):
+    """Checker for the NetBSD mirror.
+
+    The mirror is reported as up to date when every release directory and
+    every ISO directory linked from the upstream listing is also linked from
+    the mirror listing, and none of the watched files (CHANGES, SHA512, MD5)
+    carries a newer listing timestamp upstream than on the mirror.
+    """
+
+    @staticmethod
+    def checker(directory_URL, file_name):
+        """Return the listing timestamp shown for ``file_name``.
+
+        Downloads the page at ``directory_URL``, locates the first occurrence
+        of ``file_name`` in its text and parses the first timestamp that
+        follows it.
+
+        Returns:
+            A ``datetime`` on success, or ``False`` when the name does not
+            occur on the page or no parseable timestamp follows it.
+        """
+        page = requests.get(directory_URL, timeout=_REQUEST_TIMEOUT).text
+        file_index = page.find(file_name)
+        if file_index == -1:
+            return False
+
+        stamp = _LISTING_DATE.search(page, file_index)
+        if stamp is None:
+            # The name is present but nothing date-like follows it.
+            return False
+
+        # find_dates yields lazily; an empty result means the text could not
+        # be parsed, which is reported the same way as a missing file.
+        return next(iter(datefinder.find_dates(stamp.group(0))), False)
+
+    @staticmethod
+    def _list_hrefs(listing_url):
+        """Return the href of every link on the page, in page order.
+
+        Anchors without an ``href`` attribute are left out.
+        """
+        html = requests.get(listing_url, timeout=_REQUEST_TIMEOUT).text
+        soup = BeautifulSoup(html, "html.parser")
+        return [anchor["href"] for anchor in soup.find_all("a", href=True)]
+
+    @classmethod
+    def check_version(cls, site1, site2):
+        """Compare the CHANGES timestamps of every release directory.
+
+        Args:
+            site1: upstream base URL (with trailing slash).
+            site2: mirror base URL (with trailing slash).
+
+        Returns:
+            ``False`` as soon as a release that has a CHANGES timestamp
+            upstream is not linked from the mirror, has no CHANGES timestamp
+            on the mirror, or is older on the mirror; ``True`` otherwise.
+        """
+        upstream_hrefs = cls._list_hrefs(site1)
+        mirror_hrefs = set(cls._list_hrefs(site2))
+
+        for href in upstream_hrefs:
+            if re.match(r'NetBSD-\d.*', href):
+                # Numbered releases are checked for CHANGES at the top level.
+                changes_dir = href
+            elif href.startswith("NetBSD-") and href != "NetBSD-daily/":
+                # Other NetBSD-* trees are checked under src/doc/ instead.
+                changes_dir = href + "src/doc/"
+            else:
+                continue
+
+            upstream_date = cls.checker(site1 + changes_dir, "CHANGES")
+            if not upstream_date:  # if the version is empty, ignore it
+                continue
+            if href not in mirror_hrefs:
+                return False
+
+            mirror_date = cls.checker(site2 + changes_dir, "CHANGES")
+            # A file that upstream has but the mirror lacks counts as stale
+            # (comparing a datetime with False would raise TypeError).
+            if not mirror_date or upstream_date > mirror_date:
+                return False
+        return True
+
+    @classmethod
+    def check_iso(cls, site1, site2):
+        """Compare the SHA512 and MD5 timestamps of every ISO directory.
+
+        Args:
+            site1: upstream ``iso/`` URL (with trailing slash).
+            site2: mirror ``iso/`` URL (with trailing slash).
+
+        Returns:
+            ``False`` as soon as a link present upstream is absent from the
+            mirror listing, or a checksum file that has a timestamp upstream
+            is missing or older on the mirror; ``True`` otherwise.
+        """
+        upstream_hrefs = cls._list_hrefs(site1)
+        mirror_hrefs = set(cls._list_hrefs(site2))
+
+        for href in upstream_hrefs:
+            if href not in mirror_hrefs:
+                return False
+            for checksum_file in ("SHA512", "MD5"):
+                upstream_date = cls.checker(site1 + href, checksum_file)
+                if not upstream_date:
+                    # Nothing to compare against for this entry.
+                    continue
+                mirror_date = cls.checker(site2 + href, checksum_file)
+                if not mirror_date or upstream_date > mirror_date:
+                    return False
+        return True
+
+    @classmethod
+    def check(cls, data, project):
+        """Check if project packages are up-to-date.
+
+        Builds the mirror URL from ``CSC_MIRROR`` plus the project's ``csc``
+        and ``file`` entries, and the upstream URL from its ``upstream`` and
+        ``file`` entries, then requires both the release check and the ISO
+        check (on the ``iso/`` subdirectory) to pass.
+        """
+        csc_url = CSC_MIRROR + data[project]["csc"] + data[project]["file"]
+        upstream_url = data[project]["upstream"] + data[project]["file"]
+
+        return cls.check_version(upstream_url, csc_url) and cls.check_iso(upstream_url+"iso/", csc_url+"iso/")
\ No newline at end of file
diff --git a/test.py b/test.py
index b4c799d..7c1f9d6 100644
--- a/test.py
+++ b/test.py
@@ -7,7 +7,7 @@ from datetime import timedelta
 import time
 import pandas as pd
 import re # for salt stack specifically
-from projects import puppy_linux
+from projects import netbsd
 import json # import json to read project info stored in json file
 
 # this function is brute force looping through the whole directory and checking dates
@@ -65,7 +65,7 @@ def get_latest_date(web_dir):
 if __name__ =="__main__":
     with open("data.json", "r", encoding="utf-8") as file:
         data = json.load(file)
-        print(puppy_linux.check(data, "puppy_linux"))
+        print(netbsd.check(data, "netbsd"))
 
     """# website to be scrape
     site="https://cdimage.ubuntu.com/releases/"