added NetBSD

This commit is contained in:
Tom 2021-10-17 00:01:06 -07:00
parent 1df671b9e0
commit fe7d22e1e5
4 changed files with 109 additions and 13 deletions


@@ -20,11 +20,7 @@ to find repos of the mirrored projects to check, just search "projectName mirror"
## checker information
not done:
NetBSD: http://ftp.netbsd.org/pub/NetBSD/ has a public repo, but no timestamp, a web directory that is hard to loop through, and no mirror tracker
puppylinux: https://distro.ibiblio.org/puppylinux/ check the ISO files in the folders starting with puppy
done:
all done:
almalinux
alpine
apache
@@ -64,11 +60,13 @@ manjaro
mxlinux
mxlinux-iso: this one has been shown as out of sync on the official tracker for 134 days, which is weird
mysql: http://mirrors.sunsite.dk/mysql/
NetBSD: http://ftp.netbsd.org/pub/NetBSD/ checking the timestamps of the CHANGES files of the different versions, and of the SHA512 and MD5 files in the isos of the different versions
nongnu: http://download.savannah.nongnu.org/releases/ https://savannah.gnu.org/maintenance/Mirmon/ http://download.savannah.gnu.org/mirmon/savannah/
openbsd
opensuse: http://download.opensuse.org/ check the Update.repo files in the folders inside the update folder; temporarily not checking tumbleweed-non-oss/ and tumbleweed/
parabola: https://repo.parabola.nu/ https://www.parabola.nu/mirrors/status/
pkgsrc
puppylinux: https://distro.ibiblio.org/puppylinux/ check the ISO files or htm files in the folders starting with puppy
qtproject: https://download.qt.io/
racket: https://mirror.racket-lang.org/installers/ no public repo, no timestamp, no mirror status tracker; make sure that we have the latest version number under racket-installers
raspberry pi: https://archive.raspberrypi.org/ Checking the timestamp of either the Release file or the Packages file should suffice (see the sketch after this list).
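
Many of the checks above reduce to one pattern: fetch a small timestamp file (TIME, Release, Packages, Update.repo, ...) from both the upstream site and the CSC mirror and compare the two copies. A minimal sketch of that pattern, assuming plain-text timestamp files; the helper name and the example URLs are hypothetical (CSC_MIRROR is the constant imported from shared in netbsd.py below):

import requests

def timestamp_file_in_sync(upstream_url, mirror_url, path):
    # fetch the same small timestamp file from both sides
    upstream_stamp = requests.get(upstream_url + path).text.strip()
    mirror_stamp = requests.get(mirror_url + path).text.strip()
    # identical contents means the mirror has picked up the latest sync
    return upstream_stamp == mirror_stamp

# hypothetical usage for the raspberry pi entry above:
# timestamp_file_in_sync("https://archive.raspberrypi.org/", CSC_MIRROR + "raspberrypi/", "debian/dists/bullseye/Release")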


@@ -7,21 +7,21 @@
"file": "almalinux/TIME"
},
"Alpine": {
"out_of_sync_since": 1633923341,
"out_of_sync_since": null,
"out_of_sync_interval": 86400,
"csc": "",
"upstream": "https://uk.alpinelinux.org/",
"file": "alpine/last-updated"
},
"Apache": {
"out_of_sync_since": null,
"out_of_sync_since": 1634453333,
"out_of_sync_interval": 86400,
"csc": "apache/",
"upstream": "https://downloads.apache.org/",
"file": "zzz/time.txt"
},
"Arch": {
"out_of_sync_since": 1634433282,
"out_of_sync_since": null,
"out_of_sync_interval": 86400,
"csc": "archlinux/",
"upstream": "http://arch.mirror.constant.com/",
@@ -73,7 +73,7 @@
"file": "project/trace/deb-multimedia.org"
},
"DebianPorts": {
"out_of_sync_since": 1633294718,
"out_of_sync_since": null,
"out_of_sync_interval": 86400,
"csc": "",
"upstream": "https://deb.debian.org/",
@@ -94,7 +94,7 @@
"file": "TIME"
},
"Fedora": {
"out_of_sync_since": null,
"out_of_sync_since": 1634453333,
"out_of_sync_interval": 86400,
"csc": "fedora/",
"upstream": "http://fedora.mirror.iweb.com/",
@@ -242,7 +242,7 @@
"file": "lastsync"
},
"pkgsrc": {
"out_of_sync_since": 1633335556,
"out_of_sync_since": null,
"out_of_sync_interval": 86400,
"csc": "pkgsrc/",
"upstream": "http://ftp.netbsd.org/pub/pkgsrc/",
@@ -394,5 +394,12 @@
"csc": "puppylinux/",
"upstream": "https://distro.ibiblio.org/puppylinux/",
"file": ""
},
"netbsd": {
"out_of_sync_since": null,
"out_of_sync_interval": 86400,
"csc": "NetBSD/",
"upstream": "http://ftp.netbsd.org/pub/NetBSD/",
"file": ""
}
}
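
How out_of_sync_since and out_of_sync_interval are consumed is not part of this diff; a plausible reading is that out_of_sync_since records the first failed check and a project is only flagged once a full interval has elapsed. A minimal sketch under that assumption (the function name is hypothetical):

import time

def should_flag(entry, in_sync):
    # back in sync: clear the marker
    if in_sync:
        entry["out_of_sync_since"] = None
        return False
    # remember when the project was first seen out of sync
    if entry["out_of_sync_since"] is None:
        entry["out_of_sync_since"] = int(time.time())
    # flag only after it has been out of sync longer than the allowed interval
    return time.time() - entry["out_of_sync_since"] > entry["out_of_sync_interval"]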

projects/netbsd.py Normal file

@@ -0,0 +1,91 @@
from bs4 import BeautifulSoup
import requests
import re
import datefinder  # another date finding library
from project import Project
from shared import CSC_MIRROR


class netbsd(Project):
    """netbsd class: compares the CSC mirror's NetBSD tree against upstream"""

    @staticmethod
    def checker(directory_URL, file_name):
        """Return the last-modified date listed next to file_name in the
        directory listing at directory_URL, or False if the file is absent."""
        page = requests.get(directory_URL).text
        file_index = page.find(file_name)
        if file_index == -1:
            return False
        # the listing shows dates either as dd-Mon-yyyy or yyyy-Mon-dd
        str_dates = re.findall(r'(\d{2}-\w{3}-\d{4} \d{2}:\d{2})|(\d{4}-\w{3}-\d{2} \d{2}:\d{2})', page[file_index:])
        if not str_dates:  # no date listed after the file name
            return False
        return list(datefinder.find_dates("".join(str_dates[0])))[0]
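    # illustration (the exact listing format is an assumption, not taken from this commit):
    # an autoindex row such as
    #     <a href="CHANGES">CHANGES</a>        17-Oct-2021 00:01  1.2M
    # gives str_dates[0] == ('17-Oct-2021 00:01', '') and the joined string
    # parses to datetime.datetime(2021, 10, 17, 0, 1)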
    @classmethod
    def check_version(cls, site1, site2):
        # fetch the directory listings of the upstream site and the mirror
        page1 = requests.get(site1).text
        page2 = requests.get(site2).text
        # parse the HTML
        s1 = BeautifulSoup(page1, "html.parser")
        s2 = BeautifulSoup(page2, "html.parser")
        hrefs1 = s1.find_all("a")
        hrefs2 = [i.attrs['href'] for i in s2.find_all("a")]
        for i in hrefs1:  # for a href directories
            href = i.attrs['href']
            if re.match(r'NetBSD-\d.*', href):  # numbered versions list CHANGES at the top level
                date1 = cls.checker(site1+href, "CHANGES")
                if not date1:  # if the version has no CHANGES file, ignore it
                    continue
                if href not in hrefs2:  # version missing from the mirror
                    return False
                date2 = cls.checker(site2+href, "CHANGES")
                if not date2 or date1 > date2:  # CHANGES missing or older on the mirror
                    return False
            elif href.startswith("NetBSD-") and href != "NetBSD-daily/":  # other NetBSD-* trees list CHANGES under src/doc/
                date1 = cls.checker(site1+href+"src/doc/", "CHANGES")
                if not date1:
                    continue
                if href not in hrefs2:
                    return False
                date2 = cls.checker(site2+href+"src/doc/", "CHANGES")
                if not date2 or date1 > date2:
                    return False
        return True
    @classmethod
    def check_iso(cls, site1, site2):
        # fetch the iso/ directory listings of the upstream site and the mirror
        page1 = requests.get(site1).text
        page2 = requests.get(site2).text
        # parse the HTML
        s1 = BeautifulSoup(page1, "html.parser")
        s2 = BeautifulSoup(page2, "html.parser")
        hrefs1 = s1.find_all("a")
        hrefs2 = [i.attrs['href'] for i in s2.find_all("a")]
        for i in hrefs1:  # for a href directories
            href = i.attrs['href']
            if href not in hrefs2:  # version missing from the mirror
                return False
            # compare the listed dates of the checksum files in each version directory
            for checksum_file in ("SHA512", "MD5"):
                date1 = cls.checker(site1+href, checksum_file)
                date2 = cls.checker(site2+href, checksum_file)
                if date1 and (not date2 or date1 > date2):  # checksum file missing or older on the mirror
                    return False
        return True
    @classmethod
    def check(cls, data, project):
        """Check if project packages are up-to-date"""
        csc_url = CSC_MIRROR + data[project]["csc"] + data[project]["file"]
        upstream_url = data[project]["upstream"] + data[project]["file"]
        # the mirror is in sync only if both the version trees and the iso/ trees check out
        return cls.check_version(upstream_url, csc_url) and cls.check_iso(upstream_url+"iso/", csc_url+"iso/")
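
For instance, with the data.json entry above ("csc": "NetBSD/", "upstream": "http://ftp.netbsd.org/pub/NetBSD/", "file": ""), check ends up comparing http://ftp.netbsd.org/pub/NetBSD/ against CSC_MIRROR + "NetBSD/", and then the iso/ subtree of each.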


@@ -7,7 +7,7 @@ from datetime import timedelta
import time
import pandas as pd
import re # for salt stack specifically
from projects import puppy_linux
from projects import netbsd
import json  # to read the project info stored in the json file
# this function brute-forces through the whole directory tree, checking dates
@@ -65,7 +65,7 @@ def get_latest_date(web_dir):
if __name__ =="__main__":
with open("data.json", "r", encoding="utf-8") as file:
data = json.load(file)
print(puppy_linux.check(data, "puppy_linux"))
print(netbsd.check(data, "netbsd"))
"""# website to be scrape
site="https://cdimage.ubuntu.com/releases/"