forked from public/mirror-checker
added netBSD
parent 1df671b9e0
commit fe7d22e1e5
@@ -20,11 +20,7 @@ to find repos of the mirrored projects to check, just search "projectName mirror
## checker information

not done:
NetBSD: http://ftp.netbsd.org/pub/NetBSD/ has a public repo, but no timestamp, a web directory that is hard to loop through, and no mirror tracker
puppylinux: https://distro.ibiblio.org/puppylinux/ check the ISO files in the folders starting with puppy

done:
all done:
almalinux
alpine
apache
@@ -64,11 +60,13 @@ manjaro
mxlinux
mxlinux-iso: this one seems to have been out of sync on the official tracker for 134 days, which is odd
mysql: http://mirrors.sunsite.dk/mysql/
NetBSD: http://ftp.netbsd.org/pub/NetBSD/ check the timestamps of the CHANGES files in the different versions, and the SHA512 and MD5 files in the isos of the different versions
nongnu: http://download.savannah.nongnu.org/releases/ https://savannah.gnu.org/maintenance/Mirmon/ http://download.savannah.gnu.org/mirmon/savannah/
openbsd
opensuse: http://download.opensuse.org/ check the Update.repo files in the folders inside the update folder; not checking tumbleweed-non-oss/ and tumbleweed/ for now (a sketch follows this list)
parabola: https://repo.parabola.nu/ https://www.parabola.nu/mirrors/status/
pkgsrc
puppylinux: https://distro.ibiblio.org/puppylinux/ check the ISO files or htm files in the folders starting with puppy
qtproject: https://download.qt.io/
racket: https://mirror.racket-lang.org/installers/ no public repo, no timestamp, no mirror status tracker; make sure that we have the latest version number under racket-installers
raspberry pi: https://archive.raspberrypi.org/ checking the timestamp of either the Release file or the Packages file should suffice (a sketch follows this list)
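
For the opensuse entry, a minimal sketch of one way to do the Update.repo comparison, using the Last-Modified header rather than anything inside the file; the mirror URL and the exact shape of the hrefs in the upstream listing are assumptions for illustration:

```python
# Sketch only: MIRROR is a placeholder, and relative "somedir/" style hrefs in the
# upstream listing are an assumption about the page layout.
import requests
from bs4 import BeautifulSoup
from email.utils import parsedate_to_datetime

UPSTREAM = "http://download.opensuse.org/update/"
MIRROR = "https://mirror.example.org/opensuse/update/"   # hypothetical mirror path
SKIP = ("tumbleweed/", "tumbleweed-non-oss/")            # temporarily not checked

def last_modified(url):
    """Last-Modified header of a file as a datetime, or None if unavailable."""
    resp = requests.head(url, allow_redirects=True, timeout=30)
    value = resp.headers.get("Last-Modified")
    return parsedate_to_datetime(value) if value else None

def opensuse_in_sync():
    listing = BeautifulSoup(requests.get(UPSTREAM, timeout=30).text, "html.parser")
    for link in listing.find_all("a"):
        href = link.attrs.get("href", "")
        # only descend into real subdirectories of update/
        if not href.endswith("/") or href.startswith("..") or href in SKIP:
            continue
        upstream_date = last_modified(UPSTREAM + href + "Update.repo")
        mirror_date = last_modified(MIRROR + href + "Update.repo")
        if upstream_date is not None and (mirror_date is None or mirror_date < upstream_date):
            return False
    return True

print(opensuse_in_sync())
```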
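
For the raspberry pi entry, a similar sketch comparing the Date field of a Release file on the upstream archive against a mirror's copy; the debian/dists/ path, the suite name, and the mirror URL are assumptions, not values taken from data.json:

```python
# Sketch only: the debian/dists/ layout, suite name and mirror URL are placeholders.
import requests
from email.utils import parsedate_to_datetime

def release_date(base_url, suite="bullseye"):
    """Return the Date field of a Debian-style Release file as a datetime, or None."""
    text = requests.get(f"{base_url}debian/dists/{suite}/Release", timeout=30).text
    for line in text.splitlines():
        if line.startswith("Date:"):
            return parsedate_to_datetime(line.split(":", 1)[1].strip())
    return None

upstream = release_date("https://archive.raspberrypi.org/")
mirror = release_date("https://mirror.example.org/raspberrypi/")  # hypothetical mirror path
# in sync if the mirror's Release file is at least as new as upstream's
print(upstream is not None and mirror is not None and mirror >= upstream)
```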
19 data.json
@@ -7,21 +7,21 @@
         "file": "almalinux/TIME"
     },
     "Alpine": {
-        "out_of_sync_since": 1633923341,
+        "out_of_sync_since": null,
         "out_of_sync_interval": 86400,
         "csc": "",
         "upstream": "https://uk.alpinelinux.org/",
         "file": "alpine/last-updated"
     },
     "Apache": {
-        "out_of_sync_since": null,
+        "out_of_sync_since": 1634453333,
         "out_of_sync_interval": 86400,
         "csc": "apache/",
         "upstream": "https://downloads.apache.org/",
         "file": "zzz/time.txt"
     },
     "Arch": {
-        "out_of_sync_since": 1634433282,
+        "out_of_sync_since": null,
         "out_of_sync_interval": 86400,
         "csc": "archlinux/",
         "upstream": "http://arch.mirror.constant.com/",
@@ -73,7 +73,7 @@
         "file": "project/trace/deb-multimedia.org"
     },
     "DebianPorts": {
-        "out_of_sync_since": 1633294718,
+        "out_of_sync_since": null,
         "out_of_sync_interval": 86400,
         "csc": "",
         "upstream": "https://deb.debian.org/",
@@ -94,7 +94,7 @@
         "file": "TIME"
     },
     "Fedora": {
-        "out_of_sync_since": null,
+        "out_of_sync_since": 1634453333,
         "out_of_sync_interval": 86400,
         "csc": "fedora/",
         "upstream": "http://fedora.mirror.iweb.com/",
@@ -242,7 +242,7 @@
         "file": "lastsync"
     },
     "pkgsrc": {
-        "out_of_sync_since": 1633335556,
+        "out_of_sync_since": null,
         "out_of_sync_interval": 86400,
         "csc": "pkgsrc/",
         "upstream": "http://ftp.netbsd.org/pub/pkgsrc/",
@@ -394,5 +394,12 @@
         "csc": "puppylinux/",
         "upstream": "https://distro.ibiblio.org/puppylinux/",
         "file": ""
-    }
+    },
+    "netbsd": {
+        "out_of_sync_since": null,
+        "out_of_sync_interval": 86400,
+        "csc": "NetBSD/",
+        "upstream": "http://ftp.netbsd.org/pub/NetBSD/",
+        "file": ""
+    }
 }
91 projects/netbsd.py
@@ -0,0 +1,91 @@
from bs4 import BeautifulSoup
import requests
import re
import datefinder  # another date finding library

from project import Project
from shared import CSC_MIRROR

class netbsd(Project):
    """netbsd class"""
    @staticmethod
    def checker(directory_URL, file_name):
        # look for file_name in the HTML directory listing
        page = requests.get(directory_URL).text
        file_index = page.find(file_name)
        # print(page)

        if file_index == -1:
            return False

        # the listing shows modification times as either DD-Mon-YYYY HH:MM or YYYY-Mon-DD HH:MM;
        # grab the first one that appears after the file name
        str_dates = re.findall(r'(\d{2}-\w{3}-\d{4} \d{2}:\d{2})|(\d{4}-\w{3}-\d{2} \d{2}:\d{2})', page[file_index:])
        if not str_dates:  # file name present but no timestamp next to it
            return False

        # print(directory_URL, file_name)
        # print(list(datefinder.find_dates("".join(str_dates[0])))[0])
        return list(datefinder.find_dates("".join(str_dates[0])))[0]

    @classmethod
    def check_version(cls, site1, site2):
        # getting the request from url
        r = requests.get(site1)
        r1 = requests.get(site2)

        page1 = r.text
        page2 = r1.text

        # converting the text
        s1 = BeautifulSoup(page1, "html.parser")
        s2 = BeautifulSoup(page2, "html.parser")

        hrefs1 = s1.find_all("a")
        hrefs2 = s2.find_all("a")

        for i in hrefs1:  # for a href directories
            href = i.attrs['href']

            if re.match(r'NetBSD-\d.*', href):
                # numbered release directories keep a CHANGES file at the top level
                date1 = cls.checker(site1+href, "CHANGES")
                if not date1:  # if the version is empty, ignore it
                    continue
                # a directory or CHANGES file missing on the mirror counts as out of sync
                date2 = cls.checker(site2+href, "CHANGES")
                if (href not in [j.attrs['href'] for j in hrefs2]) or (not date2) or (date1 > date2):
                    return False
            elif href.startswith("NetBSD-") and href != "NetBSD-daily/":
                # other NetBSD-* directories keep their CHANGES file under src/doc/
                date1 = cls.checker(site1+href+"src/doc/", "CHANGES")
                if not date1:
                    continue
                date2 = cls.checker(site2+href+"src/doc/", "CHANGES")
                if (href not in [j.attrs['href'] for j in hrefs2]) or (not date2) or (date1 > date2):
                    return False
        return True

    @classmethod
    def check_iso(cls, site1, site2):
        # getting the request from url
        r = requests.get(site1)
        r1 = requests.get(site2)

        page1 = r.text
        page2 = r1.text

        # converting the text
        s1 = BeautifulSoup(page1, "html.parser")
        s2 = BeautifulSoup(page2, "html.parser")

        hrefs1 = s1.find_all("a")
        hrefs2 = s2.find_all("a")

        for i in hrefs1:  # for a href directories
            href = i.attrs['href']

            if href not in [j.attrs['href'] for j in hrefs2]:
                return False
            # compare the SHA512 and MD5 timestamps; checker() returns False when a
            # directory has no such file, so skip entries the upstream itself lacks
            # and treat a file missing only on the mirror as out of sync
            sha1, sha2 = cls.checker(site1+href, "SHA512"), cls.checker(site2+href, "SHA512")
            if sha1 and (not sha2 or sha1 > sha2):
                return False
            md51, md52 = cls.checker(site1+href, "MD5"), cls.checker(site2+href, "MD5")
            if md51 and (not md52 or md51 > md52):
                return False
        return True

    @classmethod
    def check(cls, data, project):
        """Check if project packages are up-to-date"""

        csc_url = CSC_MIRROR + data[project]["csc"] + data[project]["file"]
        upstream_url = data[project]["upstream"] + data[project]["file"]

        # print(cls.check_version(upstream_url, csc_url))
        # print(cls.check_iso(upstream_url+"iso/", csc_url+"iso/"))
        return cls.check_version(upstream_url, csc_url) and cls.check_iso(upstream_url+"iso/", csc_url+"iso/")
4 test.py
@@ -7,7 +7,7 @@ from datetime import timedelta
 import time
 import pandas as pd
 import re # for salt stack specifically
-from projects import puppy_linux
+from projects import netbsd
 import json # import json to read project info stored in json file

 # this function is brute force looping through the whole directory and checking dates
@@ -65,7 +65,7 @@ def get_latest_date(web_dir):
 if __name__ =="__main__":
     with open("data.json", "r", encoding="utf-8") as file:
         data = json.load(file)
-    print(puppy_linux.check(data, "puppy_linux"))
+    print(netbsd.check(data, "netbsd"))

     """# website to be scrape
     site="https://cdimage.ubuntu.com/releases/"