forked from public/mirror-checker
added sagemath and saltstack
This commit is contained in:
parent
c76ae9c325
commit
0b3e36a8ff
|
@ -23,9 +23,7 @@ macPorts: only distfiles has public repo, no timestamp, too large to loop throug
|
||||||
NetBSD: http://ftp.netbsd.org/pub/NetBSD/ has public repo, no timestamp, web directory hard to loop through, no mirror tracker
|
NetBSD: http://ftp.netbsd.org/pub/NetBSD/ has public repo, no timestamp, web directory hard to loop through, no mirror tracker
|
||||||
opensuse: http://download.opensuse.org/ has public repo, a possible timestamp called latest in history, our mirror doesn't have this file tho, no mirror tracker
|
opensuse: http://download.opensuse.org/ has public repo, a possible timestamp called latest in history, our mirror doesn't have this file tho, no mirror tracker
|
||||||
puppylinux: https://distro.ibiblio.org/puppylinux/ check the ISO files in the folders starting with puppy
|
puppylinux: https://distro.ibiblio.org/puppylinux/ check the ISO files in the folders starting with puppy
|
||||||
racket: no public repo, no timestamp, no mirror status tracker
|
racket: https://mirror.racket-lang.org/installers/ no public repo, no timestamp, no mirror status tracker; instead, make sure that we have the latest version under racket-installers
|
||||||
sagemath: don't know how to deal with this, it's a website
|
|
||||||
salt stack: don't know how to deal with this, it's a website
|
|
||||||
x.org: https://www.x.org/releases/ no timestamp, but candidate for brute force looping since it has few folders, no status tracker
|
x.org: https://www.x.org/releases/ no timestamp, but candidate for brute force looping since it has few folders, no status tracker
|
||||||
Xiph: no timestamp, too big to loop through, no status tracker
|
Xiph: no timestamp, too big to loop through, no status tracker
|
||||||
|
|
||||||
|
@ -75,6 +73,8 @@ pkgsrc
|
||||||
qtproject: https://download.qt.io/
|
qtproject: https://download.qt.io/
|
||||||
raspberry pi: https://archive.raspberrypi.org/ Checking the timestamp of either the Release file or the Packages file should suffice.
|
raspberry pi: https://archive.raspberrypi.org/ Checking the timestamp of either the Release file or the Packages file should suffice.
|
||||||
raspbian: http://archive.raspbian.org/raspbian/ snapshotindex.txt is most likely a timestamp, tho i'm not sure. also i think our mirror is completely outdated, it's not listed on official mirror list
|
raspbian: http://archive.raspbian.org/raspbian/ snapshotindex.txt is most likely a timestamp, tho i'm not sure. also i think our mirror is completely outdated, it's not listed on official mirror list
|
||||||
|
sagemath: check that we have the same source tarballs as the upstream mirrors (the sage-*.tar.gz files under 'Source Code')
|
||||||
|
salt stack: check that the "Latest release" text under the 'About' header matches upstream
|
||||||
scientific: https://scientificlinux.org/downloads/sl-mirrors/ not checking this one since it's abandoned
|
scientific: https://scientificlinux.org/downloads/sl-mirrors/ not checking this one since it's abandoned
|
||||||
slackware: https://mirrors.slackware.com/mirrorlist/ https://mirrors.slackware.com/slackware/ checking using the last updated date here, don't know if it's entirely accurate
|
slackware: https://mirrors.slackware.com/mirrorlist/ https://mirrors.slackware.com/slackware/ checking using the last updated date here, don't know if it's entirely accurate
|
||||||
tdf: https://download.documentfoundation.org/
|
tdf: https://download.documentfoundation.org/
|
||||||
|
|
22
data.json
22
data.json
|
@ -52,7 +52,7 @@
|
||||||
"file": "x86/sha512.sum"
|
"file": "x86/sha512.sum"
|
||||||
},
|
},
|
||||||
"Debian": {
|
"Debian": {
|
||||||
"out_of_sync_since": 1633337502,
|
"out_of_sync_since": null,
|
||||||
"out_of_sync_interval": 86400,
|
"out_of_sync_interval": 86400,
|
||||||
"csc": "",
|
"csc": "",
|
||||||
"upstream": "https://ftp-master.debian.org/",
|
"upstream": "https://ftp-master.debian.org/",
|
||||||
|
@ -80,7 +80,7 @@
|
||||||
"file": "debian-ports/project/trace/porta.debian.org"
|
"file": "debian-ports/project/trace/porta.debian.org"
|
||||||
},
|
},
|
||||||
"DebianSecurity": {
|
"DebianSecurity": {
|
||||||
"out_of_sync_since": null,
|
"out_of_sync_since": 1634275264,
|
||||||
"out_of_sync_interval": 86400,
|
"out_of_sync_interval": 86400,
|
||||||
"csc": "",
|
"csc": "",
|
||||||
"upstream": "http://debian.mirror.iweb.ca/",
|
"upstream": "http://debian.mirror.iweb.ca/",
|
||||||
|
@ -147,7 +147,7 @@
|
||||||
"file": "gutenberg.dcs"
|
"file": "gutenberg.dcs"
|
||||||
},
|
},
|
||||||
"IPFire": {
|
"IPFire": {
|
||||||
"out_of_sync_since": 1634257890,
|
"out_of_sync_since": null,
|
||||||
"out_of_sync_interval": 172800
|
"out_of_sync_interval": 172800
|
||||||
},
|
},
|
||||||
"KDE": {
|
"KDE": {
|
||||||
|
@ -228,7 +228,7 @@
|
||||||
"file": "last-updated.txt"
|
"file": "last-updated.txt"
|
||||||
},
|
},
|
||||||
"nongnu": {
|
"nongnu": {
|
||||||
"out_of_sync_since": null,
|
"out_of_sync_since": 1634275264,
|
||||||
"out_of_sync_interval": 86400,
|
"out_of_sync_interval": 86400,
|
||||||
"csc": "nongnu/",
|
"csc": "nongnu/",
|
||||||
"upstream": "http://download-mirror.savannah.gnu.org/releases/",
|
"upstream": "http://download-mirror.savannah.gnu.org/releases/",
|
||||||
|
@ -338,5 +338,19 @@
|
||||||
"csc": "linuxmint/",
|
"csc": "linuxmint/",
|
||||||
"upstream": "https://mirrors.edge.kernel.org/linuxmint/",
|
"upstream": "https://mirrors.edge.kernel.org/linuxmint/",
|
||||||
"file": ""
|
"file": ""
|
||||||
|
},
|
||||||
|
"sage": {
|
||||||
|
"out_of_sync_since": null,
|
||||||
|
"out_of_sync_interval": 86400,
|
||||||
|
"csc": "",
|
||||||
|
"upstream": "",
|
||||||
|
"file": "sage/src/index.html"
|
||||||
|
},
|
||||||
|
"saltstack": {
|
||||||
|
"out_of_sync_since": null,
|
||||||
|
"out_of_sync_interval": 86400,
|
||||||
|
"csc": "saltstack/",
|
||||||
|
"upstream": "https://repo.saltproject.io/",
|
||||||
|
"file": ""
|
||||||
}
|
}
|
||||||
}
|
}
|
|
@ -0,0 +1,42 @@
|
||||||
|
import requests
|
||||||
|
import datefinder # another date finding library
|
||||||
|
import re
|
||||||
|
from datetime import datetime
|
||||||
|
from project import Project
|
||||||
|
from shared import CSC_MIRROR
|
||||||
|
|
||||||
|
class sage(Project):
    """Freshness check for the SageMath source mirror.

    The CSC copy of ``sage/src/index.html`` is compared with the same index
    page on two reference mirrors (MIT and SFU). Every ``YYYY-MM-DD HH:MM``
    timestamp found in a page is treated as one listing entry.
    """

    # Reference mirrors whose index pages are compared against ours.
    _UPSTREAM_INDEXES = (
        "http://mirrors.mit.edu/sage/src/index.html",
        "https://mirror.rcg.sfu.ca/mirror/sage/src/index.html",
    )
    # Timestamp format used by the entries of the index pages.
    _TIMESTAMP_RE = re.compile(r'(\d{4}-\d{2}-\d{2} \d{2}:\d{2})')
    # Seconds to wait for a mirror before giving up, so that a stalled
    # server cannot hang the whole checker.
    _REQUEST_TIMEOUT = 30

    @staticmethod
    def get_latest_date(dates):
        """Return the most recent date found in an iterable of strings.

        Each string is parsed with ``datefinder`` and the first date found in
        it is used. Strings that contain no recognisable date are skipped.

        Raises:
            ValueError: if no date could be extracted from ``dates`` at all
                (including when ``dates`` is empty).
        """
        parsed = []
        for text in dates:
            first = next(iter(datefinder.find_dates(text)), None)
            if first is not None:
                parsed.append(first)
        if not parsed:
            raise ValueError("no parseable dates were supplied")
        return max(parsed)

    @classmethod
    def _listing_dates(cls, url):
        """Download ``url`` and return every timestamp string on the page."""
        page = requests.get(url, timeout=cls._REQUEST_TIMEOUT).text
        return cls._TIMESTAMP_RE.findall(page)

    @classmethod
    def check(cls, data, project):
        """Return True when the CSC mirror looks at least as new as upstream.

        Args:
            data: parsed project table; ``data[project]`` must provide the
                ``"csc"`` and ``"file"`` keys used to build the CSC URL.
            project: key of this project inside ``data``.

        Returns:
            False when our listing has fewer entries than the larger
            reference listing, or when our newest timestamp is older than
            the newest reference timestamp. True otherwise.
        """
        csc_dates = cls._listing_dates(
            CSC_MIRROR + data[project]["csc"] + data[project]["file"]
        )
        upstream_dates = [cls._listing_dates(url) for url in cls._UPSTREAM_INDEXES]
        most_upstream_entries = max(len(found) for found in upstream_dates)

        if len(csc_dates) < most_upstream_entries:
            return False
        if len(csc_dates) > most_upstream_entries:
            # if we have more entries than their mirror, ours must be the new one
            # since distros only add new versions, and don't delete old versions
            return True

        if not csc_dates:
            # None of the pages exposed a timestamp, so freshness cannot be
            # confirmed; report out of sync rather than crash on max([]).
            return False

        # Entry counts are equal: compare the newest timestamps. A reference
        # mirror whose page yielded nothing (e.g. it is down) is ignored.
        newest_upstream = max(
            cls.get_latest_date(found) for found in upstream_dates if found
        )
        return cls.get_latest_date(csc_dates) >= newest_upstream
|
||||||
|
|
|
@ -0,0 +1,25 @@
|
||||||
|
import requests
|
||||||
|
import datefinder # another date finding library
|
||||||
|
import re
|
||||||
|
from datetime import datetime
|
||||||
|
from project import Project
|
||||||
|
from shared import CSC_MIRROR
|
||||||
|
|
||||||
|
class saltstack(Project):
    """Freshness check for the Salt Stack mirror.

    Both the CSC copy and the upstream repository publish a
    ``Latest release: <version> (<date>)`` line on their landing page; the
    mirror is considered in sync when the two lines are identical.
    """

    # Accepts plain versions ("3004") as well as dotted ones ("3003.3").
    # The earlier pattern used an unescaped "." and only matched undotted
    # versions by accident.
    _RELEASE_RE = re.compile(r'Latest release: (\d+(?:\.\d+)*) \((.+)\)')
    # Seconds to wait for a server before giving up, so that a stalled
    # mirror cannot hang the whole checker.
    _REQUEST_TIMEOUT = 30

    @staticmethod
    def check(data, project):
        """Return True when CSC and upstream advertise the same release.

        Args:
            data: parsed project table; ``data[project]`` must provide the
                ``"csc"``, ``"upstream"`` and ``"file"`` keys.
            project: key of this project inside ``data``.

        Returns:
            True when both pages contain a "Latest release" line and the
            lines are identical. False when they differ or when either page
            lacks the line (for example an error page was served).
        """
        csc_url = CSC_MIRROR + data[project]["csc"] + data[project]["file"]
        upstream_url = data[project]["upstream"] + data[project]["file"]

        csc_page = requests.get(csc_url, timeout=saltstack._REQUEST_TIMEOUT).text
        upstream_page = requests.get(
            upstream_url, timeout=saltstack._REQUEST_TIMEOUT
        ).text

        csc_release = saltstack._RELEASE_RE.search(csc_page)
        upstream_release = saltstack._RELEASE_RE.search(upstream_page)

        # re.search returns None when the marker is absent; treat that as
        # "cannot confirm in sync" instead of failing on .group().
        if csc_release is None or upstream_release is None:
            return False

        return csc_release.group(0) == upstream_release.group(0)
|
4
test.py
4
test.py
|
@ -7,7 +7,7 @@ from datetime import timedelta
|
||||||
import time
|
import time
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
import re # for salt stack specifically
|
import re # for salt stack specifically
|
||||||
from projects import linuxmint
|
from projects import saltstack
|
||||||
import json # import json to read project info stored in json file
|
import json # import json to read project info stored in json file
|
||||||
|
|
||||||
# this function is brute force looping through the whole directory and checking dates
|
# this function is brute force looping through the whole directory and checking dates
|
||||||
|
@ -65,7 +65,7 @@ def get_latest_date(web_dir):
|
||||||
if __name__ =="__main__":
|
if __name__ =="__main__":
|
||||||
with open("data.json", "r", encoding="utf-8") as file:
|
with open("data.json", "r", encoding="utf-8") as file:
|
||||||
data = json.load(file)
|
data = json.load(file)
|
||||||
print(linuxmint.check(data, "linuxmint"))
|
print(saltstack.check(data, "saltstack"))
|
||||||
|
|
||||||
"""# website to be scrape
|
"""# website to be scrape
|
||||||
site="https://cdimage.ubuntu.com/releases/"
|
site="https://cdimage.ubuntu.com/releases/"
|
||||||
|
|
Loading…
Reference in New Issue