added sagemath and saltstack

dev
Tom 12 months ago
parent c76ae9c325
commit 0b3e36a8ff
  1. 6
      README.md
  2. 22
      data.json
  3. 42
      projects/sage.py
  4. 25
      projects/saltstack.py
  5. 4
      test.py

@ -23,9 +23,7 @@ macPorts: only distfiles has public repo, no timestamp, too large to loop throug
NetBSD: http://ftp.netbsd.org/pub/NetBSD/ has public repo, no timestamp, web directory hard to loop through, no mirror tracker
opensuse: http://download.opensuse.org/ has public repo, a possible timestamp called latest in history (though our mirror doesn't have this file), no mirror tracker
puppylinux: https://distro.ibiblio.org/puppylinux/ check the ISO files in the folders starting with puppy
racket: no public repo, no timestamp, no mirror status tracker
sagemath: don't know how to deal with this, it's a website
salt stack: don't know how to deal with this, it's a website
racket: https://mirror.racket-lang.org/installers/ no public repo, no timestamp, no mirror status tracker; make sure that we have the latest version under racket-installers
x.org: https://www.x.org/releases/ no timestamp, but candidate for brute force looping since it has few folders, no status tracker
Xiph: no timestamp, too big to loop through, no status tracker
@ -75,6 +73,8 @@ pkgsrc
qtproject: https://download.qt.io/
raspberry pi: https://archive.raspberrypi.org/ Checking the timestamp of either the Release file or the Packages file should suffice.
raspbian: http://archive.raspbian.org/raspbian/ snapshotindex.txt is most likely a timestamp, though I'm not sure. Also, I think our mirror is completely outdated; it's not listed on the official mirror list.
sagemath: check that we have the same source tarballs as the other mirrors (the sage-*.tar.gz files under 'Source Code')
salt stack: checking the "Latest release" text under the 'About' header
scientific: https://scientificlinux.org/downloads/sl-mirrors/ not checking this one since it's abandoned
slackware: https://mirrors.slackware.com/mirrorlist/ https://mirrors.slackware.com/slackware/ checking using the last updated date here, don't know if it's entirely accurate
tdf: https://download.documentfoundation.org/

@ -52,7 +52,7 @@
"file": "x86/sha512.sum"
},
"Debian": {
"out_of_sync_since": 1633337502,
"out_of_sync_since": null,
"out_of_sync_interval": 86400,
"csc": "",
"upstream": "https://ftp-master.debian.org/",
@ -80,7 +80,7 @@
"file": "debian-ports/project/trace/porta.debian.org"
},
"DebianSecurity": {
"out_of_sync_since": null,
"out_of_sync_since": 1634275264,
"out_of_sync_interval": 86400,
"csc": "",
"upstream": "http://debian.mirror.iweb.ca/",
@ -147,7 +147,7 @@
"file": "gutenberg.dcs"
},
"IPFire": {
"out_of_sync_since": 1634257890,
"out_of_sync_since": null,
"out_of_sync_interval": 172800
},
"KDE": {
@ -228,7 +228,7 @@
"file": "last-updated.txt"
},
"nongnu": {
"out_of_sync_since": null,
"out_of_sync_since": 1634275264,
"out_of_sync_interval": 86400,
"csc": "nongnu/",
"upstream": "http://download-mirror.savannah.gnu.org/releases/",
@ -338,5 +338,19 @@
"csc": "linuxmint/",
"upstream": "https://mirrors.edge.kernel.org/linuxmint/",
"file": ""
},
"sage": {
"out_of_sync_since": null,
"out_of_sync_interval": 86400,
"csc": "",
"upstream": "",
"file": "sage/src/index.html"
},
"saltstack": {
"out_of_sync_since": null,
"out_of_sync_interval": 86400,
"csc": "saltstack/",
"upstream": "https://repo.saltproject.io/",
"file": ""
}
}

@ -0,0 +1,42 @@
import requests
import datefinder # another date finding library
import re
from datetime import datetime
from project import Project
from shared import CSC_MIRROR
class sage(Project):
    """Sync checker for the SageMath source mirror.

    The CSC copy of the source index page is compared against the same page
    on two reference mirrors (MIT and SFU) using the listing timestamps
    that appear on each page.
    """

    # Reference copies of the index page that the CSC copy is compared with.
    _REFERENCE_INDEXES = (
        "http://mirrors.mit.edu/sage/src/index.html",
        "https://mirror.rcg.sfu.ca/mirror/sage/src/index.html",
    )
    # Listing timestamps of the form "YYYY-MM-DD HH:MM".
    _TIMESTAMP = re.compile(r'(\d{4}-\d{2}-\d{2} \d{2}:\d{2})')
    # Seconds to wait on a mirror before giving up instead of hanging forever.
    _TIMEOUT = 30

    @staticmethod
    def get_latest_date(dates):
        """Return the most recent datetime among the date strings in *dates*.

        Each string is parsed with ``datefinder`` and the first date found in
        it is used.

        Raises:
            ValueError: if *dates* is empty.
            IndexError: if a string contains no recognisable date.
        """
        return max(list(datefinder.find_dates(date))[0] for date in dates)

    @classmethod
    def _listed_dates(cls, url):
        """Download *url* and return every timestamp string found on the page."""
        page = requests.get(url, timeout=cls._TIMEOUT).text
        return cls._TIMESTAMP.findall(page)

    @classmethod
    def check(cls, data, project):
        """Return True if the CSC mirror looks at least as fresh as the references.

        Args:
            data: mapping of project name to its settings; the "csc" and
                "file" entries of ``data[project]`` are joined onto
                ``CSC_MIRROR`` to build the CSC index URL.
            project: key of the project inside *data*.

        Returns:
            False when the CSC page lists fewer timestamps than the fullest
            reference page, True when it lists more. On a tie, True only if
            the newest CSC timestamp is not older than the newest reference
            timestamp. False when no page yields any timestamp at all, since
            nothing can be confirmed.
        """
        csc_dates = cls._listed_dates(
            CSC_MIRROR + data[project]["csc"] + data[project]["file"]
        )
        reference_dates = [cls._listed_dates(url) for url in cls._REFERENCE_INDEXES]
        most_entries = max(len(dates) for dates in reference_dates)

        if len(csc_dates) < most_entries:
            return False
        if len(csc_dates) > most_entries:
            # If we have more entries than their mirror, ours must be the
            # newer one, since distros only add new versions and don't
            # delete old versions.
            return True
        if not csc_dates:
            # Tie at zero: every page came back without timestamps, so there
            # is nothing to compare (this used to crash in max()).
            return False

        # A reference page without timestamps (e.g. an error page) is skipped
        # rather than passed to get_latest_date, which would raise on [].
        newest_reference = max(
            cls.get_latest_date(dates) for dates in reference_dates if dates
        )
        return cls.get_latest_date(csc_dates) >= newest_reference

@ -0,0 +1,25 @@
import requests
import datefinder # another date finding library
import re
from datetime import datetime
from project import Project
from shared import CSC_MIRROR
class saltstack(Project):
    """Sync checker for the Salt Project repository mirror."""

    @staticmethod
    def check(data, project):
        """Return True if the CSC page shows the same latest release as upstream.

        Both landing pages are searched for the "Latest release: ..." text
        and the matched strings are compared verbatim.

        Args:
            data: mapping of project name to its settings; the "csc",
                "upstream" and "file" entries of ``data[project]`` are used
                to build the two URLs.
            project: key of the project inside *data*.

        Returns:
            True when both pages contain the release text and it is
            identical. False when it differs or when either page lacks the
            text, since the sync state cannot be confirmed then.
        """
        # Pattern kept exactly as before so the compared text is unchanged.
        release_pattern = r'Latest release: (\d)+.(\d)+ \((.+)\)'
        # Seconds to wait on a server before giving up instead of hanging.
        timeout = 30

        csc_url = CSC_MIRROR + data[project]["csc"] + data[project]["file"]
        upstream_url = data[project]["upstream"] + data[project]["file"]

        csc_page = requests.get(csc_url, timeout=timeout).text
        upstream_page = requests.get(upstream_url, timeout=timeout).text

        csc_release = re.search(release_pattern, csc_page)
        upstream_release = re.search(release_pattern, upstream_page)

        # re.search returns None when the text is missing (error page, layout
        # change); report "not in sync" instead of crashing on .group().
        if csc_release is None or upstream_release is None:
            return False

        return csc_release.group(0) == upstream_release.group(0)

@ -7,7 +7,7 @@ from datetime import timedelta
import time
import pandas as pd
import re # for salt stack specifically
from projects import linuxmint
from projects import saltstack
import json # import json to read project info stored in json file
# this function is brute force looping through the whole directory and checking dates
@ -65,7 +65,7 @@ def get_latest_date(web_dir):
if __name__ =="__main__":
with open("data.json", "r", encoding="utf-8") as file:
data = json.load(file)
print(linuxmint.check(data, "linuxmint"))
print(saltstack.check(data, "saltstack"))
"""# website to be scrape
site="https://cdimage.ubuntu.com/releases/"

Loading…
Cancel
Save