forked from public/mirror-checker
added macports
This commit is contained in:
parent
3d5eee45db
commit
26e57b4d4d
|
@ -19,11 +19,10 @@ even if the date relies on a specific file in their repo, we can still find the
|
|||
to find repos of the mirrored projects to check, just search "projectName mirrors"
|
||||
|
||||
not done:
|
||||
macPorts: only distfiles has public repo, no timestamp, too large to loop through
|
||||
macPorts: only distfiles has a public repo; it has no timestamp and is too large to loop through, so we compare ports.tar.gz in distfiles instead
|
||||
NetBSD: http://ftp.netbsd.org/pub/NetBSD/ has public repo, no timestamp, web directory hard to loop through, no mirror tracker
|
||||
opensuse: http://download.opensuse.org/ has a public repo and a possible timestamp called "latest" in history, though our mirror doesn't have this file; no mirror tracker
|
||||
puppylinux: https://distro.ibiblio.org/puppylinux/ check the ISO files in the folders starting with puppy
|
||||
racket: https://mirror.racket-lang.org/installers/ no public repo, no timestamp, no mirror status tracker; make sure that we have the latest version number under racket-installers
|
||||
x.org: https://www.x.org/releases/ no timestamp, but candidate for brute force looping since it has few folders, no status tracker
|
||||
Xiph: no timestamp, too big to loop through, no status tracker
|
||||
|
||||
|
@ -71,6 +70,7 @@ openbsd
|
|||
parabola: https://repo.parabola.nu/ https://www.parabola.nu/mirrors/status/
|
||||
pkgsrc
|
||||
qtproject: https://download.qt.io/
|
||||
racket: https://mirror.racket-lang.org/installers/ no public repo, no timestamp, no mirror status tracker; make sure that we have the latest version number under racket-installers
|
||||
raspberry pi: https://archive.raspberrypi.org/ Checking the timestamp of either the Release file or the Packages file should suffice.
|
||||
raspbian: http://archive.raspbian.org/raspbian/ snapshotindex.txt is most likely a timestamp, though I'm not sure. Also, I think our mirror is completely outdated; it's not listed on the official mirror list.
|
||||
sagemath: same source tarballs as them (the sage-*.tar.gz files under 'Source Code')
|
||||
|
|
|
@ -359,5 +359,12 @@
|
|||
"csc": "racket/racket-installers/",
|
||||
"upstream": "https://mirror.racket-lang.org/installers/",
|
||||
"file": ""
|
||||
},
|
||||
"macports": {
|
||||
"out_of_sync_since": 1634339590,
|
||||
"out_of_sync_interval": 86400,
|
||||
"csc": "MacPorts/mpdistfiles/",
|
||||
"upstream": "https://distfiles.macports.org/",
|
||||
"file": "ports.tar.gz"
|
||||
}
|
||||
}
|
|
@ -0,0 +1,37 @@
|
|||
import requests
|
||||
import re # import regular expressions to remove stray numbers in string that might interfere with date finding
|
||||
import json # import json to read project info stored in json file
|
||||
from project import Project
|
||||
from shared import CSC_MIRROR
|
||||
|
||||
import datefinder # another date finding library
|
||||
|
||||
class macports(Project):
    """Checker for the MacPorts distfiles mirror.

    Freshness is judged by comparing the date shown next to one file in the
    CSC mirror's directory listing with the date shown next to the same file
    in the upstream directory listing.
    """

    # Seconds to wait for a directory listing before giving up, so that an
    # unresponsive server cannot hang the whole check run.
    _REQUEST_TIMEOUT_SECONDS = 30

    @staticmethod
    def checker(directory_URL, file_name):
        """Return the first date listed after ``file_name`` at ``directory_URL``.

        Args:
            directory_URL: URL of a web page listing the directory contents.
            file_name: Name of the file whose listed date is wanted.

        Returns:
            The first date that ``datefinder`` finds in the page text that
            follows the first occurrence of ``file_name``.

        Raises:
            IndexError: If ``file_name`` does not occur in the page, or if no
                date can be found after it. (The exception type matches what
                the unchecked ``matches[0]`` lookup used to raise.)
        """
        page = requests.get(
            directory_URL, timeout=macports._REQUEST_TIMEOUT_SECONDS
        ).text

        file_index = page.find(file_name)
        if file_index == -1:
            # str.find signals "not found" with -1; slicing from -1 would
            # silently search only the last character of the page.
            raise IndexError(
                f"{file_name!r} not found in listing at {directory_URL}"
            )

        # Strip stray numbers, optionally followed by a unit (e.g. "50kb"),
        # so that file sizes are not mistaken for parts of a date.
        segment_clean = re.sub(r'\s\d+\w*\s', ' ', page[file_index:])

        # find_dates yields matches lazily; only the first one is needed.
        first_date = next(iter(datefinder.find_dates(segment_clean)), None)
        if first_date is None:
            raise IndexError(
                f"no date found after {file_name!r} at {directory_URL}"
            )
        return first_date

    @classmethod
    def check(cls, data, project):
        """Check if project packages are up-to-date.

        Args:
            data: Mapping of project name to its settings; the entry for
                ``project`` must provide the keys "csc", "upstream" and
                "file".
            project: Key of the project entry to check in ``data``.

        Returns:
            True when the date listed for the configured file on the CSC
            mirror equals the date listed for it upstream, False otherwise.
        """
        settings = data[project]
        csc_url = CSC_MIRROR + settings["csc"]
        upstream_url = settings["upstream"]
        file_name = settings["file"]

        return cls.checker(csc_url, file_name) == cls.checker(upstream_url, file_name)
|
4
test.py
4
test.py
|
@ -7,7 +7,7 @@ from datetime import timedelta
|
|||
import time
|
||||
import pandas as pd
|
||||
import re # for salt stack specifically
|
||||
from projects import racket
|
||||
from projects import macports
|
||||
import json # import json to read project info stored in json file
|
||||
|
||||
# this function is brute force looping through the whole directory and checking dates
|
||||
|
@ -65,7 +65,7 @@ def get_latest_date(web_dir):
|
|||
if __name__ =="__main__":
|
||||
with open("data.json", "r", encoding="utf-8") as file:
|
||||
data = json.load(file)
|
||||
print(racket.check(data, "racket"))
|
||||
print(macports.check(data, "macports"))
|
||||
|
||||
"""# website to be scrape
|
||||
site="https://cdimage.ubuntu.com/releases/"
|
||||
|
|
Loading…
Reference in New Issue