added macports

This commit is contained in:
Tom 2021-10-15 16:19:34 -07:00
parent 3d5eee45db
commit 26e57b4d4d
4 changed files with 48 additions and 4 deletions

View File

@ -19,11 +19,10 @@ even if the date relies on a specific file in their repo, we can still find the
to find repos of the mirrored projects to check, just search "projectName mirrors" to find repos of the mirrored projects to check, just search "projectName mirrors"
not done: not done:
macPorts: only distfiles has public repo, no timestamp, too large to loop through macPorts: only distfiles has public repo, no timestamp, too large to loop through, comparing ports.tar.gz in distfiles
NetBSD: http://ftp.netbsd.org/pub/NetBSD/ has public repo, no timestamp, web directory hard to loop through, no mirror tracker NetBSD: http://ftp.netbsd.org/pub/NetBSD/ has public repo, no timestamp, web directory hard to loop through, no mirror tracker
opensuse: http://download.opensuse.org/ has public repo, a possible timestamp called latest in history, our mirror doesn't have this file though, no mirror tracker opensuse: http://download.opensuse.org/ has public repo, a possible timestamp called latest in history, our mirror doesn't have this file though, no mirror tracker
puppylinux: https://distro.ibiblio.org/puppylinux/ check the ISO files in the folders starting with puppy puppylinux: https://distro.ibiblio.org/puppylinux/ check the ISO files in the folders starting with puppy
racket: https://mirror.racket-lang.org/installers/ no public repo, no timestamp, no mirror status tracker make sure that we have the latest version number under racket-installers
x.org: https://www.x.org/releases/ no timestamp, but candidate for brute force looping since it has few folders, no status tracker x.org: https://www.x.org/releases/ no timestamp, but candidate for brute force looping since it has few folders, no status tracker
Xiph: no timestamp, too big to loop through, no status tracker Xiph: no timestamp, too big to loop through, no status tracker
@ -71,6 +70,7 @@ openbsd
parabola: https://repo.parabola.nu/ https://www.parabola.nu/mirrors/status/ parabola: https://repo.parabola.nu/ https://www.parabola.nu/mirrors/status/
pkgsrc pkgsrc
qtproject: https://download.qt.io/ qtproject: https://download.qt.io/
racket: https://mirror.racket-lang.org/installers/ no public repo, no timestamp, no mirror status tracker; make sure that we have the latest version number under racket-installers
raspberry pi: https://archive.raspberrypi.org/ Checking the timestamp of either the Release file or the Packages file should suffice. raspberry pi: https://archive.raspberrypi.org/ Checking the timestamp of either the Release file or the Packages file should suffice.
raspbian: http://archive.raspbian.org/raspbian/ snapshotindex.txt is most likely a timestamp, though I'm not sure. Also, I think our mirror is completely outdated; it's not listed on the official mirror list raspbian: http://archive.raspbian.org/raspbian/ snapshotindex.txt is most likely a timestamp, though I'm not sure. Also, I think our mirror is completely outdated; it's not listed on the official mirror list
sagemath: same source tarballs as them (the sage-*.tar.gz files under 'Source Code') sagemath: same source tarballs as them (the sage-*.tar.gz files under 'Source Code')

View File

@ -359,5 +359,12 @@
"csc": "racket/racket-installers/", "csc": "racket/racket-installers/",
"upstream": "https://mirror.racket-lang.org/installers/", "upstream": "https://mirror.racket-lang.org/installers/",
"file": "" "file": ""
},
"macports": {
"out_of_sync_since": 1634339590,
"out_of_sync_interval": 86400,
"csc": "MacPorts/mpdistfiles/",
"upstream": "https://distfiles.macports.org/",
"file": "ports.tar.gz"
} }
} }

37
projects/macports.py Normal file
View File

@ -0,0 +1,37 @@
import requests
import re # import regular expressions to remove stray numbers in string that might interfere with date finding
import json # import json to read project info stored in json file
from project import Project
from shared import CSC_MIRROR
import datefinder # another date finding library
class macports(Project):
    """Sync checker for the macports project entry.

    Compares the date shown after a reference file name in the CSC mirror's
    directory listing with the date shown after the same file name in the
    upstream listing.
    """

    @staticmethod
    def checker(directory_URL, file_name, timeout=30):
        """Return the first date found after ``file_name`` in a directory page.

        Args:
            directory_URL: URL of the page to download and scan.
            file_name: Text to locate in the page; only the page content from
                its first occurrence onward is searched for dates.
            timeout: Seconds passed to ``requests.get`` so that an
                unresponsive server cannot block the check forever.

        Returns:
            The first item yielded by ``datefinder.find_dates`` for the
            cleaned page segment (presumably a ``datetime`` -- confirm against
            the datefinder documentation; it is not converted to a string).

        Raises:
            IndexError: If ``file_name`` does not occur in the page, or no
                date can be found after it.
        """
        page = requests.get(directory_URL, timeout=timeout).text

        file_index = page.find(file_name)
        if file_index == -1:
            # Without this guard the slice below would silently become
            # page[-1:] and the failure would surface as an opaque IndexError.
            raise IndexError(
                f"{file_name!r} not found in the page at {directory_URL}"
            )

        # Drop whitespace-delimited tokens that start with digits (bare sizes
        # such as "12345" and sizes with a unit such as "50kb") so they are
        # not picked up as part of a date.  A single pattern is enough: the
        # optional \w* also covers the digits-only case.
        segment_clean = re.sub(r'\s\d+\w*\s', ' ', page[file_index:])

        # Only the first date after the file name is needed, so take it
        # straight from the iterator instead of building the full list.
        first_match = next(iter(datefinder.find_dates(segment_clean)), None)
        if first_match is None:
            raise IndexError(
                f"no date found after {file_name!r} in the page at "
                f"{directory_URL}"
            )
        return first_match

    @classmethod
    def check(cls, data, project):
        """Check if project packages are up-to-date.

        Args:
            data: Mapping of project name to its settings; the entry for
                ``project`` must provide the keys ``"csc"``, ``"upstream"``
                and ``"file"``.
            project: Key of the project entry to check.

        Returns:
            True when the date found for the reference file on the CSC mirror
            equals the date found on the upstream page, False otherwise.
        """
        entry = data[project]
        csc_url = CSC_MIRROR + entry["csc"]
        upstream_url = entry["upstream"]
        file_name = entry["file"]
        return cls.checker(csc_url, file_name) == cls.checker(upstream_url, file_name)

View File

@ -7,7 +7,7 @@ from datetime import timedelta
import time import time
import pandas as pd import pandas as pd
import re # for salt stack specifically import re # for salt stack specifically
from projects import racket from projects import macports
import json # import json to read project info stored in json file import json # import json to read project info stored in json file
# this function is brute force looping through the whole directory and checking dates # this function is brute force looping through the whole directory and checking dates
@ -65,7 +65,7 @@ def get_latest_date(web_dir):
if __name__ =="__main__": if __name__ =="__main__":
with open("data.json", "r", encoding="utf-8") as file: with open("data.json", "r", encoding="utf-8") as file:
data = json.load(file) data = json.load(file)
print(racket.check(data, "racket")) print(macports.check(data, "macports"))
"""# website to be scrape """# website to be scrape
site="https://cdimage.ubuntu.com/releases/" site="https://cdimage.ubuntu.com/releases/"