From f8917c96878686564ec152f80d4a55296e2f410d Mon Sep 17 00:00:00 2001 From: Tom Date: Mon, 4 Oct 2021 00:49:24 -0700 Subject: [PATCH] nongnu added --- README.md | 2 ++ data.json | 9 ++++++++- projects/mySQL.py | 2 +- projects/nongnu.py | 8 ++++++++ test.py | 26 +++++++++++++------------- 5 files changed, 32 insertions(+), 15 deletions(-) create mode 100644 projects/nongnu.py diff --git a/README.md b/README.md index 8b21b56..e8e563e 100644 --- a/README.md +++ b/README.md @@ -27,6 +27,8 @@ mxlinux: https://sourceforge.net/projects/mx-linux/ (scrap the last day?) linuxmint: no public repo linuxmint-packages pool: http://rsync-packages.linuxmint.com/pool/ macPorts: only distfiles has public repo, no timestamp, too large to loop through +NetBSD: http://ftp.netbsd.org/pub/NetBSD/ has public repo, no timestamp, web directory hard to loop through, no mirror tracker +nongnu: http://download.savannah.nongnu.org/releases/ https://savannah.gnu.org/maintenance/Mirmon/ http://download.savannah.gnu.org/mirmon/savannah/ scientific: https://scientificlinux.org/downloads/sl-mirrors/ (CSC not listed) slackware: https://mirrors.slackware.com/mirrorlist/ https://mirrors.slackware.com/slackware/ ubuntu-ports: http://ports.ubuntu.com/ubuntu-ports/ diff --git a/data.json b/data.json index 752fe96..1fab5b8 100644 --- a/data.json +++ b/data.json @@ -221,10 +221,17 @@ "file": "" }, "mySQL": { - "out_of_sync_since": 1633330958, + "out_of_sync_since": 1633333607, "out_of_sync_interval": 86400, "csc": "mysql/", "upstream": "http://mirrors.sunsite.dk/mysql/", "file": "last-updated.txt" + }, + "nongnu": { + "out_of_sync_since": 1633333607, + "out_of_sync_interval": 86400, + "csc": "nongnu/", + "upstream": "http://download-mirror.savannah.gnu.org/releases/", + "file": "00_TIME.txt" } } \ No newline at end of file diff --git a/projects/mySQL.py b/projects/mySQL.py index 3b3fe97..870ed99 100644 --- a/projects/mySQL.py +++ b/projects/mySQL.py @@ -1,5 +1,5 @@ """ -Contains tdf class +Contains mySQL class """ from project import Project diff --git a/projects/nongnu.py b/projects/nongnu.py new file mode 100644 index 0000000..7bb69c8 --- /dev/null +++ b/projects/nongnu.py @@ -0,0 +1,8 @@ +""" +Contains nongnu class +""" + +from project import Project + +class nongnu(Project): + """nongnu class""" diff --git a/test.py b/test.py index 536a855..52cdeff 100644 --- a/test.py +++ b/test.py @@ -13,7 +13,7 @@ import pandas as pd # lists urls=[] -home_site = "http://ykf.ca.distfiles.macports.org" +home_site = "http://ftp.netbsd.org/pub" # function created def scrape(site): @@ -30,6 +30,8 @@ def scrape(site): if href.endswith("/") and href != "../" and href != "/": """if home_site+href in urls: # avoids the link to parent directory continue""" + if href == "//ftp.netbsd.org/": # netbsd specific code + continue site_next = site+href if site_next not in urls: @@ -53,31 +55,29 @@ def get_latest_date(web_dir): if __name__ =="__main__": # website to be scrape - #site="http://ykf.ca.distfiles.macports.org/MacPorts/mpdistfiles/" + site="http://ftp.netbsd.org/pub/NetBSD/" # works on: https://www.x.org/releases/ # https://mirror.csclub.uwaterloo.ca/linuxmint/ #works wonders for linuxmint # unfortunately, linuxmint does not have a public repo, the worldwide mirror LayerOnline on https://linuxmint.com/mirrors.php seems like the best choice # calling function - #scrape(site) + scrape(site) - #latest_date = get_latest_date(urls[0]) + latest_date = get_latest_date(urls[0]) # get_latest_date(urls[0]) - #for dir in urls: - # latest_date2 = get_latest_date(dir) - # if (latest_date2 >= latest_date): - # latest_date = latest_date2 + for dir in urls: + latest_date2 = get_latest_date(dir) + if (latest_date2 >= latest_date): + latest_date = latest_date2 - #print(latest_date) + print(latest_date) - page = requests.get("http://rsync-mxlinux.org/mirmon/index.html").text + """page = requests.get("http://rsync-mxlinux.org/mirmon/index.html").text indexOfFile = page.find("mirror.csclub.uwaterloo.ca") m = re.search(r'(\d+ hours)|(\d+(\.)?\d+ days)', page[indexOfFile:]) # solution from: https://stackoverflow.com/questions/21074100/how-to-convert-standard-timedelta-string-to-timedelta-object/21074460 duration = pd.to_timedelta(m.group(0)) - print (duration <= pd.to_timedelta(86400, unit='s')) - - # https://launchpad.net/ubuntu/+mirror/mirror.csclub.uwaterloo.ca-archive + print (duration <= pd.to_timedelta(86400, unit='s'))""" \ No newline at end of file