added mxlinux, mxlinux-iso
This commit is contained in:
parent
1b53f605c9
commit
8195cbb042
|
@ -24,7 +24,9 @@ raspbian mirror: https://mirror.ox.ac.uk/sites/archive.raspbian.org/archive/
|
||||||
|
|
||||||
mxlinux: https://sourceforge.net/projects/mx-linux/ (scrape the last day?)
|
mxlinux: https://sourceforge.net/projects/mx-linux/ (scrape the last day?)
|
||||||
|
|
||||||
|
linuxmint: no public repo
|
||||||
linuxmint-packages pool: http://rsync-packages.linuxmint.com/pool/
|
linuxmint-packages pool: http://rsync-packages.linuxmint.com/pool/
|
||||||
|
macPorts: only distfiles has public repo, no timestamp, too large to loop through
|
||||||
scientific: https://scientificlinux.org/downloads/sl-mirrors/ (CSC not listed)
|
scientific: https://scientificlinux.org/downloads/sl-mirrors/ (CSC not listed)
|
||||||
slackware: https://mirrors.slackware.com/mirrorlist/ https://mirrors.slackware.com/slackware/
|
slackware: https://mirrors.slackware.com/mirrorlist/ https://mirrors.slackware.com/slackware/
|
||||||
ubuntu-ports: http://ports.ubuntu.com/ubuntu-ports/
|
ubuntu-ports: http://ports.ubuntu.com/ubuntu-ports/
|
||||||
|
@ -37,5 +39,7 @@ tdf: https://download.documentfoundation.org/
|
||||||
ubuntu: https://launchpad.net/ubuntu/+mirror/mirror.csclub.uwaterloo.ca-archive
|
ubuntu: https://launchpad.net/ubuntu/+mirror/mirror.csclub.uwaterloo.ca-archive
|
||||||
vlc: http://download.videolan.org/pub/videolan/
|
vlc: http://download.videolan.org/pub/videolan/
|
||||||
manjaro
|
manjaro
|
||||||
|
mxlinux
|
||||||
|
mxlinux-iso: this one seems out of sync on the official tracker for 134 days, which is weird
|
||||||
|
|
||||||
trisquel: https://trisquel.info/mirmon/index.html out of date website
|
trisquel: https://trisquel.info/mirmon/index.html out of date website
|
16
data.json
16
data.json
|
@ -35,7 +35,7 @@
|
||||||
"file": "centos/TIME"
|
"file": "centos/TIME"
|
||||||
},
|
},
|
||||||
"Ceph": {
|
"Ceph": {
|
||||||
"out_of_sync_since": null,
|
"out_of_sync_since": 1633329349,
|
||||||
"out_of_sync_interval": 86400,
|
"out_of_sync_interval": 86400,
|
||||||
"csc": "ceph/",
|
"csc": "ceph/",
|
||||||
"upstream": "https://download.ceph.com/",
|
"upstream": "https://download.ceph.com/",
|
||||||
|
@ -205,5 +205,19 @@
|
||||||
"csc": "",
|
"csc": "",
|
||||||
"upstream": "https://repo.manjaro.org/",
|
"upstream": "https://repo.manjaro.org/",
|
||||||
"file": ""
|
"file": ""
|
||||||
|
},
|
||||||
|
"mxlinux": {
|
||||||
|
"out_of_sync_since": null,
|
||||||
|
"out_of_sync_interval": 86400,
|
||||||
|
"csc": "",
|
||||||
|
"upstream": "http://rsync-mxlinux.org/mirmon/packages.html",
|
||||||
|
"file": ""
|
||||||
|
},
|
||||||
|
"mxlinux_iso": {
|
||||||
|
"out_of_sync_since": null,
|
||||||
|
"out_of_sync_interval": 86400,
|
||||||
|
"csc": "",
|
||||||
|
"upstream": "http://rsync-mxlinux.org/mirmon/index.html",
|
||||||
|
"file": ""
|
||||||
}
|
}
|
||||||
}
|
}
|
2
main.py
2
main.py
|
@ -67,7 +67,7 @@ if __name__ == "__main__":
|
||||||
print(f"Failure: {project} does not exist")
|
print(f"Failure: {project} does not exist")
|
||||||
continue
|
continue
|
||||||
project_class = getattr(sys.modules[__name__], project)
|
project_class = getattr(sys.modules[__name__], project)
|
||||||
if project == "CPAN" or project == "ubuntu" or project == "manjaro":
|
if project == "CPAN" or project == "ubuntu" or project == "manjaro" or project == "mxlinux" or project == "mxlinux_iso":
|
||||||
checker_result = project_class.check(data, project, current_time)
|
checker_result = project_class.check(data, project, current_time)
|
||||||
if checker_result:
|
if checker_result:
|
||||||
print(f"Success: {project} up-to-date")
|
print(f"Success: {project} up-to-date")
|
||||||
|
|
|
@ -0,0 +1,26 @@
|
||||||
|
"""
|
||||||
|
Contains mxlinux class
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
from project import Project
|
||||||
|
from shared import CSC_MIRROR
|
||||||
|
import requests
|
||||||
|
import datefinder # another date finding library
|
||||||
|
from datetime import timedelta
|
||||||
|
from datetime import datetime
|
||||||
|
import re
|
||||||
|
import pandas as pd
|
||||||
|
|
||||||
|
class mxlinux(Project):
    """Checker for the mxlinux project.

    Decides whether the CSC mirror is up to date by reading the age that the
    upstream status page (``data[project]["upstream"]``) reports next to the
    mirror's hostname.
    """

    @staticmethod
    def check(data, project, current_time):
        """Return whether the upstream status page reports the mirror as fresh.

        Args:
            data: Mapping of project name to its configuration. This method
                reads ``data[project]["upstream"]`` (URL of the status page)
                and ``data[project]["out_of_sync_interval"]`` (seconds).
            project: Key of the project inside ``data``.
            current_time: Not used by this checker; accepted so the call in
                main.py can pass the same arguments to every checker.

        Returns:
            True when the first hour/day age found after the mirror's hostname
            is no larger than the allowed interval. False when the age exceeds
            the interval, when the hostname is not on the page, or when no age
            can be found after it.
        """
        page = requests.get(data[project]["upstream"]).text

        mirror_pos = page.find("mirror.csclub.uwaterloo.ca")
        if mirror_pos == -1:
            # The mirror is not listed at all. Without this guard the slice
            # below would silently degrade to page[-1:].
            return False

        # Accept integer or fractional counts ("3 days", "7.5 hours",
        # "134 days"). The previous pattern required at least two digits for
        # days and dropped the integer part of fractional hours.
        # NOTE(review): assumes the page prints the mirror age as
        # "<number> hours" / "<number> days" -- confirm against the live page.
        age = re.search(r'\d+(?:\.\d+)? (?:hour|day)s?', page[mirror_pos:])
        if age is None:
            # No parsable age follows the hostname; report "not up to date"
            # instead of failing with AttributeError on None.
            return False

        duration = pd.to_timedelta(age.group(0))

        return duration <= pd.to_timedelta(data[project]["out_of_sync_interval"], unit='s')
|
|
@ -0,0 +1,26 @@
|
||||||
|
"""
|
||||||
|
Contains mxlinux_iso class
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
from project import Project
|
||||||
|
from shared import CSC_MIRROR
|
||||||
|
import requests
|
||||||
|
import datefinder # another date finding library
|
||||||
|
from datetime import timedelta
|
||||||
|
from datetime import datetime
|
||||||
|
import re
|
||||||
|
import pandas as pd
|
||||||
|
|
||||||
|
class mxlinux_iso(Project):
    """Checker for the mxlinux_iso project.

    Decides whether the CSC mirror is up to date by reading the age that the
    upstream status page (``data[project]["upstream"]``) reports next to the
    mirror's hostname.
    """

    @staticmethod
    def check(data, project, current_time):
        """Return whether the upstream status page reports the mirror as fresh.

        Args:
            data: Mapping of project name to its configuration. This method
                reads ``data[project]["upstream"]`` (URL of the status page)
                and ``data[project]["out_of_sync_interval"]`` (seconds).
            project: Key of the project inside ``data``.
            current_time: Not used by this checker; accepted so the call in
                main.py can pass the same arguments to every checker.

        Returns:
            True when the first hour/day age found after the mirror's hostname
            is no larger than the allowed interval. False when the age exceeds
            the interval, when the hostname is not on the page, or when no age
            can be found after it.
        """
        page = requests.get(data[project]["upstream"]).text

        mirror_pos = page.find("mirror.csclub.uwaterloo.ca")
        if mirror_pos == -1:
            # The mirror is not listed at all. Without this guard the slice
            # below would silently degrade to page[-1:].
            return False

        # Accept integer or fractional counts ("3 days", "7.5 hours",
        # "134 days"). The previous pattern required at least two digits for
        # days and dropped the integer part of fractional hours.
        # NOTE(review): assumes the page prints the mirror age as
        # "<number> hours" / "<number> days" -- confirm against the live page.
        age = re.search(r'\d+(?:\.\d+)? (?:hour|day)s?', page[mirror_pos:])
        if age is None:
            # No parsable age follows the hostname; report "not up to date"
            # instead of failing with AttributeError on None.
            return False

        duration = pd.to_timedelta(age.group(0))

        return duration <= pd.to_timedelta(data[project]["out_of_sync_interval"], unit='s')
|
43
test.py
43
test.py
|
@ -6,11 +6,14 @@ from datetime import datetime
|
||||||
from datetime import timedelta
|
from datetime import timedelta
|
||||||
import time
|
import time
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
|
|
||||||
|
# this function is brute force looping through the whole directory and checking dates
|
||||||
|
# it may sound horrible, but for certain distros, i believe it's indeed the best solution
|
||||||
|
|
||||||
# lists
|
# lists
|
||||||
urls=[]
|
urls=[]
|
||||||
|
|
||||||
home_site = "http://ports.ubuntu.com"
|
home_site = "http://ykf.ca.distfiles.macports.org"
|
||||||
|
|
||||||
# function created
|
# function created
|
||||||
def scrape(site):
|
def scrape(site):
|
||||||
|
@ -25,8 +28,8 @@ def scrape(site):
|
||||||
href = i.attrs['href']
|
href = i.attrs['href']
|
||||||
|
|
||||||
if href.endswith("/") and href != "../" and href != "/":
|
if href.endswith("/") and href != "../" and href != "/":
|
||||||
if home_site+href in urls: # avoids the link to parent directory
|
"""if home_site+href in urls: # avoids the link to parent directory
|
||||||
continue
|
continue"""
|
||||||
site_next = site+href
|
site_next = site+href
|
||||||
|
|
||||||
if site_next not in urls:
|
if site_next not in urls:
|
||||||
|
@ -38,7 +41,7 @@ def scrape(site):
|
||||||
def get_latest_date(web_dir):
|
def get_latest_date(web_dir):
|
||||||
page = requests.get(site).text
|
page = requests.get(site).text
|
||||||
|
|
||||||
str_dates = re.findall(r'\d{4}-\d{2}-\d{2} \d{2}:\d{2}', page)
|
str_dates = re.findall(r'(\d{2}-\w{3}-\d{4} \d{2}:\d{2})|(\d{4}-\d{2}-\d{2} \d{2}:\d{2})', page)
|
||||||
dates = [list(datefinder.find_dates(date))[0] for date in str_dates]
|
dates = [list(datefinder.find_dates(date))[0] for date in str_dates]
|
||||||
|
|
||||||
# for date in dates:
|
# for date in dates:
|
||||||
|
@ -50,27 +53,31 @@ def get_latest_date(web_dir):
|
||||||
if __name__ =="__main__":
|
if __name__ =="__main__":
|
||||||
|
|
||||||
# website to be scraped
|
# website to be scraped
|
||||||
# site="http://ports.ubuntu.com/ubuntu-ports/"
|
#site="http://ykf.ca.distfiles.macports.org/MacPorts/mpdistfiles/"
|
||||||
# works on: https://www.x.org/releases/
|
# works on: https://www.x.org/releases/
|
||||||
|
# https://mirror.csclub.uwaterloo.ca/linuxmint/ #works wonders for linuxmint
|
||||||
|
# unfortunately, linuxmint does not have a public repo, the worldwide mirror LayerOnline on https://linuxmint.com/mirrors.php seems like the best choice
|
||||||
|
|
||||||
# calling function
|
# calling function
|
||||||
# scrape(site)
|
#scrape(site)
|
||||||
|
|
||||||
# latest_date = get_latest_date(urls[0])
|
#latest_date = get_latest_date(urls[0])
|
||||||
# get_latest_date(urls[0])
|
# get_latest_date(urls[0])
|
||||||
# for dir in urls:
|
#for dir in urls:
|
||||||
# latest_date2 = get_latest_date(dir)
|
# latest_date2 = get_latest_date(dir)
|
||||||
# if (latest_date2 >= latest_date):
|
# if (latest_date2 >= latest_date):
|
||||||
# latest_date = latest_date2
|
# latest_date = latest_date2
|
||||||
|
|
||||||
# print(latest_date)
|
#print(latest_date)
|
||||||
|
|
||||||
page = requests.get("https://repo.manjaro.org/").text
|
page = requests.get("http://rsync-mxlinux.org/mirmon/index.html").text
|
||||||
indexOfFile = page.find("mirror.csclub.uwaterloo.ca/manjaro")
|
indexOfFile = page.find("mirror.csclub.uwaterloo.ca")
|
||||||
|
|
||||||
m = re.search(r'(?P<hours>\d+):(?P<minutes>\d+)', page[indexOfFile:]) # solution from: https://stackoverflow.com/questions/21074100/how-to-convert-standard-timedelta-string-to-timedelta-object/21074460
|
m = re.search(r'(\d+ hours)|(\d+(\.)?\d+ days)', page[indexOfFile:]) # solution from: https://stackoverflow.com/questions/21074100/how-to-convert-standard-timedelta-string-to-timedelta-object/21074460
|
||||||
duration = timedelta(**{key: float(val) for key, val in m.groupdict().items()})
|
|
||||||
print(duration)
|
duration = pd.to_timedelta(m.group(0))
|
||||||
|
|
||||||
print (duration <= pd.to_timedelta(86400, unit='s'))
|
print (duration <= pd.to_timedelta(86400, unit='s'))
|
||||||
|
|
||||||
|
# https://launchpad.net/ubuntu/+mirror/mirror.csclub.uwaterloo.ca-archive
|
||||||
|
|
Loading…
Reference in New Issue