Compare commits


3 Commits

Author        SHA1        Message                                                Date
Justin Toft   365d5dcbac  update ubuntu_ports url to reflect canonical's guide   2022-08-04 19:11:52 -04:00
Justin Toft   9cecd76309  Add looser time check for macports                     2022-08-04 19:07:10 -04:00
Justin Toft   7e77546c3e  Fixed 3 more broken mirror checkers                    2022-08-04 17:53:52 -04:00
3 changed files with 35 additions and 45 deletions


@@ -174,7 +174,7 @@
     "out_of_sync_interval": 172800
   },
   "KDE": {
-    "out_of_sync_since": 1659116720,
+    "out_of_sync_since": null,
     "out_of_sync_interval": 86400,
     "csc": "kde/",
     "upstream": "https://kde.c3sl.ufpr.br/",
@@ -209,7 +209,7 @@
     "file": "dists/"
   },
   "macports": {
-    "out_of_sync_since": 1642827723,
+    "out_of_sync_since": null,
     "out_of_sync_interval": 86400,
     "csc": "MacPorts/mpdistfiles/",
     "upstream": "https://distfiles.macports.org/",
@@ -321,7 +321,7 @@
     "file": "dists/"
   },
   "raspbian": {
-    "out_of_sync_since": 1659116721,
+    "out_of_sync_since": null,
     "out_of_sync_interval": 86400,
     "csc": "raspbian/",
     "upstream": "http://archive.raspbian.org/",
@@ -374,10 +374,10 @@
     "file": ""
   },
   "ubuntu_ports": {
-    "out_of_sync_since": 1651550528,
+    "out_of_sync_since": null,
     "out_of_sync_interval": 86400,
     "csc": "ubuntu-ports/",
-    "upstream": "http://ports.ubuntu.com/ubuntu-ports/",
+    "upstream": "https://launchpad.net/ubuntu/+mirror/mirror.csclub.uwaterloo.ca-archive",
     "file": "dists/"
   },
   "ubuntu_ports_releases": {
@@ -422,4 +422,4 @@
     "upstream": "https://cdimage.ubuntu.com/xubuntu/releases/",
     "file": ""
   }
 }
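
For context on the config change: `out_of_sync_since` appears to be per-project state rather than static configuration, recording when a mirror first failed its check (null while healthy), with `out_of_sync_interval` as the grace period in seconds before the mirror is reported as stale. Resetting the four timestamps to null marks those mirrors as healthy again now that their checkers are fixed. A minimal sketch of how such a record could be evaluated; the `is_stale` helper and the `data.json` file name are assumptions for illustration, not code from this repository:

    import json
    import time

    def is_stale(record, now=None):
        # Hypothetical helper: a mirror counts as out of sync once it has
        # been failing for longer than out_of_sync_interval seconds.
        # out_of_sync_since is null (None in Python) while the mirror is healthy.
        now = now or time.time()
        since = record["out_of_sync_since"]
        return since is not None and (now - since) > record["out_of_sync_interval"]

    with open("data.json") as f:  # file name assumed
        data = json.load(f)

    print(is_stale(data["macports"]))  # False right after the reset to null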


@@ -5,6 +5,7 @@ from project import Project
 from shared import CSC_MIRROR
 import datefinder # another date finding library
+from datetime import timedelta

 class macports(Project):
     """macports class"""
@@ -13,18 +14,20 @@ class macports(Project):
     def checker(directory_URL, file_name):
         page = requests.get(directory_URL).text
         file_index = page.find(file_name)
-        # print(page)
+        end_index = page[file_index:].find("</tr>") + file_index
+        # The CSC mirror does not use tr tags, so end_index will be set to the end of the file
+        if end_index == (file_index - 1):
+            end_index = len(page) - 1

         # remove stray numbers (file size numbers in particular) that might interfere with date finding
-        segment_clean = re.sub(r'\s\d+\s', ' ', page[file_index:]) # removes numbers for size
-        segment_clean = re.sub(r'\s\d+\w*\s', ' ', page[file_index:]) # removes numbers + size unit, e.g. 50kb
-        # print(segment_clean)
+        segment_clean = re.sub(r'\s\d+\s', ' ', page[file_index:end_index]) # removes numbers for size
+        segment_clean = re.sub(r'\s\d+\w*\s', ' ', page[file_index:end_index]) # removes numbers + size unit, e.g. 50kb

         # find the dates in the segment after the file name
         # note: datefinder returns a generator; it is typecast to a list here. Please read the note of caution provided at the bottom.
         matches = list(datefinder.find_dates(segment_clean))
-        # print(matches[0])

         return matches[0]
     @classmethod
@@ -34,4 +37,14 @@ class macports(Project):
         upstream_url = data[project]["upstream"]
         file_name = data[project]["file"]

-        return cls.checker(csc_url, file_name) == cls.checker(upstream_url, file_name)
+        upstreamDate = cls.checker(upstream_url, file_name)
+        downstreamDate = cls.checker(csc_url, file_name)
+
+        if (upstreamDate < downstreamDate):
+            timeDiff = downstreamDate - upstreamDate
+        else:
+            timeDiff = upstreamDate - downstreamDate
+
+        # MacPorts is updated so often that we want to make sure we are
+        # at most 6 hours out of date
+        return timeDiff < timedelta(hours=6)
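
The new `checker` trims the scraped segment at the first closing `</tr>` so that dates from later table rows cannot shadow the target file's date, and `check` now tolerates up to six hours of skew instead of requiring an exact timestamp match. A standalone sketch of that logic with invented sample listings (the real inputs are the two directory pages configured above; names like `parse_listing_date` are illustrative, the repo's method is `checker`):

    import re
    from datetime import timedelta
    import datefinder

    def parse_listing_date(page, file_name):
        file_index = page.find(file_name)
        # Trim at the closing </tr> so dates from later rows are ignored;
        # pages without table rows (like the CSC listing) fall back to the
        # end of the page.
        end_index = page[file_index:].find("</tr>") + file_index
        if end_index == (file_index - 1):
            end_index = len(page) - 1
        # strip sizes like "50M" so they do not confuse the date finder
        segment = re.sub(r'\s\d+\w*\s', ' ', page[file_index:end_index])
        return list(datefinder.find_dates(segment))[0]

    # invented sample rows standing in for the two listings
    upstream = "ports.tar.gz 2022-08-04 18:00 50M </tr>old.tar.gz 2001-01-01"
    mirror = "ports.tar.gz 2022-08-04 16:30 "

    diff = abs(parse_listing_date(upstream, "ports.tar.gz")
               - parse_listing_date(mirror, "ports.tar.gz"))
    print(diff < timedelta(hours=6))  # True: 1.5 h apart, within tolerance

Without the `</tr>` trim, the 2001 date from the next row could end up in the scraped segment; with it, only the target file's row is parsed.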


@@ -1,39 +1,16 @@
-from bs4 import BeautifulSoup
-import requests
-
-import os
+"""
+Contains ubuntu_ports class
+"""
 from project import Project
 from shared import CSC_MIRROR
+from shared import NUM_UBUNTU_RELEASES
+import requests

 class ubuntu_ports(Project):
     """ubuntu_ports class"""
     @staticmethod
-    def scrape(site1, site2):
-        # getting the request from url
-        r1 = requests.get(site1)
-        r2 = requests.get(site2)
-
-        # converting the text
-        s1 = BeautifulSoup(r1.text, "html.parser")
-        s2 = BeautifulSoup(r2.text, "html.parser")
-
-        hrefs1 = [i.attrs['href'] for i in s1.find_all("a")]
-        hrefs2 = [i.attrs['href'] for i in s2.find_all("a")]
-
-        for href in hrefs1: # for a href directories
-            if href.endswith("/") and href != "../" and href != "/" and not href.startswith("/"):
-                # print(href)
-                if href not in hrefs2:
-                    return False
-                elif requests.get(site1 + href + "Release").text != requests.get(site2 + href + "Release").text:
-                    return False
-        return True
-
-    @classmethod
-    def check(cls, data, project, current_time):
-        """Check if project packages are up-to-date"""
-        csc_url = CSC_MIRROR + data[project]["csc"] + data[project]["file"]
-        upstream_url = data[project]["upstream"] + data[project]["file"]
-
-        # calling function
-        return cls.scrape(upstream_url, csc_url)
+    def check(data, project, current_time):
+        page = requests.get(data[project]["upstream"]).text
+        return page.count("Up to date") == NUM_UBUNTU_RELEASES
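
Rather than comparing Release files directory by directory, the rewritten check defers to Launchpad's own mirror report (the new `upstream` URL in the config above), which shows a freshness status such as "Up to date" for each tracked release. Counting those cells against the expected total is the whole check. A sketch of the same idea as a standalone script; `NUM_UBUNTU_RELEASES` lives in `shared.py` and its value is not shown in this diff, so the 24 below is a placeholder:

    import requests

    NUM_UBUNTU_RELEASES = 24  # placeholder; the real constant is defined in shared.py

    def mirror_fully_up_to_date(status_url):
        # Launchpad reports one "Up to date" status per release it tracks;
        # the mirror passes only if every release reports that status.
        page = requests.get(status_url).text
        return page.count("Up to date") == NUM_UBUNTU_RELEASES

    print(mirror_fully_up_to_date(
        "https://launchpad.net/ubuntu/+mirror/mirror.csclub.uwaterloo.ca-archive"))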