diff --git a/data.json b/data.json
index 99102d8..f464d33 100644
--- a/data.json
+++ b/data.json
@@ -174,7 +174,7 @@
         "out_of_sync_interval": 172800
     },
     "KDE": {
-        "out_of_sync_since": 1659116720,
+        "out_of_sync_since": null,
         "out_of_sync_interval": 86400,
         "csc": "kde/",
         "upstream": "https://kde.c3sl.ufpr.br/",
@@ -209,7 +209,7 @@
         "file": "dists/"
     },
     "macports": {
-        "out_of_sync_since": 1642827723,
+        "out_of_sync_since": null,
         "out_of_sync_interval": 86400,
         "csc": "MacPorts/mpdistfiles/",
         "upstream": "https://distfiles.macports.org/",
@@ -321,7 +321,7 @@
         "file": "dists/"
     },
     "raspbian": {
-        "out_of_sync_since": 1659116721,
+        "out_of_sync_since": null,
         "out_of_sync_interval": 86400,
         "csc": "raspbian/",
         "upstream": "http://archive.raspbian.org/",
@@ -374,10 +374,10 @@
         "file": ""
     },
     "ubuntu_ports": {
-        "out_of_sync_since": 1651550528,
+        "out_of_sync_since": null,
         "out_of_sync_interval": 86400,
         "csc": "ubuntu-ports/",
-        "upstream": "http://ports.ubuntu.com/ubuntu-ports/",
+        "upstream": "https://launchpad.net/ubuntu/+mirror/mirror.csclub.uwaterloo.ca-archive",
         "file": "dists/"
     },
     "ubuntu_ports_releases": {
@@ -422,4 +422,4 @@
         "upstream": "https://cdimage.ubuntu.com/xubuntu/releases/",
         "file": ""
     }
-}
\ No newline at end of file
+}
diff --git a/projects/macports.py b/projects/macports.py
index 4bd3184..a6bc5aa 100644
--- a/projects/macports.py
+++ b/projects/macports.py
@@ -5,6 +5,7 @@
 from project import Project
 from shared import CSC_MIRROR
 import datefinder # another date finding library
+from datetime import timedelta
 
 class macports(Project):
     """macports class"""
@@ -13,18 +14,20 @@
     def checker(directory_URL, file_name):
         page = requests.get(directory_URL).text
         file_index = page.find(file_name)
-        # print(page)
+        end_index = page[file_index:].find("</tr>") + file_index
+
+        # The CSC mirror does not use tr tags, so end_index will be set to the end of the file
+        if end_index == (file_index - 1):
+            end_index = len(page) - 1
 
         # remove stray numbers (file size numbers in particular) that might interfere with date finding
-        segment_clean = re.sub(r'\s\d+\s', ' ', page[file_index:]) # removes numbers for size
-        segment_clean = re.sub(r'\s\d+\w*\s', ' ', page[file_index:]) # removes numbers + size unit. e.x. 50kb
-        # print(segment_clean)
+        segment_clean = re.sub(r'\s\d+\s', ' ', page[file_index:end_index]) # removes numbers for size
+        segment_clean = re.sub(r'\s\d+\w*\s', ' ', segment_clean) # removes numbers + size unit, e.g. 50kb
 
         # finds the dates in the segment after the file name
         # notes: a generator will be returned by the datefinder module. I'm typecasting it to a list. Please read the note of caution provided at the bottom.
         matches = list(datefinder.find_dates(segment_clean))
-        # print(matches[0])
 
         return matches[0]
 
     @classmethod
@@ -34,4 +37,14 @@
         upstream_url = data[project]["upstream"]
         file_name = data[project]["file"]
 
-        return cls.checker(csc_url, file_name) == cls.checker(upstream_url, file_name)
+        downstreamDate = cls.checker(csc_url, file_name)
+        upstreamDate = cls.checker(upstream_url, file_name)
+
+        if upstreamDate < downstreamDate:
+            timeDiff = downstreamDate - upstreamDate
+        else:
+            timeDiff = upstreamDate - downstreamDate
+
+        # MacPorts is updated so often that we want to make sure we are
+        # at most 6 hours out of date
+        return timeDiff < timedelta(hours=6)
diff --git a/projects/ubuntu_ports.py b/projects/ubuntu_ports.py
index fcbaef5..2eade49 100644
--- a/projects/ubuntu_ports.py
+++ b/projects/ubuntu_ports.py
@@ -1,39 +1,16 @@
-from bs4 import BeautifulSoup
-import requests
+"""
+Contains ubuntu_ports class
+"""
+
 from project import Project
 from shared import CSC_MIRROR
+from shared import NUM_UBUNTU_RELEASES
+import requests
 
 class ubuntu_ports(Project):
     """ubuntu_ports class"""
-    @staticmethod
-    def scrape(site1, site2):
-        # getting the request from url
-        r1 = requests.get(site1)
-        r2 = requests.get(site2)
-
-        # converting the text
-        s1 = BeautifulSoup(r1.text,"html.parser")
-        s2 = BeautifulSoup(r2.text,"html.parser")
-
-        hrefs1 = [i.attrs['href'] for i in s1.find_all("a")]
-        hrefs2 = [i.attrs['href'] for i in s2.find_all("a")]
-
-        for href in hrefs1: # for a href directories
-            if href.endswith("/") and href != "../" and href != "/" and not href.startswith("/"):
-                # print(href)
-                if href not in hrefs2:
-                    return False
-                elif requests.get(site1+href+"Release").text != requests.get(site2+href+"Release").text:
-                    return False
-        return True
-
-    @classmethod
-    def check(cls, data, project, current_time):
-        """Check if project packages are up-to-date"""
-
-        csc_url = CSC_MIRROR + data[project]["csc"] + data[project]["file"]
-        upstream_url = data[project]["upstream"] + data[project]["file"]
-
-        # calling function
-        return cls.scrape(upstream_url, csc_url)
+    @staticmethod
+    def check(data, project, current_time):
+        page = requests.get(data[project]["upstream"]).text
+        return page.count("Up to date") == NUM_UBUNTU_RELEASES
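
Reviewer note (not part of the patch): the new macports check relies on two behaviours worth verifying locally. datefinder.find_dates returns a lazy generator, so it must be materialized before indexing, and the freshness test passes only when the two directory-listing dates are within six hours of each other. A minimal standalone sketch of both, using made-up timestamps:

    from datetime import datetime, timedelta

    import datefinder

    # find_dates yields datetimes lazily; list() materializes it so [0] is valid
    matches = list(datefinder.find_dates("Last updated 29 Jul 2022 14:25"))
    print(matches[0])  # should print a datetime near 2022-07-29 14:25

    # the same comparison check() performs, collapsed with abs()
    csc_date = datetime(2022, 7, 29, 14, 25)       # made-up mirror timestamp
    upstream_date = datetime(2022, 7, 29, 19, 25)  # made-up upstream timestamp
    print(abs(upstream_date - csc_date) < timedelta(hours=6))  # True: 5 hours apart

Using abs() as above would also let the if/else in check() collapse to a single line, though the explicit branch in the patch is equivalent.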
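Likewise, the rewritten ubuntu_ports check assumes the Launchpad mirror-status page renders one "Up to date" cell per tracked release, and that NUM_UBUNTU_RELEASES in shared.py matches that count. A quick standalone way to eyeball the assumption before merging (again, not part of the patch):

    import requests

    STATUS_URL = "https://launchpad.net/ubuntu/+mirror/mirror.csclub.uwaterloo.ca-archive"
    page = requests.get(STATUS_URL).text

    # the mirror counts as current only if every freshness cell reads "Up to date"
    print(page.count("Up to date"))

One trade-off of this approach: when Launchpad adds or retires a release, NUM_UBUNTU_RELEASES must be updated by hand or the check will report the mirror as out of sync.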