3 changed files with 35 additions and 45 deletions
--- a/data.json
+++ b/data.json
@@ -174,7 +174,7 @@
    "out_of_sync_interval": 172800
  },
  "KDE": {
-    "out_of_sync_since": 1659116720,
+    "out_of_sync_since": null,
    "out_of_sync_interval": 86400,
    "csc": "kde/",
    "upstream": "https://kde.c3sl.ufpr.br/",
@@ -209,7 +209,7 @@
    "file": "dists/"
  },
  "macports": {
-    "out_of_sync_since": 1642827723,
+    "out_of_sync_since": null,
    "out_of_sync_interval": 86400,
    "csc": "MacPorts/mpdistfiles/",
    "upstream": "https://distfiles.macports.org/",
@@ -321,7 +321,7 @@
    "file": "dists/"
  },
  "raspbian": {
-    "out_of_sync_since": 1659116721,
+    "out_of_sync_since": null,
    "out_of_sync_interval": 86400,
    "csc": "raspbian/",
    "upstream": "http://archive.raspbian.org/",
@@ -374,10 +374,10 @@
    "file": ""
  },
  "ubuntu_ports": {
-    "out_of_sync_since": 1651550528,
+    "out_of_sync_since": null,
    "out_of_sync_interval": 86400,
    "csc": "ubuntu-ports/",
-    "upstream": "http://ports.ubuntu.com/ubuntu-ports/",
+    "upstream": "https://launchpad.net/ubuntu/+mirror/mirror.csclub.uwaterloo.ca-archive",
    "file": "dists/"
  },
  "ubuntu_ports_releases": {
--- a/projects/macports.py
+++ b/projects/macports.py
@@ -5,6 +5,7 @@ from project import Project
 from shared import CSC_MIRROR

 import datefinder  # another date finding library
+from datetime import timedelta

 class macports(Project):
    """macports class"""
@@ -13,18 +14,20 @@ class macports(Project):
    def checker(directory_URL, file_name):
        page = requests.get(directory_URL).text
        file_index = page.find(file_name)
-        # print(page)
+        end_index = page[file_index:].find("</tr>") + file_index  # NOTE(review): file_index is -1 when file_name is absent; not handled
+
+        # The CSC mirror's listing has no </tr> tags: find() returns -1 there, so fall back to the end of the page
+        if end_index == (file_index - 1):
+            end_index = len(page)  # slice end is exclusive, so len(page) keeps the last character

        # remove stray numbers (file size numbers in particular) that might interfere with date finding
-        segment_clean = re.sub(r'\s\d+\s', ' ', page[file_index:])  # removes numbers for size
-        segment_clean = re.sub(r'\s\d+\w*\s', ' ', page[file_index:])  # removes numbers + size unit. e.x. 50kb
-        # print(segment_clean)
+        segment_clean = re.sub(r'\s\d+\s', ' ', page[file_index:end_index])  # removes bare numbers (sizes); NOTE: this result is overwritten by the next line
+        segment_clean = re.sub(r'\s\d+\w*\s', ' ', page[file_index:end_index])  # removes numbers + size unit, e.g. 50kb

        # finds the dates in the segment after the file name
        # notes: a generator will be returned by the datefinder module. I'm typecasting it to a list. Please read the note of caution provided at the bottom.
        matches = list(datefinder.find_dates(segment_clean))

-        # print(matches[0])
        return matches[0]
    
    @classmethod
@@ -34,4 +37,14 @@ class macports(Project):
        upstream_url = data[project]["upstream"]
        file_name = data[project]["file"]
        
-        return cls.checker(csc_url, file_name) == cls.checker(upstream_url, file_name)
+        # csc_url is the CSC mirror side (downstream); upstream_url is the origin server.
+        downstream_date = cls.checker(csc_url, file_name)
+        upstream_date = cls.checker(upstream_url, file_name)
+
+        # Absolute gap between the two timestamps, whichever side is newer.
+        # NOTE(review): this also fails the check when the mirror is *ahead* of
+        #   upstream by 6 hours or more -- confirm that is intended.
+        time_diff = abs(upstream_date - downstream_date)
+
+        # MacPorts is updated so often that the mirror must be less than 6 hours out of date
+        return time_diff < timedelta(hours=6)
--- a/projects/ubuntu_ports.py
+++ b/projects/ubuntu_ports.py
@@ -1,39 +1,16 @@
-from bs4 import BeautifulSoup
-import requests
+"""
+Contains ubuntu_ports class
+"""
+
+import os
 from project import Project
 from shared import CSC_MIRROR
+from shared import NUM_UBUNTU_RELEASES
+import requests

 class ubuntu_ports(Project):
    """ubuntu_ports class"""
-
    @staticmethod
-    def scrape(site1, site2):
-        # getting the request from url
-        r1 = requests.get(site1)
-        r2 = requests.get(site2)
-        
-        # converting the text
-        s1 = BeautifulSoup(r1.text,"html.parser")
-        s2 = BeautifulSoup(r2.text,"html.parser")
-
-        hrefs1 = [i.attrs['href'] for i in s1.find_all("a")]
-        hrefs2 = [i.attrs['href'] for i in s2.find_all("a")]
-
-        for href in hrefs1: # for a href directories
-            if href.endswith("/") and href != "../" and href != "/" and not href.startswith("/"):
-                # print(href)
-                if href not in hrefs2:
-                    return False
-                elif requests.get(site1+href+"Release").text != requests.get(site2+href+"Release").text:
-                    return False
-        return True
-
-    @classmethod
-    def check(cls, data, project, current_time):
-        """Check if project packages are up-to-date"""
-        
-        csc_url = CSC_MIRROR + data[project]["csc"] + data[project]["file"]
-        upstream_url = data[project]["upstream"] + data[project]["file"]
-        
-        # calling function
-        return cls.scrape(upstream_url, csc_url)
+    def check(data, project, current_time):  # current_time is not used by this check
+        page = requests.get(data[project]["upstream"]).text  # fetch the project's configured "upstream" page as text
+        return page.count("Up to date") == NUM_UBUNTU_RELEASES  # in sync only if the phrase occurs exactly NUM_UBUNTU_RELEASES times (presumably once per release -- confirm)