changed linuxmint, ubuntu_ports_releases, xubuntu_releases

Tom 2021-10-17 22:23:55 -07:00
parent 8244ba0cfd
commit e8265a2802
6 changed files with 140 additions and 177 deletions

View File

@@ -275,7 +275,7 @@
"file": "lastsync"
},
"pkgsrc": {
"out_of_sync_since": 1634524215,
"out_of_sync_since": null,
"out_of_sync_interval": 86400,
"csc": "pkgsrc/",
"upstream": "http://ftp.netbsd.org/pub/pkgsrc/",
@@ -345,7 +345,7 @@
"file": "tdf/TIMESTAMP"
},
"trisquel": {
"out_of_sync_since": 1634524215,
"out_of_sync_since": null,
"out_of_sync_interval": 86400,
"csc": "trisquel/",
"upstream": "http://rsync.trisquel.info/trisquel/dists/",

View File

@@ -27,7 +27,7 @@ if __name__ == "__main__":
print(f"Failure: {project} does not exist")
continue
project_class = getattr(sys.modules[__name__], project)
if project == "CPAN" or project == "ubuntu" or project == "ubuntu_releases" or project == "manjaro" or project == "mxlinux" or project == "cran" or project == "ctan" or project == "gentooportage":
if project in ["CPAN", "ubuntu", "ubuntu_releases", "manjaro", "mxlinux", "cran", "ctan", "gentooportage"]:
checker_result = project_class.check(data, project, current_time)
if checker_result:
print(f"Success: {project} up-to-date")

View File

@@ -1,26 +1,40 @@
from bs4 import BeautifulSoup
import requests
import datefinder # another date finding library
import re
from datetime import datetime
from datetime import timedelta
import time
import pandas as pd
import datefinder # another date finding library
from project import Project
from shared import CSC_MIRROR
# This function brute-forces through the whole directory tree and checks dates.
# It may sound horrible, but for certain distros I believe it's indeed the best solution.
# lists
urls=[]
class linuxmint(Project):
"""linuxmint class"""
@staticmethod
def checker(directory_URL, file_name):
page = requests.get(directory_URL).text
file_index = page.find(file_name)
# print(page)
if file_index == -1:
return False
str_dates = re.findall(r'(\d{2}-\w{3}-\d{4} \d{2}:\d{2})|(\d{4}-\d{2}-\d{2} \d{2}:\d{2})', page[file_index:])
return list(datefinder.find_dates("".join(str_dates[0])))[0]
@classmethod
def scrape(cls, urls, site):
def scrape(cls, compare, folders, site1, site2, directory):
if cls.checker(site1+directory, "sha256sum.txt") != False:
# print (site1+directory)
# print (cls.checker(site1+directory, "sha256sum.txt"))
if cls.checker(site2+directory, "sha256sum.txt") != False:
# print (site2+directory)
# print (cls.checker(site2+directory, "sha256sum.txt"))
compare.append(cls.checker(site1+directory, "sha256sum.txt") <= cls.checker(site2+directory, "sha256sum.txt"))
return
compare.append(False)
return
# getting the request from url
r = requests.get(site)
r = requests.get(site1 + directory)
# converting the text
s = BeautifulSoup(r.text,"html.parser")
@@ -28,58 +42,25 @@ class linuxmint(Project):
for i in s.find_all("a"): # for a href directories
href = i.attrs['href']
if href.endswith("/") and href != "../" and href != "/":
site_next = site+href
if site_next not in urls:
urls.append(site_next)
# print(site_next)
# calling itself
cls.scrape(urls, site_next)
@staticmethod
def get_latest_date(web_dir):
page = requests.get(web_dir).text
str_dates = re.findall(r'(\d{2}-\w{3}-\d{4} \d{2}:\d{2})', page)
# if you want to match 1+ patterns, like r'(\d{2}-\w{3}-\d{4} \d{2}:\d{2})|(\d{4}-\d{2}-\d{2} \d{2}:\d{2})', note that findall will return a tuple of two groups!!!
# print(str_dates[0])
if len(str_dates) == 0:
return datetime(1000, 1, 1) # return ridiculously old date to discard this entry, since it has no dates
# for date in str_dates:
# print(date)
dates = [list(datefinder.find_dates(date))[0] for date in str_dates]
# for date in dates:
# print(date)
return(max(dates))
@classmethod
def max_date(cls, urls):
latest_date = cls.get_latest_date(urls[0])
# get_latest_date(urls[0])
for dir in urls:
latest_date2 = cls.get_latest_date(dir)
if (latest_date2 >= latest_date):
latest_date = latest_date2
# print(latest_date)
return latest_date
if href.endswith("/") and href != "../" and href != "/" and not href.startswith("/"):
dir_next = directory+href
# print(dir_next)
# calling itself
if dir_next not in folders:
folders.append(dir_next)
cls.scrape(compare, folders, site1, site2, dir_next)
@classmethod
def check(cls, data, project):
"""Check if project packages are up-to-date"""
# lists
urls1=[]
urls2=[]
compare=[]
folders=[]
csc_url = CSC_MIRROR + data[project]["csc"] + data[project]["file"]
upstream_url = data[project]["upstream"] + data[project]["file"]
# calling function
cls.scrape(urls1, csc_url)
cls.scrape(urls2, upstream_url)
cls.scrape(compare, folders, upstream_url, csc_url, "")
# print(len(urls1), len(urls2))
return cls.max_date(urls1) == cls.max_date(urls2)
return all(compare)
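The three rewritten classes (linuxmint above, ubuntu_ports_releases and xubuntu_releases below) share one idea: instead of crawling both mirrors and comparing the newest date found anywhere, walk the upstream tree once and, wherever a checksum file is listed, require the CSC copy's listing date to be at least as new. A condensed standalone sketch of that strategy follows; the function names, the None returns, and the sha256sum.txt default are illustrative, not taken from the commit.

import re
import requests
import datefinder

DATE_RE = r'\d{2}-\w{3}-\d{4} \d{2}:\d{2}|\d{4}-\d{2}-\d{2} \d{2}:\d{2}'

def listing_date(directory_url, file_name):
    """Date shown next to file_name in an HTML directory listing, or None."""
    page = requests.get(directory_url).text
    idx = page.find(file_name)
    if idx == -1:
        return None
    dates = re.findall(DATE_RE, page[idx:])
    return next(datefinder.find_dates(dates[0]), None) if dates else None

def directory_up_to_date(upstream, mirror, directory, file_name="sha256sum.txt"):
    """True when the mirror's checksum file is at least as new as upstream's."""
    up = listing_date(upstream + directory, file_name)
    if up is None:
        return None  # nothing to compare at this level
    cs = listing_date(mirror + directory, file_name)
    return cs is not None and up <= cs

check() collects one boolean per directory that has a checksum file and reduces them with all(compare), so a single directory where the mirror's copy is missing or older than upstream's marks the project out of date.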

View File

@@ -1,26 +1,50 @@
from bs4 import BeautifulSoup
import requests
import datefinder # another date finding library
import re
from datetime import datetime
from datetime import timedelta
import time
import pandas as pd
import datefinder # another date finding library
from project import Project
from shared import CSC_MIRROR
# This function brute-forces through the whole directory tree and checks dates.
# It may sound horrible, but for certain distros I believe it's indeed the best solution.
# lists
urls=[]
class ubuntu_ports_releases(Project):
"""ubuntu_ports_releases class"""
@staticmethod
def checker(directory_URL, file_name):
page = requests.get(directory_URL).text
file_index = page.find(file_name)
# print(page)
if file_index == -1:
return False
str_dates = re.findall(r'(\d{2}-\w{3}-\d{4} \d{2}:\d{2})|(\d{4}-\d{2}-\d{2} \d{2}:\d{2})', page[file_index:])
return list(datefinder.find_dates("".join(str_dates[0])))[0]
@classmethod
def scrape(cls, urls, site):
def scrape(cls, compare, folders, site1, site2, directory):
if cls.checker(site1+directory, "MD5SUMS") != False:
# print (site1+directory)
# print (cls.checker(site1+directory, "MD5SUMS"))
if cls.checker(site2+directory, "MD5SUMS") != False:
# print (site2+directory)
# print (cls.checker(site2+directory, "MD5SUMS"))
compare.append(cls.checker(site1+directory, "MD5SUMS") <= cls.checker(site2+directory, "MD5SUMS"))
return
compare.append(False)
return
elif cls.checker(site1+directory, "SHA256SUMS") != False:
# print (site1+directory)
# print (cls.checker(site1+directory, "SHA256SUMS"))
if cls.checker(site2+directory, "SHA256SUMS") != False:
# print (site2+directory)
# print (cls.checker(site2+directory, "SHA256SUMS"))
compare.append(cls.checker(site1+directory, "SHA256SUMS") <= cls.checker(site2+directory, "SHA256SUMS"))
return
compare.append(False)
return
# getting the request from url
r = requests.get(site)
r = requests.get(site1 + directory)
# converting the text
s = BeautifulSoup(r.text,"html.parser")
@@ -29,57 +53,24 @@ class ubuntu_ports_releases(Project):
href = i.attrs['href']
if href.endswith("/") and href != "../" and href != "/" and not href.startswith("/") and not href.startswith("http://"):
site_next = site+href
if site_next not in urls:
urls.append(site_next)
# print(site_next)
# calling itself
cls.scrape(urls, site_next)
@staticmethod
def get_latest_date(web_dir):
page = requests.get(web_dir).text
str_dates = re.findall(r'(\d{2}-\w{3}-\d{4} \d{2}:\d{2})|(\d{4}-\d{2}-\d{2} \d{2}:\d{2})', page)
# if you want to match 1+ patterns, like r'(\d{2}-\w{3}-\d{4} \d{2}:\d{2})|(\d{4}-\d{2}-\d{2} \d{2}:\d{2})', note that findall will return a tuple of two groups!!!
# print(str_dates[0])
if len(str_dates) == 0:
return datetime(1000, 1, 1) # return ridiculously old date to discard this entry, since it has no dates
# for date in str_dates:
# print(date)
dates = [list(datefinder.find_dates("".join(date)))[0] for date in str_dates]
# for date in dates:
# print(date)
return(max(dates))
@classmethod
def max_date(cls, urls):
latest_date = cls.get_latest_date(urls[0])
# get_latest_date(urls[0])
for dir in urls:
latest_date2 = cls.get_latest_date(dir)
if (latest_date2 >= latest_date):
latest_date = latest_date2
# print(latest_date)
return latest_date
dir_next = directory+href
# print(dir_next)
# calling itself
if dir_next not in folders:
folders.append(dir_next)
cls.scrape(compare, folders, site1, site2, dir_next)
@classmethod
def check(cls, data, project):
"""Check if project packages are up-to-date"""
# lists
urls1=[]
urls2=[]
compare=[]
folders=[]
csc_url = CSC_MIRROR + data[project]["csc"] + data[project]["file"]
upstream_url = data[project]["upstream"] + data[project]["file"]
# calling function
cls.scrape(urls1, csc_url)
cls.scrape(urls2, upstream_url)
cls.scrape(compare, folders, upstream_url, csc_url, "")
# print(len(urls1), len(urls2))
return cls.max_date(urls1) == cls.max_date(urls2)
return all(compare)
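One detail worth noting in checker() above: because the pattern alternates between two capturing groups, re.findall returns 2-tuples in which the group that did not match is an empty string, which is why the code joins str_dates[0] before handing it to datefinder. A minimal illustration:

import re

pattern = r'(\d{2}-\w{3}-\d{4} \d{2}:\d{2})|(\d{4}-\d{2}-\d{2} \d{2}:\d{2})'
listing = "MD5SUMS  17-Oct-2021 22:23  1.2K   SHA256SUMS  2021-10-17 22:23  3.4K"
print(re.findall(pattern, listing))
# [('17-Oct-2021 22:23', ''), ('', '2021-10-17 22:23')]
# "".join(...) collapses each tuple back into a single date string for datefinder.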

View File

@@ -1,26 +1,50 @@
from bs4 import BeautifulSoup
import requests
import datefinder # another date finding library
import re
from datetime import datetime
from datetime import timedelta
import time
import pandas as pd
import datefinder # another date finding library
from project import Project
from shared import CSC_MIRROR
# This function brute-forces through the whole directory tree and checks dates.
# It may sound horrible, but for certain distros I believe it's indeed the best solution.
# lists
urls=[]
class xubuntu_releases(Project):
"""xubuntu_releases class"""
@staticmethod
def checker(directory_URL, file_name):
page = requests.get(directory_URL).text
file_index = page.find(file_name)
# print(page)
if file_index == -1:
return False
str_dates = re.findall(r'(\d{2}-\w{3}-\d{4} \d{2}:\d{2})|(\d{4}-\d{2}-\d{2} \d{2}:\d{2})', page[file_index:])
return list(datefinder.find_dates("".join(str_dates[0])))[0]
@classmethod
def scrape(cls, urls, site):
def scrape(cls, compare, folders, site1, site2, directory):
if cls.checker(site1+directory, "MD5SUMS") != False:
# print (site1+directory)
# print (cls.checker(site1+directory, "MD5SUMS"))
if cls.checker(site2+directory, "MD5SUMS") != False:
# print (site2+directory)
# print (cls.checker(site2+directory, "MD5SUMS"))
compare.append(cls.checker(site1+directory, "MD5SUMS") <= cls.checker(site2+directory, "MD5SUMS"))
return
compare.append(False)
return
elif cls.checker(site1+directory, "SHA256SUMS") != False:
# print (site1+directory)
# print (cls.checker(site1+directory, "SHA256SUMS"))
if cls.checker(site2+directory, "SHA256SUMS") != False:
# print (site2+directory)
# print (cls.checker(site2+directory, "SHA256SUMS"))
compare.append(cls.checker(site1+directory, "SHA256SUMS") <= cls.checker(site2+directory, "SHA256SUMS"))
return
compare.append(False)
return
# getting the request from url
r = requests.get(site)
r = requests.get(site1 + directory)
# converting the text
s = BeautifulSoup(r.text,"html.parser")
@@ -29,57 +53,24 @@ class xubuntu_releases(Project):
href = i.attrs['href']
if href.endswith("/") and href != "../" and href != "/" and not href.startswith("/") and not href.startswith("http://"):
site_next = site+href
if site_next not in urls:
urls.append(site_next)
# print(site_next)
# calling itself
cls.scrape(urls, site_next)
@staticmethod
def get_latest_date(web_dir):
page = requests.get(web_dir).text
str_dates = re.findall(r'(\d{2}-\w{3}-\d{4} \d{2}:\d{2})|(\d{4}-\d{2}-\d{2} \d{2}:\d{2})', page)
# if you want to match 1+ patterns, like r'(\d{2}-\w{3}-\d{4} \d{2}:\d{2})|(\d{4}-\d{2}-\d{2} \d{2}:\d{2})', note that findall will return a tuple of two groups!!!
# print(str_dates[0])
if len(str_dates) == 0:
return datetime(1000, 1, 1) # return ridiculously old date to discard this entry, since it has no dates
# for date in str_dates:
# print(date)
dates = [list(datefinder.find_dates("".join(date)))[0] for date in str_dates]
# for date in dates:
# print(date)
return(max(dates))
@classmethod
def max_date(cls, urls):
latest_date = cls.get_latest_date(urls[0])
# get_latest_date(urls[0])
for dir in urls:
latest_date2 = cls.get_latest_date(dir)
if (latest_date2 >= latest_date):
latest_date = latest_date2
# print(latest_date)
return latest_date
dir_next = directory+href
# print(dir_next)
# calling itself
if dir_next not in folders:
folders.append(dir_next)
cls.scrape(compare, folders, site1, site2, dir_next)
@classmethod
def check(cls, data, project):
"""Check if project packages are up-to-date"""
# lists
urls1=[]
urls2=[]
compare=[]
folders=[]
csc_url = CSC_MIRROR + data[project]["csc"] + data[project]["file"]
upstream_url = data[project]["upstream"] + data[project]["file"]
# calling function
cls.scrape(urls1, csc_url)
cls.scrape(urls2, upstream_url)
cls.scrape(compare, folders, upstream_url, csc_url, "")
# print(len(urls1), len(urls2))
return cls.max_date(urls1) == cls.max_date(urls2)
return all(compare)

View File

@@ -2,12 +2,12 @@
Test Client for individual classes in projects
"""
from projects import mxlinux_iso
from projects import xubuntu_releases
import json # read project info stored in the json file
# main function
if __name__ =="__main__":
with open("data.json", "r", encoding="utf-8") as file:
data = json.load(file)
print(mxlinux_iso.check(data, "mxlinux_iso"))
print(xubuntu_releases.check(data, "xubuntu_releases"))