changed linuxmint, ubuntu_ports_releases, xubuntu_releases

Tom 2021-10-17 22:23:55 -07:00
parent 8244ba0cfd
commit e8265a2802
6 changed files with 140 additions and 177 deletions

View File

@@ -275,7 +275,7 @@
         "file": "lastsync"
     },
     "pkgsrc": {
-        "out_of_sync_since": 1634524215,
+        "out_of_sync_since": null,
         "out_of_sync_interval": 86400,
         "csc": "pkgsrc/",
         "upstream": "http://ftp.netbsd.org/pub/pkgsrc/",
@@ -345,7 +345,7 @@
         "file": "tdf/TIMESTAMP"
     },
     "trisquel": {
-        "out_of_sync_since": 1634524215,
+        "out_of_sync_since": null,
        "out_of_sync_interval": 86400,
         "csc": "trisquel/",
         "upstream": "http://rsync.trisquel.info/trisquel/dists/",

View File

@@ -27,7 +27,7 @@ if __name__ == "__main__":
             print(f"Failure: {project} does not exist")
             continue
         project_class = getattr(sys.modules[__name__], project)
-        if project == "CPAN" or project == "ubuntu" or project == "ubuntu_releases" or project == "manjaro" or project == "mxlinux" or project == "cran" or project == "ctan" or project == "gentooportage":
+        if project in ["CPAN", "ubuntu", "ubuntu_releases", "manjaro", "mxlinux", "cran", "ctan", "gentooportage"]:
            checker_result = project_class.check(data, project, current_time)
             if checker_result:
                 print(f"Success: {project} up-to-date")

View File

@@ -1,85 +1,66 @@
 from bs4 import BeautifulSoup
 import requests
-import datefinder # another date finding library
 import re
-from datetime import datetime
-from datetime import timedelta
-import time
-import pandas as pd
+import datefinder # another date finding library
 
 from project import Project
 from shared import CSC_MIRROR
 
-# this function is brute force looping through the whole directory and checking dates
-# it may sound horrible, but for certain distros, i believe it's indeed the best solution
-
-# lists
-urls=[]
-
 class linuxmint(Project):
     """linuxmint class"""
 
+    @staticmethod
+    def checker(directory_URL, file_name):
+        page = requests.get(directory_URL).text
+        file_index = page.find(file_name)
+        # print(page)
+        if file_index == -1:
+            return False
+        str_dates = re.findall(r'(\d{2}-\w{3}-\d{4} \d{2}:\d{2})|(\d{4}-\d{2}-\d{2} \d{2}:\d{2})', page[file_index:])
+        return list(datefinder.find_dates("".join(str_dates[0])))[0]
+
     @classmethod
-    def scrape(cls, urls, site):
+    def scrape(cls, compare, folders, site1, site2, directory):
+        if cls.checker(site1+directory, "sha256sum.txt") != False:
+            # print (site1+directory)
+            # print (cls.checker(site1+directory, "sha256sum.txt"))
+            if cls.checker(site2+directory, "sha256sum.txt") != False:
+                # print (site2+directory)
+                # print (cls.checker(site2+directory, "sha256sum.txt"))
+                compare.append(cls.checker(site1+directory, "sha256sum.txt") <= cls.checker(site2+directory, "sha256sum.txt"))
+                return
+            compare.append(False)
+            return
         # getting the request from url
-        r = requests.get(site)
+        r = requests.get(site1 + directory)
         # converting the text
         s = BeautifulSoup(r.text,"html.parser")
         for i in s.find_all("a"): # for a href directories
             href = i.attrs['href']
-            if href.endswith("/") and href != "../" and href != "/":
-                site_next = site+href
-                if site_next not in urls:
-                    urls.append(site_next)
-                    # print(site_next)
-                    # calling it self
-                    cls.scrape(urls, site_next)
+            if href.endswith("/") and href != "../" and href != "/" and not href.startswith("/"):
+                dir_next = directory+href
+                # print(dir_next)
+                # calling it self
+                if dir_next not in folders:
+                    folders.append(dir_next)
+                    cls.scrape(compare, folders, site1, site2, dir_next)
 
-    @staticmethod
-    def get_latest_date(web_dir):
-        page = requests.get(web_dir).text
-        str_dates = re.findall(r'(\d{2}-\w{3}-\d{4} \d{2}:\d{2})', page)
-        # if you want to match 1+ patterns, like r'(\d{2}-\w{3}-\d{4} \d{2}:\d{2})|(\d{4}-\d{2}-\d{2} \d{2}:\d{2})', note that findall will return a tuple of two groups!!!
-        # print(str_dates[0])
-        if len(str_dates) == 0:
-            return datetime(1000, 1, 1) # return ridiculously old date to discard this entry, since it has no dates
-        # for date in str_dates:
-        # print(date)
-        dates = [list(datefinder.find_dates(date))[0] for date in str_dates]
-        # for date in dates:
-        # print(date)
-        return(max(dates))
-
-    @classmethod
-    def max_date(cls, urls):
-        latest_date = cls.get_latest_date(urls[0])
-        # get_latest_date(urls[0])
-        for dir in urls:
-            latest_date2 = cls.get_latest_date(dir)
-            if (latest_date2 >= latest_date):
-                latest_date = latest_date2
-        # print(latest_date)
-        return latest_date
-
     @classmethod
     def check(cls, data, project):
         """Check if project packages are up-to-date"""
         # lists
-        urls1=[]
-        urls2=[]
+        compare=[]
+        folders=[]
         csc_url = CSC_MIRROR + data[project]["csc"] + data[project]["file"]
         upstream_url = data[project]["upstream"] + data[project]["file"]
         # calling function
-        cls.scrape(urls1, csc_url)
-        cls.scrape(urls2, upstream_url)
-        # print(len(urls1), len(urls2))
-        return cls.max_date(urls1) == cls.max_date(urls2)
+        cls.scrape(compare, folders, upstream_url, csc_url, "")
+
+        return all(compare)
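Editor's note: the new checker helper finds the checksum file's name in the directory index and parses the modification date printed next to it, accepting either the dd-Mon-yyyy or the yyyy-mm-dd listing format. A small, runnable demonstration of that parsing step is below; the sample index line is made up for illustration.

import re
import datefinder  # same date-parsing library the checker uses

# hypothetical fragment of an Apache/nginx-style directory index
listing = '<a href="sha256sum.txt">sha256sum.txt</a>    2021-10-14 13:20   1.2K'

file_index = listing.find("sha256sum.txt")
str_dates = re.findall(r'(\d{2}-\w{3}-\d{4} \d{2}:\d{2})|(\d{4}-\d{2}-\d{2} \d{2}:\d{2})', listing[file_index:])
# findall returns one tuple per match, one slot per group; joining picks whichever group matched
date = list(datefinder.find_dates("".join(str_dates[0])))[0]
print(date)  # 2021-10-14 13:20:00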

View File

@@ -1,27 +1,51 @@
 from bs4 import BeautifulSoup
 import requests
-import datefinder # another date finding library
 import re
-from datetime import datetime
-from datetime import timedelta
-import time
-import pandas as pd
+import datefinder # another date finding library
 
 from project import Project
 from shared import CSC_MIRROR
 
-# this function is brute force looping through the whole directory and checking dates
-# it may sound horrible, but for certain distros, i believe it's indeed the best solution
-
-# lists
-urls=[]
-
 class ubuntu_ports_releases(Project):
     """ubuntu_ports_releases class"""
 
+    @staticmethod
+    def checker(directory_URL, file_name):
+        page = requests.get(directory_URL).text
+        file_index = page.find(file_name)
+        # print(page)
+        if file_index == -1:
+            return False
+        str_dates = re.findall(r'(\d{2}-\w{3}-\d{4} \d{2}:\d{2})|(\d{4}-\d{2}-\d{2} \d{2}:\d{2})', page[file_index:])
+        return list(datefinder.find_dates("".join(str_dates[0])))[0]
+
     @classmethod
-    def scrape(cls, urls, site):
+    def scrape(cls, compare, folders, site1, site2, directory):
+        if cls.checker(site1+directory, "MD5SUMS") != False:
+            # print (site1+directory)
+            # print (cls.checker(site1+directory, "MD5SUMS"))
+            if cls.checker(site2+directory, "MD5SUMS") != False:
+                # print (site2+directory)
+                # print (cls.checker(site2+directory, "MD5SUMS"))
+                compare.append(cls.checker(site1+directory, "MD5SUMS") <= cls.checker(site2+directory, "MD5SUMS"))
+                return
+            compare.append(False)
+            return
+        elif cls.checker(site1+directory, "SHA256SUMS") != False:
+            # print (site1+directory)
+            # print (cls.checker(site1+directory, "SHA256SUMS"))
+            if cls.checker(site2+directory, "SHA256SUMS") != False:
+                # print (site2+directory)
+                # print (cls.checker(site2+directory, "SHA256SUMS"))
+                compare.append(cls.checker(site1+directory, "SHA256SUMS") <= cls.checker(site2+directory, "SHA256SUMS"))
+                return
+            compare.append(False)
+            return
         # getting the request from url
-        r = requests.get(site)
+        r = requests.get(site1 + directory)
         # converting the text
         s = BeautifulSoup(r.text,"html.parser")
@@ -29,57 +53,24 @@ class ubuntu_ports_releases(Project):
             href = i.attrs['href']
             if href.endswith("/") and href != "../" and href != "/" and not href.startswith("/") and not href.startswith("http://"):
-                site_next = site+href
-                if site_next not in urls:
-                    urls.append(site_next)
-                    # print(site_next)
-                    # calling it self
-                    cls.scrape(urls, site_next)
+                dir_next = directory+href
+                # print(dir_next)
+                # calling it self
+                if dir_next not in folders:
+                    folders.append(dir_next)
+                    cls.scrape(compare, folders, site1, site2, dir_next)
 
-    @staticmethod
-    def get_latest_date(web_dir):
-        page = requests.get(web_dir).text
-        str_dates = re.findall(r'(\d{2}-\w{3}-\d{4} \d{2}:\d{2})|(\d{4}-\d{2}-\d{2} \d{2}:\d{2})', page)
-        # if you want to match 1+ patterns, like r'(\d{2}-\w{3}-\d{4} \d{2}:\d{2})|(\d{4}-\d{2}-\d{2} \d{2}:\d{2})', note that findall will return a tuple of two groups!!!
-        # print(str_dates[0])
-        if len(str_dates) == 0:
-            return datetime(1000, 1, 1) # return ridiculously old date to discard this entry, since it has no dates
-        # for date in str_dates:
-        # print(date)
-        dates = [list(datefinder.find_dates("".join(date)))[0] for date in str_dates]
-        # for date in dates:
-        # print(date)
-        return(max(dates))
-
-    @classmethod
-    def max_date(cls, urls):
-        latest_date = cls.get_latest_date(urls[0])
-        # get_latest_date(urls[0])
-        for dir in urls:
-            latest_date2 = cls.get_latest_date(dir)
-            if (latest_date2 >= latest_date):
-                latest_date = latest_date2
-        # print(latest_date)
-        return latest_date
-
     @classmethod
     def check(cls, data, project):
         """Check if project packages are up-to-date"""
         # lists
-        urls1=[]
-        urls2=[]
+        compare=[]
+        folders=[]
         csc_url = CSC_MIRROR + data[project]["csc"] + data[project]["file"]
         upstream_url = data[project]["upstream"] + data[project]["file"]
         # calling function
-        cls.scrape(urls1, csc_url)
-        cls.scrape(urls2, upstream_url)
-        # print(len(urls1), len(urls2))
-        return cls.max_date(urls1) == cls.max_date(urls2)
+        cls.scrape(compare, folders, upstream_url, csc_url, "")
+
+        return all(compare)
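Editor's note: in ubuntu_ports_releases (and xubuntu_releases below) the rewritten scrape tries MD5SUMS first and falls back to SHA256SUMS, appending one boolean per release directory: True when the upstream checksum file is no newer than the CSC copy. A condensed sketch of that decision, factored into a hypothetical compare_dir helper and exercised with a stubbed checker, is shown here; it is not code from this commit.

def compare_dir(checker, compare, site1, site2, directory):
    # hypothetical condensation of the duplicated MD5SUMS/SHA256SUMS branches;
    # checker(url, name) returns False when the file is absent, else its listed date
    for name in ("MD5SUMS", "SHA256SUMS"):
        upstream_date = checker(site1 + directory, name)
        if upstream_date is not False:
            csc_date = checker(site2 + directory, name)
            compare.append(csc_date is not False and upstream_date <= csc_date)
            return True   # a checksum file was found here, so stop descending
    return False          # no checksum file; the caller keeps recursing into subdirectories

# tiny self-check with a stubbed checker: upstream SHA256SUMS is newer than the CSC copy,
# so the comparison records False for this directory
stub = {("up/21.10/", "SHA256SUMS"): 2, ("csc/21.10/", "SHA256SUMS"): 1}
results = []
compare_dir(lambda url, name: stub.get((url, name), False), results, "up/", "csc/", "21.10/")
print(results)  # [False]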

View File

@@ -1,27 +1,51 @@
 from bs4 import BeautifulSoup
 import requests
-import datefinder # another date finding library
 import re
-from datetime import datetime
-from datetime import timedelta
-import time
-import pandas as pd
+import datefinder # another date finding library
 
 from project import Project
 from shared import CSC_MIRROR
 
-# this function is brute force looping through the whole directory and checking dates
-# it may sound horrible, but for certain distros, i believe it's indeed the best solution
-
-# lists
-urls=[]
-
 class xubuntu_releases(Project):
     """xubuntu_releases class"""
 
+    @staticmethod
+    def checker(directory_URL, file_name):
+        page = requests.get(directory_URL).text
+        file_index = page.find(file_name)
+        # print(page)
+        if file_index == -1:
+            return False
+        str_dates = re.findall(r'(\d{2}-\w{3}-\d{4} \d{2}:\d{2})|(\d{4}-\d{2}-\d{2} \d{2}:\d{2})', page[file_index:])
+        return list(datefinder.find_dates("".join(str_dates[0])))[0]
+
     @classmethod
-    def scrape(cls, urls, site):
+    def scrape(cls, compare, folders, site1, site2, directory):
+        if cls.checker(site1+directory, "MD5SUMS") != False:
+            # print (site1+directory)
+            # print (cls.checker(site1+directory, "MD5SUMS"))
+            if cls.checker(site2+directory, "MD5SUMS") != False:
+                # print (site2+directory)
+                # print (cls.checker(site2+directory, "MD5SUMS"))
+                compare.append(cls.checker(site1+directory, "MD5SUMS") <= cls.checker(site2+directory, "MD5SUMS"))
+                return
+            compare.append(False)
+            return
+        elif cls.checker(site1+directory, "SHA256SUMS") != False:
+            # print (site1+directory)
+            # print (cls.checker(site1+directory, "SHA256SUMS"))
+            if cls.checker(site2+directory, "SHA256SUMS") != False:
+                # print (site2+directory)
+                # print (cls.checker(site2+directory, "SHA256SUMS"))
+                compare.append(cls.checker(site1+directory, "SHA256SUMS") <= cls.checker(site2+directory, "SHA256SUMS"))
+                return
+            compare.append(False)
+            return
         # getting the request from url
-        r = requests.get(site)
+        r = requests.get(site1 + directory)
         # converting the text
         s = BeautifulSoup(r.text,"html.parser")
@@ -29,57 +53,24 @@ class xubuntu_releases(Project):
             href = i.attrs['href']
             if href.endswith("/") and href != "../" and href != "/" and not href.startswith("/") and not href.startswith("http://"):
-                site_next = site+href
-                if site_next not in urls:
-                    urls.append(site_next)
-                    # print(site_next)
-                    # calling it self
-                    cls.scrape(urls, site_next)
+                dir_next = directory+href
+                # print(dir_next)
+                # calling it self
+                if dir_next not in folders:
+                    folders.append(dir_next)
+                    cls.scrape(compare, folders, site1, site2, dir_next)
 
-    @staticmethod
-    def get_latest_date(web_dir):
-        page = requests.get(web_dir).text
-        str_dates = re.findall(r'(\d{2}-\w{3}-\d{4} \d{2}:\d{2})|(\d{4}-\d{2}-\d{2} \d{2}:\d{2})', page)
-        # if you want to match 1+ patterns, like r'(\d{2}-\w{3}-\d{4} \d{2}:\d{2})|(\d{4}-\d{2}-\d{2} \d{2}:\d{2})', note that findall will return a tuple of two groups!!!
-        # print(str_dates[0])
-        if len(str_dates) == 0:
-            return datetime(1000, 1, 1) # return ridiculously old date to discard this entry, since it has no dates
-        # for date in str_dates:
-        # print(date)
-        dates = [list(datefinder.find_dates("".join(date)))[0] for date in str_dates]
-        # for date in dates:
-        # print(date)
-        return(max(dates))
-
-    @classmethod
-    def max_date(cls, urls):
-        latest_date = cls.get_latest_date(urls[0])
-        # get_latest_date(urls[0])
-        for dir in urls:
-            latest_date2 = cls.get_latest_date(dir)
-            if (latest_date2 >= latest_date):
-                latest_date = latest_date2
-        # print(latest_date)
-        return latest_date
-
     @classmethod
     def check(cls, data, project):
         """Check if project packages are up-to-date"""
         # lists
-        urls1=[]
-        urls2=[]
+        compare=[]
+        folders=[]
         csc_url = CSC_MIRROR + data[project]["csc"] + data[project]["file"]
         upstream_url = data[project]["upstream"] + data[project]["file"]
         # calling function
-        cls.scrape(urls1, csc_url)
-        cls.scrape(urls2, upstream_url)
-        # print(len(urls1), len(urls2))
-        return cls.max_date(urls1) == cls.max_date(urls2)
+        cls.scrape(compare, folders, upstream_url, csc_url, "")
+
+        return all(compare)
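Editor's note: all three rewritten check methods now walk the upstream tree once and return all(compare). One behavior worth knowing about Python here: all() over an empty list is True, so if the scrape never finds a single checksum file (for example, if the upstream index fails to load), the check would report up-to-date. The two-line illustration below, and the guard in the last line, are editorial sketches, not part of this commit.

compare = []
print(all(compare))                    # True: vacuously "up-to-date" when nothing was compared

# one possible guard (not in this commit): require at least one comparison
print(bool(compare) and all(compare))  # False when compare is empty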

View File

@@ -2,12 +2,12 @@
 Test Client for individual classes in projects
 """
-from projects import mxlinux_iso
+from projects import xubuntu_releases
 import json # import json to read project info stored in json file
 
 # main function
 if __name__ =="__main__":
     with open("data.json", "r", encoding="utf-8") as file:
         data = json.load(file)
-    print(mxlinux_iso.check(data, "mxlinux_iso"))
+    print(xubuntu_releases.check(data, "xubuntu_releases"))