From e8265a28024b3f8ed2052417da2cd66d43a707da Mon Sep 17 00:00:00 2001
From: Tom
Date: Sun, 17 Oct 2021 22:23:55 -0700
Subject: [PATCH] changed linuxmint, ubuntu_ports_releases, xubuntu_releases

---
 data.json                         |   4 +-
 main.py                           |   2 +-
 projects/linuxmint.py             |  97 +++++++++++----------
 projects/ubuntu_ports_releases.py | 105 ++++++++++++++----------
 projects/xubuntu_releases.py      | 105 ++++++++++++++----------
 test.py                           |   4 +-
 6 files changed, 140 insertions(+), 177 deletions(-)

diff --git a/data.json b/data.json
index bf78648..70c82b7 100644
--- a/data.json
+++ b/data.json
@@ -275,7 +275,7 @@
         "file": "lastsync"
     },
     "pkgsrc": {
-        "out_of_sync_since": 1634524215,
+        "out_of_sync_since": null,
         "out_of_sync_interval": 86400,
         "csc": "pkgsrc/",
         "upstream": "http://ftp.netbsd.org/pub/pkgsrc/",
@@ -345,7 +345,7 @@
         "file": "tdf/TIMESTAMP"
     },
     "trisquel": {
-        "out_of_sync_since": 1634524215,
+        "out_of_sync_since": null,
        "out_of_sync_interval": 86400,
         "csc": "trisquel/",
         "upstream": "http://rsync.trisquel.info/trisquel/dists/",
diff --git a/main.py b/main.py
index b9ddd8b..f5969f6 100644
--- a/main.py
+++ b/main.py
@@ -27,7 +27,7 @@ if __name__ == "__main__":
             print(f"Failure: {project} does not exist")
             continue
         project_class = getattr(sys.modules[__name__], project)
-        if project == "CPAN" or project == "ubuntu" or project == "ubuntu_releases" or project == "manjaro" or project == "mxlinux" or project == "cran" or project == "ctan" or project == "gentooportage":
+        if project in ["CPAN", "ubuntu", "ubuntu_releases", "manjaro", "mxlinux", "cran", "ctan", "gentooportage"]:
            checker_result = project_class.check(data, project, current_time)
             if checker_result:
                 print(f"Success: {project} up-to-date")
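A note on the data.json hunks: resetting out_of_sync_since from a Unix timestamp (1634524215) back to null marks pkgsrc and trisquel as in sync again. The patch does not show the code that maintains this field, so the following is only a sketch of the bookkeeping it implies; the name update_sync_state is hypothetical:

import time

def update_sync_state(data, project, in_sync):
    """Hypothetical helper: stamp out_of_sync_since when a project first
    falls out of sync, and reset it to None (JSON null) once it recovers."""
    if in_sync:
        data[project]["out_of_sync_since"] = None
    elif data[project]["out_of_sync_since"] is None:
        data[project]["out_of_sync_since"] = int(time.time())

The main.py hunk is a pure readability change: the chain of == comparisons becomes a single membership test with identical behavior.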
diff --git a/projects/linuxmint.py b/projects/linuxmint.py
index da69635..3ff37c6 100644
--- a/projects/linuxmint.py
+++ b/projects/linuxmint.py
@@ -1,85 +1,66 @@
 from bs4 import BeautifulSoup
 import requests
-import datefinder # another date finding library
 import re
-from datetime import datetime
-from datetime import timedelta
-import time
-import pandas as pd
+import datefinder # another date finding library
 
 from project import Project
 from shared import CSC_MIRROR
 
-# this function is brute force looping through the whole directory and checking dates
-# it may sound horrible, but for certain distros, i believe it's indeed the best solution
-
-# lists
-urls=[]
-
 class linuxmint(Project):
     """linuxmint class"""
+    @staticmethod
+    def checker(directory_URL, file_name):
+        page = requests.get(directory_URL).text
+        file_index = page.find(file_name)
+        # file_name absent from this listing: treat the directory as unsynced
+        if file_index == -1:
+            return False
+
+        # first timestamp printed after the file name, in either listing format
+        str_dates = re.findall(r'(\d{2}-\w{3}-\d{4} \d{2}:\d{2})|(\d{4}-\d{2}-\d{2} \d{2}:\d{2})', page[file_index:])
+
+        return list(datefinder.find_dates("".join(str_dates[0])))[0] if str_dates else False
+
     @classmethod
-    def scrape(cls, urls, site):
+    def scrape(cls, compare, folders, site1, site2, directory):
+        upstream_date = cls.checker(site1+directory, "sha256sum.txt")
+        if upstream_date is not False:
+            # upstream ships a checksum file here, so the mirror must carry
+            # one too, dated no earlier than upstream's copy
+            csc_date = cls.checker(site2+directory, "sha256sum.txt")
+            if csc_date is not False:
+                compare.append(upstream_date <= csc_date)
+                return
+            compare.append(False)
+            return
+
         # getting the request from url
-        r = requests.get(site)
-
+        r = requests.get(site1 + directory)
+
         # converting the text
         s = BeautifulSoup(r.text,"html.parser")
 
         for i in s.find_all("a"): # for a href directories
             href = i.attrs['href']
 
-            if href.endswith("/") and href != "../" and href != "/":
-                site_next = site+href
+            if href.endswith("/") and href != "../" and href != "/" and not href.startswith("/"):
+                dir_next = directory+href
+                # recurse into each subdirectory exactly once;
+                # the folders list doubles as the visited set
+                if dir_next not in folders:
+                    folders.append(dir_next)
+                    cls.scrape(compare, folders, site1, site2, dir_next)
 
-                if site_next not in urls:
-                    urls.append(site_next)
-                    # print(site_next)
-                    # calling it self
-                    cls.scrape(urls, site_next)
-
-    @staticmethod
-    def get_latest_date(web_dir):
-        page = requests.get(web_dir).text
-
-        str_dates = re.findall(r'(\d{2}-\w{3}-\d{4} \d{2}:\d{2})', page)
-        # if you want to match 1+ patterns, like r'(\d{2}-\w{3}-\d{4} \d{2}:\d{2})|(\d{4}-\d{2}-\d{2} \d{2}:\d{2})', note that findall will return a tuple of two groups!!!
-        # print(str_dates[0])
-        if len(str_dates) == 0:
-            return datetime(1000, 1, 1) # return ridiculously old date to discard this entry, since it has no dates
-        # for date in str_dates:
-        #     print(date)
-        dates = [list(datefinder.find_dates(date))[0] for date in str_dates]
-
-        # for date in dates:
-        #     print(date)
-        return(max(dates))
-
-    @classmethod
-    def max_date(cls, urls):
-        latest_date = cls.get_latest_date(urls[0])
-        # get_latest_date(urls[0])
-        for dir in urls:
-            latest_date2 = cls.get_latest_date(dir)
-            if (latest_date2 >= latest_date):
-                latest_date = latest_date2
-        # print(latest_date)
-        return latest_date
-
     @classmethod
     def check(cls, data, project):
         """Check if project packages are up-to-date"""
         # lists
-        urls1=[]
-        urls2=[]
+        compare=[]
+        folders=[]
 
         csc_url = CSC_MIRROR + data[project]["csc"] + data[project]["file"]
         upstream_url = data[project]["upstream"] + data[project]["file"]
 
         # calling function
-        cls.scrape(urls1, csc_url)
-        cls.scrape(urls2, upstream_url)
+        cls.scrape(compare, folders, upstream_url, csc_url, "")
 
-        # print(len(urls1), len(urls2))
-
-        return cls.max_date(urls1) == cls.max_date(urls2)
\ No newline at end of file
+        return all(compare)
\ No newline at end of file
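The new checker above replaces the old whole-tree date crawl: it reads the modification time printed beside a single checksum file in the directory index and compares just that. A minimal standalone sketch of the same parsing against a fabricated Apache-style listing row (the listing text and the listing_date name are illustrative; it assumes datefinder can parse the dd-Mon-YYYY form, exactly as the patched code does):

import re
import datefinder

# Fabricated index row of the kind checker() scrapes; real pages come from
# requests.get(directory_URL).text.
LISTING = '<a href="sha256sum.txt">sha256sum.txt</a> 17-Oct-2021 22:10 1.2K'

def listing_date(page, file_name):
    """Date printed after file_name in a listing, or False if unavailable."""
    idx = page.find(file_name)
    if idx == -1:
        return False
    # either "17-Oct-2021 22:10" or "2021-10-17 22:10"; findall returns a
    # two-group tuple per match, one group empty, hence the "".join below
    matches = re.findall(r'(\d{2}-\w{3}-\d{4} \d{2}:\d{2})|(\d{4}-\d{2}-\d{2} \d{2}:\d{2})', page[idx:])
    if not matches:
        return False
    return list(datefinder.find_dates("".join(matches[0])))[0]

print(listing_date(LISTING, "sha256sum.txt"))  # 2021-10-17 22:10:00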
diff --git a/projects/ubuntu_ports_releases.py b/projects/ubuntu_ports_releases.py
index 2baf703..49de202 100644
--- a/projects/ubuntu_ports_releases.py
+++ b/projects/ubuntu_ports_releases.py
@@ -1,27 +1,51 @@
 from bs4 import BeautifulSoup
 import requests
-import datefinder # another date finding library
 import re
-from datetime import datetime
-from datetime import timedelta
-import time
-import pandas as pd
+import datefinder # another date finding library
 
 from project import Project
 from shared import CSC_MIRROR
 
-# this function is brute force looping through the whole directory and checking dates
-# it may sound horrible, but for certain distros, i believe it's indeed the best solution
-
-# lists
-urls=[]
-
 class ubuntu_ports_releases(Project):
     """ubuntu_ports_releases class"""
+    @staticmethod
+    def checker(directory_URL, file_name):
+        page = requests.get(directory_URL).text
+        file_index = page.find(file_name)
+        # file_name absent from this listing: treat the directory as unsynced
+        if file_index == -1:
+            return False
+
+        # first timestamp printed after the file name, in either listing format
+        str_dates = re.findall(r'(\d{2}-\w{3}-\d{4} \d{2}:\d{2})|(\d{4}-\d{2}-\d{2} \d{2}:\d{2})', page[file_index:])
+
+        return list(datefinder.find_dates("".join(str_dates[0])))[0] if str_dates else False
+
     @classmethod
-    def scrape(cls, urls, site):
+    def scrape(cls, compare, folders, site1, site2, directory):
+        upstream_date = cls.checker(site1+directory, "MD5SUMS")
+        if upstream_date is not False:
+            # upstream ships MD5SUMS here, so the mirror must carry it too,
+            # dated no earlier than upstream's copy
+            csc_date = cls.checker(site2+directory, "MD5SUMS")
+            if csc_date is not False:
+                compare.append(upstream_date <= csc_date)
+                return
+            compare.append(False)
+            return
+        # some release directories ship MD5SUMS, others only SHA256SUMS,
+        # so fall back to comparing that file's timestamp instead
+        upstream_date = cls.checker(site1+directory, "SHA256SUMS")
+        if upstream_date is not False:
+            csc_date = cls.checker(site2+directory, "SHA256SUMS")
+            if csc_date is not False:
+                compare.append(upstream_date <= csc_date)
+                return
+            compare.append(False)
+            return
+
         # getting the request from url
-        r = requests.get(site)
-
+        r = requests.get(site1 + directory)
+
         # converting the text
         s = BeautifulSoup(r.text,"html.parser")
 
@@ -29,57 +53,24 @@ class ubuntu_ports_releases(Project):
             href = i.attrs['href']
 
             if href.endswith("/") and href != "../" and href != "/" and not href.startswith("/") and not href.startswith("http://"):
-                site_next = site+href
+                dir_next = directory+href
+                # recurse into each subdirectory exactly once;
+                # the folders list doubles as the visited set
+                if dir_next not in folders:
+                    folders.append(dir_next)
+                    cls.scrape(compare, folders, site1, site2, dir_next)
 
-                if site_next not in urls:
-                    urls.append(site_next)
-                    # print(site_next)
-                    # calling it self
-                    cls.scrape(urls, site_next)
-
-    @staticmethod
-    def get_latest_date(web_dir):
-        page = requests.get(web_dir).text
-
-        str_dates = re.findall(r'(\d{2}-\w{3}-\d{4} \d{2}:\d{2})|(\d{4}-\d{2}-\d{2} \d{2}:\d{2})', page)
-        # if you want to match 1+ patterns, like r'(\d{2}-\w{3}-\d{4} \d{2}:\d{2})|(\d{4}-\d{2}-\d{2} \d{2}:\d{2})', note that findall will return a tuple of two groups!!!
-        # print(str_dates[0])
-        if len(str_dates) == 0:
-            return datetime(1000, 1, 1) # return ridiculously old date to discard this entry, since it has no dates
-        # for date in str_dates:
-        #     print(date)
-        dates = [list(datefinder.find_dates("".join(date)))[0] for date in str_dates]
-
-        # for date in dates:
-        #     print(date)
-        return(max(dates))
-
-    @classmethod
-    def max_date(cls, urls):
-        latest_date = cls.get_latest_date(urls[0])
-        # get_latest_date(urls[0])
-        for dir in urls:
-            latest_date2 = cls.get_latest_date(dir)
-            if (latest_date2 >= latest_date):
-                latest_date = latest_date2
-        # print(latest_date)
-        return latest_date
-
     @classmethod
     def check(cls, data, project):
         """Check if project packages are up-to-date"""
         # lists
-        urls1=[]
-        urls2=[]
+        compare=[]
+        folders=[]
 
         csc_url = CSC_MIRROR + data[project]["csc"] + data[project]["file"]
         upstream_url = data[project]["upstream"] + data[project]["file"]
 
         # calling function
-        cls.scrape(urls1, csc_url)
-        cls.scrape(urls2, upstream_url)
+        cls.scrape(compare, folders, upstream_url, csc_url, "")
 
-        # print(len(urls1), len(urls2))
-
-        return cls.max_date(urls1) == cls.max_date(urls2)
\ No newline at end of file
+        return all(compare)
\ No newline at end of file
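To restate the control flow the rewritten scrape methods share: recurse through the directory tree until a level that carries a checksum file, append one boolean per such directory (mirror copy dated no earlier than upstream's), and stop descending there; check() then reduces the list with all(). A minimal sketch of that walk over a stubbed tree, with fabricated directory names and dates:

# Stub tree: children per directory; CHECKSUMS marks levels where the walk
# stops and compares (upstream date, mirror date). All values are made up.
TREE = {"": ["focal/", "jammy/"], "focal/": [], "jammy/": []}
CHECKSUMS = {
    "focal/": ("2021-08-26 14:00", "2021-08-26 14:00"),
    "jammy/": ("2021-10-14 09:30", "2021-10-17 22:10"),
}

def scrape(compare, folders, directory=""):
    """Same shape as the patched scrape(), minus HTTP and HTML parsing."""
    if directory in CHECKSUMS:
        upstream, mirror = CHECKSUMS[directory]
        compare.append(upstream <= mirror)  # zero-padded strings sort correctly
        return
    for href in TREE.get(directory, []):
        dir_next = directory + href
        if dir_next not in folders:  # visit each subdirectory only once
            folders.append(dir_next)
            scrape(compare, folders, dir_next)

compare = []
scrape(compare, [])
print(all(compare))  # True: every mirror copy is at least as new as upstream's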
diff --git a/projects/xubuntu_releases.py b/projects/xubuntu_releases.py
index 171b0d3..4d134b3 100644
--- a/projects/xubuntu_releases.py
+++ b/projects/xubuntu_releases.py
@@ -1,27 +1,51 @@
 from bs4 import BeautifulSoup
 import requests
-import datefinder # another date finding library
 import re
-from datetime import datetime
-from datetime import timedelta
-import time
-import pandas as pd
+import datefinder # another date finding library
 
 from project import Project
 from shared import CSC_MIRROR
 
-# this function is brute force looping through the whole directory and checking dates
-# it may sound horrible, but for certain distros, i believe it's indeed the best solution
-
-# lists
-urls=[]
-
 class xubuntu_releases(Project):
     """xubuntu_releases class"""
+    @staticmethod
+    def checker(directory_URL, file_name):
+        page = requests.get(directory_URL).text
+        file_index = page.find(file_name)
+        # file_name absent from this listing: treat the directory as unsynced
+        if file_index == -1:
+            return False
+
+        # first timestamp printed after the file name, in either listing format
+        str_dates = re.findall(r'(\d{2}-\w{3}-\d{4} \d{2}:\d{2})|(\d{4}-\d{2}-\d{2} \d{2}:\d{2})', page[file_index:])
+
+        return list(datefinder.find_dates("".join(str_dates[0])))[0] if str_dates else False
+
     @classmethod
-    def scrape(cls, urls, site):
+    def scrape(cls, compare, folders, site1, site2, directory):
+        upstream_date = cls.checker(site1+directory, "MD5SUMS")
+        if upstream_date is not False:
+            # upstream ships MD5SUMS here, so the mirror must carry it too,
+            # dated no earlier than upstream's copy
+            csc_date = cls.checker(site2+directory, "MD5SUMS")
+            if csc_date is not False:
+                compare.append(upstream_date <= csc_date)
+                return
+            compare.append(False)
+            return
+        # some release directories ship MD5SUMS, others only SHA256SUMS,
+        # so fall back to comparing that file's timestamp instead
+        upstream_date = cls.checker(site1+directory, "SHA256SUMS")
+        if upstream_date is not False:
+            csc_date = cls.checker(site2+directory, "SHA256SUMS")
+            if csc_date is not False:
+                compare.append(upstream_date <= csc_date)
+                return
+            compare.append(False)
+            return
+
         # getting the request from url
-        r = requests.get(site)
-
+        r = requests.get(site1 + directory)
+
         # converting the text
         s = BeautifulSoup(r.text,"html.parser")
 
@@ -29,57 +53,24 @@ class xubuntu_releases(Project):
             href = i.attrs['href']
 
             if href.endswith("/") and href != "../" and href != "/" and not href.startswith("/") and not href.startswith("http://"):
-                site_next = site+href
+                dir_next = directory+href
+                # recurse into each subdirectory exactly once;
+                # the folders list doubles as the visited set
+                if dir_next not in folders:
+                    folders.append(dir_next)
+                    cls.scrape(compare, folders, site1, site2, dir_next)
 
-                if site_next not in urls:
-                    urls.append(site_next)
-                    # print(site_next)
-                    # calling it self
-                    cls.scrape(urls, site_next)
-
-    @staticmethod
-    def get_latest_date(web_dir):
-        page = requests.get(web_dir).text
-
-        str_dates = re.findall(r'(\d{2}-\w{3}-\d{4} \d{2}:\d{2})|(\d{4}-\d{2}-\d{2} \d{2}:\d{2})', page)
-        # if you want to match 1+ patterns, like r'(\d{2}-\w{3}-\d{4} \d{2}:\d{2})|(\d{4}-\d{2}-\d{2} \d{2}:\d{2})', note that findall will return a tuple of two groups!!!
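After this patch, linuxmint.py, ubuntu_ports_releases.py and xubuntu_releases.py are near line-for-line identical except for the checksum file names they probe, so a natural follow-up would hoist the shared logic into one place. A sketch of that factoring; ChecksumDateProject and its checksum_files hook are hypothetical, not part of this patch:

class ChecksumDateProject:
    """Hypothetical base class for the three near-identical project files.
    Subclasses only declare which checksum files to look for, in order."""
    checksum_files = ()

    @classmethod
    def compare_directory(cls, checker, compare, site1, site2, directory):
        """Try each checksum file; True once a comparison has been recorded."""
        for name in cls.checksum_files:
            upstream_date = checker(site1 + directory, name)
            if upstream_date is not False:
                # mirror must have the same file, dated no earlier than upstream
                csc_date = checker(site2 + directory, name)
                compare.append(csc_date is not False and upstream_date <= csc_date)
                return True
        return False

class linuxmint_example(ChecksumDateProject):
    checksum_files = ("sha256sum.txt",)

class xubuntu_releases_example(ChecksumDateProject):
    checksum_files = ("MD5SUMS", "SHA256SUMS")

scrape() and check() would then live once on the base class instead of three times.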
-        # print(str_dates[0])
-        if len(str_dates) == 0:
-            return datetime(1000, 1, 1) # return ridiculously old date to discard this entry, since it has no dates
-        # for date in str_dates:
-        #     print(date)
-        dates = [list(datefinder.find_dates("".join(date)))[0] for date in str_dates]
-
-        # for date in dates:
-        #     print(date)
-        return(max(dates))
-
-    @classmethod
-    def max_date(cls, urls):
-        latest_date = cls.get_latest_date(urls[0])
-        # get_latest_date(urls[0])
-        for dir in urls:
-            latest_date2 = cls.get_latest_date(dir)
-            if (latest_date2 >= latest_date):
-                latest_date = latest_date2
-        # print(latest_date)
-        return latest_date
-
     @classmethod
     def check(cls, data, project):
         """Check if project packages are up-to-date"""
         # lists
-        urls1=[]
-        urls2=[]
+        compare=[]
+        folders=[]
 
         csc_url = CSC_MIRROR + data[project]["csc"] + data[project]["file"]
         upstream_url = data[project]["upstream"] + data[project]["file"]
 
         # calling function
-        cls.scrape(urls1, csc_url)
-        cls.scrape(urls2, upstream_url)
+        cls.scrape(compare, folders, upstream_url, csc_url, "")
 
-        # print(len(urls1), len(urls2))
-
-        return cls.max_date(urls1) == cls.max_date(urls2)
\ No newline at end of file
+        return all(compare)
\ No newline at end of file
diff --git a/test.py b/test.py
index f9a25ca..3533faa 100644
--- a/test.py
+++ b/test.py
@@ -2,12 +2,12 @@
 Test Client for individual classes in projects
 """
 
-from projects import mxlinux_iso
+from projects import xubuntu_releases
 
 import json # import json to read project info stored in json file
 
 # main function
 if __name__ =="__main__":
     with open("data.json", "r", encoding="utf-8") as file:
         data = json.load(file)
-    print(mxlinux_iso.check(data, "mxlinux_iso"))
+    print(xubuntu_releases.check(data, "xubuntu_releases"))
\ No newline at end of file
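test.py stays a one-project-at-a-time harness: swap the import and the project name, then run python test.py. When touching several walkers at once, as this patch does, a batch variant saves the editing; a minimal sketch (same repo layout assumed, and each project name must be a key in data.json):

import json
from projects import linuxmint, ubuntu_ports_releases, xubuntu_releases

if __name__ == "__main__":
    with open("data.json", "r", encoding="utf-8") as file:
        data = json.load(file)
    # each class is named after its data.json key, so __name__ doubles as the key
    for cls in (linuxmint, ubuntu_ports_releases, xubuntu_releases):
        print(cls.__name__, cls.check(data, cls.__name__))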