changed linuxmint, ubuntu_ports_releases, xubuntu_releases

parent: 8244ba0cfd
commit: e8265a2802
data.json

@@ -275,7 +275,7 @@
         "file": "lastsync"
     },
     "pkgsrc": {
-        "out_of_sync_since": 1634524215,
+        "out_of_sync_since": null,
         "out_of_sync_interval": 86400,
         "csc": "pkgsrc/",
         "upstream": "http://ftp.netbsd.org/pub/pkgsrc/",

@@ -345,7 +345,7 @@
         "file": "tdf/TIMESTAMP"
     },
     "trisquel": {
-        "out_of_sync_since": 1634524215,
+        "out_of_sync_since": null,
        "out_of_sync_interval": 86400,
         "csc": "trisquel/",
         "upstream": "http://rsync.trisquel.info/trisquel/dists/",
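The two fields above drive the out-of-sync bookkeeping: resetting "out_of_sync_since" to null records that pkgsrc and trisquel are in sync again, while "out_of_sync_interval" (86400 s) is the grace period before a mirror is flagged. The sketch below is not part of the diff; it only illustrates, as an assumption about how main.py consumes these fields, the usual pattern (is_stale is a hypothetical helper):

import json
import time

def is_stale(entry, current_time):
    # hypothetical helper: an entry is stale once it has been out of sync
    # longer than its allowed interval; null/None means "currently in sync"
    since = entry.get("out_of_sync_since")
    if since is None:
        return False
    return current_time - since > entry["out_of_sync_interval"]

with open("data.json", "r", encoding="utf-8") as file:
    data = json.load(file)

now = int(time.time())
print(is_stale(data["pkgsrc"], now))    # False after this change (field is null)
print(is_stale(data["trisquel"], now))  # False after this change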
main.py (2 changed lines)

@@ -27,7 +27,7 @@ if __name__ == "__main__":
             print(f"Failure: {project} does not exist")
             continue
         project_class = getattr(sys.modules[__name__], project)
-        if project == "CPAN" or project == "ubuntu" or project == "ubuntu_releases" or project == "manjaro" or project == "mxlinux" or project == "cran" or project == "ctan" or project == "gentooportage":
+        if project in ["CPAN", "ubuntu", "ubuntu_releases", "manjaro", "mxlinux", "cran", "ctan", "gentooportage"]:
             checker_result = project_class.check(data, project, current_time)
             if checker_result:
                 print(f"Success: {project} up-to-date")
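The main.py change is a pure readability refactor: the chain of equality tests collapses into one membership test with identical behaviour. The list presumably names the checkers whose check() takes a current_time argument; the classes rewritten in this commit take only (data, project). A standalone illustration, shortened to three names:

project = "manjaro"

# old style: chained comparisons
old = project == "CPAN" or project == "ubuntu" or project == "manjaro"

# new style: a single membership test
new = project in ["CPAN", "ubuntu", "manjaro"]

assert old == new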
linuxmint

@@ -1,85 +1,66 @@
 from bs4 import BeautifulSoup
 import requests
-import datefinder # another date finding library
 import re
-from datetime import datetime
-from datetime import timedelta
-import time
-import pandas as pd
+import datefinder # another date finding library
 from project import Project
 from shared import CSC_MIRROR
 
-# this function is brute force looping through the whole directory and checking dates
-# it may sound horrible, but for certain distros, i believe it's indeed the best solution
-
-# lists
-urls=[]
-
 class linuxmint(Project):
     """linuxmint class"""
+    @staticmethod
+    def checker(directory_URL, file_name):
+        page = requests.get(directory_URL).text
+        file_index = page.find(file_name)
+        # print(page)
+
+        if file_index == -1:
+            return False
+
+        str_dates = re.findall(r'(\d{2}-\w{3}-\d{4} \d{2}:\d{2})|(\d{4}-\d{2}-\d{2} \d{2}:\d{2})', page[file_index:])
+
+        return list(datefinder.find_dates("".join(str_dates[0])))[0]
+
     @classmethod
-    def scrape(cls, urls, site):
+    def scrape(cls, compare, folders, site1, site2, directory):
+        if cls.checker(site1+directory, "sha256sum.txt") != False:
+            # print (site1+directory)
+            # print (cls.checker(site1+directory, "sha256sum.txt"))
+            if cls.checker(site2+directory, "sha256sum.txt") != False:
+                # print (site2+directory)
+                # print (cls.checker(site2+directory, "sha256sum.txt"))
+                compare.append(cls.checker(site1+directory, "sha256sum.txt") <= cls.checker(site2+directory, "sha256sum.txt"))
+                return
+            compare.append(False)
+            return
+
         # getting the request from url
-        r = requests.get(site)
+        r = requests.get(site1 + directory)
 
         # converting the text
         s = BeautifulSoup(r.text,"html.parser")
 
         for i in s.find_all("a"): # for a href directories
             href = i.attrs['href']
 
-            if href.endswith("/") and href != "../" and href != "/":
-                site_next = site+href
-
-                if site_next not in urls:
-                    urls.append(site_next)
-                    # print(site_next)
-                    # calling it self
-                    cls.scrape(urls, site_next)
-
-    @staticmethod
-    def get_latest_date(web_dir):
-        page = requests.get(web_dir).text
-
-        str_dates = re.findall(r'(\d{2}-\w{3}-\d{4} \d{2}:\d{2})', page)
-        # if you want to match 1+ patterns, like r'(\d{2}-\w{3}-\d{4} \d{2}:\d{2})|(\d{4}-\d{2}-\d{2} \d{2}:\d{2})', note that findall will return a tuple of two groups!!!
-        # print(str_dates[0])
-        if len(str_dates) == 0:
-            return datetime(1000, 1, 1) # return ridiculously old date to discard this entry, since it has no dates
-        # for date in str_dates:
-        #     print(date)
-        dates = [list(datefinder.find_dates(date))[0] for date in str_dates]
-
-        # for date in dates:
-        #     print(date)
-        return(max(dates))
-
-    @classmethod
-    def max_date(cls, urls):
-        latest_date = cls.get_latest_date(urls[0])
-        # get_latest_date(urls[0])
-        for dir in urls:
-            latest_date2 = cls.get_latest_date(dir)
-            if (latest_date2 >= latest_date):
-                latest_date = latest_date2
-        # print(latest_date)
-        return latest_date
-
+            if href.endswith("/") and href != "../" and href != "/" and not href.startswith("/"):
+                dir_next = directory+href
+                # print(dir_next)
+                # calling it self
+                if dir_next not in folders:
+                    folders.append(dir_next)
+                    cls.scrape(compare, folders, site1, site2, dir_next)
 
     @classmethod
     def check(cls, data, project):
         """Check if project packages are up-to-date"""
         # lists
-        urls1=[]
-        urls2=[]
+        compare=[]
+        folders=[]
 
         csc_url = CSC_MIRROR + data[project]["csc"] + data[project]["file"]
         upstream_url = data[project]["upstream"] + data[project]["file"]
 
         # calling function
-        cls.scrape(urls1, csc_url)
-        cls.scrape(urls2, upstream_url)
-
-        # print(len(urls1), len(urls2))
-
-        return cls.max_date(urls1) == cls.max_date(urls2)
+        cls.scrape(compare, folders, upstream_url, csc_url, "")
+
+        return all(compare)
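The rewrite drops the old strategy of crawling both mirrors into urls1/urls2 and comparing only the single newest timestamp (max_date). Instead, scrape() walks the upstream tree once and, in every directory that lists a sha256sum.txt, requires the CSC copy of that file to be at least as new as upstream's; check() then returns all(compare). The standalone sketch below models that control flow with an in-memory listing instead of HTTP requests (the tree and its dates are made up for illustration):

from datetime import datetime

# toy model: "mirror prefix + directory" -> (subdirectories, date of sha256sum.txt or None)
tree = {
    "up/":       (["20.2/", "20.3/"], None),
    "up/20.2/":  ([], datetime(2021, 7, 8)),
    "up/20.3/":  ([], datetime(2021, 10, 15)),
    "csc/":      (["20.2/", "20.3/"], None),
    "csc/20.2/": ([], datetime(2021, 7, 9)),
    "csc/20.3/": ([], datetime(2021, 10, 15)),
}

def scrape(compare, folders, site1, site2, directory):
    subdirs, upstream_date = tree[site1 + directory]
    if upstream_date is not None:                      # marker file present upstream
        _, csc_date = tree.get(site2 + directory, ([], None))
        # the mirror passes only if its copy is at least as new as upstream's
        compare.append(csc_date is not None and upstream_date <= csc_date)
        return
    for href in subdirs:                               # otherwise recurse, like the real scrape()
        nxt = directory + href
        if nxt not in folders:
            folders.append(nxt)
            scrape(compare, folders, site1, site2, nxt)

compare, folders = [], []
scrape(compare, folders, "up/", "csc/", "")
print(all(compare))   # True: every release directory on the mirror is current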
ubuntu_ports_releases

@@ -1,27 +1,51 @@
 from bs4 import BeautifulSoup
 import requests
-import datefinder # another date finding library
 import re
-from datetime import datetime
-from datetime import timedelta
-import time
-import pandas as pd
+import datefinder # another date finding library
 from project import Project
 from shared import CSC_MIRROR
 
-# this function is brute force looping through the whole directory and checking dates
-# it may sound horrible, but for certain distros, i believe it's indeed the best solution
-
-# lists
-urls=[]
-
 class ubuntu_ports_releases(Project):
     """ubuntu_ports_releases class"""
+    @staticmethod
+    def checker(directory_URL, file_name):
+        page = requests.get(directory_URL).text
+        file_index = page.find(file_name)
+        # print(page)
+
+        if file_index == -1:
+            return False
+
+        str_dates = re.findall(r'(\d{2}-\w{3}-\d{4} \d{2}:\d{2})|(\d{4}-\d{2}-\d{2} \d{2}:\d{2})', page[file_index:])
+
+        return list(datefinder.find_dates("".join(str_dates[0])))[0]
+
     @classmethod
-    def scrape(cls, urls, site):
+    def scrape(cls, compare, folders, site1, site2, directory):
+        if cls.checker(site1+directory, "MD5SUMS") != False:
+            # print (site1+directory)
+            # print (cls.checker(site1+directory, "MD5SUMS"))
+            if cls.checker(site2+directory, "MD5SUMS") != False:
+                # print (site2+directory)
+                # print (cls.checker(site2+directory, "MD5SUMS"))
+                compare.append(cls.checker(site1+directory, "MD5SUMS") <= cls.checker(site2+directory, "MD5SUMS"))
+                return
+            compare.append(False)
+            return
+        elif cls.checker(site1+directory, "SHA256SUMS") != False:
+            # print (site1+directory)
+            # print (cls.checker(site1+directory, "SHA256SUMS"))
+            if cls.checker(site2+directory, "SHA256SUMS") != False:
+                # print (site2+directory)
+                # print (cls.checker(site2+directory, "SHA256SUMS"))
+                compare.append(cls.checker(site1+directory, "SHA256SUMS") <= cls.checker(site2+directory, "SHA256SUMS"))
+                return
+            compare.append(False)
+            return
+
         # getting the request from url
-        r = requests.get(site)
+        r = requests.get(site1 + directory)
 
         # converting the text
         s = BeautifulSoup(r.text,"html.parser")
 

@@ -29,57 +53,24 @@ class ubuntu_ports_releases(Project):
             href = i.attrs['href']
 
             if href.endswith("/") and href != "../" and href != "/" and not href.startswith("/") and not href.startswith("http://"):
-                site_next = site+href
-
-                if site_next not in urls:
-                    urls.append(site_next)
-                    # print(site_next)
-                    # calling it self
-                    cls.scrape(urls, site_next)
-
-    @staticmethod
-    def get_latest_date(web_dir):
-        page = requests.get(web_dir).text
-
-        str_dates = re.findall(r'(\d{2}-\w{3}-\d{4} \d{2}:\d{2})|(\d{4}-\d{2}-\d{2} \d{2}:\d{2})', page)
-        # if you want to match 1+ patterns, like r'(\d{2}-\w{3}-\d{4} \d{2}:\d{2})|(\d{4}-\d{2}-\d{2} \d{2}:\d{2})', note that findall will return a tuple of two groups!!!
-        # print(str_dates[0])
-        if len(str_dates) == 0:
-            return datetime(1000, 1, 1) # return ridiculously old date to discard this entry, since it has no dates
-        # for date in str_dates:
-        #     print(date)
-        dates = [list(datefinder.find_dates("".join(date)))[0] for date in str_dates]
-
-        # for date in dates:
-        #     print(date)
-        return(max(dates))
-
-    @classmethod
-    def max_date(cls, urls):
-        latest_date = cls.get_latest_date(urls[0])
-        # get_latest_date(urls[0])
-        for dir in urls:
-            latest_date2 = cls.get_latest_date(dir)
-            if (latest_date2 >= latest_date):
-                latest_date = latest_date2
-        # print(latest_date)
-        return latest_date
-
+                dir_next = directory+href
+                # print(dir_next)
+                # calling it self
+                if dir_next not in folders:
+                    folders.append(dir_next)
+                    cls.scrape(compare, folders, site1, site2, dir_next)
 
     @classmethod
     def check(cls, data, project):
         """Check if project packages are up-to-date"""
         # lists
-        urls1=[]
-        urls2=[]
+        compare=[]
+        folders=[]
 
         csc_url = CSC_MIRROR + data[project]["csc"] + data[project]["file"]
         upstream_url = data[project]["upstream"] + data[project]["file"]
 
         # calling function
-        cls.scrape(urls1, csc_url)
-        cls.scrape(urls2, upstream_url)
-
-        # print(len(urls1), len(urls2))
-
-        return cls.max_date(urls1) == cls.max_date(urls2)
+        cls.scrape(compare, folders, upstream_url, csc_url, "")
+
+        return all(compare)
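ubuntu_ports_releases and xubuntu_releases (next) follow the same pattern as linuxmint, but a directory counts as a release directory if it lists MD5SUMS and, failing that, SHA256SUMS. A hedged sketch of that fallback, factored into helpers the real code does not have (checker_date stands in for cls.checker, which returns False or a parsed date):

from datetime import datetime

def marker_date(checker_date, url, markers=("MD5SUMS", "SHA256SUMS")):
    # return (marker name, date) for the first checksum file the directory lists,
    # or (None, None) if neither is present
    for name in markers:
        date = checker_date(url, name)
        if date is not False:
            return name, date
    return None, None

def compare_release(checker_date, upstream_dir, csc_dir):
    name, up_date = marker_date(checker_date, upstream_dir)
    if name is None:
        return None                       # not a release directory; caller keeps recursing
    csc_date = checker_date(csc_dir, name)
    return csc_date is not False and up_date <= csc_date

# tiny demo with a fake checker: upstream lists only SHA256SUMS, both copies equally new
fake = {("up/", "SHA256SUMS"): datetime(2021, 10, 14), ("csc/", "SHA256SUMS"): datetime(2021, 10, 14)}
checker = lambda url, name: fake.get((url, name), False)
print(compare_release(checker, "up/", "csc/"))   # True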
xubuntu_releases

@@ -1,27 +1,51 @@
 from bs4 import BeautifulSoup
 import requests
-import datefinder # another date finding library
 import re
-from datetime import datetime
-from datetime import timedelta
-import time
-import pandas as pd
+import datefinder # another date finding library
 from project import Project
 from shared import CSC_MIRROR
 
-# this function is brute force looping through the whole directory and checking dates
-# it may sound horrible, but for certain distros, i believe it's indeed the best solution
-
-# lists
-urls=[]
-
 class xubuntu_releases(Project):
     """xubuntu_releases class"""
+    @staticmethod
+    def checker(directory_URL, file_name):
+        page = requests.get(directory_URL).text
+        file_index = page.find(file_name)
+        # print(page)
+
+        if file_index == -1:
+            return False
+
+        str_dates = re.findall(r'(\d{2}-\w{3}-\d{4} \d{2}:\d{2})|(\d{4}-\d{2}-\d{2} \d{2}:\d{2})', page[file_index:])
+
+        return list(datefinder.find_dates("".join(str_dates[0])))[0]
+
     @classmethod
-    def scrape(cls, urls, site):
+    def scrape(cls, compare, folders, site1, site2, directory):
+        if cls.checker(site1+directory, "MD5SUMS") != False:
+            # print (site1+directory)
+            # print (cls.checker(site1+directory, "MD5SUMS"))
+            if cls.checker(site2+directory, "MD5SUMS") != False:
+                # print (site2+directory)
+                # print (cls.checker(site2+directory, "MD5SUMS"))
+                compare.append(cls.checker(site1+directory, "MD5SUMS") <= cls.checker(site2+directory, "MD5SUMS"))
+                return
+            compare.append(False)
+            return
+        elif cls.checker(site1+directory, "SHA256SUMS") != False:
+            # print (site1+directory)
+            # print (cls.checker(site1+directory, "SHA256SUMS"))
+            if cls.checker(site2+directory, "SHA256SUMS") != False:
+                # print (site2+directory)
+                # print (cls.checker(site2+directory, "SHA256SUMS"))
+                compare.append(cls.checker(site1+directory, "SHA256SUMS") <= cls.checker(site2+directory, "SHA256SUMS"))
+                return
+            compare.append(False)
+            return
+
         # getting the request from url
-        r = requests.get(site)
+        r = requests.get(site1 + directory)
 
         # converting the text
         s = BeautifulSoup(r.text,"html.parser")
 

@@ -29,57 +53,24 @@ class xubuntu_releases(Project):
             href = i.attrs['href']
 
             if href.endswith("/") and href != "../" and href != "/" and not href.startswith("/") and not href.startswith("http://"):
-                site_next = site+href
-
-                if site_next not in urls:
-                    urls.append(site_next)
-                    # print(site_next)
-                    # calling it self
-                    cls.scrape(urls, site_next)
-
-    @staticmethod
-    def get_latest_date(web_dir):
-        page = requests.get(web_dir).text
-
-        str_dates = re.findall(r'(\d{2}-\w{3}-\d{4} \d{2}:\d{2})|(\d{4}-\d{2}-\d{2} \d{2}:\d{2})', page)
-        # if you want to match 1+ patterns, like r'(\d{2}-\w{3}-\d{4} \d{2}:\d{2})|(\d{4}-\d{2}-\d{2} \d{2}:\d{2})', note that findall will return a tuple of two groups!!!
-        # print(str_dates[0])
-        if len(str_dates) == 0:
-            return datetime(1000, 1, 1) # return ridiculously old date to discard this entry, since it has no dates
-        # for date in str_dates:
-        #     print(date)
-        dates = [list(datefinder.find_dates("".join(date)))[0] for date in str_dates]
-
-        # for date in dates:
-        #     print(date)
-        return(max(dates))
-
-    @classmethod
-    def max_date(cls, urls):
-        latest_date = cls.get_latest_date(urls[0])
-        # get_latest_date(urls[0])
-        for dir in urls:
-            latest_date2 = cls.get_latest_date(dir)
-            if (latest_date2 >= latest_date):
-                latest_date = latest_date2
-        # print(latest_date)
-        return latest_date
-
+                dir_next = directory+href
+                # print(dir_next)
+                # calling it self
+                if dir_next not in folders:
+                    folders.append(dir_next)
+                    cls.scrape(compare, folders, site1, site2, dir_next)
 
     @classmethod
     def check(cls, data, project):
         """Check if project packages are up-to-date"""
         # lists
-        urls1=[]
-        urls2=[]
+        compare=[]
+        folders=[]
 
         csc_url = CSC_MIRROR + data[project]["csc"] + data[project]["file"]
         upstream_url = data[project]["upstream"] + data[project]["file"]
 
         # calling function
-        cls.scrape(urls1, csc_url)
-        cls.scrape(urls2, upstream_url)
-
-        # print(len(urls1), len(urls2))
-
-        return cls.max_date(urls1) == cls.max_date(urls2)
+        cls.scrape(compare, folders, upstream_url, csc_url, "")
+
+        return all(compare)
test.py (4 changed lines)

@@ -2,12 +2,12 @@
 Test Client for individual classes in projects
 """
 
-from projects import mxlinux_iso
+from projects import xubuntu_releases
 import json # import json to read project info stored in json file
 
 # main function
 if __name__ =="__main__":
     with open("data.json", "r", encoding="utf-8") as file:
         data = json.load(file)
-    print(mxlinux_iso.check(data, "mxlinux_iso"))
+    print(xubuntu_releases.check(data, "xubuntu_releases"))
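test.py is the ad-hoc test client, so switching the import is how a single checker gets exercised; check() returns one boolean, which is what the print shows. A small hypothetical variation (not in the repo) that picks the project from the command line instead of editing the import each time, reusing the same import style test.py already uses:

import json
import sys
from projects import linuxmint, ubuntu_ports_releases, xubuntu_releases

CLASSES = {
    "linuxmint": linuxmint,
    "ubuntu_ports_releases": ubuntu_ports_releases,
    "xubuntu_releases": xubuntu_releases,
}

if __name__ == "__main__":
    name = sys.argv[1] if len(sys.argv) > 1 else "xubuntu_releases"
    with open("data.json", "r", encoding="utf-8") as file:
        data = json.load(file)
    print(CLASSES[name].check(data, name))   # True if the CSC mirror is up to date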