# mirror-checker/projects/trisquel.py
from bs4 import BeautifulSoup
import requests
import re
2021-10-17 20:59:35 -04:00
import datefinder # another date finding library
from project import Project
from shared import CSC_MIRROR
class trisquel(Project):
    """Mirror freshness checker for the trisquel project.

    Timestamps are scraped from HTML directory index pages: the package
    tree is compared through the "Release" file of every sub-directory and
    the ISO tree through "md5sum.txt".
    """

    # Timestamp layouts recognised in a listing, e.g. "17-Oct-2021 20:59"
    # or "2021-10-17 20:59".
    _LISTING_DATE = re.compile(
        r'\d{2,4}-\w{3}-\d{2,4} \d{2}:\d{2}|\d{4}-\d{2}-\d{2} \d{2}:\d{2}'
    )

    @staticmethod
    def checker(directory_URL, file_name):
        """Return the listing timestamp that follows *file_name*.

        Args:
            directory_URL: URL of the directory index page to download.
            file_name: Text to locate in the page; the first timestamp
                found after its first occurrence is the one returned.

        Returns:
            A ``datetime`` parsed by ``datefinder``, or ``False`` when the
            page does not mention *file_name* or no timestamp can be
            extracted after it.
        """
        page = requests.get(directory_URL).text
        file_index = page.find(file_name)
        if file_index == -1:
            return False

        stamp = trisquel._LISTING_DATE.search(page[file_index:])
        if stamp is None:
            # Previously an IndexError; report "unknown" like a missing file.
            return False

        parsed = next(iter(datefinder.find_dates(stamp.group(0))), None)
        return parsed if parsed is not None else False

    @classmethod
    def scrape(cls, site1, site2):
        """Compare the "Release" timestamps of every sub-directory.

        Args:
            site1: URL of the reference directory listing.
            site2: URL of the directory listing being verified.

        Returns:
            ``False`` if a sub-directory linked from *site1* is not linked
            from *site2*, or if its "Release" timestamp on *site1* is newer
            than (or has no counterpart on) *site2*; ``True`` otherwise.
        """
        r1 = requests.get(site1)
        r2 = requests.get(site2)
        s1 = BeautifulSoup(r1.text, "html.parser")
        s2 = BeautifulSoup(r2.text, "html.parser")

        # href=True skips anchors without an href (they used to raise KeyError).
        reference_hrefs = [a["href"] for a in s1.find_all("a", href=True)]
        verified_hrefs = {a["href"] for a in s2.find_all("a", href=True)}

        for href in reference_hrefs:
            # Only relative sub-directory links are of interest: skip files,
            # the parent link and absolute paths.
            if not href.endswith("/") or href == "../" or href.startswith("/"):
                continue
            if href not in verified_hrefs:
                return False

            reference_date = cls.checker(site1 + href, "Release")
            if not reference_date:
                # Nothing to compare against, so site2 cannot be behind here.
                continue
            verified_date = cls.checker(site2 + href, "Release")
            if not verified_date or reference_date > verified_date:
                return False
        return True

    @classmethod
    def check_iso(cls, site, mirrors):
        """Check that *site* is not older than any of *mirrors*.

        Args:
            site: URL of the directory listing being verified.
            mirrors: URLs of the directory listings to compare against.

        Returns:
            ``False`` if any mirror shows a "md5sum.txt" timestamp that is
            newer than the one on *site* (or that *site* lacks altogether);
            ``True`` otherwise, including when *mirrors* is empty.
        """
        if not mirrors:
            return True

        # The site listing does not change between iterations: fetch it once.
        site_date = cls.checker(site, "md5sum.txt")
        for mirror in mirrors:
            mirror_date = cls.checker(mirror, "md5sum.txt")
            if not mirror_date:
                # No timestamp on this mirror, so it cannot prove site is behind.
                continue
            if not site_date or site_date < mirror_date:
                return False
        return True

    @classmethod
    def check(cls, data, project, current_time):
        """Check if project packages are up-to-date.

        Args:
            data: Mapping whose ``data[project]`` entry provides the "csc",
                "file", "upstream" and "mirrors" values.
            project: Key of the entry to read from *data*.
            current_time: Unused by this checker.

        Returns:
            ``True`` when both the package comparison (``scrape``) and the
            ISO comparison (``check_iso``) succeed, ``False`` otherwise.
        """
        entry = data[project]
        csc_url = CSC_MIRROR + entry["csc"] + entry["file"]
        upstream_url = entry["upstream"] + entry["file"]
        mirrors = entry["mirrors"]
        return (
            cls.scrape(upstream_url, csc_url + "packages/dists/")
            and cls.check_iso(csc_url + "iso/", mirrors)
        )