import re

from bs4 import BeautifulSoup
import datefinder  # another date finding library
import requests

from project import Project
from shared import CSC_MIRROR


class trisquel(Project):
    """Up-to-date checker for the Trisquel mirror.

    Compares the timestamps shown in HTTP directory listings: the
    ``Release`` entries of the package distributions (upstream vs. the CSC
    mirror) and the ``md5sum.txt`` entry of the ISO directory (CSC mirror
    vs. the other configured mirrors).
    """

    @staticmethod
    def checker(directory_URL, file_name):
        """Return the timestamp listed after ``file_name`` in a directory index.

        Args:
            directory_URL: URL of the directory listing page to download.
            file_name: Text to look for in the page. The first date-like
                string at or after its first occurrence is parsed.

        Returns:
            The parsed ``datetime``, or ``False`` when ``file_name`` does not
            occur in the page or no parsable timestamp follows it.
        """
        page = requests.get(directory_URL).text
        file_index = page.find(file_name)
        if file_index == -1:
            return False

        # Accepts stamps shaped like "01-Jan-2021 12:34" or "2021-01-01 12:34".
        stamp = re.search(
            r'(\d{2,4}-\w{3}-\d{2,4} \d{2}:\d{2})|(\d{4}-\d{2}-\d{2} \d{2}:\d{2})',
            page[file_index:],
        )
        if stamp is None:
            return False

        # find_dates yields lazily; only the first parsed date is needed.
        parsed = next(iter(datefinder.find_dates(stamp.group(0))), None)
        return parsed if parsed is not None else False

    @staticmethod
    def _is_newer(first, second):
        """Return True when ``first`` is strictly newer than ``second``.

        Either argument may be the ``False`` that ``checker`` returns when it
        finds no timestamp. A missing timestamp is treated as older than any
        real one, so the comparison never mixes ``bool`` and ``datetime``.
        """
        if not first:
            return False
        return not second or first > second

    @classmethod
    def scrape(cls, site1, site2):
        """Check that every sub-directory of ``site1`` is current on ``site2``.

        Args:
            site1: URL of the reference directory listing.
            site2: URL of the directory listing being verified.

        Returns:
            ``False`` as soon as a relative sub-directory linked from
            ``site1`` is not linked from ``site2``, or its ``Release``
            timestamp on ``site1`` is newer than the one on ``site2``;
            ``True`` otherwise.
        """
        soup1 = BeautifulSoup(requests.get(site1).text, "html.parser")
        soup2 = BeautifulSoup(requests.get(site2).text, "html.parser")

        # href=True skips anchors that carry no href attribute at all.
        hrefs1 = [link["href"] for link in soup1.find_all("a", href=True)]
        hrefs2 = {link["href"] for link in soup2.find_all("a", href=True)}

        for href in hrefs1:
            # Only relative sub-directory links; skip parent/root/absolute ones.
            if not href.endswith("/") or href == "../" or href.startswith("/"):
                continue
            if href not in hrefs2:
                return False
            reference_date = cls.checker(site1 + href, "Release")
            verified_date = cls.checker(site2 + href, "Release")
            if cls._is_newer(reference_date, verified_date):
                return False
        return True

    @classmethod
    def check_iso(cls, site, mirrors):
        """Check that ``site``'s ``md5sum.txt`` is not older than any mirror's.

        Args:
            site: URL of the ISO directory listing being verified.
            mirrors: URLs of the ISO directory listings to compare against.

        Returns:
            ``False`` if any mirror lists a newer ``md5sum.txt`` timestamp
            than ``site`` does, ``True`` otherwise.
        """
        if not mirrors:
            return True

        # The site's listing does not change between mirrors; fetch it once.
        site_date = cls.checker(site, "md5sum.txt")
        for mirror in mirrors:
            if cls._is_newer(cls.checker(mirror, "md5sum.txt"), site_date):
                return False
        return True

    @classmethod
    def check(cls, data, project, current_time):
        """Check if project packages are up-to-date.

        Args:
            data: Mapping whose ``data[project]`` entry provides the ``csc``,
                ``file``, ``upstream`` and ``mirrors`` values.
            project: Key of this project's entry in ``data``.
            current_time: Not used by this checker.

        Returns:
            ``True`` when both the package distributions and the ISO
            directory of the CSC mirror are current, ``False`` otherwise.
        """
        entry = data[project]
        csc_url = CSC_MIRROR + entry["csc"] + entry["file"]
        upstream_url = entry["upstream"] + entry["file"]
        mirrors = entry["mirrors"]

        return (
            cls.scrape(upstream_url, csc_url + "packages/dists/")
            and cls.check_iso(csc_url + "iso/", mirrors)
        )