from bs4 import BeautifulSoup
import requests

from project import Project
from shared import CSC_MIRROR

# Hrefs that are never recorded: parent/root links, absolute paths, and
# anything beginning with "xcb".
_SKIPPED_HREFS = ("../", "/")
_SKIPPED_PREFIXES = ("/", "xcb")

# Query-string links that look like the column-sort links of an
# Apache-style index page; they are navigation, not content.
_SORT_LINKS = frozenset({"?C=N;O=D", "?C=M;O=A", "?C=S;O=A", "?C=D;O=A"})


class x_org(Project):
    """x.org class"""

    @classmethod
    def scrape(cls, files, site):
        """Recursively collect link targets from the index page at *site*.

        Accepted hrefs are appended to *files*, which is mutated in place;
        nothing is returned. Hrefs are stored exactly as they appear in the
        page (relative to their own directory), not as full paths.

        Args:
            files: List that receives the hrefs found.
            site: URL of the index page to fetch. Sub-directory hrefs are
                appended to it verbatim, so it should end with "/".
        """
        response = requests.get(site)
        soup = BeautifulSoup(response.text, "html.parser")

        # href=True skips anchors that carry no href attribute, which would
        # otherwise raise KeyError and abort the whole crawl.
        for anchor in soup.find_all("a", href=True):
            href = anchor["href"]

            if href in _SKIPPED_HREFS or href.startswith(_SKIPPED_PREFIXES):
                continue

            if href.endswith("/"):
                # A directory is recorded and crawled only the first time
                # its href is seen.
                if href not in files:
                    files.append(href)
                    cls.scrape(files, site + href)
            elif href not in _SORT_LINKS:
                files.append(href)

    @classmethod
    def check(cls, data, project):
        """Check if project packages are up-to-date.

        Crawls the CSC mirror listing and the upstream listing for *project*
        and compares the hrefs found on each side.

        Args:
            data: Mapping of project name to a config mapping providing the
                "csc", "upstream" and "file" entries.
            project: Key into *data* selecting the project to check.

        Returns:
            True when both listings yield the same set of hrefs.
        """
        entry = data[project]
        csc_url = CSC_MIRROR + entry["csc"] + entry["file"]
        upstream_url = entry["upstream"] + entry["file"]

        csc_files = []
        upstream_files = []
        cls.scrape(csc_files, csc_url)
        cls.scrape(upstream_files, upstream_url)

        return set(csc_files) == set(upstream_files)