mirror-checker/projects/x_org.py

49 lines
1.6 KiB
Python
Raw Permalink Normal View History

2021-10-16 14:56:29 -04:00
from bs4 import BeautifulSoup
import requests
from project import Project
from shared import CSC_MIRROR
class x_org(Project):
    """x.org class"""

    # Hrefs that are skipped when collecting file names. NOTE(review): these
    # look like the column-sort links of an auto-generated directory index --
    # confirm against the mirrored pages.
    _SORT_LINKS = frozenset({"?C=N;O=D", "?C=M;O=A", "?C=S;O=A", "?C=D;O=A"})

    @classmethod
    def scrape(cls, files, site):
        """Recursively collect the link names reachable from an index page.

        Args:
            files: List extended in place with every accepted href. Only the
                href text itself is stored (not the full path), and file
                entries may appear more than once.
            site: URL of the page to fetch. Directory hrefs are concatenated
                onto it verbatim to build the next URL, so it should end
                with "/".

        Returns:
            None. Results are accumulated in ``files``.
        """
        # getting the request from url
        r = requests.get(site)
        # converting the text
        s = BeautifulSoup(r.text, "html.parser")

        for anchor in s.find_all("a"):
            # An <a> tag without an href attribute would raise KeyError with
            # attrs['href']; such anchors carry no link, so skip them.
            href = anchor.get("href")
            if href is None:
                continue

            # Parent/root links, absolute paths and "xcb*" entries are never
            # recorded nor followed.
            if href in ("../", "/") or href.startswith(("/", "xcb")):
                continue

            if href.endswith("/"):
                # Directory link: record the name once, then descend into it.
                if href not in files:
                    files.append(href)
                cls.scrape(files, site + href)
            elif href not in cls._SORT_LINKS:
                # Plain file link.
                files.append(href)

    @classmethod
    def check(cls, data, project, current_time):
        """Check if project packages are up-to-date.

        Args:
            data: Mapping whose ``data[project]`` entry provides the "csc",
                "upstream" and "file" strings used to build both URLs.
            project: Key of this project inside ``data``.
            current_time: Not used by this check.

        Returns:
            True when the set of link names scraped from the CSC mirror
            equals the set scraped from upstream, False otherwise.
        """
        csc_files = []
        upstream_files = []

        csc_url = CSC_MIRROR + data[project]["csc"] + data[project]["file"]
        upstream_url = data[project]["upstream"] + data[project]["file"]

        # Walk both trees; each call fills its list in place.
        cls.scrape(csc_files, csc_url)
        cls.scrape(upstream_files, upstream_url)

        # Order and duplicates are irrelevant, only the names present matter.
        return set(csc_files) == set(upstream_files)