from bs4 import BeautifulSoup
import requests
import re
import datefinder  # date parsing library, used to convert listing timestamps

from project import Project
from shared import CSC_MIRROR


class slackware(Project):
    """Slackware mirror status checker."""

    @staticmethod
    def checker(directory_URL, file_name):
        """Return the last-modified timestamp shown for file_name in the
        directory listing at directory_URL, or False if the file is absent."""
        page = requests.get(directory_URL).text
        file_index = page.find(file_name)

        if file_index == -1:
            return False

        # Listings print timestamps as either DD-Mon-YYYY HH:MM or YYYY-MM-DD HH:MM;
        # findall returns (group1, group2) tuples where one group is always empty.
        str_dates = re.findall(r'(\d{2}-\w{3}-\d{4} \d{2}:\d{2})|(\d{4}-\d{2}-\d{2} \d{2}:\d{2})', page[file_index:])

        # joining the tuple drops the empty group; parse the first timestamp found
        return list(datefinder.find_dates("".join(str_dates[0])))[0]
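
    # For illustration (a hypothetical listing row, not fetched from a real mirror):
    # given a line such as "CHECKSUMS.md5    2021-02-15 21:17    1.1M",
    # checker() would return datetime.datetime(2021, 2, 15, 21, 17).
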
    @classmethod
    def scrape(cls, site1, site2):
        """Compare the release directories on two mirrors; return False on any mismatch."""
        # fetch the directory listing from each mirror
        r1 = requests.get(site1)
        r2 = requests.get(site2)

        # parse the HTML listings
        s1 = BeautifulSoup(r1.text, "html.parser")
        s2 = BeautifulSoup(r2.text, "html.parser")

        hrefs1 = [i.attrs['href'] for i in s1.find_all("a")]
        hrefs2 = [i.attrs['href'] for i in s2.find_all("a")]

        for href in hrefs1:  # directory links end with "/"
            # Skip parent/root links, absolute paths, releases older than 8.1,
            # and trees that are unsupported or checked separately. Note: the
            # original pattern r'slackware-([1-7]|8\.0).*' also matched 10.x-15.x
            # (the "[1-7]" matched their leading "1"); anchoring the dot fixes that.
            if (href.endswith("/") and href != "../" and href != "/"
                    and not href.startswith("/")
                    and not re.match(r'slackware-([1-7]\.|8\.0).*', href)
                    and href != "slackware-iso/"
                    and href != "slackware-current/"
                    and href != "slackware-pre-1.0-beta/"
                    and href != "unsupported/"):
                if href not in hrefs2:
                    return False
                # compare the CHECKSUMS.md5 timestamps on the two mirrors
                elif cls.checker(site1+href, "CHECKSUMS.md5") != cls.checker(site2+href, "CHECKSUMS.md5"):
                    return False
        return True
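
    # For illustration, with the corrected version filter:
    # re.match(r'slackware-([1-7]\.|8\.0).*', "slackware-7.1/") matches (skipped),
    # while re.match(r'slackware-([1-7]\.|8\.0).*', "slackware-14.2/") is None (compared).
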
    @staticmethod
    def check_iso(site1, site2):
        """Check that every ISO subdirectory upstream also exists on the mirror."""
        # fetch the directory listing from each mirror
        r1 = requests.get(site1)
        r2 = requests.get(site2)

        # parse the HTML listings
        s1 = BeautifulSoup(r1.text, "html.parser")
        s2 = BeautifulSoup(r2.text, "html.parser")

        hrefs1 = [i.attrs['href'] for i in s1.find_all("a")]
        hrefs2 = [i.attrs['href'] for i in s2.find_all("a")]

        for href in hrefs1:  # directory links end with "/"
            # skip parent/root links, absolute paths, and external links
            if (href.endswith("/") and href != "../" and href != "/"
                    and not href.startswith("/") and not href.startswith("http")):
                if href not in hrefs2:
                    return False
        return True
    @classmethod
    def check(cls, data, project, current_time):
        """Check if project packages are up-to-date"""
        csc_url = CSC_MIRROR + data[project]["csc"] + data[project]["file"]
        upstream_url = data[project]["upstream"] + data[project]["file"]

        # the release trees must match and the ISO directory must be fully mirrored
        return cls.scrape(upstream_url, csc_url) and cls.check_iso(upstream_url+"slackware-iso/", csc_url+"slackware-iso/")
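

# A minimal usage sketch (an assumption, not part of the original module):
# the "data" mapping mirrors the fields that check() reads, and the URLs
# below are hypothetical placeholders, not real mirror configuration.
if __name__ == "__main__":
    data = {
        "slackware": {
            "upstream": "https://mirrors.example.org/",  # hypothetical upstream mirror
            "csc": "slackware/",   # path appended to CSC_MIRROR
            "file": "",            # suffix appended to both URLs
        }
    }
    # current_time is accepted by check() but not used by it
    print(slackware.check(data, "slackware", current_time=None))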