added x.org
parent c974d49ffc
commit 709aa04cb8
@ -24,7 +24,6 @@ not done:
NetBSD: http://ftp.netbsd.org/pub/NetBSD/ has a public repo, no timestamp, a web directory that is hard to loop through, and no mirror tracker
opensuse: http://download.opensuse.org/ check the Update.repo files in the folders inside the update folder
puppylinux: https://distro.ibiblio.org/puppylinux/ check the ISO files in the folders whose names start with puppy
x.org: https://www.x.org/releases/ check every file under each directory in /x.org/individual/ and make sure we have every file the upstream has

done:
almalinux
@ -85,5 +84,6 @@ ubuntu-ports: http://ports.ubuntu.com/ubuntu-ports/ checks the file anonster.can
ubuntu-ports-releases: https://cdimage.ubuntu.com/releases/ has a public repo, no timestamp, no status tracker; brute-force looped it
ubuntu-releases: https://releases.ubuntu.com/
vlc: http://download.videolan.org/pub/videolan/
x.org: https://www.x.org/releases/ check every file under each directory in /x.org/individual/ and make sure we have every file the upstream has
Xiph: https://ftp.osuosl.org/pub/xiph/releases/ loop through each directory in xiph/releases/ and compare the timestamps of the checksum files
xubuntu-releases: https://cdimage.ubuntu.com/xubuntu/releases/ candidate for brute-force looping since it has few folders (see the sketch below)
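A rough sketch of that brute-force loop, for illustration only (not part of this commit): it assumes standard Apache/nginx-style index pages whose rows carry "YYYY-MM-DD HH:MM" timestamps, and recurses to find the newest one. The actual helper is get_latest_date() in test.py.

import re
import requests
from bs4 import BeautifulSoup

def latest_date(site):
    """Return the newest 'YYYY-MM-DD HH:MM' timestamp found at or below site."""
    r = requests.get(site)
    # newest timestamp on this index page; ISO-style strings compare lexicographically
    newest = max(re.findall(r"\d{4}-\d{2}-\d{2} \d{2}:\d{2}", r.text), default="")
    s = BeautifulSoup(r.text, "html.parser")
    for a in s.find_all("a"):
        href = a.attrs.get("href", "")
        if href.endswith("/") and href not in ("../", "/") and not href.startswith("/"):
            # recurse into the subdirectory and keep the newest date seen
            newest = max(newest, latest_date(site + href))
    return newest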
@ -373,5 +373,12 @@
        "csc": "xiph/releases/",
        "upstream": "https://ftp.osuosl.org/pub/xiph/releases/",
        "file": ""
    },
    "x_org": {
        "out_of_sync_since": null,
        "out_of_sync_interval": 86400,
        "csc": "x.org/individual/",
        "upstream": "https://www.x.org/releases/individual/",
        "file": ""
    }
}
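For reference, each entry resolves to the pair of URLs that get compared. A hypothetical illustration using the x_org entry above (CSC_MIRROR is the mirror's base URL from shared.py):

from shared import CSC_MIRROR

entry = {
    "csc": "x.org/individual/",
    "upstream": "https://www.x.org/releases/individual/",
    "file": ""
}
csc_url = CSC_MIRROR + entry["csc"] + entry["file"]  # our side
upstream_url = entry["upstream"] + entry["file"]     # upstream side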
@ -0,0 +1,48 @@
from bs4 import BeautifulSoup
import requests
from project import Project
from shared import CSC_MIRROR

# Apache index sort links that should not be collected as files
SORT_LINKS = ("?C=N;O=D", "?C=M;O=A", "?C=S;O=A", "?C=D;O=A")


class opensuse(Project):
    """opensuse class"""
    @classmethod
    def scrape(cls, files, site):
        # fetch the directory index
        r = requests.get(site)

        # parse the HTML
        s = BeautifulSoup(r.text, "html.parser")

        for i in s.find_all("a"):  # every <a href> in the listing
            href = i.attrs['href']

            if href.endswith("/") and href != "../" and href != "/" and not href.startswith("/"):
                site_next = site + href

                if href not in files:
                    files.append(href)
                    # recurse into the subdirectory
                    cls.scrape(files, site_next)
            elif href != "../" and href != "/" and not href.startswith("/") and href not in SORT_LINKS:
                files.append(href)  # note: stores the bare href, so paths are not preserved

    @classmethod
    def check(cls, data, project):
        """Check if project packages are up-to-date"""
        files1 = []  # our mirror
        files2 = []  # upstream

        csc_url = CSC_MIRROR + data[project]["csc"] + data[project]["file"]
        upstream_url = data[project]["upstream"] + data[project]["file"]

        # crawl both directory trees
        cls.scrape(files1, csc_url)
        cls.scrape(files2, upstream_url)

        # print(set(files2) - set(files1))

        return set(files1) == set(files2)
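A quick way to exercise the new class, mirroring how test.py drives xiph and x_org below; this assumes the file lands in projects/ and that data.json gains an "opensuse" entry shaped like the ones above:

import json
from projects import opensuse

with open("data.json", "r", encoding="utf-8") as file:
    data = json.load(file)
print(opensuse.check(data, "opensuse"))  # True when the mirror matches upstream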
@ -0,0 +1,48 @@
from bs4 import BeautifulSoup
import requests
from project import Project
from shared import CSC_MIRROR

# Apache index sort links that should not be collected as files
SORT_LINKS = ("?C=N;O=D", "?C=M;O=A", "?C=S;O=A", "?C=D;O=A")


class x_org(Project):
    """x.org class"""
    @classmethod
    def scrape(cls, files, site):
        # fetch the directory index
        r = requests.get(site)

        # parse the HTML
        s = BeautifulSoup(r.text, "html.parser")

        for i in s.find_all("a"):  # every <a href> in the listing
            href = i.attrs['href']

            # entries under xcb are excluded from this check
            if href.endswith("/") and href != "../" and href != "/" and not href.startswith("/") and not href.startswith("xcb"):
                site_next = site + href

                if href not in files:
                    files.append(href)
                    # recurse into the subdirectory
                    cls.scrape(files, site_next)
            elif href != "../" and href != "/" and not href.startswith("/") and not href.startswith("xcb") and href not in SORT_LINKS:
                files.append(href)  # note: stores the bare href, so paths are not preserved

    @classmethod
    def check(cls, data, project):
        """Check if project packages are up-to-date"""
        files1 = []  # our mirror
        files2 = []  # upstream

        csc_url = CSC_MIRROR + data[project]["csc"] + data[project]["file"]
        upstream_url = data[project]["upstream"] + data[project]["file"]

        # crawl both directory trees
        cls.scrape(files1, csc_url)
        cls.scrape(files2, upstream_url)

        # print(set(files1) - set(files2))

        return set(files1) == set(files2)
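One design note: the x.org entry in the notes above only asks that we have every file the upstream has, which would be the one-sided test set(files2) <= set(files1); both classes instead require strict set equality, so extra local files also flag the mirror as out of sync.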
test.py
@ -7,7 +7,7 @@ from datetime import timedelta
import time
import pandas as pd
import re  # for salt stack specifically
from projects import xiph
from projects import x_org
import json  # read the project info stored in the json file


# this function brute-force loops through the whole directory and checks dates
@ -65,7 +65,7 @@ def get_latest_date(web_dir):
if __name__ == "__main__":
    with open("data.json", "r", encoding="utf-8") as file:
        data = json.load(file)
    print(xiph.check(data, "xiph"))
    print(x_org.check(data, "x_org"))

"""# website to be scraped
site = "https://cdimage.ubuntu.com/releases/"