From 709aa04cb8a826ed25735689d962b8f3eee71a52 Mon Sep 17 00:00:00 2001 From: Tom Date: Sat, 16 Oct 2021 11:56:29 -0700 Subject: [PATCH] added x.org --- README.md | 2 +- data.json | 7 +++++++ projects/opensuse.py | 48 ++++++++++++++++++++++++++++++++++++++++++++ projects/x_org.py | 48 ++++++++++++++++++++++++++++++++++++++++++++ test.py | 4 ++-- 5 files changed, 106 insertions(+), 3 deletions(-) create mode 100644 projects/opensuse.py create mode 100644 projects/x_org.py diff --git a/README.md b/README.md index 346dac2..84ab7e4 100644 --- a/README.md +++ b/README.md @@ -24,7 +24,6 @@ not done: NetBSD: http://ftp.netbsd.org/pub/NetBSD/ has public repo, no timestamp, web directory hard to loop through, no mirror tracker opensuse: http://download.opensuse.org/ check Update.repo files in folders inside the update folder puppylinux: https://distro.ibiblio.org/puppylinux/ check the ISO files in the folders starting with puppy -x.org: https://www.x.org/releases/ check all of the files under each directory under /x.org/individual/, and make sure that we have all of the files which the upstream has done: almalinux @@ -85,5 +84,6 @@ ubuntu-ports: http://ports.ubuntu.com/ubuntu-ports/ checks the file anonster.can ubuntu-ports-releases: https://cdimage.ubuntu.com/releases/ has public repo, no timestamp, no status tracker, brute force looped it ubuntu-releases: https://releases.ubuntu.com/ vlc: http://download.videolan.org/pub/videolan/ +x.org: https://www.x.org/releases/ check all of the files under each directory under /x.org/individual/, and make sure that we have all of the files which the upstream has Xiph: https://ftp.osuosl.org/pub/xiph/releases/ loop through each directory in xiph/releases/ and trying to compare the timestamp of the checksum files xubuntu-releases: https://cdimage.ubuntu.com/xubuntu/releases/ candidate for brute force looping since it has few folders \ No newline at end of file diff --git a/data.json b/data.json index 1a48015..728f0d3 100644 --- 
a/data.json +++ b/data.json @@ -373,5 +373,12 @@ "csc": "xiph/releases/", "upstream": "https://ftp.osuosl.org/pub/xiph/releases/", "file": "" + }, + "x_org": { + "out_of_sync_since": null, + "out_of_sync_interval": 86400, + "csc": "x.org/individual/", + "upstream": "https://www.x.org/releases/individual/", + "file": "" + } } \ No newline at end of file diff --git a/projects/opensuse.py b/projects/opensuse.py new file mode 100644 index 0000000..fee6444 --- /dev/null +++ b/projects/opensuse.py @@ -0,0 +1,48 @@ +from bs4 import BeautifulSoup +import requests +from project import Project +from shared import CSC_MIRROR + +class opensuse(Project): + """opensuse class""" + @classmethod + def scrape(cls, files, site): + # getting the request from url + r = requests.get(site) + + # converting the text + s = BeautifulSoup(r.text,"html.parser") + + for i in s.find_all("a"): # for a href directories + href = i.attrs['href'] + + if href.endswith("/") and href != "../" and href != "/" and not href.startswith("/"): + site_next = site+href + + if href not in files: + files.append(href) + # print(href) + # calling it self + cls.scrape(files, site_next) + elif href != "../" and href != "/" and not href.startswith("/") and href != "?C=N;O=D" and href != "?C=M;O=A" and href != "?C=S;O=A" and href != "?C=D;O=A": + # print(href) + files.append(href) + + + @classmethod + def check(cls, data, project): + """Check if project packages are up-to-date""" + # lists + files1=[] + files2=[] + + csc_url = CSC_MIRROR + data[project]["csc"] + data[project]["file"] + upstream_url = data[project]["upstream"] + data[project]["file"] + + # calling function + cls.scrape(files1, csc_url) + cls.scrape(files2, upstream_url) + + # print(set(files2) - set(files1)) + + return set(files1) == set(files2) \ No newline at end of file diff --git a/projects/x_org.py b/projects/x_org.py new file mode 100644 index 0000000..135e7a2 --- /dev/null +++ b/projects/x_org.py @@ -0,0 +1,48 @@ +from bs4 import
BeautifulSoup +import requests +from project import Project +from shared import CSC_MIRROR + +class x_org(Project): + """x.org class""" + @classmethod + def scrape(cls, files, site): + # getting the request from url + r = requests.get(site) + + # converting the text + s = BeautifulSoup(r.text,"html.parser") + + for i in s.find_all("a"): # for a href directories + href = i.attrs['href'] + + if href.endswith("/") and href != "../" and href != "/" and not href.startswith("/") and not href.startswith("xcb"): + site_next = site+href + + if href not in files: + files.append(href) + # print(href) + # calling it self + cls.scrape(files, site_next) + elif href != "../" and href != "/" and not href.startswith("/") and not href.startswith("xcb") and href != "?C=N;O=D" and href != "?C=M;O=A" and href != "?C=S;O=A" and href != "?C=D;O=A": + # print(href) + files.append(href) + + + @classmethod + def check(cls, data, project): + """Check if project packages are up-to-date""" + # lists + files1=[] + files2=[] + + csc_url = CSC_MIRROR + data[project]["csc"] + data[project]["file"] + upstream_url = data[project]["upstream"] + data[project]["file"] + + # calling function + cls.scrape(files1, csc_url) + cls.scrape(files2, upstream_url) + + # print(set(files1) - set(files2)) + + return set(files1) == set(files2) \ No newline at end of file diff --git a/test.py b/test.py index eeee7ce..3148b47 100644 --- a/test.py +++ b/test.py @@ -7,7 +7,7 @@ from datetime import timedelta import time import pandas as pd import re # for salt stack specifically -from projects import xiph +from projects import x_org import json # import json to read project info stored in json file # this function is brute force looping through the whole directory and checking dates @@ -65,7 +65,7 @@ def get_latest_date(web_dir): if __name__ =="__main__": with open("data.json", "r", encoding="utf-8") as file: data = json.load(file) - print(xiph.check(data, "xiph")) + print(x_org.check(data, "x_org")) """# website to be 
scrape site="https://cdimage.ubuntu.com/releases/"