added x.org

2021-10-16 11:56:29 -07:00 · 2021-10-16 11:56:29 -07:00 · 709aa04cb8
parent c974d49ffc
commit 709aa04cb8
5 changed files with 106 additions and 3 deletions
--- a/README.md
+++ b/README.md
@ -24,7 +24,6 @@ not done:
 NetBSD: http://ftp.netbsd.org/pub/NetBSD/ has public repo, no timestamp, web directory hard to loop through, no mirror tracker
 opensuse: http://download.opensuse.org/ check Update.repo files in folders inside the update folder
 puppylinux: https://distro.ibiblio.org/puppylinux/ check the ISO files in the folders starting with puppy
 x.org: https://www.x.org/releases/ check all of the files under each directory under /x.org/individual/, and make sure that we have all of the files which the upstream has
 done:
 almalinux
@ -85,5 +84,6 @@ ubuntu-ports: http://ports.ubuntu.com/ubuntu-ports/ checks the file anonster.can
 ubuntu-ports-releases: https://cdimage.ubuntu.com/releases/ has public repo, no timestamp, no status tracker, brute force looped it
 ubuntu-releases: https://releases.ubuntu.com/
 vlc: http://download.videolan.org/pub/videolan/
 x.org: https://www.x.org/releases/ check all of the files under each directory under /x.org/individual/, and make sure that we have all of the files which the upstream has
 Xiph: https://ftp.osuosl.org/pub/xiph/releases/ loop through each directory in xiph/releases/ and try to compare the timestamps of the checksum files
 xubuntu-releases: https://cdimage.ubuntu.com/xubuntu/releases/ candidate for brute force looping since it has few folders
--- a/data.json
+++ b/data.json
@ -373,5 +373,12 @@
 		"csc": "xiph/releases/",
 		"upstream": "https://ftp.osuosl.org/pub/xiph/releases/",
 		"file": ""
 	},
 	"x_org": {
 		"out_of_sync_since": null,
 		"out_of_sync_interval": 86400,
 		"csc": "x.org/individual/",
 		"upstream": "https://www.x.org/releases/individual/",
 		"file": ""
 	}
 }
--- a/projects/opensuse.py
+++ b/projects/opensuse.py
@ -0,0 +1,48 @@
 from bs4 import BeautifulSoup
 import requests
 from project import Project
 from shared import CSC_MIRROR
 class opensuse(Project):
    """Compare the directory listing of the CSC opensuse mirror with upstream."""
    # Seconds to wait for a listing page before giving up instead of hanging.
    REQUEST_TIMEOUT = 30
    @classmethod
    def scrape(cls, files, site, prefix=""):
        """Recursively collect the entries linked from the listing page at `site`.
        Every directory and file link is appended to `files` as a path relative
        to the URL the crawl started from, so entries with the same name in
        different directories stay distinct when the caller compares listings.
        Args:
            files: list that is extended in place with the discovered paths.
            site: URL of the listing page to read; expected to end with "/".
            prefix: path of `site` relative to the crawl root; callers leave
                it at the default, recursion fills it in.
        Raises:
            requests.exceptions.RequestException: if the page cannot be
                fetched within REQUEST_TIMEOUT seconds.
        """
        r = requests.get(site, timeout=cls.REQUEST_TIMEOUT)
        s = BeautifulSoup(r.text, "html.parser")
        for i in s.find_all("a"):
            # Anchors without an href carry no path; .get avoids a KeyError.
            href = i.get("href")
            if not href:
                continue
            # Skip the parent link, absolute paths and the column-sort links
            # ("?C=N;O=D" and friends) that listing pages add to their header.
            if href == "../" or href.startswith(("/", "?")):
                continue
            path = prefix + href
            if not href.endswith("/"):
                files.append(path)
            elif path not in files:
                # Keyed on the relative path so a listing that links the same
                # directory twice is only crawled once, while same-named
                # directories under different parents are all visited.
                files.append(path)
                cls.scrape(files, site + href, path)
    @classmethod
    def check(cls, data, project):
        """Return True when the CSC mirror and upstream list the same paths.
        Args:
            data: mapping of project name to its settings; the "csc",
                "upstream" and "file" entries of `data[project]` are read.
            project: key of the project to look up in `data`.
        Returns:
            bool: True if both crawls produced the same set of relative paths.
        """
        settings = data[project]
        csc_url = CSC_MIRROR + settings["csc"] + settings["file"]
        upstream_url = settings["upstream"] + settings["file"]
        csc_files = []
        upstream_files = []
        cls.scrape(csc_files, csc_url)
        cls.scrape(upstream_files, upstream_url)
        return set(csc_files) == set(upstream_files)
--- a/projects/x_org.py
+++ b/projects/x_org.py
@ -0,0 +1,48 @@
 from bs4 import BeautifulSoup
 import requests
 from project import Project
 from shared import CSC_MIRROR
 class x_org(Project):
    """Compare the directory listing of the CSC x.org mirror with upstream."""
    # Seconds to wait for a listing page before giving up instead of hanging.
    REQUEST_TIMEOUT = 30
    @classmethod
    def scrape(cls, files, site, prefix=""):
        """Recursively collect the entries linked from the listing page at `site`.
        Every directory and file link is appended to `files` as a path relative
        to the URL the crawl started from, so entries with the same name in
        different directories stay distinct when the caller compares listings.
        Links whose href starts with "xcb" are ignored.
        Args:
            files: list that is extended in place with the discovered paths.
            site: URL of the listing page to read; expected to end with "/".
            prefix: path of `site` relative to the crawl root; callers leave
                it at the default, recursion fills it in.
        Raises:
            requests.exceptions.RequestException: if the page cannot be
                fetched within REQUEST_TIMEOUT seconds.
        """
        r = requests.get(site, timeout=cls.REQUEST_TIMEOUT)
        s = BeautifulSoup(r.text, "html.parser")
        for i in s.find_all("a"):
            # Anchors without an href carry no path; .get avoids a KeyError.
            href = i.get("href")
            if not href:
                continue
            # Skip the parent link, absolute paths and the column-sort links
            # ("?C=N;O=D" and friends) that listing pages add to their header.
            # "xcb" entries are excluded from the comparison on both sides
            # NOTE(review): presumably the mirror does not carry them - confirm.
            if href == "../" or href.startswith(("/", "?", "xcb")):
                continue
            path = prefix + href
            if not href.endswith("/"):
                files.append(path)
            elif path not in files:
                # Keyed on the relative path rather than the bare name, so a
                # directory is not skipped just because another entry
                # elsewhere in the tree shares its name.
                files.append(path)
                cls.scrape(files, site + href, path)
    @classmethod
    def check(cls, data, project):
        """Return True when the CSC mirror and upstream list the same paths.
        Args:
            data: mapping of project name to its settings; the "csc",
                "upstream" and "file" entries of `data[project]` are read.
            project: key of the project to look up in `data`.
        Returns:
            bool: True if both crawls produced the same set of relative paths.
        """
        settings = data[project]
        csc_url = CSC_MIRROR + settings["csc"] + settings["file"]
        upstream_url = settings["upstream"] + settings["file"]
        csc_files = []
        upstream_files = []
        cls.scrape(csc_files, csc_url)
        cls.scrape(upstream_files, upstream_url)
        return set(csc_files) == set(upstream_files)
--- a/test.py
+++ b/test.py
@ -7,7 +7,7 @@ from datetime import timedelta
 import time
 import pandas as pd
 import re # for salt stack specifically
-from projects import xiph
+from projects import x_org
 import json  # import json to read project info stored in json file
 # this function is brute force looping through the whole directory and checking dates
@ -65,7 +65,7 @@ def get_latest_date(web_dir):
 if __name__ =="__main__":
    with open("data.json", "r", encoding="utf-8") as file:
        data = json.load(file)
-        print(xiph.check(data, "xiph"))
+        print(x_org.check(data, "x_org"))
    """# website to be scrape
    site="https://cdimage.ubuntu.com/releases/"