added opensuse

Tom 2021-10-16 18:31:50 -07:00
parent 709aa04cb8
commit e3a4d18b36
4 changed files with 54 additions and 29 deletions

View File

@@ -22,7 +22,6 @@ to find repos of the mirrored projects to check, just search "projectName mirror"
 not done:
 NetBSD: http://ftp.netbsd.org/pub/NetBSD/ has public repo, no timestamp, web directory hard to loop through, no mirror tracker
-opensuse: http://download.opensuse.org/ check Update.repo files in folders inside the update folder
 puppylinux: https://distro.ibiblio.org/puppylinux/ check the ISO files in the folders starting with puppy
 done:
@@ -67,6 +66,7 @@ mxlinux-iso: this one seems out of sync on the official tracker for 134 days, wh
 mysql: http://mirrors.sunsite.dk/mysql/
 nongnu: http://download.savannah.nongnu.org/releases/ https://savannah.gnu.org/maintenance/Mirmon/ http://download.savannah.gnu.org/mirmon/savannah/
 openbsd
+opensuse: http://download.opensuse.org/ check Update.repo files in folders inside the update folder, temporarily not checking tumbleweed-non-oss/ and tumbleweed/
 parabola: https://repo.parabola.nu/ https://www.parabola.nu/mirrors/status/
 pkgsrc
 qtproject: https://download.qt.io/
@@ -84,6 +84,6 @@ ubuntu-ports: http://ports.ubuntu.com/ubuntu-ports/ checks the file anonster.can
 ubuntu-ports-releases: https://cdimage.ubuntu.com/releases/ has public repo, no timestamp, no status tracker, brute force looped it
 ubuntu-releases: https://releases.ubuntu.com/
 vlc: http://download.videolan.org/pub/videolan/
-x.org: https://www.x.org/releases/ check all of the files under each directory under /x.org/individual/, and make sure that we have all of the files which the upstream has
+x.org: https://www.x.org/releases/ check all of the files under each directory under /x.org/individual/, and make sure that we have all of the files which the upstream has, ignoring the xcb folder
 Xiph: https://ftp.osuosl.org/pub/xiph/releases/ loop through each directory in xiph/releases/ and try to compare the timestamps of the checksum files
 xubuntu-releases: https://cdimage.ubuntu.com/xubuntu/releases/ candidate for brute force looping since it has few folders
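
Most of the "done" entries above come down to the same move: fetch one small timestamp file from both the CSC mirror and the upstream, parse a date out of each, and compare. A minimal sketch of that pattern, assuming the CSC mirror's base URL is http://mirror.csclub.uwaterloo.ca/ and that the tracked file holds a single parseable date (both assumptions, not part of this commit; timestamp_check is a made-up name):

import requests
import datefinder  # same date-parsing library the project already uses

CSC_MIRROR = "http://mirror.csclub.uwaterloo.ca/"  # assumed base URL

def timestamp_check(entry):
    """Return True when the mirror's copy of the tracked file is at
    least as new as upstream's; entry is one record from data.json."""
    csc_text = requests.get(CSC_MIRROR + entry["csc"] + entry["file"]).text
    upstream_text = requests.get(entry["upstream"] + entry["file"]).text
    csc_date = next(datefinder.find_dates(csc_text), None)
    upstream_date = next(datefinder.find_dates(upstream_text), None)
    return None not in (csc_date, upstream_date) and csc_date >= upstream_date

Projects with no usable timestamp (NetBSD above) are the ones that end up brute-force looped instead.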

View File

@@ -14,14 +14,14 @@
         "file": "alpine/last-updated"
     },
     "Apache": {
-        "out_of_sync_since": 1633294718,
+        "out_of_sync_since": null,
         "out_of_sync_interval": 86400,
         "csc": "apache/",
         "upstream": "https://downloads.apache.org/",
         "file": "zzz/time.txt"
     },
     "Arch": {
-        "out_of_sync_since": null,
+        "out_of_sync_since": 1634433282,
         "out_of_sync_interval": 86400,
         "csc": "archlinux/",
         "upstream": "http://arch.mirror.constant.com/",
@@ -94,7 +94,7 @@
         "file": "TIME"
     },
     "Fedora": {
-        "out_of_sync_since": 1633923341,
+        "out_of_sync_since": null,
         "out_of_sync_interval": 86400,
         "csc": "fedora/",
         "upstream": "http://fedora.mirror.iweb.com/",
@@ -380,5 +380,12 @@
         "csc": "x.org/individual/",
         "upstream": "https://www.x.org/releases/individual/",
         "file": ""
-    }
+    },
+    "opensuse": {
+        "out_of_sync_since": null,
+        "out_of_sync_interval": 86400,
+        "csc": "opensuse/update/",
+        "upstream": "http://download.opensuse.org/update/",
+        "file": ""
+    }
 }
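
The value flips in these hunks suggest the semantics of the two sync fields: out_of_sync_since is null while a mirror is current (Apache and Fedora recover here) and holds the epoch time of the first failed check otherwise (Arch at 1634433282). Under that reading, a sketch of when a project would actually be flagged; should_flag is a hypothetical helper, not code from this repo:

import time

def should_flag(entry):
    """Flag a mirror only after it has been out of sync for longer
    than its grace interval (86400 s, i.e. one day, for every entry here)."""
    since = entry["out_of_sync_since"]
    if since is None:  # currently in sync
        return False
    return time.time() - since > entry["out_of_sync_interval"]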

View File

@@ -1,14 +1,40 @@
 from bs4 import BeautifulSoup
 import requests
 import re
 import datefinder  # another date finding library
 from project import Project
 from shared import CSC_MIRROR
 
 class opensuse(Project):
     """opensuse class"""
+    @staticmethod
+    def checker(directory_URL, file_name):
+        page = requests.get(directory_URL).text
+        file_index = page.find(file_name)
+        # print(page)
+        if file_index == -1:
+            return False
+        str_dates = re.findall(r'(\d{2}-\w{3}-\d{4} \d{2}:\d{2})|(\d{4}-\d{2}-\d{2} \d{2}:\d{2})', page[file_index:])
+        return list(datefinder.find_dates("".join(str_dates[0])))[0]
+
     @classmethod
-    def scrape(cls, files, site):
+    def scrape(cls, compare, folders, site1, site2, directory):
+        if cls.checker(site1+directory, ".repo") != False:
+            # print (site1+directory)
+            # print (cls.checker(site1+directory, ".repo"))
+            if cls.checker(site2+directory, ".repo") != False:
+                # print (site2+directory)
+                # print (cls.checker(site2+directory, ".repo"))
+                compare.append(cls.checker(site1+directory, ".repo") <= cls.checker(site2+directory, ".repo"))
+                return
+            compare.append(False)
+            return
         # getting the request from url
-        r = requests.get(site)
+        r = requests.get(site1 + directory)
         # converting the text
         s = BeautifulSoup(r.text, "html.parser")
@@ -16,33 +42,25 @@ class opensuse(Project):
         for i in s.find_all("a"):  # for a href directories
             href = i.attrs['href']
-            if href.endswith("/") and href != "../" and href != "/" and not href.startswith("/"):
-                site_next = site+href
-                if site_next not in files:
-                    files.append(href)
-                    # print(href)
+            if href.endswith("/") and href != "../" and href != "/" and not href.startswith("/") and href != "tumbleweed-non-oss/" and href != "tumbleweed/":
+                dir_next = directory+href
+                # print(dir_next)
                 # calling it self
-                    cls.scrape(files, site_next)
-            elif href != "../" and href != "/" and not href.startswith("/") and href != "?C=N;O=D" and href != "?C=M;O=A" and href != "?C=S;O=A" and href != "?C=D;O=A":
-                # print(href)
-                files.append(href)
+                if dir_next not in folders:
+                    folders.append(dir_next)
+                    cls.scrape(compare, folders, site1, site2, dir_next)
 
     @classmethod
     def check(cls, data, project):
         """Check if project packages are up-to-date"""
         # lists
-        files1 = []
-        files2 = []
+        compare = []
+        folders = []
         csc_url = CSC_MIRROR + data[project]["csc"] + data[project]["file"]
         upstream_url = data[project]["upstream"] + data[project]["file"]
         # calling function
-        cls.scrape(files1, csc_url)
-        cls.scrape(files2, upstream_url)
-        # print(set(files2) - set(files1))
-        return set(files1) == set(files2)
+        cls.scrape(compare, folders, upstream_url, csc_url, "")
+        return all(compare)
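
The new checker() can be exercised without touching the live mirror: it slices the directory listing at the first hit for the file name, regex-matches either an Apache-style date (16-Oct-2021 18:31) or an ISO-style one (2021-10-16 18:31) after it, and lets datefinder turn the match into a datetime. A small offline sketch with a made-up autoindex row:

import re
import datefinder

# hypothetical autoindex row; real pages come from download.opensuse.org
page = '<a href="update.repo">update.repo</a>    16-Oct-2021 18:31    315'

file_index = page.find(".repo")
str_dates = re.findall(
    r'(\d{2}-\w{3}-\d{4} \d{2}:\d{2})|(\d{4}-\d{2}-\d{2} \d{2}:\d{2})',
    page[file_index:])
# each match is a 2-tuple with one group filled and the other empty,
# so joining the first match yields just the date string
print(list(datefinder.find_dates("".join(str_dates[0])))[0])
# -> 2021-10-16 18:31:00

One caveat visible in the code as committed: if a listing contains the file name but no recognizable date after it, str_dates[0] raises IndexError rather than returning False; only the file-missing case is guarded.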

View File

@@ -7,7 +7,7 @@ from datetime import timedelta
 import time
 import pandas as pd
 import re  # for salt stack specifically
-from projects import x_org
+from projects import opensuse
 import json  # import json to read project info stored in json file
 
 # this function is brute force looping through the whole directory and checking dates
@@ -65,7 +65,7 @@ def get_latest_date(web_dir):
 if __name__ == "__main__":
     with open("data.json", "r", encoding="utf-8") as file:
         data = json.load(file)
-    print(x_org.check(data, "x_org"))
+    print(opensuse.check(data, "opensuse"))
 
     """# website to be scrape
     site="https://cdimage.ubuntu.com/releases/"
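
The body of get_latest_date is not part of this diff; from the comment above it ("brute force looping through the whole directory and checking dates") it presumably recurses through every autoindex page and keeps the newest date it finds. A sketch of that idea only, under that assumption, not the repo's actual implementation:

from bs4 import BeautifulSoup
import datefinder
import requests

def get_latest_date(web_dir):
    """Walk an autoindex tree recursively and return the newest
    datetime found anywhere in it (None if no dates at all)."""
    page = requests.get(web_dir).text
    latest = max(datefinder.find_dates(page), default=None)
    for a in BeautifulSoup(page, "html.parser").find_all("a"):
        href = a.attrs['href']
        # descend only into real subdirectories, mirroring the
        # href filtering used by opensuse.scrape above
        if href.endswith("/") and href not in ("../", "/") and not href.startswith("/"):
            sub = get_latest_date(web_dir + href)
            if sub is not None and (latest is None or sub > latest):
                latest = sub
    return latest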