added NetBSD

This commit is contained in:
Tom 2021-10-17 00:01:06 -07:00
parent 1df671b9e0
commit fe7d22e1e5
4 changed files with 109 additions and 13 deletions


@@ -20,11 +20,7 @@ to find repos of the mirrored projects to check, just search "projectName mirror"
## checker information
not done:
NetBSD: http://ftp.netbsd.org/pub/NetBSD/ has a public repo, but no timestamp, a web directory that is hard to loop through, and no mirror tracker
puppylinux: https://distro.ibiblio.org/puppylinux/ check the ISO files in the folders starting with puppy
done:
all done:
almalinux
alpine
apache
@@ -64,11 +60,13 @@ manjaro
mxlinux
mxlinux-iso: this one has been shown as out of sync on the official tracker for 134 days, which is weird
mysql: http://mirrors.sunsite.dk/mysql/
NetBSD: http://ftp.netbsd.org/pub/NetBSD/ checking the timestamps of the CHANGES files of the different versions, and of the SHA512 and MD5 files in the isos of the different versions
nongnu: http://download.savannah.nongnu.org/releases/ https://savannah.gnu.org/maintenance/Mirmon/ http://download.savannah.gnu.org/mirmon/savannah/
openbsd
opensuse: http://download.opensuse.org/ check the Update.repo files in the folders inside the update folder; temporarily not checking tumbleweed-non-oss/ and tumbleweed/
parabola: https://repo.parabola.nu/ https://www.parabola.nu/mirrors/status/
pkgsrc
puppylinux: https://distro.ibiblio.org/puppylinux/ check the ISO files or htm files in the folders starting with puppy
qtproject: https://download.qt.io/
racket: https://mirror.racket-lang.org/installers/ no public repo, no timestamp, no mirror status tracker; make sure that we have the latest version number under racket-installers
raspberry pi: https://archive.raspberrypi.org/ Checking the timestamp of either the Release file or the Packages file should suffice (see the sketch after this list).
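
Many of the checks above reduce to one pattern: fetch a small timestamp file (TIME, Release, Packages, Update.repo, ...) from both the upstream site and the CSC mirror and compare the two copies. A minimal sketch of that pattern, assuming plain-text timestamp files; the helper name and the example URLs are hypothetical (CSC_MIRROR is the constant imported from shared in netbsd.py below):

import requests

def timestamp_file_in_sync(upstream_url, mirror_url, path):
    # fetch the same small timestamp file from both sides
    upstream_stamp = requests.get(upstream_url + path).text.strip()
    mirror_stamp = requests.get(mirror_url + path).text.strip()
    # identical contents means the mirror has picked up the latest sync
    return upstream_stamp == mirror_stamp

# hypothetical usage for the raspberry pi entry above:
# timestamp_file_in_sync("https://archive.raspberrypi.org/", CSC_MIRROR + "raspberrypi/", "debian/dists/bullseye/Release")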


@@ -7,21 +7,21 @@
"file": "almalinux/TIME"
},
"Alpine": {
"out_of_sync_since": 1633923341,
"out_of_sync_since": null,
"out_of_sync_interval": 86400,
"csc": "",
"upstream": "https://uk.alpinelinux.org/",
"file": "alpine/last-updated"
},
"Apache": {
"out_of_sync_since": null,
"out_of_sync_since": 1634453333,
"out_of_sync_interval": 86400,
"csc": "apache/",
"upstream": "https://downloads.apache.org/",
"file": "zzz/time.txt"
},
"Arch": {
"out_of_sync_since": 1634433282,
"out_of_sync_since": null,
"out_of_sync_interval": 86400,
"csc": "archlinux/",
"upstream": "http://arch.mirror.constant.com/",
@@ -73,7 +73,7 @@
"file": "project/trace/deb-multimedia.org"
},
"DebianPorts": {
"out_of_sync_since": 1633294718,
"out_of_sync_since": null,
"out_of_sync_interval": 86400,
"csc": "",
"upstream": "https://deb.debian.org/",
@@ -94,7 +94,7 @@
"file": "TIME"
},
"Fedora": {
"out_of_sync_since": null,
"out_of_sync_since": 1634453333,
"out_of_sync_interval": 86400,
"csc": "fedora/",
"upstream": "http://fedora.mirror.iweb.com/",
@@ -242,7 +242,7 @@
"file": "lastsync"
},
"pkgsrc": {
"out_of_sync_since": 1633335556,
"out_of_sync_since": null,
"out_of_sync_interval": 86400,
"csc": "pkgsrc/",
"upstream": "http://ftp.netbsd.org/pub/pkgsrc/",
@@ -394,5 +394,12 @@
"csc": "puppylinux/",
"upstream": "https://distro.ibiblio.org/puppylinux/",
"file": ""
},
"netbsd": {
"out_of_sync_since": null,
"out_of_sync_interval": 86400,
"csc": "NetBSD/",
"upstream": "http://ftp.netbsd.org/pub/NetBSD/",
"file": ""
}
}
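
How out_of_sync_since and out_of_sync_interval are consumed is not part of this diff; a plausible reading is that out_of_sync_since records the first failed check and a project is only flagged once a full interval has elapsed. A minimal sketch under that assumption (the function name is hypothetical):

import time

def should_flag(entry, in_sync):
    # back in sync: clear the marker
    if in_sync:
        entry["out_of_sync_since"] = None
        return False
    # remember when the project was first seen out of sync
    if entry["out_of_sync_since"] is None:
        entry["out_of_sync_since"] = int(time.time())
    # flag only after it has been out of sync longer than the allowed interval
    return time.time() - entry["out_of_sync_since"] > entry["out_of_sync_interval"]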

projects/netbsd.py Normal file

@@ -0,0 +1,91 @@
from bs4 import BeautifulSoup
import requests
import re
import datefinder  # another date finding library
from project import Project
from shared import CSC_MIRROR


class netbsd(Project):
    """netbsd class: compares the CSC mirror's NetBSD tree against upstream"""

    @staticmethod
    def checker(directory_URL, file_name):
        """Return the last-modified date listed next to file_name in the
        directory listing at directory_URL, or False if the file is absent."""
        page = requests.get(directory_URL).text
        file_index = page.find(file_name)
        if file_index == -1:
            return False
        # the listing shows dates either as dd-Mon-yyyy or yyyy-Mon-dd
        str_dates = re.findall(r'(\d{2}-\w{3}-\d{4} \d{2}:\d{2})|(\d{4}-\w{3}-\d{2} \d{2}:\d{2})', page[file_index:])
        if not str_dates:  # no date listed after the file name
            return False
        return list(datefinder.find_dates("".join(str_dates[0])))[0]
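    # illustration (the exact listing format is an assumption, not taken from this commit):
    # an autoindex row such as
    #     <a href="CHANGES">CHANGES</a>        17-Oct-2021 00:01  1.2M
    # gives str_dates[0] == ('17-Oct-2021 00:01', '') and the joined string
    # parses to datetime.datetime(2021, 10, 17, 0, 1)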
    @classmethod
    def check_version(cls, site1, site2):
        # fetch the directory listings of the upstream site and the mirror
        page1 = requests.get(site1).text
        page2 = requests.get(site2).text
        # parse the HTML
        s1 = BeautifulSoup(page1, "html.parser")
        s2 = BeautifulSoup(page2, "html.parser")
        hrefs1 = s1.find_all("a")
        hrefs2 = [i.attrs['href'] for i in s2.find_all("a")]
        for i in hrefs1:  # for a href directories
            href = i.attrs['href']
            if re.match(r'NetBSD-\d.*', href):  # numbered versions list CHANGES at the top level
                date1 = cls.checker(site1+href, "CHANGES")
                if not date1:  # if the version has no CHANGES file, ignore it
                    continue
                if href not in hrefs2:  # version missing from the mirror
                    return False
                date2 = cls.checker(site2+href, "CHANGES")
                if not date2 or date1 > date2:  # CHANGES missing or older on the mirror
                    return False
            elif href.startswith("NetBSD-") and href != "NetBSD-daily/":  # other NetBSD-* trees list CHANGES under src/doc/
                date1 = cls.checker(site1+href+"src/doc/", "CHANGES")
                if not date1:
                    continue
                if href not in hrefs2:
                    return False
                date2 = cls.checker(site2+href+"src/doc/", "CHANGES")
                if not date2 or date1 > date2:
                    return False
        return True
    @classmethod
    def check_iso(cls, site1, site2):
        # fetch the iso/ directory listings of the upstream site and the mirror
        page1 = requests.get(site1).text
        page2 = requests.get(site2).text
        # parse the HTML
        s1 = BeautifulSoup(page1, "html.parser")
        s2 = BeautifulSoup(page2, "html.parser")
        hrefs1 = s1.find_all("a")
        hrefs2 = [i.attrs['href'] for i in s2.find_all("a")]
        for i in hrefs1:  # for a href directories
            href = i.attrs['href']
            if href not in hrefs2:  # version missing from the mirror
                return False
            # compare the listed dates of the checksum files in each version directory
            for checksum_file in ("SHA512", "MD5"):
                date1 = cls.checker(site1+href, checksum_file)
                date2 = cls.checker(site2+href, checksum_file)
                if date1 and (not date2 or date1 > date2):  # checksum file missing or older on the mirror
                    return False
        return True
    @classmethod
    def check(cls, data, project):
        """Check if project packages are up-to-date"""
        csc_url = CSC_MIRROR + data[project]["csc"] + data[project]["file"]
        upstream_url = data[project]["upstream"] + data[project]["file"]
        # the mirror is in sync only if both the version trees and the iso/ trees check out
        return cls.check_version(upstream_url, csc_url) and cls.check_iso(upstream_url+"iso/", csc_url+"iso/")
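
For instance, with the data.json entry above ("csc": "NetBSD/", "upstream": "http://ftp.netbsd.org/pub/NetBSD/", "file": ""), check ends up comparing http://ftp.netbsd.org/pub/NetBSD/ against CSC_MIRROR + "NetBSD/", and then the iso/ subtree of each.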


@@ -7,7 +7,7 @@ from datetime import timedelta
import time
import pandas as pd
import re # for salt stack specifically
from projects import puppy_linux
from projects import netbsd
import json  # to read the project info stored in the json file
# this function brute-forces through the whole directory tree, checking dates
@@ -65,7 +65,7 @@ def get_latest_date(web_dir):
if __name__ =="__main__":
with open("data.json", "r", encoding="utf-8") as file:
data = json.load(file)
print(puppy_linux.check(data, "puppy_linux"))
print(netbsd.check(data, "netbsd"))
"""# website to be scrape
site="https://cdimage.ubuntu.com/releases/"