From 68e13c327e170182de9bf85d738231d5727a9f9d Mon Sep 17 00:00:00 2001 From: Tom Date: Sun, 17 Oct 2021 12:49:06 -0700 Subject: [PATCH] updated ubuntu_ports --- data.json | 15 +++++++------- debian code study.txt | 3 --- distros.json | 10 ---------- main.py | 2 +- projects.json | 20 ------------------- projects/gentooportage.py | 21 ++++++++++++++++++-- projects/ubuntu_ports.py | 41 ++++++++++++++++++++++++++++++++++----- test.py | 4 ++-- 8 files changed, 66 insertions(+), 50 deletions(-) delete mode 100644 debian code study.txt delete mode 100644 distros.json delete mode 100644 projects.json diff --git a/data.json b/data.json index 1657f74..a373a49 100644 --- a/data.json +++ b/data.json @@ -21,7 +21,7 @@ "file": "zzz/time.txt" }, "Arch": { - "out_of_sync_since": null, + "out_of_sync_since": 1634455547, "out_of_sync_interval": 86400, "csc": "archlinux/", "upstream": "http://arch.mirror.constant.com/", @@ -35,7 +35,7 @@ "file": "centos/TIME" }, "Ceph": { - "out_of_sync_since": 1633340186, + "out_of_sync_since": null, "out_of_sync_interval": 86400, "csc": "ceph/", "upstream": "https://download.ceph.com/", @@ -73,14 +73,14 @@ "file": "project/trace/deb-multimedia.org" }, "DebianPorts": { - "out_of_sync_since": null, + "out_of_sync_since": 1634455547, "out_of_sync_interval": 86400, "csc": "", "upstream": "https://deb.debian.org/", "file": "debian-ports/project/trace/porta.debian.org" }, "DebianSecurity": { - "out_of_sync_since": null, + "out_of_sync_since": 1634455547, "out_of_sync_interval": 86400, "csc": "", "upstream": "http://debian.mirror.iweb.ca/", @@ -118,6 +118,7 @@ "out_of_sync_since": null, "out_of_sync_interval": 86400, "csc": "rsync://rsync4.ca.gentoo.org/", + "upstream": "https://mirrorstats.gentoo.org/rsync/", "upstream1": "rsync://rsync1.de.gentoo.org/", "upstream2": "rsync://rsync8.de.gentoo.org/", "file": "gentoo-portage/Manifest" @@ -279,9 +280,9 @@ "ubuntu_ports": { "out_of_sync_since": 1633341982, "out_of_sync_interval": 86400, - "csc": "ubuntu-ports/project/trace/", - "upstream": "http://ports.ubuntu.com/ubuntu-ports/project/trace/", - "file": "anonster.canonical.com" + "csc": "ubuntu-ports/", + "upstream": "http://ports.ubuntu.com/ubuntu-ports/", + "file": "dists/" }, "ubuntu_releases": { "out_of_sync_since": null, diff --git a/debian code study.txt b/debian code study.txt deleted file mode 100644 index 8533b13..0000000 --- a/debian code study.txt +++ /dev/null @@ -1,3 +0,0 @@ -they use this for DBs in the code: - -Psycopg is a PostgreSQL adapter for the Python programming language. This tool allows us to connect the capabilities of the Python language and libraries to obtain, manipulate, input, and update data stored in a PostgreSQL database. \ No newline at end of file diff --git a/distros.json b/distros.json deleted file mode 100644 index e184064..0000000 --- a/distros.json +++ /dev/null @@ -1,10 +0,0 @@ -[ - ["OpenBSD", "https://mirror.csclub.uwaterloo.ca/OpenBSD/", "https://ftp.openbsd.org/pub/OpenBSD/", "timestamp"], - ["kernel", "http://mirror.csclub.uwaterloo.ca/kernel.org/linux/kernel/next/", "https://mirrors.edge.kernel.org/pub/linux/kernel/next/", "sha256sums.asc"], - ["debian", "http://mirror.csclub.uwaterloo.ca/debian/project/trace/", "https://ftp-master.debian.org/debian/project/trace/", "master"], - ["GNU", "http://mirror.csclub.uwaterloo.ca/gnu/", "https://mirrors.kernel.org/gnu/", "mirror-updated-timestamp.txt"], - ["almalinux", "https://mirror.csclub.uwaterloo.ca/almalinux/", "https://repo.almalinux.org/almalinux/", "TIME"], - ["alpine", "https://mirror.csclub.uwaterloo.ca/alpine/", "https://dl-cdn.alpinelinux.org/alpine/", "last-updated"], - ["Apache", "https://mirror.csclub.uwaterloo.ca/apache/zzz/", "https://downloads.apache.org/zzz/", "time.txt"], - ["CentOS", "https://mirror.csclub.uwaterloo.ca/centos/", "https://vault.centos.org/", "timestamp.txt"] -] \ No newline at end of file diff --git a/main.py b/main.py index d92bfa5..3031e47 100644 --- a/main.py +++ b/main.py @@ -67,7 +67,7 @@ if __name__ == "__main__": print(f"Failure: {project} does not exist") continue project_class = getattr(sys.modules[__name__], project) - if project == "CPAN" or project == "ubuntu" or project == "ubuntu_releases" or project == "manjaro" or project == "mxlinux" or project == "mxlinux_iso" or project == "slackware" or project == "trisquel" or project == "cran" or project == "ctan": + if project == "CPAN" or project == "ubuntu" or project == "ubuntu_releases" or project == "manjaro" or project == "mxlinux" or project == "mxlinux_iso" or project == "slackware" or project == "trisquel" or project == "cran" or project == "ctan" or project == "gentooportage": checker_result = project_class.check(data, project, current_time) if checker_result: print(f"Success: {project} up-to-date") diff --git a/projects.json b/projects.json deleted file mode 100644 index d90cbd9..0000000 --- a/projects.json +++ /dev/null @@ -1,20 +0,0 @@ -[ - [ - "OpenBSD", - "https://mirror.csclub.uwaterloo.ca/OpenBSD/", - "https://ftp.openbsd.org/pub/OpenBSD/", - "timestamp" - ], - [ - "kernel", - "http://mirror.csclub.uwaterloo.ca/kernel.org/linux/kernel/next/", - "https://mirrors.edge.kernel.org/pub/linux/kernel/next/", - "sha256sums.asc" - ], - [ - "debian", - "http://mirror.csclub.uwaterloo.ca/debian/project/trace/", - "https://ftp-master.debian.org/debian/project/trace/", - "master" - ] -] diff --git a/projects/gentooportage.py b/projects/gentooportage.py index c562d3e..3e25497 100644 --- a/projects/gentooportage.py +++ b/projects/gentooportage.py @@ -6,13 +6,20 @@ import os from project import Project +import requests +import datefinder # another date finding library +from datetime import timedelta +from datetime import datetime +import re +import pandas as pd + class GentooPortage(Project): """GentooPortage class""" @staticmethod def check(data, project): - rsync_command = "rsync -q {}{} {}" + """rsync_command = "rsync -q {}{} {}" os.system(rsync_command.format(data[project]["csc"], data[project]["file"], "csc_manifest")) @@ -29,4 +36,14 @@ class GentooPortage(Project): os.system("rm csc_manifest") os.system("rm upstream_manifest1") os.system("rm upstream_manifest2") - return 0 in [len(output1), len(output2)] + return 0 in [len(output1), len(output2)]""" + + # i'm changing the above code to the bottom one, since the above one only works in linux + page = requests.get(data[project]["upstream"]).text + indexOfFile = page.find("rsync4.ca.gentoo.org") + + m = re.search(r'(\d+ hour)|(\d+ hours)|(\d+(\.)?\d+ days)', page[indexOfFile:]) + + duration = pd.to_timedelta(m.group(0)) + + return duration <= pd.to_timedelta(data[project]["out_of_sync_interval"], unit='s') diff --git a/projects/ubuntu_ports.py b/projects/ubuntu_ports.py index 2730864..de117f2 100644 --- a/projects/ubuntu_ports.py +++ b/projects/ubuntu_ports.py @@ -1,8 +1,39 @@ -""" -Contains ubuntu_ports class -""" - +from bs4 import BeautifulSoup +import requests from project import Project +from shared import CSC_MIRROR class ubuntu_ports(Project): - """ubuntu_ports class""" \ No newline at end of file + """ubuntu_ports class""" + + @staticmethod + def scrape(site1, site2): + # getting the request from url + r1 = requests.get(site1) + r2 = requests.get(site2) + + # converting the text + s1 = BeautifulSoup(r1.text,"html.parser") + s2 = BeautifulSoup(r2.text,"html.parser") + + hrefs1 = [i.attrs['href'] for i in s1.find_all("a")] + hrefs2 = [i.attrs['href'] for i in s2.find_all("a")] + + for href in hrefs1: # for a href directories + if href.endswith("/") and href != "../" and href != "/" and not href.startswith("/"): + print(href) + if href not in hrefs2: + return False + elif requests.get(site1+href+"Release").text != requests.get(site2+href+"Release").text: + return False + return True + + @classmethod + def check(cls, data, project): + """Check if project packages are up-to-date""" + + csc_url = CSC_MIRROR + data[project]["csc"] + data[project]["file"] + upstream_url = data[project]["upstream"] + data[project]["file"] + + # calling function + return cls.scrape(upstream_url, csc_url) \ No newline at end of file diff --git a/test.py b/test.py index 7c1f9d6..95c4a32 100644 --- a/test.py +++ b/test.py @@ -7,7 +7,7 @@ from datetime import timedelta import time import pandas as pd import re # for salt stack specifically -from projects import netbsd +from projects import ubuntu_ports import json # import json to read project info stored in json file # this function is brute force looping through the whole directory and checking dates @@ -65,7 +65,7 @@ def get_latest_date(web_dir): if __name__ =="__main__": with open("data.json", "r", encoding="utf-8") as file: data = json.load(file) - print(netbsd.check(data, "netbsd")) + print(ubuntu_ports.check(data, "ubuntu_ports")) """# website to be scrape site="https://cdimage.ubuntu.com/releases/"