updated ubuntu_ports
This commit is contained in:
parent
fe7d22e1e5
commit
68e13c327e
15
data.json
15
data.json
|
@ -21,7 +21,7 @@
|
||||||
"file": "zzz/time.txt"
|
"file": "zzz/time.txt"
|
||||||
},
|
},
|
||||||
"Arch": {
|
"Arch": {
|
||||||
"out_of_sync_since": null,
|
"out_of_sync_since": 1634455547,
|
||||||
"out_of_sync_interval": 86400,
|
"out_of_sync_interval": 86400,
|
||||||
"csc": "archlinux/",
|
"csc": "archlinux/",
|
||||||
"upstream": "http://arch.mirror.constant.com/",
|
"upstream": "http://arch.mirror.constant.com/",
|
||||||
|
@ -35,7 +35,7 @@
|
||||||
"file": "centos/TIME"
|
"file": "centos/TIME"
|
||||||
},
|
},
|
||||||
"Ceph": {
|
"Ceph": {
|
||||||
"out_of_sync_since": 1633340186,
|
"out_of_sync_since": null,
|
||||||
"out_of_sync_interval": 86400,
|
"out_of_sync_interval": 86400,
|
||||||
"csc": "ceph/",
|
"csc": "ceph/",
|
||||||
"upstream": "https://download.ceph.com/",
|
"upstream": "https://download.ceph.com/",
|
||||||
|
@ -73,14 +73,14 @@
|
||||||
"file": "project/trace/deb-multimedia.org"
|
"file": "project/trace/deb-multimedia.org"
|
||||||
},
|
},
|
||||||
"DebianPorts": {
|
"DebianPorts": {
|
||||||
"out_of_sync_since": null,
|
"out_of_sync_since": 1634455547,
|
||||||
"out_of_sync_interval": 86400,
|
"out_of_sync_interval": 86400,
|
||||||
"csc": "",
|
"csc": "",
|
||||||
"upstream": "https://deb.debian.org/",
|
"upstream": "https://deb.debian.org/",
|
||||||
"file": "debian-ports/project/trace/porta.debian.org"
|
"file": "debian-ports/project/trace/porta.debian.org"
|
||||||
},
|
},
|
||||||
"DebianSecurity": {
|
"DebianSecurity": {
|
||||||
"out_of_sync_since": null,
|
"out_of_sync_since": 1634455547,
|
||||||
"out_of_sync_interval": 86400,
|
"out_of_sync_interval": 86400,
|
||||||
"csc": "",
|
"csc": "",
|
||||||
"upstream": "http://debian.mirror.iweb.ca/",
|
"upstream": "http://debian.mirror.iweb.ca/",
|
||||||
|
@ -118,6 +118,7 @@
|
||||||
"out_of_sync_since": null,
|
"out_of_sync_since": null,
|
||||||
"out_of_sync_interval": 86400,
|
"out_of_sync_interval": 86400,
|
||||||
"csc": "rsync://rsync4.ca.gentoo.org/",
|
"csc": "rsync://rsync4.ca.gentoo.org/",
|
||||||
|
"upstream": "https://mirrorstats.gentoo.org/rsync/",
|
||||||
"upstream1": "rsync://rsync1.de.gentoo.org/",
|
"upstream1": "rsync://rsync1.de.gentoo.org/",
|
||||||
"upstream2": "rsync://rsync8.de.gentoo.org/",
|
"upstream2": "rsync://rsync8.de.gentoo.org/",
|
||||||
"file": "gentoo-portage/Manifest"
|
"file": "gentoo-portage/Manifest"
|
||||||
|
@ -279,9 +280,9 @@
|
||||||
"ubuntu_ports": {
|
"ubuntu_ports": {
|
||||||
"out_of_sync_since": 1633341982,
|
"out_of_sync_since": 1633341982,
|
||||||
"out_of_sync_interval": 86400,
|
"out_of_sync_interval": 86400,
|
||||||
"csc": "ubuntu-ports/project/trace/",
|
"csc": "ubuntu-ports/",
|
||||||
"upstream": "http://ports.ubuntu.com/ubuntu-ports/project/trace/",
|
"upstream": "http://ports.ubuntu.com/ubuntu-ports/",
|
||||||
"file": "anonster.canonical.com"
|
"file": "dists/"
|
||||||
},
|
},
|
||||||
"ubuntu_releases": {
|
"ubuntu_releases": {
|
||||||
"out_of_sync_since": null,
|
"out_of_sync_since": null,
|
||||||
|
|
|
@ -1,3 +0,0 @@
|
||||||
They use this for databases in the code:
|
|
||||||
|
|
||||||
Psycopg is a PostgreSQL adapter for the Python programming language. It lets Python code and libraries query, insert, update, and otherwise manipulate data stored in a PostgreSQL database.
|
|
10
distros.json
10
distros.json
|
@ -1,10 +0,0 @@
|
||||||
[
|
|
||||||
["OpenBSD", "https://mirror.csclub.uwaterloo.ca/OpenBSD/", "https://ftp.openbsd.org/pub/OpenBSD/", "timestamp"],
|
|
||||||
["kernel", "http://mirror.csclub.uwaterloo.ca/kernel.org/linux/kernel/next/", "https://mirrors.edge.kernel.org/pub/linux/kernel/next/", "sha256sums.asc"],
|
|
||||||
["debian", "http://mirror.csclub.uwaterloo.ca/debian/project/trace/", "https://ftp-master.debian.org/debian/project/trace/", "master"],
|
|
||||||
["GNU", "http://mirror.csclub.uwaterloo.ca/gnu/", "https://mirrors.kernel.org/gnu/", "mirror-updated-timestamp.txt"],
|
|
||||||
["almalinux", "https://mirror.csclub.uwaterloo.ca/almalinux/", "https://repo.almalinux.org/almalinux/", "TIME"],
|
|
||||||
["alpine", "https://mirror.csclub.uwaterloo.ca/alpine/", "https://dl-cdn.alpinelinux.org/alpine/", "last-updated"],
|
|
||||||
["Apache", "https://mirror.csclub.uwaterloo.ca/apache/zzz/", "https://downloads.apache.org/zzz/", "time.txt"],
|
|
||||||
["CentOS", "https://mirror.csclub.uwaterloo.ca/centos/", "https://vault.centos.org/", "timestamp.txt"]
|
|
||||||
]
|
|
2
main.py
2
main.py
|
@ -67,7 +67,7 @@ if __name__ == "__main__":
|
||||||
print(f"Failure: {project} does not exist")
|
print(f"Failure: {project} does not exist")
|
||||||
continue
|
continue
|
||||||
project_class = getattr(sys.modules[__name__], project)
|
project_class = getattr(sys.modules[__name__], project)
|
||||||
if project == "CPAN" or project == "ubuntu" or project == "ubuntu_releases" or project == "manjaro" or project == "mxlinux" or project == "mxlinux_iso" or project == "slackware" or project == "trisquel" or project == "cran" or project == "ctan":
|
if project == "CPAN" or project == "ubuntu" or project == "ubuntu_releases" or project == "manjaro" or project == "mxlinux" or project == "mxlinux_iso" or project == "slackware" or project == "trisquel" or project == "cran" or project == "ctan" or project == "gentooportage":
|
||||||
checker_result = project_class.check(data, project, current_time)
|
checker_result = project_class.check(data, project, current_time)
|
||||||
if checker_result:
|
if checker_result:
|
||||||
print(f"Success: {project} up-to-date")
|
print(f"Success: {project} up-to-date")
|
||||||
|
|
|
@ -1,20 +0,0 @@
|
||||||
[
|
|
||||||
[
|
|
||||||
"OpenBSD",
|
|
||||||
"https://mirror.csclub.uwaterloo.ca/OpenBSD/",
|
|
||||||
"https://ftp.openbsd.org/pub/OpenBSD/",
|
|
||||||
"timestamp"
|
|
||||||
],
|
|
||||||
[
|
|
||||||
"kernel",
|
|
||||||
"http://mirror.csclub.uwaterloo.ca/kernel.org/linux/kernel/next/",
|
|
||||||
"https://mirrors.edge.kernel.org/pub/linux/kernel/next/",
|
|
||||||
"sha256sums.asc"
|
|
||||||
],
|
|
||||||
[
|
|
||||||
"debian",
|
|
||||||
"http://mirror.csclub.uwaterloo.ca/debian/project/trace/",
|
|
||||||
"https://ftp-master.debian.org/debian/project/trace/",
|
|
||||||
"master"
|
|
||||||
]
|
|
||||||
]
|
|
|
@ -6,13 +6,20 @@ import os
|
||||||
|
|
||||||
from project import Project
|
from project import Project
|
||||||
|
|
||||||
|
import requests
|
||||||
|
import datefinder # another date finding library
|
||||||
|
from datetime import timedelta
|
||||||
|
from datetime import datetime
|
||||||
|
import re
|
||||||
|
import pandas as pd
|
||||||
|
|
||||||
|
|
||||||
class GentooPortage(Project):
|
class GentooPortage(Project):
|
||||||
"""GentooPortage class"""
|
"""GentooPortage class"""
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def check(data, project):
|
def check(data, project):
|
||||||
rsync_command = "rsync -q {}{} {}"
|
"""rsync_command = "rsync -q {}{} {}"
|
||||||
os.system(rsync_command.format(data[project]["csc"],
|
os.system(rsync_command.format(data[project]["csc"],
|
||||||
data[project]["file"],
|
data[project]["file"],
|
||||||
"csc_manifest"))
|
"csc_manifest"))
|
||||||
|
@ -29,4 +36,14 @@ class GentooPortage(Project):
|
||||||
os.system("rm csc_manifest")
|
os.system("rm csc_manifest")
|
||||||
os.system("rm upstream_manifest1")
|
os.system("rm upstream_manifest1")
|
||||||
os.system("rm upstream_manifest2")
|
os.system("rm upstream_manifest2")
|
||||||
return 0 in [len(output1), len(output2)]
|
return 0 in [len(output1), len(output2)]"""
|
||||||
|
|
||||||
|
# Replaced the rsync-based check above with the HTTP-based check below, since the rsync version only works on Linux.
|
||||||
|
page = requests.get(data[project]["upstream"]).text
|
||||||
|
indexOfFile = page.find("rsync4.ca.gentoo.org")
|
||||||
|
|
||||||
|
m = re.search(r'(\d+ hour)|(\d+ hours)|(\d+(\.)?\d+ days)', page[indexOfFile:])
|
||||||
|
|
||||||
|
duration = pd.to_timedelta(m.group(0))
|
||||||
|
|
||||||
|
return duration <= pd.to_timedelta(data[project]["out_of_sync_interval"], unit='s')
|
||||||
|
|
|
@ -1,8 +1,39 @@
|
||||||
"""
|
from bs4 import BeautifulSoup
|
||||||
Contains ubuntu_ports class
|
import requests
|
||||||
"""
|
|
||||||
|
|
||||||
from project import Project
|
from project import Project
|
||||||
|
from shared import CSC_MIRROR
|
||||||
|
|
||||||
class ubuntu_ports(Project):
|
class ubuntu_ports(Project):
|
||||||
"""ubuntu_ports class"""
|
"""ubuntu_ports class"""
|
||||||
|
|
||||||
|
@staticmethod
def scrape(site1, site2):
    """Return True when every sub-directory listed at site1 matches site2.

    Both URLs are fetched and parsed as HTML directory listings. Every
    relative sub-directory link found on ``site1`` must also be linked
    from ``site2``, and the ``Release`` file inside that directory must
    have identical text on both sites.

    Args:
        site1: URL of the reference directory listing. Links are appended
            to it directly, so it is expected to end with "/".
        site2: URL of the listing compared against ``site1``; the same
            trailing-"/" expectation applies.

    Returns:
        bool: False as soon as a directory is missing from ``site2`` or
        its ``Release`` file differs between the sites; True otherwise.
    """
    listing1 = BeautifulSoup(requests.get(site1).text, "html.parser")
    listing2 = BeautifulSoup(requests.get(site2).text, "html.parser")

    # href=True skips anchors that carry no href attribute; indexing
    # attrs['href'] on those would raise KeyError.
    hrefs1 = [anchor["href"] for anchor in listing1.find_all("a", href=True)]
    # Only membership is tested on the second listing, so keep it as a set.
    hrefs2 = {anchor["href"] for anchor in listing2.find_all("a", href=True)}

    for href in hrefs1:
        # Keep relative sub-directory links only: skip files, the parent
        # link ("../") and absolute paths (anything starting with "/").
        if not href.endswith("/") or href == "../" or href.startswith("/"):
            continue
        if href not in hrefs2:
            return False
        release1 = requests.get(site1 + href + "Release").text
        release2 = requests.get(site2 + href + "Release").text
        if release1 != release2:
            return False
    return True
|
||||||
|
|
||||||
|
@classmethod
def check(cls, data, project):
    """Report whether the CSC mirror of ``project`` matches upstream.

    The project's entry in ``data`` supplies the "csc", "upstream" and
    "file" path fragments. The two resulting URLs are handed to
    ``scrape``, upstream first, and its result is returned unchanged.
    """
    entry = data[project]

    # Mirror-side URL: shared mirror prefix + project path + listing path.
    mirror_url = CSC_MIRROR + entry["csc"] + entry["file"]
    # Upstream-side URL for the same listing.
    origin_url = entry["upstream"] + entry["file"]

    return cls.scrape(origin_url, mirror_url)
|
4
test.py
4
test.py
|
@ -7,7 +7,7 @@ from datetime import timedelta
|
||||||
import time
|
import time
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
import re # for salt stack specifically
|
import re # for salt stack specifically
|
||||||
from projects import netbsd
|
from projects import ubuntu_ports
|
||||||
import json # import json to read project info stored in json file
|
import json # import json to read project info stored in json file
|
||||||
|
|
||||||
# this function is brute force looping through the whole directory and checking dates
|
# this function is brute force looping through the whole directory and checking dates
|
||||||
|
@ -65,7 +65,7 @@ def get_latest_date(web_dir):
|
||||||
if __name__ =="__main__":
|
if __name__ =="__main__":
|
||||||
with open("data.json", "r", encoding="utf-8") as file:
|
with open("data.json", "r", encoding="utf-8") as file:
|
||||||
data = json.load(file)
|
data = json.load(file)
|
||||||
print(netbsd.check(data, "netbsd"))
|
print(ubuntu_ports.check(data, "ubuntu_ports"))
|
||||||
|
|
||||||
"""# website to be scrape
|
"""# website to be scrape
|
||||||
site="https://cdimage.ubuntu.com/releases/"
|
site="https://cdimage.ubuntu.com/releases/"
|
||||||
|
|
Loading…
Reference in New Issue