forked from public/mirror-checker
added opensuse
This commit is contained in:
parent
709aa04cb8
commit
e3a4d18b36
|
@ -22,7 +22,6 @@ to find repos of the mirrored projects to check, just search "projectName mirror
|
||||||
|
|
||||||
not done:
|
not done:
|
||||||
NetBSD: http://ftp.netbsd.org/pub/NetBSD/ has public repo, no timestamp, web directory hard to loop through, no mirror tracker
|
NetBSD: http://ftp.netbsd.org/pub/NetBSD/ has public repo, no timestamp, web directory hard to loop through, no mirror tracker
|
||||||
opensuse: http://download.opensuse.org/ check Update.repo files in folders inside the update folder
|
|
||||||
puppylinux: https://distro.ibiblio.org/puppylinux/ check the ISO files in the folders starting with puppy
|
puppylinux: https://distro.ibiblio.org/puppylinux/ check the ISO files in the folders starting with puppy
|
||||||
|
|
||||||
done:
|
done:
|
||||||
|
@ -67,6 +66,7 @@ mxlinux-iso: this one seems out of sync on the official tracker for 134 days, wh
|
||||||
mysql: http://mirrors.sunsite.dk/mysql/
|
mysql: http://mirrors.sunsite.dk/mysql/
|
||||||
nongnu: http://download.savannah.nongnu.org/releases/ https://savannah.gnu.org/maintenance/Mirmon/ http://download.savannah.gnu.org/mirmon/savannah/
|
nongnu: http://download.savannah.nongnu.org/releases/ https://savannah.gnu.org/maintenance/Mirmon/ http://download.savannah.gnu.org/mirmon/savannah/
|
||||||
openbsd
|
openbsd
|
||||||
|
opensuse: http://download.opensuse.org/ check the Update.repo files in the folders inside the update folder; tumbleweed-non-oss/ and tumbleweed/ are temporarily not checked
|
||||||
parabola: https://repo.parabola.nu/ https://www.parabola.nu/mirrors/status/
|
parabola: https://repo.parabola.nu/ https://www.parabola.nu/mirrors/status/
|
||||||
pkgsrc
|
pkgsrc
|
||||||
qtproject: https://download.qt.io/
|
qtproject: https://download.qt.io/
|
||||||
|
@ -84,6 +84,6 @@ ubuntu-ports: http://ports.ubuntu.com/ubuntu-ports/ checks the file anonster.can
|
||||||
ubuntu-ports-releases: https://cdimage.ubuntu.com/releases/ has public repo, no timestamp, no status tracker, brute force looped it
|
ubuntu-ports-releases: https://cdimage.ubuntu.com/releases/ has public repo, no timestamp, no status tracker, brute force looped it
|
||||||
ubuntu-releases: https://releases.ubuntu.com/
|
ubuntu-releases: https://releases.ubuntu.com/
|
||||||
vlc: http://download.videolan.org/pub/videolan/
|
vlc: http://download.videolan.org/pub/videolan/
|
||||||
x.org: https://www.x.org/releases/ check all of the files under each directory under /x.org/individual/, and make sure that we have all of the files which the upstream has
|
x.org: https://www.x.org/releases/ check all of the files under each directory under /x.org/individual/, and make sure that we have all of the files which the upstream has, ignoring the xcb folder
|
||||||
Xiph: https://ftp.osuosl.org/pub/xiph/releases/ loop through each directory in xiph/releases/ and compare the timestamps of the checksum files
|
Xiph: https://ftp.osuosl.org/pub/xiph/releases/ loop through each directory in xiph/releases/ and compare the timestamps of the checksum files
|
||||||
xubuntu-releases: https://cdimage.ubuntu.com/xubuntu/releases/ candidate for brute force looping since it has few folders
|
xubuntu-releases: https://cdimage.ubuntu.com/xubuntu/releases/ candidate for brute force looping since it has few folders
|
13
data.json
13
data.json
|
@ -14,14 +14,14 @@
|
||||||
"file": "alpine/last-updated"
|
"file": "alpine/last-updated"
|
||||||
},
|
},
|
||||||
"Apache": {
|
"Apache": {
|
||||||
"out_of_sync_since": 1633294718,
|
"out_of_sync_since": null,
|
||||||
"out_of_sync_interval": 86400,
|
"out_of_sync_interval": 86400,
|
||||||
"csc": "apache/",
|
"csc": "apache/",
|
||||||
"upstream": "https://downloads.apache.org/",
|
"upstream": "https://downloads.apache.org/",
|
||||||
"file": "zzz/time.txt"
|
"file": "zzz/time.txt"
|
||||||
},
|
},
|
||||||
"Arch": {
|
"Arch": {
|
||||||
"out_of_sync_since": null,
|
"out_of_sync_since": 1634433282,
|
||||||
"out_of_sync_interval": 86400,
|
"out_of_sync_interval": 86400,
|
||||||
"csc": "archlinux/",
|
"csc": "archlinux/",
|
||||||
"upstream": "http://arch.mirror.constant.com/",
|
"upstream": "http://arch.mirror.constant.com/",
|
||||||
|
@ -94,7 +94,7 @@
|
||||||
"file": "TIME"
|
"file": "TIME"
|
||||||
},
|
},
|
||||||
"Fedora": {
|
"Fedora": {
|
||||||
"out_of_sync_since": 1633923341,
|
"out_of_sync_since": null,
|
||||||
"out_of_sync_interval": 86400,
|
"out_of_sync_interval": 86400,
|
||||||
"csc": "fedora/",
|
"csc": "fedora/",
|
||||||
"upstream": "http://fedora.mirror.iweb.com/",
|
"upstream": "http://fedora.mirror.iweb.com/",
|
||||||
|
@ -380,5 +380,12 @@
|
||||||
"csc": "x.org/individual/",
|
"csc": "x.org/individual/",
|
||||||
"upstream": "https://www.x.org/releases/individual/",
|
"upstream": "https://www.x.org/releases/individual/",
|
||||||
"file": ""
|
"file": ""
|
||||||
|
},
|
||||||
|
"opensuse": {
|
||||||
|
"out_of_sync_since": null,
|
||||||
|
"out_of_sync_interval": 86400,
|
||||||
|
"csc": "opensuse/update/",
|
||||||
|
"upstream": "http://download.opensuse.org/update/",
|
||||||
|
"file": ""
|
||||||
}
|
}
|
||||||
}
|
}
|
|
@ -1,48 +1,66 @@
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
import requests
|
import requests
|
||||||
|
import re
|
||||||
|
import datefinder # another date finding library
|
||||||
from project import Project
|
from project import Project
|
||||||
from shared import CSC_MIRROR
|
from shared import CSC_MIRROR
|
||||||
|
|
||||||
class opensuse(Project):
|
class opensuse(Project):
|
||||||
"""opensuse class"""
|
"""opensuse class"""
|
||||||
|
@staticmethod
def checker(directory_URL, file_name):
    """Return the first listing timestamp that follows ``file_name`` on a page.

    Downloads the page at ``directory_URL``, locates the first occurrence of
    ``file_name`` in the raw page text, and parses the first date that
    appears after that position.

    Args:
        directory_URL: URL of the directory listing to download.
        file_name: Substring to search for in the page text (callers in this
            class pass ".repo").

    Returns:
        The value produced by ``datefinder`` for the first date found after
        ``file_name`` (a ``datetime`` per the datefinder documentation), or
        ``False`` when the page does not contain ``file_name`` or no
        parseable date follows it.
    """
    page = requests.get(directory_URL).text

    file_index = page.find(file_name)
    if file_index == -1:
        return False

    # Two timestamp layouts are recognised in the listing text:
    # "DD-Mon-YYYY HH:MM" and "YYYY-MM-DD HH:MM". Only the first hit after
    # the file name matters, so a single search is enough.
    match = re.search(
        r'\d{2}-\w{3}-\d{4} \d{2}:\d{2}|\d{4}-\d{2}-\d{2} \d{2}:\d{2}',
        page[file_index:],
    )
    if match is None:
        # The file name is present but no timestamp follows it; report
        # "not found" instead of failing with IndexError.
        return False

    # find_dates is consumed lazily; fall back to False if nothing parses so
    # callers that compare against False keep working.
    return next(iter(datefinder.find_dates(match.group(0))), False)
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def scrape(cls, files, site):
|
def scrape(cls, compare, folders, site1, site2, directory):
|
||||||
|
if cls.checker(site1+directory, ".repo") != False:
|
||||||
|
# print (site1+directory)
|
||||||
|
# print (cls.checker(site1+directory, ".repo"))
|
||||||
|
if cls.checker(site2+directory, ".repo") != False:
|
||||||
|
# print (site2+directory)
|
||||||
|
# print (cls.checker(site2+directory, ".repo"))
|
||||||
|
compare.append(cls.checker(site1+directory, ".repo") <= cls.checker(site2+directory, ".repo"))
|
||||||
|
return
|
||||||
|
compare.append(False)
|
||||||
|
return
|
||||||
|
|
||||||
# getting the request from url
|
# getting the request from url
|
||||||
r = requests.get(site)
|
r = requests.get(site1 + directory)
|
||||||
|
|
||||||
# converting the text
|
# converting the text
|
||||||
s = BeautifulSoup(r.text,"html.parser")
|
s = BeautifulSoup(r.text,"html.parser")
|
||||||
|
|
||||||
for i in s.find_all("a"): # for a href directories
|
for i in s.find_all("a"): # for a href directories
|
||||||
href = i.attrs['href']
|
href = i.attrs['href']
|
||||||
|
|
||||||
if href.endswith("/") and href != "../" and href != "/" and not href.startswith("/"):
|
if href.endswith("/") and href != "../" and href != "/" and not href.startswith("/") and href != "tumbleweed-non-oss/" and href != "tumbleweed/":
|
||||||
site_next = site+href
|
dir_next = directory+href
|
||||||
|
# print(dir_next)
|
||||||
|
# calling it self
|
||||||
|
if dir_next not in folders:
|
||||||
|
folders.append(dir_next)
|
||||||
|
cls.scrape(compare, folders, site1, site2, dir_next)
|
||||||
|
|
||||||
if site_next not in files:
|
|
||||||
files.append(href)
|
|
||||||
# print(href)
|
|
||||||
# calling it self
|
|
||||||
cls.scrape(files, site_next)
|
|
||||||
elif href != "../" and href != "/" and not href.startswith("/") and href != "?C=N;O=D" and href != "?C=M;O=A" and href != "?C=S;O=A" and href != "?C=D;O=A":
|
|
||||||
# print(href)
|
|
||||||
files.append(href)
|
|
||||||
|
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def check(cls, data, project):
|
def check(cls, data, project):
|
||||||
"""Check if project packages are up-to-date"""
|
"""Check if project packages are up-to-date"""
|
||||||
# lists
|
# lists
|
||||||
files1=[]
|
compare=[]
|
||||||
files2=[]
|
folders=[]
|
||||||
|
|
||||||
csc_url = CSC_MIRROR + data[project]["csc"] + data[project]["file"]
|
csc_url = CSC_MIRROR + data[project]["csc"] + data[project]["file"]
|
||||||
upstream_url = data[project]["upstream"] + data[project]["file"]
|
upstream_url = data[project]["upstream"] + data[project]["file"]
|
||||||
|
|
||||||
# calling function
|
# calling function
|
||||||
cls.scrape(files1, csc_url)
|
cls.scrape(compare, folders, upstream_url, csc_url, "")
|
||||||
cls.scrape(files2, upstream_url)
|
|
||||||
|
|
||||||
# print(set(files2) - set(files1))
|
return all(compare)
|
||||||
|
|
||||||
return set(files1) == set(files2)
|
|
4
test.py
4
test.py
|
@ -7,7 +7,7 @@ from datetime import timedelta
|
||||||
import time
|
import time
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
import re # for salt stack specifically
|
import re # for salt stack specifically
|
||||||
from projects import x_org
|
from projects import opensuse
|
||||||
import json # import json to read project info stored in json file
|
import json # import json to read project info stored in json file
|
||||||
|
|
||||||
# this function is brute force looping through the whole directory and checking dates
|
# this function is brute force looping through the whole directory and checking dates
|
||||||
|
@ -65,7 +65,7 @@ def get_latest_date(web_dir):
|
||||||
if __name__ =="__main__":
|
if __name__ =="__main__":
|
||||||
with open("data.json", "r", encoding="utf-8") as file:
|
with open("data.json", "r", encoding="utf-8") as file:
|
||||||
data = json.load(file)
|
data = json.load(file)
|
||||||
print(x_org.check(data, "x_org"))
|
print(opensuse.check(data, "opensuse"))
|
||||||
|
|
||||||
"""# website to be scrape
|
"""# website to be scrape
|
||||||
site="https://cdimage.ubuntu.com/releases/"
|
site="https://cdimage.ubuntu.com/releases/"
|
||||||
|
|
Loading…
Reference in New Issue