Compare commits
3 Commits
master
...
jtoft-rt-4
Author | SHA1 | Date |
---|---|---|
Justin Toft | 365d5dcbac | |
Justin Toft | 9cecd76309 | |
Justin Toft | 7e77546c3e |
12
data.json
12
data.json
|
@ -174,7 +174,7 @@
|
||||||
"out_of_sync_interval": 172800
|
"out_of_sync_interval": 172800
|
||||||
},
|
},
|
||||||
"KDE": {
|
"KDE": {
|
||||||
"out_of_sync_since": 1659116720,
|
"out_of_sync_since": null,
|
||||||
"out_of_sync_interval": 86400,
|
"out_of_sync_interval": 86400,
|
||||||
"csc": "kde/",
|
"csc": "kde/",
|
||||||
"upstream": "https://kde.c3sl.ufpr.br/",
|
"upstream": "https://kde.c3sl.ufpr.br/",
|
||||||
|
@ -209,7 +209,7 @@
|
||||||
"file": "dists/"
|
"file": "dists/"
|
||||||
},
|
},
|
||||||
"macports": {
|
"macports": {
|
||||||
"out_of_sync_since": 1642827723,
|
"out_of_sync_since": null,
|
||||||
"out_of_sync_interval": 86400,
|
"out_of_sync_interval": 86400,
|
||||||
"csc": "MacPorts/mpdistfiles/",
|
"csc": "MacPorts/mpdistfiles/",
|
||||||
"upstream": "https://distfiles.macports.org/",
|
"upstream": "https://distfiles.macports.org/",
|
||||||
|
@ -321,7 +321,7 @@
|
||||||
"file": "dists/"
|
"file": "dists/"
|
||||||
},
|
},
|
||||||
"raspbian": {
|
"raspbian": {
|
||||||
"out_of_sync_since": 1659116721,
|
"out_of_sync_since": null,
|
||||||
"out_of_sync_interval": 86400,
|
"out_of_sync_interval": 86400,
|
||||||
"csc": "raspbian/",
|
"csc": "raspbian/",
|
||||||
"upstream": "http://archive.raspbian.org/",
|
"upstream": "http://archive.raspbian.org/",
|
||||||
|
@ -374,10 +374,10 @@
|
||||||
"file": ""
|
"file": ""
|
||||||
},
|
},
|
||||||
"ubuntu_ports": {
|
"ubuntu_ports": {
|
||||||
"out_of_sync_since": 1651550528,
|
"out_of_sync_since": null,
|
||||||
"out_of_sync_interval": 86400,
|
"out_of_sync_interval": 86400,
|
||||||
"csc": "ubuntu-ports/",
|
"csc": "ubuntu-ports/",
|
||||||
"upstream": "http://ports.ubuntu.com/ubuntu-ports/",
|
"upstream": "https://launchpad.net/ubuntu/+mirror/mirror.csclub.uwaterloo.ca-archive",
|
||||||
"file": "dists/"
|
"file": "dists/"
|
||||||
},
|
},
|
||||||
"ubuntu_ports_releases": {
|
"ubuntu_ports_releases": {
|
||||||
|
@ -422,4 +422,4 @@
|
||||||
"upstream": "https://cdimage.ubuntu.com/xubuntu/releases/",
|
"upstream": "https://cdimage.ubuntu.com/xubuntu/releases/",
|
||||||
"file": ""
|
"file": ""
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -5,6 +5,7 @@ from project import Project
|
||||||
from shared import CSC_MIRROR
|
from shared import CSC_MIRROR
|
||||||
|
|
||||||
import datefinder # another date finding library
|
import datefinder # another date finding library
|
||||||
|
from datetime import timedelta
|
||||||
|
|
||||||
class macports(Project):
|
class macports(Project):
|
||||||
"""macports class"""
|
"""macports class"""
|
||||||
|
@ -13,18 +14,20 @@ class macports(Project):
|
||||||
def checker(directory_URL, file_name):
|
def checker(directory_URL, file_name):
|
||||||
page = requests.get(directory_URL).text
|
page = requests.get(directory_URL).text
|
||||||
file_index = page.find(file_name)
|
file_index = page.find(file_name)
|
||||||
# print(page)
|
end_index = page[file_index:].find("</tr>") + file_index
|
||||||
|
|
||||||
|
# The CSC mirror does not use tr tags, so end_index will be set to the end of the file
|
||||||
|
if end_index == (file_index - 1):
|
||||||
|
end_index = len(page) - 1
|
||||||
|
|
||||||
# remove stray numbers (file size numbers in particular) that might interfere with date finding
|
# remove stray numbers (file size numbers in particular) that might interfere with date finding
|
||||||
segment_clean = re.sub(r'\s\d+\s', ' ', page[file_index:]) # removes numbers for size
|
segment_clean = re.sub(r'\s\d+\s', ' ', page[file_index:end_index]) # removes numbers for size
|
||||||
segment_clean = re.sub(r'\s\d+\w*\s', ' ', page[file_index:]) # removes numbers + size unit. e.x. 50kb
|
segment_clean = re.sub(r'\s\d+\w*\s', ' ', page[file_index:end_index]) # removes numbers + size unit. e.x. 50kb
|
||||||
# print(segment_clean)
|
|
||||||
|
|
||||||
# finds the dates in the segment after the file name
|
# finds the dates in the segment after the file name
|
||||||
# notes: a generator will be returned by the datefinder module. I'm typecasting it to a list. Please read the note of caution provided at the bottom.
|
# notes: a generator will be returned by the datefinder module. I'm typecasting it to a list. Please read the note of caution provided at the bottom.
|
||||||
matches = list(datefinder.find_dates(segment_clean))
|
matches = list(datefinder.find_dates(segment_clean))
|
||||||
|
|
||||||
# print(matches[0])
|
|
||||||
return matches[0]
|
return matches[0]
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
|
@ -34,4 +37,14 @@ class macports(Project):
|
||||||
upstream_url = data[project]["upstream"]
|
upstream_url = data[project]["upstream"]
|
||||||
file_name = data[project]["file"]
|
file_name = data[project]["file"]
|
||||||
|
|
||||||
return cls.checker(csc_url, file_name) == cls.checker(upstream_url, file_name)
|
upstreamDate = cls.checker(csc_url, file_name)
|
||||||
|
downstreamDate = cls.checker(upstream_url, file_name)
|
||||||
|
|
||||||
|
if (upstreamDate < downstreamDate):
|
||||||
|
timeDiff = downstreamDate - upstreamDate
|
||||||
|
else:
|
||||||
|
timeDiff = upstreamDate - downstreamDate
|
||||||
|
|
||||||
|
# MacPorts are updated so often that we want to make sure we are
|
||||||
|
# at most 6 hours out of date
|
||||||
|
return timeDiff < timedelta(hours=6)
|
||||||
|
|
|
@ -1,39 +1,16 @@
|
||||||
from bs4 import BeautifulSoup
|
"""
|
||||||
import requests
|
Contains ubuntu class
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
from project import Project
|
from project import Project
|
||||||
from shared import CSC_MIRROR
|
from shared import CSC_MIRROR
|
||||||
|
from shared import NUM_UBUNTU_RELEASES
|
||||||
|
import requests
|
||||||
|
|
||||||
class ubuntu_ports(Project):
|
class ubuntu_ports(Project):
|
||||||
"""ubuntu_ports class"""
|
"""ubuntu_ports class"""
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def scrape(site1, site2):
|
def check(data, project, current_time):
|
||||||
# getting the request from url
|
page = requests.get(data[project]["upstream"]).text
|
||||||
r1 = requests.get(site1)
|
return page.count("Up to date") == NUM_UBUNTU_RELEASES
|
||||||
r2 = requests.get(site2)
|
|
||||||
|
|
||||||
# converting the text
|
|
||||||
s1 = BeautifulSoup(r1.text,"html.parser")
|
|
||||||
s2 = BeautifulSoup(r2.text,"html.parser")
|
|
||||||
|
|
||||||
hrefs1 = [i.attrs['href'] for i in s1.find_all("a")]
|
|
||||||
hrefs2 = [i.attrs['href'] for i in s2.find_all("a")]
|
|
||||||
|
|
||||||
for href in hrefs1: # for a href directories
|
|
||||||
if href.endswith("/") and href != "../" and href != "/" and not href.startswith("/"):
|
|
||||||
# print(href)
|
|
||||||
if href not in hrefs2:
|
|
||||||
return False
|
|
||||||
elif requests.get(site1+href+"Release").text != requests.get(site2+href+"Release").text:
|
|
||||||
return False
|
|
||||||
return True
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def check(cls, data, project, current_time):
|
|
||||||
"""Check if project packages are up-to-date"""
|
|
||||||
|
|
||||||
csc_url = CSC_MIRROR + data[project]["csc"] + data[project]["file"]
|
|
||||||
upstream_url = data[project]["upstream"] + data[project]["file"]
|
|
||||||
|
|
||||||
# calling function
|
|
||||||
return cls.scrape(upstream_url, csc_url)
|
|
||||||
|
|
Loading…
Reference in New Issue