fixed conflict

This commit is contained in:
Tom 2021-10-03 15:34:07 -07:00
commit c05702bd2c
61 changed files with 449 additions and 390 deletions

1
.gitignore vendored
View File

@ -139,3 +139,4 @@ cython_debug/
# Idea
.idea/
*~

View File

@ -4,7 +4,7 @@ This mirror status checker determines whether CSC mirror is up-to-date with upst
## How To Run
A configuration file may be provided through standard input. Without a configuration file, execute `python main.py`. By default, all of the available distributions will be checked. With a configuration file, execute `python main.py < name_of_config_file.in`, for example, `python main.py < example.in`. In this case, only the distributions listed in the configuration file will be checked.
A configuration file may be provided through standard input. To run without one, execute `python main.py`; all the available distributions are then checked. To run with one, execute `python main.py < name_of_config_file.in` (for example, `python main.py < example.in`); only the distributions listed in the configuration file are then checked.
## Resources
@ -16,7 +16,7 @@ if we can just view their repo online, we only have to remember the link for the
even if the date relies on a specific file in their repo, we can still find the right link for it
to find repos of the distros to check, just search "distroName mirrors"
To find the repos of the mirrored projects to check, search for "projectName mirrors".
ubuntu releases: https://releases.ubuntu.com/?_ga=2.251394307.425973732.1629764407-1388600952.1629764407
ubuntu mirrors: http://mirrors.ubuntu.com/mirrors.txt

View File

@ -1,8 +0,0 @@
"""
Contains AlmaLinux class
"""
from distro import Distro
class AlmaLinux(Distro):
"""AlmaLinux mirror checker; uses the check() inherited from Distro unchanged."""

View File

@ -1,8 +0,0 @@
"""
Contains Alpine class
"""
from distro import Distro
class Alpine(Distro):
"""Alpine mirror checker; uses the check() inherited from Distro unchanged."""

View File

@ -1,8 +0,0 @@
"""
Contains Apache class
"""
from distro import Distro
class Apache(Distro):
"""Apache mirror checker; uses the check() inherited from Distro unchanged."""

View File

@ -1,8 +0,0 @@
"""
Contains Arch class
"""
from distro import Distro
class Arch(Distro):
"""Arch mirror checker; uses the check() inherited from Distro unchanged."""

View File

@ -1,8 +0,0 @@
"""
Contains CentOS class
"""
from distro import Distro
class CentOS(Distro):
"""CentOS mirror checker; uses the check() inherited from Distro unchanged."""

View File

@ -1,8 +0,0 @@
"""
Contains Ceph class
"""
from distro import Distro
class Ceph(Distro):
"""Ceph mirror checker; uses the check() inherited from Distro unchanged."""

17
cpan.py
View File

@ -1,17 +0,0 @@
"""
Contains CPAN class
"""
import requests
from distro import Distro
from shared import CSC_MIRROR
class CPAN(Distro):
    """CPAN mirror checker; reads CPAN's published mirror list instead of comparing files."""

    @staticmethod
    def check(data, distro, current_time=None):
        """Check whether CPAN's mirror list reports the CSC mirror as fresh enough.

        Args:
            data: Mapping of distro name to its configuration; the entry must
                provide ``out_of_sync_interval``.
            distro: Key of the entry in ``data`` to check.
            current_time: Reference time as integer epoch seconds. Defaults to
                the current time, so the checker can be called with the same
                two arguments as every other checker.

        Returns:
            True if the list contains the CSC CPAN URL and ``current_time``
            minus that entry's ``age`` value is within the configured
            interval; False if it is older or the CSC mirror is not listed.

        Raises:
            requests.exceptions.RequestException: If the list cannot be
                fetched (including a timeout).
        """
        # Local import keeps this block independent of the module's import list.
        import time

        if current_time is None:
            current_time = int(time.time())

        # A timeout stops one unresponsive server from hanging the whole run.
        res_json = requests.get("http://mirrors.cpan.org/cpan-json.txt", timeout=30).json()
        csc_url = f"{CSC_MIRROR}CPAN/"
        for mirror in res_json:
            # .get() so an entry without a "url" key is skipped rather than fatal.
            if mirror.get("url") == csc_url:
                return current_time - int(mirror["age"]) <= data[distro]["out_of_sync_interval"]
        return False

View File

@ -1,8 +0,0 @@
"""
Contains Cygwin class
"""
from distro import Distro
class Cygwin(Distro):
"""Cygwin mirror checker; uses the check() inherited from Distro unchanged."""

View File

@ -1,8 +0,0 @@
"""
Contains Debian class
"""
from distro import Distro
class Debian(Distro):
"""Debian mirror checker; uses the check() inherited from Distro unchanged."""

View File

@ -1,8 +0,0 @@
"""
Contains DebianCD class
"""
from distro import Distro
class DebianCD(Distro):
"""DebianCD mirror checker; uses the check() inherited from Distro unchanged."""

View File

@ -1,8 +0,0 @@
"""
Contains DebianMultimedia class
"""
from distro import Distro
class DebianMultimedia(Distro):
"""DebianMultimedia mirror checker; uses the check() inherited from Distro unchanged."""

View File

@ -1,8 +0,0 @@
"""
Contains DebianPorts class
"""
from distro import Distro
class DebianPorts(Distro):
"""DebianPorts mirror checker; uses the check() inherited from Distro unchanged."""

View File

@ -1,8 +0,0 @@
"""
Contains DebianSecurity class
"""
from distro import Distro
class DebianSecurity(Distro):
"""DebianSecurity mirror checker; uses the check() inherited from Distro unchanged."""

View File

@ -1,16 +0,0 @@
"""
Contains abstract class for a distro
"""
from abc import ABC
import requests
from shared import CSC_MIRROR
class Distro(ABC):
    """Abstract class for a distro; provides the default file-comparison check."""

    # Seconds to wait on each HTTP request. Without a timeout a single
    # unresponsive server would block the whole checker run indefinitely.
    REQUEST_TIMEOUT = 30

    @staticmethod
    def check(data, distro):
        """Check if distro packages are up-to-date.

        Downloads the same file from the CSC mirror and from upstream and
        compares the two response bodies.

        Args:
            data: Mapping of distro name to its configuration; the entry must
                provide ``csc``, ``upstream`` and ``file``.
            distro: Key of the entry in ``data`` to check.

        Returns:
            True if both response bodies are identical, False otherwise.

        Raises:
            requests.exceptions.RequestException: If either request fails
                (including a timeout).
        """
        entry = data[distro]
        csc_url = CSC_MIRROR + entry["csc"] + entry["file"]
        upstream_url = entry["upstream"] + entry["file"]
        csc_text = requests.get(csc_url, timeout=Distro.REQUEST_TIMEOUT).text
        upstream_text = requests.get(upstream_url, timeout=Distro.REQUEST_TIMEOUT).text
        return csc_text == upstream_text

View File

@ -1,8 +0,0 @@
"""
Contains Eclipse class
"""
from distro import Distro
class Eclipse(Distro):
"""Eclipse mirror checker; uses the check() inherited from Distro unchanged."""

View File

@ -1,8 +0,0 @@
"""
Contains Fedora class
"""
from distro import Distro
class Fedora(Distro):
"""Fedora mirror checker; uses the check() inherited from Distro unchanged."""

View File

@ -1,8 +0,0 @@
"""
Contains FreeBSD class
"""
from distro import Distro
class FreeBSD(Distro):
"""FreeBSD mirror checker; uses the check() inherited from Distro unchanged."""

View File

@ -1,8 +0,0 @@
"""
Contains GentooDistfiles class
"""
from distro import Distro
class GentooDistfiles(Distro):
"""GentooDistfiles mirror checker; uses the check() inherited from Distro unchanged."""

View File

@ -1,29 +0,0 @@
"""
Contains GentooPortage class
"""
import os
from distro import Distro
class GentooPortage(Distro):
    """GentooPortage mirror checker; fetches manifests with rsync and compares them."""

    @staticmethod
    def check(data, distro):
        """Check whether the CSC manifest matches at least one upstream manifest.

        The configured file is copied with ``rsync`` from the CSC mirror and
        from two upstreams into a temporary directory, then compared byte for
        byte.

        Args:
            data: Mapping of distro name to its configuration; the entry must
                provide ``csc``, ``upstream1``, ``upstream2`` and ``file``.
            distro: Key of the entry in ``data`` to check.

        Returns:
            True if the CSC copy is identical to the copy from ``upstream1``
            or ``upstream2``. False otherwise, including when the CSC copy
            could not be fetched (a failed fetch must never count as
            "in sync").
        """
        # Local imports keep this block independent of the module's import list.
        import filecmp
        import subprocess
        import tempfile

        entry = data[distro]

        with tempfile.TemporaryDirectory() as workdir:

            def fetch(source_key, name):
                """Rsync one manifest into workdir; return its path, or None on failure."""
                target = os.path.join(workdir, name)
                try:
                    # Argument list (no shell): configuration values cannot be
                    # interpreted as shell syntax.
                    result = subprocess.run(
                        ["rsync", "-q", entry[source_key] + entry["file"], target],
                        check=False,
                    )
                except OSError:
                    # rsync is not installed or could not be started.
                    return None
                return target if result.returncode == 0 else None

            csc_manifest = fetch("csc", "csc_manifest")
            if csc_manifest is None:
                return False

            upstream_manifests = (
                fetch("upstream1", "upstream_manifest1"),
                fetch("upstream2", "upstream_manifest2"),
            )
            return any(
                manifest is not None and filecmp.cmp(csc_manifest, manifest, shallow=False)
                for manifest in upstream_manifests
            )

View File

@ -1,40 +0,0 @@
"""
Contains GNOME class
"""
import re
import requests
from distro import Distro
from shared import CSC_MIRROR
class GNOME(Distro):
    """GNOME mirror checker; walks nested version directories before comparing a file."""

    @staticmethod
    def check(data, distro):
        """Check whether the CSC GNOME mirror carries the newest upstream release.

        Walks two levels of version directories starting at ``file1``. At each
        level CSC and ``upstream1`` must agree on the last version found in
        the listing. ``file2`` inside the resulting directory is then compared
        against ``upstream2`` and, if that does not match, ``upstream3``.

        Args:
            data: Mapping of distro name to its configuration; the entry must
                provide ``csc``, ``file1``, ``file2``, ``upstream1``,
                ``upstream2`` and ``upstream3``.
            distro: Key of the entry in ``data`` to check.

        Returns:
            True if the version directories agree and ``file2`` matches at
            least one of the two upstreams. False otherwise, including when a
            listing contains no recognisable version.

        Raises:
            requests.exceptions.RequestException: If a CSC or ``upstream1``
                request fails. Failures of ``upstream2``/``upstream3`` count
                as a non-match instead.
        """
        timeout = 30  # seconds; a stalled server must not block the whole run

        def newest(pattern, listing):
            """Return the last version matched in listing, or None if there is none."""
            found = re.findall(pattern, listing)
            return found[-1].lstrip('"') if found else None

        entry = data[distro]
        path = entry["file1"]
        csc_base = CSC_MIRROR + entry["csc"]

        # Each pattern matches a double quote followed by a version-like
        # token; the second level additionally allows word characters.
        for pattern in (r"\"\d+\.?\d*", r"\"\d+\.?\w*\.?\w*"):
            csc_listing = requests.get(csc_base + path, timeout=timeout).text
            upstream_listing = requests.get(entry["upstream1"] + path, timeout=timeout).text
            csc_latest = newest(pattern, csc_listing)
            # An unparseable listing is reported as out of sync rather than
            # raising IndexError, which the caller does not handle.
            if csc_latest is None or csc_latest != newest(pattern, upstream_listing):
                return False
            path += csc_latest + "/"

        target = path + entry["file2"]
        csc_text = requests.get(csc_base + target, timeout=timeout).text
        for upstream_key in ("upstream2", "upstream3"):
            try:
                if csc_text == requests.get(entry[upstream_key] + target, timeout=timeout).text:
                    return True
            except requests.exceptions.RequestException:
                # An unreachable upstream simply cannot confirm the match.
                continue
        return False

8
gnu.py
View File

@ -1,8 +0,0 @@
"""
Contains GNU class
"""
from distro import Distro
class GNU(Distro):
"""GNU mirror checker; uses the check() inherited from Distro unchanged."""

View File

@ -1,8 +0,0 @@
"""
Contains Gutenberg class
"""
from distro import Distro
class Gutenberg(Distro):
"""Gutenberg mirror checker; uses the check() inherited from Distro unchanged."""

View File

@ -1,14 +0,0 @@
"""
Contains IPFire class
"""
import requests
from distro import Distro
class IPFire(Distro):
    """IPFire mirror checker; reads the status page IPFire publishes for the CSC mirror."""

    @staticmethod
    def check(data, distro):
        """Check whether IPFire's status page reports the CSC mirror as up.

        Args:
            data: Unused; accepted so the call signature matches the other
                checkers.
            distro: Unused; accepted for the same reason.

        Returns:
            True if the page text contains "The mirror is up", else False.

        Raises:
            requests.exceptions.RequestException: If the page cannot be
                fetched (including a timeout).
        """
        ipfire_url = "https://mirrors.ipfire.org/mirrors/mirror.csclub.uwaterloo.ca"
        # A timeout stops one unresponsive server from hanging the whole run.
        ipfire_text = requests.get(ipfire_url, timeout=30).text
        return "The mirror is up" in ipfire_text

8
kde.py
View File

@ -1,8 +0,0 @@
"""
Contains KDE class
"""
from distro import Distro
class KDE(Distro):
"""KDE mirror checker; uses the check() inherited from Distro unchanged."""

View File

@ -1,8 +0,0 @@
"""
Contains KDEApplicationData class
"""
from distro import Distro
class KDEApplicationData(Distro):
"""KDEApplicationData mirror checker; uses the check() inherited from Distro unchanged."""

View File

@ -1,8 +0,0 @@
"""
Contains Kernel class
"""
from distro import Distro
class Kernel(Distro):
"""Kernel mirror checker; uses the check() inherited from Distro unchanged."""

108
main.py
View File

@ -7,58 +7,31 @@ This mirror status checker determines whether CSC mirror is up-to-date with upst
import time
import sys
import requests
from almalinux import AlmaLinux
from alpine import Alpine
from apache import Apache
from arch import Arch
from centos import CentOS
from ceph import Ceph
from cpan import CPAN
from cygwin import Cygwin
from debian import Debian
from debiancd import DebianCD
from debianmultimedia import DebianMultimedia
from debianports import DebianPorts
from debiansecurity import DebianSecurity
from eclipse import Eclipse
from fedora import Fedora
from freebsd import FreeBSD
from gentoodistfiles import GentooDistfiles
from gentooportage import GentooPortage
from gnome import GNOME
from gnu import GNU
from gutenberg import Gutenberg
from ipfire import IPFire
from kde import KDE
from kdeapplicationdata import KDEApplicationData
from kernel import Kernel
from openbsd import OpenBSD
from tdf import tdf
from ubuntu import ubuntu
from vlc import vlc
from shared import CSC_MIRROR
from dateparser.search import search_dates # this library seems to be super slow but the other library: dateutil.parser gets some errors
from projects import * # noqa
# from dateparser.search import search_dates # this library seems to be super slow but the other library: dateutil.parser gets some errors
# http://theautomatic.net/2018/12/18/2-packages-for-extracting-dates-from-a-string-of-text-in-python/
import re # import regular expressions to remove stray numbers in string that might interfere with date finding
import json # import json to read distro info stored in json file
import json # import json to read project info stored in json file
import datefinder # another date finding library
# checker: gets the timestamp of the file inside the directory at the specified URL and returns it as a string
def checker(directory_URL, file_name):
page = requests.get(directory_URL).text
indexOfFile = page.find(file_name)
file_index = page.find(file_name)
# print(page)
# remove stray numbers (file size numbers in particular) that might interfere with date finding
segment_clean = re.sub(r'\s\d+\s', ' ', page[indexOfFile:]) # removes numbers for size
segment_clean = re.sub(r'\s\d+\w*\s', ' ', page[indexOfFile:]) # removes numbers + size unit. e.x. 50kb
segment_clean = re.sub(r'\s\d+\s', ' ', page[file_index:]) # removes numbers for size
segment_clean = re.sub(r'\s\d+\w*\s', ' ', page[file_index:]) # removes numbers + size unit. e.x. 50kb
# print(segment_clean)
# implementation using dateparser.search.search_dates
# notes: some dates don't parse correctly with this tool
# print(search_dates(page[indexOfFile:], languages=['en']))
# print(search_dates(page[indexOfFile:])[0])
# print(search_dates(page[file_index:], languages=['en']))
# print(search_dates(page[file_index:])[0])
# finds the dates in the segment after the file name
# notes: a generator will be returned by the datefinder module. I'm typecasting it to a list. Please read the note of caution provided at the bottom.
@ -67,50 +40,51 @@ def checker(directory_URL, file_name):
if len(matches) > 0:
date = matches[0] # date is of type datetime.datetime
return(date.strftime("%m/%d/%Y, %H:%M:%S"))
return date.strftime("%m/%d/%Y, %H:%M:%S")
else:
return('No dates found')
return 'No dates found'
if __name__ == "__main__":
"""distros = json.load(open('distros.json',))
print(distros)
"""projects = json.load(open('projects.json',))
print(projects)
for distro in distros:
print(distro[0] + ":")
print("CSC mirror: " + checker(distro[1], distro[3]))
print("Official distro: " + checker(distro[2], distro[3]))"""
for project in projects:
print(project[0] + ":")
print("CSC mirror: " + checker(project[1], project[3]))
print("Official project: " + checker(project[2], project[3]))"""
with open("data.json", "r", encoding = "utf-8") as file:
with open("data.json", "r", encoding="utf-8") as file:
data = json.load(file)
if sys.stdin.isatty():
distros = data
projects = data
else:
distros = [distro.rstrip() for distro in sys.stdin.readlines()]
projects = [project.rstrip() for project in sys.stdin.readlines()]
current_time = int(time.time())
for distro in distros:
for project in projects:
try:
if distro not in data:
print(f"Failure: {distro} does not exist")
if project not in data:
print(f"Failure: {project} does not exist")
continue
distro_class = getattr(sys.modules[__name__], distro)
if distro == "CPAN" or distro == "ubuntu":
checker_result = distro_class.check(data, distro, current_time)
project_class = getattr(sys.modules[__name__], project)
if project == "CPAN":
checker_result = project_class.check(data, project, current_time)
if checker_result:
print(f"Success: {distro} up-to-date")
print(f"Success: {project} up-to-date")
else:
print(f"Failure: {distro} out-of-sync")
print(f"Failure: {project} out-of-sync")
continue
checker_result = distro_class.check(data, distro)
checker_result = project_class.check(data, project)
if checker_result:
data[distro]["out_of_sync_since"] = None # out of sync since is just the last time we checked and returned true
elif data[distro]["out_of_sync_since"] is None:
data[distro]["out_of_sync_since"] = current_time # starts counting out of date
elif current_time - data[distro]["out_of_sync_since"] \
> data[distro]["out_of_sync_interval"]: # last time checked out of date, now still out of date, raise alert when it reaches this threshold
print(f"Failure: {distro} out-of-sync")
data[project]["out_of_sync_since"] = None
elif data[project]["out_of_sync_since"] is None:
data[project]["out_of_sync_since"] = current_time
elif current_time - data[project]["out_of_sync_since"] \
> data[project]["out_of_sync_interval"]:
print(f"Failure: {project} out-of-sync")
continue
print(f"Success: {distro} up-to-date")
print(f"Success: {project} up-to-date")
except requests.exceptions.RequestException as err:
print(f"Error: {distro}\n{err}")
with open("data.json", "w", encoding = "utf-8") as file:
json.dump(data, file, indent = 4)
print(f"Error: {project}\n{err}")
with open("data.json", "w", encoding="utf-8") as file:
json.dump(data, file, indent='\t')

View File

@ -1,8 +0,0 @@
"""
Contains OpenBSD class
"""
from distro import Distro
class OpenBSD(Distro):
"""OpenBSD mirror checker; uses the check() inherited from Distro unchanged."""

20
project.py Normal file
View File

@ -0,0 +1,20 @@
"""
Contains abstract class for a mirrored project
"""
from abc import ABC
import requests
from shared import CSC_MIRROR
class Project(ABC):
    """Abstract class for a mirrored project; provides the default file-comparison check."""

    # Seconds to wait on each HTTP request. Without a timeout a single
    # unresponsive server would block the whole checker run indefinitely.
    REQUEST_TIMEOUT = 30

    @staticmethod
    def check(data, project):
        """Check if project packages are up-to-date.

        Downloads the same file from the CSC mirror and from upstream and
        compares the two response bodies.

        Args:
            data: Mapping of project name to its configuration; the entry
                must provide ``csc``, ``upstream`` and ``file``.
            project: Key of the entry in ``data`` to check.

        Returns:
            True if both response bodies are identical, False otherwise.

        Raises:
            requests.exceptions.RequestException: If either request fails
                (including a timeout).
        """
        entry = data[project]
        csc_url = CSC_MIRROR + entry["csc"] + entry["file"]
        upstream_url = entry["upstream"] + entry["file"]
        csc_text = requests.get(csc_url, timeout=Project.REQUEST_TIMEOUT).text
        upstream_text = requests.get(upstream_url, timeout=Project.REQUEST_TIMEOUT).text
        return csc_text == upstream_text

20
projects.json Normal file
View File

@ -0,0 +1,20 @@
[
[
"OpenBSD",
"https://mirror.csclub.uwaterloo.ca/OpenBSD/",
"https://ftp.openbsd.org/pub/OpenBSD/",
"timestamp"
],
[
"kernel",
"http://mirror.csclub.uwaterloo.ca/kernel.org/linux/kernel/next/",
"https://mirrors.edge.kernel.org/pub/linux/kernel/next/",
"sha256sums.asc"
],
[
"debian",
"http://mirror.csclub.uwaterloo.ca/debian/project/trace/",
"https://ftp-master.debian.org/debian/project/trace/",
"master"
]
]

21
projects/__init__.py Normal file
View File

@ -0,0 +1,21 @@
"""
This file automatically imports all Classes in this directory
"""
from inspect import isclass
from pkgutil import iter_modules
from pathlib import Path
from importlib import import_module
# Iterate through the modules in the current package.
package_dir = Path(__file__).resolve().parent
# pkgutil.iter_modules is documented to take a list of path *strings*;
# pathlib.Path entries have not been handled reliably across Python
# versions, so the directory is converted explicitly.
for (_, module_name, _) in iter_modules([str(package_dir)]):
    # Import the module and iterate through its attributes.
    module = import_module(f"{__name__}.{module_name}")
    for attribute_name in dir(module):
        attribute = getattr(module, attribute_name)
        if isclass(attribute):
            # Add the class to this package's variables. This covers every
            # class visible in the module, including ones it merely imported.
            globals()[attribute_name] = attribute

9
projects/almalinux.py Normal file
View File

@ -0,0 +1,9 @@
"""
Contains AlmaLinux class
"""
from project import Project
class AlmaLinux(Project):
"""AlmaLinux mirror checker; uses the check() inherited from Project unchanged."""

9
projects/alpine.py Normal file
View File

@ -0,0 +1,9 @@
"""
Contains Alpine class
"""
from project import Project
class Alpine(Project):
"""Alpine mirror checker; uses the check() inherited from Project unchanged."""

9
projects/apache.py Normal file
View File

@ -0,0 +1,9 @@
"""
Contains Apache class
"""
from project import Project
class Apache(Project):
"""Apache mirror checker; uses the check() inherited from Project unchanged."""

9
projects/arch.py Normal file
View File

@ -0,0 +1,9 @@
"""
Contains Arch class
"""
from project import Project
class Arch(Project):
"""Arch mirror checker; uses the check() inherited from Project unchanged."""

9
projects/centos.py Normal file
View File

@ -0,0 +1,9 @@
"""
Contains CentOS class
"""
from project import Project
class CentOS(Project):
"""CentOS mirror checker; uses the check() inherited from Project unchanged."""

9
projects/ceph.py Normal file
View File

@ -0,0 +1,9 @@
"""
Contains Ceph class
"""
from project import Project
class Ceph(Project):
"""Ceph mirror checker; uses the check() inherited from Project unchanged."""

20
projects/cpan.py Normal file
View File

@ -0,0 +1,20 @@
"""
Contains CPAN class
"""
import requests
from project import Project
from shared import CSC_MIRROR
class CPAN(Project):
    """CPAN mirror checker; reads CPAN's published mirror list instead of comparing files."""

    @staticmethod
    def check(data, project, current_time=None):
        """Check whether CPAN's mirror list reports the CSC mirror as fresh enough.

        Args:
            data: Mapping of project name to its configuration; the entry
                must provide ``out_of_sync_interval``.
            project: Key of the entry in ``data`` to check.
            current_time: Reference time as integer epoch seconds. Defaults to
                the current time, so the checker can be called with the same
                two arguments as every other checker.

        Returns:
            True if the list contains the CSC CPAN URL and ``current_time``
            minus that entry's ``age`` value is within the configured
            interval; False if it is older or the CSC mirror is not listed.

        Raises:
            requests.exceptions.RequestException: If the list cannot be
                fetched (including a timeout).
        """
        # Local import keeps this block independent of the module's import list.
        import time

        if current_time is None:
            current_time = int(time.time())

        # A timeout stops one unresponsive server from hanging the whole run.
        res_json = requests.get("http://mirrors.cpan.org/cpan-json.txt", timeout=30).json()
        csc_url = f"{CSC_MIRROR}CPAN/"
        for mirror in res_json:
            # .get() so an entry without a "url" key is skipped rather than fatal.
            if mirror.get("url") == csc_url:
                return current_time - int(mirror["age"]) <= data[project]["out_of_sync_interval"]
        return False

9
projects/cygwin.py Normal file
View File

@ -0,0 +1,9 @@
"""
Contains Cygwin class
"""
from project import Project
class Cygwin(Project):
"""Cygwin mirror checker; uses the check() inherited from Project unchanged."""

9
projects/debian.py Normal file
View File

@ -0,0 +1,9 @@
"""
Contains Debian class
"""
from project import Project
class Debian(Project):
"""Debian mirror checker; uses the check() inherited from Project unchanged."""

9
projects/debiancd.py Normal file
View File

@ -0,0 +1,9 @@
"""
Contains DebianCD class
"""
from project import Project
class DebianCD(Project):
"""DebianCD mirror checker; uses the check() inherited from Project unchanged."""

View File

@ -0,0 +1,9 @@
"""
Contains DebianMultimedia class
"""
from project import Project
class DebianMultimedia(Project):
"""DebianMultimedia mirror checker; uses the check() inherited from Project unchanged."""

9
projects/debianports.py Normal file
View File

@ -0,0 +1,9 @@
"""
Contains DebianPorts class
"""
from project import Project
class DebianPorts(Project):
"""DebianPorts mirror checker; uses the check() inherited from Project unchanged."""

View File

@ -0,0 +1,9 @@
"""
Contains DebianSecurity class
"""
from project import Project
class DebianSecurity(Project):
"""DebianSecurity mirror checker; uses the check() inherited from Project unchanged."""

9
projects/eclipse.py Normal file
View File

@ -0,0 +1,9 @@
"""
Contains Eclipse class
"""
from project import Project
class Eclipse(Project):
"""Eclipse mirror checker; uses the check() inherited from Project unchanged."""

9
projects/fedora.py Normal file
View File

@ -0,0 +1,9 @@
"""
Contains Fedora class
"""
from project import Project
class Fedora(Project):
"""Fedora mirror checker; uses the check() inherited from Project unchanged."""

9
projects/freebsd.py Normal file
View File

@ -0,0 +1,9 @@
"""
Contains FreeBSD class
"""
from project import Project
class FreeBSD(Project):
"""FreeBSD mirror checker; uses the check() inherited from Project unchanged."""

View File

@ -0,0 +1,9 @@
"""
Contains GentooDistfiles class
"""
from project import Project
class GentooDistfiles(Project):
"""GentooDistfiles mirror checker; uses the check() inherited from Project unchanged."""

32
projects/gentooportage.py Normal file
View File

@ -0,0 +1,32 @@
"""
Contains GentooPortage class
"""
import os
from project import Project
class GentooPortage(Project):
    """GentooPortage mirror checker; fetches manifests with rsync and compares them."""

    @staticmethod
    def check(data, project):
        """Check whether the CSC manifest matches at least one upstream manifest.

        The configured file is copied with ``rsync`` from the CSC mirror and
        from two upstreams into a temporary directory, then compared byte for
        byte.

        Args:
            data: Mapping of project name to its configuration; the entry
                must provide ``csc``, ``upstream1``, ``upstream2`` and
                ``file``.
            project: Key of the entry in ``data`` to check.

        Returns:
            True if the CSC copy is identical to the copy from ``upstream1``
            or ``upstream2``. False otherwise, including when the CSC copy
            could not be fetched (a failed fetch must never count as
            "in sync").
        """
        # Local imports keep this block independent of the module's import list.
        import filecmp
        import subprocess
        import tempfile

        entry = data[project]

        with tempfile.TemporaryDirectory() as workdir:

            def fetch(source_key, name):
                """Rsync one manifest into workdir; return its path, or None on failure."""
                target = os.path.join(workdir, name)
                try:
                    # Argument list (no shell): configuration values cannot be
                    # interpreted as shell syntax.
                    result = subprocess.run(
                        ["rsync", "-q", entry[source_key] + entry["file"], target],
                        check=False,
                    )
                except OSError:
                    # rsync is not installed or could not be started.
                    return None
                return target if result.returncode == 0 else None

            csc_manifest = fetch("csc", "csc_manifest")
            if csc_manifest is None:
                return False

            upstream_manifests = (
                fetch("upstream1", "upstream_manifest1"),
                fetch("upstream2", "upstream_manifest2"),
            )
            return any(
                manifest is not None and filecmp.cmp(csc_manifest, manifest, shallow=False)
                for manifest in upstream_manifests
            )

44
projects/gnome.py Normal file
View File

@ -0,0 +1,44 @@
"""
Contains GNOME class
"""
import re
import requests
from project import Project
from shared import CSC_MIRROR
class GNOME(Project):
    """GNOME mirror checker; walks nested version directories before comparing a file."""

    @staticmethod
    def check(data, project):
        """Check whether the CSC GNOME mirror carries the newest upstream release.

        Walks two levels of version directories starting at ``file1``. At each
        level CSC and ``upstream1`` must agree on the last version found in
        the listing. ``file2`` inside the resulting directory is then compared
        against ``upstream2`` and, if that does not match, ``upstream3``.

        Args:
            data: Mapping of project name to its configuration; the entry
                must provide ``csc``, ``file1``, ``file2``, ``upstream1``,
                ``upstream2`` and ``upstream3``.
            project: Key of the entry in ``data`` to check.

        Returns:
            True if the version directories agree and ``file2`` matches at
            least one of the two upstreams. False otherwise, including when a
            listing contains no recognisable version.

        Raises:
            requests.exceptions.RequestException: If a CSC or ``upstream1``
                request fails. Failures of ``upstream2``/``upstream3`` count
                as a non-match instead.
        """
        timeout = 30  # seconds; a stalled server must not block the whole run

        def newest(pattern, listing):
            """Return the last version matched in listing, or None if there is none."""
            found = re.findall(pattern, listing)
            return found[-1].lstrip('"') if found else None

        entry = data[project]
        path = entry["file1"]
        csc_base = CSC_MIRROR + entry["csc"]

        # Each pattern matches a double quote followed by a version-like
        # token; the second level additionally allows word characters.
        for pattern in (r"\"\d+\.?\d*", r"\"\d+\.?\w*\.?\w*"):
            csc_listing = requests.get(csc_base + path, timeout=timeout).text
            upstream_listing = requests.get(entry["upstream1"] + path, timeout=timeout).text
            csc_latest = newest(pattern, csc_listing)
            # An unparseable listing is reported as out of sync rather than
            # raising IndexError, which the caller does not handle.
            if csc_latest is None or csc_latest != newest(pattern, upstream_listing):
                return False
            path += csc_latest + "/"

        target = path + entry["file2"]
        csc_text = requests.get(csc_base + target, timeout=timeout).text
        for upstream_key in ("upstream2", "upstream3"):
            try:
                if csc_text == requests.get(entry[upstream_key] + target, timeout=timeout).text:
                    return True
            except requests.exceptions.RequestException:
                # An unreachable upstream simply cannot confirm the match.
                continue
        return False

9
projects/gnu.py Normal file
View File

@ -0,0 +1,9 @@
"""
Contains GNU class
"""
from project import Project
class GNU(Project):
"""GNU mirror checker; uses the check() inherited from Project unchanged."""

9
projects/gutenberg.py Normal file
View File

@ -0,0 +1,9 @@
"""
Contains Gutenberg class
"""
from project import Project
class Gutenberg(Project):
"""Gutenberg mirror checker; uses the check() inherited from Project unchanged."""

17
projects/ipfire.py Normal file
View File

@ -0,0 +1,17 @@
"""
Contains IPFire class
"""
import requests
from project import Project
class IPFire(Project):
    """IPFire mirror checker; reads the status page IPFire publishes for the CSC mirror."""

    @staticmethod
    def check(data, project):
        """Check whether IPFire's status page reports the CSC mirror as up.

        Args:
            data: Unused; accepted so the call signature matches the other
                checkers.
            project: Unused; accepted for the same reason.

        Returns:
            True if the page text contains "The mirror is up", else False.

        Raises:
            requests.exceptions.RequestException: If the page cannot be
                fetched (including a timeout).
        """
        ipfire_url = "https://mirrors.ipfire.org/mirrors/mirror.csclub.uwaterloo.ca"
        # A timeout stops one unresponsive server from hanging the whole run.
        ipfire_text = requests.get(ipfire_url, timeout=30).text
        return "The mirror is up" in ipfire_text

9
projects/kde.py Normal file
View File

@ -0,0 +1,9 @@
"""
Contains KDE class
"""
from project import Project
class KDE(Project):
"""KDE mirror checker; uses the check() inherited from Project unchanged."""

View File

@ -0,0 +1,9 @@
"""
Contains KDEApplicationData class
"""
from project import Project
class KDEApplicationData(Project):
"""KDEApplicationData mirror checker; uses the check() inherited from Project unchanged."""

9
projects/kernel.py Normal file
View File

@ -0,0 +1,9 @@
"""
Contains Kernel class
"""
from project import Project
class Kernel(Project):
"""Kernel mirror checker; uses the check() inherited from Project unchanged."""

9
projects/openbsd.py Normal file
View File

@ -0,0 +1,9 @@
"""
Contains OpenBSD class
"""
from project import Project
class OpenBSD(Project):
"""OpenBSD mirror checker; uses the check() inherited from Project unchanged."""

4
requirements.txt Normal file
View File

@ -0,0 +1,4 @@
requests~=2.26.0
datefinder~=0.7.1
dateparser~=1.0.0