mirror-checker/projects/macports.py

51 lines
2.2 KiB
Python

import requests
import re # import regular expressions to remove stray numbers in string that might interfere with date finding
import json # import json to read project info stored in json file
from project import Project
from shared import CSC_MIRROR
import datefinder # another date finding library
from datetime import timedelta
class macports(Project):
    """Freshness check for the MacPorts mirror.

    Compares the timestamp shown for a given file in the CSC mirror's
    directory listing with the timestamp shown for the same file in the
    upstream directory listing.
    """

    @staticmethod
    def checker(directory_URL, file_name):
        """Return the first date listed after ``file_name`` on a listing page.

        Downloads the page at ``directory_URL``, takes the text that starts at
        the first occurrence of ``file_name`` and ends at the next ``</tr>``
        (or at the end of the page when there is no ``</tr>``), strips
        size-like numeric tokens from it and returns the first date that
        ``datefinder.find_dates`` yields for the remaining text.

        Args:
            directory_URL: URL of the directory listing page to download.
            file_name: Name of the file whose timestamp should be read.

        Returns:
            The first match produced by ``datefinder.find_dates``.

        Raises:
            IndexError: If ``file_name`` does not occur on the page, or no
                date can be found after it.

        Exceptions raised by ``requests.get`` are not caught here.
        """
        page = requests.get(directory_URL).text

        file_index = page.find(file_name)
        if file_index == -1:
            # Without this guard the negative index silently produces an
            # empty segment and an uninformative "list index out of range".
            raise IndexError(
                f"file {file_name!r} not found in listing at {directory_URL}"
            )

        # Search from the file name onwards so the result is already an
        # absolute index into the page.
        end_index = page.find("</tr>", file_index)
        if end_index == -1:
            # The CSC mirror does not use tr tags, so read to the end of the
            # page instead.
            end_index = len(page)

        segment = page[file_index:end_index]
        # Remove stray numbers (file sizes in particular) that might
        # interfere with date finding. The second pass must run on the output
        # of the first one, otherwise the first pass has no effect.
        segment_clean = re.sub(r'\s\d+\s', ' ', segment)  # bare numbers
        segment_clean = re.sub(r'\s\d+\w*\s', ' ', segment_clean)  # number + unit, e.g. 50kb

        # find_dates returns a generator; materialize it so that an empty
        # result can be detected and reported.
        matches = list(datefinder.find_dates(segment_clean))
        if not matches:
            raise IndexError(
                f"no date found for {file_name!r} in listing at {directory_URL}"
            )
        return matches[0]

    @classmethod
    def check(cls, data, project, current_time):
        """Check if project packages are up-to-date.

        Args:
            data: Mapping of project name to its settings; the entry for
                ``project`` must provide the keys ``"csc"`` (path appended to
                ``CSC_MIRROR``), ``"upstream"`` (upstream listing URL) and
                ``"file"`` (name of the file whose timestamp is compared).
            project: Key of the project entry inside ``data``.
            current_time: Not used by this check.

        Returns:
            True when the two timestamps are less than 6 hours apart, in
            either direction; False otherwise.
        """
        settings = data[project]
        csc_url = CSC_MIRROR + settings["csc"]
        upstream_url = settings["upstream"]
        file_name = settings["file"]

        csc_date = cls.checker(csc_url, file_name)
        upstream_date = cls.checker(upstream_url, file_name)

        # MacPorts are updated so often that we want to make sure we are
        # at most 6 hours out of date
        return abs(upstream_date - csc_date) < timedelta(hours=6)