import json  # project info is stored in a JSON file
import re  # strips stray numbers (e.g. file sizes) that would confuse date finding
from datetime import timedelta

import datefinder  # third-party date-finding library
import requests

from project import Project
from shared import CSC_MIRROR


class macports(Project):
    """Freshness check for the MacPorts mirror."""

    @staticmethod
    def checker(directory_URL, file_name):
        """Return the timestamp listed for ``file_name`` in a directory index.

        The page at ``directory_URL`` is downloaded, the text following the
        first occurrence of ``file_name`` is isolated, and the first date
        that ``datefinder`` recognises in that text is returned.

        Args:
            directory_URL: URL of the directory listing page.
            file_name: Name of the file whose listed timestamp is wanted.

        Returns:
            The first date found after ``file_name``, as produced by
            ``datefinder.find_dates``.

        Raises:
            IndexError: If ``file_name`` does not occur in the page, or no
                date can be found after it.
        """
        # A timeout keeps one unresponsive server from stalling the checker.
        page = requests.get(directory_URL, timeout=30).text

        file_index = page.find(file_name)
        if file_index == -1:
            raise IndexError(
                f"{file_name!r} not found in listing at {directory_URL}"
            )

        # The entry normally ends with its table row.  The CSC mirror does
        # not use tr tags, so there the segment runs to the end of the page.
        row_end = page.find("</tr>", file_index)
        if row_end == -1:
            segment = page[file_index:]
        else:
            segment = page[file_index:row_end]

        # Remove whitespace-delimited numbers, with or without a trailing
        # unit (e.g. "50kb"), so file sizes are not mistaken for dates.
        segment_clean = re.sub(r'\s\d+\w*\s', ' ', segment)

        # Only the first date is needed, so take it lazily rather than
        # parsing every date that appears in the rest of the segment.
        first_date = next(iter(datefinder.find_dates(segment_clean)), None)
        if first_date is None:
            raise IndexError(
                f"no date found for {file_name!r} at {directory_URL}"
            )
        return first_date

    @classmethod
    def check(cls, data, project, current_time):
        """Check if project packages are up-to-date.

        Args:
            data: Mapping of project name to its settings; the entry for
                ``project`` must provide the keys ``"csc"``, ``"upstream"``
                and ``"file"``.
            project: Key of the project entry inside ``data``.
            current_time: Not used by this check; kept so the signature
                stays the same for callers.

        Returns:
            True when the timestamps of the reference file on the CSC mirror
            and on the upstream differ by less than 6 hours.
        """
        entry = data[project]
        csc_url = CSC_MIRROR + entry["csc"]
        upstream_url = entry["upstream"]
        file_name = entry["file"]

        csc_date = cls.checker(csc_url, file_name)
        upstream_date = cls.checker(upstream_url, file_name)

        # NOTE(review): the two timestamps are compared exactly as parsed;
        # confirm both listings report times in the same timezone.

        # MacPorts are updated so often that we want to make sure we are
        # at most 6 hours out of date
        return abs(upstream_date - csc_date) < timedelta(hours=6)