diff --git a/README.md b/README.md
index 6c8141d..2880705 100644
--- a/README.md
+++ b/README.md
@@ -4,7 +4,7 @@ This mirror status checker determines whether CSC mirror is up-to-date with upst
 
 ## How To Run
 
-A configuration file may be provided through standard input. Without a configuration file, execute `python main.py`. By default, all of the available distributions will be checked. With a configuration file, execute `python main.py < name_of_config_file.in`, for example, `python main.py < example.in`. In this case, only the distributions listed in the configuration file will be checked.
+A configuration file may be provided through standard input. Without a configuration file, execute `python main.py`. By default, all the available distributions will be checked. With a configuration file, execute `python main.py < name_of_config_file.in`, for example, `python main.py < example.in`. In this case, only the distributions listed in the configuration file will be checked.
 
 ## Resources
 
diff --git a/main.py b/main.py
index 67a5d6e..e6f650c 100644
--- a/main.py
+++ b/main.py
@@ -44,18 +44,18 @@ import datefinder # another date finding library
 
 def checker(directory_URL, file_name):
     page = requests.get(directory_URL).text
-    indexOfFile = page.find(file_name)
+    file_index = page.find(file_name)
     # print(page)
 
     # remove stray numbers (file size numbers in particular) that might interfere with date finding
-    segment_clean = re.sub(r'\s\d+\s', ' ', page[indexOfFile:]) # removes numbers for size
-    segment_clean = re.sub(r'\s\d+\w*\s', ' ', page[indexOfFile:]) # removes numbers + size unit. e.x. 50kb
+    segment_clean = re.sub(r'\s\d+\s', ' ', page[file_index:]) # removes numbers for size
+    segment_clean = re.sub(r'\s\d+\w*\s', ' ', page[file_index:]) # removes numbers + size unit. e.x. 50kb
     # print(segment_clean)
 
     # implementation using dateparser.search.search_dates
     # notes: some dates don't parse correctly with this tool
-    # print(search_dates(page[indexOfFile:], languages=['en']))
-    # print(search_dates(page[indexOfFile:])[0])
+    # print(search_dates(page[file_index:], languages=['en']))
+    # print(search_dates(page[file_index:])[0])
 
     # finds the dates in the segment after the file name
     # notes: a generator will be returned by the datefinder module. I'm typecasting it to a list. Please read the note of caution provided at the bottom.
@@ -64,9 +64,9 @@ def checker(directory_URL, file_name):
 
     if len(matches) > 0:
         date = matches[0] # date is of type datetime.datetime
-        return (date.strftime("%m/%d/%Y, %H:%M:%S"))
+        return date.strftime("%m/%d/%Y, %H:%M:%S")
     else:
-        return ('No dates found')
+        return 'No dates found'
 
 
 if __name__ == "__main__":
@@ -104,7 +104,7 @@ if __name__ == "__main__":
             elif data[project]["out_of_sync_since"] is None:
                 data[project]["out_of_sync_since"] = current_time
             elif current_time - data[project]["out_of_sync_since"] \
-                > data[project]["out_of_sync_interval"]:
+                    > data[project]["out_of_sync_interval"]:
                 print(f"Failure: {project} out-of-sync")
                 continue
             print(f"Success: {project} up-to-date")