# mirror-checker/projects/opensuse.py

from bs4 import BeautifulSoup
import requests
import re
import datefinder  # another date finding library
from project import Project
from shared import CSC_MIRROR

class opensuse(Project):
    """Mirror check that compares ``.repo`` listing timestamps.

    The upstream directory tree is walked recursively. For every directory
    whose listing mentions a ``.repo`` file, the timestamp shown on the
    upstream listing is compared with the one on the CSC mirror listing.
    """

    @staticmethod
    def checker(directory_URL, file_name):
        """Return the first listing timestamp that follows ``file_name``.

        Fetches ``directory_URL`` and looks for the first occurrence of
        ``file_name`` in the page text, then for the first timestamp after it
        in either ``DD-Mon-YYYY HH:MM`` or ``YYYY-MM-DD HH:MM`` form.

        Args:
            directory_URL: URL of the directory listing page to download.
            file_name: Text to search for in the page (e.g. ``".repo"``).

        Returns:
            The date parsed by ``datefinder`` from the matched timestamp, or
            ``False`` when ``file_name`` is not on the page, no timestamp
            follows it, or the timestamp cannot be parsed.
        """
        page = requests.get(directory_URL).text
        file_index = page.find(file_name)
        if file_index == -1:
            return False

        # Only the first timestamp after the file name matters, so a single
        # search is enough; group(0) is the text of whichever alternative hit.
        match = re.search(
            r'(\d{2}-\w{3}-\d{4} \d{2}:\d{2})|(\d{4}-\d{2}-\d{2} \d{2}:\d{2})',
            page[file_index:],
        )
        if match is None:
            # File name present but no recognisable timestamp after it:
            # report "not found" instead of raising IndexError.
            return False

        # Fall back to False when datefinder yields nothing for the match.
        return next(iter(datefinder.find_dates(match.group(0))), False)

    @classmethod
    def scrape(cls, compare, folders, site1, site2, directory):
        """Recursively compare ``.repo`` timestamps under ``directory``.

        If the listing at ``site1 + directory`` contains a ``.repo`` entry,
        one boolean is appended to ``compare`` and the recursion stops for
        this branch. Otherwise every sub-directory link on the ``site1``
        listing is visited in turn.

        Args:
            compare: List that receives one bool per compared directory;
                ``True`` when the ``site1`` date is not newer than the
                ``site2`` date, ``False`` when it is newer or when ``site2``
                has no ``.repo`` entry for that directory.
            folders: List of directory paths already visited; updated in
                place to avoid visiting a path twice.
            site1: Base URL whose tree is walked (``check`` passes upstream).
            site2: Base URL compared against (``check`` passes the mirror).
            directory: Path, relative to both base URLs, to inspect.
        """
        # Each checker() call is an HTTP request, so fetch each side once and
        # reuse the result for both the guard and the comparison.
        upstream_date = cls.checker(site1 + directory, ".repo")
        if upstream_date is not False:
            mirror_date = cls.checker(site2 + directory, ".repo")
            compare.append(
                mirror_date is not False and upstream_date <= mirror_date
            )
            return

        # No .repo entry here: descend into the sub-directories that the
        # site1 listing links to.
        response = requests.get(site1 + directory)
        listing = BeautifulSoup(response.text, "html.parser")

        skipped = ("../", "tumbleweed-non-oss/", "tumbleweed/")
        for link in listing.find_all("a"):
            # Anchors without an href (e.g. named anchors) are ignored
            # instead of raising KeyError.
            href = link.get("href")
            if not href or not href.endswith("/"):
                continue
            # Skip root-relative links (this also covers a bare "/"), the
            # parent link and the explicitly excluded directories.
            if href.startswith("/") or href in skipped:
                continue

            dir_next = directory + href
            if dir_next not in folders:
                folders.append(dir_next)
                cls.scrape(compare, folders, site1, site2, dir_next)

    @classmethod
    def check(cls, data, project):
        """Check if project packages are up-to-date.

        Args:
            data: Mapping in which ``data[project]`` provides the ``"csc"``,
                ``"upstream"`` and ``"file"`` strings used to build the URLs.
            project: Key of the project entry in ``data``.

        Returns:
            ``True`` when every compared directory passed. Note that this is
            also ``True`` when no directory was compared at all, because
            ``all([])`` is ``True``.
        """
        compare = []
        folders = []

        csc_url = CSC_MIRROR + data[project]["csc"] + data[project]["file"]
        upstream_url = data[project]["upstream"] + data[project]["file"]

        # Walk the upstream tree from its root, comparing against the mirror.
        cls.scrape(compare, folders, upstream_url, csc_url, "")

        return all(compare)