From 8195cbb04244b02660d65e06299767fc2a435998 Mon Sep 17 00:00:00 2001
From: Tom <shutong5s@gmail.com>
Date: Sun, 3 Oct 2021 23:47:55 -0700
Subject: [PATCH] added mxlinux, mxlinux-iso

---
 README.md               |  4 ++++
 data.json               | 16 ++++++++++++++-
 main.py                 |  2 +-
 projects/mxlinux.py     | 26 +++++++++++++++++++++++++
 projects/mxlinux_iso.py | 26 +++++++++++++++++++++++++
 test.py                 | 43 ++++++++++++++++++++++++-----------------
 6 files changed, 97 insertions(+), 20 deletions(-)
 create mode 100644 projects/mxlinux.py
 create mode 100644 projects/mxlinux_iso.py

diff --git a/README.md b/README.md
index e1c1523..5143771 100644
--- a/README.md
+++ b/README.md
@@ -24,7 +24,9 @@ raspbian mirror: https://mirror.ox.ac.uk/sites/archive.raspbian.org/archive/
 
 mxlinux: https://sourceforge.net/projects/mx-linux/ (scrap the last day?)
 
+linuxmint: no public repo
 linuxmint-packages pool: http://rsync-packages.linuxmint.com/pool/
+macPorts: only distfiles has public repo, no timestamp, too large to loop through
 scientific: https://scientificlinux.org/downloads/sl-mirrors/ (CSC not listed)
 slackware: https://mirrors.slackware.com/mirrorlist/ https://mirrors.slackware.com/slackware/
 ubuntu-ports: http://ports.ubuntu.com/ubuntu-ports/
@@ -37,5 +39,7 @@ tdf: https://download.documentfoundation.org/
 ubuntu: https://launchpad.net/ubuntu/+mirror/mirror.csclub.uwaterloo.ca-archive
 vlc: http://download.videolan.org/pub/videolan/
 manjaro
+mxlinux
+mxlinux-iso: this one seems out of sync on the official tracker for 134 days, which is weird
 
 trisquel: https://trisquel.info/mirmon/index.html out of date website
\ No newline at end of file
diff --git a/data.json b/data.json
index 7d37ce9..4ecfb6d 100644
--- a/data.json
+++ b/data.json
@@ -35,7 +35,7 @@
 		"file": "centos/TIME"
 	},
 	"Ceph": {
-		"out_of_sync_since": null,
+		"out_of_sync_since": 1633329349,
 		"out_of_sync_interval": 86400,
 		"csc": "ceph/",
 		"upstream": "https://download.ceph.com/",
@@ -205,5 +205,19 @@
 		"csc": "",
 		"upstream": "https://repo.manjaro.org/",
 		"file": ""
+	},
+	"mxlinux": {
+		"out_of_sync_since": null,
+		"out_of_sync_interval": 86400,
+		"csc": "",
+		"upstream": "http://rsync-mxlinux.org/mirmon/packages.html",
+		"file": ""
+	},
+	"mxlinux_iso": {
+		"out_of_sync_since": null,
+		"out_of_sync_interval": 86400,
+		"csc": "",
+		"upstream": "http://rsync-mxlinux.org/mirmon/index.html",
+		"file": ""
 	}
 }
\ No newline at end of file
diff --git a/main.py b/main.py
index 6cdae85..a6e27dc 100644
--- a/main.py
+++ b/main.py
@@ -67,7 +67,7 @@ if __name__ == "__main__":
 					print(f"Failure: {project} does not exist")
 					continue
 				project_class = getattr(sys.modules[__name__], project)
-				if project == "CPAN" or project == "ubuntu" or project == "manjaro":
+				if project == "CPAN" or project == "ubuntu" or project == "manjaro" or project == "mxlinux" or project == "mxlinux_iso":
 					checker_result = project_class.check(data, project, current_time)
 					if checker_result:
 						print(f"Success: {project} up-to-date")
diff --git a/projects/mxlinux.py b/projects/mxlinux.py
new file mode 100644
index 0000000..00a7837
--- /dev/null
+++ b/projects/mxlinux.py
@@ -0,0 +1,26 @@
+"""
+Contains mxlinux class
+"""
+
+import os
+from project import Project
+from shared import CSC_MIRROR
+import requests
+import datefinder # another date finding library
+from datetime import timedelta
+from datetime import datetime
+import re
+import pandas as pd
+
+class mxlinux(Project):
+    """Compare the age the upstream mirmon page reports for the CSC mirror with out_of_sync_interval."""
+    @staticmethod
+    def check(data, project, current_time):
+        """Return True if the listed mirror age is within the allowed interval, False if older or not found."""
+        page = requests.get(data[project]["upstream"]).text
+        index_of_mirror = page.find("mirror.csclub.uwaterloo.ca")
+        if index_of_mirror == -1:  # find() returns -1 when absent; slicing from -1 would search only the last character
+            return False
+        # Accept integer or decimal ages ("7 hours", "3 days", "1.5 days"); the first age after the mirror name is used.
+        m = re.search(r'\d+(?:\.\d+)? (?:hours|days)', page[index_of_mirror:])
+        return m is not None and pd.to_timedelta(m.group(0)) <= pd.to_timedelta(data[project]["out_of_sync_interval"], unit='s')
\ No newline at end of file
diff --git a/projects/mxlinux_iso.py b/projects/mxlinux_iso.py
new file mode 100644
index 0000000..11cbc2c
--- /dev/null
+++ b/projects/mxlinux_iso.py
@@ -0,0 +1,26 @@
+"""
+Contains mxlinux_iso class
+"""
+
+import os
+from project import Project
+from shared import CSC_MIRROR
+import requests
+import datefinder # another date finding library
+from datetime import timedelta
+from datetime import datetime
+import re
+import pandas as pd
+
+class mxlinux_iso(Project):
+    """Compare the age the upstream mirmon page reports for the CSC ISO mirror with out_of_sync_interval."""
+    @staticmethod
+    def check(data, project, current_time):
+        """Return True if the listed mirror age is within the allowed interval, False if older or not found."""
+        page = requests.get(data[project]["upstream"]).text
+        index_of_mirror = page.find("mirror.csclub.uwaterloo.ca")
+        if index_of_mirror == -1:  # find() returns -1 when absent; slicing from -1 would search only the last character
+            return False
+        # Accept integer or decimal ages ("7 hours", "3 days", "1.5 days"); the first age after the mirror name is used.
+        m = re.search(r'\d+(?:\.\d+)? (?:hours|days)', page[index_of_mirror:])
+        return m is not None and pd.to_timedelta(m.group(0)) <= pd.to_timedelta(data[project]["out_of_sync_interval"], unit='s')
\ No newline at end of file
diff --git a/test.py b/test.py
index 04bae05..536a855 100644
--- a/test.py
+++ b/test.py
@@ -6,11 +6,14 @@ from datetime import datetime
 from datetime import timedelta
 import time
 import pandas as pd
-   
+
+# This script brute-forces through the whole directory tree and checks the dates it finds.
+# It may sound horrible, but for certain distros I believe it is indeed the best solution.
+
 # lists
 urls=[]
 
-home_site = "http://ports.ubuntu.com"
+home_site = "http://ykf.ca.distfiles.macports.org"
 
 # function created
 def scrape(site):
@@ -25,8 +28,8 @@ def scrape(site):
         href = i.attrs['href']
            
         if href.endswith("/") and href != "../" and href != "/":
-            if home_site+href in urls: # avoids the link to parent directory
-                continue
+            """if home_site+href in urls: # avoids the link to parent directory
+                continue"""
             site_next = site+href
 
             if site_next not in  urls:
@@ -38,7 +41,7 @@ def scrape(site):
 def get_latest_date(web_dir):
     page = requests.get(site).text
 
-    str_dates = re.findall(r'\d{4}-\d{2}-\d{2} \d{2}:\d{2}', page)
+    str_dates = re.findall(r'(\d{2}-\w{3}-\d{4} \d{2}:\d{2})|(\d{4}-\d{2}-\d{2} \d{2}:\d{2})', page)
     dates = [list(datefinder.find_dates(date))[0] for date in str_dates]
 
     # for date in dates:
@@ -50,27 +53,31 @@ def get_latest_date(web_dir):
 if __name__ =="__main__":
    
     # website to be scrape
-    # site="http://ports.ubuntu.com/ubuntu-ports/"
+    #site="http://ykf.ca.distfiles.macports.org/MacPorts/mpdistfiles/"
     # works on: https://www.x.org/releases/
+    #           https://mirror.csclub.uwaterloo.ca/linuxmint/ #works wonders for linuxmint
+    #           unfortunately, linuxmint does not have a public repo, the worldwide mirror LayerOnline on https://linuxmint.com/mirrors.php seems like the best choice
 
     # calling function
-    # scrape(site)
+    #scrape(site)
 
-    # latest_date = get_latest_date(urls[0])
+    #latest_date = get_latest_date(urls[0])
     # get_latest_date(urls[0])
-    # for dir in urls:
-    #     latest_date2 = get_latest_date(dir)
-    #     if (latest_date2 >= latest_date):
-    #         latest_date = latest_date2
+    #for dir in urls:
+    #    latest_date2 = get_latest_date(dir)
+    #    if (latest_date2 >= latest_date):
+    #        latest_date = latest_date2
 
-    # print(latest_date)
+    #print(latest_date)
 
-    page = requests.get("https://repo.manjaro.org/").text
-    indexOfFile = page.find("mirror.csclub.uwaterloo.ca/manjaro")
+    page = requests.get("http://rsync-mxlinux.org/mirmon/index.html").text
+    indexOfFile = page.find("mirror.csclub.uwaterloo.ca")
 
-    m = re.search(r'(?P<hours>\d+):(?P<minutes>\d+)', page[indexOfFile:]) # solution from: https://stackoverflow.com/questions/21074100/how-to-convert-standard-timedelta-string-to-timedelta-object/21074460
-    duration = timedelta(**{key: float(val) for key, val in m.groupdict().items()})
-    print(duration)
+    m = re.search(r'(\d+ hours)|(\d+(\.)?\d+ days)', page[indexOfFile:]) # solution from: https://stackoverflow.com/questions/21074100/how-to-convert-standard-timedelta-string-to-timedelta-object/21074460
+
+    duration = pd.to_timedelta(m.group(0))
 
     print (duration <= pd.to_timedelta(86400, unit='s'))
+    
+    # https://launchpad.net/ubuntu/+mirror/mirror.csclub.uwaterloo.ca-archive
         
\ No newline at end of file