add csc-sync-exec

Max Erenberg 2023-01-28 17:06:16 -05:00 committed by Mirror
parent 52b5945857
commit fd75bedb61
5 changed files with 868 additions and 19 deletions


@@ -5,6 +5,7 @@ import (
 	"fmt"
 	"os"
 	"path/filepath"
+	"strings"
 
 	"gopkg.in/ini.v1"
@@ -139,6 +140,8 @@ type Repo struct {
 	PasswordFile string `ini:"password_file"`
 	// the archive name for the ftpsync command (required for csc-sync-ftpsync)
 	FtpsyncArchive string `ini:"ftpsync_archive"`
+	// the sync command to execute with `sh -c` (required for csc-sync-exec)
+	ExecStr string `ini:"exec"`
 	// full path to file storing the repo sync state
 	StateFile string `ini:"-"`
 	// full path for file storing general logging of this repo
@@ -258,11 +261,23 @@ func LoadConfig(configPath string, doneChan chan SyncResult, stopChan chan struc
 		panic("Missing or invalid frequency for " + repo.Name)
 	} else if repo.SyncType == "" {
 		panic("Missing sync type from " + repo.Name)
+	} else if repo.SyncType == "csc-sync-ftpsync" {
+		if repo.FtpsyncArchive == "" {
+			panic("Missing ftpsync archive for " + repo.Name)
+		}
+	} else if repo.SyncType == "csc-sync-exec" {
+		if repo.ExecStr == "" {
+			panic("Missing 'exec' for " + repo.Name)
+		}
+		// Allow some limited placeholders
+		repo.ExecStr = strings.ReplaceAll(repo.ExecStr, "{{repoLogFile}}", repo.RepoLogFile)
+		repo.ExecStr = strings.ReplaceAll(repo.ExecStr, "{{rsyncLogFile}}", repo.RsyncLogFile)
+		if strings.Contains(repo.ExecStr, "{{") {
+			panic(repo.Name + " has unsatisfied placeholders in 'exec': " + repo.ExecStr)
+		}
 	} else if repo.LocalDir == "" {
 		panic("Missing local download location for " + repo.Name)
-	} else if repo.SyncType == "csc-sync-ftpsync" && repo.FtpsyncArchive == "" {
-		panic("Missing ftpsync archive for " + repo.Name)
-	} else if repo.SyncType != "csc-sync-ftpsync" && repo.SyncType != "csc-sync-fedora" && repo.RsyncHost == "" {
+	} else if repo.RsyncHost == "" {
 		panic("Missing rsync host for " + repo.Name)
 	}
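Taken together, the two hunks above mean the {{repoLogFile}} and {{rsyncLogFile}} placeholders are expanded exactly once, at config-load time, and any placeholder still present after expansion is a fatal configuration error. The expanded string is later handed verbatim to `sh -c` (see the cscSyncExec change below). As a sketch, the [fedora] entry further down ends up spawning roughly the following, assuming both log files resolve under ~/merlin/log (the rsync log name here is illustrative):

    sh -c 'truncate --size=0 ~/merlin/log/fedora.log; cd ~/quick-fedora-mirror && ./quick-fedora-mirror > ~/merlin/log/fedora-rsync.log'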


@@ -279,8 +279,11 @@ rsync_dir = ftp.freebsd.org/pub/FreeBSD/
 
 ; This handles both fedora/linux and fedora/epel
-; See ~/quick-fedora-mirror/quick-fedora-mirror.conf
 [fedora]
-sync_type = csc-sync-fedora
+sync_type = csc-sync-exec
 frequency = bi-hourly
+; See ~/quick-fedora-mirror/quick-fedora-mirror.conf
+; LOGFILE is set to {{repoLogFile}} (~/merlin/log/fedora.log)
+exec = truncate --size=0 {{repoLogFile}}; cd ~/quick-fedora-mirror && ./quick-fedora-mirror > {{rsyncLogFile}}
 
 [ubuntu-ports-releases]
 sync_type = csc-sync-standard
@@ -396,12 +399,9 @@ rsync_host = rsync.alpinelinux.org
 rsync_dir = alpine
 
 [raspbian]
 ;verbose = true
-sync_type = csc-sync-standard
+sync_type = csc-sync-exec
 frequency = bi-hourly
-local_dir = raspbian
-rsync_host = raspbian.freemirror.org
-rsync_dir = raspbian
+exec = cd /mirror/root/raspbian && mkdir -p .~tmp~ && ~/raspbmirror/raspbmirror.py --tmpdir ./.~tmp~ --sourcepool /mirror/root/debian/pool > {{rsyncLogFile}}
 
 [raspberrypi]
 sync_type = csc-sync-standard-ipv6


@@ -55,8 +55,8 @@ func getSyncCommand(repo *config.Repo) (cmds [][]string) {
 		return append(cmds, cscSyncDebianStep1(repo), cscSyncDebianStep2(repo))
 	case "csc-sync-debian-cd":
 		return append(cmds, cscSyncDebianCD(repo))
-	case "csc-sync-fedora":
-		return append(cmds, cscSyncFedora(repo))
+	case "csc-sync-exec":
+		return append(cmds, cscSyncExec(repo))
 	case "csc-sync-ftpsync":
 		return append(cmds, cscSyncFtpsync(repo))
 	case "csc-sync-s3":
@@ -293,14 +293,8 @@ func cscSyncDebianCD(repo *config.Repo) []string {
 	return args
 }
 
-func cscSyncFedora(repo *config.Repo) []string {
-	// Make sure that repo.RepoLogFile (default: ~/merlin/log/fedora.log)
-	// is the same as the LOGFILE setting in ~/quick-fedora-mirror/quick-fedora-mirror.conf
-	return []string{
-		"sh", "-c",
-		"truncate --size=0 " + repo.RepoLogFile + "; " +
-			"cd ~/quick-fedora-mirror && ./quick-fedora-mirror > " + repo.RsyncLogFile,
-	}
-}
+func cscSyncExec(repo *config.Repo) []string {
+	return []string{"sh", "-c", repo.ExecStr}
+}
 
 func cscSyncFtpsync(repo *config.Repo) []string {
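With this dispatch in place, a csc-sync-exec repo runs exactly one child process: `sh -c` followed by the already-expanded exec string. For the [raspbian] entry above, that comes out to roughly the following (a sketch; the expanded {{rsyncLogFile}} path is illustrative):

    sh -c 'cd /mirror/root/raspbian && mkdir -p .~tmp~ && ~/raspbmirror/raspbmirror.py --tmpdir ./.~tmp~ --sourcepool /mirror/root/debian/pool > ~/merlin/log/raspbian-rsync.log'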

raspbmirror/README.md (new file, 5 lines added)

@@ -0,0 +1,5 @@
This script is to be used for Raspbian, which is NOT the same as Raspberry Pi OS.
It was downloaded from [here](https://raw.githubusercontent.com/plugwash/raspbian-tools/master/raspbmirror.py).
See [here](https://www.raspbian.org/RaspbianMirrors) for details about Raspbian mirroring.
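The merlin config in this commit invokes it along these lines (arguments taken from the `[raspbian]` entry in mirrors.ini; with no positional arguments the script falls back to the official archive, mirrordirector, and hash pool URLs):

    cd /mirror/root/raspbian && mkdir -p .~tmp~ && ~/raspbmirror/raspbmirror.py --tmpdir ./.~tmp~ --sourcepool /mirror/root/debian/pool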

raspbmirror/raspbmirror.py (new executable file, 835 lines added)

@@ -0,0 +1,835 @@
#!/usr/bin/env python3
# Copyright 2018 Peter Green
# Released under the MIT/Expat license, see doc/COPYING
import os
import sys
import hashlib
import gzip
import stat
#from sortedcontainers import SortedDict
#from sortedcontainers import SortedList
from collections import deque
from collections import OrderedDict
from datetime import datetime, timezone
from email.utils import parsedate_to_datetime
import argparse
import re
from heapq import heappush, heappop
import fcntl
parser = argparse.ArgumentParser(description="mirror raspbian repo.")
parser.add_argument("baseurl", help="base url for source repo (e.g. https://archive.raspbian.org/ )",nargs='?')
parser.add_argument("mdurl", help="base url for mirrordirector or local source mirror (e.g. https://mirrordirector.raspbian.org/ )",nargs='?')
parser.add_argument("hpurl", help="base url for last result hash pool (e.g. http://snapshot.raspbian.org/hashpool )",nargs='?')
parser.add_argument("--internal", help=argparse.SUPPRESS) #base URL for private repo (internal use only)
parser.add_argument("--sourcepool", help="specify a source pool to look for packages in before downloading them (useful if maintaining multiple mirrors)",action='append')
parser.add_argument("--tmpdir", help="specify a temporary directory to avoid storing temporary files in the output tree, must be on the same filesystem as the output tree")
#debug option to set the index file used for the "downloadnew" phase but not the "finalize" phase, used to test error recovery.
parser.add_argument("--debugfif", help=argparse.SUPPRESS)
#debug option to set the source url used to download "dists" files during the "downloadnew" phase, used to test error recovery.
parser.add_argument("--debugfdistsurl", help=argparse.SUPPRESS)
parser.add_argument("--tlwhitelist", help="specify comma-seperated whitelist of top-level directories")
parser.add_argument("--cleanup",help="scan for and remove files not managed by raspbmirror from mirror tree", action="store_true")
parser.add_argument("--debugskippool",help="skip downloading pool data, only download metadata (for debugging)",action="store_true")
parser.add_argument("--distswhitelist", help="specify comman seperated list of distributions")
parser.add_argument("--nolock", help="don't try to lock the target directory", action="store_true")
parser.add_argument("--repair", help="during mirroring, verify that all on-disk files match the expected sha256", action="store_true")
parser.add_argument("--urllib", help="force usage of the builtin urllib module, even if urllib3 is present", action="store_true")
parser.add_argument("--urllib3", help="force usage of the urllib3 module, panics if the dependency is missing", action="store_true")
parser.add_argument("--ipv4", help="force usage of IPv4 addresses. Requires urllib3", action="store_true")
parser.add_argument("--ipv6", help="force usage of IPv6 addresses. Requires urllib3", action="store_true")
args = parser.parse_args()
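# take an exclusive, non-blocking lock on the target directory (the cwd) so
# that concurrent mirror runs fail fast instead of interleaving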
if not args.nolock:
lockfd = os.open('.',os.O_RDONLY)
fcntl.flock(lockfd,fcntl.LOCK_EX | fcntl.LOCK_NB)
if args.urllib and args.urllib3:
print("error: flags --urllib and --urllib3 are in conflict")
exit(1)
if args.urllib:
import urllib.request
use_urllib3 = False
elif args.urllib3:
import urllib3
use_urllib3 = True
else:
# auto detect urllib3
try:
import urllib3
use_urllib3 = True
except ImportError:
import urllib.request
use_urllib3 = False
if args.ipv4 and args.ipv6:
print("error: flags --ipv4 and --ipv6 are in conflict")
exit(1)
if use_urllib3:
# the number of pools should be greater than the number of concurrently used sites.
# 10 should be safe.
dlmanager = urllib3.PoolManager(num_pools=10)
print("info: using urllib3")
# a fairly hacky way to force the usage of ipv4 or ipv6 addresses
# https://stackoverflow.com/questions/33046733/force-requests-to-use-ipv4-ipv6
if args.ipv4:
import socket
import requests.packages.urllib3.util.connection as urllib3_cn
def allowed_gai_family():
return socket.AF_INET
urllib3_cn.allowed_gai_family = allowed_gai_family
elif args.ipv6:
import socket
import requests.packages.urllib3.util.connection as urllib3_cn
def allowed_gai_family():
return socket.AF_INET6
urllib3_cn.allowed_gai_family = allowed_gai_family
else:
print("info: using urllib")
if args.ipv4:
print("error: flag --ipv4 requires the urllib3 package")
exit(1)
elif args.ipv6:
print("error: flag --ipv6 requires the urllib3 package")
exit(1)
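# record that `filename` is expected to have the given sha256/size (as listed
# in a Release/Packages/Sources index) and queue it for processing; duplicate
# entries that disagree are tolerated while scanning the existing mirror but
# are fatal during the download phase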
def addfilefromdebarchive(filestoverify,filequeue,filename,sha256,size):
size = int(size)
sha256andsize = [sha256,size,'M']
if filename in filestoverify:
if (sha256andsize[0:2] != filestoverify[filename][0:2]):
if stage == 'scanexisting':
print('warning: same file with different hash/size during scanexisting phase old:'+repr(filestoverify[filename])+' new:'+repr(sha256andsize))
#find existing sha1/size of file on disk if it exists
if os.path.isfile(filename):
f = open(filename,'rb')
data = f.read()
f.close()
sha256hash = hashlib.sha256(data)
sha256hashed = sha256hash.hexdigest().encode('ascii')
sha256 = sha256hashed
size = len(data)
else:
#otherwise we have no idea
sha256 = None
size = None
filestoverify[filename] = [sha256,size,'M']
else:
print('error: same file with different hash/size during downloadnew phase old:'+repr(filestoverify[filename])+' new:'+repr(sha256andsize))
sys.exit(1)
else:
filestoverify[filename] = sha256andsize
addtofilequeue(filequeue,filename)
def addtofilequeue(filequeue,filename):
    filenamesplit = filename.split(b'/')
    if b'dists' in filenamesplit:
        if filename.endswith(b'.gz'):
            # process gz files with high priority so they can be used as substitutes for their uncompressed counterparts
            heappush(filequeue,(10,filename))
        else:
            heappush(filequeue,(20,filename))
    else:
        heappush(filequeue,(30,filename))
#regex used for filename sanity checks
pfnallowed = re.compile(b'[a-z0-9A-Z\-_:\+~\.]+',re.ASCII)
shaallowed = re.compile(b'[a-z0-9]+',re.ASCII)
def ensuresafepath(path):
    pathsplit = path.split(b'/')
    if path[:1] == b'/':
        print("path must be relative")
        sys.exit(1)
    for component in pathsplit:
        if not pfnallowed.fullmatch(component):
            print("component "+ascii(component)+" in path "+ascii(path)+" contains unexpected characters")
            sys.exit(1)
        elif component[:1] == b'.':
            print("filenames starting with a dot are not allowed")
            sys.exit(1)
def geturl(fileurl):
if use_urllib3:
response = dlmanager.request("GET", fileurl.decode('ascii'))
ts = getts(fileurl, response)
return (response.data, ts)
else:
with urllib.request.urlopen(fileurl.decode('ascii')) as response:
data = response.read()
ts = getts(fileurl, response)
return (data, ts)
def getts(fileurl, response):
if fileurl[:7] == b'file://':
ts = os.path.getmtime(fileurl[7:])
else:
dt = parsedate_to_datetime(response.getheader('Last-Modified'))
if dt.tzinfo is None:
dt = dt.replace(tzinfo=timezone.utc)
ts = dt.timestamp()
return ts
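# name for the temporary copy of `path`: "<path>.new" alongside the file, or,
# when --tmpdir is given, the same name flattened (/ replaced by ~) inside tmpdir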
def makenewpath(path):
if args.tmpdir is None:
return path+b'.new'
else:
return os.path.join(args.tmpdir.encode('ascii'),(path+b'.new').replace(b'/',b'~'))
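# fetch `path` so that it matches the expected sha256 and size, trying in
# order: an existing "<path>.new" file, the current on-disk file, hardlinks
# from --sourcepool, decompressing a gzipped sibling already in the file list,
# the mirrordirector (pool files only), the main/base URL, and finally the
# last-resort hash pool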
def getfile(path,sha256,size):
ensuresafepath(path)
if not shaallowed.fullmatch(sha256):
print('invalid character in sha256 hash')
sys.exit(1)
#hashfn = b'../hashpool/' + sha256[:2] +b'/'+ sha256[:4] +b'/'+ sha256
#if os.path.isfile(hashfn):
# if os.path.getsize(hashfn) != size:
# print('size mismatch on existing file in hash pool')
# sys.exit(1)
#else:
# secondhashfn = None
# if args.secondpool is not None:
# secondhashfn = os.path.join(args.secondpool.encode('ascii'),sha256[:2] +b'/'+ sha256[:4] +b'/'+ sha256)
# #print(secondhashfn)
# if not os.path.isfile(secondhashfn):
# secondhashfn = None
# if secondhashfn is None:
# else:
# print('copying '+path.decode('ascii')+' with hash '+sha256.decode('ascii')+' from secondary pool')
# f = open(secondhashfn,'rb')
# data = f.read()
# f.close()
# ts = os.path.getmtime(secondhashfn)
# sha256hash = hashlib.sha256(data)
# sha256hashed = sha256hash.hexdigest().encode('ascii')
# if (sha256 != sha256hashed):
# #print(repr(filesize))
# #print(repr(sha256))
# #print(repr(sha256hashed))
# print('hash mismatch while downloading file '+path.decode('ascii')+' '+sha256.decode('ascii')+' '+sha256hashed.decode('ascii'));
# sys.exit(1)
# if len(data) != size:
# print('size mismatch while downloading file')
# sys.exit(1)
# hashdir = os.path.dirname(hashfn)
# os.makedirs(hashdir,exist_ok=True)
# f = open(hashfn,'wb')
# f.write(data)
# f.close()
#
# os.utime(hashfn,(ts,ts))
if len(os.path.dirname(path)) > 0:
os.makedirs(os.path.dirname(path),exist_ok=True)
havenewfile = os.path.isfile(makenewpath(path))
if havenewfile: # "new" file already exists, lets check the hash
fn = makenewpath(path)
sha256hashed, tl = getfilesha256andsize(fn)
if (sha256 == sha256hashed) and (size == tl):
print('existing file '+path.decode('ascii')+' matched by hash and size')
fileupdates.add(path)
return # no download needed but rename is
if os.path.isfile(path): # file already exists
if (size == os.path.getsize(path)): #no point reading the data and calculating a hash if the size does not match
if (not args.repair) and (path in oldknownfiles) and (not havenewfile):
#shortcut exit if file is unchanged, we skip this if a "new" file was detected because
#that means some sort of update was going on to the file and may need to be finished/cleaned up.
oldsha256,oldsize,oldstatus = oldknownfiles[path]
if (oldsha256 == sha256) and (oldsize == size) and (oldstatus != 'F'):
return # no update needed
sha256hashed, tl = getfilesha256andsize(path)
if (sha256 == sha256hashed) and (size == tl):
print('existing file '+path.decode('ascii')+' matched by hash and size')
if havenewfile:
#if file is up to date but a "new" file exists and is bad
#(we wouldn't have got this far if it was good)
#schedule the "new" file for removal by adding it to "basefiles"
basefiles.add(makenewpath(path))
return # no update needed
if os.path.isfile(path): # file already exists
fileupdates.add(path)
if os.path.isfile(makenewpath(path)):
os.remove(makenewpath(path))
outputpath = makenewpath(path)
else:
outputpath = path
pathsplit = path.split(b'/')
if (pathsplit[1:2] == [b'pool']) and (args.debugskippool):
print('skipping download of '+path.decode('ascii')+' because --debugskippool was specified')
return
if (args.internal is not None) and (pathsplit[0] == b'raspbian'):
fileurl = args.internal.encode('ascii') +b'/private/' + b'/'.join(pathsplit[1:])
else:
fileurl = baseurl + b'/' + path
data = None
if args.sourcepool is not None:
for sourcepool in args.sourcepool:
#print(repr(args.sourcepool))
#print(repr(sourcepool))
sourcepool = sourcepool.encode('ascii')
if (len(pathsplit) > 1) and (pathsplit[1] == b'pool'):
spp = os.path.join(sourcepool,b'/'.join(pathsplit[2:]))
if os.path.isfile(spp) and (size == os.path.getsize(spp)):
print('trying file from sourcepool '+spp.decode('ascii'))
ts = os.path.getmtime(spp)
f = open(spp,'rb')
data = f.read()
f.close()
sha256hash = hashlib.sha256(data)
sha256hashed = sha256hash.hexdigest().encode('ascii')
if (sha256 != sha256hashed):
#print(repr(filesize))
#print(repr(sha256))
#print(repr(sha256hashed))
print('hash mismatch while trying file from sourcepool, ignoring file');
data = None
continue
try:
    os.link(spp,outputpath)
    print('successfully hardlinked file to source pool')
except OSError:
    print('file in source pool was good but hard linking failed, copying file instead')
    break
fdownloads.write(outputpath+b'\n')
fdownloads.flush()
return
if data is None:
if path+b'.gz' in knownfiles:
if path+b'.gz' in fileupdates:
gzfile = makenewpath(path+b'.gz')
else:
gzfile = path+b'.gz'
print('uncompressing '+gzfile.decode('ascii')+' with hash '+sha256.decode('ascii')+' to '+outputpath.decode('ascii'))
f = gzip.open(gzfile)
data = f.read()
f.close()
ts = os.path.getmtime(gzfile)
if not checkdatahash(data, sha256, 'hash mismatch while uncompressing file ', path, ''):
sys.exit(1)
if len(data) != size:
print('size mismatch while uncompressing file')
sys.exit(1)
#use slicing so we don't error if pathsplit only has one item
if (data is None) and (mdurl is not None) and (pathsplit[1:2] == [b'pool']):
fileurl = mdurl + b'/' + path
#fileurl = mdurl + b'/' + b'/'.join(pathsplit[1:])
data, ts = getandcheckfile(fileurl, sha256, size, path, outputpath, ' from mirrordirector',' trying main server instead')
if data is None:
if (args.internal is not None) and (pathsplit[0] == b'raspbian'):
fileurl = args.internal.encode('ascii') +b'/private/' + b'/'.join(pathsplit[1:])
elif (args.debugfdistsurl is not None) and (stage == 'downloadnew') and (b'dists' in pathsplit):
fileurl = args.debugfdistsurl.encode('ascii') + b'/' + path
else:
fileurl = baseurl + b'/' + path
data, ts = getandcheckfile(fileurl, sha256, size, path, outputpath, '','')
if data is None:
if (stage == 'downloadnew') and (b'dists' not in pathsplit):
print('continuing despite download failure of '+path.decode('ascii')+', may revisit later')
global dlerrorcount
dlerrorcount += 1
knownfiles[path][2] = 'F'
return
if (data is None) and (hpurl is not None):
print('failed to get '+path.decode('ascii')+' from normal sources, trying hash pool')
ensuresafepath(sha256)
fileurl = hpurl + b'/' + sha256[0:2] + b'/' + sha256[0:4] + b'/' + sha256
data, ts = getandcheckfile(fileurl, sha256, size, path, outputpath, '', '')
if data is None:
print('failed to get '+path.decode('ascii')+' aborting')
sys.exit(1)
if data is not ...: #... is used to indicate that the file has been downloaded directly to disk and we don't
# need to write it out here.
f = open(outputpath,'wb')
f.write(data)
f.close()
os.utime(outputpath,(ts,ts))
fdownloads.write(outputpath+b'\n')
fdownloads.flush()
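# hash a file in bs-sized chunks (so large packages never need to fit in
# memory at once), returning (hex sha256 digest as ascii bytes, total length)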
def getfilesha256andsize(fn):
sha256hash = hashlib.sha256()
f = open(fn, 'rb')
l = bs
tl = 0
while l == bs:
data = f.read(bs)
l = len(data)
tl += l
sha256hash.update(data)
f.close()
sha256hashed = sha256hash.hexdigest().encode('ascii')
return sha256hashed, tl
bs = 16 * 1024 * 1024
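# stream fileurl to disk while hashing; on success returns (..., ts), where
# Ellipsis flags "already written to disk", and on failure cleans up the
# partial file and returns (None, None)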
def getandcheckfile(fileurl, sha256, size, path, outputpath, errorfromstr, errorsuffix):
f = None
try:
sha256hash = hashlib.sha256()
if path == outputpath:
writepath = makenewpath(path)
viamsg = ' via '+writepath.decode('ascii')
else:
writepath = outputpath
viamsg = ''
print(
'downloading ' + fileurl.decode('ascii') + ' with hash ' + sha256.decode(
'ascii') + ' to ' + outputpath.decode(
'ascii') + viamsg)
f = open(writepath, 'wb')
if use_urllib3:
response = dlmanager.request("GET", fileurl.decode('ascii'), preload_content=False)
ts = getts(fileurl, response)
tl = 0
for data in response.stream(bs):
tl += len(data)
f.write(data)
sha256hash.update(data)
response.release_conn()
else:
with urllib.request.urlopen(fileurl.decode('ascii')) as response:
l = bs
tl = 0
while l == bs:
data = response.read(bs)
f.write(data)
l = len(data)
tl += l
sha256hash.update(data)
ts = getts(fileurl, response)
data = ... #used as a flag to indicate that the data is written to disk rather than stored in memory
f.close()
if not testandreporthash(sha256hash, sha256, 'hash mismatch while downloading file' + errorfromstr + ' ', path,
errorsuffix):
data = None
elif tl != size:
print('size mismatch while downloading file' + errorfromstr + '.' + errorsuffix)
data = None
except Exception as e:
print('exception ' + str(e) + ' while downloading file' + errorfromstr + '.' + errorsuffix)
if f is not None:
f.close()
data = None
ts = None
if data is not None:
#success
if writepath != outputpath:
os.rename(writepath, outputpath)
else:
#failure, cleanup writepath if necessary
if os.path.exists(writepath):
os.remove(writepath)
return data, ts
def checkdatahash(data, sha256, errorprefix, path, errorsuffix):
sha256hash = hashlib.sha256(data)
return testandreporthash(sha256hash, sha256, errorprefix, path, errorsuffix)
def testandreporthash(sha256hash, sha256, errorprefix, path, errorsuffix):
sha256hashed = sha256hash.hexdigest().encode('ascii')
if (sha256 != sha256hashed):
# print(repr(filesize))
# print(repr(sha256))
# print(repr(sha256hashed))
print(errorprefix + path.decode('ascii') + ' ' + sha256.decode('ascii') + ' ' + sha256hashed.decode(
'ascii') + errorsuffix);
return False
return True
if (args.mdurl is None) or (args.mdurl.upper() == 'NONE'):
mdurl = None
else:
mdurl = args.mdurl.encode('ascii')
if (args.hpurl is None) or (args.hpurl.upper() == 'NONE'):
hpurl = None
else:
hpurl = args.hpurl.encode('ascii')
if args.baseurl is None:
baseurl = b'https://archive.raspbian.org'
mdurl = b'http://mirrordirector.raspbian.org'
hpurl = b'http://snapshot.raspbian.org/hashpool'
else:
baseurl = args.baseurl.encode('ascii')
symlinkupdates = list()
fileupdates = set()
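# open `filepath` for reading, preferring a pending ".new" replacement and
# falling back to a gzipped sibling (decompressed transparently) if needed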
def opengu(filepath):
#print('in opengu')
#print('filepath = '+repr(filepath))
#print('fileupdates = '+repr(fileupdates))
f = None
if (filepath in fileupdates):
print((b'opening '+makenewpath(filepath)+b' for '+filepath).decode('ascii'))
f = open(makenewpath(filepath),'rb')
elif (filepath+b'.gz' in fileupdates):
print((b'opening '+makenewpath(filepath+b'.gz')+b' for '+filepath).decode('ascii'))
f = gzip.open(makenewpath(filepath+b'.gz'),'rb')
elif os.path.exists(filepath):
print((b'opening '+filepath+b' for '+filepath).decode('ascii'))
f = open(filepath,'rb')
elif os.path.exists(filepath+b'.gz'):
print((b'opening '+filepath+b'.gz for '+filepath).decode('ascii'))
f = gzip.open(filepath+b'.gz','rb')
return f
oldsymlinks = set()
newsymlinks = set()
fdownloads = open(makenewpath(b'raspbmirrordownloads.txt'),"ab")
dlerrorcount = 0;
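# up to three passes: "scanexisting" inventories the current tree from the old
# snapshotindex.txt, "downloadnew" fetches the new index and downloads what
# changed, and "finalize" repeats the download only if stage 2 had failures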
for stage in ("scanexisting","downloadnew","finalize"):
if stage == "finalize":
if dlerrorcount == 0:
print('skipping stage 3 as there were no download failures in stage 2')
#we can finish now.
break
print('stage 3, download final updates')
oldknownfiles = knownfiles
oldsymlinks |= newsymlinks
newsymlinks = set()
if stage == "downloadnew":
print('stage 2, main download')
oldknownfiles = knownfiles
basefiles = set(oldknownfiles.keys())
if stage == "scanexisting":
print('stage 1, scan existing')
else:
if args.internal is not None:
fileurl = args.internal.encode('ascii') + b'/snapshotindex.txt'
else:
fileurl = baseurl +b'/snapshotindex.txt'
if (stage == "downloadnew") and (args.debugfif is not None):
fileurl = args.debugfif.encode('ascii')
(filedata,ts) = geturl(fileurl)
f = open(makenewpath(b'snapshotindex.txt'),'wb')
if (args.tlwhitelist is None) and (args.distswhitelist is None):
f.write(filedata)
else:
lines = filedata.split(b'\n')
if lines[-1] == b'':
del(lines[-1])
if args.tlwhitelist is not None:
tlwhitelist = set(args.tlwhitelist.encode('ascii').split(b','))
linesnew = []
for line in lines:
linesplit = line.split(b'/')
if linesplit[0] in tlwhitelist:
linesnew.append(line)
lines = linesnew
if args.distswhitelist is not None:
distswhitelist = set(args.distswhitelist.encode('ascii').split(b','))
founddists = set()
foundesdists = set()
linesnew = []
for line in lines:
path, sizeandsha = line.split(b' ')
pathsplit = path.split(b'/')
#print(pathsplit)
#print(len(pathsplit))
if (len(pathsplit) > 2) and (pathsplit[1] == b'dists'):
if sizeandsha[0:2] == b'->': #symlink
target = sizeandsha[2:]
if target in distswhitelist:
linesnew.append(line)
elif pathsplit[2] in distswhitelist:
linesnew.append(line)
founddists.add((pathsplit[0],pathsplit[2]))
if (len(pathsplit) > 3) and (pathsplit[3] == b'extrasources'):
foundesdists.add((pathsplit[0],pathsplit[2]))
elif (len(pathsplit) > 1) and pathsplit[1] == b'pool':
pass
else:
linesnew.append(line)
lines = linesnew
if founddists == set():
print('none of the whitelisted distributions were found in the index file')
sys.exit(1)
missingesdists = founddists - foundesdists
if missingesdists != set():
for toplevel,distribution in missingesdists:
print((b'missing extra sources file for '+toplevel+b'/dists/'+distribution).decode('ascii'))
sys.exit(1)
for line in lines:
f.write(line+b'\n')
f.close()
os.utime(makenewpath(b'snapshotindex.txt'),(ts,ts))
knownfiles = OrderedDict()
filequeue = []
if stage == "scanexisting":
if os.path.isfile(b'snapshotindex.txt'):
f = open(b'snapshotindex.txt','rb')
else:
continue
else:
f = open(makenewpath(b'snapshotindex.txt'),'rb')
for line in f:
line = line.strip()
filepath, sizeandsha = line.split(b' ')
if sizeandsha[:2] == b'->':
symlinktarget = sizeandsha[2:]
ensuresafepath(filepath)
ensuresafepath(symlinktarget)
if len(os.path.dirname(filepath)) > 0:
os.makedirs(os.path.dirname(filepath),exist_ok=True)
if stage == "scanexisting":
oldsymlinks.add(filepath)
else:
if os.path.islink(filepath):
if os.readlink(filepath) != symlinktarget:
symlinkupdates.append((filepath,symlinktarget))
else:
print('creating symlink '+filepath.decode('ascii')+' -> '+symlinktarget.decode('ascii'))
os.symlink(symlinktarget,filepath)
newsymlinks.add(filepath)
else:
size,sha256 = sizeandsha.split(b':')
size = int(size)
knownfiles[filepath] = [sha256,size,'R']
addtofilequeue(filequeue,filepath)
f.close()
extrasources = {}
while filequeue:
(priority, filepath) = heappop(filequeue)
#print('processing '+filepath.decode('ascii'))
sha256,size,status = knownfiles[filepath]
if (stage != "scanexisting") and ((filepath+b'.gz' not in knownfiles) or (status == 'R') or os.path.exists(filepath)):
getfile(filepath,sha256,size)
pathsplit = filepath.split(b'/')
#print(pathsplit[-1])
#if (pathsplit[-1] == b'Packages'):
# print(repr(pathsplit))
if (pathsplit[-1] == b'Release') and (pathsplit[-3] == b'dists'):
distdir = b'/'.join(pathsplit[:-1])
f = opengu(filepath)
if f is None:
if stage == 'scanexisting':
print('warning: cannot find '+filepath.decode('ascii')+' while scanning existing state')
continue
else:
print('error: cannot find '+filepath.decode('ascii')+' or a gzipped substitute, aborting')
sys.exit(1)
insha256 = False;
for line in f:
#print(repr(line[0]))
if (line == b'SHA256:\n'):
insha256 = True
elif ((line[0] == 32) and insha256):
linesplit = line.split()
filename = distdir+b'/'+linesplit[2]
#if filename in knownfiles:
# if files
#print(filename)
addfilefromdebarchive(knownfiles,filequeue,filename,linesplit[0],linesplit[1]);
else:
insha256 = False
f.close()
elif (pathsplit[-1] == b'Packages') and ((pathsplit[-5] == b'dists') or ((pathsplit[-3] == b'debian-installer') and (pathsplit[-6] == b'dists'))):
if pathsplit[-5] == b'dists':
toplevel = b'/'.join(pathsplit[:-5])
else:
toplevel = b'/'.join(pathsplit[:-6])
print('found packages file: '+filepath.decode('ascii'))
pf = opengu(filepath)
if pf is None:
if stage == 'scanexisting':
print('warning: cannot find '+filepath.decode('ascii')+' while scanning existing state')
continue
else:
print('error: cannot find '+filepath.decode('ascii')+' or a gzipped substitute, aborting')
sys.exit(1)
filename = None
size = None
sha256 = None
for line in pf:
linesplit = line.split()
if (len(linesplit) == 0):
if (filename != None):
addfilefromdebarchive(knownfiles,filequeue,filename,sha256,size);
filename = None
size = None
sha256 = None
elif (linesplit[0] == b'Filename:'):
filename = toplevel+b'/'+linesplit[1]
elif (linesplit[0] == b'Size:'):
size = linesplit[1]
elif (linesplit[0] == b'SHA256:'):
sha256 = linesplit[1]
pf.close()
elif (pathsplit[-1] == b'Sources') and (pathsplit[-5] == b'dists'):
print('found sources file: '+filepath.decode('ascii'))
toplevel = b'/'.join(pathsplit[:-5])
pf = opengu(filepath)
if pf is None:
if stage == 'scanexisting':
print('warning: cannot find '+filepath.decode('ascii')+' while scanning existing state')
continue
else:
print('error: cannot find '+filepath.decode('ascii')+' or a gzipped substitute, aborting')
sys.exit(1)
filesfound = [];
directory = None
insha256p = False;
for line in pf:
linesplit = line.split()
if (len(linesplit) == 0):
for ls in filesfound:
#print(repr(ls))
addfilefromdebarchive(knownfiles,filequeue,toplevel+b'/'+directory+b'/'+ls[2],ls[0],ls[1]);
filesfound = [];
directory = None
insha256p = False
elif ((line[0] == 32) and insha256p):
filesfound.append(linesplit)
elif (linesplit[0] == b'Directory:'):
insha256p = False
directory = linesplit[1]
elif (linesplit[0] == b'Checksums-Sha256:'):
insha256p = True
else:
insha256p = False
pf.close()
elif (args.distswhitelist is not None) and (pathsplit[-1] == b'extrasources') and (pathsplit[-3] == b'dists'):
print('found extrasources file: '+filepath.decode('ascii'))
esf = opengu(filepath)
if esf is None:
if stage == 'scanexisting':
print('warning: cannot find '+filepath.decode('ascii')+' while scanning existing state')
continue
else:
print('error: cannot find '+filepath.decode('ascii')+' or a gzipped substitute, aborting')
sys.exit(1)
for line in esf:
line = line.strip()
filename , shaandsize = line.split(b' ')
size , sha256 = shaandsize.split(b':')
addfilefromdebarchive(knownfiles,filequeue,filename,sha256,size)
extrasources[filename] = shaandsize
#print(line)
fdownloads.close()
fdownloads = open(makenewpath(b'raspbmirrordownloads.txt'),"rb")
for line in fdownloads:
basefiles.add(line.strip())
fdownloads.close()
def throwerror(error):
raise error
if args.cleanup:
towalk = os.walk('.', True, throwerror, False)
for (dirpath, dirnames, filenames) in towalk:
for filename in (filenames + dirnames): # os.walk seems to regard symlinks to directories as directories.
filepath = os.path.join(dirpath, filename)[2:].encode('ascii') # [2:] is to strip the ./ prefix
# print(filepath)
if os.path.islink(filepath):
oldsymlinks.add(filepath)
for filename in filenames:
filepath = os.path.join(dirpath, filename)[2:].encode('ascii') # [2:] is to strip the ./ prefix
if not os.path.islink(filepath) and not filepath.startswith(b'snapshotindex.txt') and not filepath.startswith(b'raspbmirrordownloads.txt'):
basefiles.add(filepath)
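# promote ".new" files into place, retarget changed symlinks, then delete
# whatever remains on disk that the new index no longer references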
print('stage 4, moves and deletions')
for filepath in fileupdates:
print((b'renaming '+makenewpath(filepath)+b' to '+filepath).decode('ascii'))
os.replace(makenewpath(filepath),filepath)
for (filepath,symlinktarget) in symlinkupdates:
print('updating symlink '+filepath.decode('ascii')+' -> '+symlinktarget.decode('ascii'))
os.remove(filepath)
os.symlink(symlinktarget,filepath)
removedfiles = (basefiles | oldsymlinks) - (set(knownfiles.keys()) | newsymlinks)
def isemptydir(dirpath):
#scandir would be significantly more efficient, but needs python 3.6 or above
#which is not reasonable to expect at this time.
#return os.path.isdir(dirpath) and ((next(os.scandir(dirpath), None)) is None)
return os.path.isdir(dirpath) and (len(os.listdir(dirpath)) == 0)
if args.tmpdir is None:
tmpdir = None
else:
tmpdir = args.tmpdir.encode('ascii')
if not tmpdir.endswith(b'/'):
tmpdir += b'/'
for filepath in removedfiles:
#file may not actually exist, either due to earlier updates gone-wrong
#or due to the file being a non-realised uncompressed version of
#a gzipped file.
if os.path.exists(filepath):
checkpath = filepath
#if the path points into the temporary directory we only check the part of it
#that is relative to the temporary directory.
if tmpdir is not None and filepath.startswith(tmpdir):
checkpath = filepath[len(tmpdir):]
ensuresafepath(checkpath)
print('removing '+filepath.decode('ascii'))
os.remove(filepath)
#clean up empty directories.
dirpath = os.path.dirname(filepath)
while (len(dirpath) != 0) and isemptydir(dirpath):
print('removing empty dir '+dirpath.decode('ascii'))
os.rmdir(dirpath)
dirpath = os.path.dirname(dirpath)
f = open(makenewpath(b'snapshotindex.txt'),'ab')
for filename, shaandsize in extrasources.items():
f.write(filename+b' '+shaandsize+b'\n')
f.close()
os.rename(makenewpath(b'snapshotindex.txt'),b'snapshotindex.txt')
os.remove(makenewpath(b'raspbmirrordownloads.txt'))