add csc-sync-exec
This commit is contained in:
parent
52b5945857
commit
fd75bedb61
|
@ -5,6 +5,7 @@ import (
|
|||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
"gopkg.in/ini.v1"
|
||||
|
||||
|
@ -139,6 +140,8 @@ type Repo struct {
|
|||
PasswordFile string `ini:"password_file"`
|
||||
// the archive name for the ftpsync command (required for csc-sync-ftpsync)
|
||||
FtpsyncArchive string `ini:"ftpsync_archive"`
|
||||
// the sync command to execute with `sh -c` (required for csc-sync-exec)
|
||||
ExecStr string `ini:"exec"`
|
||||
// full path to file storing the repo sync state
|
||||
StateFile string `ini:"-"`
|
||||
// full path for file storing general logging of this repo
|
||||
|
@ -258,11 +261,23 @@ func LoadConfig(configPath string, doneChan chan SyncResult, stopChan chan struc
|
|||
panic("Missing or invalid frequency for " + repo.Name)
|
||||
} else if repo.SyncType == "" {
|
||||
panic("Missing sync type from " + repo.Name)
|
||||
} else if repo.SyncType == "csc-sync-ftpsync" {
|
||||
if repo.FtpsyncArchive == "" {
|
||||
panic("Missing ftpsync archive for " + repo.Name)
|
||||
}
|
||||
} else if repo.SyncType == "csc-sync-exec" {
|
||||
if repo.ExecStr == "" {
|
||||
panic("Missing 'exec' for " + repo.Name)
|
||||
}
|
||||
// Allow some limited placeholders
|
||||
repo.ExecStr = strings.ReplaceAll(repo.ExecStr, "{{repoLogFile}}", repo.RepoLogFile)
|
||||
repo.ExecStr = strings.ReplaceAll(repo.ExecStr, "{{rsyncLogFile}}", repo.RsyncLogFile)
|
||||
if strings.Contains(repo.ExecStr, "{{") {
|
||||
panic(repo.Name + " has unsatisfied placeholders in 'exec': " + repo.ExecStr)
|
||||
}
|
||||
} else if repo.LocalDir == "" {
|
||||
panic("Missing local download location for " + repo.Name)
|
||||
} else if repo.SyncType == "csc-sync-ftpsync" && repo.FtpsyncArchive == "" {
|
||||
panic("Missing ftpsync archive for " + repo.Name)
|
||||
} else if repo.SyncType != "csc-sync-ftpsync" && repo.SyncType != "csc-sync-fedora" && repo.RsyncHost == "" {
|
||||
} else if repo.RsyncHost == "" {
|
||||
panic("Missing rsync host for " + repo.Name)
|
||||
}
|
||||
|
||||
|
|
|
@ -279,8 +279,11 @@ rsync_dir = ftp.freebsd.org/pub/FreeBSD/
|
|||
; This handles both fedora/linux and fedora/epel
|
||||
; See ~/quick-fedora-mirror/quick-fedora-mirror.conf
|
||||
[fedora]
|
||||
sync_type = csc-sync-fedora
|
||||
sync_type = csc-sync-exec
|
||||
frequency = bi-hourly
|
||||
; See ~/quick-fedora-mirror/quick-fedora-mirror.conf
|
||||
; LOGFILE is set to {{repoLogFile}} (~/merlin/log/fedora.log)
|
||||
exec = truncate --size=0 {{repoLogFile}}; cd ~/quick-fedora-mirror && ./quick-fedora-mirror > {{rsyncLogFile}}
|
||||
|
||||
[ubuntu-ports-releases]
|
||||
sync_type = csc-sync-standard
|
||||
|
@ -396,12 +399,9 @@ rsync_host = rsync.alpinelinux.org
|
|||
rsync_dir = alpine
|
||||
|
||||
[raspbian]
|
||||
;verbose = true
|
||||
sync_type = csc-sync-standard
|
||||
sync_type = csc-sync-exec
|
||||
frequency = bi-hourly
|
||||
local_dir = raspbian
|
||||
rsync_host = raspbian.freemirror.org
|
||||
rsync_dir = raspbian
|
||||
exec = cd /mirror/root/raspbian && mkdir -p .~tmp~ && ~/raspbmirror/raspbmirror.py --tmpdir ./.~tmp~ --sourcepool /mirror/root/debian/pool > {{rsyncLogFile}}
|
||||
|
||||
[raspberrypi]
|
||||
sync_type = csc-sync-standard-ipv6
|
||||
|
|
|
@ -55,8 +55,8 @@ func getSyncCommand(repo *config.Repo) (cmds [][]string) {
|
|||
return append(cmds, cscSyncDebianStep1(repo), cscSyncDebianStep2(repo))
|
||||
case "csc-sync-debian-cd":
|
||||
return append(cmds, cscSyncDebianCD(repo))
|
||||
case "csc-sync-fedora":
|
||||
return append(cmds, cscSyncFedora(repo))
|
||||
case "csc-sync-exec":
|
||||
return append(cmds, cscSyncExec(repo))
|
||||
case "csc-sync-ftpsync":
|
||||
return append(cmds, cscSyncFtpsync(repo))
|
||||
case "csc-sync-s3":
|
||||
|
@ -293,14 +293,8 @@ func cscSyncDebianCD(repo *config.Repo) []string {
|
|||
return args
|
||||
}
|
||||
|
||||
func cscSyncFedora(repo *config.Repo) []string {
|
||||
// Make sure that repo.RepoLogFile (default: ~/merlin/log/fedora.log)
|
||||
// is the same as the LOGFILE setting in ~/quick-fedora-mirror/quick-fedora-mirror.conf
|
||||
return []string{
|
||||
"sh", "-c",
|
||||
"truncate --size=0 " + repo.RepoLogFile + "; " +
|
||||
"cd ~/quick-fedora-mirror && ./quick-fedora-mirror > " + repo.RsyncLogFile,
|
||||
}
|
||||
func cscSyncExec(repo *config.Repo) []string {
|
||||
return []string{"sh", "-c", repo.ExecStr}
|
||||
}
|
||||
|
||||
func cscSyncFtpsync(repo *config.Repo) []string {
|
||||
|
|
|
@ -0,0 +1,5 @@
|
|||
This script is to be used for Raspbian, which is NOT the same as Raspberry Pi OS.
|
||||
|
||||
It was downloaded from [here](https://raw.githubusercontent.com/plugwash/raspbian-tools/master/raspbmirror.py).
|
||||
|
||||
See [here](https://www.raspbian.org/RaspbianMirrors) for details about Raspbian mirroring.
|
|
@ -0,0 +1,835 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
# Copyright 2018 Peter Green
|
||||
# Released under the MIT/Expat license, see doc/COPYING
|
||||
|
||||
import os
|
||||
import sys
|
||||
import hashlib
|
||||
import gzip
|
||||
import stat
|
||||
#from sortedcontainers import SortedDict
|
||||
#from sortedcontainers import SortedList
|
||||
from collections import deque
|
||||
from collections import OrderedDict
|
||||
from datetime import datetime
|
||||
from email.utils import parsedate_to_datetime
|
||||
import argparse
|
||||
import re
|
||||
from heapq import heappush, heappop
|
||||
import fcntl
|
||||
|
||||
parser = argparse.ArgumentParser(description="mirror raspbian repo.")
|
||||
parser.add_argument("baseurl", help="base url for source repo (e.g. https://archive.raspbian.org/ )",nargs='?')
|
||||
parser.add_argument("mdurl", help="base url for mirrordirector or local source mirror (e.g. https://mirrordirector.raspbian.org/ )",nargs='?')
|
||||
parser.add_argument("hpurl", help="base url for last result hash pool (e.g. http://snapshot.raspbian.org/hashpool )",nargs='?')
|
||||
|
||||
parser.add_argument("--internal", help=argparse.SUPPRESS) #base URL for private repo (internal use only)
|
||||
parser.add_argument("--sourcepool", help="specify a source pool to look for packages in before downloading them (useful if maintaining multiple mirrors)",action='append')
|
||||
parser.add_argument("--tmpdir", help="specify a temporary directory to avoid storing temporary files in the output tree, must be on the same filesystem as the output tree")
|
||||
|
||||
#debug option to set the index file used for the "downloadnew" phase but not the "finalize" phase, used to test error recovery.
|
||||
parser.add_argument("--debugfif", help=argparse.SUPPRESS)
|
||||
#debug option to set the source url used to download "dists" files during the "downloadnew" phase, used to test error recovery.
|
||||
parser.add_argument("--debugfdistsurl", help=argparse.SUPPRESS)
|
||||
|
||||
parser.add_argument("--tlwhitelist", help="specify comma-seperated whitelist of top-level directories")
|
||||
|
||||
parser.add_argument("--cleanup",help="scan for and remove files not managed by raspbmirror from mirror tree", action="store_true")
|
||||
|
||||
parser.add_argument("--debugskippool",help="skip downloading pool data, only download metadata (for debugging)",action="store_true")
|
||||
|
||||
parser.add_argument("--distswhitelist", help="specify comman seperated list of distributions")
|
||||
|
||||
parser.add_argument("--nolock", help="don't try to lock the target directory", action="store_true")
|
||||
|
||||
parser.add_argument("--repair", help="during mirroring, verify that all on-disk files match the expected sha256", action="store_true")
|
||||
|
||||
parser.add_argument("--urllib", help="force usage of the builtin urllib module, even if urllib3 is present", action="store_true")
|
||||
|
||||
parser.add_argument("--urllib3", help="force usage of the urllib3 module, panics if the dependency is missing", action="store_true")
|
||||
|
||||
parser.add_argument("--ipv4", help="force usage of IPv4 addresses. Requires urllib3", action="store_true")
|
||||
|
||||
parser.add_argument("--ipv6", help="force usage of IPv6 addresses. Requires urllib3", action="store_true")
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
if not args.nolock:
|
||||
lockfd = os.open('.',os.O_RDONLY)
|
||||
fcntl.flock(lockfd,fcntl.LOCK_EX | fcntl.LOCK_NB)
|
||||
|
||||
if args.urllib and args.urllib3:
|
||||
print("error: flags --urllib and --urllib3 are in conflict")
|
||||
exit(1)
|
||||
|
||||
if args.urllib:
|
||||
import urllib.request
|
||||
use_urllib3 = False
|
||||
elif args.urllib3:
|
||||
import urllib3
|
||||
use_urllib3 = True
|
||||
else:
|
||||
# auto detect urllib3
|
||||
try:
|
||||
import urllib3
|
||||
use_urllib3 = True
|
||||
except:
|
||||
import urllib.request
|
||||
use_urllib3 = False
|
||||
|
||||
if args.ipv4 and args.ipv6:
|
||||
print("error: flags --ipv4 and --ipv6 are in conflict")
|
||||
exit(1)
|
||||
|
||||
if use_urllib3:
|
||||
# the number of pools should be greater than the number of concurrently used sites.
|
||||
# 10 should be safe.
|
||||
dlmanager = urllib3.PoolManager(num_pools=10)
|
||||
print("info: using urllib3")
|
||||
|
||||
# a fairly hacky way to force the usage of ipv4 or ipv6 addresses
|
||||
# https://stackoverflow.com/questions/33046733/force-requests-to-use-ipv4-ipv6
|
||||
if args.ipv4:
|
||||
import socket
|
||||
import requests.packages.urllib3.util.connection as urllib3_cn
|
||||
def allowed_gai_family():
|
||||
return socket.AF_INET
|
||||
urllib3_cn.allowed_gai_family = allowed_gai_family
|
||||
elif args.ipv6:
|
||||
import socket
|
||||
import requests.packages.urllib3.util.connection as urllib3_cn
|
||||
def allowed_gai_family():
|
||||
return socket.AF_INET6
|
||||
urllib3_cn.allowed_gai_family = allowed_gai_family
|
||||
else:
|
||||
print("info: using urllib")
|
||||
if args.ipv4:
|
||||
print("error: flag --ipv4 requires the urllib3 package")
|
||||
exit(1)
|
||||
elif args.ipv6:
|
||||
print("error: flag --ipv6 requires the urllib3 package")
|
||||
exit(1)
|
||||
|
||||
def addfilefromdebarchive(filestoverify,filequeue,filename,sha256,size):
|
||||
size = int(size)
|
||||
sha256andsize = [sha256,size,'M']
|
||||
if filename in filestoverify:
|
||||
if (sha256andsize[0:2] != filestoverify[filename][0:2]):
|
||||
if stage == 'scanexisting':
|
||||
print('warning: same file with different hash/size during scanexisting phase old:'+repr(filestoverify[filename])+' new:'+repr(sha256andsize))
|
||||
#find existing sha1/size of file on disk if it exists
|
||||
if os.path.isfile(filename):
|
||||
f = open(filename,'rb')
|
||||
data = f.read()
|
||||
f.close()
|
||||
sha256hash = hashlib.sha256(data)
|
||||
sha256hashed = sha256hash.hexdigest().encode('ascii')
|
||||
size = len(data)
|
||||
else:
|
||||
#otherwise we have no idea
|
||||
sha256 = None
|
||||
size = None
|
||||
filestoverify[filename] = [sha256,size,'M']
|
||||
else:
|
||||
print('error: same file with different hash/size during downloadnew phase old:'+repr(filestoverify[filename])+' new:'+repr(sha256andsize))
|
||||
sys.exit(1)
|
||||
else:
|
||||
filestoverify[filename] = sha256andsize
|
||||
addtofilequeue(filequeue,filename)
|
||||
|
||||
def addtofilequeue(filequeue,filename):
|
||||
filenamesplit = filename.split(b'/')
|
||||
if b'dists' in filenamesplit:
|
||||
if filename.endswith(b'.gz'):
|
||||
# process gz files with high priority so they can be used as substitutes for their uncompressed counterparts
|
||||
heappush(filequeue,(10,filename))
|
||||
else:
|
||||
heappush(filequeue,(20,filename))
|
||||
heappush(filequeue,(30,filename))
|
||||
|
||||
|
||||
#regex used for filename sanity checks
|
||||
pfnallowed = re.compile(b'[a-z0-9A-Z\-_:\+~\.]+',re.ASCII)
|
||||
shaallowed = re.compile(b'[a-z0-9]+',re.ASCII)
|
||||
|
||||
def ensuresafepath(path):
|
||||
pathsplit = path.split(b'/')
|
||||
if path[0] == '/':
|
||||
print("path must be relative")
|
||||
sys.exit(1)
|
||||
for component in pathsplit:
|
||||
if not pfnallowed.fullmatch(component):
|
||||
print("component "+ascii(component)+" in path "+ascii(path)+" contains unexpected characters")
|
||||
sys.exit(1)
|
||||
elif component[0] == '.':
|
||||
print("filenames starting with a dot are not allowed")
|
||||
sys.exit(1)
|
||||
|
||||
def geturl(fileurl):
|
||||
if use_urllib3:
|
||||
response = dlmanager.request("GET", fileurl.decode('ascii'))
|
||||
ts = getts(fileurl, response)
|
||||
return (response.data, ts)
|
||||
else:
|
||||
with urllib.request.urlopen(fileurl.decode('ascii')) as response:
|
||||
data = response.read()
|
||||
ts = getts(fileurl, response)
|
||||
return (data, ts)
|
||||
|
||||
def getts(fileurl, response):
|
||||
if fileurl[:7] == b'file://':
|
||||
ts = os.path.getmtime(fileurl[7:])
|
||||
else:
|
||||
dt = parsedate_to_datetime(response.getheader('Last-Modified'))
|
||||
if dt.tzinfo is None:
|
||||
dt = dt.replace(tzinfo=timezone.utc)
|
||||
ts = dt.timestamp()
|
||||
return ts
|
||||
|
||||
|
||||
def makenewpath(path):
|
||||
if args.tmpdir is None:
|
||||
return path+b'.new'
|
||||
else:
|
||||
return os.path.join(args.tmpdir.encode('ascii'),(path+b'.new').replace(b'/',b'~'))
|
||||
|
||||
def getfile(path,sha256,size):
|
||||
ensuresafepath(path)
|
||||
if not shaallowed.fullmatch(sha256):
|
||||
print('invalid character in sha256 hash')
|
||||
sys.exit(1)
|
||||
#hashfn = b'../hashpool/' + sha256[:2] +b'/'+ sha256[:4] +b'/'+ sha256
|
||||
#if os.path.isfile(hashfn):
|
||||
# if os.path.getsize(hashfn) != size:
|
||||
# print('size mismatch on existing file in hash pool')
|
||||
# sys.exit(1)
|
||||
#else:
|
||||
# secondhashfn = None
|
||||
# if args.secondpool is not None:
|
||||
# secondhashfn = os.path.join(args.secondpool.encode('ascii'),sha256[:2] +b'/'+ sha256[:4] +b'/'+ sha256)
|
||||
# #print(secondhashfn)
|
||||
# if not os.path.isfile(secondhashfn):
|
||||
# secondhashfn = None
|
||||
# if secondhashfn is None:
|
||||
# else:
|
||||
# print('copying '+path.decode('ascii')+' with hash '+sha256.decode('ascii')+' from secondary pool')
|
||||
# f = open(secondhashfn,'rb')
|
||||
# data = f.read()
|
||||
# f.close()
|
||||
# ts = os.path.getmtime(secondhashfn)
|
||||
# sha256hash = hashlib.sha256(data)
|
||||
# sha256hashed = sha256hash.hexdigest().encode('ascii')
|
||||
# if (sha256 != sha256hashed):
|
||||
# #print(repr(filesize))
|
||||
# #print(repr(sha256))
|
||||
# #print(repr(sha256hashed))
|
||||
# print('hash mismatch while downloading file '+path.decode('ascii')+' '+sha256.decode('ascii')+' '+sha256hashed.decode('ascii'));
|
||||
# sys.exit(1)
|
||||
# if len(data) != size:
|
||||
# print('size mismatch while downloading file')
|
||||
# sys.exit(1)
|
||||
# hashdir = os.path.dirname(hashfn)
|
||||
# os.makedirs(hashdir,exist_ok=True)
|
||||
# f = open(hashfn,'wb')
|
||||
# f.write(data)
|
||||
# f.close()
|
||||
#
|
||||
# os.utime(hashfn,(ts,ts))
|
||||
if len(os.path.dirname(path)) > 0:
|
||||
os.makedirs(os.path.dirname(path),exist_ok=True)
|
||||
havenewfile = os.path.isfile(makenewpath(path))
|
||||
if havenewfile: # "new" file already exists, lets check the hash
|
||||
fn = makenewpath(path)
|
||||
sha256hashed, tl = getfilesha256andsize(fn)
|
||||
if (sha256 == sha256hashed) and (size == tl):
|
||||
print('existing file '+path.decode('ascii')+' matched by hash and size')
|
||||
fileupdates.add(path)
|
||||
return # no download needed but rename is
|
||||
if os.path.isfile(path): # file already exists
|
||||
if (size == os.path.getsize(path)): #no point reading the data and calculating a hash if the size does not match
|
||||
if (not args.repair) and (path in oldknownfiles) and (not havenewfile):
|
||||
#shortcut exit if file is unchanged, we skip this if a "new" file was detected because
|
||||
#that means some sort of update was going on to the file and may need to be finished/cleaned up.
|
||||
oldsha256,oldsize,oldstatus = oldknownfiles[path]
|
||||
if (oldsha256 == sha256) and (oldsize == size) and (oldstatus != 'F'):
|
||||
return # no update needed
|
||||
|
||||
sha256hashed, tl = getfilesha256andsize(path)
|
||||
if (sha256 == sha256hashed) and (size == tl):
|
||||
print('existing file '+path.decode('ascii')+' matched by hash and size')
|
||||
if havenewfile:
|
||||
#if file is up to date but a "new" file exists and is bad
|
||||
#(we wouldn't have got this far if it was good)
|
||||
#schedule the "new" file for removal by adding it to "basefiles"
|
||||
basefiles.add(makenewpath(path))
|
||||
return # no update needed
|
||||
if os.path.isfile(path): # file already exists
|
||||
fileupdates.add(path)
|
||||
if os.path.isfile(makenewpath(path)):
|
||||
os.remove(makenewpath(path))
|
||||
outputpath = makenewpath(path)
|
||||
else:
|
||||
outputpath = path
|
||||
pathsplit = path.split(b'/')
|
||||
if (pathsplit[1:2] == [b'pool']) and (args.debugskippool):
|
||||
print('skipping download of '+path.decode('ascii')+' because --debugskippool was specified')
|
||||
return
|
||||
if (args.internal is not None) and (pathsplit[0] == b'raspbian'):
|
||||
fileurl = args.internal.encode('ascii') +b'/private/' + b'/'.join(pathsplit[1:])
|
||||
else:
|
||||
fileurl = baseurl + b'/' + path
|
||||
data = None
|
||||
if args.sourcepool is not None:
|
||||
for sourcepool in args.sourcepool:
|
||||
#print(repr(args.sourcepool))
|
||||
#print(repr(sourcepool))
|
||||
sourcepool = sourcepool.encode('ascii')
|
||||
if (len(pathsplit) > 1) and (pathsplit[1] == b'pool'):
|
||||
spp = os.path.join(sourcepool,b'/'.join(pathsplit[2:]))
|
||||
if os.path.isfile(spp) and (size == os.path.getsize(spp)):
|
||||
print('trying file from sourcepool '+spp.decode('ascii'))
|
||||
ts = os.path.getmtime(spp)
|
||||
f = open(spp,'rb')
|
||||
data = f.read()
|
||||
f.close()
|
||||
sha256hash = hashlib.sha256(data)
|
||||
sha256hashed = sha256hash.hexdigest().encode('ascii')
|
||||
if (sha256 != sha256hashed):
|
||||
#print(repr(filesize))
|
||||
#print(repr(sha256))
|
||||
#print(repr(sha256hashed))
|
||||
print('hash mismatch while trying file from sourcepool, ignoring file');
|
||||
data = None
|
||||
continue
|
||||
try:
|
||||
os.link(spp,outputpath)
|
||||
print('successfully hardlinked file to source pool')
|
||||
|
||||
except:
|
||||
print('file in souce pool was good but hard linking failed, copying file instead')
|
||||
break
|
||||
fdownloads.write(outputpath+b'\n')
|
||||
fdownloads.flush()
|
||||
return
|
||||
if data is None:
|
||||
if path+b'.gz' in knownfiles:
|
||||
if path+b'.gz' in fileupdates:
|
||||
gzfile = makenewpath(path+b'.gz')
|
||||
else:
|
||||
gzfile = path+b'.gz'
|
||||
print('uncompressing '+gzfile.decode('ascii')+' with hash '+sha256.decode('ascii')+' to '+outputpath.decode('ascii'))
|
||||
f = gzip.open(gzfile)
|
||||
data = f.read()
|
||||
f.close()
|
||||
ts = os.path.getmtime(gzfile)
|
||||
if not checkdatahash(data, sha256, 'hash mismatch while uncompressing file ', path, ''):
|
||||
sys.exit(1)
|
||||
if len(data) != size:
|
||||
print('size mismatch while uncompressing file')
|
||||
sys.exit(1)
|
||||
|
||||
#use slicing so we don't error if pathsplit only has one item
|
||||
if (data is None) and (mdurl is not None) and (pathsplit[1:2] == [b'pool']):
|
||||
|
||||
fileurl = mdurl + b'/' + path
|
||||
#fileurl = mdurl + b'/' + b'/'.join(pathsplit[1:])
|
||||
data, ts = getandcheckfile(fileurl, sha256, size, path, outputpath, ' from mirrordirector',' trying main server instead')
|
||||
if data is None:
|
||||
|
||||
if (args.internal is not None) and (pathsplit[0] == b'raspbian'):
|
||||
fileurl = args.internal.encode('ascii') +b'/private/' + b'/'.join(pathsplit[1:])
|
||||
elif (args.debugfdistsurl is not None) and (stage == 'downloadnew') and (b'dists' in pathsplit):
|
||||
fileurl = args.debugfdistsurl.encode('ascii') + b'/' + path
|
||||
else:
|
||||
fileurl = baseurl + b'/' + path
|
||||
data, ts = getandcheckfile(fileurl, sha256, size, path, outputpath, '','')
|
||||
if data is None:
|
||||
if (stage == 'downloadnew') and (b'dists' not in pathsplit):
|
||||
print('continuing dispite download failure of '+path.decode('ascii')+', may revisit later')
|
||||
global dlerrorcount
|
||||
dlerrorcount += 1
|
||||
knownfiles[path][2] = 'F'
|
||||
return
|
||||
if (data is None) and (hpurl is not None):
|
||||
print('failed to get '+path.decode('ascii')+' from normal sources, trying hash pool')
|
||||
ensuresafepath(sha256)
|
||||
fileurl = hpurl + b'/' + sha256[0:2] + b'/' + sha256[0:4] + b'/' + sha256
|
||||
data, ts = getandcheckfile(fileurl, sha256, size, path, outputpath, '', '')
|
||||
if data is None:
|
||||
print('failed to get '+path.decode('ascii')+' aborting')
|
||||
sys.exit(1)
|
||||
if data is not ...: #... is used to indicate that the file has been downloaded directly to disk and we don't
|
||||
# need to write it out here.
|
||||
f = open(outputpath,'wb')
|
||||
f.write(data)
|
||||
f.close()
|
||||
os.utime(outputpath,(ts,ts))
|
||||
fdownloads.write(outputpath+b'\n')
|
||||
fdownloads.flush()
|
||||
|
||||
|
||||
def getfilesha256andsize(fn):
|
||||
sha256hash = hashlib.sha256()
|
||||
f = open(fn, 'rb')
|
||||
l = bs
|
||||
tl = 0
|
||||
while l == bs:
|
||||
data = f.read(bs)
|
||||
l = len(data)
|
||||
tl += l
|
||||
sha256hash.update(data)
|
||||
f.close()
|
||||
sha256hashed = sha256hash.hexdigest().encode('ascii')
|
||||
return sha256hashed, tl
|
||||
|
||||
|
||||
bs = 16 * 1024 * 1024
|
||||
|
||||
def getandcheckfile(fileurl, sha256, size, path, outputpath, errorfromstr, errorsuffix):
|
||||
f = None
|
||||
try:
|
||||
sha256hash = hashlib.sha256()
|
||||
if path == outputpath:
|
||||
writepath = makenewpath(path)
|
||||
viamsg = ' via '+writepath.decode('ascii')
|
||||
else:
|
||||
writepath = outputpath
|
||||
viamsg = ''
|
||||
print(
|
||||
'downloading ' + fileurl.decode('ascii') + ' with hash ' + sha256.decode(
|
||||
'ascii') + ' to ' + outputpath.decode(
|
||||
'ascii') + viamsg)
|
||||
f = open(writepath, 'wb')
|
||||
if use_urllib3:
|
||||
response = dlmanager.request("GET", fileurl.decode('ascii'), preload_content=False)
|
||||
ts = getts(fileurl, response)
|
||||
tl = 0
|
||||
for data in response.stream(bs):
|
||||
tl += len(data)
|
||||
f.write(data)
|
||||
sha256hash.update(data)
|
||||
response.release_conn()
|
||||
else:
|
||||
with urllib.request.urlopen(fileurl.decode('ascii')) as response:
|
||||
l = bs
|
||||
tl = 0
|
||||
while l == bs:
|
||||
data = response.read(bs)
|
||||
f.write(data)
|
||||
l = len(data)
|
||||
tl += l
|
||||
sha256hash.update(data)
|
||||
ts = getts(fileurl, response)
|
||||
|
||||
data = ... #used as a flag to indicate that the data is written to disk rather than stored in memory
|
||||
f.close()
|
||||
if not testandreporthash(sha256hash, sha256, 'hash mismatch while downloading file' + errorfromstr + ' ', path,
|
||||
errorsuffix):
|
||||
data = None
|
||||
elif tl != size:
|
||||
print('size mismatch while downloading file' + errorfromstr + '.' + errorsuffix)
|
||||
data = None
|
||||
except Exception as e:
|
||||
print('exception ' + str(e) + ' while downloading file' + errorfromstr + '.' + errorsuffix)
|
||||
if f is not None:
|
||||
f.close()
|
||||
data = None
|
||||
ts = None
|
||||
if data is not None:
|
||||
#success
|
||||
if writepath != outputpath:
|
||||
os.rename(writepath, outputpath)
|
||||
else:
|
||||
#failure, cleanup writepath if nessacery
|
||||
if os.path.exists(writepath):
|
||||
os.remove(writepath)
|
||||
|
||||
return data, ts
|
||||
|
||||
|
||||
def checkdatahash(data, sha256, errorprefix, path, errorsuffix):
|
||||
sha256hash = hashlib.sha256(data)
|
||||
return testandreporthash(sha256hash, sha256, errorprefix, path, errorsuffix)
|
||||
|
||||
|
||||
def testandreporthash(sha256hash, sha256, errorprefix, path, errorsuffix):
|
||||
sha256hashed = sha256hash.hexdigest().encode('ascii')
|
||||
if (sha256 != sha256hashed):
|
||||
# print(repr(filesize))
|
||||
# print(repr(sha256))
|
||||
# print(repr(sha256hashed))
|
||||
print(errorprefix + path.decode('ascii') + ' ' + sha256.decode('ascii') + ' ' + sha256hashed.decode(
|
||||
'ascii') + errorsuffix);
|
||||
return False
|
||||
return True
|
||||
|
||||
|
||||
if (args.mdurl is None) or (args.mdurl.upper() == 'NONE'):
|
||||
mdurl = None
|
||||
else:
|
||||
mdurl = args.mdurl.encode('ascii')
|
||||
|
||||
if (args.hpurl is None) or (args.hpurl.upper() == 'NONE'):
|
||||
hpurl = None
|
||||
else:
|
||||
hpurl = args.hpurl.encode('ascii')
|
||||
|
||||
if args.baseurl is None:
|
||||
baseurl = b'https://archive.raspbian.org'
|
||||
mdurl = b'http://mirrordirector.raspbian.org'
|
||||
hpurl = b'http://snapshot.raspbian.org/hashpool'
|
||||
else:
|
||||
baseurl = args.baseurl.encode('ascii')
|
||||
|
||||
|
||||
|
||||
|
||||
symlinkupdates = list()
|
||||
fileupdates = set()
|
||||
|
||||
def opengu(filepath):
|
||||
#print('in opengu')
|
||||
#print('filepath = '+repr(filepath))
|
||||
#print('fileupdates = '+repr(fileupdates))
|
||||
f = None
|
||||
if (filepath in fileupdates):
|
||||
print((b'opening '+makenewpath(filepath)+b' for '+filepath).decode('ascii'))
|
||||
f = open(makenewpath(filepath),'rb')
|
||||
elif (filepath+b'.gz' in fileupdates):
|
||||
print((b'opening '+makenewpath(filepath+b'.gz')+b' for '+filepath).decode('ascii'))
|
||||
f = gzip.open(makenewpath(filepath+b'.gz'),'rb')
|
||||
elif os.path.exists(filepath):
|
||||
print((b'opening '+filepath+b' for '+filepath).decode('ascii'))
|
||||
f = open(filepath,'rb')
|
||||
elif os.path.exists(filepath+b'.gz'):
|
||||
print((b'opening '+filepath+b'.gz for '+filepath).decode('ascii'))
|
||||
f = gzip.open(filepath+b'.gz','rb')
|
||||
return f
|
||||
|
||||
oldsymlinks = set()
|
||||
newsymlinks = set()
|
||||
|
||||
fdownloads = open(makenewpath(b'raspbmirrordownloads.txt'),"ab")
|
||||
|
||||
dlerrorcount = 0;
|
||||
|
||||
for stage in ("scanexisting","downloadnew","finalize"):
|
||||
if stage == "finalize":
|
||||
if dlerrorcount == 0:
|
||||
print('skipping stage 3 as there were no download failures in stage 2')
|
||||
#we can finish now.
|
||||
break
|
||||
print('stage 3, download final updates')
|
||||
|
||||
oldknownfiles = knownfiles
|
||||
oldsymlinks |= newsymlinks
|
||||
newsymlinks = set()
|
||||
|
||||
if stage == "downloadnew":
|
||||
print('stage 2, main download')
|
||||
oldknownfiles = knownfiles
|
||||
basefiles = set(oldknownfiles.keys())
|
||||
|
||||
if stage == "scanexisting":
|
||||
print('stage 1, scan existing')
|
||||
else:
|
||||
if args.internal is not None:
|
||||
fileurl = args.internal.encode('ascii') + b'/snapshotindex.txt'
|
||||
else:
|
||||
fileurl = baseurl +b'/snapshotindex.txt'
|
||||
|
||||
if (stage == "downloadnew") and (args.debugfif is not None):
|
||||
fileurl = args.debugfif.encode('ascii')
|
||||
(filedata,ts) = geturl(fileurl)
|
||||
|
||||
f = open(makenewpath(b'snapshotindex.txt'),'wb')
|
||||
if (args.tlwhitelist is None) and (args.distswhitelist is None):
|
||||
f.write(filedata)
|
||||
else:
|
||||
lines = filedata.split(b'\n')
|
||||
if lines[-1] == b'':
|
||||
del(lines[-1])
|
||||
if args.tlwhitelist is not None:
|
||||
tlwhitelist = set(args.tlwhitelist.encode('ascii').split(b','))
|
||||
linesnew = []
|
||||
for line in lines:
|
||||
linesplit = line.split(b'/')
|
||||
if linesplit[0] in tlwhitelist:
|
||||
linesnew.append(line)
|
||||
lines = linesnew
|
||||
if args.distswhitelist is not None:
|
||||
distswhitelist = set(args.distswhitelist.encode('ascii').split(b','))
|
||||
founddists = set()
|
||||
foundesdists = set()
|
||||
linesnew = []
|
||||
for line in lines:
|
||||
path, sizeandsha = line.split(b' ')
|
||||
pathsplit = path.split(b'/')
|
||||
#print(pathsplit)
|
||||
#print(len(pathsplit))
|
||||
if (len(pathsplit) > 2) and (pathsplit[1] == b'dists'):
|
||||
if sizeandsha[0:2] == b'->': #symlink
|
||||
target = sizeandsha[2:]
|
||||
if target in distswhitelist:
|
||||
linesnew.append(line)
|
||||
elif pathsplit[2] in distswhitelist:
|
||||
linesnew.append(line)
|
||||
founddists.add((pathsplit[0],pathsplit[2]))
|
||||
if (len(pathsplit) > 3) and (pathsplit[3] == b'extrasources'):
|
||||
foundesdists.add((pathsplit[0],pathsplit[2]))
|
||||
elif (len(pathsplit) > 1) and pathsplit[1] == b'pool':
|
||||
pass
|
||||
else:
|
||||
linesnew.append(line)
|
||||
|
||||
lines = linesnew
|
||||
if founddists == set():
|
||||
print('none of the whitelisted distributions were found in the index file')
|
||||
sys.exit(1)
|
||||
missingesdists = founddists - foundesdists
|
||||
if missingesdists != set():
|
||||
for toplevel,distribution in missingesdists:
|
||||
print((b'missing extra sources file for '+toplevel+b'/dists/'+distribution).decode('ascii'))
|
||||
sys.exit(1)
|
||||
for line in lines:
|
||||
f.write(line+b'\n')
|
||||
f.close()
|
||||
os.utime(makenewpath(b'snapshotindex.txt'),(ts,ts))
|
||||
|
||||
knownfiles = OrderedDict()
|
||||
filequeue = []
|
||||
|
||||
if stage == "scanexisting":
|
||||
if os.path.isfile(b'snapshotindex.txt'):
|
||||
f = open(b'snapshotindex.txt','rb')
|
||||
else:
|
||||
continue
|
||||
else:
|
||||
f = open(makenewpath(b'snapshotindex.txt'),'rb')
|
||||
for line in f:
|
||||
line = line.strip()
|
||||
filepath, sizeandsha = line.split(b' ')
|
||||
if sizeandsha[:2] == b'->':
|
||||
symlinktarget = sizeandsha[2:]
|
||||
ensuresafepath(filepath)
|
||||
ensuresafepath(symlinktarget)
|
||||
if len(os.path.dirname(filepath)) > 0:
|
||||
os.makedirs(os.path.dirname(filepath),exist_ok=True)
|
||||
if stage == "scanexisting":
|
||||
oldsymlinks.add(filepath)
|
||||
else:
|
||||
if os.path.islink(filepath):
|
||||
if os.readlink(filepath) != symlinktarget:
|
||||
symlinkupdates.append((filepath,symlinktarget))
|
||||
else:
|
||||
print('creating symlink '+filepath.decode('ascii')+' -> '+symlinktarget.decode('ascii'))
|
||||
os.symlink(symlinktarget,filepath)
|
||||
newsymlinks.add(filepath)
|
||||
else:
|
||||
size,sha256 = sizeandsha.split(b':')
|
||||
size = int(size)
|
||||
knownfiles[filepath] = [sha256,size,'R']
|
||||
addtofilequeue(filequeue,filepath)
|
||||
|
||||
f.close()
|
||||
|
||||
extrasources = {}
|
||||
while filequeue:
|
||||
(priority, filepath) = heappop(filequeue)
|
||||
#print('processing '+filepath.decode('ascii'))
|
||||
sha256,size,status = knownfiles[filepath]
|
||||
if (stage != "scanexisting") and ((filepath+b'.gz' not in knownfiles) or (status == 'R') or os.path.exists(filepath)):
|
||||
getfile(filepath,sha256,size)
|
||||
pathsplit = filepath.split(b'/')
|
||||
#print(pathsplit[-1])
|
||||
#if (pathsplit[-1] == b'Packages'):
|
||||
# print(repr(pathsplit))
|
||||
if (pathsplit[-1] == b'Release') and (pathsplit[-3] == b'dists'):
|
||||
distdir = b'/'.join(pathsplit[:-1])
|
||||
f = opengu(filepath)
|
||||
if f is None:
|
||||
if stage == 'scanexisting':
|
||||
print('warning: cannot find '+filepath.decode('ascii')+' while scanning existing state')
|
||||
continue
|
||||
else:
|
||||
print('error: cannot find '+filepath.decode('ascii')+' or a gzipped substitute, aborting')
|
||||
sys.exit(1)
|
||||
insha256 = False;
|
||||
for line in f:
|
||||
#print(repr(line[0]))
|
||||
if (line == b'SHA256:\n'):
|
||||
insha256 = True
|
||||
elif ((line[0] == 32) and insha256):
|
||||
linesplit = line.split()
|
||||
filename = distdir+b'/'+linesplit[2]
|
||||
#if filename in knownfiles:
|
||||
# if files
|
||||
#print(filename)
|
||||
addfilefromdebarchive(knownfiles,filequeue,filename,linesplit[0],linesplit[1]);
|
||||
else:
|
||||
insha256 = False
|
||||
f.close()
|
||||
elif (pathsplit[-1] == b'Packages') and ((pathsplit[-5] == b'dists') or ((pathsplit[-3] == b'debian-installer') and (pathsplit[-6] == b'dists'))):
|
||||
if pathsplit[-5] == b'dists':
|
||||
toplevel = b'/'.join(pathsplit[:-5])
|
||||
else:
|
||||
toplevel = b'/'.join(pathsplit[:-6])
|
||||
print('found packages file: '+filepath.decode('ascii'))
|
||||
pf = opengu(filepath)
|
||||
if pf is None:
|
||||
if stage == 'scanexisting':
|
||||
print('warning: cannot find '+filepath.decode('ascii')+' while scanning existing state')
|
||||
continue
|
||||
else:
|
||||
print('error: cannot find '+filepath.decode('ascii')+' or a gzipped substitute, aborting')
|
||||
sys.exit(1)
|
||||
|
||||
filename = None
|
||||
size = None
|
||||
sha256 = None
|
||||
|
||||
for line in pf:
|
||||
linesplit = line.split()
|
||||
if (len(linesplit) == 0):
|
||||
if (filename != None):
|
||||
addfilefromdebarchive(knownfiles,filequeue,filename,sha256,size);
|
||||
filename = None
|
||||
size = None
|
||||
sha256 = None
|
||||
elif (linesplit[0] == b'Filename:'):
|
||||
filename = toplevel+b'/'+linesplit[1]
|
||||
elif (linesplit[0] == b'Size:'):
|
||||
size = linesplit[1]
|
||||
elif (linesplit[0] == b'SHA256:'):
|
||||
sha256 = linesplit[1]
|
||||
pf.close()
|
||||
elif (pathsplit[-1] == b'Sources') and (pathsplit[-5] == b'dists'):
|
||||
print('found sources file: '+filepath.decode('ascii'))
|
||||
toplevel = b'/'.join(pathsplit[:-5])
|
||||
pf = opengu(filepath)
|
||||
if pf is None:
|
||||
if stage == 'scanexisting':
|
||||
print('warning: cannot find '+filepath.decode('ascii')+' while scanning existing state')
|
||||
continue
|
||||
else:
|
||||
print('error: cannot find '+filepath.decode('ascii')+' or a gzipped substitute, aborting')
|
||||
sys.exit(1)
|
||||
filesfound = [];
|
||||
directory = None
|
||||
insha256p = False;
|
||||
for line in pf:
|
||||
linesplit = line.split()
|
||||
if (len(linesplit) == 0):
|
||||
for ls in filesfound:
|
||||
#print(repr(ls))
|
||||
addfilefromdebarchive(knownfiles,filequeue,toplevel+b'/'+directory+b'/'+ls[2],ls[0],ls[1]);
|
||||
filesfound = [];
|
||||
directory = None
|
||||
insha256p = False
|
||||
elif ((line[0] == 32) and insha256p):
|
||||
filesfound.append(linesplit)
|
||||
elif (linesplit[0] == b'Directory:'):
|
||||
insha256p = False
|
||||
directory = linesplit[1]
|
||||
elif (linesplit[0] == b'Checksums-Sha256:'):
|
||||
insha256p = True
|
||||
else:
|
||||
insha256p = False
|
||||
pf.close()
|
||||
elif (args.distswhitelist is not None) and (pathsplit[-1] == b'extrasources') and (pathsplit[-3] == b'dists'):
|
||||
print('found extrasources file: '+filepath.decode('ascii'))
|
||||
esf = opengu(filepath)
|
||||
if esf is None:
|
||||
if stage == 'scanexisting':
|
||||
print('warning: cannot find '+filepath.decode('ascii')+' while scanning existing state')
|
||||
continue
|
||||
else:
|
||||
print('error: cannot find '+filepath.decode('ascii')+' or a gzipped substitute, aborting')
|
||||
sys.exit(1)
|
||||
for line in esf:
|
||||
line = line.strip()
|
||||
filename , shaandsize = line.split(b' ')
|
||||
size , sha256 = shaandsize.split(b':')
|
||||
addfilefromdebarchive(knownfiles,filequeue,filename,sha256,size)
|
||||
extrasources[filename] = shaandsize
|
||||
#print(line)
|
||||
|
||||
fdownloads.close()
|
||||
fdownloads = open(makenewpath(b'raspbmirrordownloads.txt'),"rb")
|
||||
for line in fdownloads:
|
||||
basefiles.add(line.strip())
|
||||
fdownloads.close()
|
||||
|
||||
def throwerror(error):
    """Re-raise the supplied exception unchanged.

    Passed as the onerror callback to os.walk (see the cleanup scan below)
    so that errors hit while listing directories abort the run instead of
    being silently skipped.
    """
    raise error
|
||||
|
||||
# Cleanup mode: walk the whole mirror tree so that on-disk files and
# symlinks not referenced by the new snapshot can be detected and deleted
# in stage 4 below.
if args.cleanup:
    towalk = os.walk('.', True, throwerror, False)
    for (dirpath, dirnames, filenames) in towalk:
        # First pass: record every existing symlink.
        for filename in (filenames + dirnames): # os.walk seems to regard symlinks to directories as directories.
            filepath = os.path.join(dirpath, filename)[2:].encode('ascii') # [2:] is to strip the ./ prefix
            # print(filepath)
            if os.path.islink(filepath):
                oldsymlinks.add(filepath)
        # Second pass: record every regular file, excluding the mirror's own
        # bookkeeping files.  NOTE(review): startswith (rather than equality)
        # presumably also excludes their temporary makenewpath() variants --
        # TODO confirm against makenewpath().
        for filename in filenames:
            filepath = os.path.join(dirpath, filename)[2:].encode('ascii') # [2:] is to strip the ./ prefix
            if not os.path.islink(filepath) and not filepath.startswith(b'snapshotindex.txt') and not filepath.startswith(b'raspbmirrordownloads.txt'):
                basefiles.add(filepath)

print('stage 4, moves and deletions')

# Move freshly downloaded files (written under their makenewpath() names)
# over their final locations; os.replace overwrites atomically.
for filepath in fileupdates:
    print((b'renaming '+makenewpath(filepath)+b' to '+filepath).decode('ascii'))
    os.replace(makenewpath(filepath),filepath)

# Repoint symlinks whose recorded target changed.  os.symlink cannot
# overwrite an existing link, so the old one is removed first (not atomic).
for (filepath,symlinktarget) in symlinkupdates:
    print('updating symlink '+filepath.decode('ascii')+' -> '+symlinktarget.decode('ascii'))
    os.remove(filepath)
    os.symlink(symlinktarget,filepath)

# Anything previously on disk (or found by the cleanup walk) that is neither
# in the new known-file set nor a newly created symlink is now garbage.
removedfiles = (basefiles | oldsymlinks) - (set(knownfiles.keys()) | newsymlinks)
|
||||
|
||||
def isemptydir(dirpath):
    """Return True when dirpath names an existing directory with no entries.

    os.scandir would be significantly more efficient, but needs python 3.6
    or above, which is not reasonable to expect at this time, so os.listdir
    is used instead.
    """
    if not os.path.isdir(dirpath):
        return False
    return len(os.listdir(dirpath)) == 0
|
||||
|
||||
# Normalise the optional tmpdir argument to a bytes path with exactly one
# trailing slash, so the startswith()/len() prefix arithmetic in the removal
# loop below works on it directly.
if args.tmpdir is None:
    tmpdir = None
else:
    tmpdir = args.tmpdir.encode('ascii')
    # BUGFIX: indexing a bytes object yields an int, so the original test
    # `tmpdir[-1] != b'/'` compared int to bytes and was always true -- a
    # tmpdir already ending in '/' gained a doubled slash, which made the
    # later filepath.startswith(tmpdir) prefix check fail.  endswith()
    # compares bytes to bytes and also copes with an empty argument
    # (where tmpdir[-1] would raise IndexError).
    if not tmpdir.endswith(b'/'):
        tmpdir += b'/'
|
||||
|
||||
|
||||
# Delete every file/symlink in removedfiles, pruning directories that
# become empty along the way.
for filepath in removedfiles:
    #file may not actually exist, either due to earlier updates gone-wrong
    #or due to the file being a non-realised uncompressed version of
    #a gzipped file.
    if os.path.exists(filepath):
        checkpath = filepath
        #if the path points into the temporary directory we only check the part of it
        #that is relative to the temporary directory.
        if tmpdir is not None and filepath.startswith(tmpdir):
            checkpath = filepath[len(tmpdir):]
        # Sanity-check the path before deleting anything (ensuresafepath is
        # defined elsewhere in this file).
        ensuresafepath(checkpath)
        print('removing '+filepath.decode('ascii'))
        os.remove(filepath)
        #clean up empty directories.
        dirpath = os.path.dirname(filepath)
        while (len(dirpath) != 0) and isemptydir(dirpath):
            print('removing empty dir '+dirpath.decode('ascii'))
            os.rmdir(dirpath)
            dirpath = os.path.dirname(dirpath)

# Append the extrasources entries to the new snapshot index so the next run
# sees them; one b'<filename> <size>:<sha256>' record per line, matching the
# split performed when the file is read back earlier in this script.
f = open(makenewpath(b'snapshotindex.txt'),'ab')
for filename, shaandsize in extrasources.items():
    f.write(filename+b' '+shaandsize+b'\n')
f.close()

# Publish the completed snapshot index over the old one (atomic rename) and
# discard the temporary download list.
os.rename(makenewpath(b'snapshotindex.txt'),b'snapshotindex.txt')
os.remove(makenewpath(b'raspbmirrordownloads.txt'))
|
||||
|
Loading…
Reference in New Issue