#! /bin/bash # No, we can not deal with sh alone. set -e set -u # ERR traps should be inherited from functions too. (And command # substitutions and subshells and whatnot, but for us the function is # the important part here) set -E # ftpsync script for Debian # Based losely on a number of existing scripts, written by an # unknown number of different people over the years. # # Copyright (C) 2008,2009,2010,2011 Joerg Jaspert # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License as # published by the Free Software Foundation; version 2. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. # In case the admin somehow wants to have this script located someplace else, # he can set BASEDIR, and we will take that. If it is unset we take ${HOME} # How the admin sets this isn't our place to deal with. One could use a wrapper # for that. Or pam_env. Or whatever fits in the local setup. :) BASEDIR=${BASEDIR:-"${HOME}"} # Script version. DO NOT CHANGE, *unless* you change the master copy maintained # by Joerg Jaspert and the Debian mirroradm group. # This is used to track which mirror is using which script version. VERSION="80387" # Source our common functions . "${BASEDIR}/etc/common" ######################################################################## ######################################################################## ## functions ## ######################################################################## ######################################################################## # We want to be able to get told what kind of sync we should do. This # might be anything, from the archive to sync, the stage to do, etc. A # list of currently understood and valid options is below. Multiple # options are seperated by space. All the words have to have the word # sync: in front or nothing will get used! # # Option Behaviour # stage1 Only do stage1 sync # stage2 Only do stage2 sync # all Do a complete sync # mhop Do a mhop sync, usually additionally to stage1 # archive:foo Sync archive foo (if config for foo is available) # callback Call back when done (needs proper ssh setup for this to # work). It will always use the "command" callback:$HOSTNAME # where $HOSTNAME is the one defined below/in config and # will happen before slave mirrors are triggered. # # So to get us to sync all of the archive behind bpo and call back when # we are done, a trigger command of # "ssh $USER@$HOST sync:all sync:archive:bpo sync:callback" will do the # trick. check_commandline() { while [ $# -gt 0 ]; do case "$1" in sync:stage1) SYNCSTAGE1="true" SYNCALL="false" ;; sync:stage2) SYNCSTAGE2="true" SYNCALL="false" ;; sync:callback) SYNCCALLBACK="true" ;; sync:archive:*) ARCHIVE=${1##sync:archive:} # We do not like / or . in the remotely supplied archive name. ARCHIVE=${ARCHIVE//\/} ARCHIVE=${ARCHIVE//.} ;; sync:all) SYNCALL="true" ;; sync:mhop) SYNCMHOP="true" ;; *) echo "Unknown option ${1} ignored" ;; esac shift # Check next set of parameters. done } # All the stuff we want to do when we exit, no matter where cleanup() { trap - ERR TERM HUP INT QUIT EXIT # all done. Mail the log, exit. log "Mirrorsync done"; # Lets get a statistical value SPEED="unknown" if [ -f "${LOGDIR}/rsync-${NAME}.log" ]; then SPEED=$( SPEEDLINE=$(egrep '[0-9.]+ bytes/sec' "${LOGDIR}/rsync-${NAME}.log") set "nothing" ${SPEEDLINE} echo ${8:-""} ) if [ -n "${SPEED}" ]; then SPEED=${SPEED%%.*} SPEED=$(( $SPEED / 1024 )) fi fi log "Rsync transfer speed: ${SPEED} KB/s" if [ -n "${MAILTO}" ]; then # In case rsync had something on stderr if [ -s "${LOGDIR}/rsync-${NAME}.error" ]; then mail -e -s "[${PROGRAM}@$(hostname -s)] ($$) rsync ERROR on $(date +"%Y.%m.%d-%H:%M:%S")" ${MAILTO} < "${LOGDIR}/rsync-${NAME}.error" fi if [ "x${ERRORSONLY}x" = "xfalsex" ]; then # And the normal log MAILFILES="${LOG}" if [ "x${FULLLOGS}x" = "xtruex" ]; then # Someone wants full logs including rsync MAILFILES="${MAILFILES} ${LOGDIR}/rsync-${NAME}.log" fi cat ${MAILFILES} | mail -e -s "[${PROGRAM}@$(hostname -s)] archive sync finished on $(date +"%Y.%m.%d-%H:%M:%S")" ${MAILTO} fi fi savelog "${LOGDIR}/rsync-${NAME}.log" savelog "${LOGDIR}/rsync-${NAME}.error" savelog "$LOG" > /dev/null rm -f "${LOCK}" } # Check rsyncs return value check_rsync() { ret=$1 msg=$2 # 24 - vanished source files. Ignored, that should be the target of $UPDATEREQUIRED # and us re-running. If it's not, uplink is broken anyways. case "${ret}" in 0) return 0;; 24) return 0;; 23) return 2;; 30) return 2;; *) error "ERROR: ${msg}" return 1 ;; esac } ######################################################################## ######################################################################## # As what are we called? NAME="$(basename $0)" # The original command line arguments need to be saved! if [ $# -gt 0 ]; then ORIGINAL_COMMAND=$* else ORIGINAL_COMMAND="" fi SSH_ORIGINAL_COMMAND=${SSH_ORIGINAL_COMMAND:-""} # Now, check if we got told about stuff via ssh if [ -n "${SSH_ORIGINAL_COMMAND}" ]; then # We deliberately add "nothing" and ignore it right again, to avoid # people from outside putting some set options in the first place, # making us parse them... set "nothing" "${SSH_ORIGINAL_COMMAND}" shift # Yes, unqouted $* here. Or the function will only see it as one # parameter, which doesnt help the case in it. check_commandline $* fi # Now, we can locally override all the above variables by just putting # them into the .ssh/authorized_keys file forced command. if [ -n "${ORIGINAL_COMMAND}" ]; then set ${ORIGINAL_COMMAND} check_commandline $* fi # If we have been told to do stuff for a different archive than default, # set the name accordingly. ARCHIVE=${ARCHIVE:-""} if [ -n "${ARCHIVE}" ]; then NAME="${NAME}-${ARCHIVE}" fi # Now source the config for the archive we run on. # (Yes, people can also overwrite the options above in the config file # if they want to) if [ -f "${BASEDIR}/etc/${NAME}.conf" ]; then . "${BASEDIR}/etc/${NAME}.conf" else echo "Nono, you can't tell us about random archives. Bad boy!" exit 1 fi ######################################################################## # Config options go here. Feel free to overwrite them in the config # # file if you need to. # # On debian.org machines the defaults should be ok. # # # # The following extra variables can be defined in the config file: # # # # ARCH_EXCLUDE # # can be used to exclude a complete architecture from # # mirrorring. Use as space seperated list. # # Possible values are: # # alpha, amd64, arm, armel, hppa, hurd-i386, i386, ia64, # # mipsel, mips, powerpc, s390, sparc, kfreebsd-i386, kfreebsd-amd64 # # and source. # # eg. ARCH_EXCLUDE="alpha arm armel mipsel mips s390 sparc" # # # # An unset value will mirror all architectures # ######################################################################## ######################################################################## # There should be nothing to edit here, use the config file # ######################################################################## MIRRORNAME=${MIRRORNAME:-$(hostname -f)} # Where to put logfiles in LOGDIR=${LOGDIR:-"${BASEDIR}/log"} # Our own logfile LOG=${LOG:-"${LOGDIR}/${NAME}.log"} # Where should we put all the mirrored files? TO=${TO:-"/org/ftp.debian.org/ftp/"} # used by log() and error() PROGRAM=${PROGRAM:-"${NAME}-$(hostname -s)"} # Where to send mails about mirroring to? if [ "x$(hostname -d)x" != "xdebian.orgx" ]; then # We are not on a debian.org host MAILTO=${MAILTO:-"root"} else # Yay, on a .debian.org host MAILTO=${MAILTO:-"mirrorlogs@debian.org"} fi # Want errors only or every log? ERRORSONLY=${ERRORSONLY:-"true"} # Want full logs, ie. including the rsync one? FULLLOGS=${FULLLOGS:-"false"} # How many logfiles to keep LOGROTATE=${LOGROTATE:-14} # Our lockfile LOCK=${LOCK:-"${TO}/Archive-Update-in-Progress-${MIRRORNAME}"} # timeout for the lockfile, in case we have bash older than v4 (and no /proc) LOCKTIMEOUT=${LOCKTIMEOUT:-3600} # Do we need another rsync run? UPDATEREQUIRED="${TO}/Archive-Update-Required-${MIRRORNAME}" # Trace file for mirror stats and checks (make sure we get full hostname) TRACE=${TRACE:-"project/trace/${MIRRORNAME}"} # rsync program RSYNC=${RSYNC:-rsync} # Rsync filter rules. Used to protect various files we always want to keep, even if we otherwise delete # excluded files RSYNC_FILTER=${RSYNC_FILTER:-"--filter=protect_Archive-Update-in-Progress-${MIRRORNAME} --filter=protect_${TRACE} --filter=protect_Archive-Update-Required-${MIRRORNAME}"} # limit I/O bandwidth. Value is KBytes per second, unset or 0 is unlimited RSYNC_BW=${RSYNC_BW:-0} # Default rsync options for *every* rsync call RSYNC_OPTIONS=${RSYNC_OPTIONS:-"-prltvHSB8192 --timeout 3600 --stats ${RSYNC_FILTER}"} # Options we only use in the first pass, where we do not want packages/sources to fly in yet and don't want to delete files RSYNC_OPTIONS1=${RSYNC_OPTIONS1:-"--exclude Packages* --exclude Sources* --exclude Release* --exclude InRelease --exclude ls-lR*"} # Options for the second pass, where we do want everything, including deletion of old and now unused files RSYNC_OPTIONS2=${RSYNC_OPTIONS2:-"--max-delete=40000 --delay-updates --delete --delete-after --delete-excluded"} # Which rsync share to use on our upstream mirror? RSYNC_PATH=${RSYNC_PATH:-"ftp"} # Now add the bwlimit option. As default is 0 we always add it, rsync interprets # 0 as unlimited, so this is safe. RSYNC_OPTIONS="--bwlimit=${RSYNC_BW} ${RSYNC_OPTIONS}" # Connect from mirror.csclub RSYNC_OPTIONS="--address=129.97.134.71 ${RSYNC_OPTIONS}" # We have no default host to sync from, but will error out if its unset RSYNC_HOST=${RSYNC_HOST:-""} # Error out if we have no host to sync from if [ -z "${RSYNC_HOST}" ]; then error "Missing a host to mirror from, please set RSYNC_HOST variable in ${BASEDIR}/etc/${NAME}.conf" fi # our username for the rsync share RSYNC_USER=${RSYNC_USER:-""} # the password RSYNC_PASSWORD=${RSYNC_PASSWORD:-""} # a possible proxy RSYNC_PROXY=${RSYNC_PROXY:-""} # Do we sync stage1? SYNCSTAGE1=${SYNCSTAGE1:-"false"} # Do we sync stage2? SYNCSTAGE2=${SYNCSTAGE2:-"false"} # Do we sync all? SYNCALL=${SYNCALL:-"true"} # Do we have a mhop sync? SYNCMHOP=${SYNCMHOP:-"false"} # Do we callback? SYNCCALLBACK=${SYNCCALLBACK:-"false"} # If we call back we need some more options defined in the config file. CALLBACKUSER=${CALLBACKUSER:-"archvsync"} CALLBACKHOST=${CALLBACKHOST:-"none"} CALLBACKKEY=${CALLBACKKEY:-"none"} # General excludes. Don't list architecture specific stuff here, use ARCH_EXCLUDE for that! EXCLUDE=${EXCLUDE:-""} # The temp directory used by rsync --delay-updates is not # world-readable remotely. Always exclude it to avoid errors. EXCLUDE="${EXCLUDE} --exclude .~tmp~/" SOURCE_EXCLUDE=${SOURCE_EXCLUDE:-""} ARCH_EXCLUDE=${ARCH_EXCLUDE:-""} # Exclude architectures defined in $ARCH_EXCLUDE for ARCH in ${ARCH_EXCLUDE}; do EXCLUDE="${EXCLUDE} --exclude binary-${ARCH}/ --exclude installer-${ARCH}/ --exclude Contents-${ARCH}.gz --exclude Contents-${ARCH}.bz2 --exclude Contents-${ARCH}.diff/ --exclude arch-${ARCH}.files --exclude arch-${ARCH}.list.gz --exclude *_${ARCH}.deb --exclude *_${ARCH}.udeb --exclude *_${ARCH}.changes" if [ "${ARCH}" = "source" ]; then if [ -z ${SOURCE_EXCLUDE} ]; then SOURCE_EXCLUDE=" --exclude source/ --exclude *.tar.gz --exclude *.diff.gz --exclude *.tar.bz2 --exclude *.diff.bz2 --exclude *.dsc " fi fi done # Hooks HOOK1=${HOOK1:-""} HOOK2=${HOOK2:-""} HOOK3=${HOOK3:-""} HOOK4=${HOOK4:-""} HOOK5=${HOOK5:-""} # Are we a hub? HUB=${HUB:-"false"} ######################################################################## # Really nothing to see below here. Only code follows. # ######################################################################## ######################################################################## # Some sane defaults cd "${BASEDIR}" umask 022 # If we are here for the first time, create the # destination and the trace directory mkdir -p "${TO}/project/trace" # Used to make sure we will have the archive fully and completly synced before # we stop, even if we get multiple pushes while this script is running. # Otherwise we can end up with a half-synced archive: # - get a push # - sync, while locked # - get another push. Of course no extra sync run then happens, we are locked. # - done. Archive not correctly synced, we don't have all the changes from the second push. touch "${UPDATEREQUIRED}" # Check to see if another sync is in progress if ! ( set -o noclobber; echo "$$" > "${LOCK}") 2> /dev/null; then if [ ${BASH_VERSINFO[0]} -gt 3 ] || [ -L /proc/self ]; then # We have a recent enough bash version, lets do it the easy way, # the lock will contain the right pid, thanks to $BASHPID if ! $(kill -0 $(cat ${LOCK}) 2>/dev/null); then # Process does either not exist or is not owned by us. echo "$$" > "${LOCK}" else echo "Unable to start rsync, lock file still exists, PID $(cat ${LOCK})" exit 1 fi else # Old bash, means we dont have the right pid in our lockfile # So take a different way - guess if it is still there by comparing its age. # Not optimal, but hey. stamptime=$(date --reference="${LOCK}" +%s) unixtime=$(date +%s) difference=$(( $unixtime - $stamptime )) if [ ${difference} -ge ${LOCKTIMEOUT} ]; then # Took longer than LOCKTIMEOUT minutes? Assume it broke and take the lock echo "$$" > "${LOCK}" else echo "Unable to start rsync, lock file younger than one hour" exit 1 fi fi fi # When we exit normally we call cleanup on our own. Otherwise we want it called by # this trap. (We can not trap on EXIT, because that is called when the main script # exits. Which also happens when we background the mainroutine, ie. while we still # run!) trap cleanup ERR TERM HUP INT QUIT # Start log by redirecting stdout and stderr there and closing stdin exec >"$LOG" 2>&1 <&- log "Mirrorsync start" # Look who pushed us and note that in the log. PUSHFROM="${SSH_CONNECTION%%\ *}" if [ -n "${PUSHFROM}" ]; then log "We got pushed from ${PUSHFROM}" fi if [ "xtruex" = "x${SYNCCALLBACK}x" ]; then if [ "xnonex" = "x${CALLBACKHOST}x" ] || [ "xnonex" = "x${CALLBACKKEY}x" ]; then SYNCCALLBACK="false" error "We are asked to call back, but we do not know where to and do not have a key, ignoring callback" fi fi HOOK=( HOOKNR=1 HOOKSCR=${HOOK1} ) hook $HOOK # Now, we might want to sync from anonymous too. # This is that deep in this script so hook1 could, if wanted, change things! if [ -z ${RSYNC_USER} ]; then RSYNCPTH="${RSYNC_HOST}" else RSYNCPTH="${RSYNC_USER}@${RSYNC_HOST}" fi # Now do the actual mirroring, and run as long as we have an updaterequired file. export RSYNC_PASSWORD export RSYNC_PROXY while [ -e "${UPDATEREQUIRED}" ]; do log "Running mirrorsync, update is required, ${UPDATEREQUIRED} exists" # if we want stage1 *or* all if [ "xtruex" = "x${SYNCSTAGE1}x" ] || [ "xtruex" = "x${SYNCALL}x" ]; then while [ -e "${UPDATEREQUIRED}" ]; do rm -f "${UPDATEREQUIRED}" log "Running stage1: ${RSYNC} ${RSYNC_OPTIONS} ${RSYNC_OPTIONS1} ${EXCLUDE} ${SOURCE_EXCLUDE} ${RSYNCPTH}::${RSYNC_PATH} ${TO}" set +e # Step one, sync everything except Packages/Releases ${RSYNC} ${RSYNC_OPTIONS} ${RSYNC_OPTIONS1} ${EXCLUDE} ${SOURCE_EXCLUDE} \ ${RSYNCPTH}::${RSYNC_PATH} "${TO}" >"${LOGDIR}/rsync-${NAME}.log" 2>"${LOGDIR}/rsync-${NAME}.error" result=$? set -e log "Back from rsync with returncode ${result}" done else # Fake a good resultcode result=0 fi # Sync stage 1? rm -f "${UPDATEREQUIRED}" set +e check_rsync $result "Sync step 1 went wrong, got errorcode ${result}. Logfile: ${LOG}" GO=$? set -e if [ ${GO} -eq 2 ] && [ -e "${UPDATEREQUIRED}" ]; then log "We got error ${result} from rsync, but a second push went in hence ignoring this error for now" elif [ ${GO} -ne 0 ]; then exit 3 fi HOOK=( HOOKNR=2 HOOKSCR=${HOOK2} ) hook $HOOK # if we want stage2 *or* all if [ "xtruex" = "x${SYNCSTAGE2}x" ] || [ "xtruex" = "x${SYNCALL}x" ]; then log "Running stage2: ${RSYNC} ${RSYNC_OPTIONS} ${RSYNC_OPTIONS2} ${EXCLUDE} ${SOURCE_EXCLUDE} ${RSYNCPTH}::${RSYNC_PATH} ${TO}" set +e # We are lucky, it worked. Now do step 2 and sync again, this time including # the packages/releases files ${RSYNC} ${RSYNC_OPTIONS} ${RSYNC_OPTIONS2} ${EXCLUDE} ${SOURCE_EXCLUDE} \ ${RSYNCPTH}::${RSYNC_PATH} "${TO}" >>"${LOGDIR}/rsync-${NAME}.log" 2>>"${LOGDIR}/rsync-${NAME}.error" result=$? set -e log "Back from rsync with returncode ${result}" else # Fake a good resultcode result=0 fi # Sync stage 2? set +e check_rsync $result "Sync step 2 went wrong, got errorcode ${result}. Logfile: ${LOG}" GO=$? set -e if [ ${GO} -eq 2 ] && [ -e "${UPDATEREQUIRED}" ]; then log "We got error ${result} from rsync, but a second push went in hence ignoring this error for now" elif [ ${GO} -ne 0 ]; then exit 4 fi HOOK=( HOOKNR=3 HOOKSCR=${HOOK3} ) hook $HOOK done # We only update our tracefile when we had a stage2 or an all sync. # Otherwise we would update it after stage1 already, which is wrong. if [ "xtruex" = "x${SYNCSTAGE2}x" ] || [ "xtruex" = "x${SYNCALL}x" ]; then if [ -d "$(dirname "${TO}/${TRACE}")" ]; then LC_ALL=POSIX LANG=POSIX date -u > "${TO}/${TRACE}" echo "Used ftpsync version: ${VERSION}" >> "${TO}/${TRACE}" echo "Running on host: $(hostname -f)" >> "${TO}/${TRACE}" fi fi HOOK=( HOOKNR=4 HOOKSCR=${HOOK4} ) hook $HOOK if [ "xtruex" = "x${SYNCCALLBACK}x" ]; then set +e callback ${CALLBACKUSER} ${CALLBACKHOST} "${CALLBACKKEY}" set -e fi # Remove the Archive-Update-in-Progress file before we push our downstreams. rm -f "${LOCK}" if [ x${HUB} = "xtrue" ]; then # Trigger slave mirrors if we had a push for stage2 or all, or if its mhop if [ "xtruex" = "x${SYNCSTAGE2}x" ] || [ "xtruex" = "x${SYNCALL}x" ] || [ "xtruex" = "x${SYNCMHOP}x" ]; then RUNMIRRORARGS="" if [ -n "${ARCHIVE}" ]; then # We tell runmirrors about the archive we are running on. RUNMIRRORARGS="-a ${ARCHIVE}" fi # We also tell runmirrors that we are running it from within ftpsync, so it can change # the way it works with mhop based on that. RUNMIRRORARGS="${RUNMIRRORARGS} -f" if [ "xtruex" = "x${SYNCSTAGE1}x" ]; then # This is true when we have a mhop sync. A normal multi-stage push sending stage1 will # not get to this point. # So if that happens, tell runmirrors we are doing mhop RUNMIRRORARGS="${RUNMIRRORARGS} -k mhop" elif [ "xtruex" = "x${SYNCSTAGE2}x" ]; then RUNMIRRORARGS="${RUNMIRRORARGS} -k stage2" elif [ "xtruex" = "x${SYNCALL}x" ]; then RUNMIRRORARGS="${RUNMIRRORARGS} -k all" fi log "Trigger slave mirrors using ${RUNMIRRORARGS}" ${BASEDIR}/bin/runmirrors ${RUNMIRRORARGS} log "Trigger slave done" HOOK=( HOOKNR=5 HOOKSCR=${HOOK5} ) hook $HOOK fi fi # All done, lets call cleanup cleanup