make sure to add mirror repo

autoautoinstall
Andrew Wang 1 year ago
parent e8ab91403f
commit 9d33c1aa93
  1. 4
      README.md
  2. 2
      libvirt/main.yml
  3. 1
      requirements.yml
  4. 1
      todo/ftp/main.yml
  5. 23
      todo/mirror/bin/csc-sync-apache
  6. 93
      todo/mirror/bin/csc-sync-archlinux
  7. 66
      todo/mirror/bin/csc-sync-archlinux-old
  8. 23
      todo/mirror/bin/csc-sync-badperms
  9. 25
      todo/mirror/bin/csc-sync-cdimage
  10. 105
      todo/mirror/bin/csc-sync-ceph
  11. 27
      todo/mirror/bin/csc-sync-chmod
  12. 245
      todo/mirror/bin/csc-sync-debian
  13. 107
      todo/mirror/bin/csc-sync-debian-cd
  14. 3
      todo/mirror/bin/csc-sync-gentoo
  15. 16
      todo/mirror/bin/csc-sync-s3
  16. 24
      todo/mirror/bin/csc-sync-ssh
  17. 31
      todo/mirror/bin/csc-sync-standard
  18. 25
      todo/mirror/bin/csc-sync-standard-ipv6
  19. 17
      todo/mirror/bin/csc-sync-wget
  20. 31
      todo/mirror/bin/make-torrents
  21. 272
      todo/mirror/bin/report_mirror
  22. 3
      todo/mirror/bin/ubuntu-releases-sync
  23. 10
      todo/mirror/bin/zfssync
  24. 1
      todo/mirror/config/ADDRESS
  25. 1
      todo/mirror/config/ADDRESS_V6
  26. 7
      todo/mirror/debian/.bash_logout
  27. 11
      todo/mirror/debian/.bash_profile
  28. 46
      todo/mirror/debian/.bashrc
  29. 257
      todo/mirror/debian/README
  30. 62
      todo/mirror/debian/bin/dircombine
  31. 585
      todo/mirror/debian/bin/ftpsync
  32. 112
      todo/mirror/debian/bin/pushpdo
  33. 286
      todo/mirror/debian/bin/runmirrors
  34. 168
      todo/mirror/debian/bin/typicalsync
  35. 13
      todo/mirror/debian/bin/udh
  36. 304
      todo/mirror/debian/bin/websync
  37. 230
      todo/mirror/debian/etc/common
  38. 137
      todo/mirror/debian/etc/ftpsync.conf
  39. 148
      todo/mirror/debian/etc/ftpsync.conf.sample
  40. 40
      todo/mirror/debian/etc/pushpdo.conf.sample
  41. 21
      todo/mirror/debian/etc/pushpdo.mirror.sample
  42. 53
      todo/mirror/debian/etc/runmirrors.conf.sample
  43. 72
      todo/mirror/debian/etc/runmirrors.mirror.sample
  44. 0
      todo/mirror/debian/etc/secrets/.dummy
  45. 121
      todo/mirror/debian/etc/websync.conf.sample
  46. 1371
      todo/mirror/debian/mirrorcheck/bin/dmc-archive.pl
  47. 1371
      todo/mirror/debian/mirrorcheck/bin/dmc.pl
  48. 0
      todo/mirror/debian/mirrorcheck/www/.dummy
  49. 1
      todo/mirror/foooooooo
  50. 3
      todo/mirror/git_old/include/FOOTER.shtml
  51. 5
      todo/mirror/git_old/include/HEADER.shtml
  52. 7
      todo/mirror/git_old/include/default.css
  53. BIN
      todo/mirror/git_old/include/favicon.ico
  54. BIN
      todo/mirror/git_old/include/header.png
  55. 11
      todo/mirror/git_old/include/motd.msg
  56. 2
      todo/mirror/git_old/include/robots.txt
  57. 119
      todo/mirror/git_old/misc/debian-check-md5sum
  58. 22
      todo/mirror/git_old/routing/csc-mirror
  59. 48
      todo/mirror/git_old/routing/interfaces
  60. 186
      todo/mirror/git_old/routing/orionroutes.py
  61. 11
      todo/mirror/git_old/routing/rt_realms
  62. 14
      todo/mirror/git_old/routing/rt_tables
  63. 6
      todo/mirror/git_old/routing/update-orion-routes
  64. 102
      todo/mirror/git_old/rrdtool/mirror-nl-glue.c
  65. 25
      todo/mirror/git_old/rrdtool/mirror-nl-glue.h
  66. 40
      todo/mirror/git_old/rrdtool/mirror-rrd.c
  67. 39
      todo/mirror/git_old/rrdtool/rrdgraph-monthly.sh
  68. 39
      todo/mirror/git_old/rrdtool/rrdgraph-yearly.sh
  69. 38
      todo/mirror/git_old/rrdtool/rrdgraph.sh
  70. 3
      todo/mirror/git_old/snmp/.gitignore
  71. 49
      todo/mirror/git_old/snmp/CSC-MIB.txt
  72. 11
      todo/mirror/git_old/snmp/Makefile
  73. 221
      todo/mirror/git_old/snmp/csc-snmp-subagent.c
  74. 109
      todo/mirror/git_old/snmp/mirror-mib.c
  75. 9
      todo/mirror/git_old/snmp/mirror-mib.h
  76. 102
      todo/mirror/git_old/snmp/mirror-nl-glue.c
  77. 25
      todo/mirror/git_old/snmp/mirror-nl-glue.h
  78. 12
      todo/mirror/git_old/snmp/mirror-stats.c
  79. 1
      todo/mirror/git_old/snmp/query_mib.sh
  80. 2
      todo/mirror/git_old/snmp/snmp.conf
  81. 56
      todo/mirror/git_old/torrents/rtorrent-init.d
  82. 100
      todo/mirror/git_old/torrents/rtorrent.rc
  83. 599
      todo/mirror/index.html__
  84. 7
      todo/mirror/merlin/.gitignore
  85. 25
      todo/mirror/merlin/arthur.py
  86. 55
      todo/mirror/merlin/init-script
  87. 716
      todo/mirror/merlin/merlin.py
  88. 13
      todo/mirror/merlin/merlin.service
  89. 10
      todo/mirror/merlin/rebuild.sh
  90. 47
      todo/mirror/merlin/test.py
  91. 1
      todo/mirror/merlin/test/debianBased
  92. 5
      todo/mirror/merlin/test/debian_update
  93. 1
      todo/mirror/merlin/test/mirror/debian/trace/mirror.csclub.uwaterloo.ca
  94. 1
      todo/mirror/merlin/test/mirror/hobo-linux/mirror.csclub.uwaterloo.ca
  95. 1
      todo/mirror/merlin/test/mirror/ubuntu/trace/mirror.csclub.uwaterloo.ca
  96. 1
      todo/mirror/merlin/test/mirrors
  97. 5
      todo/mirror/merlin/test/sync
  98. 30
      todo/mirror/merlin/test/test_repo_dict
  99. 1
      todo/mirror/merlin/test/trace/leningradskaya.canonical.com
  100. 553
      todo/mirror/merlin/zfssync.yml
  101. Some files were not shown because too many files have changed in this diff Show More

@ -66,6 +66,10 @@ The Ubuntu autoinstall can only handle basic installation. We require a more po
First, install ansible and sshpass. Perform all the following commands in the `post-install/` directory.
Also install the extra roles
```
$ ansible-galaxy install -r requirements.yml
```
Check that ansible can talk to the vm:
```
$ ansible -m ping all

@ -1,5 +1,5 @@
```
- name: Create mirror Vm
- name: Create mirror VM
hosts: localhost
connection: local
tasks:

@ -0,0 +1 @@
- src: ansible.posix

@ -0,0 +1 @@
- name: Install

@ -0,0 +1,23 @@
#!/bin/bash -e
# csc-sync-apache: mirror an rsync daemon module into /mirror/root/<local_dir>.
#
# Usage: csc-sync-apache local_dir rsync_host rsync_dir
#   local_dir   directory name under /mirror/root to sync into
#   rsync_host  upstream rsync daemon host
#   rsync_dir   module (and optional path) on that host
# Environment:
#   RSYNC_USER  when non-empty, connect as RSYNC_USER@rsync_host
# The IPv4 source address to bind to is read from ~/config/ADDRESS.
if [ $# -lt 3 ]; then
    echo 'Usage: sync local_dir rsync_host rsync_dir'
    exit 1
fi

umask 002

TO="/mirror/root/$1"
RSYNC_HOST="$2"
RSYNC_DIR="$3"
ADDRESS="$(cat ~/config/ADDRESS)"

if test -n "$RSYNC_USER"; then
    RSYNC_HOST="${RSYNC_USER}@${RSYNC_HOST}"
fi

# Every expansion is quoted so that an argument containing whitespace or glob
# characters reaches rsync as a single word.
exec nice rsync -az --no-owner --no-group --delete --safe-links \
    --timeout=3600 -4 --address="$ADDRESS" \
    --exclude '.~tmp~/' \
    --quiet --stats --log-file=/home/mirror/merlin/logs/transfer.log \
    "${RSYNC_HOST}::${RSYNC_DIR}" "$TO"

@ -0,0 +1,93 @@
#!/bin/bash
# Source: https://git.server-speed.net/users/flo/bin/plain/syncrepo.sh
# (from https://bugs.archlinux.org/task/52853)
#
# Arch Linux mirror sync: argument check, configuration and locking.
# Usage: $0 local_dir   (the repo is stored in /mirror/root/<local_dir>)

if (( $# < 1 )); then
    echo "Usage: $0 local_dir"
    exit 1
fi

umask 022

LOCAL_DIR=$1
ADDRESS=$(cat ~/config/ADDRESS)

# This is a simple mirroring script. To save bandwidth it first checks a
# timestamp via HTTP and only runs rsync when the timestamp differs from the
# local copy. As of 2016, a single rsync run without changes transfers roughly
# 6MiB of data, which adds up to roughly 250GiB of traffic per month when
# rsync is run every minute. A cheap HTTP check first avoids most of that.

# Directory where the repo is stored locally. Example: /srv/repo
target="/mirror/root/${LOCAL_DIR}"

# Directory where files are downloaded to before being moved in place.
# This should be on the same filesystem as $target, but not a subdirectory of
# $target. Example: /srv/tmp
tmp="/home/mirror/tmp"

# Lockfile path
lock="${tmp}/mirrorsync-${LOCAL_DIR}.lck"

# If you want to limit the bandwidth used by rsync set this.
# Use 0 to disable the limit.
# The default unit is KiB (see man rsync /--bwlimit for more)
bwlimit=0

# The source URL of the mirror you want to sync from.
# If you are a tier 1 mirror use rsync.archlinux.org, for example like this:
# rsync://rsync.archlinux.org/ftp_tier1
# Otherwise choose a tier 1 mirror from this list and use its rsync URL:
# https://www.archlinux.org/mirrors/
source_url='rsync://rsync.archlinux.org/ftp_tier1'

# An HTTP(S) URL pointing to the 'lastupdate' file on your chosen mirror.
# If you are a tier 1 mirror use: http://rsync.archlinux.org/lastupdate
# Otherwise use the HTTP(S) URL from your chosen mirror.
lastupdate_url='http://rsync.archlinux.org/lastupdate'

#### END CONFIG

if [[ ! -d "${target}" ]]; then
    mkdir -p "${target}"
fi
if [[ ! -d "${tmp}" ]]; then
    mkdir -p "${tmp}"
fi

# Hold an exclusive lock on fd 9 for the lifetime of the script; if another
# instance already holds it, exit with flock's status.
exec 9>"${lock}"
flock -n 9 || exit
# rsync_cmd ARGS...: run rsync with this mirror's standard option set followed
# by the caller's arguments (normally a source and a destination).
# Reads the globals VERBOSE, tmp, LOCAL_DIR, ADDRESS and bwlimit.
rsync_cmd() {
    local -a argv
    # ${VERBOSE} is left unquoted on purpose: when it is empty it contributes
    # no argument at all.
    argv=(rsync -rtlH --safe-links --delete-after ${VERBOSE} "--timeout=600" "--contimeout=60" -p
          --delay-updates --no-motd "--temp-dir=${tmp}"
          "--log-file=/home/mirror/merlin/logs/$LOCAL_DIR.log"
          --address=$ADDRESS)

    # stty succeeds only when stdin is a terminal; show progress in that case.
    if stty >/dev/null 2>&1; then
        argv+=(-h -v --progress)
    fi

    if [[ ${bwlimit} -gt 0 ]]; then
        argv+=("--bwlimit=${bwlimit}")
    fi

    "${argv[@]}" "$@"
}
# When run without a tty (cron job) and a local lastupdate file exists, compare
# it with the upstream copy fetched over HTTP. If they match there is nothing
# new to fetch.
if ! tty -s && [[ -f "$target/lastupdate" ]]; then
    if diff -b <(curl --interface $ADDRESS -s "$lastupdate_url") "$target/lastupdate" >/dev/null; then
        # keep lastsync file in sync for statistics generated by the Arch Linux website
        rsync_cmd "$source_url/lastsync" "$target/lastsync"
        exit 0
    fi
fi

# Full sync. Optional excludes that could be added to the call below:
# --exclude='*.links.tar.gz*' \
# --exclude='/other' \
# --exclude='/sources' \
# --exclude='/iso' \
rsync_cmd "${source_url}" "${target}"

#echo "Last sync was $(date -d @$(cat ${target}/lastsync))"

@ -0,0 +1,66 @@
#!/bin/bash
# Source: https://git.server-speed.net/users/flo/bin/plain/syncrepo.sh
# as recommended in our request to become Tier 1:
# https://bugs.archlinux.org/task/52853
#
# Older variant of the Arch Linux sync script.
# Usage: $0 local_dir   (the repo is stored in /mirror/root/<local_dir>)
if [ $# -lt 1 ]; then
    echo "Usage: $0 local_dir"
    exit 1
fi

umask 022

ADDRESS="$(cat ~/config/ADDRESS)"

# This is a simple mirroring script. To save bandwidth it first checks a
# timestamp via HTTP and only runs rsync when the timestamp differs from the
# local copy. As of 2016, a single rsync run without changes transfers roughly
# 6MiB of data which adds up to roughly 250GiB of traffic per month when rsync
# is run every minute. Performing a simple check via HTTP first can thus save a
# lot of traffic.

home="/home/mirror"
target="/mirror/root/$1"
tmp="${home}/tmp"
lock="${tmp}/mirrorsync-$1.lck"

# NOTE: You'll probably want to change this or remove the --bwlimit setting in
# the rsync call below
bwlimit=4096

# NOTE: most people reading this very likely need to change this since
# rsync.archlinux.org requires you to be a tier 1 mirror
source='rsync://rsync.archlinux.org/ftp_tier1'
lastupdate_url="http://rsync.archlinux.org/lastupdate"

[ ! -d "${target}" ] && mkdir -p "${target}"
[ ! -d "${tmp}" ] && mkdir -p "${tmp}"

# Only one instance per local_dir: exit with flock's status if the lock is held.
exec 9>"${lock}"
flock -n 9 || exit

# if we are called without a tty (cronjob) only run when there are changes.
# The -f test skips the HTTP fetch (and diff's "No such file" complaint) on the
# very first run, when there is no local lastupdate to compare against.
if ! tty -s && [[ -f "$target/lastupdate" ]] && diff -b <(curl --interface "$ADDRESS" -s "$lastupdate_url") "$target/lastupdate" >/dev/null; then
    date +'%s' > "$target/lastsync"
    exit 0
fi

if ! stty &>/dev/null; then
    QUIET="-q"
fi

# this can be added into the rsync: --bwlimit=$bwlimit \
# --exclude='/iso' \
# --exclude='*.links.tar.gz*' \
# --exclude='/other' \
# --exclude='/sources' \
# ${QUIET} stays unquoted so that an empty value adds no argument.
rsync -rtlvH --safe-links --delete-after --progress -h ${QUIET} --timeout=600 --contimeout=60 -p \
    --delay-updates --no-motd \
    --temp-dir="${tmp}" \
    --stats \
    --log-file="${home}/merlin/logs/$1.log" \
    --address="$ADDRESS" \
    "${source}" \
    "${target}"

date +'%s' > "$target/lastsync"
#echo "Last sync was $(date -d @$(cat ${target}/lastsync))"

@ -0,0 +1,23 @@
#!/bin/bash -e
# csc-sync-badperms: mirror an rsync daemon module into /mirror/root/<local_dir>,
# passing --chmod=o=rX to rsync so that "other" permissions are rewritten on
# the received files.
#
# Usage: csc-sync-badperms local_dir rsync_host rsync_dir
# Environment:
#   RSYNC_USER  when non-empty, connect as RSYNC_USER@rsync_host
# The IPv4 source address to bind to is read from ~/config/ADDRESS.
if [ $# -lt 3 ]; then
    echo 'Usage: sync local_dir rsync_host rsync_dir'
    exit 1
fi

umask 002

TO="/mirror/root/$1"
RSYNC_HOST="$2"
RSYNC_DIR="$3"
ADDRESS="$(cat ~/config/ADDRESS)"

if test -n "$RSYNC_USER"; then
    RSYNC_HOST="${RSYNC_USER}@${RSYNC_HOST}"
fi

# Expansions are quoted so arguments are never word-split or glob-expanded.
exec nice rsync -aH --no-owner --no-group --chmod=o=rX --delete \
    --timeout=3600 -4 --address="$ADDRESS" \
    --exclude '.~tmp~/' \
    --quiet --stats --log-file=/home/mirror/merlin/logs/transfer.log \
    "${RSYNC_HOST}::${RSYNC_DIR}" "$TO"

@ -0,0 +1,25 @@
#!/bin/bash -e
# csc-sync-cdimage: mirror an rsync daemon module into /mirror/root/<local_dir>,
# excluding every hidden directory (pattern ".*/").
#
# Usage: csc-sync-cdimage local_dir rsync_host rsync_dir
# Environment:
#   RSYNC_USER  when non-empty, connect as RSYNC_USER@rsync_host
# The IPv4 source address to bind to is read from ~/config/ADDRESS.
if [ $# -lt 3 ]; then
    echo 'Usage: sync local_dir rsync_host rsync_dir'
    exit 1
fi

umask 002

TO="/mirror/root/$1"
RSYNC_HOST="$2"
RSYNC_DIR="$3"
ADDRESS="$(cat ~/config/ADDRESS)"

if test -n "$RSYNC_USER"; then
    RSYNC_HOST="${RSYNC_USER}@${RSYNC_HOST}"
fi

mkdir -p "$TO"

# Expansions are quoted so arguments are never word-split or glob-expanded.
exec nice rsync -aH --no-owner --no-group --delete \
    --timeout=3600 -4 --address="$ADDRESS" \
    --exclude ".*/" \
    --quiet --stats --log-file=/home/mirror/merlin/logs/transfer.log \
    "${RSYNC_HOST}::${RSYNC_DIR}" "$TO"

@ -0,0 +1,105 @@
#!/usr/bin/env bash
set -e
#
# Script to mirror Ceph locally
#
# Please, choose a local source and do not sync in a shorter interval than
# 3 hours.
#

# 1 when -q was given (quiet rsync output), 0 otherwise.
SILENT=0

# All available source mirrors, keyed by the name accepted by the -s option.
declare -A SOURCES=(
    [eu]="eu.ceph.com"
    [de]="de.ceph.com"
    [se]="se.ceph.com"
    [au]="au.ceph.com"
    [us]="download.ceph.com"
    [hk]="hk.ceph.com"
    [fr]="fr.ceph.com"
    [us-east]="us-east.ceph.com"
    [us-west]="us-west.ceph.com"
    [global]="download.ceph.com"
)
# Print the command-line synopsis on stdout.
print_usage() {
    printf '%s\n' "$0 [-q ] -s <source mirror> -t <target directory>"
}
# Option parsing:
#   -q        quiet rsync output
#   -s NAME   source mirror, one of the keys of SOURCES
#   -t DIR    target directory, relative to /mirror/root
#   -h        show usage
while getopts ":qhs:t:" opt; do
    case $opt in
        q) SILENT=1 ;;
        s) SOURCE=$OPTARG ;;
        t) TARGET=/mirror/root/$OPTARG ;;
        h) HELP=1 ;;
        \?)
            print_usage
            exit 1
            ;;
    esac
done

# Both -s and -t are mandatory; -h takes the same exit path.
if [[ -n "$HELP" || -z "$TARGET" || -z "$SOURCE" ]]; then
    print_usage
    exit 1
fi

if [[ ! -d "$TARGET" ]]; then
    echo "$TARGET is not a valid target directory"
    exit 1
fi

# Translate the requested source name into its rsync host.
for key in "${!SOURCES[@]}"; do
    if [[ "$key" == "$SOURCE" ]]; then
        SOURCE_HOST=${SOURCES[$key]}
    fi
done

if [[ -z "$SOURCE_HOST" ]]; then
    printf '%s' "Please select one of the following sources:"
    for key in "${!SOURCES[@]}"; do
        printf ' %s' "$key"
    done
    echo ""
    exit 1
fi
# rsync options are collected in an array so each option stays one word.
RSYNC_OPTS=(--stats --progress)
if [ "$SILENT" -eq 1 ]; then
    RSYNC_OPTS=(--quiet)
fi

# Bind outgoing connections to the IPv4 address stored in ~/config/ADDRESS.
ADDRESS="$(cat ~/config/ADDRESS)"
RSYNC_OPTS+=(-4 "--address=$ADDRESS")

# We start a two-stage sync here for DEB and RPM
# Based on: https://www.debian.org/mirror/ftpmirror
#
# The idea is to prevent temporary situations where metadata points to files
# which do not exist
#
# Stage 1: exclude all metadata files.
# The patterns are quoted so that the shell hands them to rsync verbatim
# instead of expanding them against files in the current directory.
rsync "${RSYNC_OPTS[@]}" "${SOURCE_HOST}::ceph" --recursive --times --links \
    --hard-links \
    --exclude 'Packages*' \
    --exclude 'Sources*' \
    --exclude 'Release*' \
    --exclude 'InRelease' \
    --exclude 'i18n/*' \
    --exclude 'ls-lR*' \
    --exclude 'repodata/*' \
    "${TARGET}"

# Stage 2: now also transfer the metadata and delete afterwards
rsync "${RSYNC_OPTS[@]}" "${SOURCE_HOST}::ceph" --recursive --times --links \
    --hard-links --delete-after \
    "${TARGET}"

@ -0,0 +1,27 @@
#!/bin/bash -e
# csc-sync-chmod: mirror rsync://<rsync_host>/<rsync_dir> into
# /mirror/root/<local_dir>, passing --chmod=u=rwX,go=rX to rsync.
#
# Usage: csc-sync-chmod local_dir rsync_host rsync_dir
# Environment:
#   RSYNC_USER  when non-empty, connect as RSYNC_USER@rsync_host
# The IPv4 source address to bind to is read from ~/config/ADDRESS.
if [ $# -lt 3 ]; then
    echo 'Usage: sync local_dir rsync_host rsync_dir'
    exit 1
fi

umask 002

TO="/mirror/root/$1"
RSYNC_HOST="$2"
RSYNC_DIR="$3"
ADDRESS="$(cat ~/config/ADDRESS)"

if test -n "$RSYNC_USER"; then
    RSYNC_HOST="${RSYNC_USER}@${RSYNC_HOST}"
fi

mkdir -p "$TO"

# Expansions are quoted so arguments are never word-split or glob-expanded.
exec nice rsync -aH --no-owner --no-group --delete-after --delay-updates --safe-links \
    --timeout=3600 -4 --address="$ADDRESS" \
    --exclude '.~tmp~/' \
    --quiet --stats --log-file=/home/mirror/merlin/logs/transfer.log \
    --chmod=u=rwX,go=rX \
    "rsync://${RSYNC_HOST}/${RSYNC_DIR}" "$TO"
#$RSYNC_HOST::$RSYNC_DIR $TO

@ -0,0 +1,245 @@
#! /bin/bash
# csc-sync-debian: two-stage Debian-style archive sync.
#
# Usage: sync local_dir rsync_host rsync_dir [trace_host [trace_dir]]
#   local_dir   directory name under /mirror/root to sync into
#   rsync_host  upstream rsync daemon host
#   rsync_dir   rsync module on that host
#   trace_host  optional: name of the upstream trace file to compare
#   trace_dir   optional: trace directory (default: project/trace)

# Numeric comparison: inside [[ ]] the < operator compares strings, which
# would reject e.g. 10 arguments because "10" sorts before "3".
if [[ $# -lt 3 ]]; then
    echo 'Usage: sync local_dir rsync_host rsync_dir [trace_host [trace_dir]]'
    exit 1
fi

set -e

TOP_DIR=/mirror
TO="$TOP_DIR/root/$1"
RSYNC_HOST="$2"
RSYNC_DIR="$3"

# -ge rather than = so that trace_host is still picked up when trace_dir is
# supplied as well.
if [ $# -ge 4 ]; then
    TRACE_HOST="$4"
fi

TRACE_DIR=project/trace
if [ $# -ge 5 ]; then
    TRACE_DIR="$5"
fi

LOGDIR="/var/log/mirror/${1}_${2}"
# IPv4 source address to bind rsync to.
ADDRESS="$(cat ~/config/ADDRESS)"

mkdir -p "$LOGDIR"
# This script originates from http://www.debian.org/mirror/anonftpsync
# CVS: cvs.debian.org:/cvs/webwml - webwml/english/mirror/anonftpsync
# Version: $Id: anonftpsync,v 1.33 2007/09/12 15:19:03 joy Exp $
# Note: You MUST have rsync 2.6.4 or newer, which is available in sarge
# and all newer Debian releases, or at http://rsync.samba.org/
# Don't forget:
# chmod u+x anonftpsync
# Set the variables below to fit your site. You can then use cron to have
# this script run daily to automatically update your copy of the archive.
# TO is the destination for the base of the Debian mirror directory
# (the dir that holds dists/ and ls-lR).
# (mandatory)
#TO=
# RSYNC_HOST is the site you have chosen from the mirrors file.
# (http://www.debian.org/mirror/list-full)
# (mandatory)
#RSYNC_HOST=
# RSYNC_DIR is the directory given in the "Packages over rsync:" line of
# the mirrors file for the site you have chosen to mirror.
# (mandatory)
#RSYNC_DIR=
# LOGDIR is the directory where the logs will be written to
# (mandatory)
#LOGDIR=
# ARCH_EXCLUDE can be used to exclude a complete architecture from
# mirroring. Please specify it as a space-separated list.
# Possible values are:
# alpha, amd64, arm, hppa, hurd-i386, i386, ia64, m68k, mipsel, mips, powerpc, s390, sh and sparc
#
# There is one special value: source
# This is not an architecture but will exclude all source code in /pool
#
# eg.
# ARCH_EXCLUDE="alpha arm hppa hurd-i386 ia64 m68k mipsel mips s390 sparc"
#
# With a blank ARCH_EXCLUDE you will mirror all available architectures
# (optional)
# EXCLUDE is a list of parameters listing patterns that rsync will exclude, in
# addition to the architectures excluded by ARCH_EXCLUDE.
#
# Use ARCH_EXCLUDE to exclude specific architectures or all sources
#
# --exclude stable, testing, unstable options DON'T remove the packages of
# the given distribution. If you want to do so, use debmirror instead.
#
# The following example would exclude mostly everything:
#EXCLUDE="\
# --exclude stable/ --exclude testing/ --exclude unstable/ \
# --exclude source/ \
# --exclude *.orig.tar.gz --exclude *.diff.gz --exclude *.dsc \
# --exclude /contrib/ --exclude /non-free/ \
# "
# With a blank EXCLUDE you will mirror the entire archive, except the
# architectures excluded by ARCH_EXCLUDE.
# (optional)
# The value is expanded unquoted on the rsync command lines further down, so
# it is split on whitespace into separate rsync arguments.
EXCLUDE=
# LOCK_TIMEOUT is a timeout in minutes. Defaults to 360 (6 hours).
# This program creates a lock to ensure that only one copy
# of it is mirroring any one archive at any one time.
# Locks held for longer than the timeout are broken, unless
# a running rsync process appears to be connected to $RSYNC_HOST.
# The age check is done with find's -amin test on the lock file.
LOCK_TIMEOUT=360
# There should be no need to edit anything below this point, unless there
# are problems.
#-----------------------------------------------------------------------------#
# If you are accessing a rsync server/module which is password-protected,
# uncomment the following lines (and edit the other file).
# . ftpsync.conf
# export RSYNC_PASSWORD
# RSYNC_HOST=$RSYNC_USER@$RSYNC_HOST
#-----------------------------------------------------------------------------#
# Check for some environment variables
# Refuse to run when a mandatory setting is empty. The operands are quoted:
# unquoted, an empty value turns the test into `[ -z ]` (true only by accident)
# and a value containing whitespace makes `[` fail with a syntax error.
if [ -z "$TO" ] || [ -z "$RSYNC_HOST" ] || [ -z "$RSYNC_DIR" ] || [ -z "$LOGDIR" ]; then
    echo "One of the following variables seems to be empty:"
    echo "TO, RSYNC_HOST, RSYNC_DIR or LOGDIR"
    exit 2
fi

if ! [ -d "${TO}/${TRACE_DIR}" ]; then
    # we are running mirror script for the first time
    umask 002
    mkdir -p "${TO}/${TRACE_DIR}"
fi
# Note: on some non-Debian systems, hostname doesn't accept -f option.
# If that's the case on your system, make sure hostname prints the full
# hostname, and remove the -f option. If there's no hostname command,
# explicitly replace `hostname -f` with the hostname.
# NOTE(review): this value is overwritten by the hard-coded assignment just
# below, so the command's output is never used.
HOSTNAME=`hostname -f`
# The hostname must match the "Site" field written in the list of mirrors.
# If hostname doesn't return the correct value, fill in and uncomment below.
HOSTNAME=mirror.csclub.uwaterloo.ca
# Lock file that marks a sync in progress; it lives inside the mirror tree.
LOCK="${TO}/Archive-Update-in-Progress-${HOSTNAME}"
# The temp directory used by rsync --delay-updates is not
# world-readable remotely. It must be excluded to avoid errors.
TMP_EXCLUDE="--exclude .~tmp~/"
# Exclude architectures defined in $ARCH_EXCLUDE
# For each architecture a set of --exclude options is appended to EXCLUDE.
# ARCH_EXCLUDE is not assigned anywhere in this script, so the loop only runs
# when the variable is inherited from the environment.
for ARCH in $ARCH_EXCLUDE; do
EXCLUDE=$EXCLUDE"\
--exclude binary-$ARCH/ \
--exclude disks-$ARCH/ \
--exclude installer-$ARCH/ \
--exclude Contents-$ARCH.gz \
--exclude Contents-$ARCH.diff/ \
--exclude arch-$ARCH.files \
--exclude arch-$ARCH.list.gz \
--exclude *_$ARCH.deb \
--exclude *_$ARCH.udeb "
# The pseudo-architecture "source" additionally excludes source packages.
if [ "$ARCH" == "source" ]; then
SOURCE_EXCLUDE="\
--exclude source/ \
--exclude *.tar.gz \
--exclude *.diff.gz \
--exclude *.dsc "
fi
done
# Logfile
LOGFILE="$LOGDIR/mirror.log"

# Get in the right directory and set the umask to be group writable
#
cd "$HOME"
umask 002

# Check to see if another sync is in progress.
# $LOCK is derived from the local_dir argument, so it is quoted everywhere.
if [ -f "$LOCK" ]; then
    # Note: this requires the findutils find; for other finds, adjust as necessary
    # find prints nothing when the lock was last accessed more than
    # LOCK_TIMEOUT minutes ago, i.e. when the lock is stale.
    if [ -z "$(find "$LOCK" -maxdepth 1 -amin "-$LOCK_TIMEOUT")" ]; then
        # Note: this requires the procps ps; for other ps', adjust as necessary
        if ps ax | grep '[r]'sync | grep -q "$RSYNC_HOST"; then
            echo "stale lock found, but a rsync is still running, aiee!" >&2
            exit 1
        else
            echo "stale lock found (not accessed in the last $LOCK_TIMEOUT minutes), forcing update!"
            rm -f "$LOCK"
        fi
    else
        echo "current lock file exists, unable to start rsync!"
        exit 1
    fi
fi

touch "$LOCK"

# Remove the lock whenever the script exits.
# Note: on some non-Debian systems, trap doesn't accept "exit" as signal
# specification. If that's the case on your system, try using "0".
trap 'rm -f "$LOCK"' EXIT
# From here on, failures are handled explicitly instead of aborting.
set +e

# check if we need to sync: fetch the upstream trace file and compare its
# modification time with the local copy. The result is only logged.
if [[ "$TRACE_HOST" != "" ]]; then
    TRACE_OLD_TIME=$(stat -c%Y "$TO/$TRACE_DIR/$TRACE_HOST" 2> /dev/null)

    # Braces are required: "$RSYNC_HOST_" and "$RSYNC_DIR_" would be parsed as
    # (undefined) variables named RSYNC_HOST_ and RSYNC_DIR_. Slashes in the
    # module path are replaced so the name stays a single file in /tmp.
    TRACE_NEW_FILE="/tmp/${RSYNC_HOST}_${RSYNC_DIR//\//_}_${RANDOM}"

    nice rsync -tv -4 --address="$ADDRESS" \
        "${RSYNC_HOST}::${RSYNC_DIR}/${TRACE_DIR}/${TRACE_HOST}" \
        "$TRACE_NEW_FILE" >> "$LOGFILE" 2>&1

    TRACE_NEW_TIME=$(stat -c%Y "$TRACE_NEW_FILE")
    rm -f "$TRACE_NEW_FILE"

    # Require a non-empty new timestamp so that a failed fetch combined with a
    # missing local trace is not reported as "unchanged".
    if [ -n "$TRACE_NEW_TIME" ] && [ "$TRACE_OLD_TIME" = "$TRACE_NEW_TIME" ]; then
        echo 'Trace file for' "${RSYNC_HOST}::${RSYNC_DIR}" \
            'unchanged!' >> "$LOGFILE"
    fi
fi
# Stage 1: sync /pool only, so that package files are present before the
# indices that reference them. The *_EXCLUDE variables are deliberately
# unquoted: each holds several rsync options.
nice rsync -rlHtvp \
    $TMP_EXCLUDE $EXCLUDE $SOURCE_EXCLUDE \
    --timeout=3600 -4 --address=$ADDRESS \
    $RSYNC_HOST::$RSYNC_DIR/pool/ $TO/pool/ >> $LOGFILE 2>&1
result=$?

if [ "$result" -ne 0 ]; then
    echo "ERROR: Help, something weird happened" | tee -a $LOGFILE
    echo "mirroring /pool exited with exitcode $result" | tee -a $LOGFILE
else
    # Stage 2: sync everything else, deleting obsolete files at the end. The
    # lock file and our own trace file are excluded so they are not deleted.
    nice rsync -rlHtvp --delay-updates --delete-after \
        --exclude "Archive-Update-in-Progress-${HOSTNAME}" \
        --exclude "${TRACE_DIR}/${HOSTNAME}" \
        --timeout=3600 -4 --address=$ADDRESS \
        $TMP_EXCLUDE $EXCLUDE $SOURCE_EXCLUDE \
        $RSYNC_HOST::$RSYNC_DIR $TO >> $LOGFILE 2>&1

    # Record the time of this sync in our own trace file.
    mkdir -p ${TO}/${TRACE_DIR}
    LANG=C date -u > "${TO}/${TRACE_DIR}/${HOSTNAME}"
fi

# Rotate the log and release the lock.
savelog $LOGFILE >/dev/null
rm $LOCK

@ -0,0 +1,107 @@
#! /bin/bash
# csc-sync-debian-cd: mirror cdimage.debian.org::debian-cd into
# /mirror/root/debian-cd. Takes no arguments.
set -e

TOP_DIR=/mirror
TO=$TOP_DIR/root/debian-cd
RSYNC_HOST=cdimage.debian.org
RSYNC_DIR=debian-cd
LOGDIR=/var/log/mirror/debian-cd_cdimage.debian.org
# IPv4 source address to bind rsync to.
ADDRESS=$(cat ~/config/ADDRESS)

mkdir -p $LOGDIR

# LOCK_TIMEOUT is a timeout in minutes. Defaults to 360 (6 hours).
# This program creates a lock to ensure that only one copy
# of it is mirroring any one archive at any one time.
# Locks held for longer than the timeout are broken, unless
# a running rsync process appears to be connected to $RSYNC_HOST.
LOCK_TIMEOUT=360

# There should be no need to edit anything below this point, unless there
# are problems.
#-----------------------------------------------------------------------------#
# If you are accessing a rsync server/module which is password-protected,
# uncomment the following lines (and edit the other file).
# . ftpsync.conf
export RSYNC_PASSWORD
if [[ -n "$RSYNC_USER" ]]; then
    RSYNC_HOST=$RSYNC_USER@$RSYNC_HOST
fi
#-----------------------------------------------------------------------------#

# Check for some environment variables: every one of them must be non-empty.
for required in "$TO" "$RSYNC_HOST" "$RSYNC_DIR" "$LOGDIR"; do
    if [ -z "$required" ]; then
        echo "One of the following variables seems to be empty:"
        echo "TO, RSYNC_HOST, RSYNC_DIR or LOGDIR"
        exit 2
    fi
done
# Note: on some non-Debian systems, hostname doesn't accept -f option.
# If that's the case on your system, make sure hostname prints the full
# hostname, and remove the -f option. If there's no hostname command,
# explicitly replace `hostname -f` with the hostname.
HOSTNAME=$(hostname -f)
# The hostname must match the "Site" field written in the list of mirrors.
# The value obtained above is replaced by this fixed name.
HOSTNAME=mirror.csclub.uwaterloo.ca

LOCK="${TO}/Archive-Update-in-Progress-${HOSTNAME}"

# The temp directory used by rsync --delay-updates is not
# world-readable remotely. It must be excluded to avoid errors.
TMP_EXCLUDE="--exclude .~tmp~/"

# Logfile
LOGFILE=$LOGDIR/mirror.log

# Get in the right directory and set the umask to be group writable
cd $HOME
umask 002

# Check to see if another sync is in progress
if [ -f "$LOCK" ]; then
    # Note: this requires the findutils find; for other finds, adjust as necessary
    # An empty result means the lock was not accessed within LOCK_TIMEOUT minutes.
    if [ -z "$(find $LOCK -maxdepth 1 -amin -$LOCK_TIMEOUT)" ]; then
        # Note: this requires the procps ps; for other ps', adjust as necessary
        if ps ax | grep '[r]'sync | grep -q $RSYNC_HOST; then
            echo "stale lock found, but a rsync is still running, aiee!" >&2
            exit 1
        fi
        echo "stale lock found (not accessed in the last $LOCK_TIMEOUT minutes), forcing update!"
        rm -f $LOCK
    else
        echo "current lock file exists, unable to start rsync!"
        exit 1
    fi
fi

touch $LOCK

# Note: on some non-Debian systems, trap doesn't accept "exit" as signal
# specification. If that's the case on your system, try using "0".
trap "rm -f $LOCK" EXIT
# From here on, failures are handled explicitly instead of aborting.
set +e

# Sync the whole module. $TMP_EXCLUDE is left unquoted because it holds two
# words (the option and its pattern); the lock file is excluded so --delete
# does not remove it.
nice rsync -rlHtvp --delete \
    --timeout=3600 -4 --address="$ADDRESS" $TMP_EXCLUDE \
    --exclude "Archive-Update-in-Progress-${HOSTNAME}" \
    "${RSYNC_HOST}::${RSYNC_DIR}" "$TO" >> "$LOGFILE" 2>&1
# Capture the exit status immediately; the message below used to print an
# unset variable and therefore never showed the real exit code.
result=$?

if [ "$result" -ne 0 ]; then
    echo "ERROR: Help, something weird happened" | tee -a "$LOGFILE"
    echo "mirroring /pool exited with exitcode" "$result" | tee -a "$LOGFILE"
fi

# Rotate the log and release the lock.
savelog "$LOGFILE" >/dev/null
rm "$LOCK"

@ -0,0 +1,3 @@
#!/bin/sh
# Sync Gentoo distfiles by delegating to ~/bin/csc-sync-standard with
# RSYNC_USER=gentoo in its environment. The arguments passed are, in order:
# local_dir, rsync_host, rsync_dir and a fourth argument (gentoo-distfiles).
export RSYNC_USER=gentoo
exec ~/bin/csc-sync-standard \
    gentoo-distfiles masterdistfiles.gentoo.org gentoo gentoo-distfiles

@ -0,0 +1,16 @@
#!/bin/bash -e
# csc-sync-s3: mirror the bucket "s3" of an S3-compatible endpoint into
# /mirror/root/<local_dir> with rclone.
#
# Usage: csc-sync-s3 local_dir endpoint
#   local_dir  directory name under /mirror/root to sync into
#   endpoint   S3 endpoint, exported as RCLONE_CONFIG_S3_ENDPOINT
# The local address to bind to is read from ~/config/ADDRESS.
if [ $# -lt 2 ]; then
    echo 'Usage: sync local_dir endpoint'
    exit 1
fi

umask 002

TO="/mirror/root/$1"

# Define an rclone remote named "s3" purely through environment variables.
export RCLONE_CONFIG_S3_ENDPOINT="$2" RCLONE_CONFIG_S3_TYPE=s3 RCLONE_CONFIG_S3_PROVIDER=Other RCLONE_CONFIG_S3_ENV_AUTH=false

ADDRESS="$(cat ~/config/ADDRESS)"

# Expansions are quoted so arguments are never word-split or glob-expanded.
mkdir -p "$TO"
cd "$TO"
exec nice rclone sync --fast-list --use-server-modtime --bind "$ADDRESS" s3:s3/ "$TO"

@ -0,0 +1,24 @@
#!/bin/bash -e
# csc-sync-ssh: mirror a remote directory over rsync-via-ssh into
# /mirror/root/<local_dir>.
#
# Usage: csc-sync-ssh local_dir rsync_host rsync_dir ssh_user ssh_keyfile
#   local_dir    directory name under /mirror/root to sync into
#   rsync_host   remote host
#   rsync_dir    directory on the remote host
#   ssh_user     user to log in as
#   ssh_keyfile  private key passed to ssh -i

# All five arguments are required: ssh_user and ssh_keyfile are used
# unconditionally below, so accepting only three would build "@host" and
# "ssh -i" with an empty key path.
if [ $# -lt 5 ]; then
    echo 'Usage: sync local_dir rsync_host rsync_dir ssh_user ssh_keyfile'
    exit 1
fi

umask 002

TO="/mirror/root/$1"
RSYNC_HOST="$2"
RSYNC_DIR="$3"
SSH_USER="$4"
SSH_KEYFILE="$5"

RSYNC_HOST="${SSH_USER}@${RSYNC_HOST}"

# Expansions are quoted so arguments are never word-split or glob-expanded.
exec nice rsync -aH --no-owner --no-group --delete \
    --timeout=3600 -4 \
    --exclude '.~tmp~/' \
    --quiet --stats --log-file=/home/mirror/merlin/logs/transfer-ssh.log \
    -e "ssh -i $SSH_KEYFILE" \
    "${RSYNC_HOST}:${RSYNC_DIR}/" "$TO"
#134.71 needs to be used

@ -0,0 +1,31 @@
#!/bin/bash -e
# csc-sync-standard: mirror rsync://<rsync_host>/<rsync_dir> into
# /mirror/root/<local_dir>.
#
# Usage: csc-sync-standard local_dir rsync_host rsync_dir [password file]
#   password file  optional file name under $HOME/passwords, handed to rsync
#                  via --password-file
# Environment:
#   RSYNC_USER  when non-empty, connect as RSYNC_USER@rsync_host
# The IPv4 source address to bind to is read from ~/config/ADDRESS.
if [ $# -lt 3 ]; then
    echo 'Usage: sync local_dir rsync_host rsync_dir [password file]'
    exit 1
fi

umask 002

TO="/mirror/root/$1"
RSYNC_HOST="$2"
RSYNC_DIR="$3"

# Kept as an array so the option and its path stay two separate, intact words
# and an absent password file contributes no argument at all.
RSYNC_PASSWORD_ARGS=()
if [ $# -ge 4 ]; then
    RSYNC_PASSWORD_ARGS=(--password-file "$HOME/passwords/$4")
fi

ADDRESS="$(cat ~/config/ADDRESS)"

if test -n "$RSYNC_USER"; then
    RSYNC_HOST="${RSYNC_USER}@${RSYNC_HOST}"
fi

mkdir -p "$TO"

exec nice rsync -aH --no-owner --no-group --delete-after --delay-updates --safe-links \
    --timeout=3600 -4 --address="$ADDRESS" \
    --exclude '.~tmp~/' \
    --quiet --stats --log-file=/home/mirror/merlin/logs/transfer.log \
    "${RSYNC_PASSWORD_ARGS[@]}" \
    "rsync://${RSYNC_HOST}/${RSYNC_DIR}" "$TO"
#$RSYNC_HOST::$RSYNC_DIR $TO

@ -0,0 +1,25 @@
#!/bin/bash -e
# csc-sync-standard-ipv6: mirror an rsync daemon module into
# /mirror/root/<local_dir> over IPv6.
#
# Usage: csc-sync-standard-ipv6 local_dir rsync_host rsync_dir
# Environment:
#   RSYNC_USER  when non-empty, connect as RSYNC_USER@rsync_host
# The IPv6 source address to bind to is read from ~/config/ADDRESS_V6.
if [ $# -lt 3 ]; then
    echo 'Usage: sync local_dir rsync_host rsync_dir'
    exit 1
fi

umask 002

TO="/mirror/root/$1"
RSYNC_HOST="$2"
RSYNC_DIR="$3"
ADDRESS="$(cat ~/config/ADDRESS_V6)"

if test -n "$RSYNC_USER"; then
    RSYNC_HOST="${RSYNC_USER}@${RSYNC_HOST}"
fi

mkdir -p "$TO"

# Expansions are quoted so arguments are never word-split or glob-expanded.
exec nice rsync -aH --no-owner --no-group --delete-after --delay-updates --safe-links \
    --timeout=3600 -6 --address="$ADDRESS" \
    --exclude '.~tmp~/' \
    --quiet --stats --log-file=/home/mirror/merlin/logs/transfer.log \
    "${RSYNC_HOST}::${RSYNC_DIR}" "$TO"

@ -0,0 +1,17 @@
#!/bin/bash -e
# csc-sync-wget: mirror an HTTP(S)/FTP location into /mirror/root/<local_dir>
# with wget.
#
# Usage: csc-sync-wget local_dir path cut
#   local_dir  directory name under /mirror/root to download into
#   path       source URL handed to wget
#   cut        number of leading remote directory components to drop
#              (wget --cut-dirs)
# The local address to bind to is read from ~/config/ADDRESS.
if [ $# -lt 3 ]; then
    echo 'Usage: sync local_dir path cut'
    exit 1
fi

umask 002

TO="/mirror/root/$1"
SOURCE_PATH="$2"
CUT="$3"
ADDRESS="$(cat ~/config/ADDRESS)"

# wget writes relative to the current directory, hence the cd.
mkdir -p "$TO"
cd "$TO"

# Expansions are quoted so arguments are never word-split or glob-expanded.
exec nice wget -q --bind-address="$ADDRESS" --mirror --no-parent --no-host-directories --cut-dirs="$CUT" --content-disposition --execute robots=off --recursive --reject "*\?*" "$SOURCE_PATH"

@ -0,0 +1,31 @@
#!/usr/bin/python
# make-torrents: for every sufficiently large regular file directly under
# `base` that has no accompanying .torrent file yet, run the metafile maker
# and copy the resulting .torrent to `scpto` with scp.
# Python 2 script (it uses print statements).
# NOTE(review): this copy of the file has lost its indentation, so the exact
# nesting of the statements at the end (which of them sit inside the
# `if not os.path.exists(...)` test) cannot be confirmed from here.
import os.path
# Directory scanned for media files.
base = '/mirror/root/csclub'
# URL prefix under which the same files are served; used for --httpseeds.
htbase = 'http://csclub.uwaterloo.ca/media/files'
# Tracker announce URL handed to the metafile maker.
announce = 'http://bittorrent.csclub.uwaterloo.ca/announce'
# scp destination for the generated .torrent files.
scpto = 'bittorrent.csclub:/var/lib/bnbt/torrents/'
comment = 'Produced by the University of Waterloo Computer Science Club (http://csclub.uwaterloo.ca/)'
# Only files strictly larger than this many bytes get a torrent.
minsize = 10*1024*1024 # 10 MiB
# External programs, invoked through os.spawnl.
btmake = '/usr/bin/btmakemetafile.bittornado'
scp = '/usr/bin/scp'
# Candidate files: not already a .torrent, not hidden, not a directory, and
# larger than minsize.
mediafiles = [ file for file in os.listdir(base) if
not file.endswith('.torrent') and
not os.path.basename(file).startswith('.') and
not os.path.isdir(base + '/' + file) and
os.path.getsize(base + '/' + file) > minsize
]
for file in mediafiles:
path = base + '/' + file
torrentpath = path + '.torrent'
htpath = htbase + '/' + file
# Skip files whose .torrent already exists.
if not os.path.exists(torrentpath):
print "Making torrent for %s..." % torrentpath
# os.P_WAIT blocks until the child exits; the return codes are not checked.
os.spawnl(os.P_WAIT, btmake, btmake, announce, path,
'--comment', comment, '--target', torrentpath,
'--httpseeds', htpath)
os.spawnl(os.P_WAIT, scp, scp, torrentpath, scpto)
print "The bittorrent tracker will begin tracking new torrents within five minutes."

@ -0,0 +1,272 @@
#!/usr/bin/python
import os, sys
from os.path import join
import pickle
import re
import ConfigParser
import pprint
import time
import xmlrpclib
import base64
import bz2
import socket
class HostConfig(object):
    """Holder for config info from the configuration file"""

    def __init__(self):
        # 'version' starts at 0; every named section starts as its own empty
        # dict.
        self.config = {'version': 0}
        for section_name in ('global', 'site', 'host', 'stats'):
            self.config[section_name] = {}
_translation = [chr(_x) for _x in range(256)]
def _translate(s, altchars):
translation = _translation[:]
for k, v in altchars.items():
translation[ord(k)] = v
return s.translate(''.join(translation))
def urlsafe_b64encode(s):
    """Encode s with the URL- and filesystem-safe Base64 alphabet.

    '+' and '/' of the standard alphabet are replaced by '-' and '_'; the
    result has no trailing newline.

    This delegates to the standard library instead of carrying a private copy
    of its implementation (whose `altchars is not None` test was always true,
    leaving the final `return encoded` unreachable).
    """
    import base64
    return base64.urlsafe_b64encode(s)
def gen_dirtree(path):
    """Walk path and return a dict with one entry per directory found.

    Keys are directory paths relative to path ('' for path itself). The
    intended structure is:

        {
            dirpath: {filename1: size1, filename2: size2, ...},
            ...
        }

    but every value is left as an empty dict.
    2009-03-09: MM's web app ignores the statfiles dict. So don't bother
    generating it.
    """
    # Number of leading characters to drop from each walked directory: the
    # root itself plus the separator that follows it, unless the root already
    # ends with one.
    if path.endswith('/'):
        prefix_len = len(path)
    else:
        prefix_len = len(path) + 1

    dirtree = {}
    for dirpath, _dirnames, _filenames in os.walk(path):
        dirtree[dirpath[prefix_len:]] = {}
    return dirtree
def errorprint(error):
    """Write the message `error`, followed by a newline, to standard error."""
    line = ''.join((error, '\n'))
    sys.stderr.write(line)
class MissingOption(Exception):
    """Raised when a required option is absent from a config section."""
def check_required_options(conf, section, required_options):
    """Verify that every option in required_options exists in section.

    Returns True when all of them are present. On the first missing option an
    error line is written via errorprint and MissingOption is raised.
    """
    for option in required_options:
        if conf.has_option(section, option):
            continue
        errorprint('missing required option %s in config [%s]' % (option, section))
        raise MissingOption()
    return True
def parse_value(value):
    """Split multi-line values into a list

    A value containing a newline is split on whitespace and returned as a
    list; any other value is returned unchanged.
    """
    return value.split() if '\n' in value else value
def parse_section(conf, section, item, required_options, optional_options=[]):
    """Copy one config section into item.config[section.lower()].

    conf is the parsed configuration and item an object with a `config` dict.
    Returns False when the section is disabled and was already present in
    item.config (the entry is removed), or when check_required_options
    returns a false value; True once the options are stored. A missing
    required option propagates as MissingOption from check_required_options.
    """
    key = section.lower()

    # A section explicitly disabled ('enabled' != '1') that we already hold is
    # dropped from item.config.
    if conf.has_option(section, 'enabled'):
        if conf.get(section, 'enabled') != '1' and key in item.config:
            print('removing disabled section %s' % (section))
            del item.config[key]
            return False

    if not check_required_options(conf, section, required_options):
        return False

    settings = item.config.setdefault(key, {})

    for name in required_options:
        settings[name] = parse_value(conf.get(section, name))
    for name in optional_options:
        if conf.has_option(section, name):
            settings[name] = parse_value(conf.get(section, name))
    return True
def parse_global(conf, section, item):
    """Parse the global section, which must provide 'enabled' and 'server'.

    Returns True on success; otherwise prints an error and returns False.
    """
    if parse_section(conf, section, item, ['enabled', 'server']):
        return True
    errorprint('missing required options (server AND enabled) in [%s] section' % (section))
    return False
def parse_site(conf, section, item):
    """Parse the site section; requires 'enabled', 'name' and 'password'.

    Returns whatever parse_section returns.
    """
    return parse_section(conf, section, item, ['enabled', 'name', 'password'])
def parse_host(conf, section, item):
    """Parse the host section.

    'enabled' and 'name' are required; 'user_active' is copied when present.
    Returns whatever parse_section returns.
    """
    return parse_section(conf, section, item,
                         ['enabled', 'name'],
                         optional_options=['user_active'])
def get_stats(conf, section):
    """Collect the contents of the files named in a stats section.

    Every option in ``section`` other than 'enabled' names one file, or
    several files when its value spans multiple lines (see parse_value).

    Returns None when the section sets 'enabled' to anything other than '1'.
    Otherwise returns a dict mapping each option name to the pickled
    (highest protocol) list of lines read from its files.  Files that cannot
    be opened or read are skipped; an option with no readable file at all is
    left out of the result.
    """
    if conf.has_option(section, 'enabled'):
        if conf.get(section, 'enabled') != '1':
            return None

    statsdata = {}
    for name in conf.options(section):
        if name == 'enabled':
            continue

        filenames = parse_value(conf.get(section, name))
        if not isinstance(filenames, list):
            filenames = [filenames]

        # The accumulator must exist before the first file is read.  It used
        # to be referenced without ever being assigned, and the resulting
        # error was swallowed, so no statistics were ever collected.
        contents = []
        for fn in filenames:
            try:
                f = open(fn, 'r')
                try:
                    contents = contents + f.readlines()
                finally:
                    # Close the handle even when reading fails.
                    f.close()
            except (IOError, OSError):
                # Best effort: an unreadable stats file is skipped, but
                # programming errors are no longer hidden.
                continue
            statsdata[name] = pickle.dumps(contents, -1)
    return statsdata
def parse_category(conf, section, item, crawl):
    """Load a category section and optionally attach its directory tree.

    'enabled' and 'path' are required.  When ``crawl`` is true the tree
    produced by gen_dirtree for the configured path is stored under the
    'dirtree' key.  The 'path' entry is then removed from the stored section.

    Returns False when parse_section fails and True on success.  The success
    path used to fall through and return None; it now agrees with the other
    parse_* functions.
    """
    required_options = ['enabled', 'path']
    if not parse_section(conf, section, item, required_options):
        return False

    key = section.lower()
    if crawl:
        item.config[key]['dirtree'] = gen_dirtree(conf.get(section, 'path'))
    # database doesn't need to know the disk path
    del item.config[key]['path']
    return True
def config(cfg, item, crawl=True):
    """Read the configuration file ``cfg`` into ``item.config``.

    The [global], [site] and [host] sections are mandatory.  Every other
    section except [stats] is treated as a category and handed to
    parse_category together with ``crawl``.

    Returns False when the file cannot be read or a mandatory section's
    parser reports failure, and True otherwise.  Exits the process with
    status 1 when a mandatory section is missing or a required option is
    absent (MissingOption).
    """
    conf = ConfigParser.ConfigParser()
    # read() returns the list of files it parsed; an empty list means cfg was
    # not found.  The file is read once only (it used to be parsed twice).
    if not conf.read(cfg):
        errorprint('Configuration file %s not found' % (cfg))
        return False

    try:
        # don't grab parse_stats here
        mandatory = [('global', parse_global),
                     ('site', parse_site),
                     ('host', parse_host)]
        for section, parsefunc in mandatory:
            if not conf.has_section(section):
                errorprint('Invalid configuration - missing section [%s]' % (section))
                sys.exit(1)
            if not parsefunc(conf, section, item):
                return False

        for section in conf.sections():
            if section in ['global', 'site', 'host', 'stats']:
                continue
            parse_category(conf, section, item, crawl)
    except MissingOption:
        errorprint('Invalid configuration - Exiting')
        sys.exit(1)

    return True
def main():
    """Command-line entry point.

    Parses the options, builds the host configuration (optionally starting
    from a previously pickled one given with --input), optionally dumps it
    (--debug, --output), and, unless --no-send is given, sends the
    bz2-compressed, base64-encoded pickle to the XML-RPC server named in the
    [global] section.

    Exits with status 1 when the configuration is invalid, when [global] is
    not enabled, or when the server reports an XML-RPC fault.
    """
    from optparse import OptionParser
    parser = OptionParser(usage=sys.argv[0] + " [options]")
    parser.add_option("-c", "--config",
                      dest="config",
                      default='/home/mirror/mirrormanager-client/report_mirror.conf',
                      help='Configuration filename (required)')
    parser.add_option("-s", "--stats",
                      action="store_true",
                      dest="stats",
                      default=False,
                      help='Send stats')
    parser.add_option("-i", "--input",
                      dest="input",
                      default=None,
                      help="Input filename (for debugging)")
    parser.add_option("-o", "--output",
                      dest="output",
                      default=None,
                      help="Output filename (for debugging)")
    parser.add_option("-n", "--no-send",
                      action="store_true",
                      dest="no_send",
                      default=False,
                      help="Don't send data to the server.")
    parser.add_option("-d", "--debug",
                      action="store_true",
                      dest="debug",
                      default=False,
                      help='Enable debugging output')
    (options, args) = parser.parse_args()

    item = HostConfig()
    if options.input:
        # Debugging aid: start from a previously pickled configuration and
        # skip the directory crawl.
        # NOTE(review): pickle.load can execute arbitrary code; only feed it
        # files you produced yourself with --output.
        infile = open(options.input, 'rb')
        try:
            item.config = pickle.load(infile)
        finally:
            infile.close()
        crawl = False
    else:
        crawl = True
    if not config(options.config, item, crawl=crawl):
        sys.exit(1)

    p = pickle.dumps(item.config, -1)

    if options.debug:
        pp = pprint.PrettyPrinter(indent=4)
        pp.pprint(item.config)

    if options.output is not None:
        outfile = open(options.output, 'wb')
        try:
            outfile.write(p)
        finally:
            outfile.close()

    statdata = None
    if options.stats:
        # config() does not hand back its parser, so load the file again to
        # reach the [stats] section.  This used to reference an undefined
        # name and crashed whenever --stats was given.
        stats_conf = ConfigParser.ConfigParser()
        stats_conf.read(options.config)
        if stats_conf.has_section('stats'):
            statdata = get_stats(stats_conf, 'stats')

    # upload p and statsdata here
    if ('global' not in item.config
            or 'enabled' not in item.config['global']
            or item.config['global']['enabled'] != '1'):
        sys.exit(1)

    if not options.no_send:
        # print "Connecting to %s" % item.config['global']['server']
        server = xmlrpclib.ServerProxy(item.config['global']['server'])

        data = None
        try:
            data = base64.urlsafe_b64encode(bz2.compress(p))
        except AttributeError:
            # NOTE(review): presumably a file-level fallback for Pythons whose
            # base64 module lacks urlsafe_b64encode -- confirm it is defined.
            data = urlsafe_b64encode(bz2.compress(p))

        if data is not None:
            try:
                print(server.checkin(data))
            except socket.error as m:
                # socket.error usually carries (errno, message), but timeouts
                # and similar carry a single argument; indexing [1] blindly
                # raised IndexError for those.
                if len(m.args) > 1:
                    reason = m.args[1]
                else:
                    reason = m
                print("Error checking in: %s. Please try again later." % (reason,))
            except xmlrpclib.Fault:
                print("Error checking in. Connection closed before checkin complete. Please try again later.")
                sys.exit(1)


if __name__ == '__main__':
    main()

@ -0,0 +1,3 @@
#!/bin/sh
# This file is used for ubuntu push mirroring. Do not remove it!
# Replaces this shell with ~/bin/csc-sync-standard, passing three arguments
# (presumably local project name, upstream rsync host and rsync module --
# TODO confirm against csc-sync-standard).
exec ~/bin/csc-sync-standard ubuntu-releases rsync.releases.ubuntu.com releases

@ -0,0 +1,10 @@
#!/bin/bash
# Run /usr/local/bin/zfssync.py (via sudo) for a single project.
# Usage: zfssync PROJECT
# Runs for the same project are serialised through an exclusive lock on
# /tmp/PROJECT.lock.
if [ $# -ne 1 ]; then
    echo 'Usage: project' 1>&2
    exit 1
fi

# Open the lock file on fd 200 and wait for the exclusive lock; the fd (and
# with it the lock) is inherited by the sudo child and released on exit.
# "$1" is quoted so a project name containing spaces or glob characters is
# passed through as one word.
exec 200>"/tmp/$1.lock"
flock 200 || exit 1
sudo /usr/local/bin/zfssync.py "$1"

@ -0,0 +1 @@
2620:101:f000:4901:c5c::f:1055

@ -0,0 +1,7 @@
# ~/.bash_logout: executed by bash(1) when login shell exits.
# when leaving the console clear the screen to increase privacy
# (only when SHLVL is 1, i.e. this is the outermost shell rather than a
# nested one, and only if clear_console is installed and executable)
if [ "$SHLVL" = 1 ]; then
[ -x /usr/bin/clear_console ] && /usr/bin/clear_console -q
fi

@ -0,0 +1,11 @@
# ~/.bash_profile: executed by bash(1) for login shells.
# include .bashrc if it exists
# (sourced into the current shell, so its settings apply to login shells too)
if [ -f ~/.bashrc ]; then
. ~/.bashrc
fi
# set PATH so it includes user's private bin if it exists
# (~/bin is prepended, so commands there take precedence over system ones)
if [ -d ~/bin ] ; then
PATH=~/bin:"${PATH}"
fi

@ -0,0 +1,46 @@
# ~/.bashrc: executed by bash(1) for non-login shells.
# If not running interactively, don't do anything
# (PS1 is empty in that case, so everything below is skipped)
[ -z "$PS1" ] && return
# ignoreboth: keep neither duplicate lines nor lines starting with a space
# in the history
export HISTCONTROL=ignoreboth
# check the window size after each command and, if necessary,
# update the values of LINES and COLUMNS.
shopt -s checkwinsize
# make less more friendly for non-text input files, see lesspipe(1)
[ -x /usr/bin/lesspipe ] && eval "$(lesspipe)"
# A little nice prompt.
# Shows "[ branch ]" (the line "git branch" marks with "*", empty outside a
# git repository), then user@host:cwd, each part in its own colour.
PS1='\[\033[01;33m\][`git branch 2>/dev/null|cut -f2 -d\* -s` ]\[\033[01;32m\]\u@\[\033[00;36m\]\h\[\033[01m\]:\[\033[01;34m\]\w\[\033[00m\]\$ '
# If this is an xterm set the title to user@host:dir
case "$TERM" in
xterm*|rxvt*)
PROMPT_COMMAND='echo -ne "\033]0;${USER}@${HOSTNAME}: ${PWD/$HOME/~}\007"'
;;
*)
;;
esac
# Alias definitions.
# enable color support of ls and also add handy aliases
# (dircolors -b prints the shell commands that set LS_COLORS; eval applies them)
eval "`dircolors -b`"
alias ls='ls --color=auto'
alias ll='ls -l'
alias la='ls -A'
alias l='ls -CF'
# -i makes cp and mv ask before overwriting an existing file
alias cp='cp -i'
alias mv='mv -i'
alias ..='cd ..'
# enable programmable completion features (you don't need to enable
# this, if it's already enabled in /etc/bash.bashrc and /etc/profile
# sources /etc/bash.bashrc).
if [ -f /etc/bash_completion ]; then
. /etc/bash_completion
fi

@ -0,0 +1,257 @@
Archvsync
=========
This is the central repository for the Debian mirror scripts. The scripts
in this repository are written for the purposes of maintaining a Debian
archive mirror (and shortly, a Debian bug mirror), but they should be
easily generalizable.
Currently the following scripts are available:
* ftpsync - Used to sync an archive using rsync
* runmirrors - Used to notify leaf nodes of available updates
* dircombine - Internal script to manage the mirror user's $HOME
on debian.org machines
* typicalsync - Generates a typical Debian mirror
* udh - We are lazy, just a shorthand to avoid typing the
commands, ignore... :)
Usage
=====
For impatient people, short usage instruction:
- Create a dedicated user for the whole mirror.
- Create a separate directory for the mirror, writable by the new user.
- Place the ftpsync script in the mirror user's $HOME/bin (or just $HOME)
- Place the ftpsync.conf.sample into $HOME/etc as ftpsync.conf and edit
it to suit your system. You should at the very least change the TO=
and RSYNC_HOST lines.
- Create $HOME/log (or wherever you point $LOGDIR to)
- Setup the .ssh/authorized_keys for the mirror user and place the public key of
your upstream mirror into it. Preface it with
no-port-forwarding,no-X11-forwarding,no-agent-forwarding,no-pty,command="~/bin/ftpsync",from="IPADDRESS"
and replace IPADDRESS with the IP address of your upstream mirror.
- You are finished
In order to receive different pushes or syncs from different archives,
name the config file ftpsync-$ARCHIVE.conf and call the ftpsync script
with the commandline "sync:archive:$ARCHIVE". Replace $ARCHIVE with a
sensible value. If your upstream mirror pushes you using runmirrors
bundled together with this sync script, you do not need to add the
"sync:archive" parameter to the commandline, the scripts deal with it
automatically.
Debian mirror script minimum requirements
=========================================
As always, you may use whatever scripts you want for your Debian mirror,
but we *STRONGLY* recommend you to not invent your own. However, if you
want to be listed as a mirror it *MUST* support the following minimal
functionality:
- Must perform a 2-stage sync
The archive mirroring must be done in 2 stages. The first rsync run
must ignore the index files. The correct exclude options for the
first rsync run are:
--exclude Packages* --exclude Sources* --exclude Release* --exclude ls-lR*
The first stage must not delete any files.
The second stage should then transfer the above excluded files and
delete files that no longer belong on the mirror.
Rationale: If archive mirroring is done in a single stage, there will be
periods of time during which the index files will reference files not
yet mirrored.
- Must not ignore pushes whil(e|st) running.
If a push is received during a run of the mirror sync, it MUST NOT
be ignored. The whole synchronization process must be rerun.
Rationale: Most implementations of Debian mirror scripts will leave the
mirror in an inconsistent state in the event of a second push being
received while the first sync is still running. It is likely that in
the near future, the frequency of pushes will increase.
- Should understand multi-stage pushes.
The script should parse the arguments it gets via ssh, and if they
contain a hint to only sync stage1 or stage2, then ONLY those steps
SHOULD be performed.
Rationale: This enables us to coordinate the timing of the first
and second stage pushes and minimize the time during which the
archive is desynchronized. This is especially important for mirrors
that are involved in a round robin or GeoDNS setup.
The minimum arguments the script has to understand are:
sync:stage1 Only sync stage1
sync:stage2 Only sync stage2
sync:all Do everything. Default if none of stage1/2 are
present.
There are more possible arguments, for a complete list see the
ftpsync script in our git repository.
ftpsync
=======
This script is based on the old anonftpsync script. It has been rewritten
to add flexibility and fix a number of outstanding issues.
Some of the advantages of the new version are:
- Nearly every aspect is configurable
- Correct support for multiple pushes
- Support for multi-stage archive synchronisations
- Support for hook scripts at various points
- Support for multiple archives, even if they are pushed using one ssh key
- Support for multi-hop, multi-stage archive synchronisations
Correct support for multiple pushes
-----------------------------------
When the script receives a second push while it is running and syncing
the archive it won't ignore it. Instead it will rerun the
synchronisation step to ensure the archive is correctly synchronised.
Scripts that fail to do that risk ending up with an inconsistent archive.
Can do multi-stage archive synchronisations
-------------------------------------------
The script can be told to only perform the first or second stage of the
archive synchronisation.
This enables us to send all the binary packages and sources to a
number of mirrors, and then tell all of them to sync the
Packages/Release files at once. This will keep the timeframe in which
the mirrors are out of sync very small and will greatly help things like
DNS RR entries or even the planned GeoDNS setup.
Multi-hop, multi-stage archive synchronisations
-----------------------------------------------
The script can be told to perform a multi-hop multi-stage archive
synchronisation.
This is basically the same as the multi-stage synchronisation
explained above, but enables the downstream mirror to push its own
staged/multi-hop downstreams before returning. This has the same
advantage as the multi-stage synchronisation but allows us to do
this over multiple levels of mirrors. (Imagine one push going from
Europe to Australia, where 3 other mirrors are then updated locally
before stage2 is sent out, instead of transferring the data from Europe
to Australia 4 times just to have them all updated near-instantly.)
Can run hook scripts
--------------------
ftpsync currently allows 5 hook scripts to run at various points of the
mirror sync run.
Hook1: After lock is acquired, before first rsync
Hook2: After first rsync, if successful
Hook3: After second rsync, if successful
Hook4: Right before leaf mirror triggering
Hook5: After leaf mirror trigger (only if we have slave mirrors; HUB=true)
Note that Hook3 and Hook4 are likely to be called directly after each other.
The difference is that Hook3 is called *every* time the second rsync
succeeds even if the mirroring needs to re-run due to a second push.
Hook4 is only executed if mirroring is completed.
Support for multiple archives, even if they are pushed using one ssh key
------------------------------------------------------------------------
If you get multiple archives from your upstream mirror (say Debian,
Debian-Backports and Volatile), previously you had to use 3 different ssh
keys to be able to automagically synchronize them. This script can do it
all with just one key, if your upstream mirror tells you which archive.
See "Commandline/SSH options" below for further details.
For details of all available options, please see the extensive documentation
in the sample configuration file.
Commandline/SSH options
=======================
Script options may be set either on the local command line, or passed by
specifying an ssh "command". Local commandline options always have
precedence over the SSH_ORIGINAL_COMMAND ones.
Currently this script understands the options listed below. To make them
take effect they MUST be prepended by "sync:".
Option Behaviour
stage1 Only do stage1 sync
stage2 Only do stage2 sync
all Do a complete sync (default)
mhop Do a multi-hop sync
archive:foo Sync archive foo (if the file $HOME/etc/ftpsync-foo.conf
exists and is configured)
callback Call back when done (needs proper ssh setup for this to
work). It will always use the "command" callback:$HOSTNAME
where $HOSTNAME is the one defined in config and
will happen before slave mirrors are triggered.
So, to get the script to sync all of the archive behind bpo and call back when
it is complete, use an upstream trigger of
ssh $USER@$HOST sync:all sync:archive:bpo sync:callback
Mirror trace files
==================
Every mirror needs to have a 'trace' file under project/trace.
The file format is as follows:
The filename has to be the full hostname (eg. hostname -f), or in the
case of a mirror participating in RR DNS (where users will never use
the hostname) the name of the DNS RR entry (eg. security.debian.org
for the security rotation).
The content has (no leading spaces):
Sat Nov 8 13:20:22 UTC 2008
Used ftpsync version: 42
Running on host: steffani.debian.org
First line: Output of date -u
Second line: Freeform text containing the program name and version
Third line: Text "Running on host: " followed by hostname -f
The third line MUST NOT be the DNS RR name, even if the mirror is part
of it. It MUST BE the host's own name. This is in contrast to the filename,
which SHOULD be the DNS RR name.
runmirrors
==========
This script is used to tell leaf mirrors that it is time to synchronize
their copy of the archive. This is done by parsing a mirror list and
using ssh to "push" the leaf nodes. You can read much more about the
principle behind the push at [1], essentially it tells the receiving
end to run a pre-defined script. As the whole setup is extremely limited
and the ssh key is not usable for anything else than the pre-defined
script this is the most secure method for such an action.
This script supports two types of pushes: The normal single stage push,
as well as the newer multi-stage push.
The normal push, as described above, will simply push the leaf node and
then go on with the other nodes.
The multi-staged push first pushes a mirror and tells it to only do a
stage1 sync run. Then it waits for the mirror (and all others being pushed
in the same run) to finish that run, before it tells all of the staged
mirrors to do the stage2 sync.
This way you can do a nearly-simultaneous update of multiple hosts.
This is useful in situations where periods of desynchronization should
be kept as small as possible. Examples of scenarios where this might be
useful include multiple hosts in a DNS Round Robin entry.
For details on the mirror list please see the documented
runmirrors.mirror.sample file.
[1] http://blog.ganneff.de/blog/2007/12/29/ssh-triggers.html

@ -0,0 +1,62 @@
#!/usr/bin/perl
# Uses symlinks to merge the files contained in a set of vcs
# checkouts into a single directory. Keeps track of when files are
# removed from the merged directories and removes the symlinks.
#
# Only merges files that match the specified pattern.
#
# Note that the directories given to merge should be paths that will work
# for symlink targets from the destination directory (so either full paths,
# or they should be right inside the destination directory).
#
# Note that other files in the destination directory will be left as-is.
#
# Copyright 2006 by Joey Hess, licensed under the GPL.
if (! @ARGV) {
die "usage: dircombine include-pattern dest dir1 [dir2 ...]\n";
}
my $pattern=shift;
my $dest=shift;
foreach my $dir (@ARGV) {
my %known;
# Link in each thing from the dir.
opendir(DIR, $dir) || die "opendir: $!";
while ($_=readdir(DIR)) {