mirror/merlin/common/common.go

351 lines
9.9 KiB
Go

package common
import (
"fmt"
"os"
"os/exec"
"time"
ini "gopkg.in/ini.v1"
)
// Sync intervals, in seconds. Each one has a matching human-readable name in
// the frequencies map.
const (
	FIVE_MINUTELY = 5 * 60
	TEN_MINUTELY  = 10 * 60
	TWICE_HOURLY  = HOURLY / 2
	HOURLY        = 60 * 60
	BI_HOURLY     = 2 * HOURLY
	TRI_HOURLY    = 3 * HOURLY
	TWICE_DAILY   = DAILY / 2
	DAILY         = 24 * HOURLY
)

// Location of the config file, and the values GetConfig uses for any global
// setting that the config file does not override.
const (
	// relative path of the INI config file
	CONFIG_PATH = "merlin-config.ini"

	// default limit on concurrently running jobs
	DEFAULT_MAX_JOBS = 6
	// default per-repo time limit, in seconds (a quarter of a day)
	DEFAULT_MAX_TIME = 6 * HOURLY

	DEFAULT_DOWNLOAD_DIR    = "/mirror/root"
	DEFAULT_PASSWORD_DIR    = "/home/mirror/passwords"
	DEFAULT_STATE_DIR       = "/home/mirror/merlin/states"
	DEFAULT_LOG_DIR         = "/home/mirror/merlin/logs"
	DEFAULT_RSYNC_LOG_DIR   = "/home/mirror/merlin/logs-rsync"
	DEFAULT_ZFSSYNC_LOG_DIR = "/home/mirror/merlin/logs-zfssync"
	DEFAULT_SOCK_PATH       = "/run/merlin/merlin.sock"
)
var frequencies = map[string]int{
"daily": DAILY,
"twice-daily": TWICE_DAILY,
"hourly": HOURLY,
"twice-hourly": TWICE_HOURLY,
"bi-hourly": BI_HOURLY,
"tri-hourly": TRI_HOURLY,
"ten-minutely": TEN_MINUTELY,
"five-minutely": FIVE_MINUTELY,
}
// Last job attempt statuses.
// These values are stored in RepoState.LastAttemptExit, which SaveState writes
// to the repo's state file, so reordering this list would change the meaning
// of state files that already exist.
const (
// no job has been attempted yet (the initial state GetConfig gives a repo)
NOT_RUN_YET = iota
// the job completed
SUCCESS
// the job failed
FAILURE
TERMINATED // was killed by a signal
)
// Result is the value a repo writes to its DoneChan when it has finished a job.
type Result struct {
// the name of the repo that finished
Name string
// the status of the finished job
// (presumably one of the job attempt status constants; confirm against the sender)
Exit int
}
// Repo describes one mirrored repository: its settings from the config file
// (fields with an ini key) plus the runtime fields that GetConfig fills in
// (fields tagged `ini:"-"`).
type Repo struct {
// the name of this repo (GetConfig uses the name of its config section)
Name string `ini:"-"`
// this should be one of "csc-sync-standard", etc.
SyncType string `ini:"sync_type"`
// a human-readable frequency, e.g. "bi-hourly"
FrequencyStr string `ini:"frequency"`
// the desired interval (in seconds) between successive runs;
// GetConfig looks this up from FrequencyStr
Frequency int `ini:"-"`
// the maximum time (in seconds) that each child process of this repo
// can run for before being killed
MaxTime int `ini:"max_time"`
// where to download the files for this repo (relative to the download
// dir in the config)
LocalDir string `ini:"local_dir"`
// the remote host to rsync from
RsyncHost string `ini:"rsync_host"`
// the remote directory on the rsync host
RsyncDir string `ini:"rsync_dir"`
// the rsync user (optional)
RsyncUser string `ini:"rsync_user"`
// the file storing the password for rsync (optional)
PasswordFile string `ini:"password_file"`
// the file for general logging of this repo
LoggerFile string `ini:"log_file"`
// a reference to the general logger
Logger *Logger `ini:"-"`
// the file for logging this repo's rsync
RsyncLogFile string `ini:"rsync_log_file"`
// the file for logging this repo's zfssync
ZfssyncLogFile string `ini:"zfssync_log_file"`
// the repo will write its name and status in a Result struct to DoneChan
// when it has finished a job (shared by all repos)
DoneChan chan<- Result `ini:"-"`
// the repo should stop syncing if StopChan is closed (shared by all repos)
StopChan chan struct{} `ini:"-"`
// a struct that stores the repo's status; it is held by value, so it must
// be updated through the Repo (or a pointer to this field), not through a copy
State RepoState `ini:"-"`
// a reference to the global config
cfg *Config `ini:"-"`
}
// Config holds the global settings of merlin together with the list of repos.
// GetConfig fills it from built-in defaults and then from the config file.
type Config struct {
// the maximum number of jobs allowed to execute concurrently
MaxJobs int `ini:"max_jobs"`
// the IP addresses to use for rsync (GetConfig requires both to be set)
IPv4Address string `ini:"ipv4_address"`
IPv6Address string `ini:"ipv6_address"`
// the default sync type
SyncType string `ini:"default_sync_type"`
// the default frequency string for the repos
FrequencyStr string `ini:"default_frequency"`
// the default MaxTime for each repo
MaxTime int `ini:"default_max_time"`
// the directory where rsync should download files
DownloadDir string `ini:"download_dir"`
// the directory where rsync passwords are stored
PasswordDir string `ini:"password_dir"`
// the directory where the state of each repo sync is saved
// (one file per repo, named after the repo)
StateDir string `ini:"states_dir"`
// the directory where merlin will store the general logs for each repo
LoggerDir string `ini:"log_dir"`
// the directory to store the rsync logs for each repo
RsyncLogDir string `ini:"rsync_log_dir"`
// the directory to store the zfssync logs for each repo
ZfssyncLogDir string `ini:"zfssync_log_dir"`
// the Unix socket path which arthur will use to communicate with us
SockPath string `ini:"sock_path"`
// a list of all of the repos
Repos []*Repo `ini:"-"`
}
// RepoState is the persisted status of a repo. SaveState writes it to the
// repo's state file and GetConfig reads it back on startup.
// This should only be modified by the main thread
type RepoState struct {
// these are stored in the states folder
// whether this repo is running a job or not
IsRunning bool `ini:"is_running"`
// the Unix epoch timestamp at which this repo last attempted a job
LastAttemptStartTime int64 `ini:"last_attempt_time"`
// the number of seconds this repo ran for during its last attempted job
LastAttemptRunTime int64 `ini:"last_attempt_runtime"`
// the status of the last attempt: one of the job attempt status constants
// (NOT_RUN_YET, SUCCESS, FAILURE, TERMINATED)
LastAttemptExit int `ini:"last_attempt_exit"`
}
// SaveState writes the current state of the repo to its state file, which is
// the file named after the repo inside the configured state directory.
//
// Failures are reported through the repo's logger and are not returned. The
// file is truncated before writing so that a state that serializes to fewer
// bytes than the previous one (for example "true" replacing "false") does not
// leave stale bytes at the end of the file.
func (repo *Repo) SaveState() {
	stateCfg := ini.Empty()
	if err := ini.ReflectFrom(stateCfg, &repo.State); err != nil {
		repo.Logger.Error(err.Error())
		// nothing valid to write; keep whatever is already on disk
		return
	}

	file, err := os.OpenFile(repo.cfg.StateDir+"/"+repo.Name, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644)
	if err != nil {
		repo.Logger.Error(err.Error())
		return
	}
	// Close reports write-back errors, so it is checked rather than ignored.
	defer func() {
		if err := file.Close(); err != nil {
			repo.Logger.Error(err.Error())
		}
	}()

	if _, err := stateCfg.WriteTo(file); err != nil {
		repo.Logger.Error(err.Error())
	}
}
// RunIfPossible starts a sync job for this repo when it is due.
// A job is due when the repo is not already running one and more than
// repo.Frequency seconds have passed since its last attempt started.
// The new state is saved before the job is launched in its own goroutine.
// It returns true iff a job was started.
func (repo *Repo) RunIfPossible() bool {
	now := time.Now().Unix()
	sinceLastAttempt := now - repo.State.LastAttemptStartTime
	if repo.State.IsRunning || sinceLastAttempt <= int64(repo.Frequency) {
		return false
	}

	repo.State.IsRunning = true
	repo.State.LastAttemptStartTime = now
	repo.SaveState()

	repo.Logger.Info(fmt.Sprintf("Repo %s has started syncing", repo.Name))
	go repo.StartSyncJob()
	return true
}
// zfsSync runs /home/mirror/bin/zfssync for the repo and, when the command
// succeeds, appends its combined stdout and stderr to the repo's zfssync log.
//
// Every failure (the command itself, opening the log, writing or closing it)
// is reported through the repo's logger; nothing is returned.
func zfsSync(repo *Repo) {
	out, err := exec.Command("/home/mirror/bin/zfssync", repo.Name).CombinedOutput()
	if err != nil {
		repo.Logger.Error(err.Error())
		return
	}

	f, err := os.OpenFile(repo.ZfssyncLogFile, os.O_WRONLY|os.O_CREATE|os.O_APPEND, 0644)
	if err != nil {
		repo.Logger.Error(err.Error())
		return
	}
	// This runs after every successful sync, so the handle must not be leaked.
	defer func() {
		if err := f.Close(); err != nil {
			repo.Logger.Error(err.Error())
		}
	}()

	if _, err := f.Write(out); err != nil {
		repo.Logger.Error(err.Error())
	}
}
// SyncCompleted updates the repo state now that a job is done: it clears the
// running flag, records the exit status and how long the job took, saves the
// state and logs a summary. After a successful sync it also starts zfsSync in
// a new goroutine.
//
// exit is one of the job attempt status constants; any value other than
// SUCCESS or TERMINATED is logged as "failed".
func (repo *Repo) SyncCompleted(exit int) {
	// State is stored by value inside Repo, so it has to be updated through a
	// pointer. Updating a copy would leave IsRunning set, SaveState would
	// persist the old state, and RunIfPossible would never start this repo again.
	state := &repo.State

	syncTook := time.Now().Unix() - state.LastAttemptStartTime

	// RunIfPossible starts the next job once repo.Frequency seconds have passed
	// since this attempt started, so that is what remains to wait.
	nextSync := repo.Frequency - int(syncTook)
	if nextSync < 0 {
		nextSync = 0
	}

	state.IsRunning = false
	state.LastAttemptExit = exit
	state.LastAttemptRunTime = syncTook

	var exitStr string
	switch exit {
	case SUCCESS:
		exitStr = "completed"
	case TERMINATED:
		exitStr = "terminated"
	default:
		exitStr = "failed"
	}

	repo.SaveState()
	repo.Logger.Info(fmt.Sprintf("Sync %s after running for %d seconds, will run again in %d seconds", exitStr, syncTook, nextSync))

	if exit == SUCCESS {
		// NOTE(review): the zfssync started by the previous sync of this repo
		// may still be running at this point; confirm that overlap is acceptable.
		go zfsSync(repo)
	}
}
// panicIfErr panics with e when e is a non-nil error and does nothing otherwise.
func panicIfErr(e error) {
	if e == nil {
		return
	}
	panic(e)
}
// touchFile makes sure that file exists as a regular file usable by the
// mirror user.
//
//   - If the file cannot be stat'ed it is created with mode 0644.
//   - If the path is a directory, touchFile panics.
//   - Otherwise the existing file is handed to the mirror user when the
//     process is not already running as that user, and its permissions are
//     reset to 0644 when they differ.
//
// Any failure panics. The panic value is an error that wraps the underlying
// cause where there is one.
func touchFile(file string) {
	// UID 1001 is the hardcoded uid for mirror
	const mirrorUID = 1001

	fi, err := os.Stat(file)
	if err != nil {
		f, err := os.OpenFile(file, os.O_CREATE, 0644)
		if err != nil {
			// keep the cause (missing directory, permissions, ...) in the panic
			panic(fmt.Errorf("unable to create file %s: %w", file, err))
		}
		panicIfErr(f.Close())
		return
	}
	if fi.IsDir() {
		panic(fmt.Errorf("%s is a directory", file))
	}

	// Ownership and permissions are checked independently, so that fixing the
	// owner does not skip fixing the mode.
	if os.Geteuid() != mirrorUID {
		panicIfErr(os.Chown(file, mirrorUID, os.Getegid()))
	}
	if fi.Mode().Perm() != 0644 {
		panicIfErr(os.Chmod(file, 0644))
	}
}
// touchFiles applies touchFile to each of the given paths, in order.
func touchFiles(files ...string) {
	for i := 0; i < len(files); i++ {
		touchFile(files[i])
	}
}
// GetConfig reads the config from the INI file at CONFIG_PATH, initializes
// default values, and initializes the non-configurable fields of each repo.
//
// Every section other than "DEFAULT" describes one repo. For each repo the
// log files are created if needed, the logger is set up, and the state is
// loaded from the repo's state file (or a fresh state file is written when
// none exists yet). doneChan and stopChan are shared by all repos.
//
// It returns a Config. Note that each repo's cfg pointer refers to the Config
// value built inside this function, not to the copy that is returned.
//
// It panics when the config cannot be loaded or mapped, when a directory or
// file cannot be prepared, when an IP address is missing, when a repo has no
// sync type or no valid frequency, or when no repos are defined.
func GetConfig(doneChan chan Result, stopChan chan struct{}) Config {
	iniFile, err := ini.Load(CONFIG_PATH)
	panicIfErr(err)

	// add global configuration in cfg: defaults first, then the file's values
	cfg := Config{
		MaxJobs:       DEFAULT_MAX_JOBS,
		MaxTime:       DEFAULT_MAX_TIME,
		PasswordDir:   DEFAULT_PASSWORD_DIR,
		DownloadDir:   DEFAULT_DOWNLOAD_DIR,
		StateDir:      DEFAULT_STATE_DIR,
		LoggerDir:     DEFAULT_LOG_DIR,
		RsyncLogDir:   DEFAULT_RSYNC_LOG_DIR,
		ZfssyncLogDir: DEFAULT_ZFSSYNC_LOG_DIR,
		SockPath:      DEFAULT_SOCK_PATH,
		Repos:         []*Repo{},
	}
	panicIfErr(iniFile.MapTo(&cfg))

	requiredDirs := []string{cfg.StateDir, cfg.LoggerDir, cfg.RsyncLogDir, cfg.ZfssyncLogDir}
	for i := range requiredDirs {
		panicIfErr(os.MkdirAll(requiredDirs[i], 0755))
	}

	switch {
	case cfg.IPv4Address == "":
		panic("Missing IPv4 address from config")
	case cfg.IPv6Address == "":
		panic("Missing IPv6 address from config")
	}

	// add each repo configuration to cfg
	for _, section := range iniFile.Sections() {
		name := section.Name()
		if name == "DEFAULT" {
			continue
		}

		// global defaults, which the repo's own section may override
		repo := &Repo{
			Name:           name,
			SyncType:       cfg.SyncType,
			FrequencyStr:   cfg.FrequencyStr,
			MaxTime:        cfg.MaxTime,
			LoggerFile:     cfg.LoggerDir + "/" + name + ".log",
			RsyncLogFile:   cfg.RsyncLogDir + "/" + name + "-rsync.log",
			ZfssyncLogFile: cfg.ZfssyncLogDir + "/" + name + "-zfssync.log",
			DoneChan:       doneChan,
			StopChan:       stopChan,
		}
		panicIfErr(section.MapTo(repo))

		touchFiles(repo.LoggerFile, repo.RsyncLogFile, repo.ZfssyncLogFile)
		repo.Logger = NewLogger(repo.Name, repo.LoggerFile)

		// an unknown frequency name looks up as 0 and is rejected below
		repo.Frequency = frequencies[repo.FrequencyStr]
		switch {
		case repo.SyncType == "":
			panic("Missing sync type from " + repo.Name)
		case repo.Frequency == 0:
			panic("Missing or invalid frequency for " + repo.Name)
		}

		// SaveState below needs both the logger and the config reference
		repo.cfg = &cfg
		repo.State = RepoState{LastAttemptExit: NOT_RUN_YET}

		// load the state from the existing file, otherwise create the state file
		statePath := cfg.StateDir + "/" + repo.Name
		if _, statErr := os.Stat(statePath); statErr == nil {
			panicIfErr(ini.MapTo(&repo.State, statePath))
		} else {
			touchFile(statePath)
			repo.SaveState()
		}

		cfg.Repos = append(cfg.Repos, repo)
	}

	if len(cfg.Repos) == 0 {
		panic("No repos found in config")
	}
	return cfg
}