#!/usr/bin/env python2
Generates a nice index of the directories from a template.
Original Author: Jeremy Roman <>
Modified by: Zachary Seguin <>
So if you don't like how I did something,
I'm the person you get to complain to.
Please be gentle.
import os, sys, time
from subprocess import Popen, PIPE
from optparse import OptionParser
import yaml, mako.exceptions, webhelpers.html.tags
from mako.template import Template
import humanize
import datetime
def parse_size(s):
    """Convert a size string with an optional unit suffix into a number.

    The suffixes ``K``, ``M``, ``G`` and ``T`` are recognised and treated as
    decimal multipliers (1000, 1000**2, 1000**3, 1000**4).  A string without
    one of these suffixes is parsed as a plain number.

    Args:
        s: Size text such as ``"1.5G"`` or ``"512"``.

    Returns:
        The size as a float.

    Raises:
        ValueError: If the numeric part cannot be parsed as a float.
    """
    # NOTE(review): main() feeds this the output of `zfs get`; its suffixes
    # may be binary (1024-based) rather than decimal -- confirm before
    # changing the multiplier.
    powers_of_thousand = {"K": 1, "M": 2, "G": 3, "T": 4}

    suffix = s[-1:]
    if suffix in powers_of_thousand:
        value = float(s[:-1])
        # Multiply step by step (rather than by 1000 ** n) so the float
        # result is identical to the original left-to-right expression.
        for _ in range(powers_of_thousand[suffix]):
            value *= 1000
        return value

    # No recognised suffix: the whole string is the number.  The previous
    # code parsed an undefined name here and raised NameError instead.
    return float(s)
def reformat_size(size):
    """Return *size* rendered as text by ``humanize.naturalsize``."""
    pretty = humanize.naturalsize(size)
    return pretty
def atomic_write(filename, body):
    """Atomically write *body*, followed by a newline, to *filename*.

    The text is first written to a temporary file in the same directory and
    that file is then renamed over the desired output file.  This ensures
    that partial files are never seen by clients.

    Args:
        filename: Path of the file to create or replace.
        body: Text to write; a single trailing newline is appended.

    Raises:
        IOError, OSError: If the temporary file cannot be written, synced or
            renamed.  The temporary file is removed before re-raising.
    """
    # generate an appropriate temporary filename in the same directory, so
    # the final rename stays within one filesystem
    tmp_filename = "%s.%d.tmp" % (filename, os.getpid())

    # open the directory so that we can fsync it after the rename;
    # abspath() covers a bare filename whose dirname would be ""
    dir_fd = os.open(os.path.dirname(os.path.abspath(filename)),
                     os.O_RDONLY | getattr(os, "O_DIRECTORY", 0))
    try:
        try:
            # write to the temporary file and force it to disk *before* it
            # becomes visible under the real name
            with open(tmp_filename, 'w') as tmp:
                tmp.write(body)
                tmp.write("\n")
                tmp.flush()
                os.fsync(tmp.fileno())

            # atomically replace the actual file
            os.rename(tmp_filename, filename)
        except Exception:
            # do not leave a stray temporary file behind on failure
            if os.path.exists(tmp_filename):
                os.remove(tmp_filename)
            raise

        # persist the directory entry created by the rename
        os.fsync(dir_fd)
    finally:
        os.close(dir_fd)
def main():
# Entry point: parses command-line options, loads the YAML configuration,
# validates the resulting settings, collects a size for each entry found in
# the docroot, renders the Mako template and writes the result to the
# output path.
# NOTE(review): this copy of the function has lost its indentation and
# appears to be missing several statements; the review notes below mark
# each visible gap. Confirm against the upstream file before editing.
# accept command-line arguments
parser = OptionParser()
parser.add_option("-c", "--config", dest="config", default="config.yaml",
help="configuration file to be used", metavar="FILE")
parser.add_option("-D", "--docroot", dest="docroot",
help="directory to be scanned", metavar="DIR")
parser.add_option("-F", "--duflags", dest="duflags",
help="flags to be passed to du, replaces any in config")
parser.add_option("-o", "--output", dest="output", metavar="FILE",
help="file to which index page will be written. "
"Use /dev/stdout to send to standard out.")
parser.add_option("-t", "--template", dest="template",
help="Mako template to render", metavar="FILE")
parser.add_option("--nonatomic", dest="nonatomic", action="store_true",
default=False, help="write the output to the path "
"given without creating a temporary file in between. "
"This is automatically set if the output appears "
"to be a character device, not a file.")
(options, args) = parser.parse_args()
# load config file
# NOTE(review): the file object passed to yaml.load is never closed
# explicitly.
config = yaml.load(file(options.config,'r'), Loader=yaml.FullLoader)
# NOTE(review): as shown, config is reset to None immediately after being
# loaded, so the check below always fails. This looks like the remnant of
# an exception handler around the load -- confirm.
config = None
# NOTE(review): no exit is visible after this message, so execution would
# continue into config.get() below -- confirm whether an exit was lost.
if not config or type(config) != dict:
print >>sys.stderr, "Unable to load configuration '%s'." % options.config
# determine important variables based on an appropriate order of
# precedence (command-line flags first, then the config file,
# then built-in fallbacks)
# fallback value for nonatomic is used so that character devices
# (e.g. /dev/stdout, /dev/null) are written to in the regular way
docroot = options.docroot or config.get('docroot')
# duflags is only referenced by the commented-out du code further down.
duflags = options.duflags or config.get('duflags') or "-h --max-depth=1 --exclude='.~tmp~'"
output = options.output or config.get('output')
template = options.template or config.get("template") or "index.mako"
nonatomic = options.nonatomic or config.get("nonatomic") or \
(os.path.exists(output) and not os.path.isfile(output))
# sanity checks
# NOTE(review): each branch only prints a message; no exit is visible
# after a failed check -- confirm whether exit calls were lost.
if not docroot:
print >>sys.stderr, "docroot not specified."
print >>sys.stderr, "Define it in the config file or pass -D on the command line."
elif not output:
print >>sys.stderr, "output not specified."
print >>sys.stderr, "Define it in the config file or pass -o on the command line."
elif not config.get('directories'):
print >>sys.stderr, "directories not specified."
print >>sys.stderr, "Define it in the config file."
elif not os.path.isdir(docroot):
print >>sys.stderr, "docroot '%s' not found or not a directory." % docroot
elif not os.path.exists(template) or os.path.isdir(template):
print >>sys.stderr, "template '%s' not found or is a directory." % template
# NOTE(review): the right-hand side of this assignment is missing in this
# copy; presumably a generation timestamp -- confirm.
generated =
directories = []
total_size = 0.0
ls = os.listdir(docroot)
for item in ls:
# NOTE(review): the body of this check is not visible; presumably the
# "pub" entry is skipped -- confirm.
if item == "pub":
path = os.path.join(docroot, item)
# If symlink, then this is a project
if os.path.islink(path):
# The dataset name is the symlink target with its first character dropped.
dataset = os.readlink(path)[1:]
# Get the disk space used by that dataset
# NOTE(review): the command is built by string interpolation and run
# through a shell (shell=True); consider passing an argument list.
du = Popen(
"/usr/bin/sudo /sbin/zfs get -H -o value used %s" % dataset,
shell=True, stdout=PIPE, stderr=PIPE).communicate()
# du is a (stdout, stderr) pair; any text on stderr is treated as failure.
size = "-"
if len(du[1].strip()) > 0:
print >>sys.stderr, "zfs get terminated unsuccessfully for %s" % item
# NOTE(review): as shown, the size is parsed and added even after the
# failure message above; an else branch may have been lost -- confirm.
size = parse_size(du[0].strip())
total_size += size
info = {'dir':item, 'size':reformat_size(size)}
# use info from config.yaml, if found
# otherwise, skip this directory
# NOTE(review): the bodies of the two branches below are incomplete in
# this copy. The commented-out code further down suggests: skip excluded
# entries, merge config['directories'][item] when present, otherwise use
# empty 'site'/'url', then append info to directories -- confirm.
if item in config['exclude']:
elif item in config['directories']:
info.update({ 'site': '', 'url': '' })
# Sort the directories
# (case-insensitively, by each entry's 'dir' value)
directories.sort(key=lambda d : d['dir'].lower())
# The commented-out section below is an earlier du-based implementation.
# Call du to compute size
#du = Popen(
# "/usr/bin/du %s/* %s | /usr/bin/sort -fk2" % (docroot, duflags),
# shell=True, stdout=PIPE, stderr=PIPE).communicate()
# du = []
# Check that du executed successfully
# If there's anything on stderr, send it
# out our own stderr and terminate.
#if len(du[1].strip()) > 0:
# sys.stderr.write(du[1])
# print >>sys.stderr, "du terminated unsuccessfully. Not generating index."
# sys.exit(-1)
# first one should be total, grab its size and format
#du = du[0].splitlines() # we only care about stdout now
#total_size = reformat_size(du[0].split(None,2)[0])
# the rest are the sizes we want
# directories = []
# total_size = 0
# for line in du:
# (size, path) = line.split(None, 2)
# dir = os.path.basename(path)
# total_size += int(size)
# info = {'dir':dir, 'size':reformat_size(int(size))}
# use info from config.yaml, if found
# otherwise, skip this directory
# if dir in config['exclude'] or not os.path.isdir(path):
# continue
# elif dir in config['directories']:
# info.update(config['directories'][dir])
# else:
# info.update({ 'site': '', 'url': '' })
# directories.append(info)
# render the template to a string
# NOTE(review): the arguments and closing parenthesis of this render()
# call are missing in this copy; the values the template needs cannot be
# determined from here -- confirm against the template.
body = Template(filename=template).render(
# write the rendered output
# NOTE(review): as shown, atomic_write runs even when nonatomic is set;
# an else branch appears to have been lost -- confirm.
if nonatomic:
print >>file(output,'w'), body
atomic_write(output, body)
if __name__ == "__main__":