From 45c3e15f063ba493f67e6c3e67780118c86bb1bd Mon Sep 17 00:00:00 2001 From: Colin Guthrie Date: Sun, 9 Feb 2014 13:55:50 +0000 Subject: Add a new system to ensure our git repos are mirrored properly. This is a simple python daemon that I wrote which can be 'pinged' and told to update (or freshly clone) given git repos. Deploy this script on alamut (not started automatically yet) --- modules/gitmirror/manifests/init.pp | 13 + modules/gitmirror/templates/on-the-pull | 369 ++++++++++++++++++++++++++ modules/gitmirror/templates/on-the-pull.init | 60 +++++ modules/gitmirror/templates/rsync-metadata.sh | 29 ++ 4 files changed, 471 insertions(+) create mode 100644 modules/gitmirror/manifests/init.pp create mode 100755 modules/gitmirror/templates/on-the-pull create mode 100755 modules/gitmirror/templates/on-the-pull.init create mode 100755 modules/gitmirror/templates/rsync-metadata.sh (limited to 'modules') diff --git a/modules/gitmirror/manifests/init.pp b/modules/gitmirror/manifests/init.pp new file mode 100644 index 00000000..4ab44c0a --- /dev/null +++ b/modules/gitmirror/manifests/init.pp @@ -0,0 +1,13 @@ +class gitmirror { + mga_common::local_script { 'on-the-pull': + content => template('gitmirror/on-the-pull'), + } + file { '/etc/init.d/on-the-pull': + content => template('gitmirror/on-the-pull.init'), + mode => 755, + } + + mga_common::local_script { 'gitmirror-sync-metadata': + content => template('gitmirror/rsync-metadata.sh'), + } +} diff --git a/modules/gitmirror/templates/on-the-pull b/modules/gitmirror/templates/on-the-pull new file mode 100755 index 00000000..1a82785b --- /dev/null +++ b/modules/gitmirror/templates/on-the-pull @@ -0,0 +1,369 @@ +#!/usr/bin/python + +import sys +import os +import pwd +import BaseHTTPServer +import cgi +import re +import subprocess +# For Python 2.4 compatibility, favour optparse +from optparse import OptionParser +from time import sleep +from threading import Thread +from Queue import Queue + + +class UpdaterQueue(Queue): + 
# Python 2.4 Queue compatibility methods + + def task_done(self): + """(Wrapper for Python 2.4 compatibility) Indicate that a formerly enqueued task is complete. Used for join(). + + WARNING: Does nothing in Python 2.4 for now + """ + # TODO: Make this do something useful in Python 2.4 + if hasattr(Queue, 'task_done'): + return Queue.task_done(self) + + def join(self): + """(Wrapper for Python 2.4 compatibility) Blocks until all items in the Queue have been gotten and processed. + + WARNING: Does nothing in Python 2.4 for now. + """ + # TODO: Make this do something useful in Python 2.4 + if hasattr(Queue, 'join'): + return Queue.join(self) + + def wait(self): + "DEPRECATED: Use `join()` instead. Block until all jobs are completed." + self.join() + +GitUpdaterQueue = UpdaterQueue(0) + + +# NB The following class and bits for running git commands were "liberated" +# from git_multimail.py + +class CommandError(Exception): + def __init__(self, cmd, retcode): + self.cmd = cmd + self.retcode = retcode + Exception.__init__( + self, + 'Command "%s" failed with retcode %s' % (' '.join(cmd), retcode,) + ) + +# It is assumed in many places that the encoding is uniformly UTF-8, +# so changing these constants is unsupported. But define them here +# anyway, to make it easier to find (at least most of) the places +# where the encoding is important. +(ENCODING, CHARSET) = ('UTF-8', 'utf-8') + + +# The "git" program (this could be changed to include a full path): +GIT_EXECUTABLE = 'git' + + +# How "git" should be invoked (including global arguments), as a list +# of words. This variable is usually initialized automatically by +# read_git_output() via choose_git_command(), but if a value is set +# here then it will be used unconditionally. 
+GIT_CMD = None + + +def choose_git_command(): + """Decide how to invoke git, and record the choice in GIT_CMD.""" + + global GIT_CMD + + if GIT_CMD is None: + try: + # Check to see whether the "-c" option is accepted (it was + # only added in Git 1.7.2). We don't actually use the + # output of "git --version", though if we needed more + # specific version information this would be the place to + # do it. + cmd = [GIT_EXECUTABLE, '-c', 'foo.bar=baz', '--version'] + read_output(cmd) + GIT_CMD = [GIT_EXECUTABLE, '-c', 'i18n.logoutputencoding=%s' % (ENCODING,)] + except CommandError: + GIT_CMD = [GIT_EXECUTABLE] + + +def read_git_output(args, input=None, keepends=False, **kw): + """Read the output of a Git command.""" + + if GIT_CMD is None: + choose_git_command() + + return read_output(GIT_CMD + args, input=input, keepends=keepends, **kw) + + +def read_output(cmd, input=None, keepends=False, **kw): + if input: + stdin = subprocess.PIPE + else: + stdin = None + p = subprocess.Popen( + cmd, stdin=stdin, stdout=subprocess.PIPE, stderr=subprocess.PIPE, **kw + ) + (out, err) = p.communicate(input) + retcode = p.wait() + if retcode: + raise CommandError(cmd, retcode) + if not keepends: + out = out.rstrip('\n\r') + return out + + + + + +def run_git_command(args, **kw): + """Runs a git command, ignoring the output. + """ + + read_git_output(args, **kw) + + +def run_command(args, **kw): + """Runs a git command, ignoring the output. 
+ """ + + read_output(args, **kw) + + +class GitUpdater(Thread): + def __init__(self, server, basedir, repoprefix, branch='master', cmd=''): + Thread.__init__(self) + self.server = server + self.basedir = basedir + self.repoprefix = repoprefix + self.branch = branch + self.cmd = cmd + + def run(self): + while 42: + repo = GitUpdaterQueue.get() + if repo is None: + break + try: + print >> sys.stderr, "Got update request for '%s'" % repo + clonefolder = os.path.join(self.basedir, repo) + if self.repoprefix: + if not repo.startswith(self.repoprefix): + print >> sys.stderr, "Ignoring repo '%s' due to invalid prefix" % repo + GitUpdaterQueue.task_done() + continue + clonefolder = os.path.join(self.basedir, repo[len(self.repoprefix):]) + command = [] + treeish = '' + changed = True + if not os.path.exists(clonefolder): + cloneparent = os.path.dirname(clonefolder) + if not os.path.exists(cloneparent): + os.makedirs(cloneparent) + cloneurl = self.server + '/' + repo + command = ['clone'] + if '--mirror' == self.branch: + command.append('--mirror') + command.append(cloneurl) + command.append(clonefolder) + print >> sys.stderr, "Cloning repo '%s' ('%s' -> '%s')" % (repo, cloneurl, clonefolder) + + run_git_command(command) + if not os.path.isdir(clonefolder): + raise Exception("Clone folder '%s' is not a directory. Cloning failed or file in it's place?" 
% clonefolder) + os.chdir(clonefolder) + if '--mirror' != self.branch and 'master' != self.branch: + command = ['checkout', '-t', 'origin/' + self.branch] + run_git_command(command) + elif os.path.isdir(clonefolder): + os.chdir(clonefolder) + print >> sys.stderr, "Updating existing repo '%s' ('%s')" % (repo, clonefolder) + command = ['remote', 'update'] + run_git_command(command) + if '--mirror' != self.branch: + sha1before = read_git_output(['rev-parse', 'refs/heads/' + self.branch]) + sha1after = read_git_output(['rev-parse', 'refs/remotes/origin/' + self.branch]) + if sha1before and sha1after: + if sha1before == sha1after: + changed = False + print >> sys.stderr, "Repo '%s' update on branch '%s': No changed detected" % (repo, self.branch) + else: + treeish = sha1before + '..' + sha1after + print >> sys.stderr, "Repo '%s' update on branch '%s': Treeish '%s'" % (repo, self.branch, treeish) + else: + print >> sys.stderr, "Repo '%s' update on branch '%s': Before or after sha1 could not be extracted." % (repo, self.branch) + command = ['update-ref', 'refs/heads/' + self.branch, 'refs/remotes/origin/' + self.branch] + run_git_command(command) + command = ['checkout', '-f', self.branch] + run_git_command(command) + else: + raise Exception("Clone folder '%s' is appears to be a file :s" % clonefolder) + + if changed and self.cmd: + # Udate the info/web/last-modified file as used by cgit + os.chdir(clonefolder) + command = [self.cmd, repo] + if treeish: + command += [treeish] + run_command(command) + + print >> sys.stderr, "Update for '%s' complete." 
% repo + except Exception, e: + print >> sys.stderr, "Error processing repo '%s'" % repo + print >> sys.stderr, str(e) + + GitUpdaterQueue.task_done() + +class TimeoutServer(BaseHTTPServer.HTTPServer): + def get_request(self): + result = self.socket.accept() + result[0].settimeout(10) + return result + +class PostHandler(BaseHTTPServer.BaseHTTPRequestHandler): + def do_POST(self): + ctype, pdict = cgi.parse_header(self.headers.getheader('content-type')) + repo = "" + try: + if ctype != 'x-git/repo': + self.send_response(415) + return + + length = int(self.headers.getheader('content-length')) + if length < 1: + self.send_response(411) + return + if length > 1024: + self.send_response(413) + return + repo = self.rfile.read(length) + + if re.match("^[-_/a-zA-Z0-9\+\.]+$", repo) is None: + self.send_response(400) + return + + GitUpdaterQueue.put(repo) + self.send_response(202) + except: + print >> sys.stderr, "Error" + + +def Demote(pidfile, uid, gid): + def result(): + piddir = os.path.dirname(pidfile) + if not os.path.exists(piddir): + os.makedirs(piddir) + fd = open(pidfile, 'w') + fd.write(str(os.getpid())) + fd.close() + + if uid and gid: + os.setgid(gid) + os.setuid(uid) + return result + + +def daemonise(options, serverprefix, basefolder): + pw = None + uid = False + gid = False + if options.user: + pw = pwd.getpwnam(options.user) + uid = pw.pw_uid + gid = pw.pw_gid + else: + pw = pwd.getpwnam(os.getlogin()) + + user = pw.pw_name + dirname = pw.pw_dir + env = { + 'HOME': dirname, + 'LOGNAME': user, + 'PWD': dirname, + 'USER': user, + } + if os.getenv('PATH') is not None: + env['PATH'] = os.getenv('PATH') + if os.getenv('PYTHONPATH') is not None: + env['PYTHONPATH'] = os.getenv('PYTHONPATH') + + args = [os.path.abspath(sys.argv[0])] + args.append('-a') + args.append(options.addr) + args.append('-p') + args.append(str(options.port)) + args.append('-r') + args.append(options.repoprefix) + args.append('-b') + args.append(options.branch) + args.append('-c') + 
args.append(options.cmd) + args.append(serverprefix) + args.append(basefolder) + + subprocess.Popen( + args, preexec_fn=Demote(options.pidfile, uid, gid), cwd=dirname, env=env + ) + exit(0) + + +def main(): + usage = "usage: %prog [options] " + description = """Listen for repository names being posted via a simle HTTP interface and clone/update them. +POST data simply via curl: +e.g. curl --header 'Content-Type: x-git/repo' --data 'my/repo/name' http://localhost:8000 +""" + parser = OptionParser(usage=usage, description=description) + parser.add_option("-a", "--addr", + type="string", dest="addr", default="0.0.0.0", + help="The interface address to bind to") + parser.add_option("-p", "--port", + type="int", dest="port", default=8000, + help="The port to bind to") + parser.add_option("-r", "--repo-prefix", + type="string", dest="repoprefix", default="", + help="Only handle repositories with the following prefix. This SHOULD contain a trailing slash if it's a folder but SHOULD NOT include a leading slash") + parser.add_option("-b", "--branch", + type="string", dest="branch", default="--mirror", + help="The branch to track on clone. If you pass '--mirror' (the default) as the branch name we will clone as a bare mirror") + parser.add_option("-c", "--cmd", + type="string", dest="cmd", default="", + help="Third party command to exectue after updates. 
It will execute in the folder of the repo and if we're not in mirror mode, a treeish will be passed as the only argument containing the refs that changed otherwise the command will be run without any arguments") + parser.add_option("-d", "--pid-file", + type="string", dest="pidfile", default="", + help="Daemonise and write pidfile") + parser.add_option("-u", "--user", + type="string", dest="user", default="", + help="Drop privileges to the given user (must be run as root)") + + (options, args) = parser.parse_args() + if len(args) < 2: + parser.error("Both the and arguments must be supplied.") + if len(args) > 2: + parser.print_usage() + exit(1) + + serverprefix = args[0] + basefolder = args[1] + + if options.pidfile: + daemonise(options, serverprefix, basefolder) + + if options.user: + parser.error("You can only specify a user if you're also deamonising (with a pid file).") + + try: + print >> sys.stderr, "Server started" + srvr = TimeoutServer((options.addr, options.port), PostHandler) + GitUpdater(serverprefix, basefolder, options.repoprefix, options.branch, options.cmd).start() + srvr.serve_forever() + except KeyboardInterrupt: + GitUpdaterQueue.put(None) + srvr.socket.close() + +if __name__ == "__main__": + main() diff --git a/modules/gitmirror/templates/on-the-pull.init b/modules/gitmirror/templates/on-the-pull.init new file mode 100755 index 00000000..0c575b77 --- /dev/null +++ b/modules/gitmirror/templates/on-the-pull.init @@ -0,0 +1,60 @@ +#! /bin/bash +# +### BEGIN INIT INFO +# Provides: on-the-pull +# Required-Start: $network +# Required-Stop: $network +# Default-Start: 2 3 4 5 +# Short-Description: Keep git mirrors up-to-date via external triggers +# Description: Keep git mirrors up-to-date via external triggers +### END INIT INFO + +# Source function library. +. 
/etc/init.d/functions + +pidfile=/var/run/on-the-pull/on-the-pull.pid +prog=/usr/local/bin/on-the-pull +args="--pid-file=$pidfile --user=git --cmd=/usr/local/bin/gitmirror-sync-metadata git://git.mageia.org/ /git" + + +start() { + gprintf "Starting On-The-Pull Git Mirror Daemon: " + daemon --check on-the-pull --pidfile $pidfile $prog $args + RETVAL=$? + echo + [ $RETVAL -eq 0 ] && touch /var/lock/subsys/on-the-pull + return $RETVAL +} + +stop() { + gprintf "Stopping On-The-Pull Git Mirror Daemon: " + killproc -p $pidfile on-the-pull +} + +restart() { + stop + start +} + +case "$1" in + start) + start + ;; + stop) + stop + ;; + status) + status on-the-pull $pidfile + ;; + restart|reload) + restart + ;; + condrestart) + [ -f /var/lock/subsys/on-the-pull ] && restart || : + ;; + *) + gprintf "Usage: %s {start|stop|status|restart|condrestart}\n" "$(basename $0)" + exit 1 +esac + +exit 0 diff --git a/modules/gitmirror/templates/rsync-metadata.sh b/modules/gitmirror/templates/rsync-metadata.sh new file mode 100755 index 00000000..7176cc54 --- /dev/null +++ b/modules/gitmirror/templates/rsync-metadata.sh @@ -0,0 +1,29 @@ +#!/bin/bash + +REPO="$1" +GITROOT="/git" +RSYNCROOT="rsync://valstar.mageia.org/git" + +if [ ! 
-d "$GITROOT/$REPO" ]; then
+    echo "No repository found $REPO" >&2
+    exit 1
+fi
+
+# Pull the upstream metadata (description, config, cgit last-modified file).
+# rsync overwrites the local config, so it is saved first, the upstream copy
+# is kept aside as config.upstream, and the local config is then restored.
+cp -af "$GITROOT/$REPO/config" "$GITROOT/$REPO/config.orig"
+/usr/bin/rsync -a --delete --include="description" --include="config" --include="info" --include="info/web" --include="info/web/last-modified" --exclude="*" "$RSYNCROOT/$REPO/" "$GITROOT/$REPO/"
+cp -af "$GITROOT/$REPO/config" "$GITROOT/$REPO/config.upstream"
+mv -f "$GITROOT/$REPO/config.orig" "$GITROOT/$REPO/config"
+
+# Only the gitweb owner and description are taken over from upstream.
+OWNER=$(git config --file "$GITROOT/$REPO/config.upstream" gitweb.owner)
+DESC=$(git config --file "$GITROOT/$REPO/config.upstream" gitweb.description)
+rm -f "$GITROOT/$REPO/config.upstream"
+
+CUROWNER=$(git config --file "$GITROOT/$REPO/config" gitweb.owner)
+if [ "$CUROWNER" != "$OWNER" ]; then
+    git config --file "$GITROOT/$REPO/config" gitweb.owner "$OWNER"
+fi
+
+CURDESC=$(git config --file "$GITROOT/$REPO/config" gitweb.description)
+if [ "$CURDESC" != "$DESC" ]; then
+    # The description belongs under gitweb.description, not gitweb.owner.
+    git config --file "$GITROOT/$REPO/config" gitweb.description "$DESC"
+fi
-- 
cgit v1.2.1