#!/usr/bin/perl
#
# Copyright (C) 2005,2006 Mandriva
# 
# Author: Florent Villard <warly@mandriva.com>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#
# compare and rebuild packages on different architecture
#
# TODO
# 
# - create a configuration file to handle the various iurt running
# - get the content of the rebuild dir 
# - run as many iurt as machines are available and gather results
# - the scheduler just take the results, launch new rebuild, and quit
# - use perl ssh and try to workarround the non working timeout when the
#   remote machine is stalled
# - use submitter as packager, not generic name
#

use strict;
use MDK::Common qw(any cat_ if_ find);
use Iurt::Config qw(config_usage get_date config_init get_author_email check_arch check_noarch);
use Iurt::File qw(create_file);
use Iurt::Process qw(check_pid);
use Iurt::Queue qw(cleanup_failed_build get_upload_tree_state);
use Iurt::Mail qw(sendmail);
use Iurt::Util qw(plog_init plog ssh_setup ssh sout sget sput);
use Iurt::Ulri qw(build_package warn_about_failure);
use File::Copy 'move';
use File::Path 'make_path';
use File::Temp 'mktemp';
use Filesys::Df qw(df);
use Data::Dumper;
use File::Slurp qw(read_file);

my %run;
my $program_name = 'ulri';
$run{program_name} = $program_name;

my $LOG;
if (!$ENV{ULRI_LOG_FILE} || !open($LOG, '>>', $ENV{ULRI_LOG_FILE})) {
    open($LOG, ">&STDERR");
}

plog_init($program_name, $LOG, 7, 1);

my $HOME = $ENV{HOME};
my $configfile = "$HOME/.upload.conf";
my $sysconfigfile = "/etc/iurt/upload.conf";

my $config = {};
foreach my $f ($configfile, $sysconfigfile) {
    plog('DEBUG', "load config: $f");
    if (-f $f) {
        $config = eval(cat_($f))
          or die "FATAL $program_name: syntax error in $f";
        last;
    }
}

my %config_usage = ( 
    admin => {
	desc => 'mail address of the bot administrator',
	default => 'distrib-admin@mandrivalinux.org'
    },
    'arch_translation' => {  
	desc => "Renaming of arch",
	default => { 'sparc64' => 'sparcv9' }
    },
     bot => {  
	desc => "List of bot able to compile the packages",
	default => {
	    i586 => {
	        localhost => {
		    iurt => {
			user => 'builder',
			command => qq(iurt --copy_srpm --group --config local_spool /home/builder/iurt/__DIR__ --no_rsync --chrooted-urpmi -m __MEDIA__ -- http://localhost/distrib/ -p "__PACKAGER__" -r __TARGET__ __ARCH__),
			packages => '/home/builder/iurt/',
		    } ,
		},
	    },
	},
    },
    media => { 
	desc => 'Corresponding media to add given the current media',
	default => {
	    default => {
		"core/backports" => [ "core/backports", "core/release", "core/updates" ],
		"core/backports_testing" => [
		    "core/backports", "core/backports_testing",
		    "core/release", "core/updates"
		],
		"core/release" => [ "core/release" ],
		"core/updates" => [ "core/release", "core/updates" ],
		"core/updates_testing" => [
		    "core/release", "core/updates", "core/updates_testing"
		],
		"nonfree/backports" => [
		    "core/backports", "core/release", "core/updates",
		    "nonfree/backports", "nonfree/release", "nonfree/updates"
		],
		"nonfree/backports_testing" => [
		    "core/backports", "core/backports_testing",
		    "core/release", "core/updates",
		    "nonfree/backports", "nonfree/backports_testing",
		    "nonfree/release", "nonfree/updates"
		],
		"nonfree/release" => [ "core/release", "nonfree/release" ],
		"nonfree/updates" => [
		    "core/release", "core/updates",
		    "nonfree/release", "nonfree/updates"
		],
		"nonfree/updates_testing" => [
		    "core/release", "core/updates", "core/updates_testing",
		    "nonfree/release", "nonfree/updates", "nonfree/updates_testing"
		],
		"tainted/backports" => [
		    "core/backports", "core/release", "core/updates",
		    "tainted/backports", "tainted/release", "tainted/updates"
		],
		"tainted/backports_testing" => [
		    "core/backports", "core/backports_testing",
		    "core/release", "core/updates",
		    "tainted/backports", "tainted/backports_testing",
		    "tainted/release", "tainted/updates"
		],
		"tainted/release" => [ "core/release", "tainted/release" ],
		"tainted/updates" => [
		    "core/release", "core/updates",
		    "tainted/release", "tainted/updates"
		],
		"tainted/updates_testing" => [
		    "core/release", "core/updates", "core/updates_testing",
		    "tainted/release", "tainted/updates", "tainted/updates_testing"
		],
	    },
	},
    },
    faildelay => {
	desc => "Time after which the rebuild is considered as a failure",
	default => 36000
    },
    http_queue => {
	desc => 'Address where log can be consulted',
	default => 'http://kenobi.mandriva.com/queue '
    },
    queue => {
	desc => "Root of the tree where the packages to compile are located",
	default => "$HOME/uploads"
    },
    tmp => {
       desc => "Temporary directory",
       default => "$HOME/tmp"
    },
    ssh_options => {
	desc => "SSH options",
	default => "-o ConnectTimeout=5 -o BatchMode=yes -o ServerAliveInterval=5"
    },
    packager => {
        desc => 'Default packager tag user by bot',
	default => 'Mageia Team <http://www.mageia.org>'
    },
    'arch' => {
	desc => 'Architectures list for each target',
	default => {
	    default => [ 'i586', 'x86_64' ],
	},
    },
);
config_usage(\%config_usage, $config) if $run{config_usage};
config_init(\%config_usage, $config, \%run);

my %untranslated_arch;
foreach my $k (keys %{$config->{arch_translation}}) {
    my $v = $config->{arch_translation}{$k};
    push @{$untranslated_arch{$v}}, $k;
}

$run{pidfile_home} = $config->{tmp};
$run{pidfile} = $program_name;
my $pidfile = check_pid(\%run);


my ($fulldate, $daydate) = get_date();
$run{daydate} = $daydate;

my $df = df $config->{queue};
if ($df->{per} == 100) {
    # FIXME should send a mail too
    die "FATAL $program_name: not enough space on the filesystem, only $df->{bavail} KB on $config->{queue}, full at $df->{per}%";
}

($fulldate, $daydate) = get_date();

my $todo = "$config->{queue}/todo";
my $failure = "$config->{queue}/failure";
my $done = "$config->{queue}/done";
my $reject = "$config->{queue}/reject";

#
# Part 0: gather data from upload tree
#

plog('MSG', "check uploads tree");

plog('DEBUG', "input queue is $todo");

my %pkg_tree = get_upload_tree_state($config);

# Load bot status
#
foreach my $prefix (keys %pkg_tree) {
    my $ent = $pkg_tree{$prefix};
    foreach my $media (keys %{$ent->{media}}) {
	foreach my $bot (@{$ent->{media}{$media}{bot}}) {
	    $run{bot}{$bot->{host}}{$bot->{bot}} = $prefix;
	}
    }
}

#
# Part 1: get results from finished builds
#

plog('MSG', "check build bot results");

my %later;
my $something_finished;
foreach my $prefix (keys %pkg_tree) {
    my $ent = $pkg_tree{$prefix};
    foreach my $media (keys %{$ent->{media}}) {
        my $path = $ent->{media}{$media}{path};
        my $user = $ent->{user};

        # Local pathnames
        my $done_dir = "$done/$path";
        my $todo_dir = "$todo/$path";
        my $fail_dir = "$failure/$path";

        bot: foreach my $bot_list (@{$ent->{media}{$media}{bot}}) {
	    my ($bot, $host, $date, $pid, $arch, $time) =
			    @$bot_list{qw(bot host date pid arch time)};

	    my $bot_conf = $config->{bot}{$arch}{$host}{$bot};
            my $remote = ssh_setup($config->{ssh_options},
				   $bot_conf->{user}, $host);

	    my $prefix_dir = "$bot_conf->{packages}/$path/$prefix-$arch/";

	    # If our build is noarch, set arch appropriately.
	    #
	    my $lock_file =
	        "$todo_dir/${prefix}_$arch-noarch.$bot.$host.$date.$pid.lock";

	    if (-f $lock_file) {
	        plog('DEBUG', "$prefix is noarch");
	        $arch = "noarch";
	    } else {
	        $lock_file =~ s/-noarch//;
	    }

	    my $status_file = "$prefix_dir/log/status.log";

	    plog('INFO', "check status: $host/$arch ($bot [$pid])");
	    my $status = sout($remote, "cat $status_file");
	    if (!$status) {
		plog('WARN', "failed to get status for $host/$arch");
	        next bot;
	    }

	    my $success;
	    my $fail;
	    my $later;

	    # Check if the build bot finished on the other side
	    #
	    plog('INFO', "check result: $host/$arch ($bot [$pid])");
	    foreach my $res (split "\n", $status) {
		my ($p, $r) = $res =~ /(.*):\s+(.*)/;
		plog('DEBUG', $res);
		if ($r eq 'install_deps_failure') {
		    plog('FAIL', "install deps failure, rebuild later: $p");
		    $later{$prefix} = 1;
		    $later = 1;
		}
		if ($r ne 'ok') {
		    plog('FAIL', "$r: $p");
		    $fail = 1;
		}
	    }

	    if (!$fail) {
		my @list = split "\n", sout($remote, "ls $prefix_dir");
		my $error;
		my $done;

		my $arch_check = join '|', $arch, if_($untranslated_arch{$arch}, @{$untranslated_arch{$arch}});
		plog('MSG', "checking for $arch_check arch");
		foreach my $result (@list) {
		    $result =~ /\.(src|$arch_check|noarch)\.rpm$/ or next;

		    # do not copy the initial src package
		    $result =~ /^$prefix/ and next;

		    my $result_file = "$done_dir/${prefix}_$result";

		    plog('OK', "build ok: $result");
		    if ($result =~ /\.$arch_check\.rpm$/) {
			$done = 1;
		    }

		    plog('DEBUG', "copy files to done");
		    make_path($done_dir);
		    if (sget($remote, "$prefix_dir/$result",
			     "$result_file.new")) {
			plog('ERROR', "copying $result from $host failed ($!)");
			$error = 1;
			last;
		    }
		    if (!move("$result_file.new", $result_file)) {
			$error = 1;
			last;
		    }
		    # Add the package to the list of built ones, in case we fail another arch and need to cleanup
		    push @{$ent->{rpms}}, $result_file;
		}
		next if $error;

		if ($done) {
		    create_file("$done_dir/${prefix}_$arch.done", "$bot $host");
		    $success = 1;
		}

		if ($success) {
		    # Fetch build log and clean remote machine
		    make_path("$done_dir/$prefix");
		    sget($remote, "$prefix_dir/log/*", "$done_dir/$prefix");
		    ssh($remote, "rm -rf $prefix_dir");
		    if (-d "$fail_dir/$prefix") {
			# Other arch had failed
			cleanup_failed_build($todo_dir, $done_dir, $fail_dir, $prefix, $ent, $arch, $config);
		    } else {
			$something_finished = 1;
		    }
		}
	    }

	    #
	    # Handle build failure
	    #

	    my $proc_state;
	    if (!$success && !$later && !$fail) {
		# Need to differenciate process not running with failure to ssh
		chomp($proc_state = sout($remote, "ps h -o state $pid || echo NOT_RUNNING"));
	    }

	    my $seconds = time()-$time;

	    # Reasons for failure
	    my $timeout = $seconds > $config->{faildelay};
	    my $zombie = $proc_state eq 'Z';
	    my $ended = $proc_state eq 'NOT_RUNNING';

	    unless ($success || $later || $fail || $timeout || $zombie || $ended) {
		next bot;
	    }

	    plog('INFO', "delete lock file for $prefix");
	    unlink $lock_file;

	    $run{bot}{$host}{$bot} = 0;

	    next bot if $later;

	    next bot if $success && !$fail;

	    if (!$ended && !$fail) {
		plog('FAIL', "$bot timed out on $host/$arch ($seconds sec) or " .
		     "it's dead (status $proc_state), removing lock");
	        $pkg_tree{$prefix}{media}{$media}{arch}{$arch} = 0;
		next bot;
	    }

	    plog('INFO', "Failure reason: $success || $later || $fail || $timeout || $zombie || $ended");

	    plog('FAIL', "build failed");
	    create_file("$done_dir/${prefix}_$arch.fail", "$bot $host");
	    make_path($fail_dir);

	    mkdir("$fail_dir/$prefix"); 
	    if (sget($remote, "$prefix_dir/*", "$fail_dir/$prefix")) {
		plog('ERROR', "copying from $host:$prefix_dir/ " .
		     "to $fail_dir/ failed ($!)");
		$pkg_tree{$prefix}{media}{$media}{arch}{$arch} = 0;
	    }

	    cleanup_failed_build($todo_dir, $done_dir, $fail_dir, $prefix, $ent, $arch, $config);

	    # Notify user if build failed
	    #
	    if ($user) {
		warn_about_failure($config, $user, $ent, $arch, $fail_dir, $path, $prefix);
	    }

	    # clean the log on the compilation machine
	    ssh($remote, "rm -rf $prefix_dir");

	} # end bot
    } # end path
} # end prefix


#
# Part 2: check queue and start new jobs if a bot is available
#

plog('MSG', "launching new compilations");
my %to_compile;

# do not sort the keys to be able to ignore packages which makes iurt
# crash or just lock ulri somehow

foreach my $prefix (sort keys %pkg_tree) {
    next if $later{$prefix};

    my $ent = $pkg_tree{$prefix};

    my $ready = 1;
    my $failed_dep = 0;

    foreach my $dep (@{$ent->{deps}}) {
	if (glob "$done/*/*/*/$dep.upload") {
	    plog('DEBUG', "Dependent build $dep was uploaded");
	    next;
	}
	# $dep was not uploaded yet, so it's too early to build this one
	plog('DEBUG', "Dependent build $dep not ready ($prefix)");
	$ready = 0;
	if (glob("$reject/*/*/*/$dep.youri") || glob("$done/*/*/*/${dep}_*.fail")) {
	    plog('ERROR', "Dependent build $dep has failed");
	    $failed_dep = 1;
	}
    }

    if ($failed_dep) {
	plog('DEBUG', "Dependent build(s) failed, rejecting this one");
	foreach my $media (keys %{$ent->{media}}) {
	    my $path = $ent->{media}{$media}{path};
	    make_path("$reject/$path");
	    foreach my $srpm (@{$ent->{srpms}}) {
		move("$todo/$path/${prefix}_$srpm", "$reject/$path/${prefix}_$srpm");
	    }
	}
	next;
    }

    next unless $ready;
    plog('DEBUG', "No missing dependent build for $prefix");

    foreach my $media (keys %{$ent->{media}}) {
	my $path = $ent->{media}{$media}{path};
	my $todo_dir = "$config->{queue}/todo/$path";
	my $target = $ent->{target};
	my $srpms = $ent->{srpms} or next;

	#plog('DEBUG', "searching a bot to compile @$srpms");

	# count noarch todos only once even if searching multiple bots
	my $noarch_countflag = 0;

	# If all packages in a group are noarch, consider the entire group
	# as noarch
	my $noarch = 1;
	$noarch = 0 if any { !check_noarch("$todo_dir/${prefix}_$_") } @$srpms;

	my $arch_list = find { ref($_) eq 'ARRAY' } $config->{arch}, (ref($config->{arch}) eq 'HASH' ? ($config->{arch}{$target}, $config->{arch}{default}) : ());
	my @arch_list = $arch_list ? @$arch_list : keys %{$config->{bot}};
	# need to find a bot for each arch
	foreach my $arch (@arch_list) {
	    # Skip this arch if the package is already building as noarch or for this arch
	    # or if it should not be built on this arch or it has already failed
	    next if $pkg_tree{$prefix}{media}{$media}{arch}{noarch};
	    next if $pkg_tree{$prefix}{media}{$media}{arch}{$arch};
	    next if $pkg_tree{$prefix}{media}{$media}{excluded_arch}{$arch};
	    next if $pkg_tree{$prefix}{media}{$media}{failed_arch}{$arch};

	    my $excluded = any { !check_arch("$todo_dir/${prefix}_$_", $arch) } @$srpms;
	    if ($excluded) {
		plog('WARN', "excluding from $arch: $prefix");
		my $done_dir = "$done/$path";
		make_path($done_dir);
		create_file("$done_dir/${prefix}_$arch.excluded",
			    "ulri $arch excluded");
		$pkg_tree{$prefix}{media}{$media}{excluded_arch}{$arch} = 1;
		next;
	    }

	    if ($noarch) {
		plog('DEBUG', "search any bot for @$srpms") unless $noarch_countflag;
	    } else {
		plog('DEBUG', "search $arch bot for @$srpms");
	    }

	    hosts: foreach my $host (keys %{$config->{bot}{$arch}}) {
		foreach my $bot (keys %{$config->{bot}{$arch}{$host}}) {
		    next if $run{bot}{$host}{$bot};
		    my $pid = build_package($config, \%pkg_tree, $path, $prefix, $host, $arch, $bot);
		    if ($pid) {
			# Register that the package is building
			$run{bot}{$host}{$bot} = $prefix;
			$pkg_tree{$prefix}{media}{$media}{arch}{$noarch ? 'noarch' : $arch} = 1;

			my $lock_arch = $noarch ? "$arch-noarch" : $arch;
			my $lock_file = "$todo_dir/${prefix}_" .
			    "$lock_arch.$bot.$host.$fulldate.$pid.lock";
			plog('DEBUG', "create lock $lock_file");
			create_file($lock_file, "$program_name $$", time());

			last hosts;
		    }
		}
	    }

	    # Count packages to compile for each architecture. Count noarch
	    # package only once.
	    #
	    $arch = 'noarch' if $noarch;
	    unless ($pkg_tree{$prefix}{media}{$media}{arch}{$arch}) {
		$to_compile{$arch}++ if !($noarch && $noarch_countflag);
	    }
	    $noarch_countflag = 1 if $noarch;
	}
    }
}

plog('MSG', "Current status");

my %build_list;

foreach my $prefix (keys %pkg_tree) {
    my $ent = $pkg_tree{$prefix};
    foreach my $media (keys %{$ent->{media}}) {
	foreach my $bot (@{$ent->{media}{$media}{bot}}) {
	    my $host = $bot->{host};
	    my $arch = $bot->{arch};
	    push @{$build_list{"$host/$arch"}}, $prefix;
	}
    }
}

if (keys %build_list) {
    plog('INFO', "currently building:");
    map { plog('INFO', "  $_: " . join('', @{$build_list{$_}})) } sort keys %build_list;
}

plog('INFO', "jobs in queue:", %to_compile ?
    map { sprintf("%s(%d)", $_, $to_compile{$_}) } keys %to_compile : "none");


unlink $pidfile;
exec "emi" if $something_finished;

__END__

# ulri ends here

Discussion
----------

20060802 (Warly)

* I prefer creating a separate scheduler, so that it can eventually call
  other bots.
* bots should be able to take packages by themselves.
* Iurt will perform several checks, they have to be standard and usable
  by the maintainer, the results must be put in a visible directory or path
* We can put packages either in a dir or to prefix all files with the date
  and uploader. Having all files in a dir will make the listing simpler.
  Prefixing the files could be problematic if we base the rpm name and
  version parsing on the filename.
* ulri knows the prefix, he could ask iurt to put the packages in a dir
  with the same prefix.

20060806 (Warly)

* All the packages are put in done, then the final youri is run to put them
  in queue/

20061104 (claudio)

* Instead if having configuration defaults for our environment and using
  ulri with the defaults, it would be nicer to have minimalistic/generic
  defaults and install a configuration file in kenobi
* Ulri's configuration file could be renamed to .ulri.conf instead of 
  .upload.conf.   ==> ok, it's also used by emi