#!/usr/bin/perl
#
# Copyright (C) 2005,2006 Mandriva
# 
# Author: Florent Villard <warly@mandriva.com>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#

use strict;
use MDK::Common qw(any cat_ if_ find);
use Iurt::Config qw(get_date get_author_email get_target_arch get_mandatory_arch);
use Iurt::File qw(create_file);
use Iurt::Mail qw(sendmail);
use Iurt::Process qw(check_pid);
use Iurt::Queue qw(check_if_mandatory_arch_failed cleanup_failed_build get_upload_tree_state load_lock_file_data record_bot_complete schedule_next_retry);
use Iurt::RPM qw(check_arch check_noarch);
use Iurt::Util qw(plog_init plog ssh_setup ssh sout sget sput);
use Iurt::Ulri qw(build_package fetch_logs_and_cleanup load_config warn_about_failure);
use File::Copy 'move';
use File::Path 'make_path';
use File::Temp 'mktemp';
use Filesys::Df qw(df);
use Data::Dumper;
use File::Slurp qw(read_file);

my %run;
my $program_name = 'ulri';
$run{program_name} = $program_name;

my $LOG;
if (!$ENV{ULRI_LOG_FILE} || !open($LOG, '>>', $ENV{ULRI_LOG_FILE})) {
    open($LOG, ">&STDERR");
}

plog_init($program_name, $LOG, 7, 1);

my $config = load_config(\%run);

my %untranslated_arch;
foreach my $k (keys %{$config->{arch_translation}}) {
    my $v = $config->{arch_translation}{$k};
    push @{$untranslated_arch{$v}}, $k;
}

$run{pidfile_home} = $config->{tmp};
$run{pidfile} = $program_name;
my $pidfile = check_pid(\%run, 1);


my ($fulldate, $daydate) = get_date();
$run{daydate} = $daydate;

my $df = df $config->{queue};
if ($df->{per} == 100) {
    # FIXME should send a mail too
    die "FATAL $program_name: not enough space on the filesystem, only $df->{bavail} KB on $config->{queue}, full at $df->{per}%";
}

($fulldate, $daydate) = get_date();

my $todo = "$config->{queue}/todo";
my $failure = "$config->{queue}/failure";
my $done = "$config->{queue}/done";
my $reject = "$config->{queue}/reject";

#
# Part 0: gather data from upload tree
#

plog('MSG', "check uploads tree");

plog('DEBUG', "input queue is $todo");

my %pkg_tree = get_upload_tree_state($config);

# Load bot status
#
foreach my $prefix (keys %pkg_tree) {
    my $ent = $pkg_tree{$prefix};
    foreach my $media (keys %{$ent->{media}}) {
	foreach my $bot (@{$ent->{media}{$media}{bot}}) {
	    $run{bot}{$bot->{host}}{$bot->{bot}} = $prefix;
	}
    }
}

#
# Part 1: get results from finished builds
#
# TODO: Make this parallel
plog('MSG', "check build bot results");

my $something_finished;
foreach my $prefix (keys %pkg_tree) {
    my $ent = $pkg_tree{$prefix};
    foreach my $media (keys %{$ent->{media}}) {
        my $path = $ent->{media}{$media}{path};
        my $user = $ent->{user};

        # Local pathnames
        my $done_dir = "$done/$path";
        my $todo_dir = "$todo/$path";
        my $fail_dir = "$failure/$path";

        bot: foreach my $bot_list (@{$ent->{media}{$media}{bot}}) {
	    my ($bot, $host, $date, $pid, $arch, $time) =
			    @$bot_list{qw(bot host date pid arch time)};

	    my $bot_conf = $config->{bot}{$arch}{$host}{$bot};
            my $remote = ssh_setup($config->{ssh_options},
				   $bot_conf->{user}, $host);

	    my $prefix_dir = "$bot_conf->{packages}/$path/$prefix-$arch/";

	    # If our build is noarch, set arch appropriately.
	    #
	    my $lock_file =
	        "$todo_dir/${prefix}_$arch-noarch.$bot.$host.$date.$pid.lock";

	    if (-f $lock_file) {
	        plog('DEBUG', "$prefix is noarch");
	        $arch = "noarch";
	    } else {
	        $lock_file =~ s/-noarch//;
	    }

	    my $status_file = "$prefix_dir/log/status.log";

	    plog('INFO', "check status: $host/$arch ($bot [$pid])");
	    my $seconds = time()-$time;
	    my $timeout = $seconds > $config->{faildelay};
	    my $status = sout($remote, "cat $status_file");
	    if ($? == 255) {
		plog('WARN', "failed to get status for $host/$arch");
		# If we are already out of time, we should fail anyway
	        next bot unless $timeout;
	    }

	    my $proc_state;
	    # If we don't get a status, the build should be in progress
	    if (!$status) {
		# Need to differenciate process not running with failure to ssh
		chomp($proc_state = sout($remote, "ps h -o state $pid || echo NOT_RUNNING"));

		# Reasons for failure
		my $zombie = $proc_state eq 'Z';
		my $ended = $proc_state eq 'NOT_RUNNING';

		unless ($timeout || $zombie || $ended) {
		    # Everything is fine, build is continuing!
		    # Kill it if that package had failed on a mandatory arch
		    if (check_if_mandatory_arch_failed($media, $ent, $config)) {
			plog('INFO', "A mandatory arch had failed, killing the build on $host/$arch");
			ssh($remote, "kill -TERM $pid");
			$pkg_tree{$prefix}{media}{$media}{cancelled_arch}{$arch} = 1;
			create_file("$done_dir/${prefix}_$arch.cancelled", "$bot $host");
		    }
		    next bot;
		}
		if ($timeout) {
		    plog('FAIL', "$bot timed out on $host/$arch ($seconds sec), killing it");
		    ssh($remote, "kill -TERM $pid");
		    # Give it some time to die/cleanup
		    if ($seconds < 1.02 * $config->{faildelay}) {
			next bot;
		    }
		    plog('FAIL', "$bot didn't die after timing out on $host/$arch, removing lock");
		} else {
		    plog('FAIL', "$bot died on $host/$arch (status $proc_state), removing lock");
	        }

		fetch_logs_and_cleanup($remote, $prefix_dir, "$done_dir/$prefix");
		record_bot_complete(\%run, $bot, $arch, $lock_file, $prefix, $ent, $media, $host, $pid);

		next bot;
	    }

	    my $fail;
	    my $later;
	    my $done;

	    # Check if the build bot finished on the other side
	    #
	    plog('INFO', "check result: $host/$arch ($bot [$pid])");
	    foreach my $res (split "\n", $status) {
		my ($p, $r) = $res =~ /(.*):\s+(.*)/;
		plog('DEBUG', $res);
		if ($r eq 'install_deps_failure') {
		    plog('FAIL', "install deps failure, rebuild later: $p");
		    if (schedule_next_retry($config, $todo_dir, $prefix, $arch, $pkg_tree{$prefix}{media}{$media}{retries}{arch}{nb_failures})) {
			$later = 1;
			$pkg_tree{$prefix}{media}{$media}{later}{$arch} = 1;
		    } else {
			plog('FAIL', "Too many retries due to install_deps_failure: $p");
			$fail = 1;
		    }
	        }
		if ($r ne 'ok') {
		    plog('FAIL', "$r: $p");
		    $fail = 1;
		} else {
		    plog('OK', "build complete: $p");
		    $done = 1;
		}
	    }

	    if ($done) {
		my @list = split "\n", sout($remote, "ls $prefix_dir");
		my $error;

		my $arch_check = join '|', $arch, if_($untranslated_arch{$arch}, @{$untranslated_arch{$arch}});
		plog('MSG', "checking for $arch_check arch");
		foreach my $result (@list) {
		    $result =~ /\.rpm$/ or next;

		    # do not copy the initial src package
		    $result =~ /^$prefix/ and next;

		    my $result_file = "$done_dir/${prefix}_$result";

		    plog('OK', "build ok: $result");
		    plog('DEBUG', "copy files to done");
		    make_path($done_dir);
		    if (sget($remote, "$prefix_dir/$result",
			     "$result_file.new")) {
			plog('ERROR', "copying $result from $host failed ($!)");
			$error = 1;
			last;
		    }
		    if (!move("$result_file.new", $result_file)) {
			$error = 1;
			last;
		    }
		    # Add the package to the list of built ones, in case we fail another arch and need to cleanup
		    push @{$ent->{rpms}}, $result_file;
		}
		next bot if $error;

		if (check_if_mandatory_arch_failed($media, $ent, $config)) {
		    # Discard this arch as another mandatory one failed
		    plog('INFO', "A mandatory arch had failed, discarding the successful build from $host/$arch");
		    cleanup_failed_build($todo_dir, $done_dir, $fail_dir, $prefix, $ent, $media, $arch, $config);
		    ssh($remote, "rm -rf $prefix_dir");
		} else {
		    create_file("$done_dir/${prefix}_$arch.done", "$bot $host");
		    $pkg_tree{$prefix}{media}{$media}{done_arch}{$arch} = 1;
		    fetch_logs_and_cleanup($remote, $prefix_dir, "$done_dir/$prefix");
		    $something_finished = 1;
		}
	    }

	    record_bot_complete(\%run, $bot, $arch, $lock_file, $prefix, $ent, $media, $host, $pid);

	    # In case of success we have now fetched packages and logs and cleaned up the remote machine
	    next bot if $done;

	    unless ($pkg_tree{$prefix}{media}{$media}{cancelled_arch}{$arch}) {
		make_path($fail_dir);
		fetch_logs_and_cleanup($remote, $prefix_dir, "$fail_dir/$prefix");
		$pkg_tree{$prefix}{media}{$media}{arch}{$arch} = 0;
	    }

	    # We got the logs but want to retry so don't record a failure
	    next bot if $later;

	    cleanup_failed_build($todo_dir, $done_dir, $fail_dir, $prefix, $ent, $media, $arch, $config);

	    unless ($pkg_tree{$prefix}{media}{$media}{cancelled_arch}{$arch}) {
		plog('FAIL', "build failed");
		create_file("$done_dir/${prefix}_$arch.fail", "$bot $host");
		$pkg_tree{$prefix}{media}{$media}{failed_arch}{$arch} = 1;

		# Notify user if build failed
		if ($user) {
		    warn_about_failure($config, $user, $ent, $arch, $fail_dir, $path, $prefix);
		}
	    }

	} # end bot
    } # end path
} # end prefix


#
# Part 2: check queue and start new jobs if a bot is available
#

plog('MSG', "launching new compilations");
my %to_compile;

# do not sort the keys to be able to ignore packages which makes iurt
# crash or just lock ulri somehow

foreach my $prefix (sort keys %pkg_tree) {
    my $ent = $pkg_tree{$prefix};

    my $ready = 1;
    my $failed_dep = 0;

    foreach my $dep (@{$ent->{deps}}) {
	if (glob "$done/*/*/*/$dep.upload") {
	    plog('DEBUG', "Dependent build $dep was uploaded");
	    next;
	}
	# $dep was not uploaded yet, so it's too early to build this one
	plog('DEBUG', "Dependent build $dep not ready ($prefix)");
	$ready = 0;
	if (glob("$reject/*/*/*/$dep.youri") || glob("$done/*/*/*/${dep}_*.fail")) {
	    plog('ERROR', "Dependent build $dep has failed");
	    $failed_dep = 1;
	}
    }

    if ($failed_dep) {
	plog('DEBUG', "Dependent build(s) failed, rejecting this one");
	foreach my $media (keys %{$ent->{media}}) {
	    my $path = $ent->{media}{$media}{path};
	    make_path("$reject/$path");
	    foreach my $srpm (@{$ent->{srpms}}) {
		move("$todo/$path/${prefix}_$srpm", "$reject/$path/${prefix}_$srpm");
	    }
	}
	next;
    }

    next unless $ready;
    plog('DEBUG', "No missing dependent build for $prefix");

    foreach my $media (keys %{$ent->{media}}) {
	my $path = $ent->{media}{$media}{path};
	my $todo_dir = "$config->{queue}/todo/$path";
	my $target = $ent->{target};
	my $srpms = $ent->{srpms} or next;

	#plog('DEBUG', "searching a bot to compile @$srpms");

	# count noarch todos only once even if searching multiple bots
	my $noarch_countflag = 0;

	# If all packages in a group are noarch, consider the entire group
	# as noarch
	my $noarch = 1;
	$noarch = 0 if any { !check_noarch("$todo_dir/${prefix}_$_") } @$srpms;

	# Skip this media if the package is already building as noarch for it or
	# already done.
	next if $pkg_tree{$prefix}{media}{$media}{arch}{noarch};
	next if $pkg_tree{$prefix}{media}{$media}{done_arch}{noarch};

	# Only build noarch packages on mandatory architectures
	# Other architectures may not be up to date
	my $arch_list;
	if ($noarch) {
	    $arch_list = get_mandatory_arch($config, $target);
	} else {
	    $arch_list = get_target_arch($config, $target);
	}

	# need to find a bot for each arch
	foreach my $arch (@$arch_list) {
	    next if $pkg_tree{$prefix}{media}{$media}{later}{$arch};

	    # Skip this arch if the package is already building for it or if it
	    # should not be built on this arch or it has already failed or
	    # succeeded.
	    # Check again for noarch here, as we may ave triggered a noarch build on
	    # previous architecture and should not start another one.
	    next if $pkg_tree{$prefix}{media}{$media}{arch}{$arch};
	    next if $pkg_tree{$prefix}{media}{$media}{arch}{noarch};
	    next if $pkg_tree{$prefix}{media}{$media}{done_arch}{$arch};
	    next if $pkg_tree{$prefix}{media}{$media}{excluded_arch}{$arch};
	    next if $pkg_tree{$prefix}{media}{$media}{failed_arch}{$arch};

	    my $excluded = any { !check_arch("$todo_dir/${prefix}_$_", $arch) } @$srpms;
	    if ($excluded) {
		plog('WARN', "excluding from $arch: $prefix");
		my $done_dir = "$done/$path";
		make_path($done_dir);
		create_file("$done_dir/${prefix}_$arch.excluded",
			    "ulri $arch excluded");
		$pkg_tree{$prefix}{media}{$media}{excluded_arch}{$arch} = 1;
		next;
	    }

	    if ($noarch) {
		plog('DEBUG', "search any bot for @$srpms") unless $noarch_countflag;
	    } else {
		plog('DEBUG', "search $arch bot for @$srpms");
	    }

	    hosts: foreach my $host (keys %{$config->{bot}{$arch}}) {
		foreach my $bot (keys %{$config->{bot}{$arch}{$host}}) {
		    next if $run{bot}{$host}{$bot};
		    my $pid = build_package($config, \%pkg_tree, $media, $prefix, $host, $arch, $bot);
		    if ($pid) {
			# Register that the package is building
			$run{bot}{$host}{$bot} = $prefix;
			$pkg_tree{$prefix}{media}{$media}{arch}{$noarch ? 'noarch' : $arch} = 1;

			my $lock_arch = $noarch ? "$arch-noarch" : $arch;
			my $lock_file = "$todo_dir/${prefix}_" .
			    "$lock_arch.$bot.$host.$fulldate.$pid.lock";
			plog('DEBUG', "create lock $lock_file");
			create_file($lock_file, "$program_name $$", time());
			load_lock_file_data(\%{$pkg_tree{$prefix}}, $lock_file, $media, $config);

			last hosts;
		    }
		}
	    }

	    # Count packages to compile for each architecture. Count noarch
	    # package only once.
	    #
	    $arch = 'noarch' if $noarch;
	    unless ($pkg_tree{$prefix}{media}{$media}{arch}{$arch}) {
		$to_compile{$arch}++ if !($noarch && $noarch_countflag);
	    }
	    $noarch_countflag = 1 if $noarch;
	}
    }
}

plog('MSG', "Current status");

my %build_list;

foreach my $prefix (keys %pkg_tree) {
    my $ent = $pkg_tree{$prefix};
    foreach my $media (keys %{$ent->{media}}) {
	foreach my $bot (@{$ent->{media}{$media}{bot}}) {
	    my $host = $bot->{host};
	    my $arch = $bot->{arch};
	    push @{$build_list{"$host/$arch"}}, $prefix;
	}
    }
}

if (keys %build_list) {
    plog('INFO', "currently building:");
    map { plog('INFO', "  $_: " . join('', @{$build_list{$_}})) } sort keys %build_list;
}

plog('INFO', "jobs in queue:", %to_compile ?
    map { sprintf("%s(%d)", $_, $to_compile{$_}) } sort keys %to_compile : "none");


unlink $pidfile;
exec "emi" if $something_finished;