#!/usr/bin/perl # # Copyright (C) 2005,2006 Mandriva # # Author: Florent Villard # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2, or (at your option) # any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. # use strict; use MDK::Common qw(any cat_ if_ find); use Iurt::Config qw(get_date get_author_email get_target_arch get_mandatory_arch); use Iurt::File qw(create_file); use Iurt::Mail qw(sendmail); use Iurt::Process qw(check_pid); use Iurt::Queue qw(check_if_mandatory_arch_failed cleanup_failed_build get_upload_tree_state load_lock_file_data record_bot_complete schedule_next_retry); use Iurt::RPM qw(check_arch check_noarch); use Iurt::Util qw(plog_init plog ssh_setup ssh sout sget sput); use Iurt::Ulri qw(build_package fetch_logs_and_cleanup load_config warn_about_failure); use File::Copy 'move'; use File::Path 'make_path'; use File::Temp 'mktemp'; use Filesys::Df qw(df); use Data::Dumper; use File::Slurp qw(read_file); my %run; my $program_name = 'ulri'; $run{program_name} = $program_name; my $LOG; if (!$ENV{ULRI_LOG_FILE} || !open($LOG, '>>', $ENV{ULRI_LOG_FILE})) { open($LOG, ">&STDERR"); } plog_init($program_name, $LOG, 7, 1); my $config = load_config(\%run); my %untranslated_arch; foreach my $k (keys %{$config->{arch_translation}}) { my $v = $config->{arch_translation}{$k}; push @{$untranslated_arch{$v}}, $k; } $run{pidfile_home} = $config->{tmp}; $run{pidfile} = $program_name; my $pidfile = check_pid(\%run, 1); my ($fulldate, $daydate) = get_date(); $run{daydate} = $daydate; my $df = df $config->{queue}; if ($df->{per} == 100) { # FIXME should send a mail too die "FATAL $program_name: not enough space on the filesystem, only $df->{bavail} KB on $config->{queue}, full at $df->{per}%"; } ($fulldate, $daydate) = get_date(); my $todo = "$config->{queue}/todo"; my $failure = "$config->{queue}/failure"; my $done = "$config->{queue}/done"; my $reject = "$config->{queue}/reject"; # # Part 0: gather data from upload tree # plog('MSG', "check uploads tree"); plog('DEBUG', "input queue is $todo"); my %pkg_tree = get_upload_tree_state($config); # Load bot status # foreach my $prefix (keys %pkg_tree) { my $ent = $pkg_tree{$prefix}; foreach my $media (keys %{$ent->{media}}) { foreach my $bot (@{$ent->{media}{$media}{bot}}) { $run{bot}{$bot->{host}}{$bot->{bot}} = $prefix; } } } # # Part 1: get results from finished builds # # TODO: Make this parallel plog('MSG', "check build bot results"); my $something_finished; foreach my $prefix (keys %pkg_tree) { my $ent = $pkg_tree{$prefix}; foreach my $media (keys %{$ent->{media}}) { my $path = $ent->{media}{$media}{path}; my $user = $ent->{user}; # Local pathnames my $done_dir = "$done/$path"; my $todo_dir = "$todo/$path"; my $fail_dir = "$failure/$path"; bot: foreach my $bot_list (@{$ent->{media}{$media}{bot}}) { my ($bot, $host, $date, $pid, $arch, $time) = @$bot_list{qw(bot host date pid arch time)}; my $bot_conf = $config->{bot}{$arch}{$host}{$bot}; my $remote = ssh_setup($config->{ssh_options}, $bot_conf->{user}, $host); my $prefix_dir = "$bot_conf->{packages}/$path/$prefix-$arch/"; # If our build is noarch, set arch appropriately. # my $lock_file = "$todo_dir/${prefix}_$arch-noarch.$bot.$host.$date.$pid.lock"; if (-f $lock_file) { plog('DEBUG', "$prefix is noarch"); $arch = "noarch"; } else { $lock_file =~ s/-noarch//; } my $status_file = "$prefix_dir/log/status.log"; plog('INFO', "check status: $host/$arch ($bot [$pid])"); my $seconds = time()-$time; my $timeout = $seconds > $config->{faildelay}; my $status = sout($remote, "cat $status_file"); if ($? == 255) { plog('WARN', "failed to get status for $host/$arch"); # If we are already out of time, we should fail anyway next bot unless $timeout; } my $proc_state; # If we don't get a status, the build should be in progress if (!$status) { # Need to differenciate process not running with failure to ssh chomp($proc_state = sout($remote, "ps h -o state $pid || echo NOT_RUNNING")); # Reasons for failure my $zombie = $proc_state eq 'Z'; my $ended = $proc_state eq 'NOT_RUNNING'; unless ($timeout || $zombie || $ended) { # Everything is fine, build is continuing! # Kill it if that package had failed on a mandatory arch if (check_if_mandatory_arch_failed($media, $ent, $config)) { plog('INFO', "A mandatory arch had failed, killing the build on $host/$arch"); ssh($remote, "kill -TERM $pid"); $pkg_tree{$prefix}{media}{$media}{cancelled_arch}{$arch} = 1; create_file("$done_dir/${prefix}_$arch.cancelled", "$bot $host"); } next bot; } if ($timeout) { plog('FAIL', "$bot timed out on $host/$arch ($seconds sec), killing it"); ssh($remote, "kill -TERM $pid"); # Give it some time to die/cleanup if ($seconds < 1.02 * $config->{faildelay}) { next bot; } plog('FAIL', "$bot didn't die after timing out on $host/$arch, removing lock"); } else { plog('FAIL', "$bot died on $host/$arch (status $proc_state), removing lock"); } fetch_logs_and_cleanup($remote, $prefix_dir, "$done_dir/$prefix"); record_bot_complete(\%run, $bot, $arch, $lock_file, $prefix, $ent, $media, $host, $pid); next bot; } my $fail; my $later; my $done; # Check if the build bot finished on the other side # plog('INFO', "check result: $host/$arch ($bot [$pid])"); foreach my $res (split "\n", $status) { my ($p, $r) = $res =~ /(.*):\s+(.*)/; plog('DEBUG', $res); if ($r eq 'install_deps_failure') { plog('FAIL', "install deps failure, rebuild later: $p"); if (schedule_next_retry($config, $todo_dir, $prefix, $arch, $pkg_tree{$prefix}{media}{$media}{retries}{arch}{nb_failures})) { $later = 1; $pkg_tree{$prefix}{media}{$media}{later}{$arch} = 1; } else { plog('FAIL', "Too many retries due to install_deps_failure: $p"); $fail = 1; } } if ($r ne 'ok') { plog('FAIL', "$r: $p"); $fail = 1; } else { plog('OK', "build complete: $p"); $done = 1; } } if ($done) { my @list = split "\n", sout($remote, "ls $prefix_dir"); my $error; my $arch_check = join '|', $arch, if_($untranslated_arch{$arch}, @{$untranslated_arch{$arch}}); plog('MSG', "checking for $arch_check arch"); foreach my $result (@list) { $result =~ /\.rpm$/ or next; # do not copy the initial src package $result =~ /^$prefix/ and next; my $result_file = "$done_dir/${prefix}_$result"; plog('OK', "build ok: $result"); plog('DEBUG', "copy files to done"); make_path($done_dir); if (sget($remote, "$prefix_dir/$result", "$result_file.new")) { plog('ERROR', "copying $result from $host failed ($!)"); $error = 1; last; } if (!move("$result_file.new", $result_file)) { $error = 1; last; } # Add the package to the list of built ones, in case we fail another arch and need to cleanup push @{$ent->{rpms}}, $result_file; } next bot if $error; if (check_if_mandatory_arch_failed($media, $ent, $config)) { # Discard this arch as another mandatory one failed plog('INFO', "A mandatory arch had failed, discarding the successful build from $host/$arch"); cleanup_failed_build($todo_dir, $done_dir, $fail_dir, $prefix, $ent, $media, $arch, $config); ssh($remote, "rm -rf $prefix_dir"); } else { create_file("$done_dir/${prefix}_$arch.done", "$bot $host"); $pkg_tree{$prefix}{media}{$media}{done_arch}{$arch} = 1; fetch_logs_and_cleanup($remote, $prefix_dir, "$done_dir/$prefix"); $something_finished = 1; } } record_bot_complete(\%run, $bot, $arch, $lock_file, $prefix, $ent, $media, $host, $pid); # In case of success we have now fetched packages and logs and cleaned up the remote machine next bot if $done; unless ($pkg_tree{$prefix}{media}{$media}{cancelled_arch}{$arch}) { make_path($fail_dir); fetch_logs_and_cleanup($remote, $prefix_dir, "$fail_dir/$prefix"); $pkg_tree{$prefix}{media}{$media}{arch}{$arch} = 0; } # We got the logs but want to retry so don't record a failure next bot if $later; cleanup_failed_build($todo_dir, $done_dir, $fail_dir, $prefix, $ent, $media, $arch, $config); unless ($pkg_tree{$prefix}{media}{$media}{cancelled_arch}{$arch}) { plog('FAIL', "build failed"); create_file("$done_dir/${prefix}_$arch.fail", "$bot $host"); $pkg_tree{$prefix}{media}{$media}{failed_arch}{$arch} = 1; # Notify user if build failed if ($user) { warn_about_failure($config, $user, $ent, $arch, $fail_dir, $path, $prefix); } } } # end bot } # end path } # end prefix # # Part 2: check queue and start new jobs if a bot is available # plog('MSG', "launching new compilations"); my %to_compile; # do not sort the keys to be able to ignore packages which makes iurt # crash or just lock ulri somehow foreach my $prefix (sort keys %pkg_tree) { my $ent = $pkg_tree{$prefix}; my $ready = 1; my $failed_dep = 0; foreach my $dep (@{$ent->{deps}}) { if (glob "$done/*/*/*/$dep.upload") { plog('DEBUG', "Dependent build $dep was uploaded"); next; } # $dep was not uploaded yet, so it's too early to build this one plog('DEBUG', "Dependent build $dep not ready ($prefix)"); $ready = 0; if (glob("$reject/*/*/*/$dep.youri") || glob("$done/*/*/*/${dep}_*.fail")) { plog('ERROR', "Dependent build $dep has failed"); $failed_dep = 1; } } if ($failed_dep) { plog('DEBUG', "Dependent build(s) failed, rejecting this one"); foreach my $media (keys %{$ent->{media}}) { my $path = $ent->{media}{$media}{path}; make_path("$reject/$path"); foreach my $srpm (@{$ent->{srpms}}) { move("$todo/$path/${prefix}_$srpm", "$reject/$path/${prefix}_$srpm"); } } next; } next unless $ready; plog('DEBUG', "No missing dependent build for $prefix"); foreach my $media (keys %{$ent->{media}}) { my $path = $ent->{media}{$media}{path}; my $todo_dir = "$config->{queue}/todo/$path"; my $target = $ent->{target}; my $srpms = $ent->{srpms} or next; #plog('DEBUG', "searching a bot to compile @$srpms"); # count noarch todos only once even if searching multiple bots my $noarch_countflag = 0; # If all packages in a group are noarch, consider the entire group # as noarch my $noarch = 1; $noarch = 0 if any { !check_noarch("$todo_dir/${prefix}_$_") } @$srpms; # Skip this media if the package is already building as noarch for it or # already done. next if $pkg_tree{$prefix}{media}{$media}{arch}{noarch}; next if $pkg_tree{$prefix}{media}{$media}{done_arch}{noarch}; # Only build noarch packages on mandatory architectures # Other architectures may not be up to date my $arch_list; if ($noarch) { $arch_list = get_mandatory_arch($config, $target); } else { $arch_list = get_target_arch($config, $target); } # need to find a bot for each arch foreach my $arch (@$arch_list) { next if $pkg_tree{$prefix}{media}{$media}{later}{$arch}; # Skip this arch if the package is already building for it or if it # should not be built on this arch or it has already failed or # succeeded. # Check again for noarch here, as we may ave triggered a noarch build on # previous architecture and should not start another one. next if $pkg_tree{$prefix}{media}{$media}{arch}{$arch}; next if $pkg_tree{$prefix}{media}{$media}{arch}{noarch}; next if $pkg_tree{$prefix}{media}{$media}{done_arch}{$arch}; next if $pkg_tree{$prefix}{media}{$media}{excluded_arch}{$arch}; next if $pkg_tree{$prefix}{media}{$media}{failed_arch}{$arch}; my $excluded = any { !check_arch("$todo_dir/${prefix}_$_", $arch) } @$srpms; if ($excluded) { plog('WARN', "excluding from $arch: $prefix"); my $done_dir = "$done/$path"; make_path($done_dir); create_file("$done_dir/${prefix}_$arch.excluded", "ulri $arch excluded"); $pkg_tree{$prefix}{media}{$media}{excluded_arch}{$arch} = 1; next; } if ($noarch) { plog('DEBUG', "search any bot for @$srpms") unless $noarch_countflag; } else { plog('DEBUG', "search $arch bot for @$srpms"); } hosts: foreach my $host (keys %{$config->{bot}{$arch}}) { foreach my $bot (keys %{$config->{bot}{$arch}{$host}}) { next if $run{bot}{$host}{$bot}; my $pid = build_package($config, \%pkg_tree, $media, $prefix, $host, $arch, $bot); if ($pid) { # Register that the package is building $run{bot}{$host}{$bot} = $prefix; $pkg_tree{$prefix}{media}{$media}{arch}{$noarch ? 'noarch' : $arch} = 1; my $lock_arch = $noarch ? "$arch-noarch" : $arch; my $lock_file = "$todo_dir/${prefix}_" . "$lock_arch.$bot.$host.$fulldate.$pid.lock"; plog('DEBUG', "create lock $lock_file"); create_file($lock_file, "$program_name $$", time()); load_lock_file_data(\%{$pkg_tree{$prefix}}, $lock_file, $media, $config); last hosts; } } } # Count packages to compile for each architecture. Count noarch # package only once. # $arch = 'noarch' if $noarch; unless ($pkg_tree{$prefix}{media}{$media}{arch}{$arch}) { $to_compile{$arch}++ if !($noarch && $noarch_countflag); } $noarch_countflag = 1 if $noarch; } } } plog('MSG', "Current status"); my %build_list; foreach my $prefix (keys %pkg_tree) { my $ent = $pkg_tree{$prefix}; foreach my $media (keys %{$ent->{media}}) { foreach my $bot (@{$ent->{media}{$media}{bot}}) { my $host = $bot->{host}; my $arch = $bot->{arch}; push @{$build_list{"$host/$arch"}}, $prefix; } } } if (keys %build_list) { plog('INFO', "currently building:"); map { plog('INFO', " $_: " . join('', @{$build_list{$_}})) } sort keys %build_list; } plog('INFO', "jobs in queue:", %to_compile ? map { sprintf("%s(%d)", $_, $to_compile{$_}) } sort keys %to_compile : "none"); unlink $pidfile; exec "emi" if $something_finished;