From 21d724e1c82d2d5f562b191a96a1ae2aaadf4a36 Mon Sep 17 00:00:00 2001 From: Pascal Terjan Date: Tue, 4 Oct 2016 00:02:21 +0100 Subject: Reorganize code detecting build status If we can't get status, log a warning and go on. If status is not ready yet, verify the build is still in progress and there is no timeout. Else, analyze the status. --- ulri | 70 +++++++++++++++++++++++++++++++++----------------------------------- 1 file changed, 34 insertions(+), 36 deletions(-) (limited to 'ulri') diff --git a/ulri b/ulri index 68b4415..b7edc54 100755 --- a/ulri +++ b/ulri @@ -277,11 +277,44 @@ foreach my $prefix (keys %pkg_tree) { plog('INFO', "check status: $host/$arch ($bot [$pid])"); my $status = sout($remote, "cat $status_file"); - if (!$status) { + if ($? == 255) { plog('WARN', "failed to get status for $host/$arch"); next bot; } + my $proc_state; + # If we don't get a status, the build should be in progress + if (!$status) { + # Need to differenciate process not running with failure to ssh + chomp($proc_state = sout($remote, "ps h -o state $pid || echo NOT_RUNNING")); + my $seconds = time()-$time; + + # Reasons for failure + my $timeout = $seconds > $config->{faildelay}; + my $zombie = $proc_state eq 'Z'; + my $ended = $proc_state eq 'NOT_RUNNING'; + + unless ($timeout || $zombie || $ended) { + # Everything is fine, build is continuing! + next bot; + } + # TODO: In case of timeout, kill it! + plog('FAIL', "$bot timed out on $host/$arch ($seconds sec) or " . + "it's dead (status $proc_state), removing lock"); + $pkg_tree{$prefix}{media}{$media}{arch}{$arch} = 0; + } + + # Either we are done or we should kill the build + + plog('INFO', "delete lock file for $prefix"); + unlink $lock_file; + + $run{bot}{$host}{$bot} = 0; + + if (!$status) { + next bot; + } + my $success; my $fail; my $later; @@ -359,45 +392,10 @@ foreach my $prefix (keys %pkg_tree) { } } - # - # Handle build failure - # - - my $proc_state; - if (!$success && !$later && !$fail) { - # Need to differenciate process not running with failure to ssh - chomp($proc_state = sout($remote, "ps h -o state $pid || echo NOT_RUNNING")); - } - - my $seconds = time()-$time; - - # Reasons for failure - my $timeout = $seconds > $config->{faildelay}; - my $zombie = $proc_state eq 'Z'; - my $ended = $proc_state eq 'NOT_RUNNING'; - - unless ($success || $later || $fail || $timeout || $zombie || $ended) { - next bot; - } - - plog('INFO', "delete lock file for $prefix"); - unlink $lock_file; - - $run{bot}{$host}{$bot} = 0; - next bot if $later; next bot if $success && !$fail; - if (!$ended && !$fail) { - plog('FAIL', "$bot timed out on $host/$arch ($seconds sec) or " . - "it's dead (status $proc_state), removing lock"); - $pkg_tree{$prefix}{media}{$media}{arch}{$arch} = 0; - next bot; - } - - plog('INFO', "Failure reason: $success || $later || $fail || $timeout || $zombie || $ended"); - plog('FAIL', "build failed"); create_file("$done_dir/${prefix}_$arch.fail", "$bot $host"); make_path($fail_dir); -- cgit v1.2.1