From cfcd4507544f1c57802925255ecdfb9234f1feee Mon Sep 17 00:00:00 2001
From: Pascal Terjan
Date: Sun, 13 Nov 2022 18:39:34 +0000
Subject: Increase robustness when scp fails

Do not remove the lock file or mark the build as done until all the
files have been copied.
---
 NEWS              |  2 ++
 lib/Iurt/Queue.pm | 10 ++++++++++
 lib/Iurt/Ulri.pm  |  8 ++++++++
 ulri              | 47 +++++++++++++++--------------------------------
 4 files changed, 35 insertions(+), 32 deletions(-)

diff --git a/NEWS b/NEWS
index 5786627..1c548ec 100644
--- a/NEWS
+++ b/NEWS
@@ -1,5 +1,7 @@
 0.8.0
 - iurt: Add support for DynamicBuildRequires
+- ulri: Do not remove the lock file or mark the build as done until the
+  files have been copied.
 
 0.7.18
 - ulri: Fix false positives of iurt dying on the build machine
diff --git a/lib/Iurt/Queue.pm b/lib/Iurt/Queue.pm
index 2f1e968..585294e 100644
--- a/lib/Iurt/Queue.pm
+++ b/lib/Iurt/Queue.pm
@@ -15,6 +15,7 @@ our @EXPORT = qw(
     check_if_all_archs_processed
     check_if_mandatory_arch_failed
     load_lock_file_data
+    record_bot_complete
     remove_bot_from_package
 );
 
@@ -183,6 +184,15 @@ sub remove_bot_from_package {
     @{$ent->{media}{$media}{bot}} = grep { $_->{host} ne $host || $_->{pid} != $pid} @{$ent->{media}{$media}{bot}};
 }
 
+sub record_bot_complete {
+    my ($run, $bot, $arch, $lock_file, $prefix, $ent, $media, $host, $pid) = @_;
+    plog('INFO', "delete lock file for $prefix on $host/$arch");
+    unlink $lock_file;
+    $run->{bot}{$host}{$bot} = 0;
+    remove_bot_from_package($ent, $media, $host, $pid);
+    $ent->{media}{$media}{arch}{$arch} = 0;
+}
+
 sub get_upload_tree_state {
     our ($config) = @_;
 
diff --git a/lib/Iurt/Ulri.pm b/lib/Iurt/Ulri.pm
index 6d4e38f..f2e014c 100755
--- a/lib/Iurt/Ulri.pm
+++ b/lib/Iurt/Ulri.pm
@@ -12,6 +12,7 @@ use strict;
 
 our @EXPORT = qw(
     build_package
+    fetch_logs_and_cleanup
     warn_about_failure
 );
 
@@ -144,6 +145,13 @@ sub get_pid_from_file {
     $pid;
 }
 
+sub fetch_logs_and_cleanup {
+    my ($remote, $remote_dir, $target_dir) = @_;
+    make_path($target_dir);
+    sget($remote, "$remote_dir/log/*", $target_dir);
+    ssh($remote, "rm -rf $remote_dir");
+}
+
 sub warn_about_failure {
     my ($config, $user, $ent, $arch, $fail_dir, $path, $prefix) = @_;
     my $text = join("\n", "Build of the following packages failed:\n", map { "- $_" } @{$ent->{srpms}}) . "\n";
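Note that the new fetch_logs_and_cleanup helper still removes the remote
build directory even if the scp transfer behind sget failed. A stricter
variant would keep the remote files around for a retry. The following is
only a sketch of that idea, not part of this commit: the name is
hypothetical, it relies on the same imports as Iurt::Ulri (make_path from
File::Path, plus sget/ssh/plog from Iurt::Util), and it assumes sget
returns a true value on failure, as the "if (sget(...))" error branch
removed from ulri below suggests.

# Hypothetical stricter variant (not in this commit): only delete the
# remote build directory once the log copy actually succeeded, so that
# a failed scp can be retried on a later pass.
sub fetch_logs_and_cleanup_checked {
    my ($remote, $remote_dir, $target_dir) = @_;
    make_path($target_dir);
    if (sget($remote, "$remote_dir/log/*", $target_dir)) {
        # assumption: sget() returns non-zero on failure, matching the
        # error handling this commit removes from ulri
        plog('ERROR', "copying logs from $remote_dir failed, keeping remote files");
        return 0;
    }
    ssh($remote, "rm -rf $remote_dir");
    return 1;
}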
"\n"; diff --git a/ulri b/ulri index bbce305..93538f3 100755 --- a/ulri +++ b/ulri @@ -25,10 +25,10 @@ use Iurt::Config qw(config_usage get_date config_init get_author_email get_targe use Iurt::File qw(create_file); use Iurt::Mail qw(sendmail); use Iurt::Process qw(check_pid); -use Iurt::Queue qw(check_if_mandatory_arch_failed cleanup_failed_build get_upload_tree_state load_lock_file_data remove_bot_from_package); +use Iurt::Queue qw(check_if_mandatory_arch_failed cleanup_failed_build get_upload_tree_state load_lock_file_data record_bot_complete); use Iurt::RPM qw(check_arch check_noarch); use Iurt::Util qw(plog_init plog ssh_setup ssh sout sget sput); -use Iurt::Ulri qw(build_package warn_about_failure); +use Iurt::Ulri qw(build_package fetch_logs_and_cleanup warn_about_failure); use File::Copy 'move'; use File::Path 'make_path'; use File::Temp 'mktemp'; @@ -305,19 +305,10 @@ foreach my $prefix (keys %pkg_tree) { } else { plog('FAIL', "$bot died on $host/$arch (status $proc_state), removing lock"); } - $pkg_tree{$prefix}{media}{$media}{arch}{$arch} = 0; - } - - # Either we are done or we should kill the build - - plog('INFO', "delete lock file for $prefix"); - unlink $lock_file; - $run{bot}{$host}{$bot} = 0; - remove_bot_from_package($ent, $media, $host, $pid); + fetch_and_delete_logs($remote, "$prefix_dir/log", "$fail_dir/$prefix"); + record_bot_complete($run, $bot, $lock_file, $prefix, $ent, $media, $host, $pid); - if (!$status) { - # TODO: fetch/clean the logs next bot; } @@ -335,8 +326,7 @@ foreach my $prefix (keys %pkg_tree) { plog('FAIL', "install deps failure, rebuild later: $p"); $later{$prefix} = 1; $later = 1; - # TODO: fetch/clean the logs - } + } if ($r ne 'ok') { plog('FAIL', "$r: $p"); $fail = 1; @@ -376,38 +366,31 @@ foreach my $prefix (keys %pkg_tree) { # Add the package to the list of built ones, in case we fail another arch and need to cleanup push @{$ent->{rpms}}, $result_file; } - next if $error; + next bot if $error; if (check_if_mandatory_arch_failed($media, $ent, $config)) { # Discard this arch as another mandatory one failed cleanup_failed_build($todo_dir, $done_dir, $fail_dir, $prefix, $ent, $media, $arch, $config); + ssh($remote, "rm -rf $prefix_dir"); } else { create_file("$done_dir/${prefix}_$arch.done", "$bot $host"); $pkg_tree{$prefix}{media}{$media}{done_arch}{$arch} = 1; - make_path("$done_dir/$prefix"); - sget($remote, "$prefix_dir/log/*", "$done_dir/$prefix"); + fetch_logs_and_cleanup($remote, $prefix_dir, "$done_dir/$prefix"); $something_finished = 1; } - # Either we already fetched the success logs, or don't care - # as this success was discarded due to another failure. - ssh($remote, "rm -rf $prefix_dir"); - next bot; } - make_path($fail_dir); + record_bot_complete($run, $bot, $lock_file, $prefix, $ent, $media, $host, $pid); + + # In case of success we have now fetched packages and logs and cleaned up the remote machine + next bot if $done; unless ($pkg_tree{$prefix}{media}{$media}{cancelled_arch}{$arch}) { - mkdir("$fail_dir/$prefix"); - if (sget($remote, "$prefix_dir/*", "$fail_dir/$prefix")) { - plog('ERROR', "copying from $host:$prefix_dir/ " . 
- "to $fail_dir/ failed ($!)"); - $pkg_tree{$prefix}{media}{$media}{arch}{$arch} = 0; - } + make_path($fail_dir); + fetch_logs_and_cleanup($remote, $prefix_dir, "$fail_dir/$prefix"); + $pkg_tree{$prefix}{media}{$media}{arch}{$arch} = 0; } - # clean the log on the compilation machine - ssh($remote, "rm -rf $prefix_dir"); - # We got the logs but want to retry so don't record a failure next bot if $later; -- cgit v1.2.1