From c9bb45ef0018eddbd0172f38db46a96e961a7bd1 Mon Sep 17 00:00:00 2001 From: Dan Fandrich Date: Thu, 14 Mar 2024 18:50:26 -0700 Subject: Use multiprocessing in mksite for improved speed This generates templated files using some parallelism, reducing the total mksite time to less than half in my tests. Increasing parallelism even further is possible, but would make the code harder to understand. The obvious technique of generating each templated file in its own process is actually far slower because the overhead of process creation dwarfs the time spent processing the template, which is on average very small and quick. --- lib/MGA/Advisories.pm | 88 +++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 68 insertions(+), 20 deletions(-) diff --git a/lib/MGA/Advisories.pm b/lib/MGA/Advisories.pm index 08f5c15..2db96db 100644 --- a/lib/MGA/Advisories.pm +++ b/lib/MGA/Advisories.pm @@ -10,6 +10,7 @@ use Email::Simple; use Email::Simple::Creator; use HTTP::Request; use LWP::UserAgent; +use Parallel::ForkManager; use File::Basename; use XMLRPC::Lite; use Term::ReadKey; @@ -614,6 +615,25 @@ sub process_template { } } +# Max 10 processes for processing templates +my $pm = Parallel::ForkManager->new(10); + +# Run process_template in its own process. The process creation overhead is +# high, so this only makes sense for templates that iterate over all or most +# of the advisories and not just a single one. +sub parallel_process_template { + if (! 
$pm->start()) { + # child + process_template(@_); + $pm->finish; # do the exit from the child process + } +} + +# Wait for all processes to finish +sub parallel_complete { + $pm->wait_all_children; +} + sub output_pages { my ($advdb) = @_; my $template = Template->new( @@ -622,27 +642,54 @@ sub output_pages { OUTPUT_PATH => $config->{out_dir}, EVAL_PERL => 1, ); - foreach my $adv (keys %{$advdb->{advisories}}) { - my $vars = { - config => $config, - advisory => $adv, - advdb => $advdb, - basename => \%basename, - tools => \%tools, - }; - process_template($template, 'advisory', $vars, $basename{ID}->($adv)); - process_template($template, 'advisory', $vars, $basename{ID}->($adv), 'json'); + + # Run this loop in parallel in its own process + if (! $pm->start()) { + # child + foreach my $adv (keys %{$advdb->{advisories}}) { + my $vars = { + config => $config, + advisory => $adv, + advdb => $advdb, + basename => \%basename, + tools => \%tools, + }; + process_template($template, 'advisory', $vars, $basename{ID}->($adv)); + } + $pm->finish; # do the exit from the child process } - foreach my $by (['rel', 'by_rel'], ['CVE', 'by_cve'], ['src', 'by_src']) { - foreach my $r (keys %{$advdb->{$by->[1]}}) { + + # Run this loop in parallel in its own process + if (! $pm->start()) { + # child + foreach my $adv (keys %{$advdb->{advisories}}) { my $vars = { config => $config, - $by->[0] => $r, + advisory => $adv, advdb => $advdb, basename => \%basename, tools => \%tools, }; - process_template($template, $by->[1], $vars, $basename{$by->[0]}->($r)); + process_template($template, 'advisory', $vars, $basename{ID}->($adv), 'json'); + } + $pm->finish; # do the exit from the child process + } + + # Run each loop in parallel in its own process + foreach my $by (['rel', 'by_rel'], ['CVE', 'by_cve'], ['src', 'by_src']) { + if (! 
$pm->start()) { + # child + foreach my $r (keys %{$advdb->{$by->[1]}}) { + my $vars = { + config => $config, + $by->[0] => $r, + advdb => $advdb, + basename => \%basename, + tools => \%tools, + }; + process_template($template, $by->[1], $vars, $basename{$by->[0]}->($r)); + } + $pm->finish; # do the exit from the child process } } my $vars = { @@ -651,12 +698,13 @@ sub output_pages { basename => \%basename, tools => \%tools, }; - process_template($template, 'index', $vars, 'index'); - process_template($template, 'advisories', $vars, 'advisories'); - process_template($template, 'vulns', $vars, 'vulns', 'json'); - process_template($template, 'bugs', $vars, 'bugs', 'json'); - process_template($template, 'infos', $vars, 'infos'); - process_template($template, 'CVE', $vars, 'CVE'); + parallel_process_template($template, 'index', $vars, 'index'); + parallel_process_template($template, 'advisories', $vars, 'advisories'); + parallel_process_template($template, 'vulns', $vars, 'vulns', 'json'); + parallel_process_template($template, 'bugs', $vars, 'bugs', 'json'); + parallel_process_template($template, 'infos', $vars, 'infos'); + parallel_process_template($template, 'CVE', $vars, 'CVE'); + parallel_complete(); } sub send_adv_mail { -- cgit v1.2.1