diff options
author | Dan Fandrich <danf@mageia.org> | 2024-03-14 18:50:26 -0700 |
---|---|---|
committer | Dan Fandrich <danf@mageia.org> | 2024-03-19 20:01:16 -0700 |
commit | fe405b428456decd6fed841fd6de6b8e09e73e55 (patch) | |
tree | 01b4780695dee0f7a55f70cdcc2a5d6c14141586 | |
parent | c9c5444f4f27f0a7375088a1cc0b4594742d8fe8 (diff) | |
download | mgaadvisories-fe405b428456decd6fed841fd6de6b8e09e73e55.tar mgaadvisories-fe405b428456decd6fed841fd6de6b8e09e73e55.tar.gz mgaadvisories-fe405b428456decd6fed841fd6de6b8e09e73e55.tar.bz2 mgaadvisories-fe405b428456decd6fed841fd6de6b8e09e73e55.tar.xz mgaadvisories-fe405b428456decd6fed841fd6de6b8e09e73e55.zip |
Use multiprocessing in mksite for improved speed
This generates templated files using some parallelism, reducing the
total mksite time to less than half in my tests. Increasing parallelism
even further is possible, but would make the code harder to understand.
The obvious technique of generating each templated file in its own
process is actually far slower because the overhead of process creation
dwarfs the time spent processing each template, which is on average very
short.
-rw-r--r-- | NEWS | 1 | ||||
-rw-r--r-- | lib/MGA/Advisories.pm | 88 |
2 files changed, 69 insertions, 20 deletions
@@ -4,6 +4,7 @@ Version X - use https: links where possible - add 'showjson' command to output an advisory in OSV JSON format - have 'mksite' write JSON output files as well +- 'mksite' now runs twice as fast Version 0.27 diff --git a/lib/MGA/Advisories.pm b/lib/MGA/Advisories.pm index 08f5c15..2db96db 100644 --- a/lib/MGA/Advisories.pm +++ b/lib/MGA/Advisories.pm @@ -10,6 +10,7 @@ use Email::Simple; use Email::Simple::Creator; use HTTP::Request; use LWP::UserAgent; +use Parallel::ForkManager; use File::Basename; use XMLRPC::Lite; use Term::ReadKey; @@ -614,6 +615,25 @@ sub process_template { } } +# Max 10 processes for processing templates +my $pm = Parallel::ForkManager->new(10); + +# Run process_template in its own process. The process creation overhead is +# high, so this only makes sense for templates that iterate over all or most +# of the advisories and not just a single one. +sub parallel_process_template { + if (! $pm->start()) { + # child + process_template(@_); + $pm->finish; # do the exit from the child process + } +} + +# Wait for all processes to finish +sub parallel_complete { + $pm->wait_all_children; +} + sub output_pages { my ($advdb) = @_; my $template = Template->new( @@ -622,27 +642,54 @@ sub output_pages { OUTPUT_PATH => $config->{out_dir}, EVAL_PERL => 1, ); - foreach my $adv (keys %{$advdb->{advisories}}) { - my $vars = { - config => $config, - advisory => $adv, - advdb => $advdb, - basename => \%basename, - tools => \%tools, - }; - process_template($template, 'advisory', $vars, $basename{ID}->($adv)); - process_template($template, 'advisory', $vars, $basename{ID}->($adv), 'json'); + + # Run this loop in parallel in its own process + if (! 
$pm->start()) { + # child + foreach my $adv (keys %{$advdb->{advisories}}) { + my $vars = { + config => $config, + advisory => $adv, + advdb => $advdb, + basename => \%basename, + tools => \%tools, + }; + process_template($template, 'advisory', $vars, $basename{ID}->($adv)); + } + $pm->finish; # do the exit from the child process } - foreach my $by (['rel', 'by_rel'], ['CVE', 'by_cve'], ['src', 'by_src']) { - foreach my $r (keys %{$advdb->{$by->[1]}}) { + + # Run this loop in parallel in its own process + if (! $pm->start()) { + # child + foreach my $adv (keys %{$advdb->{advisories}}) { my $vars = { config => $config, - $by->[0] => $r, + advisory => $adv, advdb => $advdb, basename => \%basename, tools => \%tools, }; - process_template($template, $by->[1], $vars, $basename{$by->[0]}->($r)); + process_template($template, 'advisory', $vars, $basename{ID}->($adv), 'json'); + } + $pm->finish; # do the exit from the child process + } + + # Run each loop in parallel in its own process + foreach my $by (['rel', 'by_rel'], ['CVE', 'by_cve'], ['src', 'by_src']) { + if (! 
$pm->start()) { + # child + foreach my $r (keys %{$advdb->{$by->[1]}}) { + my $vars = { + config => $config, + $by->[0] => $r, + advdb => $advdb, + basename => \%basename, + tools => \%tools, + }; + process_template($template, $by->[1], $vars, $basename{$by->[0]}->($r)); + } + $pm->finish; # do the exit from the child process } } my $vars = { @@ -651,12 +698,13 @@ sub output_pages { basename => \%basename, tools => \%tools, }; - process_template($template, 'index', $vars, 'index'); - process_template($template, 'advisories', $vars, 'advisories'); - process_template($template, 'vulns', $vars, 'vulns', 'json'); - process_template($template, 'bugs', $vars, 'bugs', 'json'); - process_template($template, 'infos', $vars, 'infos'); - process_template($template, 'CVE', $vars, 'CVE'); + parallel_process_template($template, 'index', $vars, 'index'); + parallel_process_template($template, 'advisories', $vars, 'advisories'); + parallel_process_template($template, 'vulns', $vars, 'vulns', 'json'); + parallel_process_template($template, 'bugs', $vars, 'bugs', 'json'); + parallel_process_template($template, 'infos', $vars, 'infos'); + parallel_process_template($template, 'CVE', $vars, 'CVE'); + parallel_complete(); } sub send_adv_mail { |