aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDan Fandrich <danf@mageia.org>2024-03-14 18:50:26 -0700
committerDan Fandrich <danf@mageia.org>2024-03-19 20:01:16 -0700
commitfe405b428456decd6fed841fd6de6b8e09e73e55 (patch)
tree01b4780695dee0f7a55f70cdcc2a5d6c14141586
parentc9c5444f4f27f0a7375088a1cc0b4594742d8fe8 (diff)
downloadmgaadvisories-fe405b428456decd6fed841fd6de6b8e09e73e55.tar
mgaadvisories-fe405b428456decd6fed841fd6de6b8e09e73e55.tar.gz
mgaadvisories-fe405b428456decd6fed841fd6de6b8e09e73e55.tar.bz2
mgaadvisories-fe405b428456decd6fed841fd6de6b8e09e73e55.tar.xz
mgaadvisories-fe405b428456decd6fed841fd6de6b8e09e73e55.zip
Use multiprocessing in mksite for improved speed
This generates templated files using some parallelism, reducing the total mksite time to less than half in my tests. Increasing parallelism even further is possible, but would make the code harder to understand. The obvious technique of generating each templated file in its own process is actually far slower because the overhead of process creation dwarfs the time spent processing the template, which is on average very short.
-rw-r--r--NEWS1
-rw-r--r--lib/MGA/Advisories.pm88
2 files changed, 69 insertions, 20 deletions
diff --git a/NEWS b/NEWS
index 74404ba..b89b938 100644
--- a/NEWS
+++ b/NEWS
@@ -4,6 +4,7 @@ Version X
- use https: links where possible
- add 'showjson' command to output an advisory in OSV JSON format
- have 'mksite' write JSON output files as well
+- 'mksite' now runs twice as fast
Version 0.27
diff --git a/lib/MGA/Advisories.pm b/lib/MGA/Advisories.pm
index 08f5c15..2db96db 100644
--- a/lib/MGA/Advisories.pm
+++ b/lib/MGA/Advisories.pm
@@ -10,6 +10,7 @@ use Email::Simple;
use Email::Simple::Creator;
use HTTP::Request;
use LWP::UserAgent;
+use Parallel::ForkManager;
use File::Basename;
use XMLRPC::Lite;
use Term::ReadKey;
@@ -614,6 +615,25 @@ sub process_template {
}
}
+# Max 10 processes for processing templates
+my $pm = Parallel::ForkManager->new(10);
+
+# Run process_template in its own process. The process creation overhead is
+# high, so this only makes sense for templates that iterate over all or most
+# of the advisories and not just a single one.
+sub parallel_process_template {
+ if (! $pm->start()) {
+ # child
+ process_template(@_);
+ $pm->finish; # do the exit from the child process
+ }
+}
+
+# Wait for all processes to finish
+sub parallel_complete {
+ $pm->wait_all_children;
+}
+
sub output_pages {
my ($advdb) = @_;
my $template = Template->new(
@@ -622,27 +642,54 @@ sub output_pages {
OUTPUT_PATH => $config->{out_dir},
EVAL_PERL => 1,
);
- foreach my $adv (keys %{$advdb->{advisories}}) {
- my $vars = {
- config => $config,
- advisory => $adv,
- advdb => $advdb,
- basename => \%basename,
- tools => \%tools,
- };
- process_template($template, 'advisory', $vars, $basename{ID}->($adv));
- process_template($template, 'advisory', $vars, $basename{ID}->($adv), 'json');
+
+ # Run this loop in parallel in its own process
+ if (! $pm->start()) {
+ # child
+ foreach my $adv (keys %{$advdb->{advisories}}) {
+ my $vars = {
+ config => $config,
+ advisory => $adv,
+ advdb => $advdb,
+ basename => \%basename,
+ tools => \%tools,
+ };
+ process_template($template, 'advisory', $vars, $basename{ID}->($adv));
+ }
+ $pm->finish; # do the exit from the child process
}
- foreach my $by (['rel', 'by_rel'], ['CVE', 'by_cve'], ['src', 'by_src']) {
- foreach my $r (keys %{$advdb->{$by->[1]}}) {
+
+ # Run this loop in parallel in its own process
+ if (! $pm->start()) {
+ # child
+ foreach my $adv (keys %{$advdb->{advisories}}) {
my $vars = {
config => $config,
- $by->[0] => $r,
+ advisory => $adv,
advdb => $advdb,
basename => \%basename,
tools => \%tools,
};
- process_template($template, $by->[1], $vars, $basename{$by->[0]}->($r));
+ process_template($template, 'advisory', $vars, $basename{ID}->($adv), 'json');
+ }
+ $pm->finish; # do the exit from the child process
+ }
+
+ # Run each loop in parallel in its own process
+ foreach my $by (['rel', 'by_rel'], ['CVE', 'by_cve'], ['src', 'by_src']) {
+ if (! $pm->start()) {
+ # child
+ foreach my $r (keys %{$advdb->{$by->[1]}}) {
+ my $vars = {
+ config => $config,
+ $by->[0] => $r,
+ advdb => $advdb,
+ basename => \%basename,
+ tools => \%tools,
+ };
+ process_template($template, $by->[1], $vars, $basename{$by->[0]}->($r));
+ }
+ $pm->finish; # do the exit from the child process
}
}
my $vars = {
@@ -651,12 +698,13 @@ sub output_pages {
basename => \%basename,
tools => \%tools,
};
- process_template($template, 'index', $vars, 'index');
- process_template($template, 'advisories', $vars, 'advisories');
- process_template($template, 'vulns', $vars, 'vulns', 'json');
- process_template($template, 'bugs', $vars, 'bugs', 'json');
- process_template($template, 'infos', $vars, 'infos');
- process_template($template, 'CVE', $vars, 'CVE');
+ parallel_process_template($template, 'index', $vars, 'index');
+ parallel_process_template($template, 'advisories', $vars, 'advisories');
+ parallel_process_template($template, 'vulns', $vars, 'vulns', 'json');
+ parallel_process_template($template, 'bugs', $vars, 'bugs', 'json');
+ parallel_process_template($template, 'infos', $vars, 'infos');
+ parallel_process_template($template, 'CVE', $vars, 'CVE');
+ parallel_complete();
}
sub send_adv_mail {