diff options
author | mkanat%bugzilla.org <> | 2009-08-31 21:12:32 +0000 |
---|---|---|
committer | mkanat%bugzilla.org <> | 2009-08-31 21:12:32 +0000 |
commit | 3bc08bc506e5d25165909cf1f5ea2328ea0eeb4f (patch) | |
tree | 8d25b14516600e16158ecc4eff6791e52a57c540 | |
parent | b3782f8d3c53b6e63caea5a6324b237fa5da1ee7 (diff) | |
download | bugs-3bc08bc506e5d25165909cf1f5ea2328ea0eeb4f.tar bugs-3bc08bc506e5d25165909cf1f5ea2328ea0eeb4f.tar.gz bugs-3bc08bc506e5d25165909cf1f5ea2328ea0eeb4f.tar.bz2 bugs-3bc08bc506e5d25165909cf1f5ea2328ea0eeb4f.tar.xz bugs-3bc08bc506e5d25165909cf1f5ea2328ea0eeb4f.zip |
Bug 286625: Make collectstats.pl --regenerate WAY faster and make it correctly count the empty resolution.
Patch by Max Kanat-Alexander <mkanat@bugzilla.org> r=LpSolit, a=LpSolit
-rwxr-xr-x | collectstats.pl | 111 |
1 files changed, 64 insertions, 47 deletions
diff --git a/collectstats.pl b/collectstats.pl index bcb0fac5b..e550c1613 100755 --- a/collectstats.pl +++ b/collectstats.pl @@ -30,12 +30,14 @@ # To run new charts for a specific date, pass it in on the command line in # ISO (2004-08-14) format. -use AnyDBM_File; use strict; +use lib qw(. lib); + +use AnyDBM_File; use IO::Handle; +use List::Util qw(first); use Cwd; -use lib qw(. lib); use Bugzilla; use Bugzilla::Constants; @@ -111,6 +113,37 @@ my @resolutions = @{$fields->{'resolution'}}; # Exclude "" from the resolution list. @resolutions = grep {$_} @resolutions; +# --regenerate was taking an enormous amount of time to query everything +# per bug, per day. Instead, we now just get all the data out of the DB +# at once and stuff it into some data structures. +my (%bug_status, %bug_resolution, %removed); +if ($regenerate) { + %bug_resolution = @{ $dbh->selectcol_arrayref( + 'SELECT bug_id, resolution FROM bugs', {Columns=>[1,2]}) }; + %bug_status = @{ $dbh->selectcol_arrayref( + 'SELECT bug_id, bug_status FROM bugs', {Columns=>[1,2]}) }; + + my $removed_sth = $dbh->prepare( + q{SELECT bugs_activity.bug_id, bugs_activity.removed,} + . $dbh->sql_to_days('bugs_activity.bug_when') + . q{FROM bugs_activity + WHERE bugs_activity.fieldid = ? + ORDER BY bugs_activity.bug_when}); + + %removed = (bug_status => {}, resolution => {}); + foreach my $field (qw(bug_status resolution)) { + my $field_id = Bugzilla::Field->check($field)->id; + my $rows = $dbh->selectall_arrayref($removed_sth, undef, $field_id); + my $hash = $removed{$field}; + foreach my $row (@$rows) { + my ($bug_id, $removed, $when) = @$row; + $hash->{$bug_id} ||= []; + push(@{ $hash->{$bug_id} }, { when => int($when), + removed => $removed }); + } + } +} + my $tstart = time; foreach (@myproducts) { my $dir = "$datadir/mining"; @@ -118,7 +151,7 @@ foreach (@myproducts) { &check_data_dir ($dir); if ($regenerate) { - ®enerate_stats($dir, $_); + regenerate_stats($dir, $_, \%bug_resolution, \%bug_status, \%removed); } else { &collect_stats($dir, $_); } @@ -343,8 +376,7 @@ sub calculate_dupes { # This regenerates all statistics from the database. sub regenerate_stats { - my $dir = shift; - my $product = shift; + my ($dir, $product, $bug_resolution, $bug_status, $removed) = @_; my $dbh = Bugzilla->dbh; my $when = localtime(time()); @@ -356,8 +388,6 @@ sub regenerate_stats { $file_product =~ s/\//-/gs; my $file = join '/', $dir, $file_product; - my @bugs; - my $and_product = ""; my $from_product = ""; @@ -387,7 +417,6 @@ sub regenerate_stats { } if (open DATA, ">$file") { - DATA->autoflush(1); my $fields = join('|', ('DATE', @statuses, @resolutions)); print DATA <<FIN; # Bugzilla Daily Bug Stats @@ -400,6 +429,7 @@ sub regenerate_stats { FIN # For each day, generate a line of statistics. my $total_days = $end - $start; + my @bugs; for (my $day = $start + 1; $day <= $end; $day++) { # Some output feedback my $percent_done = ($day - $start - 1) * 100 / $total_days; @@ -416,56 +446,25 @@ FIN $and_product . q{ ORDER BY bug_id}; my $bug_ids = $dbh->selectcol_arrayref($query, undef, @values); - push(@bugs, @$bug_ids); - # For each bug that existed on that day, determine its status - # at the beginning of the day. If there were no status - # changes on or after that day, the status was the same as it - # is today, which can be found in the bugs table. Otherwise, - # the status was equal to the first "previous value" entry in - # the bugs_activity table for that bug made on or after that - # day. my %bugcount; foreach (@statuses) { $bugcount{$_} = 0; } foreach (@resolutions) { $bugcount{$_} = 0; } # Get information on bug states and resolutions. - $query = qq{SELECT bugs_activity.removed - FROM bugs_activity - INNER JOIN fielddefs - ON bugs_activity.fieldid = fielddefs.id - WHERE fielddefs.name = ? - AND bugs_activity.bug_id = ? - AND bugs_activity.bug_when >= } . - $dbh->sql_from_days($day) . - " ORDER BY bugs_activity.bug_when " . - $dbh->sql_limit(1); - - my $sth_bug = $dbh->prepare($query); - my $sth_status = $dbh->prepare(q{SELECT bug_status - FROM bugs - WHERE bug_id = ?}); - - my $sth_reso = $dbh->prepare(q{SELECT resolution - FROM bugs - WHERE bug_id = ?}); - for my $bug (@bugs) { - my $status = $dbh->selectrow_array($sth_bug, undef, - 'bug_status', $bug); - unless ($status) { - $status = $dbh->selectrow_array($sth_status, undef, $bug); - } + my $status = _get_value( + $removed->{'bug_status'}->{$bug}, + $bug_status, $day, $bug); if (defined $bugcount{$status}) { $bugcount{$status}++; } - my $resolution = $dbh->selectrow_array($sth_bug, undef, - 'resolution', $bug); - unless ($resolution) { - $resolution = $dbh->selectrow_array($sth_reso, undef, $bug); - } - + + my $resolution = _get_value( + $removed->{'resolution'}->{$bug}, + $bug_resolution, $day, $bug); + if (defined $bugcount{$resolution}) { $bugcount{$resolution}++; } @@ -490,6 +489,24 @@ FIN } } +# A helper for --regenerate. +# For each bug that exists on a day, we determine its status/resolution +# at the beginning of the day. If there were no status/resolution +# changes on or after that day, the status was the same as it +# is today (the "current" value). Otherwise, the status was equal to the +# first "previous value" entry in the bugs_activity table for that +# bug made on or after that day. +sub _get_value { + my ($removed, $current, $day, $bug) = @_; + + # Get the first change that's on or after this day. + my $item = first { $_->{when} >= $day } @{ $removed || [] }; + + # If there's no change on or after this day, then we just return the + # current value. + return $item ? $item->{removed} : $current->{$bug}; +} + sub today { my ($dom, $mon, $year) = (localtime(time))[3, 4, 5]; return sprintf "%04d%02d%02d", 1900 + $year, ++$mon, $dom; |