diff options
| author | mkanat%bugzilla.org <> | 2009-08-31 21:12:32 +0000 | 
|---|---|---|
| committer | mkanat%bugzilla.org <> | 2009-08-31 21:12:32 +0000 | 
| commit | 3bc08bc506e5d25165909cf1f5ea2328ea0eeb4f (patch) | |
| tree | 8d25b14516600e16158ecc4eff6791e52a57c540 | |
| parent | b3782f8d3c53b6e63caea5a6324b237fa5da1ee7 (diff) | |
| download | bugs-3bc08bc506e5d25165909cf1f5ea2328ea0eeb4f.tar bugs-3bc08bc506e5d25165909cf1f5ea2328ea0eeb4f.tar.gz bugs-3bc08bc506e5d25165909cf1f5ea2328ea0eeb4f.tar.bz2 bugs-3bc08bc506e5d25165909cf1f5ea2328ea0eeb4f.tar.xz bugs-3bc08bc506e5d25165909cf1f5ea2328ea0eeb4f.zip  | |
Bug 286625: Make collectstats.pl --regenerate WAY faster and make it correctly count the empty resolution.
Patch by Max Kanat-Alexander <mkanat@bugzilla.org> r=LpSolit, a=LpSolit
| -rwxr-xr-x | collectstats.pl | 111 | 
1 file changed, 64 insertions, 47 deletions
diff --git a/collectstats.pl b/collectstats.pl index bcb0fac5b..e550c1613 100755 --- a/collectstats.pl +++ b/collectstats.pl @@ -30,12 +30,14 @@  # To run new charts for a specific date, pass it in on the command line in  # ISO (2004-08-14) format. -use AnyDBM_File;  use strict; +use lib qw(. lib); + +use AnyDBM_File;  use IO::Handle; +use List::Util qw(first);  use Cwd; -use lib qw(. lib);  use Bugzilla;  use Bugzilla::Constants; @@ -111,6 +113,37 @@ my @resolutions = @{$fields->{'resolution'}};  # Exclude "" from the resolution list.  @resolutions = grep {$_} @resolutions; +# --regenerate was taking an enormous amount of time to query everything +# per bug, per day. Instead, we now just get all the data out of the DB +# at once and stuff it into some data structures. +my (%bug_status, %bug_resolution, %removed); +if ($regenerate) { +    %bug_resolution = @{ $dbh->selectcol_arrayref( +        'SELECT bug_id, resolution FROM bugs', {Columns=>[1,2]}) }; +    %bug_status = @{ $dbh->selectcol_arrayref( +        'SELECT bug_id, bug_status FROM bugs', {Columns=>[1,2]}) }; + +    my $removed_sth = $dbh->prepare( +        q{SELECT bugs_activity.bug_id, bugs_activity.removed,} +        . $dbh->sql_to_days('bugs_activity.bug_when') +        . q{FROM bugs_activity +           WHERE bugs_activity.fieldid = ? 
+        ORDER BY bugs_activity.bug_when}); + +    %removed = (bug_status => {}, resolution => {}); +    foreach my $field (qw(bug_status resolution)) { +        my $field_id = Bugzilla::Field->check($field)->id; +        my $rows = $dbh->selectall_arrayref($removed_sth, undef, $field_id); +        my $hash = $removed{$field}; +        foreach my $row (@$rows) { +            my ($bug_id, $removed, $when) = @$row; +            $hash->{$bug_id} ||= []; +            push(@{ $hash->{$bug_id} }, { when    => int($when), +                                          removed => $removed }); +        } +    } +} +  my $tstart = time;  foreach (@myproducts) {      my $dir = "$datadir/mining"; @@ -118,7 +151,7 @@ foreach (@myproducts) {      &check_data_dir ($dir);      if ($regenerate) { -        &regenerate_stats($dir, $_); +        regenerate_stats($dir, $_, \%bug_resolution, \%bug_status, \%removed);      } else {          &collect_stats($dir, $_);      } @@ -343,8 +376,7 @@ sub calculate_dupes {  # This regenerates all statistics from the database.  sub regenerate_stats { -    my $dir = shift; -    my $product = shift; +    my ($dir, $product, $bug_resolution, $bug_status, $removed) = @_;      my $dbh = Bugzilla->dbh;      my $when = localtime(time()); @@ -356,8 +388,6 @@ sub regenerate_stats {      $file_product =~ s/\//-/gs;      my $file = join '/', $dir, $file_product; -    my @bugs; -      my $and_product = "";      my $from_product = ""; @@ -387,7 +417,6 @@ sub regenerate_stats {      }      if (open DATA, ">$file") { -        DATA->autoflush(1);          my $fields = join('|', ('DATE', @statuses, @resolutions));          print DATA <<FIN;  # Bugzilla Daily Bug Stats @@ -400,6 +429,7 @@ sub regenerate_stats {  FIN          # For each day, generate a line of statistics.          
my $total_days = $end - $start; +        my @bugs;          for (my $day = $start + 1; $day <= $end; $day++) {              # Some output feedback              my $percent_done = ($day - $start - 1) * 100 / $total_days; @@ -416,56 +446,25 @@ FIN                          $and_product . q{ ORDER BY bug_id};              my $bug_ids = $dbh->selectcol_arrayref($query, undef, @values); -              push(@bugs, @$bug_ids); -            # For each bug that existed on that day, determine its status -            # at the beginning of the day.  If there were no status -            # changes on or after that day, the status was the same as it -            # is today, which can be found in the bugs table.  Otherwise, -            # the status was equal to the first "previous value" entry in -            # the bugs_activity table for that bug made on or after that -            # day.              my %bugcount;              foreach (@statuses) { $bugcount{$_} = 0; }              foreach (@resolutions) { $bugcount{$_} = 0; }              # Get information on bug states and resolutions. -            $query = qq{SELECT bugs_activity.removed  -                          FROM bugs_activity  -                    INNER JOIN fielddefs  -                            ON bugs_activity.fieldid = fielddefs.id  -                         WHERE fielddefs.name = ?  -                           AND bugs_activity.bug_id = ?  -                           AND bugs_activity.bug_when >= } .  -                           $dbh->sql_from_days($day) .  -                    " ORDER BY bugs_activity.bug_when " .  
-                          $dbh->sql_limit(1); - -            my $sth_bug = $dbh->prepare($query); -            my $sth_status = $dbh->prepare(q{SELECT bug_status  -                                               FROM bugs  -                                              WHERE bug_id = ?}); -             -            my $sth_reso = $dbh->prepare(q{SELECT resolution  -                                             FROM bugs  -                                            WHERE bug_id = ?}); -              for my $bug (@bugs) { -                my $status = $dbh->selectrow_array($sth_bug, undef,  -                                                       'bug_status', $bug); -                unless ($status) { -                    $status = $dbh->selectrow_array($sth_status, undef, $bug); -                } +                my $status = _get_value( +                    $removed->{'bug_status'}->{$bug}, +                    $bug_status,  $day, $bug);                  if (defined $bugcount{$status}) {                      $bugcount{$status}++;                  } -                my $resolution = $dbh->selectrow_array($sth_bug, undef,  -                                                         'resolution', $bug); -                unless ($resolution) { -                    $resolution = $dbh->selectrow_array($sth_reso, undef, $bug); -                } -                 + +                my $resolution = _get_value( +                    $removed->{'resolution'}->{$bug}, +                    $bug_resolution, $day, $bug); +                  if (defined $bugcount{$resolution}) {                      $bugcount{$resolution}++;                  } @@ -490,6 +489,24 @@ FIN      }  } +# A helper for --regenerate. +# For each bug that exists on a day, we determine its status/resolution +# at the beginning of the day.  If there were no status/resolution +# changes on or after that day, the status was the same as it +# is today (the "current" value).  
Otherwise, the status was equal to the +# first "previous value" entry in the bugs_activity table for that  +# bug made on or after that day. +sub _get_value { +    my ($removed, $current, $day, $bug) = @_; + +    # Get the first change that's on or after this day. +    my $item = first { $_->{when} >= $day } @{ $removed || [] }; + +    # If there's no change on or after this day, then we just return the +    # current value. +    return $item ? $item->{removed} : $current->{$bug}; +} +  sub today {      my ($dom, $mon, $year) = (localtime(time))[3, 4, 5];      return sprintf "%04d%02d%02d", 1900 + $year, ++$mon, $dom;  | 
