aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Bugzilla/DB/Schema.pm3
-rw-r--r--Bugzilla/Install/DB.pm4
-rw-r--r--Bugzilla/Install/Filesystem.pm7
-rwxr-xr-xcollectstats.pl80
-rwxr-xr-xduplicates.cgi182
-rw-r--r--template/en/default/global/user-error.html.tmpl25
-rw-r--r--template/en/default/reports/duplicates-table.html.tmpl7
7 files changed, 93 insertions, 215 deletions
diff --git a/Bugzilla/DB/Schema.pm b/Bugzilla/DB/Schema.pm
index 583b49352..5e8c49ccf 100644
--- a/Bugzilla/DB/Schema.pm
+++ b/Bugzilla/DB/Schema.pm
@@ -352,7 +352,7 @@ use constant ABSTRACT_SCHEMA => {
fieldid => {TYPE => 'INT3', NOTNULL => 1,
REFERENCES => {TABLE => 'fielddefs',
COLUMN => 'id'}},
- added => {TYPE => 'TINYTEXT'},
+ added => {TYPE => 'varchar(255)'},
removed => {TYPE => 'TINYTEXT'},
],
INDEXES => [
@@ -360,6 +360,7 @@ use constant ABSTRACT_SCHEMA => {
bugs_activity_who_idx => ['who'],
bugs_activity_bug_when_idx => ['bug_when'],
bugs_activity_fieldid_idx => ['fieldid'],
+ bugs_activity_added_idx => ['added'],
],
},
diff --git a/Bugzilla/Install/DB.pm b/Bugzilla/Install/DB.pm
index dd4b2fe1d..d39d1a770 100644
--- a/Bugzilla/Install/DB.pm
+++ b/Bugzilla/Install/DB.pm
@@ -574,6 +574,10 @@ sub update_table_definitions {
_convert_disallownew_to_isactive();
+ $dbh->bz_alter_column('bugs_activity', 'added',
+ { TYPE => 'varchar(255)' });
+ $dbh->bz_add_index('bugs_activity', 'bugs_activity_added_idx', ['added']);
+
################################################################
# New --TABLE-- changes should go *** A B O V E *** this point #
################################################################
diff --git a/Bugzilla/Install/Filesystem.pm b/Bugzilla/Install/Filesystem.pm
index fe9783805..763fd85bf 100644
--- a/Bugzilla/Install/Filesystem.pm
+++ b/Bugzilla/Install/Filesystem.pm
@@ -157,8 +157,6 @@ sub FILESYSTEM {
# Readable directories
"$datadir/mining" => { files => $ws_readable,
dirs => $ws_dir_readable },
- "$datadir/duplicates" => { files => $ws_readable,
- dirs => $ws_dir_readable },
"$libdir/Bugzilla" => { files => $ws_readable,
dirs => $ws_dir_readable },
$extlib => { files => $ws_readable,
@@ -379,6 +377,11 @@ EOT
unlink "$datadir/duplicates.rdf";
unlink "$datadir/duplicates-old.rdf";
}
+
+ if (-e "$datadir/duplicates") {
+ print "Removing duplicates directory...\n";
+ rmtree("$datadir/duplicates");
+ }
}
# A simple helper for creating "empty" CSS files.
diff --git a/collectstats.pl b/collectstats.pl
index e550c1613..5f9b4eef3 100755
--- a/collectstats.pl
+++ b/collectstats.pl
@@ -33,12 +33,9 @@
use strict;
use lib qw(. lib);
-use AnyDBM_File;
-use IO::Handle;
use List::Util qw(first);
use Cwd;
-
use Bugzilla;
use Bugzilla::Constants;
use Bugzilla::Error;
@@ -160,8 +157,6 @@ my $tend = time;
# Uncomment the following line for performance testing.
#print "Total time taken " . delta_time($tstart, $tend) . "\n";
-&calculate_dupes();
-
CollectSeriesData();
sub check_data_dir {
@@ -299,81 +294,6 @@ sub get_old_data {
return @data;
}
-sub calculate_dupes {
- my $dbh = Bugzilla->dbh;
- my $rows = $dbh->selectall_arrayref("SELECT dupe_of, dupe FROM duplicates");
-
- my %dupes;
- my %count;
- my $key;
- my $changed = 1;
-
- my $today = &today_dash;
-
- # Save % count here in a date-named file
- # so we can read it back in to do changed counters
- # First, delete it if it exists, so we don't add to the contents of an old file
- my $datadir = bz_locations()->{'datadir'};
-
- if (my @files = <$datadir/duplicates/dupes$today*>) {
- map { trick_taint($_) } @files;
- unlink @files;
- }
-
- dbmopen(%count, "$datadir/duplicates/dupes$today", 0644) || die "Can't open DBM dupes file: $!";
-
- # Create a hash with key "a bug number", value "bug which that bug is a
- # direct dupe of" - straight from the duplicates table.
- foreach my $row (@$rows) {
- my ($dupe_of, $dupe) = @$row;
- $dupes{$dupe} = $dupe_of;
- }
-
- # Total up the number of bugs which are dupes of a given bug
- # count will then have key = "bug number",
- # value = "number of immediate dupes of that bug".
- foreach $key (keys(%dupes))
- {
- my $dupe_of = $dupes{$key};
-
- if (!defined($count{$dupe_of})) {
- $count{$dupe_of} = 0;
- }
-
- $count{$dupe_of}++;
- }
-
- # Now we collapse the dupe tree by iterating over %count until
- # there is no further change.
- while ($changed == 1)
- {
- $changed = 0;
- foreach $key (keys(%count)) {
- # if this bug is actually itself a dupe, and has a count...
- if (defined($dupes{$key}) && $count{$key} > 0) {
- # add that count onto the bug it is a dupe of,
- # and zero the count; the check is to avoid
- # loops
- if ($count{$dupes{$key}} != 0) {
- $count{$dupes{$key}} += $count{$key};
- $count{$key} = 0;
- $changed = 1;
- }
- }
- }
- }
-
- # Remove the values for which the count is zero
- foreach $key (keys(%count))
- {
- if ($count{$key} == 0) {
- delete $count{$key};
- }
- }
-
- dbmclose(%count);
-}
-
# This regenerates all statistics from the database.
sub regenerate_stats {
my ($dir, $product, $bug_resolution, $bug_status, $removed) = @_;
diff --git a/duplicates.cgi b/duplicates.cgi
index af239d632..4c0509864 100755
--- a/duplicates.cgi
+++ b/duplicates.cgi
@@ -18,15 +18,11 @@
# Copyright (C) 1998 Netscape Communications Corporation. All
# Rights Reserved.
#
-# Contributor(s): Gervase Markham <gerv@gerv.net>
-#
-# Generates mostfreq list from data collected by collectstats.pl.
-
+# Contributor(s):
+# Gervase Markham <gerv@gerv.net>
+# Max Kanat-Alexander <mkanat@bugzilla.org>
use strict;
-
-use AnyDBM_File;
-
use lib qw(. lib);
use Bugzilla;
@@ -34,28 +30,52 @@ use Bugzilla::Constants;
use Bugzilla::Util;
use Bugzilla::Error;
use Bugzilla::Search;
+use Bugzilla::Field;
use Bugzilla::Product;
+###############
+# Subroutines #
+###############
+
+# $counts is a count of exactly how many direct duplicates there are for
+# each bug we're considering. $dups is a map of duplicates, from one
+# bug_id to another. We go through the duplicates map ($dups) and if one bug
+# in $count is a duplicate of another bug in $count, we add their counts
+# together under the target bug.
+sub add_indirect_dups {
+ my ($counts, $dups) = @_;
+
+ foreach my $add_from (keys %$dups) {
+ my $add_to = walk_dup_chain($dups, $add_from);
+ my $add_amount = delete $counts->{$add_from} || 0;
+ $counts->{$add_to} += $add_amount;
+ }
+}
+
+sub walk_dup_chain {
+ my ($dups, $from_id) = @_;
+ my $to_id = $dups->{$from_id};
+ while (my $bug_id = $dups->{$to_id}) {
+ last if $bug_id == $from_id; # avoid duplicate loops
+ $to_id = $bug_id;
+ }
+ # Optimize for future calls to add_indirect_dups.
+ $dups->{$from_id} = $to_id;
+ return $to_id;
+}
+
+###############
+# Main Script #
+###############
+
my $cgi = Bugzilla->cgi;
my $template = Bugzilla->template;
my $vars = {};
-# collectstats.pl uses duplicates.cgi to generate the RDF duplicates stats.
-# However, this conflicts with requirelogin if it's enabled; so we make
-# logging-in optional if we are running from the command line.
-if ($::ENV{'GATEWAY_INTERFACE'} eq "cmdline") {
- Bugzilla->login(LOGIN_OPTIONAL);
-}
-else {
- Bugzilla->login();
-}
+Bugzilla->login();
my $dbh = Bugzilla->switch_to_shadow_db();
-my %dbmcount;
-my %count;
-my %before;
-
# Get params from URL
sub formvalue {
my ($name, $default) = (@_);
@@ -70,6 +90,10 @@ my $reverse = formvalue("reverse") ? 1 : 0;
my @query_products = $cgi->param('product');
my $sortvisible = formvalue("sortvisible");
my @buglist = (split(/[:,]/, formvalue("bug_id")));
+detaint_natural($_) foreach @buglist;
+# If we got any non-numeric items, they will now be undef. Remove them from
+# the list.
+@buglist = grep($_, @buglist);
# Make sure all products are valid.
foreach my $p (@query_products) {
@@ -79,54 +103,6 @@ foreach my $p (@query_products) {
# Small backwards-compatibility hack, dated 2002-04-10.
$sortby = "count" if $sortby eq "dup_count";
-# Open today's record of dupes
-my $today = days_ago(0);
-my $yesterday = days_ago(1);
-
-# We don't know the exact file name, because the extension depends on the
-# underlying dbm library, which could be anything. We can't glob, because
-# perl < 5.6 considers if (<*>) { ... } to be tainted
-# Instead, just check the return value for today's data and yesterday's,
-# and ignore file not found errors
-
-use Errno;
-use Fcntl;
-
-my $datadir = bz_locations()->{'datadir'};
-
-if (!tie(%dbmcount, 'AnyDBM_File', "$datadir/duplicates/dupes$today",
- O_RDONLY, 0644)) {
- if ($!{ENOENT}) {
- if (!tie(%dbmcount, 'AnyDBM_File', "$datadir/duplicates/dupes$yesterday",
- O_RDONLY, 0644)) {
- my $vars = { today => $today };
- if ($!{ENOENT}) {
- ThrowUserError("no_dupe_stats", $vars);
- } else {
- $vars->{'error_msg'} = $!;
- ThrowUserError("no_dupe_stats_error_yesterday", $vars);
- }
- }
- } else {
- ThrowUserError("no_dupe_stats_error_today",
- { error_msg => $! });
- }
-}
-
-# Copy hash (so we don't mess up the on-disk file when we remove entries)
-%count = %dbmcount;
-
-# Remove all those dupes under the threshold parameter.
-# We do this, before the sorting, for performance reasons.
-my $threshold = Bugzilla->params->{"mostfreqthreshold"};
-
-while (my ($key, $value) = each %count) {
- delete $count{$key} if ($value < $threshold);
-
- # If there's a buglist, restrict the bugs to that list.
- delete $count{$key} if $sortvisible && (lsearch(\@buglist, $key) == -1);
-}
-
my $origmaxrows = $maxrows;
detaint_natural($maxrows)
|| ThrowUserError("invalid_maxrows", { maxrows => $origmaxrows});
@@ -136,34 +112,45 @@ detaint_natural($changedsince)
|| ThrowUserError("invalid_changedsince",
{ changedsince => $origchangedsince });
-# Try and open the database from "changedsince" days ago
-my $dobefore = 0;
-my %delta;
-my $whenever = days_ago($changedsince);
-
-if (!tie(%before, 'AnyDBM_File', "$datadir/duplicates/dupes$whenever",
- O_RDONLY, 0644)) {
- # Ignore file not found errors
- if (!$!{ENOENT}) {
- ThrowUserError("no_dupe_stats_error_whenever",
- { error_msg => $!,
- changedsince => $changedsince,
- whenever => $whenever,
- });
- }
-} else {
- # Calculate the deltas
- ($delta{$_} = $count{$_} - ($before{$_} || 0)) foreach (keys(%count));
- $dobefore = 1;
+my %total_dups = @{$dbh->selectcol_arrayref(
+ "SELECT dupe_of, COUNT(dupe)
+ FROM duplicates
+ GROUP BY dupe_of", {Columns => [1,2]})};
+
+my %dupe_relation = @{$dbh->selectcol_arrayref(
+ "SELECT dupe, dupe_of FROM duplicates
+ WHERE dupe IN (SELECT dupe_of FROM duplicates)",
+ {Columns => [1,2]})};
+add_indirect_dups(\%total_dups, \%dupe_relation);
+
+my $reso_field_id = get_field_id('resolution');
+my %since_dups = @{$dbh->selectcol_arrayref(
+ "SELECT dupe_of, COUNT(dupe)
+ FROM duplicates INNER JOIN bugs_activity
+ ON bugs_activity.bug_id = duplicates.dupe
+ WHERE added = 'DUPLICATE' AND fieldid = ? AND "
+ . $dbh->sql_to_days('bug_when') . " >= ("
+ . $dbh->sql_to_days('NOW()') . " - ?)
+ GROUP BY dupe_of", {Columns=>[1,2]},
+ $reso_field_id, $changedsince)};
+add_indirect_dups(\%since_dups, \%dupe_relation);
+
+my (@bugs, @bug_ids);
+
+foreach my $id (keys %total_dups) {
+ if ($total_dups{$id} < Bugzilla->params->{'mostfreqthreshold'}) {
+ delete $total_dups{$id};
+ next;
+ }
+ if ($sortvisible and @buglist and !grep($_ == $id, @buglist)) {
+ delete $total_dups{$id};
+ }
}
-my @bugs;
-my @bug_ids;
-
-if (scalar(%count)) {
+if (scalar %total_dups) {
# use Bugzilla::Search so that we get the security checking
- my $params = new Bugzilla::CGI({ 'bug_id' => [keys %count] });
+ my $params = new Bugzilla::CGI({ 'bug_id' => [keys %total_dups] });
if ($openonly) {
$params->param('resolution', '---');
@@ -221,8 +208,8 @@ if (scalar(%count)) {
$short_desc, $bug_status, $resolution) = @$result;
push (@bugs, { id => $id,
- count => $count{$id},
- delta => $delta{$id},
+ count => $total_dups{$id},
+ delta => $since_dups{$id} || 0,
component => $component,
bug_severity => $bug_severity,
op_sys => $op_sys,
@@ -237,7 +224,6 @@ if (scalar(%count)) {
$vars->{'bugs'} = \@bugs;
$vars->{'bug_ids'} = \@bug_ids;
-$vars->{'dobefore'} = $dobefore;
$vars->{'sortby'} = $sortby;
$vars->{'sortvisible'} = $sortvisible;
$vars->{'changedsince'} = $changedsince;
@@ -264,9 +250,3 @@ print $cgi->header(
# Generate and return the UI (HTML page) from the appropriate template.
$template->process($format->{'template'}, $vars)
|| ThrowTemplateError($template->error());
-
-
-sub days_ago {
- my ($dom, $mon, $year) = (localtime(time - ($_[0]*24*60*60)))[3, 4, 5];
- return sprintf "%04d-%02d-%02d", 1900 + $year, ++$mon, $dom;
-}
diff --git a/template/en/default/global/user-error.html.tmpl b/template/en/default/global/user-error.html.tmpl
index c4eefb4aa..3783e523b 100644
--- a/template/en/default/global/user-error.html.tmpl
+++ b/template/en/default/global/user-error.html.tmpl
@@ -1168,31 +1168,6 @@
[% title = "No Tag Selected" %]
You didn't select a tag from which to remove [% terms.bugs %].
- [% ELSIF error == "no_dupe_stats" %]
- [% title = "Cannot Find Duplicate Statistics" %]
- [% admindocslinks = {'extraconfig.html' => 'Setting up the collecstats.pl job'} %]
- There are no duplicate statistics for today ([% today FILTER html %])
- or yesterday.
-
- [% ELSIF error == "no_dupe_stats_error_today" %]
- [% title = "Error Reading Today's Dupes File" %]
- [% admindocslinks = {'extraconfig.html' => 'Setting up the collecstats.pl job'} %]
- An error occurred opening today's dupes file: [% error_msg FILTER html %].
-
- [% ELSIF error == "no_dupe_stats_error_whenever" %]
- [% title = "Error Reading Previous Dupes File" %]
- [% admindocslinks = {'extraconfig.html' => 'Setting up the collecstats.pl job'} %]
- An error occurred opening [% changedsince FILTER html %] days ago
- ([% whenever FILTER html %])'s dupes file:
- [% error_msg FILTER html %].
-
- [% ELSIF error == "no_dupe_stats_error_yesterday" %]
- [% title = "Error Reading Yesterday's Dupes File" %]
- [% admindocslinks = {'extraconfig.html' => 'Setting up the collecstats.pl job'} %]
- There are no duplicate statistics for today ([% today FILTER html %]),
- and an error
- occurred opening yesterday's dupes file: [% error_msg FILTER html %].
-
[% ELSIF error == "no_initial_bug_status" %]
[% title = "No Initial $terms.Bug Status" %]
No [% terms.bug %] status is available on [% terms.bug %] creation.
diff --git a/template/en/default/reports/duplicates-table.html.tmpl b/template/en/default/reports/duplicates-table.html.tmpl
index 5dbef2144..f651a7fd4 100644
--- a/template/en/default/reports/duplicates-table.html.tmpl
+++ b/template/en/default/reports/duplicates-table.html.tmpl
@@ -61,9 +61,6 @@
{ name => "short_desc", description => "Summary" } ]
%]
- [%# Small hack to keep delta column out if we don't need it %]
- [% NEXT IF column.name == "delta" AND NOT dobefore %]
-
<th>
[% bug_ids_string = bug_ids.join(',') %]
<a href="duplicates.cgi?sortby=[% column.name %]
@@ -127,9 +124,7 @@
</center>
</td>
- [% IF dobefore %]
- <td><center>[% bug.delta %]</center></td>
- [% END %]
+ <td><center>[% bug.delta %]</center></td>
<td>[% bug.component FILTER html %]</td>
<td><center>[% bug.bug_severity FILTER html %]</center></td>