From 6ed055cfc3fb1f7821fdfbccbda100a489f57ffa Mon Sep 17 00:00:00 2001 From: "justdave%syndicomm.com" <> Date: Sat, 7 Apr 2001 01:19:44 +0000 Subject: Fix for bug 72721 (duplicates.cgi performs poorly with lots of bugs) and bug 69054 (DB_File not portable): dependence on DB_File removed, now uses AnyDBM_File which comes standard with Perl. Duplicates.cgi now runs its queries against the shadow database if it's available, among many other improvements. Patch by gervase.markham@univ.ox.ac.uk (Gervase Markham) r= justdave --- collectstats.pl | 15 ++-- defparams.pl | 8 +-- duplicates.cgi | 213 +++++++++++++++++++++++++++++++++++--------------------- 3 files changed, 147 insertions(+), 89 deletions(-) diff --git a/collectstats.pl b/collectstats.pl index e645165e1..d6a97e4e2 100755 --- a/collectstats.pl +++ b/collectstats.pl @@ -24,7 +24,7 @@ # Run me out of cron at midnight to collect Bugzilla statistics. -use DB_File; +use AnyDBM_File; use diagnostics; use strict; use vars @::legal_product; @@ -125,16 +125,16 @@ sub calculate_dupes { my $key; my $changed = 1; - my $today = &today; + my $today = &today_dash; # Save % count here in a date-named file # so we can read it back in to do changed counters # First, delete it if it exists, so we don't add to the contents of an old file - if (-e "data/mining/dupes$today.db") { - system("rm -f data/mining/dupes$today.db"); + if (-e "data/mining/dupes$today") { + system("rm -f data/mining/dupes$today"); } - dbmopen(%count, "data/mining/dupes$today.db", 0644) || die "Can't open DBM dupes file: $!"; + dbmopen(%count, "data/mining/dupes$today", 0644) || die "Can't open DBM dupes file: $!"; # Create a hash with key "a bug number", value "bug which that bug is a # direct dupe of" - straight from the duplicates table. @@ -194,3 +194,8 @@ sub today { return sprintf "%04d%02d%02d", 1900 + $year, ++$mon, $dom; } +sub today_dash { + my ($dom, $mon, $year) = (localtime(time))[3, 4, 5]; + return sprintf "%04d-%02d-%02d", 1900 + $year, ++$mon, $dom; +} + diff --git a/defparams.pl b/defparams.pl index c384eb973..ff60338b0 100644 --- a/defparams.pl +++ b/defparams.pl @@ -120,10 +120,6 @@ sub check_shadowdb { # t -- A short text entry field (suitable for a single line) # l -- A long text field (suitable for many lines) # b -- A boolean value (either 1 or 0) -# i -- An integer. -# defenum -- This param defines an enum that defines a column in one of -# the database tables. The name of the parameter is of the form -# "tablename.columnname". DefParam("maintainer", "The email address of the person who maintains this installation of Bugzilla.", @@ -334,6 +330,10 @@ additional data you may have.
}); +DefParam("mostfreqthreshold", + "The minimum number of duplicates a bug needs to show up on the most frequently reported bugs page. If you have a large database and this page takes a long time to load, try increasing this number.", + "t", + "2"); DefParam("mybugstemplate", "This is the URL to use to bring up a simple 'all of my bugs' list for a user. %userid% will get replaced with the login name of a user.", diff --git a/duplicates.cgi b/duplicates.cgi index 6bb4e20ab..03a366763 100755 --- a/duplicates.cgi +++ b/duplicates.cgi @@ -25,82 +25,119 @@ use diagnostics; use strict; use CGI "param"; -use DB_File; +use AnyDBM_File; require "globals.pl"; require "CGI.pl"; -ConnectToDatabase(); +ConnectToDatabase(1); GetVersionTable(); +my %dbmcount; my %count; my $dobefore = 0; my $before = ""; my %before; -my $changedsince; -my $maxrows = 500; # arbitrary limit on max number of rows +# Get params from URL -my $today = &days_ago(0); +my $changedsince = 7; # default one week +my $maxrows = 100; # arbitrary limit on max number of rows +my $sortby = "dup_count"; # default to sorting by dup count -# Open today's record of dupes -if (-e "data/mining/dupes$today.db") -{ - dbmopen(%count, "data/mining/dupes${today}.db", 0644) || die "Can't open today's dupes file: $!"; -} -else +if (defined(param("sortby"))) { - # Try yesterday's, then (in case today's hasn't been created yet) :-) - $today = &days_ago(1); - if (-e "data/mining/dupes$today.db") - { - dbmopen(%count, "data/mining/dupes${today}.db", 0644) || die "Can't open yesterday's dupes file: $!"; - } - else - { - die "There are no duplicate statistics for today or yesterday."; - } + $sortby = param("sortby"); } # Check for changedsince param, and see if it's a positive integer if (defined(param("changedsince")) && param("changedsince") =~ /^\d{1,4}$/) { - $changedsince = param("changedsince"); + $changedsince = param("changedsince"); } -else + +# check for max rows param, and see if it's a positive integer +if (defined(param("maxrows")) && param("maxrows") =~ /^\d{1,4}$/) { - # Otherwise, default to one week - $changedsince = "7"; + $maxrows = param("maxrows"); } -$before = &days_ago($changedsince); +# Start the page +print "Content-type: text/html\n"; +print "\n"; +PutHeader("Most Frequently Reported Bugs"); -# check for max rows parameter -if (defined(param("maxrows")) && param("maxrows") =~ /^\d{1,4}$/) +# Open today's record of dupes +my $today = &days_ago(0); + +if (-e "data/mining/dupes$today.db") { - $maxrows = param("maxrows"); + dbmopen(%dbmcount, "data/mining/dupes$today", 0644) || + &die_politely("Can't open today's dupes file: $!"); } +else +{ + # Try yesterday's, then (in case today's hasn't been created yet) + $today = &days_ago(1); + if (-e "data/mining/dupes$today.db") + { + dbmopen(%dbmcount, "data/mining/dupes$today", 0644) || + &die_politely("Can't open yesterday's dupes file: $!"); + } + else + { + &die_politely("There are no duplicate statistics for today or yesterday."); + } +} + +# Copy hash (so we don't mess up the on-disk file when we remove entries) +%count = %dbmcount; +my $key; +my $value; +my $threshold = Param("mostfreqthreshold"); -if (-e "data/mining/dupes${before}.db") +# Remove all those dupes under the threshold (for performance reasons) +while (($key, $value) = each %count) { - dbmopen(%before, "data/mining/dupes${before}.db", 0644) && ($dobefore = 1); + if ($value < $threshold) + { + delete $count{$key}; + } } -print "Content-type: text/html\n"; -print "\n"; -PutHeader("Most Frequently Reported Bugs"); +# Try and open the database from "changedsince" days ago +$before = &days_ago($changedsince); + +if (-e "data/mining/dupes$before.db") +{ + dbmopen(%before, "data/mining/dupes$before", 0644) && ($dobefore = 1); +} print Param("mostfreqhtml"); print " - - -\n"; + + +\n"; + +my %delta; if ($dobefore) { - print " "; + print ""; + + # Calculate the deltas if we are doing a "before" + foreach (keys(%count)) + { + $delta{$_} = $count{$_} - $before{$_}; + } } print " @@ -111,78 +148,94 @@ print " \n\n"; -my %delta; +# Sort, if required +my @sortedcount; -# Calculate the deltas if we are doing a "before" -if ($dobefore) +if ($sortby eq "delta") { - foreach (keys(%count)) - { - $delta{$_} = $count{$_} - $before{$_}; - } + @sortedcount = sort by_delta keys(%count); } - -# Offer the option of sorting on total count, or on the delta -my @sortedcount; - -if (defined(param("sortby")) && param("sortby") == "delta") +elsif ($sortby eq "bug_no") { - @sortedcount = sort by_delta keys(%count); + @sortedcount = sort by_bug_no keys(%count); } -else +elsif ($sortby eq "dup_count") { - @sortedcount = sort by_dup_count keys(%count); + @sortedcount = sort by_dup_count keys(%count); } my $i = 0; foreach (@sortedcount) { - my $id = $_; - SendSQL("SELECT component, bug_severity, op_sys, target_milestone, short_desc, groupset " . + my $id = $_; + SendSQL("SELECT component, bug_severity, op_sys, target_milestone, short_desc, groupset " . " FROM bugs WHERE bug_id = $id"); - my ($component, $severity, $op_sys, $milestone, $summary, $groupset) = FetchSQLData(); + my ($component, $severity, $op_sys, $milestone, $summary, $groupset) = FetchSQLData(); next unless $groupset == 0; $summary = html_quote($summary); - print ""; - print '"; - print ""; - if ($dobefore) - { - print ""; - } - print "\n "; - print ""; - print ""; - print ""; - print ""; - print "\n"; - - $i++; - if ($i == $maxrows) - { - last; - } + print ""; + print '"; + print ""; + if ($dobefore) + { + print ""; + } + print "\n "; + print ""; + print ""; + print ""; + print ""; + print "\n"; + + $i++; + if ($i == $maxrows) + { + last; + } } print "
Bug #
Dupe
Count
+Bug # +
+Dupe
Count
+
Change in last
$changedsince day(s)
+ Change in + last
$changedsince day(s)
Summary
'; - print $id . "
$count{$id}
$delta{$id}
$component
$severity
$op_sys
$milestone
$summary
'; + print $id . "
$count{$id}
$delta{$id}
$component
$severity
$op_sys
$milestone
$summary


"; PutFooter(); +sub by_bug_no +{ + return ($a <=> $b); +} + sub by_dup_count { - return -($count{$a} <=> $count{$b}); + return -($count{$a} <=> $count{$b}); } sub by_delta { - return -($delta{$a} <=> $delta{$b}); + return -($delta{$a} <=> $delta{$b}); } sub days_ago { - my ($dom, $mon, $year) = (localtime(time - ($_[0]*24*60*60)))[3, 4, 5]; - return sprintf "%04d%02d%02d", 1900 + $year, ++$mon, $dom; + my ($dom, $mon, $year) = (localtime(time - ($_[0]*24*60*60)))[3, 4, 5]; + return sprintf "%04d-%02d-%02d", 1900 + $year, ++$mon, $dom; } +sub die_politely { + my $msg = shift; + + print < + + + + +
+$msg +
+

+FIN + + PutFooter(); + exit; +} -- cgit v1.2.1