aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorFrédéric Buclin <LpSolit@gmail.com>2012-03-29 19:50:02 +0200
committerFrédéric Buclin <LpSolit@gmail.com>2012-03-29 19:50:02 +0200
commitf21583b6ab8b870aaa3f8be48426d4e0f6a7ae48 (patch)
treed70984b1fbb9dc6d3576d0b0e57781fff1146c20
parentb7fb96faa4f99f335695c11fbeed9f3a8bc78aa9 (diff)
downloadbugs-f21583b6ab8b870aaa3f8be48426d4e0f6a7ae48.tar
bugs-f21583b6ab8b870aaa3f8be48426d4e0f6a7ae48.tar.gz
bugs-f21583b6ab8b870aaa3f8be48426d4e0f6a7ae48.tar.bz2
bugs-f21583b6ab8b870aaa3f8be48426d4e0f6a7ae48.tar.xz
bugs-f21583b6ab8b870aaa3f8be48426d4e0f6a7ae48.zip
Bug 554819: Quicksearch should be using Text::ParseWords instead of custom code in splitString
Also fixes QS with accented characters (bug 730207) r=dkl a=LpSolit
-rw-r--r--Bugzilla/Search/Quicksearch.pm173
-rw-r--r--Bugzilla/Util.pm17
-rw-r--r--template/en/default/global/user-error.html.tmpl21
-rw-r--r--template/en/default/pages/quicksearch.html.tmpl83
4 files changed, 191 insertions, 103 deletions
diff --git a/Bugzilla/Search/Quicksearch.pm b/Bugzilla/Search/Quicksearch.pm
index 01e475463..cd4deb94c 100644
--- a/Bugzilla/Search/Quicksearch.pm
+++ b/Bugzilla/Search/Quicksearch.pm
@@ -19,6 +19,7 @@ use Bugzilla::Util;
use List::Util qw(min max);
use List::MoreUtils qw(firstidx);
+use Text::ParseWords qw(parse_line);
use base qw(Exporter);
@Bugzilla::Search::Quicksearch::EXPORT = qw(quicksearch);
@@ -129,54 +130,95 @@ sub quicksearch {
$searchstring =~ s/(^[\s,]+|[\s,]+$)//g;
ThrowUserError('buglist_parameters_required') unless ($searchstring);
- $fulltext = Bugzilla->user->setting('quicksearch_fulltext') eq 'on' ? 1 : 0;
-
if ($searchstring =~ m/^[0-9,\s]*$/) {
_bug_numbers_only($searchstring);
}
else {
_handle_alias($searchstring);
- # Globally translate " AND ", " OR ", " NOT " to space, pipe, dash.
- $searchstring =~ s/\s+AND\s+/ /g;
- $searchstring =~ s/\s+OR\s+/|/g;
- $searchstring =~ s/\s+NOT\s+/ -/g;
+ # Retain backslashes and quotes, to know which strings are quoted,
+ # and which ones are not.
+ my @words = parse_line('\s+', 1, $searchstring);
+ # If parse_line() returns no data, this means strings are badly quoted.
+ # Rather than trying to guess what the user wanted to do, we throw an error.
+ scalar(@words)
+ || ThrowUserError('quicksearch_unbalanced_quotes', {string => $searchstring});
+
+ # A query cannot start with AND or OR, nor can it end with AND, OR or NOT.
+ ThrowUserError('quicksearch_invalid_query')
+ if ($words[0] =~ /^(?:AND|OR)$/ || $words[$#words] =~ /^(?:AND|OR|NOT)$/);
+
+ my (@qswords, @or_group);
+ while (scalar @words) {
+ my $word = shift @words;
+ # AND is the default word separator, similar to a whitespace,
+ # but |a AND OR b| is not a valid combination.
+ if ($word eq 'AND') {
+ ThrowUserError('quicksearch_invalid_query', {operators => ['AND', 'OR']})
+ if $words[0] eq 'OR';
+ }
+ # |a OR AND b| is not a valid combination.
+ # |a OR OR b| is equivalent to |a OR b| and so is harmless.
+ elsif ($word eq 'OR') {
+ ThrowUserError('quicksearch_invalid_query', {operators => ['OR', 'AND']})
+ if $words[0] eq 'AND';
+ }
+ # NOT negates the following word.
+ # |NOT AND| and |NOT OR| are not valid combinations.
+ # |NOT NOT| is fine but has no effect as they cancel themselves.
+ elsif ($word eq 'NOT') {
+ $word = shift @words;
+ next if $word eq 'NOT';
+ if ($word eq 'AND' || $word eq 'OR') {
+ ThrowUserError('quicksearch_invalid_query', {operators => ['NOT', $word]});
+ }
+ unshift(@words, "-$word");
+ }
+ else {
+ # OR groups words together, as OR has higher precedence than AND.
+ push(@or_group, $word);
+ # If the next word is not OR, then we are not in a OR group,
+ # or we are leaving it.
+ if (!defined $words[0] || $words[0] ne 'OR') {
+ push(@qswords, join('|', @or_group));
+ @or_group = ();
+ }
+ }
+ }
- my @words = splitString($searchstring);
- _handle_status_and_resolution(\@words);
+ _handle_status_and_resolution(\@qswords);
my (@unknownFields, %ambiguous_fields);
+ $fulltext = Bugzilla->user->setting('quicksearch_fulltext') eq 'on' ? 1 : 0;
# Loop over all main-level QuickSearch words.
- foreach my $qsword (@words) {
- my $negate = substr($qsword, 0, 1) eq '-';
- if ($negate) {
- $qsword = substr($qsword, 1);
- }
+ foreach my $qsword (@qswords) {
+ my @or_operand = parse_line('\|', 1, $qsword);
+ foreach my $term (@or_operand) {
+ my $negate = substr($term, 0, 1) eq '-';
+ if ($negate) {
+ $term = substr($term, 1);
+ }
- # No special first char
- if (!_handle_special_first_chars($qsword, $negate)) {
- # Split by '|' to get all operands for a boolean OR.
- foreach my $or_operand (split(/\|/, $qsword)) {
- if (!_handle_field_names($or_operand, $negate,
- \@unknownFields,
- \%ambiguous_fields))
- {
- # Having ruled out the special cases, we may now split
- # by comma, which is another legal boolean OR indicator.
- foreach my $word (split(/,/, $or_operand)) {
- if (!_special_field_syntax($word, $negate)) {
- _default_quicksearch_word($word, $negate);
- }
- _handle_urls($word, $negate);
- }
+ next if _handle_special_first_chars($term, $negate);
+ next if _handle_field_names($term, $negate, \@unknownFields,
+ \%ambiguous_fields);
+
+ # Having ruled out the special cases, we may now split
+ # by comma, which is another legal boolean OR indicator.
+ # Remove quotes from quoted words, if any.
+ @words = parse_line(',', 0, $term);
+ foreach my $word (@words) {
+ if (!_special_field_syntax($word, $negate)) {
+ _default_quicksearch_word($word, $negate);
}
+ _handle_urls($word, $negate);
}
}
$chart++;
$and = 0;
$or = 0;
- } # foreach (@words)
+ }
# Inform user about any unknown fields
if (scalar(@unknownFields) || scalar(keys %ambiguous_fields)) {
@@ -302,7 +344,7 @@ sub _handle_special_first_chars {
my $firstChar = substr($qsword, 0, 1);
my $baseWord = substr($qsword, 1);
- my @subWords = split(/[\|,]/, $baseWord);
+ my @subWords = split(/,/, $baseWord);
if ($firstChar eq '#') {
addChart('short_desc', 'substring', $baseWord, $negate);
@@ -334,7 +376,7 @@ sub _handle_special_first_chars {
sub _handle_field_names {
my ($or_operand, $negate, $unknownFields, $ambiguous_fields) = @_;
-
+
# Flag and requestee shortcut
if ($or_operand =~ /^(?:flag:)?([^\?]+\?)([^\?]*)$/) {
addChart('flagtypes.name', 'substring', $1, $negate);
@@ -342,32 +384,40 @@ sub _handle_field_names {
addChart('requestees.login_name', 'substring', $2, $negate);
return 1;
}
-
- # generic field1,field2,field3:value1,value2 notation
- if ($or_operand =~ /^([^:]+):([^:]+)$/) {
- my @fields = split(/,/, $1);
- my @values = split(/,/, $2);
+
+ # Generic field1,field2,field3:value1,value2 notation.
+ # We have to correctly ignore commas and colons in quotes.
+ my @field_values = parse_line(':', 1, $or_operand);
+ if (scalar @field_values == 2) {
+ my @fields = parse_line(',', 1, $field_values[0]);
+ my @values = parse_line(',', 1, $field_values[1]);
foreach my $field (@fields) {
my $translated = _translate_field_name($field);
# Skip and record any unknown fields
if (!defined $translated) {
push(@$unknownFields, $field);
- next;
}
# If we got back an array, that means the substring is
# ambiguous and could match more than field name
elsif (ref $translated) {
$ambiguous_fields->{$field} = $translated;
- next;
}
- foreach my $value (@values) {
- my $operator = FIELD_OPERATOR->{$translated} || 'substring';
- addChart($translated, $operator, $value, $negate);
+ else {
+ foreach my $value (@values) {
+ my $operator = FIELD_OPERATOR->{$translated} || 'substring';
+ # If the string was quoted to protect some special
+ # characters such as commas and colons, we need
+ # to remove quotes.
+ if ($value =~ /^(["'])(.+)\g1$/) {
+ $value = $2;
+ $value =~ s/\\(["'])/$1/g;
+ }
+ addChart($translated, $operator, $value, $negate);
+ }
}
}
return 1;
}
-
return 0;
}
@@ -500,41 +550,6 @@ sub _handle_urls {
# Helpers
###########################################################################
-# Split string on whitespace, retaining quoted strings as one
-sub splitString {
- my $string = shift;
- my @quoteparts;
- my @parts;
- my $i = 0;
-
- # Now split on quote sign; be tolerant about unclosed quotes
- @quoteparts = split(/"/, $string);
- foreach my $part (@quoteparts) {
- # After every odd quote, quote special chars
- if ($i++ %2) {
- $part = url_quote($part);
- # Protect the minus sign from being considered
- # as negation, in quotes.
- $part =~ s/(?<=^)\-/%2D/;
- }
- }
- # Join again
- $string = join('"', @quoteparts);
-
- # Now split on unescaped whitespace
- @parts = split(/\s+/, $string);
- foreach (@parts) {
- # Protect plus signs from becoming a blank.
- # If "+" appears as the first character, leave it alone
- # as it has a special meaning. Strings which start with
- # "+" must be quoted.
- s/(?<!^)\+/%2B/g;
- # Remove quotes
- s/"//g;
- }
- return @parts;
-}
-
# Quote and escape a phrase appropriately for a "content matches" search.
sub _matches_phrase {
my ($phrase) = @_;
@@ -600,7 +615,7 @@ sub makeChart {
my $cgi = Bugzilla->cgi;
$cgi->param("field$expr", $field);
$cgi->param("type$expr", $type);
- $cgi->param("value$expr", url_decode($value));
+ $cgi->param("value$expr", $value);
}
1;
diff --git a/Bugzilla/Util.pm b/Bugzilla/Util.pm
index bf8569839..91d40a4f2 100644
--- a/Bugzilla/Util.pm
+++ b/Bugzilla/Util.pm
@@ -12,7 +12,7 @@ use strict;
use base qw(Exporter);
@Bugzilla::Util::EXPORT = qw(trick_taint detaint_natural detaint_signed
html_quote url_quote xml_quote
- css_class_quote html_light_quote url_decode
+ css_class_quote html_light_quote
i_am_cgi correct_urlbase remote_ip
do_ssl_redirect_if_required use_attachbase
diff_arrays on_main_db say
@@ -220,14 +220,6 @@ sub xml_quote {
return $var;
}
-sub url_decode {
- my ($todecode) = (@_);
- $todecode =~ tr/+/ /; # pluses become spaces
- $todecode =~ s/%([0-9a-fA-F]{2})/pack("c",hex($1))/ge;
- utf8::decode($todecode) if Bugzilla->params->{'utf8'};
- return $todecode;
-}
-
sub i_am_cgi {
# I use SERVER_SOFTWARE because it's required to be
# defined for all requests in the CGI spec.
@@ -751,9 +743,6 @@ Bugzilla::Util - Generic utility functions for bugzilla
xml_quote($var);
email_filter($var);
- # Functions for decoding
- $rv = url_decode($var);
-
# Functions that tell you about your environment
my $is_cgi = i_am_cgi();
my $urlbase = correct_urlbase();
@@ -866,10 +855,6 @@ This is similar to C<html_quote>, except that ' is escaped to &apos;. This
is kept separate from html_quote partly for compatibility with previous code
(for &apos;) and partly for future handling of non-ASCII characters.
-=item C<url_decode($val)>
-
-Converts the %xx encoding from the given URL back to its original form.
-
=item C<email_filter>
Removes the hostname from email addresses in the string, if the user
diff --git a/template/en/default/global/user-error.html.tmpl b/template/en/default/global/user-error.html.tmpl
index 3dda5de7f..57fa180e7 100644
--- a/template/en/default/global/user-error.html.tmpl
+++ b/template/en/default/global/user-error.html.tmpl
@@ -1436,6 +1436,27 @@
The name of the query must be less than [% constants.MAX_LEN_QUERY_NAME FILTER html %]
characters long.
+ [% ELSIF error == "quicksearch_invalid_query" %]
+ [% title = "Invalid Query" %]
+ Your query is invalid.
+ [% IF operators %]
+ The [% operators.shift FILTER html %] operator cannot be followed by
+ [%+ operators.shift FILTER html %].
+ [% ELSE %]
+ A query cannot start with AND or OR, nor can it end with AND, OR or NOT.
+ They are reserved operators and must be quoted if you want to look for
+ these strings.
+ [% END %]
+
+ [% ELSIF error == "quicksearch_unbalanced_quotes" %]
+ [% title = "Badly Formatted Query" %]
+ [% terms.Bugzilla %] is unable to parse your query correctly:
+ <em>[% string FILTER html %]</em>.<br>
+ If you use quotes to enclose strings, make sure both quotes are present.
+ If you really want to look for a quote in a string, type \" instead of ".
+ For instance, <em>"I'm so \"special\", really"</em> (with quotes) will be
+ interpreted as <em>I'm so "special", really</em>.
+
[% ELSIF error == "quicksearch_unknown_field" %]
[% title = "QuickSearch Error" %]
There is a problem with your search:
diff --git a/template/en/default/pages/quicksearch.html.tmpl b/template/en/default/pages/quicksearch.html.tmpl
index 669ea6cc1..f3794e1ed 100644
--- a/template/en/default/pages/quicksearch.html.tmpl
+++ b/template/en/default/pages/quicksearch.html.tmpl
@@ -25,7 +25,16 @@
<input type="submit" value="Search" id="find">
</form>
-<h2>The Basics</h2>
+<ul>
+ <li><a href="#basics">The Basics</a></li>
+ <li><a href="#basic_examples">Examples of Simple Queries</a></li>
+ <li><a href="#fields">Fields You Can Search On</a></li>
+ <li><a href="#advanced_features">Advanced Features</a></li>
+ <li><a href="#shortcuts">Advanced Shortcuts</a></li>
+ <li><a href="#advanced_examples">Examples of Complex Queries</a></li>
+</ul>
+
+<h2 id="basics">The Basics</h2>
<ul class="qs_help">
<li>If you just put a word or series of words in the search box,
@@ -73,8 +82,32 @@
<em>any</em> of those values will be searched for.</li>
</ul>
-<p>You may also want to read up on the <a href="#advanced">Advanced
- Features</a>.</p>
+<h2 id="basic_examples">Examples of Simple Queries</h2>
+
+<p>Here are some examples of how to write some simple queries.
+ <a href="#advanced_examples">Examples for more complex queries</a> can be
+ found lower in this page.</p>
+
+<ul class="qs_help">
+ <li>All open [% terms.bugs %] where userA@company.com is in the CC list
+ (no need to mention open [% terms.bugs %], this is the default):<br>
+ <kbd>cc:userA@company.com</kbd></li>
+ <li>All unconfirmed [% terms.bugs %] in product productA (putting the
+ [%+ terms.bug %] status at the first position make it being automagically
+ considered as [% terms.abug %] status):<br>
+ <kbd>UNCONFIRMED product:productA</kbd>
+ <li>All open and closed [% terms.bugs %] reported by userB@company.com
+ (we must specify ALL as the first word, else only open [% terms.bugs %]
+ are taken into account):<br>
+ <kbd>ALL reporter:userB@company.com</kbd>
+ <li>All open [% terms.bugs %] with severity blocker or critical with the
+ target milestone set to 2.5:<br>
+ <kbd>severity:blocker,critical milestone:2.5</kbd>
+ <li>All open [% terms.bugs %] in the component Research & Development
+ with priority P1 or P2 (we must use quotes for the component as its name
+ contains whitespaces):<br>
+ <kbd>component:"Research & Development" priority:P1,P2</kbd></li>
+</ul>
<h2 id="fields">Fields You Can Search On</h2>
@@ -127,15 +160,18 @@
</tbody>
</table>
-<h2 id="advanced">Advanced Features</h2>
+<h2 id="advanced_features">Advanced Features</h2>
<ul class="qs_help">
<li>If you want to search for a <strong>phrase</strong> or something that
- contains spaces, you can put it in quotes, like:
- <kbd>"this is a phrase"</kbd>. You can also use quotes to search for
+ contains spaces, commas, colons or quotes, you must put it in quotes, like:
+ <kbd>"yes, this is a phrase"</kbd>. You must also use quotes to search for
characters that would otherwise be interpreted specially by quicksearch.
- For example, <kbd>"this|thing"</kbd> would search for the literal phrase
- <em>this|thing</em>.</li>
+ For example, <kbd>"this|that"</kbd> would search for the literal string
+ <em>this|that</em> and would not be parsed as <kbd>"this OR that"</kbd>.
+ Also, <kbd>"-field:value"</kbd> would search for the literal phrase
+ <em>-field:value</em> and would not be parsed as
+ <kbd>"NOT field:value"</kbd>.</li>
<li>You can use <strong>AND</strong>, <strong>NOT</strong>,
and <strong>OR</strong> in searches.
@@ -165,6 +201,12 @@
</li>
</ul>
+ <p>You cannot use | nor OR to enumerate possible values for a given field.
+ You must use commas instead. So <kbd>field:value1,value2</kbd> does what
+ you expect, but <kbd>field:value1|value2</kbd> would be treated as
+ <kbd>field:value1 OR value2</kbd>, which means value2 is not bound to
+ the given field.</p>
+
<p>OR has higher precedence than AND; AND is the top level operation.
For example:</p>
<p>Searching for <em><kbd>url|location bar|field -focus</kbd></em> means
@@ -255,4 +297,29 @@
</tbody>
</table>
+<h2 id="advanced_examples">Examples of Complex Queries</h2>
+
+<p>It is pretty easy to write rather complex queries without too much effort.
+ For very complex queries, you have to use the
+ <a href="query.cgi?format=advanced">Advanced Search</a> form.</p>
+
+<ul class="qs_help">
+ <li>All [% terms.bugs %] reported by userA@company.com or assigned to him
+ (the initial @ is a shortcut for the assignee, see the
+ <a href="#shortcuts">Advanced Shortcuts</a> section above):<br>
+ <kbd>ALL @userA@company.com OR reporter:userA@company.com</kbd></li>
+ <li>All open [% terms.bugs %] in product productA with either severity
+ blocker, critical or major, or with priority P1, or with the blocker+
+ flag set, and which are neither assigned to userB@company.com nor to
+ userC@company.com (we make the assumption that there are only two users
+ matching userB and userC, else we would write the whole login name):<br>
+ <kbd>:productA sev:blocker,critical,major OR pri:P1 OR flag:blocker+ -assign:userB,userC</kbd></li>
+ <li>All FIXED [% terms.bugs %] with the blocker+ flag set, but without
+ the approval+ nor approval? flags set:<br>
+ <kbd>FIXED flag:blocker+ -flag:approval+ -flag:approval?</kbd></li>
+ <li>[% terms.Bugs %] with <em>That's a "unusual" issue</em> in the
+ [%+ terms.bug %] summary (double quotes are escaped using <em>\"</em>):<br>
+ <kbd>summary:"That's a \"unusual\" issue"</kbd></li>
+</ul>
+
[% PROCESS global/footer.html.tmpl %]