aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--phpBB/includes/search/fulltext_mysql.php206
-rw-r--r--phpBB/language/en/acp/search.php1
-rw-r--r--phpBB/search.php4
3 files changed, 116 insertions, 95 deletions
diff --git a/phpBB/includes/search/fulltext_mysql.php b/phpBB/includes/search/fulltext_mysql.php
index a106f40abf..7d7e811ed1 100644
--- a/phpBB/includes/search/fulltext_mysql.php
+++ b/phpBB/includes/search/fulltext_mysql.php
@@ -122,39 +122,33 @@ class fulltext_mysql extends search_backend
$keywords = preg_replace($match, $replace, $keywords);
}
- $match = array();
- // New lines, carriage returns
- $match[] = "#[\n\r]+#";
- // NCRs like   etc.
- $match[] = '#(&|&)[\#a-z0-9]+?;#i';
-
// Filter out as above
- $keywords = preg_replace($match, ' ', trim($keywords));
+ $split_keywords = preg_replace("#[\n\r\t]+#", ' ', trim(htmlspecialchars_decode($keywords)));
// Split words
if ($this->pcre_properties)
{
- $split_keywords = preg_replace('#([^\p{L}\p{N}\'*])#u', '$1$1', str_replace('\'\'', '\' \'', trim($keywords)));
+ $split_keywords = preg_replace('#([^\p{L}\p{N}\'*"()])#u', '$1$1', str_replace('\'\'', '\' \'', trim($split_keywords)));
}
else if ($this->mbstring_regex)
{
- $split_keywords = mb_ereg_replace('([^\w\'*])', '\\1\\1', str_replace('\'\'', '\' \'', trim($keywords)));
+ $split_keywords = mb_ereg_replace('([^\w\'*"()])', '\\1\\1', str_replace('\'\'', '\' \'', trim($split_keywords)));
}
else
{
- $split_keywords = preg_replace('#([^\w\'*])#u', '$1$1', str_replace('\'\'', '\' \'', trim($keywords)));
+ $split_keywords = preg_replace('#([^\w\'*"()])#u', '$1$1', str_replace('\'\'', '\' \'', trim($split_keywords)));
}
if ($this->pcre_properties)
{
$matches = array();
- preg_match_all('#(?:[^\p{L}\p{N}*]|^)([+\-|]?(?:[\p{L}\p{N}*]+\'?)*[\p{L}\p{N}*])(?:[^\p{L}\p{N}*]|$)#u', $split_keywords, $matches);
+ preg_match_all('#(?:[^\p{L}\p{N}*"()]|^)([+\-|]?(?:[\p{L}\p{N}*"()]+\'?)*[\p{L}\p{N}*"()])(?:[^\p{L}\p{N}*"()]|$)#u', $split_keywords, $matches);
$this->split_words = $matches[1];
}
else if ($this->mbstring_regex)
{
mb_regex_encoding('UTF-8');
- mb_ereg_search_init($split_keywords, '(?:[^\w*]|^)([+\-|]?(?:[\w*]+\'?)*[\w*])(?:[^\w*]|$)');
+ mb_ereg_search_init($split_keywords, '(?:[^\w*"()]|^)([+\-|]?(?:[\w*"()]+\'?)*[\w*"()])(?:[^\w*"()]|$)');
while (($word = mb_ereg_search_regs()))
{
@@ -164,13 +158,46 @@ class fulltext_mysql extends search_backend
else
{
$matches = array();
- preg_match_all('#(?:[^\w*]|^)([+\-|]?(?:[\w*]+\'?)*[\w*])(?:[^\w*]|$)#u', $split_keywords, $matches);
+ preg_match_all('#(?:[^\w*"()]|^)([+\-|]?(?:[\w*"()]+\'?)*[\w*"()])(?:[^\w*"()]|$)#u', $split_keywords, $matches);
$this->split_words = $matches[1];
}
+ // to allow phrase search, we need to concatenate quoted words
+ $tmp_split_words = array();
+ $phrase = '';
+ foreach ($this->split_words as $word)
+ {
+ if ($phrase)
+ {
+ $phrase .= ' ' . $word;
+ if (strpos($word, '"') !== false && substr_count($word, '"') % 2 == 1)
+ {
+ $tmp_split_words[] = $phrase;
+ $phrase = '';
+ }
+ }
+ else if (strpos($word, '"') !== false && substr_count($word, '"') % 2 == 1)
+ {
+ $phrase = $word;
+ }
+ else
+ {
+ $tmp_split_words[] = $word . ' ';
+ }
+ }
+ if ($phrase)
+ {
+ $tmp_split_words[] = $phrase;
+ }
+
+ $this->split_words = $tmp_split_words;
+
+ unset($tmp_split_words);
+ unset($phrase);
+
foreach ($this->split_words as $i => $word)
{
- $clean_word = preg_replace('#^[+\-|]#', '', $word);
+ $clean_word = preg_replace('#^[+\-|"]#', '', $word);
// check word length
$clean_len = utf8_strlen(str_replace('*', '', $clean_word));
@@ -181,9 +208,41 @@ class fulltext_mysql extends search_backend
}
}
- $this->search_query = implode(' ', $this->split_words);
+ if ($terms == 'any')
+ {
+ $this->search_query = '';
+ foreach ($this->split_words as $word)
+ {
+ if ((strpos($word, '+') === 0) || (strpos($word, '-') === 0) || (strpos($word, '|') === 0))
+ {
+ $word = substr($word, 1);
+ }
+ $this->search_query .= $word . ' ';
+ }
+ }
+ else
+ {
+ $this->search_query = '';
+ foreach ($this->split_words as $word)
+ {
+ if ((strpos($word, '+') === 0) || (strpos($word, '-') === 0))
+ {
+ $this->search_query .= $word . ' ';
+ }
+ else if (strpos($word, '|') === 0)
+ {
+ $this->search_query .= substr($word, 1) . ' ';
+ }
+ else
+ {
+ $this->search_query .= '+' . $word . ' ';
+ }
+ }
+ }
+
+ $this->search_query = utf8_htmlspecialchars($this->search_query);
- if (sizeof($this->split_words))
+ if ($this->search_query)
{
$this->split_words = array_values($this->split_words);
sort($this->split_words);
@@ -252,26 +311,39 @@ class fulltext_mysql extends search_backend
}
/**
- * Performs a search on keywords depending on display specific params.
+ * Performs a search on keywords depending on display specific params. You have to run split_keywords() first.
*
- * @param array &$id_ary passed by reference, to be filled with ids for the page specified by $start and $per_page, should be ordered
- * @param int $start indicates the first index of the page
- * @param int $per_page number of ids each page is supposed to contain
- * @return total number of results
+ * @param string $type contains either posts or topics depending on what should be searched for
+ * @param string &$fields contains either titleonly (topic titles should be searched), msgonly (only message bodies should be searched), firstpost (only subject and body of the first post should be searched) or all (all post bodies and subjects should be searched)
+ * @param string &$terms is either 'all' (use query as entered, words without prefix should default to "have to be in field") or 'any' (ignore search query parts and just return all posts that contain any of the specified words)
+ * @param array &$sort_by_sql contains SQL code for the ORDER BY part of a query
+ * @param string &$sort_key is the key of $sort_by_sql for the selected sorting
+ * @param string &$sort_dir is either a or d representing ASC and DESC
+ * @param string &$sort_days specifies the maximum amount of days a post may be old
+ * @param array &$ex_fid_ary specifies an array of forum ids which should not be searched
+ * @param array &$m_approve_fid_ary specifies an array of forum ids in which the searcher is allowed to view unapproved posts
+ * @param int &$topic_id is set to 0 or a topic id, if it is not 0 then only posts in this topic should be searched
+ * @param array &$author_ary an array of author ids if the author should be ignored during the search the array is empty
+ * @param array &$id_ary passed by reference, to be filled with ids for the page specified by $start and $per_page, should be ordered
+ * @param int $start indicates the first index of the page
+ * @param int $per_page number of ids each page is supposed to contain
+ * @return boolean|int total number of results
+ *
+ * @access public
*/
function keyword_search($type, &$fields, &$terms, &$sort_by_sql, &$sort_key, &$sort_dir, &$sort_days, &$ex_fid_ary, &$m_approve_fid_ary, &$topic_id, &$author_ary, &$id_ary, $start, $per_page)
{
global $config, $db;
// No keywords? No posts.
- if (!sizeof($this->split_words))
+ if (!$this->search_query)
{
return false;
}
// generate a search_key from all the options to identify the results
$search_key = md5(implode('#', array(
- implode(',', $this->split_words),
+ implode(', ', $this->split_words),
$type,
$fields,
$terms,
@@ -369,47 +441,9 @@ class fulltext_mysql extends search_backend
$sql_where_options .= ($sort_days) ? ' AND p.post_time >= ' . (time() - ($sort_days * 86400)) : '';
$sql_where_options .= $sql_match_where;
- // split the words into words that have to be included or must not be included and optional words
- $words = $any_words = array();
- if ($terms == 'all')
- {
- foreach ($this->split_words as $id => $word)
- {
- if ((strpos($word, '+') === 0) || (strpos($word, '-') === 0))
- {
- $words[] = $word;
- }
- else if (strpos($word, '|') === 0)
- {
- $any_words[] = substr($word, 1);
- }
- else
- {
- $words[] = '+' . $word;
- }
- }
- }
- else
- {
- foreach ($this->split_words as $id => $word)
- {
- if ((strpos($word, '+') === 0) || (strpos($word, '-') === 0) || (strpos($word, '|') === 0))
- {
- $words[] = substr($word, 1);
- }
- else
- {
- $words[] = $word;
- }
- }
- }
-
- // Get the ids for the current result block
- $any_words = (sizeof($any_words)) ? ' +(' . implode(' ', $any_words) . ')' : '';
-
$sql = "SELECT $sql_select
FROM $sql_from$sql_sort_table" . POSTS_TABLE . " p
- WHERE MATCH ($sql_match) AGAINST ('" . $db->sql_escape(implode(' ', $words)) . $any_words . "' IN BOOLEAN MODE)
+ WHERE MATCH ($sql_match) AGAINST ('" . $db->sql_escape(htmlspecialchars_decode($this->search_query)) . "' IN BOOLEAN MODE)
$sql_where_options
ORDER BY $sql_sort";
$result = $db->sql_query_limit($sql, $config['search_block_size'], $start);
@@ -608,37 +642,8 @@ class fulltext_mysql extends search_backend
$split_text = $this->split_message($message);
$split_title = ($subject) ? $this->split_message($subject) : array();
- $words = array();
- if ($mode == 'edit')
- {
- $old_text = array();
- $old_title = array();
-
- $sql = 'SELECT post_text, post_subject
- FROM ' . POSTS_TABLE . "
- WHERE post_id = $post_id";
- $result = $db->sql_query($sql);
+ $words = array_unique(array_merge($split_text, $split_title));
- if ($row = $db->sql_fetchrow($result))
- {
- $old_text = $this->split_message($row['post_text']);
- $old_title = $this->split_message($row['post_subject']);
- }
- $db->sql_freeresult($result);
-
- $words = array_unique(array_merge(
- array_diff($split_text, $old_text),
- array_diff($split_title, $old_title),
- array_diff($old_text, $split_text),
- array_diff($old_title, $split_title)
- ));
- unset($old_title);
- unset($old_text);
- }
- else
- {
- $words = array_unique(array_merge($split_text, $split_title));
- }
unset($split_text);
unset($split_title);
@@ -715,6 +720,11 @@ class fulltext_mysql extends search_backend
$alter[] = 'ADD FULLTEXT (post_text)';
}
+ if (!isset($this->stats['post_content']))
+ {
+ $alter[] = 'ADD FULLTEXT post_content (post_subject, post_text)';
+ }
+
if (sizeof($alter))
{
$db->sql_query('ALTER TABLE ' . POSTS_TABLE . ' ' . implode(', ', $alter));
@@ -755,6 +765,11 @@ class fulltext_mysql extends search_backend
$alter[] = 'DROP INDEX post_text';
}
+ if (isset($this->stats['post_content']))
+ {
+ $alter[] = 'DROP INDEX post_content';
+ }
+
if (sizeof($alter))
{
$db->sql_query('ALTER TABLE ' . POSTS_TABLE . ' ' . implode(', ', $alter));
@@ -775,7 +790,7 @@ class fulltext_mysql extends search_backend
$this->get_stats();
}
- return (isset($this->stats['post_text']) && isset($this->stats['post_subject'])) ? true : false;
+ return (isset($this->stats['post_text']) && isset($this->stats['post_subject']) && isset($this->stats['post_content'])) ? true : false;
}
/**
@@ -794,6 +809,7 @@ class fulltext_mysql extends search_backend
$user->lang['FULLTEXT_MYSQL_TOTAL_POSTS'] => ($this->index_created()) ? $this->stats['total_posts'] : 0,
$user->lang['FULLTEXT_MYSQL_TEXT_CARDINALITY'] => isset($this->stats['post_text']['Cardinality']) ? $this->stats['post_text']['Cardinality'] : 0,
$user->lang['FULLTEXT_MYSQL_SUBJECT_CARDINALITY'] => isset($this->stats['post_subject']['Cardinality']) ? $this->stats['post_subject']['Cardinality'] : 0,
+ $user->lang['FULLTEXT_MYSQL_COMBINED_CARDINALITY'] => isset($this->stats['post_content']['Cardinality']) ? $this->stats['post_content']['Cardinality'] : 0,
);
}
@@ -826,6 +842,10 @@ class fulltext_mysql extends search_backend
{
$this->stats['post_subject'] = $row;
}
+ else if ($row['Column_name'] == 'post_content')
+ {
+ $this->stats['post_content'] = $row;
+ }
}
}
$db->sql_freeresult($result);
diff --git a/phpBB/language/en/acp/search.php b/phpBB/language/en/acp/search.php
index 7f1c5c708b..af27a20f6e 100644
--- a/phpBB/language/en/acp/search.php
+++ b/phpBB/language/en/acp/search.php
@@ -51,6 +51,7 @@ $lang = array_merge($lang, array(
'FULLTEXT_MYSQL_NOT_MYISAM' => 'MySQL fulltext indexes can only be used with MyISAM tables.',
'FULLTEXT_MYSQL_SUBJECT_CARDINALITY' => 'Cardinality of the post_subject fulltext index (estimate of unique values)',
'FULLTEXT_MYSQL_TEXT_CARDINALITY' => 'Cardinality of the post_text fulltext index (estimate of unique values)',
+ 'FULLTEXT_MYSQL_COMBINED_CARDINALITY' => 'Cardinality of the post_content fulltext index (estimate of unique values)',
'FULLTEXT_MYSQL_TOTAL_POSTS' => 'Total number of indexed posts',
'FULLTEXT_MYSQL_MBSTRING' => 'Support for non-latin UTF-8 characters using mbstring:',
'FULLTEXT_MYSQL_PCRE' => 'Support for non-latin UTF-8 characters using PCRE:',
diff --git a/phpBB/search.php b/phpBB/search.php
index 3bffb00ddd..7ba7035f03 100644
--- a/phpBB/search.php
+++ b/phpBB/search.php
@@ -453,8 +453,8 @@ if ($keywords || $author || $author_id || $search_id || $submit)
}
// define some vars for urls
- $hilit = implode('|', explode(' ', preg_replace('#\s+#u', ' ', str_replace(array('+', '-', '|', '(', ')'), ' ', $keywords))));
- $u_hilit = urlencode(htmlspecialchars_decode($keywords));
+ $hilit = implode('|', explode(' ', preg_replace('#\s+#u', ' ', str_replace(array('+', '-', '|', '(', ')', '"'), ' ', $keywords))));
+ $u_hilit = urlencode(htmlspecialchars_decode(str_replace('|', ' ', $hilit)));
$u_show_results = ($show_results != 'posts') ? '&sr=' . $show_results : '';
$u_search_forum = implode('&fid%5B%5D=', $search_forum);