diff options
author | Nils Adermann <naderman@naderman.de> | 2006-04-27 14:20:43 +0000 |
---|---|---|
committer | Nils Adermann <naderman@naderman.de> | 2006-04-27 14:20:43 +0000 |
commit | 5994371c0a098bf2f866b89ba0a8bba11f414a41 (patch) | |
tree | 78b0d65900b7f969702bac61142010766149ea80 | |
parent | 5f503ddabf90693d0293d2c4fe91c9d965aa86de (diff) | |
download | forums-5994371c0a098bf2f866b89ba0a8bba11f414a41.tar forums-5994371c0a098bf2f866b89ba0a8bba11f414a41.tar.gz forums-5994371c0a098bf2f866b89ba0a8bba11f414a41.tar.bz2 forums-5994371c0a098bf2f866b89ba0a8bba11f414a41.tar.xz forums-5994371c0a098bf2f866b89ba0a8bba11f414a41.zip |
- allow searches for multibyte characters in fulltext_native
- use preg_quote safely
git-svn-id: file:///svn/phpbb/trunk@5853 89ea8834-ac86-4346-8a33-228a782c2dd0
-rw-r--r-- | phpBB/includes/functions_compress.php | 2 | ||||
-rw-r--r-- | phpBB/includes/functions_messenger.php | 2 | ||||
-rw-r--r-- | phpBB/includes/functions_privmsgs.php | 20 | ||||
-rwxr-xr-x | phpBB/includes/search/fulltext_native.php | 42 | ||||
-rw-r--r-- | phpBB/search.php | 6 |
5 files changed, 43 insertions, 29 deletions
diff --git a/phpBB/includes/functions_compress.php b/phpBB/includes/functions_compress.php index 700303fd7d..51fbdab6ae 100644 --- a/phpBB/includes/functions_compress.php +++ b/phpBB/includes/functions_compress.php @@ -23,7 +23,7 @@ class compress $skip_files = explode(',', $skip_files); // Remove rm prefix from src path - $src_path = ($src_rm_prefix) ? preg_replace('#^(' . preg_quote($src_rm_prefix) . ')#', '', $src) : $src; + $src_path = ($src_rm_prefix) ? preg_replace('#^(' . preg_quote($src_rm_prefix, '#') . ')#', '', $src) : $src; // Add src prefix $src_path = ($src_add_prefix) ? ($src_add_prefix . ((substr($src_add_prefix, -1) != '/') ? '/' : '') . $src_path) : $src_path; // Remove initial "/" if present diff --git a/phpBB/includes/functions_messenger.php b/phpBB/includes/functions_messenger.php index d6755857ab..8e20a418e2 100644 --- a/phpBB/includes/functions_messenger.php +++ b/phpBB/includes/functions_messenger.php @@ -1227,7 +1227,7 @@ function mail_encode($str, $encoding) $str = chunk_split(base64_encode($str), $length, $spacer); // remove trailing spacer and add start and end delimiters - $str = preg_replace('#' . preg_quote($spacer) . '$#', '', $str); + $str = preg_replace('#' . preg_quote($spacer, '#') . '$#', '', $str); return $start . $str . $end; } diff --git a/phpBB/includes/functions_privmsgs.php b/phpBB/includes/functions_privmsgs.php index 6adfdf5605..d24db82b09 100644 --- a/phpBB/includes/functions_privmsgs.php +++ b/phpBB/includes/functions_privmsgs.php @@ -53,28 +53,28 @@ define('CHECK_TO', 5); $global_privmsgs_rules = array( CHECK_SUBJECT => array( - RULE_IS_LIKE => array('check0' => 'message_subject', 'function' => 'preg_match("/" . preg_quote({STRING}) . "/i", {CHECK0})'), - RULE_IS_NOT_LIKE => array('check0' => 'message_subject', 'function' => '!(preg_match("/" . preg_quote({STRING}) . "/i", {CHECK0}))'), + RULE_IS_LIKE => array('check0' => 'message_subject', 'function' => 'preg_match("/" . preg_quote({STRING}, "/") . 
"/i", {CHECK0})'), + RULE_IS_NOT_LIKE => array('check0' => 'message_subject', 'function' => '!(preg_match("/" . preg_quote({STRING}, "/") . "/i", {CHECK0}))'), RULE_IS => array('check0' => 'message_subject', 'function' => '{CHECK0} == {STRING}'), RULE_IS_NOT => array('check0' => 'message_subject', 'function' => '{CHECK0} != {STRING}'), - RULE_BEGINS_WITH => array('check0' => 'message_subject', 'function' => 'preg_match("/^" . preg_quote({STRING}) . "/i", {CHECK0})'), - RULE_ENDS_WITH => array('check0' => 'message_subject', 'function' => 'preg_match("/" . preg_quote({STRING}) . "$/i", {CHECK0})')), + RULE_BEGINS_WITH => array('check0' => 'message_subject', 'function' => 'preg_match("/^" . preg_quote({STRING}, "/") . "/i", {CHECK0})'), + RULE_ENDS_WITH => array('check0' => 'message_subject', 'function' => 'preg_match("/" . preg_quote({STRING}, "/") . "$/i", {CHECK0})')), CHECK_SENDER => array( - RULE_IS_LIKE => array('check0' => 'username', 'function' => 'preg_match("/" . preg_quote({STRING}) . "/i", {CHECK0})'), - RULE_IS_NOT_LIKE => array('check0' => 'username', 'function' => '!(preg_match("/" . preg_quote({STRING}) . "/i", {CHECK0}))'), + RULE_IS_LIKE => array('check0' => 'username', 'function' => 'preg_match("/" . preg_quote({STRING}, "/") . "/i", {CHECK0})'), + RULE_IS_NOT_LIKE => array('check0' => 'username', 'function' => '!(preg_match("/" . preg_quote({STRING}, "/") . "/i", {CHECK0}))'), RULE_IS => array('check0' => 'username', 'function' => '{CHECK0} == {STRING}'), RULE_IS_NOT => array('check0' => 'username', 'function' => '{CHECK0} != {STRING}'), - RULE_BEGINS_WITH => array('check0' => 'username', 'function' => 'preg_match("/^" . preg_quote({STRING}) . "/i", {CHECK0})'), - RULE_ENDS_WITH => array('check0' => 'username', 'function' => 'preg_match("/" . preg_quote({STRING}) . "$/i", {CHECK0})'), + RULE_BEGINS_WITH => array('check0' => 'username', 'function' => 'preg_match("/^" . preg_quote({STRING}, "/") . 
"/i", {CHECK0})'), + RULE_ENDS_WITH => array('check0' => 'username', 'function' => 'preg_match("/" . preg_quote({STRING}, "/") . "$/i", {CHECK0})'), RULE_IS_FRIEND => array('check0' => 'friend', 'function' => '{CHECK0} == 1'), RULE_IS_FOE => array('check0' => 'foe', 'function' => '{CHECK0} == 1'), RULE_IS_USER => array('check0' => 'author_id', 'function' => '{CHECK0} == {USER_ID}'), RULE_IS_GROUP => array('check0' => 'author_in_group', 'function' => 'in_array({GROUP_ID}, {CHECK0})')), CHECK_MESSAGE => array( - RULE_IS_LIKE => array('check0' => 'message_text', 'function' => 'preg_match("/" . preg_quote({STRING}) . "/i", {CHECK0})'), - RULE_IS_NOT_LIKE => array('check0' => 'message_text', 'function' => '!(preg_match("/" . preg_quote({STRING}) . "/i", {CHECK0}))'), + RULE_IS_LIKE => array('check0' => 'message_text', 'function' => 'preg_match("/" . preg_quote({STRING}, "/") . "/i", {CHECK0})'), + RULE_IS_NOT_LIKE => array('check0' => 'message_text', 'function' => '!(preg_match("/" . preg_quote({STRING}, "/") . 
"/i", {CHECK0}))'), RULE_IS => array('check0' => 'message_text', 'function' => '{CHECK0} == {STRING}'), RULE_IS_NOT => array('check0' => 'message_text', 'function' => '{CHECK0} != {STRING}')), diff --git a/phpBB/includes/search/fulltext_native.php b/phpBB/includes/search/fulltext_native.php index b69dc18061..48415ab409 100755 --- a/phpBB/includes/search/fulltext_native.php +++ b/phpBB/includes/search/fulltext_native.php @@ -50,8 +50,8 @@ class fulltext_native extends search_backend { global $db, $config; - $drop_char_match = array('^', '$', ';', '#', '&', '(', ')', '<', '>', '`', '\'', '"', ',', '@', '_', '?', '%', '~', '.', '[', ']', '{', '}', ':', '\\', '/', '=', '!'); - $drop_char_replace = array(' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '', '', ' ', ' ', ' ', '', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '' , ' ', ' ', ' '); + $drop_char_match = array('^', '$', '(', ')', '<', '>', '`', '\'', '"', ',', '@', '_', '?', '%', '~', '.', '[', ']', '{', '}', ':', '\\', '/', '=', '!', "\n", "\r"); + $drop_char_replace = array(' ', ' ', ' ', ' ', ' ', '', '', ' ', ' ', ' ', '', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '' , ' ', ' ', ' ', ' ', ' ', ' '); $this->get_ignore_words(); $this->get_synonyms(); @@ -65,15 +65,20 @@ class fulltext_native extends search_backend } $match = array(); - // New lines, carriage returns - $match[] = "#[\n\r]+#"; // NCRs like etc. 
- $match[] = '#(&amp;|&)[\#a-z0-9]+?;#i'; + $match[] = '#(&amp;|&)[a-z0-9]+?;#i'; // Filter out as above $keywords = preg_replace($match, ' ', strtolower(trim($keywords))); + + // Filter out non alphabetical characters $keywords = str_replace($drop_char_match, $drop_char_replace, $keywords); + // Filter out ; and # but not &#[0-9]+; + $keywords = preg_replace('#&\#([0-9]+);#', '<$1>', $keywords); + $keywords = str_replace(array(';', '&', '#'), ' ', $keywords); + $keywords = str_replace(array('<', '>'), array('&#', ';'), $keywords); + // Split words $this->split_words = explode(' ', preg_replace('#\s+#', ' ', $keywords)); @@ -100,7 +105,7 @@ class fulltext_native extends search_backend } // check word length - $clean_len = strlen(str_replace('*', '', $word)); + $clean_len = $this->word_length($word); if (($clean_len < $config['fulltext_native_min_chars']) || ($clean_len > $config['fulltext_native_max_chars'])) { if ($prefixed) @@ -152,6 +157,14 @@ class fulltext_native extends search_backend } /** + * Returns the string length but it counts multibyte characters as single characters and ignores "*" + */ + function word_length($word) + { + return strlen(str_replace('*', '', preg_replace('#&\#[0-9]+;#', 'x', $word))); + } + + /** * Turns text into an array of words that can be stored in the word list table */ function split_message($text) @@ -165,29 +178,30 @@ class fulltext_native extends search_backend if (!is_array($drop_char_match)) { - $drop_char_match = array('-', '^', '$', ';', '#', '&', '(', ')', '<', '>', '`', '\'', '"', '|', ',', '@', '_', '?', '%', '~', '.', '[', ']', '{', '}', ':', '\\', '/', '=', '\'', '!', '*', '+'); - $drop_char_replace = array(' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '', '', ' ', ' ', ' ', ' ', '', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '' , ' ', ' ', ' ', ' ', ' ', ' '); + $drop_char_match = array('-', '^', '$', '(', ')', '<', '>', '`', '\'', '"', '|', ',', '@', '_', '?', '%', '~', '.', '[', ']', '{', '}', ':', '\\', '/', '=', 
'\'', '!', '*', '+', "\n", "\r"); + $drop_char_replace = array(' ', ' ', ' ', ' ', ' ', ' ', ' ', '', '', ' ', ' ', ' ', ' ', '', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '' , ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' '); } $match = array(); // Comments for hardcoded bbcode elements (urls, smilies, html) $match[] = '#<!\-\- .* \-\->(.*?)<!\-\- .* \-\->#is'; - // New lines, carriage returns - $match[] = "#[\n\r]+#"; // NCRs like &nbsp; etc. - $match[] = '#(&amp;|&)[\#a-z0-9]+?;#i'; + $match[] = '#(&amp;|&)[a-z0-9]+;#i'; // Do not index code $match[] = '#\[code(?:=.*?)?(\:?[0-9a-z]{5,})\].*?\[\/code(\:?[0-9a-z]{5,})\]#is'; // BBcode $match[] = '#\[\/?[a-z\*\+\-]+(?:=.*?)?(\:?[0-9a-z]{5,})\]#'; - // Filter out ; and # but not &#[0-9]+; - //$match[] = '#(&\#[0-9]+;)|;|\#|&#'; $text = preg_replace($match, ' ', ' ' . strtolower(trim($text)) . ' '); // Filter out non-alphabetical chars $text = str_replace($drop_char_match, $drop_char_replace, $text); + // Filter out ; and # but not &#[0-9]+; + $text = preg_replace('#&\#([0-9]+);#', '<$1>', $text); + $text = str_replace(array(';', '&', '#'), ' ', $text); + $text = str_replace(array('<', '>'), array('&#', ';'), $text); + // Split words $text = explode(' ', preg_replace('#\s+#', ' ', trim($text))); @@ -206,7 +220,7 @@ class fulltext_native extends search_backend for ($i = 0, $n = sizeof($text); $i < $n; $i++) { $text[$i] = trim($text[$i]); - if (strlen($text[$i]) < $config['fulltext_native_min_chars'] || strlen($text[$i]) > $config['fulltext_native_max_chars']) + if ($this->word_length($text[$i]) < $config['fulltext_native_min_chars'] || $this->word_length($text[$i]) > $config['fulltext_native_max_chars']) { unset($text[$i]); } diff --git a/phpBB/search.php b/phpBB/search.php index 80ad3dccbe..2095bc1e70 100644 --- a/phpBB/search.php +++ b/phpBB/search.php @@ -29,8 +29,8 @@ $topic_id = request_var('t', 0); $view = request_var('view', ''); $submit = request_var('submit', false); -$keywords = request_var('keywords', ''); -$add_keywords = 
request_var('add_keywords', ''); +$keywords = request_var('keywords', '', true); +$add_keywords = request_var('add_keywords', '', true); $author = request_var('author', ''); $show_results = ($topic_id) ? 'posts' : request_var('sr', 'posts'); $show_results = ($show_results == 'posts') ? 'posts' : 'topics'; @@ -657,7 +657,7 @@ if ($keywords || $author || $search_id || $submit) if ($hilit) { - $row['post_text'] = preg_replace('#(?!<.*)(?<!\w)(' . preg_quote($hilit) . ')(?!\w|[^<>]*>)#i', '<span class="posthilit">$1</span>', $row['post_text']); + $row['post_text'] = preg_replace('#(?!<.*)(?<!\w)(' . preg_quote($hilit, '#') . ')(?!\w|[^<>]*>)#i', '<span class="posthilit">$1</span>', $row['post_text']); } $row['post_text'] = smiley_text($row['post_text']); |