aboutsummaryrefslogtreecommitdiffstats
path: root/phpBB/includes/search
diff options
context:
space:
mode:
author: Ludovic Arnaud <ludovic_arnaud@users.sourceforge.net>, 2006-07-15 17:01:59 +0000
committer: Ludovic Arnaud <ludovic_arnaud@users.sourceforge.net>, 2006-07-15 17:01:59 +0000
commit: 29d92430c5e364d63a5ec7db96d306cdf1c02e72 (patch)
tree: 94b33eb4813a4d1a8046ec5ec86020210ca78bf9 /phpBB/includes/search
parent: 7b8f0da356ea2fdc852a9ca719ed6909636d0d1c (diff)
download: forums-29d92430c5e364d63a5ec7db96d306cdf1c02e72.tar
forums-29d92430c5e364d63a5ec7db96d306cdf1c02e72.tar.gz
forums-29d92430c5e364d63a5ec7db96d306cdf1c02e72.tar.bz2
forums-29d92430c5e364d63a5ec7db96d306cdf1c02e72.tar.xz
forums-29d92430c5e364d63a5ec7db96d306cdf1c02e72.zip
Changed: moved functions that encode/decode NCRs from and to UTF-8 to utf_tools.php
git-svn-id: file:///svn/phpbb/trunk@6187 89ea8834-ac86-4346-8a33-228a782c2dd0
Diffstat (limited to 'phpBB/includes/search')
-rw-r--r-- phpBB/includes/search/fulltext_native_improved.php 77
1 file changed, 9 insertions, 68 deletions
diff --git a/phpBB/includes/search/fulltext_native_improved.php b/phpBB/includes/search/fulltext_native_improved.php
index dac964a64a..a01ccd64ed 100644
--- a/phpBB/includes/search/fulltext_native_improved.php
+++ b/phpBB/includes/search/fulltext_native_improved.php
@@ -47,10 +47,18 @@ class fulltext_native_improved extends search_backend
$this->word_length = array('min' => $config['fulltext_native_min_chars'], 'max' => $config['fulltext_native_max_chars']);
+ /**
+ * Load the UTF tools
+ */
if (!class_exists('utf_normalizer'))
{
include($phpbb_root_path . 'includes/utf/utf_normalizer.' . $phpEx);
}
+ if (!function_exists('utf8_strlen'))
+ {
+ include($phpbb_root_path . 'includes/utf/utf_tools.' . $phpEx);
+ }
+
$error = false;
}
@@ -865,14 +873,6 @@ class fulltext_native_improved extends search_backend
$isset_min = $min - 1;
/**
- * Load the UTF tools
- */
- if (!function_exists('utf8_strlen'))
- {
- include($phpbb_root_path . 'includes/utf/utf_tools.' . $phpEx);
- }
-
- /**
* Clean up the string, remove HTML tags, remove BBCodes
*/
$word = strtok($this->cleanup(preg_replace($match, ' ', strip_tags($text)), '', $user->lang['ENCODING']), ' ');
@@ -1259,11 +1259,6 @@ class fulltext_native_improved extends search_backend
$encoding = strtolower($encoding);
if ($encoding != 'utf-8')
{
- if (!function_exists('utf8_recode'))
- {
- include($phpbb_root_path . 'includes/utf/utf_tools.' . $phpEx);
- }
-
$text = utf8_recode($text, $encoding);
}
@@ -1277,7 +1272,7 @@ class fulltext_native_improved extends search_backend
/**
* Replace HTML entities and NCRs
*/
- $text = html_entity_decode($this->decode_ncr($text), ENT_QUOTES);
+ $text = html_entity_decode(utf8_decode_ncr($text), ENT_QUOTES);
/**
* Load the UTF-8 normalizer
@@ -1482,60 +1477,6 @@ class fulltext_native_improved extends search_backend
}
/**
- * Convert Numeric Character References to UTF-8 chars
- *
- * Notes:
- * - we do not convert NCRs recursively, if you pass &#38;#38; it will return &#38;
- * - we DO NOT check for the existence of the Unicode characters, therefore an entity
- * may be converted to an inexistent codepoint
- *
- * @param string $text String to convert, encoded in UTF-8 (no normal form required)
- * @return string UTF-8 string where NCRs have been replaced with the actual chars
- */
- function decode_ncr($text)
- {
- /**
- * @todo replace me with preg_replace_callback() or a loop
- */
- return preg_replace(
- '/&#([0-9]{1,6});/e',
- "\$this->cp_to_utf(\$1)",
-
- preg_replace(
- '/&#x([0-9A-F]{1,5});/ie',
- "\$this->cp_to_utf(hexdec('\$1'))",
- $text
- )
- );
- }
-
- /**
- * Convert a codepoint to a UTF-8 char
- *
- * @param integer $cp Unicode codepoint
- * @return string UTF-8 string
- */
- function cp_to_utf($cp)
- {
- if ($cp > 0xFFFF)
- {
- return chr(0xF0 | ($cp >> 18)) . chr(0x80 | (($cp >> 12) & 0x3F)) . chr(0x80 | (($cp >> 6) & 0x3F)) . chr(0x80 | ($cp & 0x3F));
- }
- elseif ($cp > 0x7FF)
- {
- return chr(0xE0 | ($cp >> 12)) . chr(0x80 | (($cp >> 6) & 0x3F)) . chr(0x80 | ($cp & 0x3F));
- }
- elseif ($cp > 0x7F)
- {
- return chr(0xC0 | ($cp >> 6)) . chr(0x80 | ($cp & 0x3F));
- }
- else
- {
- return chr($cp);
- }
- }
-
- /**
* Returns a list of options for the ACP to display
*/
function acp()