aboutsummaryrefslogtreecommitdiffstats
path: root/phpBB/includes/utf
diff options
context:
space:
mode:
authorLudovic Arnaud <ludovic_arnaud@users.sourceforge.net>2006-07-12 22:56:04 +0000
committerLudovic Arnaud <ludovic_arnaud@users.sourceforge.net>2006-07-12 22:56:04 +0000
commit44b78d7c8dcb8026d4bb7f3bca0c78e93027a2ac (patch)
tree1abfbfff271ed1e738b5145e9a231ff757e681b6 /phpBB/includes/utf
parent7768d67e226b2a5aef9b624bf8badad8e558cb87 (diff)
downloadforums-44b78d7c8dcb8026d4bb7f3bca0c78e93027a2ac.tar
forums-44b78d7c8dcb8026d4bb7f3bca0c78e93027a2ac.tar.gz
forums-44b78d7c8dcb8026d4bb7f3bca0c78e93027a2ac.tar.bz2
forums-44b78d7c8dcb8026d4bb7f3bca0c78e93027a2ac.tar.xz
forums-44b78d7c8dcb8026d4bb7f3bca0c78e93027a2ac.zip
Added: UTF-8 tools. Both functions are used by the "improved" search engine. We'll need utf8_strlen() if we go with UTF-8 in 3.2
TODO: native recoding engine, written in PHP git-svn-id: file:///svn/phpbb/trunk@6174 89ea8834-ac86-4346-8a33-228a782c2dd0
Diffstat (limited to 'phpBB/includes/utf')
-rw-r--r--phpBB/includes/utf/utf_tools.php130
1 files changed, 130 insertions, 0 deletions
diff --git a/phpBB/includes/utf/utf_tools.php b/phpBB/includes/utf/utf_tools.php
new file mode 100644
index 0000000000..4d8ba05340
--- /dev/null
+++ b/phpBB/includes/utf/utf_tools.php
@@ -0,0 +1,130 @@
+<?php
+/**
+*
+* @package phpBB3
+* @version $Id$
+* @copyright (c) 2005 phpBB Group
+* @license http://opensource.org/licenses/gpl-license.php GNU Public License
+*
+*/
+
+/**
+* UTF-8 tools
+*
+* Whenever possible, these functions will try to use PHP's built-in functions or
+* extensions, otherwise they will default to custom routines.
+*
+* If we go with UTF-8 in 3.2, we will also need a Unicode-aware replacement
+* for strtolower()
+*
+* @package phpBB3
+*/
+
+/**
+* Return the length (in characters) of a UTF-8 string
+*
+* The methods are tried in this order: iconv_strlen(), mb_strlen(), and
+* finally strlen() applied to the result of utf8_decode(). iconv_strlen()
+* returns false when it cannot process the input (e.g. malformed UTF-8);
+* in that case the next available method is used instead of handing the
+* false back to the caller.
+*
+* @param string $text UTF-8 string
+* @return integer Length (in chars) of given string
+*/
+function utf8_strlen($text)
+{
+	if (function_exists('iconv_strlen'))
+	{
+		// The notice raised on malformed input is silenced on purpose:
+		// a failure here is handled by the fallbacks below
+		$length = @iconv_strlen($text, 'utf-8');
+
+		if ($length !== false)
+		{
+			return $length;
+		}
+	}
+
+	if (function_exists('mb_strlen'))
+	{
+		return mb_strlen($text, 'utf-8');
+	}
+
+	// utf8_decode() turns each well-formed UTF-8 character into exactly one
+	// byte, so the byte length of its result equals the character count
+	return strlen(utf8_decode($text));
+}
+
+/**
+* Recode a string to UTF-8
+*
+* If the encoding is not supported, the string is returned as-is
+*
+* Conversion is attempted with, in order: utf8_encode() (iso-8859-1 only),
+* iconv(), mb_convert_encoding() and recode_string(). A method is skipped
+* when its function does not exist, and its result is ignored when it is
+* empty or not a string, in which case the next method is tried.
+*
+* The encoding name is compared case-insensitively. Non-string input, empty
+* strings and strings already declared as utf-8 are returned untouched.
+*
+* @param string $string Original string
+* @param string $encoding Original encoding
+* @return string The string, encoded in UTF-8
+*/
+function utf8_recode($string, $encoding)
+{
+	$encoding = strtolower($encoding);
+
+	// Nothing to do for UTF-8 input, non-strings and empty strings
+	if ($encoding == 'utf-8' || !is_string($string) || !isset($string[0]))
+	{
+		return $string;
+	}
+
+	/**
+	* PHP has a built-in function for encoding from iso-8859-1, let's use that
+	*/
+	if ($encoding == 'iso-8859-1')
+	{
+		return utf8_encode($string);
+	}
+
+	/**
+	* First, try iconv()
+	*/
+	if (function_exists('iconv'))
+	{
+		$ret = @iconv($encoding, 'utf-8', $string);
+
+		// iconv() returns false on failure; isset() on an offset of false is false
+		if (isset($ret[0]))
+		{
+			return $ret;
+		}
+	}
+
+	/**
+	* Try the mb_string extension
+	*/
+	if (function_exists('mb_convert_encoding'))
+	{
+		$ret = @mb_convert_encoding($string, 'utf-8', $encoding);
+
+		if (isset($ret[0]))
+		{
+			return $ret;
+		}
+	}
+
+	/**
+	* Try the recode extension
+	*/
+	if (function_exists('recode_string'))
+	{
+		$ret = @recode_string($encoding . '..utf-8', $string);
+
+		if (isset($ret[0]))
+		{
+			return $ret;
+		}
+	}
+
+	/**
+	* If nothing works, check if we have a custom transcoder available
+	*
+	* Make sure the encoding name is alphanumeric, we don't want it
+	* to be abused into loading arbitrary files. The D modifier keeps "$"
+	* from matching before a trailing newline.
+	*/
+	if (!preg_match('#^[a-z0-9\\-]+$#D', $encoding))
+	{
+		trigger_error('Unknown encoding: ' . $encoding);
+
+		// trigger_error() at its default level does not necessarily stop the
+		// script, so bail out explicitly rather than carry on with an
+		// unvalidated encoding name
+		return $string;
+	}
+
+	/**
+	* TODO: native recoding engine, written in PHP, with its data files
+	* expected under includes/utf/data/. Until it exists the encoding is
+	* treated as unsupported and the string is returned as-is, as documented,
+	* instead of terminating the whole script.
+	*/
+	return $string;
+}
+
+?> \ No newline at end of file