diff options
author | Ludovic Arnaud <ludovic_arnaud@users.sourceforge.net> | 2006-07-12 22:56:04 +0000 |
---|---|---|
committer | Ludovic Arnaud <ludovic_arnaud@users.sourceforge.net> | 2006-07-12 22:56:04 +0000 |
commit | 44b78d7c8dcb8026d4bb7f3bca0c78e93027a2ac (patch) | |
tree | 1abfbfff271ed1e738b5145e9a231ff757e681b6 /phpBB/includes/utf | |
parent | 7768d67e226b2a5aef9b624bf8badad8e558cb87 (diff) | |
download | forums-44b78d7c8dcb8026d4bb7f3bca0c78e93027a2ac.tar forums-44b78d7c8dcb8026d4bb7f3bca0c78e93027a2ac.tar.gz forums-44b78d7c8dcb8026d4bb7f3bca0c78e93027a2ac.tar.bz2 forums-44b78d7c8dcb8026d4bb7f3bca0c78e93027a2ac.tar.xz forums-44b78d7c8dcb8026d4bb7f3bca0c78e93027a2ac.zip |
Added: UTF-8 tools. Both functions are used by the "improved" search engine. We'll need utf8_strlen() if we go with UTF-8 in 3.2
TODO: native recoding engine, written in PHP
git-svn-id: file:///svn/phpbb/trunk@6174 89ea8834-ac86-4346-8a33-228a782c2dd0
Diffstat (limited to 'phpBB/includes/utf')
-rw-r--r-- | phpBB/includes/utf/utf_tools.php | 130 |
1 files changed, 130 insertions, 0 deletions
/**
*
* @package phpBB3
* @version $Id$
* @copyright (c) 2005 phpBB Group
* @license http://opensource.org/licenses/gpl-license.php GNU Public License
*
*/

/**
* UTF-8 tools
*
* Whenever possible, these functions will try to use PHP's built-in functions or
* extensions, otherwise they will default to custom routines.
*
* If we go with UTF-8 in 3.2, we will also need a Unicode-aware replacement
* to strtolower()
*
* @package phpBB3
*/

/**
* Return the length (in characters) of a UTF-8 string
*
* Strategies are tried in order: iconv, mbstring, then a pure-PHP count.
* A strategy that fails (e.g. iconv on a malformed sequence) is skipped
* instead of leaking its boolean failure value to the caller, so the
* result is always an integer.
*
* @param string $text UTF-8 string
* @return integer Length (in chars) of given string
*/
function utf8_strlen($text)
{
	if (function_exists('iconv_strlen'))
	{
		// iconv_strlen() returns false and raises a notice on malformed input
		$length = @iconv_strlen($text, 'utf-8');

		if ($length !== false)
		{
			return $length;
		}
	}

	if (function_exists('mb_strlen'))
	{
		$length = @mb_strlen($text, 'utf-8');

		if ($length !== false)
		{
			return $length;
		}
	}

	/**
	* Custom routine: every character contributes exactly one byte that is
	* not a continuation byte (0x80-0xBF), so count those. This avoids
	* utf8_decode(), which is deprecated in recent PHP versions.
	*/
	return strlen(preg_replace('/[\x80-\xBF]/', '', $text));
}

/**
* Recode a string to UTF-8
*
* If the encoding is not supported, the string is returned as-is
*
* The iconv, mbstring and recode extensions are tried in that order. If none
* of them can handle the conversion, iso-8859-1 input is converted by a
* custom routine. Any other encoding is left untouched.
*
* @param string $string Original string
* @param string $encoding Original encoding
* @return string The string, encoded in UTF-8
*/
function utf8_recode($string, $encoding)
{
	$encoding = strtolower($encoding);

	// Nothing to do for UTF-8 input, non-strings and empty strings
	if ($encoding == 'utf-8' || !is_string($string) || !isset($string[0]))
	{
		return $string;
	}

	/**
	* First, try iconv()
	*/
	if (function_exists('iconv'))
	{
		$ret = @iconv($encoding, 'utf-8', $string);

		if (is_string($ret) && isset($ret[0]))
		{
			return $ret;
		}
	}

	/**
	* Try the mb_string extension
	*/
	if (function_exists('mb_convert_encoding'))
	{
		try
		{
			$ret = @mb_convert_encoding($string, 'utf-8', $encoding);
		}
		catch (ValueError $e)
		{
			// PHP 8 throws instead of warning when the encoding is unknown
			$ret = false;
		}

		if (is_string($ret) && isset($ret[0]))
		{
			return $ret;
		}
	}

	/**
	* Try the recode extension
	*/
	if (function_exists('recode_string'))
	{
		$ret = @recode_string($encoding . '..utf-8', $string);

		if (is_string($ret) && isset($ret[0]))
		{
			return $ret;
		}
	}

	/**
	* No extension could help. iso-8859-1 maps byte values directly onto
	* code points U+0000..U+00FF, so it can be converted by hand: ASCII
	* bytes are copied, every other byte becomes a two-byte sequence.
	* (Same output as utf8_encode(), which is deprecated in recent PHP.)
	*/
	if ($encoding == 'iso-8859-1')
	{
		$ret = '';
		$length = strlen($string);

		for ($i = 0; $i < $length; $i++)
		{
			$byte = ord($string[$i]);

			$ret .= ($byte < 0x80) ? $string[$i] : (chr(0xC0 | ($byte >> 6)) . chr(0x80 | ($byte & 0x3F)));
		}

		return $ret;
	}

	/**
	* If nothing works, check if we have a custom transcoder available
	*/
	if (!preg_match('#^[a-z0-9\\-]+$#', $encoding))
	{
		/**
		* Make sure the encoding name is alphanumeric, we don't want it
		* to be abused into loading arbitrary files
		*/
		trigger_error('Unknown encoding: ' . $encoding);

		// In case the error handler returns, honour the documented contract
		return $string;
	}

	/**
	* TODO: native recoding engine, written in PHP. Its data files are
	* expected under includes/utf/data/. Until it exists the encoding is
	* simply unsupported, so the string is returned as-is rather than
	* aborting the whole request.
	*/
	return $string;
}
\ No newline at end of file |