diff options
Diffstat (limited to 'phpBB/includes/utf/utf_tools.php')
-rw-r--r-- | phpBB/includes/utf/utf_tools.php | 229 |
1 files changed, 2 insertions, 227 deletions
diff --git a/phpBB/includes/utf/utf_tools.php b/phpBB/includes/utf/utf_tools.php index b30f28aac9..e2ef3edd0e 100644 --- a/phpBB/includes/utf/utf_tools.php +++ b/phpBB/includes/utf/utf_tools.php @@ -688,231 +688,6 @@ function utf8_ucfirst($str) } /** -* Recode a string to UTF-8 -* -* If the encoding is not supported, the string is returned as-is -* -* @param string $string Original string -* @param string $encoding Original encoding (lowered) -* @return string The string, encoded in UTF-8 -*/ -function utf8_recode($string, $encoding) -{ - $encoding = strtolower($encoding); - - if ($encoding == 'utf-8' || !is_string($string) || empty($string)) - { - return $string; - } - - // we force iso-8859-1 to be cp1252 - if ($encoding == 'iso-8859-1') - { - $encoding = 'cp1252'; - } - // convert iso-8859-8-i to iso-8859-8 - else if ($encoding == 'iso-8859-8-i') - { - $encoding = 'iso-8859-8'; - $string = hebrev($string); - } - - // First, try iconv() - if (function_exists('iconv')) - { - $ret = @iconv($encoding, 'utf-8', $string); - - if (!empty($ret)) - { - return $ret; - } - } - - // Try the mb_string extension - if (function_exists('mb_convert_encoding')) - { - // mbstring is nasty on PHP4, we must make *sure* that we send a good encoding - switch ($encoding) - { - case 'iso-8859-1': - case 'iso-8859-2': - case 'iso-8859-4': - case 'iso-8859-7': - case 'iso-8859-9': - case 'iso-8859-15': - case 'windows-1251': - case 'windows-1252': - case 'cp1252': - case 'shift_jis': - case 'euc-kr': - case 'big5': - case 'gb2312': - $ret = @mb_convert_encoding($string, 'utf-8', $encoding); - - if (!empty($ret)) - { - return $ret; - } - } - } - - // Try the recode extension - if (function_exists('recode_string')) - { - $ret = @recode_string($encoding . '..utf-8', $string); - - if (!empty($ret)) - { - return $ret; - } - } - - // If nothing works, check if we have a custom transcoder available - if (!preg_match('#^[a-z0-9_ \\-]+$#', $encoding)) - { - // Make sure the encoding name is alphanumeric, we don't want it to be abused into loading arbitrary files - trigger_error('Unknown encoding: ' . $encoding, E_USER_ERROR); - } - - global $phpbb_root_path, $phpEx; - - // iso-8859-* character encoding - if (preg_match('/iso[_ -]?8859[_ -]?(\\d+)/', $encoding, $array)) - { - switch ($array[1]) - { - case '1': - case '2': - case '4': - case '7': - case '8': - case '9': - case '15': - if (!function_exists('iso_8859_' . $array[1])) - { - if (!file_exists($phpbb_root_path . 'includes/utf/data/recode_basic.' . $phpEx)) - { - trigger_error('Basic reencoder file is missing', E_USER_ERROR); - } - include($phpbb_root_path . 'includes/utf/data/recode_basic.' . $phpEx); - } - return call_user_func('iso_8859_' . $array[1], $string); - break; - - default: - trigger_error('Unknown encoding: ' . $encoding, E_USER_ERROR); - break; - } - } - - // CP/WIN character encoding - if (preg_match('/(?:cp|windows)[_\- ]?(\\d+)/', $encoding, $array)) - { - switch ($array[1]) - { - case '932': - break; - case '1250': - case '1251': - case '1252': - case '1254': - case '1255': - case '1256': - case '1257': - case '874': - if (!function_exists('cp' . $array[1])) - { - if (!file_exists($phpbb_root_path . 'includes/utf/data/recode_basic.' . $phpEx)) - { - trigger_error('Basic reencoder file is missing', E_USER_ERROR); - } - include($phpbb_root_path . 'includes/utf/data/recode_basic.' . $phpEx); - } - return call_user_func('cp' . $array[1], $string); - break; - - default: - trigger_error('Unknown encoding: ' . $encoding, E_USER_ERROR); - break; - } - } - - // TIS-620 - if (preg_match('/tis[_ -]?620/', $encoding)) - { - if (!function_exists('tis_620')) - { - if (!file_exists($phpbb_root_path . 'includes/utf/data/recode_basic.' . $phpEx)) - { - trigger_error('Basic reencoder file is missing', E_USER_ERROR); - } - include($phpbb_root_path . 'includes/utf/data/recode_basic.' . $phpEx); - } - return tis_620($string); - } - - // SJIS - if (preg_match('/sjis(?:[_ -]?win)?|(?:cp|ibm)[_ -]?932|shift[_ -]?jis/', $encoding)) - { - if (!function_exists('sjis')) - { - if (!file_exists($phpbb_root_path . 'includes/utf/data/recode_cjk.' . $phpEx)) - { - trigger_error('CJK reencoder file is missing', E_USER_ERROR); - } - include($phpbb_root_path . 'includes/utf/data/recode_cjk.' . $phpEx); - } - return sjis($string); - } - - // EUC_KR - if (preg_match('/euc[_ -]?kr/', $encoding)) - { - if (!function_exists('euc_kr')) - { - if (!file_exists($phpbb_root_path . 'includes/utf/data/recode_cjk.' . $phpEx)) - { - trigger_error('CJK reencoder file is missing', E_USER_ERROR); - } - include($phpbb_root_path . 'includes/utf/data/recode_cjk.' . $phpEx); - } - return euc_kr($string); - } - - // BIG-5 - if (preg_match('/big[_ -]?5/', $encoding)) - { - if (!function_exists('big5')) - { - if (!file_exists($phpbb_root_path . 'includes/utf/data/recode_cjk.' . $phpEx)) - { - trigger_error('CJK reencoder file is missing', E_USER_ERROR); - } - include($phpbb_root_path . 'includes/utf/data/recode_cjk.' . $phpEx); - } - return big5($string); - } - - // GB2312 - if (preg_match('/gb[_ -]?2312/', $encoding)) - { - if (!function_exists('gb2312')) - { - if (!file_exists($phpbb_root_path . 'includes/utf/data/recode_cjk.' . $phpEx)) - { - trigger_error('CJK reencoder file is missing', E_USER_ERROR); - } - include($phpbb_root_path . 'includes/utf/data/recode_cjk.' . $phpEx); - } - return gb2312($string); - } - - // Trigger an error?! Fow now just give bad data :-( - trigger_error('Unknown encoding: ' . $encoding, E_USER_ERROR); - //return $string; // use utf_normalizer::cleanup() ? -} - -/** * Replace all UTF-8 chars that are not in ASCII with their NCR * * @param string $text UTF-8 string in NFC @@ -1827,7 +1602,7 @@ function utf8_clean_string($text) $text = utf8_case_fold_nfkc($text); $text = strtr($text, $homographs); // Other control characters - $text = preg_replace('#(?:[\x00-\x1F\x7F]+|(?:\xC2[\x80-\x9F])+)#', '', $text); + $text = preg_replace('#[\x00-\x1F\x7F]+|(?:\xC2[\x80-\x9F])+#', '', $text); // we need to reduce multiple spaces to a single one $text = preg_replace('# {2,}#', ' ', $text); @@ -1861,7 +1636,7 @@ function utf8_convert_message($message) } // else we need to convert some part of the message - return utf8_htmlspecialchars(utf8_recode($message, 'ISO-8859-1')); + return utf8_htmlspecialchars(utf8_encode($message)); } /** |