aboutsummaryrefslogtreecommitdiffstats
path: root/phpBB/includes/utf/utf_tools.php
diff options
context:
space:
mode:
Diffstat (limited to 'phpBB/includes/utf/utf_tools.php')
-rw-r--r-- phpBB/includes/utf/utf_tools.php 229
1 files changed, 2 insertions, 227 deletions
diff --git a/phpBB/includes/utf/utf_tools.php b/phpBB/includes/utf/utf_tools.php
index b30f28aac9..e2ef3edd0e 100644
--- a/phpBB/includes/utf/utf_tools.php
+++ b/phpBB/includes/utf/utf_tools.php
@@ -688,231 +688,6 @@ function utf8_ucfirst($str)
}
/**
-* Recode a string to UTF-8
-*
-* If the encoding is not supported, the string is returned as-is
-*
-* @param string $string Original string
-* @param string $encoding Original encoding (lowered)
-* @return string The string, encoded in UTF-8
-*/
-function utf8_recode($string, $encoding)
-{
- $encoding = strtolower($encoding);
-
- if ($encoding == 'utf-8' || !is_string($string) || empty($string))
- {
- return $string;
- }
-
- // we force iso-8859-1 to be cp1252
- if ($encoding == 'iso-8859-1')
- {
- $encoding = 'cp1252';
- }
- // convert iso-8859-8-i to iso-8859-8
- else if ($encoding == 'iso-8859-8-i')
- {
- $encoding = 'iso-8859-8';
- $string = hebrev($string);
- }
-
- // First, try iconv()
- if (function_exists('iconv'))
- {
- $ret = @iconv($encoding, 'utf-8', $string);
-
- if (!empty($ret))
- {
- return $ret;
- }
- }
-
- // Try the mb_string extension
- if (function_exists('mb_convert_encoding'))
- {
- // mbstring is nasty on PHP4, we must make *sure* that we send a good encoding
- switch ($encoding)
- {
- case 'iso-8859-1':
- case 'iso-8859-2':
- case 'iso-8859-4':
- case 'iso-8859-7':
- case 'iso-8859-9':
- case 'iso-8859-15':
- case 'windows-1251':
- case 'windows-1252':
- case 'cp1252':
- case 'shift_jis':
- case 'euc-kr':
- case 'big5':
- case 'gb2312':
- $ret = @mb_convert_encoding($string, 'utf-8', $encoding);
-
- if (!empty($ret))
- {
- return $ret;
- }
- }
- }
-
- // Try the recode extension
- if (function_exists('recode_string'))
- {
- $ret = @recode_string($encoding . '..utf-8', $string);
-
- if (!empty($ret))
- {
- return $ret;
- }
- }
-
- // If nothing works, check if we have a custom transcoder available
- if (!preg_match('#^[a-z0-9_ \\-]+$#', $encoding))
- {
- // Make sure the encoding name is alphanumeric, we don't want it to be abused into loading arbitrary files
- trigger_error('Unknown encoding: ' . $encoding, E_USER_ERROR);
- }
-
- global $phpbb_root_path, $phpEx;
-
- // iso-8859-* character encoding
- if (preg_match('/iso[_ -]?8859[_ -]?(\\d+)/', $encoding, $array))
- {
- switch ($array[1])
- {
- case '1':
- case '2':
- case '4':
- case '7':
- case '8':
- case '9':
- case '15':
- if (!function_exists('iso_8859_' . $array[1]))
- {
- if (!file_exists($phpbb_root_path . 'includes/utf/data/recode_basic.' . $phpEx))
- {
- trigger_error('Basic reencoder file is missing', E_USER_ERROR);
- }
- include($phpbb_root_path . 'includes/utf/data/recode_basic.' . $phpEx);
- }
- return call_user_func('iso_8859_' . $array[1], $string);
- break;
-
- default:
- trigger_error('Unknown encoding: ' . $encoding, E_USER_ERROR);
- break;
- }
- }
-
- // CP/WIN character encoding
- if (preg_match('/(?:cp|windows)[_\- ]?(\\d+)/', $encoding, $array))
- {
- switch ($array[1])
- {
- case '932':
- break;
- case '1250':
- case '1251':
- case '1252':
- case '1254':
- case '1255':
- case '1256':
- case '1257':
- case '874':
- if (!function_exists('cp' . $array[1]))
- {
- if (!file_exists($phpbb_root_path . 'includes/utf/data/recode_basic.' . $phpEx))
- {
- trigger_error('Basic reencoder file is missing', E_USER_ERROR);
- }
- include($phpbb_root_path . 'includes/utf/data/recode_basic.' . $phpEx);
- }
- return call_user_func('cp' . $array[1], $string);
- break;
-
- default:
- trigger_error('Unknown encoding: ' . $encoding, E_USER_ERROR);
- break;
- }
- }
-
- // TIS-620
- if (preg_match('/tis[_ -]?620/', $encoding))
- {
- if (!function_exists('tis_620'))
- {
- if (!file_exists($phpbb_root_path . 'includes/utf/data/recode_basic.' . $phpEx))
- {
- trigger_error('Basic reencoder file is missing', E_USER_ERROR);
- }
- include($phpbb_root_path . 'includes/utf/data/recode_basic.' . $phpEx);
- }
- return tis_620($string);
- }
-
- // SJIS
- if (preg_match('/sjis(?:[_ -]?win)?|(?:cp|ibm)[_ -]?932|shift[_ -]?jis/', $encoding))
- {
- if (!function_exists('sjis'))
- {
- if (!file_exists($phpbb_root_path . 'includes/utf/data/recode_cjk.' . $phpEx))
- {
- trigger_error('CJK reencoder file is missing', E_USER_ERROR);
- }
- include($phpbb_root_path . 'includes/utf/data/recode_cjk.' . $phpEx);
- }
- return sjis($string);
- }
-
- // EUC_KR
- if (preg_match('/euc[_ -]?kr/', $encoding))
- {
- if (!function_exists('euc_kr'))
- {
- if (!file_exists($phpbb_root_path . 'includes/utf/data/recode_cjk.' . $phpEx))
- {
- trigger_error('CJK reencoder file is missing', E_USER_ERROR);
- }
- include($phpbb_root_path . 'includes/utf/data/recode_cjk.' . $phpEx);
- }
- return euc_kr($string);
- }
-
- // BIG-5
- if (preg_match('/big[_ -]?5/', $encoding))
- {
- if (!function_exists('big5'))
- {
- if (!file_exists($phpbb_root_path . 'includes/utf/data/recode_cjk.' . $phpEx))
- {
- trigger_error('CJK reencoder file is missing', E_USER_ERROR);
- }
- include($phpbb_root_path . 'includes/utf/data/recode_cjk.' . $phpEx);
- }
- return big5($string);
- }
-
- // GB2312
- if (preg_match('/gb[_ -]?2312/', $encoding))
- {
- if (!function_exists('gb2312'))
- {
- if (!file_exists($phpbb_root_path . 'includes/utf/data/recode_cjk.' . $phpEx))
- {
- trigger_error('CJK reencoder file is missing', E_USER_ERROR);
- }
- include($phpbb_root_path . 'includes/utf/data/recode_cjk.' . $phpEx);
- }
- return gb2312($string);
- }
-
- // Trigger an error?! For now just give bad data :-(
- trigger_error('Unknown encoding: ' . $encoding, E_USER_ERROR);
- //return $string; // use utf_normalizer::cleanup() ?
-}
-
-/**
* Replace all UTF-8 chars that are not in ASCII with their NCR
*
* @param string $text UTF-8 string in NFC
@@ -1827,7 +1602,7 @@ function utf8_clean_string($text)
$text = utf8_case_fold_nfkc($text);
$text = strtr($text, $homographs);
// Other control characters
- $text = preg_replace('#(?:[\x00-\x1F\x7F]+|(?:\xC2[\x80-\x9F])+)#', '', $text);
+ $text = preg_replace('#[\x00-\x1F\x7F]+|(?:\xC2[\x80-\x9F])+#', '', $text);
// we need to reduce multiple spaces to a single one
$text = preg_replace('# {2,}#', ' ', $text);
@@ -1861,7 +1636,7 @@ function utf8_convert_message($message)
}
// else we need to convert some part of the message
- return utf8_htmlspecialchars(utf8_recode($message, 'ISO-8859-1'));
+ return utf8_htmlspecialchars(utf8_encode($message));
}
/**