diff options
author | Nils Adermann <naderman@naderman.de> | 2006-11-12 14:29:32 +0000 |
---|---|---|
committer | Nils Adermann <naderman@naderman.de> | 2006-11-12 14:29:32 +0000 |
commit | cf34efb06ce62407232d63dd4e73b8afc6e2a4ef (patch) | |
tree | 59501fb88bc314e1fd63c122990d35eb9dc96a7f /phpBB/includes/utf | |
parent | fa9d7e4ab47cf3655617a815adcfa369ae0e8706 (diff) | |
download | forums-cf34efb06ce62407232d63dd4e73b8afc6e2a4ef.tar forums-cf34efb06ce62407232d63dd4e73b8afc6e2a4ef.tar.gz forums-cf34efb06ce62407232d63dd4e73b8afc6e2a4ef.tar.bz2 forums-cf34efb06ce62407232d63dd4e73b8afc6e2a4ef.tar.xz forums-cf34efb06ce62407232d63dd4e73b8afc6e2a4ef.zip |
message
git-svn-id: file:///svn/phpbb/trunk@6569 89ea8834-ac86-4346-8a33-228a782c2dd0
Diffstat (limited to 'phpBB/includes/utf')
-rw-r--r-- | phpBB/includes/utf/utf_tools.php | 45 |
1 files changed, 42 insertions, 3 deletions
diff --git a/phpBB/includes/utf/utf_tools.php b/phpBB/includes/utf/utf_tools.php index d90590e813..b91fd51c20 100644 --- a/phpBB/includes/utf/utf_tools.php +++ b/phpBB/includes/utf/utf_tools.php @@ -930,15 +930,52 @@ function utf8_case_fold($text, $option = 'full') } /** -* @todo needs documenting +* A wrapper function for the normalizer which takes care of including the class if required and modifies the passed strings +* to be in NFC (Normalization Form Composition). +* +* @param mixed $strings Either an array of references to strings, a reference to an array of strings or a reference to a single string +*/ +function utf8_normalize_nfc($strings) +{ + if (!is_array($strings) || (sizeof($strings) > 0)) + { + if (!class_exists('utf_normalizer')) + { + global $phpbb_root_path, $phpEx; + include($phpbb_root_path . 'includes/utf/utf_normalizer.' . $phpEx); + } + + if (is_array($strings)) + { + foreach ($strings as $key => $string) + { + $strings[$key] = utf_normalizer::nfc($strings[$key]); + } + } + else + { + $strings = utf_normalizer::nfc($strings); + } + } +} + +/** +* This function is used to generate a "clean" version of a string. +* Clean means that it is a case insensitive form (case folding) and that it is normalized (NFC). +* Additionally a homographs of one character are transformed into one specific character (preferably ASCII +* if it is an ASCII character). * * Please be aware that if you change something within this function or within -* functions used here you need to rebuild/update the complete users table. +* functions used here you need to rebuild/update the username_clean column in the users table. And all other +* columns that store a clean string otherwise you will break this functionality. +* +* @param $text An unclean string, mabye user input (has to be valid UTF-8!) +* @return Cleaned up version of the input string */ function utf8_clean_string($text) { $text = utf8_case_fold($text); - + if (!class_exists('utf_normalizer')) { global $phpbb_root_path, $phpEx; @@ -963,6 +1000,8 @@ function utf8_clean_string($text) // greek "\xCE\xB1" => "\x61", "\xCE\xBF" => "\x6F", + // other + "\xC2\xA1" => "\x69", ); $text = strtr($text, $homographs); |