From 1d42d1b9817050974c8bc8b91bc34a6c3cfbfef8 Mon Sep 17 00:00:00 2001
From: Meik Sievertsen <acydburn@phpbb.com>
Date: Tue, 22 Aug 2006 21:26:06 +0000
Subject: some updates. Also adjusted the utf tools and normalizer more to our
 coding guidelines.

git-svn-id: file:///svn/phpbb/trunk@6312 89ea8834-ac86-4346-8a33-228a782c2dd0
---
 phpBB/includes/utf/utf_normalizer.php | 2665 ++++++++++++++-------------------
 phpBB/includes/utf/utf_tools.php      |   34 +-
 2 files changed, 1160 insertions(+), 1539 deletions(-)

(limited to 'phpBB/includes/utf')

diff --git a/phpBB/includes/utf/utf_normalizer.php b/phpBB/includes/utf/utf_normalizer.php
index 0b567fad6b..613a1098a7 100644
--- a/phpBB/includes/utf/utf_normalizer.php
+++ b/phpBB/includes/utf/utf_normalizer.php
@@ -28,1443 +28,1184 @@ define('UTF8_CJK_B_FIRST', "\xF0\xA0\x80\x80");
 define('UTF8_CJK_B_LAST', "\xF0\xAA\x9B\x96");
 
 
+// Wrapper for the utfnormal extension, ICU wrapper
 if (function_exists('utf8_normalize'))
 {
+	define('UNORM_NONE', 1);
+	define('UNORM_NFD',  2);
+	define('UNORM_NFKD', 3);
+	define('UNORM_NFC',  4);
+	define('UNORM_NFKC', 5);
+	define('UNORM_FCD',  6);
+	define('UNORM_DEFAULT', UNORM_NFC);
 
-////////////////////////////////////////////////////////////////////////////////
-//              Wrapper for the utfnormal extension, ICU wrapper              //
-////////////////////////////////////////////////////////////////////////////////
-
-define('UNORM_NONE', 1);
-define('UNORM_NFD',  2);
-define('UNORM_NFKD', 3);
-define('UNORM_NFC',  4);
-define('UNORM_NFKC', 5);
-define('UNORM_FCD',  6);
-define('UNORM_DEFAULT', UNORM_NFC);
-
-/**
-* utf_normalizer class for the utfnormal extension
-*
-* @ignore
-*/
-class utf_normalizer
-{
-	function cleanup($str)
+	/**
+	* utf_normalizer class for the utfnormal extension
+	*
+	* @ignore
+	*/
+	class utf_normalizer
 	{
-		/**
-		* The string below is the list of all autorized characters, sorted by
-		* frequency in latin text
-		*/
-		$pos = strspn(
-			$str,
-			"\x20\x65\x69\x61\x73\x6E\x74\x72\x6F\x6C\x75\x64\x5D\x5B\x63\x6D\x70\x27\x0A\x67\x7C\x68\x76\x2E\x66\x62\x2C\x3A\x3D\x2D\x71\x31\x30\x43\x32\x2A\x79\x78\x29\x28\x4C\x39\x41\x53\x2F\x50\x22\x45\x6A\x4D\x49\x6B\x33\x3E\x35\x54\x3C\x44\x34\x7D\x42\x7B\x38\x46\x77\x52\x36\x37\x55\x47\x4E\x3B\x4A\x7A\x56\x23\x48\x4F\x57\x5F\x26\x21\x4B\x3F\x58\x51\x25\x59\x5C\x09\x5A\x2B\x7E\x5E\x24\x40\x60\x7F\x0D"
-		);
-
-		if (!isset($str[$pos]))
+		function cleanup($str)
 		{
 			/**
-			* ASCII strings with no special chars return immediately
+			* The string below is the list of all authorized characters, sorted by
+			* frequency in latin text
 			*/
-			return $str;
-		}
+			$pos = strspn($str, "\x20\x65\x69\x61\x73\x6E\x74\x72\x6F\x6C\x75\x64\x5D\x5B\x63\x6D\x70\x27\x0A\x67\x7C\x68\x76\x2E\x66\x62\x2C\x3A\x3D\x2D\x71\x31\x30\x43\x32\x2A\x79\x78\x29\x28\x4C\x39\x41\x53\x2F\x50\x22\x45\x6A\x4D\x49\x6B\x33\x3E\x35\x54\x3C\x44\x34\x7D\x42\x7B\x38\x46\x77\x52\x36\x37\x55\x47\x4E\x3B\x4A\x7A\x56\x23\x48\x4F\x57\x5F\x26\x21\x4B\x3F\x58\x51\x25\x59\x5C\x09\x5A\x2B\x7E\x5E\x24\x40\x60\x7F\x0D");
 
-		/**
-		* Check if there is potentially a U+FFFE or U+FFFF char (UTF sequence
-		* 0xEFBFBE or 0xEFBFBF) and replace them
-		*
-		* Note: we start searching at position $pos
-		*/
-		if (is_int(strpos($str, "\xEF\xBF", $pos)))
-		{
-			$str = str_replace(
-				array("\xEF\xBF\xBE", "\xEF\xBF\xBF"),
-				array(UTF8_REPLACEMENT, UTF8_REPLACEMENT),
-				$str
+			if (!isset($str[$pos]))
+			{
+				// ASCII strings with no special chars return immediately
+				return $str;
+			}
+
+			// Check if there is potentially a U+FFFE or U+FFFF char (UTF sequence 0xEFBFBE or 0xEFBFBF) and replace them
+			// Note: we start searching at position $pos
+			if (is_int(strpos($str, "\xEF\xBF", $pos)))
+			{
+				$str = str_replace(
+					array("\xEF\xBF\xBE", "\xEF\xBF\xBF"),
+					array(UTF8_REPLACEMENT, UTF8_REPLACEMENT),
+					$str
+				);
+			}
+
+			// Replace any byte in the range 0x00..0x1F, except for \r, \n and \t
+			// We replace those characters with a 0xFF byte, which is illegal in
+			// UTF-8 and will in turn be replaced with a Unicode replacement char
+			$str = strtr(
+				$str,
+				"\x00\x01\x02\x03\x04\x05\x06\x07\x08\x0B\x0C\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F",
+				"\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF"
 			);
-		}
 
-		/**
-		* Replace any byte in the range 0x00..0x1F, except for \r, \n and \t
-		*
-		* We replace those characters with a 0xFF byte, which is illegal in
-		* UTF-8 and will in turn be replaced with a Unicode replacement char
-		*/
-		$str = strtr(
-			$str,
-			"\x00\x01\x02\x03\x04\x05\x06\x07\x08\x0B\x0C\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F",
-			"\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF"
-		);
+			// As per the original implementation, "the UnicodeString constructor fails if the string ends with a head byte".
+			// Therefore, if the string ends with a leading byte we replace it with 0xFF, which is illegal too and will be
+			// replaced with a Unicode replacement character
+			if (substr($str, -1) >= "\xC0")
+			{
+				$str[strlen($str) - 1] = "\xFF";
+			}
 
-		/**
-		* As per the original implementation, "the UnicodeString constructor fails
-		* if the string ends with a head byte". Therefore, if the string ends with
-		* a leading byte we replace it with 0xFF, which is illegal too and will be
-		* replaced with a Unicode replacement character
-		*/
-		if (substr($str, -1) >= "\xC0")
-		{
-			$str[strlen($str) - 1] = "\xFF";
+			return utf8_normalize($str, UNORM_NFC);
 		}
 
-		return utf8_normalize($str, UNORM_NFC);
-	}
-
-	function nfc($str)
-	{
-		return utf8_normalize($str, UNORM_NFC);
-	}
+		function nfc($str)
+		{
+			return utf8_normalize($str, UNORM_NFC);
+		}
 
-	function nfkc($str)
-	{
-		return utf8_normalize($str, UNORM_NFKC);
-	}
+		function nfkc($str)
+		{
+			return utf8_normalize($str, UNORM_NFKC);
+		}
 
-	function nfd($str)
-	{
-		return utf8_normalize($str, UNORM_NFD);
-	}
+		function nfd($str)
+		{
+			return utf8_normalize($str, UNORM_NFD);
+		}
 
-	function nfkd($str)
-	{
-		return utf8_normalize($str, UNORM_NFKD);
+		function nfkd($str)
+		{
+			return utf8_normalize($str, UNORM_NFKD);
+		}
 	}
-}
-
-////////////////////////////////////////////////////////////////////////////////
-//                           End of the ICU wrapper                           //
-////////////////////////////////////////////////////////////////////////////////
-
-
 }
 else
 {
+	// This block will NOT be loaded if the utfnormal extension is available
+
+	// Unset global variables
+	unset($GLOBALS['utf_jamo_index'], $GLOBALS['utf_jamo_type'], $GLOBALS['utf_nfc_qc'], $GLOBALS['utf_combining_class'], $GLOBALS['utf_canonical_comp'], $GLOBALS['utf_canonical_decomp'], $GLOBALS['utf_nfkc_qc'], $GLOBALS['utf_compatibility_decomp']);
+
+	// NFC_QC and NFKC_QC values
+	define('UNICODE_QC_MAYBE', 0);
+	define('UNICODE_QC_NO', 1);
+
+	// Contains all the ASCII characters appearing in UTF-8, sorted by frequency
+	define('UTF8_ASCII_RANGE', "\x20\x65\x69\x61\x73\x6E\x74\x72\x6F\x6C\x75\x64\x5D\x5B\x63\x6D\x70\x27\x0A\x67\x7C\x68\x76\x2E\x66\x62\x2C\x3A\x3D\x2D\x71\x31\x30\x43\x32\x2A\x79\x78\x29\x28\x4C\x39\x41\x53\x2F\x50\x22\x45\x6A\x4D\x49\x6B\x33\x3E\x35\x54\x3C\x44\x34\x7D\x42\x7B\x38\x46\x77\x52\x36\x37\x55\x47\x4E\x3B\x4A\x7A\x56\x23\x48\x4F\x57\x5F\x26\x21\x4B\x3F\x58\x51\x25\x59\x5C\x09\x5A\x2B\x7E\x5E\x24\x40\x60\x7F\x00\x01\x02\x03\x04\x05\x06\x07\x08\x0B\x0C\x0D\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F");
+
+	// Contains all the tail bytes that can appear in the composition of a UTF-8 char
+	define('UTF8_TRAILING_BYTES', "\xA9\xA0\xA8\x80\xAA\x99\xA7\xBB\xAB\x89\x94\x82\xB4\xA2\xAE\x83\xB0\xB9\xB8\x93\xAF\xBC\xB3\x81\xA4\xB2\x9C\xA1\xB5\xBE\xBD\xBA\x98\xAD\xB1\x84\x95\xA6\xB6\x88\x8D\x90\xB7\xBF\x92\x85\xA5\x97\x8C\x86\xA3\x8E\x9F\x8F\x87\x91\x9D\xAC\x9E\x8B\x96\x9B\x8A\x9A");
+
+	// Constants used by the Hangul [de]composition algorithms
+	define('UNICODE_HANGUL_SBASE', 0xAC00);
+	define('UNICODE_HANGUL_LBASE', 0x1100);
+	define('UNICODE_HANGUL_VBASE', 0x1161);
+	define('UNICODE_HANGUL_TBASE', 0x11A7);
+	define('UNICODE_HANGUL_SCOUNT', 11172);
+	define('UNICODE_HANGUL_LCOUNT', 19);
+	define('UNICODE_HANGUL_VCOUNT', 21);
+	define('UNICODE_HANGUL_TCOUNT', 28);
+	define('UNICODE_HANGUL_NCOUNT', 588);
+	define('UNICODE_JAMO_L', 0);
+	define('UNICODE_JAMO_V', 1);
+	define('UNICODE_JAMO_T', 2);
 
-
-////////////////////////////////////////////////////////////////////////////////
-//        This block will NOT be loaded if the utfnormal extension is         //
-////////////////////////////////////////////////////////////////////////////////
-
-/**
-* Unset global variables
-*/
-unset($GLOBALS['utf_jamo_index'], $GLOBALS['utf_jamo_type'], $GLOBALS['utf_nfc_qc'], $GLOBALS['utf_combining_class'], $GLOBALS['utf_canonical_comp'], $GLOBALS['utf_canonical_decomp'], $GLOBALS['utf_nfkc_qc'], $GLOBALS['utf_compatibility_decomp']);
-
-/**
-* NFC_QC and NFKC_QC values
-*/
-define('UNICODE_QC_MAYBE', 0);
-define('UNICODE_QC_NO', 1);
-
-/**
-* Contains all the ASCII characters appearing in UTF-8, sorted by frequency
-*/
-define('UTF8_ASCII_RANGE', "\x20\x65\x69\x61\x73\x6E\x74\x72\x6F\x6C\x75\x64\x5D\x5B\x63\x6D\x70\x27\x0A\x67\x7C\x68\x76\x2E\x66\x62\x2C\x3A\x3D\x2D\x71\x31\x30\x43\x32\x2A\x79\x78\x29\x28\x4C\x39\x41\x53\x2F\x50\x22\x45\x6A\x4D\x49\x6B\x33\x3E\x35\x54\x3C\x44\x34\x7D\x42\x7B\x38\x46\x77\x52\x36\x37\x55\x47\x4E\x3B\x4A\x7A\x56\x23\x48\x4F\x57\x5F\x26\x21\x4B\x3F\x58\x51\x25\x59\x5C\x09\x5A\x2B\x7E\x5E\x24\x40\x60\x7F\x00\x01\x02\x03\x04\x05\x06\x07\x08\x0B\x0C\x0D\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F");
-
-/**
-* Contains all the tail bytes that can appear in the composition of a UTF-8 char
-*/
-define('UTF8_TRAILING_BYTES', "\xA9\xA0\xA8\x80\xAA\x99\xA7\xBB\xAB\x89\x94\x82\xB4\xA2\xAE\x83\xB0\xB9\xB8\x93\xAF\xBC\xB3\x81\xA4\xB2\x9C\xA1\xB5\xBE\xBD\xBA\x98\xAD\xB1\x84\x95\xA6\xB6\x88\x8D\x90\xB7\xBF\x92\x85\xA5\x97\x8C\x86\xA3\x8E\x9F\x8F\x87\x91\x9D\xAC\x9E\x8B\x96\x9B\x8A\x9A");
-
-/**
-* Constants used by the Hangul [de]composition algorithms
-*/
-define('UNICODE_HANGUL_SBASE', 0xAC00);
-define('UNICODE_HANGUL_LBASE', 0x1100);
-define('UNICODE_HANGUL_VBASE', 0x1161);
-define('UNICODE_HANGUL_TBASE', 0x11A7);
-define('UNICODE_HANGUL_SCOUNT', 11172);
-define('UNICODE_HANGUL_LCOUNT', 19);
-define('UNICODE_HANGUL_VCOUNT', 21);
-define('UNICODE_HANGUL_TCOUNT', 28);
-define('UNICODE_HANGUL_NCOUNT', 588);
-define('UNICODE_JAMO_L', 0);
-define('UNICODE_JAMO_V', 1);
-define('UNICODE_JAMO_T', 2);
-
-/**
-* Unicode normalization routines
-*
-* @package		phpBB3
-*/
-class utf_normalizer
-{
 	/**
-	* Validate, cleanup and normalize a string
-	*
-	* The ultimate convenience function! Clean up invalid UTF-8 sequences,
-	* and convert to Normal Form C, canonical composition.
+	* Unicode normalization routines
 	*
-	* @param	string	$str	The dirty string
-	* @return	string			The same string, all shiny and cleaned-up
+	* @package phpBB3
 	*/
-	function cleanup($str)
+	class utf_normalizer
 	{
 		/**
-		* The string below is the list of all autorized characters, sorted by
-		* frequency in latin text
+		* Validate, cleanup and normalize a string
+		*
+		* The ultimate convenience function! Clean up invalid UTF-8 sequences,
+		* and convert to Normal Form C, canonical composition.
+		*
+		* @param	string	$str	The dirty string
+		* @return	string			The same string, all shiny and cleaned-up
 		*/
-		$pos = strspn($str, "\x20\x65\x69\x61\x73\x6E\x74\x72\x6F\x6C\x75\x64\x5D\x5B\x63\x6D\x70\x27\x0A\x67\x7C\x68\x76\x2E\x66\x62\x2C\x3A\x3D\x2D\x71\x31\x30\x43\x32\x2A\x79\x78\x29\x28\x4C\x39\x41\x53\x2F\x50\x22\x45\x6A\x4D\x49\x6B\x33\x3E\x35\x54\x3C\x44\x34\x7D\x42\x7B\x38\x46\x77\x52\x36\x37\x55\x47\x4E\x3B\x4A\x7A\x56\x23\x48\x4F\x57\x5F\x26\x21\x4B\x3F\x58\x51\x25\x59\x5C\x09\x5A\x2B\x7E\x5E\x24\x40\x60\x7F\x0D");
-		$len = strlen($str);
-
-		if ($pos == $len)
+		function cleanup($str)
 		{
-			/**
-			* ASCII strings with no special chars return immediately
-			*/
-			return $str;
-		}
+			// The string below is the list of all authorized characters, sorted by frequency in Latin text
+			$pos = strspn($str, "\x20\x65\x69\x61\x73\x6E\x74\x72\x6F\x6C\x75\x64\x5D\x5B\x63\x6D\x70\x27\x0A\x67\x7C\x68\x76\x2E\x66\x62\x2C\x3A\x3D\x2D\x71\x31\x30\x43\x32\x2A\x79\x78\x29\x28\x4C\x39\x41\x53\x2F\x50\x22\x45\x6A\x4D\x49\x6B\x33\x3E\x35\x54\x3C\x44\x34\x7D\x42\x7B\x38\x46\x77\x52\x36\x37\x55\x47\x4E\x3B\x4A\x7A\x56\x23\x48\x4F\x57\x5F\x26\x21\x4B\x3F\x58\x51\x25\x59\x5C\x09\x5A\x2B\x7E\x5E\x24\x40\x60\x7F\x0D");
+			$len = strlen($str);
 
-		/**
-		* Note: we do not check for $GLOBALS['utf_canonical_decomp']. It is assumed
-		* they are always loaded together
-		*/
-		if (!isset($GLOBALS['utf_nfc_qc']))
-		{
-			global $phpbb_root_path, $phpEx;
-			include($phpbb_root_path . 'includes/utf/data/utf_nfc_qc.' . $phpEx);
+			if ($pos == $len)
+			{
+				// ASCII strings with no special chars return immediately
+				return $str;
+			}
+
+			// Note: we do not check for $GLOBALS['utf_canonical_decomp']. It is assumed they are always loaded together
+			if (!isset($GLOBALS['utf_nfc_qc']))
+			{
+				global $phpbb_root_path, $phpEx;
+				include($phpbb_root_path . 'includes/utf/data/utf_nfc_qc.' . $phpEx);
+			}
+
+			// Replace any byte in the range 0x00..0x1F, except for \r, \n and \t
+			// We replace those characters with a 0xFF byte, which is illegal in UTF-8 and will in turn be replaced with a UTF replacement char
+			return utf_normalizer::recompose(
+				strtr(
+					$str,
+					"\x00\x01\x02\x03\x04\x05\x06\x07\x08\x0B\x0C\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F",
+					"\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF"
+				),
+				$pos, $len, $GLOBALS['utf_nfc_qc'], $GLOBALS['utf_canonical_decomp']
+			);
 		}
 
 		/**
-		* Replace any byte in the range 0x00..0x1F, except for \r, \n and \t
+		* Validate and normalize a UTF string to NFC
 		*
-		* We replace those characters with a 0xFF byte, which is illegal in
-		* UTF-8 and will in turn be replaced with a UTF replacement char
+		* @param	string	$str	Unchecked UTF string
+		* @return	string			The string, validated and in normal form
 		*/
-		return utf_normalizer::recompose(
-			strtr(
-				$str,
-				"\x00\x01\x02\x03\x04\x05\x06\x07\x08\x0B\x0C\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F",
-				"\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF"
-			),
-			$pos, $len, $GLOBALS['utf_nfc_qc'], $GLOBALS['utf_canonical_decomp']
-		);
-	}
-
-	/**
-	* Validate and normalize a UTF string to NFC
-	*
-	* @param	string	$str	Unchecked UTF string
-	* @return	string			The string, validated and in normal form
-	*/
-	function nfc($str)
-	{
-		$pos = strspn($str, UTF8_ASCII_RANGE);
-		$len = strlen($str);
-
-		if ($pos == $len)
-		{
-			/**
-			* ASCII strings return immediately
-			*/
-			return $str;
-		}
-
-		if (!isset($GLOBALS['utf_nfc_qc']))
+		function nfc($str)
 		{
-			global $phpbb_root_path, $phpEx;
-			include($phpbb_root_path . 'includes/utf/data/utf_nfc_qc.' . $phpEx);
-		}
+			$pos = strspn($str, UTF8_ASCII_RANGE);
+			$len = strlen($str);
 
-		return utf_normalizer::recompose($str, $pos, $len, $GLOBALS['utf_nfc_qc'], $GLOBALS['utf_canonical_decomp']);
-	}
+			if ($pos == $len)
+			{
+				// ASCII strings return immediately
+				return $str;
+			}
 
-	/**
-	* Validate and normalize a UTF string to NFKC
-	*
-	* @param	string	$str	Unchecked UTF string
-	* @return	string			The string, validated and in normal form
-	*/
-	function nfkc($str)
-	{
-		$pos = strspn($str, UTF8_ASCII_RANGE);
-		$len = strlen($str);
+			if (!isset($GLOBALS['utf_nfc_qc']))
+			{
+				global $phpbb_root_path, $phpEx;
+				include($phpbb_root_path . 'includes/utf/data/utf_nfc_qc.' . $phpEx);
+			}
 
-		if ($pos == $len)
-		{
-			/**
-			* ASCII strings return immediately
-			*/
-			return $str;
+			return utf_normalizer::recompose($str, $pos, $len, $GLOBALS['utf_nfc_qc'], $GLOBALS['utf_canonical_decomp']);
 		}
 
-		if (!isset($GLOBALS['utf_nfkc_qc']))
-		{
-			global $phpbb_root_path, $phpEx;
-			include($phpbb_root_path . 'includes/utf/data/utf_nfkc_qc.' . $phpEx);
-		}
-		if (!isset($GLOBALS['utf_canonical_comp']))
+		/**
+		* Validate and normalize a UTF string to NFKC
+		*
+		* @param	string	$str	Unchecked UTF string
+		* @return	string			The string, validated and in normal form
+		*/
+		function nfkc($str)
 		{
-			global $phpbb_root_path, $phpEx;
-			include($phpbb_root_path . 'includes/utf/data/utf_canonical_comp.' . $phpEx);
-		}
+			$pos = strspn($str, UTF8_ASCII_RANGE);
+			$len = strlen($str);
 
-		return utf_normalizer::recompose($str, $pos, $len, $GLOBALS['utf_nfkc_qc'], $GLOBALS['utf_compatibility_decomp']);
-	}
+			if ($pos == $len)
+			{
+				// ASCII strings return immediately
+				return $str;
+			}
 
-	/**
-	* Validate and normalize a UTF string to NFD
-	*
-	* @param	string	$str	Unchecked UTF string
-	* @return	string			The string, validated and in normal form
-	*/
-	function nfd($str)
-	{
-		$pos = strspn($str, UTF8_ASCII_RANGE);
-		$len = strlen($str);
+			if (!isset($GLOBALS['utf_nfkc_qc']))
+			{
+				global $phpbb_root_path, $phpEx;
+				include($phpbb_root_path . 'includes/utf/data/utf_nfkc_qc.' . $phpEx);
+			}
 
-		if ($pos == $len)
-		{
-			/**
-			* ASCII strings return immediately
-			*/
-			return $str;
+			if (!isset($GLOBALS['utf_canonical_comp']))
+			{
+				global $phpbb_root_path, $phpEx;
+				include($phpbb_root_path . 'includes/utf/data/utf_canonical_comp.' . $phpEx);
+			}
+
+			return utf_normalizer::recompose($str, $pos, $len, $GLOBALS['utf_nfkc_qc'], $GLOBALS['utf_compatibility_decomp']);
 		}
 
-		if (!isset($GLOBALS['utf_canonical_decomp']))
+		/**
+		* Validate and normalize a UTF string to NFD
+		*
+		* @param	string	$str	Unchecked UTF string
+		* @return	string			The string, validated and in normal form
+		*/
+		function nfd($str)
 		{
-			global $phpbb_root_path, $phpEx;
-			include($phpbb_root_path . 'includes/utf/data/utf_canonical_decomp.' . $phpEx);
-		}
+			$pos = strspn($str, UTF8_ASCII_RANGE);
+			$len = strlen($str);
 
-		return utf_normalizer::decompose($str, $pos, $len, $GLOBALS['utf_canonical_decomp']);
-	}
+			if ($pos == $len)
+			{
+				// ASCII strings return immediately
+				return $str;
+			}
 
-	/**
-	* Validate and normalize a UTF string to NFKD
-	*
-	* @param	string	$str	Unchecked UTF string
-	* @return	string			The string, validated and in normal form
-	*/
-	function nfkd($str)
-	{
-		$pos = strspn($str, UTF8_ASCII_RANGE);
-		$len = strlen($str);
+			if (!isset($GLOBALS['utf_canonical_decomp']))
+			{
+				global $phpbb_root_path, $phpEx;
+				include($phpbb_root_path . 'includes/utf/data/utf_canonical_decomp.' . $phpEx);
+			}
 
-		if ($pos == $len)
-		{
-			/**
-			* ASCII strings return immediately
-			*/
-			return $str;
+			return utf_normalizer::decompose($str, $pos, $len, $GLOBALS['utf_canonical_decomp']);
 		}
 
-		if (!isset($GLOBALS['utf_compatibility_decomp']))
+		/**
+		* Validate and normalize a UTF string to NFKD
+		*
+		* @param	string	$str	Unchecked UTF string
+		* @return	string			The string, validated and in normal form
+		*/
+		function nfkd($str)
 		{
-			global $phpbb_root_path, $phpEx;
-			include($phpbb_root_path . 'includes/utf/data/utf_compatibility_decomp.' . $phpEx);
-		}
+			$pos = strspn($str, UTF8_ASCII_RANGE);
+			$len = strlen($str);
 
-		return utf_normalizer::decompose($str, $pos, $len, $GLOBALS['utf_compatibility_decomp']);
-	}
+			if ($pos == $len)
+			{
+				// ASCII strings return immediately
+				return $str;
+			}
 
+			if (!isset($GLOBALS['utf_compatibility_decomp']))
+			{
+				global $phpbb_root_path, $phpEx;
+				include($phpbb_root_path . 'includes/utf/data/utf_compatibility_decomp.' . $phpEx);
+			}
 
-	////////////////////////////////////////////////////////////////////////////
-	//                           Internal functions                           //
-	////////////////////////////////////////////////////////////////////////////
+			return utf_normalizer::decompose($str, $pos, $len, $GLOBALS['utf_compatibility_decomp']);
+		}
 
-	/**
-	* Recompose a UTF string
-	*
-	* @param	string	$str		Unchecked UTF string
-	* @param	integer	$pos		Position of the first UTF char (in bytes)
-	* @param	integer	$len		Length of the string (in bytes)
-	* @param	array	$qc			Quick-check array, passed by reference but never modified
-	* @param	array	$decomp_map	Decomposition mapping, passed by reference but never modified
-	* @return	string				The string, validated and recomposed
-	*
-	* @access	private
-	*/
-	function recompose($str, $pos, $len, &$qc, &$decomp_map)
-	{
-		global $utf_combining_class, $utf_canonical_comp, $utf_jamo_type, $utf_jamo_index;
 
 		/**
-		* Load some commonly-used tables
+		* Recompose a UTF string
+		*
+		* @param	string	$str		Unchecked UTF string
+		* @param	integer	$pos		Position of the first UTF char (in bytes)
+		* @param	integer	$len		Length of the string (in bytes)
+		* @param	array	$qc			Quick-check array, passed by reference but never modified
+		* @param	array	$decomp_map	Decomposition mapping, passed by reference but never modified
+		* @return	string				The string, validated and recomposed
+		*
+		* @access	private
 		*/
-		if (!isset($utf_jamo_index, $utf_jamo_type, $utf_combining_class))
+		function recompose($str, $pos, $len, &$qc, &$decomp_map)
 		{
-			global $phpbb_root_path;
-			include($phpbb_root_path . 'includes/utf/data/utf_normalizer_common.php');
-		}
+			global $utf_combining_class, $utf_canonical_comp, $utf_jamo_type, $utf_jamo_index;
 
-		/**
-		* Buffer the last ASCII char before the UTF-8 stuff if applicable
-		*/
-		$tmp = '';
-		$i = $tmp_pos = $last_cc = 0;
+			// Load some commonly-used tables
+			if (!isset($utf_jamo_index, $utf_jamo_type, $utf_combining_class))
+			{
+				global $phpbb_root_path;
+				include($phpbb_root_path . 'includes/utf/data/utf_normalizer_common.php');
+			}
 
-		if ($pos)
-		{
-			$buffer = array(++$i => $str[$pos - 1]);
-		}
-		else
-		{
-			$buffer = array();
-		}
+			// Buffer the last ASCII char before the UTF-8 stuff if applicable
+			$tmp = '';
+			$i = $tmp_pos = $last_cc = 0;
 
-		/**
-		* UTF char length array
-		*
-		* This array is used to determine the length of a UTF character. Be $c the
-		* result of ($str[$pos] & "\xF0") --where $str is the string we're operating
-		* on and $pos the position of the cursor--, if $utf_len_mask[$c] does not
-		* exist, the byte is an ASCII char. Otherwise, if $utf_len_mask[$c] is greater
-		* than 0, we have a the leading byte of a multibyte character whose length is
-		* $utf_len_mask[$c] and if it is equal to 0, the byte is a trailing byte.
-		*/
-		$utf_len_mask = array(
-			/**
-			* Leading bytes masks
-			*/
-			"\xC0" => 2, "\xD0" => 2, "\xE0" => 3, "\xF0" => 4,
+			if ($pos)
+			{
+				$buffer = array(++$i => $str[$pos - 1]);
+			}
+			else
+			{
+				$buffer = array();
+			}
 
-			/**
-			* Trailing bytes masks
-			*/
-			"\x80" => 0, "\x90" => 0, "\xA0" => 0, "\xB0" => 0
-		);
-
-		$extra_check = array(
-			"\xED"=>1, "\xEF"=>1, "\xC0"=>1, "\xC1"=>1, "\xE0"=>1, "\xF0"=>1,
-			"\xF4"=>1, "\xF5"=>1, "\xF6"=>1, "\xF7"=>1, "\xF8"=>1, "\xF9"=>1,
-			"\xFA"=>1, "\xFB"=>1, "\xFC"=>1, "\xFD"=>1, "\xFE"=>1, "\xFF"=>1
-		);
-
-		$utf_validation_mask = array(
-			2	=>	"\xE0\xC0",
-			3	=>	"\xF0\xC0\xC0",
-			4	=>	"\xF8\xC0\xC0\xC0"
-		);
-
-		$utf_validation_check = array(
-			2	=>	"\xC0\x80",
-			3	=>	"\xE0\x80\x80",
-			4	=>	"\xF0\x80\x80\x80"
-		);
-
-		////////////////////////////////////////////////////////////////////////
-		//                             Main loop                              //
-		////////////////////////////////////////////////////////////////////////
-
-		do
-		{
-			////////////////////////////////////////////////////////////////////
-			//         STEP 0: Capture the current char and buffer it         //
-			////////////////////////////////////////////////////////////////////
+			// UTF char length array
+			// This array is used to determine the length of a UTF character.
+			// Let $c be the result of ($str[$pos] & "\xF0") --where $str is the string we're operating on and $pos
+			// the position of the cursor--. If $utf_len_mask[$c] does not exist, the byte is an ASCII char.
+			// Otherwise, if $utf_len_mask[$c] is greater than 0, we have the leading byte of a multibyte character
+			// whose length is $utf_len_mask[$c], and if it is equal to 0, the byte is a trailing byte.
+			$utf_len_mask = array(
+				// Leading bytes masks
+				"\xC0" => 2, "\xD0" => 2, "\xE0" => 3, "\xF0" => 4,
+				// Trailing bytes masks
+				"\x80" => 0, "\x90" => 0, "\xA0" => 0, "\xB0" => 0
+			);
+
+			$extra_check = array(
+				"\xED" => 1, "\xEF" => 1, "\xC0" => 1, "\xC1" => 1, "\xE0" => 1, "\xF0" => 1,
+				"\xF4" => 1, "\xF5" => 1, "\xF6" => 1, "\xF7" => 1, "\xF8" => 1, "\xF9" => 1,
+				"\xFA" => 1, "\xFB" => 1, "\xFC" => 1, "\xFD" => 1, "\xFE" => 1, "\xFF" => 1
+			);
+
+			$utf_validation_mask = array(
+				2	=> "\xE0\xC0",
+				3	=> "\xF0\xC0\xC0",
+				4	=> "\xF8\xC0\xC0\xC0"
+			);
 
-			$c = $str[$pos];
-			$c_mask = $c & "\xF0";
+			$utf_validation_check = array(
+				2	=> "\xC0\x80",
+				3	=> "\xE0\x80\x80",
+				4	=> "\xF0\x80\x80\x80"
+			);
 
-			if (isset($utf_len_mask[$c_mask]))
+			// Main loop
+			do
 			{
-				/**
-				* Byte at $pos is either a leading byte or a missplaced trailing byte
-				*/
-				if ($utf_len = $utf_len_mask[$c_mask])
+				// STEP 0: Capture the current char and buffer it
+				$c = $str[$pos];
+				$c_mask = $c & "\xF0";
+
+				if (isset($utf_len_mask[$c_mask]))
 				{
-					/**
-					* Capture the char
-					*/
-					$buffer[++$i & 7] = $utf_char = substr($str, $pos, $utf_len);
-
-					/**
-					* Let's find out if a thorough check is needed
-					*/
-					if (isset($qc[$utf_char]))
-					{
-						/**
-						* If the UTF char is in the qc array then it may not be in normal
-						* form. We do nothing here, the actual processing is below this
-						* "if" block
-						*/
-					}
-					elseif (isset($utf_combining_class[$utf_char]))
+					// Byte at $pos is either a leading byte or a misplaced trailing byte
+					if ($utf_len = $utf_len_mask[$c_mask])
 					{
-						if ($utf_combining_class[$utf_char] < $last_cc)
-						{
-							/**
-							* A combining character that is NOT canonically ordered
-							*/
-						}
-						else
-						{
-							/**
-							* A combining character that IS canonically ordered, skip
-							* to the next char
-							*/
-							$last_cc = $utf_combining_class[$utf_char];
+						// Capture the char
+						$buffer[++$i & 7] = $utf_char = substr($str, $pos, $utf_len);
 
-							$pos += $utf_len;
-							continue;
+						// Let's find out if a thorough check is needed
+						if (isset($qc[$utf_char]))
+						{
+							// If the UTF char is in the qc array then it may not be in normal form. We do nothing here, the actual processing is below this "if" block
 						}
-					}
-					else
-					{
-						/**
-						* At this point, $utf_char holds a UTF char that we know
-						* is not a NF[K]C_QC and is not a combining character. It can
-						* be a singleton, a canonical composite, a replacement char or
-						* an even an ill-formed bunch of bytes. Let's find out
-						*/
-						$last_cc = 0;
-
-						/**
-						* Check that we have the correct number of trailing bytes
-						*/
-						if (($utf_char & $utf_validation_mask[$utf_len]) != $utf_validation_check[$utf_len])
+						else if (isset($utf_combining_class[$utf_char]))
 						{
-							/**
-							* Current char isn't well-formed or legal: either one or
-							* several trailing bytes are missing, or the Unicode char
-							* has been encoded in a five- or six- byte sequence
-							*/
-							if ($utf_char[0] >= "\xF8")
+							if ($utf_combining_class[$utf_char] < $last_cc)
 							{
-								if ($utf_char[0] < "\xF8")
-								{
-									$trailing_bytes = 3;
-								}
-								elseif ($utf_char[0] < "\xFC")
-								{
-									$trailing_bytes = 4;
-								}
-								if ($utf_char[0] > "\xFD")
-								{
-									$trailing_bytes = 0;
-								}
-								else
-								{
-									$trailing_bytes = 5;
-								}
+								// A combining character that is NOT canonically ordered
 							}
 							else
 							{
-								$trailing_bytes = $utf_len - 1;
-							}
-
-							$tmp .= substr($str, $tmp_pos, $pos - $tmp_pos) . UTF8_REPLACEMENT;
-							$pos += strspn($str, UTF8_TRAILING_BYTES, ++$pos, $trailing_bytes);
-							$tmp_pos = $pos;
+								// A combining character that IS canonically ordered, skip to the next char
+								$last_cc = $utf_combining_class[$utf_char];
 
-							continue;
+								$pos += $utf_len;
+								continue;
+							}
 						}
-
-						if (isset($extra_check[$c]))
+						else
 						{
-							switch($c)
+							// At this point, $utf_char holds a UTF char that we know is not a NF[K]C_QC and is not a combining character.
+							// It can be a singleton, a canonical composite, a replacement char or even an ill-formed bunch of bytes. Let's find out
+							$last_cc = 0;
+
+							// Check that we have the correct number of trailing bytes
+							if (($utf_char & $utf_validation_mask[$utf_len]) != $utf_validation_check[$utf_len])
 							{
-								/**
-								* Note: 0xED is quite common in Korean
-								*/
-								case "\xED":
-									if ($utf_char >= "\xED\xA0\x80")
+								// Current char isn't well-formed or legal: either one or several trailing bytes are missing, or the Unicode char
+								// has been encoded in a five- or six- byte sequence
+								if ($utf_char[0] >= "\xF8")
+								{
+									if ($utf_char[0] < "\xF8")
 									{
-										/**
-										* Surrogates (U+D800..U+DFFF) are not allowed in UTF-8
-										* (UTF sequence 0xEDA080..0xEDBFBF)
-										*/
-										$tmp .= substr($str, $tmp_pos, $pos - $tmp_pos) . UTF8_REPLACEMENT;
-										$pos += $utf_len;
-										$tmp_pos = $pos;
-										continue 2;
+										$trailing_bytes = 3;
 									}
-									break;
-
-								/**
-								* Note: 0xEF is quite common in Japanese
-								*/
-								case "\xEF":
-									if ($utf_char == "\xEF\xBF\xBE" || $utf_char == "\xEF\xBF\xBF")
+									else if ($utf_char[0] < "\xFC")
 									{
-										/**
-										* U+FFFE and U+FFFF are explicitly disallowed
-										* (UTF sequence 0xEFBFBE..0xEFBFBF)
-										*/
-										$tmp .= substr($str, $tmp_pos, $pos - $tmp_pos) . UTF8_REPLACEMENT;
-										$pos += $utf_len;
-										$tmp_pos = $pos;
-										continue 2;
+										$trailing_bytes = 4;
 									}
-									break;
 
-								case "\xC0":
-								case "\xC1":
-									if ($utf_char <= "\xC1\xBF")
+									if ($utf_char[0] > "\xFD")
 									{
-										/**
-										* Overlong sequence: Unicode char U+0000..U+007F encoded as a
-										* double-byte UTF char
-										*/
-										$tmp .= substr($str, $tmp_pos, $pos - $tmp_pos) . UTF8_REPLACEMENT;
-										$pos += $utf_len;
-										$tmp_pos = $pos;
-										continue 2;
+										$trailing_bytes = 0;
 									}
-									break;
-
-								case "\xE0":
-									if ($utf_char <= "\xE0\x9F\xBF")
+									else
 									{
-										/**
-										* Unicode char U+0000..U+07FF encoded in 3 bytes
-										*/
-										$tmp .= substr($str, $tmp_pos, $pos - $tmp_pos) . UTF8_REPLACEMENT;
-										$pos += $utf_len;
-										$tmp_pos = $pos;
-										continue 2;
+										$trailing_bytes = 5;
 									}
-									break;
+								}
+								else
+								{
+									$trailing_bytes = $utf_len - 1;
+								}
 
-								case "\xF0":
-									if ($utf_char <= "\xF0\x8F\xBF\xBF")
-									{
-										/**
-										* Unicode char U+0000..U+FFFF encoded in 4 bytes
-										*/
-										$tmp .= substr($str, $tmp_pos, $pos - $tmp_pos) . UTF8_REPLACEMENT;
-										$pos += $utf_len;
-										$tmp_pos = $pos;
-										continue 2;
-									}
+								$tmp .= substr($str, $tmp_pos, $pos - $tmp_pos) . UTF8_REPLACEMENT;
+								$pos += strspn($str, UTF8_TRAILING_BYTES, ++$pos, $trailing_bytes);
+								$tmp_pos = $pos;
+
+								continue;
+							}
+
+							if (isset($extra_check[$c]))
+							{
+								switch ($c)
+								{
+									// Note: 0xED is quite common in Korean
+									case "\xED":
+										if ($utf_char >= "\xED\xA0\x80")
+										{
+											// Surrogates (U+D800..U+DFFF) are not allowed in UTF-8 (UTF sequence 0xEDA080..0xEDBFBF)
+											$tmp .= substr($str, $tmp_pos, $pos - $tmp_pos) . UTF8_REPLACEMENT;
+											$pos += $utf_len;
+											$tmp_pos = $pos;
+											continue 2;
+										}
 									break;
 
-								default:
-									/**
-									* Five- and six- byte sequences do not need being checked for here anymore
-									*/
-									if ($utf_char > UTF8_MAX)
-									{
-										/**
-										* Out of the Unicode range
-										*/
-										if ($utf_char[0] < "\xF8")
+									// Note: 0xEF is quite common in Japanese
+									case "\xEF":
+										if ($utf_char == "\xEF\xBF\xBE" || $utf_char == "\xEF\xBF\xBF")
 										{
-											$trailing_bytes = 3;
+											// U+FFFE and U+FFFF are explicitly disallowed (UTF sequence 0xEFBFBE..0xEFBFBF)
+											$tmp .= substr($str, $tmp_pos, $pos - $tmp_pos) . UTF8_REPLACEMENT;
+											$pos += $utf_len;
+											$tmp_pos = $pos;
+											continue 2;
 										}
-										elseif ($utf_char[0] < "\xFC")
+									break;
+
+									case "\xC0":
+									case "\xC1":
+										if ($utf_char <= "\xC1\xBF")
 										{
-											$trailing_bytes = 4;
+											// Overlong sequence: Unicode char U+0000..U+007F encoded as a double-byte UTF char
+											$tmp .= substr($str, $tmp_pos, $pos - $tmp_pos) . UTF8_REPLACEMENT;
+											$pos += $utf_len;
+											$tmp_pos = $pos;
+											continue 2;
 										}
-										elseif ($utf_char[0] > "\xFD")
+									break;
+
+									case "\xE0":
+										if ($utf_char <= "\xE0\x9F\xBF")
 										{
-											$trailing_bytes = 0;
+											// Unicode char U+0000..U+07FF encoded in 3 bytes
+											$tmp .= substr($str, $tmp_pos, $pos - $tmp_pos) . UTF8_REPLACEMENT;
+											$pos += $utf_len;
+											$tmp_pos = $pos;
+											continue 2;
 										}
-										else
+									break;
+
+									case "\xF0":
+										if ($utf_char <= "\xF0\x8F\xBF\xBF")
 										{
-											$trailing_bytes = 5;
+											// Unicode char U+0000..U+FFFF encoded in 4 bytes
+											$tmp .= substr($str, $tmp_pos, $pos - $tmp_pos) . UTF8_REPLACEMENT;
+											$pos += $utf_len;
+											$tmp_pos = $pos;
+											continue 2;
 										}
+									break;
 
-										$tmp .= substr($str, $tmp_pos, $pos - $tmp_pos) . UTF8_REPLACEMENT;
-										$pos += strspn($str, UTF8_TRAILING_BYTES, ++$pos, $trailing_bytes);
-										$tmp_pos = $pos;
-										continue 2;
-									}
+									default:
+										// Five- and six- byte sequences do not need being checked for here anymore
+										if ($utf_char > UTF8_MAX)
+										{
+											// Out of the Unicode range
+											if ($utf_char[0] < "\xF8")
+											{
+												$trailing_bytes = 3;
+											}
+											else if ($utf_char[0] < "\xFC")
+											{
+												$trailing_bytes = 4;
+											}
+											else if ($utf_char[0] > "\xFD")
+											{
+												$trailing_bytes = 0;
+											}
+											else
+											{
+												$trailing_bytes = 5;
+											}
+
+											$tmp .= substr($str, $tmp_pos, $pos - $tmp_pos) . UTF8_REPLACEMENT;
+											$pos += strspn($str, UTF8_TRAILING_BYTES, ++$pos, $trailing_bytes);
+											$tmp_pos = $pos;
+											continue 2;
+										}
+									break;
+								}
 							}
+
+							// The char is a valid starter, move the cursor and go on
+							$pos += $utf_len;
+							continue;
 						}
+					}
+					else
+					{
+						// A trailing byte came out of nowhere, we will advance the cursor and treat this byte and all following trailing bytes as if
+						// each of them was a Unicode replacement char
+						$spn = strspn($str, UTF8_TRAILING_BYTES, $pos);
+						$tmp .= substr($str, $tmp_pos, $pos - $tmp_pos) . str_repeat(UTF8_REPLACEMENT, $spn);
 
-						/**
-						* The char is a valid starter, move the cursor and go on
-						*/
-						$pos += $utf_len;
+						$pos += $spn;
+						$tmp_pos = $pos;
 						continue;
 					}
-				}
-				else
-				{
-					/**
-					* A trailing byte came out of nowhere, we will advance the cursor
-					* and treat the this byte and all following trailing bytes as if
-					* each of them was a Unicode replacement char
-					*/
-					$spn = strspn($str, UTF8_TRAILING_BYTES, $pos);
-					$tmp .= substr($str, $tmp_pos, $pos - $tmp_pos) . str_repeat(UTF8_REPLACEMENT, $spn);
-
-					$pos += $spn;
-					$tmp_pos = $pos;
-					continue;
-				}
 
 
-				////////////////////////////////////////////////////////////////////
-				//                 STEP 1: Decompose current char                 //
-				////////////////////////////////////////////////////////////////////
-
-				/**
-				* We have found a character that is either:
-				*  - in the NFC_QC/NFKC_QC list
-				*  - a non-starter char that is not canonically ordered
-				*
-				* We are going to capture the shortest UTF sequence that satisfies
-				* these two conditions:
-				*
-				*  1 - If the sequence does not start at the begginning of the string,
-				*      it must begin with a starter, and that starter must not have the
-				*      NF[K]C_QC property equal to "MAYBE"
-				*
-				*  2 - If the sequence does not end at the end of the string, it must end
-				*      with a non-starter and be immediately followed by a starter that
-				*      is not on the QC list
-				*/
-				$utf_seq = array();
-				$last_cc = 0;
-				$lpos = $pos;
-				$pos += $utf_len;
-
-				if (isset($decomp_map[$utf_char]))
-				{
-					$_pos = 0;
-					$_len = strlen($decomp_map[$utf_char]);
-					do
+					// STEP 1: Decompose current char
+
+					// We have found a character that is either:
+					//  - in the NFC_QC/NFKC_QC list
+					//  - a non-starter char that is not canonically ordered
+					//
+					// We are going to capture the shortest UTF sequence that satisfies these two conditions:
+					//
+					//  1 - If the sequence does not start at the beginning of the string, it must begin with a starter,
+					// and that starter must not have the NF[K]C_QC property equal to "MAYBE"
+					//
+					//  2 - If the sequence does not end at the end of the string, it must end with a non-starter and be
+					// immediately followed by a starter that is not on the QC list
+					//
+					$utf_seq = array();
+					$last_cc = 0;
+					$lpos = $pos;
+					$pos += $utf_len;
+
+					if (isset($decomp_map[$utf_char]))
 					{
-						$_utf_len =& $utf_len_mask[$decomp_map[$utf_char][$_pos] & "\xF0"];
+						$_pos = 0;
+						$_len = strlen($decomp_map[$utf_char]);
 
-						if (isset($_utf_len))
-						{
-							$utf_seq[] = substr($decomp_map[$utf_char], $_pos, $_utf_len);
-							$_pos += $_utf_len;
-						}
-						else
+						do
 						{
-							$utf_seq[] = $decomp_map[$utf_char][$_pos];
-							++$_pos;
+							$_utf_len =& $utf_len_mask[$decomp_map[$utf_char][$_pos] & "\xF0"];
+
+							if (isset($_utf_len))
+							{
+								$utf_seq[] = substr($decomp_map[$utf_char], $_pos, $_utf_len);
+								$_pos += $_utf_len;
+							}
+							else
+							{
+								$utf_seq[] = $decomp_map[$utf_char][$_pos];
+								++$_pos;
+							}
 						}
+						while ($_pos < $_len);
+					}
+					else
+					{
+						// The char is not decomposable
+						$utf_seq = array($utf_char);
 					}
-					while($_pos < $_len);
-				}
-				else
-				{
-					/**
-					* The char is not decomposable
-					*/
-					$utf_seq = array($utf_char);
-				}
 
 
-				////////////////////////////////////////////////////////////////
-				//                STEP 2: Capture the starter                 //
-				////////////////////////////////////////////////////////////////
+					// STEP 2: Capture the starter
 
-				/**
-				* Check out the combining class of the first character of the UTF sequence
-				*/
-				$k = 0;
-				if (isset($utf_combining_class[$utf_seq[0]]) || $qc[$utf_char] == UNICODE_QC_MAYBE)
-				{
-					/**
-					* Not a starter, inspect previous characters
-					*
-					* The last 8 characters are kept in a buffer so that we don't have
-					* to capture them everytime. This is enough for all real-life strings
-					* but even if it wasn't, we can capture characters in backward mode,
-					* although it is slower than this method.
-					*
-					* In the following loop, $j starts at the previous buffered character
-					* ($i - 1, because current character is at offset $i) and process them
-					* in backward mode until we find a starter.
-					*
-					* $k is the index on each UTF character inside of our UTF sequence.
-					* At this time, $utf_seq contains one or more characters numbered 0 to
-					* n. $k starts at 0 and for each char we prepend we pre-decrement it
-					* and for numbering
-					*/
-					$starter_found = 0;
-					$j_min = max(1, $i - 7);
-					for($j = $i - 1; $j >= $j_min && $lpos > $tmp_pos; --$j)
+					// Check out the combining class of the first character of the UTF sequence
+					$k = 0;
+					if (isset($utf_combining_class[$utf_seq[0]]) || $qc[$utf_char] == UNICODE_QC_MAYBE)
 					{
-						$utf_char = $buffer[$j & 7];
-						$lpos -= strlen($utf_char);
-
-						if (isset($decomp_map[$utf_char]))
+						// Not a starter, inspect previous characters
+						// The last 8 characters are kept in a buffer so that we don't have to capture them every time.
+						// This is enough for all real-life strings but even if it wasn't, we can capture characters in backward mode,
+						// although it is slower than this method.
+						//
+						// In the following loop, $j starts at the previous buffered character ($i - 1, because current character is
+						// at offset $i) and process them in backward mode until we find a starter.
+						//
+						// $k is the index of each UTF character inside of our UTF sequence. At this time, $utf_seq contains one or more
+						// characters numbered 0 to n. $k starts at 0 and is pre-decremented for each char we prepend, so prepended chars get negative indexes
+						$starter_found = 0;
+						$j_min = max(1, $i - 7);
+
+						for ($j = $i - 1; $j >= $j_min && $lpos > $tmp_pos; --$j)
 						{
-							/**
-							* The char is a composite, decompose for storage
-							*/
-							$decomp_seq = array();
-							$_pos = 0;
-							$_len = strlen($decomp_map[$utf_char]);
-							do
+							$utf_char = $buffer[$j & 7];
+							$lpos -= strlen($utf_char);
+
+							if (isset($decomp_map[$utf_char]))
 							{
-								$c = $decomp_map[$utf_char][$_pos];
-								$_utf_len =& $utf_len_mask[$c & "\xF0"];
+								// The char is a composite, decompose for storage
+								$decomp_seq = array();
+								$_pos = 0;
+								$_len = strlen($decomp_map[$utf_char]);
 
-								if (isset($_utf_len))
+								do
 								{
-									$decomp_seq[] = substr($decomp_map[$utf_char], $_pos, $_utf_len);
-									$_pos += $_utf_len;
+									$c = $decomp_map[$utf_char][$_pos];
+									$_utf_len =& $utf_len_mask[$c & "\xF0"];
+
+									if (isset($_utf_len))
+									{
+										$decomp_seq[] = substr($decomp_map[$utf_char], $_pos, $_utf_len);
+										$_pos += $_utf_len;
+									}
+									else
+									{
+										$decomp_seq[] = $c;
+										++$_pos;
+									}
 								}
-								else
+								while ($_pos < $_len);
+
+								// Prepend the UTF sequence with our decomposed sequence
+								if (isset($decomp_seq[1]))
 								{
-									$decomp_seq[] = $c;
-									++$_pos;
-								}
-							}
-							while($_pos < $_len);
+									// The char expanded into several chars
+									$decomp_cnt = sizeof($decomp_seq);
 
-							/**
-							* Prepend the UTF sequence with our decomposed sequence
-							*/
-							if (isset($decomp_seq[1]))
-							{
-								/**
-								* The char expanded into several chars
-								*/
-								$decomp_cnt = count($decomp_seq);
-								foreach($decomp_seq as $decomp_i => $decomp_char)
+									foreach ($decomp_seq as $decomp_i => $decomp_char)
+									{
+										$utf_seq[$k + $decomp_i - $decomp_cnt] = $decomp_char;
+									}
+									$k -= $decomp_cnt;
+								}
+								else
 								{
-									$utf_seq[$k + $decomp_i - $decomp_cnt] = $decomp_char;
+									// Decomposed to a single char, easier to prepend
+									$utf_seq[--$k] = $decomp_seq[0];
 								}
-								$k -= $decomp_cnt;
 							}
 							else
 							{
-								/**
-								* Decomposed to a single char, easier to prepend
-								*/
-								$utf_seq[--$k] = $decomp_seq[0];
+								$utf_seq[--$k] = $utf_char;
 							}
-						}
-						else
-						{
-							$utf_seq[--$k] = $utf_char;
-						}
 
-						if (!isset($utf_combining_class[$utf_seq[$k]]))
-						{
-							/**
-							* We have found our starter
-							*/
-							$starter_found = 1;
-							break;
+							if (!isset($utf_combining_class[$utf_seq[$k]]))
+							{
+								// We have found our starter
+								$starter_found = 1;
+								break;
+							}
 						}
-					}
 
-					if (!$starter_found && $lpos > $tmp_pos)
-					{
-						/**
-						* The starter was not found in the buffer, let's rewind some more
-						*/
-						do
+						if (!$starter_found && $lpos > $tmp_pos)
 						{
-							/**
-							* $utf_len_mask contains the masks of both leading bytes and
-							* trailing bytes. If $utf_en > 0 then it's a leading byte,
-							* otherwise it's a trailing byte.
-							*/
-							$c = $str[--$lpos];
-							$c_mask = $c & "\xF0";
-
-							if (isset($utf_len_mask[$c_mask]))
+							// The starter was not found in the buffer, let's rewind some more
+							do
 							{
-								/**
-								* UTF byte
-								*/
-								if ($utf_len = $utf_len_mask[$c_mask])
-								{
-									/**
-									* UTF *leading* byte
-									*/
-									$utf_char = substr($str, $lpos, $utf_len);
+								// $utf_len_mask contains the masks of both leading bytes and trailing bytes. If $utf_len > 0 then it's a leading byte, otherwise it's a trailing byte.
+								$c = $str[--$lpos];
+								$c_mask = $c & "\xF0";
 
-									if (isset($decomp_map[$utf_char]))
+								if (isset($utf_len_mask[$c_mask]))
+								{
+									// UTF byte
+									if ($utf_len = $utf_len_mask[$c_mask])
 									{
-										/**
-										* Decompose the character
-										*/
-										$decomp_seq = array();
-										$_pos = 0;
-										$_len = strlen($decomp_map[$utf_char]);
-										do
+										// UTF *leading* byte
+										$utf_char = substr($str, $lpos, $utf_len);
+
+										if (isset($decomp_map[$utf_char]))
 										{
-											$c = $decomp_map[$utf_char][$_pos];
-											$_utf_len =& $utf_len_mask[$c & "\xF0"];
+											// Decompose the character
+											$decomp_seq = array();
+											$_pos = 0;
+											$_len = strlen($decomp_map[$utf_char]);
 
-											if (isset($_utf_len))
+											do
 											{
-												$decomp_seq[] = substr($decomp_map[$utf_char], $_pos, $_utf_len);
-												$_pos += $_utf_len;
+												$c = $decomp_map[$utf_char][$_pos];
+												$_utf_len =& $utf_len_mask[$c & "\xF0"];
+
+												if (isset($_utf_len))
+												{
+													$decomp_seq[] = substr($decomp_map[$utf_char], $_pos, $_utf_len);
+													$_pos += $_utf_len;
+												}
+												else
+												{
+													$decomp_seq[] = $c;
+													++$_pos;
+												}
 											}
-											else
+											while ($_pos < $_len);
+
+											// Prepend the UTF sequence with our decomposed sequence
+											if (isset($decomp_seq[1]))
 											{
-												$decomp_seq[] = $c;
-												++$_pos;
+												// The char expanded into several chars
+												$decomp_cnt = sizeof($decomp_seq);
+												foreach ($decomp_seq as $decomp_i => $utf_char)
+												{
+													$utf_seq[$k + $decomp_i - $decomp_cnt] = $utf_char;
+												}
+												$k -= $decomp_cnt;
 											}
-										}
-										while($_pos < $_len);
-
-										/**
-										* Prepend the UTF sequence with our decomposed sequence
-										*/
-										if (isset($decomp_seq[1]))
-										{
-											/**
-											* The char expanded into several chars
-											*/
-											$decomp_cnt = count($decomp_seq);
-											foreach($decomp_seq as $decomp_i => $utf_char)
+											else
 											{
-												$utf_seq[$k + $decomp_i - $decomp_cnt] = $utf_char;
+												// Decomposed to a single char, easier to prepend
+												$utf_seq[--$k] = $decomp_seq[0];
 											}
-											$k -= $decomp_cnt;
 										}
 										else
 										{
-											/**
-											* Decomposed to a single char, easier to prepend
-											*/
-											$utf_seq[--$k] = $decomp_seq[0];
+											$utf_seq[--$k] = $utf_char;
 										}
 									}
-									else
-									{
-										$utf_seq[--$k] = $utf_char;
-									}
+								}
+								else
+								{
+									// ASCII char
+									$utf_seq[--$k] = $c;
 								}
 							}
-							else
-							{
-								/**
-								* ASCII char
-								*/
-								$utf_seq[--$k] = $c;
-							}
+							while ($lpos > $tmp_pos);
 						}
-						while($lpos > $tmp_pos);
 					}
-				}
 
 
-				////////////////////////////////////////////////////////////////
-				//       STEP 3: Capture following combining modifiers        //
-				////////////////////////////////////////////////////////////////
-
-				while($pos < $len)
-				{
-					$c_mask = $str[$pos] & "\xF0";
+					// STEP 3: Capture following combining modifiers
 
-					if (isset($utf_len_mask[$c_mask]))
+					while ($pos < $len)
 					{
-						if ($utf_len = $utf_len_mask[$c_mask])
-						{
-							$utf_char = substr($str, $pos, $utf_len);
-						}
-						else
-						{
-							/**
-							* A trailing byte came out of nowhere
-							*
-							* Trailing bytes are replaced with Unicode replacement chars,
-							* we will just ignore it for now, break out of the loop
-							* as if it was a starter (replacement chars ARE starters)
-							* and let the next loop replace it
-							*/
-							break;
-						}
+						$c_mask = $str[$pos] & "\xF0";
 
-						if (isset($utf_combining_class[$utf_char]) || isset($qc[$utf_char]))
+						if (isset($utf_len_mask[$c_mask]))
 						{
-							/**
-							* Combining character, add it to the sequence and move the cursor
-							*/
-							if (isset($decomp_map[$utf_char]))
+							if ($utf_len = $utf_len_mask[$c_mask])
 							{
-								/**
-								* Decompose the character
-								*/
-								$_pos = 0;
-								$_len = strlen($decomp_map[$utf_char]);
-								do
+								$utf_char = substr($str, $pos, $utf_len);
+							}
+							else
+							{
+								// A trailing byte came out of nowhere
+								// Trailing bytes are replaced with Unicode replacement chars, we will just ignore it for now, break out of the loop
+								// as if it was a starter (replacement chars ARE starters) and let the next loop replace it
+								break;
+							}
+
+							if (isset($utf_combining_class[$utf_char]) || isset($qc[$utf_char]))
+							{
+								// Combining character, add it to the sequence and move the cursor
+								if (isset($decomp_map[$utf_char]))
 								{
-									$c = $decomp_map[$utf_char][$_pos];
-									$_utf_len =& $utf_len_mask[$c & "\xF0"];
+									// Decompose the character
+									$_pos = 0;
+									$_len = strlen($decomp_map[$utf_char]);
 
-									if (isset($_utf_len))
-									{
-										$utf_seq[] = substr($decomp_map[$utf_char], $_pos, $_utf_len);
-										$_pos += $_utf_len;
-									}
-									else
+									do
 									{
-										$utf_seq[] = $c;
-										++$_pos;
+										$c = $decomp_map[$utf_char][$_pos];
+										$_utf_len =& $utf_len_mask[$c & "\xF0"];
+
+										if (isset($_utf_len))
+										{
+											$utf_seq[] = substr($decomp_map[$utf_char], $_pos, $_utf_len);
+											$_pos += $_utf_len;
+										}
+										else
+										{
+											$utf_seq[] = $c;
+											++$_pos;
+										}
 									}
+									while ($_pos < $_len);
+								}
+								else
+								{
+									$utf_seq[] = $utf_char;
 								}
-								while($_pos < $_len);
+
+								$pos += $utf_len;
 							}
 							else
 							{
-								$utf_seq[] = $utf_char;
+								// Combining class 0 and no QC, break out of the loop
+								// Note: we do not know if that character is valid. If it's not, the next iteration will replace it
+								break;
 							}
-
-							$pos += $utf_len;
 						}
 						else
 						{
-							/**
-							* Combining class 0 and no QC, break out of the loop
-							*
-							* Note: we do not know if that character is valid. If
-							* it's not, the next iteration will replace it
-							*/
+							// ASCII chars are starters
 							break;
 						}
 					}
-					else
-					{
-						/**
-						* ASCII chars are starters
-						*/
-						break;
-					}
-				}
 
 
-				////////////////////////////////////////////////////////////////
-				//                  STEP 4: Sort and combine                  //
-				////////////////////////////////////////////////////////////////
+					// STEP 4: Sort and combine
 
-				/**
-				* Here we sort...
-				*/
-				$k_max = $k + count($utf_seq);
-				if (!$k && $k_max == 1)
-				{
-					/**
-					* There is only one char in the UTF sequence, add it then
-					* jump to the next iteration of main loop
-					*
-					* Note: the two commented lines below can be enabled under PHP5
-					* for a very small performance gain in most cases
-					*/
-//					if (substr_compare($str, $utf_seq[0], $lpos, $pos - $lpos))
-//					{
-						$tmp .= substr($str, $tmp_pos, $lpos - $tmp_pos) . $utf_seq[0];
-						$tmp_pos = $pos;
-//					}
-
-					continue;
-				}
-
-				/**
-				* ...there we combine
-				*/
-				if (isset($utf_combining_class[$utf_seq[$k]]))
-				{
-					$starter = $nf_seq = '';
-				}
-				else
-				{
-					$starter = $utf_seq[$k++];
-					$nf_seq = '';
-				}
-				$utf_sort = array();
+					// Here we sort...
+					$k_max = $k + sizeof($utf_seq);
 
-				/**
-				* We add an empty char at the end of the UTF char sequence.
-				* It will act as a starter and trigger the sort/combine routine
-				* at the end of the string without altering it
-				*/
-				$utf_seq[] = '';
+					if (!$k && $k_max == 1)
+					{
+						// There is only one char in the UTF sequence, add it then jump to the next iteration of main loop
+						// Note: the two commented lines below can be enabled under PHP5 for a very small performance gain in most cases
+//						if (substr_compare($str, $utf_seq[0], $lpos, $pos - $lpos))
+//						{
+							$tmp .= substr($str, $tmp_pos, $lpos - $tmp_pos) . $utf_seq[0];
+							$tmp_pos = $pos;
+//						}
 
-				do
-				{
-					$utf_char = $utf_seq[$k++];
+						continue;
+					}
 
-					if (isset($utf_combining_class[$utf_char]))
+					// ...there we combine
+					if (isset($utf_combining_class[$utf_seq[$k]]))
 					{
-						$utf_sort[$utf_combining_class[$utf_char]][] = $utf_char;
+						$starter = $nf_seq = '';
 					}
 					else
 					{
-						if (empty($utf_sort))
+						$starter = $utf_seq[$k++];
+						$nf_seq = '';
+					}
+					$utf_sort = array();
+
+					// We add an empty char at the end of the UTF char sequence. It will act as a starter and trigger the sort/combine routine
+					// at the end of the string without altering it
+					$utf_seq[] = '';
+
+					do
+					{
+						$utf_char = $utf_seq[$k++];
+
+						if (isset($utf_combining_class[$utf_char]))
 						{
-							/**
-							* No combining characters... check for a composite
-							* of the two starters
-							*/
-							if (isset($utf_canonical_comp[$starter . $utf_char]))
-							{
-								/**
-								* Good ol' composite character
-								*/
-								$starter = $utf_canonical_comp[$starter . $utf_char];
-							}
-							elseif (isset($utf_jamo_type[$utf_char]))
+							$utf_sort[$utf_combining_class[$utf_char]][] = $utf_char;
+						}
+						else
+						{
+							if (empty($utf_sort))
 							{
-								/**
-								* Current char is a composable jamo
-								*/
-								if (isset($utf_jamo_type[$starter])
-								 && $utf_jamo_type[$starter] == UNICODE_JAMO_L
-								 && $utf_jamo_type[$utf_char] == UNICODE_JAMO_V)
+								// No combining characters... check for a composite of the two starters
+								if (isset($utf_canonical_comp[$starter . $utf_char]))
 								{
-									/**
-									* We have a L jamo followed by a V jamo, we are going
-									* to prefetch the next char to see if it's a T jamo
-									*/
-									if (isset($utf_jamo_type[$utf_seq[$k]]) && $utf_jamo_type[$utf_seq[$k]] == UNICODE_JAMO_T)
+									// Good ol' composite character
+									$starter = $utf_canonical_comp[$starter . $utf_char];
+								}
+								else if (isset($utf_jamo_type[$utf_char]))
+								{
+									// Current char is a composable jamo
+									if (isset($utf_jamo_type[$starter]) && $utf_jamo_type[$starter] == UNICODE_JAMO_L && $utf_jamo_type[$utf_char] == UNICODE_JAMO_V)
 									{
-										/**
-										* L+V+T jamos, combine to a LVT Hangul syllable
-										* ($k is incremented)
-										*/
-										$cp = $utf_jamo_index[$starter] + $utf_jamo_index[$utf_char] + $utf_jamo_index[$utf_seq[$k]];
+										// We have a L jamo followed by a V jamo, we are going to prefetch the next char to see if it's a T jamo
+										if (isset($utf_jamo_type[$utf_seq[$k]]) && $utf_jamo_type[$utf_seq[$k]] == UNICODE_JAMO_T)
+										{
+											// L+V+T jamos, combine to a LVT Hangul syllable ($k is incremented)
+											$cp = $utf_jamo_index[$starter] + $utf_jamo_index[$utf_char] + $utf_jamo_index[$utf_seq[$k]];
+											++$k;
+										}
+										else
+										{
+											// L+V jamos, combine to a LV Hangul syllable
+											$cp = $utf_jamo_index[$starter] + $utf_jamo_index[$utf_char];
+										}
 
-										++$k;
+										$starter = chr(0xE0 | ($cp >> 12)) . chr(0x80 | (($cp >> 6) & 0x3F)) . chr(0x80 | ($cp & 0x3F));
 									}
 									else
 									{
-										/**
-										* L+V jamos, combine to a LV Hangul syllable
-										*/
-										$cp = $utf_jamo_index[$starter] + $utf_jamo_index[$utf_char];
+										// Non-composable jamo, just add it to the sequence
+										$nf_seq .= $starter;
+										$starter = $utf_char;
 									}
-
-									$starter = chr(0xE0 | ($cp >> 12)) . chr(0x80 | (($cp >> 6) & 0x3F)) . chr(0x80 | ($cp & 0x3F));
 								}
 								else
 								{
-									/**
-									* Non-composable jamo, just add it to the sequence
-									*/
+									// No composite, just add the first starter to the sequence then continue with the other one
 									$nf_seq .= $starter;
 									$starter = $utf_char;
 								}
 							}
 							else
 							{
-								/**
-								* No composite, just add the first starter to the sequence
-								* then continue with the other one
-								*/
-								$nf_seq .= $starter;
-								$starter = $utf_char;
-							}
-						}
-						else
-						{
-							ksort($utf_sort);
-
-							/**
-							* For each class of combining characters
-							*/
-							foreach($utf_sort as $cc => $utf_chars)
-							{
-								$j = 0;
+								ksort($utf_sort);
 
-								do
+								// For each class of combining characters
+								foreach ($utf_sort as $cc => $utf_chars)
 								{
-									/**
-									* Look for a composite
-									*/
-									if (isset($utf_canonical_comp[$starter . $utf_chars[$j]]))
-									{
-										/**
-										* Found a composite, replace the starter
-										*/
-										$starter = $utf_canonical_comp[$starter . $utf_chars[$j]];
-										unset($utf_sort[$cc][$j]);
-									}
-									else
+									$j = 0;
+
+									do
 									{
-										/**
-										* No composite, all following characters in that
-										* class are blocked
-										*/
-										break;
+										// Look for a composite
+										if (isset($utf_canonical_comp[$starter . $utf_chars[$j]]))
+										{
+											// Found a composite, replace the starter
+											$starter = $utf_canonical_comp[$starter . $utf_chars[$j]];
+											unset($utf_sort[$cc][$j]);
+										}
+										else
+										{
+											// No composite, all following characters in that class are blocked
+											break;
+										}
 									}
+									while (isset($utf_sort[$cc][++$j]));
 								}
-								while(isset($utf_sort[$cc][++$j]));
-							}
 
-							/**
-							* Add the starter to the normalized sequence, followed by
-							* non-starters in canonical order
-							*/
-							$nf_seq .= $starter;
-							foreach($utf_sort as $utf_chars)
-							{
-								if (!empty($utf_chars))
+								// Add the starter to the normalized sequence, followed by non-starters in canonical order
+								$nf_seq .= $starter;
+
+								foreach ($utf_sort as $utf_chars)
 								{
-									$nf_seq .= implode('', $utf_chars);
+									if (!empty($utf_chars))
+									{
+										$nf_seq .= implode('', $utf_chars);
+									}
 								}
+
+								// Reset the array and go on
+								$utf_sort = array();
+								$starter = $utf_char;
 							}
+						}
+					}
+					while ($k <= $k_max);
 
-							/**
-							* Reset the array and go on
-							*/
-							$utf_sort = array();
-							$starter = $utf_char;
+					$tmp .= substr($str, $tmp_pos, $lpos - $tmp_pos) . $nf_seq;
+					$tmp_pos = $pos;
+				}
+				else
+				{
+					// Only an ASCII char can make the program get here
+					//
+					// First we skip the current byte with ++$pos, then we quickly skip following ASCII chars with strspn().
+					//
+					// The first two "if"'s here can be removed, with the consequences of being faster on latin text (lots of ASCII) and slower on
+					// multi-byte text (where the only ASCII chars are spaces and punctuation)
+					if (++$pos != $len)
+					{
+						if ($str[$pos] < "\x80")
+						{
+							$pos += strspn($str, UTF8_ASCII_RANGE, ++$pos);
+							$buffer[++$i & 7] = $str[$pos - 1];
+						}
+						else
+						{
+							$buffer[++$i & 7] = $c;
 						}
 					}
 				}
-				while($k <= $k_max);
-
-				$tmp .= substr($str, $tmp_pos, $lpos - $tmp_pos) . $nf_seq;
-				$tmp_pos = $pos;
 			}
-			else
+			while ($pos < $len);
+
+			// Now is time to return the string
+			if ($tmp_pos)
 			{
-				/**
-				* Only a ASCII char can make the program get here
-				*
-				* First we skip the current byte with ++$pos, then we quickly
-				* skip following ASCII chars with strspn().
-				*
-				* The first two "if"'s here can be removed, with the consequences
-				* of being faster on latin text (lots of ASCII) and slower on
-				* multi-byte text (where the only ASCII chars are spaces and punctuation)
-				*/
-				if (++$pos != $len)
+				// If the $tmp_pos cursor is not at the beginning of the string then at least one character was not in normal form. Replace $str with the fixed version
+				if ($tmp_pos == $len)
 				{
-					if ($str[$pos] < "\x80")
-					{
-						$pos += strspn($str, UTF8_ASCII_RANGE, ++$pos);
-						$buffer[++$i & 7] = $str[$pos - 1];
-					}
-					else
-					{
-						$buffer[++$i & 7] = $c;
-					}
+					// The $tmp_pos cursor is at the end of $str, therefore $tmp holds the whole $str
+					return $tmp;
+				}
+				else
+				{
+					// The rightmost chunk of $str has not been appended to $tmp yet
+					return $tmp . substr($str, $tmp_pos);
 				}
 			}
+
+			// The string was already in normal form
+			return $str;
 		}
-		while($pos < $len);
 
 		/**
-		* Now is time to return the string
+		* Decompose a UTF string
+		*
+		* @param	string	$str		UTF string
+		* @param	integer	$pos		Position of the first UTF char (in bytes)
+		* @param	integer	$len		Length of the string (in bytes)
+		* @param	array	$decomp_map	Decomposition mapping, passed by reference but never modified
+		* @return	string				The string, decomposed and sorted canonically
+		*
+		* @access	private
 		*/
-		if ($tmp_pos)
+		function decompose($str, $pos, $len, &$decomp_map)
 		{
-			/**
-			* If the $tmp_pos cursor is not at the beggining of the string then at least
-			* one character was not in normal form. Replace $str with the fixed version
-			*/
-			if ($tmp_pos == $len)
-			{
-				/**
-				* The $tmp_pos cursor is at the end of $str, therefore $tmp holds the
-				* whole $str
-				*/
-				return $tmp;
-			}
-			else
+			global $utf_combining_class, $utf_canonical_decomp, $phpbb_root_path;
+
+			// Load some commonly-used tables
+			if (!isset($utf_combining_class))
 			{
-				/**
-				* The rightmost chunk of $str has not been appended to $tmp yet
-				*/
-				return $tmp . substr($str, $tmp_pos);
+				include($phpbb_root_path . 'includes/utf/data/utf_normalizer_common.php');
 			}
-		}
-
-		/**
-		* The string was already in normal form
-		*/
-		return $str;
-	}
 
-	/**
-	* Decompose a UTF string
-	*
-	* @param	string	$str		UTF string
-	* @param	integer	$pos		Position of the first UTF char (in bytes)
-	* @param	integer	$len		Length of the string (in bytes)
-	* @param	array	$decomp_map	Decomposition mapping, passed by reference but never modified
-	* @return	string				The string, decomposed and sorted canonically
-	*
-	* @access	private
-	*/
-	function decompose($str, $pos, $len, &$decomp_map)
-	{
-		global $utf_combining_class, $utf_canonical_decomp, $phpbb_root_path;
+			// UTF char length array
+			$utf_len_mask = array(
+				// Leading bytes masks
+				"\xC0" => 2, "\xD0" => 2, "\xE0" => 3, "\xF0" => 4,
+				// Trailing bytes masks
+				"\x80" => 0, "\x90" => 0, "\xA0" => 0, "\xB0" => 0
+			);
 
-		/**
-		* Load some commonly-used tables
-		*/
-		if (!isset($utf_combining_class))
-		{
-			include($phpbb_root_path . 'includes/utf/data/utf_normalizer_common.php');
-		}
+			// Some extra checks are triggered on the first byte of a UTF sequence
+			$extra_check = array(
+				"\xED" => 1, "\xEF" => 1, "\xC0" => 1, "\xC1" => 1, "\xE0" => 1, "\xF0" => 1,
+				"\xF4" => 1, "\xF5" => 1, "\xF6" => 1, "\xF7" => 1, "\xF8" => 1, "\xF9" => 1,
+				"\xFA" => 1, "\xFB" => 1, "\xFC" => 1, "\xFD" => 1, "\xFE" => 1, "\xFF" => 1
+			);
 
-		/**
-		* UTF char length array
-		*/
-		$utf_len_mask = array(
-			/**
-			* Leading bytes masks
-			*/
-			"\xC0" => 2, "\xD0" => 2, "\xE0" => 3, "\xF0" => 4,
+			// These masks are used to check if a UTF sequence is well formed. Here are the only 3 lengths we acknowledge:
+			//   - 2-byte: 110? ???? 10?? ????
+			//   - 3-byte: 1110 ???? 10?? ???? 10?? ????
+			//   - 4-byte: 1111 0??? 10?? ???? 10?? ???? 10?? ????
+			// Note that 5- and 6- byte sequences are automatically discarded
+			$utf_validation_mask = array(
+				2	=> "\xE0\xC0",
+				3	=> "\xF0\xC0\xC0",
+				4	=> "\xF8\xC0\xC0\xC0"
+			);
 
-			/**
-			* Trailing bytes masks
-			*/
-			"\x80" => 0, "\x90" => 0, "\xA0" => 0, "\xB0" => 0
-		);
+			$utf_validation_check = array(
+				2	=> "\xC0\x80",
+				3	=> "\xE0\x80\x80",
+				4	=> "\xF0\x80\x80\x80"
+			);
 
-		/**
-		* Some extra checks are triggered on the first byte of a UTF sequence
-		*/
-		$extra_check = array(
-			"\xED"=>1, "\xEF"=>1, "\xC0"=>1, "\xC1"=>1, "\xE0"=>1, "\xF0"=>1,
-			"\xF4"=>1, "\xF5"=>1, "\xF6"=>1, "\xF7"=>1, "\xF8"=>1, "\xF9"=>1,
-			"\xFA"=>1, "\xFB"=>1, "\xFC"=>1, "\xFD"=>1, "\xFE"=>1, "\xFF"=>1
-		);
+			$tmp = '';
+			$starter_pos = $pos;
+			$tmp_pos = $last_cc = $sort = $dump = 0;
+			$utf_sort = array();
 
-		/**
-		* These masks are used to check if a UTF sequence is well formed.
-		* Here are the only 3 lengths we acknowledge:
-		*   - 2-byte: 110? ???? 10?? ????
-		*   - 3-byte: 1110 ???? 10?? ???? 10?? ????
-		*   - 4-byte: 1111 0??? 10?? ???? 10?? ???? 10?? ????
-		*
-		* Note that 5- and 6- byte sequences are automatically discarded
-		*/
-		$utf_validation_mask = array(
-			2	=>	"\xE0\xC0",
-			3	=>	"\xF0\xC0\xC0",
-			4	=>	"\xF8\xC0\xC0\xC0"
-		);
-		$utf_validation_check = array(
-			2	=>	"\xC0\x80",
-			3	=>	"\xE0\x80\x80",
-			4	=>	"\xF0\x80\x80\x80"
-		);
-
-		$tmp = '';
-		$starter_pos = $pos;
-		$tmp_pos = $last_cc = $sort = $dump = 0;
-		$utf_sort = array();
-
-
-		////////////////////////////////////////////////////////////////////////
-		//                             Main loop                              //
-		////////////////////////////////////////////////////////////////////////
-
-		do
-		{
-			////////////////////////////////////////////////////////////////////
-			//                STEP 0: Capture the current char                //
-			////////////////////////////////////////////////////////////////////
 
-			$cur_mask = $str[$pos] & "\xF0";
-			if (isset($utf_len_mask[$cur_mask]))
+			// Main loop
+			do
 			{
-				if ($utf_len = $utf_len_mask[$cur_mask])
-				{
-					/**
-					* Multibyte char
-					*/
-					$utf_char = substr($str, $pos, $utf_len);
-					$pos += $utf_len;
-				}
-				else
-				{
-					/**
-					* A trailing byte came out of nowhere, we will treat it and all
-					* following trailing bytes as if each of them was a Unicode
-					* replacement char and we will advance the cursor
-					*/
-					$spn = strspn($str, UTF8_TRAILING_BYTES, $pos);
+				// STEP 0: Capture the current char
 
-					if ($dump)
+				$cur_mask = $str[$pos] & "\xF0";
+				if (isset($utf_len_mask[$cur_mask]))
+				{
+					if ($utf_len = $utf_len_mask[$cur_mask])
 					{
-						$tmp .= substr($str, $tmp_pos, $starter_pos - $tmp_pos);
+						// Multibyte char
+						$utf_char = substr($str, $pos, $utf_len);
+						$pos += $utf_len;
+					}
+					else
+					{
+						// A trailing byte came out of nowhere, we will treat it and all following trailing bytes as if each of them was a Unicode
+						// replacement char and we will advance the cursor
+						$spn = strspn($str, UTF8_TRAILING_BYTES, $pos);
 
-						/**
-						* Dump combiners
-						*/
-						if (!empty($utf_sort))
+						if ($dump)
 						{
-							if ($sort)
+							$tmp .= substr($str, $tmp_pos, $starter_pos - $tmp_pos);
+
+							// Dump combiners
+							if (!empty($utf_sort))
 							{
-								ksort($utf_sort);
+								if ($sort)
+								{
+									ksort($utf_sort);
+								}
+
+								foreach($utf_sort as $utf_chars)
+								{
+									$tmp .= implode('', $utf_chars);
+								}
 							}
 
-							foreach($utf_sort as $utf_chars)
-							{
-								$tmp .= implode('', $utf_chars);
-							}
+							$tmp .= str_repeat(UTF8_REPLACEMENT, $spn);
+							$dump = $sort = 0;
+						}
+						else
+						{
+							$tmp .= substr($str, $tmp_pos, $pos - $tmp_pos) . str_repeat(UTF8_REPLACEMENT, $spn);
 						}
 
-						$tmp .= str_repeat(UTF8_REPLACEMENT, $spn);
-						$dump = $sort = 0;
-					}
-					else
-					{
-						$tmp .= substr($str, $tmp_pos, $pos - $tmp_pos) . str_repeat(UTF8_REPLACEMENT, $spn);
-					}
-
-					$pos += $spn;
-					$tmp_pos = $starter_pos = $pos;
-
-					$utf_sort = array();
-					$last_cc = 0;
+						$pos += $spn;
+						$tmp_pos = $starter_pos = $pos;
 
-					continue;
-				}
+						$utf_sort = array();
+						$last_cc = 0;
 
+						continue;
+					}
 
-				////////////////////////////////////////////////////////////////////
-				//          STEP 1: Decide what to do with current char           //
-				////////////////////////////////////////////////////////////////////
 
-				/**
-				* Now, in that order:
-				*  - check if that character is decomposable
-				*  - check if that character is a non-starter
-				*  - check if that character requires extra checks to be performed
-				*/
-				if (isset($decomp_map[$utf_char]))
-				{
-					/**
-					* Decompose the char
-					*/
-					$_pos = 0;
-					$_len = strlen($decomp_map[$utf_char]);
+					// STEP 1: Decide what to do with current char
 
-					do
+					// Now, in that order:
+					//  - check if that character is decomposable
+					//  - check if that character is a non-starter
+					//  - check if that character requires extra checks to be performed
+					if (isset($decomp_map[$utf_char]))
 					{
-						$c = $decomp_map[$utf_char][$_pos];
-						$_utf_len =& $utf_len_mask[$c & "\xF0"];
+						// Decompose the char
+						$_pos = 0;
+						$_len = strlen($decomp_map[$utf_char]);
 
-						if (isset($_utf_len))
+						do
 						{
-							$_utf_char = substr($decomp_map[$utf_char], $_pos, $_utf_len);
-							$_pos += $_utf_len;
+							$c = $decomp_map[$utf_char][$_pos];
+							$_utf_len =& $utf_len_mask[$c & "\xF0"];
 
-							if (isset($utf_combining_class[$_utf_char]))
+							if (isset($_utf_len))
 							{
-								/**
-								* The character decomposed to a non-starter, buffer it for sorting
-								*/
-								$utf_sort[$utf_combining_class[$_utf_char]][] = $_utf_char;
+								$_utf_char = substr($decomp_map[$utf_char], $_pos, $_utf_len);
+								$_pos += $_utf_len;
 
-								if ($utf_combining_class[$_utf_char] < $last_cc)
+								if (isset($utf_combining_class[$_utf_char]))
 								{
-									/**
-									* Not canonically ordered, will require sorting
-									*/
-									$sort = $dump = 1;
+									// The character decomposed to a non-starter, buffer it for sorting
+									$utf_sort[$utf_combining_class[$_utf_char]][] = $_utf_char;
+
+									if ($utf_combining_class[$_utf_char] < $last_cc)
+									{
+										// Not canonically ordered, will require sorting
+										$sort = $dump = 1;
+									}
+									else
+									{
+										$dump = 1;
+										$last_cc = $utf_combining_class[$_utf_char];
+									}
 								}
 								else
 								{
-									$dump = 1;
-									$last_cc = $utf_combining_class[$_utf_char];
+									// This character decomposition contains a starter, dump the buffer and continue
+									if ($dump)
+									{
+										$tmp .= substr($str, $tmp_pos, $starter_pos - $tmp_pos);
+
+										// Dump combiners
+										if (!empty($utf_sort))
+										{
+											if ($sort)
+											{
+												ksort($utf_sort);
+											}
+
+											foreach ($utf_sort as $utf_chars)
+											{
+												$tmp .= implode('', $utf_chars);
+											}
+										}
+
+										$tmp .= $_utf_char;
+										$dump = $sort = 0;
+									}
+									else
+									{
+										$tmp .= substr($str, $tmp_pos, $starter_pos - $tmp_pos) . $_utf_char;
+									}
+
+									$tmp_pos = $starter_pos = $pos;
+									$utf_sort = array();
+									$last_cc = 0;
 								}
 							}
 							else
 							{
-								/**
-								* This character decomposition contains a starter,
-								* dump the buffer and continue
-								*/
+								// This character decomposition contains an ASCII char, which is a starter. Dump the buffer and continue
+								++$_pos;
+
 								if ($dump)
 								{
 									$tmp .= substr($str, $tmp_pos, $starter_pos - $tmp_pos);
 
-									/**
-									* Dump combiners
-									*/
+									// Dump combiners
 									if (!empty($utf_sort))
 									{
 										if ($sort)
@@ -1472,18 +1213,18 @@ class utf_normalizer
 											ksort($utf_sort);
 										}
 
-										foreach($utf_sort as $utf_chars)
+										foreach ($utf_sort as $utf_chars)
 										{
 											$tmp .= implode('', $utf_chars);
 										}
 									}
 
-									$tmp .= $_utf_char;
+									$tmp .= $c;
 									$dump = $sort = 0;
 								}
 								else
 								{
-									$tmp .= substr($str, $tmp_pos, $starter_pos - $tmp_pos) . $_utf_char;
+									$tmp .= substr($str, $tmp_pos, $pos - $utf_len - $tmp_pos) . $c;
 								}
 
 								$tmp_pos = $starter_pos = $pos;
@@ -1491,350 +1232,290 @@ class utf_normalizer
 								$last_cc = 0;
 							}
 						}
+						while ($_pos < $_len);
+					}
+					else if (isset($utf_combining_class[$utf_char]))
+					{
+						// Combining character
+						if ($utf_combining_class[$utf_char] < $last_cc)
+						{
+							// Not in canonical order
+							$sort = $dump = 1;
+						}
 						else
 						{
-							/**
-							* This character decomposition contains an ASCII char,
-							* which is a starter. Dump the buffer and continue
-							*/
-							++$_pos;
-							if ($dump)
+							$last_cc = $utf_combining_class[$utf_char];
+						}
+
+						$utf_sort[$utf_combining_class[$utf_char]][] = $utf_char;
+					}
+					else
+					{
+						// Non-decomposable starter, check out if it's a Hangul syllable
+						if ($utf_char < UTF8_HANGUL_FIRST || $utf_char > UTF8_HANGUL_LAST)
+						{
+							// Nope, regular UTF char, check that we have the correct number of trailing bytes
+							if (($utf_char & $utf_validation_mask[$utf_len]) != $utf_validation_check[$utf_len])
 							{
+								// Current char isn't well-formed or legal: either one or several trailing bytes are missing, or the Unicode char
+								// has been encoded in a five- or six- byte sequence.
+								// Move the cursor back to its original position then advance it to the position it should really be at
+								$pos -= $utf_len;
 								$tmp .= substr($str, $tmp_pos, $starter_pos - $tmp_pos);
 
-								/**
-								* Dump combiners
-								*/
 								if (!empty($utf_sort))
 								{
-									if ($sort)
-									{
-										ksort($utf_sort);
-									}
+									ksort($utf_sort);
 
-									foreach($utf_sort as $utf_chars)
+									foreach ($utf_sort as $utf_chars)
 									{
 										$tmp .= implode('', $utf_chars);
 									}
+									$utf_sort = array();
 								}
 
-								$tmp .= $c;
-								$dump = $sort = 0;
-							}
-							else
-							{
-								$tmp .= substr($str, $tmp_pos, $pos - $utf_len - $tmp_pos) . $c;
-							}
-
-							$tmp_pos = $starter_pos = $pos;
-							$utf_sort = array();
-							$last_cc = 0;
-						}
-					}
-					while($_pos < $_len);
-				}
-				elseif (isset($utf_combining_class[$utf_char]))
-				{
-					/**
-					* Combining character
-					*/
-					if ($utf_combining_class[$utf_char] < $last_cc)
-					{
-						/**
-						* Not in canonical order
-						*/
-						$sort = $dump = 1;
-					}
-					else
-					{
-						$last_cc = $utf_combining_class[$utf_char];
-					}
-
-					$utf_sort[$utf_combining_class[$utf_char]][] = $utf_char;
-				}
-				else
-				{
-					/**
-					* Non-decomposable starter, check out if it's a Hangul syllable
-					*/
-					if ($utf_char < UTF8_HANGUL_FIRST || $utf_char > UTF8_HANGUL_LAST)
-					{
-						/**
-						* Nope, regular UTF char, check that we have the correct number of trailing bytes
-						*/
-						if (($utf_char & $utf_validation_mask[$utf_len]) != $utf_validation_check[$utf_len])
-						{
-							/**
-							* Current char isn't well-formed or legal: either one or
-							* several trailing bytes are missing, or the Unicode char
-							* has been encoded in a five- or six- byte sequence.
-							*
-							* Move the cursor back to its original position then advance
-							* it to the position it should really be at
-							*/
-							$pos -= $utf_len;
-							$tmp .= substr($str, $tmp_pos, $starter_pos - $tmp_pos);
+								// Add a replacement char then another replacement char for every trailing byte.
+								//
+								// @todo I'm not entirely sure that's how we're supposed to mark invalidated byte sequences, check this
+								$spn = strspn($str, UTF8_TRAILING_BYTES, ++$pos);
+								$tmp .= str_repeat(UTF8_REPLACEMENT, $spn + 1);
 
-							if (!empty($utf_sort))
-							{
-								ksort($utf_sort);
+								$dump = $sort = 0;
 
-								foreach($utf_sort as $utf_chars)
-								{
-									$tmp .= implode('', $utf_chars);
-								}
-								$utf_sort = array();
+								$pos += $spn;
+								$tmp_pos = $pos;
+								continue;
 							}
 
-							/**
-							* Add a replacement char then another replacement char for
-							* every trailing byte.
-							*
-							* @todo I'm not entirely sure that's how we're supposed to
-							* mark invalidated byte sequences, check this
-							*/
-							$spn = strspn($str, UTF8_TRAILING_BYTES, ++$pos);
-							$tmp .= str_repeat(UTF8_REPLACEMENT, $spn + 1);
-
-							$dump = $sort = 0;
-
-							$pos += $spn;
-							$tmp_pos = $pos;
-							continue;
-						}
-
-						if (isset($extra_check[$utf_char[0]]))
-						{
-							switch($utf_char[0])
+							if (isset($extra_check[$utf_char[0]]))
 							{
-								/**
-								* Note: 0xED is quite common in Korean
-								*/
-								case "\xED":
-									if ($utf_char >= "\xED\xA0\x80")
-									{
-										/**
-										* Surrogates (U+D800..U+DFFF) are not allowed in UTF-8
-										* (UTF sequence 0xEDA080..0xEDBFBF)
-										*/
-										$tmp .= substr($str, $tmp_pos, $starter_pos - $tmp_pos);
-
-										if (!empty($utf_sort))
+								switch ($utf_char[0])
+								{
+									// Note: 0xED is quite common in Korean
+									case "\xED":
+										if ($utf_char >= "\xED\xA0\x80")
 										{
-											ksort($utf_sort);
+											// Surrogates (U+D800..U+DFFF) are not allowed in UTF-8 (UTF sequence 0xEDA080..0xEDBFBF)
+											$tmp .= substr($str, $tmp_pos, $starter_pos - $tmp_pos);
 
-											foreach($utf_sort as $utf_chars)
+											if (!empty($utf_sort))
 											{
-												$tmp .= implode('', $utf_chars);
+												ksort($utf_sort);
+
+												foreach ($utf_sort as $utf_chars)
+												{
+													$tmp .= implode('', $utf_chars);
+												}
+												$utf_sort = array();
 											}
-											$utf_sort = array();
-										}
 
-										$tmp .= UTF8_REPLACEMENT;
-										$dump = $sort = 0;
+											$tmp .= UTF8_REPLACEMENT;
+											$dump = $sort = 0;
 
-										$tmp_pos = $starter_pos = $pos;
-										continue 2;
-									}
+											$tmp_pos = $starter_pos = $pos;
+											continue 2;
+										}
 									break;
 
-								/**
-								* Note: 0xEF is quite common in Japanese
-								*/
-								case "\xEF":
-									if ($utf_char == "\xEF\xBF\xBE" || $utf_char == "\xEF\xBF\xBF")
-									{
-										/**
-										* U+FFFE and U+FFFF are explicitly disallowed
-										* (UTF sequence 0xEFBFBE..0xEFBFBF)
-										*/
-										$tmp .= substr($str, $tmp_pos, $starter_pos - $tmp_pos);
-
-										if (!empty($utf_sort))
+									// Note: 0xEF is quite common in Japanese
+									case "\xEF":
+										if ($utf_char == "\xEF\xBF\xBE" || $utf_char == "\xEF\xBF\xBF")
 										{
-											ksort($utf_sort);
+											// U+FFFE and U+FFFF are explicitly disallowed (UTF sequence 0xEFBFBE..0xEFBFBF)
+											$tmp .= substr($str, $tmp_pos, $starter_pos - $tmp_pos);
 
-											foreach($utf_sort as $utf_chars)
+											if (!empty($utf_sort))
 											{
-												$tmp .= implode('', $utf_chars);
+												ksort($utf_sort);
+
+												foreach ($utf_sort as $utf_chars)
+												{
+													$tmp .= implode('', $utf_chars);
+												}
+												$utf_sort = array();
 											}
-											$utf_sort = array();
-										}
 
-										$tmp .= UTF8_REPLACEMENT;
-										$dump = $sort = 0;
+											$tmp .= UTF8_REPLACEMENT;
+											$dump = $sort = 0;
 
-										$tmp_pos = $starter_pos = $pos;
-										continue 2;
-									}
+											$tmp_pos = $starter_pos = $pos;
+											continue 2;
+										}
 									break;
 
-								case "\xC0":
-								case "\xC1":
-									if ($utf_char <= "\xC1\xBF")
-									{
-										/**
-										* Overlong sequence: Unicode char U+0000..U+007F encoded as a
-										* double-byte UTF char
-										*/
-										$tmp .= substr($str, $tmp_pos, $starter_pos - $tmp_pos);
-
-										if (!empty($utf_sort))
+									case "\xC0":
+									case "\xC1":
+										if ($utf_char <= "\xC1\xBF")
 										{
-											ksort($utf_sort);
+											// Overlong sequence: Unicode char U+0000..U+007F encoded as a double-byte UTF char
+											$tmp .= substr($str, $tmp_pos, $starter_pos - $tmp_pos);
 
-											foreach($utf_sort as $utf_chars)
+											if (!empty($utf_sort))
 											{
-												$tmp .= implode('', $utf_chars);
+												ksort($utf_sort);
+
+												foreach ($utf_sort as $utf_chars)
+												{
+													$tmp .= implode('', $utf_chars);
+												}
+												$utf_sort = array();
 											}
-											$utf_sort = array();
-										}
 
-										$tmp .= UTF8_REPLACEMENT;
-										$dump = $sort = 0;
+											$tmp .= UTF8_REPLACEMENT;
+											$dump = $sort = 0;
 
-										$tmp_pos = $starter_pos = $pos;
-										continue 2;
-									}
+											$tmp_pos = $starter_pos = $pos;
+											continue 2;
+										}
 									break;
 
-								case "\xE0":
-									if ($utf_char <= "\xE0\x9F\xBF")
-									{
-										/**
-										* Unicode char U+0000..U+07FF encoded in 3 bytes
-										*/
-										$tmp .= substr($str, $tmp_pos, $starter_pos - $tmp_pos);
-
-										if (!empty($utf_sort))
+									case "\xE0":
+										if ($utf_char <= "\xE0\x9F\xBF")
 										{
-											ksort($utf_sort);
+											// Unicode char U+0000..U+07FF encoded in 3 bytes
+											$tmp .= substr($str, $tmp_pos, $starter_pos - $tmp_pos);
 
-											foreach($utf_sort as $utf_chars)
+											if (!empty($utf_sort))
 											{
-												$tmp .= implode('', $utf_chars);
+												ksort($utf_sort);
+
+												foreach ($utf_sort as $utf_chars)
+												{
+													$tmp .= implode('', $utf_chars);
+												}
+												$utf_sort = array();
 											}
-											$utf_sort = array();
-										}
 
-										$tmp .= UTF8_REPLACEMENT;
-										$dump = $sort = 0;
+											$tmp .= UTF8_REPLACEMENT;
+											$dump = $sort = 0;
 
-										$tmp_pos = $starter_pos = $pos;
-										continue 2;
-									}
+											$tmp_pos = $starter_pos = $pos;
+											continue 2;
+										}
 									break;
 
-								case "\xF0":
-									if ($utf_char <= "\xF0\x8F\xBF\xBF")
-									{
-										/**
-										* Unicode char U+0000..U+FFFF encoded in 4 bytes
-										*/
-										$tmp .= substr($str, $tmp_pos, $starter_pos - $tmp_pos);
-
-										if (!empty($utf_sort))
+									case "\xF0":
+										if ($utf_char <= "\xF0\x8F\xBF\xBF")
 										{
-											ksort($utf_sort);
+											// Unicode char U+0000..U+FFFF encoded in 4 bytes
+											$tmp .= substr($str, $tmp_pos, $starter_pos - $tmp_pos);
 
-											foreach($utf_sort as $utf_chars)
+											if (!empty($utf_sort))
 											{
-												$tmp .= implode('', $utf_chars);
+												ksort($utf_sort);
+
+												foreach ($utf_sort as $utf_chars)
+												{
+													$tmp .= implode('', $utf_chars);
+												}
+												$utf_sort = array();
 											}
-											$utf_sort = array();
-										}
 
-										$tmp .= UTF8_REPLACEMENT;
-										$dump = $sort = 0;
+											$tmp .= UTF8_REPLACEMENT;
+											$dump = $sort = 0;
 
-										$tmp_pos = $starter_pos = $pos;
-										continue 2;
-									}
+											$tmp_pos = $starter_pos = $pos;
+											continue 2;
+										}
 									break;
 
-								default:
-									if ($utf_char > UTF8_MAX)
-									{
-										/**
-										* Out of the Unicode range
-										*/
-										$tmp .= substr($str, $tmp_pos, $starter_pos - $tmp_pos);
-
-										if (!empty($utf_sort))
+									default:
+										if ($utf_char > UTF8_MAX)
 										{
-											ksort($utf_sort);
+											// Out of the Unicode range
+											$tmp .= substr($str, $tmp_pos, $starter_pos - $tmp_pos);
 
-											foreach($utf_sort as $utf_chars)
+											if (!empty($utf_sort))
 											{
-												$tmp .= implode('', $utf_chars);
+												ksort($utf_sort);
+
+												foreach ($utf_sort as $utf_chars)
+												{
+													$tmp .= implode('', $utf_chars);
+												}
+												$utf_sort = array();
 											}
-											$utf_sort = array();
-										}
 
-										$tmp .= UTF8_REPLACEMENT;
-										$dump = $sort = 0;
+											$tmp .= UTF8_REPLACEMENT;
+											$dump = $sort = 0;
 
-										$tmp_pos = $starter_pos = $pos;
-										continue 2;
-									}
+											$tmp_pos = $starter_pos = $pos;
+											continue 2;
+										}
+									break;
+								}
 							}
 						}
-					}
-					else
-					{
-						/**
-						* Hangul syllable
-						*/
-						$idx = (((ord($utf_char[0]) & 0x0F) << 12) | ((ord($utf_char[1]) & 0x3F) << 6) | (ord($utf_char[2]) & 0x3F)) - UNICODE_HANGUL_SBASE;
-
-						/**
-						* LIndex can only range from 0 to 18, therefore it cannot influence
-						* the first two bytes of the L Jamo, which allows us to hardcode
-						* them (based on LBase).
-						*
-						* The same goes for VIndex, but for TIndex there's a catch: the value
-						* of the third byte could exceed 0xBF and we would have to increment
-						* the second byte
-						*/
-						if ($tIndex = $idx % UNICODE_HANGUL_TCOUNT)
+						else
 						{
-							if ($tIndex < 25)
+							// Hangul syllable
+							$idx = (((ord($utf_char[0]) & 0x0F) << 12) | ((ord($utf_char[1]) & 0x3F) << 6) | (ord($utf_char[2]) & 0x3F)) - UNICODE_HANGUL_SBASE;
+
+							// LIndex can only range from 0 to 18, therefore it cannot influence the first two bytes of the L Jamo, which allows us to hardcode them (based on LBase).
+							//
+							// The same goes for VIndex, but for TIndex there's a catch: the value of the third byte could exceed 0xBF and we would have to increment the second byte
+							if ($tIndex = $idx % UNICODE_HANGUL_TCOUNT)
 							{
-								$utf_char = "\xE1\x84\x00\xE1\x85\x00\xE1\x86\x00";
-								$utf_char[8] = chr(0xA7 + $tIndex);
+								if ($tIndex < 25)
+								{
+									$utf_char = "\xE1\x84\x00\xE1\x85\x00\xE1\x86\x00";
+									$utf_char[8] = chr(0xA7 + $tIndex);
+								}
+								else
+								{
+									$utf_char = "\xE1\x84\x00\xE1\x85\x00\xE1\x87\x00";
+									$utf_char[8] = chr(0x67 + $tIndex);
+								}
 							}
 							else
 							{
-								$utf_char = "\xE1\x84\x00\xE1\x85\x00\xE1\x87\x00";
-								$utf_char[8] = chr(0x67 + $tIndex);
+								$utf_char = "\xE1\x84\x00\xE1\x85\x00";
 							}
+
+							$utf_char[2] = chr(0x80 + (int) ($idx / UNICODE_HANGUL_NCOUNT));
+							$utf_char[5] = chr(0xA1 + (int) (($idx % UNICODE_HANGUL_NCOUNT) / UNICODE_HANGUL_TCOUNT));
+
+							// Just like other decompositions, the resulting Jamos must be dumped to the tmp string
+							$dump = 1;
 						}
-						else
+
+						// Do we need to dump stuff to the tmp string?
+						if ($dump)
 						{
-							$utf_char = "\xE1\x84\x00\xE1\x85\x00";
-						}
+							$tmp .= substr($str, $tmp_pos, $starter_pos - $tmp_pos);
+
+							// Dump combiners
+							if (!empty($utf_sort))
+							{
+								if ($sort)
+								{
+									ksort($utf_sort);
+								}
 
-						$utf_char[2] = chr(0x80 + (int) ($idx / UNICODE_HANGUL_NCOUNT));
-						$utf_char[5] = chr(0xA1 + (int) (($idx % UNICODE_HANGUL_NCOUNT) / UNICODE_HANGUL_TCOUNT));
+								foreach ($utf_sort as $utf_chars)
+								{
+									$tmp .= implode('', $utf_chars);
+								}
+							}
 
+							$tmp .= $utf_char;
+							$dump = $sort = 0;
+							$tmp_pos = $pos;
+						}
 
-						/**
-						* Just like other decompositions, the resulting Jamos must
-						* be dumped to the tmp string
-						*/
-						$dump = 1;
+						$last_cc = 0;
+						$utf_sort = array();
+						$starter_pos = $pos;
 					}
-
-					/**
-					* Do we need to dump stuff to the tmp string?
-					*/
+				}
+				else
+				{
+					// ASCII char, which happens to be a starter (as any other ASCII char)
 					if ($dump)
 					{
 						$tmp .= substr($str, $tmp_pos, $starter_pos - $tmp_pos);
 
-						/**
-						* Dump combiners
-						*/
+						// Dump combiners
 						if (!empty($utf_sort))
 						{
 							if ($sort)
@@ -1842,15 +1523,21 @@ class utf_normalizer
 								ksort($utf_sort);
 							}
 
-							foreach($utf_sort as $utf_chars)
+							foreach ($utf_sort as $utf_chars)
 							{
 								$tmp .= implode('', $utf_chars);
 							}
 						}
 
-						$tmp .= $utf_char;
+						$tmp .= $str[$pos];
 						$dump = $sort = 0;
-						$tmp_pos = $pos;
+						$tmp_pos = ++$pos;
+
+						$pos += strspn($str, UTF8_ASCII_RANGE, $pos);
+					}
+					else
+					{
+						$pos += strspn($str, UTF8_ASCII_RANGE, ++$pos);
 					}
 
 					$last_cc = 0;
@@ -1858,103 +1545,49 @@ class utf_normalizer
 					$starter_pos = $pos;
 				}
 			}
-			else
+			while ($pos < $len);
+
+			// Now it is time to return the string
+			if ($dump)
 			{
-				/**
-				* ASCII char, which happens to be a starter (as any other ASCII char)
-				*/
-				if ($dump)
-				{
-					$tmp .= substr($str, $tmp_pos, $starter_pos - $tmp_pos);
+				$tmp .= substr($str, $tmp_pos, $starter_pos - $tmp_pos);
 
-					/**
-					* Dump combiners
-					*/
-					if (!empty($utf_sort))
+				// Dump combiners
+				if (!empty($utf_sort))
+				{
+					if ($sort)
 					{
-						if ($sort)
-						{
-							ksort($utf_sort);
-						}
-
-						foreach($utf_sort as $utf_chars)
-						{
-							$tmp .= implode('', $utf_chars);
-						}
+						ksort($utf_sort);
 					}
 
-					$tmp .= $str[$pos];
-					$dump = $sort = 0;
-					$tmp_pos = ++$pos;
-
-					$pos += strspn($str, UTF8_ASCII_RANGE, $pos);
-				}
-				else
-				{
-					$pos += strspn($str, UTF8_ASCII_RANGE, ++$pos);
+					foreach ($utf_sort as $utf_chars)
+					{
+						$tmp .= implode('', $utf_chars);
+					}
 				}
 
-				$last_cc = 0;
-				$utf_sort = array();
-				$starter_pos = $pos;
-			}
-		}
-		while($pos < $len);
-
-		/**
-		* Now is time to return the string
-		*/
-		if ($dump)
-		{
-			$tmp .= substr($str, $tmp_pos, $starter_pos - $tmp_pos);
+				return $tmp;
 
-			/**
-			* Dump combiners
-			*/
-			if (!empty($utf_sort))
+			}
+			else if ($tmp_pos)
 			{
-				if ($sort)
+				// If the $tmp_pos cursor was moved then at least one character was not in normal form. Replace $str with the fixed version
+				if ($tmp_pos == $len)
 				{
-					ksort($utf_sort);
+					// The $tmp_pos cursor is at the end of $str, therefore $tmp holds the whole $str
+					return $tmp;
 				}
-
-				foreach($utf_sort as $utf_chars)
+				else
 				{
-					$tmp .= implode('', $utf_chars);
+					// The rightmost chunk of $str has not been appended to $tmp yet
+					return $tmp . substr($str, $tmp_pos);
 				}
 			}
 
-			return $tmp;
-
-		}
-		elseif ($tmp_pos)
-		{
-			/**
-			* If the $tmp_pos cursor was moved then at least one character was not in
-			* normal form. Replace $str with the fixed version
-			*/
-			if ($tmp_pos == $len)
-			{
-				/**
-				* The $tmp_pos cursor is at the end of $str, therefore $tmp holds
-				* the whole $str
-				*/
-				return $tmp;
-			}
-			else
-			{
-				/**
-				* The rightmost chunk of $str has not been appended to $tmp yet
-				*/
-				return $tmp . substr($str, $tmp_pos);
-			}
+			// The string was already in normal form
+			return $str;
 		}
-
-		/**
-		* The string was already in normal form
-		*/
-		return $str;
 	}
 }
 
-}
\ No newline at end of file
+?>
\ No newline at end of file
diff --git a/phpBB/includes/utf/utf_tools.php b/phpBB/includes/utf/utf_tools.php
index b3c3c5ed5e..ede1dd85ea 100644
--- a/phpBB/includes/utf/utf_tools.php
+++ b/phpBB/includes/utf/utf_tools.php
@@ -59,17 +59,13 @@ function utf8_recode($string, $encoding)
 		return $string;
 	}
 
-	/**
-	* PHP has a built-in function for encoding from iso-8859-1, let's use that
-	*/
+	// PHP has a built-in function for encoding from iso-8859-1, let's use that
 	if ($encoding == 'iso-8859-1')
 	{
 		return utf8_encode($string);
 	}
 
-	/**
-	* First, try iconv()
-	*/
+	// First, try iconv()
 	if (function_exists('iconv'))
 	{
 		$ret = @iconv($encoding, 'utf-8', $string);
@@ -80,9 +76,7 @@ function utf8_recode($string, $encoding)
 		}
 	}
 
-	/**
-	* Try the mb_string extension
-	*/
+	// Try the mb_string extension
 	if (function_exists('mb_convert_encoding'))
 	{
 		$ret = @mb_convert_encoding($string, 'utf-8', $encoding);
@@ -93,9 +87,7 @@ function utf8_recode($string, $encoding)
 		}
 	}
 
-	/**
-	* Try the recode extension
-	*/
+	// Try the recode extension
 	if (function_exists('recode_string'))
 	{
 		$ret = @recode_string($encoding . '..utf-8', $string);
@@ -106,25 +98,21 @@ function utf8_recode($string, $encoding)
 		}
 	}
 
-	/**
-	* If nothing works, check if we have a custom transcoder available
-	*/
+	// If nothing works, check if we have a custom transcoder available
 	if (!preg_match('#^[a-z0-9\\-]+$#', $encoding))
 	{
-		/**
-		* Make sure the encoding name is alphanumeric, we don't want it
-		* to be abused into loading arbitrary files
-		*/
-		trigger_error('Unknown encoding: ' . $encoding);
+		// Make sure the encoding name is alphanumeric, we don't want it to be abused into loading arbitrary files
+		trigger_error('Unknown encoding: ' . $encoding, E_USER_ERROR);
 	}
 
 	global $phpbb_root_path;
+
 	if (!file_exists($phpbb_root_path . 'includes/utf/data/'))
 	{
 		return $string;
 	}
 
-	die('Finish me!! '.basename(__FILE__).' at line '.__LINE__);
+	die('Finish me!! ' . basename(__FILE__) . ' at line ' . __LINE__);
 }
 
 /**
@@ -200,11 +188,11 @@ function utf8_decode_ncr_callback($m)
 	{
 		return chr(0xF0 | ($cp >> 18)) . chr(0x80 | (($cp >> 12) & 0x3F)) . chr(0x80 | (($cp >> 6) & 0x3F)) . chr(0x80 | ($cp & 0x3F));
 	}
-	elseif ($cp > 0x7FF)
+	else if ($cp > 0x7FF)
 	{
 		return chr(0xE0 | ($cp >> 12)) . chr(0x80 | (($cp >> 6) & 0x3F)) . chr(0x80 | ($cp & 0x3F));
 	}
-	elseif ($cp > 0x7F)
+	else if ($cp > 0x7F)
 	{
 		return chr(0xC0 | ($cp >> 6)) . chr(0x80 | ($cp & 0x3F));
 	}
-- 
cgit v1.2.1


From 5f30881c2c11ffce73a75c3e5c18d1368e6777da Mon Sep 17 00:00:00 2001
From: Meik Sievertsen <acydburn@phpbb.com>
Date: Fri, 1 Sep 2006 13:16:22 +0000
Subject: fix some bugs - hopefully not breaking anything...

git-svn-id: file:///svn/phpbb/trunk@6342 89ea8834-ac86-4346-8a33-228a782c2dd0
---
 phpBB/includes/utf/utf_tools.php | 1 +
 1 file changed, 1 insertion(+)

(limited to 'phpBB/includes/utf')

diff --git a/phpBB/includes/utf/utf_tools.php b/phpBB/includes/utf/utf_tools.php
index ede1dd85ea..a187253bca 100644
--- a/phpBB/includes/utf/utf_tools.php
+++ b/phpBB/includes/utf/utf_tools.php
@@ -38,6 +38,7 @@ function utf8_strlen($text)
 		return mb_strlen($text, 'utf-8');
 	}
 
+	// Since utf8_decode is replacing multibyte characters to ? strlen works fine
 	return strlen(utf8_decode($text));
 }
 
-- 
cgit v1.2.1


From 8ab85ebdb03105bffbd30e9ac374068a92972752 Mon Sep 17 00:00:00 2001
From: Meik Sievertsen <acydburn@phpbb.com>
Date: Mon, 4 Sep 2006 20:35:46 +0000
Subject: - fix security issue in download.php - fixing some phpdocumentor
 warnings/errors - adjust pop-before-smtp "auth" (nowadays no one should rely
 on it) - add backtrace for smtp email errors if DEBUG_EXTRA is enabled

git-svn-id: file:///svn/phpbb/trunk@6352 89ea8834-ac86-4346-8a33-228a782c2dd0
---
 phpBB/includes/utf/utf_normalizer.php | 1 +
 1 file changed, 1 insertion(+)

(limited to 'phpBB/includes/utf')

diff --git a/phpBB/includes/utf/utf_normalizer.php b/phpBB/includes/utf/utf_normalizer.php
index 613a1098a7..d8e0ba0fa6 100644
--- a/phpBB/includes/utf/utf_normalizer.php
+++ b/phpBB/includes/utf/utf_normalizer.php
@@ -43,6 +43,7 @@ if (function_exists('utf8_normalize'))
 	* utf_normalizer class for the utfnormal extension
 	*
 	* @ignore
+	* @package phpBB3
 	*/
 	class utf_normalizer
 	{
-- 
cgit v1.2.1


From 2a4c853f871b75a65df784c79f7859e97f44b92a Mon Sep 17 00:00:00 2001
From: David M <davidmj@users.sourceforge.net>
Date: Fri, 15 Sep 2006 22:15:10 +0000
Subject: new utf8_* stuff

git-svn-id: file:///svn/phpbb/trunk@6368 89ea8834-ac86-4346-8a33-228a782c2dd0
---
 phpBB/includes/utf/utf_tools.php | 597 +++++++++++++++++++++++++++++++++++++--
 1 file changed, 579 insertions(+), 18 deletions(-)

(limited to 'phpBB/includes/utf')

diff --git a/phpBB/includes/utf/utf_tools.php b/phpBB/includes/utf/utf_tools.php
index a187253bca..058205e68a 100644
--- a/phpBB/includes/utf/utf_tools.php
+++ b/phpBB/includes/utf/utf_tools.php
@@ -14,12 +14,485 @@
 * Whenever possible, these functions will try to use PHP's built-in functions or
 * extensions, otherwise they will default to custom routines.
 *
-* If we go with UTF-8 in 3.2, we will also need a Unicode-aware replacement
-* to strtolower()
-*
 * @package phpBB3
 */
 
+// huge chunks of this code belong to the PHP UTF-8 project
+// TODO: document the functions!
+
+// utf8_encode and utf8_decode are both XML functions
+if (!extension_loaded('xml'))
+{
+	// This function exploits some nice things that ISO-8859-1 and UTF-8 have in common
+	function utf8_encode($str)
+	{
+		$out = '';
+		for ($i = 0, $len = strlen($str); $i < $len; $i++)
+		{
+			$letter = $str[$i];
+			$num = ord($letter);
+			if ($num < 0x80)
+			{
+				$out .= $letter;
+			}
+			else if ($num < 0xC0)
+			{
+				$out .= "\xC2$letter";
+			}
+			else
+			{
+				$chr = chr($num - 64);
+				$out .= "\xC3$chr";
+			}
+		}
+		return $out;
+	}
+
+	// "borrowed" from getID3
+	function utf8_decode($string)
+	{
+        $newcharstring = '';
+        $offset = 0;
+        $stringlength = strlen($string);
+        while ($offset < $stringlength)
+		{
+        	$ord = ord($string{$offset});
+            if (($ord | 0x07) == 0xF7)
+			{
+                // 11110bbb 10bbbbbb 10bbbbbb 10bbbbbb
+                $charval = (($ord & 0x07) << 18) &
+                           ((ord($string{($offset + 1)}) & 0x3F) << 12) &
+                           ((ord($string{($offset + 2)}) & 0x3F) <<  6) &
+                            (ord($string{($offset + 3)}) & 0x3F);
+                $offset += 4;
+            }
+			else if (($ord | 0x0F) == 0xEF)
+			{
+                // 1110bbbb 10bbbbbb 10bbbbbb
+                $charval = (($ord & 0x0F) << 12) &
+                           ((ord($string{($offset + 1)}) & 0x3F) <<  6) &
+                            (ord($string{($offset + 2)}) & 0x3F);
+                $offset += 3;
+            }
+			else if (($ord | 0x1F) == 0xDF)
+			{
+                // 110bbbbb 10bbbbbb
+                $charval = ((ord($string{($offset + 0)}) & 0x1F) <<  6) &
+                            (ord($string{($offset + 1)}) & 0x3F);
+                $offset += 2;
+            }
+			else if (($ord | 0x7F) == 0x7F)
+			{
+                // 0bbbbbbb
+                $charval = $ord;
+                $offset += 1;
+            }
+			else
+			{
+                // error? throw some kind of warning here?
+                $charval = false;
+                $offset += 1;
+            }
+            if ($charval !== false)
+			{
+                $newcharstring .= (($charval < 256) ? chr($charval) : '?');
+            }
+        }
+        return $newcharstring;
+	}
+}
+
+// mbstring is old and has it's functions around for older versions of PHP.
+// if mbstring is not loaded, we go into native mode.
+if (extension_loaded('mbstring'))
+{
+	function utf8_strrpos($str,	$needle, $offset = null)
+	{
+		// offset for mb_strrpos was added in 5.2.0
+		if ($offset === false || version_compare(phpversion(), '5.2.0', '>='))
+		{
+			// Emulate behaviour of strrpos rather than raising warning
+			if (empty($str))
+			{
+				return false;
+			}
+			return mb_strrpos($str, $search);
+		}
+		else
+		{
+			if (!is_int($offset))
+			{
+				trigger_error('utf8_strrpos expects parameter 3 to be long', E_USER_WARNING);
+				return false;
+			}
+
+			$str = mb_substr($str, $offset);
+
+			if (false !== ($pos = mb_strrpos($str, $search)))
+			{
+				return $pos + $offset;
+			}
+
+			return false;
+		}
+	}
+
+	function utf8_strpos($str, $needle, $offset = null)
+	{
+		if ($offset === false)
+		{
+			return mb_strpos($str, $needle);
+		}
+		else
+		{
+			return mb_strpos($str, $needle, $offset);
+		}
+	}
+
+	function utf8_strtolower($str)
+	{
+		return mb_strtolower($str);
+	}
+
+	function utf8_strtoupper($str)
+	{
+		return mb_strtoupper($str);
+	}
+
+	function utf8_substr($str, $offset,	$length	= null)
+	{
+		if ($length === false)
+		{
+			return mb_substr($str, $offset);
+		}
+		else
+		{
+			return mb_substr($str, $offset, $length);
+		}
+	}
+}
+else
+{
+	function utf8_strrpos($str,	$needle, $offset = null)
+	{
+		if (is_null($offset))
+		{
+			$ar	= explode($needle, $str);
+			
+			if (count($ar) > 1)
+			{
+				// Pop off the end of the string where the last	match was made
+				array_pop($ar);
+				$str = join($needle,$ar);
+				return utf8_strlen($str);
+			}
+			return false;
+		}
+		else
+		{
+			if (!is_int($offset))
+			{
+				trigger_error('utf8_strrpos	expects	parameter 3	to be long', E_USER_WARNING);
+				return false;
+			}
+			
+			$str = utf8_substr($str, $offset);
+			
+			if (false !== ($pos = utf8_strrpos($str, $needle)))
+			{
+				return $pos	+ $offset;
+			}
+			
+			return false;
+		}
+	}
+
+	function utf8_strpos($str, $needle, $offset = null)
+	{
+		// native
+		if (is_null($offset))
+		{
+			$ar = explode($needle, $str);
+			if (count($ar) > 1)
+			{
+				return utf8_strlen($ar[0]);
+			}
+			return false;
+		}
+		else
+		{
+			if (!is_int($offset))
+			{
+				trigger_error('utf8_strpos:  Offset must  be an integer', E_USER_ERROR);
+				return false;
+			}
+
+			$str = utf8_substr($str, $offset);
+
+			if (false !== ($pos = utf8_strpos($str, $needle)))
+			{
+				return $pos + $offset;
+			}
+
+			return false;
+		}
+	}
+
+$UTF8_UPPER_TO_LOWER = array(
+			0x0041=>0x0061, 0x03A6=>0x03C6, 0x0162=>0x0163, 0x00C5=>0x00E5, 0x0042=>0x0062,
+			0x0139=>0x013A, 0x00C1=>0x00E1, 0x0141=>0x0142, 0x038E=>0x03CD, 0x0100=>0x0101,
+			0x0490=>0x0491, 0x0394=>0x03B4, 0x015A=>0x015B, 0x0044=>0x0064, 0x0393=>0x03B3,
+			0x00D4=>0x00F4, 0x042A=>0x044A, 0x0419=>0x0439, 0x0112=>0x0113, 0x041C=>0x043C,
+			0x015E=>0x015F, 0x0143=>0x0144, 0x00CE=>0x00EE, 0x040E=>0x045E, 0x042F=>0x044F,
+			0x039A=>0x03BA, 0x0154=>0x0155, 0x0049=>0x0069, 0x0053=>0x0073, 0x1E1E=>0x1E1F,
+			0x0134=>0x0135, 0x0427=>0x0447, 0x03A0=>0x03C0, 0x0418=>0x0438, 0x00D3=>0x00F3,
+			0x0420=>0x0440, 0x0404=>0x0454, 0x0415=>0x0435, 0x0429=>0x0449, 0x014A=>0x014B,
+			0x0411=>0x0431, 0x0409=>0x0459, 0x1E02=>0x1E03, 0x00D6=>0x00F6, 0x00D9=>0x00F9,
+			0x004E=>0x006E, 0x0401=>0x0451, 0x03A4=>0x03C4, 0x0423=>0x0443, 0x015C=>0x015D,
+			0x0403=>0x0453, 0x03A8=>0x03C8, 0x0158=>0x0159, 0x0047=>0x0067, 0x00C4=>0x00E4,
+			0x0386=>0x03AC, 0x0389=>0x03AE, 0x0166=>0x0167, 0x039E=>0x03BE, 0x0164=>0x0165,
+			0x0116=>0x0117, 0x0108=>0x0109, 0x0056=>0x0076, 0x00DE=>0x00FE, 0x0156=>0x0157,
+			0x00DA=>0x00FA, 0x1E60=>0x1E61, 0x1E82=>0x1E83, 0x00C2=>0x00E2, 0x0118=>0x0119,
+			0x0145=>0x0146, 0x0050=>0x0070, 0x0150=>0x0151, 0x042E=>0x044E, 0x0128=>0x0129,
+			0x03A7=>0x03C7, 0x013D=>0x013E, 0x0422=>0x0442, 0x005A=>0x007A, 0x0428=>0x0448,
+			0x03A1=>0x03C1, 0x1E80=>0x1E81, 0x016C=>0x016D, 0x00D5=>0x00F5, 0x0055=>0x0075,
+			0x0176=>0x0177, 0x00DC=>0x00FC, 0x1E56=>0x1E57, 0x03A3=>0x03C3, 0x041A=>0x043A,
+			0x004D=>0x006D, 0x016A=>0x016B, 0x0170=>0x0171, 0x0424=>0x0444, 0x00CC=>0x00EC,
+			0x0168=>0x0169, 0x039F=>0x03BF, 0x004B=>0x006B, 0x00D2=>0x00F2, 0x00C0=>0x00E0,
+			0x0414=>0x0434, 0x03A9=>0x03C9, 0x1E6A=>0x1E6B, 0x00C3=>0x00E3, 0x042D=>0x044D,
+			0x0416=>0x0436, 0x01A0=>0x01A1, 0x010C=>0x010D, 0x011C=>0x011D, 0x00D0=>0x00F0,
+			0x013B=>0x013C, 0x040F=>0x045F, 0x040A=>0x045A, 0x00C8=>0x00E8, 0x03A5=>0x03C5,
+			0x0046=>0x0066, 0x00DD=>0x00FD, 0x0043=>0x0063, 0x021A=>0x021B, 0x00CA=>0x00EA,
+			0x0399=>0x03B9, 0x0179=>0x017A, 0x00CF=>0x00EF, 0x01AF=>0x01B0, 0x0045=>0x0065,
+			0x039B=>0x03BB, 0x0398=>0x03B8, 0x039C=>0x03BC, 0x040C=>0x045C, 0x041F=>0x043F,
+			0x042C=>0x044C, 0x00DE=>0x00FE, 0x00D0=>0x00F0, 0x1EF2=>0x1EF3, 0x0048=>0x0068,
+			0x00CB=>0x00EB, 0x0110=>0x0111, 0x0413=>0x0433, 0x012E=>0x012F, 0x00C6=>0x00E6,
+			0x0058=>0x0078, 0x0160=>0x0161, 0x016E=>0x016F, 0x0391=>0x03B1, 0x0407=>0x0457,
+			0x0172=>0x0173, 0x0178=>0x00FF, 0x004F=>0x006F, 0x041B=>0x043B, 0x0395=>0x03B5,
+			0x0425=>0x0445, 0x0120=>0x0121, 0x017D=>0x017E, 0x017B=>0x017C, 0x0396=>0x03B6,
+			0x0392=>0x03B2, 0x0388=>0x03AD, 0x1E84=>0x1E85, 0x0174=>0x0175, 0x0051=>0x0071,
+			0x0417=>0x0437, 0x1E0A=>0x1E0B, 0x0147=>0x0148, 0x0104=>0x0105, 0x0408=>0x0458,
+			0x014C=>0x014D, 0x00CD=>0x00ED, 0x0059=>0x0079, 0x010A=>0x010B, 0x038F=>0x03CE,
+			0x0052=>0x0072, 0x0410=>0x0430, 0x0405=>0x0455, 0x0402=>0x0452, 0x0126=>0x0127,
+			0x0136=>0x0137, 0x012A=>0x012B, 0x038A=>0x03AF, 0x042B=>0x044B, 0x004C=>0x006C,
+			0x0397=>0x03B7, 0x0124=>0x0125, 0x0218=>0x0219, 0x00DB=>0x00FB, 0x011E=>0x011F,
+			0x041E=>0x043E, 0x1E40=>0x1E41, 0x039D=>0x03BD, 0x0106=>0x0107, 0x03AB=>0x03CB,
+			0x0426=>0x0446, 0x00DE=>0x00FE, 0x00C7=>0x00E7, 0x03AA=>0x03CA, 0x0421=>0x0441,
+			0x0412=>0x0432, 0x010E=>0x010F, 0x00D8=>0x00F8, 0x0057=>0x0077, 0x011A=>0x011B,
+			0x0054=>0x0074, 0x004A=>0x006A, 0x040B=>0x045B, 0x0406=>0x0456, 0x0102=>0x0103,
+			0x039B=>0x03BB, 0x00D1=>0x00F1, 0x041D=>0x043D, 0x038C=>0x03CC, 0x00C9=>0x00E9,
+			0x00D0=>0x00F0, 0x0407=>0x0457, 0x0122=>0x0123,
+		);
+
+$UTF8_LOWER_TO_UPPER = array(
+			0x0061=>0x0041, 0x03C6=>0x03A6, 0x0163=>0x0162, 0x00E5=>0x00C5, 0x0062=>0x0042,
+			0x013A=>0x0139, 0x00E1=>0x00C1, 0x0142=>0x0141, 0x03CD=>0x038E, 0x0101=>0x0100,
+			0x0491=>0x0490, 0x03B4=>0x0394, 0x015B=>0x015A, 0x0064=>0x0044, 0x03B3=>0x0393,
+			0x00F4=>0x00D4, 0x044A=>0x042A, 0x0439=>0x0419, 0x0113=>0x0112, 0x043C=>0x041C,
+			0x015F=>0x015E, 0x0144=>0x0143, 0x00EE=>0x00CE, 0x045E=>0x040E, 0x044F=>0x042F,
+			0x03BA=>0x039A, 0x0155=>0x0154, 0x0069=>0x0049, 0x0073=>0x0053, 0x1E1F=>0x1E1E,
+			0x0135=>0x0134, 0x0447=>0x0427, 0x03C0=>0x03A0, 0x0438=>0x0418, 0x00F3=>0x00D3,
+			0x0440=>0x0420, 0x0454=>0x0404, 0x0435=>0x0415, 0x0449=>0x0429, 0x014B=>0x014A,
+			0x0431=>0x0411, 0x0459=>0x0409, 0x1E03=>0x1E02, 0x00F6=>0x00D6, 0x00F9=>0x00D9,
+			0x006E=>0x004E, 0x0451=>0x0401, 0x03C4=>0x03A4, 0x0443=>0x0423, 0x015D=>0x015C,
+			0x0453=>0x0403, 0x03C8=>0x03A8, 0x0159=>0x0158, 0x0067=>0x0047, 0x00E4=>0x00C4,
+			0x03AC=>0x0386, 0x03AE=>0x0389, 0x0167=>0x0166, 0x03BE=>0x039E, 0x0165=>0x0164,
+			0x0117=>0x0116, 0x0109=>0x0108, 0x0076=>0x0056, 0x00FE=>0x00DE, 0x0157=>0x0156,
+			0x00FA=>0x00DA, 0x1E61=>0x1E60, 0x1E83=>0x1E82, 0x00E2=>0x00C2, 0x0119=>0x0118,
+			0x0146=>0x0145, 0x0070=>0x0050, 0x0151=>0x0150, 0x044E=>0x042E, 0x0129=>0x0128,
+			0x03C7=>0x03A7, 0x013E=>0x013D, 0x0442=>0x0422, 0x007A=>0x005A, 0x0448=>0x0428,
+			0x03C1=>0x03A1, 0x1E81=>0x1E80, 0x016D=>0x016C, 0x00F5=>0x00D5, 0x0075=>0x0055,
+			0x0177=>0x0176, 0x00FC=>0x00DC, 0x1E57=>0x1E56, 0x03C3=>0x03A3, 0x043A=>0x041A,
+			0x006D=>0x004D, 0x016B=>0x016A, 0x0171=>0x0170, 0x0444=>0x0424, 0x00EC=>0x00CC,
+			0x0169=>0x0168, 0x03BF=>0x039F, 0x006B=>0x004B, 0x00F2=>0x00D2, 0x00E0=>0x00C0,
+			0x0434=>0x0414, 0x03C9=>0x03A9, 0x1E6B=>0x1E6A, 0x00E3=>0x00C3, 0x044D=>0x042D,
+			0x0436=>0x0416, 0x01A1=>0x01A0, 0x010D=>0x010C, 0x011D=>0x011C, 0x00F0=>0x00D0,
+			0x013C=>0x013B, 0x045F=>0x040F, 0x045A=>0x040A, 0x00E8=>0x00C8, 0x03C5=>0x03A5,
+			0x0066=>0x0046, 0x00FD=>0x00DD, 0x0063=>0x0043, 0x021B=>0x021A, 0x00EA=>0x00CA,
+			0x03B9=>0x0399, 0x017A=>0x0179, 0x00EF=>0x00CF, 0x01B0=>0x01AF, 0x0065=>0x0045,
+			0x03BB=>0x039B, 0x03B8=>0x0398, 0x03BC=>0x039C, 0x045C=>0x040C, 0x043F=>0x041F,
+			0x044C=>0x042C, 0x00FE=>0x00DE, 0x00F0=>0x00D0, 0x1EF3=>0x1EF2, 0x0068=>0x0048,
+			0x00EB=>0x00CB, 0x0111=>0x0110, 0x0433=>0x0413, 0x012F=>0x012E, 0x00E6=>0x00C6,
+			0x0078=>0x0058, 0x0161=>0x0160, 0x016F=>0x016E, 0x03B1=>0x0391, 0x0457=>0x0407,
+			0x0173=>0x0172, 0x00FF=>0x0178, 0x006F=>0x004F, 0x043B=>0x041B, 0x03B5=>0x0395,
+			0x0445=>0x0425, 0x0121=>0x0120, 0x017E=>0x017D, 0x017C=>0x017B, 0x03B6=>0x0396,
+			0x03B2=>0x0392, 0x03AD=>0x0388, 0x1E85=>0x1E84, 0x0175=>0x0174, 0x0071=>0x0051,
+			0x0437=>0x0417, 0x1E0B=>0x1E0A, 0x0148=>0x0147, 0x0105=>0x0104, 0x0458=>0x0408,
+			0x014D=>0x014C, 0x00ED=>0x00CD, 0x0079=>0x0059, 0x010B=>0x010A, 0x03CE=>0x038F,
+			0x0072=>0x0052, 0x0430=>0x0410, 0x0455=>0x0405, 0x0452=>0x0402, 0x0127=>0x0126,
+			0x0137=>0x0136, 0x012B=>0x012A, 0x03AF=>0x038A, 0x044B=>0x042B, 0x006C=>0x004C,
+			0x03B7=>0x0397, 0x0125=>0x0124, 0x0219=>0x0218, 0x00FB=>0x00DB, 0x011F=>0x011E,
+			0x043E=>0x041E, 0x1E41=>0x1E40, 0x03BD=>0x039D, 0x0107=>0x0106, 0x03CB=>0x03AB,
+			0x0446=>0x0426, 0x00FE=>0x00DE, 0x00E7=>0x00C7, 0x03CA=>0x03AA, 0x0441=>0x0421,
+			0x0432=>0x0412, 0x010F=>0x010E, 0x00F8=>0x00D8, 0x0077=>0x0057, 0x011B=>0x011A,
+			0x0074=>0x0054, 0x006A=>0x004A, 0x045B=>0x040B, 0x0456=>0x0406, 0x0103=>0x0102,
+			0x03BB=>0x039B, 0x00F1=>0x00D1, 0x043D=>0x041D, 0x03CC=>0x038C, 0x00E9=>0x00C9,
+			0x00F0=>0x00D0, 0x0457=>0x0407, 0x0123=>0x0122,
+		);
+
+	function utf8_strtolower($string)
+	{
+		global $UTF8_UPPER_TO_LOWER;
+		$uni = utf8_to_unicode($string);
+		if (!$uni)
+		{
+			return false;
+		}
+
+		$cnt = count($uni);
+		for ($i = 0; $i < $cnt; $i++)
+		{
+			if (isset($UTF8_UPPER_TO_LOWER[$uni[$i]]))
+			{
+				$uni[$i] = $UTF8_UPPER_TO_LOWER[$uni[$i]];
+			}
+		}
+
+		return utf8_from_unicode($uni);
+	}
+
+	function utf8_strtoupper($str)
+	{
+		global $UTF8_LOWER_TO_UPPER;
+		$uni = utf8_to_unicode($string);
+		if (!$uni)
+		{
+			return false;
+		}
+
+		$cnt = count($uni);
+		for ($i = 0; $i < $cnt; $i++)
+		{
+			if (isset($UTF8_LOWER_TO_UPPER[$uni[$i]]))
+			{
+				$uni[$i] = $UTF8_LOWER_TO_UPPER[$uni[$i]];
+			}
+		}
+
+		return utf8_from_unicode($uni);
+	}
+
+	function utf8_substr($str, $offset,	$length	= null)
+	{
+		if ($offset >= 0 && $length >= 0)
+		{
+			if ($length === null)
+			{
+				$length = '*';
+			}
+			else
+			{
+				if (!preg_match('/^[0-9]+$/', $length))
+				{
+					trigger_error('utf8_substr expects parameter 3 to be long', E_USER_WARNING);
+					return false;
+				}
+				
+				$strlen = strlen(utf8_decode($str));
+				if ($offset > $strlen)
+				{
+					return '';
+				}
+				
+				if (($offset + $length) >	$strlen)
+				{
+					$length = '*';
+				}
+				else
+				{
+					$length = '{' . $length . '}';
+				}
+			}
+			
+			if (!preg_match('/^[0-9]+$/', $offset))
+			{
+				trigger_error('utf8_substr expects parameter 2 to be long', E_USER_WARNING);
+				return false;
+			}
+			
+			$pattern = '/^.{' . $offset . '}(.' . $length . ')/us';
+			
+			preg_match($pattern, $str, $matches);
+			
+			if (isset($matches[1]))
+			{
+				return $matches[1];
+			}
+			
+			return false;
+		}
+		else
+		{
+			// Handle	negatives using	different, slower technique
+			// From: http://www.php.net/manual/en/function.substr.php#44838
+			preg_match_all('/./u', $str, $ar);
+			if ($length !== null)
+			{
+				return join('', array_slice($ar[0], $offset, $length));
+			}
+			else
+			{
+				return join('', array_slice($ar[0], $offset));
+			}
+		}
+	}
+}
+
+function utf8_str_split($str, $split_len = 1)
+{
+	if (!preg_match('/^[0-9]+$/', $split_len) || $split_len < 1)
+	{
+		return false;
+	}
+	
+	$len = utf8_strlen($str);
+	if ($len <= $split_len)
+	{
+		return array($str);
+	}
+	
+	preg_match_all('/.{' . $split_len . '}|[^\x00]{1,' . $split_len . '}$/us', $str, $ar);
+	return $ar[0];
+}
+
+function utf8_strspn($str, $mask, $start = null, $length = null)
+{
+	$mask = preg_replace('!([\\\\\\-\\]\\[/^])!', '\\\${1}', $mask);
+    
+	if ($start !== null || $length !== null)
+	{
+		$str = utf8_substr($str, $start, $length);
+    }
+        
+	preg_match('/^[' . $mask . ']+/u', $str, $matches);
+
+	if (isset($matches[0]))
+	{
+		return utf8_strlen($matches[0]);
+	}
+
+    return 0;
+}
+
+function utf8_ucfirst($str)
+{
+	switch (utf8_strlen($str))
+	{
+		case 0:
+			return '';
+		break;
+
+		case 1:
+			return utf8_strtoupper($str);
+		break;
+
+		default:
+			preg_match('/^(.{1})(.*)$/us', $str, $matches);
+			return utf8_strtoupper($matches[1]) . $matches[2];
+		break;
+	}
+}
+
 /**
 * Return the length (in characters) of a UTF-8 string
 *
@@ -60,7 +533,8 @@ function utf8_recode($string, $encoding)
 		return $string;
 	}
 
-	// PHP has a built-in function for encoding from iso-8859-1, let's use that
+
+	// start with something simple
 	if ($encoding == 'iso-8859-1')
 	{
 		return utf8_encode($string);
@@ -137,23 +611,52 @@ function utf8_encode_ncr($text)
 */
 function utf8_encode_ncr_callback($m)
 {
-	switch (strlen($m[0]))
+	return utf8_ord($m[0]);
+}
+
+function utf8_ord($chr)
+{
+	switch (strlen($chr))
 	{
 		case 1:
-			return '&#' . ord($m[0]) . ';';
+			return ord($chr);
+		break;
 
 		case 2:
-			return '&#' . (((ord($m[0][0]) & 0x1F) << 6) | (ord($m[0][1]) & 0x3F)) . ';';
+			return ((ord($chr[0]) & 0x1F) << 6) | (ord($chr[1]) & 0x3F);
+		break;
 
 		case 3:
-			return '&#' . (((ord($m[0][0]) & 0x0F) << 12) | ((ord($m[0][1]) & 0x3F) << 6) | (ord($m[0][2]) & 0x3F)) . ';';
+			return ((ord($chr[0]) & 0x0F) << 12) | ((ord($chr[1]) & 0x3F) << 6) | (ord($chr[2]) & 0x3F);
+		break;
 
 		case 4:
-			return '&#' . (((ord($m[0][0]) & 0x07) << 18) | ((ord($m[0][1]) & 0x3F) << 12) | ((ord($m[0][2]) & 0x3F) << 6) | (ord($m[0][3]) & 0x3F)) . ';';
+			return ((ord($chr[0]) & 0x07) << 18) | ((ord($chr[1]) & 0x3F) << 12) | ((ord($chr[2]) & 0x3F) << 6) | (ord($chr[3]) & 0x3F);
+		break;
 
 		default:
-			return $m[0];
-	}		
+			return $m;
+	}
+}
+
+function utf8_chr($cp)
+{
+	if ($cp > 0xFFFF)
+	{
+		return chr(0xF0 | ($cp >> 18)) . chr(0x80 | (($cp >> 12) & 0x3F)) . chr(0x80 | (($cp >> 6) & 0x3F)) . chr(0x80 | ($cp & 0x3F));
+	}
+	elseif ($cp > 0x7FF)
+	{
+		return chr(0xE0 | ($cp >> 12)) . chr(0x80 | (($cp >> 6) & 0x3F)) . chr(0x80 | ($cp & 0x3F));
+	}
+	elseif ($cp > 0x7F)
+	{
+		return chr(0xC0 | ($cp >> 6)) . chr(0x80 | ($cp & 0x3F));
+	}
+	else
+	{
+		return chr($cp);
+	}
 }
 
 /**
@@ -185,22 +688,80 @@ function utf8_decode_ncr_callback($m)
 {
 	$cp = (strncasecmp($m[1], 'x', 1)) ? $m[1] : hexdec(substr($m[1], 1));
 
-	if ($cp > 0xFFFF)
+	return utf8_chr($cp);
+}
+
+/**
+ * Takes an UTF-8 string and returns an array of ints representing the
+ * Unicode characters.
+ * @param  string  UTF-8 encoded string
+ */
+function utf8_to_unicode($$string)
+{
+    $unicode = array();
+    $offset = 0;
+    $stringlength = strlen($string);
+    while ($offset < $stringlength)
 	{
-		return chr(0xF0 | ($cp >> 18)) . chr(0x80 | (($cp >> 12) & 0x3F)) . chr(0x80 | (($cp >> 6) & 0x3F)) . chr(0x80 | ($cp & 0x3F));
+    	$ord = ord($string{$offset});
+        if (($ord | 0x07) == 0xF7)
+		{
+            // 11110bbb 10bbbbbb 10bbbbbb 10bbbbbb
+            $charval = (($ord & 0x07) << 18) &
+                       ((ord($string{($offset + 1)}) & 0x3F) << 12) &
+                       ((ord($string{($offset + 2)}) & 0x3F) <<  6) &
+                        (ord($string{($offset + 3)}) & 0x3F);
+            $offset += 4;
+        }
+		else if (($ord | 0x0F) == 0xEF)
+		{
+            // 1110bbbb 10bbbbbb 10bbbbbb
+            $charval = (($ord & 0x0F) << 12) &
+                       ((ord($string{($offset + 1)}) & 0x3F) <<  6) &
+                        (ord($string{($offset + 2)}) & 0x3F);
+            $offset += 3;
 	}
-	else if ($cp > 0x7FF)
+		else if (($ord | 0x1F) == 0xDF)
 	{
-		return chr(0xE0 | ($cp >> 12)) . chr(0x80 | (($cp >> 6) & 0x3F)) . chr(0x80 | ($cp & 0x3F));
+            // 110bbbbb 10bbbbbb
+            $charval = (($ord & 0x1F) <<  6) &
+                        (ord($string{($offset + 1)}) & 0x3F);
+            $offset += 2;
 	}
-	else if ($cp > 0x7F)
+		else if (($ord | 0x7F) == 0x7F)
 	{
-		return chr(0xC0 | ($cp >> 6)) . chr(0x80 | ($cp & 0x3F));
+            // 0bbbbbbb
+            $charval = $ord;
+            $offset += 1;
 	}
 	else
 	{
-		return chr($cp);
+            // error? throw some kind of warning here?
+            $charval = false;
+            $offset += 1;
+        }
+        if ($charval !== false)
+		{
+            $unicode[] = $charval;
+        }
+    }
+    return $unicode;
+}
+
+/**
+ * Takes an array of ints representing the Unicode characters and returns
+ * a UTF-8 string.
+ *
+ * @param  array of unicode code points representing a string
+ */
+function utf8_from_unicode($array)
+{
+	$str = '';
+	foreach ($array as $value)
+	{
+		$str .= utf8_chr($value);
 	}
+	return $str;
 }
 
 ?>
\ No newline at end of file
-- 
cgit v1.2.1


From ccaaa3a307badb2abf70425722e216c916351a3c Mon Sep 17 00:00:00 2001
From: David M <davidmj@users.sourceforge.net>
Date: Fri, 15 Sep 2006 22:24:41 +0000
Subject: whitespace :P

git-svn-id: file:///svn/phpbb/trunk@6369 89ea8834-ac86-4346-8a33-228a782c2dd0
---
 phpBB/includes/utf/utf_tools.php | 156 +++++++++++++++++++--------------------
 1 file changed, 78 insertions(+), 78 deletions(-)

(limited to 'phpBB/includes/utf')

diff --git a/phpBB/includes/utf/utf_tools.php b/phpBB/includes/utf/utf_tools.php
index 058205e68a..8a99f0b2b5 100644
--- a/phpBB/includes/utf/utf_tools.php
+++ b/phpBB/includes/utf/utf_tools.php
@@ -51,54 +51,54 @@ if (!extension_loaded('xml'))
 	// "borrowed" from getID3
 	function utf8_decode($string)
 	{
-        $newcharstring = '';
-        $offset = 0;
-        $stringlength = strlen($string);
-        while ($offset < $stringlength)
+		$newcharstring = '';
+		$offset = 0;
+		$stringlength = strlen($string);
+		while ($offset < $stringlength)
 		{
-        	$ord = ord($string{$offset});
-            if (($ord | 0x07) == 0xF7)
+			$ord = ord($string{$offset});
+			if (($ord | 0x07) == 0xF7)
 			{
-                // 11110bbb 10bbbbbb 10bbbbbb 10bbbbbb
-                $charval = (($ord & 0x07) << 18) &
-                           ((ord($string{($offset + 1)}) & 0x3F) << 12) &
-                           ((ord($string{($offset + 2)}) & 0x3F) <<  6) &
-                            (ord($string{($offset + 3)}) & 0x3F);
-                $offset += 4;
-            }
+				// 11110bbb 10bbbbbb 10bbbbbb 10bbbbbb
+				$charval = (($ord & 0x07) << 18) &
+							((ord($string{($offset + 1)}) & 0x3F) << 12) &
+							((ord($string{($offset + 2)}) & 0x3F) <<  6) &
+							(ord($string{($offset + 3)}) & 0x3F);
+				$offset += 4;
+			}
 			else if (($ord | 0x0F) == 0xEF)
 			{
-                // 1110bbbb 10bbbbbb 10bbbbbb
-                $charval = (($ord & 0x0F) << 12) &
-                           ((ord($string{($offset + 1)}) & 0x3F) <<  6) &
-                            (ord($string{($offset + 2)}) & 0x3F);
-                $offset += 3;
-            }
+				// 1110bbbb 10bbbbbb 10bbbbbb
+				$charval = (($ord & 0x0F) << 12) &
+							((ord($string{($offset + 1)}) & 0x3F) <<  6) &
+							(ord($string{($offset + 2)}) & 0x3F);
+				$offset += 3;
+			}
 			else if (($ord | 0x1F) == 0xDF)
 			{
-                // 110bbbbb 10bbbbbb
-                $charval = ((ord($string{($offset + 0)}) & 0x1F) <<  6) &
-                            (ord($string{($offset + 1)}) & 0x3F);
-                $offset += 2;
-            }
+				// 110bbbbb 10bbbbbb
+				$charval = ((ord($string{($offset + 0)}) & 0x1F) <<  6) &
+							(ord($string{($offset + 1)}) & 0x3F);
+				$offset += 2;
+			}
 			else if (($ord | 0x7F) == 0x7F)
 			{
-                // 0bbbbbbb
-                $charval = $ord;
-                $offset += 1;
-            }
+				// 0bbbbbbb
+				$charval = $ord;
+				$offset += 1;
+			}
 			else
 			{
-                // error? throw some kind of warning here?
-                $charval = false;
-                $offset += 1;
-            }
-            if ($charval !== false)
+				// error? throw some kind of warning here?
+				$charval = false;
+				$offset += 1;
+			}
+			if ($charval !== false)
 			{
-                $newcharstring .= (($charval < 256) ? chr($charval) : '?');
-            }
-        }
-        return $newcharstring;
+				$newcharstring .= (($charval < 256) ? chr($charval) : '?');
+			}
+		}
+		return $newcharstring;
 	}
 }
 
@@ -698,54 +698,54 @@ function utf8_decode_ncr_callback($m)
  */
 function utf8_to_unicode($$string)
 {
-    $unicode = array();
-    $offset = 0;
-    $stringlength = strlen($string);
-    while ($offset < $stringlength)
+	$unicode = array();
+	$offset = 0;
+	$stringlength = strlen($string);
+	while ($offset < $stringlength)
 	{
-    	$ord = ord($string{$offset});
-        if (($ord | 0x07) == 0xF7)
+		$ord = ord($string{$offset});
+		if (($ord | 0x07) == 0xF7)
 		{
-            // 11110bbb 10bbbbbb 10bbbbbb 10bbbbbb
-            $charval = (($ord & 0x07) << 18) &
-                       ((ord($string{($offset + 1)}) & 0x3F) << 12) &
-                       ((ord($string{($offset + 2)}) & 0x3F) <<  6) &
-                        (ord($string{($offset + 3)}) & 0x3F);
-            $offset += 4;
-        }
+			// 11110bbb 10bbbbbb 10bbbbbb 10bbbbbb
+			$charval = (($ord & 0x07) << 18) &
+						((ord($string{($offset + 1)}) & 0x3F) << 12) &
+						((ord($string{($offset + 2)}) & 0x3F) <<  6) &
+						(ord($string{($offset + 3)}) & 0x3F);
+			$offset += 4;
+		}
 		else if (($ord | 0x0F) == 0xEF)
 		{
-            // 1110bbbb 10bbbbbb 10bbbbbb
-            $charval = (($ord & 0x0F) << 12) &
-                       ((ord($string{($offset + 1)}) & 0x3F) <<  6) &
-                        (ord($string{($offset + 2)}) & 0x3F);
-            $offset += 3;
-	}
+			// 1110bbbb 10bbbbbb 10bbbbbb
+			$charval = (($ord & 0x0F) << 12) &
+						((ord($string{($offset + 1)}) & 0x3F) <<  6) &
+						(ord($string{($offset + 2)}) & 0x3F);
+			$offset += 3;
+		}
 		else if (($ord | 0x1F) == 0xDF)
-	{
-            // 110bbbbb 10bbbbbb
-            $charval = (($ord & 0x1F) <<  6) &
-                        (ord($string{($offset + 1)}) & 0x3F);
-            $offset += 2;
-	}
+		{
+			// 110bbbbb 10bbbbbb
+			$charval = (($ord & 0x1F) <<  6) &
+						(ord($string{($offset + 1)}) & 0x3F);
+			$offset += 2;
+		}
 		else if (($ord | 0x7F) == 0x7F)
-	{
-            // 0bbbbbbb
-            $charval = $ord;
-            $offset += 1;
-	}
-	else
-	{
-            // error? throw some kind of warning here?
-            $charval = false;
-            $offset += 1;
-        }
-        if ($charval !== false)
 		{
-            $unicode[] = $charval;
-        }
-    }
-    return $unicode;
+			// 0bbbbbbb
+			$charval = $ord;
+			$offset += 1;
+		}
+		else
+		{
+			// error? throw some kind of warning here?
+			$charval = false;
+			$offset += 1;
+		}
+		if ($charval !== false)
+		{
+			$unicode[] = $charval;
+		}
+	}
+	return $unicode;
 }
 
 /**
-- 
cgit v1.2.1


From 9479bc428f10e166682087e8e61c19efcbbe2751 Mon Sep 17 00:00:00 2001
From: David M <davidmj@users.sourceforge.net>
Date: Fri, 15 Sep 2006 22:38:42 +0000
Subject: yet another oops...

git-svn-id: file:///svn/phpbb/trunk@6370 89ea8834-ac86-4346-8a33-228a782c2dd0
---
 phpBB/includes/utf/utf_tools.php | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'phpBB/includes/utf')

diff --git a/phpBB/includes/utf/utf_tools.php b/phpBB/includes/utf/utf_tools.php
index 8a99f0b2b5..84ae0d2dd2 100644
--- a/phpBB/includes/utf/utf_tools.php
+++ b/phpBB/includes/utf/utf_tools.php
@@ -394,7 +394,7 @@ $UTF8_LOWER_TO_UPPER = array(
 					return '';
 				}
 				
-				if (($offset + $length) >	$strlen)
+				if (($offset + $length) > $strlen)
 				{
 					$length = '*';
 				}
@@ -611,7 +611,7 @@ function utf8_encode_ncr($text)
 */
 function utf8_encode_ncr_callback($m)
 {
-	return utf8_ord($m[0]);
+	return '&#' . utf8_ord($m[0]) . ';';
 }
 
 function utf8_ord($chr)
-- 
cgit v1.2.1


From a46a0c10c9ac2044fc0551af6493acdb63c8c7ea Mon Sep 17 00:00:00 2001
From: David M <davidmj@users.sourceforge.net>
Date: Sat, 16 Sep 2006 13:25:31 +0000
Subject: drat

git-svn-id: file:///svn/phpbb/trunk@6372 89ea8834-ac86-4346-8a33-228a782c2dd0
---
 phpBB/includes/utf/utf_tools.php | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'phpBB/includes/utf')

diff --git a/phpBB/includes/utf/utf_tools.php b/phpBB/includes/utf/utf_tools.php
index 84ae0d2dd2..1e7e25c43f 100644
--- a/phpBB/includes/utf/utf_tools.php
+++ b/phpBB/includes/utf/utf_tools.php
@@ -696,7 +696,7 @@ function utf8_decode_ncr_callback($m)
  * Unicode characters.
  * @param  string  UTF-8 encoded string
  */
-function utf8_to_unicode($$string)
+function utf8_to_unicode($string)
 {
 	$unicode = array();
 	$offset = 0;
-- 
cgit v1.2.1


From c6c3df2a730bb825a315c21cdd6c38988d683062 Mon Sep 17 00:00:00 2001
From: David M <davidmj@users.sourceforge.net>
Date: Sun, 17 Sep 2006 02:52:19 +0000
Subject: commenting some code :D

git-svn-id: file:///svn/phpbb/trunk@6376 89ea8834-ac86-4346-8a33-228a782c2dd0
---
 phpBB/includes/utf/utf_tools.php | 216 +++++++++++++++++++++++++++++++++------
 1 file changed, 187 insertions(+), 29 deletions(-)

(limited to 'phpBB/includes/utf')

diff --git a/phpBB/includes/utf/utf_tools.php b/phpBB/includes/utf/utf_tools.php
index 1e7e25c43f..2f7c8de69a 100644
--- a/phpBB/includes/utf/utf_tools.php
+++ b/phpBB/includes/utf/utf_tools.php
@@ -17,13 +17,15 @@
 * @package phpBB3
 */
 
-// huge chunks of this code belong to the PHP UTF-8 project
-// TODO: document the functions!
-
-// utf8_encode and utf8_decode are both XML functions
 if (!extension_loaded('xml'))
 {
-	// This function exploits some nice things that ISO-8859-1 and UTF-8 have in common
+	/**
+	 * Implementation of PHP's native utf8_encode for people without XML support
+	 * This function exploits some nice things that ISO-8859-1 and UTF-8 have in common
+	 *
+	 * @param string $str ISO-8859-1 encoded data
+	 * @return string UTF-8 encoded data
+	 */
 	function utf8_encode($str)
 	{
 		$out = '';
@@ -48,7 +50,13 @@ if (!extension_loaded('xml'))
 		return $out;
 	}
 
-	// "borrowed" from getID3
+	/**
+	 * Implementation of PHP's native utf8_decode for people without XML support
+	 *
+	 * @author GetID3()
+	 * @param string $string UTF-8 encoded data
+	 * @return string ISO-8859-1 encoded data
+	 */
 	function utf8_decode($string)
 	{
 		$newcharstring = '';
@@ -106,6 +114,16 @@ if (!extension_loaded('xml'))
 // if mbstring is not loaded, we go into native mode.
 if (extension_loaded('mbstring'))
 {
+	/**
+	* UTF-8 aware alternative to strrpos
+	* Find position of last occurrence of a char in a string
+	* 
+	* @author Harry Fuecks
+	* @param string haystack
+	* @param string needle
+	* @param integer (optional) offset (from left)
+	* @return mixed integer position or FALSE on failure
+	*/
 	function utf8_strrpos($str,	$needle, $offset = null)
 	{
 		// offset for mb_strrpos was added in 5.2.0
@@ -137,6 +155,16 @@ if (extension_loaded('mbstring'))
 		}
 	}
 
+	/**
+	* UTF-8 aware alternative to strpos
+	* Find position of first occurrence of a string
+	*
+	* @author Harry Fuecks
+	* @param string haystack
+	* @param string needle
+	* @param integer offset in characters (from left)
+	* @return mixed integer position or FALSE on failure
+	 */
 	function utf8_strpos($str, $needle, $offset = null)
 	{
 		if ($offset === false)
@@ -149,16 +177,50 @@ if (extension_loaded('mbstring'))
 		}
 	}
 
+	/**
+	* UTF-8 aware alternative to strtolower
+	* Make a string lowercase
+	* Note: The concept of a character's "case" only exists in some alphabets
+	* such as Latin, Greek, Cyrillic, Armenian and archaic Georgian - it does
+	* not exist in the Chinese alphabet, for example. See Unicode Standard
+	* Annex #21: Case Mappings
+	* 
+	* @author Andreas Gohr <andi@splitbrain.org>
+	* @param string
+	* @return mixed either string in lowercase or FALSE if UTF-8 invalid
+	*/
 	function utf8_strtolower($str)
 	{
 		return mb_strtolower($str);
 	}
 
+	/**
+	* UTF-8 aware alternative to strtoupper
+	* Make a string uppercase
+	* Note: The concept of a character's "case" only exists in some alphabets
+	* such as Latin, Greek, Cyrillic, Armenian and archaic Georgian - it does
+	* not exist in the Chinese alphabet, for example. See Unicode Standard
+	* Annex #21: Case Mappings
+	* 
+	* @author Andreas Gohr <andi@splitbrain.org>
+	* @param string
+	* @return mixed either string in uppercase or FALSE if UTF-8 invalid
+	*/
 	function utf8_strtoupper($str)
 	{
 		return mb_strtoupper($str);
 	}
 
+	/**
+	* UTF-8 aware alternative to substr
+	* Return part of a string given character offset (and optionally length)
+	* 
+	* @author Harry Fuecks
+	* @param string
+	* @param integer number of UTF-8 characters offset (from left)
+	* @param integer (optional) length in UTF-8 characters from offset
+	* @return mixed string or FALSE if failure
+	*/
 	function utf8_substr($str, $offset,	$length	= null)
 	{
 		if ($length === false)
@@ -170,9 +232,30 @@ if (extension_loaded('mbstring'))
 			return mb_substr($str, $offset, $length);
 		}
 	}
+
+	/**
+	* Return the length (in characters) of a UTF-8 string
+	*
+	* @param	string	$text		UTF-8 string
+	* @return	integer				Length (in chars) of given string
+	*/
+	function utf8_strlen($text)
+	{
+		return mb_strlen($text, 'utf-8');
+	}
 }
 else
 {
+	/**
+	* UTF-8 aware alternative to strrpos
+	* Find position of last occurrence of a char in a string
+	* 
+	* @author Harry Fuecks
+	* @param string haystack
+	* @param string needle
+	* @param integer (optional) offset (from left)
+	* @return mixed integer position or FALSE on failure
+	*/
 	function utf8_strrpos($str,	$needle, $offset = null)
 	{
 		if (is_null($offset))
@@ -207,6 +290,16 @@ else
 		}
 	}
 
+	/**
+	* UTF-8 aware alternative to strpos
+	* Find position of first occurrence of a string
+	*
+	* @author Harry Fuecks
+	* @param string haystack
+	* @param string needle
+	* @param integer offset in characters (from left)
+	* @return mixed integer position or FALSE on failure
+	 */
 	function utf8_strpos($str, $needle, $offset = null)
 	{
 		// native
@@ -330,6 +423,18 @@ $UTF8_LOWER_TO_UPPER = array(
 			0x00F0=>0x00D0, 0x0457=>0x0407, 0x0123=>0x0122,
 		);
 
+	/**
+	* UTF-8 aware alternative to strtolower
+	* Make a string lowercase
+	* Note: The concept of a character's "case" only exists in some alphabets
+	* such as Latin, Greek, Cyrillic, Armenian and archaic Georgian - it does
+	* not exist in the Chinese alphabet, for example. See Unicode Standard
+	* Annex #21: Case Mappings
+	* 
+	* @author Andreas Gohr <andi@splitbrain.org>
+	* @param string
+	* @return mixed either string in lowercase or FALSE if UTF-8 invalid
+	*/
 	function utf8_strtolower($string)
 	{
 		global $UTF8_UPPER_TO_LOWER;
@@ -351,6 +456,18 @@ $UTF8_LOWER_TO_UPPER = array(
 		return utf8_from_unicode($uni);
 	}
 
+	/**
+	* UTF-8 aware alternative to strtoupper
+	* Make a string uppercase
+	* Note: The concept of a characters "case" only exists is some alphabets
+	* such as Latin, Greek, Cyrillic, Armenian and archaic Georgian - it does
+	* not exist in the Chinese alphabet, for example. See Unicode Standard
+	* Annex #21: Case Mappings
+	* 
+	* @author Andreas Gohr <andi@splitbrain.org>
+	* @param string
+	* @return mixed either string in lowercase or FALSE is UTF-8 invalid
+	*/
 	function utf8_strtoupper($str)
 	{
 		global $UTF8_LOWER_TO_UPPER;
@@ -372,6 +489,16 @@ $UTF8_LOWER_TO_UPPER = array(
 		return utf8_from_unicode($uni);
 	}
 
+	/**
+	* UTF-8 aware alternative to substr
+	* Return part of a string given character offset (and optionally length)
+	* 
+	* @author Harry Fuecks
+	* @param string
+	* @param integer number of UTF-8 characters offset (from left)
+	* @param integer (optional) length in UTF-8 characters from offset
+	* @return mixed string or FALSE if failure
+	*/
 	function utf8_substr($str, $offset,	$length	= null)
 	{
 		if ($offset >= 0 && $length >= 0)
@@ -436,8 +563,30 @@ $UTF8_LOWER_TO_UPPER = array(
 			}
 		}
 	}
+
+	/**
+	* Return the length (in characters) of a UTF-8 string
+	*
+	* @param	string	$text		UTF-8 string
+	* @return	integer				Length (in chars) of given string
+	*/
+	function utf8_strlen($text)
+	{
+		// Since utf8_decode is replacing multibyte characters to ? strlen works fine
+		return strlen(utf8_decode($text));
+	}
+
 }
 
+/**
+* UTF-8 aware alternative to str_split
+* Convert a string to an array
+* 
+* @author Harry Fuecks
+* @param string UTF-8 encoded
+* @param int number of characters to split string by
+* @return mixed array of substrings, or FALSE if the split length is invalid
+*/
 function utf8_str_split($str, $split_len = 1)
 {
 	if (!preg_match('/^[0-9]+$/', $split_len) || $split_len < 1)
@@ -455,6 +604,14 @@ function utf8_str_split($str, $split_len = 1)
 	return $ar[0];
 }
 
+/**
+* UTF-8 aware alternative to strspn
+* Find length of initial segment matching mask
+* 
+* @author Harry Fuecks
+* @param string
+* @return int
+*/
 function utf8_strspn($str, $mask, $start = null, $length = null)
 {
 	$mask = preg_replace('!([\\\\\\-\\]\\[/^])!', '\\\${1}', $mask);
@@ -474,6 +631,14 @@ function utf8_strspn($str, $mask, $start = null, $length = null)
     return 0;
 }
 
+/**
+* UTF-8 aware alternative to ucfirst
+* Make a string's first character uppercase
+* 
+* @author Harry Fuecks
+* @param string
+* @return string with first character as upper case (if applicable)
+*/
 function utf8_ucfirst($str)
 {
 	switch (utf8_strlen($str))
@@ -493,28 +658,6 @@ function utf8_ucfirst($str)
 	}
 }
 
-/**
-* Return the length (in characters) of a UTF-8 string
-*
-* @param	string	$text		UTF-8 string
-* @return	integer				Length (in chars) of given string
-*/
-function utf8_strlen($text)
-{
-	if (function_exists('iconv_strlen'))
-	{
-		return iconv_strlen($text, 'utf-8');
-	}
-
-	if (function_exists('mb_strlen'))
-	{
-		return mb_strlen($text, 'utf-8');
-	}
-
-	// Since utf8_decode is replacing multibyte characters to ? strlen works fine
-	return strlen(utf8_decode($text));
-}
-
 /**
 * Recode a string to UTF-8
 *
@@ -614,6 +757,12 @@ function utf8_encode_ncr_callback($m)
 	return '&#' . utf8_ord($m[0]) . ';';
 }
 
+/**
+ * Enter description here...
+ *
+ * @param string $chr UTF-8 char
+ * @return integer UNICODE code point
+ */
 function utf8_ord($chr)
 {
 	switch (strlen($chr))
@@ -639,6 +788,12 @@ function utf8_ord($chr)
 	}
 }
 
+/**
+ * Converts an NCR to a UTF-8 char
+ *
+ * @param integer $cp UNICODE code point
+ * @return string UTF-8 char
+ */
 function utf8_chr($cp)
 {
 	if ($cp > 0xFFFF)
@@ -694,7 +849,9 @@ function utf8_decode_ncr_callback($m)
 /**
  * Takes an UTF-8 string and returns an array of ints representing the
  * Unicode characters.
+ * 
  * @param  string  UTF-8 encoded string
+ * @return array array of UNICODE code points
  */
 function utf8_to_unicode($string)
 {
@@ -752,7 +909,8 @@ function utf8_to_unicode($string)
  * Takes an array of ints representing the Unicode characters and returns
  * a UTF-8 string.
  *
- * @param  array of unicode code points representing a string
+ * @param array $array array of unicode code points representing a string
+ * @return string UTF-8 character string
  */
 function utf8_from_unicode($array)
 {
-- 
cgit v1.2.1


From ea065f3e67ee0c0cb28c88569042461dd8cf2c27 Mon Sep 17 00:00:00 2001
From: Nils Adermann <naderman@naderman.de>
Date: Sun, 17 Sep 2006 22:02:28 +0000
Subject: - no more encoding mixture, say hello to UTF-8 (I'll add a validation
 solution for PHP 4.3.3/4 ASAP) [side effect: fixes Bug #3762] - take local
 server time into consideration for birthday/age calculation - faster active
 topic search - allow changing active topic time frame [Bug #4150] - reload
 stylesheet on language change [Bug #4222]

git-svn-id: file:///svn/phpbb/trunk@6380 89ea8834-ac86-4346-8a33-228a782c2dd0
---
 phpBB/includes/utf/utf_tools.php | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'phpBB/includes/utf')

diff --git a/phpBB/includes/utf/utf_tools.php b/phpBB/includes/utf/utf_tools.php
index 2f7c8de69a..a4de83ab0f 100644
--- a/phpBB/includes/utf/utf_tools.php
+++ b/phpBB/includes/utf/utf_tools.php
@@ -741,7 +741,7 @@ function utf8_recode($string, $encoding)
 */
 function utf8_encode_ncr($text)
 {
-	return preg_replace_callback('#[\\xC2-\\xF4][\\x80-\\xBF]+#', 'utf8_encode_ncr_callback', $text);
+	return preg_replace_callback('#[\\xC2-\\xF4][\\x80-\\xBF]?[\\x80-\\xBF]?[\\x80-\\xBF]+#', 'utf8_encode_ncr_callback', $text);
 }
 
 /**
-- 
cgit v1.2.1


From 26befa094147b542e48e36867eb41eaf424225f7 Mon Sep 17 00:00:00 2001
From: Meik Sievertsen <acydburn@phpbb.com>
Date: Thu, 28 Sep 2006 15:04:59 +0000
Subject: - added confirmation to removing bbcodes - added optional MX and
 DNSBL checks - added backtrace (triggering sql error) on error within
 sql_in_set as well as making sure it is handling an array - let users having
 f_list access to a forum actually see the forum without a topic list and not
 displaying an error message - this allows for giving people access to
 subforums but not the parent forum without the need to add the (sub-)forum to
 the index. - some additional bugfixes

git-svn-id: file:///svn/phpbb/trunk@6414 89ea8834-ac86-4346-8a33-228a782c2dd0
---
 phpBB/includes/utf/utf_tools.php | 326 ++++++++++++++++++++-------------------
 1 file changed, 170 insertions(+), 156 deletions(-)

(limited to 'phpBB/includes/utf')

diff --git a/phpBB/includes/utf/utf_tools.php b/phpBB/includes/utf/utf_tools.php
index a4de83ab0f..739b939f31 100644
--- a/phpBB/includes/utf/utf_tools.php
+++ b/phpBB/includes/utf/utf_tools.php
@@ -3,11 +3,18 @@
 *
 * @package phpBB3
 * @version $Id$
-* @copyright (c) 2005 phpBB Group 
+* @copyright (c) 2006 phpBB Group 
 * @license http://opensource.org/licenses/gpl-license.php GNU Public License 
 *
 */
 
+/**
+*/
+if (!defined('IN_PHPBB'))
+{
+	exit;
+}
+
 /**
 * UTF-8 tools
 *
@@ -20,12 +27,12 @@
 if (!extension_loaded('xml'))
 {
 	/**
-	 * Implementation of PHP's native utf8_encode for people without XML support
-	 * This function exploits some nice things that ISO-8859-1 and UTF-8 have in common
-	 *
-	 * @param string $str ISO-8859-1 encoded data
-	 * @return string UTF-8 encoded data
-	 */
+	* Implementation of PHP's native utf8_encode for people without XML support
+	* This function exploits some nice things that ISO-8859-1 and UTF-8 have in common
+	*
+	* @param string $str ISO-8859-1 encoded data
+	* @return string UTF-8 encoded data
+	*/
 	function utf8_encode($str)
 	{
 		$out = '';
@@ -51,17 +58,18 @@ if (!extension_loaded('xml'))
 	}
 
 	/**
-	 * Implementation of PHP's native utf8_decode for people without XML support
-	 *
-	 * @author GetID3()
-	 * @param string $string UTF-8 encoded data
-	 * @return string ISO-8859-1 encoded data
-	 */
+	* Implementation of PHP's native utf8_decode for people without XML support
+	*
+	* @author GetID3()
+	* @param string $string UTF-8 encoded data
+	* @return string ISO-8859-1 encoded data
+	*/
 	function utf8_decode($string)
 	{
 		$newcharstring = '';
 		$offset = 0;
 		$stringlength = strlen($string);
+
 		while ($offset < $stringlength)
 		{
 			$ord = ord($string{$offset});
@@ -101,11 +109,13 @@ if (!extension_loaded('xml'))
 				$charval = false;
 				$offset += 1;
 			}
+
 			if ($charval !== false)
 			{
 				$newcharstring .= (($charval < 256) ? chr($charval) : '?');
 			}
 		}
+
 		return $newcharstring;
 	}
 }
@@ -134,6 +144,7 @@ if (extension_loaded('mbstring'))
 			{
 				return false;
 			}
+
 			return mb_strrpos($str, $search);
 		}
 		else
@@ -164,7 +175,7 @@ if (extension_loaded('mbstring'))
 	* @param string needle
 	* @param integer offset in characters (from left)
 	* @return mixed integer position or FALSE on failure
-	 */
+	*/
 	function utf8_strpos($str, $needle, $offset = null)
 	{
 		if ($offset === false)
@@ -262,11 +273,12 @@ else
 		{
 			$ar	= explode($needle, $str);
 			
-			if (count($ar) > 1)
+			if (sizeof($ar) > 1)
 			{
 				// Pop off the end of the string where the last	match was made
 				array_pop($ar);
-				$str = join($needle,$ar);
+				$str = join($needle, $ar);
+
 				return utf8_strlen($str);
 			}
 			return false;
@@ -278,14 +290,14 @@ else
 				trigger_error('utf8_strrpos	expects	parameter 3	to be long', E_USER_WARNING);
 				return false;
 			}
-			
+
 			$str = utf8_substr($str, $offset);
-			
+
 			if (false !== ($pos = utf8_strrpos($str, $needle)))
 			{
 				return $pos	+ $offset;
 			}
-			
+
 			return false;
 		}
 	}
@@ -299,14 +311,14 @@ else
 	* @param string needle
 	* @param integer offset in characters (from left)
 	* @return mixed integer position or FALSE on failure
-	 */
+	*/
 	function utf8_strpos($str, $needle, $offset = null)
 	{
 		// native
 		if (is_null($offset))
 		{
 			$ar = explode($needle, $str);
-			if (count($ar) > 1)
+			if (sizeof($ar) > 1)
 			{
 				return utf8_strlen($ar[0]);
 			}
@@ -331,97 +343,97 @@ else
 		}
 	}
 
-$UTF8_UPPER_TO_LOWER = array(
-			0x0041=>0x0061, 0x03A6=>0x03C6, 0x0162=>0x0163, 0x00C5=>0x00E5, 0x0042=>0x0062,
-			0x0139=>0x013A, 0x00C1=>0x00E1, 0x0141=>0x0142, 0x038E=>0x03CD, 0x0100=>0x0101,
-			0x0490=>0x0491, 0x0394=>0x03B4, 0x015A=>0x015B, 0x0044=>0x0064, 0x0393=>0x03B3,
-			0x00D4=>0x00F4, 0x042A=>0x044A, 0x0419=>0x0439, 0x0112=>0x0113, 0x041C=>0x043C,
-			0x015E=>0x015F, 0x0143=>0x0144, 0x00CE=>0x00EE, 0x040E=>0x045E, 0x042F=>0x044F,
-			0x039A=>0x03BA, 0x0154=>0x0155, 0x0049=>0x0069, 0x0053=>0x0073, 0x1E1E=>0x1E1F,
-			0x0134=>0x0135, 0x0427=>0x0447, 0x03A0=>0x03C0, 0x0418=>0x0438, 0x00D3=>0x00F3,
-			0x0420=>0x0440, 0x0404=>0x0454, 0x0415=>0x0435, 0x0429=>0x0449, 0x014A=>0x014B,
-			0x0411=>0x0431, 0x0409=>0x0459, 0x1E02=>0x1E03, 0x00D6=>0x00F6, 0x00D9=>0x00F9,
-			0x004E=>0x006E, 0x0401=>0x0451, 0x03A4=>0x03C4, 0x0423=>0x0443, 0x015C=>0x015D,
-			0x0403=>0x0453, 0x03A8=>0x03C8, 0x0158=>0x0159, 0x0047=>0x0067, 0x00C4=>0x00E4,
-			0x0386=>0x03AC, 0x0389=>0x03AE, 0x0166=>0x0167, 0x039E=>0x03BE, 0x0164=>0x0165,
-			0x0116=>0x0117, 0x0108=>0x0109, 0x0056=>0x0076, 0x00DE=>0x00FE, 0x0156=>0x0157,
-			0x00DA=>0x00FA, 0x1E60=>0x1E61, 0x1E82=>0x1E83, 0x00C2=>0x00E2, 0x0118=>0x0119,
-			0x0145=>0x0146, 0x0050=>0x0070, 0x0150=>0x0151, 0x042E=>0x044E, 0x0128=>0x0129,
-			0x03A7=>0x03C7, 0x013D=>0x013E, 0x0422=>0x0442, 0x005A=>0x007A, 0x0428=>0x0448,
-			0x03A1=>0x03C1, 0x1E80=>0x1E81, 0x016C=>0x016D, 0x00D5=>0x00F5, 0x0055=>0x0075,
-			0x0176=>0x0177, 0x00DC=>0x00FC, 0x1E56=>0x1E57, 0x03A3=>0x03C3, 0x041A=>0x043A,
-			0x004D=>0x006D, 0x016A=>0x016B, 0x0170=>0x0171, 0x0424=>0x0444, 0x00CC=>0x00EC,
-			0x0168=>0x0169, 0x039F=>0x03BF, 0x004B=>0x006B, 0x00D2=>0x00F2, 0x00C0=>0x00E0,
-			0x0414=>0x0434, 0x03A9=>0x03C9, 0x1E6A=>0x1E6B, 0x00C3=>0x00E3, 0x042D=>0x044D,
-			0x0416=>0x0436, 0x01A0=>0x01A1, 0x010C=>0x010D, 0x011C=>0x011D, 0x00D0=>0x00F0,
-			0x013B=>0x013C, 0x040F=>0x045F, 0x040A=>0x045A, 0x00C8=>0x00E8, 0x03A5=>0x03C5,
-			0x0046=>0x0066, 0x00DD=>0x00FD, 0x0043=>0x0063, 0x021A=>0x021B, 0x00CA=>0x00EA,
-			0x0399=>0x03B9, 0x0179=>0x017A, 0x00CF=>0x00EF, 0x01AF=>0x01B0, 0x0045=>0x0065,
-			0x039B=>0x03BB, 0x0398=>0x03B8, 0x039C=>0x03BC, 0x040C=>0x045C, 0x041F=>0x043F,
-			0x042C=>0x044C, 0x00DE=>0x00FE, 0x00D0=>0x00F0, 0x1EF2=>0x1EF3, 0x0048=>0x0068,
-			0x00CB=>0x00EB, 0x0110=>0x0111, 0x0413=>0x0433, 0x012E=>0x012F, 0x00C6=>0x00E6,
-			0x0058=>0x0078, 0x0160=>0x0161, 0x016E=>0x016F, 0x0391=>0x03B1, 0x0407=>0x0457,
-			0x0172=>0x0173, 0x0178=>0x00FF, 0x004F=>0x006F, 0x041B=>0x043B, 0x0395=>0x03B5,
-			0x0425=>0x0445, 0x0120=>0x0121, 0x017D=>0x017E, 0x017B=>0x017C, 0x0396=>0x03B6,
-			0x0392=>0x03B2, 0x0388=>0x03AD, 0x1E84=>0x1E85, 0x0174=>0x0175, 0x0051=>0x0071,
-			0x0417=>0x0437, 0x1E0A=>0x1E0B, 0x0147=>0x0148, 0x0104=>0x0105, 0x0408=>0x0458,
-			0x014C=>0x014D, 0x00CD=>0x00ED, 0x0059=>0x0079, 0x010A=>0x010B, 0x038F=>0x03CE,
-			0x0052=>0x0072, 0x0410=>0x0430, 0x0405=>0x0455, 0x0402=>0x0452, 0x0126=>0x0127,
-			0x0136=>0x0137, 0x012A=>0x012B, 0x038A=>0x03AF, 0x042B=>0x044B, 0x004C=>0x006C,
-			0x0397=>0x03B7, 0x0124=>0x0125, 0x0218=>0x0219, 0x00DB=>0x00FB, 0x011E=>0x011F,
-			0x041E=>0x043E, 0x1E40=>0x1E41, 0x039D=>0x03BD, 0x0106=>0x0107, 0x03AB=>0x03CB,
-			0x0426=>0x0446, 0x00DE=>0x00FE, 0x00C7=>0x00E7, 0x03AA=>0x03CA, 0x0421=>0x0441,
-			0x0412=>0x0432, 0x010E=>0x010F, 0x00D8=>0x00F8, 0x0057=>0x0077, 0x011A=>0x011B,
-			0x0054=>0x0074, 0x004A=>0x006A, 0x040B=>0x045B, 0x0406=>0x0456, 0x0102=>0x0103,
-			0x039B=>0x03BB, 0x00D1=>0x00F1, 0x041D=>0x043D, 0x038C=>0x03CC, 0x00C9=>0x00E9,
-			0x00D0=>0x00F0, 0x0407=>0x0457, 0x0122=>0x0123,
-		);
-
-$UTF8_LOWER_TO_UPPER = array(
-			0x0061=>0x0041, 0x03C6=>0x03A6, 0x0163=>0x0162, 0x00E5=>0x00C5, 0x0062=>0x0042,
-			0x013A=>0x0139, 0x00E1=>0x00C1, 0x0142=>0x0141, 0x03CD=>0x038E, 0x0101=>0x0100,
-			0x0491=>0x0490, 0x03B4=>0x0394, 0x015B=>0x015A, 0x0064=>0x0044, 0x03B3=>0x0393,
-			0x00F4=>0x00D4, 0x044A=>0x042A, 0x0439=>0x0419, 0x0113=>0x0112, 0x043C=>0x041C,
-			0x015F=>0x015E, 0x0144=>0x0143, 0x00EE=>0x00CE, 0x045E=>0x040E, 0x044F=>0x042F,
-			0x03BA=>0x039A, 0x0155=>0x0154, 0x0069=>0x0049, 0x0073=>0x0053, 0x1E1F=>0x1E1E,
-			0x0135=>0x0134, 0x0447=>0x0427, 0x03C0=>0x03A0, 0x0438=>0x0418, 0x00F3=>0x00D3,
-			0x0440=>0x0420, 0x0454=>0x0404, 0x0435=>0x0415, 0x0449=>0x0429, 0x014B=>0x014A,
-			0x0431=>0x0411, 0x0459=>0x0409, 0x1E03=>0x1E02, 0x00F6=>0x00D6, 0x00F9=>0x00D9,
-			0x006E=>0x004E, 0x0451=>0x0401, 0x03C4=>0x03A4, 0x0443=>0x0423, 0x015D=>0x015C,
-			0x0453=>0x0403, 0x03C8=>0x03A8, 0x0159=>0x0158, 0x0067=>0x0047, 0x00E4=>0x00C4,
-			0x03AC=>0x0386, 0x03AE=>0x0389, 0x0167=>0x0166, 0x03BE=>0x039E, 0x0165=>0x0164,
-			0x0117=>0x0116, 0x0109=>0x0108, 0x0076=>0x0056, 0x00FE=>0x00DE, 0x0157=>0x0156,
-			0x00FA=>0x00DA, 0x1E61=>0x1E60, 0x1E83=>0x1E82, 0x00E2=>0x00C2, 0x0119=>0x0118,
-			0x0146=>0x0145, 0x0070=>0x0050, 0x0151=>0x0150, 0x044E=>0x042E, 0x0129=>0x0128,
-			0x03C7=>0x03A7, 0x013E=>0x013D, 0x0442=>0x0422, 0x007A=>0x005A, 0x0448=>0x0428,
-			0x03C1=>0x03A1, 0x1E81=>0x1E80, 0x016D=>0x016C, 0x00F5=>0x00D5, 0x0075=>0x0055,
-			0x0177=>0x0176, 0x00FC=>0x00DC, 0x1E57=>0x1E56, 0x03C3=>0x03A3, 0x043A=>0x041A,
-			0x006D=>0x004D, 0x016B=>0x016A, 0x0171=>0x0170, 0x0444=>0x0424, 0x00EC=>0x00CC,
-			0x0169=>0x0168, 0x03BF=>0x039F, 0x006B=>0x004B, 0x00F2=>0x00D2, 0x00E0=>0x00C0,
-			0x0434=>0x0414, 0x03C9=>0x03A9, 0x1E6B=>0x1E6A, 0x00E3=>0x00C3, 0x044D=>0x042D,
-			0x0436=>0x0416, 0x01A1=>0x01A0, 0x010D=>0x010C, 0x011D=>0x011C, 0x00F0=>0x00D0,
-			0x013C=>0x013B, 0x045F=>0x040F, 0x045A=>0x040A, 0x00E8=>0x00C8, 0x03C5=>0x03A5,
-			0x0066=>0x0046, 0x00FD=>0x00DD, 0x0063=>0x0043, 0x021B=>0x021A, 0x00EA=>0x00CA,
-			0x03B9=>0x0399, 0x017A=>0x0179, 0x00EF=>0x00CF, 0x01B0=>0x01AF, 0x0065=>0x0045,
-			0x03BB=>0x039B, 0x03B8=>0x0398, 0x03BC=>0x039C, 0x045C=>0x040C, 0x043F=>0x041F,
-			0x044C=>0x042C, 0x00FE=>0x00DE, 0x00F0=>0x00D0, 0x1EF3=>0x1EF2, 0x0068=>0x0048,
-			0x00EB=>0x00CB, 0x0111=>0x0110, 0x0433=>0x0413, 0x012F=>0x012E, 0x00E6=>0x00C6,
-			0x0078=>0x0058, 0x0161=>0x0160, 0x016F=>0x016E, 0x03B1=>0x0391, 0x0457=>0x0407,
-			0x0173=>0x0172, 0x00FF=>0x0178, 0x006F=>0x004F, 0x043B=>0x041B, 0x03B5=>0x0395,
-			0x0445=>0x0425, 0x0121=>0x0120, 0x017E=>0x017D, 0x017C=>0x017B, 0x03B6=>0x0396,
-			0x03B2=>0x0392, 0x03AD=>0x0388, 0x1E85=>0x1E84, 0x0175=>0x0174, 0x0071=>0x0051,
-			0x0437=>0x0417, 0x1E0B=>0x1E0A, 0x0148=>0x0147, 0x0105=>0x0104, 0x0458=>0x0408,
-			0x014D=>0x014C, 0x00ED=>0x00CD, 0x0079=>0x0059, 0x010B=>0x010A, 0x03CE=>0x038F,
-			0x0072=>0x0052, 0x0430=>0x0410, 0x0455=>0x0405, 0x0452=>0x0402, 0x0127=>0x0126,
-			0x0137=>0x0136, 0x012B=>0x012A, 0x03AF=>0x038A, 0x044B=>0x042B, 0x006C=>0x004C,
-			0x03B7=>0x0397, 0x0125=>0x0124, 0x0219=>0x0218, 0x00FB=>0x00DB, 0x011F=>0x011E,
-			0x043E=>0x041E, 0x1E41=>0x1E40, 0x03BD=>0x039D, 0x0107=>0x0106, 0x03CB=>0x03AB,
-			0x0446=>0x0426, 0x00FE=>0x00DE, 0x00E7=>0x00C7, 0x03CA=>0x03AA, 0x0441=>0x0421,
-			0x0432=>0x0412, 0x010F=>0x010E, 0x00F8=>0x00D8, 0x0077=>0x0057, 0x011B=>0x011A,
-			0x0074=>0x0054, 0x006A=>0x004A, 0x045B=>0x040B, 0x0456=>0x0406, 0x0103=>0x0102,
-			0x03BB=>0x039B, 0x00F1=>0x00D1, 0x043D=>0x041D, 0x03CC=>0x038C, 0x00E9=>0x00C9,
-			0x00F0=>0x00D0, 0x0457=>0x0407, 0x0123=>0x0122,
-		);
+	$UTF8_UPPER_TO_LOWER = array(
+		0x0041=>0x0061, 0x03A6=>0x03C6, 0x0162=>0x0163, 0x00C5=>0x00E5, 0x0042=>0x0062,
+		0x0139=>0x013A, 0x00C1=>0x00E1, 0x0141=>0x0142, 0x038E=>0x03CD, 0x0100=>0x0101,
+		0x0490=>0x0491, 0x0394=>0x03B4, 0x015A=>0x015B, 0x0044=>0x0064, 0x0393=>0x03B3,
+		0x00D4=>0x00F4, 0x042A=>0x044A, 0x0419=>0x0439, 0x0112=>0x0113, 0x041C=>0x043C,
+		0x015E=>0x015F, 0x0143=>0x0144, 0x00CE=>0x00EE, 0x040E=>0x045E, 0x042F=>0x044F,
+		0x039A=>0x03BA, 0x0154=>0x0155, 0x0049=>0x0069, 0x0053=>0x0073, 0x1E1E=>0x1E1F,
+		0x0134=>0x0135, 0x0427=>0x0447, 0x03A0=>0x03C0, 0x0418=>0x0438, 0x00D3=>0x00F3,
+		0x0420=>0x0440, 0x0404=>0x0454, 0x0415=>0x0435, 0x0429=>0x0449, 0x014A=>0x014B,
+		0x0411=>0x0431, 0x0409=>0x0459, 0x1E02=>0x1E03, 0x00D6=>0x00F6, 0x00D9=>0x00F9,
+		0x004E=>0x006E, 0x0401=>0x0451, 0x03A4=>0x03C4, 0x0423=>0x0443, 0x015C=>0x015D,
+		0x0403=>0x0453, 0x03A8=>0x03C8, 0x0158=>0x0159, 0x0047=>0x0067, 0x00C4=>0x00E4,
+		0x0386=>0x03AC, 0x0389=>0x03AE, 0x0166=>0x0167, 0x039E=>0x03BE, 0x0164=>0x0165,
+		0x0116=>0x0117, 0x0108=>0x0109, 0x0056=>0x0076, 0x00DE=>0x00FE, 0x0156=>0x0157,
+		0x00DA=>0x00FA, 0x1E60=>0x1E61, 0x1E82=>0x1E83, 0x00C2=>0x00E2, 0x0118=>0x0119,
+		0x0145=>0x0146, 0x0050=>0x0070, 0x0150=>0x0151, 0x042E=>0x044E, 0x0128=>0x0129,
+		0x03A7=>0x03C7, 0x013D=>0x013E, 0x0422=>0x0442, 0x005A=>0x007A, 0x0428=>0x0448,
+		0x03A1=>0x03C1, 0x1E80=>0x1E81, 0x016C=>0x016D, 0x00D5=>0x00F5, 0x0055=>0x0075,
+		0x0176=>0x0177, 0x00DC=>0x00FC, 0x1E56=>0x1E57, 0x03A3=>0x03C3, 0x041A=>0x043A,
+		0x004D=>0x006D, 0x016A=>0x016B, 0x0170=>0x0171, 0x0424=>0x0444, 0x00CC=>0x00EC,
+		0x0168=>0x0169, 0x039F=>0x03BF, 0x004B=>0x006B, 0x00D2=>0x00F2, 0x00C0=>0x00E0,
+		0x0414=>0x0434, 0x03A9=>0x03C9, 0x1E6A=>0x1E6B, 0x00C3=>0x00E3, 0x042D=>0x044D,
+		0x0416=>0x0436, 0x01A0=>0x01A1, 0x010C=>0x010D, 0x011C=>0x011D, 0x00D0=>0x00F0,
+		0x013B=>0x013C, 0x040F=>0x045F, 0x040A=>0x045A, 0x00C8=>0x00E8, 0x03A5=>0x03C5,
+		0x0046=>0x0066, 0x00DD=>0x00FD, 0x0043=>0x0063, 0x021A=>0x021B, 0x00CA=>0x00EA,
+		0x0399=>0x03B9, 0x0179=>0x017A, 0x00CF=>0x00EF, 0x01AF=>0x01B0, 0x0045=>0x0065,
+		0x039B=>0x03BB, 0x0398=>0x03B8, 0x039C=>0x03BC, 0x040C=>0x045C, 0x041F=>0x043F,
+		0x042C=>0x044C, 0x00DE=>0x00FE, 0x00D0=>0x00F0, 0x1EF2=>0x1EF3, 0x0048=>0x0068,
+		0x00CB=>0x00EB, 0x0110=>0x0111, 0x0413=>0x0433, 0x012E=>0x012F, 0x00C6=>0x00E6,
+		0x0058=>0x0078, 0x0160=>0x0161, 0x016E=>0x016F, 0x0391=>0x03B1, 0x0407=>0x0457,
+		0x0172=>0x0173, 0x0178=>0x00FF, 0x004F=>0x006F, 0x041B=>0x043B, 0x0395=>0x03B5,
+		0x0425=>0x0445, 0x0120=>0x0121, 0x017D=>0x017E, 0x017B=>0x017C, 0x0396=>0x03B6,
+		0x0392=>0x03B2, 0x0388=>0x03AD, 0x1E84=>0x1E85, 0x0174=>0x0175, 0x0051=>0x0071,
+		0x0417=>0x0437, 0x1E0A=>0x1E0B, 0x0147=>0x0148, 0x0104=>0x0105, 0x0408=>0x0458,
+		0x014C=>0x014D, 0x00CD=>0x00ED, 0x0059=>0x0079, 0x010A=>0x010B, 0x038F=>0x03CE,
+		0x0052=>0x0072, 0x0410=>0x0430, 0x0405=>0x0455, 0x0402=>0x0452, 0x0126=>0x0127,
+		0x0136=>0x0137, 0x012A=>0x012B, 0x038A=>0x03AF, 0x042B=>0x044B, 0x004C=>0x006C,
+		0x0397=>0x03B7, 0x0124=>0x0125, 0x0218=>0x0219, 0x00DB=>0x00FB, 0x011E=>0x011F,
+		0x041E=>0x043E, 0x1E40=>0x1E41, 0x039D=>0x03BD, 0x0106=>0x0107, 0x03AB=>0x03CB,
+		0x0426=>0x0446, 0x00DE=>0x00FE, 0x00C7=>0x00E7, 0x03AA=>0x03CA, 0x0421=>0x0441,
+		0x0412=>0x0432, 0x010E=>0x010F, 0x00D8=>0x00F8, 0x0057=>0x0077, 0x011A=>0x011B,
+		0x0054=>0x0074, 0x004A=>0x006A, 0x040B=>0x045B, 0x0406=>0x0456, 0x0102=>0x0103,
+		0x039B=>0x03BB, 0x00D1=>0x00F1, 0x041D=>0x043D, 0x038C=>0x03CC, 0x00C9=>0x00E9,
+		0x00D0=>0x00F0, 0x0407=>0x0457, 0x0122=>0x0123,
+	);
+
+	$UTF8_LOWER_TO_UPPER = array(
+		0x0061=>0x0041, 0x03C6=>0x03A6, 0x0163=>0x0162, 0x00E5=>0x00C5, 0x0062=>0x0042,
+		0x013A=>0x0139, 0x00E1=>0x00C1, 0x0142=>0x0141, 0x03CD=>0x038E, 0x0101=>0x0100,
+		0x0491=>0x0490, 0x03B4=>0x0394, 0x015B=>0x015A, 0x0064=>0x0044, 0x03B3=>0x0393,
+		0x00F4=>0x00D4, 0x044A=>0x042A, 0x0439=>0x0419, 0x0113=>0x0112, 0x043C=>0x041C,
+		0x015F=>0x015E, 0x0144=>0x0143, 0x00EE=>0x00CE, 0x045E=>0x040E, 0x044F=>0x042F,
+		0x03BA=>0x039A, 0x0155=>0x0154, 0x0069=>0x0049, 0x0073=>0x0053, 0x1E1F=>0x1E1E,
+		0x0135=>0x0134, 0x0447=>0x0427, 0x03C0=>0x03A0, 0x0438=>0x0418, 0x00F3=>0x00D3,
+		0x0440=>0x0420, 0x0454=>0x0404, 0x0435=>0x0415, 0x0449=>0x0429, 0x014B=>0x014A,
+		0x0431=>0x0411, 0x0459=>0x0409, 0x1E03=>0x1E02, 0x00F6=>0x00D6, 0x00F9=>0x00D9,
+		0x006E=>0x004E, 0x0451=>0x0401, 0x03C4=>0x03A4, 0x0443=>0x0423, 0x015D=>0x015C,
+		0x0453=>0x0403, 0x03C8=>0x03A8, 0x0159=>0x0158, 0x0067=>0x0047, 0x00E4=>0x00C4,
+		0x03AC=>0x0386, 0x03AE=>0x0389, 0x0167=>0x0166, 0x03BE=>0x039E, 0x0165=>0x0164,
+		0x0117=>0x0116, 0x0109=>0x0108, 0x0076=>0x0056, 0x00FE=>0x00DE, 0x0157=>0x0156,
+		0x00FA=>0x00DA, 0x1E61=>0x1E60, 0x1E83=>0x1E82, 0x00E2=>0x00C2, 0x0119=>0x0118,
+		0x0146=>0x0145, 0x0070=>0x0050, 0x0151=>0x0150, 0x044E=>0x042E, 0x0129=>0x0128,
+		0x03C7=>0x03A7, 0x013E=>0x013D, 0x0442=>0x0422, 0x007A=>0x005A, 0x0448=>0x0428,
+		0x03C1=>0x03A1, 0x1E81=>0x1E80, 0x016D=>0x016C, 0x00F5=>0x00D5, 0x0075=>0x0055,
+		0x0177=>0x0176, 0x00FC=>0x00DC, 0x1E57=>0x1E56, 0x03C3=>0x03A3, 0x043A=>0x041A,
+		0x006D=>0x004D, 0x016B=>0x016A, 0x0171=>0x0170, 0x0444=>0x0424, 0x00EC=>0x00CC,
+		0x0169=>0x0168, 0x03BF=>0x039F, 0x006B=>0x004B, 0x00F2=>0x00D2, 0x00E0=>0x00C0,
+		0x0434=>0x0414, 0x03C9=>0x03A9, 0x1E6B=>0x1E6A, 0x00E3=>0x00C3, 0x044D=>0x042D,
+		0x0436=>0x0416, 0x01A1=>0x01A0, 0x010D=>0x010C, 0x011D=>0x011C, 0x00F0=>0x00D0,
+		0x013C=>0x013B, 0x045F=>0x040F, 0x045A=>0x040A, 0x00E8=>0x00C8, 0x03C5=>0x03A5,
+		0x0066=>0x0046, 0x00FD=>0x00DD, 0x0063=>0x0043, 0x021B=>0x021A, 0x00EA=>0x00CA,
+		0x03B9=>0x0399, 0x017A=>0x0179, 0x00EF=>0x00CF, 0x01B0=>0x01AF, 0x0065=>0x0045,
+		0x03BB=>0x039B, 0x03B8=>0x0398, 0x03BC=>0x039C, 0x045C=>0x040C, 0x043F=>0x041F,
+		0x044C=>0x042C, 0x00FE=>0x00DE, 0x00F0=>0x00D0, 0x1EF3=>0x1EF2, 0x0068=>0x0048,
+		0x00EB=>0x00CB, 0x0111=>0x0110, 0x0433=>0x0413, 0x012F=>0x012E, 0x00E6=>0x00C6,
+		0x0078=>0x0058, 0x0161=>0x0160, 0x016F=>0x016E, 0x03B1=>0x0391, 0x0457=>0x0407,
+		0x0173=>0x0172, 0x00FF=>0x0178, 0x006F=>0x004F, 0x043B=>0x041B, 0x03B5=>0x0395,
+		0x0445=>0x0425, 0x0121=>0x0120, 0x017E=>0x017D, 0x017C=>0x017B, 0x03B6=>0x0396,
+		0x03B2=>0x0392, 0x03AD=>0x0388, 0x1E85=>0x1E84, 0x0175=>0x0174, 0x0071=>0x0051,
+		0x0437=>0x0417, 0x1E0B=>0x1E0A, 0x0148=>0x0147, 0x0105=>0x0104, 0x0458=>0x0408,
+		0x014D=>0x014C, 0x00ED=>0x00CD, 0x0079=>0x0059, 0x010B=>0x010A, 0x03CE=>0x038F,
+		0x0072=>0x0052, 0x0430=>0x0410, 0x0455=>0x0405, 0x0452=>0x0402, 0x0127=>0x0126,
+		0x0137=>0x0136, 0x012B=>0x012A, 0x03AF=>0x038A, 0x044B=>0x042B, 0x006C=>0x004C,
+		0x03B7=>0x0397, 0x0125=>0x0124, 0x0219=>0x0218, 0x00FB=>0x00DB, 0x011F=>0x011E,
+		0x043E=>0x041E, 0x1E41=>0x1E40, 0x03BD=>0x039D, 0x0107=>0x0106, 0x03CB=>0x03AB,
+		0x0446=>0x0426, 0x00FE=>0x00DE, 0x00E7=>0x00C7, 0x03CA=>0x03AA, 0x0441=>0x0421,
+		0x0432=>0x0412, 0x010F=>0x010E, 0x00F8=>0x00D8, 0x0077=>0x0057, 0x011B=>0x011A,
+		0x0074=>0x0054, 0x006A=>0x004A, 0x045B=>0x040B, 0x0456=>0x0406, 0x0103=>0x0102,
+		0x03BB=>0x039B, 0x00F1=>0x00D1, 0x043D=>0x041D, 0x03CC=>0x038C, 0x00E9=>0x00C9,
+		0x00F0=>0x00D0, 0x0457=>0x0407, 0x0123=>0x0122,
+	);
 
 	/**
 	* UTF-8 aware alternative to strtolower
@@ -438,14 +450,15 @@ $UTF8_LOWER_TO_UPPER = array(
 	function utf8_strtolower($string)
 	{
 		global $UTF8_UPPER_TO_LOWER;
+
 		$uni = utf8_to_unicode($string);
+
 		if (!$uni)
 		{
 			return false;
 		}
 
-		$cnt = count($uni);
-		for ($i = 0; $i < $cnt; $i++)
+		for ($i = 0, $cnt = sizeof($uni); $i < $cnt; $i++)
 		{
 			if (isset($UTF8_UPPER_TO_LOWER[$uni[$i]]))
 			{
@@ -471,14 +484,15 @@ $UTF8_LOWER_TO_UPPER = array(
 	function utf8_strtoupper($str)
 	{
 		global $UTF8_LOWER_TO_UPPER;
+
 		$uni = utf8_to_unicode($string);
+
 		if (!$uni)
 		{
 			return false;
 		}
 
-		$cnt = count($uni);
-		for ($i = 0; $i < $cnt; $i++)
+		for ($i = 0, $cnt = sizeof($uni); $i < $cnt; $i++)
 		{
 			if (isset($UTF8_LOWER_TO_UPPER[$uni[$i]]))
 			{
@@ -514,13 +528,13 @@ $UTF8_LOWER_TO_UPPER = array(
 					trigger_error('utf8_substr expects parameter 3 to be long', E_USER_WARNING);
 					return false;
 				}
-				
+
 				$strlen = strlen(utf8_decode($str));
 				if ($offset > $strlen)
 				{
 					return '';
 				}
-				
+
 				if (($offset + $length) > $strlen)
 				{
 					$length = '*';
@@ -530,29 +544,30 @@ $UTF8_LOWER_TO_UPPER = array(
 					$length = '{' . $length . '}';
 				}
 			}
-			
+
 			if (!preg_match('/^[0-9]+$/', $offset))
 			{
 				trigger_error('utf8_substr expects parameter 2 to be long', E_USER_WARNING);
 				return false;
 			}
-			
+
 			$pattern = '/^.{' . $offset . '}(.' . $length . ')/us';
-			
+
 			preg_match($pattern, $str, $matches);
-			
+
 			if (isset($matches[1]))
 			{
 				return $matches[1];
 			}
-			
+
 			return false;
 		}
 		else
 		{
-			// Handle	negatives using	different, slower technique
+			// Handle negatives using different, slower technique
 			// From: http://www.php.net/manual/en/function.substr.php#44838
 			preg_match_all('/./u', $str, $ar);
+
 			if ($length !== null)
 			{
 				return join('', array_slice($ar[0], $offset, $length));
@@ -575,7 +590,6 @@ $UTF8_LOWER_TO_UPPER = array(
 		// Since utf8_decode is replacing multibyte characters to ? strlen works fine
 		return strlen(utf8_decode($text));
 	}
-
 }
 
 /**
@@ -593,7 +607,7 @@ function utf8_str_split($str, $split_len = 1)
 	{
 		return false;
 	}
-	
+
 	$len = utf8_strlen($str);
 	if ($len <= $split_len)
 	{
@@ -615,12 +629,12 @@ function utf8_str_split($str, $split_len = 1)
 function utf8_strspn($str, $mask, $start = null, $length = null)
 {
 	$mask = preg_replace('!([\\\\\\-\\]\\[/^])!', '\\\${1}', $mask);
-    
+
 	if ($start !== null || $length !== null)
 	{
 		$str = utf8_substr($str, $start, $length);
-    }
-        
+	}
+
 	preg_match('/^[' . $mask . ']+/u', $str, $matches);
 
 	if (isset($matches[0]))
@@ -628,7 +642,7 @@ function utf8_strspn($str, $mask, $start = null, $length = null)
 		return utf8_strlen($matches[0]);
 	}
 
-    return 0;
+	return 0;
 }
 
 /**
@@ -664,7 +678,7 @@ function utf8_ucfirst($str)
 * If the encoding is not supported, the string is returned as-is
 *
 * @param	string	$string		Original string
-* @param	string	$encoding	Original encoding
+* @param	string	$encoding	Original encoding (lowered)
 * @return	string				The string, encoded in UTF-8
 */
 function utf8_recode($string, $encoding)
@@ -676,7 +690,6 @@ function utf8_recode($string, $encoding)
 		return $string;
 	}
 
-
 	// start with something simple
 	if ($encoding == 'iso-8859-1')
 	{
@@ -758,11 +771,11 @@ function utf8_encode_ncr_callback($m)
 }
 
 /**
- * Enter description here...
- *
- * @param string $chr UTF-8 char
- * @return integer UNICODE code point
- */
+* Enter description here...
+*
+* @param string $chr UTF-8 char
+* @return integer UNICODE code point
+*/
 function utf8_ord($chr)
 {
 	switch (strlen($chr))
@@ -789,22 +802,22 @@ function utf8_ord($chr)
 }
 
 /**
- * Converts an NCR to a UTF-8 char
- *
- * @param integer $cp UNICODE code point
- * @return string UTF-8 char
- */
+* Converts an NCR to a UTF-8 char
+*
+* @param integer $cp UNICODE code point
+* @return string UTF-8 char
+*/
 function utf8_chr($cp)
 {
 	if ($cp > 0xFFFF)
 	{
 		return chr(0xF0 | ($cp >> 18)) . chr(0x80 | (($cp >> 12) & 0x3F)) . chr(0x80 | (($cp >> 6) & 0x3F)) . chr(0x80 | ($cp & 0x3F));
 	}
-	elseif ($cp > 0x7FF)
+	else if ($cp > 0x7FF)
 	{
 		return chr(0xE0 | ($cp >> 12)) . chr(0x80 | (($cp >> 6) & 0x3F)) . chr(0x80 | ($cp & 0x3F));
 	}
-	elseif ($cp > 0x7F)
+	else if ($cp > 0x7F)
 	{
 		return chr(0xC0 | ($cp >> 6)) . chr(0x80 | ($cp & 0x3F));
 	}
@@ -847,17 +860,18 @@ function utf8_decode_ncr_callback($m)
 }
 
 /**
- * Takes an UTF-8 string and returns an array of ints representing the
- * Unicode characters.
- * 
- * @param  string  UTF-8 encoded string
- * @return array array of UNICODE code points
- */
+* Takes an UTF-8 string and returns an array of ints representing the
+* Unicode characters.
+* 
+* @param  string  UTF-8 encoded string
+* @return array array of UNICODE code points
+*/
 function utf8_to_unicode($string)
 {
 	$unicode = array();
 	$offset = 0;
 	$stringlength = strlen($string);
+
 	while ($offset < $stringlength)
 	{
 		$ord = ord($string{$offset});
@@ -906,12 +920,12 @@ function utf8_to_unicode($string)
 }
 
 /**
- * Takes an array of ints representing the Unicode characters and returns
- * a UTF-8 string.
- *
- * @param array $array array of unicode code points representing a string
- * @return string UTF-8 character string
- */
+* Takes an array of ints representing the Unicode characters and returns
+* a UTF-8 string.
+*
+* @param array $array array of unicode code points representing a string
+* @return string UTF-8 character string
+*/
 function utf8_from_unicode($array)
 {
 	$str = '';
-- 
cgit v1.2.1


From bc15445b58403c92ebca9e23ef3d9a59fbdccc92 Mon Sep 17 00:00:00 2001
From: David M <davidmj@users.sourceforge.net>
Date: Sun, 1 Oct 2006 08:48:32 +0000
Subject: - forgot to make the same change to the ODBC driver - MySQL 3.x works
 now - FirebirdSQL is now on the same level as MySQL and PostgreSQL, zero
 hacks exist inside the core code now

git-svn-id: file:///svn/phpbb/trunk@6422 89ea8834-ac86-4346-8a33-228a782c2dd0
---
 phpBB/includes/utf/utf_tools.php | 45 ++++++++++++++++++++++++++++------------
 1 file changed, 32 insertions(+), 13 deletions(-)

(limited to 'phpBB/includes/utf')

diff --git a/phpBB/includes/utf/utf_tools.php b/phpBB/includes/utf/utf_tools.php
index 739b939f31..a906cc6ffb 100644
--- a/phpBB/includes/utf/utf_tools.php
+++ b/phpBB/includes/utf/utf_tools.php
@@ -127,6 +127,9 @@ if (extension_loaded('mbstring'))
 	/**
 	* UTF-8 aware alternative to strrpos
 	* Find position of last occurrence of a char in a string
+	*
+	* Notes:
+	* - offset for mb_strrpos was added in 5.2.0, we emulate if it is lower
 	* 
 	* @author Harry Fuecks
 	* @param string haystack
@@ -134,10 +137,9 @@ if (extension_loaded('mbstring'))
 	* @param integer (optional) offset (from left)
 	* @return mixed integer position or FALSE on failure
 	*/
-	function utf8_strrpos($str,	$needle, $offset = null)
+	if (version_compare(phpversion(), '5.2.0', '>='))
 	{
-		// offset for mb_strrpos was added in 5.2.0
-		if ($offset === false || version_compare(phpversion(), '5.2.0', '>='))
+		function utf8_strrpos($str,	$needle, $offset = null)
 		{
 			// Emulate behaviour of strrpos rather than raising warning
 			if (empty($str))
@@ -147,22 +149,39 @@ if (extension_loaded('mbstring'))
 
 			return mb_strrpos($str, $search);
 		}
-		else
+	}
+	else
+	{
+		function utf8_strrpos($str,	$needle, $offset = null)
 		{
-			if (!is_int($offset))
+			// offset for mb_strrpos was added in 5.2.0
+			if ($offset === false)
 			{
-				trigger_error('utf8_strrpos expects parameter 3 to be long', E_USER_WARNING);
-				return false;
+				// Emulate behaviour of strrpos rather than raising warning
+				if (empty($str))
+				{
+					return false;
+				}
+
+				return mb_strrpos($str, $search);
 			}
+			else
+			{
+				if (!is_int($offset))
+				{
+					trigger_error('utf8_strrpos expects parameter 3 to be long', E_USER_WARNING);
+					return false;
+				}
 
-			$str = mb_substr($str, $offset);
+				$str = mb_substr($str, $offset);
 
-			if (false !== ($pos = mb_strrpos($str, $search)))
-			{
-				return $pos + $offset;
-			}
+				if (false !== ($pos = mb_strrpos($str, $search)))
+				{
+					return $pos + $offset;
+				}
 
-			return false;
+				return false;
+			}
 		}
 	}
 
-- 
cgit v1.2.1


From f8528a659c919d7bc1e78c5aacc95e27cddec627 Mon Sep 17 00:00:00 2001
From: Meik Sievertsen <acydburn@phpbb.com>
Date: Sat, 7 Oct 2006 12:36:31 +0000
Subject: tried to begin adjusting all string functions where applicable -
 still a *lot* to do. I hope I caught all relevant sections and did not mess
 something up.

git-svn-id: file:///svn/phpbb/trunk@6452 89ea8834-ac86-4346-8a33-228a782c2dd0
---
 phpBB/includes/utf/utf_tools.php | 25 +++++++------------------
 1 file changed, 7 insertions(+), 18 deletions(-)

(limited to 'phpBB/includes/utf')

diff --git a/phpBB/includes/utf/utf_tools.php b/phpBB/includes/utf/utf_tools.php
index a906cc6ffb..342952db69 100644
--- a/phpBB/includes/utf/utf_tools.php
+++ b/phpBB/includes/utf/utf_tools.php
@@ -6,6 +6,10 @@
 * @copyright (c) 2006 phpBB Group 
 * @license http://opensource.org/licenses/gpl-license.php GNU Public License 
 *
+* @todo make sure the replacements are called correctly
+* already done: strtolower, strtoupper, ucfirst, str_split, strrpos, strlen (hopefully!)
+* remaining:	clean_username, htmlentities (no longer needed for internal data?), htmlspecialchars (using charset), html_entity_decode (own function to reverse htmlspecialchars and not htmlentities)
+*				substr, strpos, strspn, chr, ord
 */
 
 /**
@@ -136,6 +140,7 @@ if (extension_loaded('mbstring'))
 	* @param string needle
 	* @param integer (optional) offset (from left)
 	* @return mixed integer position or FALSE on failure
+	* @ignore
 	*/
 	if (version_compare(phpversion(), '5.2.0', '>='))
 	{
@@ -209,15 +214,7 @@ if (extension_loaded('mbstring'))
 
 	/**
 	* UTF-8 aware alternative to strtolower
-	* Make a string lowercase
-	* Note: The concept of a characters "case" only exists is some alphabets
-	* such as Latin, Greek, Cyrillic, Armenian and archaic Georgian - it does
-	* not exist in the Chinese alphabet, for example. See Unicode Standard
-	* Annex #21: Case Mappings
-	* 
-	* @author Andreas Gohr <andi@splitbrain.org>
-	* @param string
-	* @return mixed either string in lowercase or FALSE is UTF-8 invalid
+	* @ignore
 	*/
 	function utf8_strtolower($str)
 	{
@@ -226,15 +223,7 @@ if (extension_loaded('mbstring'))
 
 	/**
 	* UTF-8 aware alternative to strtoupper
-	* Make a string uppercase
-	* Note: The concept of a characters "case" only exists is some alphabets
-	* such as Latin, Greek, Cyrillic, Armenian and archaic Georgian - it does
-	* not exist in the Chinese alphabet, for example. See Unicode Standard
-	* Annex #21: Case Mappings
-	* 
-	* @author Andreas Gohr <andi@splitbrain.org>
-	* @param string
-	* @return mixed either string in lowercase or FALSE is UTF-8 invalid
+	* @ignore
 	*/
 	function utf8_strtoupper($str)
 	{
-- 
cgit v1.2.1


From c40783e6a24f56818407e824ca847565d222b542 Mon Sep 17 00:00:00 2001
From: Meik Sievertsen <acydburn@phpbb.com>
Date: Sat, 7 Oct 2006 16:49:44 +0000
Subject: strpos...

git-svn-id: file:///svn/phpbb/trunk@6457 89ea8834-ac86-4346-8a33-228a782c2dd0
---
 phpBB/includes/utf/utf_tools.php | 12 +++---------
 1 file changed, 3 insertions(+), 9 deletions(-)

(limited to 'phpBB/includes/utf')

diff --git a/phpBB/includes/utf/utf_tools.php b/phpBB/includes/utf/utf_tools.php
index 342952db69..9cd4026c2c 100644
--- a/phpBB/includes/utf/utf_tools.php
+++ b/phpBB/includes/utf/utf_tools.php
@@ -7,9 +7,9 @@
 * @license http://opensource.org/licenses/gpl-license.php GNU Public License 
 *
 * @todo make sure the replacements are called correctly
-* already done: strtolower, strtoupper, ucfirst, str_split, strrpos, strlen (hopefully!)
+* already done: strtolower, strtoupper, ucfirst, str_split, strrpos, strlen (hopefully!), strpos
 * remaining:	clean_username, htmlentities (no longer needed for internal data?), htmlspecialchars (using charset), html_entity_decode (own function to reverse htmlspecialchars and not htmlentities)
-*				substr, strpos, strspn, chr, ord
+*				substr, strspn, chr, ord
 */
 
 /**
@@ -192,13 +192,7 @@ if (extension_loaded('mbstring'))
 
 	/**
 	* UTF-8 aware alternative to strpos
-	* Find position of first occurrence of a string
-	*
-	* @author Harry Fuecks
-	* @param string haystack
-	* @param string needle
-	* @param integer offset in characters (from left)
-	* @return mixed integer position or FALSE on failure
+	* @ignore
 	*/
 	function utf8_strpos($str, $needle, $offset = null)
 	{
-- 
cgit v1.2.1


From 485935e1f1a3a773260cda0b7ac3f3800dca990e Mon Sep 17 00:00:00 2001
From: Meik Sievertsen <acydburn@phpbb.com>
Date: Sat, 7 Oct 2006 17:40:07 +0000
Subject: the braces style is deprecated as of PHP 6

git-svn-id: file:///svn/phpbb/trunk@6459 89ea8834-ac86-4346-8a33-228a782c2dd0
---
 phpBB/includes/utf/utf_tools.php | 16 ++++------------
 1 file changed, 4 insertions(+), 12 deletions(-)

(limited to 'phpBB/includes/utf')

diff --git a/phpBB/includes/utf/utf_tools.php b/phpBB/includes/utf/utf_tools.php
index 9cd4026c2c..930595b36f 100644
--- a/phpBB/includes/utf/utf_tools.php
+++ b/phpBB/includes/utf/utf_tools.php
@@ -7,9 +7,9 @@
 * @license http://opensource.org/licenses/gpl-license.php GNU Public License 
 *
 * @todo make sure the replacements are called correctly
-* already done: strtolower, strtoupper, ucfirst, str_split, strrpos, strlen (hopefully!), strpos
+* already done: strtolower, strtoupper, ucfirst, str_split, strrpos, strlen (hopefully!), strpos, substr
 * remaining:	clean_username, htmlentities (no longer needed for internal data?), htmlspecialchars (using charset), html_entity_decode (own function to reverse htmlspecialchars and not htmlentities)
-*				substr, strspn, chr, ord
+*				strspn, chr, ord
 */
 
 /**
@@ -226,13 +226,7 @@ if (extension_loaded('mbstring'))
 
 	/**
 	* UTF-8 aware alternative to substr
-	* Return part of a string given character offset (and optionally length)
-	* 
-	* @author Harry Fuecks
-	* @param string
-	* @param integer number of UTF-8 characters offset (from left)
-	* @param integer (optional) length in UTF-8 characters from offset
-	* @return mixed string or FALSE if failure
+	* @ignore
 	*/
 	function utf8_substr($str, $offset,	$length	= null)
 	{
@@ -248,9 +242,7 @@ if (extension_loaded('mbstring'))
 
 	/**
 	* Return the length (in characters) of a UTF-8 string
-	*
-	* @param	string	$text		UTF-8 string
-	* @return	integer				Length (in chars) of given string
+	* @ignore
 	*/
 	function utf8_strlen($text)
 	{
-- 
cgit v1.2.1


From 722ab535a67d3193356b851e071cef6fdfa6c40b Mon Sep 17 00:00:00 2001
From: David M <davidmj@users.sourceforge.net>
Date: Sat, 7 Oct 2006 22:51:55 +0000
Subject: Case folding! :D

git-svn-id: file:///svn/phpbb/trunk@6464 89ea8834-ac86-4346-8a33-228a782c2dd0
---
 phpBB/includes/utf/data/case_fold_C.php |  1 +
 phpBB/includes/utf/data/case_fold_F.php |  1 +
 phpBB/includes/utf/data/case_fold_S.php |  1 +
 phpBB/includes/utf/utf_tools.php        | 44 +++++++++++++++++++++++++++++++++
 4 files changed, 47 insertions(+)
 create mode 100644 phpBB/includes/utf/data/case_fold_C.php
 create mode 100644 phpBB/includes/utf/data/case_fold_F.php
 create mode 100644 phpBB/includes/utf/data/case_fold_S.php

(limited to 'phpBB/includes/utf')

diff --git a/phpBB/includes/utf/data/case_fold_C.php b/phpBB/includes/utf/data/case_fold_C.php
new file mode 100644
index 0000000000..00de1ba349
--- /dev/null
+++ b/phpBB/includes/utf/data/case_fold_C.php
@@ -0,0 +1 @@
+<?php return array('A'=>'a','B'=>'b','C'=>'c','D'=>'d','E'=>'e','F'=>'f','G'=>'g','H'=>'h','I'=>'i','J'=>'j','K'=>'k','L'=>'l','M'=>'m','N'=>'n','O'=>'o','P'=>'p','Q'=>'q','R'=>'r','S'=>'s','T'=>'t','U'=>'u','V'=>'v','W'=>'w','X'=>'x','Y'=>'y','Z'=>'z','µ'=>'μ','À'=>'à','Á'=>'á','Â'=>'â','Ã'=>'ã','Ä'=>'ä','Å'=>'å','Æ'=>'æ','Ç'=>'ç','È'=>'è','É'=>'é','Ê'=>'ê','Ë'=>'ë','Ì'=>'ì','Í'=>'í','Î'=>'î','Ï'=>'ï','Ð'=>'ð','Ñ'=>'ñ','Ò'=>'ò','Ó'=>'ó','Ô'=>'ô','Õ'=>'õ','Ö'=>'ö','Ø'=>'ø','Ù'=>'ù','Ú'=>'ú','Û'=>'û','Ü'=>'ü','Ý'=>'ý','Þ'=>'þ','Ā'=>'ā','Ă'=>'ă','Ą'=>'ą','Ć'=>'ć','Ĉ'=>'ĉ','Ċ'=>'ċ','Č'=>'č','Ď'=>'ď','Đ'=>'đ','Ē'=>'ē','Ĕ'=>'ĕ','Ė'=>'ė','Ę'=>'ę','Ě'=>'ě','Ĝ'=>'ĝ','Ğ'=>'ğ','Ġ'=>'ġ','Ģ'=>'ģ','Ĥ'=>'ĥ','Ħ'=>'ħ','Ĩ'=>'ĩ','Ī'=>'ī','Ĭ'=>'ĭ','Į'=>'į','Ĳ'=>'ĳ','Ĵ'=>'ĵ','Ķ'=>'ķ','Ĺ'=>'ĺ','Ļ'=>'ļ','Ľ'=>'ľ','Ŀ'=>'ŀ','Ł'=>'ł','Ń'=>'ń','Ņ'=>'ņ','Ň'=>'ň','Ŋ'=>'ŋ','Ō'=>'ō','Ŏ'=>'ŏ','Ő'=>'ő','Œ'=>'œ','Ŕ'=>'ŕ','Ŗ'=>'ŗ','Ř'=>'ř','Ś'=>'ś','Ŝ'=>'ŝ','Ş'=>'ş','Š'=>'š','Ţ'=>'ţ','Ť'=>'ť','Ŧ'=>'ŧ','Ũ'=>'ũ','Ū'=>'ū','Ŭ'=>'ŭ','Ů'=>'ů','Ű'=>'ű','Ų'=>'ų','Ŵ'=>'ŵ','Ŷ'=>'ŷ','Ÿ'=>'ÿ','Ź'=>'ź','Ż'=>'ż','Ž'=>'ž','ſ'=>'s','Ɓ'=>'ɓ','Ƃ'=>'ƃ','Ƅ'=>'ƅ','Ɔ'=>'ɔ','Ƈ'=>'ƈ','Ɖ'=>'ɖ','Ɗ'=>'ɗ','Ƌ'=>'ƌ','Ǝ'=>'ǝ','Ə'=>'ə','Ɛ'=>'ɛ','Ƒ'=>'ƒ','Ɠ'=>'ɠ','Ɣ'=>'ɣ','Ɩ'=>'ɩ','Ɨ'=>'ɨ','Ƙ'=>'ƙ','Ɯ'=>'ɯ','Ɲ'=>'ɲ','Ɵ'=>'ɵ','Ơ'=>'ơ','Ƣ'=>'ƣ','Ƥ'=>'ƥ','Ʀ'=>'ʀ','Ƨ'=>'ƨ','Ʃ'=>'ʃ','Ƭ'=>'ƭ','Ʈ'=>'ʈ','Ư'=>'ư','Ʊ'=>'ʊ','Ʋ'=>'ʋ','Ƴ'=>'ƴ','Ƶ'=>'ƶ','Ʒ'=>'ʒ','Ƹ'=>'ƹ','Ƽ'=>'ƽ','Ǆ'=>'ǆ','ǅ'=>'ǆ','Ǉ'=>'ǉ','ǈ'=>'ǉ','Ǌ'=>'ǌ','ǋ'=>'ǌ','Ǎ'=>'ǎ','Ǐ'=>'ǐ','Ǒ'=>'ǒ','Ǔ'=>'ǔ','Ǖ'=>'ǖ','Ǘ'=>'ǘ','Ǚ'=>'ǚ','Ǜ'=>'ǜ','Ǟ'=>'ǟ','Ǡ'=>'ǡ','Ǣ'=>'ǣ','Ǥ'=>'ǥ','Ǧ'=>'ǧ','Ǩ'=>'ǩ','Ǫ'=>'ǫ','Ǭ'=>'ǭ','Ǯ'=>'ǯ','Ǳ'=>'ǳ','ǲ'=>'ǳ','Ǵ'=>'ǵ','Ƕ'=>'ƕ','Ƿ'=>'ƿ','Ǹ'=>'ǹ','Ǻ'=>'ǻ','Ǽ'=>'ǽ','Ǿ'=>'ǿ','Ȁ'=>'ȁ','Ȃ'=>'ȃ','Ȅ'=>'ȅ','Ȇ'=>'ȇ','Ȉ'=>'ȉ','Ȋ'=>'ȋ','Ȍ'=>'ȍ','Ȏ'=>'ȏ','Ȑ'=>'ȑ','Ȓ'=>'ȓ','Ȕ'=>'ȕ','Ȗ'=>'ȗ','Ș'=>'ș','Ț'=>'ț','Ȝ'=>'ȝ','Ȟ'=>'ȟ','Ƞ'=>'ƞ','Ȣ'=>'ȣ','Ȥ'=>'ȥ','Ȧ'=>'ȧ','Ȩ'=>'ȩ','Ȫ'=>'ȫ','Ȭ'=>'ȭ','Ȯ'=>'ȯ','Ȱ'=>'ȱ','Ȳ'=>'ȳ','Ⱥ'=>'ⱥ','Ȼ'=>'ȼ','Ƚ'=>'ƚ','Ⱦ'=>'ⱦ','Ɂ'=>'ɂ','Ƀ'=>'ƀ',
'Ʉ'=>'ʉ','Ʌ'=>'ʌ','Ɇ'=>'ɇ','Ɉ'=>'ɉ','Ɋ'=>'ɋ','Ɍ'=>'ɍ','Ɏ'=>'ɏ','ͅ'=>'ι','Ά'=>'ά','Έ'=>'έ','Ή'=>'ή','Ί'=>'ί','Ό'=>'ό','Ύ'=>'ύ','Ώ'=>'ώ','Α'=>'α','Β'=>'β','Γ'=>'γ','Δ'=>'δ','Ε'=>'ε','Ζ'=>'ζ','Η'=>'η','Θ'=>'θ','Ι'=>'ι','Κ'=>'κ','Λ'=>'λ','Μ'=>'μ','Ν'=>'ν','Ξ'=>'ξ','Ο'=>'ο','Π'=>'π','Ρ'=>'ρ','Σ'=>'σ','Τ'=>'τ','Υ'=>'υ','Φ'=>'φ','Χ'=>'χ','Ψ'=>'ψ','Ω'=>'ω','Ϊ'=>'ϊ','Ϋ'=>'ϋ','ς'=>'σ','ϐ'=>'β','ϑ'=>'θ','ϕ'=>'φ','ϖ'=>'π','Ϙ'=>'ϙ','Ϛ'=>'ϛ','Ϝ'=>'ϝ','Ϟ'=>'ϟ','Ϡ'=>'ϡ','Ϣ'=>'ϣ','Ϥ'=>'ϥ','Ϧ'=>'ϧ','Ϩ'=>'ϩ','Ϫ'=>'ϫ','Ϭ'=>'ϭ','Ϯ'=>'ϯ','ϰ'=>'κ','ϱ'=>'ρ','ϴ'=>'θ','ϵ'=>'ε','Ϸ'=>'ϸ','Ϲ'=>'ϲ','Ϻ'=>'ϻ','Ͻ'=>'ͻ','Ͼ'=>'ͼ','Ͽ'=>'ͽ','Ѐ'=>'ѐ','Ё'=>'ё','Ђ'=>'ђ','Ѓ'=>'ѓ','Є'=>'є','Ѕ'=>'ѕ','І'=>'і','Ї'=>'ї','Ј'=>'ј','Љ'=>'љ','Њ'=>'њ','Ћ'=>'ћ','Ќ'=>'ќ','Ѝ'=>'ѝ','Ў'=>'ў','Џ'=>'џ','А'=>'а','Б'=>'б','В'=>'в','Г'=>'г','Д'=>'д','Е'=>'е','Ж'=>'ж','З'=>'з','И'=>'и','Й'=>'й','К'=>'к','Л'=>'л','М'=>'м','Н'=>'н','О'=>'о','П'=>'п','Р'=>'р','С'=>'с','Т'=>'т','У'=>'у','Ф'=>'ф','Х'=>'х','Ц'=>'ц','Ч'=>'ч','Ш'=>'ш','Щ'=>'щ','Ъ'=>'ъ','Ы'=>'ы','Ь'=>'ь','Э'=>'э','Ю'=>'ю','Я'=>'я','Ѡ'=>'ѡ','Ѣ'=>'ѣ','Ѥ'=>'ѥ','Ѧ'=>'ѧ','Ѩ'=>'ѩ','Ѫ'=>'ѫ','Ѭ'=>'ѭ','Ѯ'=>'ѯ','Ѱ'=>'ѱ','Ѳ'=>'ѳ','Ѵ'=>'ѵ','Ѷ'=>'ѷ','Ѹ'=>'ѹ','Ѻ'=>'ѻ','Ѽ'=>'ѽ','Ѿ'=>'ѿ','Ҁ'=>'ҁ','Ҋ'=>'ҋ','Ҍ'=>'ҍ','Ҏ'=>'ҏ','Ґ'=>'ґ','Ғ'=>'ғ','Ҕ'=>'ҕ','Җ'=>'җ','Ҙ'=>'ҙ','Қ'=>'қ','Ҝ'=>'ҝ','Ҟ'=>'ҟ','Ҡ'=>'ҡ','Ң'=>'ң','Ҥ'=>'ҥ','Ҧ'=>'ҧ','Ҩ'=>'ҩ','Ҫ'=>'ҫ','Ҭ'=>'ҭ','Ү'=>'ү','Ұ'=>'ұ','Ҳ'=>'ҳ','Ҵ'=>'ҵ','Ҷ'=>'ҷ','Ҹ'=>'ҹ','Һ'=>'һ','Ҽ'=>'ҽ','Ҿ'=>'ҿ','Ӏ'=>'ӏ','Ӂ'=>'ӂ','Ӄ'=>'ӄ','Ӆ'=>'ӆ','Ӈ'=>'ӈ','Ӊ'=>'ӊ','Ӌ'=>'ӌ','Ӎ'=>'ӎ','Ӑ'=>'ӑ','Ӓ'=>'ӓ','Ӕ'=>'ӕ','Ӗ'=>'ӗ','Ә'=>'ә','Ӛ'=>'ӛ','Ӝ'=>'ӝ','Ӟ'=>'ӟ','Ӡ'=>'ӡ','Ӣ'=>'ӣ','Ӥ'=>'ӥ','Ӧ'=>'ӧ','Ө'=>'ө','Ӫ'=>'ӫ','Ӭ'=>'ӭ','Ӯ'=>'ӯ','Ӱ'=>'ӱ','Ӳ'=>'ӳ','Ӵ'=>'ӵ','Ӷ'=>'ӷ','Ӹ'=>'ӹ','Ӻ'=>'ӻ','Ӽ'=>'ӽ','Ӿ'=>'ӿ','Ԁ'=>'ԁ','Ԃ'=>'ԃ','Ԅ'=>'ԅ','Ԇ'=>'ԇ','Ԉ'=>'ԉ','Ԋ'=>'ԋ','Ԍ'=>'ԍ','Ԏ'=>'ԏ','Ԑ'=>'ԑ','Ԓ'=>'ԓ','Ա'=>'ա','Բ'=>'բ','Գ'=>'գ','Դ'=>'դ','Ե'=>'ե','Զ'=>'զ','Է'=>'է','Ը'=>'ը','Թ'=>'թ','Ժ'=>'ժ','Ի'=>'ի','Լ'=>'լ','Խ'=>'խ','Ծ'=>'ծ','Կ'=>'կ','Հ'=>'հ','Ձ'=>'ձ','Ղ'=>'ղ','Ճ'=>'ճ','Մ'=>'մ','Յ
'=>'յ','Ն'=>'ն','Շ'=>'շ','Ո'=>'ո','Չ'=>'չ','Պ'=>'պ','Ջ'=>'ջ','Ռ'=>'ռ','Ս'=>'ս','Վ'=>'վ','Տ'=>'տ','Ր'=>'ր','Ց'=>'ց','Ւ'=>'ւ','Փ'=>'փ','Ք'=>'ք','Օ'=>'օ','Ֆ'=>'ֆ','Ⴀ'=>'ⴀ','Ⴁ'=>'ⴁ','Ⴂ'=>'ⴂ','Ⴃ'=>'ⴃ','Ⴄ'=>'ⴄ','Ⴅ'=>'ⴅ','Ⴆ'=>'ⴆ','Ⴇ'=>'ⴇ','Ⴈ'=>'ⴈ','Ⴉ'=>'ⴉ','Ⴊ'=>'ⴊ','Ⴋ'=>'ⴋ','Ⴌ'=>'ⴌ','Ⴍ'=>'ⴍ','Ⴎ'=>'ⴎ','Ⴏ'=>'ⴏ','Ⴐ'=>'ⴐ','Ⴑ'=>'ⴑ','Ⴒ'=>'ⴒ','Ⴓ'=>'ⴓ','Ⴔ'=>'ⴔ','Ⴕ'=>'ⴕ','Ⴖ'=>'ⴖ','Ⴗ'=>'ⴗ','Ⴘ'=>'ⴘ','Ⴙ'=>'ⴙ','Ⴚ'=>'ⴚ','Ⴛ'=>'ⴛ','Ⴜ'=>'ⴜ','Ⴝ'=>'ⴝ','Ⴞ'=>'ⴞ','Ⴟ'=>'ⴟ','Ⴠ'=>'ⴠ','Ⴡ'=>'ⴡ','Ⴢ'=>'ⴢ','Ⴣ'=>'ⴣ','Ⴤ'=>'ⴤ','Ⴥ'=>'ⴥ','Ḁ'=>'ḁ','Ḃ'=>'ḃ','Ḅ'=>'ḅ','Ḇ'=>'ḇ','Ḉ'=>'ḉ','Ḋ'=>'ḋ','Ḍ'=>'ḍ','Ḏ'=>'ḏ','Ḑ'=>'ḑ','Ḓ'=>'ḓ','Ḕ'=>'ḕ','Ḗ'=>'ḗ','Ḙ'=>'ḙ','Ḛ'=>'ḛ','Ḝ'=>'ḝ','Ḟ'=>'ḟ','Ḡ'=>'ḡ','Ḣ'=>'ḣ','Ḥ'=>'ḥ','Ḧ'=>'ḧ','Ḩ'=>'ḩ','Ḫ'=>'ḫ','Ḭ'=>'ḭ','Ḯ'=>'ḯ','Ḱ'=>'ḱ','Ḳ'=>'ḳ','Ḵ'=>'ḵ','Ḷ'=>'ḷ','Ḹ'=>'ḹ','Ḻ'=>'ḻ','Ḽ'=>'ḽ','Ḿ'=>'ḿ','Ṁ'=>'ṁ','Ṃ'=>'ṃ','Ṅ'=>'ṅ','Ṇ'=>'ṇ','Ṉ'=>'ṉ','Ṋ'=>'ṋ','Ṍ'=>'ṍ','Ṏ'=>'ṏ','Ṑ'=>'ṑ','Ṓ'=>'ṓ','Ṕ'=>'ṕ','Ṗ'=>'ṗ','Ṙ'=>'ṙ','Ṛ'=>'ṛ','Ṝ'=>'ṝ','Ṟ'=>'ṟ','Ṡ'=>'ṡ','Ṣ'=>'ṣ','Ṥ'=>'ṥ','Ṧ'=>'ṧ','Ṩ'=>'ṩ','Ṫ'=>'ṫ','Ṭ'=>'ṭ','Ṯ'=>'ṯ','Ṱ'=>'ṱ','Ṳ'=>'ṳ','Ṵ'=>'ṵ','Ṷ'=>'ṷ','Ṹ'=>'ṹ','Ṻ'=>'ṻ','Ṽ'=>'ṽ','Ṿ'=>'ṿ','Ẁ'=>'ẁ','Ẃ'=>'ẃ','Ẅ'=>'ẅ','Ẇ'=>'ẇ','Ẉ'=>'ẉ','Ẋ'=>'ẋ','Ẍ'=>'ẍ','Ẏ'=>'ẏ','Ẑ'=>'ẑ','Ẓ'=>'ẓ','Ẕ'=>'ẕ','ẛ'=>'ṡ','Ạ'=>'ạ','Ả'=>'ả','Ấ'=>'ấ','Ầ'=>'ầ','Ẩ'=>'ẩ','Ẫ'=>'ẫ','Ậ'=>'ậ','Ắ'=>'ắ','Ằ'=>'ằ','Ẳ'=>'ẳ','Ẵ'=>'ẵ','Ặ'=>'ặ','Ẹ'=>'ẹ','Ẻ'=>'ẻ','Ẽ'=>'ẽ','Ế'=>'ế','Ề'=>'ề','Ể'=>'ể','Ễ'=>'ễ','Ệ'=>'ệ','Ỉ'=>'ỉ','Ị'=>'ị','Ọ'=>'ọ','Ỏ'=>'ỏ','Ố'=>'ố','Ồ'=>'ồ','Ổ'=>'ổ','Ỗ'=>'ỗ','Ộ'=>'ộ','Ớ'=>'ớ','Ờ'=>'ờ','Ở'=>'ở','Ỡ'=>'ỡ','Ợ'=>'ợ','Ụ'=>'ụ','Ủ'=>'ủ','Ứ'=>'ứ','Ừ'=>'ừ','Ử'=>'ử','Ữ'=>'ữ','Ự'=>'ự','Ỳ'=>'ỳ','Ỵ'=>'ỵ','Ỷ'=>'ỷ','Ỹ'=>'ỹ','Ἀ'=>'ἀ','Ἁ'=>'ἁ','Ἂ'=>'ἂ','Ἃ'=>'ἃ','Ἄ'=>'ἄ','Ἅ'=>'ἅ','Ἆ'=>'ἆ','Ἇ'=>'ἇ','Ἐ'=>'ἐ','Ἑ'=>'ἑ','Ἒ'=>'ἒ','Ἓ'=>'ἓ','Ἔ'=>'ἔ','Ἕ'=>'ἕ','Ἠ'=>'ἠ','Ἡ'=>'ἡ','Ἢ'=>'ἢ','Ἣ'=>'ἣ','Ἤ'=>'ἤ','Ἥ'=>'ἥ','Ἦ'=>'ἦ','Ἧ'=>'ἧ','Ἰ'=>'ἰ','Ἱ'=>'ἱ','Ἲ'=>'ἲ','Ἳ'=>'ἳ','Ἴ'=>'ἴ','Ἵ'=>'ἵ','Ἶ'=>'ἶ','Ἷ'=>'ἷ','Ὀ'=>'ὀ','Ὁ'=>'ὁ','Ὂ'=>'ὂ','Ὃ'=>'ὃ','Ὄ'=>'ὄ','Ὅ'=>'ὅ','Ὑ'=>'ὑ','Ὓ'=>'ὓ','Ὕ'=>'ὕ','Ὗ'=>'ὗ','Ὠ'=>'ὠ','Ὡ'=>'ὡ','Ὢ'=>'ὢ','Ὣ'=>'ὣ','Ὤ'=>'ὤ','Ὥ'=
>'ὥ','Ὦ'=>'ὦ','Ὧ'=>'ὧ','Ᾰ'=>'ᾰ','Ᾱ'=>'ᾱ','Ὰ'=>'ὰ','Ά'=>'ά','ι'=>'ι','Ὲ'=>'ὲ','Έ'=>'έ','Ὴ'=>'ὴ','Ή'=>'ή','Ῐ'=>'ῐ','Ῑ'=>'ῑ','Ὶ'=>'ὶ','Ί'=>'ί','Ῠ'=>'ῠ','Ῡ'=>'ῡ','Ὺ'=>'ὺ','Ύ'=>'ύ','Ῥ'=>'ῥ','Ὸ'=>'ὸ','Ό'=>'ό','Ὼ'=>'ὼ','Ώ'=>'ώ','Ω'=>'ω','K'=>'k','Å'=>'å','Ⅎ'=>'ⅎ','Ⅰ'=>'ⅰ','Ⅱ'=>'ⅱ','Ⅲ'=>'ⅲ','Ⅳ'=>'ⅳ','Ⅴ'=>'ⅴ','Ⅵ'=>'ⅵ','Ⅶ'=>'ⅶ','Ⅷ'=>'ⅷ','Ⅸ'=>'ⅸ','Ⅹ'=>'ⅹ','Ⅺ'=>'ⅺ','Ⅻ'=>'ⅻ','Ⅼ'=>'ⅼ','Ⅽ'=>'ⅽ','Ⅾ'=>'ⅾ','Ⅿ'=>'ⅿ','Ↄ'=>'ↄ','Ⓐ'=>'ⓐ','Ⓑ'=>'ⓑ','Ⓒ'=>'ⓒ','Ⓓ'=>'ⓓ','Ⓔ'=>'ⓔ','Ⓕ'=>'ⓕ','Ⓖ'=>'ⓖ','Ⓗ'=>'ⓗ','Ⓘ'=>'ⓘ','Ⓙ'=>'ⓙ','Ⓚ'=>'ⓚ','Ⓛ'=>'ⓛ','Ⓜ'=>'ⓜ','Ⓝ'=>'ⓝ','Ⓞ'=>'ⓞ','Ⓟ'=>'ⓟ','Ⓠ'=>'ⓠ','Ⓡ'=>'ⓡ','Ⓢ'=>'ⓢ','Ⓣ'=>'ⓣ','Ⓤ'=>'ⓤ','Ⓥ'=>'ⓥ','Ⓦ'=>'ⓦ','Ⓧ'=>'ⓧ','Ⓨ'=>'ⓨ','Ⓩ'=>'ⓩ','Ⰰ'=>'ⰰ','Ⰱ'=>'ⰱ','Ⰲ'=>'ⰲ','Ⰳ'=>'ⰳ','Ⰴ'=>'ⰴ','Ⰵ'=>'ⰵ','Ⰶ'=>'ⰶ','Ⰷ'=>'ⰷ','Ⰸ'=>'ⰸ','Ⰹ'=>'ⰹ','Ⰺ'=>'ⰺ','Ⰻ'=>'ⰻ','Ⰼ'=>'ⰼ','Ⰽ'=>'ⰽ','Ⰾ'=>'ⰾ','Ⰿ'=>'ⰿ','Ⱀ'=>'ⱀ','Ⱁ'=>'ⱁ','Ⱂ'=>'ⱂ','Ⱃ'=>'ⱃ','Ⱄ'=>'ⱄ','Ⱅ'=>'ⱅ','Ⱆ'=>'ⱆ','Ⱇ'=>'ⱇ','Ⱈ'=>'ⱈ','Ⱉ'=>'ⱉ','Ⱊ'=>'ⱊ','Ⱋ'=>'ⱋ','Ⱌ'=>'ⱌ','Ⱍ'=>'ⱍ','Ⱎ'=>'ⱎ','Ⱏ'=>'ⱏ','Ⱐ'=>'ⱐ','Ⱑ'=>'ⱑ','Ⱒ'=>'ⱒ','Ⱓ'=>'ⱓ','Ⱔ'=>'ⱔ','Ⱕ'=>'ⱕ','Ⱖ'=>'ⱖ','Ⱗ'=>'ⱗ','Ⱘ'=>'ⱘ','Ⱙ'=>'ⱙ','Ⱚ'=>'ⱚ','Ⱛ'=>'ⱛ','Ⱜ'=>'ⱜ','Ⱝ'=>'ⱝ','Ⱞ'=>'ⱞ','Ⱡ'=>'ⱡ','Ɫ'=>'ɫ','Ᵽ'=>'ᵽ','Ɽ'=>'ɽ','Ⱨ'=>'ⱨ','Ⱪ'=>'ⱪ','Ⱬ'=>'ⱬ','Ⱶ'=>'ⱶ','Ⲁ'=>'ⲁ','Ⲃ'=>'ⲃ','Ⲅ'=>'ⲅ','Ⲇ'=>'ⲇ','Ⲉ'=>'ⲉ','Ⲋ'=>'ⲋ','Ⲍ'=>'ⲍ','Ⲏ'=>'ⲏ','Ⲑ'=>'ⲑ','Ⲓ'=>'ⲓ','Ⲕ'=>'ⲕ','Ⲗ'=>'ⲗ','Ⲙ'=>'ⲙ','Ⲛ'=>'ⲛ','Ⲝ'=>'ⲝ','Ⲟ'=>'ⲟ','Ⲡ'=>'ⲡ','Ⲣ'=>'ⲣ','Ⲥ'=>'ⲥ','Ⲧ'=>'ⲧ','Ⲩ'=>'ⲩ','Ⲫ'=>'ⲫ','Ⲭ'=>'ⲭ','Ⲯ'=>'ⲯ','Ⲱ'=>'ⲱ','Ⲳ'=>'ⲳ','Ⲵ'=>'ⲵ','Ⲷ'=>'ⲷ','Ⲹ'=>'ⲹ','Ⲻ'=>'ⲻ','Ⲽ'=>'ⲽ','Ⲿ'=>'ⲿ','Ⳁ'=>'ⳁ','Ⳃ'=>'ⳃ','Ⳅ'=>'ⳅ','Ⳇ'=>'ⳇ','Ⳉ'=>'ⳉ','Ⳋ'=>'ⳋ','Ⳍ'=>'ⳍ','Ⳏ'=>'ⳏ','Ⳑ'=>'ⳑ','Ⳓ'=>'ⳓ','Ⳕ'=>'ⳕ','Ⳗ'=>'ⳗ','Ⳙ'=>'ⳙ','Ⳛ'=>'ⳛ','Ⳝ'=>'ⳝ','Ⳟ'=>'ⳟ','Ⳡ'=>'ⳡ','Ⳣ'=>'ⳣ','Ａ'=>'ａ','Ｂ'=>'ｂ','Ｃ'=>'ｃ','Ｄ'=>'ｄ','Ｅ'=>'ｅ','Ｆ'=>'ｆ','Ｇ'=>'ｇ','Ｈ'=>'ｈ','Ｉ'=>'ｉ','Ｊ'=>'ｊ','Ｋ'=>'ｋ','Ｌ'=>'ｌ','Ｍ'=>'ｍ','Ｎ'=>'ｎ','Ｏ'=>'ｏ','Ｐ'=>'ｐ','Ｑ'=>'ｑ','Ｒ'=>'ｒ','Ｓ'=>'ｓ','Ｔ'=>'ｔ','Ｕ'=>'ｕ','Ｖ'=>'ｖ','Ｗ'=>'ｗ','Ｘ'=>'ｘ','Ｙ'=>'ｙ','Ｚ'=>'ｚ','𐐀'=>'𐐨','𐐁'=>'𐐩','𐐂'=>'𐐪','𐐃'=>'𐐫','𐐄'=>'𐐬','𐐅'=>'𐐭','𐐆'=>'𐐮','𐐇'=>'𐐯','𐐈'=>'𐐰','𐐉'=>'𐐱','𐐊'=>'𐐲','𐐋'=>'𐐳','𐐌'=>'𐐴','𐐍'=>'𐐵','𐐎'=>'𐐶','𐐏'=>'𐐷','𐐐'=>'𐐸','𐐑'=>'𐐹','𐐒'=>'𐐺','𐐓'=>'
𐐻','𐐔'=>'𐐼','𐐕'=>'𐐽','𐐖'=>'𐐾','𐐗'=>'𐐿','𐐘'=>'𐑀','𐐙'=>'𐑁','𐐚'=>'𐑂','𐐛'=>'𐑃','𐐜'=>'𐑄','𐐝'=>'𐑅','𐐞'=>'𐑆','𐐟'=>'𐑇','𐐠'=>'𐑈','𐐡'=>'𐑉','𐐢'=>'𐑊','𐐣'=>'𐑋','𐐤'=>'𐑌','𐐥'=>'𐑍','𐐦'=>'𐑎','𐐧'=>'𐑏');
\ No newline at end of file
diff --git a/phpBB/includes/utf/data/case_fold_F.php b/phpBB/includes/utf/data/case_fold_F.php
new file mode 100644
index 0000000000..7e2ffb25ec
--- /dev/null
+++ b/phpBB/includes/utf/data/case_fold_F.php
@@ -0,0 +1 @@
+<?php return array('ß'=>'ss','İ'=>'i̇','ŉ'=>'ʼn','ǰ'=>'ǰ','ΐ'=>'ΐ','ΰ'=>'ΰ','և'=>'եւ','ẖ'=>'ẖ','ẗ'=>'ẗ','ẘ'=>'ẘ','ẙ'=>'ẙ','ẚ'=>'aʾ','ὐ'=>'ὐ','ὒ'=>'ὒ','ὔ'=>'ὔ','ὖ'=>'ὖ','ᾀ'=>'ἀι','ᾁ'=>'ἁι','ᾂ'=>'ἂι','ᾃ'=>'ἃι','ᾄ'=>'ἄι','ᾅ'=>'ἅι','ᾆ'=>'ἆι','ᾇ'=>'ἇι','ᾈ'=>'ἀι','ᾉ'=>'ἁι','ᾊ'=>'ἂι','ᾋ'=>'ἃι','ᾌ'=>'ἄι','ᾍ'=>'ἅι','ᾎ'=>'ἆι','ᾏ'=>'ἇι','ᾐ'=>'ἠι','ᾑ'=>'ἡι','ᾒ'=>'ἢι','ᾓ'=>'ἣι','ᾔ'=>'ἤι','ᾕ'=>'ἥι','ᾖ'=>'ἦι','ᾗ'=>'ἧι','ᾘ'=>'ἠι','ᾙ'=>'ἡι','ᾚ'=>'ἢι','ᾛ'=>'ἣι','ᾜ'=>'ἤι','ᾝ'=>'ἥι','ᾞ'=>'ἦι','ᾟ'=>'ἧι','ᾠ'=>'ὠι','ᾡ'=>'ὡι','ᾢ'=>'ὢι','ᾣ'=>'ὣι','ᾤ'=>'ὤι','ᾥ'=>'ὥι','ᾦ'=>'ὦι','ᾧ'=>'ὧι','ᾨ'=>'ὠι','ᾩ'=>'ὡι','ᾪ'=>'ὢι','ᾫ'=>'ὣι','ᾬ'=>'ὤι','ᾭ'=>'ὥι','ᾮ'=>'ὦι','ᾯ'=>'ὧι','ᾲ'=>'ὰι','ᾳ'=>'αι','ᾴ'=>'άι','ᾶ'=>'ᾶ','ᾷ'=>'ᾶι','ᾼ'=>'αι','ῂ'=>'ὴι','ῃ'=>'ηι','ῄ'=>'ήι','ῆ'=>'ῆ','ῇ'=>'ῆι','ῌ'=>'ηι','ῒ'=>'ῒ','ΐ'=>'ΐ','ῖ'=>'ῖ','ῗ'=>'ῗ','ῢ'=>'ῢ','ΰ'=>'ΰ','ῤ'=>'ῤ','ῦ'=>'ῦ','ῧ'=>'ῧ','ῲ'=>'ὼι','ῳ'=>'ωι','ῴ'=>'ώι','ῶ'=>'ῶ','ῷ'=>'ῶι','ῼ'=>'ωι','ﬀ'=>'ff','ﬁ'=>'fi','ﬂ'=>'fl','ﬃ'=>'ffi','ﬄ'=>'ffl','ﬅ'=>'st','ﬆ'=>'st','ﬓ'=>'մն','ﬔ'=>'մե','ﬕ'=>'մի','ﬖ'=>'վն','ﬗ'=>'մխ');
\ No newline at end of file
diff --git a/phpBB/includes/utf/data/case_fold_S.php b/phpBB/includes/utf/data/case_fold_S.php
new file mode 100644
index 0000000000..5f09ffa1dd
--- /dev/null
+++ b/phpBB/includes/utf/data/case_fold_S.php
@@ -0,0 +1 @@
+<?php return array('ᾈ'=>'ᾀ','ᾉ'=>'ᾁ','ᾊ'=>'ᾂ','ᾋ'=>'ᾃ','ᾌ'=>'ᾄ','ᾍ'=>'ᾅ','ᾎ'=>'ᾆ','ᾏ'=>'ᾇ','ᾘ'=>'ᾐ','ᾙ'=>'ᾑ','ᾚ'=>'ᾒ','ᾛ'=>'ᾓ','ᾜ'=>'ᾔ','ᾝ'=>'ᾕ','ᾞ'=>'ᾖ','ᾟ'=>'ᾗ','ᾨ'=>'ᾠ','ᾩ'=>'ᾡ','ᾪ'=>'ᾢ','ᾫ'=>'ᾣ','ᾬ'=>'ᾤ','ᾭ'=>'ᾥ','ᾮ'=>'ᾦ','ᾯ'=>'ᾧ','ᾼ'=>'ᾳ','ῌ'=>'ῃ','ῼ'=>'ῳ');
\ No newline at end of file
diff --git a/phpBB/includes/utf/utf_tools.php b/phpBB/includes/utf/utf_tools.php
index 930595b36f..4adb1b2952 100644
--- a/phpBB/includes/utf/utf_tools.php
+++ b/phpBB/includes/utf/utf_tools.php
@@ -930,4 +930,48 @@ function utf8_from_unicode($array)
 	return $str;
 }
 
+
+/**
+* Case folds a UTF-8 string: applies the common (C) mappings, then the full (F)
+* mappings when $option is 'full' or the simple (S) mappings otherwise.
+*
+* @param string $text text to be case folded
+* @param string $option determines how we will fold the cases
+* @return string case folded text
+*/
+function utf8_case_fold($text, $option = 'full')
+{
+	static $uniarray = array();
+	global $phpbb_root_path, $phpEx;
+
+	// common is always set
+	if (!isset($uniarray['C']))
+	{
+		$uniarray['C'] = include($phpbb_root_path . 'includes/utf/data/case_fold_C.' . $phpEx);
+	}
+
+	// only set full if we need to
+	if ($option === 'full' && !isset($uniarray['F']))
+	{
+		$uniarray['F'] = include($phpbb_root_path . 'includes/utf/data/case_fold_F.' . $phpEx);
+	}
+
+	// only set simple if we need to
+	if ($option !== 'full' && !isset($uniarray['S']))
+	{
+		$uniarray['S'] = include($phpbb_root_path . 'includes/utf/data/case_fold_S.' . $phpEx);
+	}
+
+	$text = strtr($text, $uniarray['C']);
+	if ($option === 'full')
+	{
+		$text = strtr($text, $uniarray['F']);
+	}
+	else
+	{
+		$text = strtr($text, $uniarray['S']);
+	}
+	return $text;
+}
+
 ?>
\ No newline at end of file
-- 
cgit v1.2.1


From 9a73fb26b29bc79281c12192ae0f303ed40ad1d0 Mon Sep 17 00:00:00 2001
From: Meik Sievertsen <acydburn@phpbb.com>
Date: Sun, 8 Oct 2006 11:21:40 +0000
Subject: filenames and directories are lowercase only

git-svn-id: file:///svn/phpbb/trunk@6466 89ea8834-ac86-4346-8a33-228a782c2dd0
---
 phpBB/includes/utf/data/case_fold_C.php |  1 -
 phpBB/includes/utf/data/case_fold_F.php |  1 -
 phpBB/includes/utf/data/case_fold_S.php |  1 -
 phpBB/includes/utf/data/case_fold_c.php |  1 +
 phpBB/includes/utf/data/case_fold_f.php |  1 +
 phpBB/includes/utf/data/case_fold_s.php |  1 +
 phpBB/includes/utf/utf_tools.php        | 18 +++++++++---------
 7 files changed, 12 insertions(+), 12 deletions(-)
 delete mode 100644 phpBB/includes/utf/data/case_fold_C.php
 delete mode 100644 phpBB/includes/utf/data/case_fold_F.php
 delete mode 100644 phpBB/includes/utf/data/case_fold_S.php
 create mode 100644 phpBB/includes/utf/data/case_fold_c.php
 create mode 100644 phpBB/includes/utf/data/case_fold_f.php
 create mode 100644 phpBB/includes/utf/data/case_fold_s.php

(limited to 'phpBB/includes/utf')

diff --git a/phpBB/includes/utf/data/case_fold_C.php b/phpBB/includes/utf/data/case_fold_C.php
deleted file mode 100644
index 00de1ba349..0000000000
--- a/phpBB/includes/utf/data/case_fold_C.php
+++ /dev/null
@@ -1 +0,0 @@
-<?php return array('A'=>'a','B'=>'b','C'=>'c','D'=>'d','E'=>'e','F'=>'f','G'=>'g','H'=>'h','I'=>'i','J'=>'j','K'=>'k','L'=>'l','M'=>'m','N'=>'n','O'=>'o','P'=>'p','Q'=>'q','R'=>'r','S'=>'s','T'=>'t','U'=>'u','V'=>'v','W'=>'w','X'=>'x','Y'=>'y','Z'=>'z','µ'=>'μ','À'=>'à','Á'=>'á','Â'=>'â','Ã'=>'ã','Ä'=>'ä','Å'=>'å','Æ'=>'æ','Ç'=>'ç','È'=>'è','É'=>'é','Ê'=>'ê','Ë'=>'ë','Ì'=>'ì','Í'=>'í','Î'=>'î','Ï'=>'ï','Ð'=>'ð','Ñ'=>'ñ','Ò'=>'ò','Ó'=>'ó','Ô'=>'ô','Õ'=>'õ','Ö'=>'ö','Ø'=>'ø','Ù'=>'ù','Ú'=>'ú','Û'=>'û','Ü'=>'ü','Ý'=>'ý','Þ'=>'þ','Ā'=>'ā','Ă'=>'ă','Ą'=>'ą','Ć'=>'ć','Ĉ'=>'ĉ','Ċ'=>'ċ','Č'=>'č','Ď'=>'ď','Đ'=>'đ','Ē'=>'ē','Ĕ'=>'ĕ','Ė'=>'ė','Ę'=>'ę','Ě'=>'ě','Ĝ'=>'ĝ','Ğ'=>'ğ','Ġ'=>'ġ','Ģ'=>'ģ','Ĥ'=>'ĥ','Ħ'=>'ħ','Ĩ'=>'ĩ','Ī'=>'ī','Ĭ'=>'ĭ','Į'=>'į','Ĳ'=>'ĳ','Ĵ'=>'ĵ','Ķ'=>'ķ','Ĺ'=>'ĺ','Ļ'=>'ļ','Ľ'=>'ľ','Ŀ'=>'ŀ','Ł'=>'ł','Ń'=>'ń','Ņ'=>'ņ','Ň'=>'ň','Ŋ'=>'ŋ','Ō'=>'ō','Ŏ'=>'ŏ','Ő'=>'ő','Œ'=>'œ','Ŕ'=>'ŕ','Ŗ'=>'ŗ','Ř'=>'ř','Ś'=>'ś','Ŝ'=>'ŝ','Ş'=>'ş','Š'=>'š','Ţ'=>'ţ','Ť'=>'ť','Ŧ'=>'ŧ','Ũ'=>'ũ','Ū'=>'ū','Ŭ'=>'ŭ','Ů'=>'ů','Ű'=>'ű','Ų'=>'ų','Ŵ'=>'ŵ','Ŷ'=>'ŷ','Ÿ'=>'ÿ','Ź'=>'ź','Ż'=>'ż','Ž'=>'ž','ſ'=>'s','Ɓ'=>'ɓ','Ƃ'=>'ƃ','Ƅ'=>'ƅ','Ɔ'=>'ɔ','Ƈ'=>'ƈ','Ɖ'=>'ɖ','Ɗ'=>'ɗ','Ƌ'=>'ƌ','Ǝ'=>'ǝ','Ə'=>'ə','Ɛ'=>'ɛ','Ƒ'=>'ƒ','Ɠ'=>'ɠ','Ɣ'=>'ɣ','Ɩ'=>'ɩ','Ɨ'=>'ɨ','Ƙ'=>'ƙ','Ɯ'=>'ɯ','Ɲ'=>'ɲ','Ɵ'=>'ɵ','Ơ'=>'ơ','Ƣ'=>'ƣ','Ƥ'=>'ƥ','Ʀ'=>'ʀ','Ƨ'=>'ƨ','Ʃ'=>'ʃ','Ƭ'=>'ƭ','Ʈ'=>'ʈ','Ư'=>'ư','Ʊ'=>'ʊ','Ʋ'=>'ʋ','Ƴ'=>'ƴ','Ƶ'=>'ƶ','Ʒ'=>'ʒ','Ƹ'=>'ƹ','Ƽ'=>'ƽ','Ǆ'=>'ǆ','ǅ'=>'ǆ','Ǉ'=>'ǉ','ǈ'=>'ǉ','Ǌ'=>'ǌ','ǋ'=>'ǌ','Ǎ'=>'ǎ','Ǐ'=>'ǐ','Ǒ'=>'ǒ','Ǔ'=>'ǔ','Ǖ'=>'ǖ','Ǘ'=>'ǘ','Ǚ'=>'ǚ','Ǜ'=>'ǜ','Ǟ'=>'ǟ','Ǡ'=>'ǡ','Ǣ'=>'ǣ','Ǥ'=>'ǥ','Ǧ'=>'ǧ','Ǩ'=>'ǩ','Ǫ'=>'ǫ','Ǭ'=>'ǭ','Ǯ'=>'ǯ','Ǳ'=>'ǳ','ǲ'=>'ǳ','Ǵ'=>'ǵ','Ƕ'=>'ƕ','Ƿ'=>'ƿ','Ǹ'=>'ǹ','Ǻ'=>'ǻ','Ǽ'=>'ǽ','Ǿ'=>'ǿ','Ȁ'=>'ȁ','Ȃ'=>'ȃ','Ȅ'=>'ȅ','Ȇ'=>'ȇ','Ȉ'=>'ȉ','Ȋ'=>'ȋ','Ȍ'=>'ȍ','Ȏ'=>'ȏ','Ȑ'=>'ȑ','Ȓ'=>'ȓ','Ȕ'=>'ȕ','Ȗ'=>'ȗ','Ș'=>'ș','Ț'=>'ț','Ȝ'=>'ȝ','Ȟ'=>'ȟ','Ƞ'=>'ƞ','Ȣ'=>'ȣ','Ȥ'=>'ȥ','Ȧ'=>'ȧ','Ȩ'=>'ȩ','Ȫ'=>'ȫ','Ȭ'=>'ȭ','Ȯ'=>'ȯ','Ȱ'=>'ȱ','Ȳ'=>'ȳ','Ⱥ'=>'ⱥ','Ȼ'=>'ȼ','Ƚ'=>'ƚ','Ⱦ'=>'ⱦ','Ɂ'=>'ɂ','Ƀ'=>'ƀ',
'Ʉ'=>'ʉ','Ʌ'=>'ʌ','Ɇ'=>'ɇ','Ɉ'=>'ɉ','Ɋ'=>'ɋ','Ɍ'=>'ɍ','Ɏ'=>'ɏ','ͅ'=>'ι','Ά'=>'ά','Έ'=>'έ','Ή'=>'ή','Ί'=>'ί','Ό'=>'ό','Ύ'=>'ύ','Ώ'=>'ώ','Α'=>'α','Β'=>'β','Γ'=>'γ','Δ'=>'δ','Ε'=>'ε','Ζ'=>'ζ','Η'=>'η','Θ'=>'θ','Ι'=>'ι','Κ'=>'κ','Λ'=>'λ','Μ'=>'μ','Ν'=>'ν','Ξ'=>'ξ','Ο'=>'ο','Π'=>'π','Ρ'=>'ρ','Σ'=>'σ','Τ'=>'τ','Υ'=>'υ','Φ'=>'φ','Χ'=>'χ','Ψ'=>'ψ','Ω'=>'ω','Ϊ'=>'ϊ','Ϋ'=>'ϋ','ς'=>'σ','ϐ'=>'β','ϑ'=>'θ','ϕ'=>'φ','ϖ'=>'π','Ϙ'=>'ϙ','Ϛ'=>'ϛ','Ϝ'=>'ϝ','Ϟ'=>'ϟ','Ϡ'=>'ϡ','Ϣ'=>'ϣ','Ϥ'=>'ϥ','Ϧ'=>'ϧ','Ϩ'=>'ϩ','Ϫ'=>'ϫ','Ϭ'=>'ϭ','Ϯ'=>'ϯ','ϰ'=>'κ','ϱ'=>'ρ','ϴ'=>'θ','ϵ'=>'ε','Ϸ'=>'ϸ','Ϲ'=>'ϲ','Ϻ'=>'ϻ','Ͻ'=>'ͻ','Ͼ'=>'ͼ','Ͽ'=>'ͽ','Ѐ'=>'ѐ','Ё'=>'ё','Ђ'=>'ђ','Ѓ'=>'ѓ','Є'=>'є','Ѕ'=>'ѕ','І'=>'і','Ї'=>'ї','Ј'=>'ј','Љ'=>'љ','Њ'=>'њ','Ћ'=>'ћ','Ќ'=>'ќ','Ѝ'=>'ѝ','Ў'=>'ў','Џ'=>'џ','А'=>'а','Б'=>'б','В'=>'в','Г'=>'г','Д'=>'д','Е'=>'е','Ж'=>'ж','З'=>'з','И'=>'и','Й'=>'й','К'=>'к','Л'=>'л','М'=>'м','Н'=>'н','О'=>'о','П'=>'п','Р'=>'р','С'=>'с','Т'=>'т','У'=>'у','Ф'=>'ф','Х'=>'х','Ц'=>'ц','Ч'=>'ч','Ш'=>'ш','Щ'=>'щ','Ъ'=>'ъ','Ы'=>'ы','Ь'=>'ь','Э'=>'э','Ю'=>'ю','Я'=>'я','Ѡ'=>'ѡ','Ѣ'=>'ѣ','Ѥ'=>'ѥ','Ѧ'=>'ѧ','Ѩ'=>'ѩ','Ѫ'=>'ѫ','Ѭ'=>'ѭ','Ѯ'=>'ѯ','Ѱ'=>'ѱ','Ѳ'=>'ѳ','Ѵ'=>'ѵ','Ѷ'=>'ѷ','Ѹ'=>'ѹ','Ѻ'=>'ѻ','Ѽ'=>'ѽ','Ѿ'=>'ѿ','Ҁ'=>'ҁ','Ҋ'=>'ҋ','Ҍ'=>'ҍ','Ҏ'=>'ҏ','Ґ'=>'ґ','Ғ'=>'ғ','Ҕ'=>'ҕ','Җ'=>'җ','Ҙ'=>'ҙ','Қ'=>'қ','Ҝ'=>'ҝ','Ҟ'=>'ҟ','Ҡ'=>'ҡ','Ң'=>'ң','Ҥ'=>'ҥ','Ҧ'=>'ҧ','Ҩ'=>'ҩ','Ҫ'=>'ҫ','Ҭ'=>'ҭ','Ү'=>'ү','Ұ'=>'ұ','Ҳ'=>'ҳ','Ҵ'=>'ҵ','Ҷ'=>'ҷ','Ҹ'=>'ҹ','Һ'=>'һ','Ҽ'=>'ҽ','Ҿ'=>'ҿ','Ӏ'=>'ӏ','Ӂ'=>'ӂ','Ӄ'=>'ӄ','Ӆ'=>'ӆ','Ӈ'=>'ӈ','Ӊ'=>'ӊ','Ӌ'=>'ӌ','Ӎ'=>'ӎ','Ӑ'=>'ӑ','Ӓ'=>'ӓ','Ӕ'=>'ӕ','Ӗ'=>'ӗ','Ә'=>'ә','Ӛ'=>'ӛ','Ӝ'=>'ӝ','Ӟ'=>'ӟ','Ӡ'=>'ӡ','Ӣ'=>'ӣ','Ӥ'=>'ӥ','Ӧ'=>'ӧ','Ө'=>'ө','Ӫ'=>'ӫ','Ӭ'=>'ӭ','Ӯ'=>'ӯ','Ӱ'=>'ӱ','Ӳ'=>'ӳ','Ӵ'=>'ӵ','Ӷ'=>'ӷ','Ӹ'=>'ӹ','Ӻ'=>'ӻ','Ӽ'=>'ӽ','Ӿ'=>'ӿ','Ԁ'=>'ԁ','Ԃ'=>'ԃ','Ԅ'=>'ԅ','Ԇ'=>'ԇ','Ԉ'=>'ԉ','Ԋ'=>'ԋ','Ԍ'=>'ԍ','Ԏ'=>'ԏ','Ԑ'=>'ԑ','Ԓ'=>'ԓ','Ա'=>'ա','Բ'=>'բ','Գ'=>'գ','Դ'=>'դ','Ե'=>'ե','Զ'=>'զ','Է'=>'է','Ը'=>'ը','Թ'=>'թ','Ժ'=>'ժ','Ի'=>'ի','Լ'=>'լ','Խ'=>'խ','Ծ'=>'ծ','Կ'=>'կ','Հ'=>'հ','Ձ'=>'ձ','Ղ'=>'ղ','Ճ'=>'ճ','Մ'=>'մ','Յ
'=>'յ','Ն'=>'ն','Շ'=>'շ','Ո'=>'ո','Չ'=>'չ','Պ'=>'պ','Ջ'=>'ջ','Ռ'=>'ռ','Ս'=>'ս','Վ'=>'վ','Տ'=>'տ','Ր'=>'ր','Ց'=>'ց','Ւ'=>'ւ','Փ'=>'փ','Ք'=>'ք','Օ'=>'օ','Ֆ'=>'ֆ','Ⴀ'=>'ⴀ','Ⴁ'=>'ⴁ','Ⴂ'=>'ⴂ','Ⴃ'=>'ⴃ','Ⴄ'=>'ⴄ','Ⴅ'=>'ⴅ','Ⴆ'=>'ⴆ','Ⴇ'=>'ⴇ','Ⴈ'=>'ⴈ','Ⴉ'=>'ⴉ','Ⴊ'=>'ⴊ','Ⴋ'=>'ⴋ','Ⴌ'=>'ⴌ','Ⴍ'=>'ⴍ','Ⴎ'=>'ⴎ','Ⴏ'=>'ⴏ','Ⴐ'=>'ⴐ','Ⴑ'=>'ⴑ','Ⴒ'=>'ⴒ','Ⴓ'=>'ⴓ','Ⴔ'=>'ⴔ','Ⴕ'=>'ⴕ','Ⴖ'=>'ⴖ','Ⴗ'=>'ⴗ','Ⴘ'=>'ⴘ','Ⴙ'=>'ⴙ','Ⴚ'=>'ⴚ','Ⴛ'=>'ⴛ','Ⴜ'=>'ⴜ','Ⴝ'=>'ⴝ','Ⴞ'=>'ⴞ','Ⴟ'=>'ⴟ','Ⴠ'=>'ⴠ','Ⴡ'=>'ⴡ','Ⴢ'=>'ⴢ','Ⴣ'=>'ⴣ','Ⴤ'=>'ⴤ','Ⴥ'=>'ⴥ','Ḁ'=>'ḁ','Ḃ'=>'ḃ','Ḅ'=>'ḅ','Ḇ'=>'ḇ','Ḉ'=>'ḉ','Ḋ'=>'ḋ','Ḍ'=>'ḍ','Ḏ'=>'ḏ','Ḑ'=>'ḑ','Ḓ'=>'ḓ','Ḕ'=>'ḕ','Ḗ'=>'ḗ','Ḙ'=>'ḙ','Ḛ'=>'ḛ','Ḝ'=>'ḝ','Ḟ'=>'ḟ','Ḡ'=>'ḡ','Ḣ'=>'ḣ','Ḥ'=>'ḥ','Ḧ'=>'ḧ','Ḩ'=>'ḩ','Ḫ'=>'ḫ','Ḭ'=>'ḭ','Ḯ'=>'ḯ','Ḱ'=>'ḱ','Ḳ'=>'ḳ','Ḵ'=>'ḵ','Ḷ'=>'ḷ','Ḹ'=>'ḹ','Ḻ'=>'ḻ','Ḽ'=>'ḽ','Ḿ'=>'ḿ','Ṁ'=>'ṁ','Ṃ'=>'ṃ','Ṅ'=>'ṅ','Ṇ'=>'ṇ','Ṉ'=>'ṉ','Ṋ'=>'ṋ','Ṍ'=>'ṍ','Ṏ'=>'ṏ','Ṑ'=>'ṑ','Ṓ'=>'ṓ','Ṕ'=>'ṕ','Ṗ'=>'ṗ','Ṙ'=>'ṙ','Ṛ'=>'ṛ','Ṝ'=>'ṝ','Ṟ'=>'ṟ','Ṡ'=>'ṡ','Ṣ'=>'ṣ','Ṥ'=>'ṥ','Ṧ'=>'ṧ','Ṩ'=>'ṩ','Ṫ'=>'ṫ','Ṭ'=>'ṭ','Ṯ'=>'ṯ','Ṱ'=>'ṱ','Ṳ'=>'ṳ','Ṵ'=>'ṵ','Ṷ'=>'ṷ','Ṹ'=>'ṹ','Ṻ'=>'ṻ','Ṽ'=>'ṽ','Ṿ'=>'ṿ','Ẁ'=>'ẁ','Ẃ'=>'ẃ','Ẅ'=>'ẅ','Ẇ'=>'ẇ','Ẉ'=>'ẉ','Ẋ'=>'ẋ','Ẍ'=>'ẍ','Ẏ'=>'ẏ','Ẑ'=>'ẑ','Ẓ'=>'ẓ','Ẕ'=>'ẕ','ẛ'=>'ṡ','Ạ'=>'ạ','Ả'=>'ả','Ấ'=>'ấ','Ầ'=>'ầ','Ẩ'=>'ẩ','Ẫ'=>'ẫ','Ậ'=>'ậ','Ắ'=>'ắ','Ằ'=>'ằ','Ẳ'=>'ẳ','Ẵ'=>'ẵ','Ặ'=>'ặ','Ẹ'=>'ẹ','Ẻ'=>'ẻ','Ẽ'=>'ẽ','Ế'=>'ế','Ề'=>'ề','Ể'=>'ể','Ễ'=>'ễ','Ệ'=>'ệ','Ỉ'=>'ỉ','Ị'=>'ị','Ọ'=>'ọ','Ỏ'=>'ỏ','Ố'=>'ố','Ồ'=>'ồ','Ổ'=>'ổ','Ỗ'=>'ỗ','Ộ'=>'ộ','Ớ'=>'ớ','Ờ'=>'ờ','Ở'=>'ở','Ỡ'=>'ỡ','Ợ'=>'ợ','Ụ'=>'ụ','Ủ'=>'ủ','Ứ'=>'ứ','Ừ'=>'ừ','Ử'=>'ử','Ữ'=>'ữ','Ự'=>'ự','Ỳ'=>'ỳ','Ỵ'=>'ỵ','Ỷ'=>'ỷ','Ỹ'=>'ỹ','Ἀ'=>'ἀ','Ἁ'=>'ἁ','Ἂ'=>'ἂ','Ἃ'=>'ἃ','Ἄ'=>'ἄ','Ἅ'=>'ἅ','Ἆ'=>'ἆ','Ἇ'=>'ἇ','Ἐ'=>'ἐ','Ἑ'=>'ἑ','Ἒ'=>'ἒ','Ἓ'=>'ἓ','Ἔ'=>'ἔ','Ἕ'=>'ἕ','Ἠ'=>'ἠ','Ἡ'=>'ἡ','Ἢ'=>'ἢ','Ἣ'=>'ἣ','Ἤ'=>'ἤ','Ἥ'=>'ἥ','Ἦ'=>'ἦ','Ἧ'=>'ἧ','Ἰ'=>'ἰ','Ἱ'=>'ἱ','Ἲ'=>'ἲ','Ἳ'=>'ἳ','Ἴ'=>'ἴ','Ἵ'=>'ἵ','Ἶ'=>'ἶ','Ἷ'=>'ἷ','Ὀ'=>'ὀ','Ὁ'=>'ὁ','Ὂ'=>'ὂ','Ὃ'=>'ὃ','Ὄ'=>'ὄ','Ὅ'=>'ὅ','Ὑ'=>'ὑ','Ὓ'=>'ὓ','Ὕ'=>'ὕ','Ὗ'=>'ὗ','Ὠ'=>'ὠ','Ὡ'=>'ὡ','Ὢ'=>'ὢ','Ὣ'=>'ὣ','Ὤ'=>'ὤ','Ὥ'=
>'ὥ','Ὦ'=>'ὦ','Ὧ'=>'ὧ','Ᾰ'=>'ᾰ','Ᾱ'=>'ᾱ','Ὰ'=>'ὰ','Ά'=>'ά','ι'=>'ι','Ὲ'=>'ὲ','Έ'=>'έ','Ὴ'=>'ὴ','Ή'=>'ή','Ῐ'=>'ῐ','Ῑ'=>'ῑ','Ὶ'=>'ὶ','Ί'=>'ί','Ῠ'=>'ῠ','Ῡ'=>'ῡ','Ὺ'=>'ὺ','Ύ'=>'ύ','Ῥ'=>'ῥ','Ὸ'=>'ὸ','Ό'=>'ό','Ὼ'=>'ὼ','Ώ'=>'ώ','Ω'=>'ω','K'=>'k','Å'=>'å','Ⅎ'=>'ⅎ','Ⅰ'=>'ⅰ','Ⅱ'=>'ⅱ','Ⅲ'=>'ⅲ','Ⅳ'=>'ⅳ','Ⅴ'=>'ⅴ','Ⅵ'=>'ⅵ','Ⅶ'=>'ⅶ','Ⅷ'=>'ⅷ','Ⅸ'=>'ⅸ','Ⅹ'=>'ⅹ','Ⅺ'=>'ⅺ','Ⅻ'=>'ⅻ','Ⅼ'=>'ⅼ','Ⅽ'=>'ⅽ','Ⅾ'=>'ⅾ','Ⅿ'=>'ⅿ','Ↄ'=>'ↄ','Ⓐ'=>'ⓐ','Ⓑ'=>'ⓑ','Ⓒ'=>'ⓒ','Ⓓ'=>'ⓓ','Ⓔ'=>'ⓔ','Ⓕ'=>'ⓕ','Ⓖ'=>'ⓖ','Ⓗ'=>'ⓗ','Ⓘ'=>'ⓘ','Ⓙ'=>'ⓙ','Ⓚ'=>'ⓚ','Ⓛ'=>'ⓛ','Ⓜ'=>'ⓜ','Ⓝ'=>'ⓝ','Ⓞ'=>'ⓞ','Ⓟ'=>'ⓟ','Ⓠ'=>'ⓠ','Ⓡ'=>'ⓡ','Ⓢ'=>'ⓢ','Ⓣ'=>'ⓣ','Ⓤ'=>'ⓤ','Ⓥ'=>'ⓥ','Ⓦ'=>'ⓦ','Ⓧ'=>'ⓧ','Ⓨ'=>'ⓨ','Ⓩ'=>'ⓩ','Ⰰ'=>'ⰰ','Ⰱ'=>'ⰱ','Ⰲ'=>'ⰲ','Ⰳ'=>'ⰳ','Ⰴ'=>'ⰴ','Ⰵ'=>'ⰵ','Ⰶ'=>'ⰶ','Ⰷ'=>'ⰷ','Ⰸ'=>'ⰸ','Ⰹ'=>'ⰹ','Ⰺ'=>'ⰺ','Ⰻ'=>'ⰻ','Ⰼ'=>'ⰼ','Ⰽ'=>'ⰽ','Ⰾ'=>'ⰾ','Ⰿ'=>'ⰿ','Ⱀ'=>'ⱀ','Ⱁ'=>'ⱁ','Ⱂ'=>'ⱂ','Ⱃ'=>'ⱃ','Ⱄ'=>'ⱄ','Ⱅ'=>'ⱅ','Ⱆ'=>'ⱆ','Ⱇ'=>'ⱇ','Ⱈ'=>'ⱈ','Ⱉ'=>'ⱉ','Ⱊ'=>'ⱊ','Ⱋ'=>'ⱋ','Ⱌ'=>'ⱌ','Ⱍ'=>'ⱍ','Ⱎ'=>'ⱎ','Ⱏ'=>'ⱏ','Ⱐ'=>'ⱐ','Ⱑ'=>'ⱑ','Ⱒ'=>'ⱒ','Ⱓ'=>'ⱓ','Ⱔ'=>'ⱔ','Ⱕ'=>'ⱕ','Ⱖ'=>'ⱖ','Ⱗ'=>'ⱗ','Ⱘ'=>'ⱘ','Ⱙ'=>'ⱙ','Ⱚ'=>'ⱚ','Ⱛ'=>'ⱛ','Ⱜ'=>'ⱜ','Ⱝ'=>'ⱝ','Ⱞ'=>'ⱞ','Ⱡ'=>'ⱡ','Ɫ'=>'ɫ','Ᵽ'=>'ᵽ','Ɽ'=>'ɽ','Ⱨ'=>'ⱨ','Ⱪ'=>'ⱪ','Ⱬ'=>'ⱬ','Ⱶ'=>'ⱶ','Ⲁ'=>'ⲁ','Ⲃ'=>'ⲃ','Ⲅ'=>'ⲅ','Ⲇ'=>'ⲇ','Ⲉ'=>'ⲉ','Ⲋ'=>'ⲋ','Ⲍ'=>'ⲍ','Ⲏ'=>'ⲏ','Ⲑ'=>'ⲑ','Ⲓ'=>'ⲓ','Ⲕ'=>'ⲕ','Ⲗ'=>'ⲗ','Ⲙ'=>'ⲙ','Ⲛ'=>'ⲛ','Ⲝ'=>'ⲝ','Ⲟ'=>'ⲟ','Ⲡ'=>'ⲡ','Ⲣ'=>'ⲣ','Ⲥ'=>'ⲥ','Ⲧ'=>'ⲧ','Ⲩ'=>'ⲩ','Ⲫ'=>'ⲫ','Ⲭ'=>'ⲭ','Ⲯ'=>'ⲯ','Ⲱ'=>'ⲱ','Ⲳ'=>'ⲳ','Ⲵ'=>'ⲵ','Ⲷ'=>'ⲷ','Ⲹ'=>'ⲹ','Ⲻ'=>'ⲻ','Ⲽ'=>'ⲽ','Ⲿ'=>'ⲿ','Ⳁ'=>'ⳁ','Ⳃ'=>'ⳃ','Ⳅ'=>'ⳅ','Ⳇ'=>'ⳇ','Ⳉ'=>'ⳉ','Ⳋ'=>'ⳋ','Ⳍ'=>'ⳍ','Ⳏ'=>'ⳏ','Ⳑ'=>'ⳑ','Ⳓ'=>'ⳓ','Ⳕ'=>'ⳕ','Ⳗ'=>'ⳗ','Ⳙ'=>'ⳙ','Ⳛ'=>'ⳛ','Ⳝ'=>'ⳝ','Ⳟ'=>'ⳟ','Ⳡ'=>'ⳡ','Ⳣ'=>'ⳣ','Ａ'=>'ａ','Ｂ'=>'ｂ','Ｃ'=>'ｃ','Ｄ'=>'ｄ','Ｅ'=>'ｅ','Ｆ'=>'ｆ','Ｇ'=>'ｇ','Ｈ'=>'ｈ','Ｉ'=>'ｉ','Ｊ'=>'ｊ','Ｋ'=>'ｋ','Ｌ'=>'ｌ','Ｍ'=>'ｍ','Ｎ'=>'ｎ','Ｏ'=>'ｏ','Ｐ'=>'ｐ','Ｑ'=>'ｑ','Ｒ'=>'ｒ','Ｓ'=>'ｓ','Ｔ'=>'ｔ','Ｕ'=>'ｕ','Ｖ'=>'ｖ','Ｗ'=>'ｗ','Ｘ'=>'ｘ','Ｙ'=>'ｙ','Ｚ'=>'ｚ','𐐀'=>'𐐨','𐐁'=>'𐐩','𐐂'=>'𐐪','𐐃'=>'𐐫','𐐄'=>'𐐬','𐐅'=>'𐐭','𐐆'=>'𐐮','𐐇'=>'𐐯','𐐈'=>'𐐰','𐐉'=>'𐐱','𐐊'=>'𐐲','𐐋'=>'𐐳','𐐌'=>'𐐴','𐐍'=>'𐐵','𐐎'=>'𐐶','𐐏'=>'𐐷','𐐐'=>'𐐸','𐐑'=>'𐐹','𐐒'=>'𐐺','𐐓'=>'
𐐻','𐐔'=>'𐐼','𐐕'=>'𐐽','𐐖'=>'𐐾','𐐗'=>'𐐿','𐐘'=>'𐑀','𐐙'=>'𐑁','𐐚'=>'𐑂','𐐛'=>'𐑃','𐐜'=>'𐑄','𐐝'=>'𐑅','𐐞'=>'𐑆','𐐟'=>'𐑇','𐐠'=>'𐑈','𐐡'=>'𐑉','𐐢'=>'𐑊','𐐣'=>'𐑋','𐐤'=>'𐑌','𐐥'=>'𐑍','𐐦'=>'𐑎','𐐧'=>'𐑏');
\ No newline at end of file
diff --git a/phpBB/includes/utf/data/case_fold_F.php b/phpBB/includes/utf/data/case_fold_F.php
deleted file mode 100644
index 7e2ffb25ec..0000000000
--- a/phpBB/includes/utf/data/case_fold_F.php
+++ /dev/null
@@ -1 +0,0 @@
-<?php return array('ß'=>'ss','İ'=>'i̇','ŉ'=>'ʼn','ǰ'=>'ǰ','ΐ'=>'ΐ','ΰ'=>'ΰ','և'=>'եւ','ẖ'=>'ẖ','ẗ'=>'ẗ','ẘ'=>'ẘ','ẙ'=>'ẙ','ẚ'=>'aʾ','ὐ'=>'ὐ','ὒ'=>'ὒ','ὔ'=>'ὔ','ὖ'=>'ὖ','ᾀ'=>'ἀι','ᾁ'=>'ἁι','ᾂ'=>'ἂι','ᾃ'=>'ἃι','ᾄ'=>'ἄι','ᾅ'=>'ἅι','ᾆ'=>'ἆι','ᾇ'=>'ἇι','ᾈ'=>'ἀι','ᾉ'=>'ἁι','ᾊ'=>'ἂι','ᾋ'=>'ἃι','ᾌ'=>'ἄι','ᾍ'=>'ἅι','ᾎ'=>'ἆι','ᾏ'=>'ἇι','ᾐ'=>'ἠι','ᾑ'=>'ἡι','ᾒ'=>'ἢι','ᾓ'=>'ἣι','ᾔ'=>'ἤι','ᾕ'=>'ἥι','ᾖ'=>'ἦι','ᾗ'=>'ἧι','ᾘ'=>'ἠι','ᾙ'=>'ἡι','ᾚ'=>'ἢι','ᾛ'=>'ἣι','ᾜ'=>'ἤι','ᾝ'=>'ἥι','ᾞ'=>'ἦι','ᾟ'=>'ἧι','ᾠ'=>'ὠι','ᾡ'=>'ὡι','ᾢ'=>'ὢι','ᾣ'=>'ὣι','ᾤ'=>'ὤι','ᾥ'=>'ὥι','ᾦ'=>'ὦι','ᾧ'=>'ὧι','ᾨ'=>'ὠι','ᾩ'=>'ὡι','ᾪ'=>'ὢι','ᾫ'=>'ὣι','ᾬ'=>'ὤι','ᾭ'=>'ὥι','ᾮ'=>'ὦι','ᾯ'=>'ὧι','ᾲ'=>'ὰι','ᾳ'=>'αι','ᾴ'=>'άι','ᾶ'=>'ᾶ','ᾷ'=>'ᾶι','ᾼ'=>'αι','ῂ'=>'ὴι','ῃ'=>'ηι','ῄ'=>'ήι','ῆ'=>'ῆ','ῇ'=>'ῆι','ῌ'=>'ηι','ῒ'=>'ῒ','ΐ'=>'ΐ','ῖ'=>'ῖ','ῗ'=>'ῗ','ῢ'=>'ῢ','ΰ'=>'ΰ','ῤ'=>'ῤ','ῦ'=>'ῦ','ῧ'=>'ῧ','ῲ'=>'ὼι','ῳ'=>'ωι','ῴ'=>'ώι','ῶ'=>'ῶ','ῷ'=>'ῶι','ῼ'=>'ωι','ﬀ'=>'ff','ﬁ'=>'fi','ﬂ'=>'fl','ﬃ'=>'ffi','ﬄ'=>'ffl','ﬅ'=>'st','ﬆ'=>'st','ﬓ'=>'մն','ﬔ'=>'մե','ﬕ'=>'մի','ﬖ'=>'վն','ﬗ'=>'մխ');
\ No newline at end of file
diff --git a/phpBB/includes/utf/data/case_fold_S.php b/phpBB/includes/utf/data/case_fold_S.php
deleted file mode 100644
index 5f09ffa1dd..0000000000
--- a/phpBB/includes/utf/data/case_fold_S.php
+++ /dev/null
@@ -1 +0,0 @@
-<?php return array('ᾈ'=>'ᾀ','ᾉ'=>'ᾁ','ᾊ'=>'ᾂ','ᾋ'=>'ᾃ','ᾌ'=>'ᾄ','ᾍ'=>'ᾅ','ᾎ'=>'ᾆ','ᾏ'=>'ᾇ','ᾘ'=>'ᾐ','ᾙ'=>'ᾑ','ᾚ'=>'ᾒ','ᾛ'=>'ᾓ','ᾜ'=>'ᾔ','ᾝ'=>'ᾕ','ᾞ'=>'ᾖ','ᾟ'=>'ᾗ','ᾨ'=>'ᾠ','ᾩ'=>'ᾡ','ᾪ'=>'ᾢ','ᾫ'=>'ᾣ','ᾬ'=>'ᾤ','ᾭ'=>'ᾥ','ᾮ'=>'ᾦ','ᾯ'=>'ᾧ','ᾼ'=>'ᾳ','ῌ'=>'ῃ','ῼ'=>'ῳ');
\ No newline at end of file
diff --git a/phpBB/includes/utf/data/case_fold_c.php b/phpBB/includes/utf/data/case_fold_c.php
new file mode 100644
index 0000000000..00de1ba349
--- /dev/null
+++ b/phpBB/includes/utf/data/case_fold_c.php
@@ -0,0 +1 @@
+<?php return array('A'=>'a','B'=>'b','C'=>'c','D'=>'d','E'=>'e','F'=>'f','G'=>'g','H'=>'h','I'=>'i','J'=>'j','K'=>'k','L'=>'l','M'=>'m','N'=>'n','O'=>'o','P'=>'p','Q'=>'q','R'=>'r','S'=>'s','T'=>'t','U'=>'u','V'=>'v','W'=>'w','X'=>'x','Y'=>'y','Z'=>'z','µ'=>'μ','À'=>'à','Á'=>'á','Â'=>'â','Ã'=>'ã','Ä'=>'ä','Å'=>'å','Æ'=>'æ','Ç'=>'ç','È'=>'è','É'=>'é','Ê'=>'ê','Ë'=>'ë','Ì'=>'ì','Í'=>'í','Î'=>'î','Ï'=>'ï','Ð'=>'ð','Ñ'=>'ñ','Ò'=>'ò','Ó'=>'ó','Ô'=>'ô','Õ'=>'õ','Ö'=>'ö','Ø'=>'ø','Ù'=>'ù','Ú'=>'ú','Û'=>'û','Ü'=>'ü','Ý'=>'ý','Þ'=>'þ','Ā'=>'ā','Ă'=>'ă','Ą'=>'ą','Ć'=>'ć','Ĉ'=>'ĉ','Ċ'=>'ċ','Č'=>'č','Ď'=>'ď','Đ'=>'đ','Ē'=>'ē','Ĕ'=>'ĕ','Ė'=>'ė','Ę'=>'ę','Ě'=>'ě','Ĝ'=>'ĝ','Ğ'=>'ğ','Ġ'=>'ġ','Ģ'=>'ģ','Ĥ'=>'ĥ','Ħ'=>'ħ','Ĩ'=>'ĩ','Ī'=>'ī','Ĭ'=>'ĭ','Į'=>'į','Ĳ'=>'ĳ','Ĵ'=>'ĵ','Ķ'=>'ķ','Ĺ'=>'ĺ','Ļ'=>'ļ','Ľ'=>'ľ','Ŀ'=>'ŀ','Ł'=>'ł','Ń'=>'ń','Ņ'=>'ņ','Ň'=>'ň','Ŋ'=>'ŋ','Ō'=>'ō','Ŏ'=>'ŏ','Ő'=>'ő','Œ'=>'œ','Ŕ'=>'ŕ','Ŗ'=>'ŗ','Ř'=>'ř','Ś'=>'ś','Ŝ'=>'ŝ','Ş'=>'ş','Š'=>'š','Ţ'=>'ţ','Ť'=>'ť','Ŧ'=>'ŧ','Ũ'=>'ũ','Ū'=>'ū','Ŭ'=>'ŭ','Ů'=>'ů','Ű'=>'ű','Ų'=>'ų','Ŵ'=>'ŵ','Ŷ'=>'ŷ','Ÿ'=>'ÿ','Ź'=>'ź','Ż'=>'ż','Ž'=>'ž','ſ'=>'s','Ɓ'=>'ɓ','Ƃ'=>'ƃ','Ƅ'=>'ƅ','Ɔ'=>'ɔ','Ƈ'=>'ƈ','Ɖ'=>'ɖ','Ɗ'=>'ɗ','Ƌ'=>'ƌ','Ǝ'=>'ǝ','Ə'=>'ə','Ɛ'=>'ɛ','Ƒ'=>'ƒ','Ɠ'=>'ɠ','Ɣ'=>'ɣ','Ɩ'=>'ɩ','Ɨ'=>'ɨ','Ƙ'=>'ƙ','Ɯ'=>'ɯ','Ɲ'=>'ɲ','Ɵ'=>'ɵ','Ơ'=>'ơ','Ƣ'=>'ƣ','Ƥ'=>'ƥ','Ʀ'=>'ʀ','Ƨ'=>'ƨ','Ʃ'=>'ʃ','Ƭ'=>'ƭ','Ʈ'=>'ʈ','Ư'=>'ư','Ʊ'=>'ʊ','Ʋ'=>'ʋ','Ƴ'=>'ƴ','Ƶ'=>'ƶ','Ʒ'=>'ʒ','Ƹ'=>'ƹ','Ƽ'=>'ƽ','Ǆ'=>'ǆ','ǅ'=>'ǆ','Ǉ'=>'ǉ','ǈ'=>'ǉ','Ǌ'=>'ǌ','ǋ'=>'ǌ','Ǎ'=>'ǎ','Ǐ'=>'ǐ','Ǒ'=>'ǒ','Ǔ'=>'ǔ','Ǖ'=>'ǖ','Ǘ'=>'ǘ','Ǚ'=>'ǚ','Ǜ'=>'ǜ','Ǟ'=>'ǟ','Ǡ'=>'ǡ','Ǣ'=>'ǣ','Ǥ'=>'ǥ','Ǧ'=>'ǧ','Ǩ'=>'ǩ','Ǫ'=>'ǫ','Ǭ'=>'ǭ','Ǯ'=>'ǯ','Ǳ'=>'ǳ','ǲ'=>'ǳ','Ǵ'=>'ǵ','Ƕ'=>'ƕ','Ƿ'=>'ƿ','Ǹ'=>'ǹ','Ǻ'=>'ǻ','Ǽ'=>'ǽ','Ǿ'=>'ǿ','Ȁ'=>'ȁ','Ȃ'=>'ȃ','Ȅ'=>'ȅ','Ȇ'=>'ȇ','Ȉ'=>'ȉ','Ȋ'=>'ȋ','Ȍ'=>'ȍ','Ȏ'=>'ȏ','Ȑ'=>'ȑ','Ȓ'=>'ȓ','Ȕ'=>'ȕ','Ȗ'=>'ȗ','Ș'=>'ș','Ț'=>'ț','Ȝ'=>'ȝ','Ȟ'=>'ȟ','Ƞ'=>'ƞ','Ȣ'=>'ȣ','Ȥ'=>'ȥ','Ȧ'=>'ȧ','Ȩ'=>'ȩ','Ȫ'=>'ȫ','Ȭ'=>'ȭ','Ȯ'=>'ȯ','Ȱ'=>'ȱ','Ȳ'=>'ȳ','Ⱥ'=>'ⱥ','Ȼ'=>'ȼ','Ƚ'=>'ƚ','Ⱦ'=>'ⱦ','Ɂ'=>'ɂ','Ƀ'=>'ƀ',
'Ʉ'=>'ʉ','Ʌ'=>'ʌ','Ɇ'=>'ɇ','Ɉ'=>'ɉ','Ɋ'=>'ɋ','Ɍ'=>'ɍ','Ɏ'=>'ɏ','ͅ'=>'ι','Ά'=>'ά','Έ'=>'έ','Ή'=>'ή','Ί'=>'ί','Ό'=>'ό','Ύ'=>'ύ','Ώ'=>'ώ','Α'=>'α','Β'=>'β','Γ'=>'γ','Δ'=>'δ','Ε'=>'ε','Ζ'=>'ζ','Η'=>'η','Θ'=>'θ','Ι'=>'ι','Κ'=>'κ','Λ'=>'λ','Μ'=>'μ','Ν'=>'ν','Ξ'=>'ξ','Ο'=>'ο','Π'=>'π','Ρ'=>'ρ','Σ'=>'σ','Τ'=>'τ','Υ'=>'υ','Φ'=>'φ','Χ'=>'χ','Ψ'=>'ψ','Ω'=>'ω','Ϊ'=>'ϊ','Ϋ'=>'ϋ','ς'=>'σ','ϐ'=>'β','ϑ'=>'θ','ϕ'=>'φ','ϖ'=>'π','Ϙ'=>'ϙ','Ϛ'=>'ϛ','Ϝ'=>'ϝ','Ϟ'=>'ϟ','Ϡ'=>'ϡ','Ϣ'=>'ϣ','Ϥ'=>'ϥ','Ϧ'=>'ϧ','Ϩ'=>'ϩ','Ϫ'=>'ϫ','Ϭ'=>'ϭ','Ϯ'=>'ϯ','ϰ'=>'κ','ϱ'=>'ρ','ϴ'=>'θ','ϵ'=>'ε','Ϸ'=>'ϸ','Ϲ'=>'ϲ','Ϻ'=>'ϻ','Ͻ'=>'ͻ','Ͼ'=>'ͼ','Ͽ'=>'ͽ','Ѐ'=>'ѐ','Ё'=>'ё','Ђ'=>'ђ','Ѓ'=>'ѓ','Є'=>'є','Ѕ'=>'ѕ','І'=>'і','Ї'=>'ї','Ј'=>'ј','Љ'=>'љ','Њ'=>'њ','Ћ'=>'ћ','Ќ'=>'ќ','Ѝ'=>'ѝ','Ў'=>'ў','Џ'=>'џ','А'=>'а','Б'=>'б','В'=>'в','Г'=>'г','Д'=>'д','Е'=>'е','Ж'=>'ж','З'=>'з','И'=>'и','Й'=>'й','К'=>'к','Л'=>'л','М'=>'м','Н'=>'н','О'=>'о','П'=>'п','Р'=>'р','С'=>'с','Т'=>'т','У'=>'у','Ф'=>'ф','Х'=>'х','Ц'=>'ц','Ч'=>'ч','Ш'=>'ш','Щ'=>'щ','Ъ'=>'ъ','Ы'=>'ы','Ь'=>'ь','Э'=>'э','Ю'=>'ю','Я'=>'я','Ѡ'=>'ѡ','Ѣ'=>'ѣ','Ѥ'=>'ѥ','Ѧ'=>'ѧ','Ѩ'=>'ѩ','Ѫ'=>'ѫ','Ѭ'=>'ѭ','Ѯ'=>'ѯ','Ѱ'=>'ѱ','Ѳ'=>'ѳ','Ѵ'=>'ѵ','Ѷ'=>'ѷ','Ѹ'=>'ѹ','Ѻ'=>'ѻ','Ѽ'=>'ѽ','Ѿ'=>'ѿ','Ҁ'=>'ҁ','Ҋ'=>'ҋ','Ҍ'=>'ҍ','Ҏ'=>'ҏ','Ґ'=>'ґ','Ғ'=>'ғ','Ҕ'=>'ҕ','Җ'=>'җ','Ҙ'=>'ҙ','Қ'=>'қ','Ҝ'=>'ҝ','Ҟ'=>'ҟ','Ҡ'=>'ҡ','Ң'=>'ң','Ҥ'=>'ҥ','Ҧ'=>'ҧ','Ҩ'=>'ҩ','Ҫ'=>'ҫ','Ҭ'=>'ҭ','Ү'=>'ү','Ұ'=>'ұ','Ҳ'=>'ҳ','Ҵ'=>'ҵ','Ҷ'=>'ҷ','Ҹ'=>'ҹ','Һ'=>'һ','Ҽ'=>'ҽ','Ҿ'=>'ҿ','Ӏ'=>'ӏ','Ӂ'=>'ӂ','Ӄ'=>'ӄ','Ӆ'=>'ӆ','Ӈ'=>'ӈ','Ӊ'=>'ӊ','Ӌ'=>'ӌ','Ӎ'=>'ӎ','Ӑ'=>'ӑ','Ӓ'=>'ӓ','Ӕ'=>'ӕ','Ӗ'=>'ӗ','Ә'=>'ә','Ӛ'=>'ӛ','Ӝ'=>'ӝ','Ӟ'=>'ӟ','Ӡ'=>'ӡ','Ӣ'=>'ӣ','Ӥ'=>'ӥ','Ӧ'=>'ӧ','Ө'=>'ө','Ӫ'=>'ӫ','Ӭ'=>'ӭ','Ӯ'=>'ӯ','Ӱ'=>'ӱ','Ӳ'=>'ӳ','Ӵ'=>'ӵ','Ӷ'=>'ӷ','Ӹ'=>'ӹ','Ӻ'=>'ӻ','Ӽ'=>'ӽ','Ӿ'=>'ӿ','Ԁ'=>'ԁ','Ԃ'=>'ԃ','Ԅ'=>'ԅ','Ԇ'=>'ԇ','Ԉ'=>'ԉ','Ԋ'=>'ԋ','Ԍ'=>'ԍ','Ԏ'=>'ԏ','Ԑ'=>'ԑ','Ԓ'=>'ԓ','Ա'=>'ա','Բ'=>'բ','Գ'=>'գ','Դ'=>'դ','Ե'=>'ե','Զ'=>'զ','Է'=>'է','Ը'=>'ը','Թ'=>'թ','Ժ'=>'ժ','Ի'=>'ի','Լ'=>'լ','Խ'=>'խ','Ծ'=>'ծ','Կ'=>'կ','Հ'=>'հ','Ձ'=>'ձ','Ղ'=>'ղ','Ճ'=>'ճ','Մ'=>'մ','Յ
'=>'յ','Ն'=>'ն','Շ'=>'շ','Ո'=>'ո','Չ'=>'չ','Պ'=>'պ','Ջ'=>'ջ','Ռ'=>'ռ','Ս'=>'ս','Վ'=>'վ','Տ'=>'տ','Ր'=>'ր','Ց'=>'ց','Ւ'=>'ւ','Փ'=>'փ','Ք'=>'ք','Օ'=>'օ','Ֆ'=>'ֆ','Ⴀ'=>'ⴀ','Ⴁ'=>'ⴁ','Ⴂ'=>'ⴂ','Ⴃ'=>'ⴃ','Ⴄ'=>'ⴄ','Ⴅ'=>'ⴅ','Ⴆ'=>'ⴆ','Ⴇ'=>'ⴇ','Ⴈ'=>'ⴈ','Ⴉ'=>'ⴉ','Ⴊ'=>'ⴊ','Ⴋ'=>'ⴋ','Ⴌ'=>'ⴌ','Ⴍ'=>'ⴍ','Ⴎ'=>'ⴎ','Ⴏ'=>'ⴏ','Ⴐ'=>'ⴐ','Ⴑ'=>'ⴑ','Ⴒ'=>'ⴒ','Ⴓ'=>'ⴓ','Ⴔ'=>'ⴔ','Ⴕ'=>'ⴕ','Ⴖ'=>'ⴖ','Ⴗ'=>'ⴗ','Ⴘ'=>'ⴘ','Ⴙ'=>'ⴙ','Ⴚ'=>'ⴚ','Ⴛ'=>'ⴛ','Ⴜ'=>'ⴜ','Ⴝ'=>'ⴝ','Ⴞ'=>'ⴞ','Ⴟ'=>'ⴟ','Ⴠ'=>'ⴠ','Ⴡ'=>'ⴡ','Ⴢ'=>'ⴢ','Ⴣ'=>'ⴣ','Ⴤ'=>'ⴤ','Ⴥ'=>'ⴥ','Ḁ'=>'ḁ','Ḃ'=>'ḃ','Ḅ'=>'ḅ','Ḇ'=>'ḇ','Ḉ'=>'ḉ','Ḋ'=>'ḋ','Ḍ'=>'ḍ','Ḏ'=>'ḏ','Ḑ'=>'ḑ','Ḓ'=>'ḓ','Ḕ'=>'ḕ','Ḗ'=>'ḗ','Ḙ'=>'ḙ','Ḛ'=>'ḛ','Ḝ'=>'ḝ','Ḟ'=>'ḟ','Ḡ'=>'ḡ','Ḣ'=>'ḣ','Ḥ'=>'ḥ','Ḧ'=>'ḧ','Ḩ'=>'ḩ','Ḫ'=>'ḫ','Ḭ'=>'ḭ','Ḯ'=>'ḯ','Ḱ'=>'ḱ','Ḳ'=>'ḳ','Ḵ'=>'ḵ','Ḷ'=>'ḷ','Ḹ'=>'ḹ','Ḻ'=>'ḻ','Ḽ'=>'ḽ','Ḿ'=>'ḿ','Ṁ'=>'ṁ','Ṃ'=>'ṃ','Ṅ'=>'ṅ','Ṇ'=>'ṇ','Ṉ'=>'ṉ','Ṋ'=>'ṋ','Ṍ'=>'ṍ','Ṏ'=>'ṏ','Ṑ'=>'ṑ','Ṓ'=>'ṓ','Ṕ'=>'ṕ','Ṗ'=>'ṗ','Ṙ'=>'ṙ','Ṛ'=>'ṛ','Ṝ'=>'ṝ','Ṟ'=>'ṟ','Ṡ'=>'ṡ','Ṣ'=>'ṣ','Ṥ'=>'ṥ','Ṧ'=>'ṧ','Ṩ'=>'ṩ','Ṫ'=>'ṫ','Ṭ'=>'ṭ','Ṯ'=>'ṯ','Ṱ'=>'ṱ','Ṳ'=>'ṳ','Ṵ'=>'ṵ','Ṷ'=>'ṷ','Ṹ'=>'ṹ','Ṻ'=>'ṻ','Ṽ'=>'ṽ','Ṿ'=>'ṿ','Ẁ'=>'ẁ','Ẃ'=>'ẃ','Ẅ'=>'ẅ','Ẇ'=>'ẇ','Ẉ'=>'ẉ','Ẋ'=>'ẋ','Ẍ'=>'ẍ','Ẏ'=>'ẏ','Ẑ'=>'ẑ','Ẓ'=>'ẓ','Ẕ'=>'ẕ','ẛ'=>'ṡ','Ạ'=>'ạ','Ả'=>'ả','Ấ'=>'ấ','Ầ'=>'ầ','Ẩ'=>'ẩ','Ẫ'=>'ẫ','Ậ'=>'ậ','Ắ'=>'ắ','Ằ'=>'ằ','Ẳ'=>'ẳ','Ẵ'=>'ẵ','Ặ'=>'ặ','Ẹ'=>'ẹ','Ẻ'=>'ẻ','Ẽ'=>'ẽ','Ế'=>'ế','Ề'=>'ề','Ể'=>'ể','Ễ'=>'ễ','Ệ'=>'ệ','Ỉ'=>'ỉ','Ị'=>'ị','Ọ'=>'ọ','Ỏ'=>'ỏ','Ố'=>'ố','Ồ'=>'ồ','Ổ'=>'ổ','Ỗ'=>'ỗ','Ộ'=>'ộ','Ớ'=>'ớ','Ờ'=>'ờ','Ở'=>'ở','Ỡ'=>'ỡ','Ợ'=>'ợ','Ụ'=>'ụ','Ủ'=>'ủ','Ứ'=>'ứ','Ừ'=>'ừ','Ử'=>'ử','Ữ'=>'ữ','Ự'=>'ự','Ỳ'=>'ỳ','Ỵ'=>'ỵ','Ỷ'=>'ỷ','Ỹ'=>'ỹ','Ἀ'=>'ἀ','Ἁ'=>'ἁ','Ἂ'=>'ἂ','Ἃ'=>'ἃ','Ἄ'=>'ἄ','Ἅ'=>'ἅ','Ἆ'=>'ἆ','Ἇ'=>'ἇ','Ἐ'=>'ἐ','Ἑ'=>'ἑ','Ἒ'=>'ἒ','Ἓ'=>'ἓ','Ἔ'=>'ἔ','Ἕ'=>'ἕ','Ἠ'=>'ἠ','Ἡ'=>'ἡ','Ἢ'=>'ἢ','Ἣ'=>'ἣ','Ἤ'=>'ἤ','Ἥ'=>'ἥ','Ἦ'=>'ἦ','Ἧ'=>'ἧ','Ἰ'=>'ἰ','Ἱ'=>'ἱ','Ἲ'=>'ἲ','Ἳ'=>'ἳ','Ἴ'=>'ἴ','Ἵ'=>'ἵ','Ἶ'=>'ἶ','Ἷ'=>'ἷ','Ὀ'=>'ὀ','Ὁ'=>'ὁ','Ὂ'=>'ὂ','Ὃ'=>'ὃ','Ὄ'=>'ὄ','Ὅ'=>'ὅ','Ὑ'=>'ὑ','Ὓ'=>'ὓ','Ὕ'=>'ὕ','Ὗ'=>'ὗ','Ὠ'=>'ὠ','Ὡ'=>'ὡ','Ὢ'=>'ὢ','Ὣ'=>'ὣ','Ὤ'=>'ὤ','Ὥ'=
>'ὥ','Ὦ'=>'ὦ','Ὧ'=>'ὧ','Ᾰ'=>'ᾰ','Ᾱ'=>'ᾱ','Ὰ'=>'ὰ','Ά'=>'ά','ι'=>'ι','Ὲ'=>'ὲ','Έ'=>'έ','Ὴ'=>'ὴ','Ή'=>'ή','Ῐ'=>'ῐ','Ῑ'=>'ῑ','Ὶ'=>'ὶ','Ί'=>'ί','Ῠ'=>'ῠ','Ῡ'=>'ῡ','Ὺ'=>'ὺ','Ύ'=>'ύ','Ῥ'=>'ῥ','Ὸ'=>'ὸ','Ό'=>'ό','Ὼ'=>'ὼ','Ώ'=>'ώ','Ω'=>'ω','K'=>'k','Å'=>'å','Ⅎ'=>'ⅎ','Ⅰ'=>'ⅰ','Ⅱ'=>'ⅱ','Ⅲ'=>'ⅲ','Ⅳ'=>'ⅳ','Ⅴ'=>'ⅴ','Ⅵ'=>'ⅵ','Ⅶ'=>'ⅶ','Ⅷ'=>'ⅷ','Ⅸ'=>'ⅸ','Ⅹ'=>'ⅹ','Ⅺ'=>'ⅺ','Ⅻ'=>'ⅻ','Ⅼ'=>'ⅼ','Ⅽ'=>'ⅽ','Ⅾ'=>'ⅾ','Ⅿ'=>'ⅿ','Ↄ'=>'ↄ','Ⓐ'=>'ⓐ','Ⓑ'=>'ⓑ','Ⓒ'=>'ⓒ','Ⓓ'=>'ⓓ','Ⓔ'=>'ⓔ','Ⓕ'=>'ⓕ','Ⓖ'=>'ⓖ','Ⓗ'=>'ⓗ','Ⓘ'=>'ⓘ','Ⓙ'=>'ⓙ','Ⓚ'=>'ⓚ','Ⓛ'=>'ⓛ','Ⓜ'=>'ⓜ','Ⓝ'=>'ⓝ','Ⓞ'=>'ⓞ','Ⓟ'=>'ⓟ','Ⓠ'=>'ⓠ','Ⓡ'=>'ⓡ','Ⓢ'=>'ⓢ','Ⓣ'=>'ⓣ','Ⓤ'=>'ⓤ','Ⓥ'=>'ⓥ','Ⓦ'=>'ⓦ','Ⓧ'=>'ⓧ','Ⓨ'=>'ⓨ','Ⓩ'=>'ⓩ','Ⰰ'=>'ⰰ','Ⰱ'=>'ⰱ','Ⰲ'=>'ⰲ','Ⰳ'=>'ⰳ','Ⰴ'=>'ⰴ','Ⰵ'=>'ⰵ','Ⰶ'=>'ⰶ','Ⰷ'=>'ⰷ','Ⰸ'=>'ⰸ','Ⰹ'=>'ⰹ','Ⰺ'=>'ⰺ','Ⰻ'=>'ⰻ','Ⰼ'=>'ⰼ','Ⰽ'=>'ⰽ','Ⰾ'=>'ⰾ','Ⰿ'=>'ⰿ','Ⱀ'=>'ⱀ','Ⱁ'=>'ⱁ','Ⱂ'=>'ⱂ','Ⱃ'=>'ⱃ','Ⱄ'=>'ⱄ','Ⱅ'=>'ⱅ','Ⱆ'=>'ⱆ','Ⱇ'=>'ⱇ','Ⱈ'=>'ⱈ','Ⱉ'=>'ⱉ','Ⱊ'=>'ⱊ','Ⱋ'=>'ⱋ','Ⱌ'=>'ⱌ','Ⱍ'=>'ⱍ','Ⱎ'=>'ⱎ','Ⱏ'=>'ⱏ','Ⱐ'=>'ⱐ','Ⱑ'=>'ⱑ','Ⱒ'=>'ⱒ','Ⱓ'=>'ⱓ','Ⱔ'=>'ⱔ','Ⱕ'=>'ⱕ','Ⱖ'=>'ⱖ','Ⱗ'=>'ⱗ','Ⱘ'=>'ⱘ','Ⱙ'=>'ⱙ','Ⱚ'=>'ⱚ','Ⱛ'=>'ⱛ','Ⱜ'=>'ⱜ','Ⱝ'=>'ⱝ','Ⱞ'=>'ⱞ','Ⱡ'=>'ⱡ','Ɫ'=>'ɫ','Ᵽ'=>'ᵽ','Ɽ'=>'ɽ','Ⱨ'=>'ⱨ','Ⱪ'=>'ⱪ','Ⱬ'=>'ⱬ','Ⱶ'=>'ⱶ','Ⲁ'=>'ⲁ','Ⲃ'=>'ⲃ','Ⲅ'=>'ⲅ','Ⲇ'=>'ⲇ','Ⲉ'=>'ⲉ','Ⲋ'=>'ⲋ','Ⲍ'=>'ⲍ','Ⲏ'=>'ⲏ','Ⲑ'=>'ⲑ','Ⲓ'=>'ⲓ','Ⲕ'=>'ⲕ','Ⲗ'=>'ⲗ','Ⲙ'=>'ⲙ','Ⲛ'=>'ⲛ','Ⲝ'=>'ⲝ','Ⲟ'=>'ⲟ','Ⲡ'=>'ⲡ','Ⲣ'=>'ⲣ','Ⲥ'=>'ⲥ','Ⲧ'=>'ⲧ','Ⲩ'=>'ⲩ','Ⲫ'=>'ⲫ','Ⲭ'=>'ⲭ','Ⲯ'=>'ⲯ','Ⲱ'=>'ⲱ','Ⲳ'=>'ⲳ','Ⲵ'=>'ⲵ','Ⲷ'=>'ⲷ','Ⲹ'=>'ⲹ','Ⲻ'=>'ⲻ','Ⲽ'=>'ⲽ','Ⲿ'=>'ⲿ','Ⳁ'=>'ⳁ','Ⳃ'=>'ⳃ','Ⳅ'=>'ⳅ','Ⳇ'=>'ⳇ','Ⳉ'=>'ⳉ','Ⳋ'=>'ⳋ','Ⳍ'=>'ⳍ','Ⳏ'=>'ⳏ','Ⳑ'=>'ⳑ','Ⳓ'=>'ⳓ','Ⳕ'=>'ⳕ','Ⳗ'=>'ⳗ','Ⳙ'=>'ⳙ','Ⳛ'=>'ⳛ','Ⳝ'=>'ⳝ','Ⳟ'=>'ⳟ','Ⳡ'=>'ⳡ','Ⳣ'=>'ⳣ','Ａ'=>'ａ','Ｂ'=>'ｂ','Ｃ'=>'ｃ','Ｄ'=>'ｄ','Ｅ'=>'ｅ','Ｆ'=>'ｆ','Ｇ'=>'ｇ','Ｈ'=>'ｈ','Ｉ'=>'ｉ','Ｊ'=>'ｊ','Ｋ'=>'ｋ','Ｌ'=>'ｌ','Ｍ'=>'ｍ','Ｎ'=>'ｎ','Ｏ'=>'ｏ','Ｐ'=>'ｐ','Ｑ'=>'ｑ','Ｒ'=>'ｒ','Ｓ'=>'ｓ','Ｔ'=>'ｔ','Ｕ'=>'ｕ','Ｖ'=>'ｖ','Ｗ'=>'ｗ','Ｘ'=>'ｘ','Ｙ'=>'ｙ','Ｚ'=>'ｚ','𐐀'=>'𐐨','𐐁'=>'𐐩','𐐂'=>'𐐪','𐐃'=>'𐐫','𐐄'=>'𐐬','𐐅'=>'𐐭','𐐆'=>'𐐮','𐐇'=>'𐐯','𐐈'=>'𐐰','𐐉'=>'𐐱','𐐊'=>'𐐲','𐐋'=>'𐐳','𐐌'=>'𐐴','𐐍'=>'𐐵','𐐎'=>'𐐶','𐐏'=>'𐐷','𐐐'=>'𐐸','𐐑'=>'𐐹','𐐒'=>'𐐺','𐐓'=>'
𐐻','𐐔'=>'𐐼','𐐕'=>'𐐽','𐐖'=>'𐐾','𐐗'=>'𐐿','𐐘'=>'𐑀','𐐙'=>'𐑁','𐐚'=>'𐑂','𐐛'=>'𐑃','𐐜'=>'𐑄','𐐝'=>'𐑅','𐐞'=>'𐑆','𐐟'=>'𐑇','𐐠'=>'𐑈','𐐡'=>'𐑉','𐐢'=>'𐑊','𐐣'=>'𐑋','𐐤'=>'𐑌','𐐥'=>'𐑍','𐐦'=>'𐑎','𐐧'=>'𐑏');
\ No newline at end of file
diff --git a/phpBB/includes/utf/data/case_fold_f.php b/phpBB/includes/utf/data/case_fold_f.php
new file mode 100644
index 0000000000..7e2ffb25ec
--- /dev/null
+++ b/phpBB/includes/utf/data/case_fold_f.php
@@ -0,0 +1 @@
+<?php return array('ß'=>'ss','İ'=>'i̇','ŉ'=>'ʼn','ǰ'=>'ǰ','ΐ'=>'ΐ','ΰ'=>'ΰ','և'=>'եւ','ẖ'=>'ẖ','ẗ'=>'ẗ','ẘ'=>'ẘ','ẙ'=>'ẙ','ẚ'=>'aʾ','ὐ'=>'ὐ','ὒ'=>'ὒ','ὔ'=>'ὔ','ὖ'=>'ὖ','ᾀ'=>'ἀι','ᾁ'=>'ἁι','ᾂ'=>'ἂι','ᾃ'=>'ἃι','ᾄ'=>'ἄι','ᾅ'=>'ἅι','ᾆ'=>'ἆι','ᾇ'=>'ἇι','ᾈ'=>'ἀι','ᾉ'=>'ἁι','ᾊ'=>'ἂι','ᾋ'=>'ἃι','ᾌ'=>'ἄι','ᾍ'=>'ἅι','ᾎ'=>'ἆι','ᾏ'=>'ἇι','ᾐ'=>'ἠι','ᾑ'=>'ἡι','ᾒ'=>'ἢι','ᾓ'=>'ἣι','ᾔ'=>'ἤι','ᾕ'=>'ἥι','ᾖ'=>'ἦι','ᾗ'=>'ἧι','ᾘ'=>'ἠι','ᾙ'=>'ἡι','ᾚ'=>'ἢι','ᾛ'=>'ἣι','ᾜ'=>'ἤι','ᾝ'=>'ἥι','ᾞ'=>'ἦι','ᾟ'=>'ἧι','ᾠ'=>'ὠι','ᾡ'=>'ὡι','ᾢ'=>'ὢι','ᾣ'=>'ὣι','ᾤ'=>'ὤι','ᾥ'=>'ὥι','ᾦ'=>'ὦι','ᾧ'=>'ὧι','ᾨ'=>'ὠι','ᾩ'=>'ὡι','ᾪ'=>'ὢι','ᾫ'=>'ὣι','ᾬ'=>'ὤι','ᾭ'=>'ὥι','ᾮ'=>'ὦι','ᾯ'=>'ὧι','ᾲ'=>'ὰι','ᾳ'=>'αι','ᾴ'=>'άι','ᾶ'=>'ᾶ','ᾷ'=>'ᾶι','ᾼ'=>'αι','ῂ'=>'ὴι','ῃ'=>'ηι','ῄ'=>'ήι','ῆ'=>'ῆ','ῇ'=>'ῆι','ῌ'=>'ηι','ῒ'=>'ῒ','ΐ'=>'ΐ','ῖ'=>'ῖ','ῗ'=>'ῗ','ῢ'=>'ῢ','ΰ'=>'ΰ','ῤ'=>'ῤ','ῦ'=>'ῦ','ῧ'=>'ῧ','ῲ'=>'ὼι','ῳ'=>'ωι','ῴ'=>'ώι','ῶ'=>'ῶ','ῷ'=>'ῶι','ῼ'=>'ωι','ﬀ'=>'ff','ﬁ'=>'fi','ﬂ'=>'fl','ﬃ'=>'ffi','ﬄ'=>'ffl','ﬅ'=>'st','ﬆ'=>'st','ﬓ'=>'մն','ﬔ'=>'մե','ﬕ'=>'մի','ﬖ'=>'վն','ﬗ'=>'մխ');
\ No newline at end of file
diff --git a/phpBB/includes/utf/data/case_fold_s.php b/phpBB/includes/utf/data/case_fold_s.php
new file mode 100644
index 0000000000..5f09ffa1dd
--- /dev/null
+++ b/phpBB/includes/utf/data/case_fold_s.php
@@ -0,0 +1 @@
+<?php return array('ᾈ'=>'ᾀ','ᾉ'=>'ᾁ','ᾊ'=>'ᾂ','ᾋ'=>'ᾃ','ᾌ'=>'ᾄ','ᾍ'=>'ᾅ','ᾎ'=>'ᾆ','ᾏ'=>'ᾇ','ᾘ'=>'ᾐ','ᾙ'=>'ᾑ','ᾚ'=>'ᾒ','ᾛ'=>'ᾓ','ᾜ'=>'ᾔ','ᾝ'=>'ᾕ','ᾞ'=>'ᾖ','ᾟ'=>'ᾗ','ᾨ'=>'ᾠ','ᾩ'=>'ᾡ','ᾪ'=>'ᾢ','ᾫ'=>'ᾣ','ᾬ'=>'ᾤ','ᾭ'=>'ᾥ','ᾮ'=>'ᾦ','ᾯ'=>'ᾧ','ᾼ'=>'ᾳ','ῌ'=>'ῃ','ῼ'=>'ῳ');
\ No newline at end of file
diff --git a/phpBB/includes/utf/utf_tools.php b/phpBB/includes/utf/utf_tools.php
index 4adb1b2952..8f88cc391f 100644
--- a/phpBB/includes/utf/utf_tools.php
+++ b/phpBB/includes/utf/utf_tools.php
@@ -945,31 +945,31 @@ function utf8_case_fold($text, $option = 'full')
 	global $phpbb_root_path, $phpEx;
 
 	// common is always set
-	if (!isset($uniarray['C']))
+	if (!isset($uniarray['c']))
 	{
-		$uniarray['C'] = include($phpbb_root_path . 'includes/utf/data/case_fold_C.' . $phpEx);
+		$uniarray['c'] = include_once($phpbb_root_path . 'includes/utf/data/case_fold_c.' . $phpEx);
 	}
 
 	// only set full if we need to
-	if ($option === 'full' && !isset($uniarray['F']))
+	if ($option === 'full' && !isset($uniarray['f']))
 	{
-		$uniarray['F'] = include($phpbb_root_path . 'includes/utf/data/case_fold_F.' . $phpEx);
+		$uniarray['f'] = include_once($phpbb_root_path . 'includes/utf/data/case_fold_f.' . $phpEx);
 	}
 
 	// only set simple if we need to
-	if ($option !== 'full' && !isset($uniarray['S']))
+	if ($option !== 'full' && !isset($uniarray['s']))
 	{
-		$uniarray['S'] = include($phpbb_root_path . 'includes/utf/data/case_fold_S.' . $phpEx);
+		$uniarray['s'] = include_once($phpbb_root_path . 'includes/utf/data/case_fold_s.' . $phpEx);
 	}
 
-	$text = strtr($text, $uniarray['C']);
+	$text = strtr($text, $uniarray['c']);
 	if ($option === 'full')
 	{
-		$text = strtr($text, $uniarray['F']);
+		$text = strtr($text, $uniarray['f']);
 	}
 	else
 	{
-		$text = strtr($text, $uniarray['S']);
+		$text = strtr($text, $uniarray['s']);
 	}
 	return $text;
 }
-- 
cgit v1.2.1


From 42dcd7929e7433ecdac023baa870c0bba62cf728 Mon Sep 17 00:00:00 2001
From: David M <davidmj@users.sourceforge.net>
Date: Mon, 9 Oct 2006 17:44:06 +0000
Subject: hopefully no bugs...

- faster encoding and way faster decoding (hopefully)
- waaaaay faster strtoupper strtolower transforms
- more powerful utf8_substr
- removed two (now) useless functions

P.S.
decode and upper/lower all had bugs before :P


git-svn-id: file:///svn/phpbb/trunk@6471 89ea8834-ac86-4346-8a33-228a782c2dd0
---
 phpBB/includes/utf/utf_tools.php | 538 ++++++++++++++++++---------------------
 1 file changed, 246 insertions(+), 292 deletions(-)

(limited to 'phpBB/includes/utf')

diff --git a/phpBB/includes/utf/utf_tools.php b/phpBB/includes/utf/utf_tools.php
index 8f88cc391f..1bcd92e75f 100644
--- a/phpBB/includes/utf/utf_tools.php
+++ b/phpBB/includes/utf/utf_tools.php
@@ -50,12 +50,11 @@ if (!extension_loaded('xml'))
 			}
 			else if ($num < 0xC0)
 			{
-				$out .= "\xC2$letter";
+				$out .= "\xC2" . $letter;
 			}
 			else
 			{
-				$chr = chr($num - 64);
-				$out .= "\xC3$chr";
+				$out .= "\xC3" . chr($num - 64);
 			}
 		}
 		return $out;
@@ -64,63 +63,41 @@ if (!extension_loaded('xml'))
 	/**
 	* Implementation of PHP's native utf8_decode for people without XML support
 	*
-	* @author GetID3()
 	* @param string $string UTF-8 encoded data
 	* @return string ISO-8859-1 encoded data
 	*/
-	function utf8_decode($string)
+	function utf8_decode($str)
 	{
-		$newcharstring = '';
-		$offset = 0;
-		$stringlength = strlen($string);
-
-		while ($offset < $stringlength)
+		$pos = 0;
+		$len = strlen($str);
+		$ret = '';
+	
+		while ($pos < $len)
 		{
-			$ord = ord($string{$offset});
-			if (($ord | 0x07) == 0xF7)
-			{
-				// 11110bbb 10bbbbbb 10bbbbbb 10bbbbbb
-				$charval = (($ord & 0x07) << 18) &
-							((ord($string{($offset + 1)}) & 0x3F) << 12) &
-							((ord($string{($offset + 2)}) & 0x3F) <<  6) &
-							(ord($string{($offset + 3)}) & 0x3F);
-				$offset += 4;
-			}
-			else if (($ord | 0x0F) == 0xEF)
+			$ord = ord($str[$pos]) & 0xF0;
+			if ($ord === 0xC0 || $ord === 0xD0)
 			{
-				// 1110bbbb 10bbbbbb 10bbbbbb
-				$charval = (($ord & 0x0F) << 12) &
-							((ord($string{($offset + 1)}) & 0x3F) <<  6) &
-							(ord($string{($offset + 2)}) & 0x3F);
-				$offset += 3;
+				$charval = ((ord($str[$pos]) & 0x1F) << 6) | (ord($str[$pos + 1]) & 0x3F);
+				$pos += 2;
+				$ret .= (($charval < 256) ? chr($charval) : '?');
 			}
-			else if (($ord | 0x1F) == 0xDF)
+			else if ($ord === 0xE0)
 			{
-				// 110bbbbb 10bbbbbb
-				$charval = ((ord($string{($offset + 0)}) & 0x1F) <<  6) &
-							(ord($string{($offset + 1)}) & 0x3F);
-				$offset += 2;
+				$ret .= '?';
+				$pos += 3;
 			}
-			else if (($ord | 0x7F) == 0x7F)
+			else if ($ord === 0xF0)
 			{
-				// 0bbbbbbb
-				$charval = $ord;
-				$offset += 1;
+				$ret .= '?';
+				$pos += 4;
 			}
 			else
 			{
-				// error? throw some kind of warning here?
-				$charval = false;
-				$offset += 1;
-			}
-
-			if ($charval !== false)
-			{
-				$newcharstring .= (($charval < 256) ? chr($charval) : '?');
+				$ret .= $str[$pos];
+				++$pos;
 			}
 		}
-
-		return $newcharstring;
+		return $ret;
 	}
 }
 
@@ -308,7 +285,6 @@ else
 	*/
 	function utf8_strpos($str, $needle, $offset = null)
 	{
-		// native
 		if (is_null($offset))
 		{
 			$ar = explode($needle, $str);
@@ -338,95 +314,113 @@ else
 	}
 
 	$UTF8_UPPER_TO_LOWER = array(
-		0x0041=>0x0061, 0x03A6=>0x03C6, 0x0162=>0x0163, 0x00C5=>0x00E5, 0x0042=>0x0062,
-		0x0139=>0x013A, 0x00C1=>0x00E1, 0x0141=>0x0142, 0x038E=>0x03CD, 0x0100=>0x0101,
-		0x0490=>0x0491, 0x0394=>0x03B4, 0x015A=>0x015B, 0x0044=>0x0064, 0x0393=>0x03B3,
-		0x00D4=>0x00F4, 0x042A=>0x044A, 0x0419=>0x0439, 0x0112=>0x0113, 0x041C=>0x043C,
-		0x015E=>0x015F, 0x0143=>0x0144, 0x00CE=>0x00EE, 0x040E=>0x045E, 0x042F=>0x044F,
-		0x039A=>0x03BA, 0x0154=>0x0155, 0x0049=>0x0069, 0x0053=>0x0073, 0x1E1E=>0x1E1F,
-		0x0134=>0x0135, 0x0427=>0x0447, 0x03A0=>0x03C0, 0x0418=>0x0438, 0x00D3=>0x00F3,
-		0x0420=>0x0440, 0x0404=>0x0454, 0x0415=>0x0435, 0x0429=>0x0449, 0x014A=>0x014B,
-		0x0411=>0x0431, 0x0409=>0x0459, 0x1E02=>0x1E03, 0x00D6=>0x00F6, 0x00D9=>0x00F9,
-		0x004E=>0x006E, 0x0401=>0x0451, 0x03A4=>0x03C4, 0x0423=>0x0443, 0x015C=>0x015D,
-		0x0403=>0x0453, 0x03A8=>0x03C8, 0x0158=>0x0159, 0x0047=>0x0067, 0x00C4=>0x00E4,
-		0x0386=>0x03AC, 0x0389=>0x03AE, 0x0166=>0x0167, 0x039E=>0x03BE, 0x0164=>0x0165,
-		0x0116=>0x0117, 0x0108=>0x0109, 0x0056=>0x0076, 0x00DE=>0x00FE, 0x0156=>0x0157,
-		0x00DA=>0x00FA, 0x1E60=>0x1E61, 0x1E82=>0x1E83, 0x00C2=>0x00E2, 0x0118=>0x0119,
-		0x0145=>0x0146, 0x0050=>0x0070, 0x0150=>0x0151, 0x042E=>0x044E, 0x0128=>0x0129,
-		0x03A7=>0x03C7, 0x013D=>0x013E, 0x0422=>0x0442, 0x005A=>0x007A, 0x0428=>0x0448,
-		0x03A1=>0x03C1, 0x1E80=>0x1E81, 0x016C=>0x016D, 0x00D5=>0x00F5, 0x0055=>0x0075,
-		0x0176=>0x0177, 0x00DC=>0x00FC, 0x1E56=>0x1E57, 0x03A3=>0x03C3, 0x041A=>0x043A,
-		0x004D=>0x006D, 0x016A=>0x016B, 0x0170=>0x0171, 0x0424=>0x0444, 0x00CC=>0x00EC,
-		0x0168=>0x0169, 0x039F=>0x03BF, 0x004B=>0x006B, 0x00D2=>0x00F2, 0x00C0=>0x00E0,
-		0x0414=>0x0434, 0x03A9=>0x03C9, 0x1E6A=>0x1E6B, 0x00C3=>0x00E3, 0x042D=>0x044D,
-		0x0416=>0x0436, 0x01A0=>0x01A1, 0x010C=>0x010D, 0x011C=>0x011D, 0x00D0=>0x00F0,
-		0x013B=>0x013C, 0x040F=>0x045F, 0x040A=>0x045A, 0x00C8=>0x00E8, 0x03A5=>0x03C5,
-		0x0046=>0x0066, 0x00DD=>0x00FD, 0x0043=>0x0063, 0x021A=>0x021B, 0x00CA=>0x00EA,
-		0x0399=>0x03B9, 0x0179=>0x017A, 0x00CF=>0x00EF, 0x01AF=>0x01B0, 0x0045=>0x0065,
-		0x039B=>0x03BB, 0x0398=>0x03B8, 0x039C=>0x03BC, 0x040C=>0x045C, 0x041F=>0x043F,
-		0x042C=>0x044C, 0x00DE=>0x00FE, 0x00D0=>0x00F0, 0x1EF2=>0x1EF3, 0x0048=>0x0068,
-		0x00CB=>0x00EB, 0x0110=>0x0111, 0x0413=>0x0433, 0x012E=>0x012F, 0x00C6=>0x00E6,
-		0x0058=>0x0078, 0x0160=>0x0161, 0x016E=>0x016F, 0x0391=>0x03B1, 0x0407=>0x0457,
-		0x0172=>0x0173, 0x0178=>0x00FF, 0x004F=>0x006F, 0x041B=>0x043B, 0x0395=>0x03B5,
-		0x0425=>0x0445, 0x0120=>0x0121, 0x017D=>0x017E, 0x017B=>0x017C, 0x0396=>0x03B6,
-		0x0392=>0x03B2, 0x0388=>0x03AD, 0x1E84=>0x1E85, 0x0174=>0x0175, 0x0051=>0x0071,
-		0x0417=>0x0437, 0x1E0A=>0x1E0B, 0x0147=>0x0148, 0x0104=>0x0105, 0x0408=>0x0458,
-		0x014C=>0x014D, 0x00CD=>0x00ED, 0x0059=>0x0079, 0x010A=>0x010B, 0x038F=>0x03CE,
-		0x0052=>0x0072, 0x0410=>0x0430, 0x0405=>0x0455, 0x0402=>0x0452, 0x0126=>0x0127,
-		0x0136=>0x0137, 0x012A=>0x012B, 0x038A=>0x03AF, 0x042B=>0x044B, 0x004C=>0x006C,
-		0x0397=>0x03B7, 0x0124=>0x0125, 0x0218=>0x0219, 0x00DB=>0x00FB, 0x011E=>0x011F,
-		0x041E=>0x043E, 0x1E40=>0x1E41, 0x039D=>0x03BD, 0x0106=>0x0107, 0x03AB=>0x03CB,
-		0x0426=>0x0446, 0x00DE=>0x00FE, 0x00C7=>0x00E7, 0x03AA=>0x03CA, 0x0421=>0x0441,
-		0x0412=>0x0432, 0x010E=>0x010F, 0x00D8=>0x00F8, 0x0057=>0x0077, 0x011A=>0x011B,
-		0x0054=>0x0074, 0x004A=>0x006A, 0x040B=>0x045B, 0x0406=>0x0456, 0x0102=>0x0103,
-		0x039B=>0x03BB, 0x00D1=>0x00F1, 0x041D=>0x043D, 0x038C=>0x03CC, 0x00C9=>0x00E9,
-		0x00D0=>0x00F0, 0x0407=>0x0457, 0x0122=>0x0123,
+		"\x41" => "\x61", "\x42" => "\x62", "\x43" => "\x63", "\x44" => "\x64",
+		"\x45" => "\x65", "\x46" => "\x66", "\x47" => "\x67", "\x48" => "\x68",
+		"\x49" => "\x69", "\x4A" => "\x6A", "\x4B" => "\x6B", "\x4C" => "\x6C",
+		"\x4D" => "\x6D", "\x4E" => "\x6E", "\x4F" => "\x6F", "\x50" => "\x70",
+		"\x51" => "\x71", "\x52" => "\x72", "\x53" => "\x73", "\x54" => "\x74",
+		"\x55" => "\x75", "\x56" => "\x76", "\x57" => "\x77", "\x58" => "\x78",
+		"\x59" => "\x79", "\x5A" => "\x7A", "\xC3\x80" => "\xC3\xA0", "\xC3\x81" => "\xC3\xA1",
+		"\xC3\x82" => "\xC3\xA2", "\xC3\x83" => "\xC3\xA3", "\xC3\x84" => "\xC3\xA4", "\xC3\x85" => "\xC3\xA5",
+		"\xC3\x86" => "\xC3\xA6", "\xC3\x87" => "\xC3\xA7", "\xC3\x88" => "\xC3\xA8", "\xC3\x89" => "\xC3\xA9",
+		"\xC3\x8A" => "\xC3\xAA", "\xC3\x8B" => "\xC3\xAB", "\xC3\x8C" => "\xC3\xAC", "\xC3\x8D" => "\xC3\xAD",
+		"\xC3\x8E" => "\xC3\xAE", "\xC3\x8F" => "\xC3\xAF", "\xC3\x90" => "\xC3\xB0", "\xC3\x91" => "\xC3\xB1",
+		"\xC3\x92" => "\xC3\xB2", "\xC3\x93" => "\xC3\xB3", "\xC3\x94" => "\xC3\xB4", "\xC3\x95" => "\xC3\xB5",
+		"\xC3\x96" => "\xC3\xB6", "\xC3\x98" => "\xC3\xB8", "\xC3\x99" => "\xC3\xB9", "\xC3\x9A" => "\xC3\xBA",
+		"\xC3\x9B" => "\xC3\xBB", "\xC3\x9C" => "\xC3\xBC", "\xC3\x9D" => "\xC3\xBD", "\xC3\x9E" => "\xC3\xBE",
+		"\xC4\x80" => "\xC4\x81", "\xC4\x82" => "\xC4\x83", "\xC4\x84" => "\xC4\x85", "\xC4\x86" => "\xC4\x87",
+		"\xC4\x88" => "\xC4\x89", "\xC4\x8A" => "\xC4\x8B", "\xC4\x8C" => "\xC4\x8D", "\xC4\x8E" => "\xC4\x8F",
+		"\xC4\x90" => "\xC4\x91", "\xC4\x92" => "\xC4\x93", "\xC4\x96" => "\xC4\x97", "\xC4\x98" => "\xC4\x99",
+		"\xC4\x9A" => "\xC4\x9B", "\xC4\x9C" => "\xC4\x9D", "\xC4\x9E" => "\xC4\x9F", "\xC4\xA0" => "\xC4\xA1",
+		"\xC4\xA2" => "\xC4\xA3", "\xC4\xA4" => "\xC4\xA5", "\xC4\xA6" => "\xC4\xA7", "\xC4\xA8" => "\xC4\xA9",
+		"\xC4\xAA" => "\xC4\xAB", "\xC4\xAE" => "\xC4\xAF", "\xC4\xB4" => "\xC4\xB5", "\xC4\xB6" => "\xC4\xB7",
+		"\xC4\xB9" => "\xC4\xBA", "\xC4\xBB" => "\xC4\xBC", "\xC4\xBD" => "\xC4\xBE", "\xC5\x81" => "\xC5\x82",
+		"\xC5\x83" => "\xC5\x84", "\xC5\x85" => "\xC5\x86", "\xC5\x87" => "\xC5\x88", "\xC5\x8A" => "\xC5\x8B",
+		"\xC5\x8C" => "\xC5\x8D", "\xC5\x90" => "\xC5\x91", "\xC5\x94" => "\xC5\x95", "\xC5\x96" => "\xC5\x97",
+		"\xC5\x98" => "\xC5\x99", "\xC5\x9A" => "\xC5\x9B", "\xC5\x9C" => "\xC5\x9D", "\xC5\x9E" => "\xC5\x9F",
+		"\xC5\xA0" => "\xC5\xA1", "\xC5\xA2" => "\xC5\xA3", "\xC5\xA4" => "\xC5\xA5", "\xC5\xA6" => "\xC5\xA7",
+		"\xC5\xA8" => "\xC5\xA9", "\xC5\xAA" => "\xC5\xAB", "\xC5\xAC" => "\xC5\xAD", "\xC5\xAE" => "\xC5\xAF",
+		"\xC5\xB0" => "\xC5\xB1", "\xC5\xB2" => "\xC5\xB3", "\xC5\xB4" => "\xC5\xB5", "\xC5\xB6" => "\xC5\xB7",
+		"\xC5\xB8" => "\xC3\xBF", "\xC5\xB9" => "\xC5\xBA", "\xC5\xBB" => "\xC5\xBC", "\xC5\xBD" => "\xC5\xBE",
+		"\xC6\xA0" => "\xC6\xA1", "\xC6\xAF" => "\xC6\xB0", "\xC8\x98" => "\xC8\x99", "\xC8\x9A" => "\xC8\x9B",
+		"\xCE\x86" => "\xCE\xAC", "\xCE\x88" => "\xCE\xAD", "\xCE\x89" => "\xCE\xAE", "\xCE\x8A" => "\xCE\xAF",
+		"\xCE\x8C" => "\xCF\x8C", "\xCE\x8E" => "\xCF\x8D", "\xCE\x8F" => "\xCF\x8E", "\xCE\x91" => "\xCE\xB1",
+		"\xCE\x92" => "\xCE\xB2", "\xCE\x93" => "\xCE\xB3", "\xCE\x94" => "\xCE\xB4", "\xCE\x95" => "\xCE\xB5",
+		"\xCE\x96" => "\xCE\xB6", "\xCE\x97" => "\xCE\xB7", "\xCE\x98" => "\xCE\xB8", "\xCE\x99" => "\xCE\xB9",
+		"\xCE\x9A" => "\xCE\xBA", "\xCE\x9B" => "\xCE\xBB", "\xCE\x9C" => "\xCE\xBC", "\xCE\x9D" => "\xCE\xBD",
+		"\xCE\x9E" => "\xCE\xBE", "\xCE\x9F" => "\xCE\xBF", "\xCE\xA0" => "\xCF\x80", "\xCE\xA1" => "\xCF\x81",
+		"\xCE\xA3" => "\xCF\x83", "\xCE\xA4" => "\xCF\x84", "\xCE\xA5" => "\xCF\x85", "\xCE\xA6" => "\xCF\x86",
+		"\xCE\xA7" => "\xCF\x87", "\xCE\xA8" => "\xCF\x88", "\xCE\xA9" => "\xCF\x89", "\xCE\xAA" => "\xCF\x8A",
+		"\xCE\xAB" => "\xCF\x8B", "\xD0\x81" => "\xD1\x91", "\xD0\x82" => "\xD1\x92", "\xD0\x83" => "\xD1\x93",
+		"\xD0\x84" => "\xD1\x94", "\xD0\x85" => "\xD1\x95", "\xD0\x86" => "\xD1\x96", "\xD0\x87" => "\xD1\x97",
+		"\xD0\x88" => "\xD1\x98", "\xD0\x89" => "\xD1\x99", "\xD0\x8A" => "\xD1\x9A", "\xD0\x8B" => "\xD1\x9B",
+		"\xD0\x8C" => "\xD1\x9C", "\xD0\x8E" => "\xD1\x9E", "\xD0\x8F" => "\xD1\x9F", "\xD0\x90" => "\xD0\xB0",
+		"\xD0\x91" => "\xD0\xB1", "\xD0\x92" => "\xD0\xB2", "\xD0\x93" => "\xD0\xB3", "\xD0\x94" => "\xD0\xB4",
+		"\xD0\x95" => "\xD0\xB5", "\xD0\x96" => "\xD0\xB6", "\xD0\x97" => "\xD0\xB7", "\xD0\x98" => "\xD0\xB8",
+		"\xD0\x99" => "\xD0\xB9", "\xD0\x9A" => "\xD0\xBA", "\xD0\x9B" => "\xD0\xBB", "\xD0\x9C" => "\xD0\xBC",
+		"\xD0\x9D" => "\xD0\xBD", "\xD0\x9E" => "\xD0\xBE", "\xD0\x9F" => "\xD0\xBF", "\xD0\xA0" => "\xD1\x80",
+		"\xD0\xA1" => "\xD1\x81", "\xD0\xA2" => "\xD1\x82", "\xD0\xA3" => "\xD1\x83", "\xD0\xA4" => "\xD1\x84",
+		"\xD0\xA5" => "\xD1\x85", "\xD0\xA6" => "\xD1\x86", "\xD0\xA7" => "\xD1\x87", "\xD0\xA8" => "\xD1\x88",
+		"\xD0\xA9" => "\xD1\x89", "\xD0\xAA" => "\xD1\x8A", "\xD0\xAB" => "\xD1\x8B", "\xD0\xAC" => "\xD1\x8C",
+		"\xD0\xAD" => "\xD1\x8D", "\xD0\xAE" => "\xD1\x8E", "\xD0\xAF" => "\xD1\x8F", "\xD2\x90" => "\xD2\x91",
+		"\xE1\xB8\x82" => "\xE1\xB8\x83", "\xE1\xB8\x8A" => "\xE1\xB8\x8B", "\xE1\xB8\x9E" => "\xE1\xB8\x9F", "\xE1\xB9\x80" => "\xE1\xB9\x81",
+		"\xE1\xB9\x96" => "\xE1\xB9\x97", "\xE1\xB9\xA0" => "\xE1\xB9\xA1", "\xE1\xB9\xAA" => "\xE1\xB9\xAB", "\xE1\xBA\x80" => "\xE1\xBA\x81",
+		"\xE1\xBA\x82" => "\xE1\xBA\x83", "\xE1\xBA\x84" => "\xE1\xBA\x85", "\xE1\xBB\xB2" => "\xE1\xBB\xB3"
 	);
 
 	$UTF8_LOWER_TO_UPPER = array(
-		0x0061=>0x0041, 0x03C6=>0x03A6, 0x0163=>0x0162, 0x00E5=>0x00C5, 0x0062=>0x0042,
-		0x013A=>0x0139, 0x00E1=>0x00C1, 0x0142=>0x0141, 0x03CD=>0x038E, 0x0101=>0x0100,
-		0x0491=>0x0490, 0x03B4=>0x0394, 0x015B=>0x015A, 0x0064=>0x0044, 0x03B3=>0x0393,
-		0x00F4=>0x00D4, 0x044A=>0x042A, 0x0439=>0x0419, 0x0113=>0x0112, 0x043C=>0x041C,
-		0x015F=>0x015E, 0x0144=>0x0143, 0x00EE=>0x00CE, 0x045E=>0x040E, 0x044F=>0x042F,
-		0x03BA=>0x039A, 0x0155=>0x0154, 0x0069=>0x0049, 0x0073=>0x0053, 0x1E1F=>0x1E1E,
-		0x0135=>0x0134, 0x0447=>0x0427, 0x03C0=>0x03A0, 0x0438=>0x0418, 0x00F3=>0x00D3,
-		0x0440=>0x0420, 0x0454=>0x0404, 0x0435=>0x0415, 0x0449=>0x0429, 0x014B=>0x014A,
-		0x0431=>0x0411, 0x0459=>0x0409, 0x1E03=>0x1E02, 0x00F6=>0x00D6, 0x00F9=>0x00D9,
-		0x006E=>0x004E, 0x0451=>0x0401, 0x03C4=>0x03A4, 0x0443=>0x0423, 0x015D=>0x015C,
-		0x0453=>0x0403, 0x03C8=>0x03A8, 0x0159=>0x0158, 0x0067=>0x0047, 0x00E4=>0x00C4,
-		0x03AC=>0x0386, 0x03AE=>0x0389, 0x0167=>0x0166, 0x03BE=>0x039E, 0x0165=>0x0164,
-		0x0117=>0x0116, 0x0109=>0x0108, 0x0076=>0x0056, 0x00FE=>0x00DE, 0x0157=>0x0156,
-		0x00FA=>0x00DA, 0x1E61=>0x1E60, 0x1E83=>0x1E82, 0x00E2=>0x00C2, 0x0119=>0x0118,
-		0x0146=>0x0145, 0x0070=>0x0050, 0x0151=>0x0150, 0x044E=>0x042E, 0x0129=>0x0128,
-		0x03C7=>0x03A7, 0x013E=>0x013D, 0x0442=>0x0422, 0x007A=>0x005A, 0x0448=>0x0428,
-		0x03C1=>0x03A1, 0x1E81=>0x1E80, 0x016D=>0x016C, 0x00F5=>0x00D5, 0x0075=>0x0055,
-		0x0177=>0x0176, 0x00FC=>0x00DC, 0x1E57=>0x1E56, 0x03C3=>0x03A3, 0x043A=>0x041A,
-		0x006D=>0x004D, 0x016B=>0x016A, 0x0171=>0x0170, 0x0444=>0x0424, 0x00EC=>0x00CC,
-		0x0169=>0x0168, 0x03BF=>0x039F, 0x006B=>0x004B, 0x00F2=>0x00D2, 0x00E0=>0x00C0,
-		0x0434=>0x0414, 0x03C9=>0x03A9, 0x1E6B=>0x1E6A, 0x00E3=>0x00C3, 0x044D=>0x042D,
-		0x0436=>0x0416, 0x01A1=>0x01A0, 0x010D=>0x010C, 0x011D=>0x011C, 0x00F0=>0x00D0,
-		0x013C=>0x013B, 0x045F=>0x040F, 0x045A=>0x040A, 0x00E8=>0x00C8, 0x03C5=>0x03A5,
-		0x0066=>0x0046, 0x00FD=>0x00DD, 0x0063=>0x0043, 0x021B=>0x021A, 0x00EA=>0x00CA,
-		0x03B9=>0x0399, 0x017A=>0x0179, 0x00EF=>0x00CF, 0x01B0=>0x01AF, 0x0065=>0x0045,
-		0x03BB=>0x039B, 0x03B8=>0x0398, 0x03BC=>0x039C, 0x045C=>0x040C, 0x043F=>0x041F,
-		0x044C=>0x042C, 0x00FE=>0x00DE, 0x00F0=>0x00D0, 0x1EF3=>0x1EF2, 0x0068=>0x0048,
-		0x00EB=>0x00CB, 0x0111=>0x0110, 0x0433=>0x0413, 0x012F=>0x012E, 0x00E6=>0x00C6,
-		0x0078=>0x0058, 0x0161=>0x0160, 0x016F=>0x016E, 0x03B1=>0x0391, 0x0457=>0x0407,
-		0x0173=>0x0172, 0x00FF=>0x0178, 0x006F=>0x004F, 0x043B=>0x041B, 0x03B5=>0x0395,
-		0x0445=>0x0425, 0x0121=>0x0120, 0x017E=>0x017D, 0x017C=>0x017B, 0x03B6=>0x0396,
-		0x03B2=>0x0392, 0x03AD=>0x0388, 0x1E85=>0x1E84, 0x0175=>0x0174, 0x0071=>0x0051,
-		0x0437=>0x0417, 0x1E0B=>0x1E0A, 0x0148=>0x0147, 0x0105=>0x0104, 0x0458=>0x0408,
-		0x014D=>0x014C, 0x00ED=>0x00CD, 0x0079=>0x0059, 0x010B=>0x010A, 0x03CE=>0x038F,
-		0x0072=>0x0052, 0x0430=>0x0410, 0x0455=>0x0405, 0x0452=>0x0402, 0x0127=>0x0126,
-		0x0137=>0x0136, 0x012B=>0x012A, 0x03AF=>0x038A, 0x044B=>0x042B, 0x006C=>0x004C,
-		0x03B7=>0x0397, 0x0125=>0x0124, 0x0219=>0x0218, 0x00FB=>0x00DB, 0x011F=>0x011E,
-		0x043E=>0x041E, 0x1E41=>0x1E40, 0x03BD=>0x039D, 0x0107=>0x0106, 0x03CB=>0x03AB,
-		0x0446=>0x0426, 0x00FE=>0x00DE, 0x00E7=>0x00C7, 0x03CA=>0x03AA, 0x0441=>0x0421,
-		0x0432=>0x0412, 0x010F=>0x010E, 0x00F8=>0x00D8, 0x0077=>0x0057, 0x011B=>0x011A,
-		0x0074=>0x0054, 0x006A=>0x004A, 0x045B=>0x040B, 0x0456=>0x0406, 0x0103=>0x0102,
-		0x03BB=>0x039B, 0x00F1=>0x00D1, 0x043D=>0x041D, 0x03CC=>0x038C, 0x00E9=>0x00C9,
-		0x00F0=>0x00D0, 0x0457=>0x0407, 0x0123=>0x0122,
+		"\x61" => "\x41", "\x62" => "\x42", "\x63" => "\x43", "\x64" => "\x44",
+		"\x65" => "\x45", "\x66" => "\x46", "\x67" => "\x47", "\x68" => "\x48",
+		"\x69" => "\x49", "\x6A" => "\x4A", "\x6B" => "\x4B", "\x6C" => "\x4C",
+		"\x6D" => "\x4D", "\x6E" => "\x4E", "\x6F" => "\x4F", "\x70" => "\x50",
+		"\x71" => "\x51", "\x72" => "\x52", "\x73" => "\x53", "\x74" => "\x54",
+		"\x75" => "\x55", "\x76" => "\x56", "\x77" => "\x57", "\x78" => "\x58",
+		"\x79" => "\x59", "\x7A" => "\x5A", "\xC3\xA0" => "\xC3\x80", "\xC3\xA1" => "\xC3\x81",
+		"\xC3\xA2" => "\xC3\x82", "\xC3\xA3" => "\xC3\x83", "\xC3\xA4" => "\xC3\x84", "\xC3\xA5" => "\xC3\x85",
+		"\xC3\xA6" => "\xC3\x86", "\xC3\xA7" => "\xC3\x87", "\xC3\xA8" => "\xC3\x88", "\xC3\xA9" => "\xC3\x89",
+		"\xC3\xAA" => "\xC3\x8A", "\xC3\xAB" => "\xC3\x8B", "\xC3\xAC" => "\xC3\x8C", "\xC3\xAD" => "\xC3\x8D",
+		"\xC3\xAE" => "\xC3\x8E", "\xC3\xAF" => "\xC3\x8F", "\xC3\xB0" => "\xC3\x90", "\xC3\xB1" => "\xC3\x91",
+		"\xC3\xB2" => "\xC3\x92", "\xC3\xB3" => "\xC3\x93", "\xC3\xB4" => "\xC3\x94", "\xC3\xB5" => "\xC3\x95",
+		"\xC3\xB6" => "\xC3\x96", "\xC3\xB8" => "\xC3\x98", "\xC3\xB9" => "\xC3\x99", "\xC3\xBA" => "\xC3\x9A",
+		"\xC3\xBB" => "\xC3\x9B", "\xC3\xBC" => "\xC3\x9C", "\xC3\xBD" => "\xC3\x9D", "\xC3\xBE" => "\xC3\x9E",
+		"\xC3\xBF" => "\xC5\xB8", "\xC4\x81" => "\xC4\x80", "\xC4\x83" => "\xC4\x82", "\xC4\x85" => "\xC4\x84",
+		"\xC4\x87" => "\xC4\x86", "\xC4\x89" => "\xC4\x88", "\xC4\x8B" => "\xC4\x8A", "\xC4\x8D" => "\xC4\x8C",
+		"\xC4\x8F" => "\xC4\x8E", "\xC4\x91" => "\xC4\x90", "\xC4\x93" => "\xC4\x92", "\xC4\x97" => "\xC4\x96",
+		"\xC4\x99" => "\xC4\x98", "\xC4\x9B" => "\xC4\x9A", "\xC4\x9D" => "\xC4\x9C", "\xC4\x9F" => "\xC4\x9E",
+		"\xC4\xA1" => "\xC4\xA0", "\xC4\xA3" => "\xC4\xA2", "\xC4\xA5" => "\xC4\xA4", "\xC4\xA7" => "\xC4\xA6",
+		"\xC4\xA9" => "\xC4\xA8", "\xC4\xAB" => "\xC4\xAA", "\xC4\xAF" => "\xC4\xAE", "\xC4\xB5" => "\xC4\xB4",
+		"\xC4\xB7" => "\xC4\xB6", "\xC4\xBA" => "\xC4\xB9", "\xC4\xBC" => "\xC4\xBB", "\xC4\xBE" => "\xC4\xBD",
+		"\xC5\x82" => "\xC5\x81", "\xC5\x84" => "\xC5\x83", "\xC5\x86" => "\xC5\x85", "\xC5\x88" => "\xC5\x87",
+		"\xC5\x8B" => "\xC5\x8A", "\xC5\x8D" => "\xC5\x8C", "\xC5\x91" => "\xC5\x90", "\xC5\x95" => "\xC5\x94",
+		"\xC5\x97" => "\xC5\x96", "\xC5\x99" => "\xC5\x98", "\xC5\x9B" => "\xC5\x9A", "\xC5\x9D" => "\xC5\x9C",
+		"\xC5\x9F" => "\xC5\x9E", "\xC5\xA1" => "\xC5\xA0", "\xC5\xA3" => "\xC5\xA2", "\xC5\xA5" => "\xC5\xA4",
+		"\xC5\xA7" => "\xC5\xA6", "\xC5\xA9" => "\xC5\xA8", "\xC5\xAB" => "\xC5\xAA", "\xC5\xAD" => "\xC5\xAC",
+		"\xC5\xAF" => "\xC5\xAE", "\xC5\xB1" => "\xC5\xB0", "\xC5\xB3" => "\xC5\xB2", "\xC5\xB5" => "\xC5\xB4",
+		"\xC5\xB7" => "\xC5\xB6", "\xC5\xBA" => "\xC5\xB9", "\xC5\xBC" => "\xC5\xBB", "\xC5\xBE" => "\xC5\xBD",
+		"\xC6\xA1" => "\xC6\xA0", "\xC6\xB0" => "\xC6\xAF", "\xC8\x99" => "\xC8\x98", "\xC8\x9B" => "\xC8\x9A",
+		"\xCE\xAC" => "\xCE\x86", "\xCE\xAD" => "\xCE\x88", "\xCE\xAE" => "\xCE\x89", "\xCE\xAF" => "\xCE\x8A",
+		"\xCE\xB1" => "\xCE\x91", "\xCE\xB2" => "\xCE\x92", "\xCE\xB3" => "\xCE\x93", "\xCE\xB4" => "\xCE\x94",
+		"\xCE\xB5" => "\xCE\x95", "\xCE\xB6" => "\xCE\x96", "\xCE\xB7" => "\xCE\x97", "\xCE\xB8" => "\xCE\x98",
+		"\xCE\xB9" => "\xCE\x99", "\xCE\xBA" => "\xCE\x9A", "\xCE\xBB" => "\xCE\x9B", "\xCE\xBC" => "\xCE\x9C",
+		"\xCE\xBD" => "\xCE\x9D", "\xCE\xBE" => "\xCE\x9E", "\xCE\xBF" => "\xCE\x9F", "\xCF\x80" => "\xCE\xA0",
+		"\xCF\x81" => "\xCE\xA1", "\xCF\x83" => "\xCE\xA3", "\xCF\x84" => "\xCE\xA4", "\xCF\x85" => "\xCE\xA5",
+		"\xCF\x86" => "\xCE\xA6", "\xCF\x87" => "\xCE\xA7", "\xCF\x88" => "\xCE\xA8", "\xCF\x89" => "\xCE\xA9",
+		"\xCF\x8A" => "\xCE\xAA", "\xCF\x8B" => "\xCE\xAB", "\xCF\x8C" => "\xCE\x8C", "\xCF\x8D" => "\xCE\x8E",
+		"\xCF\x8E" => "\xCE\x8F", "\xD0\xB0" => "\xD0\x90", "\xD0\xB1" => "\xD0\x91", "\xD0\xB2" => "\xD0\x92",
+		"\xD0\xB3" => "\xD0\x93", "\xD0\xB4" => "\xD0\x94", "\xD0\xB5" => "\xD0\x95", "\xD0\xB6" => "\xD0\x96",
+		"\xD0\xB7" => "\xD0\x97", "\xD0\xB8" => "\xD0\x98", "\xD0\xB9" => "\xD0\x99", "\xD0\xBA" => "\xD0\x9A",
+		"\xD0\xBB" => "\xD0\x9B", "\xD0\xBC" => "\xD0\x9C", "\xD0\xBD" => "\xD0\x9D", "\xD0\xBE" => "\xD0\x9E",
+		"\xD0\xBF" => "\xD0\x9F", "\xD1\x80" => "\xD0\xA0", "\xD1\x81" => "\xD0\xA1", "\xD1\x82" => "\xD0\xA2",
+		"\xD1\x83" => "\xD0\xA3", "\xD1\x84" => "\xD0\xA4", "\xD1\x85" => "\xD0\xA5", "\xD1\x86" => "\xD0\xA6",
+		"\xD1\x87" => "\xD0\xA7", "\xD1\x88" => "\xD0\xA8", "\xD1\x89" => "\xD0\xA9", "\xD1\x8A" => "\xD0\xAA",
+		"\xD1\x8B" => "\xD0\xAB", "\xD1\x8C" => "\xD0\xAC", "\xD1\x8D" => "\xD0\xAD", "\xD1\x8E" => "\xD0\xAE",
+		"\xD1\x8F" => "\xD0\xAF", "\xD1\x91" => "\xD0\x81", "\xD1\x92" => "\xD0\x82", "\xD1\x93" => "\xD0\x83",
+		"\xD1\x94" => "\xD0\x84", "\xD1\x95" => "\xD0\x85", "\xD1\x96" => "\xD0\x86", "\xD1\x97" => "\xD0\x87",
+		"\xD1\x98" => "\xD0\x88", "\xD1\x99" => "\xD0\x89", "\xD1\x9A" => "\xD0\x8A", "\xD1\x9B" => "\xD0\x8B",
+		"\xD1\x9C" => "\xD0\x8C", "\xD1\x9E" => "\xD0\x8E", "\xD1\x9F" => "\xD0\x8F", "\xD2\x91" => "\xD2\x90",
+		"\xE1\xB8\x83" => "\xE1\xB8\x82", "\xE1\xB8\x8B" => "\xE1\xB8\x8A", "\xE1\xB8\x9F" => "\xE1\xB8\x9E", "\xE1\xB9\x81" => "\xE1\xB9\x80",
+		"\xE1\xB9\x97" => "\xE1\xB9\x96", "\xE1\xB9\xA1" => "\xE1\xB9\xA0", "\xE1\xB9\xAB" => "\xE1\xB9\xAA", "\xE1\xBA\x81" => "\xE1\xBA\x80",
+		"\xE1\xBA\x83" => "\xE1\xBA\x82", "\xE1\xBA\x85" => "\xE1\xBA\x84", "\xE1\xBB\xB3" => "\xE1\xBB\xB2"
 	);
 
 	/**
@@ -437,30 +431,14 @@ else
 	* not exist in the Chinese alphabet, for example. See Unicode Standard
 	* Annex #21: Case Mappings
 	* 
-	* @author Andreas Gohr <andi@splitbrain.org>
 	* @param string
-	* @return mixed either string in lowercase or FALSE is UTF-8 invalid
+	* @return string string in lowercase
 	*/
 	function utf8_strtolower($string)
 	{
 		global $UTF8_UPPER_TO_LOWER;
 
-		$uni = utf8_to_unicode($string);
-
-		if (!$uni)
-		{
-			return false;
-		}
-
-		for ($i = 0, $cnt = sizeof($uni); $i < $cnt; $i++)
-		{
-			if (isset($UTF8_UPPER_TO_LOWER[$uni[$i]]))
-			{
-				$uni[$i] = $UTF8_UPPER_TO_LOWER[$uni[$i]];
-			}
-		}
-
-		return utf8_from_unicode($uni);
+		return strtr($string, $UTF8_UPPER_TO_LOWER);
 	}
 
 	/**
@@ -471,106 +449,160 @@ else
 	* not exist in the Chinese alphabet, for example. See Unicode Standard
 	* Annex #21: Case Mappings
 	* 
-	* @author Andreas Gohr <andi@splitbrain.org>
 	* @param string
-	* @return mixed either string in lowercase or FALSE is UTF-8 invalid
+	* @return string string in uppercase
 	*/
-	function utf8_strtoupper($str)
+	function utf8_strtoupper($string)
 	{
 		global $UTF8_LOWER_TO_UPPER;
 
-		$uni = utf8_to_unicode($string);
-
-		if (!$uni)
-		{
-			return false;
-		}
-
-		for ($i = 0, $cnt = sizeof($uni); $i < $cnt; $i++)
-		{
-			if (isset($UTF8_LOWER_TO_UPPER[$uni[$i]]))
-			{
-				$uni[$i] = $UTF8_LOWER_TO_UPPER[$uni[$i]];
-			}
-		}
-
-		return utf8_from_unicode($uni);
+		return strtr($string, $UTF8_LOWER_TO_UPPER);
 	}
 
 	/**
 	* UTF-8 aware alternative to substr
 	* Return part of a string given character offset (and optionally length)
-	* 
-	* @author Harry Fuecks
+	*
+	* Note on arguments: compared to substr - if offset or length are
+	* not integers, this version will not complain but rather massages them
+	* into an integer.
+	*
+	* Note on returned values: substr documentation states false can be
+	* returned in some cases (e.g. offset > string length)
+	* mb_substr never returns false, it will return an empty string instead.
+	* This adopts the mb_substr approach
+	*
+	* Note on implementation: PCRE only supports repetitions of less than
+	* 65536, in order to accept up to MAXINT values for offset and length,
+	* we'll repeat a group of 65535 characters when needed.
+	*
+	* Note on implementation: calculating the number of characters in the
+	* string is a relatively expensive operation, so we only carry it out when
+	* necessary. It isn't necessary for +ve offsets and no specified length
+	*
+	* @author Chris Smith<chris@jalakai.co.uk>
 	* @param string
 	* @param integer number of UTF-8 characters offset (from left)
 	* @param integer (optional) length in UTF-8 characters from offset
 	* @return mixed string or FALSE if failure
 	*/
-	function utf8_substr($str, $offset,	$length	= null)
+	function utf8_substr($str, $offset, $length = NULL)
 	{
-		if ($offset >= 0 && $length >= 0)
+		// generates E_NOTICE
+		// for PHP4 objects, but not PHP5 objects
+		$str = (string) $str;
+		$offset = (int) $offset;
+		if (!is_null($length))
 		{
-			if ($length === null)
-			{
-				$length = '*';
-			}
-			else
-			{
-				if (!preg_match('/^[0-9]+$/', $length))
-				{
-					trigger_error('utf8_substr expects parameter 3 to be long', E_USER_WARNING);
-					return false;
-				}
-
-				$strlen = strlen(utf8_decode($str));
-				if ($offset > $strlen)
-				{
-					return '';
-				}
+			$length = (int) $length;
+		}
 
-				if (($offset + $length) > $strlen)
-				{
-					$length = '*';
-				}
-				else
-				{
-					$length = '{' . $length . '}';
-				}
-			}
+		// handle trivial cases
+		if ($length === 0 || ($offset < 0 && $length < 0 && $length < $offset))
+		{
+			return '';
+		}
 
-			if (!preg_match('/^[0-9]+$/', $offset))
+		// normalise negative offsets (we could use a tail
+		// anchored pattern, but they are horribly slow!)
+		if ($offset < 0)
+		{
+			// see notes
+			$strlen = utf8_strlen($str);
+			$offset = $strlen + $offset;
+			if ($offset < 0)
 			{
-				trigger_error('utf8_substr expects parameter 2 to be long', E_USER_WARNING);
-				return false;
+				$offset = 0;
 			}
+		}
 
-			$pattern = '/^.{' . $offset . '}(.' . $length . ')/us';
+		$op = '';
+		$lp = '';
 
-			preg_match($pattern, $str, $matches);
+		// establish a pattern for offset, a
+		// non-captured group equal in length to offset
+		if ($offset > 0)
+		{
+			$ox = (int) ($offset / 65535);
+			$oy = $offset % 65535;
 
-			if (isset($matches[1]))
+			if ($ox)
 			{
-				return $matches[1];
+				$op = '(?:.{65535}){' . $ox . '}';
 			}
 
-			return false;
+			$op = '^(?:' . $op . '.{' . $oy . '})';
+		}
+		else
+		{	
+			// offset == 0; just anchor the pattern
+			$op = '^';
+		}
+
+		// establish a pattern for length
+		if (is_null($length))
+		{
+			// the rest of the string
+			$lp = '(.*)$';
 		}
 		else
 		{
-			// Handle negatives using different, slower technique
-			// From: http://www.php.net/manual/en/function.substr.php#44838
-			preg_match_all('/./u', $str, $ar);
+			if (!isset($strlen))
+			{
+				// see notes
+				$strlen = utf8_strlen($str);
+			}
 
-			if ($length !== null)
+			// another trivial case
+			if ($offset > $strlen)
 			{
-				return join('', array_slice($ar[0], $offset, $length));
+				return '';
 			}
-			else
+
+			if ($length > 0)
+			{
+				// reduce any length that would
+				// go passed the end of the string
+				$length = min($strlen - $offset, $length);
+
+				$lx = (int) ($length / 65535);
+				$ly = $length % 65535;
+				
+				// positive length requires a captured group
+				// of length characters
+				if ($lx)
+				{
+					$lp = '(?:.{65535}){' . $lx . '}';
+				}
+				$lp = '(' . $lp . '.{'. $ly . '})';
+			}
+			else if ($length < 0)
 			{
-				return join('', array_slice($ar[0], $offset));
+				if ($length < ($offset - $strlen))
+				{
+					return '';
+				}
+
+				$lx = (int)((-$length) / 65535);
+				$ly = (-$length) % 65535;
+
+				// negative length requires ... capture everything
+				// except a group of  -length characters
+				// anchored at the tail-end of the string
+				if ($lx)
+				{
+					$lp = '(?:.{65535}){' . $lx . '}';
+				}
+				$lp = '(.*)(?:' . $lp . '.{' . $ly . '})$';
 			}
 		}
+
+		if (!preg_match('#' . $op . $lp . '#us', $str, $match))
+		{
+			return '';
+		}
+
+		return $match[1];
 	}
 
 	/**
@@ -853,84 +885,6 @@ function utf8_decode_ncr_callback($m)
 	return utf8_chr($cp);
 }
 
-/**
-* Takes an UTF-8 string and returns an array of ints representing the
-* Unicode characters.
-* 
-* @param  string  UTF-8 encoded string
-* @return array array of UNICODE code points
-*/
-function utf8_to_unicode($string)
-{
-	$unicode = array();
-	$offset = 0;
-	$stringlength = strlen($string);
-
-	while ($offset < $stringlength)
-	{
-		$ord = ord($string{$offset});
-		if (($ord | 0x07) == 0xF7)
-		{
-			// 11110bbb 10bbbbbb 10bbbbbb 10bbbbbb
-			$charval = (($ord & 0x07) << 18) &
-						((ord($string{($offset + 1)}) & 0x3F) << 12) &
-						((ord($string{($offset + 2)}) & 0x3F) <<  6) &
-						(ord($string{($offset + 3)}) & 0x3F);
-			$offset += 4;
-		}
-		else if (($ord | 0x0F) == 0xEF)
-		{
-			// 1110bbbb 10bbbbbb 10bbbbbb
-			$charval = (($ord & 0x0F) << 12) &
-						((ord($string{($offset + 1)}) & 0x3F) <<  6) &
-						(ord($string{($offset + 2)}) & 0x3F);
-			$offset += 3;
-		}
-		else if (($ord | 0x1F) == 0xDF)
-		{
-			// 110bbbbb 10bbbbbb
-			$charval = (($ord & 0x1F) <<  6) &
-						(ord($string{($offset + 1)}) & 0x3F);
-			$offset += 2;
-		}
-		else if (($ord | 0x7F) == 0x7F)
-		{
-			// 0bbbbbbb
-			$charval = $ord;
-			$offset += 1;
-		}
-		else
-		{
-			// error? throw some kind of warning here?
-			$charval = false;
-			$offset += 1;
-		}
-		if ($charval !== false)
-		{
-			$unicode[] = $charval;
-		}
-	}
-	return $unicode;
-}
-
-/**
-* Takes an array of ints representing the Unicode characters and returns
-* a UTF-8 string.
-*
-* @param array $array array of unicode code points representing a string
-* @return string UTF-8 character string
-*/
-function utf8_from_unicode($array)
-{
-	$str = '';
-	foreach ($array as $value)
-	{
-		$str .= utf8_chr($value);
-	}
-	return $str;
-}
-
-
 /**
 * Takes an array of ints representing the Unicode characters and returns
 * a UTF-8 string.
-- 
cgit v1.2.1


From c65048bd9132175e9ba780457fdf00438932c5fe Mon Sep 17 00:00:00 2001
From: Nils Adermann <naderman@naderman.de>
Date: Fri, 13 Oct 2006 22:10:18 +0000
Subject: - introducing clean usernames, needs to be tested more, I'm not sure
 I didn't miss anything - homograph list should probably be extended

git-svn-id: file:///svn/phpbb/trunk@6494 89ea8834-ac86-4346-8a33-228a782c2dd0
---
 phpBB/includes/utf/utf_tools.php | 35 +++++++++++++++++++++++++++++++++++
 1 file changed, 35 insertions(+)

(limited to 'phpBB/includes/utf')

diff --git a/phpBB/includes/utf/utf_tools.php b/phpBB/includes/utf/utf_tools.php
index 1bcd92e75f..cb3e3b69ac 100644
--- a/phpBB/includes/utf/utf_tools.php
+++ b/phpBB/includes/utf/utf_tools.php
@@ -928,4 +928,39 @@ function utf8_case_fold($text, $option = 'full')
 	return $text;
 }
 
+function utf8_clean_string($text)
+{
+	$text = utf8_case_fold($text);
+
+	if (!class_exists('utf_normalizer'))
+	{
+		global $phpbb_root_path, $phpEx;
+		include($phpbb_root_path . 'includes/utf/utf_normalizer.' . $phpEx);
+	}
+
+	$text = utf_normalizer::nfc($text);
+
+	static $homographs = array(
+		// cyrillic
+		"\xD0\xB0" => "\x61",
+		"\xD0\xB5" => "\x65",
+		"\xD0\xBE" => "\x6F",
+		"\xD1\x80" => "\x70",
+		"\xD1\x81" => "\x63",
+		"\xD1\x83" => "\x79",
+		"\xD1\x85" => "\x78",
+		"\xD1\x95" => "\x73",
+		"\xD1\x96" => "\x69",
+		"\xD1\x98" => "\x6A",
+		"\xD2\xBB" => "\x68",
+		// greek
+		"\xCE\xB1" => "\x61",
+		"\xCE\xBF" => "\x6F",
+	);
+
+	$text = strtr($text, $homographs);
+
+	return $text;
+}
+
 ?>
\ No newline at end of file
-- 
cgit v1.2.1


From 22129be21f3d08a27681e0ac6ba8ad401ed8ba96 Mon Sep 17 00:00:00 2001
From: Meik Sievertsen <acydburn@phpbb.com>
Date: Sun, 22 Oct 2006 13:32:33 +0000
Subject: - now username changes should work as desired - removed some
 extract() calls

git-svn-id: file:///svn/phpbb/trunk@6517 89ea8834-ac86-4346-8a33-228a782c2dd0
---
 phpBB/includes/utf/utf_tools.php | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'phpBB/includes/utf')

diff --git a/phpBB/includes/utf/utf_tools.php b/phpBB/includes/utf/utf_tools.php
index cb3e3b69ac..1f9a698163 100644
--- a/phpBB/includes/utf/utf_tools.php
+++ b/phpBB/includes/utf/utf_tools.php
@@ -901,19 +901,19 @@ function utf8_case_fold($text, $option = 'full')
 	// common is always set
 	if (!isset($uniarray['c']))
 	{
-		$uniarray['c'] = include_once($phpbb_root_path . 'includes/utf/data/case_fold_c.' . $phpEx);
+		$uniarray['c'] = include($phpbb_root_path . 'includes/utf/data/case_fold_c.' . $phpEx);
 	}
 
 	// only set full if we need to
 	if ($option === 'full' && !isset($uniarray['f']))
 	{
-		$uniarray['f'] = include_once($phpbb_root_path . 'includes/utf/data/case_fold_f.' . $phpEx);
+		$uniarray['f'] = include($phpbb_root_path . 'includes/utf/data/case_fold_f.' . $phpEx);
 	}
 
 	// only set simple if we need to
 	if ($option !== 'full' && !isset($uniarray['s']))
 	{
-		$uniarray['s'] = include_once($phpbb_root_path . 'includes/utf/data/case_fold_s.' . $phpEx);
+		$uniarray['s'] = include($phpbb_root_path . 'includes/utf/data/case_fold_s.' . $phpEx);
 	}
 
 	$text = strtr($text, $uniarray['c']);
-- 
cgit v1.2.1


From 12c75a0991a59eecd274eb2b03476e80ae608eaa Mon Sep 17 00:00:00 2001
From: Meik Sievertsen <acydburn@phpbb.com>
Date: Mon, 30 Oct 2006 19:51:56 +0000
Subject: - temporarily disable x-sendfile support (we need to look into
 methods of checking if it is enabled/disabled or introducing a switch) -
 finally allow custom permission settings files (in acp/ as well as in mods/)

git-svn-id: file:///svn/phpbb/trunk@6539 89ea8834-ac86-4346-8a33-228a782c2dd0
---
 phpBB/includes/utf/utf_tools.php | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

(limited to 'phpBB/includes/utf')

diff --git a/phpBB/includes/utf/utf_tools.php b/phpBB/includes/utf/utf_tools.php
index 1f9a698163..24aeb35d02 100644
--- a/phpBB/includes/utf/utf_tools.php
+++ b/phpBB/includes/utf/utf_tools.php
@@ -111,16 +111,13 @@ if (extension_loaded('mbstring'))
 	*
 	* Notes:
 	* - offset for mb_strrpos was added in 5.2.0, we emulate if it is lower
-	* 
-	* @author Harry Fuecks
-	* @param string haystack
-	* @param string needle
-	* @param integer (optional) offset (from left)
-	* @return mixed integer position or FALSE on failure
-	* @ignore
 	*/
 	if (version_compare(phpversion(), '5.2.0', '>='))
 	{
+		/**
+		* UTF-8 aware alternative to strrpos
+		* @ignore
+		*/
 		function utf8_strrpos($str,	$needle, $offset = null)
 		{
 			// Emulate behaviour of strrpos rather than raising warning
@@ -134,6 +131,10 @@ if (extension_loaded('mbstring'))
 	}
 	else
 	{
+		/**
+		* UTF-8 aware alternative to strrpos
+		* @ignore
+		*/
 		function utf8_strrpos($str,	$needle, $offset = null)
 		{
 			// offset for mb_strrpos was added in 5.2.0
-- 
cgit v1.2.1


From daa3288a368ddac0335dde7ee2a718883bfb2fdc Mon Sep 17 00:00:00 2001
From: Meik Sievertsen <acydburn@phpbb.com>
Date: Fri, 3 Nov 2006 11:26:14 +0000
Subject: - implemented the suggested html_entity_decode function made by david
 - fixed string length checking by also decoding entities for the sake of
 checking - used the new html_entity_decode function

git-svn-id: file:///svn/phpbb/trunk@6545 89ea8834-ac86-4346-8a33-228a782c2dd0
---
 phpBB/includes/utf/utf_tools.php | 45 +++++++++++++++++++++++++++++++++++++++-
 1 file changed, 44 insertions(+), 1 deletion(-)

(limited to 'phpBB/includes/utf')

diff --git a/phpBB/includes/utf/utf_tools.php b/phpBB/includes/utf/utf_tools.php
index 24aeb35d02..fdf68d092e 100644
--- a/phpBB/includes/utf/utf_tools.php
+++ b/phpBB/includes/utf/utf_tools.php
@@ -8,7 +8,7 @@
 *
 * @todo make sure the replacements are called correctly
 * already done: strtolower, strtoupper, ucfirst, str_split, strrpos, strlen (hopefully!), strpos, substr
-* remaining:	clean_username, htmlentities (no longer needed for internal data?), htmlspecialchars (using charset), html_entity_decode (own function to reverse htmlspecialchars and not htmlentities)
+* remaining:	clean_username, htmlentities (no longer needed for internal data?), htmlspecialchars (using charset)
 *				strspn, chr, ord
 */
 
@@ -929,6 +929,9 @@ function utf8_case_fold($text, $option = 'full')
 	return $text;
 }
 
+/**
+* @todo needs documenting
+*/
 function utf8_clean_string($text)
 {
 	$text = utf8_case_fold($text);
@@ -964,4 +967,44 @@ function utf8_clean_string($text)
 	return $text;
 }
 
+if (version_compare(phpversion(), '5', '>='))
+{
+	/**
+	* @ignore
+	*/
+	function utf8_html_entity_decode($string, $quote_style = ENT_COMPAT)
+	{
+		return html_entity_decode($string, $quote_style, 'UTF-8');
+	}
+}
+else
+{
+	/**
+	* @todo needs documenting
+	*/
+	function utf8_html_entity_decode($string, $quote_style = ENT_COMPAT)
+	{
+		static $static_table;
+
+		if ($static_table === null)
+		{
+			$static_table = array_map('utf8_encode', array_flip(get_html_translation_table(HTML_ENTITIES)));
+		}
+
+		$modified_table = $static_table;
+
+		if ($quote_style === ENT_QUOTES)
+		{
+			$modified_table['&#039;'] = "'";
+		}
+
+		if ($quote_style === ENT_NOQUOTES)
+		{
+			unset($modified_table['&quot;']);
+		}
+
+		return strtr($string, $modified_table);
+	}
+}
+
 ?>
\ No newline at end of file
-- 
cgit v1.2.1


From 7ab232a45504ef357a19d9ab58dd27c454e12784 Mon Sep 17 00:00:00 2001
From: Meik Sievertsen <acydburn@phpbb.com>
Date: Fri, 3 Nov 2006 21:05:25 +0000
Subject: ok, i am an idiot...

git-svn-id: file:///svn/phpbb/trunk@6548 89ea8834-ac86-4346-8a33-228a782c2dd0
---
 phpBB/includes/utf/utf_tools.php | 40 ----------------------------------------
 1 file changed, 40 deletions(-)

(limited to 'phpBB/includes/utf')

diff --git a/phpBB/includes/utf/utf_tools.php b/phpBB/includes/utf/utf_tools.php
index fdf68d092e..aa29159d5e 100644
--- a/phpBB/includes/utf/utf_tools.php
+++ b/phpBB/includes/utf/utf_tools.php
@@ -967,44 +967,4 @@ function utf8_clean_string($text)
 	return $text;
 }
 
-if (version_compare(phpversion(), '5', '>='))
-{
-	/**
-	* @ignore
-	*/
-	function utf8_html_entity_decode($string, $quote_style = ENT_COMPAT)
-	{
-		return html_entity_decode($string, $quote_style, 'UTF-8');
-	}
-}
-else
-{
-	/**
-	* @todo needs documenting
-	*/
-	function utf8_html_entity_decode($string, $quote_style = ENT_COMPAT)
-	{
-		static $static_table;
-
-		if ($static_table === null)
-		{
-			$static_table = array_map('utf8_encode', array_flip(get_html_translation_table(HTML_ENTITIES)));
-		}
-
-		$modified_table = $static_table;
-
-		if ($quote_style === ENT_QUOTES)
-		{
-			$modified_table['&#039;'] = "'";
-		}
-
-		if ($quote_style === ENT_NOQUOTES)
-		{
-			unset($modified_table['&quot;']);
-		}
-
-		return strtr($string, $modified_table);
-	}
-}
-
 ?>
\ No newline at end of file
-- 
cgit v1.2.1


From 8b0ec6e02d5a53ea3d1b87abd122d39cc3e8366f Mon Sep 17 00:00:00 2001
From: David M <davidmj@users.sourceforge.net>
Date: Fri, 3 Nov 2006 23:09:16 +0000
Subject: - compress is nicer (fixed a bug :P) - UTF-8 code is nicer (fixed a
 bug :P) - new CAPTCHA. Replaced the old one for size and usability issues.
 The old CAPTCHA will most likely be released as a separate package

git-svn-id: file:///svn/phpbb/trunk@6549 89ea8834-ac86-4346-8a33-228a782c2dd0
---
 phpBB/includes/utf/utf_normalizer.php | 2227 ++++++++++++++++-----------------
 phpBB/includes/utf/utf_tools.php      |    4 +-
 2 files changed, 1070 insertions(+), 1161 deletions(-)

(limited to 'phpBB/includes/utf')

diff --git a/phpBB/includes/utf/utf_normalizer.php b/phpBB/includes/utf/utf_normalizer.php
index d8e0ba0fa6..0d1d74539a 100644
--- a/phpBB/includes/utf/utf_normalizer.php
+++ b/phpBB/includes/utf/utf_normalizer.php
@@ -27,1181 +27,1055 @@ define('UTF8_CJK_LAST', "\xE9\xBE\xBB");
 define('UTF8_CJK_B_FIRST', "\xF0\xA0\x80\x80");
 define('UTF8_CJK_B_LAST', "\xF0\xAA\x9B\x96");
 
+// Unset global variables
+unset($GLOBALS['utf_jamo_index'], $GLOBALS['utf_jamo_type'], $GLOBALS['utf_nfc_qc'], $GLOBALS['utf_combining_class'], $GLOBALS['utf_canonical_comp'], $GLOBALS['utf_canonical_decomp'], $GLOBALS['utf_nfkc_qc'], $GLOBALS['utf_compatibility_decomp']);
+
+// NFC_QC and NFKC_QC values
+define('UNICODE_QC_MAYBE', 0);
+define('UNICODE_QC_NO', 1);
+
+// Contains all the ASCII characters appearing in UTF-8, sorted by frequency
+define('UTF8_ASCII_RANGE', "\x20\x65\x69\x61\x73\x6E\x74\x72\x6F\x6C\x75\x64\x5D\x5B\x63\x6D\x70\x27\x0A\x67\x7C\x68\x76\x2E\x66\x62\x2C\x3A\x3D\x2D\x71\x31\x30\x43\x32\x2A\x79\x78\x29\x28\x4C\x39\x41\x53\x2F\x50\x22\x45\x6A\x4D\x49\x6B\x33\x3E\x35\x54\x3C\x44\x34\x7D\x42\x7B\x38\x46\x77\x52\x36\x37\x55\x47\x4E\x3B\x4A\x7A\x56\x23\x48\x4F\x57\x5F\x26\x21\x4B\x3F\x58\x51\x25\x59\x5C\x09\x5A\x2B\x7E\x5E\x24\x40\x60\x7F\x00\x01\x02\x03\x04\x05\x06\x07\x08\x0B\x0C\x0D\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F");
+
+// Contains all the tail bytes that can appear in the composition of a UTF-8 char
+define('UTF8_TRAILING_BYTES', "\xA9\xA0\xA8\x80\xAA\x99\xA7\xBB\xAB\x89\x94\x82\xB4\xA2\xAE\x83\xB0\xB9\xB8\x93\xAF\xBC\xB3\x81\xA4\xB2\x9C\xA1\xB5\xBE\xBD\xBA\x98\xAD\xB1\x84\x95\xA6\xB6\x88\x8D\x90\xB7\xBF\x92\x85\xA5\x97\x8C\x86\xA3\x8E\x9F\x8F\x87\x91\x9D\xAC\x9E\x8B\x96\x9B\x8A\x9A");
+
+// Constants used by the Hangul [de]composition algorithms
+define('UNICODE_HANGUL_SBASE', 0xAC00);
+define('UNICODE_HANGUL_LBASE', 0x1100);
+define('UNICODE_HANGUL_VBASE', 0x1161);
+define('UNICODE_HANGUL_TBASE', 0x11A7);
+define('UNICODE_HANGUL_SCOUNT', 11172);
+define('UNICODE_HANGUL_LCOUNT', 19);
+define('UNICODE_HANGUL_VCOUNT', 21);
+define('UNICODE_HANGUL_TCOUNT', 28);
+define('UNICODE_HANGUL_NCOUNT', 588);
+define('UNICODE_JAMO_L', 0);
+define('UNICODE_JAMO_V', 1);
+define('UNICODE_JAMO_T', 2);
 
-// Wrapper for the utfnormal extension, ICU wrapper
-if (function_exists('utf8_normalize'))
+/**
+* Unicode normalization routines
+*
+* @package phpBB3
+*/
+class utf_normalizer
 {
-	define('UNORM_NONE', 1);
-	define('UNORM_NFD',  2);
-	define('UNORM_NFKD', 3);
-	define('UNORM_NFC',  4);
-	define('UNORM_NFKC', 5);
-	define('UNORM_FCD',  6);
-	define('UNORM_DEFAULT', UNORM_NFC);
-
 	/**
-	* utf_normalizer class for the utfnormal extension
+	* Validate, cleanup and normalize a string
+	*
+	* The ultimate convenience function! Clean up invalid UTF-8 sequences,
+	* and convert to Normal Form C, canonical composition.
 	*
-	* @ignore
-	* @package phpBB3
+	* @param	string	$str	The dirty string
+	* @return	string			The same string, all shiny and cleaned-up
 	*/
-	class utf_normalizer
+	function cleanup($str)
 	{
-		function cleanup($str)
-		{
-			/**
-			* The string below is the list of all autorized characters, sorted by
-			* frequency in latin text
-			*/
-			$pos = strspn($str, "\x20\x65\x69\x61\x73\x6E\x74\x72\x6F\x6C\x75\x64\x5D\x5B\x63\x6D\x70\x27\x0A\x67\x7C\x68\x76\x2E\x66\x62\x2C\x3A\x3D\x2D\x71\x31\x30\x43\x32\x2A\x79\x78\x29\x28\x4C\x39\x41\x53\x2F\x50\x22\x45\x6A\x4D\x49\x6B\x33\x3E\x35\x54\x3C\x44\x34\x7D\x42\x7B\x38\x46\x77\x52\x36\x37\x55\x47\x4E\x3B\x4A\x7A\x56\x23\x48\x4F\x57\x5F\x26\x21\x4B\x3F\x58\x51\x25\x59\x5C\x09\x5A\x2B\x7E\x5E\x24\x40\x60\x7F\x0D");
+		// The string below is the list of all authorized characters, sorted by frequency in Latin text
+		$pos = strspn($str, "\x20\x65\x69\x61\x73\x6E\x74\x72\x6F\x6C\x75\x64\x5D\x5B\x63\x6D\x70\x27\x0A\x67\x7C\x68\x76\x2E\x66\x62\x2C\x3A\x3D\x2D\x71\x31\x30\x43\x32\x2A\x79\x78\x29\x28\x4C\x39\x41\x53\x2F\x50\x22\x45\x6A\x4D\x49\x6B\x33\x3E\x35\x54\x3C\x44\x34\x7D\x42\x7B\x38\x46\x77\x52\x36\x37\x55\x47\x4E\x3B\x4A\x7A\x56\x23\x48\x4F\x57\x5F\x26\x21\x4B\x3F\x58\x51\x25\x59\x5C\x09\x5A\x2B\x7E\x5E\x24\x40\x60\x7F\x0D");
+		$len = strlen($str);
 
-			if (!isset($str[$pos]))
-			{
-				// ASCII strings with no special chars return immediately
-				return $str;
-			}
+		if ($pos == $len)
+		{
+			// ASCII strings with no special chars return immediately
+			return $str;
+		}
 
-			// Check if there is potentially a U+FFFE or U+FFFF char (UTF sequence 0xEFBFBE or 0xEFBFBF) and replace them
-			// Note: we start searching at position $pos
-			if (is_int(strpos($str, "\xEF\xBF", $pos)))
-			{
-				$str = str_replace(
-					array("\xEF\xBF\xBE", "\xEF\xBF\xBF"),
-					array(UTF8_REPLACEMENT, UTF8_REPLACEMENT),
-					$str
-				);
-			}
+		// Note: we do not check for $GLOBALS['utf_canonical_decomp']. It is assumed they are always loaded together
+		if (!isset($GLOBALS['utf_nfc_qc']))
+		{
+			global $phpbb_root_path, $phpEx;
+			include($phpbb_root_path . 'includes/utf/data/utf_nfc_qc.' . $phpEx);
+		}
 
-			// Replace any byte in the range 0x00..0x1F, except for \r, \n and \t
-			// We replace those characters with a 0xFF byte, which is illegal in
-			// UTF-8 and will in turn be replaced with a Unicode replacement char
-			$str = strtr(
+		// Replace any byte in the range 0x00..0x1F, except for \r, \n and \t
+		// We replace those characters with a 0xFF byte, which is illegal in UTF-8 and will in turn be replaced with a UTF replacement char
+		return utf_normalizer::recompose(
+			strtr(
 				$str,
 				"\x00\x01\x02\x03\x04\x05\x06\x07\x08\x0B\x0C\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F",
 				"\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF"
-			);
+			),
+			$pos, $len, $GLOBALS['utf_nfc_qc'], $GLOBALS['utf_canonical_decomp']
+		);
+	}
 
-			// As per the original implementation, "the UnicodeString constructor fails if the string ends with a head byte".
-			// Therefore, if the string ends with a leading byte we replace it with 0xFF, which is illegal too and will be
-			// replaced with a Unicode replacement character
-			if (substr($str, -1) >= "\xC0")
-			{
-				$str[strlen($str) - 1] = "\xFF";
-			}
+	/**
+	* Validate and normalize a UTF string to NFC
+	*
+	* @param	string	$str	Unchecked UTF string
+	* @return	string			The string, validated and in normal form
+	*/
+	function nfc($str)
+	{
+		$pos = strspn($str, UTF8_ASCII_RANGE);
+		$len = strlen($str);
 
-			return utf8_normalize($str, UNORM_NFC);
+		if ($pos == $len)
+		{
+			// ASCII strings return immediately
+			return $str;
 		}
 
-		function nfc($str)
+		if (!isset($GLOBALS['utf_nfc_qc']))
 		{
-			return utf8_normalize($str, UNORM_NFC);
+			global $phpbb_root_path, $phpEx;
+			include($phpbb_root_path . 'includes/utf/data/utf_nfc_qc.' . $phpEx);
 		}
 
-		function nfkc($str)
+		return utf_normalizer::recompose($str, $pos, $len, $GLOBALS['utf_nfc_qc'], $GLOBALS['utf_canonical_decomp']);
+	}
+
+	/**
+	* Validate and normalize a UTF string to NFKC
+	*
+	* @param	string	$str	Unchecked UTF string
+	* @return	string			The string, validated and in normal form
+	*/
+	function nfkc($str)
+	{
+		$pos = strspn($str, UTF8_ASCII_RANGE);
+		$len = strlen($str);
+
+		if ($pos == $len)
 		{
-			return utf8_normalize($str, UNORM_NFKC);
+			// ASCII strings return immediately
+			return $str;
 		}
 
-		function nfd($str)
+		if (!isset($GLOBALS['utf_nfkc_qc']))
 		{
-			return utf8_normalize($str, UNORM_NFD);
+			global $phpbb_root_path, $phpEx;
+			include($phpbb_root_path . 'includes/utf/data/utf_nfkc_qc.' . $phpEx);
 		}
 
-		function nfkd($str)
+		if (!isset($GLOBALS['utf_canonical_comp']))
 		{
-			return utf8_normalize($str, UNORM_NFKD);
+			global $phpbb_root_path, $phpEx;
+			include($phpbb_root_path . 'includes/utf/data/utf_canonical_comp.' . $phpEx);
 		}
+
+		return utf_normalizer::recompose($str, $pos, $len, $GLOBALS['utf_nfkc_qc'], $GLOBALS['utf_compatibility_decomp']);
 	}
-}
-else
-{
-	// This block will NOT be loaded if the utfnormal extension is
-
-	// Unset global variables
-	unset($GLOBALS['utf_jamo_index'], $GLOBALS['utf_jamo_type'], $GLOBALS['utf_nfc_qc'], $GLOBALS['utf_combining_class'], $GLOBALS['utf_canonical_comp'], $GLOBALS['utf_canonical_decomp'], $GLOBALS['utf_nfkc_qc'], $GLOBALS['utf_compatibility_decomp']);
-
-	// NFC_QC and NFKC_QC values
-	define('UNICODE_QC_MAYBE', 0);
-	define('UNICODE_QC_NO', 1);
-
-	// Contains all the ASCII characters appearing in UTF-8, sorted by frequency
-	define('UTF8_ASCII_RANGE', "\x20\x65\x69\x61\x73\x6E\x74\x72\x6F\x6C\x75\x64\x5D\x5B\x63\x6D\x70\x27\x0A\x67\x7C\x68\x76\x2E\x66\x62\x2C\x3A\x3D\x2D\x71\x31\x30\x43\x32\x2A\x79\x78\x29\x28\x4C\x39\x41\x53\x2F\x50\x22\x45\x6A\x4D\x49\x6B\x33\x3E\x35\x54\x3C\x44\x34\x7D\x42\x7B\x38\x46\x77\x52\x36\x37\x55\x47\x4E\x3B\x4A\x7A\x56\x23\x48\x4F\x57\x5F\x26\x21\x4B\x3F\x58\x51\x25\x59\x5C\x09\x5A\x2B\x7E\x5E\x24\x40\x60\x7F\x00\x01\x02\x03\x04\x05\x06\x07\x08\x0B\x0C\x0D\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F");
-
-	// Contains all the tail bytes that can appear in the composition of a UTF-8 char
-	define('UTF8_TRAILING_BYTES', "\xA9\xA0\xA8\x80\xAA\x99\xA7\xBB\xAB\x89\x94\x82\xB4\xA2\xAE\x83\xB0\xB9\xB8\x93\xAF\xBC\xB3\x81\xA4\xB2\x9C\xA1\xB5\xBE\xBD\xBA\x98\xAD\xB1\x84\x95\xA6\xB6\x88\x8D\x90\xB7\xBF\x92\x85\xA5\x97\x8C\x86\xA3\x8E\x9F\x8F\x87\x91\x9D\xAC\x9E\x8B\x96\x9B\x8A\x9A");
-
-	// Constants used by the Hangul [de]composition algorithms
-	define('UNICODE_HANGUL_SBASE', 0xAC00);
-	define('UNICODE_HANGUL_LBASE', 0x1100);
-	define('UNICODE_HANGUL_VBASE', 0x1161);
-	define('UNICODE_HANGUL_TBASE', 0x11A7);
-	define('UNICODE_HANGUL_SCOUNT', 11172);
-	define('UNICODE_HANGUL_LCOUNT', 19);
-	define('UNICODE_HANGUL_VCOUNT', 21);
-	define('UNICODE_HANGUL_TCOUNT', 28);
-	define('UNICODE_HANGUL_NCOUNT', 588);
-	define('UNICODE_JAMO_L', 0);
-	define('UNICODE_JAMO_V', 1);
-	define('UNICODE_JAMO_T', 2);
 
 	/**
-	* Unicode normalization routines
+	* Validate and normalize a UTF string to NFD
 	*
-	* @package phpBB3
+	* @param	string	$str	Unchecked UTF string
+	* @return	string			The string, validated and in normal form
 	*/
-	class utf_normalizer
+	function nfd($str)
 	{
-		/**
-		* Validate, cleanup and normalize a string
-		*
-		* The ultimate convenience function! Clean up invalid UTF-8 sequences,
-		* and convert to Normal Form C, canonical composition.
-		*
-		* @param	string	$str	The dirty string
-		* @return	string			The same string, all shiny and cleaned-up
-		*/
-		function cleanup($str)
-		{
-			// The string below is the list of all autorized characters, sorted by frequency in latin text
-			$pos = strspn($str, "\x20\x65\x69\x61\x73\x6E\x74\x72\x6F\x6C\x75\x64\x5D\x5B\x63\x6D\x70\x27\x0A\x67\x7C\x68\x76\x2E\x66\x62\x2C\x3A\x3D\x2D\x71\x31\x30\x43\x32\x2A\x79\x78\x29\x28\x4C\x39\x41\x53\x2F\x50\x22\x45\x6A\x4D\x49\x6B\x33\x3E\x35\x54\x3C\x44\x34\x7D\x42\x7B\x38\x46\x77\x52\x36\x37\x55\x47\x4E\x3B\x4A\x7A\x56\x23\x48\x4F\x57\x5F\x26\x21\x4B\x3F\x58\x51\x25\x59\x5C\x09\x5A\x2B\x7E\x5E\x24\x40\x60\x7F\x0D");
-			$len = strlen($str);
+		$pos = strspn($str, UTF8_ASCII_RANGE);
+		$len = strlen($str);
 
-			if ($pos == $len)
-			{
-				// ASCII strings with no special chars return immediately
-				return $str;
-			}
-
-			// Note: we do not check for $GLOBALS['utf_canonical_decomp']. It is assumed they are always loaded together
-			if (!isset($GLOBALS['utf_nfc_qc']))
-			{
-				global $phpbb_root_path, $phpEx;
-				include($phpbb_root_path . 'includes/utf/data/utf_nfc_qc.' . $phpEx);
-			}
-
-			// Replace any byte in the range 0x00..0x1F, except for \r, \n and \t
-			// We replace those characters with a 0xFF byte, which is illegal in UTF-8 and will in turn be replaced with a UTF replacement char
-			return utf_normalizer::recompose(
-				strtr(
-					$str,
-					"\x00\x01\x02\x03\x04\x05\x06\x07\x08\x0B\x0C\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F",
-					"\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF"
-				),
-				$pos, $len, $GLOBALS['utf_nfc_qc'], $GLOBALS['utf_canonical_decomp']
-			);
+		if ($pos == $len)
+		{
+			// ASCII strings return immediately
+			return $str;
 		}
 
-		/**
-		* Validate and normalize a UTF string to NFC
-		*
-		* @param	string	$str	Unchecked UTF string
-		* @return	string			The string, validated and in normal form
-		*/
-		function nfc($str)
+		if (!isset($GLOBALS['utf_canonical_decomp']))
 		{
-			$pos = strspn($str, UTF8_ASCII_RANGE);
-			$len = strlen($str);
+			global $phpbb_root_path, $phpEx;
+			include($phpbb_root_path . 'includes/utf/data/utf_canonical_decomp.' . $phpEx);
+		}
 
-			if ($pos == $len)
-			{
-				// ASCII strings return immediately
-				return $str;
-			}
+		return utf_normalizer::decompose($str, $pos, $len, $GLOBALS['utf_canonical_decomp']);
+	}
 
-			if (!isset($GLOBALS['utf_nfc_qc']))
-			{
-				global $phpbb_root_path, $phpEx;
-				include($phpbb_root_path . 'includes/utf/data/utf_nfc_qc.' . $phpEx);
-			}
+	/**
+	* Validate and normalize a UTF string to NFKD
+	*
+	* @param	string	$str	Unchecked UTF string
+	* @return	string			The string, validated and in normal form
+	*/
+	function nfkd($str)
+	{
+		$pos = strspn($str, UTF8_ASCII_RANGE);
+		$len = strlen($str);
 
-			return utf_normalizer::recompose($str, $pos, $len, $GLOBALS['utf_nfc_qc'], $GLOBALS['utf_canonical_decomp']);
+		if ($pos == $len)
+		{
+			// ASCII strings return immediately
+			return $str;
 		}
 
-		/**
-		* Validate and normalize a UTF string to NFKC
-		*
-		* @param	string	$str	Unchecked UTF string
-		* @return	string			The string, validated and in normal form
-		*/
-		function nfkc($str)
+		if (!isset($GLOBALS['utf_compatibility_decomp']))
 		{
-			$pos = strspn($str, UTF8_ASCII_RANGE);
-			$len = strlen($str);
-
-			if ($pos == $len)
-			{
-				// ASCII strings return immediately
-				return $str;
-			}
+			global $phpbb_root_path, $phpEx;
+			include($phpbb_root_path . 'includes/utf/data/utf_compatibility_decomp.' . $phpEx);
+		}
 
-			if (!isset($GLOBALS['utf_nfkc_qc']))
-			{
-				global $phpbb_root_path, $phpEx;
-				include($phpbb_root_path . 'includes/utf/data/utf_nfkc_qc.' . $phpEx);
-			}
+		return utf_normalizer::decompose($str, $pos, $len, $GLOBALS['utf_compatibility_decomp']);
+	}
 
-			if (!isset($GLOBALS['utf_canonical_comp']))
-			{
-				global $phpbb_root_path, $phpEx;
-				include($phpbb_root_path . 'includes/utf/data/utf_canonical_comp.' . $phpEx);
-			}
 
-			return utf_normalizer::recompose($str, $pos, $len, $GLOBALS['utf_nfkc_qc'], $GLOBALS['utf_compatibility_decomp']);
-		}
+	/**
+	* Recompose a UTF string
+	*
+	* @param	string	$str		Unchecked UTF string
+	* @param	integer	$pos		Position of the first UTF char (in bytes)
+	* @param	integer	$len		Length of the string (in bytes)
+	* @param	array	$qc			Quick-check array, passed by reference but never modified
+	* @param	array	$decomp_map	Decomposition mapping, passed by reference but never modified
+	* @return	string				The string, validated and recomposed
+	*
+	* @access	private
+	*/
+	function recompose($str, $pos, $len, &$qc, &$decomp_map)
+	{
+		global $utf_combining_class, $utf_canonical_comp, $utf_jamo_type, $utf_jamo_index;
 
-		/**
-		* Validate and normalize a UTF string to NFD
-		*
-		* @param	string	$str	Unchecked UTF string
-		* @return	string			The string, validated and in normal form
-		*/
-		function nfd($str)
+		// Load some commonly-used tables
+		if (!isset($utf_jamo_index, $utf_jamo_type, $utf_combining_class))
 		{
-			$pos = strspn($str, UTF8_ASCII_RANGE);
-			$len = strlen($str);
-
-			if ($pos == $len)
-			{
-				// ASCII strings return immediately
-				return $str;
-			}
+			global $phpbb_root_path;
+			include($phpbb_root_path . 'includes/utf/data/utf_normalizer_common.php');
+		}
 
-			if (!isset($GLOBALS['utf_canonical_decomp']))
-			{
-				global $phpbb_root_path, $phpEx;
-				include($phpbb_root_path . 'includes/utf/data/utf_canonical_decomp.' . $phpEx);
-			}
+		// Buffer the last ASCII char before the UTF-8 stuff if applicable
+		$tmp = '';
+		$i = $tmp_pos = $last_cc = 0;
 
-			return utf_normalizer::decompose($str, $pos, $len, $GLOBALS['utf_canonical_decomp']);
+		if ($pos)
+		{
+			$buffer = array(++$i => $str[$pos - 1]);
 		}
-
-		/**
-		* Validate and normalize a UTF string to NFKD
-		*
-		* @param	string	$str	Unchecked UTF string
-		* @return	string			The string, validated and in normal form
-		*/
-		function nfkd($str)
+		else
 		{
-			$pos = strspn($str, UTF8_ASCII_RANGE);
-			$len = strlen($str);
-
-			if ($pos == $len)
-			{
-				// ASCII strings return immediately
-				return $str;
-			}
-
-			if (!isset($GLOBALS['utf_compatibility_decomp']))
-			{
-				global $phpbb_root_path, $phpEx;
-				include($phpbb_root_path . 'includes/utf/data/utf_compatibility_decomp.' . $phpEx);
-			}
-
-			return utf_normalizer::decompose($str, $pos, $len, $GLOBALS['utf_compatibility_decomp']);
+			$buffer = array();
 		}
 
-
-		/**
-		* Recompose a UTF string
-		*
-		* @param	string	$str		Unchecked UTF string
-		* @param	integer	$pos		Position of the first UTF char (in bytes)
-		* @param	integer	$len		Length of the string (in bytes)
-		* @param	array	$qc			Quick-check array, passed by reference but never modified
-		* @param	array	$decomp_map	Decomposition mapping, passed by reference but never modified
-		* @return	string				The string, validated and recomposed
-		*
-		* @access	private
-		*/
-		function recompose($str, $pos, $len, &$qc, &$decomp_map)
+		// UTF char length array
+		// This array is used to determine the length of a UTF character.
+		// Let $c be the result of ($str[$pos] & "\xF0"), where $str is the string we're operating on and $pos
+		// is the position of the cursor. If $utf_len_mask[$c] does not exist, the byte is an ASCII char.
+		// Otherwise, if $utf_len_mask[$c] is greater than 0, we have the leading byte of a multibyte character
+		// whose length is $utf_len_mask[$c]; if it is equal to 0, the byte is a trailing byte.
+		$utf_len_mask = array(
+			// Leading bytes masks
+			"\xC0" => 2, "\xD0" => 2, "\xE0" => 3, "\xF0" => 4,
+			// Trailing bytes masks
+			"\x80" => 0, "\x90" => 0, "\xA0" => 0, "\xB0" => 0
+		);
+
+		$extra_check = array(
+			"\xED" => 1, "\xEF" => 1, "\xC0" => 1, "\xC1" => 1, "\xE0" => 1, "\xF0" => 1,
+			"\xF4" => 1, "\xF5" => 1, "\xF6" => 1, "\xF7" => 1, "\xF8" => 1, "\xF9" => 1,
+			"\xFA" => 1, "\xFB" => 1, "\xFC" => 1, "\xFD" => 1, "\xFE" => 1, "\xFF" => 1
+		);
+
+		$utf_validation_mask = array(
+			2	=> "\xE0\xC0",
+			3	=> "\xF0\xC0\xC0",
+			4	=> "\xF8\xC0\xC0\xC0"
+		);
+
+		$utf_validation_check = array(
+			2	=> "\xC0\x80",
+			3	=> "\xE0\x80\x80",
+			4	=> "\xF0\x80\x80\x80"
+		);
+
+		// Main loop
+		do
 		{
-			global $utf_combining_class, $utf_canonical_comp, $utf_jamo_type, $utf_jamo_index;
+			// STEP 0: Capture the current char and buffer it
+			$c = $str[$pos];
+			$c_mask = $c & "\xF0";
 
-			// Load some commonly-used tables
-			if (!isset($utf_jamo_index, $utf_jamo_type, $utf_combining_class))
+			if (isset($utf_len_mask[$c_mask]))
 			{
-				global $phpbb_root_path;
-				include($phpbb_root_path . 'includes/utf/data/utf_normalizer_common.php');
-			}
-
-			// Buffer the last ASCII char before the UTF-8 stuff if applicable
-			$tmp = '';
-			$i = $tmp_pos = $last_cc = 0;
-
-			if ($pos)
-			{
-				$buffer = array(++$i => $str[$pos - 1]);
-			}
-			else
-			{
-				$buffer = array();
-			}
-
-			// UTF char length array
-			// This array is used to determine the length of a UTF character.
-			// Be $c the result of ($str[$pos] & "\xF0") --where $str is the string we're operating on and $pos
-			// the position of the cursor--, if $utf_len_mask[$c] does not exist, the byte is an ASCII char.
-			// Otherwise, if $utf_len_mask[$c] is greater than 0, we have a the leading byte of a multibyte character
-			// whose length is $utf_len_mask[$c] and if it is equal to 0, the byte is a trailing byte.
-			$utf_len_mask = array(
-				// Leading bytes masks
-				"\xC0" => 2, "\xD0" => 2, "\xE0" => 3, "\xF0" => 4,
-				// Trailing bytes masks
-				"\x80" => 0, "\x90" => 0, "\xA0" => 0, "\xB0" => 0
-			);
-
-			$extra_check = array(
-				"\xED" => 1, "\xEF" => 1, "\xC0" => 1, "\xC1" => 1, "\xE0" => 1, "\xF0" => 1,
-				"\xF4" => 1, "\xF5" => 1, "\xF6" => 1, "\xF7" => 1, "\xF8" => 1, "\xF9" => 1,
-				"\xFA" => 1, "\xFB" => 1, "\xFC" => 1, "\xFD" => 1, "\xFE" => 1, "\xFF" => 1
-			);
-
-			$utf_validation_mask = array(
-				2	=> "\xE0\xC0",
-				3	=> "\xF0\xC0\xC0",
-				4	=> "\xF8\xC0\xC0\xC0"
-			);
-
-			$utf_validation_check = array(
-				2	=> "\xC0\x80",
-				3	=> "\xE0\x80\x80",
-				4	=> "\xF0\x80\x80\x80"
-			);
-
-			// Main loop
-			do
-			{
-				// STEP 0: Capture the current char and buffer it
-				$c = $str[$pos];
-				$c_mask = $c & "\xF0";
-
-				if (isset($utf_len_mask[$c_mask]))
+				// Byte at $pos is either a leading byte or a misplaced trailing byte
+				if ($utf_len = $utf_len_mask[$c_mask])
 				{
-					// Byte at $pos is either a leading byte or a missplaced trailing byte
-					if ($utf_len = $utf_len_mask[$c_mask])
-					{
-						// Capture the char
-						$buffer[++$i & 7] = $utf_char = substr($str, $pos, $utf_len);
+					// Capture the char
+					$buffer[++$i & 7] = $utf_char = substr($str, $pos, $utf_len);
 
-						// Let's find out if a thorough check is needed
-						if (isset($qc[$utf_char]))
+					// Let's find out if a thorough check is needed
+					if (isset($qc[$utf_char]))
+					{
+						// If the UTF char is in the qc array then it may not be in normal form. We do nothing here, the actual processing is below this "if" block
+					}
+					else if (isset($utf_combining_class[$utf_char]))
+					{
+						if ($utf_combining_class[$utf_char] < $last_cc)
+						{
+							// A combining character that is NOT canonically ordered
+						}
+						else
 						{
-							// If the UTF char is in the qc array then it may not be in normal form. We do nothing here, the actual processing is below this "if" block
+							// A combining character that IS canonically ordered, skip to the next char
+							$last_cc = $utf_combining_class[$utf_char];
+
+							$pos += $utf_len;
+							continue;
 						}
-						else if (isset($utf_combining_class[$utf_char]))
+					}
+					else
+					{
+						// At this point, $utf_char holds a UTF char that we know is not a NF[K]C_QC and is not a combining character.
+						// It can be a singleton, a canonical composite, a replacement char or even an ill-formed bunch of bytes. Let's find out
+						$last_cc = 0;
+
+						// Check that we have the correct number of trailing bytes
+						if (($utf_char & $utf_validation_mask[$utf_len]) != $utf_validation_check[$utf_len])
 						{
-							if ($utf_combining_class[$utf_char] < $last_cc)
+							// Current char isn't well-formed or legal: either one or several trailing bytes are missing, or the Unicode char
+							// has been encoded in a five- or six- byte sequence
+							if ($utf_char[0] >= "\xF8")
 							{
-								// A combining character that is NOT canonically ordered
+								if ($utf_char[0] < "\xF8")
+								{
+									$trailing_bytes = 3;
+								}
+								else if ($utf_char[0] < "\xFC")
+								{
+									$trailing_bytes = 4;
+								}
+
+								if ($utf_char[0] > "\xFD")
+								{
+									$trailing_bytes = 0;
+								}
+								else
+								{
+									$trailing_bytes = 5;
+								}
 							}
 							else
 							{
-								// A combining character that IS canonically ordered, skip to the next char
-								$last_cc = $utf_combining_class[$utf_char];
-
-								$pos += $utf_len;
-								continue;
+								$trailing_bytes = $utf_len - 1;
 							}
+
+							$tmp .= substr($str, $tmp_pos, $pos - $tmp_pos) . UTF8_REPLACEMENT;
+							$pos += strspn($str, UTF8_TRAILING_BYTES, ++$pos, $trailing_bytes);
+							$tmp_pos = $pos;
+
+							continue;
 						}
-						else
-						{
-							// At this point, $utf_char holds a UTF char that we know is not a NF[K]C_QC and is not a combining character.
-							// It can be a singleton, a canonical composite, a replacement char or an even an ill-formed bunch of bytes. Let's find out
-							$last_cc = 0;
 
-							// Check that we have the correct number of trailing bytes
-							if (($utf_char & $utf_validation_mask[$utf_len]) != $utf_validation_check[$utf_len])
+						if (isset($extra_check[$c]))
+						{
+							switch ($c)
 							{
-								// Current char isn't well-formed or legal: either one or several trailing bytes are missing, or the Unicode char
-								// has been encoded in a five- or six- byte sequence
-								if ($utf_char[0] >= "\xF8")
-								{
-									if ($utf_char[0] < "\xF8")
+								// Note: 0xED is quite common in Korean
+								case "\xED":
+									if ($utf_char >= "\xED\xA0\x80")
 									{
-										$trailing_bytes = 3;
+										// Surrogates (U+D800..U+DFFF) are not allowed in UTF-8 (UTF sequence 0xEDA080..0xEDBFBF)
+										$tmp .= substr($str, $tmp_pos, $pos - $tmp_pos) . UTF8_REPLACEMENT;
+										$pos += $utf_len;
+										$tmp_pos = $pos;
+										continue 2;
 									}
-									else if ($utf_char[0] < "\xFC")
+								break;
+
+								// Note: 0xEF is quite common in Japanese
+								case "\xEF":
+									if ($utf_char == "\xEF\xBF\xBE" || $utf_char == "\xEF\xBF\xBF")
 									{
-										$trailing_bytes = 4;
+										// U+FFFE and U+FFFF are explicitly disallowed (UTF sequence 0xEFBFBE..0xEFBFBF)
+										$tmp .= substr($str, $tmp_pos, $pos - $tmp_pos) . UTF8_REPLACEMENT;
+										$pos += $utf_len;
+										$tmp_pos = $pos;
+										continue 2;
 									}
+								break;
 
-									if ($utf_char[0] > "\xFD")
+								case "\xC0":
+								case "\xC1":
+									if ($utf_char <= "\xC1\xBF")
 									{
-										$trailing_bytes = 0;
+										// Overlong sequence: Unicode char U+0000..U+007F encoded as a double-byte UTF char
+										$tmp .= substr($str, $tmp_pos, $pos - $tmp_pos) . UTF8_REPLACEMENT;
+										$pos += $utf_len;
+										$tmp_pos = $pos;
+										continue 2;
 									}
-									else
+								break;
+
+								case "\xE0":
+									if ($utf_char <= "\xE0\x9F\xBF")
 									{
-										$trailing_bytes = 5;
+										// Unicode char U+0000..U+07FF encoded in 3 bytes
+										$tmp .= substr($str, $tmp_pos, $pos - $tmp_pos) . UTF8_REPLACEMENT;
+										$pos += $utf_len;
+										$tmp_pos = $pos;
+										continue 2;
 									}
-								}
-								else
-								{
-									$trailing_bytes = $utf_len - 1;
-								}
-
-								$tmp .= substr($str, $tmp_pos, $pos - $tmp_pos) . UTF8_REPLACEMENT;
-								$pos += strspn($str, UTF8_TRAILING_BYTES, ++$pos, $trailing_bytes);
-								$tmp_pos = $pos;
-
-								continue;
-							}
+								break;
 
-							if (isset($extra_check[$c]))
-							{
-								switch ($c)
-								{
-									// Note: 0xED is quite common in Korean
-									case "\xED":
-										if ($utf_char >= "\xED\xA0\x80")
-										{
-											// Surrogates (U+D800..U+DFFF) are not allowed in UTF-8 (UTF sequence 0xEDA080..0xEDBFBF)
-											$tmp .= substr($str, $tmp_pos, $pos - $tmp_pos) . UTF8_REPLACEMENT;
-											$pos += $utf_len;
-											$tmp_pos = $pos;
-											continue 2;
-										}
-									break;
+								case "\xF0":
+									if ($utf_char <= "\xF0\x8F\xBF\xBF")
+									{
+										// Unicode char U+0000..U+FFFF encoded in 4 bytes
+										$tmp .= substr($str, $tmp_pos, $pos - $tmp_pos) . UTF8_REPLACEMENT;
+										$pos += $utf_len;
+										$tmp_pos = $pos;
+										continue 2;
+									}
+								break;
 
-									// Note: 0xEF is quite common in Japanese
-									case "\xEF":
-										if ($utf_char == "\xEF\xBF\xBE" || $utf_char == "\xEF\xBF\xBF")
+								default:
+									// Five- and six-byte sequences no longer need to be checked for here
+									if ($utf_char > UTF8_MAX)
+									{
+										// Out of the Unicode range
+										if ($utf_char[0] < "\xF8")
 										{
-											// U+FFFE and U+FFFF are explicitly disallowed (UTF sequence 0xEFBFBE..0xEFBFBF)
-											$tmp .= substr($str, $tmp_pos, $pos - $tmp_pos) . UTF8_REPLACEMENT;
-											$pos += $utf_len;
-											$tmp_pos = $pos;
-											continue 2;
+											$trailing_bytes = 3;
 										}
-									break;
-
-									case "\xC0":
-									case "\xC1":
-										if ($utf_char <= "\xC1\xBF")
+										else if ($utf_char[0] < "\xFC")
 										{
-											// Overlong sequence: Unicode char U+0000..U+007F encoded as a double-byte UTF char
-											$tmp .= substr($str, $tmp_pos, $pos - $tmp_pos) . UTF8_REPLACEMENT;
-											$pos += $utf_len;
-											$tmp_pos = $pos;
-											continue 2;
+											$trailing_bytes = 4;
 										}
-									break;
-
-									case "\xE0":
-										if ($utf_char <= "\xE0\x9F\xBF")
+										else if ($utf_char[0] > "\xFD")
 										{
-											// Unicode char U+0000..U+07FF encoded in 3 bytes
-											$tmp .= substr($str, $tmp_pos, $pos - $tmp_pos) . UTF8_REPLACEMENT;
-											$pos += $utf_len;
-											$tmp_pos = $pos;
-											continue 2;
+											$trailing_bytes = 0;
 										}
-									break;
-
-									case "\xF0":
-										if ($utf_char <= "\xF0\x8F\xBF\xBF")
+										else
 										{
-											// Unicode char U+0000..U+FFFF encoded in 4 bytes
-											$tmp .= substr($str, $tmp_pos, $pos - $tmp_pos) . UTF8_REPLACEMENT;
-											$pos += $utf_len;
-											$tmp_pos = $pos;
-											continue 2;
+											$trailing_bytes = 5;
 										}
-									break;
 
-									default:
-										// Five- and six- byte sequences do not need being checked for here anymore
-										if ($utf_char > UTF8_MAX)
-										{
-											// Out of the Unicode range
-											if ($utf_char[0] < "\xF8")
-											{
-												$trailing_bytes = 3;
-											}
-											else if ($utf_char[0] < "\xFC")
-											{
-												$trailing_bytes = 4;
-											}
-											else if ($utf_char[0] > "\xFD")
-											{
-												$trailing_bytes = 0;
-											}
-											else
-											{
-												$trailing_bytes = 5;
-											}
-
-											$tmp .= substr($str, $tmp_pos, $pos - $tmp_pos) . UTF8_REPLACEMENT;
-											$pos += strspn($str, UTF8_TRAILING_BYTES, ++$pos, $trailing_bytes);
-											$tmp_pos = $pos;
-											continue 2;
-										}
-									break;
-								}
+										$tmp .= substr($str, $tmp_pos, $pos - $tmp_pos) . UTF8_REPLACEMENT;
+										$pos += strspn($str, UTF8_TRAILING_BYTES, ++$pos, $trailing_bytes);
+										$tmp_pos = $pos;
+										continue 2;
+									}
+								break;
 							}
-
-							// The char is a valid starter, move the cursor and go on
-							$pos += $utf_len;
-							continue;
 						}
-					}
-					else
-					{
-						// A trailing byte came out of nowhere, we will advance the cursor and treat the this byte and all following trailing bytes as if
-						// each of them was a Unicode replacement char
-						$spn = strspn($str, UTF8_TRAILING_BYTES, $pos);
-						$tmp .= substr($str, $tmp_pos, $pos - $tmp_pos) . str_repeat(UTF8_REPLACEMENT, $spn);
 
-						$pos += $spn;
-						$tmp_pos = $pos;
+						// The char is a valid starter, move the cursor and go on
+						$pos += $utf_len;
 						continue;
 					}
+				}
+				else
+				{
+					// A trailing byte came out of nowhere, we will advance the cursor and treat this byte and all following trailing bytes as if
+					// each of them was a Unicode replacement char
+					$spn = strspn($str, UTF8_TRAILING_BYTES, $pos);
+					$tmp .= substr($str, $tmp_pos, $pos - $tmp_pos) . str_repeat(UTF8_REPLACEMENT, $spn);
 
+					$pos += $spn;
+					$tmp_pos = $pos;
+					continue;
+				}
 
-					// STEP 1: Decompose current char
 
-					// We have found a character that is either:
-					//  - in the NFC_QC/NFKC_QC list
-					//  - a non-starter char that is not canonically ordered
-					//
-					// We are going to capture the shortest UTF sequence that satisfies these two conditions:
-					//
-					//  1 - If the sequence does not start at the begginning of the string, it must begin with a starter,
-					// and that starter must not have the NF[K]C_QC property equal to "MAYBE"
-					//
-					//  2 - If the sequence does not end at the end of the string, it must end with a non-starter and be
-					// immediately followed by a starter that is not on the QC list
-					//
-					$utf_seq = array();
-					$last_cc = 0;
-					$lpos = $pos;
-					$pos += $utf_len;
+				// STEP 1: Decompose current char
+
+				// We have found a character that is either:
+				//  - in the NFC_QC/NFKC_QC list
+				//  - a non-starter char that is not canonically ordered
+				//
+				// We are going to capture the shortest UTF sequence that satisfies these two conditions:
+				//
+				//  1 - If the sequence does not start at the beginning of the string, it must begin with a starter,
+				// and that starter must not have the NF[K]C_QC property equal to "MAYBE"
+				//
+				//  2 - If the sequence does not end at the end of the string, it must end with a non-starter and be
+				// immediately followed by a starter that is not on the QC list
+				//
+				$utf_seq = array();
+				$last_cc = 0;
+				$lpos = $pos;
+				$pos += $utf_len;
+
+				if (isset($decomp_map[$utf_char]))
+				{
+					$_pos = 0;
+					$_len = strlen($decomp_map[$utf_char]);
 
-					if (isset($decomp_map[$utf_char]))
+					do
 					{
-						$_pos = 0;
-						$_len = strlen($decomp_map[$utf_char]);
+						$_utf_len =& $utf_len_mask[$decomp_map[$utf_char][$_pos] & "\xF0"];
 
-						do
+						if (isset($_utf_len))
 						{
-							$_utf_len =& $utf_len_mask[$decomp_map[$utf_char][$_pos] & "\xF0"];
-
-							if (isset($_utf_len))
-							{
-								$utf_seq[] = substr($decomp_map[$utf_char], $_pos, $_utf_len);
-								$_pos += $_utf_len;
-							}
-							else
-							{
-								$utf_seq[] = $decomp_map[$utf_char][$_pos];
-								++$_pos;
-							}
+							$utf_seq[] = substr($decomp_map[$utf_char], $_pos, $_utf_len);
+							$_pos += $_utf_len;
+						}
+						else
+						{
+							$utf_seq[] = $decomp_map[$utf_char][$_pos];
+							++$_pos;
 						}
-						while ($_pos < $_len);
-					}
-					else
-					{
-						// The char is not decomposable
-						$utf_seq = array($utf_char);
 					}
+					while ($_pos < $_len);
+				}
+				else
+				{
+					// The char is not decomposable
+					$utf_seq = array($utf_char);
+				}
 
 
-					// STEP 2: Capture the starter
+				// STEP 2: Capture the starter
 
-					// Check out the combining class of the first character of the UTF sequence
-					$k = 0;
-					if (isset($utf_combining_class[$utf_seq[0]]) || $qc[$utf_char] == UNICODE_QC_MAYBE)
+				// Check out the combining class of the first character of the UTF sequence
+				$k = 0;
+				if (isset($utf_combining_class[$utf_seq[0]]) || $qc[$utf_char] == UNICODE_QC_MAYBE)
+				{
+					// Not a starter, inspect previous characters
+					// The last 8 characters are kept in a buffer so that we don't have to capture them every time.
+					// This is enough for all real-life strings but even if it wasn't, we can capture characters in backward mode,
+					// although it is slower than this method.
+					//
+					// In the following loop, $j starts at the previous buffered character ($i - 1, because current character is
+					// at offset $i) and process them in backward mode until we find a starter.
+					//
+					// $k is the index on each UTF character inside of our UTF sequence. At this time, $utf_seq contains one or more
+					// characters numbered 0 to n. $k starts at 0 and for each char we prepend we pre-decrement it and for numbering
+					$starter_found = 0;
+					$j_min = max(1, $i - 7);
+
+					for ($j = $i - 1; $j >= $j_min && $lpos > $tmp_pos; --$j)
 					{
-						// Not a starter, inspect previous characters
-						// The last 8 characters are kept in a buffer so that we don't have to capture them everytime.
-						// This is enough for all real-life strings but even if it wasn't, we can capture characters in backward mode,
-						// although it is slower than this method.
-						//
-						// In the following loop, $j starts at the previous buffered character ($i - 1, because current character is
-						// at offset $i) and process them in backward mode until we find a starter.
-						//
-						// $k is the index on each UTF character inside of our UTF sequence. At this time, $utf_seq contains one or more
-						// characters numbered 0 to n. $k starts at 0 and for each char we prepend we pre-decrement it and for numbering
-						$starter_found = 0;
-						$j_min = max(1, $i - 7);
+						$utf_char = $buffer[$j & 7];
+						$lpos -= strlen($utf_char);
 
-						for ($j = $i - 1; $j >= $j_min && $lpos > $tmp_pos; --$j)
+						if (isset($decomp_map[$utf_char]))
 						{
-							$utf_char = $buffer[$j & 7];
-							$lpos -= strlen($utf_char);
+							// The char is a composite, decompose for storage
+							$decomp_seq = array();
+							$_pos = 0;
+							$_len = strlen($decomp_map[$utf_char]);
 
-							if (isset($decomp_map[$utf_char]))
+							do
 							{
-								// The char is a composite, decompose for storage
-								$decomp_seq = array();
-								$_pos = 0;
-								$_len = strlen($decomp_map[$utf_char]);
+								$c = $decomp_map[$utf_char][$_pos];
+								$_utf_len =& $utf_len_mask[$c & "\xF0"];
 
-								do
+								if (isset($_utf_len))
 								{
-									$c = $decomp_map[$utf_char][$_pos];
-									$_utf_len =& $utf_len_mask[$c & "\xF0"];
-
-									if (isset($_utf_len))
-									{
-										$decomp_seq[] = substr($decomp_map[$utf_char], $_pos, $_utf_len);
-										$_pos += $_utf_len;
-									}
-									else
-									{
-										$decomp_seq[] = $c;
-										++$_pos;
-									}
+									$decomp_seq[] = substr($decomp_map[$utf_char], $_pos, $_utf_len);
+									$_pos += $_utf_len;
 								}
-								while ($_pos < $_len);
-
-								// Prepend the UTF sequence with our decomposed sequence
-								if (isset($decomp_seq[1]))
+								else
 								{
-									// The char expanded into several chars
-									$decomp_cnt = sizeof($decomp_seq);
-
-									foreach ($decomp_seq as $decomp_i => $decomp_char)
-									{
-										$utf_seq[$k + $decomp_i - $decomp_cnt] = $decomp_char;
-									}
-									$k -= $decomp_cnt;
+									$decomp_seq[] = $c;
+									++$_pos;
 								}
-								else
+							}
+							while ($_pos < $_len);
+
+							// Prepend the UTF sequence with our decomposed sequence
+							if (isset($decomp_seq[1]))
+							{
+								// The char expanded into several chars
+								$decomp_cnt = sizeof($decomp_seq);
+
+								foreach ($decomp_seq as $decomp_i => $decomp_char)
 								{
-									// Decomposed to a single char, easier to prepend
-									$utf_seq[--$k] = $decomp_seq[0];
+									$utf_seq[$k + $decomp_i - $decomp_cnt] = $decomp_char;
 								}
+								$k -= $decomp_cnt;
 							}
 							else
 							{
-								$utf_seq[--$k] = $utf_char;
+								// Decomposed to a single char, easier to prepend
+								$utf_seq[--$k] = $decomp_seq[0];
 							}
+						}
+						else
+						{
+							$utf_seq[--$k] = $utf_char;
+						}
 
-							if (!isset($utf_combining_class[$utf_seq[$k]]))
-							{
-								// We have found our starter
-								$starter_found = 1;
-								break;
-							}
+						if (!isset($utf_combining_class[$utf_seq[$k]]))
+						{
+							// We have found our starter
+							$starter_found = 1;
+							break;
 						}
+					}
 
-						if (!$starter_found && $lpos > $tmp_pos)
+					if (!$starter_found && $lpos > $tmp_pos)
+					{
+						// The starter was not found in the buffer, let's rewind some more
+						do
 						{
-							// The starter was not found in the buffer, let's rewind some more
-							do
-							{
-								// $utf_len_mask contains the masks of both leading bytes and trailing bytes. If $utf_en > 0 then it's a leading byte, otherwise it's a trailing byte.
-								$c = $str[--$lpos];
-								$c_mask = $c & "\xF0";
+							// $utf_len_mask contains the masks of both leading bytes and trailing bytes. If $utf_len > 0 then it's a leading byte; otherwise it's a trailing byte.
+							$c = $str[--$lpos];
+							$c_mask = $c & "\xF0";
 
-								if (isset($utf_len_mask[$c_mask]))
+							if (isset($utf_len_mask[$c_mask]))
+							{
+								// UTF byte
+								if ($utf_len = $utf_len_mask[$c_mask])
 								{
-									// UTF byte
-									if ($utf_len = $utf_len_mask[$c_mask])
+									// UTF *leading* byte
+									$utf_char = substr($str, $lpos, $utf_len);
+
+									if (isset($decomp_map[$utf_char]))
 									{
-										// UTF *leading* byte
-										$utf_char = substr($str, $lpos, $utf_len);
+										// Decompose the character
+										$decomp_seq = array();
+										$_pos = 0;
+										$_len = strlen($decomp_map[$utf_char]);
 
-										if (isset($decomp_map[$utf_char]))
+										do
 										{
-											// Decompose the character
-											$decomp_seq = array();
-											$_pos = 0;
-											$_len = strlen($decomp_map[$utf_char]);
+											$c = $decomp_map[$utf_char][$_pos];
+											$_utf_len =& $utf_len_mask[$c & "\xF0"];
 
-											do
+											if (isset($_utf_len))
 											{
-												$c = $decomp_map[$utf_char][$_pos];
-												$_utf_len =& $utf_len_mask[$c & "\xF0"];
-
-												if (isset($_utf_len))
-												{
-													$decomp_seq[] = substr($decomp_map[$utf_char], $_pos, $_utf_len);
-													$_pos += $_utf_len;
-												}
-												else
-												{
-													$decomp_seq[] = $c;
-													++$_pos;
-												}
+												$decomp_seq[] = substr($decomp_map[$utf_char], $_pos, $_utf_len);
+												$_pos += $_utf_len;
 											}
-											while ($_pos < $_len);
-
-											// Prepend the UTF sequence with our decomposed sequence
-											if (isset($decomp_seq[1]))
+											else
 											{
-												// The char expanded into several chars
-												$decomp_cnt = sizeof($decomp_seq);
-												foreach ($decomp_seq as $decomp_i => $utf_char)
-												{
-													$utf_seq[$k + $decomp_i - $decomp_cnt] = $utf_char;
-												}
-												$k -= $decomp_cnt;
+												$decomp_seq[] = $c;
+												++$_pos;
 											}
-											else
+										}
+										while ($_pos < $_len);
+
+										// Prepend the UTF sequence with our decomposed sequence
+										if (isset($decomp_seq[1]))
+										{
+											// The char expanded into several chars
+											$decomp_cnt = sizeof($decomp_seq);
+											foreach ($decomp_seq as $decomp_i => $utf_char)
 											{
-												// Decomposed to a single char, easier to prepend
-												$utf_seq[--$k] = $decomp_seq[0];
+												$utf_seq[$k + $decomp_i - $decomp_cnt] = $utf_char;
 											}
+											$k -= $decomp_cnt;
 										}
 										else
 										{
-											$utf_seq[--$k] = $utf_char;
+											// Decomposed to a single char, easier to prepend
+											$utf_seq[--$k] = $decomp_seq[0];
 										}
 									}
+									else
+									{
+										$utf_seq[--$k] = $utf_char;
+									}
 								}
-								else
-								{
-									// ASCII char
-									$utf_seq[--$k] = $c;
-								}
 							}
-							while ($lpos > $tmp_pos);
+							else
+							{
+								// ASCII char
+								$utf_seq[--$k] = $c;
+							}
 						}
+						while ($lpos > $tmp_pos);
 					}
+				}
+
 
+				// STEP 3: Capture following combining modifiers
 
-					// STEP 3: Capture following combining modifiers
+				while ($pos < $len)
+				{
+					$c_mask = $str[$pos] & "\xF0";
 
-					while ($pos < $len)
+					if (isset($utf_len_mask[$c_mask]))
 					{
-						$c_mask = $str[$pos] & "\xF0";
+						if ($utf_len = $utf_len_mask[$c_mask])
+						{
+							$utf_char = substr($str, $pos, $utf_len);
+						}
+						else
+						{
+							// A trailing byte came out of nowhere
+							// Trailing bytes are replaced with Unicode replacement chars, we will just ignore it for now, break out of the loop
+							// as if it was a starter (replacement chars ARE starters) and let the next loop replace it
+							break;
+						}
 
-						if (isset($utf_len_mask[$c_mask]))
+						if (isset($utf_combining_class[$utf_char]) || isset($qc[$utf_char]))
 						{
-							if ($utf_len = $utf_len_mask[$c_mask])
-							{
-								$utf_char = substr($str, $pos, $utf_len);
-							}
-							else
+							// Combining character, add it to the sequence and move the cursor
+							if (isset($decomp_map[$utf_char]))
 							{
-								// A trailing byte came out of nowhere
-								// Trailing bytes are replaced with Unicode replacement chars, we will just ignore it for now, break out of the loop
-								// as if it was a starter (replacement chars ARE starters) and let the next loop replace it
-								break;
-							}
+								// Decompose the character
+								$_pos = 0;
+								$_len = strlen($decomp_map[$utf_char]);
 
-							if (isset($utf_combining_class[$utf_char]) || isset($qc[$utf_char]))
-							{
-								// Combining character, add it to the sequence and move the cursor
-								if (isset($decomp_map[$utf_char]))
+								do
 								{
-									// Decompose the character
-									$_pos = 0;
-									$_len = strlen($decomp_map[$utf_char]);
+									$c = $decomp_map[$utf_char][$_pos];
+									$_utf_len =& $utf_len_mask[$c & "\xF0"];
 
-									do
+									if (isset($_utf_len))
 									{
-										$c = $decomp_map[$utf_char][$_pos];
-										$_utf_len =& $utf_len_mask[$c & "\xF0"];
-
-										if (isset($_utf_len))
-										{
-											$utf_seq[] = substr($decomp_map[$utf_char], $_pos, $_utf_len);
-											$_pos += $_utf_len;
-										}
-										else
-										{
-											$utf_seq[] = $c;
-											++$_pos;
-										}
+										$utf_seq[] = substr($decomp_map[$utf_char], $_pos, $_utf_len);
+										$_pos += $_utf_len;
+									}
+									else
+									{
+										$utf_seq[] = $c;
+										++$_pos;
 									}
-									while ($_pos < $_len);
-								}
-								else
-								{
-									$utf_seq[] = $utf_char;
 								}
-
-								$pos += $utf_len;
+								while ($_pos < $_len);
 							}
 							else
 							{
-								// Combining class 0 and no QC, break out of the loop
-								// Note: we do not know if that character is valid. If it's not, the next iteration will replace it
-								break;
+								$utf_seq[] = $utf_char;
 							}
+
+							$pos += $utf_len;
 						}
 						else
 						{
-							// ASCII chars are starters
+							// Combining class 0 and no QC, break out of the loop
+							// Note: we do not know if that character is valid. If it's not, the next iteration will replace it
 							break;
 						}
 					}
+					else
+					{
+						// ASCII chars are starters
+						break;
+					}
+				}
 
 
-					// STEP 4: Sort and combine
+				// STEP 4: Sort and combine
 
-					// Here we sort...
-					$k_max = $k + sizeof($utf_seq);
+				// Here we sort...
+				$k_max = $k + sizeof($utf_seq);
 
-					if (!$k && $k_max == 1)
-					{
-						// There is only one char in the UTF sequence, add it then jump to the next iteration of main loop
+				if (!$k && $k_max == 1)
+				{
+					// There is only one char in the UTF sequence, add it then jump to the next iteration of main loop
 						// Note: the two commented lines below can be enabled under PHP5 for a very small performance gain in most cases
 //						if (substr_compare($str, $utf_seq[0], $lpos, $pos - $lpos))
 //						{
-							$tmp .= substr($str, $tmp_pos, $lpos - $tmp_pos) . $utf_seq[0];
-							$tmp_pos = $pos;
+						$tmp .= substr($str, $tmp_pos, $lpos - $tmp_pos) . $utf_seq[0];
+						$tmp_pos = $pos;
 //						}
 
-						continue;
-					}
+					continue;
+				}
+
+				// ...there we combine
+				if (isset($utf_combining_class[$utf_seq[$k]]))
+				{
+					$starter = $nf_seq = '';
+				}
+				else
+				{
+					$starter = $utf_seq[$k++];
+					$nf_seq = '';
+				}
+				$utf_sort = array();
+
+				// We add an empty char at the end of the UTF char sequence. It will act as a starter and trigger the sort/combine routine
+				// at the end of the string without altering it
+				$utf_seq[] = '';
 
-					// ...there we combine
-					if (isset($utf_combining_class[$utf_seq[$k]]))
+				do
+				{
+					$utf_char = $utf_seq[$k++];
+
+					if (isset($utf_combining_class[$utf_char]))
 					{
-						$starter = $nf_seq = '';
+						$utf_sort[$utf_combining_class[$utf_char]][] = $utf_char;
 					}
 					else
 					{
-						$starter = $utf_seq[$k++];
-						$nf_seq = '';
-					}
-					$utf_sort = array();
-
-					// We add an empty char at the end of the UTF char sequence. It will act as a starter and trigger the sort/combine routine
-					// at the end of the string without altering it
-					$utf_seq[] = '';
-
-					do
-					{
-						$utf_char = $utf_seq[$k++];
-
-						if (isset($utf_combining_class[$utf_char]))
+						if (empty($utf_sort))
 						{
-							$utf_sort[$utf_combining_class[$utf_char]][] = $utf_char;
-						}
-						else
-						{
-							if (empty($utf_sort))
+							// No combining characters... check for a composite of the two starters
+							if (isset($utf_canonical_comp[$starter . $utf_char]))
 							{
-								// No combining characters... check for a composite of the two starters
-								if (isset($utf_canonical_comp[$starter . $utf_char]))
-								{
-									// Good ol' composite character
-									$starter = $utf_canonical_comp[$starter . $utf_char];
-								}
-								else if (isset($utf_jamo_type[$utf_char]))
+								// Good ol' composite character
+								$starter = $utf_canonical_comp[$starter . $utf_char];
+							}
+							else if (isset($utf_jamo_type[$utf_char]))
+							{
+								// Current char is a composable jamo
+								if (isset($utf_jamo_type[$starter]) && $utf_jamo_type[$starter] == UNICODE_JAMO_L && $utf_jamo_type[$utf_char] == UNICODE_JAMO_V)
 								{
-									// Current char is a composable jamo
-									if (isset($utf_jamo_type[$starter]) && $utf_jamo_type[$starter] == UNICODE_JAMO_L && $utf_jamo_type[$utf_char] == UNICODE_JAMO_V)
+									// We have a L jamo followed by a V jamo, we are going to prefetch the next char to see if it's a T jamo
+									if (isset($utf_jamo_type[$utf_seq[$k]]) && $utf_jamo_type[$utf_seq[$k]] == UNICODE_JAMO_T)
 									{
-										// We have a L jamo followed by a V jamo, we are going to prefetch the next char to see if it's a T jamo
-										if (isset($utf_jamo_type[$utf_seq[$k]]) && $utf_jamo_type[$utf_seq[$k]] == UNICODE_JAMO_T)
-										{
-											// L+V+T jamos, combine to a LVT Hangul syllable ($k is incremented)
-											$cp = $utf_jamo_index[$starter] + $utf_jamo_index[$utf_char] + $utf_jamo_index[$utf_seq[$k]];
-											++$k;
-										}
-										else
-										{
-											// L+V jamos, combine to a LV Hangul syllable
-											$cp = $utf_jamo_index[$starter] + $utf_jamo_index[$utf_char];
-										}
-
-										$starter = chr(0xE0 | ($cp >> 12)) . chr(0x80 | (($cp >> 6) & 0x3F)) . chr(0x80 | ($cp & 0x3F));
+										// L+V+T jamos, combine to a LVT Hangul syllable ($k is incremented)
+										$cp = $utf_jamo_index[$starter] + $utf_jamo_index[$utf_char] + $utf_jamo_index[$utf_seq[$k]];
+										++$k;
 									}
 									else
 									{
-										// Non-composable jamo, just add it to the sequence
-										$nf_seq .= $starter;
-										$starter = $utf_char;
+										// L+V jamos, combine to a LV Hangul syllable
+										$cp = $utf_jamo_index[$starter] + $utf_jamo_index[$utf_char];
 									}
+
+									$starter = chr(0xE0 | ($cp >> 12)) . chr(0x80 | (($cp >> 6) & 0x3F)) . chr(0x80 | ($cp & 0x3F));
 								}
 								else
 								{
-									// No composite, just add the first starter to the sequence then continue with the other one
+									// Non-composable jamo, just add it to the sequence
 									$nf_seq .= $starter;
 									$starter = $utf_char;
 								}
 							}
 							else
 							{
-								ksort($utf_sort);
+								// No composite, just add the first starter to the sequence then continue with the other one
+								$nf_seq .= $starter;
+								$starter = $utf_char;
+							}
+						}
+						else
+						{
+							ksort($utf_sort);
 
-								// For each class of combining characters
-								foreach ($utf_sort as $cc => $utf_chars)
-								{
-									$j = 0;
+							// For each class of combining characters
+							foreach ($utf_sort as $cc => $utf_chars)
+							{
+								$j = 0;
 
-									do
+								do
+								{
+									// Look for a composite
+									if (isset($utf_canonical_comp[$starter . $utf_chars[$j]]))
 									{
-										// Look for a composite
-										if (isset($utf_canonical_comp[$starter . $utf_chars[$j]]))
-										{
-											// Found a composite, replace the starter
-											$starter = $utf_canonical_comp[$starter . $utf_chars[$j]];
-											unset($utf_sort[$cc][$j]);
-										}
-										else
-										{
-											// No composite, all following characters in that class are blocked
-											break;
-										}
+										// Found a composite, replace the starter
+										$starter = $utf_canonical_comp[$starter . $utf_chars[$j]];
+										unset($utf_sort[$cc][$j]);
+									}
+									else
+									{
+										// No composite, all following characters in that class are blocked
+										break;
 									}
-									while (isset($utf_sort[$cc][++$j]));
 								}
+								while (isset($utf_sort[$cc][++$j]));
+							}
 
-								// Add the starter to the normalized sequence, followed by non-starters in canonical order
-								$nf_seq .= $starter;
+							// Add the starter to the normalized sequence, followed by non-starters in canonical order
+							$nf_seq .= $starter;
 
-								foreach ($utf_sort as $utf_chars)
+							foreach ($utf_sort as $utf_chars)
+							{
+								if (!empty($utf_chars))
 								{
-									if (!empty($utf_chars))
-									{
-										$nf_seq .= implode('', $utf_chars);
-									}
+									$nf_seq .= implode('', $utf_chars);
 								}
-
-								// Reset the array and go on
-								$utf_sort = array();
-								$starter = $utf_char;
 							}
+
+							// Reset the array and go on
+							$utf_sort = array();
+							$starter = $utf_char;
 						}
 					}
-					while ($k <= $k_max);
-
-					$tmp .= substr($str, $tmp_pos, $lpos - $tmp_pos) . $nf_seq;
-					$tmp_pos = $pos;
 				}
-				else
+				while ($k <= $k_max);
+
+				$tmp .= substr($str, $tmp_pos, $lpos - $tmp_pos) . $nf_seq;
+				$tmp_pos = $pos;
+			}
+			else
+			{
+				// Only an ASCII char can make the program get here
+				//
+				// First we skip the current byte with ++$pos, then we quickly skip following ASCII chars with strspn().
+				//
+				// The first two "if"'s here can be removed, with the consequences of being faster on latin text (lots of ASCII) and slower on
+				// multi-byte text (where the only ASCII chars are spaces and punctuation)
+				if (++$pos != $len)
 				{
-					// Only a ASCII char can make the program get here
-					//
-					// First we skip the current byte with ++$pos, then we quickly skip following ASCII chars with strspn().
-					//
-					// The first two "if"'s here can be removed, with the consequences of being faster on latin text (lots of ASCII) and slower on
-					// multi-byte text (where the only ASCII chars are spaces and punctuation)
-					if (++$pos != $len)
+					if ($str[$pos] < "\x80")
 					{
-						if ($str[$pos] < "\x80")
-						{
-							$pos += strspn($str, UTF8_ASCII_RANGE, ++$pos);
-							$buffer[++$i & 7] = $str[$pos - 1];
-						}
-						else
-						{
-							$buffer[++$i & 7] = $c;
-						}
+						$pos += strspn($str, UTF8_ASCII_RANGE, ++$pos);
+						$buffer[++$i & 7] = $str[$pos - 1];
+					}
+					else
+					{
+						$buffer[++$i & 7] = $c;
 					}
 				}
 			}
-			while ($pos < $len);
+		}
+		while ($pos < $len);
 
-			// Now is time to return the string
-			if ($tmp_pos)
+		// Now is time to return the string
+		if ($tmp_pos)
+		{
+			// If the $tmp_pos cursor is not at the beginning of the string then at least one character was not in normal form. Replace $str with the fixed version
+			if ($tmp_pos == $len)
 			{
-				// If the $tmp_pos cursor is not at the beggining of the string then at least one character was not in normal form. Replace $str with the fixed version
-				if ($tmp_pos == $len)
-				{
-					// The $tmp_pos cursor is at the end of $str, therefore $tmp holds the whole $str
-					return $tmp;
-				}
-				else
-				{
-					// The rightmost chunk of $str has not been appended to $tmp yet
-					return $tmp . substr($str, $tmp_pos);
-				}
+				// The $tmp_pos cursor is at the end of $str, therefore $tmp holds the whole $str
+				return $tmp;
+			}
+			else
+			{
+				// The rightmost chunk of $str has not been appended to $tmp yet
+				return $tmp . substr($str, $tmp_pos);
 			}
-
-			// The string was already in normal form
-			return $str;
 		}
 
-		/**
-		* Decompose a UTF string
-		*
-		* @param	string	$str		UTF string
-		* @param	integer	$pos		Position of the first UTF char (in bytes)
-		* @param	integer	$len		Length of the string (in bytes)
-		* @param	array	$decomp_map	Decomposition mapping, passed by reference but never modified
-		* @return	string				The string, decomposed and sorted canonically
-		*
-		* @access	private
-		*/
-		function decompose($str, $pos, $len, &$decomp_map)
+		// The string was already in normal form
+		return $str;
+	}
+
+	/**
+	* Decompose a UTF string
+	*
+	* @param	string	$str		UTF string
+	* @param	integer	$pos		Position of the first UTF char (in bytes)
+	* @param	integer	$len		Length of the string (in bytes)
+	* @param	array	$decomp_map	Decomposition mapping, passed by reference but never modified
+	* @return	string				The string, decomposed and sorted canonically
+	*
+	* @access	private
+	*/
+	function decompose($str, $pos, $len, &$decomp_map)
+	{
+		global $utf_combining_class, $utf_canonical_decomp, $phpbb_root_path;
+
+		// Load some commonly-used tables
+		if (!isset($utf_combining_class))
 		{
-			global $utf_combining_class, $utf_canonical_decomp, $phpbb_root_path;
+			include($phpbb_root_path . 'includes/utf/data/utf_normalizer_common.php');
+		}
 
-			// Load some commonly-used tables
-			if (!isset($utf_combining_class))
-			{
-				include($phpbb_root_path . 'includes/utf/data/utf_normalizer_common.php');
-			}
+		// UTF char length array
+		$utf_len_mask = array(
+			// Leading bytes masks
+			"\xC0" => 2, "\xD0" => 2, "\xE0" => 3, "\xF0" => 4,
+			// Trailing bytes masks
+			"\x80" => 0, "\x90" => 0, "\xA0" => 0, "\xB0" => 0
+		);
+
+		// Some extra checks are triggered on the first byte of a UTF sequence
+		$extra_check = array(
+			"\xED" => 1, "\xEF" => 1, "\xC0" => 1, "\xC1" => 1, "\xE0" => 1, "\xF0" => 1,
+			"\xF4" => 1, "\xF5" => 1, "\xF6" => 1, "\xF7" => 1, "\xF8" => 1, "\xF9" => 1,
+			"\xFA" => 1, "\xFB" => 1, "\xFC" => 1, "\xFD" => 1, "\xFE" => 1, "\xFF" => 1
+		);
+
+		// These masks are used to check if a UTF sequence is well formed. Here are the only 3 lengths we acknowledge:
+		//   - 2-byte: 110? ???? 10?? ????
+		//   - 3-byte: 1110 ???? 10?? ???? 10?? ????
+		//   - 4-byte: 1111 0??? 10?? ???? 10?? ???? 10?? ????
+		// Note that 5- and 6- byte sequences are automatically discarded
+		$utf_validation_mask = array(
+			2	=> "\xE0\xC0",
+			3	=> "\xF0\xC0\xC0",
+			4	=> "\xF8\xC0\xC0\xC0"
+		);
+
+		$utf_validation_check = array(
+			2	=> "\xC0\x80",
+			3	=> "\xE0\x80\x80",
+			4	=> "\xF0\x80\x80\x80"
+		);
+
+		$tmp = '';
+		$starter_pos = $pos;
+		$tmp_pos = $last_cc = $sort = $dump = 0;
+		$utf_sort = array();
+
+
+		// Main loop
+		do
+		{
+			// STEP 0: Capture the current char
 
-			// UTF char length array
-			$utf_len_mask = array(
-				// Leading bytes masks
-				"\xC0" => 2, "\xD0" => 2, "\xE0" => 3, "\xF0" => 4,
-				// Trailing bytes masks
-				"\x80" => 0, "\x90" => 0, "\xA0" => 0, "\xB0" => 0
-			);
-
-			// Some extra checks are triggered on the first byte of a UTF sequence
-			$extra_check = array(
-				"\xED" => 1, "\xEF" => 1, "\xC0" => 1, "\xC1" => 1, "\xE0" => 1, "\xF0" => 1,
-				"\xF4" => 1, "\xF5" => 1, "\xF6" => 1, "\xF7" => 1, "\xF8" => 1, "\xF9" => 1,
-				"\xFA" => 1, "\xFB" => 1, "\xFC" => 1, "\xFD" => 1, "\xFE" => 1, "\xFF" => 1
-			);
-
-			// These masks are used to check if a UTF sequence is well formed. Here are the only 3 lengths we acknowledge:
-			//   - 2-byte: 110? ???? 10?? ????
-			//   - 3-byte: 1110 ???? 10?? ???? 10?? ????
-			//   - 4-byte: 1111 0??? 10?? ???? 10?? ???? 10?? ????
-			// Note that 5- and 6- byte sequences are automatically discarded
-			$utf_validation_mask = array(
-				2	=> "\xE0\xC0",
-				3	=> "\xF0\xC0\xC0",
-				4	=> "\xF8\xC0\xC0\xC0"
-			);
-
-			$utf_validation_check = array(
-				2	=> "\xC0\x80",
-				3	=> "\xE0\x80\x80",
-				4	=> "\xF0\x80\x80\x80"
-			);
-
-			$tmp = '';
-			$starter_pos = $pos;
-			$tmp_pos = $last_cc = $sort = $dump = 0;
-			$utf_sort = array();
-
-
-			// Main loop
-			do
+			$cur_mask = $str[$pos] & "\xF0";
+			if (isset($utf_len_mask[$cur_mask]))
 			{
-				// STEP 0: Capture the current char
-
-				$cur_mask = $str[$pos] & "\xF0";
-				if (isset($utf_len_mask[$cur_mask]))
+				if ($utf_len = $utf_len_mask[$cur_mask])
 				{
-					if ($utf_len = $utf_len_mask[$cur_mask])
-					{
-						// Multibyte char
-						$utf_char = substr($str, $pos, $utf_len);
-						$pos += $utf_len;
-					}
-					else
+					// Multibyte char
+					$utf_char = substr($str, $pos, $utf_len);
+					$pos += $utf_len;
+				}
+				else
+				{
+					// A trailing byte came out of nowhere, we will treat it and all following trailing bytes as if each of them was a Unicode
+					// replacement char and we will advance the cursor
+					$spn = strspn($str, UTF8_TRAILING_BYTES, $pos);
+
+					if ($dump)
 					{
-						// A trailing byte came out of nowhere, we will treat it and all following trailing bytes as if each of them was a Unicode
-						// replacement char and we will advance the cursor
-						$spn = strspn($str, UTF8_TRAILING_BYTES, $pos);
+						$tmp .= substr($str, $tmp_pos, $starter_pos - $tmp_pos);
 
-						if ($dump)
+						// Dump combiners
+						if (!empty($utf_sort))
 						{
-							$tmp .= substr($str, $tmp_pos, $starter_pos - $tmp_pos);
-
-							// Dump combiners
-							if (!empty($utf_sort))
+							if ($sort)
 							{
-								if ($sort)
-								{
-									ksort($utf_sort);
-								}
+								ksort($utf_sort);
+							}
 
-								foreach($utf_sort as $utf_chars)
-								{
-									$tmp .= implode('', $utf_chars);
-								}
+							foreach($utf_sort as $utf_chars)
+							{
+								$tmp .= implode('', $utf_chars);
 							}
+						}
+
+						$tmp .= str_repeat(UTF8_REPLACEMENT, $spn);
+						$dump = $sort = 0;
+					}
+					else
+					{
+						$tmp .= substr($str, $tmp_pos, $pos - $tmp_pos) . str_repeat(UTF8_REPLACEMENT, $spn);
+					}
 
-							$tmp .= str_repeat(UTF8_REPLACEMENT, $spn);
-							$dump = $sort = 0;
-						}
-						else
-						{
-							$tmp .= substr($str, $tmp_pos, $pos - $tmp_pos) . str_repeat(UTF8_REPLACEMENT, $spn);
-						}
+					$pos += $spn;
+					$tmp_pos = $starter_pos = $pos;
 
-						$pos += $spn;
-						$tmp_pos = $starter_pos = $pos;
+					$utf_sort = array();
+					$last_cc = 0;
 
-						$utf_sort = array();
-						$last_cc = 0;
+					continue;
+				}
 
-						continue;
-					}
 
+				// STEP 1: Decide what to do with current char
 
-					// STEP 1: Decide what to do with current char
+				// Now, in that order:
+				//  - check if that character is decomposable
+				//  - check if that character is a non-starter
+				//  - check if that character requires extra checks to be performed
+				if (isset($decomp_map[$utf_char]))
+				{
+					// Decompose the char
+					$_pos = 0;
+					$_len = strlen($decomp_map[$utf_char]);
 
-					// Now, in that order:
-					//  - check if that character is decomposable
-					//  - check if that character is a non-starter
-					//  - check if that character requires extra checks to be performed
-					if (isset($decomp_map[$utf_char]))
+					do
 					{
-						// Decompose the char
-						$_pos = 0;
-						$_len = strlen($decomp_map[$utf_char]);
+						$c = $decomp_map[$utf_char][$_pos];
+						$_utf_len =& $utf_len_mask[$c & "\xF0"];
 
-						do
+						if (isset($_utf_len))
 						{
-							$c = $decomp_map[$utf_char][$_pos];
-							$_utf_len =& $utf_len_mask[$c & "\xF0"];
+							$_utf_char = substr($decomp_map[$utf_char], $_pos, $_utf_len);
+							$_pos += $_utf_len;
 
-							if (isset($_utf_len))
+							if (isset($utf_combining_class[$_utf_char]))
 							{
-								$_utf_char = substr($decomp_map[$utf_char], $_pos, $_utf_len);
-								$_pos += $_utf_len;
+								// The character decomposed to a non-starter, buffer it for sorting
+								$utf_sort[$utf_combining_class[$_utf_char]][] = $_utf_char;
 
-								if (isset($utf_combining_class[$_utf_char]))
+								if ($utf_combining_class[$_utf_char] < $last_cc)
 								{
-									// The character decomposed to a non-starter, buffer it for sorting
-									$utf_sort[$utf_combining_class[$_utf_char]][] = $_utf_char;
-
-									if ($utf_combining_class[$_utf_char] < $last_cc)
-									{
-										// Not canonically ordered, will require sorting
-										$sort = $dump = 1;
-									}
-									else
-									{
-										$dump = 1;
-										$last_cc = $utf_combining_class[$_utf_char];
-									}
+									// Not canonically ordered, will require sorting
+									$sort = $dump = 1;
 								}
 								else
 								{
-									// This character decomposition contains a starter, dump the buffer and continue
-									if ($dump)
-									{
-										$tmp .= substr($str, $tmp_pos, $starter_pos - $tmp_pos);
-
-										// Dump combiners
-										if (!empty($utf_sort))
-										{
-											if ($sort)
-											{
-												ksort($utf_sort);
-											}
-
-											foreach ($utf_sort as $utf_chars)
-											{
-												$tmp .= implode('', $utf_chars);
-											}
-										}
-
-										$tmp .= $_utf_char;
-										$dump = $sort = 0;
-									}
-									else
-									{
-										$tmp .= substr($str, $tmp_pos, $starter_pos - $tmp_pos) . $_utf_char;
-									}
-
-									$tmp_pos = $starter_pos = $pos;
-									$utf_sort = array();
-									$last_cc = 0;
+									$dump = 1;
+									$last_cc = $utf_combining_class[$_utf_char];
 								}
 							}
 							else
 							{
-								// This character decomposition contains an ASCII char, which is a starter. Dump the buffer and continue
-								++$_pos;
-
+								// This character decomposition contains a starter, dump the buffer and continue
 								if ($dump)
 								{
 									$tmp .= substr($str, $tmp_pos, $starter_pos - $tmp_pos);
@@ -1220,12 +1094,12 @@ else
 										}
 									}
 
-									$tmp .= $c;
+									$tmp .= $_utf_char;
 									$dump = $sort = 0;
 								}
 								else
 								{
-									$tmp .= substr($str, $tmp_pos, $pos - $utf_len - $tmp_pos) . $c;
+									$tmp .= substr($str, $tmp_pos, $starter_pos - $tmp_pos) . $_utf_char;
 								}
 
 								$tmp_pos = $starter_pos = $pos;
@@ -1233,285 +1107,290 @@ else
 								$last_cc = 0;
 							}
 						}
-						while ($_pos < $_len);
-					}
-					else if (isset($utf_combining_class[$utf_char]))
-					{
-						// Combining character
-						if ($utf_combining_class[$utf_char] < $last_cc)
-						{
-							// Not in canonical order
-							$sort = $dump = 1;
-						}
 						else
 						{
-							$last_cc = $utf_combining_class[$utf_char];
-						}
+							// This character decomposition contains an ASCII char, which is a starter. Dump the buffer and continue
+							++$_pos;
 
-						$utf_sort[$utf_combining_class[$utf_char]][] = $utf_char;
-					}
-					else
-					{
-						// Non-decomposable starter, check out if it's a Hangul syllable
-						if ($utf_char < UTF8_HANGUL_FIRST || $utf_char > UTF8_HANGUL_LAST)
-						{
-							// Nope, regular UTF char, check that we have the correct number of trailing bytes
-							if (($utf_char & $utf_validation_mask[$utf_len]) != $utf_validation_check[$utf_len])
+							if ($dump)
 							{
-								// Current char isn't well-formed or legal: either one or several trailing bytes are missing, or the Unicode char
-								// has been encoded in a five- or six- byte sequence.
-								// Move the cursor back to its original position then advance it to the position it should really be at
-								$pos -= $utf_len;
 								$tmp .= substr($str, $tmp_pos, $starter_pos - $tmp_pos);
 
+								// Dump combiners
 								if (!empty($utf_sort))
 								{
-									ksort($utf_sort);
+									if ($sort)
+									{
+										ksort($utf_sort);
+									}
 
 									foreach ($utf_sort as $utf_chars)
 									{
 										$tmp .= implode('', $utf_chars);
 									}
-									$utf_sort = array();
 								}
 
-								// Add a replacement char then another replacement char for every trailing byte.
-								//
-								// @todo I'm not entirely sure that's how we're supposed to mark invalidated byte sequences, check this
-								$spn = strspn($str, UTF8_TRAILING_BYTES, ++$pos);
-								$tmp .= str_repeat(UTF8_REPLACEMENT, $spn + 1);
-
+								$tmp .= $c;
 								$dump = $sort = 0;
-
-								$pos += $spn;
-								$tmp_pos = $pos;
-								continue;
 							}
+							else
+							{
+								$tmp .= substr($str, $tmp_pos, $pos - $utf_len - $tmp_pos) . $c;
+							}
+
+							$tmp_pos = $starter_pos = $pos;
+							$utf_sort = array();
+							$last_cc = 0;
+						}
+					}
+					while ($_pos < $_len);
+				}
+				else if (isset($utf_combining_class[$utf_char]))
+				{
+					// Combining character
+					if ($utf_combining_class[$utf_char] < $last_cc)
+					{
+						// Not in canonical order
+						$sort = $dump = 1;
+					}
+					else
+					{
+						$last_cc = $utf_combining_class[$utf_char];
+					}
+
+					$utf_sort[$utf_combining_class[$utf_char]][] = $utf_char;
+				}
+				else
+				{
+					// Non-decomposable starter, check out if it's a Hangul syllable
+					if ($utf_char < UTF8_HANGUL_FIRST || $utf_char > UTF8_HANGUL_LAST)
+					{
+						// Nope, regular UTF char, check that we have the correct number of trailing bytes
+						if (($utf_char & $utf_validation_mask[$utf_len]) != $utf_validation_check[$utf_len])
+						{
+							// Current char isn't well-formed or legal: either one or several trailing bytes are missing, or the Unicode char
+							// has been encoded in a five- or six- byte sequence.
+							// Move the cursor back to its original position then advance it to the position it should really be at
+							$pos -= $utf_len;
+							$tmp .= substr($str, $tmp_pos, $starter_pos - $tmp_pos);
 
-							if (isset($extra_check[$utf_char[0]]))
+							if (!empty($utf_sort))
 							{
-								switch ($utf_char[0])
+								ksort($utf_sort);
+
+								foreach ($utf_sort as $utf_chars)
 								{
-									// Note: 0xED is quite common in Korean
-									case "\xED":
-										if ($utf_char >= "\xED\xA0\x80")
+									$tmp .= implode('', $utf_chars);
+								}
+								$utf_sort = array();
+							}
+
+							// Add a replacement char then another replacement char for every trailing byte.
+							//
+							// @todo I'm not entirely sure that's how we're supposed to mark invalidated byte sequences, check this
+							$spn = strspn($str, UTF8_TRAILING_BYTES, ++$pos);
+							$tmp .= str_repeat(UTF8_REPLACEMENT, $spn + 1);
+
+							$dump = $sort = 0;
+
+							$pos += $spn;
+							$tmp_pos = $pos;
+							continue;
+						}
+
+						if (isset($extra_check[$utf_char[0]]))
+						{
+							switch ($utf_char[0])
+							{
+								// Note: 0xED is quite common in Korean
+								case "\xED":
+									if ($utf_char >= "\xED\xA0\x80")
+									{
+										// Surrogates (U+D800..U+DFFF) are not allowed in UTF-8 (UTF sequence 0xEDA080..0xEDBFBF)
+										$tmp .= substr($str, $tmp_pos, $starter_pos - $tmp_pos);
+
+										if (!empty($utf_sort))
 										{
-											// Surrogates (U+D800..U+DFFF) are not allowed in UTF-8 (UTF sequence 0xEDA080..0xEDBFBF)
-											$tmp .= substr($str, $tmp_pos, $starter_pos - $tmp_pos);
+											ksort($utf_sort);
 
-											if (!empty($utf_sort))
+											foreach ($utf_sort as $utf_chars)
 											{
-												ksort($utf_sort);
-
-												foreach ($utf_sort as $utf_chars)
-												{
-													$tmp .= implode('', $utf_chars);
-												}
-												$utf_sort = array();
+												$tmp .= implode('', $utf_chars);
 											}
+											$utf_sort = array();
+										}
 
-											$tmp .= UTF8_REPLACEMENT;
-											$dump = $sort = 0;
+										$tmp .= UTF8_REPLACEMENT;
+										$dump = $sort = 0;
 
-											$tmp_pos = $starter_pos = $pos;
-											continue 2;
-										}
-									break;
+										$tmp_pos = $starter_pos = $pos;
+										continue 2;
+									}
+								break;
 
-									// Note: 0xEF is quite common in Japanese
-									case "\xEF":
-										if ($utf_char == "\xEF\xBF\xBE" || $utf_char == "\xEF\xBF\xBF")
+								// Note: 0xEF is quite common in Japanese
+								case "\xEF":
+									if ($utf_char == "\xEF\xBF\xBE" || $utf_char == "\xEF\xBF\xBF")
+									{
+										// U+FFFE and U+FFFF are explicitly disallowed (UTF sequence 0xEFBFBE..0xEFBFBF)
+										$tmp .= substr($str, $tmp_pos, $starter_pos - $tmp_pos);
+
+										if (!empty($utf_sort))
 										{
-											// U+FFFE and U+FFFF are explicitly disallowed (UTF sequence 0xEFBFBE..0xEFBFBF)
-											$tmp .= substr($str, $tmp_pos, $starter_pos - $tmp_pos);
+											ksort($utf_sort);
 
-											if (!empty($utf_sort))
+											foreach ($utf_sort as $utf_chars)
 											{
-												ksort($utf_sort);
-
-												foreach ($utf_sort as $utf_chars)
-												{
-													$tmp .= implode('', $utf_chars);
-												}
-												$utf_sort = array();
+												$tmp .= implode('', $utf_chars);
 											}
+											$utf_sort = array();
+										}
 
-											$tmp .= UTF8_REPLACEMENT;
-											$dump = $sort = 0;
+										$tmp .= UTF8_REPLACEMENT;
+										$dump = $sort = 0;
 
-											$tmp_pos = $starter_pos = $pos;
-											continue 2;
-										}
-									break;
+										$tmp_pos = $starter_pos = $pos;
+										continue 2;
+									}
+								break;
+
+								case "\xC0":
+								case "\xC1":
+									if ($utf_char <= "\xC1\xBF")
+									{
+										// Overlong sequence: Unicode char U+0000..U+007F encoded as a double-byte UTF char
+										$tmp .= substr($str, $tmp_pos, $starter_pos - $tmp_pos);
 
-									case "\xC0":
-									case "\xC1":
-										if ($utf_char <= "\xC1\xBF")
+										if (!empty($utf_sort))
 										{
-											// Overlong sequence: Unicode char U+0000..U+007F encoded as a double-byte UTF char
-											$tmp .= substr($str, $tmp_pos, $starter_pos - $tmp_pos);
+											ksort($utf_sort);
 
-											if (!empty($utf_sort))
+											foreach ($utf_sort as $utf_chars)
 											{
-												ksort($utf_sort);
-
-												foreach ($utf_sort as $utf_chars)
-												{
-													$tmp .= implode('', $utf_chars);
-												}
-												$utf_sort = array();
+												$tmp .= implode('', $utf_chars);
 											}
+											$utf_sort = array();
+										}
 
-											$tmp .= UTF8_REPLACEMENT;
-											$dump = $sort = 0;
+										$tmp .= UTF8_REPLACEMENT;
+										$dump = $sort = 0;
 
-											$tmp_pos = $starter_pos = $pos;
-											continue 2;
-										}
-									break;
+										$tmp_pos = $starter_pos = $pos;
+										continue 2;
+									}
+								break;
+
+								case "\xE0":
+									if ($utf_char <= "\xE0\x9F\xBF")
+									{
+										// Unicode char U+0000..U+07FF encoded in 3 bytes
+										$tmp .= substr($str, $tmp_pos, $starter_pos - $tmp_pos);
 
-									case "\xE0":
-										if ($utf_char <= "\xE0\x9F\xBF")
+										if (!empty($utf_sort))
 										{
-											// Unicode char U+0000..U+07FF encoded in 3 bytes
-											$tmp .= substr($str, $tmp_pos, $starter_pos - $tmp_pos);
+											ksort($utf_sort);
 
-											if (!empty($utf_sort))
+											foreach ($utf_sort as $utf_chars)
 											{
-												ksort($utf_sort);
-
-												foreach ($utf_sort as $utf_chars)
-												{
-													$tmp .= implode('', $utf_chars);
-												}
-												$utf_sort = array();
+												$tmp .= implode('', $utf_chars);
 											}
+											$utf_sort = array();
+										}
+
+										$tmp .= UTF8_REPLACEMENT;
+										$dump = $sort = 0;
 
-											$tmp .= UTF8_REPLACEMENT;
-											$dump = $sort = 0;
+										$tmp_pos = $starter_pos = $pos;
+										continue 2;
+									}
+								break;
 
-											$tmp_pos = $starter_pos = $pos;
-											continue 2;
-										}
-									break;
+								case "\xF0":
+									if ($utf_char <= "\xF0\x8F\xBF\xBF")
+									{
+										// Unicode char U+0000..U+FFFF encoded in 4 bytes
+										$tmp .= substr($str, $tmp_pos, $starter_pos - $tmp_pos);
 
-									case "\xF0":
-										if ($utf_char <= "\xF0\x8F\xBF\xBF")
+										if (!empty($utf_sort))
 										{
-											// Unicode char U+0000..U+FFFF encoded in 4 bytes
-											$tmp .= substr($str, $tmp_pos, $starter_pos - $tmp_pos);
+											ksort($utf_sort);
 
-											if (!empty($utf_sort))
+											foreach ($utf_sort as $utf_chars)
 											{
-												ksort($utf_sort);
-
-												foreach ($utf_sort as $utf_chars)
-												{
-													$tmp .= implode('', $utf_chars);
-												}
-												$utf_sort = array();
+												$tmp .= implode('', $utf_chars);
 											}
+											$utf_sort = array();
+										}
 
-											$tmp .= UTF8_REPLACEMENT;
-											$dump = $sort = 0;
+										$tmp .= UTF8_REPLACEMENT;
+										$dump = $sort = 0;
 
-											$tmp_pos = $starter_pos = $pos;
-											continue 2;
-										}
-									break;
+										$tmp_pos = $starter_pos = $pos;
+										continue 2;
+									}
+								break;
 
-									default:
-										if ($utf_char > UTF8_MAX)
+								default:
+									if ($utf_char > UTF8_MAX)
+									{
+										// Out of the Unicode range
+										$tmp .= substr($str, $tmp_pos, $starter_pos - $tmp_pos);
+
+										if (!empty($utf_sort))
 										{
-											// Out of the Unicode range
-											$tmp .= substr($str, $tmp_pos, $starter_pos - $tmp_pos);
+											ksort($utf_sort);
 
-											if (!empty($utf_sort))
+											foreach ($utf_sort as $utf_chars)
 											{
-												ksort($utf_sort);
-
-												foreach ($utf_sort as $utf_chars)
-												{
-													$tmp .= implode('', $utf_chars);
-												}
-												$utf_sort = array();
+												$tmp .= implode('', $utf_chars);
 											}
+											$utf_sort = array();
+										}
 
-											$tmp .= UTF8_REPLACEMENT;
-											$dump = $sort = 0;
+										$tmp .= UTF8_REPLACEMENT;
+										$dump = $sort = 0;
 
-											$tmp_pos = $starter_pos = $pos;
-											continue 2;
-										}
-									break;
-								}
+										$tmp_pos = $starter_pos = $pos;
+										continue 2;
+									}
+								break;
 							}
 						}
-						else
-						{
-							// Hangul syllable
-							$idx = (((ord($utf_char[0]) & 0x0F) << 12) | ((ord($utf_char[1]) & 0x3F) << 6) | (ord($utf_char[2]) & 0x3F)) - UNICODE_HANGUL_SBASE;
+					}
+					else
+					{
+						// Hangul syllable
+						$idx = (((ord($utf_char[0]) & 0x0F) << 12) | ((ord($utf_char[1]) & 0x3F) << 6) | (ord($utf_char[2]) & 0x3F)) - UNICODE_HANGUL_SBASE;
 
-							// LIndex can only range from 0 to 18, therefore it cannot influence the first two bytes of the L Jamo, which allows us to hardcode them (based on LBase).
-							//
-							// The same goes for VIndex, but for TIndex there's a catch: the value of the third byte could exceed 0xBF and we would have to increment the second byte
-							if ($tIndex = $idx % UNICODE_HANGUL_TCOUNT)
+						// LIndex can only range from 0 to 18, therefore it cannot influence the first two bytes of the L Jamo, which allows us to hardcode them (based on LBase).
+						//
+						// The same goes for VIndex, but for TIndex there's a catch: the value of the third byte could exceed 0xBF and we would have to increment the second byte
+						if ($tIndex = $idx % UNICODE_HANGUL_TCOUNT)
+						{
+							if ($tIndex < 25)
 							{
-								if ($tIndex < 25)
-								{
-									$utf_char = "\xE1\x84\x00\xE1\x85\x00\xE1\x86\x00";
-									$utf_char[8] = chr(0xA7 + $tIndex);
-								}
-								else
-								{
-									$utf_char = "\xE1\x84\x00\xE1\x85\x00\xE1\x87\x00";
-									$utf_char[8] = chr(0x67 + $tIndex);
-								}
+								$utf_char = "\xE1\x84\x00\xE1\x85\x00\xE1\x86\x00";
+								$utf_char[8] = chr(0xA7 + $tIndex);
 							}
 							else
 							{
-								$utf_char = "\xE1\x84\x00\xE1\x85\x00";
+								$utf_char = "\xE1\x84\x00\xE1\x85\x00\xE1\x87\x00";
+								$utf_char[8] = chr(0x67 + $tIndex);
 							}
-
-							$utf_char[2] = chr(0x80 + (int) ($idx / UNICODE_HANGUL_NCOUNT));
-							$utf_char[5] = chr(0xA1 + (int) (($idx % UNICODE_HANGUL_NCOUNT) / UNICODE_HANGUL_TCOUNT));
-
-							// Just like other decompositions, the resulting Jamos must be dumped to the tmp string
-							$dump = 1;
 						}
-
-						// Do we need to dump stuff to the tmp string?
-						if ($dump)
+						else
 						{
-							$tmp .= substr($str, $tmp_pos, $starter_pos - $tmp_pos);
-
-							// Dump combiners
-							if (!empty($utf_sort))
-							{
-								if ($sort)
-								{
-									ksort($utf_sort);
-								}
-
-								foreach ($utf_sort as $utf_chars)
-								{
-									$tmp .= implode('', $utf_chars);
-								}
-							}
-
-							$tmp .= $utf_char;
-							$dump = $sort = 0;
-							$tmp_pos = $pos;
+							$utf_char = "\xE1\x84\x00\xE1\x85\x00";
 						}
 
-						$last_cc = 0;
-						$utf_sort = array();
-						$starter_pos = $pos;
+						$utf_char[2] = chr(0x80 + (int) ($idx / UNICODE_HANGUL_NCOUNT));
+						$utf_char[5] = chr(0xA1 + (int) (($idx % UNICODE_HANGUL_NCOUNT) / UNICODE_HANGUL_TCOUNT));
+
+						// Just like other decompositions, the resulting Jamos must be dumped to the tmp string
+						$dump = 1;
 					}
-				}
-				else
-				{
-					// ASCII char, which happens to be a starter (as any other ASCII char)
+
+					// Do we need to dump stuff to the tmp string?
 					if ($dump)
 					{
 						$tmp .= substr($str, $tmp_pos, $starter_pos - $tmp_pos);
@@ -1530,15 +1409,9 @@ else
 							}
 						}
 
-						$tmp .= $str[$pos];
+						$tmp .= $utf_char;
 						$dump = $sort = 0;
-						$tmp_pos = ++$pos;
-
-						$pos += strspn($str, UTF8_ASCII_RANGE, $pos);
-					}
-					else
-					{
-						$pos += strspn($str, UTF8_ASCII_RANGE, ++$pos);
+						$tmp_pos = $pos;
 					}
 
 					$last_cc = 0;
@@ -1546,48 +1419,84 @@ else
 					$starter_pos = $pos;
 				}
 			}
-			while ($pos < $len);
-
-			// Now is time to return the string
-			if ($dump)
+			else
 			{
-				$tmp .= substr($str, $tmp_pos, $starter_pos - $tmp_pos);
-
-				// Dump combiners
-				if (!empty($utf_sort))
+				// ASCII char, which happens to be a starter (as any other ASCII char)
+				if ($dump)
 				{
-					if ($sort)
-					{
-						ksort($utf_sort);
-					}
+					$tmp .= substr($str, $tmp_pos, $starter_pos - $tmp_pos);
 
-					foreach ($utf_sort as $utf_chars)
+					// Dump combiners
+					if (!empty($utf_sort))
 					{
-						$tmp .= implode('', $utf_chars);
+						if ($sort)
+						{
+							ksort($utf_sort);
+						}
+
+						foreach ($utf_sort as $utf_chars)
+						{
+							$tmp .= implode('', $utf_chars);
+						}
 					}
-				}
 
-				return $tmp;
+					$tmp .= $str[$pos];
+					$dump = $sort = 0;
+					$tmp_pos = ++$pos;
+
+					$pos += strspn($str, UTF8_ASCII_RANGE, $pos);
+				}
+				else
+				{
+					$pos += strspn($str, UTF8_ASCII_RANGE, ++$pos);
+				}
 
+				$last_cc = 0;
+				$utf_sort = array();
+				$starter_pos = $pos;
 			}
-			else if ($tmp_pos)
+		}
+		while ($pos < $len);
+
+		// Now is time to return the string
+		if ($dump)
+		{
+			$tmp .= substr($str, $tmp_pos, $starter_pos - $tmp_pos);
+
+			// Dump combiners
+			if (!empty($utf_sort))
 			{
-				// If the $tmp_pos cursor was moved then at least one character was not in normal form. Replace $str with the fixed version
-				if ($tmp_pos == $len)
+				if ($sort)
 				{
-					// The $tmp_pos cursor is at the end of $str, therefore $tmp holds the whole $str
-					return $tmp;
+					ksort($utf_sort);
 				}
-				else
+
+				foreach ($utf_sort as $utf_chars)
 				{
-					// The rightmost chunk of $str has not been appended to $tmp yet
-					return $tmp . substr($str, $tmp_pos);
+					$tmp .= implode('', $utf_chars);
 				}
 			}
 
-			// The string was already in normal form
-			return $str;
+			return $tmp;
+
+		}
+		else if ($tmp_pos)
+		{
+			// If the $tmp_pos cursor was moved then at least one character was not in normal form. Replace $str with the fixed version
+			if ($tmp_pos == $len)
+			{
+				// The $tmp_pos cursor is at the end of $str, therefore $tmp holds the whole $str
+				return $tmp;
+			}
+			else
+			{
+				// The rightmost chunk of $str has not been appended to $tmp yet
+				return $tmp . substr($str, $tmp_pos);
+			}
 		}
+
+		// The string was already in normal form
+		return $str;
 	}
 }
 
diff --git a/phpBB/includes/utf/utf_tools.php b/phpBB/includes/utf/utf_tools.php
index aa29159d5e..7a2b536e97 100644
--- a/phpBB/includes/utf/utf_tools.php
+++ b/phpBB/includes/utf/utf_tools.php
@@ -781,7 +781,7 @@ function utf8_recode($string, $encoding)
 */
 function utf8_encode_ncr($text)
 {
-	return preg_replace_callback('#[\\xC2-\\xF4][\\x80-\\xBF]?[\\x80-\\xBF]?[\\x80-\\xBF]+#', 'utf8_encode_ncr_callback', $text);
+	return preg_replace_callback('#[\\xC2-\\xF4][\\x80-\\xBF]{1,3}#', 'utf8_encode_ncr_callback', $text);
 }
 
 /**
@@ -824,7 +824,7 @@ function utf8_ord($chr)
 		break;
 
 		default:
-			return $m;
+			return $chr;
 	}
 }
 
-- 
cgit v1.2.1


From 3d0759974b3679aa75f23840e0b2f7c725091560 Mon Sep 17 00:00:00 2001
From: Meik Sievertsen <acydburn@phpbb.com>
Date: Fri, 10 Nov 2006 13:49:52 +0000
Subject: - some fixes - added script for easy adjustment of username_clean
 column within the users table (please see the note I added to the
 utf8_clean_string() function)

git-svn-id: file:///svn/phpbb/trunk@6561 89ea8834-ac86-4346-8a33-228a782c2dd0
---
 phpBB/includes/utf/utf_tools.php | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'phpBB/includes/utf')

diff --git a/phpBB/includes/utf/utf_tools.php b/phpBB/includes/utf/utf_tools.php
index 7a2b536e97..d90590e813 100644
--- a/phpBB/includes/utf/utf_tools.php
+++ b/phpBB/includes/utf/utf_tools.php
@@ -931,6 +931,9 @@ function utf8_case_fold($text, $option = 'full')
 
 /**
 * @todo needs documenting
+*
+* Please be aware that if you change something within this function or within
+* functions used here you need to rebuild/update the complete users table.
 */
 function utf8_clean_string($text)
 {
-- 
cgit v1.2.1


From cf34efb06ce62407232d63dd4e73b8afc6e2a4ef Mon Sep 17 00:00:00 2001
From: Nils Adermann <naderman@naderman.de>
Date: Sun, 12 Nov 2006 14:29:32 +0000
Subject: message

git-svn-id: file:///svn/phpbb/trunk@6569 89ea8834-ac86-4346-8a33-228a782c2dd0
---
 phpBB/includes/utf/utf_tools.php | 45 +++++++++++++++++++++++++++++++++++++---
 1 file changed, 42 insertions(+), 3 deletions(-)

(limited to 'phpBB/includes/utf')

diff --git a/phpBB/includes/utf/utf_tools.php b/phpBB/includes/utf/utf_tools.php
index d90590e813..b91fd51c20 100644
--- a/phpBB/includes/utf/utf_tools.php
+++ b/phpBB/includes/utf/utf_tools.php
@@ -930,15 +930,52 @@ function utf8_case_fold($text, $option = 'full')
 }
 
 /**
-* @todo needs documenting
+* A wrapper function for the normalizer which takes care of including the class if required and modifies the passed strings
+* to be in NFC (Normalization Form C: canonical decomposition followed by canonical composition).
+*
+* @param	mixed	$strings Either an array of references to strings, a reference to an array of strings or a reference to a single string
+*/
+function utf8_normalize_nfc($strings)
+{
+	if (!is_array($strings) || (sizeof($strings) > 0))
+    {	
+		if (!class_exists('utf_normalizer'))
+		{
+			global $phpbb_root_path, $phpEx;
+			include($phpbb_root_path . 'includes/utf/utf_normalizer.' . $phpEx);
+		}
+
+		if (is_array($strings))
+		{
+			foreach ($strings as $key => $string)
+			{
+				$strings[$key] = utf_normalizer::nfc($strings[$key]);
+			}
+		}
+		else
+		{
+			$strings = utf_normalizer::nfc($strings);
+		}
+	}
+}
+
+/**
+* This function is used to generate a "clean" version of a string.
+* Clean means that it is in a case-insensitive form (case folding) and that it is normalized (NFC).
+* Additionally, homographs of a character are transformed into one specific character (preferably the
+* ASCII character, if one of the homographs is an ASCII character).
 *
 * Please be aware that if you change something within this function or within
-* functions used here you need to rebuild/update the complete users table.
+* functions used here you need to rebuild/update the username_clean column in the users table, as well as all other
+* columns that store a clean string; otherwise you will break this functionality.
+*
+* @param	$text	An unclean string, maybe user input (has to be valid UTF-8!)
+* @return			Cleaned up version of the input string
 */
 function utf8_clean_string($text)
 {
 	$text = utf8_case_fold($text);
-
+	
 	if (!class_exists('utf_normalizer'))
 	{
 		global $phpbb_root_path, $phpEx;
@@ -963,6 +1000,8 @@ function utf8_clean_string($text)
 		// greek
 		"\xCE\xB1" => "\x61",
 		"\xCE\xBF" => "\x6F",
+		// other
+		"\xC2\xA1" => "\x69",
 	);
 
 	$text = strtr($text, $homographs);
-- 
cgit v1.2.1