Merge branch 'prep-release-3.0.9'

* prep-release-3.0.9: (359 commits) [prep-release-3.0.9] Bumping version number for 3.0.9 final. [prep-release-3.0.9] Update Changelog for 3.0.9-RC4 release. [prep-release-3.0.9] Decreasing version for an RC4 release. [ticket/9859] Changing all phpBB footers to match the new credit line [ticket/9859] New footer copyright line with registered symbol [ticket/10250] The site_logo hash is different depending on imageset & language [ticket/10250] Destroy cached md5 hash of site_logo on refreshing an imageset [ticket/10250] Overwrite the site_logo width&height when the phpbb logo is used [ticket/10247] Remove attempt_id as primary key from database_update.php [ticket/10250] Added the new phpBB Logo with the Registered Trademark Symbol [ticket/10247] Use COUNT(*) instead of COUNT(attempt_id) [prep-release-3.0.9] Update Changelog for 3.0.9 release. [prep-release-3.0.9] Bumping version number for the final 3.0.9 release. [ticket/10247] Removing attempt_id column from the 3.0.8 to 3.0.9-RC1 updater. [ticket/10247] Add a db_tools test for the removal of a primary key column. [ticket/10247] Add empty data section to database update for RC4 [ticket/10247] Remove unecessary attempt_id primary key column [prep-release-3.0.9] Bump database version to RC3 too. [prep-release-3.0.9] Update Changelog for 3.0.9-RC3 release. [prep-release-3.0.9] Bumping version number for 3.0.9-RC3. ...
author: Andreas Fischer <bantu@phpbb.com> 2011-07-11 00:29:45 +0200
committer: Andreas Fischer <bantu@phpbb.com> 2011-07-11 00:29:45 +0200
commit: c8da5ad9f42d8ced1aead79a42cc5caee5c5a2ff (patch)
tree: b7e507311afa3db9e372f9d5b8b01455dbd50841 /tests/utf/normalizer_test.php
parent: 7f21a5f46156660d7ea6a4bdb59166ac553e2be8 (diff)
parent: e6572b766f7fd5f8547b28fd52d25e4a96cfc2cd (diff)
download: forums-c8da5ad9f42d8ced1aead79a42cc5caee5c5a2ff.tar
forums-c8da5ad9f42d8ced1aead79a42cc5caee5c5a2ff.tar.gz
forums-c8da5ad9f42d8ced1aead79a42cc5caee5c5a2ff.tar.bz2
forums-c8da5ad9f42d8ced1aead79a42cc5caee5c5a2ff.tar.xz
forums-c8da5ad9f42d8ced1aead79a42cc5caee5c5a2ff.zip
1 file changed, 320 insertions, 0 deletions
diff --git a/tests/utf/normalizer_test.php b/tests/utf/normalizer_test.php
new file mode 100644
index 0000000000..f78dba8004
--- /dev/null
+++ b/tests/utf/normalizer_test.php
@@ -0,0 +1,320 @@
+<?php
+/**
+*
+* @package testing
+* @copyright (c) 2011 phpBB Group
+* @license http://opensource.org/licenses/gpl-license.php GNU Public License
+*
+*/
+
+require_once dirname(__FILE__) . '/../../phpBB/includes/utf/utf_normalizer.php';
+
+/**
+* @group slow
+*/
+class phpbb_utf_normalizer_test extends phpbb_test_case
+{
+	/**
+	* Fetch the Unicode data files read by the tests in this class
+	*
+	* Every file is handed to self::download() together with the "data"
+	* directory located next to this test file.
+	*/
+	static public function setUpBeforeClass()
+	{
+		$data_dir = dirname(__FILE__).'/data';
+
+		$sources = array(
+			'http://www.unicode.org/Public/UNIDATA/NormalizationTest.txt',
+			'http://www.unicode.org/Public/UNIDATA/UnicodeData.txt'
+		);
+
+		foreach ($sources as $url)
+		{
+			self::download($url, $data_dir);
+		}
+	}
+
+	/**
+	* Run the conformance data of NormalizationTest.txt against utf_normalizer
+	*
+	* Every data line holds five columns (c1 to c5) of space-separated
+	* hexadecimal codepoints. For each normalization form, the source columns
+	* listed in $test_suite are normalized and compared with the expected column.
+	*
+	* @return	array	Hex codepoints (as keys) of the single characters found in
+	*					column c1, handed over to test_invariants()
+	*/
+	public function test_normalizer()
+	{
+		$test_suite = array(
+			/**
+			* NFC
+			*   c2 ==  NFC(c1) ==  NFC(c2) ==  NFC(c3)
+			*   c4 ==  NFC(c4) ==  NFC(c5)
+			*/
+			'NFC'	=>	array(
+				'c2'	=>	array('c1', 'c2', 'c3'),
+				'c4'	=>	array('c4', 'c5')
+			),
+
+			/**
+			* NFD
+			*   c3 ==  NFD(c1) ==  NFD(c2) ==  NFD(c3)
+			*   c5 ==  NFD(c4) ==  NFD(c5)
+			*/
+			'NFD'	=>	array(
+				'c3'	=>	array('c1', 'c2', 'c3'),
+				'c5'	=>	array('c4', 'c5')
+			),
+
+			/**
+			* NFKC
+			*   c4 == NFKC(c1) == NFKC(c2) == NFKC(c3) == NFKC(c4) == NFKC(c5)
+			*/
+			'NFKC'	=>	array(
+				'c4'	=>	array('c1', 'c2', 'c3', 'c4', 'c5')
+			),
+
+			/**
+			* NFKD
+			*   c5 == NFKD(c1) == NFKD(c2) == NFKD(c3) == NFKD(c4) == NFKD(c5)
+			*/
+			'NFKD'	=>	array(
+				'c5'	=>	array('c1', 'c2', 'c3', 'c4', 'c5')
+			)
+		);
+
+		$tested_chars = array();
+
+		$file = dirname(__FILE__).'/data/NormalizationTest.txt';
+
+		// Without the data file there is nothing to test; looping on feof() with
+		// an invalid handle would never terminate
+		if (!is_readable($file) || !($fp = fopen($file, 'rb')))
+		{
+			$this->markTestSkipped("Unable to read $file");
+		}
+
+		// fgets() returns false at the end of the file, which ends the loop
+		while (($line = fgets($fp)) !== false)
+		{
+			/**
+			* Data lines start with an uppercase hexadecimal digit. Everything
+			* else ("@Part" headers, "#" comments, blank lines) is skipped.
+			*/
+			if (strspn($line, '0123456789ABCDEF', 0, 1) !== 1)
+			{
+				continue;
+			}
+
+			$fields = explode(';', $line);
+
+			if (count($fields) < 5)
+			{
+				// Not a complete record
+				continue;
+			}
+
+			$columns = array(
+				'c1'	=>	$fields[0],
+				'c2'	=>	$fields[1],
+				'c3'	=>	$fields[2],
+				'c4'	=>	$fields[3],
+				'c5'	=>	$fields[4]
+			);
+
+			if (strpos($columns['c1'], ' ') === false)
+			{
+				/**
+				* We are currently testing a single character, we add it to the list of
+				* characters we have processed so that we can exclude it when testing
+				* for invariants
+				*/
+				$tested_chars[$columns['c1']] = 1;
+			}
+
+			foreach ($test_suite as $form => $serie)
+			{
+				foreach ($serie as $expected => $tests)
+				{
+					$hex_expected = $columns[$expected];
+					$utf_expected = $this->hexseq_to_utf($hex_expected);
+
+					foreach ($tests as $test)
+					{
+						// The input is the source column named by $test; feeding the
+						// expected value would only prove that normalized text is stable
+						$utf_result = $this->hexseq_to_utf($columns[$test]);
+
+						// call_user_func() does not forward references and call-time
+						// pass-by-reference is a fatal error since PHP 5.4, so the
+						// reference is wrapped in an argument array instead
+						call_user_func_array(array('utf_normalizer', $form), array(&$utf_result));
+
+						$hex_result = $this->utf_to_hexseq($utf_result);
+						$this->assertEquals($utf_expected, $utf_result, "$expected == $form($test) ($hex_expected != $hex_result)");
+					}
+				}
+			}
+		}
+		fclose($fp);
+
+		return $tested_chars;
+	}
+
+	/**
+	* Check that characters listed in UnicodeData.txt but not covered by the
+	* conformance suite come out of every normalization form unchanged
+	*
+	* @depends test_normalizer
+	*
+	* @param	array	$tested_chars	Hex codepoints (as keys) to leave out, as
+	*									returned by test_normalizer()
+	*/
+	public function test_invariants(array $tested_chars)
+	{
+		$file = dirname(__FILE__).'/data/UnicodeData.txt';
+
+		// Without the data file there is nothing to test; looping on feof() with
+		// an invalid handle would never terminate
+		if (!is_readable($file) || !($fp = fopen($file, 'rb')))
+		{
+			$this->markTestSkipped("Unable to read $file");
+		}
+
+		// fgets() returns false at the end of the file, which ends the loop
+		while (($line = fgets($fp, 1024)) !== false)
+		{
+			// The codepoint is the first ";"-separated field of the line
+			if (!$pos = strpos($line, ';'))
+			{
+				continue;
+			}
+
+			$hex_tested = $hex_expected = substr($line, 0, $pos);
+
+			if (isset($tested_chars[$hex_tested]))
+			{
+				continue;
+			}
+
+			$utf_expected = $this->hex_to_utf($hex_expected);
+
+			if ($utf_expected >= UTF8_SURROGATE_FIRST
+			 && $utf_expected <= UTF8_SURROGATE_LAST)
+			{
+				/**
+				* Surrogates are illegal on their own, we expect the normalizer
+				* to return a replacement char
+				*/
+				$utf_expected = UTF8_REPLACEMENT;
+				$hex_expected = $this->utf_to_hexseq($utf_expected);
+			}
+
+			foreach (array('nfc', 'nfkc', 'nfd', 'nfkd') as $form)
+			{
+				$utf_result = $utf_expected;
+
+				// call_user_func() does not forward references and call-time
+				// pass-by-reference is a fatal error since PHP 5.4, so the
+				// reference is wrapped in an argument array instead
+				call_user_func_array(array('utf_normalizer', $form), array(&$utf_result));
+				$hex_result = $this->utf_to_hexseq($utf_result);
+
+				$this->assertEquals($utf_expected, $utf_result, "$hex_expected == $form($hex_tested) ($hex_expected != $hex_result)");
+			}
+		}
+		fclose($fp);
+	}
+
+	/**
+	* Convert a UTF-8 string to a sequence of codepoints in hexadecimal
+	*
+	* @param	string	$str	UTF-8 string
+	* @return	string			Codepoints in uppercase hexadecimal, zero-padded to at
+	*							least four digits and separated with a space
+	*/
+	protected function utf_to_hexseq($str)
+	{
+		$codepoints = array();
+		$total = strlen($str);
+		$offset = 0;
+
+		while ($offset < $total)
+		{
+			// The high nibble of the lead byte tells how many bytes the char spans
+			switch (ord($str[$offset]) >> 4)
+			{
+				case 0xC:
+				case 0xD:
+					$size = 2;
+					break;
+
+				case 0xE:
+					$size = 3;
+					break;
+
+				case 0xF:
+					$size = 4;
+					break;
+
+				default:
+					$size = 1;
+			}
+
+			$utf_char = substr($str, $offset, $size);
+			$offset += $size;
+
+			$codepoints[] = sprintf('%04X', $this->utf_to_cp($utf_char));
+		}
+
+		return implode(' ', $codepoints);
+	}
+
+	/**
+	* Decode a single UTF-8 encoded char into its codepoint
+	*
+	* @param	string	$utf_char	UTF-8 char, 1 to 4 bytes long
+	* @return	integer				Unicode codepoint
+	* @throws	RuntimeException	If the char is not 1 to 4 bytes long
+	*/
+	protected function utf_to_cp($utf_char)
+	{
+		$length = strlen($utf_char);
+
+		if ($length < 1 || $length > 4)
+		{
+			throw new RuntimeException('UTF-8 chars can only be 1-4 bytes long');
+		}
+
+		if ($length == 1)
+		{
+			return ord($utf_char);
+		}
+
+		// Payload bits carried by the lead byte, indexed by sequence length
+		$lead_masks = array(2 => 0x1F, 3 => 0x0F, 4 => 0x07);
+
+		$cp = ord($utf_char[0]) & $lead_masks[$length];
+
+		// Each continuation byte contributes its six low bits
+		for ($i = 1; $i < $length; ++$i)
+		{
+			$cp = ($cp << 6) | (ord($utf_char[$i]) & 0x3F);
+		}
+
+		return $cp;
+	}
+
+	/**
+	* Build a UTF-8 string out of a sequence of hexadecimal codepoints
+	*
+	* @param	string	$seq		Codepoints in hexadecimal, separated with a space
+	* @return	string				UTF-8 string
+	*/
+	protected function hexseq_to_utf($seq)
+	{
+		$utf = '';
+
+		foreach (explode(' ', $seq) as $hex)
+		{
+			$utf .= $this->hex_to_utf($hex);
+		}
+
+		return $utf;
+	}
+
+	/**
+	* Turn one hexadecimal codepoint into the matching UTF-8 char
+	*
+	* @param	string	$hex		Codepoint, in hexadecimal
+	* @return	string				UTF-8 char
+	*/
+	protected function hex_to_utf($hex)
+	{
+		$cp = hexdec($hex);
+
+		return $this->cp_to_utf($cp);
+	}
+
+	/**
+	* Encode a codepoint as a UTF-8 char
+	*
+	* The number of bytes produced (1 to 4) grows with the codepoint.
+	*
+	* @param	integer	$cp			Unicode codepoint
+	* @return	string				UTF-8 char
+	*/
+	protected function cp_to_utf($cp)
+	{
+		if ($cp <= 0x7F)
+		{
+			// Single byte
+			return chr($cp);
+		}
+
+		// Trailing byte, shared by every multibyte form
+		$last = chr(0x80 | ($cp & 0x3F));
+
+		if ($cp <= 0x7FF)
+		{
+			return chr(0xC0 | ($cp >> 6)) . $last;
+		}
+
+		$second_last = chr(0x80 | (($cp >> 6) & 0x3F));
+
+		if ($cp <= 0xFFFF)
+		{
+			return chr(0xE0 | ($cp >> 12)) . $second_last . $last;
+		}
+
+		return chr(0xF0 | ($cp >> 18)) . chr(0x80 | (($cp >> 12) & 0x3F)) . $second_last . $last;
+	}
+
+	/**
+	* Download a file into a directory unless a copy is already present
+	*
+	* Failures are echoed and otherwise ignored. The data is first written to a
+	* ".part" file that is renamed once the copy has completed, so that an
+	* interrupted download is not mistaken for a usable copy on the next run.
+	*
+	* @param	string	$url	URL of the file to fetch
+	* @param	string	$to		Directory receiving the file, which is named
+	*							after the basename of the URL
+	* @return	null
+	*/
+	static protected function download($url, $to)
+	{
+		$target = $to . '/' . basename($url);
+
+		if (file_exists($target))
+		{
+			return;
+		}
+
+		if (!$fpr = fopen($url, 'rb'))
+		{
+			echo "Failed to download $url\n";
+			return;
+		}
+
+		$partial = $target . '.part';
+
+		if (!$fpw = fopen($partial, 'wb'))
+		{
+			// Do not leak the source handle
+			fclose($fpr);
+
+			echo "Failed to open $target for writing\n";
+			return;
+		}
+
+		// Returns the number of bytes copied, or false on failure
+		$copied = stream_copy_to_stream($fpr, $fpw);
+
+		fclose($fpr);
+		fclose($fpw);
+
+		// An empty or failed transfer must not be kept as the cached copy
+		if (!$copied || !rename($partial, $target))
+		{
+			if (file_exists($partial))
+			{
+				unlink($partial);
+			}
+
+			echo "Failed to download $url\n";
+		}
+	}
+}
author	Andreas Fischer <bantu@phpbb.com>	2011-07-11 00:29:45 +0200
committer	Andreas Fischer <bantu@phpbb.com>	2011-07-11 00:29:45 +0200
commit	c8da5ad9f42d8ced1aead79a42cc5caee5c5a2ff (patch)
tree	b7e507311afa3db9e372f9d5b8b01455dbd50841 /tests/utf/normalizer_test.php
parent	7f21a5f46156660d7ea6a4bdb59166ac553e2be8 (diff)
parent	e6572b766f7fd5f8547b28fd52d25e4a96cfc2cd (diff)
download	forums-c8da5ad9f42d8ced1aead79a42cc5caee5c5a2ff.tar forums-c8da5ad9f42d8ced1aead79a42cc5caee5c5a2ff.tar.gz forums-c8da5ad9f42d8ced1aead79a42cc5caee5c5a2ff.tar.bz2 forums-c8da5ad9f42d8ced1aead79a42cc5caee5c5a2ff.tar.xz forums-c8da5ad9f42d8ced1aead79a42cc5caee5c5a2ff.zip