diff options
| -rw-r--r-- | .gitignore | 1 | ||||
| -rw-r--r-- | phpunit.xml.dist | 10 | ||||
| -rw-r--r-- | tests/network/checkdnsrr_test.php | 3 | ||||
| -rw-r--r-- | tests/utf/data/.gitkeep | 0 | ||||
| -rw-r--r-- | tests/utf/normalizer_test.php | 318 | 
5 files changed, 330 insertions, 2 deletions
diff --git a/.gitignore b/.gitignore index 39b9e0a7f4..c417bf01c1 100644 --- a/.gitignore +++ b/.gitignore @@ -7,3 +7,4 @@ phpBB/images/avatars/upload/*  phpBB/store/*  tests/phpbb_unit_tests.sqlite2  tests/test_config.php +tests/utf/data/*.txt diff --git a/phpunit.xml.dist b/phpunit.xml.dist index 78c7fdd93a..de8134da8e 100644 --- a/phpunit.xml.dist +++ b/phpunit.xml.dist @@ -2,7 +2,7 @@  <phpunit backupGlobals="true"           backupStaticAttributes="true" -         colors="false" +         colors="true"           convertErrorsToExceptions="true"           convertNoticesToExceptions="true"           convertWarningsToExceptions="true" @@ -16,7 +16,13 @@              <directory suffix="_test.php">./tests/</directory>          </testsuite>      </testsuites> -     + +    <groups> +        <exclude> +            <group>slow</group> +        </exclude> +    </groups> +      <filter>          <blacklist>              <directory>./tests/</directory> diff --git a/tests/network/checkdnsrr_test.php b/tests/network/checkdnsrr_test.php index 427132e508..9410deaf64 100644 --- a/tests/network/checkdnsrr_test.php +++ b/tests/network/checkdnsrr_test.php @@ -9,6 +9,9 @@  require_once __DIR__ . '/../../phpBB/includes/functions.php'; +/** +* @group slow +*/  class phpbb_network_checkdnsrr_test extends phpbb_test_case  {  	public function data_provider() diff --git a/tests/utf/data/.gitkeep b/tests/utf/data/.gitkeep new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/tests/utf/data/.gitkeep diff --git a/tests/utf/normalizer_test.php b/tests/utf/normalizer_test.php new file mode 100644 index 0000000000..9a9011c0fe --- /dev/null +++ b/tests/utf/normalizer_test.php @@ -0,0 +1,318 @@ +<?php +/** +* +* @package testing +* @copyright (c) 2011 phpBB Group +* @license http://opensource.org/licenses/gpl-license.php GNU Public License +* +*/ + +require_once __DIR__ . '/../../phpBB/includes/utf/utf_normalizer.php'; + +/** +* @group slow +*/ +class phpbb_utf_normalizer_test extends phpbb_test_case +{ +	static public function setUpBeforeClass() +	{ +		self::download('http://www.unicode.org/Public/UNIDATA/NormalizationTest.txt', __DIR__.'/data'); +		self::download('http://www.unicode.org/Public/UNIDATA/UnicodeData.txt', __DIR__.'/data'); +	} + +	public function test_normalizer() +	{ +		$test_suite = array( +			/** +			* NFC +			*   c2 ==  NFC(c1) ==  NFC(c2) ==  NFC(c3) +			*   c4 ==  NFC(c4) ==  NFC(c5) +			*/ +			'NFC'	=>	array( +				'c2'	=>	array('c1', 'c2', 'c3'), +				'c4'	=>	array('c4', 'c5') +			), + +			/** +			* NFD +			*   c3 ==  NFD(c1) ==  NFD(c2) ==  NFD(c3) +			*   c5 ==  NFD(c4) ==  NFD(c5) +			*/ +			'NFD'	=>	array( +				'c3'	=>	array('c1', 'c2', 'c3'), +				'c5'	=>	array('c4', 'c5') +			), + +			/** +			* NFKC +			*   c4 == NFKC(c1) == NFKC(c2) == NFKC(c3) == NFKC(c4) == NFKC(c5) +			*/ +			'NFKC'	=>	array( +				'c4'	=>	array('c1', 'c2', 'c3', 'c4', 'c5') +			), + +			/** +			* NFKD +			*   c5 == NFKD(c1) == NFKD(c2) == NFKD(c3) == NFKD(c4) == NFKD(c5) +			*/ +			'NFKD'	=>	array( +				'c5'	=>	array('c1', 'c2', 'c3', 'c4', 'c5') +			) +		); + +		$tested_chars = array(); + +		$fp = fopen(__DIR__.'/data/NormalizationTest.txt', 'rb'); +		while (!feof($fp)) +		{ +			$line = fgets($fp); + +			if ($line[0] == '@') +			{ +				continue; +			} + +			if (!strpos(' 0123456789ABCDEF', $line[0])) +			{ +				continue; +			} + +			list($c1, $c2, $c3, $c4, $c5) = explode(';', $line); + +			if (!strpos($c1, ' ')) +			{ +				/** +				* We are currently testing a single character, we add it to the list of +				* characters we have processed so that we can exclude it when testing +				* for invariants +				*/ +				$tested_chars[$c1] = 1; +			} + +			foreach ($test_suite as $form => $serie) +			{ +				foreach ($serie as $expected => $tests) +				{ +					$hex_expected = ${$expected}; +					$utf_expected = $this->hexseq_to_utf($hex_expected); + +					foreach ($tests as $test) +					{ +						$utf_result = $utf_expected; +						call_user_func(array('utf_normalizer', $form), &$utf_result); + +						$hex_result = $this->utf_to_hexseq($utf_result); +						$this->assertEquals($utf_expected, $utf_result, "$expected == $form($test) ($hex_expected != $hex_result)"); +					} +				} +			} +		} +		fclose($fp); + +		return $tested_chars; +	} + +	/** +	* @depends test_normalizer +	*/ +	public function test_invariants(array $tested_chars) +	{ +		$fp = fopen(__DIR__.'/data/UnicodeData.txt', 'rb'); + +		while (!feof($fp)) +		{ +			$line = fgets($fp, 1024); + +			if (!$pos = strpos($line, ';')) +			{ +				continue; +			} + +			$hex_tested = $hex_expected = substr($line, 0, $pos); + +			if (isset($tested_chars[$hex_tested])) +			{ +				continue; +			} + +			$utf_expected = $this->hex_to_utf($hex_expected); + +			if ($utf_expected >= UTF8_SURROGATE_FIRST +			 && $utf_expected <= UTF8_SURROGATE_LAST) +			{ +				/** +				* Surrogates are illegal on their own, we expect the normalizer +				* to return a replacement char +				*/ +				$utf_expected = UTF8_REPLACEMENT; +				$hex_expected = $this->utf_to_hexseq($utf_expected); +			} + +			foreach (array('nfc', 'nfkc', 'nfd', 'nfkd') as $form) +			{ +				$utf_result = $utf_expected; +				call_user_func(array('utf_normalizer', $form), &$utf_result); +				$hex_result = $this->utf_to_hexseq($utf_result); + +				$this->assertEquals($utf_expected, $utf_result, "$hex_expected == $form($hex_tested) ($hex_expected != $hex_result)"); +			} +		} +		fclose($fp); +	} + +	/** +	* Convert a UTF string to a sequence of codepoints in hexadecimal +	* +	* @param	string	$utf	UTF string +	* @return	integer			Unicode codepoints in hex +	*/ +	protected function utf_to_hexseq($str) +	{ +		$pos = 0; +		$len = strlen($str); +		$ret = array(); + +		while ($pos < $len) +		{ +			$c = $str[$pos]; +			switch ($c & "\xF0") +			{ +				case "\xC0": +				case "\xD0": +					$utf_char = substr($str, $pos, 2); +					$pos += 2; +					break; + +				case "\xE0": +					$utf_char = substr($str, $pos, 3); +					$pos += 3; +					break; + +				case "\xF0": +					$utf_char = substr($str, $pos, 4); +					$pos += 4; +					break; + +				default: +					$utf_char = $c; +					++$pos; +			} + +			$hex = dechex($this->utf_to_cp($utf_char)); + +			if (!isset($hex[3])) +			{ +				$hex = substr('000' . $hex, -4); +			} + +			$ret[] = $hex; +		} + +		return strtr(implode(' ', $ret), 'abcdef', 'ABCDEF'); +	} + +	/** +	* Convert a UTF-8 char to its codepoint +	* +	* @param	string	$utf_char	UTF-8 char +	* @return	integer				Unicode codepoint +	*/ +	protected function utf_to_cp($utf_char) +	{ +		switch (strlen($utf_char)) +		{ +			case 1: +				return ord($utf_char); + +			case 2: +				return ((ord($utf_char[0]) & 0x1F) << 6) | (ord($utf_char[1]) & 0x3F); + +			case 3: +				return ((ord($utf_char[0]) & 0x0F) << 12) | ((ord($utf_char[1]) & 0x3F) << 6) | (ord($utf_char[2]) & 0x3F); + +			case 4: +				return ((ord($utf_char[0]) & 0x07) << 18) | ((ord($utf_char[1]) & 0x3F) << 12) | ((ord($utf_char[2]) & 0x3F) << 6) | (ord($utf_char[3]) & 0x3F); + +			default: +				throw new RuntimeException('UTF-8 chars can only be 1-4 bytes long'); +		} +	} + +	/** +	* Return a UTF string formed from a sequence of codepoints in hexadecimal +	* +	* @param	string	$seq		Sequence of codepoints, separated with a space +	* @return	string				UTF-8 string +	*/ +	protected function hexseq_to_utf($seq) +	{ +		return implode('', array_map(array($this, 'hex_to_utf'), explode(' ', $seq))); +	} + +	/** +	* Convert a codepoint in hexadecimal to a UTF-8 char +	* +	* @param	string	$hex		Codepoint, in hexadecimal +	* @return	string				UTF-8 char +	*/ +	protected function hex_to_utf($hex) +	{ +		return $this->cp_to_utf(hexdec($hex)); +	} + +	/** +	* Convert a codepoint to a UTF-8 char +	* +	* @param	integer	$cp			Unicode codepoint +	* @return	string				UTF-8 string +	*/ +	protected function cp_to_utf($cp) +	{ +		if ($cp > 0xFFFF) +		{ +			return chr(0xF0 | ($cp >> 18)) . chr(0x80 | (($cp >> 12) & 0x3F)) . chr(0x80 | (($cp >> 6) & 0x3F)) . chr(0x80 | ($cp & 0x3F)); +		} +		else if ($cp > 0x7FF) +		{ +			return chr(0xE0 | ($cp >> 12)) . chr(0x80 | (($cp >> 6) & 0x3F)) . chr(0x80 | ($cp & 0x3F)); +		} +		else if ($cp > 0x7F) +		{ +			return chr(0xC0 | ($cp >> 6)) . chr(0x80 | ($cp & 0x3F)); +		} +		else +		{ +			return chr($cp); +		} +	} + +	// chunked download helper +	static protected function download($url, $to) +	{ +		$target = $to . '/' . basename($url); + +		if (file_exists($target)) +		{ +			return; +		} + +		if (!$fpr = fopen($url, 'rb')) +		{ +			throw new RuntimeException("Failed to download $url"); +		} + +		if (!$fpw = fopen($target, 'wb')) +		{ +			throw new RuntimeException("Failed to open $target for writing"); +		} + +		$chunk = 32768; + +		while (!feof($fpr)) +		{ +			fwrite($fpw, fread($fpr, $chunk)); +		} +		fclose($fpr); +		fclose($fpw); +	} +}  | 
