Added: finished UTF normalization conformance tests

Added: generate_utf_files.php now also generates the files needed by the search indexer

git-svn-id: file:///svn/phpbb/trunk@6162 89ea8834-ac86-4346-8a33-228a782c2dd0
author: Ludovic Arnaud <ludovic_arnaud@users.sourceforge.net> 2006-07-10 01:40:59 +0000
committer: Ludovic Arnaud <ludovic_arnaud@users.sourceforge.net> 2006-07-10 01:40:59 +0000
commit: 8c5b957228cb2f4e183387ea4d29df445ff682fa (patch)
tree: 38d4e40fe904074a4fc85cb458eb68b4e84291c4 /phpBB/develop/utf_normalizer_test.php
parent: 46af817cb058e2eecd89081af4a40075426a32ef (diff)
download: forums-8c5b957228cb2f4e183387ea4d29df445ff682fa.tar
forums-8c5b957228cb2f4e183387ea4d29df445ff682fa.tar.gz
forums-8c5b957228cb2f4e183387ea4d29df445ff682fa.tar.bz2
forums-8c5b957228cb2f4e183387ea4d29df445ff682fa.tar.xz
forums-8c5b957228cb2f4e183387ea4d29df445ff682fa.zip
1 file changed, 77 insertions, 2 deletions
diff --git a/phpBB/develop/utf_normalizer_test.php b/phpBB/develop/utf_normalizer_test.php
index 1878f74dbc..63c73c05e4 100644
--- a/phpBB/develop/utf_normalizer_test.php
+++ b/phpBB/develop/utf_normalizer_test.php
@@ -72,6 +72,7 @@ require_once($phpbb_root_path . 'includes/utf/utf_normalizer.' . $phpEx);
 
 $i = $n = 0;
 $failed = FALSE;
+$tested_chars = array();
 
 $fp = fopen($phpbb_root_path . 'develop/NormalizationTest.txt', 'rb');
 while (!feof($fp))
@@ -103,6 +104,16 @@ while (!feof($fp))
 
 	list($c1, $c2, $c3, $c4, $c5) = explode(';', $line);
 
+	if (!strpos($c1, ' '))
+	{
+		/**
+		* We are currently testing a single character, we add it to the list of
+		* characters we have processed so that we can exclude it when testing
+		* for invariants
+		*/
+		$tested_chars[$c1] = 1;
+	}
+
 	foreach ($test_suite as $form => $serie)
 	{
 		foreach ($serie as $expected => $tests)
@@ -119,17 +130,81 @@ while (!feof($fp))
 					$failed = TRUE;
 					$hex_result = utf_to_hexseq($utf_result);
 
-					echo "FAILED $expected == $form($test) ($hex_expected != $hex_result)\n";
+					echo "\nFAILED $expected == $form($test) ($hex_expected != $hex_result)";
 				}
 			}
 		}
 
 		if ($failed)
 		{
-			die("\nFailed at line $n\n");
+			die("\n\nFailed at line $n\n");
 		}
 	}
 }
+fclose($fp);
+
+/**
+* Test for invariants
+*/
+echo "\n\nTesting for invariants...\n\n";
+
+$fp = fopen($phpbb_root_path . 'develop/UnicodeData.txt', 'rt');
+
+$n = 0;
+while (!feof($fp))
+{
+	if (++$n % 100 == 0)
+	{
+		echo $n, ' ';
+	}
+
+	$line = fgets($fp, 1024);
+
+	if (!$pos = strpos($line, ';'))
+	{
+		continue;
+	}
+
+	$hex_tested = $hex_expected = substr($line, 0, $pos);
+
+	if (isset($tested_chars[$hex_tested]))
+	{
+		continue;
+	}
+
+	$utf_expected = hex_to_utf($hex_expected);
+
+	if ($utf_expected >= UTF8_SURROGATE_FIRST
+	 && $utf_expected <= UTF8_SURROGATE_LAST)
+	{
+		/**
+		* Surrogates are illegal on their own, we expect the normalizer
+		* to return a replacement char
+		*/
+		$utf_expected = UTF8_REPLACEMENT;
+		$hex_expected = utf_to_hexseq($utf_expected);
+	}
+
+	foreach (array('nfc', 'nfkc', 'nfd', 'nfkd') as $form)
+	{
+		$utf_result = utf_normalizer::$form($utf_expected);
+		$hex_result = utf_to_hexseq($utf_result);
+//		echo "$form($utf_expected) == $utf_result\n";
+
+		if (strcmp($utf_expected, $utf_result))
+		{
+			$failed = 1;
+
+			echo "\nFAILED $hex_expected == $form($hex_tested) ($hex_expected != $hex_result)";
+		}
+	}
+
+	if ($failed)
+	{
+		die("\n\nFailed at line $n\n");
+	}
+}
+fclose($fp);
 
 die("\n\nALL TESTS PASSED SUCCESSFULLY\n");
author	Ludovic Arnaud <ludovic_arnaud@users.sourceforge.net>	2006-07-10 01:40:59 +0000
committer	Ludovic Arnaud <ludovic_arnaud@users.sourceforge.net>	2006-07-10 01:40:59 +0000
commit	8c5b957228cb2f4e183387ea4d29df445ff682fa (patch)
tree	38d4e40fe904074a4fc85cb458eb68b4e84291c4 /phpBB/develop/utf_normalizer_test.php
parent	46af817cb058e2eecd89081af4a40075426a32ef (diff)
download	forums-8c5b957228cb2f4e183387ea4d29df445ff682fa.tar forums-8c5b957228cb2f4e183387ea4d29df445ff682fa.tar.gz forums-8c5b957228cb2f4e183387ea4d29df445ff682fa.tar.bz2 forums-8c5b957228cb2f4e183387ea4d29df445ff682fa.tar.xz forums-8c5b957228cb2f4e183387ea4d29df445ff682fa.zip