aboutsummaryrefslogtreecommitdiffstats
path: root/phpBB/develop/utf_normalizer_test.php
diff options
context:
space:
mode:
authorLudovic Arnaud <ludovic_arnaud@users.sourceforge.net>2006-07-10 01:40:59 +0000
committerLudovic Arnaud <ludovic_arnaud@users.sourceforge.net>2006-07-10 01:40:59 +0000
commit8c5b957228cb2f4e183387ea4d29df445ff682fa (patch)
tree38d4e40fe904074a4fc85cb458eb68b4e84291c4 /phpBB/develop/utf_normalizer_test.php
parent46af817cb058e2eecd89081af4a40075426a32ef (diff)
downloadforums-8c5b957228cb2f4e183387ea4d29df445ff682fa.tar
forums-8c5b957228cb2f4e183387ea4d29df445ff682fa.tar.gz
forums-8c5b957228cb2f4e183387ea4d29df445ff682fa.tar.bz2
forums-8c5b957228cb2f4e183387ea4d29df445ff682fa.tar.xz
forums-8c5b957228cb2f4e183387ea4d29df445ff682fa.zip
Added: finished UTF normalization conformance tests
Added: now generate_utf_files.php also generates the files needed by the search indexer git-svn-id: file:///svn/phpbb/trunk@6162 89ea8834-ac86-4346-8a33-228a782c2dd0
Diffstat (limited to 'phpBB/develop/utf_normalizer_test.php')
-rw-r--r--phpBB/develop/utf_normalizer_test.php79
1 files changed, 77 insertions, 2 deletions
diff --git a/phpBB/develop/utf_normalizer_test.php b/phpBB/develop/utf_normalizer_test.php
index 1878f74dbc..63c73c05e4 100644
--- a/phpBB/develop/utf_normalizer_test.php
+++ b/phpBB/develop/utf_normalizer_test.php
@@ -72,6 +72,7 @@ require_once($phpbb_root_path . 'includes/utf/utf_normalizer.' . $phpEx);
$i = $n = 0;
$failed = FALSE;
+$tested_chars = array();
$fp = fopen($phpbb_root_path . 'develop/NormalizationTest.txt', 'rb');
while (!feof($fp))
@@ -103,6 +104,16 @@ while (!feof($fp))
list($c1, $c2, $c3, $c4, $c5) = explode(';', $line);
+ if (!strpos($c1, ' '))
+ {
+ /**
+ * We are currently testing a single character, we add it to the list of
+ * characters we have processed so that we can exclude it when testing
+ * for invariants
+ */
+ $tested_chars[$c1] = 1;
+ }
+
foreach ($test_suite as $form => $serie)
{
foreach ($serie as $expected => $tests)
@@ -119,17 +130,81 @@ while (!feof($fp))
$failed = TRUE;
$hex_result = utf_to_hexseq($utf_result);
- echo "FAILED $expected == $form($test) ($hex_expected != $hex_result)\n";
+ echo "\nFAILED $expected == $form($test) ($hex_expected != $hex_result)";
}
}
}
if ($failed)
{
- die("\nFailed at line $n\n");
+ die("\n\nFailed at line $n\n");
}
}
}
+fclose($fp);
+
+/**
+* Test for invariants
+*/
+echo "\n\nTesting for invariants...\n\n";
+
+$fp = fopen($phpbb_root_path . 'develop/UnicodeData.txt', 'rt');
+
+$n = 0;
+while (!feof($fp))
+{
+ if (++$n % 100 == 0)
+ {
+ echo $n, ' ';
+ }
+
+ $line = fgets($fp, 1024);
+
+ if (!$pos = strpos($line, ';'))
+ {
+ continue;
+ }
+
+ $hex_tested = $hex_expected = substr($line, 0, $pos);
+
+ if (isset($tested_chars[$hex_tested]))
+ {
+ continue;
+ }
+
+ $utf_expected = hex_to_utf($hex_expected);
+
+ if ($utf_expected >= UTF8_SURROGATE_FIRST
+ && $utf_expected <= UTF8_SURROGATE_LAST)
+ {
+ /**
+ * Surrogates are illegal on their own, we expect the normalizer
+ * to return a replacement char
+ */
+ $utf_expected = UTF8_REPLACEMENT;
+ $hex_expected = utf_to_hexseq($utf_expected);
+ }
+
+ foreach (array('nfc', 'nfkc', 'nfd', 'nfkd') as $form)
+ {
+ $utf_result = utf_normalizer::$form($utf_expected);
+ $hex_result = utf_to_hexseq($utf_result);
+// echo "$form($utf_expected) == $utf_result\n";
+
+ if (strcmp($utf_expected, $utf_result))
+ {
+ $failed = 1;
+
+ echo "\nFAILED $hex_expected == $form($hex_tested) ($hex_expected != $hex_result)";
+ }
+ }
+
+ if ($failed)
+ {
+ die("\n\nFailed at line $n\n");
+ }
+}
+fclose($fp);
die("\n\nALL TESTS PASSED SUCCESSFULLY\n");