From c1531d9925585dbbbaa395ba10f16e7575aa1272 Mon Sep 17 00:00:00 2001 From: David M Date: Thu, 12 Jul 2007 17:26:48 +0000 Subject: #13179 git-svn-id: file:///svn/phpbb/trunk@7876 89ea8834-ac86-4346-8a33-228a782c2dd0 --- phpBB/develop/create_schema_files.php | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) (limited to 'phpBB/develop') diff --git a/phpBB/develop/create_schema_files.php b/phpBB/develop/create_schema_files.php index 1261d4f57e..7f8b091324 100644 --- a/phpBB/develop/create_schema_files.php +++ b/phpBB/develop/create_schema_files.php @@ -1028,7 +1028,7 @@ function get_schema_struct() 'forum_desc_uid' => array('VCHAR:5', ''), 'forum_link' => array('VCHAR_UNI', ''), 'forum_password' => array('VCHAR_UNI:40', ''), - 'forum_style' => array('TINT:4', 0), + 'forum_style' => array('USINT', 0), 'forum_image' => array('VCHAR', ''), 'forum_rules' => array('TEXT_UNI', ''), 'forum_rules_link' => array('VCHAR_UNI', ''), @@ -1556,13 +1556,13 @@ function get_schema_struct() $schema_data['phpbb_styles'] = array( 'COLUMNS' => array( - 'style_id' => array('TINT:4', NULL, 'auto_increment'), + 'style_id' => array('USINT', NULL, 'auto_increment'), 'style_name' => array('VCHAR_UNI:255', ''), 'style_copyright' => array('VCHAR_UNI', ''), 'style_active' => array('BOOL', 1), - 'template_id' => array('TINT:4', 0), - 'theme_id' => array('TINT:4', 0), - 'imageset_id' => array('TINT:4', 0), + 'template_id' => array('USINT', 0), + 'theme_id' => array('USINT', 0), + 'imageset_id' => array('USINT', 0), ), 'PRIMARY_KEY' => 'style_id', 'KEYS' => array( @@ -1575,7 +1575,7 @@ function get_schema_struct() $schema_data['phpbb_styles_template'] = array( 'COLUMNS' => array( - 'template_id' => array('TINT:4', NULL, 'auto_increment'), + 'template_id' => array('USINT', NULL, 'auto_increment'), 'template_name' => array('VCHAR_UNI:255', ''), 'template_copyright' => array('VCHAR_UNI', ''), 'template_path' => array('VCHAR:100', ''), @@ -1590,7 +1590,7 @@ function get_schema_struct() $schema_data['phpbb_styles_template_data'] = array( 'COLUMNS' => array( - 'template_id' => array('TINT:4', NULL, 'auto_increment'), + 'template_id' => array('USINT', NULL, 'auto_increment'), 'template_filename' => array('VCHAR:100', ''), 'template_included' => array('TEXT', ''), 'template_mtime' => array('TIMESTAMP', 0), @@ -1604,7 +1604,7 @@ function get_schema_struct() $schema_data['phpbb_styles_theme'] = array( 'COLUMNS' => array( - 'theme_id' => array('TINT:4', NULL, 'auto_increment'), + 'theme_id' => array('USINT', NULL, 'auto_increment'), 'theme_name' => array('VCHAR_UNI:255', ''), 'theme_copyright' => array('VCHAR_UNI', ''), 'theme_path' => array('VCHAR:100', ''), @@ -1620,7 +1620,7 @@ function get_schema_struct() $schema_data['phpbb_styles_imageset'] = array( 'COLUMNS' => array( - 'imageset_id' => array('TINT:4', NULL, 'auto_increment'), + 'imageset_id' => array('USINT', NULL, 'auto_increment'), 'imageset_name' => array('VCHAR_UNI:255', ''), 'imageset_copyright' => array('VCHAR_UNI', ''), 'imageset_path' => array('VCHAR:100', ''), @@ -1639,7 +1639,7 @@ function get_schema_struct() 'image_lang' => array('VCHAR:30', ''), 'image_height' => array('USINT', 0), 'image_width' => array('USINT', 0), - 'imageset_id' => array('TINT:4', 0), + 'imageset_id' => array('USINT', 0), ), 'PRIMARY_KEY' => 'image_id', 'KEYS' => array( @@ -1777,7 +1777,7 @@ function get_schema_struct() 'user_timezone' => array('DECIMAL', 0), 'user_dst' => array('BOOL', 0), 'user_dateformat' => array('VCHAR_UNI:30', 'd M Y H:i'), - 'user_style' => array('TINT:4', 0), + 'user_style' => array('USINT', 0), 'user_rank' => array('UINT', 0), 'user_colour' => array('VCHAR:6', ''), 'user_new_privmsg' => array('TINT:4', 0), -- cgit v1.2.1 From 909e195a9b54f38294f217ee8e10b17a62876756 Mon Sep 17 00:00:00 2001 From: Nils Adermann Date: Sun, 15 Jul 2007 20:53:27 +0000 Subject: - search result extract shouldn't end in the middle of a multibyte character [Bug #11863] - missing localisation for an imageset shouldn't create lots of "imageset refreshed" log messages [Bug #12027] - explain that themes which need parsing cannot be stored on the filesystem [Bug #11134] - normalize usernames (we really need to make sure we normalize everything) - improved utf8_clean_string, more complete list of homographs and NFKC normalization, also the resulting string is now trimmed - corrected searching subforums explanation [Bug #12209] git-svn-id: file:///svn/phpbb/trunk@7890 89ea8834-ac86-4346-8a33-228a782c2dd0 --- phpBB/develop/generate_utf_confusables.php | 211 +++++++++++++++++++++++++++++ phpBB/develop/unicode_testing.php | 120 ++++++++++++++++ 2 files changed, 331 insertions(+) create mode 100644 phpBB/develop/generate_utf_confusables.php create mode 100644 phpBB/develop/unicode_testing.php (limited to 'phpBB/develop') diff --git a/phpBB/develop/generate_utf_confusables.php b/phpBB/develop/generate_utf_confusables.php new file mode 100644 index 0000000000..c4ffd21fef --- /dev/null +++ b/phpBB/develop/generate_utf_confusables.php @@ -0,0 +1,211 @@ + 0xFFFF) + { + return chr(0xF0 | ($cp >> 18)) . chr(0x80 | (($cp >> 12) & 0x3F)) . chr(0x80 | (($cp >> 6) & 0x3F)) . chr(0x80 | ($cp & 0x3F)); + } + else if ($cp > 0x7FF) + { + return chr(0xE0 | ($cp >> 12)) . chr(0x80 | (($cp >> 6) & 0x3F)) . chr(0x80 | ($cp & 0x3F)); + } + else if ($cp > 0x7F) + { + return chr(0xC0 | ($cp >> 6)) . chr(0x80 | ($cp & 0x3F)); + } + else + { + return chr($cp); + } +} + +preg_match_all('/^([0-9A-F]+) ;\s((?:[0-9A-F]+ )*);/im', $unidata, $array, PREG_SET_ORDER); + +// some that we defined ourselves +$uniarray = array( + "\xC2\xA1" => "\x69", // EXCLAMATION MARK, INVERTED => LATIN SMALL LETTER I + "\xC7\x83" => "\x21", // LATIN LETTER RETROFLEX CLICK => EXCLAMATION MARK + "\xCE\xB1" => "\x61", // GREEK SMALL LETTER ALPHA => LATIN SMALL LETTER A + "\xE1\x9A\x80" => "\x20", // OGHAM SPACE MARK + + "\xC2\xAD" => '', // HYPHEN, SOFT => empty string + "\xDB\x9D" => '', // ARABIC END OF AYAH + "\xDC\x8F" => '', // SYRIAC ABBREVIATION MARK + "\xE1\xA0\x86" => '', // MONGOLIAN TODO SOFT HYPHEN + "\xE1\xA0\x8E" => '', // MONGOLIAN VOWEL SEPARATOR + "\xE2\x80\x8B" => '', // ZERO WIDTH SPACE + "\xE2\x80\x8C" => '', // ZERO WIDTH NON-JOINER + "\xE2\x80\x8D" => '', // ZERO WIDTH JOINER + "\xE2\x80\xA8" => '', // LINE SEPARATOR + "\xE2\x80\xA9" => '', // PARAGRAPH SEPARATOR + "\xE2\x81\xA0" => '', // WORD JOINER + "\xE2\x81\xA1" => '', // FUNCTION APPLICATION + "\xE2\x81\xA2" => '', // INVISIBLE TIMES + "\xE2\x81\xA3" => '', // INVISIBLE SEPARATOR + "\xE2\x81\xAA" => '', // [CONTROL CHARACTERS] + "\xE2\x81\xAB" => '', // [CONTROL CHARACTERS] + "\xE2\x81\xAC" => '', // [CONTROL CHARACTERS] + "\xE2\x81\xAD" => '', // [CONTROL CHARACTERS] + "\xE2\x81\xAE" => '', // [CONTROL CHARACTERS] + "\xE2\x81\xAF" => '', // [CONTROL CHARACTERS] + "\xEF\xBB\xBF" => '', // ZERO WIDTH NO-BREAK SPACE + "\xEF\xBF\xB9" => '', // [CONTROL CHARACTERS] + "\xEF\xBF\xBA" => '', // [CONTROL CHARACTERS] + "\xEF\xBF\xBB" => '', // [CONTROL CHARACTERS] + "\xEF\xBF\xBC" => '', // [CONTROL CHARACTERS] + "\xF0\x9D\x85\xB3" => '', // [MUSICAL CONTROL CHARACTERS] + "\xF0\x9D\x85\xB4" => '', // [MUSICAL CONTROL CHARACTERS] + "\xF0\x9D\x85\xB5" => '', // [MUSICAL CONTROL CHARACTERS] + "\xF0\x9D\x85\xB6" => '', // [MUSICAL CONTROL CHARACTERS] + "\xF0\x9D\x85\xB7" => '', // [MUSICAL CONTROL CHARACTERS] + "\xF0\x9D\x85\xB8" => '', // [MUSICAL CONTROL CHARACTERS] + "\xF0\x9D\x85\xB9" => '', // [MUSICAL CONTROL CHARACTERS] + "\xF0\x9D\x85\xBA" => '', // [MUSICAL CONTROL CHARACTERS] +); + +$copy = $uniarray; + +foreach ($array as $value) +{ + if (isset($copy[utf8_chr(hexdec((string)$value[1]))])) + { + $num = ''; + $string = utf8_chr(hexdec((string)$value[1])); + for ($i = 0; $i < strlen($string); $i++) + { + $num .= '\x' . str_pad(base_convert(ord($string[$i]), 10, 16), 2, '0', STR_PAD_LEFT); + } + echo $num . "\n"; + if ($uniarray[$string] != implode(array_map('utf8_chr', array_map('hexdec', explode(' ', trim($value[2])))))) + { + echo " --> $string\n"; + echo " --> " . implode(array_map('utf8_chr', array_map('hexdec', explode(' ', trim($value[2]))))) . "\n"; + } + } + $uniarray[utf8_chr(hexdec((string)$value[1]))] = implode(array_map('utf8_chr', array_map('hexdec', explode(' ', trim($value[2]))))); +} + +echo "Writing to confusables.$phpEx\n"; + +$fp = fopen($phpbb_root_path . 'includes/utf/data/confusables.' . $phpEx, 'wb'); +fwrite($fp, ' $v) + { + $lines[] = my_var_export($k) . '=>' . my_var_export($v); + } + + return 'array(' . implode(',', $lines) . ')'; + } + else if (is_string($var)) + { + return "'" . str_replace(array('\\', "'"), array('\\\\', "\\'"), $var) . "'"; + } + else + { + return $var; + } +} + +/** +* Download a file to the develop/ dir +* +* @param string $url URL of the file to download +* @return void +*/ +function download($url) +{ + global $phpbb_root_path; + + if (file_exists($phpbb_root_path . 'develop/' . basename($url))) + { + return; + } + + echo 'Downloading from ', $url, ' '; + + if (!$fpr = fopen($url, 'rb')) + { + die("Can't download from $url\nPlease download it yourself and put it in the develop/ dir, kthxbai"); + } + + if (!$fpw = fopen($phpbb_root_path . 'develop/' . basename($url), 'wb')) + { + die("Can't open develop/" . basename($url) . " for output... please check your permissions or something"); + } + + $i = 0; + $chunk = 32768; + $done = ''; + + while (!feof($fpr)) + { + $i += fwrite($fpw, fread($fpr, $chunk)); + echo str_repeat("\x08", strlen($done)); + + $done = ($i >> 10) . ' KiB'; + echo $done; + } + fclose($fpr); + fclose($fpw); + + echo "\n"; +} + +?> \ No newline at end of file diff --git a/phpBB/develop/unicode_testing.php b/phpBB/develop/unicode_testing.php new file mode 100644 index 0000000000..25a13d1325 --- /dev/null +++ b/phpBB/develop/unicode_testing.php @@ -0,0 +1,120 @@ + $string) + { + utf_normalizer::nfkc($strings[$key]); + } + } + + return $strings; +} + +?> \ No newline at end of file -- cgit v1.2.1 From 5c14f323cd1fac0ebe3a74226dab6adf125dad65 Mon Sep 17 00:00:00 2001 From: David M Date: Thu, 19 Jul 2007 19:57:01 +0000 Subject: - replace all of the mapings that transform some sort of letter l to number one instead to some sort of letter l to latin small letter l git-svn-id: file:///svn/phpbb/trunk@7906 89ea8834-ac86-4346-8a33-228a782c2dd0 --- phpBB/develop/generate_utf_confusables.php | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) (limited to 'phpBB/develop') diff --git a/phpBB/develop/generate_utf_confusables.php b/phpBB/develop/generate_utf_confusables.php index c4ffd21fef..39a9a386b4 100644 --- a/phpBB/develop/generate_utf_confusables.php +++ b/phpBB/develop/generate_utf_confusables.php @@ -10,7 +10,7 @@ if (php_sapi_name() != 'cli') { - die("This program must be run from the command line.\n"); +// die("This program must be run from the command line.\n"); } // @@ -20,7 +20,7 @@ if (php_sapi_name() != 'cli') // Remove or comment the next line (die(".... ) to enable this script. // Do NOT FORGET to either remove this script or disable it after you have used it. // -die("Please read the first lines of this script for instructions on how to enable it"); +//die("Please read the first lines of this script for instructions on how to enable it"); set_time_limit(0); @@ -60,7 +60,7 @@ function utf8_chr($cp) } } -preg_match_all('/^([0-9A-F]+) ;\s((?:[0-9A-F]+ )*);/im', $unidata, $array, PREG_SET_ORDER); +preg_match_all('/^([0-9A-F]+) ;\s((?:[0-9A-F]+ )*);.*?$/im', $unidata, $array, PREG_SET_ORDER); // some that we defined ourselves $uniarray = array( @@ -108,6 +108,8 @@ $copy = $uniarray; foreach ($array as $value) { + $temp_hold = implode(array_map('utf8_chr', array_map('hexdec', explode(' ', trim($value[2]))))); + if (isset($copy[utf8_chr(hexdec((string)$value[1]))])) { $num = ''; @@ -117,13 +119,24 @@ foreach ($array as $value) $num .= '\x' . str_pad(base_convert(ord($string[$i]), 10, 16), 2, '0', STR_PAD_LEFT); } echo $num . "\n"; - if ($uniarray[$string] != implode(array_map('utf8_chr', array_map('hexdec', explode(' ', trim($value[2])))))) + if ($uniarray[$string] != $temp_hold) { echo " --> $string\n"; - echo " --> " . implode(array_map('utf8_chr', array_map('hexdec', explode(' ', trim($value[2]))))) . "\n"; + echo " --> " . $temp_hold . "\n"; } } - $uniarray[utf8_chr(hexdec((string)$value[1]))] = implode(array_map('utf8_chr', array_map('hexdec', explode(' ', trim($value[2]))))); + + // do some tests for things that transform into something with the number one + if (strpos($temp_hold, utf8_chr(0x0031)) !== false) + { + // any kind of letter L? + if (strpos($value[0], 'LETTER L') !== false || strpos($value[0], 'IOTA') !== false || strpos($value[0], 'SMALL L ') !== false || preg_match('/SMALL LIGATURE [^L]*L /', $value[0])) + { + // replace all of the mappings that transform some sort of letter l to number one instead to some sort of letter l to latin small letter l + $temp_hold = str_replace(utf8_chr(0x0031), utf8_chr(0x006C), $temp_hold); + } + } + $uniarray[utf8_chr(hexdec((string)$value[1]))] = $temp_hold; } echo "Writing to confusables.$phpEx\n"; -- cgit v1.2.1 From e9b908174cbeed0c780c56d6d74b761e40d8b594 Mon Sep 17 00:00:00 2001 From: David M Date: Thu, 19 Jul 2007 20:01:19 +0000 Subject: yay for security! git-svn-id: file:///svn/phpbb/trunk@7907 89ea8834-ac86-4346-8a33-228a782c2dd0 --- phpBB/develop/generate_utf_confusables.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'phpBB/develop') diff --git a/phpBB/develop/generate_utf_confusables.php b/phpBB/develop/generate_utf_confusables.php index 39a9a386b4..e2fd5bbaa3 100644 --- a/phpBB/develop/generate_utf_confusables.php +++ b/phpBB/develop/generate_utf_confusables.php @@ -10,7 +10,7 @@ if (php_sapi_name() != 'cli') { -// die("This program must be run from the command line.\n"); + die("This program must be run from the command line.\n"); } // @@ -20,7 +20,7 @@ if (php_sapi_name() != 'cli') // Remove or comment the next line (die(".... ) to enable this script. // Do NOT FORGET to either remove this script or disable it after you have used it. // -//die("Please read the first lines of this script for instructions on how to enable it"); +die("Please read the first lines of this script for instructions on how to enable it"); set_time_limit(0); -- cgit v1.2.1 From 870991c0608caf7467018245a0a4e1f1d55efd12 Mon Sep 17 00:00:00 2001 From: David M Date: Thu, 19 Jul 2007 20:38:08 +0000 Subject: let's remove 350+ impossible entries git-svn-id: file:///svn/phpbb/trunk@7908 89ea8834-ac86-4346-8a33-228a782c2dd0 --- phpBB/develop/generate_utf_confusables.php | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) (limited to 'phpBB/develop') diff --git a/phpBB/develop/generate_utf_confusables.php b/phpBB/develop/generate_utf_confusables.php index e2fd5bbaa3..908ebbf6f4 100644 --- a/phpBB/develop/generate_utf_confusables.php +++ b/phpBB/develop/generate_utf_confusables.php @@ -30,15 +30,22 @@ $phpEx = substr(strrchr(__FILE__, '.'), 1); echo "Checking for required files\n"; download('http://unicode.org/reports/tr39/data/confusables.txt'); +download('http://unicode.org/Public/UNIDATA/CaseFolding.txt'); echo "\n"; /** -* Load the CaseFolding table +* Load the confusables table */ echo "Loading confusables\n"; $unidata = file_get_contents('confusables.txt'); +/** +* Load the CaseFolding table +*/ +echo "Loading CaseFolding\n"; +$casefolds = file_get_contents('CaseFolding.txt'); + function utf8_chr($cp) { @@ -61,6 +68,7 @@ function utf8_chr($cp) } preg_match_all('/^([0-9A-F]+) ;\s((?:[0-9A-F]+ )*);.*?$/im', $unidata, $array, PREG_SET_ORDER); +preg_match_all('/^([0-9A-F]+); ([CFS]); ([0-9A-F]+(?: [0-9A-F]+)*);/im', $casefolds, $casefold_array); // some that we defined ourselves $uniarray = array( @@ -136,6 +144,14 @@ foreach ($array as $value) $temp_hold = str_replace(utf8_chr(0x0031), utf8_chr(0x006C), $temp_hold); } } + + // uppercased chars that were folded do not exist in this universe, + // no amount of normalization could ever "trick" this into not working + if (in_array($value[1], $casefold_array[1])) + { + continue; + } + $uniarray[utf8_chr(hexdec((string)$value[1]))] = $temp_hold; } -- cgit v1.2.1 From 1db9e62e084e29638e785c93f917e6e2acc13986 Mon Sep 17 00:00:00 2001 From: Meik Sievertsen Date: Sat, 28 Jul 2007 11:47:02 +0000 Subject: same right for all. ;) Groups table using same column types as the users table for avatars. git-svn-id: file:///svn/phpbb/trunk@7968 89ea8834-ac86-4346-8a33-228a782c2dd0 --- phpBB/develop/create_schema_files.php | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'phpBB/develop') diff --git a/phpBB/develop/create_schema_files.php b/phpBB/develop/create_schema_files.php index 7f8b091324..1675fc0899 100644 --- a/phpBB/develop/create_schema_files.php +++ b/phpBB/develop/create_schema_files.php @@ -14,7 +14,7 @@ die("Please read the first lines of this script for instructions on how to enable it"); -set_time_limit(0); +@set_time_limit(0); $schema_path = './../install/schemas/'; @@ -1107,9 +1107,9 @@ function get_schema_struct() 'group_desc_uid' => array('VCHAR:5', ''), 'group_display' => array('BOOL', 0), 'group_avatar' => array('VCHAR', ''), - 'group_avatar_type' => array('TINT:4', 0), - 'group_avatar_width' => array('TINT:4', 0), - 'group_avatar_height' => array('TINT:4', 0), + 'group_avatar_type' => array('TINT:2', 0), + 'group_avatar_width' => array('USINT', 0), + 'group_avatar_height' => array('USINT', 0), 'group_rank' => array('UINT', 0), 'group_colour' => array('VCHAR:6', ''), 'group_sig_chars' => array('UINT', 0), -- cgit v1.2.1