From 2f4a618900e2c3b6ea14c68cbeb5897cd2ac1a04 Mon Sep 17 00:00:00 2001 From: Meik Sievertsen Date: Thu, 29 May 2008 12:25:56 +0000 Subject: ok... i hope i haven't messed too much with the code and everything is still working. Changes: - Ascraeus now uses constants for the phpbb root path and the php extension. This ensures more security for external applications and modifications (no more overwriting of root path and extension possible through insecure mods and register globals enabled) as well as no more globalizing needed. - A second change implemented here is an additional short-hand-notation for append_sid(). It is allowed to omit the root path and extension now (for example calling append_sid('memberlist')) - in this case the root path and extension get added automatically. The hook is called after these are added. git-svn-id: file:///svn/phpbb/trunk@8572 89ea8834-ac86-4346-8a33-228a782c2dd0 --- phpBB/develop/generate_utf_tables.php | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) (limited to 'phpBB/develop/generate_utf_tables.php') diff --git a/phpBB/develop/generate_utf_tables.php b/phpBB/develop/generate_utf_tables.php index 3d5188163d..fcf6395b24 100644 --- a/phpBB/develop/generate_utf_tables.php +++ b/phpBB/develop/generate_utf_tables.php @@ -25,8 +25,8 @@ die("Please read the first lines of this script for instructions on how to enabl set_time_limit(0); define('IN_PHPBB', true); -$phpbb_root_path = '../'; -$phpEx = substr(strrchr(__FILE__, '.'), 1); +define('PHPBB_ROOT_PATH', './../'); +define('PHP_EXT', substr(strrchr(__FILE__, '.'), 1)); echo "Checking for required files\n"; download('http://www.unicode.org/Public/UNIDATA/CompositionExclusions.txt'); @@ -34,7 +34,7 @@ download('http://www.unicode.org/Public/UNIDATA/DerivedNormalizationProps.txt'); download('http://www.unicode.org/Public/UNIDATA/UnicodeData.txt'); echo "\n"; -require_once($phpbb_root_path . 'includes/utf/utf_normalizer.' . $phpEx); +require_once(PHPBB_ROOT_PATH . 'includes/utf/utf_normalizer.' . PHP_EXT); $file_contents = array(); /** @@ -172,7 +172,7 @@ fclose($fp); * Do mappings */ echo "Loading Unicode decomposition mappings\n"; -$fp = fopen($phpbb_root_path . 'develop/UnicodeData.txt', 'rt'); +$fp = fopen(PHPBB_ROOT_PATH . 'develop/UnicodeData.txt', 'rt'); $map = array(); while (!feof($fp)) @@ -266,9 +266,9 @@ foreach ($file_contents as $file => $contents) /** * Generate a new file */ - echo "Writing to $file.$phpEx\n"; + echo "Writing to $file." . PHP_EXT . "\n"; - if (!$fp = fopen($phpbb_root_path . 'includes/utf/data/' . $file . '.' . $phpEx, 'wb')) + if (!$fp = fopen(PHPBB_ROOT_PATH . 'includes/utf/data/' . $file . '.' . PHP_EXT, 'wb')) { trigger_error('Cannot open ' . $file . ' for write'); } @@ -288,7 +288,7 @@ echo "\n*** UTF-8 normalization tables done\n\n"; */ echo "Generating search indexer tables\n"; -$fp = fopen($phpbb_root_path . 'develop/UnicodeData.txt', 'rt'); +$fp = fopen(PHPBB_ROOT_PATH . 'develop/UnicodeData.txt', 'rt'); $map = array(); while ($line = fgets($fp, 1024)) @@ -406,8 +406,8 @@ unset($map); foreach ($file_contents as $idx => $contents) { - echo "Writing to search_indexer_$idx.$phpEx\n"; - $fp = fopen($phpbb_root_path . 'includes/utf/data/search_indexer_' . $idx . '.' . $phpEx, 'wb'); + echo "Writing to search_indexer_$idx." . PHP_EXT . "\n"; + $fp = fopen(PHPBB_ROOT_PATH . 'includes/utf/data/search_indexer_' . $idx . '.' . PHP_EXT, 'wb'); fwrite($fp, ' Date: Sun, 4 Oct 2009 18:13:59 +0000 Subject: Move trunk/phpBB to old_trunk/phpBB git-svn-id: file:///svn/phpbb/trunk@10210 89ea8834-ac86-4346-8a33-228a782c2dd0 --- phpBB/develop/generate_utf_tables.php | 570 ---------------------------------- 1 file changed, 570 deletions(-) delete mode 100644 phpBB/develop/generate_utf_tables.php (limited to 'phpBB/develop/generate_utf_tables.php') diff --git a/phpBB/develop/generate_utf_tables.php b/phpBB/develop/generate_utf_tables.php deleted file mode 100644 index fcf6395b24..0000000000 --- a/phpBB/develop/generate_utf_tables.php +++ /dev/null @@ -1,570 +0,0 @@ -= UTF8_HANGUL_FIRST && $end <= UTF8_HANGUL_LAST) - { - /** - * We do not store Hangul syllables in the array - */ - continue; - } - - if ($p[2] == 'M') - { - $val = UNICODE_QC_MAYBE; - } - else - { - $val = UNICODE_QC_NO; - } - - if ($p[1] == 'NFKC_QC') - { - $file = 'utf_nfkc_qc'; - } - else - { - $file = 'utf_nfc_qc'; - } - - for ($i = $start; $i <= $end; ++$i) - { - /** - * The vars have the same name as the file: $utf_nfc_qc is in utf_nfc_qc.php - */ - $file_contents[$file][$file][cp_to_utf($i)] = $val; - } -} -fclose($fp); - -/** -* Do mappings -*/ -echo "Loading Unicode decomposition mappings\n"; -$fp = fopen(PHPBB_ROOT_PATH . 'develop/UnicodeData.txt', 'rt'); - -$map = array(); -while (!feof($fp)) -{ - $p = explode(';', fgets($fp, 1024)); - $cp = hexdec($p[0]); - - if (!empty($p[3])) - { - /** - * Store combining class > 0 - */ - $file_contents['utf_normalizer_common']['utf_combining_class'][cp_to_utf($cp)] = (int) $p[3]; - } - - if (!isset($p[5]) || !preg_match_all('#[0-9A-F]+#', strip_tags($p[5]), $m)) - { - continue; - } - - if (strpos($p[5], '>')) - { - $map['NFKD'][$cp] = implode(' ', array_map('hexdec', $m[0])); - } - else - { - $map['NFD'][$cp] = $map['NFKD'][$cp] = implode(' ', array_map('hexdec', $m[0])); - } -} -fclose($fp); - -/** -* Build the canonical composition table -*/ -echo "Generating the Canonical Composition table\n"; -foreach ($map['NFD'] as $cp => $decomp_seq) -{ - if (!strpos($decomp_seq, ' ') || isset($exclude[$cp])) - { - /** - * Singletons are excluded from canonical composition - */ - continue; - } - - $utf_seq = implode('', array_map('cp_to_utf', explode(' ', $decomp_seq))); - - if (!isset($file_contents['utf_canonical_comp']['utf_canonical_comp'][$utf_seq])) - { - $file_contents['utf_canonical_comp']['utf_canonical_comp'][$utf_seq] = cp_to_utf($cp); - } -} - -/** -* Decompose the NF[K]D mappings recursively and prepare the file contents -*/ -echo "Generating the Canonical and Compatibility Decomposition tables\n\n"; -foreach ($map as $type => $decomp_map) -{ - foreach ($decomp_map as $cp => $decomp_seq) - { - $decomp_map[$cp] = decompose($decomp_map, $decomp_seq); - } - unset($decomp_seq); - - if ($type == 'NFKD') - { - $file = 'utf_compatibility_decomp'; - $var = 'utf_compatibility_decomp'; - } - else - { - $file = 'utf_canonical_decomp'; - $var = 'utf_canonical_decomp'; - } - - /** - * Generate the corresponding file - */ - foreach ($decomp_map as $cp => $decomp_seq) - { - $file_contents[$file][$var][cp_to_utf($cp)] = implode('', array_map('cp_to_utf', explode(' ', $decomp_seq))); - } -} - -/** -* Generate and/or alter the files -*/ -foreach ($file_contents as $file => $contents) -{ - /** - * Generate a new file - */ - echo "Writing to $file." . PHP_EXT . "\n"; - - if (!$fp = fopen(PHPBB_ROOT_PATH . 'includes/utf/data/' . $file . '.' . PHP_EXT, 'wb')) - { - trigger_error('Cannot open ' . $file . ' for write'); - } - - fwrite($fp, ' $val) - { - fwrite($fp, "\n\$GLOBALS[" . my_var_export($var) . ']=' . my_var_export($val) . ";"); - } - fclose($fp); -} - -echo "\n*** UTF-8 normalization tables done\n\n"; - -/** -* Now we'll generate the files needed by the search indexer -*/ -echo "Generating search indexer tables\n"; - -$fp = fopen(PHPBB_ROOT_PATH . 'develop/UnicodeData.txt', 'rt'); - -$map = array(); -while ($line = fgets($fp, 1024)) -{ - /** - * The current line is split, $m[0] hold the codepoint in hexadecimal and - * all other fields numbered as in http://www.unicode.org/Public/UNIDATA/UCD.html#UnicodeData.txt - */ - $m = explode(';', $line); - - /** - * @var integer $cp Current char codepoint - * @var string $utf_char UTF-8 representation of current char - */ - $cp = hexdec($m[0]); - $utf_char = cp_to_utf($cp); - - /** - * $m[2] holds the "General Category" of the character - * @link http://www.unicode.org/Public/UNIDATA/UCD.html#General_Category_Values - */ - switch ($m[2][0]) - { - case 'L': - /** - * We allow all letters and map them to their lowercased counterpart on the fly - */ - $map_to_hex = (isset($m[13][0])) ? $m[13] : $m[0]; - - if (preg_match('#^LATIN.*(?:LETTER|LIGATURE) ([A-Z]{2}(?![A-Z]))$#', $m[1], $capture)) - { - /** - * Special hack for some latin ligatures. Using the name of a character - * is bad practice, but for now it works well enough. - * - * @todo Note that ligatures with combining marks such as U+01E2 are - * not supported at this time - */ - $map[$cp] = strtolower($capture[1]); - } - else if (isset($m[13][0])) - { - /** - * If the letter has a lowercased form, use it - */ - $map[$cp] = hex_to_utf($m[13]); - } - else - { - /** - * In all other cases, map the letter to itself - */ - $map[$cp] = $utf_char; - } - break; - - case 'M': - /** - * We allow all marks, they are mapped to themselves - */ - $map[$cp] = $utf_char; - break; - - case 'N': - /** - * We allow all numbers, but we map them to their numeric value whenever - * possible. The numeric value (field #8) is in ASCII already - * - * @todo Note that fractions such as U+00BD will be converted to something - * like "1/2", with a slash. However, "1/2" entered in ASCII is converted - * to "1 2". This will have to be fixed. - */ - $map[$cp] = (isset($m[8][0])) ? $m[8] : $utf_char; - break; - - default: - /** - * Everything else is ignored, skip to the next line - */ - continue 2; - } -} -fclose($fp); - -/** -* Add some cheating -*/ -$cheats = array( - '00DF' => 'ss', # German sharp S - '00C5' => 'ae', # Capital A with diaeresis - '00E4' => 'ae', # Small A with diaeresis - '00D6' => 'oe', # Capital O with diaeresis - '00F6' => 'oe', # Small O with diaeresis - '00DC' => 'ue', # Capital U with diaeresis - '00FC' => 'ue', # Small U with diaeresis -); - -/** -* Add our "cheat replacements" to the map -*/ -foreach ($cheats as $hex => $map_to) -{ - $map[hexdec($hex)] = $map_to; -} - -/** -* Split the map into smaller blocks -*/ -$file_contents = array(); -foreach ($map as $cp => $map_to) -{ - $file_contents[$cp >> 11][cp_to_utf($cp)] = $map_to; -} -unset($map); - -foreach ($file_contents as $idx => $contents) -{ - echo "Writing to search_indexer_$idx." . PHP_EXT . "\n"; - $fp = fopen(PHPBB_ROOT_PATH . 'includes/utf/data/search_indexer_' . $idx . '.' . PHP_EXT, 'wb'); - fwrite($fp, ' $v) - { - $lines[] = my_var_export($k) . '=>' . my_var_export($v); - } - - return 'array(' . implode(',', $lines) . ')'; - } - else if (is_string($var)) - { - return "'" . str_replace(array('\\', "'"), array('\\\\', "\\'"), $var) . "'"; - } - else - { - return $var; - } -} - -/** -* Download a file to the develop/ dir -* -* @param string $url URL of the file to download -* @return void -*/ -function download($url) -{ - if (file_exists(PHPBB_ROOT_PATH . 'develop/' . basename($url))) - { - return; - } - - echo 'Downloading from ', $url, ' '; - - if (!$fpr = fopen($url, 'rb')) - { - die("Can't download from $url\nPlease download it yourself and put it in the develop/ dir, kthxbai"); - } - - if (!$fpw = fopen(PHPBB_ROOT_PATH . 'develop/' . basename($url), 'wb')) - { - die("Can't open develop/" . basename($url) . " for output... please check your permissions or something"); - } - - $i = 0; - $chunk = 32768; - $done = ''; - - while (!feof($fpr)) - { - $i += fwrite($fpw, fread($fpr, $chunk)); - echo str_repeat("\x08", strlen($done)); - - $done = ($i >> 10) . ' KiB'; - echo $done; - } - fclose($fpr); - fclose($fpw); - - echo "\n"; -} - -/** -* Convert a codepoint in hexadecimal to a UTF-8 char -* -* @param string $hex Codepoint, in hexadecimal -* @return string UTF-8 char -*/ -function hex_to_utf($hex) -{ - return cp_to_utf(hexdec($hex)); -} - -/** -* Return a UTF string formed from a sequence of codepoints in hexadecimal -* -* @param string $seq Sequence of codepoints, separated with a space -* @return string UTF-8 string -*/ -function hexseq_to_utf($seq) -{ - return implode('', array_map('hex_to_utf', explode(' ', $seq))); -} - -/** -* Convert a codepoint to a UTF-8 char -* -* @param integer $cp Unicode codepoint -* @return string UTF-8 string -*/ -function cp_to_utf($cp) -{ - if ($cp > 0xFFFF) - { - return chr(0xF0 | ($cp >> 18)) . chr(0x80 | (($cp >> 12) & 0x3F)) . chr(0x80 | (($cp >> 6) & 0x3F)) . chr(0x80 | ($cp & 0x3F)); - } - else if ($cp > 0x7FF) - { - return chr(0xE0 | ($cp >> 12)) . chr(0x80 | (($cp >> 6) & 0x3F)) . chr(0x80 | ($cp & 0x3F)); - } - else if ($cp > 0x7F) - { - return chr(0xC0 | ($cp >> 6)) . chr(0x80 | ($cp & 0x3F)); - } - else - { - return chr($cp); - } -} \ No newline at end of file -- cgit v1.2.1 From 2e17e448deed073f8614bb555a8ef20c57291c2a Mon Sep 17 00:00:00 2001 From: Meik Sievertsen Date: Sun, 4 Oct 2009 18:14:59 +0000 Subject: Copy 3.0.x branch to trunk git-svn-id: file:///svn/phpbb/trunk@10211 89ea8834-ac86-4346-8a33-228a782c2dd0 --- phpBB/develop/generate_utf_tables.php | 572 ++++++++++++++++++++++++++++++++++ 1 file changed, 572 insertions(+) create mode 100644 phpBB/develop/generate_utf_tables.php (limited to 'phpBB/develop/generate_utf_tables.php') diff --git a/phpBB/develop/generate_utf_tables.php b/phpBB/develop/generate_utf_tables.php new file mode 100644 index 0000000000..3d5188163d --- /dev/null +++ b/phpBB/develop/generate_utf_tables.php @@ -0,0 +1,572 @@ += UTF8_HANGUL_FIRST && $end <= UTF8_HANGUL_LAST) + { + /** + * We do not store Hangul syllables in the array + */ + continue; + } + + if ($p[2] == 'M') + { + $val = UNICODE_QC_MAYBE; + } + else + { + $val = UNICODE_QC_NO; + } + + if ($p[1] == 'NFKC_QC') + { + $file = 'utf_nfkc_qc'; + } + else + { + $file = 'utf_nfc_qc'; + } + + for ($i = $start; $i <= $end; ++$i) + { + /** + * The vars have the same name as the file: $utf_nfc_qc is in utf_nfc_qc.php + */ + $file_contents[$file][$file][cp_to_utf($i)] = $val; + } +} +fclose($fp); + +/** +* Do mappings +*/ +echo "Loading Unicode decomposition mappings\n"; +$fp = fopen($phpbb_root_path . 'develop/UnicodeData.txt', 'rt'); + +$map = array(); +while (!feof($fp)) +{ + $p = explode(';', fgets($fp, 1024)); + $cp = hexdec($p[0]); + + if (!empty($p[3])) + { + /** + * Store combining class > 0 + */ + $file_contents['utf_normalizer_common']['utf_combining_class'][cp_to_utf($cp)] = (int) $p[3]; + } + + if (!isset($p[5]) || !preg_match_all('#[0-9A-F]+#', strip_tags($p[5]), $m)) + { + continue; + } + + if (strpos($p[5], '>')) + { + $map['NFKD'][$cp] = implode(' ', array_map('hexdec', $m[0])); + } + else + { + $map['NFD'][$cp] = $map['NFKD'][$cp] = implode(' ', array_map('hexdec', $m[0])); + } +} +fclose($fp); + +/** +* Build the canonical composition table +*/ +echo "Generating the Canonical Composition table\n"; +foreach ($map['NFD'] as $cp => $decomp_seq) +{ + if (!strpos($decomp_seq, ' ') || isset($exclude[$cp])) + { + /** + * Singletons are excluded from canonical composition + */ + continue; + } + + $utf_seq = implode('', array_map('cp_to_utf', explode(' ', $decomp_seq))); + + if (!isset($file_contents['utf_canonical_comp']['utf_canonical_comp'][$utf_seq])) + { + $file_contents['utf_canonical_comp']['utf_canonical_comp'][$utf_seq] = cp_to_utf($cp); + } +} + +/** +* Decompose the NF[K]D mappings recursively and prepare the file contents +*/ +echo "Generating the Canonical and Compatibility Decomposition tables\n\n"; +foreach ($map as $type => $decomp_map) +{ + foreach ($decomp_map as $cp => $decomp_seq) + { + $decomp_map[$cp] = decompose($decomp_map, $decomp_seq); + } + unset($decomp_seq); + + if ($type == 'NFKD') + { + $file = 'utf_compatibility_decomp'; + $var = 'utf_compatibility_decomp'; + } + else + { + $file = 'utf_canonical_decomp'; + $var = 'utf_canonical_decomp'; + } + + /** + * Generate the corresponding file + */ + foreach ($decomp_map as $cp => $decomp_seq) + { + $file_contents[$file][$var][cp_to_utf($cp)] = implode('', array_map('cp_to_utf', explode(' ', $decomp_seq))); + } +} + +/** +* Generate and/or alter the files +*/ +foreach ($file_contents as $file => $contents) +{ + /** + * Generate a new file + */ + echo "Writing to $file.$phpEx\n"; + + if (!$fp = fopen($phpbb_root_path . 'includes/utf/data/' . $file . '.' . $phpEx, 'wb')) + { + trigger_error('Cannot open ' . $file . ' for write'); + } + + fwrite($fp, ' $val) + { + fwrite($fp, "\n\$GLOBALS[" . my_var_export($var) . ']=' . my_var_export($val) . ";"); + } + fclose($fp); +} + +echo "\n*** UTF-8 normalization tables done\n\n"; + +/** +* Now we'll generate the files needed by the search indexer +*/ +echo "Generating search indexer tables\n"; + +$fp = fopen($phpbb_root_path . 'develop/UnicodeData.txt', 'rt'); + +$map = array(); +while ($line = fgets($fp, 1024)) +{ + /** + * The current line is split, $m[0] hold the codepoint in hexadecimal and + * all other fields numbered as in http://www.unicode.org/Public/UNIDATA/UCD.html#UnicodeData.txt + */ + $m = explode(';', $line); + + /** + * @var integer $cp Current char codepoint + * @var string $utf_char UTF-8 representation of current char + */ + $cp = hexdec($m[0]); + $utf_char = cp_to_utf($cp); + + /** + * $m[2] holds the "General Category" of the character + * @link http://www.unicode.org/Public/UNIDATA/UCD.html#General_Category_Values + */ + switch ($m[2][0]) + { + case 'L': + /** + * We allow all letters and map them to their lowercased counterpart on the fly + */ + $map_to_hex = (isset($m[13][0])) ? $m[13] : $m[0]; + + if (preg_match('#^LATIN.*(?:LETTER|LIGATURE) ([A-Z]{2}(?![A-Z]))$#', $m[1], $capture)) + { + /** + * Special hack for some latin ligatures. Using the name of a character + * is bad practice, but for now it works well enough. + * + * @todo Note that ligatures with combining marks such as U+01E2 are + * not supported at this time + */ + $map[$cp] = strtolower($capture[1]); + } + else if (isset($m[13][0])) + { + /** + * If the letter has a lowercased form, use it + */ + $map[$cp] = hex_to_utf($m[13]); + } + else + { + /** + * In all other cases, map the letter to itself + */ + $map[$cp] = $utf_char; + } + break; + + case 'M': + /** + * We allow all marks, they are mapped to themselves + */ + $map[$cp] = $utf_char; + break; + + case 'N': + /** + * We allow all numbers, but we map them to their numeric value whenever + * possible. The numeric value (field #8) is in ASCII already + * + * @todo Note that fractions such as U+00BD will be converted to something + * like "1/2", with a slash. However, "1/2" entered in ASCII is converted + * to "1 2". This will have to be fixed. + */ + $map[$cp] = (isset($m[8][0])) ? $m[8] : $utf_char; + break; + + default: + /** + * Everything else is ignored, skip to the next line + */ + continue 2; + } +} +fclose($fp); + +/** +* Add some cheating +*/ +$cheats = array( + '00DF' => 'ss', # German sharp S + '00C5' => 'ae', # Capital A with diaeresis + '00E4' => 'ae', # Small A with diaeresis + '00D6' => 'oe', # Capital O with diaeresis + '00F6' => 'oe', # Small O with diaeresis + '00DC' => 'ue', # Capital U with diaeresis + '00FC' => 'ue', # Small U with diaeresis +); + +/** +* Add our "cheat replacements" to the map +*/ +foreach ($cheats as $hex => $map_to) +{ + $map[hexdec($hex)] = $map_to; +} + +/** +* Split the map into smaller blocks +*/ +$file_contents = array(); +foreach ($map as $cp => $map_to) +{ + $file_contents[$cp >> 11][cp_to_utf($cp)] = $map_to; +} +unset($map); + +foreach ($file_contents as $idx => $contents) +{ + echo "Writing to search_indexer_$idx.$phpEx\n"; + $fp = fopen($phpbb_root_path . 'includes/utf/data/search_indexer_' . $idx . '.' . $phpEx, 'wb'); + fwrite($fp, ' $v) + { + $lines[] = my_var_export($k) . '=>' . my_var_export($v); + } + + return 'array(' . implode(',', $lines) . ')'; + } + else if (is_string($var)) + { + return "'" . str_replace(array('\\', "'"), array('\\\\', "\\'"), $var) . "'"; + } + else + { + return $var; + } +} + +/** +* Download a file to the develop/ dir +* +* @param string $url URL of the file to download +* @return void +*/ +function download($url) +{ + global $phpbb_root_path; + + if (file_exists($phpbb_root_path . 'develop/' . basename($url))) + { + return; + } + + echo 'Downloading from ', $url, ' '; + + if (!$fpr = fopen($url, 'rb')) + { + die("Can't download from $url\nPlease download it yourself and put it in the develop/ dir, kthxbai"); + } + + if (!$fpw = fopen($phpbb_root_path . 'develop/' . basename($url), 'wb')) + { + die("Can't open develop/" . basename($url) . " for output... please check your permissions or something"); + } + + $i = 0; + $chunk = 32768; + $done = ''; + + while (!feof($fpr)) + { + $i += fwrite($fpw, fread($fpr, $chunk)); + echo str_repeat("\x08", strlen($done)); + + $done = ($i >> 10) . ' KiB'; + echo $done; + } + fclose($fpr); + fclose($fpw); + + echo "\n"; +} + +/** +* Convert a codepoint in hexadecimal to a UTF-8 char +* +* @param string $hex Codepoint, in hexadecimal +* @return string UTF-8 char +*/ +function hex_to_utf($hex) +{ + return cp_to_utf(hexdec($hex)); +} + +/** +* Return a UTF string formed from a sequence of codepoints in hexadecimal +* +* @param string $seq Sequence of codepoints, separated with a space +* @return string UTF-8 string +*/ +function hexseq_to_utf($seq) +{ + return implode('', array_map('hex_to_utf', explode(' ', $seq))); +} + +/** +* Convert a codepoint to a UTF-8 char +* +* @param integer $cp Unicode codepoint +* @return string UTF-8 string +*/ +function cp_to_utf($cp) +{ + if ($cp > 0xFFFF) + { + return chr(0xF0 | ($cp >> 18)) . chr(0x80 | (($cp >> 12) & 0x3F)) . chr(0x80 | (($cp >> 6) & 0x3F)) . chr(0x80 | ($cp & 0x3F)); + } + else if ($cp > 0x7FF) + { + return chr(0xE0 | ($cp >> 12)) . chr(0x80 | (($cp >> 6) & 0x3F)) . chr(0x80 | ($cp & 0x3F)); + } + else if ($cp > 0x7F) + { + return chr(0xC0 | ($cp >> 6)) . chr(0x80 | ($cp & 0x3F)); + } + else + { + return chr($cp); + } +} \ No newline at end of file -- cgit v1.2.1 From af5b9a96409d788733fcb1ff367e0c7fb0583702 Mon Sep 17 00:00:00 2001 From: Igor Wiedler Date: Tue, 9 Nov 2010 08:59:25 +0100 Subject: [ticket/9556] Drop php closing tags, add trailing newline Closing tags converted using Oleg's script. remove-php-end-tags.py -a . Trailing newlines added using the following where $ext is file extension. find . -type f -name "*.$ext" -print | xargs printf "e %s\nw\n" | ed -s; Extensions: php, css, html, js, xml. PHPBB3-9556 --- phpBB/develop/generate_utf_tables.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'phpBB/develop/generate_utf_tables.php') diff --git a/phpBB/develop/generate_utf_tables.php b/phpBB/develop/generate_utf_tables.php index 3d5188163d..dbef125803 100644 --- a/phpBB/develop/generate_utf_tables.php +++ b/phpBB/develop/generate_utf_tables.php @@ -569,4 +569,4 @@ function cp_to_utf($cp) { return chr($cp); } -} \ No newline at end of file +} -- cgit v1.2.1 From a759704b39fc1c1353f865a633759b1369589b67 Mon Sep 17 00:00:00 2001 From: Yuriy Rusko Date: Tue, 27 May 2014 20:18:06 +0200 Subject: [ticket/12594] Remove @package tags and update file headers PHPBB3-12594 --- phpBB/develop/generate_utf_tables.php | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'phpBB/develop/generate_utf_tables.php') diff --git a/phpBB/develop/generate_utf_tables.php b/phpBB/develop/generate_utf_tables.php index e5d907d6a5..16a449679b 100644 --- a/phpBB/develop/generate_utf_tables.php +++ b/phpBB/develop/generate_utf_tables.php @@ -1,9 +1,13 @@ +* @license GNU General Public License, version 2 (GPL-2.0) +* +* For full copyright and license information, please see +* the docs/CREDITS.txt file. * */ -- cgit v1.2.1