From 2f4a618900e2c3b6ea14c68cbeb5897cd2ac1a04 Mon Sep 17 00:00:00 2001
From: Meik Sievertsen <acydburn@phpbb.com>
Date: Thu, 29 May 2008 12:25:56 +0000
Subject: ok... i hope i haven't messed too much with the code and everything
 is still working. Changes: - Ascraeus now uses constants for the phpbb root
 path and the php extension. This ensures more security for external
 applications and modifications (no more overwriting of root path and
 extension possible through insecure mods and register globals enabled) as
 well as no more globalizing needed. - A second change implemented here is an
 additional short-hand-notation for append_sid(). It is allowed to omit the
 root path and extension now (for example calling append_sid('memberlist')) -
 in this case the root path and extension get added automatically. The hook is
 called after these are added.

git-svn-id: file:///svn/phpbb/trunk@8572 89ea8834-ac86-4346-8a33-228a782c2dd0
---
 phpBB/develop/generate_utf_tables.php | 24 +++++++++++-------------
 1 file changed, 11 insertions(+), 13 deletions(-)

(limited to 'phpBB/develop/generate_utf_tables.php')

diff --git a/phpBB/develop/generate_utf_tables.php b/phpBB/develop/generate_utf_tables.php
index 3d5188163d..fcf6395b24 100644
--- a/phpBB/develop/generate_utf_tables.php
+++ b/phpBB/develop/generate_utf_tables.php
@@ -25,8 +25,8 @@ die("Please read the first lines of this script for instructions on how to enabl
 set_time_limit(0);
 
 define('IN_PHPBB', true);
-$phpbb_root_path = '../';
-$phpEx = substr(strrchr(__FILE__, '.'), 1);
+define('PHPBB_ROOT_PATH', './../');
+define('PHP_EXT', substr(strrchr(__FILE__, '.'), 1));
 
 echo "Checking for required files\n";
 download('http://www.unicode.org/Public/UNIDATA/CompositionExclusions.txt');
@@ -34,7 +34,7 @@ download('http://www.unicode.org/Public/UNIDATA/DerivedNormalizationProps.txt');
 download('http://www.unicode.org/Public/UNIDATA/UnicodeData.txt');
 echo "\n";
 
-require_once($phpbb_root_path . 'includes/utf/utf_normalizer.' . $phpEx);
+require_once(PHPBB_ROOT_PATH . 'includes/utf/utf_normalizer.' . PHP_EXT);
 $file_contents = array();
 
 /**
@@ -172,7 +172,7 @@ fclose($fp);
 * Do mappings
 */
 echo "Loading Unicode decomposition mappings\n";
-$fp = fopen($phpbb_root_path . 'develop/UnicodeData.txt', 'rt');
+$fp = fopen(PHPBB_ROOT_PATH . 'develop/UnicodeData.txt', 'rt');
 
 $map = array();
 while (!feof($fp))
@@ -266,9 +266,9 @@ foreach ($file_contents as $file => $contents)
 	/**
 	* Generate a new file
 	*/
-	echo "Writing to $file.$phpEx\n";
+	echo "Writing to $file." . PHP_EXT . "\n";
 
-	if (!$fp = fopen($phpbb_root_path . 'includes/utf/data/' . $file . '.' . $phpEx, 'wb'))
+	if (!$fp = fopen(PHPBB_ROOT_PATH . 'includes/utf/data/' . $file . '.' . PHP_EXT, 'wb'))
 	{
 		trigger_error('Cannot open ' . $file . ' for write');
 	}
@@ -288,7 +288,7 @@ echo "\n*** UTF-8 normalization tables done\n\n";
 */
 echo "Generating search indexer tables\n";
 
-$fp = fopen($phpbb_root_path . 'develop/UnicodeData.txt', 'rt');
+$fp = fopen(PHPBB_ROOT_PATH . 'develop/UnicodeData.txt', 'rt');
 
 $map = array();
 while ($line = fgets($fp, 1024))
@@ -406,8 +406,8 @@ unset($map);
 
 foreach ($file_contents as $idx => $contents)
 {
-	echo "Writing to search_indexer_$idx.$phpEx\n";
-	$fp = fopen($phpbb_root_path . 'includes/utf/data/search_indexer_' . $idx . '.' . $phpEx, 'wb');
+	echo "Writing to search_indexer_$idx." . PHP_EXT . "\n";
+	$fp = fopen(PHPBB_ROOT_PATH . 'includes/utf/data/search_indexer_' . $idx . '.' . PHP_EXT, 'wb');
 	fwrite($fp, '<?php return ' . my_var_export($contents) . ';');
 	fclose($fp);
 }
@@ -486,9 +486,7 @@ function my_var_export($var)
 */
 function download($url)
 {
-	global $phpbb_root_path;
-
-	if (file_exists($phpbb_root_path . 'develop/' . basename($url)))
+	if (file_exists(PHPBB_ROOT_PATH . 'develop/' . basename($url)))
 	{
 		return;
 	}
@@ -500,7 +498,7 @@ function download($url)
 		die("Can't download from $url\nPlease download it yourself and put it in the develop/ dir, kthxbai");
 	}
 
-	if (!$fpw = fopen($phpbb_root_path . 'develop/' . basename($url), 'wb'))
+	if (!$fpw = fopen(PHPBB_ROOT_PATH . 'develop/' . basename($url), 'wb'))
 	{
 		die("Can't open develop/" . basename($url) . " for output... please check your permissions or something");
 	}
-- 
cgit v1.2.1


From bf8ac19eaa8d74f9dfd6d597190f5664e7339382 Mon Sep 17 00:00:00 2001
From: Meik Sievertsen <acydburn@phpbb.com>
Date: Sun, 4 Oct 2009 18:13:59 +0000
Subject: Move trunk/phpBB to old_trunk/phpBB

git-svn-id: file:///svn/phpbb/trunk@10210 89ea8834-ac86-4346-8a33-228a782c2dd0
---
 phpBB/develop/generate_utf_tables.php | 570 ----------------------------------
 1 file changed, 570 deletions(-)
 delete mode 100644 phpBB/develop/generate_utf_tables.php

(limited to 'phpBB/develop/generate_utf_tables.php')

diff --git a/phpBB/develop/generate_utf_tables.php b/phpBB/develop/generate_utf_tables.php
deleted file mode 100644
index fcf6395b24..0000000000
--- a/phpBB/develop/generate_utf_tables.php
+++ /dev/null
@@ -1,570 +0,0 @@
-<?php
-/**
-*
-* @package phpBB3
-* @version $Id$
-* @copyright (c) 2005 phpBB Group
-* @license http://opensource.org/licenses/gpl-license.php GNU Public License
-*
-*/
-
-if (php_sapi_name() != 'cli')
-{
-	die("This program must be run from the command line.\n");
-}
-
-//
-// Security message:
-//
-// This script is potentially dangerous.
-// Remove or comment the next line (die(".... ) to enable this script.
-// Do NOT FORGET to either remove this script or disable it after you have used it.
-//
-die("Please read the first lines of this script for instructions on how to enable it");
-
-set_time_limit(0);
-
-define('IN_PHPBB', true);
-define('PHPBB_ROOT_PATH', './../');
-define('PHP_EXT', substr(strrchr(__FILE__, '.'), 1));
-
-echo "Checking for required files\n";
-download('http://www.unicode.org/Public/UNIDATA/CompositionExclusions.txt');
-download('http://www.unicode.org/Public/UNIDATA/DerivedNormalizationProps.txt');
-download('http://www.unicode.org/Public/UNIDATA/UnicodeData.txt');
-echo "\n";
-
-require_once(PHPBB_ROOT_PATH . 'includes/utf/utf_normalizer.' . PHP_EXT);
-$file_contents = array();
-
-/**
-* Generate some Hangul/Jamo stuff
-*/
-echo "\nGenerating Hangul and Jamo tables\n";
-for ($i = 0; $i < UNICODE_HANGUL_LCOUNT; ++$i)
-{
-	$utf_char = cp_to_utf(UNICODE_HANGUL_LBASE + $i);
-	$file_contents['utf_normalizer_common']['utf_jamo_index'][$utf_char] = $i * UNICODE_HANGUL_VCOUNT * UNICODE_HANGUL_TCOUNT + UNICODE_HANGUL_SBASE;
-	$file_contents['utf_normalizer_common']['utf_jamo_type'][$utf_char] = UNICODE_JAMO_L;
-}
-
-for ($i = 0; $i < UNICODE_HANGUL_VCOUNT; ++$i)
-{
-	$utf_char = cp_to_utf(UNICODE_HANGUL_VBASE + $i);
-	$file_contents['utf_normalizer_common']['utf_jamo_index'][$utf_char] = $i * UNICODE_HANGUL_TCOUNT;
-	$file_contents['utf_normalizer_common']['utf_jamo_type'][$utf_char] = UNICODE_JAMO_V;
-}
-
-for ($i = 0; $i < UNICODE_HANGUL_TCOUNT; ++$i)
-{
-	$utf_char = cp_to_utf(UNICODE_HANGUL_TBASE + $i);
-	$file_contents['utf_normalizer_common']['utf_jamo_index'][$utf_char] = $i;
-	$file_contents['utf_normalizer_common']['utf_jamo_type'][$utf_char] = UNICODE_JAMO_T;
-}
-
-/**
-* Load the CompositionExclusions table
-*/
-echo "Loading CompositionExclusion\n";
-$fp = fopen('CompositionExclusions.txt', 'rt');
-
-$exclude = array();
-while (!feof($fp))
-{
-	$line = fgets($fp, 1024);
-
-	if (!strpos(' 0123456789ABCDEFabcdef', $line[0]))
-	{
-		continue;
-	}
-
-	$cp = strtok($line, ' ');
-
-	if ($pos = strpos($cp, '..'))
-	{
-		$start = hexdec(substr($cp, 0, $pos));
-		$end = hexdec(substr($cp, $pos + 2));
-
-		for ($i = $start; $i < $end; ++$i)
-		{
-			$exclude[$i] = 1;
-		}
-	}
-	else
-	{
-		$exclude[hexdec($cp)] = 1;
-	}
-}
-fclose($fp);
-
-/**
-* Load QuickCheck tables
-*/
-echo "Generating QuickCheck tables\n";
-$fp = fopen('DerivedNormalizationProps.txt', 'rt');
-
-while (!feof($fp))
-{
-	$line = fgets($fp, 1024);
-
-	if (!strpos(' 0123456789ABCDEFabcdef', $line[0]))
-	{
-		continue;
-	}
-
-	$p = array_map('trim', explode(';', strtok($line, '#')));
-
-	/**
-	* Capture only NFC_QC, NFKC_QC
-	*/
-	if (!preg_match('#^NFK?C_QC$#', $p[1]))
-	{
-		continue;
-	}
-
-	if ($pos = strpos($p[0], '..'))
-	{
-		$start = hexdec(substr($p[0], 0, $pos));
-		$end = hexdec(substr($p[0], $pos + 2));
-	}
-	else
-	{
-		$start = $end = hexdec($p[0]);
-	}
-
-	if ($start >= UTF8_HANGUL_FIRST && $end <= UTF8_HANGUL_LAST)
-	{
-		/**
-		* We do not store Hangul syllables in the array
-		*/
-		continue;
-	}
-
-	if ($p[2] == 'M')
-	{
-		$val = UNICODE_QC_MAYBE;
-	}
-	else
-	{
-		$val = UNICODE_QC_NO;
-	}
-
-	if ($p[1] == 'NFKC_QC')
-	{
-		$file = 'utf_nfkc_qc';
-	}
-	else
-	{
-		$file = 'utf_nfc_qc';
-	}
-
-	for ($i = $start; $i <= $end; ++$i)
-	{
-		/**
-		* The vars have the same name as the file: $utf_nfc_qc is in utf_nfc_qc.php
-		*/
-		$file_contents[$file][$file][cp_to_utf($i)] = $val;
-	}
-}
-fclose($fp);
-
-/**
-* Do mappings
-*/
-echo "Loading Unicode decomposition mappings\n";
-$fp = fopen(PHPBB_ROOT_PATH . 'develop/UnicodeData.txt', 'rt');
-
-$map = array();
-while (!feof($fp))
-{
-	$p = explode(';', fgets($fp, 1024));
-	$cp = hexdec($p[0]);
-
-	if (!empty($p[3]))
-	{
-		/**
-		* Store combining class > 0
-		*/
-		$file_contents['utf_normalizer_common']['utf_combining_class'][cp_to_utf($cp)] = (int) $p[3];
-	}
-
-	if (!isset($p[5]) || !preg_match_all('#[0-9A-F]+#', strip_tags($p[5]), $m))
-	{
-		continue;
-	}
-
-	if (strpos($p[5], '>'))
-	{
-		$map['NFKD'][$cp] = implode(' ', array_map('hexdec', $m[0]));
-	}
-	else
-	{
-		$map['NFD'][$cp] = $map['NFKD'][$cp] = implode(' ', array_map('hexdec', $m[0]));
-	}
-}
-fclose($fp);
-
-/**
-* Build the canonical composition table
-*/
-echo "Generating the Canonical Composition table\n";
-foreach ($map['NFD'] as $cp => $decomp_seq)
-{
-	if (!strpos($decomp_seq, ' ') || isset($exclude[$cp]))
-	{
-		/**
-		* Singletons are excluded from canonical composition
-		*/
-		continue;
-	}
-
-	$utf_seq = implode('', array_map('cp_to_utf', explode(' ', $decomp_seq)));
-
-	if (!isset($file_contents['utf_canonical_comp']['utf_canonical_comp'][$utf_seq]))
-	{
-		$file_contents['utf_canonical_comp']['utf_canonical_comp'][$utf_seq] = cp_to_utf($cp);
-	}
-}
-
-/**
-* Decompose the NF[K]D mappings recursively and prepare the file contents
-*/
-echo "Generating the Canonical and Compatibility Decomposition tables\n\n";
-foreach ($map as $type => $decomp_map)
-{
-	foreach ($decomp_map as $cp => $decomp_seq)
-	{
-		$decomp_map[$cp] = decompose($decomp_map, $decomp_seq);
-	}
-	unset($decomp_seq);
-
-	if ($type == 'NFKD')
-	{
-		$file = 'utf_compatibility_decomp';
-		$var = 'utf_compatibility_decomp';
-	}
-	else
-	{
-		$file = 'utf_canonical_decomp';
-		$var = 'utf_canonical_decomp';
-	}
-
-	/**
-	* Generate the corresponding file
-	*/
-	foreach ($decomp_map as $cp => $decomp_seq)
-	{
-		$file_contents[$file][$var][cp_to_utf($cp)] = implode('', array_map('cp_to_utf', explode(' ', $decomp_seq)));
-	}
-}
-
-/**
-* Generate and/or alter the files
-*/
-foreach ($file_contents as $file => $contents)
-{
-	/**
-	* Generate a new file
-	*/
-	echo "Writing to $file." . PHP_EXT . "\n";
-
-	if (!$fp = fopen(PHPBB_ROOT_PATH . 'includes/utf/data/' . $file . '.' . PHP_EXT, 'wb'))
-	{
-		trigger_error('Cannot open ' . $file . ' for write');
-	}
-
-	fwrite($fp, '<?php');
-	foreach ($contents as $var => $val)
-	{
-		fwrite($fp, "\n\$GLOBALS[" . my_var_export($var) . ']=' . my_var_export($val) . ";");
-	}
-	fclose($fp);
-}
-
-echo "\n*** UTF-8 normalization tables done\n\n";
-
-/**
-* Now we'll generate the files needed by the search indexer
-*/
-echo "Generating search indexer tables\n";
-
-$fp = fopen(PHPBB_ROOT_PATH . 'develop/UnicodeData.txt', 'rt');
-
-$map = array();
-while ($line = fgets($fp, 1024))
-{
-	/**
-	* The current line is split, $m[0] hold the codepoint in hexadecimal and
-	* all other fields numbered as in http://www.unicode.org/Public/UNIDATA/UCD.html#UnicodeData.txt
-	*/
-	$m = explode(';', $line);
-
-	/**
-	* @var	integer	$cp			Current char codepoint
-	* @var	string	$utf_char	UTF-8 representation of current char
-	*/
-	$cp = hexdec($m[0]);
-	$utf_char = cp_to_utf($cp);
-
-	/**
-	* $m[2] holds the "General Category" of the character
-	* @link http://www.unicode.org/Public/UNIDATA/UCD.html#General_Category_Values
-	*/
-	switch ($m[2][0])
-	{
-		case 'L':
-			/**
-			* We allow all letters and map them to their lowercased counterpart on the fly
-			*/
-			$map_to_hex = (isset($m[13][0])) ? $m[13] : $m[0];
-
-			if (preg_match('#^LATIN.*(?:LETTER|LIGATURE) ([A-Z]{2}(?![A-Z]))$#', $m[1], $capture))
-			{
-				/**
-				* Special hack for some latin ligatures. Using the name of a character
-				* is bad practice, but for now it works well enough.
-				*
-				* @todo Note that ligatures with combining marks such as U+01E2 are
-				* not supported at this time
-				*/
-				$map[$cp] = strtolower($capture[1]);
-			}
-			else if (isset($m[13][0]))
-			{
-				/**
-				* If the letter has a lowercased form, use it
-				*/
-				$map[$cp] = hex_to_utf($m[13]);
-			}
-			else
-			{
-				/**
-				* In all other cases, map the letter to itself
-				*/
-				$map[$cp] = $utf_char;
-			}
-			break;
-
-		case 'M':
-			/**
-			* We allow all marks, they are mapped to themselves
-			*/
-			$map[$cp] = $utf_char;
-			break;
-
-		case 'N':
-			/**
-			* We allow all numbers, but we map them to their numeric value whenever
-			* possible. The numeric value (field #8) is in ASCII already
-			*
-			* @todo Note that fractions such as U+00BD will be converted to something
-			* like "1/2", with a slash. However, "1/2" entered in ASCII is converted
-			* to "1 2". This will have to be fixed.
-			*/
-			$map[$cp] = (isset($m[8][0])) ? $m[8] : $utf_char;
-			break;
-
-		default:
-			/**
-			* Everything else is ignored, skip to the next line
-			*/
-			continue 2;
-	}
-}
-fclose($fp);
-
-/**
-* Add some cheating
-*/
-$cheats = array(
-	'00DF'	=>	'ss',		#	German sharp S
-	'00C5'	=>	'ae',		#	Capital A with diaeresis
-	'00E4'	=>	'ae',		#	Small A with diaeresis
-	'00D6'	=>	'oe',		#	Capital O with diaeresis
-	'00F6'	=>	'oe',		#	Small O with diaeresis
-	'00DC'	=>	'ue',		#	Capital U with diaeresis
-	'00FC'	=>	'ue',		#	Small U with diaeresis
-);
-
-/**
-* Add our "cheat replacements" to the map
-*/
-foreach ($cheats as $hex => $map_to)
-{
-	$map[hexdec($hex)] = $map_to;
-}
-
-/**
-* Split the map into smaller blocks
-*/
-$file_contents = array();
-foreach ($map as $cp => $map_to)
-{
-	$file_contents[$cp >> 11][cp_to_utf($cp)] = $map_to;
-}
-unset($map);
-
-foreach ($file_contents as $idx => $contents)
-{
-	echo "Writing to search_indexer_$idx." . PHP_EXT . "\n";
-	$fp = fopen(PHPBB_ROOT_PATH . 'includes/utf/data/search_indexer_' . $idx . '.' . PHP_EXT, 'wb');
-	fwrite($fp, '<?php return ' . my_var_export($contents) . ';');
-	fclose($fp);
-}
-echo "\n*** Search indexer tables done\n\n";
-
-
-die("\nAll done!\n");
-
-
-////////////////////////////////////////////////////////////////////////////////
-//                             Internal functions                             //
-////////////////////////////////////////////////////////////////////////////////
-
-/**
-* Decompose a sequence recusively
-*
-* @param	array	$decomp_map	Decomposition mapping, passed by reference
-* @param	string	$decomp_seq	Decomposition sequence as decimal codepoints separated with a space
-* @return	string				Decomposition sequence, fully decomposed
-*/
-function decompose(&$decomp_map, $decomp_seq)
-{
-	$ret = array();
-	foreach (explode(' ', $decomp_seq) as $cp)
-	{
-		if (isset($decomp_map[$cp]))
-		{
-			$ret[] = decompose($decomp_map, $decomp_map[$cp]);
-		}
-		else
-		{
-			$ret[] = $cp;
-		}
-	}
-
-	return implode(' ', $ret);
-}
-
-
-/**
-* Return a parsable string representation of a variable
-*
-* This is function is limited to array/strings/integers
-*
-* @param	mixed	$var		Variable
-* @return	string				PHP code representing the variable
-*/
-function my_var_export($var)
-{
-	if (is_array($var))
-	{
-		$lines = array();
-
-		foreach ($var as $k => $v)
-		{
-			$lines[] = my_var_export($k) . '=>' . my_var_export($v);
-		}
-
-		return 'array(' . implode(',', $lines) . ')';
-	}
-	else if (is_string($var))
-	{
-		return "'" . str_replace(array('\\', "'"), array('\\\\', "\\'"), $var) . "'";
-	}
-	else
-	{
-		return $var;
-	}
-}
-
-/**
-* Download a file to the develop/ dir
-*
-* @param	string	$url		URL of the file to download
-* @return	void
-*/
-function download($url)
-{
-	if (file_exists(PHPBB_ROOT_PATH . 'develop/' . basename($url)))
-	{
-		return;
-	}
-
-	echo 'Downloading from ', $url, ' ';
-
-	if (!$fpr = fopen($url, 'rb'))
-	{
-		die("Can't download from $url\nPlease download it yourself and put it in the develop/ dir, kthxbai");
-	}
-
-	if (!$fpw = fopen(PHPBB_ROOT_PATH . 'develop/' . basename($url), 'wb'))
-	{
-		die("Can't open develop/" . basename($url) . " for output... please check your permissions or something");
-	}
-
-	$i = 0;
-	$chunk = 32768;
-	$done = '';
-
-	while (!feof($fpr))
-	{
-		$i += fwrite($fpw, fread($fpr, $chunk));
-		echo str_repeat("\x08", strlen($done));
-
-		$done = ($i >> 10) . ' KiB';
-		echo $done;
-	}
-	fclose($fpr);
-	fclose($fpw);
-
-	echo "\n";
-}
-
-/**
-* Convert a codepoint in hexadecimal to a UTF-8 char
-*
-* @param	string	$hex		Codepoint, in hexadecimal
-* @return	string				UTF-8 char
-*/
-function hex_to_utf($hex)
-{
-	return cp_to_utf(hexdec($hex));
-}
-
-/**
-* Return a UTF string formed from a sequence of codepoints in hexadecimal
-*
-* @param	string	$seq		Sequence of codepoints, separated with a space
-* @return	string				UTF-8 string
-*/
-function hexseq_to_utf($seq)
-{
-	return implode('', array_map('hex_to_utf', explode(' ', $seq)));
-}
-
-/**
-* Convert a codepoint to a UTF-8 char
-*
-* @param	integer	$cp			Unicode codepoint
-* @return	string				UTF-8 string
-*/
-function cp_to_utf($cp)
-{
-	if ($cp > 0xFFFF)
-	{
-		return chr(0xF0 | ($cp >> 18)) . chr(0x80 | (($cp >> 12) & 0x3F)) . chr(0x80 | (($cp >> 6) & 0x3F)) . chr(0x80 | ($cp & 0x3F));
-	}
-	else if ($cp > 0x7FF)
-	{
-		return chr(0xE0 | ($cp >> 12)) . chr(0x80 | (($cp >> 6) & 0x3F)) . chr(0x80 | ($cp & 0x3F));
-	}
-	else if ($cp > 0x7F)
-	{
-		return chr(0xC0 | ($cp >> 6)) . chr(0x80 | ($cp & 0x3F));
-	}
-	else
-	{
-		return chr($cp);
-	}
-}
\ No newline at end of file
-- 
cgit v1.2.1


From 2e17e448deed073f8614bb555a8ef20c57291c2a Mon Sep 17 00:00:00 2001
From: Meik Sievertsen <acydburn@phpbb.com>
Date: Sun, 4 Oct 2009 18:14:59 +0000
Subject: Copy 3.0.x branch to trunk

git-svn-id: file:///svn/phpbb/trunk@10211 89ea8834-ac86-4346-8a33-228a782c2dd0
---
 phpBB/develop/generate_utf_tables.php | 572 ++++++++++++++++++++++++++++++++++
 1 file changed, 572 insertions(+)
 create mode 100644 phpBB/develop/generate_utf_tables.php

(limited to 'phpBB/develop/generate_utf_tables.php')

diff --git a/phpBB/develop/generate_utf_tables.php b/phpBB/develop/generate_utf_tables.php
new file mode 100644
index 0000000000..3d5188163d
--- /dev/null
+++ b/phpBB/develop/generate_utf_tables.php
@@ -0,0 +1,572 @@
+<?php
+/**
+*
+* @package phpBB3
+* @version $Id$
+* @copyright (c) 2005 phpBB Group
+* @license http://opensource.org/licenses/gpl-license.php GNU Public License
+*
+*/
+
+if (php_sapi_name() != 'cli')
+{
+	die("This program must be run from the command line.\n");
+}
+
+//
+// Security message:
+//
+// This script is potentially dangerous.
+// Remove or comment the next line (die(".... ) to enable this script.
+// Do NOT FORGET to either remove this script or disable it after you have used it.
+//
+die("Please read the first lines of this script for instructions on how to enable it");
+
+set_time_limit(0);
+
+define('IN_PHPBB', true);
+$phpbb_root_path = '../';
+$phpEx = substr(strrchr(__FILE__, '.'), 1);
+
+echo "Checking for required files\n";
+download('http://www.unicode.org/Public/UNIDATA/CompositionExclusions.txt');
+download('http://www.unicode.org/Public/UNIDATA/DerivedNormalizationProps.txt');
+download('http://www.unicode.org/Public/UNIDATA/UnicodeData.txt');
+echo "\n";
+
+require_once($phpbb_root_path . 'includes/utf/utf_normalizer.' . $phpEx);
+$file_contents = array();
+
+/**
+* Generate some Hangul/Jamo stuff
+*/
+echo "\nGenerating Hangul and Jamo tables\n";
+for ($i = 0; $i < UNICODE_HANGUL_LCOUNT; ++$i)
+{
+	$utf_char = cp_to_utf(UNICODE_HANGUL_LBASE + $i);
+	$file_contents['utf_normalizer_common']['utf_jamo_index'][$utf_char] = $i * UNICODE_HANGUL_VCOUNT * UNICODE_HANGUL_TCOUNT + UNICODE_HANGUL_SBASE;
+	$file_contents['utf_normalizer_common']['utf_jamo_type'][$utf_char] = UNICODE_JAMO_L;
+}
+
+for ($i = 0; $i < UNICODE_HANGUL_VCOUNT; ++$i)
+{
+	$utf_char = cp_to_utf(UNICODE_HANGUL_VBASE + $i);
+	$file_contents['utf_normalizer_common']['utf_jamo_index'][$utf_char] = $i * UNICODE_HANGUL_TCOUNT;
+	$file_contents['utf_normalizer_common']['utf_jamo_type'][$utf_char] = UNICODE_JAMO_V;
+}
+
+for ($i = 0; $i < UNICODE_HANGUL_TCOUNT; ++$i)
+{
+	$utf_char = cp_to_utf(UNICODE_HANGUL_TBASE + $i);
+	$file_contents['utf_normalizer_common']['utf_jamo_index'][$utf_char] = $i;
+	$file_contents['utf_normalizer_common']['utf_jamo_type'][$utf_char] = UNICODE_JAMO_T;
+}
+
+/**
+* Load the CompositionExclusions table
+*/
+echo "Loading CompositionExclusion\n";
+$fp = fopen('CompositionExclusions.txt', 'rt');
+
+$exclude = array();
+while (!feof($fp))
+{
+	$line = fgets($fp, 1024);
+
+	if (!strpos(' 0123456789ABCDEFabcdef', $line[0]))
+	{
+		continue;
+	}
+
+	$cp = strtok($line, ' ');
+
+	if ($pos = strpos($cp, '..'))
+	{
+		$start = hexdec(substr($cp, 0, $pos));
+		$end = hexdec(substr($cp, $pos + 2));
+
+		for ($i = $start; $i < $end; ++$i)
+		{
+			$exclude[$i] = 1;
+		}
+	}
+	else
+	{
+		$exclude[hexdec($cp)] = 1;
+	}
+}
+fclose($fp);
+
+/**
+* Load QuickCheck tables
+*/
+echo "Generating QuickCheck tables\n";
+$fp = fopen('DerivedNormalizationProps.txt', 'rt');
+
+while (!feof($fp))
+{
+	$line = fgets($fp, 1024);
+
+	if (!strpos(' 0123456789ABCDEFabcdef', $line[0]))
+	{
+		continue;
+	}
+
+	$p = array_map('trim', explode(';', strtok($line, '#')));
+
+	/**
+	* Capture only NFC_QC, NFKC_QC
+	*/
+	if (!preg_match('#^NFK?C_QC$#', $p[1]))
+	{
+		continue;
+	}
+
+	if ($pos = strpos($p[0], '..'))
+	{
+		$start = hexdec(substr($p[0], 0, $pos));
+		$end = hexdec(substr($p[0], $pos + 2));
+	}
+	else
+	{
+		$start = $end = hexdec($p[0]);
+	}
+
+	if ($start >= UTF8_HANGUL_FIRST && $end <= UTF8_HANGUL_LAST)
+	{
+		/**
+		* We do not store Hangul syllables in the array
+		*/
+		continue;
+	}
+
+	if ($p[2] == 'M')
+	{
+		$val = UNICODE_QC_MAYBE;
+	}
+	else
+	{
+		$val = UNICODE_QC_NO;
+	}
+
+	if ($p[1] == 'NFKC_QC')
+	{
+		$file = 'utf_nfkc_qc';
+	}
+	else
+	{
+		$file = 'utf_nfc_qc';
+	}
+
+	for ($i = $start; $i <= $end; ++$i)
+	{
+		/**
+		* The vars have the same name as the file: $utf_nfc_qc is in utf_nfc_qc.php
+		*/
+		$file_contents[$file][$file][cp_to_utf($i)] = $val;
+	}
+}
+fclose($fp);
+
+/**
+* Do mappings
+*/
+echo "Loading Unicode decomposition mappings\n";
+$fp = fopen($phpbb_root_path . 'develop/UnicodeData.txt', 'rt');
+
+$map = array();
+while (!feof($fp))
+{
+	$p = explode(';', fgets($fp, 1024));
+	$cp = hexdec($p[0]);
+
+	if (!empty($p[3]))
+	{
+		/**
+		* Store combining class > 0
+		*/
+		$file_contents['utf_normalizer_common']['utf_combining_class'][cp_to_utf($cp)] = (int) $p[3];
+	}
+
+	if (!isset($p[5]) || !preg_match_all('#[0-9A-F]+#', strip_tags($p[5]), $m))
+	{
+		continue;
+	}
+
+	if (strpos($p[5], '>'))
+	{
+		$map['NFKD'][$cp] = implode(' ', array_map('hexdec', $m[0]));
+	}
+	else
+	{
+		$map['NFD'][$cp] = $map['NFKD'][$cp] = implode(' ', array_map('hexdec', $m[0]));
+	}
+}
+fclose($fp);
+
+/**
+* Build the canonical composition table
+*/
+echo "Generating the Canonical Composition table\n";
+foreach ($map['NFD'] as $cp => $decomp_seq)
+{
+	if (!strpos($decomp_seq, ' ') || isset($exclude[$cp]))
+	{
+		/**
+		* Singletons are excluded from canonical composition
+		*/
+		continue;
+	}
+
+	$utf_seq = implode('', array_map('cp_to_utf', explode(' ', $decomp_seq)));
+
+	if (!isset($file_contents['utf_canonical_comp']['utf_canonical_comp'][$utf_seq]))
+	{
+		$file_contents['utf_canonical_comp']['utf_canonical_comp'][$utf_seq] = cp_to_utf($cp);
+	}
+}
+
+/**
+* Decompose the NF[K]D mappings recursively and prepare the file contents
+*/
+echo "Generating the Canonical and Compatibility Decomposition tables\n\n";
+foreach ($map as $type => $decomp_map)
+{
+	foreach ($decomp_map as $cp => $decomp_seq)
+	{
+		$decomp_map[$cp] = decompose($decomp_map, $decomp_seq);
+	}
+	unset($decomp_seq);
+
+	if ($type == 'NFKD')
+	{
+		$file = 'utf_compatibility_decomp';
+		$var = 'utf_compatibility_decomp';
+	}
+	else
+	{
+		$file = 'utf_canonical_decomp';
+		$var = 'utf_canonical_decomp';
+	}
+
+	/**
+	* Generate the corresponding file
+	*/
+	foreach ($decomp_map as $cp => $decomp_seq)
+	{
+		$file_contents[$file][$var][cp_to_utf($cp)] = implode('', array_map('cp_to_utf', explode(' ', $decomp_seq)));
+	}
+}
+
+/**
+* Generate and/or alter the files
+*/
+foreach ($file_contents as $file => $contents)
+{
+	/**
+	* Generate a new file
+	*/
+	echo "Writing to $file.$phpEx\n";
+
+	if (!$fp = fopen($phpbb_root_path . 'includes/utf/data/' . $file . '.' . $phpEx, 'wb'))
+	{
+		trigger_error('Cannot open ' . $file . ' for write');
+	}
+
+	fwrite($fp, '<?php');
+	foreach ($contents as $var => $val)
+	{
+		fwrite($fp, "\n\$GLOBALS[" . my_var_export($var) . ']=' . my_var_export($val) . ";");
+	}
+	fclose($fp);
+}
+
+echo "\n*** UTF-8 normalization tables done\n\n";
+
+/**
+* Now we'll generate the files needed by the search indexer
+*/
+echo "Generating search indexer tables\n";
+
+$fp = fopen($phpbb_root_path . 'develop/UnicodeData.txt', 'rt');
+
+$map = array();
+while ($line = fgets($fp, 1024))
+{
+	/**
+	* The current line is split, $m[0] hold the codepoint in hexadecimal and
+	* all other fields numbered as in http://www.unicode.org/Public/UNIDATA/UCD.html#UnicodeData.txt
+	*/
+	$m = explode(';', $line);
+
+	/**
+	* @var	integer	$cp			Current char codepoint
+	* @var	string	$utf_char	UTF-8 representation of current char
+	*/
+	$cp = hexdec($m[0]);
+	$utf_char = cp_to_utf($cp);
+
+	/**
+	* $m[2] holds the "General Category" of the character
+	* @link http://www.unicode.org/Public/UNIDATA/UCD.html#General_Category_Values
+	*/
+	switch ($m[2][0])
+	{
+		case 'L':
+			/**
+			* We allow all letters and map them to their lowercased counterpart on the fly
+			*/
+			$map_to_hex = (isset($m[13][0])) ? $m[13] : $m[0];
+
+			if (preg_match('#^LATIN.*(?:LETTER|LIGATURE) ([A-Z]{2}(?![A-Z]))$#', $m[1], $capture))
+			{
+				/**
+				* Special hack for some latin ligatures. Using the name of a character
+				* is bad practice, but for now it works well enough.
+				*
+				* @todo Note that ligatures with combining marks such as U+01E2 are
+				* not supported at this time
+				*/
+				$map[$cp] = strtolower($capture[1]);
+			}
+			else if (isset($m[13][0]))
+			{
+				/**
+				* If the letter has a lowercased form, use it
+				*/
+				$map[$cp] = hex_to_utf($m[13]);
+			}
+			else
+			{
+				/**
+				* In all other cases, map the letter to itself
+				*/
+				$map[$cp] = $utf_char;
+			}
+			break;
+
+		case 'M':
+			/**
+			* We allow all marks, they are mapped to themselves
+			*/
+			$map[$cp] = $utf_char;
+			break;
+
+		case 'N':
+			/**
+			* We allow all numbers, but we map them to their numeric value whenever
+			* possible. The numeric value (field #8) is in ASCII already
+			*
+			* @todo Note that fractions such as U+00BD will be converted to something
+			* like "1/2", with a slash. However, "1/2" entered in ASCII is converted
+			* to "1 2". This will have to be fixed.
+			*/
+			$map[$cp] = (isset($m[8][0])) ? $m[8] : $utf_char;
+			break;
+
+		default:
+			/**
+			* Everything else is ignored, skip to the next line
+			*/
+			continue 2;
+	}
+}
+fclose($fp);
+
+/**
+* Add some cheating
+*/
+$cheats = array(
+	'00DF'	=>	'ss',		#	German sharp S
+	'00C5'	=>	'ae',		#	Capital A with diaeresis
+	'00E4'	=>	'ae',		#	Small A with diaeresis
+	'00D6'	=>	'oe',		#	Capital O with diaeresis
+	'00F6'	=>	'oe',		#	Small O with diaeresis
+	'00DC'	=>	'ue',		#	Capital U with diaeresis
+	'00FC'	=>	'ue',		#	Small U with diaeresis
+);
+
+/**
+* Add our "cheat replacements" to the map
+*/
+foreach ($cheats as $hex => $map_to)
+{
+	$map[hexdec($hex)] = $map_to;
+}
+
+/**
+* Split the map into smaller blocks
+*/
+$file_contents = array();
+foreach ($map as $cp => $map_to)
+{
+	$file_contents[$cp >> 11][cp_to_utf($cp)] = $map_to;
+}
+unset($map);
+
+foreach ($file_contents as $idx => $contents)
+{
+	echo "Writing to search_indexer_$idx.$phpEx\n";
+	$fp = fopen($phpbb_root_path . 'includes/utf/data/search_indexer_' . $idx . '.' . $phpEx, 'wb');
+	fwrite($fp, '<?php return ' . my_var_export($contents) . ';');
+	fclose($fp);
+}
+echo "\n*** Search indexer tables done\n\n";
+
+
+die("\nAll done!\n");
+
+
+////////////////////////////////////////////////////////////////////////////////
+//                             Internal functions                             //
+////////////////////////////////////////////////////////////////////////////////
+
+/**
+* Decompose a sequence recusively
+*
+* @param	array	$decomp_map	Decomposition mapping, passed by reference
+* @param	string	$decomp_seq	Decomposition sequence as decimal codepoints separated with a space
+* @return	string				Decomposition sequence, fully decomposed
+*/
+function decompose(&$decomp_map, $decomp_seq)
+{
+	$ret = array();
+	foreach (explode(' ', $decomp_seq) as $cp)
+	{
+		if (isset($decomp_map[$cp]))
+		{
+			$ret[] = decompose($decomp_map, $decomp_map[$cp]);
+		}
+		else
+		{
+			$ret[] = $cp;
+		}
+	}
+
+	return implode(' ', $ret);
+}
+
+
+/**
+* Return a parsable string representation of a variable
+*
+* This is function is limited to array/strings/integers
+*
+* @param	mixed	$var		Variable
+* @return	string				PHP code representing the variable
+*/
+function my_var_export($var)
+{
+	if (is_array($var))
+	{
+		$lines = array();
+
+		foreach ($var as $k => $v)
+		{
+			$lines[] = my_var_export($k) . '=>' . my_var_export($v);
+		}
+
+		return 'array(' . implode(',', $lines) . ')';
+	}
+	else if (is_string($var))
+	{
+		return "'" . str_replace(array('\\', "'"), array('\\\\', "\\'"), $var) . "'";
+	}
+	else
+	{
+		return $var;
+	}
+}
+
+/**
+* Download a file to the develop/ dir
+*
+* @param	string	$url		URL of the file to download
+* @return	void
+*/
+function download($url)
+{
+	global $phpbb_root_path;
+
+	if (file_exists($phpbb_root_path . 'develop/' . basename($url)))
+	{
+		return;
+	}
+
+	echo 'Downloading from ', $url, ' ';
+
+	if (!$fpr = fopen($url, 'rb'))
+	{
+		die("Can't download from $url\nPlease download it yourself and put it in the develop/ dir, kthxbai");
+	}
+
+	if (!$fpw = fopen($phpbb_root_path . 'develop/' . basename($url), 'wb'))
+	{
+		die("Can't open develop/" . basename($url) . " for output... please check your permissions or something");
+	}
+
+	$i = 0;
+	$chunk = 32768;
+	$done = '';
+
+	while (!feof($fpr))
+	{
+		$i += fwrite($fpw, fread($fpr, $chunk));
+		echo str_repeat("\x08", strlen($done));
+
+		$done = ($i >> 10) . ' KiB';
+		echo $done;
+	}
+	fclose($fpr);
+	fclose($fpw);
+
+	echo "\n";
+}
+
+/**
+* Convert a codepoint in hexadecimal to a UTF-8 char
+*
+* @param	string	$hex		Codepoint, in hexadecimal
+* @return	string				UTF-8 char
+*/
+function hex_to_utf($hex)
+{
+	return cp_to_utf(hexdec($hex));
+}
+
+/**
+* Return a UTF string formed from a sequence of codepoints in hexadecimal
+*
+* @param	string	$seq		Sequence of codepoints, separated with a space
+* @return	string				UTF-8 string
+*/
+function hexseq_to_utf($seq)
+{
+	return implode('', array_map('hex_to_utf', explode(' ', $seq)));
+}
+
+/**
+* Convert a codepoint to a UTF-8 char
+*
+* @param	integer	$cp			Unicode codepoint
+* @return	string				UTF-8 string
+*/
+function cp_to_utf($cp)
+{
+	if ($cp > 0xFFFF)
+	{
+		return chr(0xF0 | ($cp >> 18)) . chr(0x80 | (($cp >> 12) & 0x3F)) . chr(0x80 | (($cp >> 6) & 0x3F)) . chr(0x80 | ($cp & 0x3F));
+	}
+	else if ($cp > 0x7FF)
+	{
+		return chr(0xE0 | ($cp >> 12)) . chr(0x80 | (($cp >> 6) & 0x3F)) . chr(0x80 | ($cp & 0x3F));
+	}
+	else if ($cp > 0x7F)
+	{
+		return chr(0xC0 | ($cp >> 6)) . chr(0x80 | ($cp & 0x3F));
+	}
+	else
+	{
+		return chr($cp);
+	}
+}
\ No newline at end of file
-- 
cgit v1.2.1


From af5b9a96409d788733fcb1ff367e0c7fb0583702 Mon Sep 17 00:00:00 2001
From: Igor Wiedler <igor@wiedler.ch>
Date: Tue, 9 Nov 2010 08:59:25 +0100
Subject: [ticket/9556] Drop php closing tags, add trailing newline

Closing tags converted using Oleg's script.
remove-php-end-tags.py -a .

Trailing newlines added using the following where $ext is file extension.
find . -type f -name "*.$ext" -print | xargs printf "e %s\nw\n" | ed -s;

Extensions: php, css, html, js, xml.

PHPBB3-9556
---
 phpBB/develop/generate_utf_tables.php | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'phpBB/develop/generate_utf_tables.php')

diff --git a/phpBB/develop/generate_utf_tables.php b/phpBB/develop/generate_utf_tables.php
index 3d5188163d..dbef125803 100644
--- a/phpBB/develop/generate_utf_tables.php
+++ b/phpBB/develop/generate_utf_tables.php
@@ -569,4 +569,4 @@ function cp_to_utf($cp)
 	{
 		return chr($cp);
 	}
-}
\ No newline at end of file
+}
-- 
cgit v1.2.1


From a759704b39fc1c1353f865a633759b1369589b67 Mon Sep 17 00:00:00 2001
From: Yuriy Rusko <github@rusko.net>
Date: Tue, 27 May 2014 20:18:06 +0200
Subject: [ticket/12594] Remove @package tags and update file headers

PHPBB3-12594
---
 phpBB/develop/generate_utf_tables.php | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

(limited to 'phpBB/develop/generate_utf_tables.php')

diff --git a/phpBB/develop/generate_utf_tables.php b/phpBB/develop/generate_utf_tables.php
index e5d907d6a5..16a449679b 100644
--- a/phpBB/develop/generate_utf_tables.php
+++ b/phpBB/develop/generate_utf_tables.php
@@ -1,9 +1,13 @@
 <?php
 /**
 *
-* @package phpBB3
-* @copyright (c) 2005 phpBB Group
-* @license http://opensource.org/licenses/gpl-2.0.php GNU General Public License v2
+* This file is part of the phpBB Forum Software package.
+*
+* @copyright (c) phpBB Limited <https://www.phpbb.com>
+* @license GNU General Public License, version 2 (GPL-2.0)
+*
+* For full copyright and license information, please see
+* the docs/CREDITS.txt file.
 *
 */
 
-- 
cgit v1.2.1