aboutsummaryrefslogtreecommitdiffstats
path: root/phpBB/develop/generate_utf_tables.php
diff options
context:
space:
mode:
Diffstat (limited to 'phpBB/develop/generate_utf_tables.php')
-rw-r--r--phpBB/develop/generate_utf_tables.php290
1 files changed, 1 insertions, 289 deletions
diff --git a/phpBB/develop/generate_utf_tables.php b/phpBB/develop/generate_utf_tables.php
index 16a449679b..888c07676d 100644
--- a/phpBB/develop/generate_utf_tables.php
+++ b/phpBB/develop/generate_utf_tables.php
@@ -32,262 +32,11 @@ $phpbb_root_path = '../';
$phpEx = substr(strrchr(__FILE__, '.'), 1);
echo "Checking for required files\n";
-download('http://www.unicode.org/Public/UNIDATA/CompositionExclusions.txt');
-download('http://www.unicode.org/Public/UNIDATA/DerivedNormalizationProps.txt');
download('http://www.unicode.org/Public/UNIDATA/UnicodeData.txt');
echo "\n";
-require_once($phpbb_root_path . 'includes/utf/utf_normalizer.' . $phpEx);
-$file_contents = array();
-
-/**
-* Generate some Hangul/Jamo stuff
-*/
-echo "\nGenerating Hangul and Jamo tables\n";
-for ($i = 0; $i < UNICODE_HANGUL_LCOUNT; ++$i)
-{
- $utf_char = cp_to_utf(UNICODE_HANGUL_LBASE + $i);
- $file_contents['utf_normalizer_common']['utf_jamo_index'][$utf_char] = $i * UNICODE_HANGUL_VCOUNT * UNICODE_HANGUL_TCOUNT + UNICODE_HANGUL_SBASE;
- $file_contents['utf_normalizer_common']['utf_jamo_type'][$utf_char] = UNICODE_JAMO_L;
-}
-
-for ($i = 0; $i < UNICODE_HANGUL_VCOUNT; ++$i)
-{
- $utf_char = cp_to_utf(UNICODE_HANGUL_VBASE + $i);
- $file_contents['utf_normalizer_common']['utf_jamo_index'][$utf_char] = $i * UNICODE_HANGUL_TCOUNT;
- $file_contents['utf_normalizer_common']['utf_jamo_type'][$utf_char] = UNICODE_JAMO_V;
-}
-
-for ($i = 0; $i < UNICODE_HANGUL_TCOUNT; ++$i)
-{
- $utf_char = cp_to_utf(UNICODE_HANGUL_TBASE + $i);
- $file_contents['utf_normalizer_common']['utf_jamo_index'][$utf_char] = $i;
- $file_contents['utf_normalizer_common']['utf_jamo_type'][$utf_char] = UNICODE_JAMO_T;
-}
-
-/**
-* Load the CompositionExclusions table
-*/
-echo "Loading CompositionExclusion\n";
-$fp = fopen('CompositionExclusions.txt', 'rt');
-
-$exclude = array();
-while (!feof($fp))
-{
- $line = fgets($fp, 1024);
-
- if (!strpos(' 0123456789ABCDEFabcdef', $line[0]))
- {
- continue;
- }
-
- $cp = strtok($line, ' ');
-
- if ($pos = strpos($cp, '..'))
- {
- $start = hexdec(substr($cp, 0, $pos));
- $end = hexdec(substr($cp, $pos + 2));
-
- for ($i = $start; $i < $end; ++$i)
- {
- $exclude[$i] = 1;
- }
- }
- else
- {
- $exclude[hexdec($cp)] = 1;
- }
-}
-fclose($fp);
-
-/**
-* Load QuickCheck tables
-*/
-echo "Generating QuickCheck tables\n";
-$fp = fopen('DerivedNormalizationProps.txt', 'rt');
-
-while (!feof($fp))
-{
- $line = fgets($fp, 1024);
-
- if (!strpos(' 0123456789ABCDEFabcdef', $line[0]))
- {
- continue;
- }
-
- $p = array_map('trim', explode(';', strtok($line, '#')));
-
- /**
- * Capture only NFC_QC, NFKC_QC
- */
- if (!preg_match('#^NFK?C_QC$#', $p[1]))
- {
- continue;
- }
-
- if ($pos = strpos($p[0], '..'))
- {
- $start = hexdec(substr($p[0], 0, $pos));
- $end = hexdec(substr($p[0], $pos + 2));
- }
- else
- {
- $start = $end = hexdec($p[0]);
- }
-
- if ($start >= UTF8_HANGUL_FIRST && $end <= UTF8_HANGUL_LAST)
- {
- /**
- * We do not store Hangul syllables in the array
- */
- continue;
- }
-
- if ($p[2] == 'M')
- {
- $val = UNICODE_QC_MAYBE;
- }
- else
- {
- $val = UNICODE_QC_NO;
- }
-
- if ($p[1] == 'NFKC_QC')
- {
- $file = 'utf_nfkc_qc';
- }
- else
- {
- $file = 'utf_nfc_qc';
- }
-
- for ($i = $start; $i <= $end; ++$i)
- {
- /**
- * The vars have the same name as the file: $utf_nfc_qc is in utf_nfc_qc.php
- */
- $file_contents[$file][$file][cp_to_utf($i)] = $val;
- }
-}
-fclose($fp);
-
/**
-* Do mappings
-*/
-echo "Loading Unicode decomposition mappings\n";
-$fp = fopen($phpbb_root_path . 'develop/UnicodeData.txt', 'rt');
-
-$map = array();
-while (!feof($fp))
-{
- $p = explode(';', fgets($fp, 1024));
- $cp = hexdec($p[0]);
-
- if (!empty($p[3]))
- {
- /**
- * Store combining class > 0
- */
- $file_contents['utf_normalizer_common']['utf_combining_class'][cp_to_utf($cp)] = (int) $p[3];
- }
-
- if (!isset($p[5]) || !preg_match_all('#[0-9A-F]+#', strip_tags($p[5]), $m))
- {
- continue;
- }
-
- if (strpos($p[5], '>'))
- {
- $map['NFKD'][$cp] = implode(' ', array_map('hexdec', $m[0]));
- }
- else
- {
- $map['NFD'][$cp] = $map['NFKD'][$cp] = implode(' ', array_map('hexdec', $m[0]));
- }
-}
-fclose($fp);
-
-/**
-* Build the canonical composition table
-*/
-echo "Generating the Canonical Composition table\n";
-foreach ($map['NFD'] as $cp => $decomp_seq)
-{
- if (!strpos($decomp_seq, ' ') || isset($exclude[$cp]))
- {
- /**
- * Singletons are excluded from canonical composition
- */
- continue;
- }
-
- $utf_seq = implode('', array_map('cp_to_utf', explode(' ', $decomp_seq)));
-
- if (!isset($file_contents['utf_canonical_comp']['utf_canonical_comp'][$utf_seq]))
- {
- $file_contents['utf_canonical_comp']['utf_canonical_comp'][$utf_seq] = cp_to_utf($cp);
- }
-}
-
-/**
-* Decompose the NF[K]D mappings recursively and prepare the file contents
-*/
-echo "Generating the Canonical and Compatibility Decomposition tables\n\n";
-foreach ($map as $type => $decomp_map)
-{
- foreach ($decomp_map as $cp => $decomp_seq)
- {
- $decomp_map[$cp] = decompose($decomp_map, $decomp_seq);
- }
- unset($decomp_seq);
-
- if ($type == 'NFKD')
- {
- $file = 'utf_compatibility_decomp';
- $var = 'utf_compatibility_decomp';
- }
- else
- {
- $file = 'utf_canonical_decomp';
- $var = 'utf_canonical_decomp';
- }
-
- /**
- * Generate the corresponding file
- */
- foreach ($decomp_map as $cp => $decomp_seq)
- {
- $file_contents[$file][$var][cp_to_utf($cp)] = implode('', array_map('cp_to_utf', explode(' ', $decomp_seq)));
- }
-}
-
-/**
-* Generate and/or alter the files
-*/
-foreach ($file_contents as $file => $contents)
-{
- /**
- * Generate a new file
- */
- echo "Writing to $file.$phpEx\n";
-
- if (!$fp = fopen($phpbb_root_path . 'includes/utf/data/' . $file . '.' . $phpEx, 'wb'))
- {
- trigger_error('Cannot open ' . $file . ' for write');
- }
-
- fwrite($fp, '<?php');
- foreach ($contents as $var => $val)
- {
- fwrite($fp, "\n\$GLOBALS[" . my_var_export($var) . ']=' . my_var_export($val) . ";");
- }
- fclose($fp);
-}
-
-echo "\n*** UTF-8 normalization tables done\n\n";
-
-/**
-* Now we'll generate the files needed by the search indexer
+* Generate the files needed by the search indexer
*/
echo "Generating search indexer tables\n";
@@ -425,32 +174,6 @@ die("\nAll done!\n");
////////////////////////////////////////////////////////////////////////////////
/**
-* Decompose a sequence recusively
-*
-* @param array $decomp_map Decomposition mapping, passed by reference
-* @param string $decomp_seq Decomposition sequence as decimal codepoints separated with a space
-* @return string Decomposition sequence, fully decomposed
-*/
-function decompose(&$decomp_map, $decomp_seq)
-{
- $ret = array();
- foreach (explode(' ', $decomp_seq) as $cp)
- {
- if (isset($decomp_map[$cp]))
- {
- $ret[] = decompose($decomp_map, $decomp_map[$cp]);
- }
- else
- {
- $ret[] = $cp;
- }
- }
-
- return implode(' ', $ret);
-}
-
-
-/**
* Return a parsable string representation of a variable
*
* This is function is limited to array/strings/integers
@@ -538,17 +261,6 @@ function hex_to_utf($hex)
}
/**
-* Return a UTF string formed from a sequence of codepoints in hexadecimal
-*
-* @param string $seq Sequence of codepoints, separated with a space
-* @return string UTF-8 string
-*/
-function hexseq_to_utf($seq)
-{
- return implode('', array_map('hex_to_utf', explode(' ', $seq)));
-}
-
-/**
* Convert a codepoint to a UTF-8 char
*
* @param integer $cp Unicode codepoint
# description($name)
#
# Return a one-line, human-readable description of the service $name.
#
# Lookup order:
#   1. A built-in table of descriptions keyed by service name. A hit is
#      passed through translate() before being returned.
#   2. Otherwise the first existing file called $name under
#      "$::prefix/etc/rc.d/init.d", "$::prefix/etc/init.d" or
#      "$::prefix/etc/xinetd.d" is read and its description is extracted
#      from the leading '#' comments.
#
# In both cases newlines are turned into spaces and trailing whitespace is
# removed. An empty string is returned when nothing usable is found.
#
# NOTE(review): N_, translate, find and cat_ are not defined in this chunk;
# presumably N_ only marks a string for translation, find returns the first
# list element for which the block is true, and cat_ returns the contents of
# a file -- confirm against the modules this file imports.
sub description {
    my ($name) = @_;

    my %services = (
        alsa => N_("Launch the ALSA (Advanced Linux Sound Architecture) sound system"),
        anacron => N_("Anacron is a periodic command scheduler."),
        apmd => N_("apmd is used for monitoring battery status and logging it via syslog. It can also be used for shutting down the machine when the battery is low."),
        atd => N_("Runs commands scheduled by the at command at the time specified when at was run, and runs batch commands when the load average is low enough."),
        crond => N_("cron is a standard UNIX program that runs user-specified programs at periodic scheduled times. vixie cron adds a number of features to the basic UNIX cron, including better security and more powerful configuration options."),
        cups => N_("Common UNIX Printing System (CUPS) is an advanced printer spooling system"),
        dm => N_("Launches the graphical display manager"),
        fam => N_("FAM is a file monitoring daemon. It is used to get reports when files change. It is used by GNOME and KDE"),
        gpm => N_("GPM adds mouse support to text-based Linux applications such the Midnight Commander. It also allows mouse-based console cut-and-paste operations, and includes support for pop-up menus on the console."),
        haldaemon => N_("HAL is a daemon that collects and maintains information about hardware"),
        harddrake => N_("HardDrake runs a hardware probe, and optionally configures new/changed hardware."),
        httpd => N_("Apache is a World Wide Web server. It is used to serve HTML files and CGI."),
        inet => N_("The internet superserver daemon (commonly called inetd) starts a variety of other internet services as needed. It is responsible for starting many services, including telnet, ftp, rsh, and rlogin. 
Disabling inetd disables all of the services it is responsible for."),
        ipchains => N_("Launch packet filtering for Linux kernel 2.2 series, to set up a firewall to protect your machine from network attacks."),
        keytable => N_("This package loads the selected keyboard map as set in /etc/sysconfig/keyboard. This can be selected using the kbdconfig utility. You should leave this enabled for most machines."),
        kheader => N_("Automatic regeneration of kernel header in /boot for /usr/include/linux/{autoconf,version}.h"),
        kudzu => N_("Automatic detection and configuration of hardware at boot."),
        linuxconf => N_("Linuxconf will sometimes arrange to perform various tasks at boot-time to maintain the system configuration."),
        lpd => N_("lpd is the print daemon required for lpr to work properly. It is basically a server that arbitrates print jobs to printer(s)."),
        lvs => N_("Linux Virtual Server, used to build a high-performance and highly available server."),
        messagebus => N_("DBUS is a daemon which broadcasts notifications of system events and other messages"),
        named => N_("named (BIND) is a Domain Name Server (DNS) that is used to resolve host names to IP addresses."),
        netfs => N_("Mounts and unmounts all Network File System (NFS), SMB (Lan Manager/Windows), and NCP (NetWare) mount points."),
        network => N_("Activates/Deactivates all network interfaces configured to start at boot time."),
        nfs => N_("NFS is a popular protocol for file sharing across TCP/IP networks. This service provides NFS server functionality, which is configured via the /etc/exports file."),
        nfslock => N_("NFS is a popular protocol for file sharing across TCP/IP networks. 
This service provides NFS file locking functionality."),
        ntpd => N_("Synchronizes system time using the Network Time Protocol (NTP)"),
        numlock => N_("Automatically switch on numlock key locker under console and Xorg at boot."),
        oki4daemon => N_("Support the OKI 4w and compatible winprinters."),
        pcmcia => N_("PCMCIA support is usually to support things like ethernet and modems in laptops. It will not get started unless configured so it is safe to have it installed on machines that do not need it."),
        portmap => N_("The portmapper manages RPC connections, which are used by protocols such as NFS and NIS. The portmap server must be running on machines which act as servers for protocols which make use of the RPC mechanism."),
        postfix => N_("Postfix is a Mail Transport Agent, which is the program that moves mail from one machine to another."),
        random => N_("Saves and restores system entropy pool for higher quality random number generation."),
        rawdevices => N_("Assign raw devices to block devices (such as hard drive partitions), for the use of applications such as Oracle or DVD players"),
        routed => N_("The routed daemon allows for automatic IP router table updated via the RIP protocol. 
While RIP is widely used on small networks, more complex routing protocols are needed for complex networks."),
        rstatd => N_("The rstat protocol allows users on a network to retrieve performance metrics for any machine on that network."),
        rusersd => N_("The rusers protocol allows users on a network to identify who is logged in on other responding machines."),
        rwhod => N_("The rwho protocol lets remote users get a list of all of the users logged into a machine running the rwho daemon (similar to finger)."),
        saned => N_("SANE (Scanner Access Now Easy) enables to access scanners, video cameras, ..."),
        smb => N_("The SMB/CIFS protocol enables to share access to files & printers and also integrates with a Windows Server domain"),
        sound => N_("Launch the sound system on your machine"),
        sshd => N_("Secure Shell is a network protocol that allows data to be exchanged over a secure channel between two computers"),
        syslog => N_("Syslog is the facility by which many daemons use to log messages to various system log files. It is a good idea to always run syslog."),
        usb => N_("Load the drivers for your usb devices."),
        xfs => N_("Starts the X Font Server."),
        xinetd => N_("Starts other deamons on demand."),
    );

    my $s = $services{$name};
    if ($s) {
        # Built-in description: only needs translating.
        $s = translate($s);
    } else {
        # No built-in text: fall back to the comments of the service's own
        # init/xinetd script, looked up under the installation prefix.
        my $file = find { -e $_ } map { "$::prefix$_/$name" } '/etc/rc.d/init.d', '/etc/init.d', '/etc/xinetd.d';

        # Only read the script when one was actually found; otherwise use an
        # empty text instead of handing an undefined path to cat_.
        $s = defined $file ? cat_($file) : '';
        $s = '' if !defined $s;

        # Join comment lines that are continued with a trailing backslash.
        $s =~ s/\\\s*\n#\s*//mg;

        # Prefer the text following a "description:", "Description:" or
        # "Short-Description:" comment header, up to the next line that does
        # not start with '#' or that is a comment with at most two spaces
        # before its text. Failing that, take the first comment block up to
        # the first line not starting with '#'. Failing that, nothing.
        $s = $s =~ /^#\s+(?:Short-)?[dD]escription:\s+(.*?)^(?:[^#]|# {0,2}\S)/sm ? $1
           : $s =~ /^#\s*(.*?)^[^#]/sm ? $1
           : '';

        # Drop the remaining comment markers inside the captured text.
        $s =~ s/#\s*//mg;
    }

    # Flatten to a single line and trim trailing whitespace.
    $s =~ s/\n/ /gm;
    $s =~ s/\s+$//;
    $s;
}
sub ask_ { my ($in) = @_; my %root_services = ( N("Printing") => [ qw(cups cupslpd lpr lpd oki4daemon hpoj cups-lpd) ], N("Internet") => [ qw(httpd boa tux roxen ftp pftp tftp proftpd wu-ftpd pure-ftpdipsec proftpd-xinetd ipchains iptables ip6tables ipvsadm isdn4linux ibod jabber jabber-icq adsl squid portsentry prelude nessusd junkbuster radvd cddbp ippl iptoip jail.init) ], N("File sharing") => [ qw(nfs-common nfs-server nfslock smb nettalk netfs mcserv autofs amd venus.init auth2.init codasrv.init update.init swat) ], N("System") => [ qw(usb usbd pcmcia irda xinetd inetd kudzu harddrake apmd sound network xfs alsa functions halt kheader killall mandrake_everytime mandrake_firstime random rawdevices single keytable syslog crond medusa-init portmap rpcbind acon anacron atd gpm psacct wine acpid numlock jserver sensors mosix bpowerd bpowerfail fcron powertweak.init ups syslog-ng cvs apcupsd) ], N("Remote Administration") => [ qw(sshd telnetd telnet rsh rlogin rexec webmin cfd heartbeat ldirectord iplog mon vncserver netsaint olympusd drakxtools_http) ], # N("Network Client") => [ qw(ypbind nscd arpwatch fetchmail dnrd_rc diald rsync) ], # N("Network Server") => [ qw(named bootparamd ntpd xntpd chronyd postfix sendmail # imap imaps ipop2 ipop3 pop3s routed yppasswdd ypserv ldap dhcpd dhcrelay # hylafax innd identd rstatd rusersd rwalld rwhod gated # kadmin kprop krb524 krb5kdc krb5server hldsld bayonne sockd dhsd gnu-pop3d # gdips pptpd.conf vrrpd crossfire bnetd pvmd ircd sympa finger ntalk talk) ], N("Database Server") => [ qw(mysql postgresql) ], ); my %services_root; foreach my $root (keys %root_services) { $services_root{$_} = $root foreach @{$root_services{$root}}; }