diff options
Diffstat (limited to 'po/intltool-merge')
-rwxr-xr-x | po/intltool-merge | 657 |
1 files changed, 0 insertions, 657 deletions
diff --git a/po/intltool-merge b/po/intltool-merge deleted file mode 100755 index 06637146..00000000 --- a/po/intltool-merge +++ /dev/null @@ -1,657 +0,0 @@ -#!/usr/bin/perl -w - -# -# The Intltool Message Merger -# -# Copyright (C) 2000, 2002 Free Software Foundation. -# Copyright (C) 2000, 2001 Eazel, Inc -# -# Intltool is free software; you can redistribute it and/or -# modify it under the terms of the GNU General Public License -# version 2 published by the Free Software Foundation. -# -# Intltool is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. -# -# As a special exception to the GNU General Public License, if you -# distribute this file as part of a program that contains a -# configuration script generated by Autoconf, you may include it under -# the same distribution terms that you use for the rest of that program. -# -# Authors: Maciej Stachowiak <mjs@noisehavoc.org> -# Kenneth Christiansen <kenneth@gnu.org> -# Darin Adler <darin@bentspoon.com> -# -# Proper XML UTF-8'ification written by Cyrille Chepelov <chepelov@calixo.net> -# - -## Release information -my $PROGRAM = "intltool-merge"; -my $PACKAGE = "intltool"; -my $VERSION = "0.22"; - -## Loaded modules -use strict; -use Getopt::Long; - -## Scalars used by the option stuff -my $HELP_ARG = 0; -my $VERSION_ARG = 0; -my $BA_STYLE_ARG = 0; -my $XML_STYLE_ARG = 0; -my $KEYS_STYLE_ARG = 0; -my $DESKTOP_STYLE_ARG = 0; -my $SCHEMAS_STYLE_ARG = 0; -my $QUIET_ARG = 0; -my $PASS_THROUGH_ARG = 0; -my $UTF8_ARG = 0; -my $cache_file; - -## Handle options -GetOptions -( - "help" => \$HELP_ARG, - "version" => \$VERSION_ARG, - "quiet|q" => \$QUIET_ARG, - "oaf-style|o" => \$BA_STYLE_ARG, ## for compatibility - "ba-style|b" => \$BA_STYLE_ARG, - "xml-style|x" => \$XML_STYLE_ARG, - "keys-style|k" => \$KEYS_STYLE_ARG, - "desktop-style|d" => \$DESKTOP_STYLE_ARG, - "schemas-style|s" => \$SCHEMAS_STYLE_ARG, - "pass-through|p" => \$PASS_THROUGH_ARG, - "utf8|u" => \$UTF8_ARG, - "cache|c=s" => \$cache_file - ) or &error; - -my $PO_DIR; -my $FILE; -my $OUTFILE; - -my %po_files_by_lang = (); -my %translations = (); - -# Use this instead of \w for XML files to handle more possible characters. -my $w = "[-A-Za-z0-9._:]"; - -# XML quoted string contents -my $q = "[^\\\"]*"; - -## Check for options. - -if ($VERSION_ARG) { - &print_version; -} elsif ($HELP_ARG) { - &print_help; -} elsif ($BA_STYLE_ARG && @ARGV > 2) { - &preparation; - &print_message; - &ba_merge_translations; - &finalize; -} elsif ($XML_STYLE_ARG && @ARGV > 2) { - &utf8_sanity_check; - &preparation; - &print_message; - &xml_merge_translations; - &finalize; -} elsif ($KEYS_STYLE_ARG && @ARGV > 2) { - &utf8_sanity_check; - &preparation; - &print_message; - &keys_merge_translations; - &finalize; -} elsif ($DESKTOP_STYLE_ARG && @ARGV > 2) { - &preparation; - &print_message; - &desktop_merge_translations; - &finalize; -} elsif ($SCHEMAS_STYLE_ARG && @ARGV > 2) { - &preparation; - &print_message; - &schemas_merge_translations; - &finalize; -} else { - &print_help; -} - -exit; - -## Sub for printing release information -sub print_version -{ - print "${PROGRAM} (${PACKAGE}) ${VERSION}\n"; - print "Written by Maciej Stachowiak, Darin Adler and Kenneth Christiansen.\n\n"; - print "Copyright (C) 2000-2002 Free Software Foundation, Inc.\n"; - print "Copyright (C) 2000-2001 Eazel, Inc.\n"; - print "This is free software; see the source for copying conditions. There is NO\n"; - print "warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n"; - exit; -} - -## Sub for printing usage information -sub print_help -{ - print "Usage: ${PROGRAM} [OPTIONS] PO_DIRECTORY FILENAME OUTPUT_FILE\n"; - print "Generates an output file that includes translated versions of some attributes,\n"; - print "from an untranslated source and a po directory that includes translations.\n\n"; - print " -b, --ba-style includes translations in the bonobo-activation style\n"; - print " -d, --desktop-style includes translations in the desktop style\n"; - print " -k, --keys-style includes translations in the keys style\n"; - print " -s, --schemas-style includes translations in the schemas style\n"; - print " -x, --xml-style includes translations in the standard xml style\n"; - print " -u, --utf8 convert all strings to UTF-8 before merging\n"; - print " -p, --pass-through use strings as found in .po files, without\n"; - print " conversion (STRONGLY unrecommended with -x)\n"; - print " -q, --quiet suppress most messages\n"; - print " --help display this help and exit\n"; - print " --version output version information and exit\n"; - print "\nReport bugs to bugzilla.gnome.org, module intltool, or contact us through \n"; - print "<xml-i18n-tools-list\@gnome.org>.\n"; - exit; -} - - -## Sub for printing error messages -sub print_error -{ - print "Try `${PROGRAM} --help' for more information.\n"; - exit; -} - - -sub print_message -{ - print "Merging translations into $OUTFILE.\n" unless $QUIET_ARG; -} - - -sub preparation -{ - $PO_DIR = $ARGV[0]; - $FILE = $ARGV[1]; - $OUTFILE = $ARGV[2]; - - &gather_po_files; - &get_translation_database; -} - -# General-purpose code for looking up translations in .po files - -sub po_file2lang -{ - my ($tmp) = @_; - $tmp =~ s/^.*\/(.*)\.po$/$1/; - return $tmp; -} - -sub gather_po_files -{ - for my $po_file (glob "$PO_DIR/*.po") { - $po_files_by_lang{po_file2lang($po_file)} = $po_file; - } -} - -sub get_po_encoding -{ - my ($in_po_file) = @_; - my $encoding = ""; - - open IN_PO_FILE, $in_po_file or die; - while (<IN_PO_FILE>) { - ## example: "Content-Type: text/plain; charset=ISO-8859-1\n" - if (/Content-Type\:.*charset=([-a-zA-Z0-9]+)\\n/) { - $encoding = $1; - last; - } - } - close IN_PO_FILE; - - if (!$encoding) { - print "Warning: no encoding found in $in_po_file. Assuming ISO-8859-1\n"; - $encoding = "ISO-8859-1"; - } - return $encoding -} - -sub utf8_sanity_check -{ - if (!$UTF8_ARG) { - if (!$PASS_THROUGH_ARG) { - $PASS_THROUGH_ARG="1"; - } - } -} - -sub get_translation_database -{ - if ($cache_file) { - &get_cached_translation_database; - } else { - &create_translation_database; - } -} - -sub get_newest_po_age -{ - my $newest_age; - - foreach my $file (values %po_files_by_lang) { - my $file_age = -M $file; - $newest_age = $file_age if !$newest_age || $file_age < $newest_age; - } - - return $newest_age; -} - -sub create_cache -{ - print "Generating and caching the translation database\n" unless $QUIET_ARG; - - &create_translation_database; - - open CACHE, ">$cache_file" || die; - print CACHE join "\x01", %translations; - close CACHE; -} - -sub load_cache -{ - print "Found cached translation database\n" unless $QUIET_ARG; - - my $contents; - open CACHE, "<$cache_file" || die; - { - local $/; - $contents = <CACHE>; - } - close CACHE; - %translations = split "\x01", $contents; -} - -sub get_cached_translation_database -{ - my $cache_file_age = -M $cache_file; - if (defined $cache_file_age) { - if ($cache_file_age <= &get_newest_po_age) { - &load_cache; - return; - } - print "Found too-old cached translation database\n" unless $QUIET_ARG; - } - - &create_cache; -} - -sub create_translation_database -{ - for my $lang (keys %po_files_by_lang) { - my $po_file = $po_files_by_lang{$lang}; - - if ($UTF8_ARG) { - my $encoding = get_po_encoding ($po_file); - if (lc $encoding eq "utf-8") { - open PO_FILE, "<$po_file"; - } else { - my $iconv = $ENV{"INTLTOOL_ICONV"} || "iconv"; - open PO_FILE, "$iconv -f $encoding -t UTF-8 $po_file|"; - } - } else { - open PO_FILE, "<$po_file"; - } - - my $nextfuzzy = 0; - my $inmsgid = 0; - my $inmsgstr = 0; - my $msgid = ""; - my $msgstr = ""; - while (<PO_FILE>) { - $nextfuzzy = 1 if /^#, fuzzy/; - if (/^msgid "((\\.|[^\\])*)"/ ) { - $translations{$lang, $msgid} = $msgstr if $inmsgstr && $msgid && $msgstr; - $msgid = ""; - $msgstr = ""; - - if ($nextfuzzy) { - $inmsgid = 0; - } else { - $msgid = unescape_po_string($1); - $inmsgid = 1; - } - $inmsgstr = 0; - $nextfuzzy = 0; - } - if (/^msgstr "((\\.|[^\\])*)"/) { - $msgstr = unescape_po_string($1); - $inmsgstr = 1; - $inmsgid = 0; - } - if (/^"((\\.|[^\\])*)"/) { - $msgid .= unescape_po_string($1) if $inmsgid; - $msgstr .= unescape_po_string($1) if $inmsgstr; - } - } - $translations{$lang, $msgid} = $msgstr if $inmsgstr && $msgid && $msgstr; - } -} - -sub finalize -{ -} - -sub unescape_one_sequence -{ - my ($sequence) = @_; - - return "\\" if $sequence eq "\\\\"; - return "\"" if $sequence eq "\\\""; - - # gettext also handles \n, \t, \b, \r, \f, \v, \a, \xxx (octal), - # \xXX (hex) and has a comment saying they want to handle \u and \U. - - return $sequence; -} - -sub unescape_po_string -{ - my ($string) = @_; - - $string =~ s/(\\.)/unescape_one_sequence($1)/eg; - - return $string; -} - -sub entity_decode -{ - local ($_) = @_; - - s/'/'/g; # ' - s/"/"/g; # " - s/&/&/g; - - return $_; -} - -sub entity_encode -{ - my ($pre_encoded) = @_; - - my @list_of_chars = unpack ('C*', $pre_encoded); - - if ($PASS_THROUGH_ARG) { - return join ('', map (&entity_encode_int_even_high_bit, @list_of_chars)); - } else { - return join ('', map (&entity_encode_int_minimalist, @list_of_chars)); - } -} - -sub entity_encode_int_minimalist -{ - return """ if $_ == 34; - return "&" if $_ == 38; - return "'" if $_ == 39; - return chr $_; -} - -sub entity_encode_int_even_high_bit -{ - if ($_ > 127 || $_ == 34 || $_ == 38 || $_ == 39) { - # the ($_ > 127) should probably be removed - return "&#" . $_ . ";"; - } else { - return chr $_; - } -} - -sub entity_encoded_translation -{ - my ($lang, $string) = @_; - - my $translation = $translations{$lang, $string}; - return $string if !$translation; - return entity_encode ($translation); -} - -## XML (bonobo-activation specific) merge code - -sub ba_merge_translations -{ - my $source; - - { - local $/; # slurp mode - open INPUT, "<$FILE" or die "can't open $FILE: $!"; - $source = <INPUT>; - close INPUT; - } - - open OUTPUT, ">$OUTFILE" or die "can't open $OUTFILE: $!"; - - while ($source =~ s|^(.*?)([ \t]*<\s*$w+\s+($w+\s*=\s*"$q"\s*)+/?>)([ \t]*\n)?||s) { - print OUTPUT $1; - - my $node = $2 . "\n"; - - my @strings = (); - $_ = $node; - while (s/(\s)_($w+\s*=\s*"($q)")/$1$2/s) { - push @strings, entity_decode($3); - } - print OUTPUT; - - my %langs; - for my $string (@strings) { - for my $lang (keys %po_files_by_lang) { - $langs{$lang} = 1 if $translations{$lang, $string}; - } - } - - for my $lang (sort keys %langs) { - $_ = $node; - s/(\sname\s*=\s*)"($q)"/$1"$2-$lang"/s; - s/(\s)_($w+\s*=\s*")($q)"/$1 . $2 . entity_encoded_translation($lang, $3) . '"'/seg; - print OUTPUT; - } - } - - print OUTPUT $source; - - close OUTPUT; -} - - -## XML (non-bonobo-activation) merge code - -sub xml_merge_translations -{ - my $source; - - { - local $/; # slurp mode - open INPUT, "<$FILE" or die "can't open $FILE: $!"; - $source = <INPUT>; - close INPUT; - } - - open OUTPUT, ">$OUTFILE" or die; - - # FIXME: support attribute translations - - # Empty nodes never need translation, so unmark all of them. - # For example, <_foo/> is just replaced by <foo/>. - $source =~ s|<\s*_($w+)\s*/>|<$1/>|g; - - # Support for <_foo>blah</_foo> style translations. - while ($source =~ s|^(.*?)([ \t]*)<\s*_($w+)\s*>(.*?)<\s*/_\3\s*>([ \t]*\n)?||s) { - print OUTPUT $1; - - my $spaces = $2; - my $tag = $3; - my $string = $4; - - print OUTPUT "$spaces<$tag>$string</$tag>\n"; - - $string =~ s/\s+/ /g; - $string =~ s/^ //; - $string =~ s/ $//; - $string = entity_decode($string); - - for my $lang (sort keys %po_files_by_lang) { - my $translation = $translations{$lang, $string}; - next if !$translation; - $translation = entity_encode($translation); - print OUTPUT "$spaces<$tag xml:lang=\"$lang\">$translation</$tag>\n"; - } - } - - print OUTPUT $source; - - close OUTPUT; -} - -sub keys_merge_translations -{ - open INPUT, "<${FILE}" or die; - open OUTPUT, ">${OUTFILE}" or die; - - while (<INPUT>) { - if (s/^(\s*)_(\w+=(.*))/$1$2/) { - my $string = $3; - - print OUTPUT; - - my $non_translated_line = $_; - - for my $lang (sort keys %po_files_by_lang) { - my $translation = $translations{$lang, $string}; - next if !$translation; - - $_ = $non_translated_line; - s/(\w+)=.*/[$lang]$1=$translation/; - print OUTPUT; - } - } else { - print OUTPUT; - } - } - - close OUTPUT; - close INPUT; -} - -sub desktop_merge_translations -{ - open INPUT, "<${FILE}" or die; - open OUTPUT, ">${OUTFILE}" or die; - - while (<INPUT>) { - if (s/^(\s*)_(\w+=(.*))/$1$2/) { - my $string = $3; - - print OUTPUT; - - my $non_translated_line = $_; - - for my $lang (sort keys %po_files_by_lang) { - my $translation = $translations{$lang, $string}; - next if !$translation; - - $_ = $non_translated_line; - s/(\w+)=.*/${1}[$lang]=$translation/; - print OUTPUT; - } - } else { - print OUTPUT; - } - } - - close OUTPUT; - close INPUT; -} - -sub schemas_merge_translations -{ - my $source; - - { - local $/; # slurp mode - open INPUT, "<$FILE" or die "can't open $FILE: $!"; - $source = <INPUT>; - close INPUT; - } - - open OUTPUT, ">$OUTFILE" or die; - - # FIXME: support attribute translations - - # Empty nodes never need translation, so unmark all of them. - # For example, <_foo/> is just replaced by <foo/>. - $source =~ s|<\s*_($w+)\s*/>|<$1/>|g; - - # Support for <_foo>blah</_foo> style translations. - - my $regex_start = "^(.*?)([ \t]*)<locale name=\"C\">"; - my $regex_short = "([ \t\n]*)<short>(.*?)</short>"; - my $regex_long = "([ \t\n]*)<long>(.*?)</long>"; - my $regex_end = "([ \t\n]*)</locale>"; - - while ($source =~ s|$regex_start$regex_short$regex_long$regex_end||s) { - print OUTPUT $1; - - my $locale_start_spaces = $2; - my $locale_end_spaces = $7; - my $short_spaces = $3; - my $short_string = $4; - my $long_spaces = $5; - my $long_string = $6; - - # English first - - print OUTPUT "$locale_start_spaces<locale name=\"C\">"; - print OUTPUT "$short_spaces<short>$short_string</short>"; - print OUTPUT "$long_spaces<long>$long_string</long>"; - print OUTPUT "$locale_end_spaces</locale>"; - - $short_string =~ s/\s+/ /g; - $short_string =~ s/^ //; - $short_string =~ s/ $//; - $short_string = entity_decode($short_string); - - $long_string =~ s/\s+/ /g; - $long_string =~ s/^ //; - $long_string =~ s/ $//; - $long_string = entity_decode($long_string); - - for my $lang (sort keys %po_files_by_lang) { - my $short_translation = $translations{$lang, $short_string}; - my $long_translation = $translations{$lang, $long_string}; - - next if (!$short_translation && !$long_translation); - - print OUTPUT "\n$locale_start_spaces<locale name=\"$lang\">"; - - if ($short_translation) - { - $short_translation = entity_encode($short_translation); - print OUTPUT "$short_spaces<short>$short_translation</short>"; - } - - if ($long_translation) - { - $long_translation = entity_encode($long_translation); - print OUTPUT "$long_spaces<long>$long_translation</long>"; - } - - print OUTPUT "$locale_end_spaces</locale>"; - } - } - - print OUTPUT $source; - - close OUTPUT; -} |