1 files changed, 657 insertions, 0 deletions
diff --git a/data/intltool-merge b/data/intltool-merge
new file mode 100755
index 00000000..06637146
--- /dev/null
+++ b/data/intltool-merge
@@ -0,0 +1,657 @@
+#!/usr/bin/perl -w
+
+#
+#  The Intltool Message Merger
+#
+#  Copyright (C) 2000, 2002 Free Software Foundation.
+#  Copyright (C) 2000, 2001 Eazel, Inc
+#
+#  Intltool is free software; you can redistribute it and/or
+#  modify it under the terms of the GNU General Public License 
+#  version 2 published by the Free Software Foundation.
+#
+#  Intltool is distributed in the hope that it will be useful,
+#  but WITHOUT ANY WARRANTY; without even the implied warranty of
+#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+#  General Public License for more details.
+#
+#  You should have received a copy of the GNU General Public License
+#  along with this program; if not, write to the Free Software
+#  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+#
+#  As a special exception to the GNU General Public License, if you
+#  distribute this file as part of a program that contains a
+#  configuration script generated by Autoconf, you may include it under
+#  the same distribution terms that you use for the rest of that program.
+#
+#  Authors:  Maciej Stachowiak <mjs@noisehavoc.org>
+#            Kenneth Christiansen <kenneth@gnu.org>
+#            Darin Adler <darin@bentspoon.com>
+#
+#  Proper XML UTF-8'ification written by Cyrille Chepelov <chepelov@calixo.net>
+#
+
+## Release information
+my $PROGRAM = "intltool-merge";
+my $PACKAGE = "intltool";
+my $VERSION = "0.22";
+
+## Loaded modules
+use strict; 
+use Getopt::Long;
+
+## Scalars used by the option stuff
+my $HELP_ARG = 0;
+my $VERSION_ARG = 0;
+my $BA_STYLE_ARG = 0;
+my $XML_STYLE_ARG = 0;
+my $KEYS_STYLE_ARG = 0;
+my $DESKTOP_STYLE_ARG = 0;
+my $SCHEMAS_STYLE_ARG = 0;
+my $QUIET_ARG = 0;
+my $PASS_THROUGH_ARG = 0;
+my $UTF8_ARG = 0;
+my $cache_file;
+
+## Handle options
+GetOptions 
+(
+ "help" => \$HELP_ARG,
+ "version" => \$VERSION_ARG,
+ "quiet|q" => \$QUIET_ARG,
+ "oaf-style|o" => \$BA_STYLE_ARG, ## for compatibility
+ "ba-style|b" => \$BA_STYLE_ARG,
+ "xml-style|x" => \$XML_STYLE_ARG,
+ "keys-style|k" => \$KEYS_STYLE_ARG,
+ "desktop-style|d" => \$DESKTOP_STYLE_ARG,
+ "schemas-style|s" => \$SCHEMAS_STYLE_ARG,
+ "pass-through|p" => \$PASS_THROUGH_ARG,
+ "utf8|u" => \$UTF8_ARG,
+ "cache|c=s" => \$cache_file
+ ) or &error;
+
+my $PO_DIR;
+my $FILE;
+my $OUTFILE;
+
+my %po_files_by_lang = ();
+my %translations = ();
+
+# Use this instead of \w for XML files to handle more possible characters.
+my $w = "[-A-Za-z0-9._:]";
+
+# XML quoted string contents
+my $q = "[^\\\"]*";
+
+## Check for options. 
+
+if ($VERSION_ARG) {
+	&print_version;
+} elsif ($HELP_ARG) {
+	&print_help;
+} elsif ($BA_STYLE_ARG && @ARGV > 2) {
+	&preparation;
+	&print_message;
+	&ba_merge_translations;
+	&finalize;
+} elsif ($XML_STYLE_ARG && @ARGV > 2) {
+        &utf8_sanity_check;
+	&preparation;
+	&print_message;
+	&xml_merge_translations;
+	&finalize;
+} elsif ($KEYS_STYLE_ARG && @ARGV > 2) {
+        &utf8_sanity_check;
+        &preparation;
+        &print_message;
+        &keys_merge_translations;
+	&finalize;
+} elsif ($DESKTOP_STYLE_ARG && @ARGV > 2) {
+        &preparation;
+        &print_message;
+        &desktop_merge_translations;
+	&finalize;
+} elsif ($SCHEMAS_STYLE_ARG && @ARGV > 2) {
+        &preparation;
+        &print_message;
+        &schemas_merge_translations;
+	&finalize;
+} else {
+	&print_help;
+}
+
+exit;
+
+## Sub for printing release information
+sub print_version
+{
+    print "${PROGRAM} (${PACKAGE}) ${VERSION}\n";
+    print "Written by Maciej Stachowiak, Darin Adler and Kenneth Christiansen.\n\n";
+    print "Copyright (C) 2000-2002 Free Software Foundation, Inc.\n";
+    print "Copyright (C) 2000-2001 Eazel, Inc.\n";
+    print "This is free software; see the source for copying conditions.  There is NO\n";
+    print "warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n";
+    exit;
+}
+
+## Sub for printing usage information
+sub print_help
+{
+    print "Usage: ${PROGRAM} [OPTIONS] PO_DIRECTORY FILENAME OUTPUT_FILE\n";
+    print "Generates an output file that includes translated versions of some attributes,\n";
+    print "from an untranslated source and a po directory that includes translations.\n\n";
+    print "  -b, --ba-style         includes translations in the bonobo-activation style\n";
+    print "  -d, --desktop-style    includes translations in the desktop style\n";
+    print "  -k, --keys-style       includes translations in the keys style\n";
+    print "  -s, --schemas-style    includes translations in the schemas style\n";
+    print "  -x, --xml-style        includes translations in the standard xml style\n";
+    print "  -u, --utf8             convert all strings to UTF-8 before merging\n";
+    print "  -p, --pass-through     use strings as found in .po files, without\n";
+    print "                         conversion (STRONGLY unrecommended with -x)\n";
+    print "  -q, --quiet            suppress most messages\n";
+    print "      --help             display this help and exit\n";
+    print "      --version          output version information and exit\n";
+    print "\nReport bugs to bugzilla.gnome.org, module intltool, or contact us through \n";
+    print "<xml-i18n-tools-list\@gnome.org>.\n";
+    exit;
+}
+
+
+## Sub for printing error messages
+sub print_error
+{
+    print "Try `${PROGRAM} --help' for more information.\n";
+    exit;
+}
+
+
+sub print_message 
+{
+    print "Merging translations into $OUTFILE.\n" unless $QUIET_ARG;
+}
+
+
+sub preparation 
+{
+    $PO_DIR = $ARGV[0];
+    $FILE = $ARGV[1];
+    $OUTFILE = $ARGV[2];
+
+    &gather_po_files;
+    &get_translation_database;
+}
+
+# General-purpose code for looking up translations in .po files
+
+sub po_file2lang
+{
+    my ($tmp) = @_; 
+    $tmp =~ s/^.*\/(.*)\.po$/$1/; 
+    return $tmp; 
+}
+
+sub gather_po_files
+{
+    for my $po_file (glob "$PO_DIR/*.po") {
+	$po_files_by_lang{po_file2lang($po_file)} = $po_file;
+    }
+}
+
+sub get_po_encoding
+{
+    my ($in_po_file) = @_;
+    my $encoding = "";
+
+    open IN_PO_FILE, $in_po_file or die;
+    while (<IN_PO_FILE>) {
+        ## example: "Content-Type: text/plain; charset=ISO-8859-1\n"
+        if (/Content-Type\:.*charset=([-a-zA-Z0-9]+)\\n/) {
+            $encoding = $1; 
+            last;
+        }
+    }
+    close IN_PO_FILE;
+
+    if (!$encoding) {
+        print "Warning: no encoding found in $in_po_file. Assuming ISO-8859-1\n";
+        $encoding = "ISO-8859-1";
+    }
+    return $encoding
+}
+
+sub utf8_sanity_check 
+{
+    if (!$UTF8_ARG) {
+        if (!$PASS_THROUGH_ARG) {
+            $PASS_THROUGH_ARG="1";
+        }
+    }
+}
+
+sub get_translation_database
+{
+    if ($cache_file) {
+	&get_cached_translation_database;
+    } else {
+        &create_translation_database;
+    }
+}
+
+sub get_newest_po_age
+{
+    my $newest_age;
+
+    foreach my $file (values %po_files_by_lang) {
+	my $file_age = -M $file;
+	$newest_age = $file_age if !$newest_age || $file_age < $newest_age;
+    }
+
+    return $newest_age;
+}
+
+sub create_cache
+{
+    print "Generating and caching the translation database\n" unless $QUIET_ARG;
+
+    &create_translation_database;
+
+    open CACHE, ">$cache_file" || die;
+    print CACHE join "\x01", %translations;
+    close CACHE;
+}
+
+sub load_cache 
+{
+    print "Found cached translation database\n" unless $QUIET_ARG;
+
+    my $contents;
+    open CACHE, "<$cache_file" || die;
+    {
+        local $/;
+        $contents = <CACHE>;
+    }
+    close CACHE;
+    %translations = split "\x01", $contents;
+}
+
+sub get_cached_translation_database
+{
+    my $cache_file_age = -M $cache_file;
+    if (defined $cache_file_age) {
+        if ($cache_file_age <= &get_newest_po_age) {
+            &load_cache;
+            return;
+        }
+        print "Found too-old cached translation database\n" unless $QUIET_ARG;
+    }
+
+    &create_cache;
+}
+
+sub create_translation_database
+{
+    for my $lang (keys %po_files_by_lang) {
+    	my $po_file = $po_files_by_lang{$lang};
+
+        if ($UTF8_ARG) {
+            my $encoding = get_po_encoding ($po_file);
+            if (lc $encoding eq "utf-8") {
+                open PO_FILE, "<$po_file";	
+            } else {
+                my $iconv = $ENV{"INTLTOOL_ICONV"} || "iconv";
+                open PO_FILE, "$iconv -f $encoding -t UTF-8 $po_file|";	
+            }
+        } else {
+            open PO_FILE, "<$po_file";	
+        }
+
+	my $nextfuzzy = 0;
+	my $inmsgid = 0;
+	my $inmsgstr = 0;
+	my $msgid = "";
+	my $msgstr = "";
+        while (<PO_FILE>) {
+	    $nextfuzzy = 1 if /^#, fuzzy/;
+            if (/^msgid "((\\.|[^\\])*)"/ ) {
+		$translations{$lang, $msgid} = $msgstr if $inmsgstr && $msgid && $msgstr;
+		$msgid = "";
+		$msgstr = "";
+
+		if ($nextfuzzy) {
+		    $inmsgid = 0;
+		} else {
+		    $msgid = unescape_po_string($1);
+		    $inmsgid = 1;
+		}
+		$inmsgstr = 0;
+		$nextfuzzy = 0;
+	    }
+	    if (/^msgstr "((\\.|[^\\])*)"/) {
+	        $msgstr = unescape_po_string($1);
+		$inmsgstr = 1;
+		$inmsgid = 0;
+	    }
+	    if (/^"((\\.|[^\\])*)"/) {
+	        $msgid .= unescape_po_string($1) if $inmsgid;
+	        $msgstr .= unescape_po_string($1) if $inmsgstr;
+	    }
+	}
+	$translations{$lang, $msgid} = $msgstr if $inmsgstr && $msgid && $msgstr;
+    }
+}
+
+sub finalize
+{
+}
+
+sub unescape_one_sequence
+{
+    my ($sequence) = @_;
+
+    return "\\" if $sequence eq "\\\\";
+    return "\"" if $sequence eq "\\\"";
+
+    # gettext also handles \n, \t, \b, \r, \f, \v, \a, \xxx (octal),
+    # \xXX (hex) and has a comment saying they want to handle \u and \U.
+
+    return $sequence;
+}
+
+sub unescape_po_string
+{
+    my ($string) = @_;
+
+    $string =~ s/(\\.)/unescape_one_sequence($1)/eg;
+
+    return $string;
+}
+
+sub entity_decode
+{
+    local ($_) = @_;
+
+    s/&apos;/'/g; # '
+    s/&quot;/"/g; # "
+    s/&amp;/&/g;
+
+    return $_;
+}
+
+sub entity_encode
+{
+    my ($pre_encoded) = @_;
+
+    my @list_of_chars = unpack ('C*', $pre_encoded);
+
+    if ($PASS_THROUGH_ARG) {
+        return join ('', map (&entity_encode_int_even_high_bit, @list_of_chars));
+    } else {
+        return join ('', map (&entity_encode_int_minimalist, @list_of_chars));
+    }
+}
+
+sub entity_encode_int_minimalist
+{
+    return "&quot;" if $_ == 34;
+    return "&amp;" if $_ == 38;
+    return "&apos;" if $_ == 39;
+    return chr $_;
+}
+
+sub entity_encode_int_even_high_bit
+{
+    if ($_ > 127 || $_ == 34 || $_ == 38 || $_ == 39) {
+        # the ($_ > 127) should probably be removed
+	return "&#" . $_ . ";"; 
+    } else {
+	return chr $_;
+    }
+}
+
+sub entity_encoded_translation
+{
+    my ($lang, $string) = @_;
+
+    my $translation = $translations{$lang, $string};
+    return $string if !$translation;
+    return entity_encode ($translation);
+}
+
+## XML (bonobo-activation specific) merge code
+
+sub ba_merge_translations
+{
+    my $source;
+
+    {
+       local $/; # slurp mode
+       open INPUT, "<$FILE" or die "can't open $FILE: $!";
+       $source = <INPUT>;
+       close INPUT;
+    }
+
+    open OUTPUT, ">$OUTFILE" or die "can't open $OUTFILE: $!";
+
+    while ($source =~ s|^(.*?)([ \t]*<\s*$w+\s+($w+\s*=\s*"$q"\s*)+/?>)([ \t]*\n)?||s) {
+        print OUTPUT $1;
+
+        my $node = $2 . "\n";
+
+        my @strings = ();
+        $_ = $node;
+	while (s/(\s)_($w+\s*=\s*"($q)")/$1$2/s) {
+             push @strings, entity_decode($3);
+        }
+	print OUTPUT;
+
+	my %langs;
+	for my $string (@strings) {
+	    for my $lang (keys %po_files_by_lang) {
+                $langs{$lang} = 1 if $translations{$lang, $string};
+	    }
+	}
+	
+	for my $lang (sort keys %langs) {
+	    $_ = $node;
+	    s/(\sname\s*=\s*)"($q)"/$1"$2-$lang"/s;
+	    s/(\s)_($w+\s*=\s*")($q)"/$1 . $2 . entity_encoded_translation($lang, $3) . '"'/seg;
+	    print OUTPUT;
+        }
+    }
+
+    print OUTPUT $source;
+
+    close OUTPUT;
+}
+
+
+## XML (non-bonobo-activation) merge code
+
+sub xml_merge_translations
+{
+    my $source;
+
+    {
+       local $/; # slurp mode
+       open INPUT, "<$FILE" or die "can't open $FILE: $!";
+       $source = <INPUT>;
+       close INPUT;
+    }
+
+    open OUTPUT, ">$OUTFILE" or die;
+
+    # FIXME: support attribute translations
+
+    # Empty nodes never need translation, so unmark all of them.
+    # For example, <_foo/> is just replaced by <foo/>.
+    $source =~ s|<\s*_($w+)\s*/>|<$1/>|g;
+
+    # Support for <_foo>blah</_foo> style translations.
+    while ($source =~ s|^(.*?)([ \t]*)<\s*_($w+)\s*>(.*?)<\s*/_\3\s*>([ \t]*\n)?||s) {
+        print OUTPUT $1;
+
+        my $spaces = $2;
+        my $tag = $3;
+        my $string = $4;
+
+	print OUTPUT "$spaces<$tag>$string</$tag>\n";
+
+	$string =~ s/\s+/ /g;
+	$string =~ s/^ //;
+	$string =~ s/ $//;
+	$string = entity_decode($string);
+
+	for my $lang (sort keys %po_files_by_lang) {
+	    my $translation = $translations{$lang, $string};
+	    next if !$translation;
+	    $translation = entity_encode($translation);
+	    print OUTPUT "$spaces<$tag xml:lang=\"$lang\">$translation</$tag>\n";
+        }
+    }
+
+    print OUTPUT $source;
+
+    close OUTPUT;
+}
+
+sub keys_merge_translations
+{
+    open INPUT, "<${FILE}" or die;
+    open OUTPUT, ">${OUTFILE}" or die;
+
+    while (<INPUT>) {
+        if (s/^(\s*)_(\w+=(.*))/$1$2/)  {
+	    my $string = $3;
+
+            print OUTPUT;
+
+	    my $non_translated_line = $_;
+
+            for my $lang (sort keys %po_files_by_lang) {
+		my $translation = $translations{$lang, $string};
+                next if !$translation;
+
+                $_ = $non_translated_line;
+		s/(\w+)=.*/[$lang]$1=$translation/;
+                print OUTPUT;
+            }
+	} else {
+            print OUTPUT;
+        }
+    }
+
+    close OUTPUT;
+    close INPUT;
+}
+
+sub desktop_merge_translations
+{
+    open INPUT, "<${FILE}" or die;
+    open OUTPUT, ">${OUTFILE}" or die;
+
+    while (<INPUT>) {
+        if (s/^(\s*)_(\w+=(.*))/$1$2/)  {
+	    my $string = $3;
+
+            print OUTPUT;
+
+	    my $non_translated_line = $_;
+
+            for my $lang (sort keys %po_files_by_lang) {
+                my $translation = $translations{$lang, $string};
+                next if !$translation;
+
+                $_ = $non_translated_line;
+                s/(\w+)=.*/${1}[$lang]=$translation/;
+                print OUTPUT;
+            }
+	} else {
+            print OUTPUT;
+        }
+    }
+
+    close OUTPUT;
+    close INPUT;
+}
+
+sub schemas_merge_translations
+{
+    my $source;
+
+    {
+       local $/; # slurp mode
+       open INPUT, "<$FILE" or die "can't open $FILE: $!";
+       $source = <INPUT>;
+       close INPUT;
+    }
+
+    open OUTPUT, ">$OUTFILE" or die;
+
+    # FIXME: support attribute translations
+
+    # Empty nodes never need translation, so unmark all of them.
+    # For example, <_foo/> is just replaced by <foo/>.
+    $source =~ s|<\s*_($w+)\s*/>|<$1/>|g;
+
+    # Support for <_foo>blah</_foo> style translations.
+
+    my $regex_start = "^(.*?)([ \t]*)<locale name=\"C\">";
+    my $regex_short = "([ \t\n]*)<short>(.*?)</short>";
+    my $regex_long  = "([ \t\n]*)<long>(.*?)</long>";
+    my $regex_end   = "([ \t\n]*)</locale>";
+
+    while ($source =~ s|$regex_start$regex_short$regex_long$regex_end||s) {
+        print OUTPUT $1;
+
+        my $locale_start_spaces = $2;
+	my $locale_end_spaces = $7;
+	my $short_spaces = $3;
+	my $short_string = $4;
+	my $long_spaces = $5;
+	my $long_string = $6;
+
+	# English first
+
+	print OUTPUT "$locale_start_spaces<locale name=\"C\">";
+	print OUTPUT "$short_spaces<short>$short_string</short>";
+	print OUTPUT "$long_spaces<long>$long_string</long>";
+	print OUTPUT "$locale_end_spaces</locale>";
+
+	$short_string =~ s/\s+/ /g;
+	$short_string =~ s/^ //;
+	$short_string =~ s/ $//;
+	$short_string = entity_decode($short_string);
+
+	$long_string =~ s/\s+/ /g;
+	$long_string =~ s/^ //;
+	$long_string =~ s/ $//;
+	$long_string = entity_decode($long_string);
+
+	for my $lang (sort keys %po_files_by_lang) {
+	    my $short_translation = $translations{$lang, $short_string};
+	    my $long_translation  = $translations{$lang, $long_string};
+
+	    next if (!$short_translation && !$long_translation);
+
+	    print OUTPUT "\n$locale_start_spaces<locale name=\"$lang\">";
+
+	    if ($short_translation)
+	    {
+		$short_translation = entity_encode($short_translation);
+		print OUTPUT "$short_spaces<short>$short_translation</short>";
+	    }
+
+	    if ($long_translation)
+	    {
+		$long_translation = entity_encode($long_translation);
+		print OUTPUT "$long_spaces<long>$long_translation</long>";
+	    }	    
+
+	    print OUTPUT "$locale_end_spaces</locale>";
+        }
+    }
+
+    print OUTPUT $source;
+
+    close OUTPUT;
+}