summaryrefslogtreecommitdiffstats
path: root/data/intltool-merge
diff options
context:
space:
mode:
Diffstat (limited to 'data/intltool-merge')
-rwxr-xr-xdata/intltool-merge657
1 files changed, 657 insertions, 0 deletions
diff --git a/data/intltool-merge b/data/intltool-merge
new file mode 100755
index 00000000..06637146
--- /dev/null
+++ b/data/intltool-merge
@@ -0,0 +1,657 @@
+#!/usr/bin/perl -w
+
+#
+# The Intltool Message Merger
+#
+# Copyright (C) 2000, 2002 Free Software Foundation.
+# Copyright (C) 2000, 2001 Eazel, Inc
+#
+# Intltool is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# version 2 published by the Free Software Foundation.
+#
+# Intltool is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+#
+# As a special exception to the GNU General Public License, if you
+# distribute this file as part of a program that contains a
+# configuration script generated by Autoconf, you may include it under
+# the same distribution terms that you use for the rest of that program.
+#
+# Authors: Maciej Stachowiak <mjs@noisehavoc.org>
+# Kenneth Christiansen <kenneth@gnu.org>
+# Darin Adler <darin@bentspoon.com>
+#
+# Proper XML UTF-8'ification written by Cyrille Chepelov <chepelov@calixo.net>
+#
+
+## Release information
+my $PROGRAM = "intltool-merge";
+my $PACKAGE = "intltool";
+my $VERSION = "0.22";
+
+## Loaded modules
+use strict;
+use Getopt::Long;
+
+## Scalars used by the option stuff
+my $HELP_ARG = 0;
+my $VERSION_ARG = 0;
+my $BA_STYLE_ARG = 0;
+my $XML_STYLE_ARG = 0;
+my $KEYS_STYLE_ARG = 0;
+my $DESKTOP_STYLE_ARG = 0;
+my $SCHEMAS_STYLE_ARG = 0;
+my $QUIET_ARG = 0;
+my $PASS_THROUGH_ARG = 0;
+my $UTF8_ARG = 0;
+my $cache_file;
+
+## Handle options
+GetOptions
+(
+ "help" => \$HELP_ARG,
+ "version" => \$VERSION_ARG,
+ "quiet|q" => \$QUIET_ARG,
+ "oaf-style|o" => \$BA_STYLE_ARG, ## for compatibility
+ "ba-style|b" => \$BA_STYLE_ARG,
+ "xml-style|x" => \$XML_STYLE_ARG,
+ "keys-style|k" => \$KEYS_STYLE_ARG,
+ "desktop-style|d" => \$DESKTOP_STYLE_ARG,
+ "schemas-style|s" => \$SCHEMAS_STYLE_ARG,
+ "pass-through|p" => \$PASS_THROUGH_ARG,
+ "utf8|u" => \$UTF8_ARG,
+ "cache|c=s" => \$cache_file
+ ) or &error;
+
+my $PO_DIR;
+my $FILE;
+my $OUTFILE;
+
+my %po_files_by_lang = ();
+my %translations = ();
+
+# Use this instead of \w for XML files to handle more possible characters.
+my $w = "[-A-Za-z0-9._:]";
+
+# XML quoted string contents
+my $q = "[^\\\"]*";
+
+## Check for options.
+
+if ($VERSION_ARG) {
+ &print_version;
+} elsif ($HELP_ARG) {
+ &print_help;
+} elsif ($BA_STYLE_ARG && @ARGV > 2) {
+ &preparation;
+ &print_message;
+ &ba_merge_translations;
+ &finalize;
+} elsif ($XML_STYLE_ARG && @ARGV > 2) {
+ &utf8_sanity_check;
+ &preparation;
+ &print_message;
+ &xml_merge_translations;
+ &finalize;
+} elsif ($KEYS_STYLE_ARG && @ARGV > 2) {
+ &utf8_sanity_check;
+ &preparation;
+ &print_message;
+ &keys_merge_translations;
+ &finalize;
+} elsif ($DESKTOP_STYLE_ARG && @ARGV > 2) {
+ &preparation;
+ &print_message;
+ &desktop_merge_translations;
+ &finalize;
+} elsif ($SCHEMAS_STYLE_ARG && @ARGV > 2) {
+ &preparation;
+ &print_message;
+ &schemas_merge_translations;
+ &finalize;
+} else {
+ &print_help;
+}
+
+exit;
+
+## Sub for printing release information
+sub print_version
+{
+ print "${PROGRAM} (${PACKAGE}) ${VERSION}\n";
+ print "Written by Maciej Stachowiak, Darin Adler and Kenneth Christiansen.\n\n";
+ print "Copyright (C) 2000-2002 Free Software Foundation, Inc.\n";
+ print "Copyright (C) 2000-2001 Eazel, Inc.\n";
+ print "This is free software; see the source for copying conditions. There is NO\n";
+ print "warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n";
+ exit;
+}
+
+## Sub for printing usage information
+sub print_help
+{
+ print "Usage: ${PROGRAM} [OPTIONS] PO_DIRECTORY FILENAME OUTPUT_FILE\n";
+ print "Generates an output file that includes translated versions of some attributes,\n";
+ print "from an untranslated source and a po directory that includes translations.\n\n";
+ print " -b, --ba-style includes translations in the bonobo-activation style\n";
+ print " -d, --desktop-style includes translations in the desktop style\n";
+ print " -k, --keys-style includes translations in the keys style\n";
+ print " -s, --schemas-style includes translations in the schemas style\n";
+ print " -x, --xml-style includes translations in the standard xml style\n";
+ print " -u, --utf8 convert all strings to UTF-8 before merging\n";
+ print " -p, --pass-through use strings as found in .po files, without\n";
+ print " conversion (STRONGLY unrecommended with -x)\n";
+ print " -q, --quiet suppress most messages\n";
+ print " --help display this help and exit\n";
+ print " --version output version information and exit\n";
+ print "\nReport bugs to bugzilla.gnome.org, module intltool, or contact us through \n";
+ print "<xml-i18n-tools-list\@gnome.org>.\n";
+ exit;
+}
+
+
+## Sub for printing error messages
+sub print_error
+{
+ print "Try `${PROGRAM} --help' for more information.\n";
+ exit;
+}
+
+
+sub print_message
+{
+ print "Merging translations into $OUTFILE.\n" unless $QUIET_ARG;
+}
+
+
+sub preparation
+{
+ $PO_DIR = $ARGV[0];
+ $FILE = $ARGV[1];
+ $OUTFILE = $ARGV[2];
+
+ &gather_po_files;
+ &get_translation_database;
+}
+
+# General-purpose code for looking up translations in .po files
+
+sub po_file2lang
+{
+ my ($tmp) = @_;
+ $tmp =~ s/^.*\/(.*)\.po$/$1/;
+ return $tmp;
+}
+
+sub gather_po_files
+{
+ for my $po_file (glob "$PO_DIR/*.po") {
+ $po_files_by_lang{po_file2lang($po_file)} = $po_file;
+ }
+}
+
+sub get_po_encoding
+{
+ my ($in_po_file) = @_;
+ my $encoding = "";
+
+ open IN_PO_FILE, $in_po_file or die;
+ while (<IN_PO_FILE>) {
+ ## example: "Content-Type: text/plain; charset=ISO-8859-1\n"
+ if (/Content-Type\:.*charset=([-a-zA-Z0-9]+)\\n/) {
+ $encoding = $1;
+ last;
+ }
+ }
+ close IN_PO_FILE;
+
+ if (!$encoding) {
+ print "Warning: no encoding found in $in_po_file. Assuming ISO-8859-1\n";
+ $encoding = "ISO-8859-1";
+ }
+ return $encoding
+}
+
+sub utf8_sanity_check
+{
+ if (!$UTF8_ARG) {
+ if (!$PASS_THROUGH_ARG) {
+ $PASS_THROUGH_ARG="1";
+ }
+ }
+}
+
+sub get_translation_database
+{
+ if ($cache_file) {
+ &get_cached_translation_database;
+ } else {
+ &create_translation_database;
+ }
+}
+
+sub get_newest_po_age
+{
+ my $newest_age;
+
+ foreach my $file (values %po_files_by_lang) {
+ my $file_age = -M $file;
+ $newest_age = $file_age if !$newest_age || $file_age < $newest_age;
+ }
+
+ return $newest_age;
+}
+
+sub create_cache
+{
+ print "Generating and caching the translation database\n" unless $QUIET_ARG;
+
+ &create_translation_database;
+
+ open CACHE, ">$cache_file" || die;
+ print CACHE join "\x01", %translations;
+ close CACHE;
+}
+
+sub load_cache
+{
+ print "Found cached translation database\n" unless $QUIET_ARG;
+
+ my $contents;
+ open CACHE, "<$cache_file" || die;
+ {
+ local $/;
+ $contents = <CACHE>;
+ }
+ close CACHE;
+ %translations = split "\x01", $contents;
+}
+
+sub get_cached_translation_database
+{
+ my $cache_file_age = -M $cache_file;
+ if (defined $cache_file_age) {
+ if ($cache_file_age <= &get_newest_po_age) {
+ &load_cache;
+ return;
+ }
+ print "Found too-old cached translation database\n" unless $QUIET_ARG;
+ }
+
+ &create_cache;
+}
+
+sub create_translation_database
+{
+ for my $lang (keys %po_files_by_lang) {
+ my $po_file = $po_files_by_lang{$lang};
+
+ if ($UTF8_ARG) {
+ my $encoding = get_po_encoding ($po_file);
+ if (lc $encoding eq "utf-8") {
+ open PO_FILE, "<$po_file";
+ } else {
+ my $iconv = $ENV{"INTLTOOL_ICONV"} || "iconv";
+ open PO_FILE, "$iconv -f $encoding -t UTF-8 $po_file|";
+ }
+ } else {
+ open PO_FILE, "<$po_file";
+ }
+
+ my $nextfuzzy = 0;
+ my $inmsgid = 0;
+ my $inmsgstr = 0;
+ my $msgid = "";
+ my $msgstr = "";
+ while (<PO_FILE>) {
+ $nextfuzzy = 1 if /^#, fuzzy/;
+ if (/^msgid "((\\.|[^\\])*)"/ ) {
+ $translations{$lang, $msgid} = $msgstr if $inmsgstr && $msgid && $msgstr;
+ $msgid = "";
+ $msgstr = "";
+
+ if ($nextfuzzy) {
+ $inmsgid = 0;
+ } else {
+ $msgid = unescape_po_string($1);
+ $inmsgid = 1;
+ }
+ $inmsgstr = 0;
+ $nextfuzzy = 0;
+ }
+ if (/^msgstr "((\\.|[^\\])*)"/) {
+ $msgstr = unescape_po_string($1);
+ $inmsgstr = 1;
+ $inmsgid = 0;
+ }
+ if (/^"((\\.|[^\\])*)"/) {
+ $msgid .= unescape_po_string($1) if $inmsgid;
+ $msgstr .= unescape_po_string($1) if $inmsgstr;
+ }
+ }
+ $translations{$lang, $msgid} = $msgstr if $inmsgstr && $msgid && $msgstr;
+ }
+}
+
+sub finalize
+{
+}
+
+sub unescape_one_sequence
+{
+ my ($sequence) = @_;
+
+ return "\\" if $sequence eq "\\\\";
+ return "\"" if $sequence eq "\\\"";
+
+ # gettext also handles \n, \t, \b, \r, \f, \v, \a, \xxx (octal),
+ # \xXX (hex) and has a comment saying they want to handle \u and \U.
+
+ return $sequence;
+}
+
+sub unescape_po_string
+{
+ my ($string) = @_;
+
+ $string =~ s/(\\.)/unescape_one_sequence($1)/eg;
+
+ return $string;
+}
+
+sub entity_decode
+{
+ local ($_) = @_;
+
+ s/&apos;/'/g; # '
+ s/&quot;/"/g; # "
+ s/&amp;/&/g;
+
+ return $_;
+}
+
+sub entity_encode
+{
+ my ($pre_encoded) = @_;
+
+ my @list_of_chars = unpack ('C*', $pre_encoded);
+
+ if ($PASS_THROUGH_ARG) {
+ return join ('', map (&entity_encode_int_even_high_bit, @list_of_chars));
+ } else {
+ return join ('', map (&entity_encode_int_minimalist, @list_of_chars));
+ }
+}
+
+sub entity_encode_int_minimalist
+{
+ return "&quot;" if $_ == 34;
+ return "&amp;" if $_ == 38;
+ return "&apos;" if $_ == 39;
+ return chr $_;
+}
+
+sub entity_encode_int_even_high_bit
+{
+ if ($_ > 127 || $_ == 34 || $_ == 38 || $_ == 39) {
+ # the ($_ > 127) should probably be removed
+ return "&#" . $_ . ";";
+ } else {
+ return chr $_;
+ }
+}
+
+sub entity_encoded_translation
+{
+ my ($lang, $string) = @_;
+
+ my $translation = $translations{$lang, $string};
+ return $string if !$translation;
+ return entity_encode ($translation);
+}
+
+## XML (bonobo-activation specific) merge code
+
+sub ba_merge_translations
+{
+ my $source;
+
+ {
+ local $/; # slurp mode
+ open INPUT, "<$FILE" or die "can't open $FILE: $!";
+ $source = <INPUT>;
+ close INPUT;
+ }
+
+ open OUTPUT, ">$OUTFILE" or die "can't open $OUTFILE: $!";
+
+ while ($source =~ s|^(.*?)([ \t]*<\s*$w+\s+($w+\s*=\s*"$q"\s*)+/?>)([ \t]*\n)?||s) {
+ print OUTPUT $1;
+
+ my $node = $2 . "\n";
+
+ my @strings = ();
+ $_ = $node;
+ while (s/(\s)_($w+\s*=\s*"($q)")/$1$2/s) {
+ push @strings, entity_decode($3);
+ }
+ print OUTPUT;
+
+ my %langs;
+ for my $string (@strings) {
+ for my $lang (keys %po_files_by_lang) {
+ $langs{$lang} = 1 if $translations{$lang, $string};
+ }
+ }
+
+ for my $lang (sort keys %langs) {
+ $_ = $node;
+ s/(\sname\s*=\s*)"($q)"/$1"$2-$lang"/s;
+ s/(\s)_($w+\s*=\s*")($q)"/$1 . $2 . entity_encoded_translation($lang, $3) . '"'/seg;
+ print OUTPUT;
+ }
+ }
+
+ print OUTPUT $source;
+
+ close OUTPUT;
+}
+
+
+## XML (non-bonobo-activation) merge code
+
+sub xml_merge_translations
+{
+ my $source;
+
+ {
+ local $/; # slurp mode
+ open INPUT, "<$FILE" or die "can't open $FILE: $!";
+ $source = <INPUT>;
+ close INPUT;
+ }
+
+ open OUTPUT, ">$OUTFILE" or die;
+
+ # FIXME: support attribute translations
+
+ # Empty nodes never need translation, so unmark all of them.
+ # For example, <_foo/> is just replaced by <foo/>.
+ $source =~ s|<\s*_($w+)\s*/>|<$1/>|g;
+
+ # Support for <_foo>blah</_foo> style translations.
+ while ($source =~ s|^(.*?)([ \t]*)<\s*_($w+)\s*>(.*?)<\s*/_\3\s*>([ \t]*\n)?||s) {
+ print OUTPUT $1;
+
+ my $spaces = $2;
+ my $tag = $3;
+ my $string = $4;
+
+ print OUTPUT "$spaces<$tag>$string</$tag>\n";
+
+ $string =~ s/\s+/ /g;
+ $string =~ s/^ //;
+ $string =~ s/ $//;
+ $string = entity_decode($string);
+
+ for my $lang (sort keys %po_files_by_lang) {
+ my $translation = $translations{$lang, $string};
+ next if !$translation;
+ $translation = entity_encode($translation);
+ print OUTPUT "$spaces<$tag xml:lang=\"$lang\">$translation</$tag>\n";
+ }
+ }
+
+ print OUTPUT $source;
+
+ close OUTPUT;
+}
+
+sub keys_merge_translations
+{
+ open INPUT, "<${FILE}" or die;
+ open OUTPUT, ">${OUTFILE}" or die;
+
+ while (<INPUT>) {
+ if (s/^(\s*)_(\w+=(.*))/$1$2/) {
+ my $string = $3;
+
+ print OUTPUT;
+
+ my $non_translated_line = $_;
+
+ for my $lang (sort keys %po_files_by_lang) {
+ my $translation = $translations{$lang, $string};
+ next if !$translation;
+
+ $_ = $non_translated_line;
+ s/(\w+)=.*/[$lang]$1=$translation/;
+ print OUTPUT;
+ }
+ } else {
+ print OUTPUT;
+ }
+ }
+
+ close OUTPUT;
+ close INPUT;
+}
+
+sub desktop_merge_translations
+{
+ open INPUT, "<${FILE}" or die;
+ open OUTPUT, ">${OUTFILE}" or die;
+
+ while (<INPUT>) {
+ if (s/^(\s*)_(\w+=(.*))/$1$2/) {
+ my $string = $3;
+
+ print OUTPUT;
+
+ my $non_translated_line = $_;
+
+ for my $lang (sort keys %po_files_by_lang) {
+ my $translation = $translations{$lang, $string};
+ next if !$translation;
+
+ $_ = $non_translated_line;
+ s/(\w+)=.*/${1}[$lang]=$translation/;
+ print OUTPUT;
+ }
+ } else {
+ print OUTPUT;
+ }
+ }
+
+ close OUTPUT;
+ close INPUT;
+}
+
+sub schemas_merge_translations
+{
+ my $source;
+
+ {
+ local $/; # slurp mode
+ open INPUT, "<$FILE" or die "can't open $FILE: $!";
+ $source = <INPUT>;
+ close INPUT;
+ }
+
+ open OUTPUT, ">$OUTFILE" or die;
+
+ # FIXME: support attribute translations
+
+ # Empty nodes never need translation, so unmark all of them.
+ # For example, <_foo/> is just replaced by <foo/>.
+ $source =~ s|<\s*_($w+)\s*/>|<$1/>|g;
+
+ # Support for <_foo>blah</_foo> style translations.
+
+ my $regex_start = "^(.*?)([ \t]*)<locale name=\"C\">";
+ my $regex_short = "([ \t\n]*)<short>(.*?)</short>";
+ my $regex_long = "([ \t\n]*)<long>(.*?)</long>";
+ my $regex_end = "([ \t\n]*)</locale>";
+
+ while ($source =~ s|$regex_start$regex_short$regex_long$regex_end||s) {
+ print OUTPUT $1;
+
+ my $locale_start_spaces = $2;
+ my $locale_end_spaces = $7;
+ my $short_spaces = $3;
+ my $short_string = $4;
+ my $long_spaces = $5;
+ my $long_string = $6;
+
+ # English first
+
+ print OUTPUT "$locale_start_spaces<locale name=\"C\">";
+ print OUTPUT "$short_spaces<short>$short_string</short>";
+ print OUTPUT "$long_spaces<long>$long_string</long>";
+ print OUTPUT "$locale_end_spaces</locale>";
+
+ $short_string =~ s/\s+/ /g;
+ $short_string =~ s/^ //;
+ $short_string =~ s/ $//;
+ $short_string = entity_decode($short_string);
+
+ $long_string =~ s/\s+/ /g;
+ $long_string =~ s/^ //;
+ $long_string =~ s/ $//;
+ $long_string = entity_decode($long_string);
+
+ for my $lang (sort keys %po_files_by_lang) {
+ my $short_translation = $translations{$lang, $short_string};
+ my $long_translation = $translations{$lang, $long_string};
+
+ next if (!$short_translation && !$long_translation);
+
+ print OUTPUT "\n$locale_start_spaces<locale name=\"$lang\">";
+
+ if ($short_translation)
+ {
+ $short_translation = entity_encode($short_translation);
+ print OUTPUT "$short_spaces<short>$short_translation</short>";
+ }
+
+ if ($long_translation)
+ {
+ $long_translation = entity_encode($long_translation);
+ print OUTPUT "$long_spaces<long>$long_translation</long>";
+ }
+
+ print OUTPUT "$locale_end_spaces</locale>";
+ }
+ }
+
+ print OUTPUT $source;
+
+ close OUTPUT;
+}