#!/usr/bin/perl -w 
# -*- Mode: perl; indent-tabs-mode: nil; c-basic-offset: 4  -*-

#
#  The Intltool Message Extractor
#
#  Copyright (C) 2000-2001 Free Software Foundation.
#
#  Intltool is free software; you can redistribute it and/or
#  modify it under the terms of the GNU General Public License as
#  published by the Free Software Foundation; either version 2 of the
#  License, or (at your option) any later version.
#
#  Intltool is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
#  General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#  along with this program; if not, write to the Free Software
#  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#
#  As a special exception to the GNU General Public License, if you
#  distribute this file as part of a program that contains a
#  configuration script generated by Autoconf, you may include it under
#  the same distribution terms that you use for the rest of that program.
#
#  Authors: Kenneth Christiansen <kenneth@gnu.org>
#           Darin Adler <darin@bentspoon.com>
#

## Release information
my $PROGRAM      = "intltool-extract";
my $PACKAGE      = "intltool";
my $VERSION      = "0.22";

## Loaded modules
use strict; 
use File::Basename;
use Getopt::Long;

## Scalars used by the option stuff
my $TYPE_ARG	= "0";
my $LOCAL_ARG	= "0";
my $HELP_ARG 	= "0";
my $VERSION_ARG = "0";
my $UPDATE_ARG  = "0";
my $QUIET_ARG   = "0";

my $FILE;
my $OUTFILE;

my $gettext_type = "";
my $input;
my %messages = ();

## Use this instead of \w for XML files to handle more possible characters.
my $w = "[-A-Za-z0-9._:]";

## Always print first
$| = 1;

## Handle options
GetOptions (
	    "type=s"     => \$TYPE_ARG,
            "local|l"    => \$LOCAL_ARG,
            "help|h"     => \$HELP_ARG,
            "version|v"  => \$VERSION_ARG,
            "update"     => \$UPDATE_ARG,
	    "quiet|q"    => \$QUIET_ARG,
            ) or &error;

&split_on_argument;


## Check for options. 
## This section will check for the different options.

sub split_on_argument {

    if ($VERSION_ARG) {
        &version;

    } elsif ($HELP_ARG) {
	&help;
        
    } elsif ($LOCAL_ARG) {
        &place_local;
        &extract;

    } elsif ($UPDATE_ARG) {
	&place_normal;
	&extract;

    } elsif (@ARGV > 0) {
	&place_normal;
	&message;
	&extract;

    } else {
	&help;

    }  
}    

sub place_normal {
    $FILE	 = $ARGV[0];
    $OUTFILE     = "$FILE.h";
}   

sub place_local {
    $OUTFILE     = fileparse($FILE, ());
    if (!-e "tmp/") { 
        system("mkdir tmp/"); 
    }
    $OUTFILE     = "./tmp/$OUTFILE.h"
}

sub determine_type {
   if ($TYPE_ARG =~ /^gettext\/(.*)/) {
	$gettext_type=$1
   }
}

## Sub for printing release information
sub version{
    print "${PROGRAM} (${PACKAGE}) $VERSION\n";
    print "Copyright (C) 2000 Free Software Foundation, Inc.\n";
    print "Written by Kenneth Christiansen, 2000.\n\n";
    print "This is free software; see the source for copying conditions. There is NO\n";
    print "warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n";
    exit;
}

## Sub for printing usage information
sub help{
    print "Usage: ${PROGRAM} [FILENAME] [OPTIONS] ...\n";
    print "Generates a header file from an xml source file.\n\nGrabs all strings ";
    print "between <_translatable_node> and it's end tag,\nwhere tag are all allowed ";
    print "xml tags. Read the docs for more info.\n\n"; 
    print "  -v, --version                shows the version\n";
    print "  -h, --help                   shows this help page\n";
    print "  -q, --quiet                  quiet mode\n";
    print "\nReport bugs to <kenneth\@gnu.org>.\n";
    exit;
}

## Sub for printing error messages
sub error{
    print "Try `${PROGRAM} --help' for more information.\n";
    exit;
}

sub message {
    print "Generating C format header file for translation.\n";
}

sub extract {
    &determine_type;

    &convert ($FILE);

    open OUT, ">$OUTFILE";
    &msg_write;
    close OUT;

    print "Wrote $OUTFILE\n" unless $QUIET_ARG;
}

sub convert($) {

    ## Reading the file
    {
	local (*IN);
	local $/; #slurp mode
	open (IN, "<$FILE") || die "can't open $FILE: $!";
	$input = <IN>;
    }

    &type_ini if $gettext_type eq "ini";
    &type_keys if $gettext_type eq "keys";
    &type_xml if $gettext_type eq "xml";
    &type_glade if $gettext_type eq "glade";
    &type_scheme if $gettext_type eq "scheme";
    &type_schemas  if $gettext_type eq "schemas";
}

sub entity_decode_minimal
{
    local ($_) = @_;

    s/&apos;/'/g; # '
    s/&quot;/"/g; # "
    s/&amp;/&/g;

    return $_;
}

sub entity_decode
{
    local ($_) = @_;

    s/&apos;/'/g; # '
    s/&quot;/"/g; # "
    s/&amp;/&/g;
    s/&lt;/</g;
    s/&gt;/>/g;

    return $_;
}

sub escape_char
{
    return '\"' if $_ eq '"';
    return '\n' if $_ eq "\n";
    return '\\' if $_ eq '\\';

    return $_;
}

sub escape
{
    my ($string) = @_;
    return join "", map &escape_char, split //, $string;
}

sub type_ini {
    ### For generic translatable desktop files ###
    while ($input =~ /^_.*=(.*)$/mg) {
        $messages{$1} = [];
    }
}

sub type_keys {
    ### For generic translatable mime/keys files ###
    while ($input =~ /^\s*_\w+=(.*)$/mg) {
        $messages{$1} = [];
    }
}

sub type_xml {
    ### For generic translatable XML files ###
        
    while ($input =~ /\s_$w+=\"([^"]+)\"/sg) { # "
        $messages{entity_decode_minimal($1)} = [];
    }

    while ($input =~ /<_($w+)>(.+?)<\/_\1>/sg) {
        $_ = $2;
        s/\s+/ /g;
	s/^ //;
	s/ $//;
        $messages{entity_decode_minimal($_)} = [];
    }
}

sub type_schemas {
    ### For schemas XML files ###
         
    # FIXME: We should handle escaped < (less than)
    while ($input =~ /<(short|long)>([^<]+)<\/\1>/sg) {
        $_ = $2;
        s/\s+/ /g;
	s/^ //;
	s/ $//;
        $messages{entity_decode_minimal($_)} = [];
    }
}

sub type_glade {
    ### For translatable Glade XML files ###

    my $tags = "label|title|text|format|copyright|comments|preview_text|tooltip|message";

    while ($input =~ /<($tags)>([^<]+)<\/($tags)>/sg) {
	# Glade sometimes uses tags that normally mark translatable things for
        # little bits of non-translatable content. We work around this by not
        # translating strings that only includes something like label4 or window1.
	$messages{entity_decode($2)} = [] unless $2 =~ /^(window|label)[0-9]+$/;
    }
    
    while ($input =~ /<items>(..[^<]*)<\/items>/sg) {
	for my $item (split (/\n/, $1)) {
	    $messages{entity_decode($item)} = [];
	}
    }

    ## handle new glade files
    while ($input =~ /<(property|atkproperty)\s+[^>]*translatable\s*=\s*"yes"[^>]*>([^<]+)<\/\1>/sg) {
	$messages{entity_decode($2)} = [] unless $2 =~ /^(window|label)[0-9]+$/;
    }
    while ($input =~ /<atkaction\s+action_name="([^>]*)"\s+description="([^>]+)"\/>/sg) {
        $messages{entity_decode_minimal($2)} = [];
    }
}

sub type_scheme {
    while ($input =~ /_\(?"((?:[^"\\]+|\\.)*)"\)?/sg) {
	$messages{$1} = [];
    }
}

sub msg_write {
    for my $message (sort keys %messages) { 
   	print OUT "/* xgettext:no-c-format */\n" if $message =~ /%/;
        
    	my @lines = split (/\n/, $message);
    	for (my $n = 0; $n < @lines; $n++) {
            if ($n == 0) { 
 		print OUT "char *s = N_(\""; 
            } else {  
 		print OUT "             \""; 
	    }

            print OUT escape($lines[$n]);

            if ($n < @lines - 1) { 
		print OUT "\\n\"\n"; 
 	    } else { 
		print OUT "\");\n";  
	    }
        }
    }
}