#!/usr/bin/perl

# modified xplgettext from perl-gettext (pablo)

# this program extracts internationalizable strings from a perl program.

# forward declaration of subroutines.
sub unescapeDQString;
sub replaceString($$$$);
sub doWarn($$);
sub displayHelp();
sub doNotify($);


# we do the option handling first.
use Getopt::Long;
Getopt::Long::config("no_ignore_case", 
                     "no_auto_abbrev", 
                     "require_order",
                     "bundling");
# since get options doesn't seem to handle '-' as option well,
# we'll preprocess here
foreach $i (0 ... $#ARGV)
{
   if($opt eq '-')
   {
      $ARGV[$i] = '__stdin__';
   }
}

if(!GetOptions(\%opts,
#           'default-domain|d=s',
#           'directory|D=s',
#           'no-escape|e',
#           'escape|E',
#           'files-from=s',
#           'sort-by-file|F',
#           'help|h',
#           'indent|i',
#           'join-existing|j',
#           'keyword|k:s@',
#           'string-limit|l=i',
#           'msgstr-prefix|m:s',
#           'msgstr-suffix|M:s',
#           'no-location',
#           'add-location|n',
#           'omit-header',
#           'sort-output|s',
#           'strict',
           'verbose|v',
#           'version|V',
#           'exclude-file|x=s'
           ) || defined($opts{"help"}))
{
   displayHelp;
}

# check if we have any file names to deal with.
if($#ARGV == -1)
{
   doWarn 1, 'File name not specified.  Standard in is used instead.';
   push @ARGV, '__stdin__';
}


# keeps track of the state of the parser.  basically means what we are 
# looking for next.
$state = 'func_head';

# initialize the text temporarry strage.
$text = "";

# repeat for all the lines.
for($i = 0; $i <= $#ARGV; $i++)
{
   # this keeps track of the line number.
   $linenum = 0;

   # open the file.  we take '__stdin__' as standard in.
   $ARGV[$i] = '-' if $ARGV[$i] eq '__stdin__';
   if(!open(IN, "< $ARGV[$i]"))
   {
      # failed to open the file.
      doWarn 0, "Failed to open input file $ARGV[$i].";
      next;
   }
   doNotify "File $ARGV[$i] opened.";

   # keep track of the files successfully opened.
   push @files, $ARGV[$i];

   while(<IN>)
   {
      $linenum++;
   
      # discard the comment lines.
      if(/^\s*\#/)
      {
         next;
      }
   
      # our assumption is that 'I_(' won't appear anywhere other than where 
      # internationalizable text is being marked.
      chop;
      $line = $_;
      while ($line)
      {
         if($state eq 'func_head')
         {
            # this means we are looking for the beginning of funciton call.
            #if($line =~ s/.*?I\_\(//)
            if($line =~ s/.*?\_\(//)
            {
               $state = 'quote_begin';
               next;
            }

            # nothing interesting in this line.
            last;
         }
   
         elsif($state eq 'quote_begin')
         {
            # we are looking for left quote.  there can be two of them.
            if($line =~ s/^\s*([\"\'])//)
            {
               # this has to happen.  check what kind of quoting we have.
               $start_line = $linenum;
               if($1 eq '"')
               {
                  $state = 'dq_text';
               }
               else
               {
                  $state = 'sq_text';
               }
            }
            else
            {
               # otherwise, this is something we can't handle.
               doWarn 1, "Unrecognizable expression within I_" .
                            " function call. (line: $linenum)";
               $state = 'dynamic_text';
            }
   
            # lines can't end in this state.
            next;
         }
   
         elsif($state eq 'dq_text')
         {
            # we are only interested in another double quote which is not
            # preceded by a back slash.
            if($line =~ s/(.*?)\"//)
            {
               # check if we have a back slash preceding it.
               $t = $1;
               if($t =~ /\\$/)
               {
                  # the double quote was within the string still.
                  $text .= unescapeDQString($t + '"');
               }
               else
               {
                  # end of the string.
                  $text .= unescapeDQString($t);
                  $state = 'end_text';
               }
               next;
            }

            # all this line is the string.
            $text .= unescapeDQString($line . '\n');
            last;
         }
   
         elsif($state eq 'sq_text')
         {
            # we are only interested in another single quote which is not 
            # preceded by a back slash.
            if($line =~ s/(.*?)\'//)
            {
               # check if we have a back slash preceding it.
               $t = $1;
               if($t =~ s/\\$//)
               {
                  # the single quote was still in the string.
                  $text .= unescapeDQString($t + "'");
               }
               else
               {
                  # end of the string.
                  $text .= unescapeDQString($t);
                  $state = 'end_text';
               }
               next;
            }

            # all this line is the string.
            $text .= unescapeDQStrig($line . '\n');
            last;
         }
   
         elsif($state eq 'dynamic_text')
         {
            # we can't really handle this situation, so we'll look for
            # a close pharenthy to return to the normal state.
            if($line =~ s/.*?\)//)
            {
               # we can go back to the normal state.
               $state = 'func_head';
               next;
            }

            # we have to find the close pharenthy to go
            # back to the normal state.
            last;
         }
   
         elsif($state eq 'end_text')
         {
            # we allow only period to appear between strings.
            if($line =~ s/^\s*([\.\)])//)
            {
               # check what we've got.
               if($1 eq '.')
               {
                  # another string should be comming.
                  $state = 'quote_begin';
               }
               else
               {
                  # we ended the function call.
                  $state = 'func_head';
   
                  # check if the string is a valid one.
                  if($text eq "")
                  {
                     # we don't accept null strings.
                     doWarn 1, "Null string appeared. (line: $linenum)";
                  }
                  else
                  {
                     # check if this string has appeared before.
                     if(exists($msgs{$text}))
                     {
                        # it does exit.
                        $msgs{$text} .= " $ARGV[$i]:$start_line";
                     }
                     else
                     {
                        # first time.
                        $msgs{$text} = "$ARGV[$i]:$start_line";
                     }
                     $text = "";
                  }
               }
               next;
            }
            else
            {
               # something weird follows the text.
               # in this case, we ignore the string processed too.
               doWarn 1, 'Unrecognizable expression within I_' .
                            " function call. (line: $linenum)";
               $text = "";
               $state = 'dynamic_text';
               next;
            }
         }
   
         else
         {
            # this should not happen.
            doWarn 2, "Unknown state $state.";
            last;
         }

         # end of while loop for state processing.
      }
      # end of while loop for input lines.
   }
   # end of for loop for input files.
} 


# we should have all the data we need now.
# output the message file.
if(!open(OUT, ">&STDOUT"))
{
   doWarn 0, 'Failed to open file placeholder.h.';
   exit(0);
}

# go through all the strings gathered.
foreach $msgid (keys(%msgs))
{
   # we find out if the text has to be split into lines.
   $#lines = - 1;
   while($msgid =~ /(.*?\\n)/gc)
   {
      push @lines, $1;
   }
   if(pos($msgid) < length($msgid))
   {
      # we have another line.
      push @lines, substr($msgid, pos($msgid));
   }

   # check how many lines we have.
   if($#lines == 0)
   {
# only multiline aren't found by xgettext
      # one line format.
      #print OUT "N_(\"$lines[0]\"),\n";
   }
   else
   {
      # multi-line format.
      print OUT "N_(\"\"\n";
      foreach $line (@lines)
      {
         print OUT "\"$line\"\n"
      }
      print OUT "\"\"),\n";
   }
}


# successfully completed the mission.
exit 0;


######################################################################
# helper funciton section
######################################################################

# this function has to return a value, so it doesn't deserve
# the statement style call, and therefore doesn't deserve prototype.
sub unescapeDQString
{
   my $str = shift;

   # check all the back slashes.
   while($str =~ /\\/g)
   {
      # we found a back slash.  let's check what the following
      # character is.
      my $char = substr($str, pos($str));

      if($char eq '')
      {
         # since we assume one call of this function is for one
         # complete string (term in string expression), we
         # assume we don't have to worry about escape sequences
         # span multiple calls.
         
         # it was the last character in the string.  this shouldn't
         # happen, but we respect the back slash.
         doWarn 2, "Back slash found at the end of string. (line: $lnenum)";
         $str .= '\\';
         last;
      }

      elsif($char =~ /^([tn]|[0-8][0-8][0-8])/)
      {
         # valid sequence.  no problem.
         next;
      }

      elsif($char =~ /^[rfb]/)
      {
         # we warn, and use them as is.
         doWarn 1, "International string sholdn't contain \\$&.";
         next;
      }

      elsif($char =~ /^[ae]/)
      {
         # we warn, and convert.
         doWarn 1, "International string shouldn't contain \\$&.";
         replaceString \$str, $& eq 'a' ? '007' : '033', 0, 1;
         next;
      }

      elsif($char =~ /^x([0-9a-fA-F][0-9a-fA-F])/)
      {
         # here, we do the conversion silently.
         replaceString \$str, sprintf("%03o", hex($1)), 0, 3;
         next;
      }

      elsif($char =~ /^c(.)/)
      {
         # we warn, and convert.
         doWarn 1, "International string shouldn't contain \\$&.";
         my $ctrl = ord($1) - 64;
         if($ctrl < 255 || $ctrl < 0)
         {
            doWarn 1, "Unrecognizable control sequence \\$&.";
            next;
         }
         replaceString \$str, sprintf("%03o", $ctrl), 0, 2;
         next;
      }

      elsif($char =~ /^[luLUEQ]/)
      {
         # perl special escape sequences.
         # we'll let them keep the slashes.
         replaceString \$str, '\\', 0, 0;
         next;
      }

      else
      {
         # in all other cases, the slash needs to be discarded.
         replaceString \$str, '', -1, 1;
         next;
      }
      # end of while loop for slashes.
   }

   # return value is the converted string.
   $str;
}


# this one replaces a part of string in the specified position.
# it also sets the search position to the end of the replaced substring.
# the arguments are
#     - reference of the target string
#     - replacement text
#     - starting position of the substring replaced (relative to pos)
#     - length of the substring replaced
sub replaceString($$$$)
{
   my $str_ref     = shift;
   my $replace_str = shift;
   my $del_start   = shift;
   my $num_del     = shift;

   my $p = pos($$str_ref);
   $$str_ref = substr($$str_ref, 0, $p + $del_start) . 
              $replace_str .
              substr($$str_ref, $p + $del_start + $num_del);

   # adjust the search position.
   pos($$str_ref) = $p + $del_start + length($replace_str);
}


# sends warning to standard error.  the first argument is for the
# warning level:
#     0: critical - always show
#     1: warning  - show if verbose
#     2: bug      - shouldn't happen
# the second argument is the message to be displayed.
sub doWarn($$)
{
   my $level   = shift;
   my $message = shift;

   if($level == 0)
   {
      # critical error.
      warn "ERROR:  " . $message . "\n";
   }

   elsif($level == 1)
   {
      # warning. 
      warn "WARNING:  " . $message . "\n" if exists($opts{"verbose"});
   }

   elsif($level == 2)
   {
      # programming error (of plxgettext).
      warn "BUG:  " . $message . "\n";
   }

   else
   {
      # shouldn't happen.  let's do a recursive call.
      doWarn 2, 'Invalid warning level specified.';
   }
}


# tell user what we are doing.
sub doNotify($)
{
   my $msg = shift;

   if(!defined($opts{"verbose"}))
   {
      # nothing we can do.
      return;
   }

   print $msg . "\n";
}


# show the help message.
sub displayHelp()
{
}