diff options
author | Pablo Saratxaga <pablo@mandriva.com> | 2001-02-14 17:22:34 +0000 |
---|---|---|
committer | Pablo Saratxaga <pablo@mandriva.com> | 2001-02-14 17:22:34 +0000 |
commit | 217226efe1f63f3b9383e22a6c2616a118d1e6de (patch) | |
tree | d3e026a2307be35f057d5fbc398df27a262c1590 /po/pl_create_placeholder | |
parent | 2a2de4e6275563b1d974f695ef87fa7748df6520 (diff) | |
download | urpmi-217226efe1f63f3b9383e22a6c2616a118d1e6de.tar urpmi-217226efe1f63f3b9383e22a6c2616a118d1e6de.tar.gz urpmi-217226efe1f63f3b9383e22a6c2616a118d1e6de.tar.bz2 urpmi-217226efe1f63f3b9383e22a6c2616a118d1e6de.tar.xz urpmi-217226efe1f63f3b9383e22a6c2616a118d1e6de.zip |
i18n improvements
Diffstat (limited to 'po/pl_create_placeholder')
-rwxr-xr-x | po/pl_create_placeholder | 508 |
1 files changed, 508 insertions, 0 deletions
diff --git a/po/pl_create_placeholder b/po/pl_create_placeholder new file mode 100755 index 00000000..3a34cbdb --- /dev/null +++ b/po/pl_create_placeholder @@ -0,0 +1,508 @@ +#!/usr/bin/perl + +# modified xplgettext from perl-gettext (pablo) + +# this program extracts internationalizable strings from a perl program. + +# forward declaration of subroutines. +sub unescapeDQString; +sub replaceString($$$$); +sub doWarn($$); +sub displayHelp(); +sub doNotify($); + + +# we do the option handling first. +use Getopt::Long; +Getopt::Long::config("no_ignore_case", + "no_auto_abbrev", + "require_order", + "bundling"); +# since get options doesn't seem to handle '-' as option well, +# we'll preprocess here +foreach $i (0 ... $#ARGV) +{ + if($opt eq '-') + { + $ARGV[$i] = '__stdin__'; + } +} + +if(!GetOptions(\%opts, +# 'default-domain|d=s', +# 'directory|D=s', +# 'no-escape|e', +# 'escape|E', +# 'files-from=s', +# 'sort-by-file|F', +# 'help|h', +# 'indent|i', +# 'join-existing|j', +# 'keyword|k:s@', +# 'string-limit|l=i', +# 'msgstr-prefix|m:s', +# 'msgstr-suffix|M:s', +# 'no-location', +# 'add-location|n', +# 'omit-header', +# 'sort-output|s', +# 'strict', + 'verbose|v', +# 'version|V', +# 'exclude-file|x=s' + ) || defined($opts{"help"})) +{ + displayHelp; +} + +# check if we have any file names to deal with. +if($#ARGV == -1) +{ + doWarn 1, 'File name not specified. Standard in is used instead.'; + push @ARGV, '__stdin__'; +} + + +# keeps track of the state of the parser. basically means what we are +# looking for next. +$state = 'func_head'; + +# initialize the text temporarry strage. +$text = ""; + +# repeat for all the lines. +for($i = 0; $i <= $#ARGV; $i++) +{ + # this keeps track of the line number. + $linenum = 0; + + # open the file. we take '__stdin__' as standard in. + $ARGV[$i] = '-' if $ARGV[$i] eq '__stdin__'; + if(!open(IN, "< $ARGV[$i]")) + { + # failed to open the file. + doWarn 0, "Failed to open input file $ARGV[$i]."; + next; + } + doNotify "File $ARGV[$i] opened."; + + # keep track of the files successfully opened. + push @files, $ARGV[$i]; + + while(<IN>) + { + $linenum++; + + # discard the comment lines. + if(/^\s*\#/) + { + next; + } + + # our assumption is that 'I_(' won't appear anywhere other than where + # internationalizable text is being marked. + chop; + $line = $_; + while ($line) + { + if($state eq 'func_head') + { + # this means we are looking for the beginning of funciton call. + #if($line =~ s/.*?I\_\(//) + if($line =~ s/.*?\_\(//) + { + $state = 'quote_begin'; + next; + } + + # nothing interesting in this line. + last; + } + + elsif($state eq 'quote_begin') + { + # we are looking for left quote. there can be two of them. + if($line =~ s/^\s*([\"\'])//) + { + # this has to happen. check what kind of quoting we have. + $start_line = $linenum; + if($1 eq '"') + { + $state = 'dq_text'; + } + else + { + $state = 'sq_text'; + } + } + else + { + # otherwise, this is something we can't handle. + doWarn 1, "Unrecognizable expression within I_" . + " function call. (line: $linenum)"; + $state = 'dynamic_text'; + } + + # lines can't end in this state. + next; + } + + elsif($state eq 'dq_text') + { + # we are only interested in another double quote which is not + # preceded by a back slash. + if($line =~ s/(.*?)\"//) + { + # check if we have a back slash preceding it. + $t = $1; + if($t =~ /\\$/) + { + # the double quote was within the string still. + $text .= unescapeDQString($t + '"'); + } + else + { + # end of the string. + $text .= unescapeDQString($t); + $state = 'end_text'; + } + next; + } + + # all this line is the string. + $text .= unescapeDQString($line . '\n'); + last; + } + + elsif($state eq 'sq_text') + { + # we are only interested in another single quote which is not + # preceded by a back slash. + if($line =~ s/(.*?)\'//) + { + # check if we have a back slash preceding it. + $t = $1; + if($t =~ s/\\$//) + { + # the single quote was still in the string. + $text .= unescapeDQString($t + "'"); + } + else + { + # end of the string. + $text .= unescapeDQString($t); + $state = 'end_text'; + } + next; + } + + # all this line is the string. + $text .= unescapeDQStrig($line . '\n'); + last; + } + + elsif($state eq 'dynamic_text') + { + # we can't really handle this situation, so we'll look for + # a close pharenthy to return to the normal state. + if($line =~ s/.*?\)//) + { + # we can go back to the normal state. + $state = 'func_head'; + next; + } + + # we have to find the close pharenthy to go + # back to the normal state. + last; + } + + elsif($state eq 'end_text') + { + # we allow only period to appear between strings. + if($line =~ s/^\s*([\.\)])//) + { + # check what we've got. + if($1 eq '.') + { + # another string should be comming. + $state = 'quote_begin'; + } + else + { + # we ended the function call. + $state = 'func_head'; + + # check if the string is a valid one. + if($text eq "") + { + # we don't accept null strings. + doWarn 1, "Null string appeared. (line: $linenum)"; + } + else + { + # check if this string has appeared before. + if(exists($msgs{$text})) + { + # it does exit. + $msgs{$text} .= " $ARGV[$i]:$start_line"; + } + else + { + # first time. + $msgs{$text} = "$ARGV[$i]:$start_line"; + } + $text = ""; + } + } + next; + } + else + { + # something weird follows the text. + # in this case, we ignore the string processed too. + doWarn 1, 'Unrecognizable expression within I_' . + " function call. (line: $linenum)"; + $text = ""; + $state = 'dynamic_text'; + next; + } + } + + else + { + # this should not happen. + doWarn 2, "Unknown state $state."; + last; + } + + # end of while loop for state processing. + } + # end of while loop for input lines. + } + # end of for loop for input files. +} + + +# we should have all the data we need now. +# output the message file. +if(!open(OUT, ">&STDOUT")) +{ + doWarn 0, 'Failed to open file placeholder.h.'; + exit(0); +} + +# go through all the strings gathered. +foreach $msgid (keys(%msgs)) +{ + # we find out if the text has to be split into lines. + $#lines = - 1; + while($msgid =~ /(.*?\\n)/gc) + { + push @lines, $1; + } + if(pos($msgid) < length($msgid)) + { + # we have another line. + push @lines, substr($msgid, pos($msgid)); + } + + # check how many lines we have. + if($#lines == 0) + { +# only multiline aren't found by xgettext + # one line format. + #print OUT "N_(\"$lines[0]\"),\n"; + } + else + { + # multi-line format. + print OUT "N_(\"\"\n"; + foreach $line (@lines) + { + print OUT "\"$line\"\n" + } + print OUT "\"\"),\n"; + } +} + + +# successfully completed the mission. +exit 0; + + +###################################################################### +# helper funciton section +###################################################################### + +# this function has to return a value, so it doesn't deserve +# the statement style call, and therefore doesn't deserve prototype. +sub unescapeDQString +{ + my $str = shift; + + # check all the back slashes. + while($str =~ /\\/g) + { + # we found a back slash. let's check what the following + # character is. + my $char = substr($str, pos($str)); + + if($char eq '') + { + # since we assume one call of this function is for one + # complete string (term in string expression), we + # assume we don't have to worry about escape sequences + # span multiple calls. + + # it was the last character in the string. this shouldn't + # happen, but we respect the back slash. + doWarn 2, "Back slash found at the end of string. (line: $lnenum)"; + $str .= '\\'; + last; + } + + elsif($char =~ /^([tn]|[0-8][0-8][0-8])/) + { + # valid sequence. no problem. + next; + } + + elsif($char =~ /^[rfb]/) + { + # we warn, and use them as is. + doWarn 1, "International string sholdn't contain \\$&."; + next; + } + + elsif($char =~ /^[ae]/) + { + # we warn, and convert. + doWarn 1, "International string shouldn't contain \\$&."; + replaceString \$str, $& eq 'a' ? '007' : '033', 0, 1; + next; + } + + elsif($char =~ /^x([0-9a-fA-F][0-9a-fA-F])/) + { + # here, we do the conversion silently. + replaceString \$str, sprintf("%03o", hex($1)), 0, 3; + next; + } + + elsif($char =~ /^c(.)/) + { + # we warn, and convert. + doWarn 1, "International string shouldn't contain \\$&."; + my $ctrl = ord($1) - 64; + if($ctrl < 255 || $ctrl < 0) + { + doWarn 1, "Unrecognizable control sequence \\$&."; + next; + } + replaceString \$str, sprintf("%03o", $ctrl), 0, 2; + next; + } + + elsif($char =~ /^[luLUEQ]/) + { + # perl special escape sequences. + # we'll let them keep the slashes. + replaceString \$str, '\\', 0, 0; + next; + } + + else + { + # in all other cases, the slash needs to be discarded. + replaceString \$str, '', -1, 1; + next; + } + # end of while loop for slashes. + } + + # return value is the converted string. + $str; +} + + +# this one replaces a part of string in the specified position. +# it also sets the search position to the end of the replaced substring. +# the arguments are +# - reference of the target string +# - replacement text +# - starting position of the substring replaced (relative to pos) +# - length of the substring replaced +sub replaceString($$$$) +{ + my $str_ref = shift; + my $replace_str = shift; + my $del_start = shift; + my $num_del = shift; + + my $p = pos($$str_ref); + $$str_ref = substr($$str_ref, 0, $p + $del_start) . + $replace_str . + substr($$str_ref, $p + $del_start + $num_del); + + # adjust the search position. + pos($$str_ref) = $p + $del_start + length($replace_str); +} + + +# sends warning to standard error. the first argument is for the +# warning level: +# 0: critical - always show +# 1: warning - show if verbose +# 2: bug - shouldn't happen +# the second argument is the message to be displayed. +sub doWarn($$) +{ + my $level = shift; + my $message = shift; + + if($level == 0) + { + # critical error. + warn "ERROR: " . $message . "\n"; + } + + elsif($level == 1) + { + # warning. + warn "WARNING: " . $message . "\n" if exists($opts{"verbose"}); + } + + elsif($level == 2) + { + # programming error (of plxgettext). + warn "BUG: " . $message . "\n"; + } + + else + { + # shouldn't happen. let's do a recursive call. + doWarn 2, 'Invalid warning level specified.'; + } +} + + +# tell user what we are doing. +sub doNotify($) +{ + my $msg = shift; + + if(!defined($opts{"verbose"})) + { + # nothing we can do. + return; + } + + print $msg . "\n"; +} + + +# show the help message. +sub displayHelp() +{ +} |