diff options
Diffstat (limited to 'po/bin/po2txt')
-rwxr-xr-x | po/bin/po2txt | 120 |
1 files changed, 118 insertions, 2 deletions
diff --git a/po/bin/po2txt b/po/bin/po2txt index f10ee5b..ba29001 100755 --- a/po/bin/po2txt +++ b/po/bin/po2txt @@ -5,23 +5,37 @@ # Note: en.po ist treated specially! use Getopt::Long; +use Encode; + +sub arabic_is_letter; +sub arabic_read_map; +sub arabic_conv; sub read_texts; sub join_msg; +sub fribidi; -$opt_product = "SUSE Linux"; +$opt_product = "openSUSE"; GetOptions( - 'product=s' => \$opt_product + 'product=s' => \$opt_product, + 'verbose|v' => \$opt_arabic_verbose, ); +chomp ($tmp_file = `mktemp /tmp/po2txt.XXXXXXXXXX`); + +arabic_read_map; + for $lang (@ARGV) { $lang = 'en' if $lang eq 'bootloader.pot'; $lang =~ s/\.po$//; read_texts $lang; } +unlink $tmp_file; + + sub read_texts { local $_; @@ -100,6 +114,7 @@ sub read_texts $txt =~ s/\\"/"/g; $txt =~ s/\\\\/\\/g; $txt =~ s/\\n/\n/g; + $txt = fribidi $txt if $lang =~ /^(ar|he)/; print "$txt\x00" } @@ -163,3 +178,104 @@ sub join_msg return $m; } + + +sub fribidi +{ + local $_; + my $m; + + $m = shift; + + open F, ">$tmp_file"; + print F arabic_conv($m); + close F; + + open F, "cat $tmp_file | fribidi --nopad --nobreak |"; + $m = undef; + while(read F, $_, 0x10000) { + $m .= $_; + } + close F; + + return $m; +} + + +sub arabic_conv { + local $_; + my (@c, @m, $i, @attr, @attr_name); + + push @c, 0; + push @c, unpack("V*", encode("utf32le", decode("utf8", $_[0]))); + push @c, 0; + + # isolated: 0, initial: 1, final: 2, medial: 3 + for ($i = 1; $i < @c - 1; $i++) { + next if !arabic_is_letter $c[$i]; + $attr[$i-1] += 2 if arabic_is_letter $c[$i-1]; + $attr[$i-1] += 1 if arabic_is_letter $c[$i+1]; + } + + shift @c; + pop @c; + + @attr_name = ( "isolated", "initial", "final", "medial" ); + + for ($i = 0; $i < @c; $i++) { + $m = $c[$i]; + if(arabic_is_letter $c[$i]) { + $m = $arabic_map->{$c[$i]}{$attr_name[$attr[$i]]}; + if(!$m && $attr[$i] == 3) { # medial -> final + $attr[$i] = 2; + $m = $arabic_map->{$c[$i]}{$attr_name[$attr[$i]]}; + if($m && $i < @c - 1) { + if($attr[$i+1] == 3) { # next char: medial -> initial + $attr[$i+1] = 1; + } + elsif($attr[$i+1] == 1) { # next char: initial ->isolated + $attr[$i+1] = 0; + } + } + } + $m = $c[$i] unless $m; + } + push @m, $m; + + printf STDERR "%04x -> %04x (%s)\n", $c[$i], $m, $attr_name[$attr[$i]] if $opt_arabic_verbose; + } + + return encode("utf8", decode("utf32le", pack("V*", @m))); +} + + +sub arabic_is_letter +{ + return $_[0] >= 0x600 && $_[0] <= 0x6ff ? 1 : 0 +} + + +sub arabic_read_map +{ + local $_; + my (@i, $u, $m); + + open F, "bin/arabic.txt"; + while(<F>) { + @i = split /;/; + $u = hex $i[0]; + if($i[5] =~ /^<(isolated|initial|final|medial)> (\S{4})$/) { + $m = hex $2; + if($arabic_map->{$m}{$1}) { + printf STDERR "%04x already has a '$1' form: %04x\n", $arabic_map->{$m}{$1} + } + $arabic_map->{$m}{$1} = $u; + # printf STDERR "%04x %s %04x\n", $u, $1, $m; + } + } + close F; + + print STDERR "warning: no arabic support\n" unless $arabic_map +} + + |