#! /usr/bin/perl # convert *.po files to texts.* files suitable for gfxboot # usage: po2txt lang.po >texts.lang # Note: en.po ist treated specially! use Getopt::Long; use Encode; sub arabic_is_letter; sub arabic_read_map; sub arabic_conv; sub read_texts; sub join_msg; sub fribidi; $opt_product = "openSUSE"; GetOptions( 'product=s' => \$opt_product, 'verbose|v' => \$opt_arabic_verbose, ); chomp ($tmp_file = `mktemp /tmp/po2txt.XXXXXXXXXX`); arabic_read_map; for $lang (@ARGV) { $lang = 'en' if $lang eq 'bootloader.pot'; $lang =~ s/\.po$//; read_texts $lang; } unlink $tmp_file; sub read_texts { local $_; my ($lang, @f, $txt, $context, $t, $p, $ids, $file); $lang = shift; $file = "$lang.po"; $file = 'bootloader.pot' if $lang eq 'en'; if($lang eq 'en') { $ids = 1; } open F, $file; @f = (); close F; map { s//$opt_product/g; } @f; $_ = $lang; s/.*\///; for (@f) { if(/^\s*#\.\s*(txt_\S+)/) { if($txt) { @msgstr = @msgid if $ids || join_msg(\@msgstr) eq ""; $txts{$txt} = join_msg(\@msgstr); } $txt = $1; undef @msgid; undef @msgstr; undef $context; next; } next if /^\s*#.*|^\s*$/; if(/^\s*msgid\s*(\".*\")\s*$/) { push @msgid, $1 unless $1 eq '""'; $context = 1; next; } if(/^\s*msgstr\s*(\".*\")\s*$/) { push @msgstr, $1 unless $1 eq '""'; $context = 2; next; } if(/^\s*(\".*\")\s*$/) { if($context == 1) { push @msgid, $1; } elsif($context == 2) { push @msgstr, $1; } else { die "format oops in ${lang}.po: $_" } } } if($txt) { @msgstr = @msgid if $ids || join_msg(\@msgstr) eq ""; $txts{$txt} = join_msg(\@msgstr); } @txts = sort keys %txts; for (@txts) { $txt = $txts{$_}; $txt =~ s/\\"/"/g; $txt =~ s/\\\\/\\/g; $txt =~ s/\\n/\n/g; $txt = fribidi $txt if $lang =~ /^(ar|he)/; print "$txt\x00" } if($ids) { open W, ">text.inc"; print W "%\n% This file is generated automatically. Editing it is pointless.\n%\n\n"; print W "/texts [\n"; $p = pop @txts; for (@txts) { print W " /$_\n" } print W " /$p\n] def\n"; close W; } else { open F, "text.inc"; for () { if(/\s+\/(txt_\S+)/) { $txt_ref{$1} = undef; } } close F; for (@txts) { $txt_list{$_}++; $txt_multi{$_} = 1 if $txt_list{$_} > 1; } for (@txts) { $txt_unknown{$_} = 1 unless exists $txt_ref{$_}; } for (keys %txt_ref) { $txt_miss{$_} = 1 unless exists $txt_list{$_}; } if(defined(%txt_miss) || defined(%txt_unknown) || defined(%txt_multi)) { print STDERR "$lang:\n"; for (sort keys %txt_miss) { print STDERR " missing: $_\n" } for (sort keys %txt_unknown) { print STDERR " unknown: $_\n" } for (sort keys %txt_multi) { print STDERR " multi: $_\n" } } } } sub join_msg { local $_; my ($s, $msg, $m); $msg = shift; for $s (@{$msg}) { $_ = $s; s/^\"(.*)\"$/$1/; $m .= $_; } return $m; } sub fribidi { local $_; my $m; $m = shift; open F, ">$tmp_file"; print F arabic_conv($m); close F; open F, "cat $tmp_file | fribidi --nopad --nobreak |"; $m = undef; while(read F, $_, 0x10000) { $m .= $_; } close F; return $m; } sub arabic_conv { local $_; my (@c, @m, $i, @attr, @attr_name); push @c, 0; push @c, unpack("V*", encode("utf32le", decode("utf8", $_[0]))); push @c, 0; # isolated: 0, initial: 1, final: 2, medial: 3 for ($i = 1; $i < @c - 1; $i++) { next if !arabic_is_letter $c[$i]; $attr[$i-1] += 2 if arabic_is_letter $c[$i-1]; $attr[$i-1] += 1 if arabic_is_letter $c[$i+1]; } shift @c; pop @c; @attr_name = ( "isolated", "initial", "final", "medial" ); for ($i = 0; $i < @c; $i++) { $m = $c[$i]; if(arabic_is_letter $c[$i]) { $m = $arabic_map->{$c[$i]}{$attr_name[$attr[$i]]}; if(!$m && $attr[$i] == 3) { # medial -> final $attr[$i] = 2; $m = $arabic_map->{$c[$i]}{$attr_name[$attr[$i]]}; if($m && $i < @c - 1) { if($attr[$i+1] == 3) { # next char: medial -> initial $attr[$i+1] = 1; } elsif($attr[$i+1] == 1) { # next char: initial ->isolated $attr[$i+1] = 0; } } } $m = $c[$i] unless $m; } push @m, $m; printf STDERR "%04x -> %04x (%s)\n", $c[$i], $m, $attr_name[$attr[$i]] if $opt_arabic_verbose; } return encode("utf8", decode("utf32le", pack("V*", @m))); } sub arabic_is_letter { return $_[0] >= 0x600 && $_[0] <= 0x6ff ? 1 : 0 } sub arabic_read_map { local $_; my (@i, $u, $m); open F, "bin/arabic.txt"; while() { @i = split /;/; $u = hex $i[0]; if($i[5] =~ /^<(isolated|initial|final|medial)> (\S{4})$/) { $m = hex $2; if($arabic_map->{$m}{$1}) { printf STDERR "%04x already has a '$1' form: %04x\n", $arabic_map->{$m}{$1} } $arabic_map->{$m}{$1} = $u; # printf STDERR "%04x %s %04x\n", $u, $1, $m; } } close F; print STDERR "warning: no arabic support\n" unless $arabic_map }