summaryrefslogtreecommitdiffstats
path: root/perl-install/share/po/validate.pl
blob: a25eeaaa829f62a715c74e29e2429d5177fabd67 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
#!/usr/bin/perl -w

#
# Guillaume Cottenceau (gc@mandrakesoft.com)
#
# Copyright 2000 Mandrakesoft
#
# This software may be freely redistributed under the terms of the GNU
# public license.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#

# Tool to avoid common grammar errors in po files.

use MDK::Common;

my $col = $ENV{GREP_COLOR} || "01";
sub colorize { "[1;$col;40m@_" }

sub get_file($)
{
    local *FIL;
    open FIL, "$_[0]" or die "Can't open $_[0]";
    my @file_content = <FIL>;
    close FIL;
    my @out;
    my $msgstr = 0;
    my $line_number = 0;
    foreach (@file_content)
    {
	$line_number++;
	/msgid/ and $msgstr = 0;
	/msgstr/ and $msgstr = 1;
	$msgstr and push @out, sprintf("%4d ", $line_number).$_;
    }
    @out;
}


sub check
{
    my ($category, $condition, $msg, @exceptions) = @_;
    my $line = $_;
    my $rest = '';
    #- kinda hard to make multiple match and still can highlight the right one..
    my $adv = sub { $rest .= substr $line, 0, 1; $line =~ s/^.// };
    while (length($line) > 1) {
	$line =~ /$condition/ or return;
	my ($rest_, $before, $match, $after) = ($rest, $`, $&, $');
	$adv->() foreach 1 .. length($before);
	$_->($before, $match, $after) and goto next_char foreach @exceptions;
	printf "$category: %-30s ", $msg; print $rest_, $before, colorize($match), $after;
      next_char:
	$adv->() foreach 1 .. length($match);
    }
}


my @names = qw(XFree MHz GHz KBabel XFdrake IPv4 MTools iBook DrakX MacOS MacOSX G3 G4 DVD
               Drakbackup Inc Gnome Mandrake IceWM MySQL PostgreSQL Enlightenment Window WindowMaker Fvwm
               SunOS ReiserFS iMac
               CD OF LPRng ext2FS PowerBook OSs CUPS NIS KDE GNOME BootX TVout
               WebDAV IP SMB Boston MA MtoolsFM PCI USB ISA PnP XawTV PSC LaserJet Sony LPT\d
               Frank Thomas Sergey XSane M ClusterNFS 3Com drakTermServ RAMdisk LOCAL);

sub match {
    my ($e) = @_; sub {
	my ($before, $match, $after) = @_;
	$match =~ /^$e/
    }
}
sub match_after {
    my ($e) = @_; sub {
	my ($before, $match, $after) = @_;
	"$match$after" =~ /^$e/
    }
}
sub match_full {
    my ($e) = @_; sub {
	my ($before, $match, $after) = @_;
	"$match$after" =~ /^$e/
    }
}

sub mixed_case($)
{
    check('**', '\b\w[A-Z]\w*[a-z]\b', 'mixed-case',
	  sub { my ($b, $m, $a) = @_; $b =~ /\\$/ && $m =~ /^t/ },
	  sub { my ($b, $m, $a) = @_; $b =~ /\\$/ && $m =~ /^fI/ },
	  match('_[A-Z][a-z]+\b'),
	  map { match_after($_.'\b') } @names);
    check('**', '\b\w[a-z]\w*[A-Z]\b', 'mixed-case',
	  map { match_after($_.'\b') } @names);
}

sub uppercase_after_comma($)
{
    check('**', ', [A-Z]', 'uppercase-after-comma',
	  map { match_after(", $_".'\b') } @names);
}

sub lowercase_after_dot($)
{
    check('**', '\. [a-z]', 'lowercase-after-dot',
	  sub { my ($b, $m, $a) = @_; any { $b =~ /$_$/ } qw(id ex) },
	  sub { my ($b, $m, $a) = @_; any { $b =~ /\Q$_\E$/ } qw (S.A N.B) },
	  map { match_after('\. '.$_) } @names);
}

sub no_space_after_ponct($)
{
    check('**', '[,\.:;]\w', 'no-space-after-ponct',
	  sub { my ($b, $m, $a) = @_; any { my ($beg, $end) = /^(.)(..)/; $b =~ /$beg$/ && $m eq $end } qw(S.A N.B M.N L.P) },
	  sub { my ($b, $m, $a) = @_; any { my ($beg, $end) = /^(...)(..)/; $b =~ /\Q$beg\E$/ && $m eq $end } qw(M.N.F L.P.I) },
	  sub { my ($b, $m, $a) = @_; any { "$m$a" =~ /\S*\.$_\b/ }
		  qw(com fr h d htm o org php php3 cf conf img deny pfm afm cfg tftpd allow bin uk lzrom nbi net old dir scale tbxi) },
	  match_after('\.ex\.'),    #- p.ex.
	  match(':[a-fA-F]'),       #- ipv6
	  map { match_after(".$_") } qw(cmode mclk vmode LTR rpmnew backupignore root_squash all_squash), 0..9 );
}

sub doubly_ponct($)
{
    check('**', '([\.,:;])\1', 'doubly-ponct',
	  match_after(quotemeta('...')));
}

sub space_before_simple_ponct($)
{
    check('**', '\s[,\.]', 'space-before-simple-ponct',
	  map { match_after('\s\.'.$_) } qw(rpmnew backupignore afm pfm));
}



# --- fr.po

foreach (get_file("fr.po"))
{
    #- line oriented verifications
    /\s*#/ and next;

    check('fr', 'ez\s+\S+ez', 'infinitive-form-with-ez');
    check('fr', 'è[ \.,;:]', 'grave-accent-at-end-of-word');
    check('fr', '\b\w*[éêè][éêè]\w*\b', 'strange-accents-succession',
	  map { match($_) } qw(créé réécrire));
    mixed_case($_);
    uppercase_after_comma($_);
    lowercase_after_dot($_);
    no_space_after_ponct($_);
    doubly_ponct($_);
    space_before_simple_ponct($_);
}