summaryrefslogtreecommitdiffstats
path: root/perl_checker.src/parser_helper.ml
blob: 2a1bce297698e8a6e7ea90f587627b9e107e8d70 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
open Types
open Common
open Printf

(* Placeholder raw position (start, end) for nodes without a source location. *)
let bpos = -1, -1

(* Turn a raw (start, end) pair into a full position by prepending the file
   currently held in [Info.current_file]. *)
let raw_pos2pos (start, end_) = !Info.current_file, start, end_

(* Raw range spanning two elements: start of the first, end of the second.
   A start of -1 in the first element falls back on the second one's start,
   and an end of -1 in the second element falls back on the first one's end. *)
let raw_pos_range first second =
  let start1, end1 = first.pos in
  let start2, end2 = second.pos in
  let start = if start1 = -1 then start2 else start1 in
  let end_ = if end2 = -1 then end1 else end2 in
  start, end_

(* Same as [raw_pos_range], but returns a full position (with the file). *)
let pos_range esp1 esp2 =
  let range = raw_pos_range esp1 esp2 in
  raw_pos2pos range

(* Full position of a single element. *)
let get_pos pesp =
  let raw = pesp.pos in
  raw_pos2pos raw

(* First component of the raw position of an element. *)
let get_pos_start esp =
  let start, _ = esp.pos in
  start

(* Second component of the raw position of an element. *)
let get_pos_end esp =
  let _, end_ = esp.pos in
  end_
(* The scalar variable named "_" (Perl's $_), located at [pos]. *)
let var_dollar_ pos =
  let underscore = Ident(None, "_", pos) in
  Deref(I_scalar, underscore)

(* The glob named "STDOUT", carrying the placeholder position. *)
let var_STDOUT =
  let stdout_ident = Ident(None, "STDOUT", raw_pos2pos bpos) in
  Deref(I_star, stdout_ident)

(* Build the record pairing a value with its leading-space information and
   its raw position. *)
let new_any value sp raw_pos = { any = value ; spaces = sp ; pos = raw_pos }

(* Wrap [e]: spaces come from [esp_start], the position spans
   [esp_start] .. [esp_end]. *)
let new_esp e esp_start esp_end =
  let range = raw_pos_range esp_start esp_end in
  new_any e esp_start.spaces range

(* Same as [new_esp], for an expression tagged with priority [prio]. *)
let new_pesp prio e esp_start esp_end =
  let range = raw_pos_range esp_start esp_end in
  let with_prio = { priority = prio ; expr = e } in
  new_any with_prio esp_start.spaces range

(* Wrap [e] with no space information and the placeholder position. *)
let default_esp e = new_any e Space_none bpos

(* Same as [default_esp], for an expression tagged with priority [prio]. *)
let default_pesp prio e =
  let with_prio = { priority = prio ; expr = e } in
  new_any with_prio Space_none bpos

(* Split a possibly qualified name on "::".
   Returns [(None, name)] for a plain name, and [(Some package, name)]
   otherwise, where [package] is every component but the last re-joined
   with "::". *)
let split_name_or_fq_name full_ident =
  let pieces = split_at2 ':' ':' full_ident in
  match pieces with
  | [] -> internal_error "split_ident"
  | [name] -> None, name
  | _ ->
      let package_path, name = split_last pieces in
      Some (String.concat "::" package_path), name

(* Is this expression the unqualified scalar "_" (Perl's $_)? *)
let is_var_dollar_ e =
  match e with
  | Deref(I_scalar, Ident(None, "_", _)) -> true
  | _ -> false

(* Is this expression an unqualified scalar whose name is a single
   non-zero digit (eg: $1)? *)
let is_var_number_match e =
  match e with
  | Deref(I_scalar, Ident(None, s, _)) when String.length s = 1 ->
      let c = s.[0] in
      c <> '0' && char_is_number c
  | _ -> false

(* True for the hash and array contexts. *)
let non_scalar_context context =
  match context with
  | I_hash | I_array -> true
  | _ -> false

(* True for the scalar context only. *)
let is_scalar_context context =
  match context with
  | I_scalar -> true
  | _ -> false
  
(* True when the expression is known not to be a scalar: a hash/array
   dereference, or a list whose length is not exactly one. *)
let is_not_a_scalar e =
  match e with
  | List [_] -> false
  | List _ -> true
  | Deref(context, _) | Deref_with(_, context, _, _) -> non_scalar_context context
  | _ -> false

(* Same as [is_not_a_scalar], except that array dereferences are accepted:
   only hash dereferences and lists of length <> 1 answer true. *)
let is_not_a_scalar_or_array e =
  match e with
  | List [_] -> false
  | List _ -> true
  | Deref(context, _) | Deref_with(_, context, _, _) -> context = I_hash
  | _ -> false

(* True when the expression is known to be a scalar: a reference, a number,
   a string, or a dereference in scalar context. *)
let is_a_scalar e =
  match e with
  | Deref(context, _) | Deref_with(_, context, _, _) -> is_scalar_context context
  | Ref _ | Num _ | Raw_string _ | String _ -> true
  | _ -> false

(* True for both kinds of string nodes. *)
let is_a_string e =
  match e with
  | Raw_string _ | String _ -> true
  | _ -> false

(* True for the empty list and for a list wrapping exactly one list. *)
let is_parenthesized e =
  match e with
  | List [] -> true
  | List [inner] ->
      (match inner with
       | List _ -> true
       | _ -> false)
  | _ -> false

(* Remove the wrapping of a one-element list; when that element is itself
   a one-element list, remove that level too.  Anything else is an
   internal error. *)
let un_parenthesize e =
  match e with
  | List [inner] ->
      (match inner with
       | List [innermost] -> innermost
       | _ -> inner)
  | _ -> internal_error "un_parenthesize"

(* Strip every level of one-element list wrapping. *)
let rec un_parenthesize_full e =
  match e with
  | List [inner] -> un_parenthesize_full inner
  | _ -> e

(* [is_always_true e] tells whether the literal [e] is true in Perl boolean
   context whatever happens at runtime:
   - a number different from zero,
   - a string that is neither "" nor "0" (Perl treats the string "0" as
     false, so it must not be reported as a true constant),
   - a reference.
   Anything else answers [false] ("not known to be true"). *)
let is_always_true = function
  | Num(n, _) -> float_of_string n <> 0.
  | Raw_string(s, _) -> s <> "" && s <> "0"
  (* the double-quoted, non interpolated string "0" is false too *)
  | String([("0", List [])], _) -> false
  | String(l, _) -> l <> []
  | Ref _ -> true
  | _ -> false

(* [is_always_false e] tells whether the literal [e] is false in Perl
   boolean context whatever happens at runtime:
   - the number zero,
   - the strings "" and "0" (Perl treats the string "0" as false),
   - the empty list.
   Anything else answers [false] ("not known to be false"). *)
let is_always_false = function
  | Num(n, _) -> float_of_string n = 0.
  | Raw_string(s, _) -> s = "" || s = "0"
  (* the double-quoted, non interpolated string "0" *)
  | String([("0", List [])], _) -> true
  | String(l, _) -> l = []
  | List [] -> true
  | _ -> false

(* An expression is "not complex" when it is parenthesized, or when, once
   stripped of its one-element list wrappers, it contains no "?:" operator,
   no operator directly nested inside the same operator, and no
   parenthesized operand. *)
let not_complex e =
  let rec simple_under parent_op sub =
    match sub with
    | Call_op("?:", _, _) -> false
    | Call_op(child_op, args, _) ->
        parent_op <> child_op && List.for_all (simple_under child_op) args
    | _ -> not (is_parenthesized sub)
  in
  is_parenthesized e || simple_under "" (un_parenthesize_full e)

(* False for numbers, identifiers and dereferences of an identifier;
   true for everything else. *)
let not_simple e =
  match e with
  | Num _ | Ident _ -> false
  | Deref(_, Ident _) -> false
  | _ -> true

(* Textual form of an identifier node: "name" or "package::name".
   Any other node is an internal error. *)
let string_of_Ident e =
  match e with
  | Ident(fq_opt, s, _) ->
      (match fq_opt with
       | None -> s
       | Some fq -> fq ^ "::" ^ s)
  | _ -> internal_error "string_of_Ident"

(* Sigil associated with a context. *)
let context2s context =
  match context with
  | I_scalar -> "$"
  | I_hash -> "%"
  | I_array -> "@"
  | I_func -> "&"
  | I_raw -> ""
  | I_star -> "*"

(* Sigil followed by the variable name. *)
let variable2s (context, ident) =
  let sigil = context2s context in
  sigil ^ ident

(* [is_same_fromparser a b] structurally compares two parsed expressions,
   ignoring the positions they carry.  Node kinds not listed below never
   compare equal.

   Sub-lists are compared with a local helper instead of [List.for_all2]:
   the latter raises [Invalid_argument] when the two lists have different
   lengths (eg: "f($a)" against "f($a, $b)"), whereas two lists of
   different lengths are simply not the same. *)
let rec is_same_fromparser a b =
  let rec same_list same l1 l2 =
    match l1, l2 with
    | [], [] -> true
    | x1 :: rest1, x2 :: rest2 -> same x1 x2 && same_list same rest1 rest2
    | _ -> false
  in
  match a, b with
  | Undef, Undef -> true
  | Ident(fq1, s1, _), Ident(fq2, s2, _) -> fq1 = fq2 && s1 = s2
  | Num(s1, _), Num(s2, _) 
  | Raw_string(s1, _), Raw_string(s2, _) -> s1 = s2

  | String(l1, _), String(l2, _) ->
      same_list (fun (s1, e1) (s2, e2) -> s1 = s2 && is_same_fromparser e1 e2) l1 l2

  | Ref(c1, e1), Ref(c2, e2)
  | Deref(c1, e1), Deref(c2, e2) -> c1 = c2 && is_same_fromparser e1 e2

  | Deref_with(c1, c_1, e1, e_1), Deref_with(c2, c_2, e2, e_2) -> c1 = c2 && c_1 = c_2 && is_same_fromparser e1 e2 && is_same_fromparser e_1 e_2

  | Diamond(None), Diamond(None) -> true
  | Diamond(Some e1), Diamond(Some e2) -> is_same_fromparser e1 e2

  | List(l1), List(l2) -> same_list is_same_fromparser l1 l2

  | Call_op(op1, l1, _), Call_op(op2, l2, _) -> op1 = op2 && same_list is_same_fromparser l1 l2
  | Call(e1, l1), Call(e2, l2) -> is_same_fromparser e1 e2 && same_list is_same_fromparser l1 l2

  | Method_call(e1, m1, l1), Method_call(e2, m2, l2) ->
      is_same_fromparser e1 e2 && is_same_fromparser m1 m2 && same_list is_same_fromparser l1 l2

  | _ -> false

(* Extract what a scalar dereference points to; anything else is an
   internal error. *)
let from_scalar esp =
  let e = esp.any in
  match e with
  | Deref(I_scalar, target) -> target
  | _ -> internal_error "from_scalar"

(* Extract what an array dereference points to; anything else is an
   internal error. *)
let from_array esp =
  let e = esp.any in
  match e with
  | Deref(I_array, target) -> target
  | _ -> internal_error "from_array"

(* Prefix [msg] with the textual form of a raw position, as rendered by
   [Info.pos2sfull_current]. *)
let msg_with_rawpos raw_pos msg =
  let start, end_ = raw_pos in
  let prefix = Info.pos2sfull_current start end_ in
  prefix ^ msg

(* Abort with [Failure], the message being prefixed with the position. *)
let die_with_rawpos raw_pos msg =
  let full_msg = msg_with_rawpos raw_pos msg in
  failwith full_msg

(* Print a message prefixed with the position. *)
let warn raw_pos msg =
  let full_msg = msg_with_rawpos raw_pos msg in
  print_endline_flush full_msg

(* Same as [die_with_rawpos], located on the grammar rule being reduced. *)
let die_rule msg =
  let rule_range = (Parsing.symbol_start(), Parsing.symbol_end()) in
  die_with_rawpos rule_range msg

(* Same as [warn], located on the grammar rule being reduced. *)
let warn_rule msg =
  let rule_range = (Parsing.symbol_start(), Parsing.symbol_end()) in
  warn rule_range msg

(* Unconditionally print a debugging message. *)
let debug msg = print_endline_flush msg

(* Warning located at a single offset, silenced when [Flags.quiet] is set. *)
let warn_verb pos msg =
  if !Flags.quiet then () else warn (pos, pos) msg

(* Canned spacing warnings, all located at offset [start]. *)
let warn_too_many_space start =
  warn_verb start "you should have only one space here"
let warn_no_space start =
  warn_verb start "you should have a space here"
let warn_cr start =
  warn_verb start "you should not have a carriage-return (\\n) here"
let warn_space start =
  warn_verb start "you should not have a space here"

(* [prio_less (p1, p2)] compares two priorities.

   The cases are tried from top to bottom and the first one matching wins,
   so their order is significant: each "_, P_x -> true" / "P_x, _ -> false"
   pair settles every comparison involving P_x that was not settled by the
   pairs above it.

   NOTE(review): judging from [prio_lo], a [true] answer seems to mean "an
   expression of priority p1 may be used where priority p2 is expected
   without adding parentheses" -- to be confirmed.

   [P_none] on either side is an internal error. *)
let rec prio_less = function
  | P_none, _ | _, P_none -> internal_error "prio_less"

  (* P_paren_wanted is transparent: compare the priority it wraps *)
  | P_paren_wanted prio1, prio2
  | prio1, P_paren_wanted prio2 -> prio_less(prio1, prio2)

  (* special cases which must come before the generic P_or / P_and rules *)
  | P_ternary, P_or -> false
  | P_ternary, P_and -> false

  | _, P_loose -> true
  | P_loose, _ -> false
  | _, P_or -> true
  | P_or, _ -> false

  | _, P_and -> true
  | P_and, _ -> false
  | _, P_comma -> true
  | P_comma, _ -> false
  | _, P_call_no_paren -> true
  | P_call_no_paren, _ -> false
  | _, P_assign -> true
  | P_assign, _ -> false
  | _, P_ternary -> true
  | P_ternary, _ -> false

  | _, P_tight_or -> true
  | P_tight_or, _ -> false
  | _, P_tight_and -> true
  | P_tight_and, _ -> false

  (* P_bit is only accepted next to P_bit among the remaining priorities *)
  | P_bit, P_bit -> true
  | P_bit, _ -> false

  | _, P_expr -> true
  | P_expr, _ -> false

  | _, P_eq -> true
  | P_eq, _ -> false
  | _, P_cmp -> true
  | P_cmp, _ -> false
  | _, P_add -> true
  | P_add, _ -> false
  | _, P_mul -> true
  | P_mul, _ -> false
  | _, P_tight -> true
  | P_tight, _ -> false

  (* what remains on the left is P_paren or P_tok: always accepted *)
  | _, P_paren _ -> true
  | P_paren _, _ -> true
  | P_tok, _ -> true

(* Check the priority of the sub-expression [in_] against the priority
   [pri_out] of the surrounding operator, and return the bare expression.
   - when [prio_less] rejects the pair, warn about missing parentheses;
   - otherwise, when [in_] is parenthesized around a priority which differs
     from [pri_out], is itself accepted by [prio_less], and whose content
     is not complex, warn about unneeded parentheses. *)
let prio_lo pri_out in_ =
  let pri_in = in_.any.priority in
  let e = in_.any.expr in
  (if not (prio_less (pri_in, pri_out)) then
     warn in_.pos "missing parentheses (needed for clarity)"
   else
     match pri_in with
     | P_paren (P_paren_wanted _) -> ()
     | P_paren inner
       when inner <> pri_out
         && prio_less (inner, pri_out)
         && not_complex (un_parenthesize e) ->
         warn in_.pos "unneeded parentheses"
     | _ -> ());
  e
    
(* Same as [prio_lo], except that an expression of priority
   [P_call_no_paren] is returned without any check. *)
let prio_lo_after pri_out in_ =
  match in_.any.priority with
  | P_call_no_paren -> in_.any.expr
  | _ -> prio_lo pri_out in_

(* [prio_lo] against [P_mul], the priority of [esp] being first wrapped in
   [P_paren_wanted]. *)
let prio_lo_concat esp =
  let wanted = P_paren_wanted esp.any.priority in
  let wrapped = { esp.any with priority = wanted } in
  prio_lo P_mul { esp with any = wrapped }

(* Expect no space before the element: warn on spaces and on a newline. *)
let sp_0 esp =
  match esp.spaces with
  | Space_none | Space_0 -> ()
  | Space_1 | Space_n -> warn_space (get_pos_start esp)
  | Space_cr -> warn_cr (get_pos_start esp)

(* Expect no space or a newline before the element: warn on spaces only. *)
let sp_0_or_cr esp =
  match esp.spaces with
  | Space_none | Space_0 | Space_cr -> ()
  | Space_1 | Space_n -> warn_space (get_pos_start esp)

(* Expect exactly one space before the element. *)
let sp_1 esp =
  match esp.spaces with
  | Space_none | Space_1 -> ()
  | Space_0 -> warn_no_space (get_pos_start esp)
  | Space_n -> warn_too_many_space (get_pos_start esp)
  | Space_cr -> warn_cr (get_pos_start esp)

(* Expect one or more spaces before the element, but no newline. *)
let sp_n esp =
  match esp.spaces with
  | Space_none | Space_1 | Space_n -> ()
  | Space_0 -> warn_no_space (get_pos_start esp)
  | Space_cr -> warn_cr (get_pos_start esp)

(* Expect some separation before the element: only the absence of any
   space is reported. *)
let sp_p esp =
  match esp.spaces with
  | Space_0 -> warn_no_space (get_pos_start esp)
  | Space_none | Space_1 | Space_n | Space_cr -> ()

(* Expect a newline before the element. *)
let sp_cr esp =
  match esp.spaces with
  | Space_none | Space_cr -> ()
  | Space_0 | Space_1 | Space_n ->
      warn_verb (get_pos_start esp) "you should have a carriage-return (\\n) here"

(* Expect both elements to be spaced alike: when one of them is not glued
   (its spacing is not [Space_0]), the other one is checked with [sp_p].
   The first element takes precedence. *)
let sp_same esp1 esp2 =
  match esp1.spaces = Space_0, esp2.spaces = Space_0 with
  | false, _ -> sp_p esp2
  | true, false -> sp_p esp1
  | true, true -> ()

(* [check_word_alone word] expands an unqualified bare word used without
   any argument:
   - functions defaulting to $_ become a call on $_,
   - "split" becomes split(' ', $_), "shift" a call on @_, "die" on $@,
   - flow-control words and a few builtins become a plain function
     reference,
   - "hex" and "ref" are expanded like the other $_ functions, with a
     warning asking for an explicit $_,
   - "time" and friends become a function reference, with a warning asking
     for explicit parentheses.
   Any other word (and any non-identifier) is returned untouched. *)
let check_word_alone word =
  match word with
  | Ident(None, f, pos) ->
      (match f with
      | "length" | "stat" | "lstat" | "chop" | "chomp" | "quotemeta" | "lc" | "lcfirst" | "uc" | "ucfirst" ->
          Call(Deref(I_func, word), [var_dollar_ pos])

      | "split" -> Call(Deref(I_func, word), [ Raw_string(" ", pos) ; var_dollar_ pos ])
      | "shift" -> Call(Deref(I_func, word), [ Deref(I_array,  Ident(None, "_", raw_pos2pos bpos)) ])
      | "die"   -> Call(Deref(I_func, word), [ Deref(I_scalar, Ident(None, "@", raw_pos2pos bpos)) ])
      | "return" | "eof" | "caller" 
      | "redo" | "next" | "last" -> 
          Deref(I_func, word)

      | "hex" | "ref" -> 
          warn_rule (sprintf "please use \"%s $_\" instead of \"%s\"" f f) ;
          (* the advertised replacement is "f $_": $_ is the only argument
             (the " " separator only makes sense for "split") *)
          Call(Deref(I_func, word), [ var_dollar_ pos ])
      | "time" | "wantarray" | "fork" | "getppid" | "arch" -> 
          warn_rule (sprintf "please use %s() instead of %s" f f) ;
          Deref(I_func, word)
      | _ -> word)
  | _ -> word

(* Check the spacing between the function name [word] and its arguments
   [esp].  Nothing is checked for "return".
   - first argument parenthesized and alone: no space wanted, unless the
     word starts with '-' in which case a space is wanted;
   - first argument parenthesized but followed by something else: fatal
     error (eg: join (" ", @l) . "\n");
   - otherwise some separation is wanted.
   When the only argument is an operator call, its operands are the ones
   inspected. *)
let check_parenthesized_first_argexpr word esp =
  let want_space = word.[0] = '-' in
  if word <> "return" then
    match esp.any.expr with
    | [ Call_op(_, (first :: others), _) ]
    | first :: others ->
        if not (is_parenthesized first) then
          sp_p esp
        else if others = [] then
          (if want_space then sp_n else sp_0) esp
        else
          (* eg: join (" ", @l) . "\n" *)
          die_with_rawpos (get_pos_start esp, get_pos_start esp) "please remove the space before the function call"
    | _ ->
        if word = "time" then die_rule "please use time() instead of time";
        sp_p esp

(* Checks specific to a call whose function is given as an identifier,
   followed by the generic spacing checks of
   [check_parenthesized_first_argexpr]:
   - a qualified function must get a single parenthesized argument;
   - "ref" and "readlink" must not get an argument of priority other
     than [P_tok]. *)
let check_parenthesized_first_argexpr_with_Ident ident esp =
  let single_parenthesized_arg =
    match esp.any.expr with
    | [e] -> is_parenthesized e
    | _ -> false
  in
  (match ident with
   | Ident(Some _, _, _) ->
       if not single_parenthesized_arg then
         warn_rule "use parentheses around argument (otherwise it might cause syntax errors if the package is \"require\"d and not \"use\"d"
   | Ident(None, ("ref" | "readlink"), _) ->
       if esp.any.priority <> P_tok then warn_rule "use parentheses around argument"
   | _ -> ());
  check_parenthesized_first_argexpr (string_of_Ident ident) esp

(* Warn when a hash subscript is a quoted string which could be written as
   a bare word: first character alphabetic, the others accepted by
   [char_is_alphanumerical_]. *)
let check_hash_subscript esp =
  let can_be_raw_string s =
    match s with
    (* special case for {'y'} otherwise the emacs mode goes wild, special case for {'x'} to have the same as {'y'} (since they usually go together) *)
    | "" | "x" | "y" -> false
    | _ ->
        char_is_alpha s.[0]
        && (String.length s = 1 || string_forall_with char_is_alphanumerical_ 1 s)
  in
  match esp.any.expr with
  | List [String ([(s, List [])], _)] ->
      if can_be_raw_string s then
        warn esp.pos (sprintf "{\"%s\"} can be written {%s}" s s)
  | List [Raw_string(s, _)] ->
      if can_be_raw_string s then
        warn esp.pos (sprintf "{'%s'} can be written {%s}" s s)
  | _ -> ()

(* Warn (at the position of [esp2]) when "->" follows an expression that is
   already a subscripted dereference, the arrow being implicit there.
   The arrow is still needed for (f())[0]->{XX}. *)
let check_arrow_needed esp1 esp2 =
  let arrow_is_superfluous =
    match esp1.any.expr with
    | Deref_with(I_array, I_scalar, List [List [Call _]], _) -> false (* "->" needed for (f())[0]->{XX} *)
    | Deref_with _ -> true
    | _ -> false
  in
  if arrow_is_superfluous then warn esp2.pos "the arrow \"->\" is unneeded"

(* Warn when a scalar dereference is applied directly to another
   dereference. *)
let check_scalar_subscripted esp =
  let e = esp.any in
  match e with
  | Deref(I_scalar, Deref _) -> warn_rule "for complex dereferencing, use \"->\""
  | _ -> ()

(* Warn when a match (or a negated match) on a variable other than $_ is
   being negated: the opposite operator should be used instead. *)
let check_negatable_expr esp =
  match un_parenthesize_full esp.any.expr with
  | Call_op(("m//" | "!m//") as op, var :: _, _) ->
      if not (is_var_dollar_ var) then
        warn_rule
          (if op = "m//"
           then "!($var =~ /.../) is better written $var !~ /.../"
           else "!($var !~ /.../) is better written $var =~ /.../")
  | _ -> ()

(* [check_ternary_paras (cond, a, b)] checks the three operands of
   "cond ? a : b" and returns them as the list [cond; a; b].

   Two warnings may be issued:
   - "you may use if_() here" when one branch is the empty list while the
     other branch either does not need the short-circuit behaviour of ?:
     or is the same expression as the condition;
   - "$foo ? $foo : $bar" can be written "$foo || $bar" when the first
     branch is the condition itself, both branches are scalars and the
     second branch does not need short-circuit. *)
let check_ternary_paras(cond, a, b) =
  (* constants, and some constructs built only from constants *)
  let rec dont_need_short_circuit_rec = function
    | Num _
    | Raw_string _
    | String ([(_, List [])], _) 
    | Call_op("qw", _, _)
      -> true
    | Call(Deref(I_func, Ident(None, "N", _)), [ List(String _ :: l) ])
    | Call_op(".", l, _)
    | Ref(I_hash, List l)
    | List l -> List.for_all dont_need_short_circuit_rec l
    | _ -> false
  in
  (* same as above, also accepting plain variables and references to plain
     variables at the top of the expression and inside lists / hash refs *)
  let rec dont_need_short_circuit = function
    | Ref(_, Deref(_, Ident _))
    | Deref(_, Ident _) -> true
    | Ref(I_hash, List l)
    | List l -> List.for_all dont_need_short_circuit l
    | e -> dont_need_short_circuit_rec e
  in
  (* warn when the branch is the empty list *)
  let check_ternary_para = function
    | List [] -> warn_rule "you may use if_() here\n  beware that the short-circuit semantic of ?: is not kept\n  if you want to keep the short-circuit behaviour, replace () with @{[]} and there will be no warning anymore"
    | _ -> ()
  in
  if dont_need_short_circuit a || is_same_fromparser cond a then check_ternary_para b;
  if dont_need_short_circuit b || is_same_fromparser cond b then check_ternary_para a;
  if is_same_fromparser cond a && dont_need_short_circuit b && is_a_scalar a && is_a_scalar b then warn_rule "you can replace \"$foo ? $foo : $bar\" with \"$foo || $bar\"";
  [ cond; a; b ]

(* Left operand of "=~ /regexp/": warn on an explicit $_ and on a match
   variable such as $1. *)
let check_unneeded_var_dollar_ esp =
  let e = esp.any.expr in
  if is_var_dollar_ e then
    warn esp.pos "\"$_ =~ /regexp/\" can be written \"/regexp/\""
  else if is_var_number_match e then
    warn esp.pos "do not use the result of a match (eg: $1) to match another pattern"

(* Left operand of "!~ /regexp/": same checks as above. *)
let check_unneeded_var_dollar_not esp =
  let e = esp.any.expr in
  if is_var_dollar_ e then
    warn esp.pos "\"$_ !~ /regexp/\" can be written \"!/regexp/\""
  else if is_var_number_match e then
    warn esp.pos "do not use the result of a match (eg: $1) to match another pattern"

(* Left operand of "=~ s/regexp/.../": warn on an explicit $_, and abort
   when a match variable such as $1 is being modified. *)
let check_unneeded_var_dollar_s esp =
  let e = esp.any.expr in
  if is_var_dollar_ e then
    warn esp.pos "\"$_ =~ s/regexp/.../\" can be written \"s/regexp/.../\""
  else if is_var_number_match e then
    die_with_rawpos esp.pos "do not modify the result of a match (eg: $1)"

(* The token must be the word "x", otherwise it is a syntax error. *)
let check_MULT_is_x esp =
  match esp.any with
  | "x" -> ()
  | _ -> die_rule "syntax error"

(* The token must be the word "my", otherwise it is a syntax error. *)
let check_my esp =
  match esp.any with
  | "my" -> ()
  | _ -> die_rule "syntax error"

(* Warn when "for" is used where "foreach" is wanted. *)
let check_foreach esp =
  match esp.any with
  | "for" -> warn esp.pos "write \"foreach\" instead of \"for\""
  | _ -> ()

(* Warn when "foreach" is used where "for" is wanted. *)
let check_for esp =
  match esp.any with
  | "foreach" -> warn esp.pos "write \"for\" instead of \"foreach\""
  | _ -> ()
(* Choose between "for" and "foreach" according to what is iterated over:
   a single scalar (plain, or obtained by subscripting something other
   than a function) is the "locally set $_" trick and wants "for";
   everything else wants "foreach". *)
let check_for_foreach esp arg =
  let iterates_on_one_scalar =
    match arg.any.expr with
    | List [ Deref(I_scalar, _) ] -> true
    | List [ Deref_with(context, I_scalar, _, _) ] -> context <> I_func
    | _ -> false
  in
  if iterates_on_one_scalar then
    (if esp.any = "foreach" then
       warn esp.pos "you are using the special fpons trick to locally set $_ with a value, for this please use \"for\" instead of \"foreach\"")
  else if esp.any = "for" then
    warn esp.pos "write \"foreach\" instead of \"for\""

(* Spacing checks for the content of a block and its closing bracket.
   - empty block: no space (or a newline) before the closing bracket;
   - otherwise: no space before a leading ";", some separation before
     anything else and before the closing bracket; when the bracket is not
     on its own line, a trailing ";" is reported as spurious. *)
let check_block_sub esp_lines esp_BRACKET_END =
  match esp_lines.any with
  | [] ->
      sp_0_or_cr esp_BRACKET_END
  | first :: _ as lines ->
      if first = Semi_colon then sp_0 esp_lines else sp_p esp_lines;
      sp_p esp_BRACKET_END;
      if esp_BRACKET_END.spaces <> Space_cr && last lines = Semi_colon then
        warn_verb (get_pos_end esp_lines) "spurious \";\" before closing block"

(* Spacing checks for the content of a block and its closing bracket.
   - leading ";": no space before it, some separation before the bracket;
   - otherwise: the content and the bracket must be spaced alike;
   - when the bracket is not on its own line, a trailing ";" is reported
     as spurious. *)
let check_block_ref esp_lines esp_BRACKET_END =
  let lines = esp_lines.any in
  (match lines with
   | Semi_colon :: _ ->
       sp_0 esp_lines;
       sp_p esp_BRACKET_END
   | _ -> sp_same esp_lines esp_BRACKET_END);
  if esp_BRACKET_END.spaces <> Space_cr
     && lines <> []
     && last lines = Semi_colon
  then warn_verb (get_pos_end esp_lines) "spurious \";\" before closing block"

(* Syntax error unless the "comma closed" flag carried by the element is
   set. *)
let check_my_our_paren esp =
  let (comma_closed, _), _ = esp.any in
  if comma_closed then () else die_rule "syntax error"

(* Warn when a pattern without options and without interpolation is of the
   form /^word$/, [word] only containing characters accepted by
   [char_is_alphanumerical_]: a string comparison is better. *)
let check_simple_pattern args =
  match args with
  | [ String([ st, List [] ], _); Raw_string("", _) ] ->
      let len = String.length st in
      if len > 2 && st.[0] = '^' && st.[len - 1] = '$' then begin
        let inner = skip_n_char_ 1 1 st in
        if string_forall_with char_is_alphanumerical_ 0 inner then
          warn_rule (sprintf "\"... =~ /^%s$/\" is better written \"... eq '%s'\"" inner inner)
      end
  | _ -> ()

(* Return the single argument held by [esp], looking through nested
   one-element lists.  Abort (at the position of [esp]) when there is no
   argument or more than one. *)
let only_one esp =
  let rec pick args =
    match args with
    | [List inner] -> pick inner
    | [e] -> e
    | [] -> die_with_rawpos esp.pos "you must give one argument"
    | _  -> die_with_rawpos esp.pos "you must give only one argument"
  in
  pick esp.any

(* Same as [only_one], with a warning when the single argument is the last
   index of an array ($#array): -1 does the same. *)
let only_one_array_ref esp =
  let arg = only_one esp in
  (match arg with
   | Call_op("last_array_index", [Deref(I_array, array)], _) ->
       let name = string_of_Ident array in
       warn esp.pos (sprintf "you can replace $#%s with -1" name)
   | _ -> ());
  arg

(* When the expression is a list, return its single element (see
   [only_one]); otherwise return the expression itself. *)
let only_one_in_List esp =
  let e = esp.any.expr in
  match e with
  | List items -> only_one { esp with any = items }
  | _ -> e

(* Does the list boil down to a single element, once nested one-element
   lists are looked through? *)
let rec is_only_one_in_List args =
  match args with
  | [List inner] -> is_only_one_in_List inner
  | [_] -> true
  | _ -> false

(* Turn an identifier into the raw string of its (possibly qualified)
   name; leave any other expression untouched. *)
let maybe_to_Raw_string e =
  match e with
  | Ident(fq_opt, s, pos) ->
      let full_name =
        match fq_opt with
        | None -> s
        | Some fq -> fq ^ "::" ^ s
      in
      Raw_string(full_name, pos)
  | _ -> e

(* A one-element list stands for its element; any other list is wrapped
   in a [List] node. *)
let to_List l =
  match l with
  | [single] -> single
  | _ -> List l

(* Last index of the array [e] points to ($#e), with the placeholder
   position. *)
let deref_arraylen e =
  let as_array = Deref(I_array, e) in
  Call_op("last_array_index", [as_array], raw_pos2pos bpos)

(* Dereference [e] in [context].  A raw string is first turned into the
   identifier it names; a plain scalar variable triggers a warning since
   the braces around it are useless. *)
let deref_raw context e =
  let target =
    match e with
    | Raw_string(s, pos) ->
        let fq, name = split_name_or_fq_name s in
        Ident(fq, name, pos)
    | Deref(I_scalar, (Ident _ as ident)) ->
        let sigil = context2s context in
        let var = string_of_Ident ident in
        warn_rule (sprintf "%s{$%s} can be written %s$%s" sigil var sigil var);
        e
    | _ -> e
  in
  Deref(context, target)

(* Identifier node built from a (package, name) pair and its position. *)
let to_Ident esp =
  let fq, name = esp.any in
  Ident(fq, name, raw_pos2pos esp.pos)

(* Raw string node built from a string and its position. *)
let to_Raw_string esp =
  Raw_string(esp.any, raw_pos2pos esp.pos)

(* Build the node for "object->method(para)":
   - SUPER::name is a method call on [name];
   - any other qualified method is a plain function call, the object being
     prepended to the parameters;
   - otherwise a method call.
   Identifiers used as object or method are turned into raw strings. *)
let to_Method_call (object_, method_, para) =
  let receiver = maybe_to_Raw_string object_ in
  match method_ with
  | Ident(Some "SUPER", name, pos) -> Method_call(receiver, Raw_string(name, pos), para)
  | Ident(Some _, _, _) -> Call(Deref(I_func, method_), receiver :: para)
  | _ -> Method_call(receiver, maybe_to_Raw_string method_, para)

(* Build a subscripted dereference, warning when what is dereferenced is
   known not to be a scalar. *)
let to_Deref_with (from_context, to_context, ref_, para) =
  (match is_not_a_scalar ref_ with
   | true -> warn_rule "bad deref"
   | false -> ());
  Deref_with(from_context, to_context, ref_, para)

  
(* [to_Local esp] builds the node for "local <expr>".

   The argument (or each element of a parenthesized list of arguments) is
   classified by [fpartition]:
   - unqualified globs and the unqualified scalar "_" are variables, kept
     as (context, name) pairs;
   - the other accepted forms (scalars, arrays, globs, and some hash
     element accesses) are kept as expressions;
   - anything else aborts with "bad argument to local".

   The result is a [Call_op "local"] when there are only expressions, a
   [My_our "local"] when there are only variables; mixing both aborts. *)
let to_Local esp =
  let l = 
    match esp.any.expr with
    | List[List l] -> l
    | e -> [e]
  in
  let local_vars, local_exprs = fpartition (function
    | Deref(I_star as context, Ident(None, ident, _))
    | Deref(I_scalar as context, Ident(None, ("_" as ident), _)) ->
	Some(context, ident)
    | Deref(I_scalar, Ident _)
    | Deref(I_array, Ident _)
    | Deref(I_star, Ident _)
    | Deref_with(I_hash, I_scalar, Ident _, _)
    | Deref_with(I_hash, I_scalar, Deref(I_scalar, _), _)
    | Deref_with(I_hash, I_scalar, Deref_with(I_hash, I_scalar, Ident _, _), _)
    | Deref_with(I_hash, I_scalar, Deref_with(I_hash, I_scalar, Deref(I_scalar, Ident _), _), _) ->
	None
    | _ -> die_with_rawpos esp.pos "bad argument to \"local\""
  ) l in
  (* NOTE: when [l] is empty both lists are empty and the first branch wins *)
  if local_vars = [] then Call_op("local", local_exprs, raw_pos2pos esp.pos)
  else if local_exprs = [] then My_our("local", local_vars, raw_pos2pos esp.pos)
  else die_with_rawpos esp.pos "bad argument to \"local\""

(* Named sub declaration whose body is the block [body]. *)
let sub_declaration (name, proto) body =
  let block = Block body in
  Sub_declaration(name, proto, block)

(* Anonymous sub whose body is the block held by [body], located at the
   position of [body]. *)
let anonymous_sub body =
  let block = Block body.any in
  Anonymous_sub (block, raw_pos2pos body.pos)

(* [cook_call_op op para pos] builds the node for operator [op] applied to
   the operands [para], located at the raw position [pos].

   It works in two steps:
   1. warnings depending on the operator family: number comparisons given
      strings or non-scalars, string comparisons given numbers or
      non-scalars, "||=" / "&&=" given a non-scalar or a parenthesized
      left-hand side;
   2. construction of the result, usually [Call_op(op, para, pos)].  Some
      assignments to a glob are turned into a sub declaration, and a few
      suspicious constructs (useless initialisation, constant operand of
      "||" / "&&") get a warning. *)
let cook_call_op op para pos =
  (match op with
  | "==" | "!=" | "<=" | ">=" | ">"  | "<"  | "<=>" ->
      if List.exists (function 
	| Undef
	| List [] -> op <> "==" && op <> "!=" (* allowing @l == () *)
	| e -> is_not_a_scalar_or_array e) para then
	warn_rule "don't do this"
      else if List.exists is_a_string para then
	warn_rule (sprintf "you should use a string operator, not the number operator \"%s\"" op)
  | "le" | "ge" | "eq" | "ne" | "gt" | "lt" | "cmp" ->
      if List.exists is_not_a_scalar para then
	warn_rule "don't do this"
      else if List.exists (function Num _ -> true | _ -> false) para then
	warn_rule (sprintf "you should use a number operator, not the string operator \"%s\" (or replace the number with a string)" op)
  | "||=" | "&&=" ->
      (* only the left-hand side is inspected *)
      (match List.hd para with
      | List [ List _ ] -> warn_rule "remove the parentheses"
      | e -> if is_not_a_scalar e then warn_rule (sprintf "\"%s\" is only useful with a scalar" op))
  | _ -> ());
  (* the default result, returned by most of the cases below *)
  let call = Call_op(op, para, raw_pos2pos pos) in
  match op, para with
  | "=", [My_our _; Ident(None, "undef", _)] -> 
      warn pos "no need to initialize variable, it's done by default" ;
      call
  | "=", [My_our _; List[]] -> 
      if Info.is_on_same_line_current pos then warn pos "no need to initialize variables, it's done by default" ;
      call

  | "=", [ Deref(I_star, String ([(sf1, List [])], _)); _ ] ->
      warn_rule (sprintf "write *{'%s'} instead of *{\"%s\"}" sf1 sf1) ;
      call

  (* "*f1 = *f2": handled as the declaration of sub f1 calling f2 *)
  | "=", [ Deref(I_star, (Ident _ as f1)); Deref(I_star, (Ident _ as f2)) ] ->
      let s1, s2 = string_of_Ident f1, string_of_Ident f2 in
      warn pos (sprintf "\"*%s = *%s\" is better written \"*%s = \\&%s\"" s1 s2 s1 s2) ;
      sub_declaration (f1, "") [ Deref(I_func, f2) ]
  | "=", [ Deref(I_star, Raw_string(sf1, pos_f1)); Deref(I_star, (Ident _ as f2)) ] ->
      let s2 = string_of_Ident f2 in
      warn pos (sprintf "\"*{'%s'} = *%s\" is better written \"*{'%s'} = \\&%s\"" sf1 s2 sf1 s2) ;
      sub_declaration (Ident(None, sf1, pos_f1), "") [ Deref(I_func, f2) ]

  (* "*f1 = \&f2": same declaration, without any warning *)
  | "=", [ Deref(I_star, (Ident _ as f1)); Ref(I_scalar, Deref(I_func, (Ident _ as f2))) ] ->
      sub_declaration (f1, "") [ Deref(I_func, f2) ]
  | "=", [ Deref(I_star, Raw_string(sf1, pos_f1)); Ref(I_scalar, Deref(I_func, (Ident _ as f2))) ] ->
      sub_declaration (Ident(None, sf1, pos_f1), "") [ Deref(I_func, f2) ]

  (* "*f1 = sub { ... }" *)
  | "=", [ Deref(I_star, (Ident _ as f1)); (Anonymous_sub _ as sub) ] ->
      sub_declaration (f1, "") [ sub ]

  (* constant first operand of "||" / "&&" *)
  | "||", e :: _ when is_always_true  e -> warn_rule "<constant> || ... is the same as <constant>"; call
  | "&&", e :: _ when is_always_false e -> warn_rule "<constant> && ... is the same as <constant>"; call
  | "||", e :: _ when is_always_false e -> warn_rule "<constant> || ... is the same as ..."; call
  | "&&", e :: _ when is_always_true  e -> warn_rule "<constant> && ... is the same as ..."; call

  | _ -> 
      call

(* Operator call spanning [esp_start] .. [esp_end], checked and built by
   [cook_call_op]; spaces come from [esp_start]. *)
let to_Call_op op para esp_start esp_end =
  let range = raw_pos_range esp_start esp_end in
  let node = cook_call_op op para range in
  new_any node esp_start.spaces range

(* Same as [to_Call_op], the result being tagged with priority [prio]. *)
let to_Call_op_ prio op para esp_start esp_end =
  let range = raw_pos_range esp_start esp_end in
  let node = cook_call_op op para range in
  new_any { priority = prio ; expr = node } esp_start.spaces range

(* Expressions of [pesp] once followed by a comma-like token.  When the
   token is not a true comma, a trailing unqualified identifier is turned
   into a raw string. *)
let followed_by_comma pesp true_comma =
  let items = pesp.any.expr in
  if true_comma.any then items
  else
    match split_last items with
    | before, Ident(None, s, pos) -> before @ [Raw_string(s, pos)]
    | _ -> items


(* translatable string -> PO comments attached to it (filled in by
   call_func on N() / N_() when Flags.generate_pot is set, one binding per
   string) *)
let pot_strings = Hashtbl.create 16
(* translatable string -> file it was found in (one binding is added per
   occurrence, so a string may have several bindings) *)
let pot_strings_and_file = Hashtbl.create 16
(* PO comments seen so far and not yet attached to a translatable string *)
let po_comments = ref []
(* record the content of [esp] as a pending PO comment *)
let po_comment esp = lpush po_comments esp.any

(* [check_format_a_la_printf s pos] checks the printf-like directives of
   the string [s], [pos] being the offset used to locate the warnings
   (the index of the faulty '%' in [s] is added to it).

   Only "%%", "%d", "%s" and "%c" are accepted.  Any other character after
   '%' is reported, as well as a '%' ending the string. *)
let check_format_a_la_printf s pos =
  let rec check_format_a_la_printf_ i =
    try
      (* raises Not_found when there is no '%' left: the scan is over *)
      let i' = String.index_from s i '%' in
      try
	(* s.[i' + 1] raises Invalid_argument when '%' is the last character *)
	(match s.[i' + 1] with
	| '%' | 'd' | 's' | 'c' -> ()
	| c -> warn (pos + i', pos + i') (sprintf "invalid command %%%c" c));
	(* go on after the two characters of the directive *)
	check_format_a_la_printf_ (i' + 2)
      with Invalid_argument _ -> warn (pos + i', pos + i') "invalid command %"
    with Not_found -> ()
  in check_format_a_la_printf_ 0
  
(* [generate_pot file] writes in [file] a PO template made of the standard
   header followed by one entry per translatable string recorded in
   [pot_strings]:
   - "#. " lines for the PO comments attached to the string,
   - a "#: " line listing the files the string was found in (the line
     number is always given as 1),
   - the "#, c-format" flag,
   - the msgid, escaped and split on newlines, and an empty msgstr.

   The bindings of the string in [pot_strings_and_file] are removed once
   written; a string without any binding left is skipped.

   The creation date comes from the external "date" command; the process
   is closed once its output has been read so that no pipe nor child
   process is left behind. *)
let generate_pot file = 
  let fd = open_out file in
  let creation_date =
    let date_process = Unix.open_process_in "date '+%Y-%m-%d %H:%M%z'" in
    let line =
      try input_line date_process
      with e -> ignore (Unix.close_process_in date_process) ; raise e
    in
    ignore (Unix.close_process_in date_process) ;
    line
  in
  output_string fd 
("# SOME DESCRIPTIVE TITLE.
# Copyright (C) YEAR Free Software Foundation, Inc.
# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.
#
#, fuzzy
msgid \"\"
msgstr \"\"
\"Project-Id-Version: PACKAGE VERSION\\n\"
\"POT-Creation-Date: " ^ creation_date ^ "\\n\"
\"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\\n\"
\"Last-Translator: FULL NAME <EMAIL@ADDRESS>\\n\"
\"Language-Team: LANGUAGE <LL@li.org>\\n\"
\"MIME-Version: 1.0\\n\"
\"Content-Type: text/plain; charset=CHARSET\\n\"
\"Content-Transfer-Encoding: 8-bit\\n\"

") ;

  (* escape one character of a msgid; a newline also closes the current
     line of the msgid and opens the next one *)
  let print_formatted_char = function
    | '"'  -> output_char fd '\\'; output_char fd '"'
    | '\t' -> output_char fd '\\'; output_char fd 't'
    | '\\' -> output_char fd '\\'; output_char fd '\\'
    | '\n' -> output_string fd "\\n\"\n\""
    | c -> output_char fd c
  in
  Hashtbl.iter (fun s po_comments ->
    match Hashtbl.find_all pot_strings_and_file s with
    | [] -> ()
    | files ->
        List.iter (fun po_comment -> output_string fd ("#. " ^ po_comment ^ "\n")) po_comments;

        (* one removal per binding: the string is not written twice *)
        List.iter (fun _ -> Hashtbl.remove pot_strings_and_file s) files ;
        fprintf fd "#: %s\n" (String.concat " " (List.map (fun f -> f ^ ":1") files)) ;
        output_string fd "#, c-format\n" ;

        output_string fd (if String.contains s '\n' then "msgid \"\"\n\"" else "msgid \"") ;
        String.iter print_formatted_char s ;
        output_string fd "\"\n" ;
        output_string fd "msgstr \"\"\n\n"
  ) pot_strings ;      
  close_out fd

(* [call_func is_a_func (e, para)] builds the node for the call of [e]
   with the parameters [para].

   When [e] is an unqualified function name, some well-known functions get
   specific checks, and possibly rewritten parameters ([para'] is [Some l]
   when the parameters are replaced by [l], [None] when they are kept):
   - "no": the package name is turned into a raw string;
   - "undef": a glob parameter is turned into a function reference;
   - "N" / "N_": the translated string must be a non interpolated string;
     it is recorded for the PO template when Flags.generate_pot is set,
     and its printf-like directives are checked;
   - "goto", and "last" / "next" / "redo" when [is_a_func] is false: a
     bare label is turned into a raw string;
   - "length": warnings on a missing or non-scalar parameter;
   - "split": a leading "$_ =~ m//" match is turned into a "qr//" pattern;
   - "map" / "grep": a block is wanted as first parameter. *)
let call_func is_a_func (e, para) =
  match e with
  | Deref(I_func, Ident(None, f, _)) ->
      let para' = match f with
      | "no" ->
	  (match para with
	  | [ Ident(_, _, pos) as s ] -> Some [ Raw_string(string_of_Ident s, pos) ]
	  | [ Call(Deref(I_func, (Ident(_, _, pos) as s)), l) ] -> Some(Raw_string(string_of_Ident s, pos) :: l)
	  | _ -> die_rule "use \"no PACKAGE <para>\"")
      | "undef" ->
	  (match para with
	  | [ Deref(I_star, ident) ] -> Some [ Deref(I_func, ident) ]
	  | _ -> None)

      | "N" | "N_" ->
	  (match para with
	  | [ List(String([ s, List [] ], (file, pos_a, _)) :: _) ] -> 
	      if !Flags.generate_pot then (
		(* the pending PO comments are attached to this string *)
		Hashtbl.replace pot_strings s ((try Hashtbl.find pot_strings s with Not_found -> []) @ !po_comments) ;
		po_comments := [] ;
		Hashtbl.add pot_strings_and_file s file ;
	      ) ;
	      check_format_a_la_printf s pos_a ;
	      (*if String.contains s '\t' then warn_rule "tabulation in translated string must be written \\\\t";*)
	      (*if count_matching_char s '\n' > 10 then warn_rule "long string";*)
	      None
	  | [ List(String _ :: _) ] -> die_rule "don't use interpolated translated string, use %s or %d instead"
	  |  _ -> die_rule (sprintf "%s() must be used with a string" f))

      | "goto" ->
	  (match para with
	  | [ Ident(None, s, pos) ] -> Some [ Raw_string(s, pos) ]
	  | _ -> None)

      | "last" | "next" | "redo" when not is_a_func ->
	  (match para with
	  | [ Ident(None, s, pos) ] -> Some [ Raw_string(s, pos) ]
	  | _ -> die_rule (sprintf "%s must be used with a raw string" f))

      | "length" ->
	  if para = [] then warn_rule "length() with no parameter !?" else
	  if is_not_a_scalar (List.hd para) then warn_rule "never use \"length @l\", it returns the length of the string int(@l)" ;
	  None

      | "split" ->
	  (match para with
	  | [ List(Call_op("m//", Deref(I_scalar, Ident(None, "_", _)) :: pattern, pos) :: l) ]
	  | Call_op("m//", Deref(I_scalar, Ident(None, "_", _)) :: pattern, pos) :: l ->
	      Some(Call_op("qr//", pattern, pos) :: l)
	  | _ -> None)

      | "map" | "grep" -> 
	  (match para with
	  | Anonymous_sub _ :: _ -> ()
	  | _ -> warn_rule (sprintf "always use \"%s\" with a block (eg: %s { ... } @list)" f f));
	  None
	    
      | _ -> None
      (* rewritten parameters when there are some, the original ones otherwise *)
      in Call(e, some_or para' para)
  | _ -> Call(e, para)

(* [call_func] with [is_a_func] set to false. *)
let call (e, para) =
  let is_a_func = false in
  call_func is_a_func (e, para)


(* Call of the function named by the token, which takes one scalar
   parameter.  Without parameter, $_ is used, with a warning asking for an
   explicit $_ (except for "length").  The result has priority [P_mul] and
   spans [esp_start] .. [esp_end]. *)
let call_one_scalar_para name_esp para esp_start esp_end =
  let name = name_esp.any in
  let full_pos = raw_pos2pos name_esp.pos in
  let actual_para =
    match para with
    | [] ->
        if name <> "length" then
          warn_rule (sprintf "please use \"%s $_\" instead of \"%s\"" name name) ;
        [var_dollar_ full_pos]
    | _ -> para
  in
  let func = Deref(I_func, Ident(None, name, full_pos)) in
  new_pesp P_mul (call (func, actual_para)) esp_start esp_end


(* Node for "left if right".  A warning suggests "||=" for
   "$foo = ... if !$foo", unless what is assigned is a hash or an array. *)
let call_op_if_infix left right esp_start esp_end =
  (match left with
   | List [Call_op("=", [assigned; _], _)] ->
       let assigns_non_scalar =
         match assigned with
         | Deref(context, _) -> non_scalar_context context
         | _ -> false
       in
       if not assigns_non_scalar then
         (match right with
          | List [Call_op("not", [negated], _)] when is_same_fromparser assigned negated ->
              warn_rule "\"$foo = ... if !$foo\" can be written \"$foo ||= ...\""
          | _ -> ())
   | _ -> ());
  let range = raw_pos_range esp_start esp_end in
  let node = Call_op("if infix", [ left ; right ], raw_pos2pos range) in
  new_any node esp_start.spaces range

(* Node for "left unless right".  Two checks are done:
   - "$foo = ... unless $foo" can be written with "||=", unless what is
     assigned is a hash or an array;
   - "unless" must not be used with a complex condition. *)
let call_op_unless_infix left right esp_start esp_end =
  (match left with
   | List [Call_op("=", [assigned; _], _)] ->
       let assigns_non_scalar =
         match assigned with
         | Deref(context, _) -> non_scalar_context context
         | _ -> false
       in
       if not assigns_non_scalar then
         (match right with
          | List [cond] when is_same_fromparser assigned cond ->
              warn_rule "\"$foo = ... unless $foo\" can be written \"$foo ||= ...\""
          | _ -> ())
   | _ -> ());
  (match right with
   | List [Call_op(("&&" | "||" | "not" | "ne" | "?:"), _, _)] ->
       warn_rule "don't use \"unless\" when the condition is complex, use \"if\" instead"
   | _ -> ());
  let range = raw_pos_range esp_start esp_end in
  let node = Call_op("unless infix", [ left ; right ], raw_pos2pos range) in
  new_any node esp_start.spaces range


(* Lexbuf last given to [parse_tokens], if any. *)
let current_lexbuf : Lexing.lexbuf option ref = ref None

(* Turn a list of positioned tokens into a lexer function: each call
   returns the next token, after having set the positions of the lexbuf
   it is given from the ones recorded with the token.  Asking for a token
   once the list is exhausted is an internal error. *)
let list2tokens l =
  let remaining = ref l in
  fun lexbuf ->
    match !remaining with
    | [] -> internal_error "list2tokens"
    | ((start, end_), token) :: rest ->
        lexbuf.Lexing.lex_abs_pos <- 0 ;
        lexbuf.Lexing.lex_start_pos <- start ;
        lexbuf.Lexing.lex_curr_pos <- end_ ;
        remaining := rest ;
        token

(* Run [parse] on a list of tokens.  When a lexbuf is given, it is first
   recorded in [current_lexbuf]; the recorded lexbuf is the one handed to
   [parse].  An empty token list gives an empty result without calling
   [parse]. *)
let parse_tokens parse tokens lexbuf_opt =
  (match lexbuf_opt with
   | None -> ()
   | Some _ -> current_lexbuf := lexbuf_opt) ;
  match tokens with
  | [] -> []
  | _ -> parse (list2tokens tokens) (some !current_lexbuf)

(* Parse the interpolated parts of a string: each (text, tokens) pair
   becomes (text, expression).  A trailing pair made of an empty text and
   an empty expression is dropped. *)
let parse_interpolated parse l =
  let parse_piece (text, tokens) =
    let parsed = parse_tokens parse tokens None in
    text, to_List parsed
  in
  let pieces = List.map parse_piece l in
  match split_last pieces with
  | all_but_last, ("", List []) -> all_but_last
  | _ -> pieces

(* Build the node of a double-quoted string, after having parsed its
   interpolated parts.  Warnings are issued when the string is nothing but
   one interpolated expression:
   - a scalar variable other than $! (when [strict]): quotes are useless;
   - a hash: not to be used in string context;
   - an array: accepted;
   - anything else (when [strict]): quotes are useless. *)
let to_String parse strict esp =
  let raw_pos = esp.pos in
  let pieces = parse_interpolated parse esp.any in
  (match pieces with
   | [ "", interpolated ] ->
       (match interpolated with
        | List [Deref(I_scalar, Ident(None, ident, _))] ->
            if ident <> "!" && strict then
              warn raw_pos (sprintf "%s is better written without the double quotes" (variable2s(I_scalar, ident)))
        | List [Deref(I_hash, _)] ->
            warn raw_pos "don't use a hash in string context"
        | List [Deref(I_array, _)] ->
            ()
        | _ ->
            if strict then warn raw_pos "double quotes are unneeded")
   | _ -> ());
  String(pieces, raw_pos2pos raw_pos)

(* Operands of a pattern match: the parsed pattern and its options, both
   located at the position of the token.  A warning is issued when the
   pattern ends with a literal ".*" or ".*$", which can be removed. *)
let from_PATTERN parse esp =
  let pattern, opts = esp.any in
  let re = parse_interpolated parse pattern in
  let useless_suffix =
    match List.rev re with
    | (last_text, List []) :: _ ->
        if str_ends_with last_text ".*" then Some ".*"
        else if str_ends_with last_text ".*$" then Some ".*$"
        else None
    | _ -> None
  in
  (match useless_suffix with
   | Some suffix ->
       warn_rule (sprintf "you can remove \"%s\" at the end of your regexp" suffix)
   | None -> ());
  let full_pos = raw_pos2pos esp.pos in
  [ String(re, full_pos) ;
    Raw_string(opts, full_pos) ]
(* Operands of a substitution: the parsed pattern, the parsed replacement
   and the options, all located at the position of the token.
   NOTE(review): both parts are parsed inside the list literal, so the
   order in which they are parsed is the evaluation order chosen by the
   compiler for the list elements -- keep this shape if that order (hence
   the order of the warnings) matters. *)
let from_PATTERN_SUBST parse { any = (s1, s2, opts) ; pos = pos } = 
  [ String(parse_interpolated parse s1, raw_pos2pos pos) ; 
    String(parse_interpolated parse s2, raw_pos2pos pos) ; 
    Raw_string(opts, raw_pos2pos pos) ]