From c8ff668a19bfca65bbcd8f72f939729034c138c2 Mon Sep 17 00:00:00 2001
From: Pascal Rigaux <pixel@mandriva.com>
Date: Sat, 9 Nov 2002 21:31:59 +0000
Subject: *** empty log message ***

---
 perl_checker.src/.cvsignore      |   8 ++
 perl_checker.src/Makefile        |   7 +-
 perl_checker.src/common.ml       |  12 +++
 perl_checker.src/common.mli      |   6 ++
 perl_checker.src/info.ml         |   7 +-
 perl_checker.src/info.mli        |   2 +
 perl_checker.src/lexer.mll       | 187 ++++++++++++++++++++++++++++-----------
 perl_checker.src/parser.mly      | 126 +++++++++++++++-----------
 perl_checker.src/perl_checker.ml |   4 +-
 9 files changed, 248 insertions(+), 111 deletions(-)
 create mode 100644 perl_checker.src/.cvsignore

(limited to 'perl_checker.src')

diff --git a/perl_checker.src/.cvsignore b/perl_checker.src/.cvsignore
new file mode 100644
index 0000000..fe1e303
--- /dev/null
+++ b/perl_checker.src/.cvsignore
@@ -0,0 +1,8 @@
+.depend
+perl_checker
+perl_checker_debug
+gmon.out
+*.cmi
+*.cmo
+*.cmx
+parser.ml
diff --git a/perl_checker.src/Makefile b/perl_checker.src/Makefile
index 0dc65c2..fcabe3c 100644
--- a/perl_checker.src/Makefile
+++ b/perl_checker.src/Makefile
@@ -12,7 +12,7 @@ YACC_FILES = $(wildcard *.mly)
 TMP_MLFILES = $(YACC_FILES:%.mly=%.ml) $(LEX_FILES:%.mll=%.ml)
 TMP_MLIFILES = $(YACC_FILES:%.mly=%.mli)
 
-ALL_PROGS = perl_checker
+ALL_PROGS = perl_checker_debug perl_checker
 
 PROG_OBJS_WITH_CMI = parser.cmo print.cmo perl_checker.cmo
 PROG_OBJS = common.cmo flags.cmo info.cmo $(LEX_FILES:%.mll=%.cmo) $(PROG_OBJS_WITH_CMI)
@@ -29,11 +29,10 @@ default: .compiling TAGS $(ALL_PROGS)
 
 all: perl_checker
 
-perl_checker: .depend $(PROG_OBJS)
+perl_checker_debug: .depend $(PROG_OBJS)
 	$(CSLC) -custom $(CSLFLAGS) $(LIBDIRS) -o $@ $(CMA_FILES) $(PROG_OBJS)
-	cp -f perl_checker perl_checker_debug
 
-perl_checker_opt: .depend $(PROG_OBJX)
+perl_checker: .depend $(PROG_OBJX)
 	$(CSLOPT) $(CSLOPTFLAGS) $(LIBDIRS) -o $@ $(CMXA_FILES) $(PROG_OBJX)
 
 .compiling:
diff --git a/perl_checker.src/common.ml b/perl_checker.src/common.ml
index 4b33f8f..0f20e7a 100644
--- a/perl_checker.src/common.ml
+++ b/perl_checker.src/common.ml
@@ -653,6 +653,18 @@ let skip_n_char_ beg end_ s =
   String.sub s beg (String.length s - beg - end_)
 let skip_n_char n s = skip_n_char_ n 0 s
 
+let rec index_spaces_from beg s =
+  if s.[beg] = ' ' || s.[beg] = '\t' then beg else index_spaces_from (beg+1) s
+let index_spaces s = index_spaces_from 0 s
+
+let rec index_non_spaces_from beg s =
+  if s.[beg] = ' ' || s.[beg] = '\t' then index_non_spaces_from (beg+1) s else beg
+let index_non_spaces s = index_non_spaces_from 0 s
+
+let rec rindex_non_spaces_from beg s =
+  if s.[beg] = ' ' || s.[beg] = '\t' then rindex_non_spaces_from (beg-1) s else beg
+let rindex_non_spaces s = rindex_non_spaces_from (String.length s - 1) s
+
 let rec explode_string = function
   | "" -> []
   | s -> (String.get s 0) :: explode_string (String.sub s 1 (String.length s - 1))
diff --git a/perl_checker.src/common.mli b/perl_checker.src/common.mli
index 5fe8ade..353a2a3 100644
--- a/perl_checker.src/common.mli
+++ b/perl_checker.src/common.mli
@@ -175,6 +175,12 @@ val chop : string -> string
 val chomps : string -> string
 val skip_n_char_ : int -> int -> string -> string
 val skip_n_char : int -> string -> string
+val index_spaces_from : int -> string -> int
+val index_spaces : string -> int
+val index_non_spaces_from : int -> string -> int
+val index_non_spaces : string -> int
+val rindex_non_spaces_from : int -> string -> int
+val rindex_non_spaces : string -> int
 val explode_string : string -> char list
 val is_uppercase : char -> bool
 val is_lowercase : char -> bool
diff --git a/perl_checker.src/info.ml b/perl_checker.src/info.ml
index 5c002ae..8e27bd3 100644
--- a/perl_checker.src/info.ml
+++ b/perl_checker.src/info.ml
@@ -4,6 +4,7 @@ open Common
 
 let (lines_starts : (string * int list ref) list ref) = ref []
 let current_file_lines_starts = ref []
+let current_file_current_line = ref 0
 let current_file = ref ""
 
 let start_a_new_file file = 
@@ -11,9 +12,13 @@ let start_a_new_file file =
   current_file_lines_starts := [0] ;
   lines_starts := (file, current_file_lines_starts) :: !lines_starts
 
-let pos2line (file, a, b) = 
+let raw_pos2raw_line file a =
   let starts = map_index (fun a b -> a,b) (rev !(assoc file !lines_starts)) in
   let ((offset, line), _) = find (fun (_,(e,_)) -> e > a) (combine starts (tl starts @ [99999, 99999])) in
+  line, offset
+
+let pos2line (file, a, b) = 
+  let line, offset = raw_pos2raw_line file a in
   file, line, a - offset, b - offset
 
 let pos2sfull pos = 
diff --git a/perl_checker.src/info.mli b/perl_checker.src/info.mli
index 5751d2b..ed9455e 100644
--- a/perl_checker.src/info.mli
+++ b/perl_checker.src/info.mli
@@ -1,7 +1,9 @@
 val lines_starts : (string * int list ref) list ref
 val current_file_lines_starts : int list ref
+val current_file_current_line : int ref
 val current_file : string ref
 val start_a_new_file : string -> unit
+val raw_pos2raw_line : string -> int -> int * int
 val pos2line : string * int * int -> string * int * int * int
 val pos2sfull : string * int * int -> string
 val pos2sfull_current : int -> int -> string
diff --git a/perl_checker.src/lexer.mll b/perl_checker.src/lexer.mll
index 73c40bb..49f5102 100644
--- a/perl_checker.src/lexer.mll
+++ b/perl_checker.src/lexer.mll
@@ -7,21 +7,31 @@ open Info
 
 let next_rule = ref None
 
+let add_a_new_line raw_pos = 
+   incr current_file_current_line ;
+   lpush current_file_lines_starts raw_pos
+
 let here_docs = Queue.create()
 let current_here_doc_mark = ref ""
 
+let here_doc_next_line mark interpolate =
+  let here_doc_ref = ref("", bpos) in
+  Queue.push (interpolate, mark, here_doc_ref) here_docs ;
+  here_doc_ref
+
 let delimit_char = ref '/'
 let not_ok_for_match = ref (-1)
 let string_nestness = ref 0
 
 let building_current_string = ref ""
 let current_string_start_pos = ref 0
+let current_string_start_line = ref 0
 
-let ins_with_offset offset t lexbuf = 
-  building_current_string := ""; current_string_start_pos := lexeme_start lexbuf + offset;
+let ins t lexbuf = 
+  building_current_string := ""; 
+  current_string_start_pos := lexeme_start lexbuf;
   t lexbuf ;
   !building_current_string, (!current_file, !current_string_start_pos, lexeme_end lexbuf)
-let ins t lexbuf = ins_with_offset 0 t lexbuf
 let ins_to_string t lexbuf =
   let s, pos = ins t lexbuf in
   not_ok_for_match := lexeme_end lexbuf; 
@@ -33,11 +43,11 @@ let next_s s t lexbuf =
 let next t lexbuf = next_s (lexeme lexbuf) t lexbuf
 
 let pos lexbuf = !current_file, lexeme_start lexbuf, lexeme_end lexbuf
-
 let pos2sfull_with start end_ = Info.pos2sfull (!current_file, start, end_)
-
 let pos2sfull lexbuf = pos2sfull_with (lexeme_start lexbuf) (lexeme_end lexbuf)
 
+let putback lexbuf nb = lexbuf.Lexing.lex_curr_pos <- lexbuf.Lexing.lex_curr_pos - nb
+
 let die lexbuf err = failwith (pos2sfull_with !current_string_start_pos (lexeme_end lexbuf) ^ err)
 
 let rec lexbuf2list t lexbuf =
@@ -82,6 +92,14 @@ let arraylen_fqident_from_lexbuf lexbuf =
   let fq, name = split_at_two_colons (skip_n_char 2 s) in
   ARRAYLEN_IDENT(Some fq, name, pos lexbuf)
 
+let check_multi_line_delimited_string opts (_, start, end_) =
+   let check =
+     match opts with
+     | None -> true
+     | Some s -> not (String.contains s 'x') in
+   if check then
+     if !current_file_current_line <> !current_string_start_line then
+     failwith (pos2sfull_with start end_ ^ "multi-line patterns are not allowed (or use /x modifier)")
 }
 
 let space = [' ' '\t']
@@ -99,10 +117,12 @@ rule token = parse
 | '#' [^ '\n']* { (*COMMENT(lexeme lexbuf, pos lexbuf)*) token lexbuf }
 
 | "\n=" { 
-  let s, pos = ins_with_offset 1 pod_command lexbuf in POD(s, pos) }
+    add_a_new_line(lexeme_end lexbuf - 1);
+    let _ = ins pod_command lexbuf in token lexbuf 
+  }
 
 | '\n' { 
-    lpush current_file_lines_starts (lexeme_end lexbuf);
+    add_a_new_line(lexeme_end lexbuf);
     (try
       let (interpolate, mark, r) = Queue.pop here_docs in
       current_here_doc_mark := mark ;
@@ -126,7 +146,9 @@ rule token = parse
 | "." { CONCAT }
 | "<<" { BIT_SHIFT_LEFT }
 | ">>" { BIT_SHIFT_RIGHT }
-| "<" | ">" | "<=" | ">=" | "lt" | "gt" | "le" | "ge" { COMPARE_OP(lexeme lexbuf) }
+| "<" { LT }
+| ">" { GT }
+| "<=" | ">=" | "lt" | "gt" | "le" | "ge" { COMPARE_OP(lexeme lexbuf) }
 | "==" | "!=" | "<=>" | "eq" | "ne" | "cmp" { EQ_OP(lexeme lexbuf) }
 | "&" { BIT_AND }
 | "|" { BIT_OR }
@@ -139,7 +161,7 @@ rule token = parse
 | ":" { COLON }
 | "::" { PKG_SCOPE }
 
-| "=" | "+=" | "-=" | "*=" | "/=" | ".=" | "|=" | "&=" | "^=" | "||=" | "&&=" { ASSIGN(lexeme lexbuf) }
+| "=" | "+=" | "-=" | "*=" | ".=" | "|=" | "&=" | "^=" | "||=" | "&&=" { ASSIGN(lexeme lexbuf) }
 
 | "," { COMMA }
 | "=>" { RIGHT_ARROW }
@@ -149,6 +171,8 @@ rule token = parse
 | "xor" { XOR }
 
 | "if"       { IF }
+| "else"     { ELSE }
+| "elsif"    { ELSIF }
 | "unless"   { UNLESS }
 | "do"       { DO }
 | "while"    { WHILE }
@@ -159,36 +183,63 @@ rule token = parse
 | "local"    { LOCAL }
 | "continue" { CONTINUE }
 | "sub"      { SUB }
-| "format"   { FORMAT }
 | "package"  { PACKAGE }
 | "use"      { USE }
+| "BEGIN"    { BEGIN }
+| "END"      { END }
 | "print"    { PRINT(pos lexbuf) }
 | "new"      { NEW(pos lexbuf) }
+| "format"   { let _ = here_doc_next_line "." false in FORMAT(pos lexbuf) }
+
+| "split"
+| "grep"  { (* ok_for_match! *) BAREWORD(lexeme lexbuf, pos lexbuf) }
+
+| ident space* "=>" { (* needed so that (if => 1) works *)
+    let s = lexeme lexbuf in
+    let end_ = String.length s - 1 in
+    let ident_end = rindex_non_spaces_from (end_ - 2) s in
+    putback lexbuf (end_ - ident_end); 
+    BAREWORD(String.sub s 0 (ident_end+1), pos lexbuf)
+  }
+
+| "{" ident "}" { (* needed so that $h{if} works *)
+    not_ok_for_match := lexeme_end lexbuf;
+    COMPACT_HASH_SUBSCRIPT(lexeme lexbuf, pos lexbuf)
+  }
 
 | '@' { AT }
 | '$' { DOLLAR }
-| '%' { PERCENT }
-| '&' { AMPERSAND }
-| '*' { STAR }
-| "$#" { ARRAYLEN }
+| '$' '#' { ARRAYLEN }
+| '%' ['$' '{'] { putback lexbuf 1; PERCENT }
+| '&' ['$' '{'] { putback lexbuf 1; AMPERSAND }
+| '*' ['$' '{'] { putback lexbuf 1; if lexeme_start lexbuf = !not_ok_for_match then MULT else STAR }
 
 
 | ';' { SEMI_COLON }
 | '(' { PAREN }
 | '{' { BRACKET }
+| "+{"{ BRACKET_HASHREF }
 | '[' { ARRAYREF }
 | ')' { not_ok_for_match := lexeme_end lexbuf; PAREN_END }
 | '}' { not_ok_for_match := lexeme_end lexbuf; BRACKET_END }
 | ']' { not_ok_for_match := lexeme_end lexbuf; ARRAYREF_END }
 
-| '(' [ '$' '@' '\\' '&' ';' ]+ ')' {
-    (* bloody prototypes, must be caught especially otherwise "($)" is badly tokenized *)
-    PROTOTYPE(lexeme lexbuf, pos lexbuf)
-  }
-
 | "/" {
     if lexeme_start lexbuf = !not_ok_for_match then DIVISION 
     else (
+      delimit_char := '/' ;
+      current_string_start_line := !current_file_current_line;
+      let s, pos = ins delimited_string lexbuf in
+      let opts, _ = ins pattern_options lexbuf in
+      check_multi_line_delimited_string (Some opts) pos ;
+      PATTERN(s, opts, pos)
+    ) 
+  }
+
+| "/=" {
+    if lexeme_start lexbuf = !not_ok_for_match then ASSIGN(lexeme lexbuf)
+    else (
+      putback lexbuf 1 ;
       delimit_char := '/' ;
       let s, pos = ins delimited_string lexbuf in
       let opts, _ = ins pattern_options lexbuf in
@@ -198,44 +249,51 @@ rule token = parse
 
 | "m" pattern_separator {
   delimit_char := lexeme_char lexbuf 1 ;
+  current_string_start_line := !current_file_current_line;
   let s, pos = ins delimited_string lexbuf in
   let opts, _ = ins pattern_options lexbuf in
+  check_multi_line_delimited_string (Some opts) pos ;
   PATTERN(s, opts, pos)
 }
 
 | "qr" pattern_separator {
   delimit_char := lexeme_char lexbuf 2 ;
+  current_string_start_line := !current_file_current_line;
   let s, pos = ins delimited_string lexbuf in
   let opts, _ = ins pattern_options lexbuf in
+  check_multi_line_delimited_string (Some opts) pos ;
   PATTERN(s, opts, pos)
 }
 
 | "s" pattern_separator {
   delimit_char := lexeme_char lexbuf 1 ;
+  current_string_start_line := !current_file_current_line;
   let s1, (_, start, _) = ins delimited_string lexbuf in 
   let s2, (_, _, end_)  = ins delimited_string lexbuf in 
   let opts, _ = ins pattern_options lexbuf in
-  PATTERN_SUBST(s1, s2, opts, (!current_file, start, end_))
+  let pos = !current_file, start, end_ in
+  check_multi_line_delimited_string (Some opts) pos ;
+  PATTERN_SUBST(s1, s2, opts, pos)
 }
 
 | "tr" pattern_separator {
   delimit_char := lexeme_char lexbuf 2 ;
+  current_string_start_line := !current_file_current_line;
   let s1, (_, start, _) = ins delimited_string lexbuf in 
   let s2, (_, _, end_)  = ins delimited_string lexbuf in 
   let opts, _ = ins pattern_options lexbuf in
-  PATTERN_SUBST(s1, s2, opts, (!current_file, start, end_))
+  let pos = !current_file, start, end_ in
+  check_multi_line_delimited_string None pos ;
+  PATTERN_SUBST(s1, s2, opts, pos)
 }
     
 | "<<" ident { 
-    let here_doc_ref = ref("", bpos) in
-    Queue.push (true, skip_n_char 2 (lexeme lexbuf), here_doc_ref) here_docs ;
-    HERE_DOC here_doc_ref
+    not_ok_for_match := lexeme_end lexbuf; 
+    HERE_DOC(here_doc_next_line (skip_n_char 2 (lexeme lexbuf)) true)
   }
 | "<<'" ident "'" { 
     not_ok_for_match := lexeme_end lexbuf; 
-    let here_doc_ref = ref("", bpos) in
-    Queue.push (false, skip_n_char_ 3 1 (lexeme lexbuf), here_doc_ref) here_docs ;
-    HERE_DOC here_doc_ref
+    HERE_DOC(here_doc_next_line (skip_n_char_ 3 1 (lexeme lexbuf)) false)
   }
 | "<<" space+ "'"
 | "<<" space+ ident { 
@@ -248,20 +306,41 @@ rule token = parse
 | "\\" stash
 | "\\" ['0'-'9' 'A'-'Z' 'a'-'z']
 | "\\" space* '('
-    { lexbuf.Lexing.lex_curr_pos <- lexbuf.Lexing.lex_curr_pos - 1; REF }
+    { putback lexbuf 1; REF }
+
+| "sub" space+ ident space* '(' [ '$' '@' '\\' '&' ';' ]* ')' {
+    (* bloody prototypes, must be caught especially otherwise "($)" is badly tokenized *)
+    (* and alas "($@)" is both valid as an expression and a prototype *)
+    let s = lexeme lexbuf in
+    let ident_start = index_non_spaces_from 3 s in
+      
+    let proto_start = String.index_from s ident_start '(' in
+    let ident_end = rindex_non_spaces_from (proto_start-1) s in
+    let ident = String.sub s ident_start (ident_end - ident_start + 1) in
+    let prototype = skip_n_char_ (proto_start + 1) 1 s in
+
+    FUNC_DECL_WITH_PROTO(ident, prototype, pos lexbuf)
+  }
 
 | "$#" ident? ("::" ident)+ { arraylen_fqident_from_lexbuf lexbuf }
 | "$#" ident { arraylen_ident_from_lexbuf lexbuf }
 
 | stash ident? ("::" ident)+ { typed_fqident_from_lexbuf lexbuf }
 | stash ident
-| stash '^'? [^ '{' ' ' '\t' '\n'] { typed_ident_from_lexbuf lexbuf }
+| '$' [^ '{' ' ' '\t' '\n' '$']
+| "$^" [^ '{' ' ' '\t' '\n'] { typed_ident_from_lexbuf lexbuf }
+
+| "$$" [^ 'a'-'z' 'A'-'Z' '_' '{'] { putback lexbuf 1; SCALAR_IDENT(None, "$$", pos lexbuf) }
+
+| stash "::" { putback lexbuf 2; ident_type_from_char None "main" lexbuf (lexeme_char lexbuf 0) }
 
 | ident? ("::" ident)+ { ident_from_lexbuf lexbuf }
-| ident { BAREWORD(lexeme lexbuf, pos lexbuf) }
+| ident { not_ok_for_match := lexeme_end lexbuf; BAREWORD(lexeme lexbuf, pos lexbuf) }
 
 | ident ":" { LABEL(lexeme lexbuf, pos lexbuf) }
 
+| '-' [ 'a'-'z' 'A'-'Z' ] [ ' ' '\t' '(' ] { putback lexbuf 1; BAREWORD(lexeme lexbuf, pos lexbuf) }
+
 | ['0'-'9'] ['0'-'9' '_']* ('.' ['0'-'9'] ['0'-'9' '_']*)+ 
 | 'v' ['0'-'9'] ['0'-'9' '_']* ('.' ['0'-'9'] ['0'-'9' '_']*)* 
   { 
@@ -269,14 +348,8 @@ rule token = parse
     REVISION(lexeme lexbuf, pos lexbuf)
   }
 
-| ['0'-'9']* '.' ['0'-'9']+ (['e' 'E']['-' '+']?['0'-'9']+)? { 
-    not_ok_for_match := lexeme_end lexbuf; 
-    NUM(lexeme lexbuf, pos lexbuf) 
-  }
-| ['0'-'9'] ['0'-'9' '_']* { 
-    not_ok_for_match := lexeme_end lexbuf; 
-    NUM(lexeme lexbuf, pos lexbuf)
-  }
+| ['0'-'9']* '.' ['0'-'9']+ (['e' 'E']['-' '+']?['0'-'9']+)? 
+| ['0'-'9'] ['0'-'9' '_']*  (['e' 'E']['-' '+']?['0'-'9']+)?
 | "0x" ['0'-'9' 'a'-'f' 'A'-'F']+ { 
     not_ok_for_match := lexeme_end lexbuf; 
     NUM(lexeme lexbuf, pos lexbuf)
@@ -285,8 +358,11 @@ rule token = parse
 | '"'   { ins_to_string string lexbuf }
 | "'"   { ins_to_string rawstring lexbuf }
 | '`'   { delimit_char := '`'; 
+	  current_string_start_line := !current_file_current_line;
 	  not_ok_for_match := lexeme_end lexbuf; 
-	  let s, pos = ins delimited_string lexbuf in COMMAND_STRING(s, pos) }
+	  let s, pos = ins delimited_string lexbuf in
+	  check_multi_line_delimited_string None pos ;
+	  COMMAND_STRING(s, pos) }
 | "q("  { ins_to_string qstring lexbuf }
 | "qq(" { ins_to_string qqstring lexbuf }
 | "qw(" { let s, pos = ins qstring lexbuf in QUOTEWORDS(s, pos) }
@@ -298,7 +374,7 @@ and string = parse
   '"' { () }
 | '\\' { next_rule := Some string ; string_escape lexbuf }
 | '\n' { 
-    lpush current_file_lines_starts (lexeme_end lexbuf);
+    add_a_new_line(lexeme_end lexbuf);
     next string lexbuf
   }
 | [^ '\n' '\\' '"']+ { next string lexbuf }
@@ -307,7 +383,7 @@ and string = parse
 and delimited_string = parse
 | '\\' { next_rule := Some delimited_string ; string_escape lexbuf }
 | '\n' { 
-    lpush current_file_lines_starts (lexeme_end lexbuf);
+    add_a_new_line(lexeme_end lexbuf);
     next delimited_string lexbuf
   }
 | eof { die lexbuf "Unterminated_delimited_string" }
@@ -316,11 +392,12 @@ and delimited_string = parse
 and rawstring = parse
   ''' { () }
 | '\n' { 
-    lpush current_file_lines_starts (lexeme_end lexbuf);
+    add_a_new_line(lexeme_end lexbuf);
     next rawstring lexbuf
   }
-| "\\'"
-| [^ '\n' ''']+ { next rawstring lexbuf }
+| '\\' { next rawstring lexbuf }
+| "\\'" { next_s "'" rawstring lexbuf }
+| [^ '\n' ''' '\\']+ { next rawstring lexbuf }
 | eof { die lexbuf "Unterminated_rawstring" }
 
 and qqstring = parse
@@ -333,7 +410,7 @@ and qqstring = parse
   }
 | '\\' { next_rule := Some qqstring ; string_escape lexbuf }
 | '\n' { 
-    lpush current_file_lines_starts (lexeme_end lexbuf);
+    add_a_new_line(lexeme_end lexbuf);
     next qqstring lexbuf
   }
 | [^ '\n' '(' ')' '\\']+ { next qqstring lexbuf }
@@ -348,7 +425,7 @@ and qstring = parse
     next qstring lexbuf
   }
 | '\n' { 
-    lpush current_file_lines_starts (lexeme_end lexbuf);
+    add_a_new_line(lexeme_end lexbuf);
     next qstring lexbuf
   }
 | [^ '\n' '(' ')']+ { next qstring lexbuf }
@@ -363,7 +440,7 @@ and here_doc = parse
     else if s <> !current_here_doc_mark then Printf.eprintf "%sTrailing spaces after HERE-document mark\n" (pos2sfull lexbuf)
   }
 | '\n' { 
-    lpush current_file_lines_starts (lexeme_end lexbuf);
+    add_a_new_line(lexeme_end lexbuf);
     next here_doc lexbuf
   }
 | eof { die lexbuf "Unterminated_here_doc" }
@@ -376,7 +453,7 @@ and raw_here_doc = parse
     else if s <> !current_here_doc_mark then Printf.eprintf "%sTrailing spaces after HERE-document mark\n" (pos2sfull lexbuf)
   }
 | '\n' { 
-    lpush current_file_lines_starts (lexeme_end lexbuf);
+    add_a_new_line(lexeme_end lexbuf);
     next raw_here_doc lexbuf
   }
 | eof { die lexbuf "Unterminated_raw_here_doc" }
@@ -397,8 +474,8 @@ and string_escape = parse
 | _ { next_s ("\\" ^ lexeme lexbuf) (some !next_rule) lexbuf }
 
 and pattern_options = parse
-| [ 'g' 'i' 'm' 'o' 's' 'x' 'e' ] { next pattern_options lexbuf }
-| _ { lexbuf.Lexing.lex_curr_pos <- lexbuf.Lexing.lex_curr_pos - 1; () }
+| [ 'g' 'i' 'm' 'o' 's' 'x' 'e' 'd' ] { next pattern_options lexbuf }
+| _ { putback lexbuf 1; () }
 
 and pod_command = parse
 | [^ '\n' ]+ {
@@ -416,8 +493,14 @@ and pod_command = parse
 | _ { failwith(pos2sfull lexbuf ^ "POD command expected") }
 
 and pod = parse
-| "\n=" { next pod_command lexbuf }
+| "\n=" { 
+    add_a_new_line(lexeme_end lexbuf - 1);
+    next pod_command lexbuf 
+  }
 | "\n" [^ '=' '\n'] [^ '\n']*
-| "\n" { next pod lexbuf }
+| "\n" { 
+    add_a_new_line(lexeme_end lexbuf);
+    next pod lexbuf 
+  }
 | eof
 | _ { failwith(pos2sfull_with !current_string_start_pos (lexeme_end lexbuf) ^ "POD block still open") }
diff --git a/perl_checker.src/parser.mly b/perl_checker.src/parser.mly
index 0f5f34b..37ec3ce 100644
--- a/perl_checker.src/parser.mly
+++ b/perl_checker.src/parser.mly
@@ -2,8 +2,8 @@
   open Types
   open Common
 
-  let parse_error _ =
-   failwith (Info.pos2sfull_current (Parsing.symbol_start()) (Parsing.symbol_end()) ^ "parse error")   
+  let parse_error msg =
+   failwith (Info.pos2sfull_current (Parsing.symbol_start()) (Parsing.symbol_end()) ^ msg)
 
   let to_Ident = function
    | BAREWORD(name, pos) -> Ident(I_raw, None, name, pos)
@@ -19,22 +19,23 @@
 
 %token EOF
 %token <Types.pos> SPACE
-%token <string * Types.pos> NUM STRING BAREWORD PROTOTYPE REVISION COMMENT POD LABEL
-%token <string * Types.pos> COMMAND_STRING QUOTEWORDS
+%token <string * Types.pos> NUM STRING BAREWORD REVISION COMMENT POD LABEL
+%token <string * Types.pos> COMMAND_STRING QUOTEWORDS COMPACT_HASH_SUBSCRIPT
 %token <(string * Types.pos) ref> HERE_DOC
 %token <string * string * Types.pos> PATTERN
 %token <string * string * string * Types.pos> PATTERN_SUBST
 
 %token <string option * string * Types.pos> SCALAR_IDENT ARRAY_IDENT HASH_IDENT FUNC_IDENT STAR_IDENT RAW_IDENT ARRAYLEN_IDENT
+%token <string * string * Types.pos> FUNC_DECL_WITH_PROTO
 
 %token IF ELSIF ELSE UNLESS DO WHILE UNTIL MY CONTINUE SUB LOCAL
 %token <string> FOR
-%token USE PACKAGE FORMAT
-%token <Types.pos> PRINT NEW
+%token USE PACKAGE BEGIN END
+%token <Types.pos> PRINT NEW FORMAT
 %token AT DOLLAR PERCENT AMPERSAND STAR ARRAYLEN
 %token SEMI_COLON PKG_SCOPE
 %token PAREN PAREN_END
-%token BRACKET BRACKET_END
+%token BRACKET BRACKET_END BRACKET_HASHREF
 %token ARRAYREF ARRAYREF_END
 
 
@@ -46,6 +47,7 @@
 %token MULT DIVISION MODULO REPLICATE
 %token PLUS MINUS CONCAT
 %token BIT_SHIFT_LEFT BIT_SHIFT_RIGHT
+%token LT GT
 %token <string> COMPARE_OP EQ_OP
 %token BIT_AND
 %token BIT_OR BIT_XOR
@@ -76,7 +78,7 @@
 %left       BIT_OR BIT_XOR
 %left       BIT_AND
 %nonassoc   EQ_OP
-%nonassoc   COMPARE_OP
+%nonassoc   LT GT COMPARE_OP
 %nonassoc   UNIOP
 %left       BIT_SHIFT_LEFT BIT_SHIFT_RIGHT
 %left       PLUS MINUS CONCAT
@@ -104,7 +106,9 @@ block: BRACKET lineseq BRACKET_END { $2 }
 lineseq: /* A collection of "lines" in the program */
 |              {[]}
 | decl lineseq {[]}
-| label line {[]}
+| line {[]}
+| LABEL lineseq {[]}
+| line {[]}
 
 line:
 | if_then_else lineseq { [] }
@@ -133,7 +137,7 @@ loop:
 | FOR MY SCALAR_IDENT PAREN expr PAREN_END block cont {[]}
 | FOR SCALAR_IDENT PAREN expr PAREN_END block cont {[]}
 | FOR PAREN expr PAREN_END block cont {[]}
-| FOR PAREN expr_or_empty ';' expr_or_empty ';' expr_or_empty PAREN_END block {[]}
+| FOR PAREN expr_or_empty SEMI_COLON expr_or_empty SEMI_COLON expr_or_empty PAREN_END block {[]}
 | block cont {[]} /* a block is a loop that happens once */
 
 cont: /* Continue blocks */
@@ -150,9 +154,12 @@ sideff: /* An expression which may have a side-effect */
 | expr  FOR    expr { [ (*Binary($2,        $1, $3)*) ] }
 
 decl:
-| FORMAT formname block {[]}
-| SUB word prototype_or_empty subbody {[]}
+| FORMAT bareword_or_empty ASSIGN {[]}
+| SUB word subbody {[]}
+| FUNC_DECL_WITH_PROTO subbody {[]}
 | PACKAGE word SEMI_COLON {[]}
+| BEGIN block {[]}
+| END block {[]}
 | USE word_or_empty revision_or_empty listexpr SEMI_COLON {[]}
 
 formname: {[]} | BAREWORD {[]}
@@ -184,11 +191,7 @@ term:
 | LOCAL term    %prec UNIOP {[]}
 | PAREN expr_or_empty PAREN_END {[]}
 
-| scalar        %prec PAREN {[]}
-| star          %prec PAREN {[]}
-| hash          %prec PAREN {[]}
-| array         %prec PAREN {[]}
-| arraylen      %prec PAREN {[]} /* $#x, $#{ something } */
+| variable {[]}
 
 | subscripted {[]}
 
@@ -199,15 +202,9 @@ term:
 | function_call {[]}
 
 | word {[]}
-| NUM {[]}
-| STRING {[]}
-| REVISION {[]}
-| COMMAND_STRING {[]}
-| QUOTEWORDS {[]}
-| HERE_DOC {[]}
+| value {[]}
 
 function_call:
-| func {[]} /* &foo; */
 | func PAREN expr_or_empty PAREN_END {[]} /* &foo(@args) */
 | word argexpr {[]} /* foo(@args) */
 | word block listexpr %prec LSTOP {[]} /* map { foo } @bar */
@@ -216,6 +213,7 @@ function_call:
 | term ARROW word_or_scalar {[]} /* $foo->bar */
 
 | NEW word listexpr {[]} /* new Class @args */
+| PRINT argexpr {[]} /* print $fh @args */
 | PRINT word_or_scalar argexpr {[]} /* print $fh @args */
 
 termdo: /* Things called with "do" */
@@ -240,15 +238,25 @@ myterm: /* Things that can be "my"'d */
 | array {[]}
 
 subscripted: /* Some kind of subscripted expression */
-| star PKG_SCOPE BRACKET expr BRACKET_END {[]} /* *main::{something} */
-| scalar ARRAYREF expr ARRAYREF_END         {[]} /* $array[$element] */
-| scalar BRACKET expr BRACKET_END           {[]} /* $foo{bar} */
-| term ARROW ARRAYREF expr ARRAYREF_END     {[]} /* somearef->[$element] */
-| term ARROW BRACKET expr BRACKET_END       {[]} /* somehref->{bar} */
-| term ARROW PAREN expr_or_empty PAREN_END  {[]} /* $subref->(@args) */
-| subscripted ARRAYREF expr ARRAYREF_END    {[]} /* $foo->[$bar][$baz] */
-| subscripted BRACKET expr BRACKET_END      {[]} /* $foo->[bar]{baz;} */
-| subscripted PAREN expr_or_empty PAREN_END {[]} /* $foo->{bar}(@args) */
+| variable PKG_SCOPE bracket_subscript        {[]} /* *main::{something} */
+| scalar bracket_subscript                    {[]} /* $foo{bar} */
+| scalar ARRAYREF expr ARRAYREF_END           {[]} /* $array[$element] */
+| term ARROW bracket_subscript                {[]} /* somehref->{bar} */
+| term ARROW ARRAYREF expr ARRAYREF_END       {[]} /* somearef->[$element] */
+| term ARROW PAREN expr_or_empty PAREN_END    {[]} /* $subref->(@args) */
+| subscripted bracket_subscript               {[]} /* $foo->[bar]{baz;} */
+| subscripted ARRAYREF expr ARRAYREF_END      {[]} /* $foo->[$bar][$baz] */
+| subscripted PAREN expr_or_empty PAREN_END   {[]} /* $foo->{bar}(@args) */
+
+bracket_subscript:
+| BRACKET expr BRACKET_END {[]}
+| COMPACT_HASH_SUBSCRIPT {[]}
+
+anonymous: /* Constructors for anonymous data */
+| ARRAYREF expr_or_empty ARRAYREF_END {[]}
+| BRACKET expr_or_empty BRACKET_END %prec PAREN {[]} /* { foo => "Bar" } */
+| BRACKET_HASHREF expr_or_empty BRACKET_END %prec PAREN {[]} /* { foo => "Bar" } */
+| SUB block %prec PAREN {[]}
 
 binop:
 | ASSIGN {[]}
@@ -256,7 +264,7 @@ binop:
 | MULT {[]} | DIVISION {[]} | MODULO {[]} | REPLICATE {[]}
 | PLUS {[]} | MINUS {[]} | CONCAT {[]}
 | BIT_SHIFT_LEFT {[]} | BIT_SHIFT_RIGHT {[]}
-| COMPARE_OP {[]}
+| LT {[]} | GT {[]} | COMPARE_OP {[]}
 | EQ_OP {[]}
 | BIT_AND {[]}
 | BIT_OR {[]} | BIT_XOR {[]}
@@ -265,14 +273,25 @@ binop:
 | OR_TIGHT {[]} | XOR {[]}
 | PATTERN_MATCH {[]} | PATTERN_MATCH_NOT {[]}
 
-anonymous: /* Constructors for anonymous data */
-| ARRAYREF expr_or_empty ARRAYREF_END {[]}
-| BRACKET expr_or_empty BRACKET_END %prec PAREN {[]} /* { foo => "Bar" } */
-| SUB prototype_or_empty block %prec PAREN {[]}
-
-label:
-|                 { None }
-| BAREWORD COLON { Some $1 }
+value:
+| NUM {[]}
+| STRING {[]}
+| REVISION {[]}
+| COMMAND_STRING {[]}
+| QUOTEWORDS {[]}
+| HERE_DOC {[]}
+| PATTERN {[]}
+| PATTERN_SUBST {[]}
+| LT GT {[]}
+| LT term GT {[]}
+
+variable:
+| scalar   %prec PAREN {[]}
+| star     %prec PAREN {[]}
+| hash     %prec PAREN {[]}
+| array    %prec PAREN {[]}
+| arraylen %prec PAREN {[]} /* $#x, $#{ something } */
+| func     %prec PAREN {[]} /* &foo; */
 
 word:
 | bareword { fst $1 }
@@ -285,23 +304,26 @@ word:
 comma: COMMA {[]} | RIGHT_ARROW {[]}
 
 word_or_scalar:
-| bareword     { [] }
-| RAW_IDENT    { [] }
-| SCALAR_IDENT { [] }
+| bareword  { [] }
+| RAW_IDENT { [] }
+| scalar    { [] }
+
+block_or_scalar: block {[]} | scalar {[]}
 
 bareword:
 | NEW { "new", $1 }
 | PRINT { "print", $1 }
+| FORMAT { "format", $1 }
 | BAREWORD { $1 }
 
-arraylen: ARRAYLEN_IDENT {[]} | ARRAYLEN block {[]}
-scalar: SCALAR_IDENT {[]} | DOLLAR    block {[]}
-func:   FUNC_IDENT   {[]} | AMPERSAND block {[]}
-array:  ARRAY_IDENT  {[]} | AT        block {[]}
-hash:   HASH_IDENT   {[]} | PERCENT   block {[]}
-star:   STAR_IDENT   {[]} | STAR      block {[]}
+arraylen: ARRAYLEN_IDENT {[]} | ARRAYLEN block_or_scalar {[]}
+scalar: SCALAR_IDENT {[]} | DOLLAR    block_or_scalar {[]}
+func:   FUNC_IDENT   {[]} | AMPERSAND block_or_scalar {[]}
+array:  ARRAY_IDENT  {[]} | AT        block_or_scalar {[]}
+hash:   HASH_IDENT   {[]} | PERCENT   block_or_scalar {[]}
+star:   STAR_IDENT   {[]} | STAR      block_or_scalar {[]}
 
 expr_or_empty: {[]} | expr {[]}
 word_or_empty: {[]} | word {[]}
-prototype_or_empty: {[]} | PROTOTYPE {[]}
+bareword_or_empty: {[]} | bareword {[]}
 revision_or_empty: {[]} | REVISION {[]}
diff --git a/perl_checker.src/perl_checker.ml b/perl_checker.src/perl_checker.ml
index dd7b75e..cc5bb19 100644
--- a/perl_checker.src/perl_checker.ml
+++ b/perl_checker.src/perl_checker.ml
@@ -7,8 +7,8 @@ let _ =
     try
       Info.start_a_new_file file ;
       if false then
-	let tokens = Lexer.lexbuf2list Lexer.token lexbuf in
-	let _,_ = tokens, tokens in ""
+	let t = Lexer.lexbuf2list Lexer.token lexbuf in
+	let _,_ = t, t in ""
       else
 	Parser.prog Lexer.token lexbuf
     with Failure s -> (
-- 
cgit v1.2.1