10 files changed, 803 insertions, 443 deletions
diff --git a/perl_checker.src/Makefile b/perl_checker.src/Makefile
index 1c73153..6318ac5 100644
--- a/perl_checker.src/Makefile
+++ b/perl_checker.src/Makefile
@@ -13,9 +13,9 @@ TMP_FILES =  $(YACC_FILES:%.mly=%.mli) $(YACC_FILES:%.mly=%.output) $(YACC_FILES
 
 ALL_PROGS = perl_checker_debug perl_checker
 
-PROG_OBJS_WITH_CMI = parser.cmo print.cmo perl_checker.cmo
+PROG_OBJS_WITH_CMI = parser_helper.cmo parser.cmo print.cmo perl_checker.cmo
 PROG_OBJS = common.cmo flags.cmo info.cmo $(LEX_FILES:%.mll=%.cmo) $(PROG_OBJS_WITH_CMI)
-CMA_FILES = 
+CMA_FILES = unix.cma
 
 PROG_OBJX_WITH_CMI = $(PROG_OBJS_WITH_CMI:%.cmo=%.cmx)
 PROG_OBJX = $(PROG_OBJS:%.cmo=%.cmx)
@@ -48,7 +48,7 @@ perl_checker: .depend $(PROG_OBJX)
 
 $(PROG_OBJS_WITH_CMI): %.cmo: %.cmi
 
-$(PROG_OBJX_WITH_CMI): %.cmx: %.cmi
+$(PROG_OBJX_WITH_CMI): %.cmx: %.cmi 
 
 %.cmo: %.ml
 	$(CSLC) $(CSLFLAGS) -c $<
@@ -74,9 +74,11 @@ depend: .depend
 	$(CSLDEP) $(INCLUDES) *.mli *.mll *.ml > .depend
 
 # missing dependencies:
-perl_checker.cmo: lexer.cmi
-perl_checker.cmx: lexer.cmi
-lexer.cmx: common.cmi
-lexer.cmo: common.cmi
+perl_checker.cmo: lexer.cmi parser.cmi
+perl_checker.cmx: lexer.cmi parser.cmi
+lexer.cmx: common.cmi parser.cmi
+lexer.cmo: common.cmi parser.cmi
+parser.cmo: parser_helper.cmi
+parser.cmx: parser_helper.cmi
 
 -include .depend
diff --git a/perl_checker.src/common.ml b/perl_checker.src/common.ml
index 0f20e7a..f600e01 100644
--- a/perl_checker.src/common.ml
+++ b/perl_checker.src/common.ml
@@ -8,18 +8,6 @@ exception GraphSort_circular_deps
 type ('a, 'b) either = Left of 'a | Right of 'b
 type ('a, 'b) or_option = Or_some of 'a | Or_error of 'b
 
-let bpos = "",-1,-1
-
-let norm (a,b) = if a = -1 then b else a
-let unipos (s,a,b) (s2,c,d) = 
-  if (a,b) = (-1,-1) then s2,c,d else
-  if (c,d) = (-1,-1) then s,a,b else
-  if s <> s2 then bpos else s, min (norm(a,c)) (norm(c,a)), max (norm(b,d)) (norm(d,b))
-
-let uniposl l = match l with
-  | [] -> bpos
-  | e::l -> fold_left unipos e l
-
 (**********************************************************************************)
 
 let internal_error s = failwith ("internal error: " ^ s)
@@ -267,6 +255,9 @@ let rec l_option2option_l = function
   | Some e :: l -> map_option (fun l -> e :: l) (l_option2option_l l)
 let map_option_env f (e, env) = map_option f e, env
 
+let t2_to_list (a,b) = [ a ; b ]
+let t3_to_list (a,b,c) = [ a ; b ; c ]
+
 let if_some bool val_ = if bool then Some val_ else None
 
 let rec fold_left_option f val_ = function
@@ -653,17 +644,13 @@ let skip_n_char_ beg end_ s =
   String.sub s beg (String.length s - beg - end_)
 let skip_n_char n s = skip_n_char_ n 0 s
 
-let rec index_spaces_from beg s =
-  if s.[beg] = ' ' || s.[beg] = '\t' then beg else index_spaces_from (beg+1) s
-let index_spaces s = index_spaces_from 0 s
-
-let rec index_non_spaces_from beg s =
-  if s.[beg] = ' ' || s.[beg] = '\t' then index_non_spaces_from (beg+1) s else beg
-let index_non_spaces s = index_non_spaces_from 0 s
+let rec non_index_from s beg c =
+  if s.[beg] = c then non_index_from s (beg+1) c else beg
+let non_index s c = non_index_from s 0 c
 
-let rec rindex_non_spaces_from beg s =
-  if s.[beg] = ' ' || s.[beg] = '\t' then rindex_non_spaces_from (beg-1) s else beg
-let rindex_non_spaces s = rindex_non_spaces_from (String.length s - 1) s
+let rec non_rindex_from s beg c =
+  if s.[beg] = c then non_rindex_from s (beg-1) c else beg
+let non_rindex s c = non_rindex_from s (String.length s - 1) c
 
 let rec explode_string = function
   | "" -> []
diff --git a/perl_checker.src/common.mli b/perl_checker.src/common.mli
index 353a2a3..ccf6587 100644
--- a/perl_checker.src/common.mli
+++ b/perl_checker.src/common.mli
@@ -3,10 +3,6 @@ exception Not_comparable
 exception GraphSort_circular_deps
 type ('a, 'b) either = Left of 'a | Right of 'b
 and ('a, 'b) or_option = Or_some of 'a | Or_error of 'b
-val bpos : string * int * int
-val norm : int * int -> int
-val unipos : string * int * int -> string * int * int -> string * int * int
-val uniposl : (string * int * int) list -> string * int * int
 val internal_error : string -> 'a
 val id : 'a -> 'a
 val double : 'a -> 'a * 'a
@@ -65,6 +61,8 @@ val map_optionoption : ('a -> 'b option) -> 'a option -> 'b option
 val t2_option2option_t2 : 'a option * 'b option -> ('a * 'b) option
 val l_option2option_l : 'a option list -> 'a list option
 val map_option_env : ('a -> 'b) -> 'a option * 'c -> 'b option * 'c
+val t2_to_list : 'a * 'a -> 'a list
+val t3_to_list : 'a * 'a * 'a -> 'a list
 val if_some : bool -> 'a -> 'a option
 val fold_left_option : ('a -> 'b -> 'a option) -> 'a -> 'b list -> 'a option
 val collect_some_withenv :
@@ -175,12 +173,10 @@ val chop : string -> string
 val chomps : string -> string
 val skip_n_char_ : int -> int -> string -> string
 val skip_n_char : int -> string -> string
-val index_spaces_from : int -> string -> int
-val index_spaces : string -> int
-val index_non_spaces_from : int -> string -> int
-val index_non_spaces : string -> int
-val rindex_non_spaces_from : int -> string -> int
-val rindex_non_spaces : string -> int
+val non_index_from : string -> int -> char -> int
+val non_index : string -> char -> int
+val non_rindex_from : string -> int -> char -> int
+val non_rindex : string -> char -> int
 val explode_string : string -> char list
 val is_uppercase : char -> bool
 val is_lowercase : char -> bool
diff --git a/perl_checker.src/info.ml b/perl_checker.src/info.ml
index 8e27bd3..3d68439 100644
--- a/perl_checker.src/info.ml
+++ b/perl_checker.src/info.ml
@@ -14,17 +14,19 @@ let start_a_new_file file =
 
 let raw_pos2raw_line file a =
   let starts = map_index (fun a b -> a,b) (rev !(assoc file !lines_starts)) in
-  let ((offset, line), _) = find (fun (_,(e,_)) -> e > a) (combine starts (tl starts @ [99999, 99999])) in
+  let ((offset, line), _) = find (fun (_,(e,_)) -> e > a) (combine starts (tl starts @ [999999999, 999999999])) in
   line, offset
 
 let pos2line (file, a, b) = 
   let line, offset = raw_pos2raw_line file a in
-  file, line, a - offset, b - offset
+  file, line, a - offset + 1, b - offset + 1
+
+let pos2s (file, a, b) = sprintf "(%s, %d, %d)" file a b
 
 let pos2sfull pos = 
   try
     let (file, line, n1,n2) = pos2line pos in
     sprintf "File \"%s\", line %d, character %d-%d\n" file (line + 1) n1 n2
-  with Not_found -> ""
+  with Not_found -> failwith ("bad position " ^ pos2s pos)
 
 let pos2sfull_current a b = pos2sfull (!current_file, a, b)
diff --git a/perl_checker.src/lexer.mll b/perl_checker.src/lexer.mll
index 6abbbc2..8450aa9 100644
--- a/perl_checker.src/lexer.mll
+++ b/perl_checker.src/lexer.mll
@@ -1,12 +1,202 @@
 { (* -*- caml -*- *)
-open Parser
 open Common
+open Types
 open Lexing
 open Info
 
+type raw_token =
+  | EOF of raw_pos
+  | SPACE of int
+  | CR
+  | NUM of (string * raw_pos)
+  | STRING of (string * raw_pos)
+  | BAREWORD of (string * raw_pos)
+  | REVISION of (string * raw_pos)
+  | COMMENT of (string * raw_pos)
+  | POD of (string * raw_pos)
+  | LABEL of (string * raw_pos)
+  | COMMAND_STRING of (string * raw_pos)
+  | PRINT_TO_STAR of (string * raw_pos)
+  | PRINT_TO_SCALAR of (string * raw_pos)
+  | QUOTEWORDS of (string * raw_pos)
+  | COMPACT_HASH_SUBSCRIPT of (string * raw_pos)
+  | HERE_DOC of ((string * raw_pos) ref * raw_pos)
+  | PATTERN of (string * string * raw_pos)
+  | PATTERN_SUBST of (string * string * string * raw_pos)
+  | SCALAR_IDENT of (string option * string * raw_pos)
+  | ARRAY_IDENT of (string option * string * raw_pos)
+  | HASH_IDENT of (string option * string * raw_pos)
+  | FUNC_IDENT of (string option * string * raw_pos)
+  | STAR_IDENT of (string option * string * raw_pos)
+  | RAW_IDENT of (string option * string * raw_pos)
+  | ARRAYLEN_IDENT of (string option * string * raw_pos)
+  | FUNC_DECL_WITH_PROTO of (string * string * raw_pos)
+
+  | IF of raw_pos | ELSIF of raw_pos | ELSE of raw_pos | UNLESS of raw_pos | DO of raw_pos | WHILE of raw_pos | UNTIL of raw_pos | MY of raw_pos | CONTINUE of raw_pos | SUB of raw_pos
+  | LOCAL of raw_pos | FOR of (string * raw_pos) | USE of raw_pos | PACKAGE of raw_pos | BEGIN of raw_pos | END of raw_pos | PRINT of (string * raw_pos) 
+  | NEW of (raw_pos) | FORMAT of (raw_pos) | AT of raw_pos | DOLLAR of raw_pos | PERCENT of raw_pos | AMPERSAND of raw_pos
+  | STAR of raw_pos | ARRAYLEN of raw_pos | SEMI_COLON of raw_pos | PKG_SCOPE of raw_pos | PAREN of raw_pos | PAREN_END of raw_pos | BRACKET of raw_pos
+  | BRACKET_END of raw_pos | BRACKET_HASHREF of raw_pos | ARRAYREF of raw_pos | ARRAYREF_END of raw_pos | ARROW of raw_pos | INCR of raw_pos | DECR of raw_pos
+  | POWER of raw_pos | TIGHT_NOT of raw_pos | BIT_NEG of raw_pos | REF of raw_pos | PATTERN_MATCH of raw_pos | PATTERN_MATCH_NOT of raw_pos | MULT of raw_pos
+  | DIVISION of raw_pos | MODULO of raw_pos | REPLICATE of raw_pos | PLUS of raw_pos | MINUS of raw_pos | CONCAT of raw_pos | BIT_SHIFT_LEFT of raw_pos
+  | BIT_SHIFT_RIGHT of raw_pos | LT of raw_pos | GT of raw_pos | COMPARE_OP of (string * raw_pos) | EQ_OP of (string * raw_pos)
+  | BIT_AND of raw_pos | BIT_OR of raw_pos | BIT_XOR of raw_pos | AND_TIGHT of raw_pos | OR_TIGHT of raw_pos | DOTDOT of raw_pos | DOTDOTDOT of raw_pos
+  | QUESTION_MARK of raw_pos | COLON of raw_pos | ASSIGN of (string * raw_pos) | COMMA of raw_pos | RIGHT_ARROW of raw_pos | NOT of raw_pos | AND of raw_pos | OR of raw_pos | XOR of raw_pos
+
+let saved_token = ref None
+
+let concat_bareword_paren get_token lexbuf =
+  let token = match !saved_token with
+  | None -> get_token lexbuf
+  | Some t -> t
+  in
+  let token, next =
+    match token with
+    | Parser.PRINT(s, both)
+    | Parser.BAREWORD(s, both) ->
+	let next_token = get_token lexbuf in
+	(match next_token with Parser.PAREN(_, (Space_0, _)) -> Parser.BAREWORD_PAREN(s, both) | _ -> token), Some next_token
+    | Parser.RAW_IDENT(ident, both) ->
+	let next_token = get_token lexbuf in
+	(match next_token with Parser.PAREN(_, (Space_0, _)) -> Parser.RAW_IDENT_PAREN(ident, both) | _ -> token), Some next_token
+    | _ -> token, None
+  in
+  saved_token := next ; token
+
+let rec concat_spaces get_token lexbuf =
+  let rec get_spaces spaces lexbuf =
+    match get_token lexbuf with
+    | CR -> get_spaces Space_cr lexbuf
+    | SPACE n ->
+	let spaces' = 
+	  match spaces with
+	  | Space_cr -> Space_cr
+	  | Space_0 -> if n = 1 then Space_1 else Space_n
+	  | _ -> Space_n
+	in
+	get_spaces spaces' lexbuf
+    | token -> token, spaces
+  in
+  let token, spaces = get_spaces Space_0 lexbuf in
+  match token with
+  | NUM(s, pos) -> Parser.NUM(s, (spaces, pos))
+  | STRING(s, pos) -> Parser.STRING(s, (spaces, pos))
+  | BAREWORD(s, pos) -> Parser.BAREWORD(s, (spaces, pos))
+  | REVISION(s, pos) -> Parser.REVISION(s, (spaces, pos))
+  | COMMENT(s, pos) -> Parser.COMMENT(s, (spaces, pos))
+  | POD(s, pos) -> Parser.POD(s, (spaces, pos))
+  | LABEL(s, pos) -> Parser.LABEL(s, (spaces, pos))
+  | COMMAND_STRING(s, pos) -> Parser.COMMAND_STRING(s, (spaces, pos))
+  | PRINT(s, pos) -> Parser.PRINT(s, (spaces, pos))
+  | PRINT_TO_STAR(s, pos) -> Parser.PRINT_TO_STAR(s, (spaces, pos))
+  | PRINT_TO_SCALAR(s, pos) -> Parser.PRINT_TO_SCALAR(s, (spaces, pos))
+  | QUOTEWORDS(s, pos) -> Parser.QUOTEWORDS(s, (spaces, pos))
+  | COMPACT_HASH_SUBSCRIPT(s, pos) -> Parser.COMPACT_HASH_SUBSCRIPT(s, (spaces, pos))
+  | HERE_DOC(r, pos) -> Parser.HERE_DOC(r, (spaces, pos))
+  | PATTERN(s, opts, pos) -> Parser.PATTERN((s, opts), (spaces, pos))
+  | PATTERN_SUBST(from, to_, opts, pos) -> Parser.PATTERN_SUBST((from, to_, opts), (spaces, pos))
+  | SCALAR_IDENT(kind, name, pos) -> Parser.SCALAR_IDENT((kind, name), (spaces, pos))
+  | ARRAY_IDENT(kind, name, pos) -> Parser.ARRAY_IDENT((kind, name), (spaces, pos))
+  | HASH_IDENT(kind, name, pos) -> Parser.HASH_IDENT((kind, name), (spaces, pos))
+  | FUNC_IDENT(kind, name, pos) -> Parser.FUNC_IDENT((kind, name), (spaces, pos))
+  | STAR_IDENT(kind, name, pos) -> Parser.STAR_IDENT((kind, name), (spaces, pos))
+  | RAW_IDENT(kind, name, pos) -> Parser.RAW_IDENT((kind, name), (spaces, pos))
+  | ARRAYLEN_IDENT(kind, name, pos) -> Parser.ARRAYLEN_IDENT((kind, name), (spaces, pos))
+  | FUNC_DECL_WITH_PROTO(name, proto, pos) -> Parser.FUNC_DECL_WITH_PROTO((name, proto), (spaces, pos))
+
+  | NEW(pos) -> Parser.NEW((), (spaces, pos))
+  | FORMAT(pos) -> Parser.FORMAT((), (spaces, pos))
+  | COMPARE_OP(s, pos) -> Parser.COMPARE_OP(s, (spaces, pos))
+  | EQ_OP(s, pos) -> Parser.EQ_OP(s, (spaces, pos))
+  | ASSIGN(s, pos) -> Parser.ASSIGN(s, (spaces, pos))
+  | FOR(s, pos) -> Parser.FOR(s, (spaces, pos))
+
+  | EOF              (pos) -> Parser.EOF              ((), (spaces, pos))
+  | IF               (pos) -> Parser.IF               ((), (spaces, pos))
+  | ELSIF            (pos) -> Parser.ELSIF            ((), (spaces, pos))
+  | ELSE             (pos) -> Parser.ELSE             ((), (spaces, pos))
+  | UNLESS           (pos) -> Parser.UNLESS           ((), (spaces, pos))
+  | DO               (pos) -> Parser.DO               ((), (spaces, pos))
+  | WHILE            (pos) -> Parser.WHILE            ((), (spaces, pos))
+  | UNTIL            (pos) -> Parser.UNTIL            ((), (spaces, pos))
+  | MY               (pos) -> Parser.MY               ((), (spaces, pos))
+  | CONTINUE         (pos) -> Parser.CONTINUE         ((), (spaces, pos))
+  | SUB              (pos) -> Parser.SUB              ((), (spaces, pos))
+  | LOCAL            (pos) -> Parser.LOCAL            ((), (spaces, pos))
+  | USE              (pos) -> Parser.USE              ((), (spaces, pos))
+  | PACKAGE          (pos) -> Parser.PACKAGE          ((), (spaces, pos))
+  | BEGIN            (pos) -> Parser.BEGIN            ((), (spaces, pos))
+  | END              (pos) -> Parser.END              ((), (spaces, pos))
+  | AT               (pos) -> Parser.AT               ((), (spaces, pos))
+  | DOLLAR           (pos) -> Parser.DOLLAR           ((), (spaces, pos))
+  | PERCENT          (pos) -> Parser.PERCENT          ((), (spaces, pos))
+  | AMPERSAND        (pos) -> Parser.AMPERSAND        ((), (spaces, pos))
+  | STAR             (pos) -> Parser.STAR             ((), (spaces, pos))
+  | ARRAYLEN         (pos) -> Parser.ARRAYLEN         ((), (spaces, pos))
+  | SEMI_COLON       (pos) -> Parser.SEMI_COLON       ((), (spaces, pos))
+  | PKG_SCOPE        (pos) -> Parser.PKG_SCOPE        ((), (spaces, pos))
+  | PAREN            (pos) -> Parser.PAREN            ((), (spaces, pos))
+  | PAREN_END        (pos) -> Parser.PAREN_END        ((), (spaces, pos))
+  | BRACKET          (pos) -> Parser.BRACKET          ((), (spaces, pos))
+  | BRACKET_END      (pos) -> Parser.BRACKET_END      ((), (spaces, pos))
+  | BRACKET_HASHREF  (pos) -> Parser.BRACKET_HASHREF  ((), (spaces, pos))
+  | ARRAYREF         (pos) -> Parser.ARRAYREF         ((), (spaces, pos))
+  | ARRAYREF_END     (pos) -> Parser.ARRAYREF_END     ((), (spaces, pos))
+  | ARROW            (pos) -> Parser.ARROW            ((), (spaces, pos))
+  | INCR             (pos) -> Parser.INCR             ((), (spaces, pos))
+  | DECR             (pos) -> Parser.DECR             ((), (spaces, pos))
+  | POWER            (pos) -> Parser.POWER            ((), (spaces, pos))
+  | TIGHT_NOT        (pos) -> Parser.TIGHT_NOT        ((), (spaces, pos))
+  | BIT_NEG          (pos) -> Parser.BIT_NEG          ((), (spaces, pos))
+  | REF              (pos) -> Parser.REF              ((), (spaces, pos))
+  | PATTERN_MATCH    (pos) -> Parser.PATTERN_MATCH    ((), (spaces, pos))
+  | PATTERN_MATCH_NOT(pos) -> Parser.PATTERN_MATCH_NOT((), (spaces, pos))
+  | MULT             (pos) -> Parser.MULT             ((), (spaces, pos))
+  | DIVISION         (pos) -> Parser.DIVISION         ((), (spaces, pos))
+  | MODULO           (pos) -> Parser.MODULO           ((), (spaces, pos))
+  | REPLICATE        (pos) -> Parser.REPLICATE        ((), (spaces, pos))
+  | PLUS             (pos) -> Parser.PLUS             ((), (spaces, pos))
+  | MINUS            (pos) -> Parser.MINUS            ((), (spaces, pos))
+  | CONCAT           (pos) -> Parser.CONCAT           ((), (spaces, pos))
+  | BIT_SHIFT_LEFT   (pos) -> Parser.BIT_SHIFT_LEFT   ((), (spaces, pos))
+  | BIT_SHIFT_RIGHT  (pos) -> Parser.BIT_SHIFT_RIGHT  ((), (spaces, pos))
+  | LT               (pos) -> Parser.LT               ((), (spaces, pos))
+  | GT               (pos) -> Parser.GT               ((), (spaces, pos))
+  | BIT_AND          (pos) -> Parser.BIT_AND          ((), (spaces, pos))
+  | BIT_OR           (pos) -> Parser.BIT_OR           ((), (spaces, pos))
+  | BIT_XOR          (pos) -> Parser.BIT_XOR          ((), (spaces, pos))
+  | AND_TIGHT        (pos) -> Parser.AND_TIGHT        ((), (spaces, pos))
+  | OR_TIGHT         (pos) -> Parser.OR_TIGHT         ((), (spaces, pos))
+  | DOTDOT           (pos) -> Parser.DOTDOT           ((), (spaces, pos))
+  | DOTDOTDOT        (pos) -> Parser.DOTDOTDOT        ((), (spaces, pos))
+  | QUESTION_MARK    (pos) -> Parser.QUESTION_MARK    ((), (spaces, pos))
+  | COLON            (pos) -> Parser.COLON            ((), (spaces, pos))
+  | COMMA            (pos) -> Parser.COMMA            ((), (spaces, pos))
+  | RIGHT_ARROW      (pos) -> Parser.RIGHT_ARROW      ((), (spaces, pos))
+  | NOT              (pos) -> Parser.NOT              ((), (spaces, pos))
+  | AND              (pos) -> Parser.AND              ((), (spaces, pos))
+  | OR               (pos) -> Parser.OR               ((), (spaces, pos))
+  | XOR              (pos) -> Parser.XOR              ((), (spaces, pos))
+
+  | SPACE _ | CR -> internal_error "raw_token_to_token"
+
+let rec lexbuf2list t lexbuf =
+  let rec f () =
+    match t lexbuf with
+    | Parser.EOF _ -> []
+    | e -> e :: f()
+  in
+  let l = f() in
+  l
 
 let next_rule = ref None
 
+let bpos = -1,-1
+let pos lexbuf = lexeme_start lexbuf, lexeme_end lexbuf
+let pos2sfull_with start end_ = Info.pos2sfull (!current_file, start, end_)
+let pos2sfull lexbuf = pos2sfull_with (lexeme_start lexbuf) (lexeme_end lexbuf)
+let putback lexbuf nb = lexbuf.Lexing.lex_curr_pos <- lexbuf.Lexing.lex_curr_pos - nb
+
 let add_a_new_line raw_pos = 
    incr current_file_current_line ;
    lpush current_file_lines_starts raw_pos
@@ -26,12 +216,13 @@ let string_nestness = ref 0
 let building_current_string = ref ""
 let current_string_start_pos = ref 0
 let current_string_start_line = ref 0
+let die lexbuf err = failwith (pos2sfull_with !current_string_start_pos (lexeme_end lexbuf) ^ err)
 
 let ins t lexbuf = 
   building_current_string := ""; 
   current_string_start_pos := lexeme_start lexbuf;
   t lexbuf ;
-  !building_current_string, (!current_file, !current_string_start_pos, lexeme_end lexbuf)
+  !building_current_string, (!current_string_start_pos, lexeme_end lexbuf)
 let ins_to_string t lexbuf =
   let s, pos = ins t lexbuf in
   not_ok_for_match := lexeme_end lexbuf; 
@@ -42,23 +233,6 @@ let next_s s t lexbuf =
   t lexbuf
 let next t lexbuf = next_s (lexeme lexbuf) t lexbuf
 
-let pos lexbuf = !current_file, lexeme_start lexbuf, lexeme_end lexbuf
-let pos2sfull_with start end_ = Info.pos2sfull (!current_file, start, end_)
-let pos2sfull lexbuf = pos2sfull_with (lexeme_start lexbuf) (lexeme_end lexbuf)
-
-let putback lexbuf nb = lexbuf.Lexing.lex_curr_pos <- lexbuf.Lexing.lex_curr_pos - nb
-
-let die lexbuf err = failwith (pos2sfull_with !current_string_start_pos (lexeme_end lexbuf) ^ err)
-
-let rec lexbuf2list t lexbuf =
-  let rec f () =
-    match t lexbuf with
-    | EOF -> []
-    | e -> e :: f()
-  in
-  let l = f() in
-  l
-
 let ident_type_from_char fq name lexbuf c = 
   not_ok_for_match := lexeme_end lexbuf; 
   match c with
@@ -92,7 +266,7 @@ let arraylen_fqident_from_lexbuf lexbuf =
   let fq, name = split_at_two_colons (skip_n_char 2 s) in
   ARRAYLEN_IDENT(Some fq, name, pos lexbuf)
 
-let check_multi_line_delimited_string opts (_, start, end_) =
+let check_multi_line_delimited_string opts (start, end_) =
    let check =
      match opts with
      | None -> true
@@ -102,19 +276,18 @@ let check_multi_line_delimited_string opts (_, start, end_) =
      failwith (pos2sfull_with start end_ ^ "multi-line patterns are not allowed (or use /x modifier)")
 }
 
-let space = [' ' '\t']
 let stash = [ '$' '@' '%' '&' '*' ]
 let ident_start = ['a'-'z' 'A'-'Z' '_']
 let ident = ident_start ['0'-'9' 'A'-'Z' 'a'-'z' '_'] *
 let pattern_separator = [ '/' '!' ',' '|' ]
 
 rule token = parse
-| space+ { 
+| ' '+ { 
     (* propagate not_ok_for_match when it was set by the previous token *)
     if lexeme_start lexbuf = !not_ok_for_match then not_ok_for_match := lexeme_end lexbuf; 
-   (*SPACE(pos lexbuf) *) token lexbuf
+    SPACE(lexeme_end lexbuf - lexeme_start lexbuf)
   }
-| '#' [^ '\n']* { (*COMMENT(lexeme lexbuf, pos lexbuf)*) token lexbuf }
+| '#' [^ '\n']* { SPACE(1) }
 
 | "\n=" { 
     add_a_new_line(lexeme_end lexbuf - 1);
@@ -128,76 +301,85 @@ rule token = parse
       current_here_doc_mark := mark ;
       r := ins (if interpolate then here_doc else raw_here_doc) lexbuf
     with Queue.Empty -> ());
-    token lexbuf
-  }
-| "->" { ARROW }
-| "++" { INCR }
-| "--" { DECR }
-| "**" { POWER }
-| "!" { TIGHT_NOT }
-| "~" { BIT_NEG }
-| "=~" { PATTERN_MATCH }
-| "!~" { PATTERN_MATCH_NOT }
-| "*" { MULT }
-| "%" { MODULO }
-| "x" { REPLICATE }
-| "+" { PLUS }
-| "-" { MINUS }
-| "." { CONCAT }
-| "<<" { BIT_SHIFT_LEFT }
-| ">>" { BIT_SHIFT_RIGHT }
-| "<" { LT }
-| ">" { GT }
-| "<=" | ">=" | "lt" | "gt" | "le" | "ge" { COMPARE_OP(lexeme lexbuf) }
-| "==" | "!=" | "<=>" | "eq" | "ne" | "cmp" { EQ_OP(lexeme lexbuf) }
-| "&" { BIT_AND }
-| "|" { BIT_OR }
-| "^" { BIT_XOR }
-| "&&" { AND_TIGHT }
-| "||" { OR_TIGHT }
-| ".." { DOTDOT }
-| "..." { DOTDOTDOT }
-| "?" { QUESTION_MARK }
-| ":" { COLON }
-| "::" { PKG_SCOPE }
-
-| "=" | "+=" | "-=" | "*=" | ".=" | "|=" | "&=" | "^=" | "||=" | "&&=" { ASSIGN(lexeme lexbuf) }
-
-| "," { COMMA }
-| "=>" { RIGHT_ARROW }
-| "not" { NOT }
-| "and" { AND }
-| "or" { OR }
-| "xor" { XOR }
-
-| "if"       { IF }
-| "else"     { ELSE }
-| "elsif"    { ELSIF }
-| "unless"   { UNLESS }
-| "do"       { DO }
-| "while"    { WHILE }
-| "until"    { UNTIL }
-| "foreach"  { FOR("foreach") }
-| "for"      { FOR("for") }
-| "my"       { MY }
-| "local"    { LOCAL }
-| "continue" { CONTINUE }
-| "sub"      { SUB }
-| "package"  { PACKAGE }
-| "use"      { USE }
-| "BEGIN"    { BEGIN }
-| "END"      { END }
-| "print"    { PRINT(pos lexbuf) }
+    CR
+  }
+| "->" { ARROW(pos lexbuf) }
+| "++" { INCR(pos lexbuf) }
+| "--" { DECR(pos lexbuf) }
+| "**" { POWER(pos lexbuf) }
+| "!" { TIGHT_NOT(pos lexbuf) }
+| "~" { BIT_NEG(pos lexbuf) }
+| "=~" { PATTERN_MATCH(pos lexbuf) }
+| "!~" { PATTERN_MATCH_NOT(pos lexbuf) }
+| "*" { MULT(pos lexbuf) }
+| "%" { MODULO(pos lexbuf) }
+| "x" { REPLICATE(pos lexbuf) }
+| "+" { PLUS(pos lexbuf) }
+| "-" { MINUS(pos lexbuf) }
+| "." { CONCAT(pos lexbuf) }
+| "<<" { BIT_SHIFT_LEFT(pos lexbuf) }
+| ">>" { BIT_SHIFT_RIGHT(pos lexbuf) }
+| "<" { LT(pos lexbuf) }
+| ">" { GT(pos lexbuf) }
+| "<=" | ">=" | "lt" | "gt" | "le" | "ge" { COMPARE_OP(lexeme lexbuf, pos lexbuf) }
+| "==" | "!=" | "<=>" | "eq" | "ne" | "cmp" { EQ_OP(lexeme lexbuf, pos lexbuf) }
+| "&" { BIT_AND(pos lexbuf) }
+| "|" { BIT_OR(pos lexbuf) }
+| "^" { BIT_XOR(pos lexbuf) }
+| "&&" { AND_TIGHT(pos lexbuf) }
+| "||" { OR_TIGHT(pos lexbuf) }
+| ".." { DOTDOT(pos lexbuf) }
+| "..." { DOTDOTDOT(pos lexbuf) }
+| "?" { QUESTION_MARK(pos lexbuf) }
+| ":" { COLON(pos lexbuf) }
+| "::" { PKG_SCOPE(pos lexbuf) }
+
+| "=" | "+=" | "-=" | "*=" | ".=" | "|=" | "&=" | "^=" | "||=" | "&&=" { ASSIGN(lexeme lexbuf, pos lexbuf) }
+
+| "," { COMMA(pos lexbuf) }
+| "=>" { RIGHT_ARROW(pos lexbuf) }
+| "not" { NOT(pos lexbuf) }
+| "and" { AND(pos lexbuf) }
+| "or" { OR(pos lexbuf) }
+| "xor" { XOR(pos lexbuf) }
+
+| "if"       { IF(pos lexbuf) }
+| "else"     { ELSE(pos lexbuf) }
+| "elsif"    { ELSIF(pos lexbuf) }
+| "unless"   { UNLESS(pos lexbuf) }
+| "do"       { DO(pos lexbuf) }
+| "while"    { WHILE(pos lexbuf) }
+| "until"    { UNTIL(pos lexbuf) }
+| "foreach"  { FOR(lexeme lexbuf, pos lexbuf) }
+| "for"      { FOR(lexeme lexbuf, pos lexbuf) }
+| "my"       { MY(pos lexbuf) }
+| "local"    { LOCAL(pos lexbuf) }
+| "continue" { CONTINUE(pos lexbuf) }
+| "sub"      { SUB(pos lexbuf) }
+| "package"  { PACKAGE(pos lexbuf) }
+| "use"      { USE(pos lexbuf) }
+| "BEGIN"    { BEGIN(pos lexbuf) }
+| "END"      { END(pos lexbuf) }
+| "print"    { PRINT(lexeme lexbuf, pos lexbuf) }
 | "new"      { NEW(pos lexbuf) }
 | "format"   { let _ = here_doc_next_line "." false in FORMAT(pos lexbuf) }
 
 | "split"
 | "grep"  { (* ok_for_match! *) BAREWORD(lexeme lexbuf, pos lexbuf) }
 
-| ident space* "=>" { (* needed so that (if => 1) works *)
+| "print " ident ' ' { 
+    putback lexbuf 1; 
+    PRINT_TO_STAR(skip_n_char_ 6 1 (lexeme lexbuf), pos lexbuf);
+  }
+| "print $" ident ' ' { 
+    putback lexbuf 1; 
+    PRINT_TO_SCALAR(skip_n_char_ 7 1 (lexeme lexbuf), pos lexbuf);
+  }
+
+| ident ' '* "=>" { (* needed so that (if => 1) works *)
     let s = lexeme lexbuf in
     let end_ = String.length s - 1 in
-    let ident_end = rindex_non_spaces_from (end_ - 2) s in
+    let ident_end = non_rindex_from s (end_ - 2) ' ' in
     putback lexbuf (end_ - ident_end); 
     BAREWORD(String.sub s 0 (ident_end+1), pos lexbuf)
   }
@@ -207,25 +389,25 @@ rule token = parse
     COMPACT_HASH_SUBSCRIPT(lexeme lexbuf, pos lexbuf)
   }
 
-| '@' { AT }
-| '$' { DOLLAR }
-| '$' '#' { ARRAYLEN }
-| '%' ['$' '{'] { putback lexbuf 1; PERCENT }
-| '&' ['$' '{'] { putback lexbuf 1; AMPERSAND }
-| '*' ['$' '{'] { putback lexbuf 1; if lexeme_start lexbuf = !not_ok_for_match then MULT else STAR }
+| '@' { AT(pos lexbuf) }
+| '$' { DOLLAR(pos lexbuf) }
+| '$' '#' { ARRAYLEN(pos lexbuf) }
+| '%' ['$' '{'] { putback lexbuf 1; PERCENT(pos lexbuf) }
+| '&' ['$' '{'] { putback lexbuf 1; AMPERSAND(pos lexbuf) }
+| '*' ['$' '{'] { putback lexbuf 1; if lexeme_start lexbuf = !not_ok_for_match then MULT(pos lexbuf) else STAR(pos lexbuf) }
 
 
-| ';' { SEMI_COLON }
-| '(' { PAREN }
-| '{' { BRACKET }
-| "+{"{ BRACKET_HASHREF }
-| '[' { ARRAYREF }
-| ')' { not_ok_for_match := lexeme_end lexbuf; PAREN_END }
-| '}' { not_ok_for_match := lexeme_end lexbuf; BRACKET_END }
-| ']' { not_ok_for_match := lexeme_end lexbuf; ARRAYREF_END }
+| ';' { SEMI_COLON(pos lexbuf) }
+| '(' { PAREN(pos lexbuf) }
+| '{' { BRACKET(pos lexbuf) }
+| "+{"{ BRACKET_HASHREF(pos lexbuf) }
+| '[' { ARRAYREF(pos lexbuf) }
+| ')' { not_ok_for_match := lexeme_end lexbuf; PAREN_END(pos lexbuf) }
+| '}' { not_ok_for_match := lexeme_end lexbuf; BRACKET_END(pos lexbuf) }
+| ']' { not_ok_for_match := lexeme_end lexbuf; ARRAYREF_END(pos lexbuf) }
 
 | "/" {
-    if lexeme_start lexbuf = !not_ok_for_match then DIVISION 
+    if lexeme_start lexbuf = !not_ok_for_match then DIVISION(pos lexbuf)
     else (
       delimit_char := '/' ;
       current_string_start_line := !current_file_current_line;
@@ -237,7 +419,7 @@ rule token = parse
   }
 
 | "/=" {
-    if lexeme_start lexbuf = !not_ok_for_match then ASSIGN(lexeme lexbuf)
+    if lexeme_start lexbuf = !not_ok_for_match then ASSIGN(lexeme lexbuf, pos lexbuf)
     else (
       putback lexbuf 1 ;
       delimit_char := '/' ;
@@ -268,10 +450,10 @@ rule token = parse
 | "s" pattern_separator {
   delimit_char := lexeme_char lexbuf 1 ;
   current_string_start_line := !current_file_current_line;
-  let s1, (_, start, _) = ins delimited_string lexbuf in 
-  let s2, (_, _, end_)  = ins delimited_string lexbuf in 
+  let s1, (start, _) = ins delimited_string lexbuf in 
+  let s2, (_, end_)  = ins delimited_string lexbuf in 
   let opts, _ = ins pattern_options lexbuf in
-  let pos = !current_file, start, end_ in
+  let pos = start, end_ in
   check_multi_line_delimited_string (Some opts) pos ;
   PATTERN_SUBST(s1, s2, opts, pos)
 }
@@ -279,43 +461,43 @@ rule token = parse
 | "tr" pattern_separator {
   delimit_char := lexeme_char lexbuf 2 ;
   current_string_start_line := !current_file_current_line;
-  let s1, (_, start, _) = ins delimited_string lexbuf in 
-  let s2, (_, _, end_)  = ins delimited_string lexbuf in 
+  let s1, (start, _) = ins delimited_string lexbuf in 
+  let s2, (_, end_)  = ins delimited_string lexbuf in 
   let opts, _ = ins pattern_options lexbuf in
-  let pos = !current_file, start, end_ in
+  let pos = start, end_ in
   check_multi_line_delimited_string None pos ;
   PATTERN_SUBST(s1, s2, opts, pos)
 }
     
 | "<<" ident { 
     not_ok_for_match := lexeme_end lexbuf; 
-    HERE_DOC(here_doc_next_line (skip_n_char 2 (lexeme lexbuf)) true)
+    HERE_DOC(here_doc_next_line (skip_n_char 2 (lexeme lexbuf)) true, pos lexbuf)
   }
 | "<<'" ident "'" { 
     not_ok_for_match := lexeme_end lexbuf; 
-    HERE_DOC(here_doc_next_line (skip_n_char_ 3 1 (lexeme lexbuf)) false)
+    HERE_DOC(here_doc_next_line (skip_n_char_ 3 1 (lexeme lexbuf)) false, pos lexbuf)
   }
-| "<<" space+ "'"
-| "<<" space+ ident { 
+| "<<" ' '+ "'"
+| "<<" ' '+ ident { 
     failwith (pos2sfull_with (lexeme_start lexbuf + 2) (lexeme_end lexbuf) ^ "No space allowed between \"<<\" and the marker")
   }
-| "<<" space* '"' { 
+| "<<" ' '* '"' { 
     failwith (pos2sfull_with (lexeme_start lexbuf + 2) (lexeme_end lexbuf) ^ "Don't use <<\"MARK\", use <<MARK instead")
   }
 
 | "\\" stash
 | "\\" ['0'-'9' 'A'-'Z' 'a'-'z']
-| "\\" space* '('
-    { putback lexbuf 1; REF }
+| "\\" ' '* '('
+    { putback lexbuf 1; REF(pos lexbuf) }
 
-| "sub" space+ ident space* '(' [ '$' '@' '\\' '&' ';' ]* ')' {
+| "sub" ' '+ ident ' '* '(' [ '$' '@' '\\' '&' ';' ]* ')' {
     (* bloody prototypes, must be caught especially otherwise "($)" is badly tokenized *)
     (* and alas "($@)" is both valid as an expression and a prototype *)
     let s = lexeme lexbuf in
-    let ident_start = index_non_spaces_from 3 s in
+    let ident_start = non_index_from s 3 ' ' in
       
     let proto_start = String.index_from s ident_start '(' in
-    let ident_end = rindex_non_spaces_from (proto_start-1) s in
+    let ident_end = non_rindex_from s (proto_start-1) ' ' in
     let ident = String.sub s ident_start (ident_end - ident_start + 1) in
     let prototype = skip_n_char_ (proto_start + 1) 1 s in
 
@@ -327,8 +509,8 @@ rule token = parse
 
 | stash ident? ("::" ident)+ { typed_fqident_from_lexbuf lexbuf }
 | stash ident
-| '$' [^ '{' ' ' '\t' '\n' '$']
-| "$^" [^ '{' ' ' '\t' '\n'] { typed_ident_from_lexbuf lexbuf }
+| '$' [^ '{' ' ' '\n' '$']
+| "$^" [^ '{' ' ' '\n'] { typed_ident_from_lexbuf lexbuf }
 
 | "$$" [^ 'a'-'z' 'A'-'Z' '_' '{'] { putback lexbuf 1; SCALAR_IDENT(None, "$$", pos lexbuf) }
 
@@ -339,7 +521,7 @@ rule token = parse
 
 | ident ":" { LABEL(lexeme lexbuf, pos lexbuf) }
 
-| '-' [ 'a'-'z' 'A'-'Z' ] [ ' ' '\t' '(' ] { putback lexbuf 1; BAREWORD(lexeme lexbuf, pos lexbuf) }
+| '-' [ 'a'-'z' 'A'-'Z' ] [ ' ' '(' ] { putback lexbuf 1; BAREWORD(lexeme lexbuf, pos lexbuf) }
 
 | ['0'-'9'] ['0'-'9' '_']* ('.' ['0'-'9'] ['0'-'9' '_']*)+ 
 | 'v' ['0'-'9'] ['0'-'9' '_']* ('.' ['0'-'9'] ['0'-'9' '_']*)* 
@@ -367,7 +549,7 @@ rule token = parse
 | "qq(" { ins_to_string qqstring lexbuf }
 | "qw(" { let s, pos = ins qstring lexbuf in QUOTEWORDS(s, pos) }
 
-| eof   { EOF }
+| eof   { EOF(pos lexbuf) }
 | _ { failwith (Printf.sprintf "%serror tokenizing <<%s>>" (pos2sfull lexbuf) (lexeme lexbuf)) }
 
 and string = parse
@@ -480,7 +662,6 @@ and pattern_options = parse
 and pod_command = parse
 | [^ '\n' ]+ {
   let s = lexeme lexbuf in
-  if String.contains s '\t' then failwith(pos2sfull lexbuf ^ "tabulations not accepted in POD commands") else
   let command = String.sub s 0 (try String.index s ' ' with Not_found -> String.length s) in
   match command with
   | "cut" ->
diff --git a/perl_checker.src/parser.mly b/perl_checker.src/parser.mly
index 0157714..e94f622 100644
--- a/perl_checker.src/parser.mly
+++ b/perl_checker.src/parser.mly
@@ -1,69 +1,56 @@
 %{ (* -*- caml -*- *)
   open Types
   open Common
+  open Parser_helper
 
-  let die msg =
-      failwith (Info.pos2sfull_current (Parsing.symbol_start()) (Parsing.symbol_end()) ^ msg)
+  let parse_error msg = die_rule msg
 
-  let parse_error msg = die msg
-
-  let warn msg = if false then prerr_endline msg
-
-  let to_Ident = function
-   | BAREWORD(name, pos) -> Ident(I_raw, None, name, pos)
-   | SCALAR_IDENT(fq, name, pos) -> Ident(I_scalar, fq, name, pos)
-   | ARRAY_IDENT (fq, name, pos) -> Ident(I_array,  fq, name, pos)
-   | HASH_IDENT  (fq, name, pos) -> Ident(I_hash,   fq, name, pos)
-   | FUNC_IDENT  (fq, name, pos) -> Ident(I_func,   fq, name, pos)
-   | STAR_IDENT  (fq, name, pos) -> Ident(I_star,   fq, name, pos)
-   | RAW_IDENT   (fq, name, pos) -> Ident(I_raw,    fq, name, pos)
-   | _ -> internal_error "Parser.to_Ident"
 %}
 
 
-%token EOF
-%token <Types.pos> SPACE
-%token <string * Types.pos> NUM STRING BAREWORD REVISION COMMENT POD LABEL
-%token <string * Types.pos> COMMAND_STRING QUOTEWORDS COMPACT_HASH_SUBSCRIPT
-%token <(string * Types.pos) ref> HERE_DOC
-%token <string * string * Types.pos> PATTERN
-%token <string * string * string * Types.pos> PATTERN_SUBST
-
-%token <string option * string * Types.pos> SCALAR_IDENT ARRAY_IDENT HASH_IDENT FUNC_IDENT STAR_IDENT RAW_IDENT ARRAYLEN_IDENT
-%token <string * string * Types.pos> FUNC_DECL_WITH_PROTO
-
-%token IF ELSIF ELSE UNLESS DO WHILE UNTIL MY CONTINUE SUB LOCAL
-%token <string> FOR
-%token USE PACKAGE BEGIN END
-%token <Types.pos> PRINT NEW FORMAT
-%token AT DOLLAR PERCENT AMPERSAND STAR ARRAYLEN
-%token SEMI_COLON PKG_SCOPE
-%token PAREN PAREN_END
-%token BRACKET BRACKET_END BRACKET_HASHREF
-%token ARRAYREF ARRAYREF_END
-
-
-%token ARROW
-%token INCR DECR
-%token POWER
-%token TIGHT_NOT BIT_NEG REF
-%token PATTERN_MATCH PATTERN_MATCH_NOT
-%token MULT DIVISION MODULO REPLICATE
-%token PLUS MINUS CONCAT
-%token BIT_SHIFT_LEFT BIT_SHIFT_RIGHT
-%token LT GT
-%token <string> COMPARE_OP EQ_OP
-%token BIT_AND
-%token BIT_OR BIT_XOR
-%token AND_TIGHT
-%token OR_TIGHT
-%token DOTDOT DOTDOTDOT
-%token QUESTION_MARK COLON
-%token <string> ASSIGN
-%token COMMA RIGHT_ARROW
-%token NOT
-%token AND
-%token OR XOR
+%token <unit   * (Types.spaces * Types.raw_pos)> EOF
+%token <string * (Types.spaces * Types.raw_pos)> NUM STRING BAREWORD BAREWORD_PAREN REVISION COMMENT POD LABEL PRINT_TO_STAR PRINT_TO_SCALAR
+%token <string * (Types.spaces * Types.raw_pos)> COMMAND_STRING QUOTEWORDS COMPACT_HASH_SUBSCRIPT
+
+%token <(string * Types.raw_pos) ref * (Types.spaces * Types.raw_pos)> HERE_DOC
+%token <(string * string) * (Types.spaces * Types.raw_pos)> PATTERN
+%token <(string * string * string) * (Types.spaces * Types.raw_pos)> PATTERN_SUBST
+
+%token <(string option * string) * (Types.spaces * Types.raw_pos)> SCALAR_IDENT ARRAY_IDENT HASH_IDENT FUNC_IDENT STAR_IDENT RAW_IDENT RAW_IDENT_PAREN ARRAYLEN_IDENT
+%token <(string * string) * (Types.spaces * Types.raw_pos)> FUNC_DECL_WITH_PROTO
+
+%token <string * (Types.spaces * Types.raw_pos)> FOR PRINT
+%token <unit   * (Types.spaces * Types.raw_pos)> NEW FORMAT
+%token <string * (Types.spaces * Types.raw_pos)> COMPARE_OP EQ_OP
+%token <string * (Types.spaces * Types.raw_pos)> ASSIGN
+
+%token <unit   * (Types.spaces * Types.raw_pos)> IF ELSIF ELSE UNLESS DO WHILE UNTIL MY CONTINUE SUB LOCAL
+%token <unit   * (Types.spaces * Types.raw_pos)> USE PACKAGE BEGIN END
+%token <unit   * (Types.spaces * Types.raw_pos)> AT DOLLAR PERCENT AMPERSAND STAR ARRAYLEN
+%token <unit   * (Types.spaces * Types.raw_pos)> SEMI_COLON PKG_SCOPE
+%token <unit   * (Types.spaces * Types.raw_pos)> PAREN PAREN_END
+%token <unit   * (Types.spaces * Types.raw_pos)> BRACKET BRACKET_END BRACKET_HASHREF
+%token <unit   * (Types.spaces * Types.raw_pos)> ARRAYREF ARRAYREF_END
+
+%token <unit   * (Types.spaces * Types.raw_pos)> ARROW
+%token <unit   * (Types.spaces * Types.raw_pos)> INCR DECR
+%token <unit   * (Types.spaces * Types.raw_pos)> POWER
+%token <unit   * (Types.spaces * Types.raw_pos)> TIGHT_NOT BIT_NEG REF
+%token <unit   * (Types.spaces * Types.raw_pos)> PATTERN_MATCH PATTERN_MATCH_NOT
+%token <unit   * (Types.spaces * Types.raw_pos)> MULT DIVISION MODULO REPLICATE
+%token <unit   * (Types.spaces * Types.raw_pos)> PLUS MINUS CONCAT
+%token <unit   * (Types.spaces * Types.raw_pos)> BIT_SHIFT_LEFT BIT_SHIFT_RIGHT
+%token <unit   * (Types.spaces * Types.raw_pos)> LT GT
+%token <unit   * (Types.spaces * Types.raw_pos)> BIT_AND
+%token <unit   * (Types.spaces * Types.raw_pos)> BIT_OR BIT_XOR
+%token <unit   * (Types.spaces * Types.raw_pos)> AND_TIGHT
+%token <unit   * (Types.spaces * Types.raw_pos)> OR_TIGHT
+%token <unit   * (Types.spaces * Types.raw_pos)> DOTDOT DOTDOTDOT
+%token <unit   * (Types.spaces * Types.raw_pos)> QUESTION_MARK COLON
+%token <unit   * (Types.spaces * Types.raw_pos)> COMMA RIGHT_ARROW
+%token <unit   * (Types.spaces * Types.raw_pos)> NOT
+%token <unit   * (Types.spaces * Types.raw_pos)> AND
+%token <unit   * (Types.spaces * Types.raw_pos)> OR XOR
 
 %nonassoc PREC_LOW
 %nonassoc LOOPEX
@@ -94,289 +81,299 @@
 %left       ARROW
 
 %nonassoc PAREN_END
-%left PAREN
+%left PAREN PREC_HIGH
 %left ARRAYREF BRACKET
 
-%type <string> prog
+%type <Types.fromparser list> prog
+%type <Types.fromparser * (Types.spaces * Types.raw_pos)> expr
 
 %start prog
 
 
 %%
-prog: lines EOF { "" }
+prog: lines EOF { fst $1 }
 
 lines: /* A collection of "lines" in the program */
-| {()}
-| sideff {()}
-| line lines {()}
+| {[], (Space_none, bpos)}
+| sideff {[fst $1], snd $1}
+| line lines {fst $1 :: fst $2, snd $1}
 
 line:
-| decl {()}
-| if_then_else {()}
-| loop {()}
-| LABEL {()}
-| SEMI_COLON {()}
-| sideff SEMI_COLON {()}
-| BRACKET lines BRACKET_END {()}
+| decl {$1}
+| if_then_else {$1}
+| loop {$1}
+| LABEL {sp_cr($1); Label(fst $1), snd $1}
+| semi_colon {List [], snd $1}
+| sideff semi_colon {$1}
+| BRACKET lines BRACKET_END {sp_p($2); sp_p($3); Block(fst $2), snd $1}
 
 if_then_else: /* Real conditional expressions */
-| IF     PAREN expr PAREN_END BRACKET lines BRACKET_END elsif else_ {()}
-| UNLESS PAREN expr PAREN_END BRACKET lines BRACKET_END elsif else_ {()}
+| IF     PAREN expr PAREN_END BRACKET lines BRACKET_END elsif else_ {sp_p($1); sp_n($2); sp_0($3); sp_0_or_cr($4); sp_p($5); sp_p($6); sp_p($7); Call_op("if",     fst $3 :: Block(fst $6) :: fst $8 @ fst $9), snd $1}
+| UNLESS PAREN expr PAREN_END BRACKET lines BRACKET_END elsif else_ {sp_p($1); sp_n($2); sp_0($3); sp_0_or_cr($4); sp_p($5); sp_p($6); sp_p($7); Call_op("unless", fst $3 :: Block(fst $6) :: fst $8 @ fst $9), snd $1}
 
 elsif:
-|  {()}
-| ELSIF PAREN expr PAREN_END BRACKET lines BRACKET_END elsif {()}
+|  {[], (Space_none, bpos)}
+| ELSIF PAREN expr PAREN_END BRACKET lines BRACKET_END elsif {sp_p($1); sp_n($2); sp_0($3); sp_0_or_cr($4); sp_p($5); sp_p($7); fst $3 :: Block(fst $6) :: fst $8, snd $1}
 
 else_: 
-|            { () }
-| ELSE BRACKET lines BRACKET_END { () }
+|            { [], (Space_none, bpos) }
+| ELSE BRACKET lines BRACKET_END {sp_p($1); sp_n($2); sp_p($3); sp_p($4); [ Block(fst $3) ], snd $1 }
 
 loop:
-| WHILE PAREN expr PAREN_END BRACKET lines BRACKET_END cont {()}
-| UNTIL PAREN expr PAREN_END BRACKET lines BRACKET_END cont {()}
-| FOR MY SCALAR_IDENT PAREN expr PAREN_END BRACKET lines BRACKET_END cont {()}
-| FOR SCALAR_IDENT PAREN expr PAREN_END BRACKET lines BRACKET_END cont {()}
-| FOR PAREN expr PAREN_END BRACKET lines BRACKET_END cont {()}
-| FOR PAREN expr_or_empty SEMI_COLON expr_or_empty SEMI_COLON expr_or_empty PAREN_END BRACKET lines BRACKET_END {()}
+| WHILE PAREN expr PAREN_END BRACKET lines BRACKET_END cont {sp_p($1); sp_n($2); sp_0($3); sp_0_or_cr($4); sp_p($5); sp_p($6); sp_p($7); Call_op("while", fst $3 :: fst $6), snd $1}
+| UNTIL PAREN expr PAREN_END BRACKET lines BRACKET_END cont {sp_p($1); sp_n($2); sp_0($3); sp_0_or_cr($4); sp_p($5); sp_p($6); sp_p($7); Call_op("until", fst $3 :: fst $6), snd $1}
+| FOR MY SCALAR_IDENT PAREN expr PAREN_END BRACKET lines BRACKET_END cont {sp_p($1); check_foreach($1); sp_n($4); sp_0($5); sp_0_or_cr($6); sp_p($7); sp_p($8); sp_p($9); Call_op("foreach my", to_Ident $3 :: fst $5 :: fst $8), snd $1}
+| FOR SCALAR_IDENT PAREN expr PAREN_END BRACKET lines BRACKET_END cont { die_rule "don't use for without \"my\"ing the iteration variable" }
+| FOR PAREN expr PAREN_END BRACKET lines BRACKET_END cont {sp_p($1); sp_n($2); sp_0($3); sp_0_or_cr($4); sp_p($5); sp_p($6); sp_p($7); check_foreach($1); Call_op("foreach", fst $3 :: fst $6), snd $1}
+| FOR PAREN expr_or_empty semi_colon expr_or_empty semi_colon expr_or_empty PAREN_END BRACKET lines BRACKET_END {sp_p($1); sp_n($2); sp_0($3); check_for($1); Call_op("for", fst $3 :: fst $5 :: fst $7 :: fst $10), snd $1}
 
 cont: /* Continue blocks */
-|  {()}
-| CONTINUE BRACKET lines BRACKET_END {()}
+|  {(), (Space_none, bpos)}
+| CONTINUE BRACKET lines BRACKET_END {sp_p($1); sp_n($2); sp_p($4); (), snd $1}
 
 sideff: /* An expression which may have a side-effect */
-| error {()}
-| expr  {()}
-| expr   IF    expr {()}
-| expr UNLESS  expr {()}
-| expr  WHILE  expr {()}
-| expr  UNTIL  expr {()}
-| expr  FOR    expr {()}
+| expr  {$1}
+| expr   IF    expr {let f = "if"     in sp_p($2); sp_p($3);                    check_no_paren f $3; Call_op(f ^ " infix",     [ fst $1 ; fst $3 ]), snd $1}
+| expr UNLESS  expr {let f = "unless" in sp_p($2); sp_p($3);                    check_no_paren f $3; Call_op(f ^ " infix", [ fst $1 ; fst $3 ]), snd $1}
+| expr  WHILE  expr {let f = "while"  in sp_p($2); sp_p($3);                    check_no_paren f $3; Call_op(f ^ " infix",  [ fst $1 ; fst $3 ]), snd $1}
+| expr  UNTIL  expr {let f = "until"  in sp_p($2); sp_p($3);                    check_no_paren f $3; Call_op(f ^ " infix",  [ fst $1 ; fst $3 ]), snd $1}
+| expr  FOR    expr {let f = "for"    in sp_p($2); sp_p($3); check_foreach($2); check_no_paren f $3; Call_op(f ^ " infix",    [ fst $1 ; fst $3 ]), snd $1}
 
 decl:
-| FORMAT BAREWORD ASSIGN {()}
-| FORMAT ASSIGN {()}
-| func_decl SEMI_COLON {()}
-| func_decl BRACKET lines BRACKET_END {()}
-| func_decl BRACKET BRACKET expr BRACKET_END BRACKET_END {()}
-| func_decl BRACKET BRACKET expr BRACKET_END SEMI_COLON BRACKET_END {()}
-| PACKAGE word SEMI_COLON {()}
-| BEGIN BRACKET lines BRACKET_END {()}
-| END BRACKET lines BRACKET_END {()}
-| use {()}
+| FORMAT BAREWORD ASSIGN {Too_complex, snd $1}
+| FORMAT ASSIGN {Too_complex, snd $1}
+| func_decl semi_colon {die_rule (if snd (fst $1) = "" then "there is no need to pre-declare in Perl!" else "please don't use prototype pre-declaration") }
+| func_decl BRACKET BRACKET_END {sp_n($2); sp_0_or_cr($3); let name, proto = fst $1 in sub_declaration (name, proto) [], snd $1}
+| func_decl BRACKET lines BRACKET_END {sp_n($2); sp_p($3); sp_p($4); sub_declaration (fst $1) (fst $3), snd $1}
+| func_decl BRACKET BRACKET expr BRACKET_END            BRACKET_END {sp_n($2); sp_p($3); sp_p($4); sp_p($5); sp_p($6); sub_declaration (fst $1) [Ref(I_hash, fst $4)], snd $1}
+| func_decl BRACKET BRACKET expr BRACKET_END semi_colon BRACKET_END {sp_n($2); sp_p($3); sp_p($4); sp_p($5); sp_p($7); sub_declaration (fst $1) [Ref(I_hash, fst $4)], snd $1}
+| PACKAGE word semi_colon {sp_0_or_cr($1); Package(fst $2), snd $1}
+| BEGIN BRACKET lines BRACKET_END {sp_0_or_cr($1); sp_1($2); sp_p($3); sp_p($4); Sub_declaration(Ident(None, "BEGIN", get_pos $1), "", fst $3), snd $1}
+| END   BRACKET lines BRACKET_END {sp_0_or_cr($1); sp_1($2); sp_p($3); sp_p($4); Sub_declaration(Ident(None, "END",   get_pos $1), "", fst $3), snd $1}
+| use {$1}
 
 use:
-| use_word listexpr SEMI_COLON {()}
+| use_word listexpr semi_colon {Use(fst $1, fst $2), snd $1}
 
 use_word:
-| use_revision word COLON {()}
-| use_revision word {()}
-| use_revision {()}
+| use_revision word comma {fst $2, snd $1}
+| use_revision word {fst $2, snd $1}
+| use_revision {Ident(None, "", get_pos $1), snd $1}
 
 use_revision:
-| USE REVISION COLON {()}
-| USE REVISION {()}
-| USE {()}
+| USE REVISION comma {$1}
+| USE REVISION {$1}
+| USE {$1}
 
 func_decl:
-| SUB word {()}
-| FUNC_DECL_WITH_PROTO {()}
+| SUB word {(fst $2, ""), snd $1}
+| FUNC_DECL_WITH_PROTO {(Ident(None, fst(fst $1), get_pos $1), snd(fst $1)), snd $1}
 
 listexpr: /* Basic list expressions */
-| %prec PREC_LOW {()}
-| argexpr %prec PREC_LOW {()}
+| %prec PREC_LOW {[], (Space_none, bpos)}
+| argexpr %prec PREC_LOW {$1}
 
 expr: /* Ordinary expressions; logical combinations */
-| expr AND expr {()}
-| expr OR expr {()}
-| argexpr %prec PREC_LOW {()}
+| expr AND expr {sp_p($2); sp_p($3); Call_op("and", [ fst $1; fst $3 ]), snd $1}
+| expr OR  expr {sp_p($2); sp_p($3); Call_op("or",  [ fst $1; fst $3 ]), snd $1}
+| argexpr %prec PREC_LOW {List(fst $1), snd $1}
 
 argexpr: /* Expressions are a list of terms joined by commas */
-| argexpr comma {()}
-| argexpr comma term {()}
-| argexpr comma BRACKET expr BRACKET_END {()}
-| term %prec PREC_LOW {()}
+| argexpr comma {fst $1, snd $1}
+| argexpr comma term {fst $1 @ [fst $3], snd $1}
+| argexpr comma BRACKET expr BRACKET_END {sp_p($3); sp_p($5); fst $1 @ [ Ref(I_hash, fst $4) ], snd $1}
+| term %prec PREC_LOW {[fst $1], snd $1}
 
 /********************************************************************************/
 term:
-| term binop term {()}
-| term binop BRACKET expr BRACKET_END {()}
-| term LT term {()}
-| term LT BRACKET expr BRACKET_END {()}
-| term GT term {()}
-| term GT BRACKET expr BRACKET_END {()}
+| term binop term {Call_op(fst $2, [fst $1 ; fst $3]), snd $1}
+| term binop BRACKET expr BRACKET_END {sp_p($3); sp_p($5); Call_op(fst $2, [fst $1 ; Ref(I_hash, fst $4)]), snd $1}
+| term LT term {Call_op("<", [fst $1 ; fst $3]), snd $1}
+| term GT term {Call_op(">", [fst $1 ; fst $3]), snd $1}
+
+| term PATTERN_MATCH     PATTERN   {Call_op("m//",  fst $1 :: from_PATTERN $3), snd $1}
+| term PATTERN_MATCH_NOT PATTERN   {Call_op("!m//", fst $1 :: from_PATTERN $3), snd $1}
+| term PATTERN_MATCH PATTERN_SUBST {Call_op("s///", fst $1 :: from_PATTERN_SUBST $3), snd $1}
+
+| term PATTERN_MATCH     scalar { Too_complex, snd $1 }
+| term PATTERN_MATCH_NOT scalar { Too_complex, snd $1 }
+
+| term PATTERN_MATCH     STRING {failwith (msg_with_pos (snd (snd $3)) "use a regexp, not a string")}
+| term PATTERN_MATCH_NOT STRING {failwith (msg_with_pos (snd (snd $3)) "use a regexp, not a string")}
 
 /* Unary operators and terms */
-| MINUS term %prec UNARY_MINUS {()}
-| TIGHT_NOT term {()}
-| BIT_NEG term {()}
-| INCR term    {()}
-| DECR term    {()}
-| term INCR    {()}
-| term DECR    {()}
+| MINUS term %prec UNARY_MINUS {Call_op("- unary", [fst $2]), snd $1}
+| TIGHT_NOT term {Call_op("not", [fst $2]), snd $1}
+| BIT_NEG term {Call_op("~", [fst $2]), snd $1}
+| INCR term    {Call_op("++", [fst $2]), snd $1}
+| DECR term    {Call_op("--", [fst $2]), snd $1}
+| term INCR    {sp_0($2); Call_op("++ post", [fst $1]), snd $1}
+| term DECR    {sp_0($2); Call_op("-- post", [fst $1]), snd $1}
 
-| NOT argexpr  {()}
+| NOT argexpr  {Call_op("not", fst $2), snd $1}
 
 
 /* Constructors for anonymous data */
-| arrayref {()} /* [ 1, 2 ] */
-| BRACKET BRACKET_END {()} /* empty hash */
-| BRACKET_HASHREF expr_or_empty BRACKET_END %prec PAREN {()} /* { foo => "Bar" } */
-| SUB BRACKET lines BRACKET_END %prec PAREN {()}
 
-| termdo {()}
-| term question_mark_ colon_ {()}
-| REF term {()} /* \$x, \@y, \%z */
-| REF BRACKET expr BRACKET_END {()} /* \$x, \@y, \%z */
-| my %prec UNIOP {()}
-| LOCAL term    %prec UNIOP {()}
+| ARRAYREF ARRAYREF_END {sp_0($2); Ref(I_array, List[]), snd $1}
+| arrayref_start ARRAYREF_END {Ref(I_array, List(fst $1)), snd $1}
+| arrayref_start expr ARRAYREF_END {Ref(I_array, List(fst $1 @ [fst $2])), snd $1}
+| arrayref_start BRACKET expr BRACKET_END ARRAYREF_END {Ref(I_array, List(fst $1 @ [Ref(I_hash, fst $3)])), snd $1}
 
-| parenthesized {()} /* (1, 2) */
-| parenthesized arrayref {()} /* list slice */
+| BRACKET BRACKET_END {Ref(I_hash, List []), snd $1} /* empty hash */
+| BRACKET_HASHREF expr BRACKET_END %prec PREC_HIGH {sp_p($3); Ref(I_hash, fst $2), snd $1} /* { foo => "Bar" } */
+| SUB BRACKET BRACKET_END %prec PREC_HIGH {sp_n($2); sp_0($3); Anonymous_sub(Block[]), snd $1}
+| SUB BRACKET lines BRACKET_END %prec PREC_HIGH {sp_n($2); sp_p($4); Anonymous_sub(Block(fst $3)), snd $1}
 
-| variable {()}
+| termdo {$1}
+| term question_mark_ colon_ { Call_op("?:", [ fst $1 ; fst $2; fst $3]), snd $1}
+| REF term { Ref(I_scalar, fst $2), snd $1} /* \$x, \@y, \%z */
+| my %prec UNIOP {List(fst $1), snd $1}
+| LOCAL term    %prec UNIOP {Local(fst $2), snd $1}
 
-| subscripted {()}
+| parenthesized {List(fst $1), snd $1} /* (1, 2) */
+| parenthesized arrayref {Deref_with(I_array, List(fst $1), List(fst $2)), snd $1} /* list slice */
 
-| array arrayref {()} /* array slice */
-| array BRACKET expr BRACKET_END {()} /* @hash{@keys} */
+| variable {$1}
 
+| subscripted {$1}
 
-/* function_calls */
-| func parenthesized {()} /* &foo(@args) */
-| word argexpr {()} /* foo(@args) */
-| word BRACKET lines BRACKET_END listexpr %prec LSTOP {()} /* map { foo } @bar */
-| word BRACKET BRACKET expr BRACKET_END BRACKET_END listexpr %prec LSTOP {()} /* map { foo } @bar */
-| word BRACKET BRACKET expr BRACKET_END SEMI_COLON BRACKET_END listexpr %prec LSTOP {()} /* map { foo } @bar */
-
-| term ARROW word_or_scalar parenthesized {warn "term->word_or_scalar(expr_or_empty) -> function_call"} /* $foo->bar(list) */
-| term ARROW word_or_scalar {warn "term->word_or_scalar -> function_call"} /* $foo->bar */
-
-| NEW word listexpr {()} /* new Class @args */
-| print listexpr {()}
-
-| word {()}
-
-| NUM {()}
-| STRING {()}
-| REVISION {()}
-| COMMAND_STRING {()}
-| QUOTEWORDS {()}
-| HERE_DOC {()}
-| PATTERN {()}
-| PATTERN_SUBST {()}
-| diamond {()}
+| array arrayref { Deref_with(I_array, fst $1, List(fst $2)), snd $1} /* array slice: @array[vals] */
+| array BRACKET expr BRACKET_END {sp_0($2); sp_0($4); Deref_with(I_hash, array_ident_to_hash_ident $1, fst $3), snd $1} /* hash slice: @hash{@keys} */
 
-diamond:
-| LT GT {()}
-| LT term GT {()}
 
-print:
-| PRINT bareword COLON {()}
-| PRINT bareword {()} /* print FH @args */
-| PRINT scalar COLON {()}
-| PRINT scalar {()} /* print $fh @args */
-| PRINT {()}
+/* function_calls */
+| func parenthesized {Call(fst $1, fst $2), snd $1} /* &foo(@args) */
+| word argexpr {check_parenthesized_first_argexpr (string_of_Ident (fst $1)) $2; Call(fst $1, fst $2), snd $1} /* foo $a, $b */
+| word_paren parenthesized {Call(fst $1, fst $2), snd $1} /* foo(@args) */
+| word BRACKET lines BRACKET_END listexpr %prec LSTOP {sp_n($2); sp_p($4); Call(fst $1, Anonymous_sub(Block(fst $3)) :: fst $5), snd $1} /* map { foo } @bar */
+| word BRACKET BRACKET expr BRACKET_END            BRACKET_END listexpr %prec LSTOP {sp_n($2); sp_p($3); sp_p($5); sp_p($6); Call(fst $1, Anonymous_sub(Ref(I_hash, fst $4)) :: fst $7), snd $1} /* map { { foo } } @bar */
+| word BRACKET BRACKET expr BRACKET_END semi_colon BRACKET_END listexpr %prec LSTOP {sp_n($2); sp_p($3); sp_p($5); sp_p($7); Call(fst $1, Anonymous_sub(Ref(I_hash, fst $4)) :: fst $8), snd $1} /* map { { foo }; } @bar */
+
+| term ARROW word_or_scalar parenthesized {sp_0($2); Method_call(fst $1, fst $3, fst $4), snd $1} /* $foo->bar(list) */
+| term ARROW word_or_scalar {sp_0($2); Method_call(fst $1, fst $3, []), snd $1} /* $foo->bar */
+
+| NEW word listexpr { Method_call(fst $2, Ident(None, "new", get_pos $1), fst $3), snd $1} /* new Class @args */
+
+| PRINT { Call_op("print", var_STDOUT :: [ var_dollar_ ]), snd $1 }
+| PRINT argexpr {check_parenthesized_first_argexpr (fst $1) $2; Call_op("print", var_STDOUT :: fst $2), snd $1 }
+| PRINT_TO_STAR argexpr { Call_op("print", Deref(I_star,   Ident(None, fst $1, get_pos $1)) :: fst $2), snd $1 }
+| PRINT_TO_SCALAR { Call_op("print", var_STDOUT :: [ Deref(I_scalar, Ident(None, fst $1, get_pos $1)) ]), snd $1 }
+| PRINT_TO_SCALAR argexpr { Call_op("print", Deref(I_scalar, Ident(None, fst $1, get_pos $1)) :: fst $2), snd $1 }
+
+| word {$1}
+
+| NUM {Num(fst $1, get_pos $1), snd $1}
+| STRING {to_String $1, snd $1}
+| REVISION {to_String $1, snd $1}
+| COMMAND_STRING {Call_op("``", [to_String $1]), snd $1}
+| QUOTEWORDS {Call_op("qw", [to_String $1]), snd $1}
+| HERE_DOC {String(fst!(fst $1), get_pos $1), snd $1}
+| PATTERN {Call_op("m//", var_dollar_ :: from_PATTERN $1), snd $1}
+| PATTERN_SUBST {Call_op("s///", var_dollar_ :: from_PATTERN_SUBST $1), snd $1}
+| diamond {$1}
 
-| PRINT bareword BRACKET {die "use parentheses around print"}
+diamond:
+| LT GT {sp_0($2); Call_op("<>", []), snd $1}
+| LT term GT {sp_0($3); Call_op("<>", [fst $2]), snd $1}
 
 subscripted: /* Some kind of subscripted expression */
-| variable PKG_SCOPE bracket_subscript {()} /* *main::{something} */
-| scalar bracket_subscript             {()} /* $foo{bar} */
-| scalar arrayref                      {()} /* $array[$element] */
-| term ARROW bracket_subscript         {()} /* somehref->{bar} */
-| term ARROW arrayref                  {()} /* somearef->[$element] */
-| term ARROW parenthesized             {()} /* $subref->(@args) */
-| subscripted bracket_subscript        {()} /* $foo->[bar]{baz} */
-| subscripted arrayref                 {()} /* $foo->[$bar][$baz] */
-| subscripted parenthesized            {()} /* $foo->{bar}(@args) */
+| variable PKG_SCOPE bracket_subscript {sp_0($2); Too_complex, snd $1} /* $foo::{something} */
+| scalar bracket_subscript             {Deref_with(I_hash , fst $1, fst      $2), snd $1} /* $foo{bar} */
+| scalar arrayref                      {Deref_with(I_array, fst $1, only_one $2), snd $1} /* $array[$element] */
+| term ARROW bracket_subscript         {sp_0($2); Deref_with(I_hash , fst $1, fst      $3), snd $1} /* somehref->{bar} */
+| term ARROW arrayref                  {sp_0($2); Deref_with(I_array, fst $1, only_one $3), snd $1} /* somearef->[$element] */
+| term ARROW parenthesized             {sp_0($2); Deref_with(I_func , fst $1, List(fst $3)), snd $1} /* $subref->(@args) */
+| subscripted bracket_subscript        {Deref_with(I_hash , fst $1, fst      $2), snd $1} /* $foo->[bar]{baz} */
+| subscripted arrayref                 {Deref_with(I_array, fst $1, only_one $2), snd $1} /* $foo->[$bar][$baz] */
+| subscripted parenthesized            {Deref_with(I_func , fst $1, List(fst $2)), snd $1} /* $foo->{bar}(@args) */
 
 arrayref:
-| arrayref_start ARRAYREF_END {()}
-| arrayref_start expr ARRAYREF_END {()}
-| arrayref_start BRACKET expr BRACKET_END ARRAYREF_END {()}
+| arrayref_start ARRAYREF_END {sp_0($2); fst $1, snd $1}
+| arrayref_start expr ARRAYREF_END {sp_0($3); fst $1 @ [fst $2], snd $1}
+| arrayref_start BRACKET expr BRACKET_END ARRAYREF_END {sp_p($2); sp_p($4); sp_0($5); fst $1 @ [Ref(I_hash, fst $3)], snd $1}
 parenthesized:
-| parenthesized_start PAREN_END {()}
-| parenthesized_start expr PAREN_END {()}
-| parenthesized_start BRACKET expr BRACKET_END PAREN_END {()}
+| parenthesized_start PAREN_END {sp_0_or_cr($2); fst $1, snd $1}
+| parenthesized_start expr PAREN_END {sp_0_or_cr($3); fst $1 @ [fst $2], snd $1}
+| parenthesized_start BRACKET expr BRACKET_END PAREN_END {sp_p($4); sp_0_or_cr($5); fst $1 @ [Ref(I_hash, fst $3)], snd $1}
 
 arrayref_start:
-| ARRAYREF {()}
-| arrayref_start expr comma {()}
-| arrayref_start BRACKET expr BRACKET_END comma {()}
+| ARRAYREF {[], snd $1}
+| arrayref_start BRACKET expr BRACKET_END comma {sp_p($4); fst $1 @ [Ref(I_hash, fst $3)], snd $1}
 parenthesized_start:
-| PAREN {()}
-| parenthesized_start expr comma {()}
-| parenthesized_start BRACKET expr BRACKET_END comma {()}
+| PAREN {[], snd $1}
+| parenthesized_start BRACKET expr BRACKET_END comma {sp_p($4); fst $1 @ [Ref(I_hash, fst $3)], snd $1}
 
 my: /* Things that can be "my"'d */
-| MY parenthesized {()}
-| MY scalar {()}
-| MY hash {()}
-| MY array {()}
+| MY parenthesized {List.map (fun e -> My e) (fst $2), snd $1}
+| MY scalar {[My(fst $2)], snd $1}
+| MY hash {[My(fst $2)], snd $1}
+| MY array {[My(fst $2)], snd $1}
 
 termdo: /* Things called with "do" */
-| DO term %prec UNIOP {()} /* do $filename */
-| DO BRACKET lines BRACKET_END %prec PAREN {()} /* do { code */
+| DO term %prec UNIOP { die_rule "\"do EXPR\" not allowed" } /* do $filename */
+| DO BRACKET lines BRACKET_END %prec PREC_HIGH {sp_n($2); sp_p($4); Block(fst $3), snd $1} /* do { code */
 
 question_mark_:
-| QUESTION_MARK term {()}
-| QUESTION_MARK BRACKET expr BRACKET_END {()}
+| QUESTION_MARK term {sp_n($1); fst $2, snd $1}
+| QUESTION_MARK BRACKET expr BRACKET_END {sp_n($1); sp_p($2); sp_p($4); Ref(I_hash, fst $3), snd $1}
 colon_:
-| COLON term {()}
-| COLON BRACKET expr BRACKET_END {()}
+| COLON term {sp_n($1); fst $2, snd $1}
+| COLON BRACKET expr BRACKET_END {sp_n($1); sp_p($2); sp_p($4); Ref(I_hash, fst $3), snd $1}
 
 bracket_subscript:
-| BRACKET expr BRACKET_END {()}
-| COMPACT_HASH_SUBSCRIPT {()}
+| BRACKET expr BRACKET_END {sp_0($1); sp_0($3); only_one_in_List $2, snd $1}
+| COMPACT_HASH_SUBSCRIPT {sp_0($1); to_String $1, snd $1}
 
 binop:
-| ASSIGN {()}
-| POWER {()}
-| MULT {()} | DIVISION {()} | MODULO {()} | REPLICATE {()}
-| PLUS {()} | MINUS {()} | CONCAT {()}
-| BIT_SHIFT_LEFT {()} | BIT_SHIFT_RIGHT {()}
-| COMPARE_OP {()}
-| EQ_OP {()}
-| BIT_AND {()}
-| BIT_OR {()} | BIT_XOR {()}
-| DOTDOT {()} | DOTDOTDOT {()}
-| AND_TIGHT {()}
-| OR_TIGHT {()} | XOR {()}
-| PATTERN_MATCH {()} | PATTERN_MATCH_NOT {()}
+| ASSIGN {"=", snd $1}
+| POWER {"**", snd $1}
+| MULT {"*", snd $1} | DIVISION {"/", snd $1} | MODULO {"%", snd $1} | REPLICATE {"x", snd $1}
+| PLUS {"+", snd $1} | MINUS {"-", snd $1} | CONCAT {".", snd $1}
+| BIT_SHIFT_LEFT {"<<", snd $1} | BIT_SHIFT_RIGHT {">>", snd $1}
+| COMPARE_OP {fst $1, snd $1}
+| EQ_OP {fst $1, snd $1}
+| BIT_AND {"&", snd $1}
+| BIT_OR {"|", snd $1} | BIT_XOR {"^", snd $1}
+| DOTDOT {"..", snd $1} | DOTDOTDOT {"...", snd $1}
+| AND_TIGHT {"&&", snd $1}
+| OR_TIGHT {"||", snd $1} | XOR {"xor", snd $1}
 
 variable:
-| scalar   %prec PAREN {()}
-| star     %prec PAREN {()}
-| hash     %prec PAREN {()}
-| array    %prec PAREN {()}
-| arraylen %prec PAREN {()} /* $#x, $#{ something } */
-| func     %prec PAREN {()} /* &foo; */
+| scalar   %prec PREC_HIGH {$1}
+| star     %prec PREC_HIGH {$1}
+| hash     %prec PREC_HIGH {$1}
+| array    %prec PREC_HIGH {$1}
+| arraylen %prec PREC_HIGH {$1} /* $#x, $#{ something } */
+| func     %prec PREC_HIGH {$1} /* &foo; */
 
 word:
-| bareword { fst $1 }
-| RAW_IDENT { 
-    match $1 with
-    | None, name, _ -> name
-    | Some s, name, _ -> s ^ "::" ^ name
-  }
+| bareword { $1 }
+| RAW_IDENT { to_Ident $1, snd $1 }
+
+comma: COMMA {$1} | RIGHT_ARROW {sp_p($1); $1}
 
-comma: COMMA {()} | RIGHT_ARROW {()}
+semi_colon: SEMI_COLON {sp_0($1); $1}
 
 word_or_scalar:
-| word      {()}
-| scalar    {()}
+| word      {$1}
+| scalar    {$1}
+| word_paren {$1}
 
 bareword:
-| NEW { "new", $1 }
-| FORMAT { "format", $1 }
-| BAREWORD { $1 }
-
-arraylen: ARRAYLEN_IDENT {()} | ARRAYLEN  scalar {()} | ARRAYLEN  BRACKET lines BRACKET_END {()}
-scalar:   SCALAR_IDENT   {()} | DOLLAR    scalar {()} | DOLLAR    BRACKET lines BRACKET_END {()} | DOLLAR BRACKET BRACKET expr BRACKET_END BRACKET_END {()}
-func:     FUNC_IDENT     {()} | AMPERSAND scalar {()} | AMPERSAND BRACKET lines BRACKET_END {()}
-array:    ARRAY_IDENT    {()} | AT        scalar {()} | AT        BRACKET lines BRACKET_END {()}
-hash:     HASH_IDENT     {()} | PERCENT   scalar {()} | PERCENT   BRACKET lines BRACKET_END {()}
-star:     STAR_IDENT     {()} | STAR      scalar {()} | STAR      BRACKET lines BRACKET_END {()}
-
-expr_or_empty: {()} | expr {()}
-
+| NEW { Ident(None, "new", get_pos $1), snd $1 }
+| FORMAT { Ident(None, "format", get_pos $1), snd $1 }
+| BAREWORD { Ident(None, fst $1, get_pos $1), snd $1 }
+
+word_paren:
+| BAREWORD_PAREN { Ident(None, fst $1, get_pos $1), snd $1 }
+| RAW_IDENT_PAREN { to_Ident $1, snd $1 }
+
+arraylen: ARRAYLEN_IDENT {Deref(I_arraylen, to_Ident $1), snd $1} | ARRAYLEN  scalar {Deref(I_arraylen, fst $2), snd $1} | ARRAYLEN  BRACKET lines BRACKET_END {sp_0($2); Deref(I_arraylen, Block(fst $3)), snd $1}
+scalar:   SCALAR_IDENT   {Deref(I_scalar  , to_Ident $1), snd $1} | DOLLAR    scalar {Deref(I_scalar  , fst $2), snd $1} | DOLLAR    BRACKET lines BRACKET_END {sp_0($2); Deref(I_scalar  , Block(fst $3)), snd $1} | DOLLAR BRACKET BRACKET expr BRACKET_END BRACKET_END {sp_0($2); sp_0($3); sp_p($5); sp_0($6); Deref(I_scalar, Ref(I_hash, fst $4)), snd $1}
+func:     FUNC_IDENT     {Deref(I_func    , to_Ident $1), snd $1} | AMPERSAND scalar {Deref(I_func    , fst $2), snd $1} | AMPERSAND BRACKET lines BRACKET_END {sp_0($2); Deref(I_func    , Block(fst $3)), snd $1}
+array:    ARRAY_IDENT    {Deref(I_array   , to_Ident $1), snd $1} | AT        scalar {Deref(I_array   , fst $2), snd $1} | AT        BRACKET lines BRACKET_END {sp_0($2); Deref(I_array   , Block(fst $3)), snd $1}
+hash:     HASH_IDENT     {Deref(I_hash    , to_Ident $1), snd $1} | PERCENT   scalar {Deref(I_hash    , fst $2), snd $1} | PERCENT   BRACKET lines BRACKET_END {sp_0($2); Deref(I_hash    , Block(fst $3)), snd $1}
+star:     STAR_IDENT     {Deref(I_star    , to_Ident $1), snd $1} | STAR      scalar {Deref(I_star    , fst $2), snd $1} | STAR      BRACKET lines BRACKET_END {sp_0($2); Deref(I_star    , Block(fst $3)), snd $1}
+
+expr_or_empty: {Block [], (Space_none, bpos)} | expr {$1}
diff --git a/perl_checker.src/parser_helper.ml b/perl_checker.src/parser_helper.ml
new file mode 100644
index 0000000..cc91c83
--- /dev/null
+++ b/perl_checker.src/parser_helper.ml
@@ -0,0 +1,125 @@
+open Types
+open Common
+
+let bpos = -1, -1
+let msg_with_pos (start, end_) msg = Info.pos2sfull_current start end_ ^ msg
+let die_with_pos raw_pos msg = failwith      (msg_with_pos raw_pos msg)
+let warn         raw_pos msg = prerr_endline (msg_with_pos raw_pos msg)
+
+let die_rule msg = die_with_pos (Parsing.symbol_start(), Parsing.symbol_end()) msg
+let debug msg = if false then prerr_endline msg
+
+let raw_pos2pos(a, b) = !Info.current_file, a, b
+let get_pos (_, (_, pos)) = raw_pos2pos pos
+
+let warn_too_many_space start = warn (start, start) "you should have only one space here"
+let warn_no_space	start = warn (start, start) "you should have a space here"
+let warn_cr		start = warn (start, start) "you should not have a carriage-return (\\n) here"
+let warn_space		start = warn (start, start) "you should not have a space here"
+
+let sp_0(_, (spaces, (start, _))) =
+  match spaces with
+  | Space_none -> ()
+  | Space_0 -> ()
+  | Space_1
+  | Space_n -> warn_space start
+  | Space_cr -> warn_cr start
+
+let sp_0_or_cr(_, (spaces, (start, _))) =
+  match spaces with
+  | Space_none -> ()
+  | Space_0 -> ()
+  | Space_1
+  | Space_n -> warn_space start
+  | Space_cr -> ()
+
+let sp_1(_, (spaces, (start, _))) =
+  match spaces with
+  | Space_none -> ()
+  | Space_0 -> warn_no_space start
+  | Space_1 -> ()
+  | Space_n -> warn_too_many_space start
+  | Space_cr -> warn_cr start
+
+let sp_n(_, (spaces, (start, _))) =
+  match spaces with
+  | Space_none -> ()
+  | Space_0 -> warn_no_space start
+  | Space_1 -> ()
+  | Space_n -> ()
+  | Space_cr -> warn_cr start
+
+let sp_p(_, (spaces, (start, _))) =
+  match spaces with
+  | Space_none -> ()
+  | Space_0 -> warn_no_space start
+  | Space_1 -> ()
+  | Space_n -> ()
+  | Space_cr -> ()
+
+let sp_cr(_, (spaces, (start, _))) =
+  match spaces with
+  | Space_none -> ()
+  | Space_0
+  | Space_1
+  | Space_n -> warn (start, start) "you should have a carriage-return (\\n) here"
+  | Space_cr -> ()
+
+let not_complex = function
+  | Call_op("?:", _) -> false
+  | _ -> true
+
+let string_of_Ident = function
+  | Ident(None, s, _) -> s
+  | Ident(Some fq, s, _) -> fq ^ "::" ^ s
+  | _ -> internal_error "string_of_Ident"
+
+let check_parenthesized_first_argexpr word (e, (_, (start, _)) as ex) =
+  let want_space = word.[0] = '-' in
+  match e with
+  | List[List[_]] :: l ->
+      if want_space then
+	if l = [] then sp_n(ex) else die_with_pos (start, start) "can't handle this nicely"
+      else
+	if l = [] then sp_0(ex) else die_with_pos (start, start) "you must not have a space here"
+  | _ -> sp_p(ex)
+
+let check_foreach (s, (_, pos)) = if s = "for"     then warn pos "write \"foreach\" instead of \"for\""
+let check_for     (s, (_, pos)) = if s = "foreach" then warn pos "write \"for\" instead of \"foreach\""
+
+let check_no_paren f_name (e, (_, pos)) =
+  match e with
+  | List[List[List[e]]] when not_complex e -> warn pos (Printf.sprintf "''... %s (...)'' can be written ''... %s ...''" f_name f_name)
+  | _ -> ()
+
+let to_Ident ((fq, name), (_, pos)) = Ident(fq, name, raw_pos2pos pos)
+let to_String (s, (_, pos)) = String(s, raw_pos2pos pos)
+
+let rec only_one (l, (spaces, pos)) =
+  match l with
+  | [List l'] -> only_one (l', (spaces, pos))
+  | [e] -> e
+  | [] -> die_with_pos pos "you must give one argument"
+  | _  -> die_with_pos pos "you must give only one argument"
+
+let only_one_in_List (e, both) =
+  match e with
+  | List l -> only_one(l, both)
+  | _ -> e
+
+let array_ident_to_hash_ident (e, (_, pos)) =
+  match e with
+  | Deref(I_array, e) -> Deref(I_hash, e)
+  | _ -> die_with_pos pos "internal error (array_ident_to_hash_ident)"
+
+let from_PATTERN ((s, opts), (_, pos)) = [ String(s, raw_pos2pos pos) ; String(opts, raw_pos2pos pos) ]
+let from_PATTERN_SUBST ((s1, s2, opts), (_, pos)) = [ String(s1, raw_pos2pos pos) ; String(s2, raw_pos2pos pos) ; String(opts, raw_pos2pos pos) ]
+  
+let to_List = function
+  | [e] -> e
+  | l -> List l
+
+let sub_declaration (name, proto) body = Sub_declaration(name, proto, body)
+
+let var_dollar_ = Deref(I_scalar, Ident(None, "_", raw_pos2pos bpos))
+let var_STDOUT = Deref(I_star, Ident(None, "STDOUT", raw_pos2pos bpos))
diff --git a/perl_checker.src/parser_helper.mli b/perl_checker.src/parser_helper.mli
new file mode 100644
index 0000000..5d1c47b
--- /dev/null
+++ b/perl_checker.src/parser_helper.mli
@@ -0,0 +1,42 @@
+val bpos : int * int
+val msg_with_pos : int * int -> string -> string
+val die_with_pos : int * int -> string -> 'a
+val warn : int * int -> string -> unit
+val die_rule : string -> 'a
+val debug : string -> unit
+val raw_pos2pos : 'a * 'b -> string * 'a * 'b
+val get_pos : 'a * ('b * ('c * 'd)) -> string * 'c * 'd
+val warn_too_many_space : int -> unit
+val warn_no_space : int -> unit
+val warn_cr : int -> unit
+val warn_space : int -> unit
+val sp_0 : 'a * (Types.spaces * (int * 'b)) -> unit
+val sp_0_or_cr : 'a * (Types.spaces * (int * 'b)) -> unit
+val sp_1 : 'a * (Types.spaces * (int * 'b)) -> unit
+val sp_n : 'a * (Types.spaces * (int * 'b)) -> unit
+val sp_p : 'a * (Types.spaces * (int * 'b)) -> unit
+val sp_cr : 'a * (Types.spaces * (int * 'b)) -> unit
+val not_complex : Types.fromparser -> bool
+val string_of_Ident : Types.fromparser -> string
+val check_parenthesized_first_argexpr :
+  string -> Types.fromparser list * (Types.spaces * (int * 'a)) -> unit
+val check_foreach : string * ('a * (int * int)) -> unit
+val check_for : string * ('a * (int * int)) -> unit
+val check_no_paren : string -> Types.fromparser * ('a * (int * int)) -> unit
+val to_Ident :
+  (string option * string) * ('a * (int * int)) -> Types.fromparser
+val to_String : string * ('a * (int * int)) -> Types.fromparser
+val only_one : Types.fromparser list * ('a * (int * int)) -> Types.fromparser
+val only_one_in_List :
+  Types.fromparser * ('a * (int * int)) -> Types.fromparser
+val array_ident_to_hash_ident :
+  Types.fromparser * ('a * (int * int)) -> Types.fromparser
+val from_PATTERN :
+  (string * string) * ('a * (int * int)) -> Types.fromparser list
+val from_PATTERN_SUBST :
+  (string * string * string) * ('a * (int * int)) -> Types.fromparser list
+val to_List : Types.fromparser list -> Types.fromparser
+val sub_declaration :
+  Types.fromparser * string -> Types.fromparser list -> Types.fromparser
+val var_dollar_ : Types.fromparser
+val var_STDOUT : Types.fromparser
diff --git a/perl_checker.src/perl_checker.ml b/perl_checker.src/perl_checker.ml
index fbb3f4d..79b3ac9 100644
--- a/perl_checker.src/perl_checker.ml
+++ b/perl_checker.src/perl_checker.ml
@@ -2,21 +2,21 @@ open Types
 
 let _ = 
   let args = List.tl (Array.to_list Sys.argv) in
+  let args = if args = [] then ["/tmp/t.pl"] else args in
   List.iter (fun file ->
     try
-      let lexbuf = Lexing.from_channel (open_in file) in
-      let _ = 
-	try
-	  Info.start_a_new_file file ;
-	  if false then
-	    let t = Lexer.lexbuf2list Lexer.token lexbuf in
-	    let _,_ = t, t in ""
-	  else
-	    Parser.prog Lexer.token lexbuf
-	with Failure s -> (
-	  prerr_endline s ;
-	  exit 1
-     ) in
-      ()
+      let lexbuf = Lexing.from_channel (Unix.open_process_in (Printf.sprintf "expand \"%s\"" file)) in
+      try
+	Info.start_a_new_file file ;
+	if false then
+	  let t = Lexer.lexbuf2list (Lexer.concat_bareword_paren (Lexer.concat_spaces Lexer.token)) lexbuf in
+	  let _,_ = t, t in ()
+	else
+	  let t = Parser.prog (Lexer.concat_bareword_paren (Lexer.concat_spaces Lexer.token)) lexbuf in
+	  let _,_ = t, t in ()
+      with Failure s -> (
+	prerr_endline s ;
+	exit 1
+     )
     with _ -> prerr_endline ("bad file " ^ file)
   ) args
diff --git a/perl_checker.src/types.mli b/perl_checker.src/types.mli
index 570da0f..5567eb9 100644
--- a/perl_checker.src/types.mli
+++ b/perl_checker.src/types.mli
@@ -1,16 +1,44 @@
 exception TooMuchRParen
 
+type raw_pos = int * int
+
 type pos = string * int * int
 
-type ident_type = I_scalar | I_hash | I_array | I_func | I_raw | I_star
+type spaces =
+  | Space_0
+  | Space_1
+  | Space_n
+  | Space_cr
+  | Space_none
 
-type fromparser = 
-   | Ident of ident_type * string option * string * pos
+type context = I_scalar | I_hash | I_array | I_func | I_raw | I_star | I_arraylen
 
-   | Num of float * pos
+type fromparser = 
+   | Ident of string option * string * pos
+   | Num of string * pos
    | String of string * pos
-   | Nil
 
-   | Binary of string * fromparser * fromparser
+   | Ref of context * fromparser
+   | Deref of context * fromparser
+   | Deref_with of context * fromparser * fromparser
+
+   | Diamond of fromparser option
+   | Binop of string * fromparser * fromparser
    | If_then_else of string * (fromparser * fromparser) list * fromparser option
 
+   | List of fromparser list
+   | Block of fromparser list
+
+   | Call of fromparser * fromparser list
+   | Call_op of string * fromparser list
+   | Method_call of fromparser * fromparser * fromparser list
+
+   | Anonymous_sub of fromparser
+   | My of fromparser
+   | Local of fromparser
+   | Use of fromparser * fromparser list
+   | Sub_declaration of fromparser * string * fromparser list (* name, prototype, body *)
+   | Package of fromparser
+   | Label of string
+
+   | Too_complex