-rw-r--r--  perl_checker.src/Makefile           |  16
-rw-r--r--  perl_checker.src/common.ml          |  31
-rw-r--r--  perl_checker.src/common.mli         |  16
-rw-r--r--  perl_checker.src/info.ml            |   8
-rw-r--r--  perl_checker.src/lexer.mll          | 427
-rw-r--r--  perl_checker.src/parser.mly         | 513
-rw-r--r--  perl_checker.src/parser_helper.ml   | 125
-rw-r--r--  perl_checker.src/parser_helper.mli  |  42
-rw-r--r--  perl_checker.src/perl_checker.ml    |  28
-rw-r--r--  perl_checker.src/types.mli          |  40
10 files changed, 803 insertions(+), 443 deletions(-)
diff --git a/perl_checker.src/Makefile b/perl_checker.src/Makefile
index 1c73153..6318ac5 100644
--- a/perl_checker.src/Makefile
+++ b/perl_checker.src/Makefile
@@ -13,9 +13,9 @@ TMP_FILES = $(YACC_FILES:%.mly=%.mli) $(YACC_FILES:%.mly=%.output) $(YACC_FILES
ALL_PROGS = perl_checker_debug perl_checker
-PROG_OBJS_WITH_CMI = parser.cmo print.cmo perl_checker.cmo
+PROG_OBJS_WITH_CMI = parser_helper.cmo parser.cmo print.cmo perl_checker.cmo
PROG_OBJS = common.cmo flags.cmo info.cmo $(LEX_FILES:%.mll=%.cmo) $(PROG_OBJS_WITH_CMI)
-CMA_FILES =
+CMA_FILES = unix.cma
PROG_OBJX_WITH_CMI = $(PROG_OBJS_WITH_CMI:%.cmo=%.cmx)
PROG_OBJX = $(PROG_OBJS:%.cmo=%.cmx)
@@ -48,7 +48,7 @@ perl_checker: .depend $(PROG_OBJX)
$(PROG_OBJS_WITH_CMI): %.cmo: %.cmi
-$(PROG_OBJX_WITH_CMI): %.cmx: %.cmi
+$(PROG_OBJX_WITH_CMI): %.cmx: %.cmi
%.cmo: %.ml
$(CSLC) $(CSLFLAGS) -c $<
@@ -74,9 +74,11 @@ depend: .depend
$(CSLDEP) $(INCLUDES) *.mli *.mll *.ml > .depend
# missing dependencies:
-perl_checker.cmo: lexer.cmi
-perl_checker.cmx: lexer.cmi
-lexer.cmx: common.cmi
-lexer.cmo: common.cmi
+perl_checker.cmo: lexer.cmi parser.cmi
+perl_checker.cmx: lexer.cmi parser.cmi
+lexer.cmx: common.cmi parser.cmi
+lexer.cmo: common.cmi parser.cmi
+parser.cmo: parser_helper.cmi
+parser.cmx: parser_helper.cmi
-include .depend
diff --git a/perl_checker.src/common.ml b/perl_checker.src/common.ml
index 0f20e7a..f600e01 100644
--- a/perl_checker.src/common.ml
+++ b/perl_checker.src/common.ml
@@ -8,18 +8,6 @@ exception GraphSort_circular_deps
type ('a, 'b) either = Left of 'a | Right of 'b
type ('a, 'b) or_option = Or_some of 'a | Or_error of 'b
-let bpos = "",-1,-1
-
-let norm (a,b) = if a = -1 then b else a
-let unipos (s,a,b) (s2,c,d) =
- if (a,b) = (-1,-1) then s2,c,d else
- if (c,d) = (-1,-1) then s,a,b else
- if s <> s2 then bpos else s, min (norm(a,c)) (norm(c,a)), max (norm(b,d)) (norm(d,b))
-
-let uniposl l = match l with
- | [] -> bpos
- | e::l -> fold_left unipos e l
-
(**********************************************************************************)
let internal_error s = failwith ("internal error: " ^ s)
@@ -267,6 +255,9 @@ let rec l_option2option_l = function
| Some e :: l -> map_option (fun l -> e :: l) (l_option2option_l l)
let map_option_env f (e, env) = map_option f e, env
+let t2_to_list (a,b) = [ a ; b ]
+let t3_to_list (a,b,c) = [ a ; b ; c ]
+
let if_some bool val_ = if bool then Some val_ else None
let rec fold_left_option f val_ = function
@@ -653,17 +644,13 @@ let skip_n_char_ beg end_ s =
String.sub s beg (String.length s - beg - end_)
let skip_n_char n s = skip_n_char_ n 0 s
-let rec index_spaces_from beg s =
- if s.[beg] = ' ' || s.[beg] = '\t' then beg else index_spaces_from (beg+1) s
-let index_spaces s = index_spaces_from 0 s
-
-let rec index_non_spaces_from beg s =
- if s.[beg] = ' ' || s.[beg] = '\t' then index_non_spaces_from (beg+1) s else beg
-let index_non_spaces s = index_non_spaces_from 0 s
+let rec non_index_from s beg c =
+ if s.[beg] = c then non_index_from s (beg+1) c else beg
+let non_index s c = non_index_from s 0 c
-let rec rindex_non_spaces_from beg s =
- if s.[beg] = ' ' || s.[beg] = '\t' then rindex_non_spaces_from (beg-1) s else beg
-let rindex_non_spaces s = rindex_non_spaces_from (String.length s - 1) s
+let rec non_rindex_from s beg c =
+ if s.[beg] = c then non_rindex_from s (beg-1) c else beg
+let non_rindex s c = non_rindex_from s (String.length s - 1) c
let rec explode_string = function
| "" -> []
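
The removed index_spaces*/index_non_spaces*/rindex_non_spaces* helpers were hard-wired to spaces and tabs; the new non_index/non_rindex pair scan past an arbitrary character instead. A minimal standalone sketch of how the new helpers cover the old space-trimming use, for the space character only (the old versions also skipped tabs); the trim_spaces wrapper and the assertion are illustrative, not part of the patch:

(* Hypothetical usage sketch built on the non_index/non_rindex definitions
   added above; they are duplicated here so the example is self-contained. *)
let rec non_index_from s beg c =
  if s.[beg] = c then non_index_from s (beg + 1) c else beg
let non_index s c = non_index_from s 0 c

let rec non_rindex_from s beg c =
  if s.[beg] = c then non_rindex_from s (beg - 1) c else beg
let non_rindex s c = non_rindex_from s (String.length s - 1) c

(* space-only replacement for the removed index_non_spaces / rindex_non_spaces *)
let trim_spaces s =
  let start = non_index s ' ' in
  let stop = non_rindex s ' ' in
  String.sub s start (stop - start + 1)

let () = assert (trim_spaces "  foo  " = "foo")
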
diff --git a/perl_checker.src/common.mli b/perl_checker.src/common.mli
index 353a2a3..ccf6587 100644
--- a/perl_checker.src/common.mli
+++ b/perl_checker.src/common.mli
@@ -3,10 +3,6 @@ exception Not_comparable
exception GraphSort_circular_deps
type ('a, 'b) either = Left of 'a | Right of 'b
and ('a, 'b) or_option = Or_some of 'a | Or_error of 'b
-val bpos : string * int * int
-val norm : int * int -> int
-val unipos : string * int * int -> string * int * int -> string * int * int
-val uniposl : (string * int * int) list -> string * int * int
val internal_error : string -> 'a
val id : 'a -> 'a
val double : 'a -> 'a * 'a
@@ -65,6 +61,8 @@ val map_optionoption : ('a -> 'b option) -> 'a option -> 'b option
val t2_option2option_t2 : 'a option * 'b option -> ('a * 'b) option
val l_option2option_l : 'a option list -> 'a list option
val map_option_env : ('a -> 'b) -> 'a option * 'c -> 'b option * 'c
+val t2_to_list : 'a * 'a -> 'a list
+val t3_to_list : 'a * 'a * 'a -> 'a list
val if_some : bool -> 'a -> 'a option
val fold_left_option : ('a -> 'b -> 'a option) -> 'a -> 'b list -> 'a option
val collect_some_withenv :
@@ -175,12 +173,10 @@ val chop : string -> string
val chomps : string -> string
val skip_n_char_ : int -> int -> string -> string
val skip_n_char : int -> string -> string
-val index_spaces_from : int -> string -> int
-val index_spaces : string -> int
-val index_non_spaces_from : int -> string -> int
-val index_non_spaces : string -> int
-val rindex_non_spaces_from : int -> string -> int
-val rindex_non_spaces : string -> int
+val non_index_from : string -> int -> char -> int
+val non_index : string -> char -> int
+val non_rindex_from : string -> int -> char -> int
+val non_rindex : string -> char -> int
val explode_string : string -> char list
val is_uppercase : char -> bool
val is_lowercase : char -> bool
diff --git a/perl_checker.src/info.ml b/perl_checker.src/info.ml
index 8e27bd3..3d68439 100644
--- a/perl_checker.src/info.ml
+++ b/perl_checker.src/info.ml
@@ -14,17 +14,19 @@ let start_a_new_file file =
let raw_pos2raw_line file a =
let starts = map_index (fun a b -> a,b) (rev !(assoc file !lines_starts)) in
- let ((offset, line), _) = find (fun (_,(e,_)) -> e > a) (combine starts (tl starts @ [99999, 99999])) in
+ let ((offset, line), _) = find (fun (_,(e,_)) -> e > a) (combine starts (tl starts @ [999999999, 999999999])) in
line, offset
let pos2line (file, a, b) =
let line, offset = raw_pos2raw_line file a in
- file, line, a - offset, b - offset
+ file, line, a - offset + 1, b - offset + 1
+
+let pos2s (file, a, b) = sprintf "(%s, %d, %d)" file a b
let pos2sfull pos =
try
let (file, line, n1,n2) = pos2line pos in
sprintf "File \"%s\", line %d, character %d-%d\n" file (line + 1) n1 n2
- with Not_found -> ""
+ with Not_found -> failwith ("bad position " ^ pos2s pos)
let pos2sfull_current a b = pos2sfull (!current_file, a, b)
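
raw_pos2raw_line resolves a raw character offset against the per-file list of recorded line starts; pos2line now reports 1-based columns, and pos2sfull fails loudly on an unknown position instead of returning an empty string. A small self-contained sketch of that offset-to-(line, column) lookup, assuming a plain array of line-start offsets rather than the module's lines_starts table (the names below are illustrative):

(* Standalone illustration of the lookup performed by raw_pos2raw_line/pos2line;
   the real code keeps per-file line starts in Info.lines_starts. *)
let raw_pos_to_line_col line_starts offset =
  (* line_starts.(i) is the character offset at which line i starts *)
  let rec loop i =
    if i + 1 >= Array.length line_starts || line_starts.(i + 1) > offset
    then i, offset - line_starts.(i) + 1            (* 1-based column, as in pos2line *)
    else loop (i + 1)
  in
  loop 0

let () =
  let starts = [| 0; 10; 25 |] in                   (* lines starting at offsets 0, 10, 25 *)
  assert (raw_pos_to_line_col starts 12 = (1, 3))   (* offset 12: line 1 (0-based), column 3 *)
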
diff --git a/perl_checker.src/lexer.mll b/perl_checker.src/lexer.mll
index 6abbbc2..8450aa9 100644
--- a/perl_checker.src/lexer.mll
+++ b/perl_checker.src/lexer.mll
@@ -1,12 +1,202 @@
{ (* -*- caml -*- *)
-open Parser
open Common
+open Types
open Lexing
open Info
+type raw_token =
+ | EOF of raw_pos
+ | SPACE of int
+ | CR
+ | NUM of (string * raw_pos)
+ | STRING of (string * raw_pos)
+ | BAREWORD of (string * raw_pos)
+ | REVISION of (string * raw_pos)
+ | COMMENT of (string * raw_pos)
+ | POD of (string * raw_pos)
+ | LABEL of (string * raw_pos)
+ | COMMAND_STRING of (string * raw_pos)
+ | PRINT_TO_STAR of (string * raw_pos)
+ | PRINT_TO_SCALAR of (string * raw_pos)
+ | QUOTEWORDS of (string * raw_pos)
+ | COMPACT_HASH_SUBSCRIPT of (string * raw_pos)
+ | HERE_DOC of ((string * raw_pos) ref * raw_pos)
+ | PATTERN of (string * string * raw_pos)
+ | PATTERN_SUBST of (string * string * string * raw_pos)
+ | SCALAR_IDENT of (string option * string * raw_pos)
+ | ARRAY_IDENT of (string option * string * raw_pos)
+ | HASH_IDENT of (string option * string * raw_pos)
+ | FUNC_IDENT of (string option * string * raw_pos)
+ | STAR_IDENT of (string option * string * raw_pos)
+ | RAW_IDENT of (string option * string * raw_pos)
+ | ARRAYLEN_IDENT of (string option * string * raw_pos)
+ | FUNC_DECL_WITH_PROTO of (string * string * raw_pos)
+
+ | IF of raw_pos | ELSIF of raw_pos | ELSE of raw_pos | UNLESS of raw_pos | DO of raw_pos | WHILE of raw_pos | UNTIL of raw_pos | MY of raw_pos | CONTINUE of raw_pos | SUB of raw_pos
+ | LOCAL of raw_pos | FOR of (string * raw_pos) | USE of raw_pos | PACKAGE of raw_pos | BEGIN of raw_pos | END of raw_pos | PRINT of (string * raw_pos)
+ | NEW of (raw_pos) | FORMAT of (raw_pos) | AT of raw_pos | DOLLAR of raw_pos | PERCENT of raw_pos | AMPERSAND of raw_pos
+ | STAR of raw_pos | ARRAYLEN of raw_pos | SEMI_COLON of raw_pos | PKG_SCOPE of raw_pos | PAREN of raw_pos | PAREN_END of raw_pos | BRACKET of raw_pos
+ | BRACKET_END of raw_pos | BRACKET_HASHREF of raw_pos | ARRAYREF of raw_pos | ARRAYREF_END of raw_pos | ARROW of raw_pos | INCR of raw_pos | DECR of raw_pos
+ | POWER of raw_pos | TIGHT_NOT of raw_pos | BIT_NEG of raw_pos | REF of raw_pos | PATTERN_MATCH of raw_pos | PATTERN_MATCH_NOT of raw_pos | MULT of raw_pos
+ | DIVISION of raw_pos | MODULO of raw_pos | REPLICATE of raw_pos | PLUS of raw_pos | MINUS of raw_pos | CONCAT of raw_pos | BIT_SHIFT_LEFT of raw_pos
+ | BIT_SHIFT_RIGHT of raw_pos | LT of raw_pos | GT of raw_pos | COMPARE_OP of (string * raw_pos) | EQ_OP of (string * raw_pos)
+ | BIT_AND of raw_pos | BIT_OR of raw_pos | BIT_XOR of raw_pos | AND_TIGHT of raw_pos | OR_TIGHT of raw_pos | DOTDOT of raw_pos | DOTDOTDOT of raw_pos
+ | QUESTION_MARK of raw_pos | COLON of raw_pos | ASSIGN of (string * raw_pos) | COMMA of raw_pos | RIGHT_ARROW of raw_pos | NOT of raw_pos | AND of raw_pos | OR of raw_pos | XOR of raw_pos
+
+let saved_token = ref None
+
+let concat_bareword_paren get_token lexbuf =
+ let token = match !saved_token with
+ | None -> get_token lexbuf
+ | Some t -> t
+ in
+ let token, next =
+ match token with
+ | Parser.PRINT(s, both)
+ | Parser.BAREWORD(s, both) ->
+ let next_token = get_token lexbuf in
+ (match next_token with Parser.PAREN(_, (Space_0, _)) -> Parser.BAREWORD_PAREN(s, both) | _ -> token), Some next_token
+ | Parser.RAW_IDENT(ident, both) ->
+ let next_token = get_token lexbuf in
+ (match next_token with Parser.PAREN(_, (Space_0, _)) -> Parser.RAW_IDENT_PAREN(ident, both) | _ -> token), Some next_token
+ | _ -> token, None
+ in
+ saved_token := next ; token
+
+let rec concat_spaces get_token lexbuf =
+ let rec get_spaces spaces lexbuf =
+ match get_token lexbuf with
+ | CR -> get_spaces Space_cr lexbuf
+ | SPACE n ->
+ let spaces' =
+ match spaces with
+ | Space_cr -> Space_cr
+ | Space_0 -> if n = 1 then Space_1 else Space_n
+ | _ -> Space_n
+ in
+ get_spaces spaces' lexbuf
+ | token -> token, spaces
+ in
+ let token, spaces = get_spaces Space_0 lexbuf in
+ match token with
+ | NUM(s, pos) -> Parser.NUM(s, (spaces, pos))
+ | STRING(s, pos) -> Parser.STRING(s, (spaces, pos))
+ | BAREWORD(s, pos) -> Parser.BAREWORD(s, (spaces, pos))
+ | REVISION(s, pos) -> Parser.REVISION(s, (spaces, pos))
+ | COMMENT(s, pos) -> Parser.COMMENT(s, (spaces, pos))
+ | POD(s, pos) -> Parser.POD(s, (spaces, pos))
+ | LABEL(s, pos) -> Parser.LABEL(s, (spaces, pos))
+ | COMMAND_STRING(s, pos) -> Parser.COMMAND_STRING(s, (spaces, pos))
+ | PRINT(s, pos) -> Parser.PRINT(s, (spaces, pos))
+ | PRINT_TO_STAR(s, pos) -> Parser.PRINT_TO_STAR(s, (spaces, pos))
+ | PRINT_TO_SCALAR(s, pos) -> Parser.PRINT_TO_SCALAR(s, (spaces, pos))
+ | QUOTEWORDS(s, pos) -> Parser.QUOTEWORDS(s, (spaces, pos))
+ | COMPACT_HASH_SUBSCRIPT(s, pos) -> Parser.COMPACT_HASH_SUBSCRIPT(s, (spaces, pos))
+ | HERE_DOC(r, pos) -> Parser.HERE_DOC(r, (spaces, pos))
+ | PATTERN(s, opts, pos) -> Parser.PATTERN((s, opts), (spaces, pos))
+ | PATTERN_SUBST(from, to_, opts, pos) -> Parser.PATTERN_SUBST((from, to_, opts), (spaces, pos))
+ | SCALAR_IDENT(kind, name, pos) -> Parser.SCALAR_IDENT((kind, name), (spaces, pos))
+ | ARRAY_IDENT(kind, name, pos) -> Parser.ARRAY_IDENT((kind, name), (spaces, pos))
+ | HASH_IDENT(kind, name, pos) -> Parser.HASH_IDENT((kind, name), (spaces, pos))
+ | FUNC_IDENT(kind, name, pos) -> Parser.FUNC_IDENT((kind, name), (spaces, pos))
+ | STAR_IDENT(kind, name, pos) -> Parser.STAR_IDENT((kind, name), (spaces, pos))
+ | RAW_IDENT(kind, name, pos) -> Parser.RAW_IDENT((kind, name), (spaces, pos))
+ | ARRAYLEN_IDENT(kind, name, pos) -> Parser.ARRAYLEN_IDENT((kind, name), (spaces, pos))
+ | FUNC_DECL_WITH_PROTO(name, proto, pos) -> Parser.FUNC_DECL_WITH_PROTO((name, proto), (spaces, pos))
+
+ | NEW(pos) -> Parser.NEW((), (spaces, pos))
+ | FORMAT(pos) -> Parser.FORMAT((), (spaces, pos))
+ | COMPARE_OP(s, pos) -> Parser.COMPARE_OP(s, (spaces, pos))
+ | EQ_OP(s, pos) -> Parser.EQ_OP(s, (spaces, pos))
+ | ASSIGN(s, pos) -> Parser.ASSIGN(s, (spaces, pos))
+ | FOR(s, pos) -> Parser.FOR(s, (spaces, pos))
+
+ | EOF (pos) -> Parser.EOF ((), (spaces, pos))
+ | IF (pos) -> Parser.IF ((), (spaces, pos))
+ | ELSIF (pos) -> Parser.ELSIF ((), (spaces, pos))
+ | ELSE (pos) -> Parser.ELSE ((), (spaces, pos))
+ | UNLESS (pos) -> Parser.UNLESS ((), (spaces, pos))
+ | DO (pos) -> Parser.DO ((), (spaces, pos))
+ | WHILE (pos) -> Parser.WHILE ((), (spaces, pos))
+ | UNTIL (pos) -> Parser.UNTIL ((), (spaces, pos))
+ | MY (pos) -> Parser.MY ((), (spaces, pos))
+ | CONTINUE (pos) -> Parser.CONTINUE ((), (spaces, pos))
+ | SUB (pos) -> Parser.SUB ((), (spaces, pos))
+ | LOCAL (pos) -> Parser.LOCAL ((), (spaces, pos))
+ | USE (pos) -> Parser.USE ((), (spaces, pos))
+ | PACKAGE (pos) -> Parser.PACKAGE ((), (spaces, pos))
+ | BEGIN (pos) -> Parser.BEGIN ((), (spaces, pos))
+ | END (pos) -> Parser.END ((), (spaces, pos))
+ | AT (pos) -> Parser.AT ((), (spaces, pos))
+ | DOLLAR (pos) -> Parser.DOLLAR ((), (spaces, pos))
+ | PERCENT (pos) -> Parser.PERCENT ((), (spaces, pos))
+ | AMPERSAND (pos) -> Parser.AMPERSAND ((), (spaces, pos))
+ | STAR (pos) -> Parser.STAR ((), (spaces, pos))
+ | ARRAYLEN (pos) -> Parser.ARRAYLEN ((), (spaces, pos))
+ | SEMI_COLON (pos) -> Parser.SEMI_COLON ((), (spaces, pos))
+ | PKG_SCOPE (pos) -> Parser.PKG_SCOPE ((), (spaces, pos))
+ | PAREN (pos) -> Parser.PAREN ((), (spaces, pos))
+ | PAREN_END (pos) -> Parser.PAREN_END ((), (spaces, pos))
+ | BRACKET (pos) -> Parser.BRACKET ((), (spaces, pos))
+ | BRACKET_END (pos) -> Parser.BRACKET_END ((), (spaces, pos))
+ | BRACKET_HASHREF (pos) -> Parser.BRACKET_HASHREF ((), (spaces, pos))
+ | ARRAYREF (pos) -> Parser.ARRAYREF ((), (spaces, pos))
+ | ARRAYREF_END (pos) -> Parser.ARRAYREF_END ((), (spaces, pos))
+ | ARROW (pos) -> Parser.ARROW ((), (spaces, pos))
+ | INCR (pos) -> Parser.INCR ((), (spaces, pos))
+ | DECR (pos) -> Parser.DECR ((), (spaces, pos))
+ | POWER (pos) -> Parser.POWER ((), (spaces, pos))
+ | TIGHT_NOT (pos) -> Parser.TIGHT_NOT ((), (spaces, pos))
+ | BIT_NEG (pos) -> Parser.BIT_NEG ((), (spaces, pos))
+ | REF (pos) -> Parser.REF ((), (spaces, pos))
+ | PATTERN_MATCH (pos) -> Parser.PATTERN_MATCH ((), (spaces, pos))
+ | PATTERN_MATCH_NOT(pos) -> Parser.PATTERN_MATCH_NOT((), (spaces, pos))
+ | MULT (pos) -> Parser.MULT ((), (spaces, pos))
+ | DIVISION (pos) -> Parser.DIVISION ((), (spaces, pos))
+ | MODULO (pos) -> Parser.MODULO ((), (spaces, pos))
+ | REPLICATE (pos) -> Parser.REPLICATE ((), (spaces, pos))
+ | PLUS (pos) -> Parser.PLUS ((), (spaces, pos))
+ | MINUS (pos) -> Parser.MINUS ((), (spaces, pos))
+ | CONCAT (pos) -> Parser.CONCAT ((), (spaces, pos))
+ | BIT_SHIFT_LEFT (pos) -> Parser.BIT_SHIFT_LEFT ((), (spaces, pos))
+ | BIT_SHIFT_RIGHT (pos) -> Parser.BIT_SHIFT_RIGHT ((), (spaces, pos))
+ | LT (pos) -> Parser.LT ((), (spaces, pos))
+ | GT (pos) -> Parser.GT ((), (spaces, pos))
+ | BIT_AND (pos) -> Parser.BIT_AND ((), (spaces, pos))
+ | BIT_OR (pos) -> Parser.BIT_OR ((), (spaces, pos))
+ | BIT_XOR (pos) -> Parser.BIT_XOR ((), (spaces, pos))
+ | AND_TIGHT (pos) -> Parser.AND_TIGHT ((), (spaces, pos))
+ | OR_TIGHT (pos) -> Parser.OR_TIGHT ((), (spaces, pos))
+ | DOTDOT (pos) -> Parser.DOTDOT ((), (spaces, pos))
+ | DOTDOTDOT (pos) -> Parser.DOTDOTDOT ((), (spaces, pos))
+ | QUESTION_MARK (pos) -> Parser.QUESTION_MARK ((), (spaces, pos))
+ | COLON (pos) -> Parser.COLON ((), (spaces, pos))
+ | COMMA (pos) -> Parser.COMMA ((), (spaces, pos))
+ | RIGHT_ARROW (pos) -> Parser.RIGHT_ARROW ((), (spaces, pos))
+ | NOT (pos) -> Parser.NOT ((), (spaces, pos))
+ | AND (pos) -> Parser.AND ((), (spaces, pos))
+ | OR (pos) -> Parser.OR ((), (spaces, pos))
+ | XOR (pos) -> Parser.XOR ((), (spaces, pos))
+
+ | SPACE _ | CR -> internal_error "raw_token_to_token"
+
+let rec lexbuf2list t lexbuf =
+ let rec f () =
+ match t lexbuf with
+ | Parser.EOF _ -> []
+ | e -> e :: f()
+ in
+ let l = f() in
+ l
let next_rule = ref None
+let bpos = -1,-1
+let pos lexbuf = lexeme_start lexbuf, lexeme_end lexbuf
+let pos2sfull_with start end_ = Info.pos2sfull (!current_file, start, end_)
+let pos2sfull lexbuf = pos2sfull_with (lexeme_start lexbuf) (lexeme_end lexbuf)
+let putback lexbuf nb = lexbuf.Lexing.lex_curr_pos <- lexbuf.Lexing.lex_curr_pos - nb
+
let add_a_new_line raw_pos =
incr current_file_current_line ;
lpush current_file_lines_starts raw_pos
@@ -26,12 +216,13 @@ let string_nestness = ref 0
let building_current_string = ref ""
let current_string_start_pos = ref 0
let current_string_start_line = ref 0
+let die lexbuf err = failwith (pos2sfull_with !current_string_start_pos (lexeme_end lexbuf) ^ err)
let ins t lexbuf =
building_current_string := "";
current_string_start_pos := lexeme_start lexbuf;
t lexbuf ;
- !building_current_string, (!current_file, !current_string_start_pos, lexeme_end lexbuf)
+ !building_current_string, (!current_string_start_pos, lexeme_end lexbuf)
let ins_to_string t lexbuf =
let s, pos = ins t lexbuf in
not_ok_for_match := lexeme_end lexbuf;
@@ -42,23 +233,6 @@ let next_s s t lexbuf =
t lexbuf
let next t lexbuf = next_s (lexeme lexbuf) t lexbuf
-let pos lexbuf = !current_file, lexeme_start lexbuf, lexeme_end lexbuf
-let pos2sfull_with start end_ = Info.pos2sfull (!current_file, start, end_)
-let pos2sfull lexbuf = pos2sfull_with (lexeme_start lexbuf) (lexeme_end lexbuf)
-
-let putback lexbuf nb = lexbuf.Lexing.lex_curr_pos <- lexbuf.Lexing.lex_curr_pos - nb
-
-let die lexbuf err = failwith (pos2sfull_with !current_string_start_pos (lexeme_end lexbuf) ^ err)
-
-let rec lexbuf2list t lexbuf =
- let rec f () =
- match t lexbuf with
- | EOF -> []
- | e -> e :: f()
- in
- let l = f() in
- l
-
let ident_type_from_char fq name lexbuf c =
not_ok_for_match := lexeme_end lexbuf;
match c with
@@ -92,7 +266,7 @@ let arraylen_fqident_from_lexbuf lexbuf =
let fq, name = split_at_two_colons (skip_n_char 2 s) in
ARRAYLEN_IDENT(Some fq, name, pos lexbuf)
-let check_multi_line_delimited_string opts (_, start, end_) =
+let check_multi_line_delimited_string opts (start, end_) =
let check =
match opts with
| None -> true
@@ -102,19 +276,18 @@ let check_multi_line_delimited_string opts (_, start, end_) =
failwith (pos2sfull_with start end_ ^ "multi-line patterns are not allowed (or use /x modifier)")
}
-let space = [' ' '\t']
let stash = [ '$' '@' '%' '&' '*' ]
let ident_start = ['a'-'z' 'A'-'Z' '_']
let ident = ident_start ['0'-'9' 'A'-'Z' 'a'-'z' '_'] *
let pattern_separator = [ '/' '!' ',' '|' ]
rule token = parse
-| space+ {
+| ' '+ {
(* propagate not_ok_for_match when it was set by the previous token *)
if lexeme_start lexbuf = !not_ok_for_match then not_ok_for_match := lexeme_end lexbuf;
- (*SPACE(pos lexbuf) *) token lexbuf
+ SPACE(lexeme_end lexbuf - lexeme_start lexbuf)
}
-| '#' [^ '\n']* { (*COMMENT(lexeme lexbuf, pos lexbuf)*) token lexbuf }
+| '#' [^ '\n']* { SPACE(1) }
| "\n=" {
add_a_new_line(lexeme_end lexbuf - 1);
@@ -128,76 +301,85 @@ rule token = parse
current_here_doc_mark := mark ;
r := ins (if interpolate then here_doc else raw_here_doc) lexbuf
with Queue.Empty -> ());
- token lexbuf
- }
-| "->" { ARROW }
-| "++" { INCR }
-| "--" { DECR }
-| "**" { POWER }
-| "!" { TIGHT_NOT }
-| "~" { BIT_NEG }
-| "=~" { PATTERN_MATCH }
-| "!~" { PATTERN_MATCH_NOT }
-| "*" { MULT }
-| "%" { MODULO }
-| "x" { REPLICATE }
-| "+" { PLUS }
-| "-" { MINUS }
-| "." { CONCAT }
-| "<<" { BIT_SHIFT_LEFT }
-| ">>" { BIT_SHIFT_RIGHT }
-| "<" { LT }
-| ">" { GT }
-| "<=" | ">=" | "lt" | "gt" | "le" | "ge" { COMPARE_OP(lexeme lexbuf) }
-| "==" | "!=" | "<=>" | "eq" | "ne" | "cmp" { EQ_OP(lexeme lexbuf) }
-| "&" { BIT_AND }
-| "|" { BIT_OR }
-| "^" { BIT_XOR }
-| "&&" { AND_TIGHT }
-| "||" { OR_TIGHT }
-| ".." { DOTDOT }
-| "..." { DOTDOTDOT }
-| "?" { QUESTION_MARK }
-| ":" { COLON }
-| "::" { PKG_SCOPE }
-
-| "=" | "+=" | "-=" | "*=" | ".=" | "|=" | "&=" | "^=" | "||=" | "&&=" { ASSIGN(lexeme lexbuf) }
-
-| "," { COMMA }
-| "=>" { RIGHT_ARROW }
-| "not" { NOT }
-| "and" { AND }
-| "or" { OR }
-| "xor" { XOR }
-
-| "if" { IF }
-| "else" { ELSE }
-| "elsif" { ELSIF }
-| "unless" { UNLESS }
-| "do" { DO }
-| "while" { WHILE }
-| "until" { UNTIL }
-| "foreach" { FOR("foreach") }
-| "for" { FOR("for") }
-| "my" { MY }
-| "local" { LOCAL }
-| "continue" { CONTINUE }
-| "sub" { SUB }
-| "package" { PACKAGE }
-| "use" { USE }
-| "BEGIN" { BEGIN }
-| "END" { END }
-| "print" { PRINT(pos lexbuf) }
+ CR
+ }
+| "->" { ARROW(pos lexbuf) }
+| "++" { INCR(pos lexbuf) }
+| "--" { DECR(pos lexbuf) }
+| "**" { POWER(pos lexbuf) }
+| "!" { TIGHT_NOT(pos lexbuf) }
+| "~" { BIT_NEG(pos lexbuf) }
+| "=~" { PATTERN_MATCH(pos lexbuf) }
+| "!~" { PATTERN_MATCH_NOT(pos lexbuf) }
+| "*" { MULT(pos lexbuf) }
+| "%" { MODULO(pos lexbuf) }
+| "x" { REPLICATE(pos lexbuf) }
+| "+" { PLUS(pos lexbuf) }
+| "-" { MINUS(pos lexbuf) }
+| "." { CONCAT(pos lexbuf) }
+| "<<" { BIT_SHIFT_LEFT(pos lexbuf) }
+| ">>" { BIT_SHIFT_RIGHT(pos lexbuf) }
+| "<" { LT(pos lexbuf) }
+| ">" { GT(pos lexbuf) }
+| "<=" | ">=" | "lt" | "gt" | "le" | "ge" { COMPARE_OP(lexeme lexbuf, pos lexbuf) }
+| "==" | "!=" | "<=>" | "eq" | "ne" | "cmp" { EQ_OP(lexeme lexbuf, pos lexbuf) }
+| "&" { BIT_AND(pos lexbuf) }
+| "|" { BIT_OR(pos lexbuf) }
+| "^" { BIT_XOR(pos lexbuf) }
+| "&&" { AND_TIGHT(pos lexbuf) }
+| "||" { OR_TIGHT(pos lexbuf) }
+| ".." { DOTDOT(pos lexbuf) }
+| "..." { DOTDOTDOT(pos lexbuf) }
+| "?" { QUESTION_MARK(pos lexbuf) }
+| ":" { COLON(pos lexbuf) }
+| "::" { PKG_SCOPE(pos lexbuf) }
+
+| "=" | "+=" | "-=" | "*=" | ".=" | "|=" | "&=" | "^=" | "||=" | "&&=" { ASSIGN(lexeme lexbuf, pos lexbuf) }
+
+| "," { COMMA(pos lexbuf) }
+| "=>" { RIGHT_ARROW(pos lexbuf) }
+| "not" { NOT(pos lexbuf) }
+| "and" { AND(pos lexbuf) }
+| "or" { OR(pos lexbuf) }
+| "xor" { XOR(pos lexbuf) }
+
+| "if" { IF(pos lexbuf) }
+| "else" { ELSE(pos lexbuf) }
+| "elsif" { ELSIF(pos lexbuf) }
+| "unless" { UNLESS(pos lexbuf) }
+| "do" { DO(pos lexbuf) }
+| "while" { WHILE(pos lexbuf) }
+| "until" { UNTIL(pos lexbuf) }
+| "foreach" { FOR(lexeme lexbuf, pos lexbuf) }
+| "for" { FOR(lexeme lexbuf, pos lexbuf) }
+| "my" { MY(pos lexbuf) }
+| "local" { LOCAL(pos lexbuf) }
+| "continue" { CONTINUE(pos lexbuf) }
+| "sub" { SUB(pos lexbuf) }
+| "package" { PACKAGE(pos lexbuf) }
+| "use" { USE(pos lexbuf) }
+| "BEGIN" { BEGIN(pos lexbuf) }
+| "END" { END(pos lexbuf) }
+| "print" { PRINT(lexeme lexbuf, pos lexbuf) }
| "new" { NEW(pos lexbuf) }
| "format" { let _ = here_doc_next_line "." false in FORMAT(pos lexbuf) }
| "split"
| "grep" { (* ok_for_match! *) BAREWORD(lexeme lexbuf, pos lexbuf) }
-| ident space* "=>" { (* needed so that (if => 1) works *)
+| "print " ident ' ' {
+ putback lexbuf 1;
+ PRINT_TO_STAR(skip_n_char_ 6 1 (lexeme lexbuf), pos lexbuf);
+ }
+| "print $" ident ' ' {
+ putback lexbuf 1;
+ PRINT_TO_SCALAR(skip_n_char_ 7 1 (lexeme lexbuf), pos lexbuf);
+ }
+
+| ident ' '* "=>" { (* needed so that (if => 1) works *)
let s = lexeme lexbuf in
let end_ = String.length s - 1 in
- let ident_end = rindex_non_spaces_from (end_ - 2) s in
+ let ident_end = non_rindex_from s (end_ - 2) ' ' in
putback lexbuf (end_ - ident_end);
BAREWORD(String.sub s 0 (ident_end+1), pos lexbuf)
}
@@ -207,25 +389,25 @@ rule token = parse
COMPACT_HASH_SUBSCRIPT(lexeme lexbuf, pos lexbuf)
}
-| '@' { AT }
-| '$' { DOLLAR }
-| '$' '#' { ARRAYLEN }
-| '%' ['$' '{'] { putback lexbuf 1; PERCENT }
-| '&' ['$' '{'] { putback lexbuf 1; AMPERSAND }
-| '*' ['$' '{'] { putback lexbuf 1; if lexeme_start lexbuf = !not_ok_for_match then MULT else STAR }
+| '@' { AT(pos lexbuf) }
+| '$' { DOLLAR(pos lexbuf) }
+| '$' '#' { ARRAYLEN(pos lexbuf) }
+| '%' ['$' '{'] { putback lexbuf 1; PERCENT(pos lexbuf) }
+| '&' ['$' '{'] { putback lexbuf 1; AMPERSAND(pos lexbuf) }
+| '*' ['$' '{'] { putback lexbuf 1; if lexeme_start lexbuf = !not_ok_for_match then MULT(pos lexbuf) else STAR(pos lexbuf) }
-| ';' { SEMI_COLON }
-| '(' { PAREN }
-| '{' { BRACKET }
-| "+{"{ BRACKET_HASHREF }
-| '[' { ARRAYREF }
-| ')' { not_ok_for_match := lexeme_end lexbuf; PAREN_END }
-| '}' { not_ok_for_match := lexeme_end lexbuf; BRACKET_END }
-| ']' { not_ok_for_match := lexeme_end lexbuf; ARRAYREF_END }
+| ';' { SEMI_COLON(pos lexbuf) }
+| '(' { PAREN(pos lexbuf) }
+| '{' { BRACKET(pos lexbuf) }
+| "+{"{ BRACKET_HASHREF(pos lexbuf) }
+| '[' { ARRAYREF(pos lexbuf) }
+| ')' { not_ok_for_match := lexeme_end lexbuf; PAREN_END(pos lexbuf) }
+| '}' { not_ok_for_match := lexeme_end lexbuf; BRACKET_END(pos lexbuf) }
+| ']' { not_ok_for_match := lexeme_end lexbuf; ARRAYREF_END(pos lexbuf) }
| "/" {
- if lexeme_start lexbuf = !not_ok_for_match then DIVISION
+ if lexeme_start lexbuf = !not_ok_for_match then DIVISION(pos lexbuf)
else (
delimit_char := '/' ;
current_string_start_line := !current_file_current_line;
@@ -237,7 +419,7 @@ rule token = parse
}
| "/=" {
- if lexeme_start lexbuf = !not_ok_for_match then ASSIGN(lexeme lexbuf)
+ if lexeme_start lexbuf = !not_ok_for_match then ASSIGN(lexeme lexbuf, pos lexbuf)
else (
putback lexbuf 1 ;
delimit_char := '/' ;
@@ -268,10 +450,10 @@ rule token = parse
| "s" pattern_separator {
delimit_char := lexeme_char lexbuf 1 ;
current_string_start_line := !current_file_current_line;
- let s1, (_, start, _) = ins delimited_string lexbuf in
- let s2, (_, _, end_) = ins delimited_string lexbuf in
+ let s1, (start, _) = ins delimited_string lexbuf in
+ let s2, (_, end_) = ins delimited_string lexbuf in
let opts, _ = ins pattern_options lexbuf in
- let pos = !current_file, start, end_ in
+ let pos = start, end_ in
check_multi_line_delimited_string (Some opts) pos ;
PATTERN_SUBST(s1, s2, opts, pos)
}
@@ -279,43 +461,43 @@ rule token = parse
| "tr" pattern_separator {
delimit_char := lexeme_char lexbuf 2 ;
current_string_start_line := !current_file_current_line;
- let s1, (_, start, _) = ins delimited_string lexbuf in
- let s2, (_, _, end_) = ins delimited_string lexbuf in
+ let s1, (start, _) = ins delimited_string lexbuf in
+ let s2, (_, end_) = ins delimited_string lexbuf in
let opts, _ = ins pattern_options lexbuf in
- let pos = !current_file, start, end_ in
+ let pos = start, end_ in
check_multi_line_delimited_string None pos ;
PATTERN_SUBST(s1, s2, opts, pos)
}
| "<<" ident {
not_ok_for_match := lexeme_end lexbuf;
- HERE_DOC(here_doc_next_line (skip_n_char 2 (lexeme lexbuf)) true)
+ HERE_DOC(here_doc_next_line (skip_n_char 2 (lexeme lexbuf)) true, pos lexbuf)
}
| "<<'" ident "'" {
not_ok_for_match := lexeme_end lexbuf;
- HERE_DOC(here_doc_next_line (skip_n_char_ 3 1 (lexeme lexbuf)) false)
+ HERE_DOC(here_doc_next_line (skip_n_char_ 3 1 (lexeme lexbuf)) false, pos lexbuf)
}
-| "<<" space+ "'"
-| "<<" space+ ident {
+| "<<" ' '+ "'"
+| "<<" ' '+ ident {
failwith (pos2sfull_with (lexeme_start lexbuf + 2) (lexeme_end lexbuf) ^ "No space allowed between \"<<\" and the marker")
}
-| "<<" space* '"' {
+| "<<" ' '* '"' {
failwith (pos2sfull_with (lexeme_start lexbuf + 2) (lexeme_end lexbuf) ^ "Don't use <<\"MARK\", use <<MARK instead")
}
| "\\" stash
| "\\" ['0'-'9' 'A'-'Z' 'a'-'z']
-| "\\" space* '('
- { putback lexbuf 1; REF }
+| "\\" ' '* '('
+ { putback lexbuf 1; REF(pos lexbuf) }
-| "sub" space+ ident space* '(' [ '$' '@' '\\' '&' ';' ]* ')' {
+| "sub" ' '+ ident ' '* '(' [ '$' '@' '\\' '&' ';' ]* ')' {
(* bloody prototypes, must be caught especially otherwise "($)" is badly tokenized *)
(* and alas "($@)" is both valid as an expression and a prototype *)
let s = lexeme lexbuf in
- let ident_start = index_non_spaces_from 3 s in
+ let ident_start = non_index_from s 3 ' ' in
let proto_start = String.index_from s ident_start '(' in
- let ident_end = rindex_non_spaces_from (proto_start-1) s in
+ let ident_end = non_rindex_from s (proto_start-1) ' ' in
let ident = String.sub s ident_start (ident_end - ident_start + 1) in
let prototype = skip_n_char_ (proto_start + 1) 1 s in
@@ -327,8 +509,8 @@ rule token = parse
| stash ident? ("::" ident)+ { typed_fqident_from_lexbuf lexbuf }
| stash ident
-| '$' [^ '{' ' ' '\t' '\n' '$']
-| "$^" [^ '{' ' ' '\t' '\n'] { typed_ident_from_lexbuf lexbuf }
+| '$' [^ '{' ' ' '\n' '$']
+| "$^" [^ '{' ' ' '\n'] { typed_ident_from_lexbuf lexbuf }
| "$$" [^ 'a'-'z' 'A'-'Z' '_' '{'] { putback lexbuf 1; SCALAR_IDENT(None, "$$", pos lexbuf) }
@@ -339,7 +521,7 @@ rule token = parse
| ident ":" { LABEL(lexeme lexbuf, pos lexbuf) }
-| '-' [ 'a'-'z' 'A'-'Z' ] [ ' ' '\t' '(' ] { putback lexbuf 1; BAREWORD(lexeme lexbuf, pos lexbuf) }
+| '-' [ 'a'-'z' 'A'-'Z' ] [ ' ' '(' ] { putback lexbuf 1; BAREWORD(lexeme lexbuf, pos lexbuf) }
| ['0'-'9'] ['0'-'9' '_']* ('.' ['0'-'9'] ['0'-'9' '_']*)+
| 'v' ['0'-'9'] ['0'-'9' '_']* ('.' ['0'-'9'] ['0'-'9' '_']*)*
@@ -367,7 +549,7 @@ rule token = parse
| "qq(" { ins_to_string qqstring lexbuf }
| "qw(" { let s, pos = ins qstring lexbuf in QUOTEWORDS(s, pos) }
-| eof { EOF }
+| eof { EOF(pos lexbuf) }
| _ { failwith (Printf.sprintf "%serror tokenizing <<%s>>" (pos2sfull lexbuf) (lexeme lexbuf)) }
and string = parse
@@ -480,7 +662,6 @@ and pattern_options = parse
and pod_command = parse
| [^ '\n' ]+ {
let s = lexeme lexbuf in
- if String.contains s '\t' then failwith(pos2sfull lexbuf ^ "tabulations not accepted in POD commands") else
let command = String.sub s 0 (try String.index s ' ' with Not_found -> String.length s) in
match command with
| "cut" ->
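
With this change the lexer rules return a private raw_token, and two wrappers turn that stream into the tokens the parser expects: concat_spaces folds runs of SPACE/CR into a Types.spaces annotation attached to the following token, and concat_bareword_paren keeps a one-token lookahead in saved_token so that a bareword or raw identifier immediately followed by an unspaced '(' becomes the corresponding *_PAREN variant. A sketch, assuming no other wrapping is involved, of how a driver would compose them; parse_channel is illustrative, and the Info bookkeeping done by the real driver in perl_checker.ml is omitted here:

(* Illustrative driver; the wrapper names match lexer.mll, the rest is a sketch. *)
let wrapped_token lexbuf =
  (* Lexer.token yields raw_tokens; concat_spaces re-reads until it reaches a
     non-space token and attaches the accumulated Types.spaces to it;
     concat_bareword_paren then peeks one token ahead (through saved_token)
     to choose between e.g. BAREWORD and BAREWORD_PAREN. *)
  Lexer.concat_bareword_paren (Lexer.concat_spaces Lexer.token) lexbuf

let parse_channel chan =
  let lexbuf = Lexing.from_channel chan in
  Parser.prog wrapped_token lexbuf                  (* : Types.fromparser list *)
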
diff --git a/perl_checker.src/parser.mly b/perl_checker.src/parser.mly
index 0157714..e94f622 100644
--- a/perl_checker.src/parser.mly
+++ b/perl_checker.src/parser.mly
@@ -1,69 +1,56 @@
%{ (* -*- caml -*- *)
open Types
open Common
+ open Parser_helper
- let die msg =
- failwith (Info.pos2sfull_current (Parsing.symbol_start()) (Parsing.symbol_end()) ^ msg)
+ let parse_error msg = die_rule msg
- let parse_error msg = die msg
-
- let warn msg = if false then prerr_endline msg
-
- let to_Ident = function
- | BAREWORD(name, pos) -> Ident(I_raw, None, name, pos)
- | SCALAR_IDENT(fq, name, pos) -> Ident(I_scalar, fq, name, pos)
- | ARRAY_IDENT (fq, name, pos) -> Ident(I_array, fq, name, pos)
- | HASH_IDENT (fq, name, pos) -> Ident(I_hash, fq, name, pos)
- | FUNC_IDENT (fq, name, pos) -> Ident(I_func, fq, name, pos)
- | STAR_IDENT (fq, name, pos) -> Ident(I_star, fq, name, pos)
- | RAW_IDENT (fq, name, pos) -> Ident(I_raw, fq, name, pos)
- | _ -> internal_error "Parser.to_Ident"
%}
-%token EOF
-%token <Types.pos> SPACE
-%token <string * Types.pos> NUM STRING BAREWORD REVISION COMMENT POD LABEL
-%token <string * Types.pos> COMMAND_STRING QUOTEWORDS COMPACT_HASH_SUBSCRIPT
-%token <(string * Types.pos) ref> HERE_DOC
-%token <string * string * Types.pos> PATTERN
-%token <string * string * string * Types.pos> PATTERN_SUBST
-
-%token <string option * string * Types.pos> SCALAR_IDENT ARRAY_IDENT HASH_IDENT FUNC_IDENT STAR_IDENT RAW_IDENT ARRAYLEN_IDENT
-%token <string * string * Types.pos> FUNC_DECL_WITH_PROTO
-
-%token IF ELSIF ELSE UNLESS DO WHILE UNTIL MY CONTINUE SUB LOCAL
-%token <string> FOR
-%token USE PACKAGE BEGIN END
-%token <Types.pos> PRINT NEW FORMAT
-%token AT DOLLAR PERCENT AMPERSAND STAR ARRAYLEN
-%token SEMI_COLON PKG_SCOPE
-%token PAREN PAREN_END
-%token BRACKET BRACKET_END BRACKET_HASHREF
-%token ARRAYREF ARRAYREF_END
-
-
-%token ARROW
-%token INCR DECR
-%token POWER
-%token TIGHT_NOT BIT_NEG REF
-%token PATTERN_MATCH PATTERN_MATCH_NOT
-%token MULT DIVISION MODULO REPLICATE
-%token PLUS MINUS CONCAT
-%token BIT_SHIFT_LEFT BIT_SHIFT_RIGHT
-%token LT GT
-%token <string> COMPARE_OP EQ_OP
-%token BIT_AND
-%token BIT_OR BIT_XOR
-%token AND_TIGHT
-%token OR_TIGHT
-%token DOTDOT DOTDOTDOT
-%token QUESTION_MARK COLON
-%token <string> ASSIGN
-%token COMMA RIGHT_ARROW
-%token NOT
-%token AND
-%token OR XOR
+%token <unit * (Types.spaces * Types.raw_pos)> EOF
+%token <string * (Types.spaces * Types.raw_pos)> NUM STRING BAREWORD BAREWORD_PAREN REVISION COMMENT POD LABEL PRINT_TO_STAR PRINT_TO_SCALAR
+%token <string * (Types.spaces * Types.raw_pos)> COMMAND_STRING QUOTEWORDS COMPACT_HASH_SUBSCRIPT
+
+%token <(string * Types.raw_pos) ref * (Types.spaces * Types.raw_pos)> HERE_DOC
+%token <(string * string) * (Types.spaces * Types.raw_pos)> PATTERN
+%token <(string * string * string) * (Types.spaces * Types.raw_pos)> PATTERN_SUBST
+
+%token <(string option * string) * (Types.spaces * Types.raw_pos)> SCALAR_IDENT ARRAY_IDENT HASH_IDENT FUNC_IDENT STAR_IDENT RAW_IDENT RAW_IDENT_PAREN ARRAYLEN_IDENT
+%token <(string * string) * (Types.spaces * Types.raw_pos)> FUNC_DECL_WITH_PROTO
+
+%token <string * (Types.spaces * Types.raw_pos)> FOR PRINT
+%token <unit * (Types.spaces * Types.raw_pos)> NEW FORMAT
+%token <string * (Types.spaces * Types.raw_pos)> COMPARE_OP EQ_OP
+%token <string * (Types.spaces * Types.raw_pos)> ASSIGN
+
+%token <unit * (Types.spaces * Types.raw_pos)> IF ELSIF ELSE UNLESS DO WHILE UNTIL MY CONTINUE SUB LOCAL
+%token <unit * (Types.spaces * Types.raw_pos)> USE PACKAGE BEGIN END
+%token <unit * (Types.spaces * Types.raw_pos)> AT DOLLAR PERCENT AMPERSAND STAR ARRAYLEN
+%token <unit * (Types.spaces * Types.raw_pos)> SEMI_COLON PKG_SCOPE
+%token <unit * (Types.spaces * Types.raw_pos)> PAREN PAREN_END
+%token <unit * (Types.spaces * Types.raw_pos)> BRACKET BRACKET_END BRACKET_HASHREF
+%token <unit * (Types.spaces * Types.raw_pos)> ARRAYREF ARRAYREF_END
+
+%token <unit * (Types.spaces * Types.raw_pos)> ARROW
+%token <unit * (Types.spaces * Types.raw_pos)> INCR DECR
+%token <unit * (Types.spaces * Types.raw_pos)> POWER
+%token <unit * (Types.spaces * Types.raw_pos)> TIGHT_NOT BIT_NEG REF
+%token <unit * (Types.spaces * Types.raw_pos)> PATTERN_MATCH PATTERN_MATCH_NOT
+%token <unit * (Types.spaces * Types.raw_pos)> MULT DIVISION MODULO REPLICATE
+%token <unit * (Types.spaces * Types.raw_pos)> PLUS MINUS CONCAT
+%token <unit * (Types.spaces * Types.raw_pos)> BIT_SHIFT_LEFT BIT_SHIFT_RIGHT
+%token <unit * (Types.spaces * Types.raw_pos)> LT GT
+%token <unit * (Types.spaces * Types.raw_pos)> BIT_AND
+%token <unit * (Types.spaces * Types.raw_pos)> BIT_OR BIT_XOR
+%token <unit * (Types.spaces * Types.raw_pos)> AND_TIGHT
+%token <unit * (Types.spaces * Types.raw_pos)> OR_TIGHT
+%token <unit * (Types.spaces * Types.raw_pos)> DOTDOT DOTDOTDOT
+%token <unit * (Types.spaces * Types.raw_pos)> QUESTION_MARK COLON
+%token <unit * (Types.spaces * Types.raw_pos)> COMMA RIGHT_ARROW
+%token <unit * (Types.spaces * Types.raw_pos)> NOT
+%token <unit * (Types.spaces * Types.raw_pos)> AND
+%token <unit * (Types.spaces * Types.raw_pos)> OR XOR
%nonassoc PREC_LOW
%nonassoc LOOPEX
@@ -94,289 +81,299 @@
%left ARROW
%nonassoc PAREN_END
-%left PAREN
+%left PAREN PREC_HIGH
%left ARRAYREF BRACKET
-%type <string> prog
+%type <Types.fromparser list> prog
+%type <Types.fromparser * (Types.spaces * Types.raw_pos)> expr
%start prog
%%
-prog: lines EOF { "" }
+prog: lines EOF { fst $1 }
lines: /* A collection of "lines" in the program */
-| {()}
-| sideff {()}
-| line lines {()}
+| {[], (Space_none, bpos)}
+| sideff {[fst $1], snd $1}
+| line lines {fst $1 :: fst $2, snd $1}
line:
-| decl {()}
-| if_then_else {()}
-| loop {()}
-| LABEL {()}
-| SEMI_COLON {()}
-| sideff SEMI_COLON {()}
-| BRACKET lines BRACKET_END {()}
+| decl {$1}
+| if_then_else {$1}
+| loop {$1}
+| LABEL {sp_cr($1); Label(fst $1), snd $1}
+| semi_colon {List [], snd $1}
+| sideff semi_colon {$1}
+| BRACKET lines BRACKET_END {sp_p($2); sp_p($3); Block(fst $2), snd $1}
if_then_else: /* Real conditional expressions */
-| IF PAREN expr PAREN_END BRACKET lines BRACKET_END elsif else_ {()}
-| UNLESS PAREN expr PAREN_END BRACKET lines BRACKET_END elsif else_ {()}
+| IF PAREN expr PAREN_END BRACKET lines BRACKET_END elsif else_ {sp_p($1); sp_n($2); sp_0($3); sp_0_or_cr($4); sp_p($5); sp_p($6); sp_p($7); Call_op("if", fst $3 :: Block(fst $6) :: fst $8 @ fst $9), snd $1}
+| UNLESS PAREN expr PAREN_END BRACKET lines BRACKET_END elsif else_ {sp_p($1); sp_n($2); sp_0($3); sp_0_or_cr($4); sp_p($5); sp_p($6); sp_p($7); Call_op("unless", fst $3 :: Block(fst $6) :: fst $8 @ fst $9), snd $1}
elsif:
-| {()}
-| ELSIF PAREN expr PAREN_END BRACKET lines BRACKET_END elsif {()}
+| {[], (Space_none, bpos)}
+| ELSIF PAREN expr PAREN_END BRACKET lines BRACKET_END elsif {sp_p($1); sp_n($2); sp_0($3); sp_0_or_cr($4); sp_p($5); sp_p($7); fst $3 :: Block(fst $6) :: fst $8, snd $1}
else_:
-| { () }
-| ELSE BRACKET lines BRACKET_END { () }
+| { [], (Space_none, bpos) }
+| ELSE BRACKET lines BRACKET_END {sp_p($1); sp_n($2); sp_p($3); sp_p($4); [ Block(fst $3) ], snd $1 }
loop:
-| WHILE PAREN expr PAREN_END BRACKET lines BRACKET_END cont {()}
-| UNTIL PAREN expr PAREN_END BRACKET lines BRACKET_END cont {()}
-| FOR MY SCALAR_IDENT PAREN expr PAREN_END BRACKET lines BRACKET_END cont {()}
-| FOR SCALAR_IDENT PAREN expr PAREN_END BRACKET lines BRACKET_END cont {()}
-| FOR PAREN expr PAREN_END BRACKET lines BRACKET_END cont {()}
-| FOR PAREN expr_or_empty SEMI_COLON expr_or_empty SEMI_COLON expr_or_empty PAREN_END BRACKET lines BRACKET_END {()}
+| WHILE PAREN expr PAREN_END BRACKET lines BRACKET_END cont {sp_p($1); sp_n($2); sp_0($3); sp_0_or_cr($4); sp_p($5); sp_p($6); sp_p($7); Call_op("while", fst $3 :: fst $6), snd $1}
+| UNTIL PAREN expr PAREN_END BRACKET lines BRACKET_END cont {sp_p($1); sp_n($2); sp_0($3); sp_0_or_cr($4); sp_p($5); sp_p($6); sp_p($7); Call_op("until", fst $3 :: fst $6), snd $1}
+| FOR MY SCALAR_IDENT PAREN expr PAREN_END BRACKET lines BRACKET_END cont {sp_p($1); check_foreach($1); sp_n($4); sp_0($5); sp_0_or_cr($6); sp_p($7); sp_p($8); sp_p($9); Call_op("foreach my", to_Ident $3 :: fst $5 :: fst $8), snd $1}
+| FOR SCALAR_IDENT PAREN expr PAREN_END BRACKET lines BRACKET_END cont { die_rule "don't use for without \"my\"ing the iteration variable" }
+| FOR PAREN expr PAREN_END BRACKET lines BRACKET_END cont {sp_p($1); sp_n($2); sp_0($3); sp_0_or_cr($4); sp_p($5); sp_p($6); sp_p($7); check_foreach($1); Call_op("foreach", fst $3 :: fst $6), snd $1}
+| FOR PAREN expr_or_empty semi_colon expr_or_empty semi_colon expr_or_empty PAREN_END BRACKET lines BRACKET_END {sp_p($1); sp_n($2); sp_0($3); check_for($1); Call_op("for", fst $3 :: fst $5 :: fst $7 :: fst $10), snd $1}
cont: /* Continue blocks */
-| {()}
-| CONTINUE BRACKET lines BRACKET_END {()}
+| {(), (Space_none, bpos)}
+| CONTINUE BRACKET lines BRACKET_END {sp_p($1); sp_n($2); sp_p($4); (), snd $1}
sideff: /* An expression which may have a side-effect */
-| error {()}
-| expr {()}
-| expr IF expr {()}
-| expr UNLESS expr {()}
-| expr WHILE expr {()}
-| expr UNTIL expr {()}
-| expr FOR expr {()}
+| expr {$1}
+| expr IF expr {let f = "if" in sp_p($2); sp_p($3); check_no_paren f $3; Call_op(f ^ " infix", [ fst $1 ; fst $3 ]), snd $1}
+| expr UNLESS expr {let f = "unless" in sp_p($2); sp_p($3); check_no_paren f $3; Call_op(f ^ " infix", [ fst $1 ; fst $3 ]), snd $1}
+| expr WHILE expr {let f = "while" in sp_p($2); sp_p($3); check_no_paren f $3; Call_op(f ^ " infix", [ fst $1 ; fst $3 ]), snd $1}
+| expr UNTIL expr {let f = "until" in sp_p($2); sp_p($3); check_no_paren f $3; Call_op(f ^ " infix", [ fst $1 ; fst $3 ]), snd $1}
+| expr FOR expr {let f = "for" in sp_p($2); sp_p($3); check_foreach($2); check_no_paren f $3; Call_op(f ^ " infix", [ fst $1 ; fst $3 ]), snd $1}
decl:
-| FORMAT BAREWORD ASSIGN {()}
-| FORMAT ASSIGN {()}
-| func_decl SEMI_COLON {()}
-| func_decl BRACKET lines BRACKET_END {()}
-| func_decl BRACKET BRACKET expr BRACKET_END BRACKET_END {()}
-| func_decl BRACKET BRACKET expr BRACKET_END SEMI_COLON BRACKET_END {()}
-| PACKAGE word SEMI_COLON {()}
-| BEGIN BRACKET lines BRACKET_END {()}
-| END BRACKET lines BRACKET_END {()}
-| use {()}
+| FORMAT BAREWORD ASSIGN {Too_complex, snd $1}
+| FORMAT ASSIGN {Too_complex, snd $1}
+| func_decl semi_colon {die_rule (if snd (fst $1) = "" then "there is no need to pre-declare in Perl!" else "please don't use prototype pre-declaration") }
+| func_decl BRACKET BRACKET_END {sp_n($2); sp_0_or_cr($3); let name, proto = fst $1 in sub_declaration (name, proto) [], snd $1}
+| func_decl BRACKET lines BRACKET_END {sp_n($2); sp_p($3); sp_p($4); sub_declaration (fst $1) (fst $3), snd $1}
+| func_decl BRACKET BRACKET expr BRACKET_END BRACKET_END {sp_n($2); sp_p($3); sp_p($4); sp_p($5); sp_p($6); sub_declaration (fst $1) [Ref(I_hash, fst $4)], snd $1}
+| func_decl BRACKET BRACKET expr BRACKET_END semi_colon BRACKET_END {sp_n($2); sp_p($3); sp_p($4); sp_p($5); sp_p($7); sub_declaration (fst $1) [Ref(I_hash, fst $4)], snd $1}
+| PACKAGE word semi_colon {sp_0_or_cr($1); Package(fst $2), snd $1}
+| BEGIN BRACKET lines BRACKET_END {sp_0_or_cr($1); sp_1($2); sp_p($3); sp_p($4); Sub_declaration(Ident(None, "BEGIN", get_pos $1), "", fst $3), snd $1}
+| END BRACKET lines BRACKET_END {sp_0_or_cr($1); sp_1($2); sp_p($3); sp_p($4); Sub_declaration(Ident(None, "END", get_pos $1), "", fst $3), snd $1}
+| use {$1}
use:
-| use_word listexpr SEMI_COLON {()}
+| use_word listexpr semi_colon {Use(fst $1, fst $2), snd $1}
use_word:
-| use_revision word COLON {()}
-| use_revision word {()}
-| use_revision {()}
+| use_revision word comma {fst $2, snd $1}
+| use_revision word {fst $2, snd $1}
+| use_revision {Ident(None, "", get_pos $1), snd $1}
use_revision:
-| USE REVISION COLON {()}
-| USE REVISION {()}
-| USE {()}
+| USE REVISION comma {$1}
+| USE REVISION {$1}
+| USE {$1}
func_decl:
-| SUB word {()}
-| FUNC_DECL_WITH_PROTO {()}
+| SUB word {(fst $2, ""), snd $1}
+| FUNC_DECL_WITH_PROTO {(Ident(None, fst(fst $1), get_pos $1), snd(fst $1)), snd $1}
listexpr: /* Basic list expressions */
-| %prec PREC_LOW {()}
-| argexpr %prec PREC_LOW {()}
+| %prec PREC_LOW {[], (Space_none, bpos)}
+| argexpr %prec PREC_LOW {$1}
expr: /* Ordinary expressions; logical combinations */
-| expr AND expr {()}
-| expr OR expr {()}
-| argexpr %prec PREC_LOW {()}
+| expr AND expr {sp_p($2); sp_p($3); Call_op("and", [ fst $1; fst $3 ]), snd $1}
+| expr OR expr {sp_p($2); sp_p($3); Call_op("or", [ fst $1; fst $3 ]), snd $1}
+| argexpr %prec PREC_LOW {List(fst $1), snd $1}
argexpr: /* Expressions are a list of terms joined by commas */
-| argexpr comma {()}
-| argexpr comma term {()}
-| argexpr comma BRACKET expr BRACKET_END {()}
-| term %prec PREC_LOW {()}
+| argexpr comma {fst $1, snd $1}
+| argexpr comma term {fst $1 @ [fst $3], snd $1}
+| argexpr comma BRACKET expr BRACKET_END {sp_p($3); sp_p($5); fst $1 @ [ Ref(I_hash, fst $4) ], snd $1}
+| term %prec PREC_LOW {[fst $1], snd $1}
/********************************************************************************/
term:
-| term binop term {()}
-| term binop BRACKET expr BRACKET_END {()}
-| term LT term {()}
-| term LT BRACKET expr BRACKET_END {()}
-| term GT term {()}
-| term GT BRACKET expr BRACKET_END {()}
+| term binop term {Call_op(fst $2, [fst $1 ; fst $3]), snd $1}
+| term binop BRACKET expr BRACKET_END {sp_p($3); sp_p($5); Call_op(fst $2, [fst $1 ; Ref(I_hash, fst $4)]), snd $1}
+| term LT term {Call_op("<", [fst $1 ; fst $3]), snd $1}
+| term GT term {Call_op(">", [fst $1 ; fst $3]), snd $1}
+
+| term PATTERN_MATCH PATTERN {Call_op("m//", fst $1 :: from_PATTERN $3), snd $1}
+| term PATTERN_MATCH_NOT PATTERN {Call_op("!m//", fst $1 :: from_PATTERN $3), snd $1}
+| term PATTERN_MATCH PATTERN_SUBST {Call_op("s///", fst $1 :: from_PATTERN_SUBST $3), snd $1}
+
+| term PATTERN_MATCH scalar { Too_complex, snd $1 }
+| term PATTERN_MATCH_NOT scalar { Too_complex, snd $1 }
+
+| term PATTERN_MATCH STRING {failwith (msg_with_pos (snd (snd $3)) "use a regexp, not a string")}
+| term PATTERN_MATCH_NOT STRING {failwith (msg_with_pos (snd (snd $3)) "use a regexp, not a string")}
/* Unary operators and terms */
-| MINUS term %prec UNARY_MINUS {()}
-| TIGHT_NOT term {()}
-| BIT_NEG term {()}
-| INCR term {()}
-| DECR term {()}
-| term INCR {()}
-| term DECR {()}
+| MINUS term %prec UNARY_MINUS {Call_op("- unary", [fst $2]), snd $1}
+| TIGHT_NOT term {Call_op("not", [fst $2]), snd $1}
+| BIT_NEG term {Call_op("~", [fst $2]), snd $1}
+| INCR term {Call_op("++", [fst $2]), snd $1}
+| DECR term {Call_op("--", [fst $2]), snd $1}
+| term INCR {sp_0($2); Call_op("++ post", [fst $1]), snd $1}
+| term DECR {sp_0($2); Call_op("-- post", [fst $1]), snd $1}
-| NOT argexpr {()}
+| NOT argexpr {Call_op("not", fst $2), snd $1}
/* Constructors for anonymous data */
-| arrayref {()} /* [ 1, 2 ] */
-| BRACKET BRACKET_END {()} /* empty hash */
-| BRACKET_HASHREF expr_or_empty BRACKET_END %prec PAREN {()} /* { foo => "Bar" } */
-| SUB BRACKET lines BRACKET_END %prec PAREN {()}
-| termdo {()}
-| term question_mark_ colon_ {()}
-| REF term {()} /* \$x, \@y, \%z */
-| REF BRACKET expr BRACKET_END {()} /* \$x, \@y, \%z */
-| my %prec UNIOP {()}
-| LOCAL term %prec UNIOP {()}
+| ARRAYREF ARRAYREF_END {sp_0($2); Ref(I_array, List[]), snd $1}
+| arrayref_start ARRAYREF_END {Ref(I_array, List(fst $1)), snd $1}
+| arrayref_start expr ARRAYREF_END {Ref(I_array, List(fst $1 @ [fst $2])), snd $1}
+| arrayref_start BRACKET expr BRACKET_END ARRAYREF_END {Ref(I_array, List(fst $1 @ [Ref(I_hash, fst $3)])), snd $1}
-| parenthesized {()} /* (1, 2) */
-| parenthesized arrayref {()} /* list slice */
+| BRACKET BRACKET_END {Ref(I_hash, List []), snd $1} /* empty hash */
+| BRACKET_HASHREF expr BRACKET_END %prec PREC_HIGH {sp_p($3); Ref(I_hash, fst $2), snd $1} /* { foo => "Bar" } */
+| SUB BRACKET BRACKET_END %prec PREC_HIGH {sp_n($2); sp_0($3); Anonymous_sub(Block[]), snd $1}
+| SUB BRACKET lines BRACKET_END %prec PREC_HIGH {sp_n($2); sp_p($4); Anonymous_sub(Block(fst $3)), snd $1}
-| variable {()}
+| termdo {$1}
+| term question_mark_ colon_ { Call_op("?:", [ fst $1 ; fst $2; fst $3]), snd $1}
+| REF term { Ref(I_scalar, fst $2), snd $1} /* \$x, \@y, \%z */
+| my %prec UNIOP {List(fst $1), snd $1}
+| LOCAL term %prec UNIOP {Local(fst $2), snd $1}
-| subscripted {()}
+| parenthesized {List(fst $1), snd $1} /* (1, 2) */
+| parenthesized arrayref {Deref_with(I_array, List(fst $1), List(fst $2)), snd $1} /* list slice */
-| array arrayref {()} /* array slice */
-| array BRACKET expr BRACKET_END {()} /* @hash{@keys} */
+| variable {$1}
+| subscripted {$1}
-/* function_calls */
-| func parenthesized {()} /* &foo(@args) */
-| word argexpr {()} /* foo(@args) */
-| word BRACKET lines BRACKET_END listexpr %prec LSTOP {()} /* map { foo } @bar */
-| word BRACKET BRACKET expr BRACKET_END BRACKET_END listexpr %prec LSTOP {()} /* map { foo } @bar */
-| word BRACKET BRACKET expr BRACKET_END SEMI_COLON BRACKET_END listexpr %prec LSTOP {()} /* map { foo } @bar */
-
-| term ARROW word_or_scalar parenthesized {warn "term->word_or_scalar(expr_or_empty) -> function_call"} /* $foo->bar(list) */
-| term ARROW word_or_scalar {warn "term->word_or_scalar -> function_call"} /* $foo->bar */
-
-| NEW word listexpr {()} /* new Class @args */
-| print listexpr {()}
-
-| word {()}
-
-| NUM {()}
-| STRING {()}
-| REVISION {()}
-| COMMAND_STRING {()}
-| QUOTEWORDS {()}
-| HERE_DOC {()}
-| PATTERN {()}
-| PATTERN_SUBST {()}
-| diamond {()}
+| array arrayref { Deref_with(I_array, fst $1, List(fst $2)), snd $1} /* array slice: @array[vals] */
+| array BRACKET expr BRACKET_END {sp_0($2); sp_0($4); Deref_with(I_hash, array_ident_to_hash_ident $1, fst $3), snd $1} /* hash slice: @hash{@keys} */
-diamond:
-| LT GT {()}
-| LT term GT {()}
-print:
-| PRINT bareword COLON {()}
-| PRINT bareword {()} /* print FH @args */
-| PRINT scalar COLON {()}
-| PRINT scalar {()} /* print $fh @args */
-| PRINT {()}
+/* function_calls */
+| func parenthesized {Call(fst $1, fst $2), snd $1} /* &foo(@args) */
+| word argexpr {check_parenthesized_first_argexpr (string_of_Ident (fst $1)) $2; Call(fst $1, fst $2), snd $1} /* foo $a, $b */
+| word_paren parenthesized {Call(fst $1, fst $2), snd $1} /* foo(@args) */
+| word BRACKET lines BRACKET_END listexpr %prec LSTOP {sp_n($2); sp_p($4); Call(fst $1, Anonymous_sub(Block(fst $3)) :: fst $5), snd $1} /* map { foo } @bar */
+| word BRACKET BRACKET expr BRACKET_END BRACKET_END listexpr %prec LSTOP {sp_n($2); sp_p($3); sp_p($5); sp_p($6); Call(fst $1, Anonymous_sub(Ref(I_hash, fst $4)) :: fst $7), snd $1} /* map { { foo } } @bar */
+| word BRACKET BRACKET expr BRACKET_END semi_colon BRACKET_END listexpr %prec LSTOP {sp_n($2); sp_p($3); sp_p($5); sp_p($7); Call(fst $1, Anonymous_sub(Ref(I_hash, fst $4)) :: fst $8), snd $1} /* map { { foo }; } @bar */
+
+| term ARROW word_or_scalar parenthesized {sp_0($2); Method_call(fst $1, fst $3, fst $4), snd $1} /* $foo->bar(list) */
+| term ARROW word_or_scalar {sp_0($2); Method_call(fst $1, fst $3, []), snd $1} /* $foo->bar */
+
+| NEW word listexpr { Method_call(fst $2, Ident(None, "new", get_pos $1), fst $3), snd $1} /* new Class @args */
+
+| PRINT { Call_op("print", var_STDOUT :: [ var_dollar_ ]), snd $1 }
+| PRINT argexpr {check_parenthesized_first_argexpr (fst $1) $2; Call_op("print", var_STDOUT :: fst $2), snd $1 }
+| PRINT_TO_STAR argexpr { Call_op("print", Deref(I_star, Ident(None, fst $1, get_pos $1)) :: fst $2), snd $1 }
+| PRINT_TO_SCALAR { Call_op("print", var_STDOUT :: [ Deref(I_scalar, Ident(None, fst $1, get_pos $1)) ]), snd $1 }
+| PRINT_TO_SCALAR argexpr { Call_op("print", Deref(I_scalar, Ident(None, fst $1, get_pos $1)) :: fst $2), snd $1 }
+
+| word {$1}
+
+| NUM {Num(fst $1, get_pos $1), snd $1}
+| STRING {to_String $1, snd $1}
+| REVISION {to_String $1, snd $1}
+| COMMAND_STRING {Call_op("``", [to_String $1]), snd $1}
+| QUOTEWORDS {Call_op("qw", [to_String $1]), snd $1}
+| HERE_DOC {String(fst!(fst $1), get_pos $1), snd $1}
+| PATTERN {Call_op("m//", var_dollar_ :: from_PATTERN $1), snd $1}
+| PATTERN_SUBST {Call_op("s///", var_dollar_ :: from_PATTERN_SUBST $1), snd $1}
+| diamond {$1}
-| PRINT bareword BRACKET {die "use parentheses around print"}
+diamond:
+| LT GT {sp_0($2); Call_op("<>", []), snd $1}
+| LT term GT {sp_0($3); Call_op("<>", [fst $2]), snd $1}
subscripted: /* Some kind of subscripted expression */
-| variable PKG_SCOPE bracket_subscript {()} /* *main::{something} */
-| scalar bracket_subscript {()} /* $foo{bar} */
-| scalar arrayref {()} /* $array[$element] */
-| term ARROW bracket_subscript {()} /* somehref->{bar} */
-| term ARROW arrayref {()} /* somearef->[$element] */
-| term ARROW parenthesized {()} /* $subref->(@args) */
-| subscripted bracket_subscript {()} /* $foo->[bar]{baz} */
-| subscripted arrayref {()} /* $foo->[$bar][$baz] */
-| subscripted parenthesized {()} /* $foo->{bar}(@args) */
+| variable PKG_SCOPE bracket_subscript {sp_0($2); Too_complex, snd $1} /* $foo::{something} */
+| scalar bracket_subscript {Deref_with(I_hash , fst $1, fst $2), snd $1} /* $foo{bar} */
+| scalar arrayref {Deref_with(I_array, fst $1, only_one $2), snd $1} /* $array[$element] */
+| term ARROW bracket_subscript {sp_0($2); Deref_with(I_hash , fst $1, fst $3), snd $1} /* somehref->{bar} */
+| term ARROW arrayref {sp_0($2); Deref_with(I_array, fst $1, only_one $3), snd $1} /* somearef->[$element] */
+| term ARROW parenthesized {sp_0($2); Deref_with(I_func , fst $1, List(fst $3)), snd $1} /* $subref->(@args) */
+| subscripted bracket_subscript {Deref_with(I_hash , fst $1, fst $2), snd $1} /* $foo->[bar]{baz} */
+| subscripted arrayref {Deref_with(I_array, fst $1, only_one $2), snd $1} /* $foo->[$bar][$baz] */
+| subscripted parenthesized {Deref_with(I_func , fst $1, List(fst $2)), snd $1} /* $foo->{bar}(@args) */
arrayref:
-| arrayref_start ARRAYREF_END {()}
-| arrayref_start expr ARRAYREF_END {()}
-| arrayref_start BRACKET expr BRACKET_END ARRAYREF_END {()}
+| arrayref_start ARRAYREF_END {sp_0($2); fst $1, snd $1}
+| arrayref_start expr ARRAYREF_END {sp_0($3); fst $1 @ [fst $2], snd $1}
+| arrayref_start BRACKET expr BRACKET_END ARRAYREF_END {sp_p($2); sp_p($4); sp_0($5); fst $1 @ [Ref(I_hash, fst $3)], snd $1}
parenthesized:
-| parenthesized_start PAREN_END {()}
-| parenthesized_start expr PAREN_END {()}
-| parenthesized_start BRACKET expr BRACKET_END PAREN_END {()}
+| parenthesized_start PAREN_END {sp_0_or_cr($2); fst $1, snd $1}
+| parenthesized_start expr PAREN_END {sp_0_or_cr($3); fst $1 @ [fst $2], snd $1}
+| parenthesized_start BRACKET expr BRACKET_END PAREN_END {sp_p($4); sp_0_or_cr($5); fst $1 @ [Ref(I_hash, fst $3)], snd $1}
arrayref_start:
-| ARRAYREF {()}
-| arrayref_start expr comma {()}
-| arrayref_start BRACKET expr BRACKET_END comma {()}
+| ARRAYREF {[], snd $1}
+| arrayref_start BRACKET expr BRACKET_END comma {sp_p($4); fst $1 @ [Ref(I_hash, fst $3)], snd $1}
parenthesized_start:
-| PAREN {()}
-| parenthesized_start expr comma {()}
-| parenthesized_start BRACKET expr BRACKET_END comma {()}
+| PAREN {[], snd $1}
+| parenthesized_start BRACKET expr BRACKET_END comma {sp_p($4); fst $1 @ [Ref(I_hash, fst $3)], snd $1}
my: /* Things that can be "my"'d */
-| MY parenthesized {()}
-| MY scalar {()}
-| MY hash {()}
-| MY array {()}
+| MY parenthesized {List.map (fun e -> My e) (fst $2), snd $1}
+| MY scalar {[My(fst $2)], snd $1}
+| MY hash {[My(fst $2)], snd $1}
+| MY array {[My(fst $2)], snd $1}
termdo: /* Things called with "do" */
-| DO term %prec UNIOP {()} /* do $filename */
-| DO BRACKET lines BRACKET_END %prec PAREN {()} /* do { code */
+| DO term %prec UNIOP { die_rule "\"do EXPR\" not allowed" } /* do $filename */
+| DO BRACKET lines BRACKET_END %prec PREC_HIGH {sp_n($2); sp_p($4); Block(fst $3), snd $1} /* do { code */
question_mark_:
-| QUESTION_MARK term {()}
-| QUESTION_MARK BRACKET expr BRACKET_END {()}
+| QUESTION_MARK term {sp_n($1); fst $2, snd $1}
+| QUESTION_MARK BRACKET expr BRACKET_END {sp_n($1); sp_p($2); sp_p($4); Ref(I_hash, fst $3), snd $1}
colon_:
-| COLON term {()}
-| COLON BRACKET expr BRACKET_END {()}
+| COLON term {sp_n($1); fst $2, snd $1}
+| COLON BRACKET expr BRACKET_END {sp_n($1); sp_p($2); sp_p($4); Ref(I_hash, fst $3), snd $1}
bracket_subscript:
-| BRACKET expr BRACKET_END {()}
-| COMPACT_HASH_SUBSCRIPT {()}
+| BRACKET expr BRACKET_END {sp_0($1); sp_0($3); only_one_in_List $2, snd $1}
+| COMPACT_HASH_SUBSCRIPT {sp_0($1); to_String $1, snd $1}
binop:
-| ASSIGN {()}
-| POWER {()}
-| MULT {()} | DIVISION {()} | MODULO {()} | REPLICATE {()}
-| PLUS {()} | MINUS {()} | CONCAT {()}
-| BIT_SHIFT_LEFT {()} | BIT_SHIFT_RIGHT {()}
-| COMPARE_OP {()}
-| EQ_OP {()}
-| BIT_AND {()}
-| BIT_OR {()} | BIT_XOR {()}
-| DOTDOT {()} | DOTDOTDOT {()}
-| AND_TIGHT {()}
-| OR_TIGHT {()} | XOR {()}
-| PATTERN_MATCH {()} | PATTERN_MATCH_NOT {()}
+| ASSIGN {"=", snd $1}
+| POWER {"**", snd $1}
+| MULT {"*", snd $1} | DIVISION {"/", snd $1} | MODULO {"%", snd $1} | REPLICATE {"x", snd $1}
+| PLUS {"+", snd $1} | MINUS {"-", snd $1} | CONCAT {".", snd $1}
+| BIT_SHIFT_LEFT {"<<", snd $1} | BIT_SHIFT_RIGHT {">>", snd $1}
+| COMPARE_OP {fst $1, snd $1}
+| EQ_OP {fst $1, snd $1}
+| BIT_AND {"&", snd $1}
+| BIT_OR {"|", snd $1} | BIT_XOR {"^", snd $1}
+| DOTDOT {"..", snd $1} | DOTDOTDOT {"...", snd $1}
+| AND_TIGHT {"&&", snd $1}
+| OR_TIGHT {"||", snd $1} | XOR {"xor", snd $1}
variable:
-| scalar %prec PAREN {()}
-| star %prec PAREN {()}
-| hash %prec PAREN {()}
-| array %prec PAREN {()}
-| arraylen %prec PAREN {()} /* $#x, $#{ something } */
-| func %prec PAREN {()} /* &foo; */
+| scalar %prec PREC_HIGH {$1}
+| star %prec PREC_HIGH {$1}
+| hash %prec PREC_HIGH {$1}
+| array %prec PREC_HIGH {$1}
+| arraylen %prec PREC_HIGH {$1} /* $#x, $#{ something } */
+| func %prec PREC_HIGH {$1} /* &foo; */
word:
-| bareword { fst $1 }
-| RAW_IDENT {
- match $1 with
- | None, name, _ -> name
- | Some s, name, _ -> s ^ "::" ^ name
- }
+| bareword { $1 }
+| RAW_IDENT { to_Ident $1, snd $1 }
+
+comma: COMMA {$1} | RIGHT_ARROW {sp_p($1); $1}
-comma: COMMA {()} | RIGHT_ARROW {()}
+semi_colon: SEMI_COLON {sp_0($1); $1}
word_or_scalar:
-| word {()}
-| scalar {()}
+| word {$1}
+| scalar {$1}
+| word_paren {$1}
bareword:
-| NEW { "new", $1 }
-| FORMAT { "format", $1 }
-| BAREWORD { $1 }
-
-arraylen: ARRAYLEN_IDENT {()} | ARRAYLEN scalar {()} | ARRAYLEN BRACKET lines BRACKET_END {()}
-scalar: SCALAR_IDENT {()} | DOLLAR scalar {()} | DOLLAR BRACKET lines BRACKET_END {()} | DOLLAR BRACKET BRACKET expr BRACKET_END BRACKET_END {()}
-func: FUNC_IDENT {()} | AMPERSAND scalar {()} | AMPERSAND BRACKET lines BRACKET_END {()}
-array: ARRAY_IDENT {()} | AT scalar {()} | AT BRACKET lines BRACKET_END {()}
-hash: HASH_IDENT {()} | PERCENT scalar {()} | PERCENT BRACKET lines BRACKET_END {()}
-star: STAR_IDENT {()} | STAR scalar {()} | STAR BRACKET lines BRACKET_END {()}
-
-expr_or_empty: {()} | expr {()}
-
+| NEW { Ident(None, "new", get_pos $1), snd $1 }
+| FORMAT { Ident(None, "format", get_pos $1), snd $1 }
+| BAREWORD { Ident(None, fst $1, get_pos $1), snd $1 }
+
+word_paren:
+| BAREWORD_PAREN { Ident(None, fst $1, get_pos $1), snd $1 }
+| RAW_IDENT_PAREN { to_Ident $1, snd $1 }
+
+arraylen: ARRAYLEN_IDENT {Deref(I_arraylen, to_Ident $1), snd $1} | ARRAYLEN scalar {Deref(I_arraylen, fst $2), snd $1} | ARRAYLEN BRACKET lines BRACKET_END {sp_0($2); Deref(I_arraylen, Block(fst $3)), snd $1}
+scalar: SCALAR_IDENT {Deref(I_scalar , to_Ident $1), snd $1} | DOLLAR scalar {Deref(I_scalar , fst $2), snd $1} | DOLLAR BRACKET lines BRACKET_END {sp_0($2); Deref(I_scalar , Block(fst $3)), snd $1} | DOLLAR BRACKET BRACKET expr BRACKET_END BRACKET_END {sp_0($2); sp_0($3); sp_p($5); sp_0($6); Deref(I_scalar, Ref(I_hash, fst $4)), snd $1}
+func: FUNC_IDENT {Deref(I_func , to_Ident $1), snd $1} | AMPERSAND scalar {Deref(I_func , fst $2), snd $1} | AMPERSAND BRACKET lines BRACKET_END {sp_0($2); Deref(I_func , Block(fst $3)), snd $1}
+array: ARRAY_IDENT {Deref(I_array , to_Ident $1), snd $1} | AT scalar {Deref(I_array , fst $2), snd $1} | AT BRACKET lines BRACKET_END {sp_0($2); Deref(I_array , Block(fst $3)), snd $1}
+hash: HASH_IDENT {Deref(I_hash , to_Ident $1), snd $1} | PERCENT scalar {Deref(I_hash , fst $2), snd $1} | PERCENT BRACKET lines BRACKET_END {sp_0($2); Deref(I_hash , Block(fst $3)), snd $1}
+star: STAR_IDENT {Deref(I_star , to_Ident $1), snd $1} | STAR scalar {Deref(I_star , fst $2), snd $1} | STAR BRACKET lines BRACKET_END {sp_0($2); Deref(I_star , Block(fst $3)), snd $1}
+
+expr_or_empty: {Block [], (Space_none, bpos)} | expr {$1}
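
Throughout the grammar above, every token and nonterminal carries its value paired with spacing/position data, so an action typically keeps `fst` of a sub-result and reuses `snd $1` as the position of the whole construct. A minimal OCaml sketch of that convention (the type names mirror types.mli; plus_action is purely illustrative and not a function from the source):

    (* sketch: the attribute shape threaded through the parser actions *)
    type raw_pos = int * int
    type spaces = Space_0 | Space_1 | Space_n | Space_cr | Space_none
    type 'a attributed = 'a * (spaces * raw_pos)

    (* how a rule such as  binop: PLUS {"+", snd $1}  is typed: keep the
       operator text, propagate the first token's spacing and position *)
    let plus_action (tok : unit attributed) : string attributed = "+", snd tok
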
diff --git a/perl_checker.src/parser_helper.ml b/perl_checker.src/parser_helper.ml
new file mode 100644
index 0000000..cc91c83
--- /dev/null
+++ b/perl_checker.src/parser_helper.ml
@@ -0,0 +1,125 @@
+open Types
+open Common
+
+let bpos = -1, -1
+let msg_with_pos (start, end_) msg = Info.pos2sfull_current start end_ ^ msg
+let die_with_pos raw_pos msg = failwith (msg_with_pos raw_pos msg)
+let warn raw_pos msg = prerr_endline (msg_with_pos raw_pos msg)
+
+let die_rule msg = die_with_pos (Parsing.symbol_start(), Parsing.symbol_end()) msg
+let debug msg = if false then prerr_endline msg
+
+let raw_pos2pos(a, b) = !Info.current_file, a, b
+let get_pos (_, (_, pos)) = raw_pos2pos pos
+
+let warn_too_many_space start = warn (start, start) "you should have only one space here"
+let warn_no_space start = warn (start, start) "you should have a space here"
+let warn_cr start = warn (start, start) "you should not have a carriage-return (\\n) here"
+let warn_space start = warn (start, start) "you should not have a space here"
+
+let sp_0(_, (spaces, (start, _))) =
+ match spaces with
+ | Space_none -> ()
+ | Space_0 -> ()
+ | Space_1
+ | Space_n -> warn_space start
+ | Space_cr -> warn_cr start
+
+let sp_0_or_cr(_, (spaces, (start, _))) =
+ match spaces with
+ | Space_none -> ()
+ | Space_0 -> ()
+ | Space_1
+ | Space_n -> warn_space start
+ | Space_cr -> ()
+
+let sp_1(_, (spaces, (start, _))) =
+ match spaces with
+ | Space_none -> ()
+ | Space_0 -> warn_no_space start
+ | Space_1 -> ()
+ | Space_n -> warn_too_many_space start
+ | Space_cr -> warn_cr start
+
+let sp_n(_, (spaces, (start, _))) =
+ match spaces with
+ | Space_none -> ()
+ | Space_0 -> warn_no_space start
+ | Space_1 -> ()
+ | Space_n -> ()
+ | Space_cr -> warn_cr start
+
+let sp_p(_, (spaces, (start, _))) =
+ match spaces with
+ | Space_none -> ()
+ | Space_0 -> warn_no_space start
+ | Space_1 -> ()
+ | Space_n -> ()
+ | Space_cr -> ()
+
+let sp_cr(_, (spaces, (start, _))) =
+ match spaces with
+ | Space_none -> ()
+ | Space_0
+ | Space_1
+ | Space_n -> warn (start, start) "you should have a carriage-return (\\n) here"
+ | Space_cr -> ()
+
+let not_complex = function
+ | Call_op("?:", _) -> false
+ | _ -> true
+
+let string_of_Ident = function
+ | Ident(None, s, _) -> s
+ | Ident(Some fq, s, _) -> fq ^ "::" ^ s
+ | _ -> internal_error "string_of_Ident"
+
+let check_parenthesized_first_argexpr word (e, (_, (start, _)) as ex) =
+ let want_space = word.[0] = '-' in
+ match e with
+ | List[List[_]] :: l ->
+ if want_space then
+ if l = [] then sp_n(ex) else die_with_pos (start, start) "can't handle this nicely"
+ else
+ if l = [] then sp_0(ex) else die_with_pos (start, start) "you must not have a space here"
+ | _ -> sp_p(ex)
+
+let check_foreach (s, (_, pos)) = if s = "for" then warn pos "write \"foreach\" instead of \"for\""
+let check_for (s, (_, pos)) = if s = "foreach" then warn pos "write \"for\" instead of \"foreach\""
+
+let check_no_paren f_name (e, (_, pos)) =
+ match e with
+ | List[List[List[e]]] when not_complex e -> warn pos (Printf.sprintf "''... %s (...)'' can be written ''... %s ...''" f_name f_name)
+ | _ -> ()
+
+let to_Ident ((fq, name), (_, pos)) = Ident(fq, name, raw_pos2pos pos)
+let to_String (s, (_, pos)) = String(s, raw_pos2pos pos)
+
+let rec only_one (l, (spaces, pos)) =
+ match l with
+ | [List l'] -> only_one (l', (spaces, pos))
+ | [e] -> e
+ | [] -> die_with_pos pos "you must give one argument"
+ | _ -> die_with_pos pos "you must give only one argument"
+
+let only_one_in_List (e, both) =
+ match e with
+ | List l -> only_one(l, both)
+ | _ -> e
+
+let array_ident_to_hash_ident (e, (_, pos)) =
+ match e with
+ | Deref(I_array, e) -> Deref(I_hash, e)
+ | _ -> die_with_pos pos "internal error (array_ident_to_hash_ident)"
+
+let from_PATTERN ((s, opts), (_, pos)) = [ String(s, raw_pos2pos pos) ; String(opts, raw_pos2pos pos) ]
+let from_PATTERN_SUBST ((s1, s2, opts), (_, pos)) = [ String(s1, raw_pos2pos pos) ; String(s2, raw_pos2pos pos) ; String(opts, raw_pos2pos pos) ]
+
+let to_List = function
+ | [e] -> e
+ | l -> List l
+
+let sub_declaration (name, proto) body = Sub_declaration(name, proto, body)
+
+let var_dollar_ = Deref(I_scalar, Ident(None, "_", raw_pos2pos bpos))
+let var_STDOUT = Deref(I_star, Ident(None, "STDOUT", raw_pos2pos bpos))
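
As a usage sketch of the helpers above (the qualified name and positions are made up): to_Ident builds an Ident node from a token payload of (package option, name) plus its raw position, and string_of_Ident prints it back in Perl's package syntax.

    (* hypothetical token payload: ("MDK::Common", "member") spanning columns 3-9 *)
    let id = Parser_helper.to_Ident ((Some "MDK::Common", "member"), ((), (3, 9)))
    let () = print_endline (Parser_helper.string_of_Ident id)  (* "MDK::Common::member" *)
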
diff --git a/perl_checker.src/parser_helper.mli b/perl_checker.src/parser_helper.mli
new file mode 100644
index 0000000..5d1c47b
--- /dev/null
+++ b/perl_checker.src/parser_helper.mli
@@ -0,0 +1,42 @@
+val bpos : int * int
+val msg_with_pos : int * int -> string -> string
+val die_with_pos : int * int -> string -> 'a
+val warn : int * int -> string -> unit
+val die_rule : string -> 'a
+val debug : string -> unit
+val raw_pos2pos : 'a * 'b -> string * 'a * 'b
+val get_pos : 'a * ('b * ('c * 'd)) -> string * 'c * 'd
+val warn_too_many_space : int -> unit
+val warn_no_space : int -> unit
+val warn_cr : int -> unit
+val warn_space : int -> unit
+val sp_0 : 'a * (Types.spaces * (int * 'b)) -> unit
+val sp_0_or_cr : 'a * (Types.spaces * (int * 'b)) -> unit
+val sp_1 : 'a * (Types.spaces * (int * 'b)) -> unit
+val sp_n : 'a * (Types.spaces * (int * 'b)) -> unit
+val sp_p : 'a * (Types.spaces * (int * 'b)) -> unit
+val sp_cr : 'a * (Types.spaces * (int * 'b)) -> unit
+val not_complex : Types.fromparser -> bool
+val string_of_Ident : Types.fromparser -> string
+val check_parenthesized_first_argexpr :
+ string -> Types.fromparser list * (Types.spaces * (int * 'a)) -> unit
+val check_foreach : string * ('a * (int * int)) -> unit
+val check_for : string * ('a * (int * int)) -> unit
+val check_no_paren : string -> Types.fromparser * ('a * (int * int)) -> unit
+val to_Ident :
+ (string option * string) * ('a * (int * int)) -> Types.fromparser
+val to_String : string * ('a * (int * int)) -> Types.fromparser
+val only_one : Types.fromparser list * ('a * (int * int)) -> Types.fromparser
+val only_one_in_List :
+ Types.fromparser * ('a * (int * int)) -> Types.fromparser
+val array_ident_to_hash_ident :
+ Types.fromparser * ('a * (int * int)) -> Types.fromparser
+val from_PATTERN :
+ (string * string) * ('a * (int * int)) -> Types.fromparser list
+val from_PATTERN_SUBST :
+ (string * string * string) * ('a * (int * int)) -> Types.fromparser list
+val to_List : Types.fromparser list -> Types.fromparser
+val sub_declaration :
+ Types.fromparser * string -> Types.fromparser list -> Types.fromparser
+val var_dollar_ : Types.fromparser
+val var_STDOUT : Types.fromparser
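
One detail the interface makes visible: the sp_* checkers are polymorphic in the first component of the pair, so the same checker accepts a token whose payload is a string, an int, or unit. A small sketch under that assumption (values are invented; Space_1 triggers no warning, so both calls are silent):

    (* both calls type-check against  sp_p : 'a * (Types.spaces * (int * 'b)) -> unit *)
    let () = Parser_helper.sp_p ("some payload", (Types.Space_1, (5, 6)))
    let () = Parser_helper.sp_p (42, (Types.Space_1, (9, 10)))
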
diff --git a/perl_checker.src/perl_checker.ml b/perl_checker.src/perl_checker.ml
index fbb3f4d..79b3ac9 100644
--- a/perl_checker.src/perl_checker.ml
+++ b/perl_checker.src/perl_checker.ml
@@ -2,21 +2,21 @@ open Types
let _ =
let args = List.tl (Array.to_list Sys.argv) in
+ let args = if args = [] then ["/tmp/t.pl"] else args in
List.iter (fun file ->
try
- let lexbuf = Lexing.from_channel (open_in file) in
- let _ =
- try
- Info.start_a_new_file file ;
- if false then
- let t = Lexer.lexbuf2list Lexer.token lexbuf in
- let _,_ = t, t in ""
- else
- Parser.prog Lexer.token lexbuf
- with Failure s -> (
- prerr_endline s ;
- exit 1
- ) in
- ()
+ let lexbuf = Lexing.from_channel (Unix.open_process_in (Printf.sprintf "expand \"%s\"" file)) in
+ try
+ Info.start_a_new_file file ;
+ if false then
+ let t = Lexer.lexbuf2list (Lexer.concat_bareword_paren (Lexer.concat_spaces Lexer.token)) lexbuf in
+ let _,_ = t, t in ()
+ else
+ let t = Parser.prog (Lexer.concat_bareword_paren (Lexer.concat_spaces Lexer.token)) lexbuf in
+ let _,_ = t, t in ()
+ with Failure s -> (
+ prerr_endline s ;
+ exit 1
+ )
with _ -> prerr_endline ("bad file " ^ file)
) args
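
The driver now feeds each file through expand(1) before lexing, so hard tabs become spaces and the column-based whitespace checks in parser_helper.ml see consistent offsets. A minimal sketch of that pattern, with an illustrative helper name that is not part of the source:

    (* open the file through `expand` so the lexer only ever sees spaces;
       error handling (missing file, failing command) is left out *)
    let lexbuf_of_expanded_file file =
      let ic = Unix.open_process_in (Printf.sprintf "expand \"%s\"" file) in
      Lexing.from_channel ic

In a longer-lived tool the channel would also be closed with Unix.close_process_in once parsing finishes; for a short-lived batch checker, leaving that to process exit is a pragmatic shortcut.
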
diff --git a/perl_checker.src/types.mli b/perl_checker.src/types.mli
index 570da0f..5567eb9 100644
--- a/perl_checker.src/types.mli
+++ b/perl_checker.src/types.mli
@@ -1,16 +1,44 @@
exception TooMuchRParen
+type raw_pos = int * int
+
type pos = string * int * int
-type ident_type = I_scalar | I_hash | I_array | I_func | I_raw | I_star
-type fromparser =
- | Ident of ident_type * string option * string * pos
- | Num of float * pos
+type spaces =
+ | Space_0
+ | Space_1
+ | Space_n
+ | Space_cr
+ | Space_none
+type context = I_scalar | I_hash | I_array | I_func | I_raw | I_star | I_arraylen
+type fromparser =
+ | Ident of string option * string * pos
+ | Num of string * pos
| String of string * pos
- | Nil
- | Binary of string * fromparser * fromparser
+ | Ref of context * fromparser
+ | Deref of context * fromparser
+ | Deref_with of context * fromparser * fromparser
+
+ | Diamond of fromparser option
+ | Binop of string * fromparser * fromparser
| If_then_else of string * (fromparser * fromparser) list * fromparser option
+ | List of fromparser list
+ | Block of fromparser list
+
+ | Call of fromparser * fromparser list
+ | Call_op of string * fromparser list
+ | Method_call of fromparser * fromparser * fromparser list
+
+ | Anonymous_sub of fromparser
+ | My of fromparser
+ | Local of fromparser
+ | Use of fromparser * fromparser list
+ | Sub_declaration of fromparser * string * fromparser list (* name, prototype, body *)
+ | Package of fromparser
+ | Label of string
+
+ | Too_complex
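
To give a feel for the reshaped AST, here is a hedged sketch of a value the parser could build for the Perl fragment `print $_` (the dummy position and the exact encoding of a builtin call are assumptions; only the constructor shapes come from the type above):

    open Types

    let dummy_pos : pos = ("example.pl", 0, 8)

    (* a call node: the callee is &print, the single argument is $_ *)
    let example : fromparser =
      Call (Deref (I_func, Ident (None, "print", dummy_pos)),
            [ Deref (I_scalar, Ident (None, "_", dummy_pos)) ])
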