diff options
Diffstat (limited to 'perl_checker.src')
-rw-r--r-- | perl_checker.src/Makefile | 16 | ||||
-rw-r--r-- | perl_checker.src/common.ml | 31 | ||||
-rw-r--r-- | perl_checker.src/common.mli | 16 | ||||
-rw-r--r-- | perl_checker.src/info.ml | 8 | ||||
-rw-r--r-- | perl_checker.src/lexer.mll | 427 | ||||
-rw-r--r-- | perl_checker.src/parser.mly | 513 | ||||
-rw-r--r-- | perl_checker.src/parser_helper.ml | 125 | ||||
-rw-r--r-- | perl_checker.src/parser_helper.mli | 42 | ||||
-rw-r--r-- | perl_checker.src/perl_checker.ml | 28 | ||||
-rw-r--r-- | perl_checker.src/types.mli | 40 |
10 files changed, 803 insertions, 443 deletions
diff --git a/perl_checker.src/Makefile b/perl_checker.src/Makefile index 1c73153..6318ac5 100644 --- a/perl_checker.src/Makefile +++ b/perl_checker.src/Makefile @@ -13,9 +13,9 @@ TMP_FILES = $(YACC_FILES:%.mly=%.mli) $(YACC_FILES:%.mly=%.output) $(YACC_FILES ALL_PROGS = perl_checker_debug perl_checker -PROG_OBJS_WITH_CMI = parser.cmo print.cmo perl_checker.cmo +PROG_OBJS_WITH_CMI = parser_helper.cmo parser.cmo print.cmo perl_checker.cmo PROG_OBJS = common.cmo flags.cmo info.cmo $(LEX_FILES:%.mll=%.cmo) $(PROG_OBJS_WITH_CMI) -CMA_FILES = +CMA_FILES = unix.cma PROG_OBJX_WITH_CMI = $(PROG_OBJS_WITH_CMI:%.cmo=%.cmx) PROG_OBJX = $(PROG_OBJS:%.cmo=%.cmx) @@ -48,7 +48,7 @@ perl_checker: .depend $(PROG_OBJX) $(PROG_OBJS_WITH_CMI): %.cmo: %.cmi -$(PROG_OBJX_WITH_CMI): %.cmx: %.cmi +$(PROG_OBJX_WITH_CMI): %.cmx: %.cmi %.cmo: %.ml $(CSLC) $(CSLFLAGS) -c $< @@ -74,9 +74,11 @@ depend: .depend $(CSLDEP) $(INCLUDES) *.mli *.mll *.ml > .depend # missing dependencies: -perl_checker.cmo: lexer.cmi -perl_checker.cmx: lexer.cmi -lexer.cmx: common.cmi -lexer.cmo: common.cmi +perl_checker.cmo: lexer.cmi parser.cmi +perl_checker.cmx: lexer.cmi parser.cmi +lexer.cmx: common.cmi parser.cmi +lexer.cmo: common.cmi parser.cmi +parser.cmo: parser_helper.cmi +parser.cmx: parser_helper.cmi -include .depend diff --git a/perl_checker.src/common.ml b/perl_checker.src/common.ml index 0f20e7a..f600e01 100644 --- a/perl_checker.src/common.ml +++ b/perl_checker.src/common.ml @@ -8,18 +8,6 @@ exception GraphSort_circular_deps type ('a, 'b) either = Left of 'a | Right of 'b type ('a, 'b) or_option = Or_some of 'a | Or_error of 'b -let bpos = "",-1,-1 - -let norm (a,b) = if a = -1 then b else a -let unipos (s,a,b) (s2,c,d) = - if (a,b) = (-1,-1) then s2,c,d else - if (c,d) = (-1,-1) then s,a,b else - if s <> s2 then bpos else s, min (norm(a,c)) (norm(c,a)), max (norm(b,d)) (norm(d,b)) - -let uniposl l = match l with - | [] -> bpos - | e::l -> fold_left unipos e l - (**********************************************************************************) let internal_error s = failwith ("internal error: " ^ s) @@ -267,6 +255,9 @@ let rec l_option2option_l = function | Some e :: l -> map_option (fun l -> e :: l) (l_option2option_l l) let map_option_env f (e, env) = map_option f e, env +let t2_to_list (a,b) = [ a ; b ] +let t3_to_list (a,b,c) = [ a ; b ; c ] + let if_some bool val_ = if bool then Some val_ else None let rec fold_left_option f val_ = function @@ -653,17 +644,13 @@ let skip_n_char_ beg end_ s = String.sub s beg (String.length s - beg - end_) let skip_n_char n s = skip_n_char_ n 0 s -let rec index_spaces_from beg s = - if s.[beg] = ' ' || s.[beg] = '\t' then beg else index_spaces_from (beg+1) s -let index_spaces s = index_spaces_from 0 s - -let rec index_non_spaces_from beg s = - if s.[beg] = ' ' || s.[beg] = '\t' then index_non_spaces_from (beg+1) s else beg -let index_non_spaces s = index_non_spaces_from 0 s +let rec non_index_from s beg c = + if s.[beg] = c then non_index_from s (beg+1) c else beg +let non_index s c = non_index_from s 0 c -let rec rindex_non_spaces_from beg s = - if s.[beg] = ' ' || s.[beg] = '\t' then rindex_non_spaces_from (beg-1) s else beg -let rindex_non_spaces s = rindex_non_spaces_from (String.length s - 1) s +let rec non_rindex_from s beg c = + if s.[beg] = c then non_rindex_from s (beg-1) c else beg +let non_rindex s c = non_rindex_from s (String.length s - 1) c let rec explode_string = function | "" -> [] diff --git a/perl_checker.src/common.mli b/perl_checker.src/common.mli index 353a2a3..ccf6587 100644 --- a/perl_checker.src/common.mli +++ b/perl_checker.src/common.mli @@ -3,10 +3,6 @@ exception Not_comparable exception GraphSort_circular_deps type ('a, 'b) either = Left of 'a | Right of 'b and ('a, 'b) or_option = Or_some of 'a | Or_error of 'b -val bpos : string * int * int -val norm : int * int -> int -val unipos : string * int * int -> string * int * int -> string * int * int -val uniposl : (string * int * int) list -> string * int * int val internal_error : string -> 'a val id : 'a -> 'a val double : 'a -> 'a * 'a @@ -65,6 +61,8 @@ val map_optionoption : ('a -> 'b option) -> 'a option -> 'b option val t2_option2option_t2 : 'a option * 'b option -> ('a * 'b) option val l_option2option_l : 'a option list -> 'a list option val map_option_env : ('a -> 'b) -> 'a option * 'c -> 'b option * 'c +val t2_to_list : 'a * 'a -> 'a list +val t3_to_list : 'a * 'a * 'a -> 'a list val if_some : bool -> 'a -> 'a option val fold_left_option : ('a -> 'b -> 'a option) -> 'a -> 'b list -> 'a option val collect_some_withenv : @@ -175,12 +173,10 @@ val chop : string -> string val chomps : string -> string val skip_n_char_ : int -> int -> string -> string val skip_n_char : int -> string -> string -val index_spaces_from : int -> string -> int -val index_spaces : string -> int -val index_non_spaces_from : int -> string -> int -val index_non_spaces : string -> int -val rindex_non_spaces_from : int -> string -> int -val rindex_non_spaces : string -> int +val non_index_from : string -> int -> char -> int +val non_index : string -> char -> int +val non_rindex_from : string -> int -> char -> int +val non_rindex : string -> char -> int val explode_string : string -> char list val is_uppercase : char -> bool val is_lowercase : char -> bool diff --git a/perl_checker.src/info.ml b/perl_checker.src/info.ml index 8e27bd3..3d68439 100644 --- a/perl_checker.src/info.ml +++ b/perl_checker.src/info.ml @@ -14,17 +14,19 @@ let start_a_new_file file = let raw_pos2raw_line file a = let starts = map_index (fun a b -> a,b) (rev !(assoc file !lines_starts)) in - let ((offset, line), _) = find (fun (_,(e,_)) -> e > a) (combine starts (tl starts @ [99999, 99999])) in + let ((offset, line), _) = find (fun (_,(e,_)) -> e > a) (combine starts (tl starts @ [999999999, 999999999])) in line, offset let pos2line (file, a, b) = let line, offset = raw_pos2raw_line file a in - file, line, a - offset, b - offset + file, line, a - offset + 1, b - offset + 1 + +let pos2s (file, a, b) = sprintf "(%s, %d, %d)" file a b let pos2sfull pos = try let (file, line, n1,n2) = pos2line pos in sprintf "File \"%s\", line %d, character %d-%d\n" file (line + 1) n1 n2 - with Not_found -> "" + with Not_found -> failwith ("bad position " ^ pos2s pos) let pos2sfull_current a b = pos2sfull (!current_file, a, b) diff --git a/perl_checker.src/lexer.mll b/perl_checker.src/lexer.mll index 6abbbc2..8450aa9 100644 --- a/perl_checker.src/lexer.mll +++ b/perl_checker.src/lexer.mll @@ -1,12 +1,202 @@ { (* -*- caml -*- *) -open Parser open Common +open Types open Lexing open Info +type raw_token = + | EOF of raw_pos + | SPACE of int + | CR + | NUM of (string * raw_pos) + | STRING of (string * raw_pos) + | BAREWORD of (string * raw_pos) + | REVISION of (string * raw_pos) + | COMMENT of (string * raw_pos) + | POD of (string * raw_pos) + | LABEL of (string * raw_pos) + | COMMAND_STRING of (string * raw_pos) + | PRINT_TO_STAR of (string * raw_pos) + | PRINT_TO_SCALAR of (string * raw_pos) + | QUOTEWORDS of (string * raw_pos) + | COMPACT_HASH_SUBSCRIPT of (string * raw_pos) + | HERE_DOC of ((string * raw_pos) ref * raw_pos) + | PATTERN of (string * string * raw_pos) + | PATTERN_SUBST of (string * string * string * raw_pos) + | SCALAR_IDENT of (string option * string * raw_pos) + | ARRAY_IDENT of (string option * string * raw_pos) + | HASH_IDENT of (string option * string * raw_pos) + | FUNC_IDENT of (string option * string * raw_pos) + | STAR_IDENT of (string option * string * raw_pos) + | RAW_IDENT of (string option * string * raw_pos) + | ARRAYLEN_IDENT of (string option * string * raw_pos) + | FUNC_DECL_WITH_PROTO of (string * string * raw_pos) + + | IF of raw_pos | ELSIF of raw_pos | ELSE of raw_pos | UNLESS of raw_pos | DO of raw_pos | WHILE of raw_pos | UNTIL of raw_pos | MY of raw_pos | CONTINUE of raw_pos | SUB of raw_pos + | LOCAL of raw_pos | FOR of (string * raw_pos) | USE of raw_pos | PACKAGE of raw_pos | BEGIN of raw_pos | END of raw_pos | PRINT of (string * raw_pos) + | NEW of (raw_pos) | FORMAT of (raw_pos) | AT of raw_pos | DOLLAR of raw_pos | PERCENT of raw_pos | AMPERSAND of raw_pos + | STAR of raw_pos | ARRAYLEN of raw_pos | SEMI_COLON of raw_pos | PKG_SCOPE of raw_pos | PAREN of raw_pos | PAREN_END of raw_pos | BRACKET of raw_pos + | BRACKET_END of raw_pos | BRACKET_HASHREF of raw_pos | ARRAYREF of raw_pos | ARRAYREF_END of raw_pos | ARROW of raw_pos | INCR of raw_pos | DECR of raw_pos + | POWER of raw_pos | TIGHT_NOT of raw_pos | BIT_NEG of raw_pos | REF of raw_pos | PATTERN_MATCH of raw_pos | PATTERN_MATCH_NOT of raw_pos | MULT of raw_pos + | DIVISION of raw_pos | MODULO of raw_pos | REPLICATE of raw_pos | PLUS of raw_pos | MINUS of raw_pos | CONCAT of raw_pos | BIT_SHIFT_LEFT of raw_pos + | BIT_SHIFT_RIGHT of raw_pos | LT of raw_pos | GT of raw_pos | COMPARE_OP of (string * raw_pos) | EQ_OP of (string * raw_pos) + | BIT_AND of raw_pos | BIT_OR of raw_pos | BIT_XOR of raw_pos | AND_TIGHT of raw_pos | OR_TIGHT of raw_pos | DOTDOT of raw_pos | DOTDOTDOT of raw_pos + | QUESTION_MARK of raw_pos | COLON of raw_pos | ASSIGN of (string * raw_pos) | COMMA of raw_pos | RIGHT_ARROW of raw_pos | NOT of raw_pos | AND of raw_pos | OR of raw_pos | XOR of raw_pos + +let saved_token = ref None + +let concat_bareword_paren get_token lexbuf = + let token = match !saved_token with + | None -> get_token lexbuf + | Some t -> t + in + let token, next = + match token with + | Parser.PRINT(s, both) + | Parser.BAREWORD(s, both) -> + let next_token = get_token lexbuf in + (match next_token with Parser.PAREN(_, (Space_0, _)) -> Parser.BAREWORD_PAREN(s, both) | _ -> token), Some next_token + | Parser.RAW_IDENT(ident, both) -> + let next_token = get_token lexbuf in + (match next_token with Parser.PAREN(_, (Space_0, _)) -> Parser.RAW_IDENT_PAREN(ident, both) | _ -> token), Some next_token + | _ -> token, None + in + saved_token := next ; token + +let rec concat_spaces get_token lexbuf = + let rec get_spaces spaces lexbuf = + match get_token lexbuf with + | CR -> get_spaces Space_cr lexbuf + | SPACE n -> + let spaces' = + match spaces with + | Space_cr -> Space_cr + | Space_0 -> if n = 1 then Space_1 else Space_n + | _ -> Space_n + in + get_spaces spaces' lexbuf + | token -> token, spaces + in + let token, spaces = get_spaces Space_0 lexbuf in + match token with + | NUM(s, pos) -> Parser.NUM(s, (spaces, pos)) + | STRING(s, pos) -> Parser.STRING(s, (spaces, pos)) + | BAREWORD(s, pos) -> Parser.BAREWORD(s, (spaces, pos)) + | REVISION(s, pos) -> Parser.REVISION(s, (spaces, pos)) + | COMMENT(s, pos) -> Parser.COMMENT(s, (spaces, pos)) + | POD(s, pos) -> Parser.POD(s, (spaces, pos)) + | LABEL(s, pos) -> Parser.LABEL(s, (spaces, pos)) + | COMMAND_STRING(s, pos) -> Parser.COMMAND_STRING(s, (spaces, pos)) + | PRINT(s, pos) -> Parser.PRINT(s, (spaces, pos)) + | PRINT_TO_STAR(s, pos) -> Parser.PRINT_TO_STAR(s, (spaces, pos)) + | PRINT_TO_SCALAR(s, pos) -> Parser.PRINT_TO_SCALAR(s, (spaces, pos)) + | QUOTEWORDS(s, pos) -> Parser.QUOTEWORDS(s, (spaces, pos)) + | COMPACT_HASH_SUBSCRIPT(s, pos) -> Parser.COMPACT_HASH_SUBSCRIPT(s, (spaces, pos)) + | HERE_DOC(r, pos) -> Parser.HERE_DOC(r, (spaces, pos)) + | PATTERN(s, opts, pos) -> Parser.PATTERN((s, opts), (spaces, pos)) + | PATTERN_SUBST(from, to_, opts, pos) -> Parser.PATTERN_SUBST((from, to_, opts), (spaces, pos)) + | SCALAR_IDENT(kind, name, pos) -> Parser.SCALAR_IDENT((kind, name), (spaces, pos)) + | ARRAY_IDENT(kind, name, pos) -> Parser.ARRAY_IDENT((kind, name), (spaces, pos)) + | HASH_IDENT(kind, name, pos) -> Parser.HASH_IDENT((kind, name), (spaces, pos)) + | FUNC_IDENT(kind, name, pos) -> Parser.FUNC_IDENT((kind, name), (spaces, pos)) + | STAR_IDENT(kind, name, pos) -> Parser.STAR_IDENT((kind, name), (spaces, pos)) + | RAW_IDENT(kind, name, pos) -> Parser.RAW_IDENT((kind, name), (spaces, pos)) + | ARRAYLEN_IDENT(kind, name, pos) -> Parser.ARRAYLEN_IDENT((kind, name), (spaces, pos)) + | FUNC_DECL_WITH_PROTO(name, proto, pos) -> Parser.FUNC_DECL_WITH_PROTO((name, proto), (spaces, pos)) + + | NEW(pos) -> Parser.NEW((), (spaces, pos)) + | FORMAT(pos) -> Parser.FORMAT((), (spaces, pos)) + | COMPARE_OP(s, pos) -> Parser.COMPARE_OP(s, (spaces, pos)) + | EQ_OP(s, pos) -> Parser.EQ_OP(s, (spaces, pos)) + | ASSIGN(s, pos) -> Parser.ASSIGN(s, (spaces, pos)) + | FOR(s, pos) -> Parser.FOR(s, (spaces, pos)) + + | EOF (pos) -> Parser.EOF ((), (spaces, pos)) + | IF (pos) -> Parser.IF ((), (spaces, pos)) + | ELSIF (pos) -> Parser.ELSIF ((), (spaces, pos)) + | ELSE (pos) -> Parser.ELSE ((), (spaces, pos)) + | UNLESS (pos) -> Parser.UNLESS ((), (spaces, pos)) + | DO (pos) -> Parser.DO ((), (spaces, pos)) + | WHILE (pos) -> Parser.WHILE ((), (spaces, pos)) + | UNTIL (pos) -> Parser.UNTIL ((), (spaces, pos)) + | MY (pos) -> Parser.MY ((), (spaces, pos)) + | CONTINUE (pos) -> Parser.CONTINUE ((), (spaces, pos)) + | SUB (pos) -> Parser.SUB ((), (spaces, pos)) + | LOCAL (pos) -> Parser.LOCAL ((), (spaces, pos)) + | USE (pos) -> Parser.USE ((), (spaces, pos)) + | PACKAGE (pos) -> Parser.PACKAGE ((), (spaces, pos)) + | BEGIN (pos) -> Parser.BEGIN ((), (spaces, pos)) + | END (pos) -> Parser.END ((), (spaces, pos)) + | AT (pos) -> Parser.AT ((), (spaces, pos)) + | DOLLAR (pos) -> Parser.DOLLAR ((), (spaces, pos)) + | PERCENT (pos) -> Parser.PERCENT ((), (spaces, pos)) + | AMPERSAND (pos) -> Parser.AMPERSAND ((), (spaces, pos)) + | STAR (pos) -> Parser.STAR ((), (spaces, pos)) + | ARRAYLEN (pos) -> Parser.ARRAYLEN ((), (spaces, pos)) + | SEMI_COLON (pos) -> Parser.SEMI_COLON ((), (spaces, pos)) + | PKG_SCOPE (pos) -> Parser.PKG_SCOPE ((), (spaces, pos)) + | PAREN (pos) -> Parser.PAREN ((), (spaces, pos)) + | PAREN_END (pos) -> Parser.PAREN_END ((), (spaces, pos)) + | BRACKET (pos) -> Parser.BRACKET ((), (spaces, pos)) + | BRACKET_END (pos) -> Parser.BRACKET_END ((), (spaces, pos)) + | BRACKET_HASHREF (pos) -> Parser.BRACKET_HASHREF ((), (spaces, pos)) + | ARRAYREF (pos) -> Parser.ARRAYREF ((), (spaces, pos)) + | ARRAYREF_END (pos) -> Parser.ARRAYREF_END ((), (spaces, pos)) + | ARROW (pos) -> Parser.ARROW ((), (spaces, pos)) + | INCR (pos) -> Parser.INCR ((), (spaces, pos)) + | DECR (pos) -> Parser.DECR ((), (spaces, pos)) + | POWER (pos) -> Parser.POWER ((), (spaces, pos)) + | TIGHT_NOT (pos) -> Parser.TIGHT_NOT ((), (spaces, pos)) + | BIT_NEG (pos) -> Parser.BIT_NEG ((), (spaces, pos)) + | REF (pos) -> Parser.REF ((), (spaces, pos)) + | PATTERN_MATCH (pos) -> Parser.PATTERN_MATCH ((), (spaces, pos)) + | PATTERN_MATCH_NOT(pos) -> Parser.PATTERN_MATCH_NOT((), (spaces, pos)) + | MULT (pos) -> Parser.MULT ((), (spaces, pos)) + | DIVISION (pos) -> Parser.DIVISION ((), (spaces, pos)) + | MODULO (pos) -> Parser.MODULO ((), (spaces, pos)) + | REPLICATE (pos) -> Parser.REPLICATE ((), (spaces, pos)) + | PLUS (pos) -> Parser.PLUS ((), (spaces, pos)) + | MINUS (pos) -> Parser.MINUS ((), (spaces, pos)) + | CONCAT (pos) -> Parser.CONCAT ((), (spaces, pos)) + | BIT_SHIFT_LEFT (pos) -> Parser.BIT_SHIFT_LEFT ((), (spaces, pos)) + | BIT_SHIFT_RIGHT (pos) -> Parser.BIT_SHIFT_RIGHT ((), (spaces, pos)) + | LT (pos) -> Parser.LT ((), (spaces, pos)) + | GT (pos) -> Parser.GT ((), (spaces, pos)) + | BIT_AND (pos) -> Parser.BIT_AND ((), (spaces, pos)) + | BIT_OR (pos) -> Parser.BIT_OR ((), (spaces, pos)) + | BIT_XOR (pos) -> Parser.BIT_XOR ((), (spaces, pos)) + | AND_TIGHT (pos) -> Parser.AND_TIGHT ((), (spaces, pos)) + | OR_TIGHT (pos) -> Parser.OR_TIGHT ((), (spaces, pos)) + | DOTDOT (pos) -> Parser.DOTDOT ((), (spaces, pos)) + | DOTDOTDOT (pos) -> Parser.DOTDOTDOT ((), (spaces, pos)) + | QUESTION_MARK (pos) -> Parser.QUESTION_MARK ((), (spaces, pos)) + | COLON (pos) -> Parser.COLON ((), (spaces, pos)) + | COMMA (pos) -> Parser.COMMA ((), (spaces, pos)) + | RIGHT_ARROW (pos) -> Parser.RIGHT_ARROW ((), (spaces, pos)) + | NOT (pos) -> Parser.NOT ((), (spaces, pos)) + | AND (pos) -> Parser.AND ((), (spaces, pos)) + | OR (pos) -> Parser.OR ((), (spaces, pos)) + | XOR (pos) -> Parser.XOR ((), (spaces, pos)) + + | SPACE _ | CR -> internal_error "raw_token_to_token" + +let rec lexbuf2list t lexbuf = + let rec f () = + match t lexbuf with + | Parser.EOF _ -> [] + | e -> e :: f() + in + let l = f() in + l let next_rule = ref None +let bpos = -1,-1 +let pos lexbuf = lexeme_start lexbuf, lexeme_end lexbuf +let pos2sfull_with start end_ = Info.pos2sfull (!current_file, start, end_) +let pos2sfull lexbuf = pos2sfull_with (lexeme_start lexbuf) (lexeme_end lexbuf) +let putback lexbuf nb = lexbuf.Lexing.lex_curr_pos <- lexbuf.Lexing.lex_curr_pos - nb + let add_a_new_line raw_pos = incr current_file_current_line ; lpush current_file_lines_starts raw_pos @@ -26,12 +216,13 @@ let string_nestness = ref 0 let building_current_string = ref "" let current_string_start_pos = ref 0 let current_string_start_line = ref 0 +let die lexbuf err = failwith (pos2sfull_with !current_string_start_pos (lexeme_end lexbuf) ^ err) let ins t lexbuf = building_current_string := ""; current_string_start_pos := lexeme_start lexbuf; t lexbuf ; - !building_current_string, (!current_file, !current_string_start_pos, lexeme_end lexbuf) + !building_current_string, (!current_string_start_pos, lexeme_end lexbuf) let ins_to_string t lexbuf = let s, pos = ins t lexbuf in not_ok_for_match := lexeme_end lexbuf; @@ -42,23 +233,6 @@ let next_s s t lexbuf = t lexbuf let next t lexbuf = next_s (lexeme lexbuf) t lexbuf -let pos lexbuf = !current_file, lexeme_start lexbuf, lexeme_end lexbuf -let pos2sfull_with start end_ = Info.pos2sfull (!current_file, start, end_) -let pos2sfull lexbuf = pos2sfull_with (lexeme_start lexbuf) (lexeme_end lexbuf) - -let putback lexbuf nb = lexbuf.Lexing.lex_curr_pos <- lexbuf.Lexing.lex_curr_pos - nb - -let die lexbuf err = failwith (pos2sfull_with !current_string_start_pos (lexeme_end lexbuf) ^ err) - -let rec lexbuf2list t lexbuf = - let rec f () = - match t lexbuf with - | EOF -> [] - | e -> e :: f() - in - let l = f() in - l - let ident_type_from_char fq name lexbuf c = not_ok_for_match := lexeme_end lexbuf; match c with @@ -92,7 +266,7 @@ let arraylen_fqident_from_lexbuf lexbuf = let fq, name = split_at_two_colons (skip_n_char 2 s) in ARRAYLEN_IDENT(Some fq, name, pos lexbuf) -let check_multi_line_delimited_string opts (_, start, end_) = +let check_multi_line_delimited_string opts (start, end_) = let check = match opts with | None -> true @@ -102,19 +276,18 @@ let check_multi_line_delimited_string opts (_, start, end_) = failwith (pos2sfull_with start end_ ^ "multi-line patterns are not allowed (or use /x modifier)") } -let space = [' ' '\t'] let stash = [ '$' '@' '%' '&' '*' ] let ident_start = ['a'-'z' 'A'-'Z' '_'] let ident = ident_start ['0'-'9' 'A'-'Z' 'a'-'z' '_'] * let pattern_separator = [ '/' '!' ',' '|' ] rule token = parse -| space+ { +| ' '+ { (* propagate not_ok_for_match when it was set by the previous token *) if lexeme_start lexbuf = !not_ok_for_match then not_ok_for_match := lexeme_end lexbuf; - (*SPACE(pos lexbuf) *) token lexbuf + SPACE(lexeme_end lexbuf - lexeme_start lexbuf) } -| '#' [^ '\n']* { (*COMMENT(lexeme lexbuf, pos lexbuf)*) token lexbuf } +| '#' [^ '\n']* { SPACE(1) } | "\n=" { add_a_new_line(lexeme_end lexbuf - 1); @@ -128,76 +301,85 @@ rule token = parse current_here_doc_mark := mark ; r := ins (if interpolate then here_doc else raw_here_doc) lexbuf with Queue.Empty -> ()); - token lexbuf - } -| "->" { ARROW } -| "++" { INCR } -| "--" { DECR } -| "**" { POWER } -| "!" { TIGHT_NOT } -| "~" { BIT_NEG } -| "=~" { PATTERN_MATCH } -| "!~" { PATTERN_MATCH_NOT } -| "*" { MULT } -| "%" { MODULO } -| "x" { REPLICATE } -| "+" { PLUS } -| "-" { MINUS } -| "." { CONCAT } -| "<<" { BIT_SHIFT_LEFT } -| ">>" { BIT_SHIFT_RIGHT } -| "<" { LT } -| ">" { GT } -| "<=" | ">=" | "lt" | "gt" | "le" | "ge" { COMPARE_OP(lexeme lexbuf) } -| "==" | "!=" | "<=>" | "eq" | "ne" | "cmp" { EQ_OP(lexeme lexbuf) } -| "&" { BIT_AND } -| "|" { BIT_OR } -| "^" { BIT_XOR } -| "&&" { AND_TIGHT } -| "||" { OR_TIGHT } -| ".." { DOTDOT } -| "..." { DOTDOTDOT } -| "?" { QUESTION_MARK } -| ":" { COLON } -| "::" { PKG_SCOPE } - -| "=" | "+=" | "-=" | "*=" | ".=" | "|=" | "&=" | "^=" | "||=" | "&&=" { ASSIGN(lexeme lexbuf) } - -| "," { COMMA } -| "=>" { RIGHT_ARROW } -| "not" { NOT } -| "and" { AND } -| "or" { OR } -| "xor" { XOR } - -| "if" { IF } -| "else" { ELSE } -| "elsif" { ELSIF } -| "unless" { UNLESS } -| "do" { DO } -| "while" { WHILE } -| "until" { UNTIL } -| "foreach" { FOR("foreach") } -| "for" { FOR("for") } -| "my" { MY } -| "local" { LOCAL } -| "continue" { CONTINUE } -| "sub" { SUB } -| "package" { PACKAGE } -| "use" { USE } -| "BEGIN" { BEGIN } -| "END" { END } -| "print" { PRINT(pos lexbuf) } + CR + } +| "->" { ARROW(pos lexbuf) } +| "++" { INCR(pos lexbuf) } +| "--" { DECR(pos lexbuf) } +| "**" { POWER(pos lexbuf) } +| "!" { TIGHT_NOT(pos lexbuf) } +| "~" { BIT_NEG(pos lexbuf) } +| "=~" { PATTERN_MATCH(pos lexbuf) } +| "!~" { PATTERN_MATCH_NOT(pos lexbuf) } +| "*" { MULT(pos lexbuf) } +| "%" { MODULO(pos lexbuf) } +| "x" { REPLICATE(pos lexbuf) } +| "+" { PLUS(pos lexbuf) } +| "-" { MINUS(pos lexbuf) } +| "." { CONCAT(pos lexbuf) } +| "<<" { BIT_SHIFT_LEFT(pos lexbuf) } +| ">>" { BIT_SHIFT_RIGHT(pos lexbuf) } +| "<" { LT(pos lexbuf) } +| ">" { GT(pos lexbuf) } +| "<=" | ">=" | "lt" | "gt" | "le" | "ge" { COMPARE_OP(lexeme lexbuf, pos lexbuf) } +| "==" | "!=" | "<=>" | "eq" | "ne" | "cmp" { EQ_OP(lexeme lexbuf, pos lexbuf) } +| "&" { BIT_AND(pos lexbuf) } +| "|" { BIT_OR(pos lexbuf) } +| "^" { BIT_XOR(pos lexbuf) } +| "&&" { AND_TIGHT(pos lexbuf) } +| "||" { OR_TIGHT(pos lexbuf) } +| ".." { DOTDOT(pos lexbuf) } +| "..." { DOTDOTDOT(pos lexbuf) } +| "?" { QUESTION_MARK(pos lexbuf) } +| ":" { COLON(pos lexbuf) } +| "::" { PKG_SCOPE(pos lexbuf) } + +| "=" | "+=" | "-=" | "*=" | ".=" | "|=" | "&=" | "^=" | "||=" | "&&=" { ASSIGN(lexeme lexbuf, pos lexbuf) } + +| "," { COMMA(pos lexbuf) } +| "=>" { RIGHT_ARROW(pos lexbuf) } +| "not" { NOT(pos lexbuf) } +| "and" { AND(pos lexbuf) } +| "or" { OR(pos lexbuf) } +| "xor" { XOR(pos lexbuf) } + +| "if" { IF(pos lexbuf) } +| "else" { ELSE(pos lexbuf) } +| "elsif" { ELSIF(pos lexbuf) } +| "unless" { UNLESS(pos lexbuf) } +| "do" { DO(pos lexbuf) } +| "while" { WHILE(pos lexbuf) } +| "until" { UNTIL(pos lexbuf) } +| "foreach" { FOR(lexeme lexbuf, pos lexbuf) } +| "for" { FOR(lexeme lexbuf, pos lexbuf) } +| "my" { MY(pos lexbuf) } +| "local" { LOCAL(pos lexbuf) } +| "continue" { CONTINUE(pos lexbuf) } +| "sub" { SUB(pos lexbuf) } +| "package" { PACKAGE(pos lexbuf) } +| "use" { USE(pos lexbuf) } +| "BEGIN" { BEGIN(pos lexbuf) } +| "END" { END(pos lexbuf) } +| "print" { PRINT(lexeme lexbuf, pos lexbuf) } | "new" { NEW(pos lexbuf) } | "format" { let _ = here_doc_next_line "." false in FORMAT(pos lexbuf) } | "split" | "grep" { (* ok_for_match! *) BAREWORD(lexeme lexbuf, pos lexbuf) } -| ident space* "=>" { (* needed so that (if => 1) works *) +| "print " ident ' ' { + putback lexbuf 1; + PRINT_TO_STAR(skip_n_char_ 6 1 (lexeme lexbuf), pos lexbuf); + } +| "print $" ident ' ' { + putback lexbuf 1; + PRINT_TO_SCALAR(skip_n_char_ 7 1 (lexeme lexbuf), pos lexbuf); + } + +| ident ' '* "=>" { (* needed so that (if => 1) works *) let s = lexeme lexbuf in let end_ = String.length s - 1 in - let ident_end = rindex_non_spaces_from (end_ - 2) s in + let ident_end = non_rindex_from s (end_ - 2) ' ' in putback lexbuf (end_ - ident_end); BAREWORD(String.sub s 0 (ident_end+1), pos lexbuf) } @@ -207,25 +389,25 @@ rule token = parse COMPACT_HASH_SUBSCRIPT(lexeme lexbuf, pos lexbuf) } -| '@' { AT } -| '$' { DOLLAR } -| '$' '#' { ARRAYLEN } -| '%' ['$' '{'] { putback lexbuf 1; PERCENT } -| '&' ['$' '{'] { putback lexbuf 1; AMPERSAND } -| '*' ['$' '{'] { putback lexbuf 1; if lexeme_start lexbuf = !not_ok_for_match then MULT else STAR } +| '@' { AT(pos lexbuf) } +| '$' { DOLLAR(pos lexbuf) } +| '$' '#' { ARRAYLEN(pos lexbuf) } +| '%' ['$' '{'] { putback lexbuf 1; PERCENT(pos lexbuf) } +| '&' ['$' '{'] { putback lexbuf 1; AMPERSAND(pos lexbuf) } +| '*' ['$' '{'] { putback lexbuf 1; if lexeme_start lexbuf = !not_ok_for_match then MULT(pos lexbuf) else STAR(pos lexbuf) } -| ';' { SEMI_COLON } -| '(' { PAREN } -| '{' { BRACKET } -| "+{"{ BRACKET_HASHREF } -| '[' { ARRAYREF } -| ')' { not_ok_for_match := lexeme_end lexbuf; PAREN_END } -| '}' { not_ok_for_match := lexeme_end lexbuf; BRACKET_END } -| ']' { not_ok_for_match := lexeme_end lexbuf; ARRAYREF_END } +| ';' { SEMI_COLON(pos lexbuf) } +| '(' { PAREN(pos lexbuf) } +| '{' { BRACKET(pos lexbuf) } +| "+{"{ BRACKET_HASHREF(pos lexbuf) } +| '[' { ARRAYREF(pos lexbuf) } +| ')' { not_ok_for_match := lexeme_end lexbuf; PAREN_END(pos lexbuf) } +| '}' { not_ok_for_match := lexeme_end lexbuf; BRACKET_END(pos lexbuf) } +| ']' { not_ok_for_match := lexeme_end lexbuf; ARRAYREF_END(pos lexbuf) } | "/" { - if lexeme_start lexbuf = !not_ok_for_match then DIVISION + if lexeme_start lexbuf = !not_ok_for_match then DIVISION(pos lexbuf) else ( delimit_char := '/' ; current_string_start_line := !current_file_current_line; @@ -237,7 +419,7 @@ rule token = parse } | "/=" { - if lexeme_start lexbuf = !not_ok_for_match then ASSIGN(lexeme lexbuf) + if lexeme_start lexbuf = !not_ok_for_match then ASSIGN(lexeme lexbuf, pos lexbuf) else ( putback lexbuf 1 ; delimit_char := '/' ; @@ -268,10 +450,10 @@ rule token = parse | "s" pattern_separator { delimit_char := lexeme_char lexbuf 1 ; current_string_start_line := !current_file_current_line; - let s1, (_, start, _) = ins delimited_string lexbuf in - let s2, (_, _, end_) = ins delimited_string lexbuf in + let s1, (start, _) = ins delimited_string lexbuf in + let s2, (_, end_) = ins delimited_string lexbuf in let opts, _ = ins pattern_options lexbuf in - let pos = !current_file, start, end_ in + let pos = start, end_ in check_multi_line_delimited_string (Some opts) pos ; PATTERN_SUBST(s1, s2, opts, pos) } @@ -279,43 +461,43 @@ rule token = parse | "tr" pattern_separator { delimit_char := lexeme_char lexbuf 2 ; current_string_start_line := !current_file_current_line; - let s1, (_, start, _) = ins delimited_string lexbuf in - let s2, (_, _, end_) = ins delimited_string lexbuf in + let s1, (start, _) = ins delimited_string lexbuf in + let s2, (_, end_) = ins delimited_string lexbuf in let opts, _ = ins pattern_options lexbuf in - let pos = !current_file, start, end_ in + let pos = start, end_ in check_multi_line_delimited_string None pos ; PATTERN_SUBST(s1, s2, opts, pos) } | "<<" ident { not_ok_for_match := lexeme_end lexbuf; - HERE_DOC(here_doc_next_line (skip_n_char 2 (lexeme lexbuf)) true) + HERE_DOC(here_doc_next_line (skip_n_char 2 (lexeme lexbuf)) true, pos lexbuf) } | "<<'" ident "'" { not_ok_for_match := lexeme_end lexbuf; - HERE_DOC(here_doc_next_line (skip_n_char_ 3 1 (lexeme lexbuf)) false) + HERE_DOC(here_doc_next_line (skip_n_char_ 3 1 (lexeme lexbuf)) false, pos lexbuf) } -| "<<" space+ "'" -| "<<" space+ ident { +| "<<" ' '+ "'" +| "<<" ' '+ ident { failwith (pos2sfull_with (lexeme_start lexbuf + 2) (lexeme_end lexbuf) ^ "No space allowed between \"<<\" and the marker") } -| "<<" space* '"' { +| "<<" ' '* '"' { failwith (pos2sfull_with (lexeme_start lexbuf + 2) (lexeme_end lexbuf) ^ "Don't use <<\"MARK\", use <<MARK instead") } | "\\" stash | "\\" ['0'-'9' 'A'-'Z' 'a'-'z'] -| "\\" space* '(' - { putback lexbuf 1; REF } +| "\\" ' '* '(' + { putback lexbuf 1; REF(pos lexbuf) } -| "sub" space+ ident space* '(' [ '$' '@' '\\' '&' ';' ]* ')' { +| "sub" ' '+ ident ' '* '(' [ '$' '@' '\\' '&' ';' ]* ')' { (* bloody prototypes, must be caught especially otherwise "($)" is badly tokenized *) (* and alas "($@)" is both valid as an expression and a prototype *) let s = lexeme lexbuf in - let ident_start = index_non_spaces_from 3 s in + let ident_start = non_index_from s 3 ' ' in let proto_start = String.index_from s ident_start '(' in - let ident_end = rindex_non_spaces_from (proto_start-1) s in + let ident_end = non_rindex_from s (proto_start-1) ' ' in let ident = String.sub s ident_start (ident_end - ident_start + 1) in let prototype = skip_n_char_ (proto_start + 1) 1 s in @@ -327,8 +509,8 @@ rule token = parse | stash ident? ("::" ident)+ { typed_fqident_from_lexbuf lexbuf } | stash ident -| '$' [^ '{' ' ' '\t' '\n' '$'] -| "$^" [^ '{' ' ' '\t' '\n'] { typed_ident_from_lexbuf lexbuf } +| '$' [^ '{' ' ' '\n' '$'] +| "$^" [^ '{' ' ' '\n'] { typed_ident_from_lexbuf lexbuf } | "$$" [^ 'a'-'z' 'A'-'Z' '_' '{'] { putback lexbuf 1; SCALAR_IDENT(None, "$$", pos lexbuf) } @@ -339,7 +521,7 @@ rule token = parse | ident ":" { LABEL(lexeme lexbuf, pos lexbuf) } -| '-' [ 'a'-'z' 'A'-'Z' ] [ ' ' '\t' '(' ] { putback lexbuf 1; BAREWORD(lexeme lexbuf, pos lexbuf) } +| '-' [ 'a'-'z' 'A'-'Z' ] [ ' ' '(' ] { putback lexbuf 1; BAREWORD(lexeme lexbuf, pos lexbuf) } | ['0'-'9'] ['0'-'9' '_']* ('.' ['0'-'9'] ['0'-'9' '_']*)+ | 'v' ['0'-'9'] ['0'-'9' '_']* ('.' ['0'-'9'] ['0'-'9' '_']*)* @@ -367,7 +549,7 @@ rule token = parse | "qq(" { ins_to_string qqstring lexbuf } | "qw(" { let s, pos = ins qstring lexbuf in QUOTEWORDS(s, pos) } -| eof { EOF } +| eof { EOF(pos lexbuf) } | _ { failwith (Printf.sprintf "%serror tokenizing <<%s>>" (pos2sfull lexbuf) (lexeme lexbuf)) } and string = parse @@ -480,7 +662,6 @@ and pattern_options = parse and pod_command = parse | [^ '\n' ]+ { let s = lexeme lexbuf in - if String.contains s '\t' then failwith(pos2sfull lexbuf ^ "tabulations not accepted in POD commands") else let command = String.sub s 0 (try String.index s ' ' with Not_found -> String.length s) in match command with | "cut" -> diff --git a/perl_checker.src/parser.mly b/perl_checker.src/parser.mly index 0157714..e94f622 100644 --- a/perl_checker.src/parser.mly +++ b/perl_checker.src/parser.mly @@ -1,69 +1,56 @@ %{ (* -*- caml -*- *) open Types open Common + open Parser_helper - let die msg = - failwith (Info.pos2sfull_current (Parsing.symbol_start()) (Parsing.symbol_end()) ^ msg) + let parse_error msg = die_rule msg - let parse_error msg = die msg - - let warn msg = if false then prerr_endline msg - - let to_Ident = function - | BAREWORD(name, pos) -> Ident(I_raw, None, name, pos) - | SCALAR_IDENT(fq, name, pos) -> Ident(I_scalar, fq, name, pos) - | ARRAY_IDENT (fq, name, pos) -> Ident(I_array, fq, name, pos) - | HASH_IDENT (fq, name, pos) -> Ident(I_hash, fq, name, pos) - | FUNC_IDENT (fq, name, pos) -> Ident(I_func, fq, name, pos) - | STAR_IDENT (fq, name, pos) -> Ident(I_star, fq, name, pos) - | RAW_IDENT (fq, name, pos) -> Ident(I_raw, fq, name, pos) - | _ -> internal_error "Parser.to_Ident" %} -%token EOF -%token <Types.pos> SPACE -%token <string * Types.pos> NUM STRING BAREWORD REVISION COMMENT POD LABEL -%token <string * Types.pos> COMMAND_STRING QUOTEWORDS COMPACT_HASH_SUBSCRIPT -%token <(string * Types.pos) ref> HERE_DOC -%token <string * string * Types.pos> PATTERN -%token <string * string * string * Types.pos> PATTERN_SUBST - -%token <string option * string * Types.pos> SCALAR_IDENT ARRAY_IDENT HASH_IDENT FUNC_IDENT STAR_IDENT RAW_IDENT ARRAYLEN_IDENT -%token <string * string * Types.pos> FUNC_DECL_WITH_PROTO - -%token IF ELSIF ELSE UNLESS DO WHILE UNTIL MY CONTINUE SUB LOCAL -%token <string> FOR -%token USE PACKAGE BEGIN END -%token <Types.pos> PRINT NEW FORMAT -%token AT DOLLAR PERCENT AMPERSAND STAR ARRAYLEN -%token SEMI_COLON PKG_SCOPE -%token PAREN PAREN_END -%token BRACKET BRACKET_END BRACKET_HASHREF -%token ARRAYREF ARRAYREF_END - - -%token ARROW -%token INCR DECR -%token POWER -%token TIGHT_NOT BIT_NEG REF -%token PATTERN_MATCH PATTERN_MATCH_NOT -%token MULT DIVISION MODULO REPLICATE -%token PLUS MINUS CONCAT -%token BIT_SHIFT_LEFT BIT_SHIFT_RIGHT -%token LT GT -%token <string> COMPARE_OP EQ_OP -%token BIT_AND -%token BIT_OR BIT_XOR -%token AND_TIGHT -%token OR_TIGHT -%token DOTDOT DOTDOTDOT -%token QUESTION_MARK COLON -%token <string> ASSIGN -%token COMMA RIGHT_ARROW -%token NOT -%token AND -%token OR XOR +%token <unit * (Types.spaces * Types.raw_pos)> EOF +%token <string * (Types.spaces * Types.raw_pos)> NUM STRING BAREWORD BAREWORD_PAREN REVISION COMMENT POD LABEL PRINT_TO_STAR PRINT_TO_SCALAR +%token <string * (Types.spaces * Types.raw_pos)> COMMAND_STRING QUOTEWORDS COMPACT_HASH_SUBSCRIPT + +%token <(string * Types.raw_pos) ref * (Types.spaces * Types.raw_pos)> HERE_DOC +%token <(string * string) * (Types.spaces * Types.raw_pos)> PATTERN +%token <(string * string * string) * (Types.spaces * Types.raw_pos)> PATTERN_SUBST + +%token <(string option * string) * (Types.spaces * Types.raw_pos)> SCALAR_IDENT ARRAY_IDENT HASH_IDENT FUNC_IDENT STAR_IDENT RAW_IDENT RAW_IDENT_PAREN ARRAYLEN_IDENT +%token <(string * string) * (Types.spaces * Types.raw_pos)> FUNC_DECL_WITH_PROTO + +%token <string * (Types.spaces * Types.raw_pos)> FOR PRINT +%token <unit * (Types.spaces * Types.raw_pos)> NEW FORMAT +%token <string * (Types.spaces * Types.raw_pos)> COMPARE_OP EQ_OP +%token <string * (Types.spaces * Types.raw_pos)> ASSIGN + +%token <unit * (Types.spaces * Types.raw_pos)> IF ELSIF ELSE UNLESS DO WHILE UNTIL MY CONTINUE SUB LOCAL +%token <unit * (Types.spaces * Types.raw_pos)> USE PACKAGE BEGIN END +%token <unit * (Types.spaces * Types.raw_pos)> AT DOLLAR PERCENT AMPERSAND STAR ARRAYLEN +%token <unit * (Types.spaces * Types.raw_pos)> SEMI_COLON PKG_SCOPE +%token <unit * (Types.spaces * Types.raw_pos)> PAREN PAREN_END +%token <unit * (Types.spaces * Types.raw_pos)> BRACKET BRACKET_END BRACKET_HASHREF +%token <unit * (Types.spaces * Types.raw_pos)> ARRAYREF ARRAYREF_END + +%token <unit * (Types.spaces * Types.raw_pos)> ARROW +%token <unit * (Types.spaces * Types.raw_pos)> INCR DECR +%token <unit * (Types.spaces * Types.raw_pos)> POWER +%token <unit * (Types.spaces * Types.raw_pos)> TIGHT_NOT BIT_NEG REF +%token <unit * (Types.spaces * Types.raw_pos)> PATTERN_MATCH PATTERN_MATCH_NOT +%token <unit * (Types.spaces * Types.raw_pos)> MULT DIVISION MODULO REPLICATE +%token <unit * (Types.spaces * Types.raw_pos)> PLUS MINUS CONCAT +%token <unit * (Types.spaces * Types.raw_pos)> BIT_SHIFT_LEFT BIT_SHIFT_RIGHT +%token <unit * (Types.spaces * Types.raw_pos)> LT GT +%token <unit * (Types.spaces * Types.raw_pos)> BIT_AND +%token <unit * (Types.spaces * Types.raw_pos)> BIT_OR BIT_XOR +%token <unit * (Types.spaces * Types.raw_pos)> AND_TIGHT +%token <unit * (Types.spaces * Types.raw_pos)> OR_TIGHT +%token <unit * (Types.spaces * Types.raw_pos)> DOTDOT DOTDOTDOT +%token <unit * (Types.spaces * Types.raw_pos)> QUESTION_MARK COLON +%token <unit * (Types.spaces * Types.raw_pos)> COMMA RIGHT_ARROW +%token <unit * (Types.spaces * Types.raw_pos)> NOT +%token <unit * (Types.spaces * Types.raw_pos)> AND +%token <unit * (Types.spaces * Types.raw_pos)> OR XOR %nonassoc PREC_LOW %nonassoc LOOPEX @@ -94,289 +81,299 @@ %left ARROW %nonassoc PAREN_END -%left PAREN +%left PAREN PREC_HIGH %left ARRAYREF BRACKET -%type <string> prog +%type <Types.fromparser list> prog +%type <Types.fromparser * (Types.spaces * Types.raw_pos)> expr %start prog %% -prog: lines EOF { "" } +prog: lines EOF { fst $1 } lines: /* A collection of "lines" in the program */ -| {()} -| sideff {()} -| line lines {()} +| {[], (Space_none, bpos)} +| sideff {[fst $1], snd $1} +| line lines {fst $1 :: fst $2, snd $1} line: -| decl {()} -| if_then_else {()} -| loop {()} -| LABEL {()} -| SEMI_COLON {()} -| sideff SEMI_COLON {()} -| BRACKET lines BRACKET_END {()} +| decl {$1} +| if_then_else {$1} +| loop {$1} +| LABEL {sp_cr($1); Label(fst $1), snd $1} +| semi_colon {List [], snd $1} +| sideff semi_colon {$1} +| BRACKET lines BRACKET_END {sp_p($2); sp_p($3); Block(fst $2), snd $1} if_then_else: /* Real conditional expressions */ -| IF PAREN expr PAREN_END BRACKET lines BRACKET_END elsif else_ {()} -| UNLESS PAREN expr PAREN_END BRACKET lines BRACKET_END elsif else_ {()} +| IF PAREN expr PAREN_END BRACKET lines BRACKET_END elsif else_ {sp_p($1); sp_n($2); sp_0($3); sp_0_or_cr($4); sp_p($5); sp_p($6); sp_p($7); Call_op("if", fst $3 :: Block(fst $6) :: fst $8 @ fst $9), snd $1} +| UNLESS PAREN expr PAREN_END BRACKET lines BRACKET_END elsif else_ {sp_p($1); sp_n($2); sp_0($3); sp_0_or_cr($4); sp_p($5); sp_p($6); sp_p($7); Call_op("unless", fst $3 :: Block(fst $6) :: fst $8 @ fst $9), snd $1} elsif: -| {()} -| ELSIF PAREN expr PAREN_END BRACKET lines BRACKET_END elsif {()} +| {[], (Space_none, bpos)} +| ELSIF PAREN expr PAREN_END BRACKET lines BRACKET_END elsif {sp_p($1); sp_n($2); sp_0($3); sp_0_or_cr($4); sp_p($5); sp_p($7); fst $3 :: Block(fst $6) :: fst $8, snd $1} else_: -| { () } -| ELSE BRACKET lines BRACKET_END { () } +| { [], (Space_none, bpos) } +| ELSE BRACKET lines BRACKET_END {sp_p($1); sp_n($2); sp_p($3); sp_p($4); [ Block(fst $3) ], snd $1 } loop: -| WHILE PAREN expr PAREN_END BRACKET lines BRACKET_END cont {()} -| UNTIL PAREN expr PAREN_END BRACKET lines BRACKET_END cont {()} -| FOR MY SCALAR_IDENT PAREN expr PAREN_END BRACKET lines BRACKET_END cont {()} -| FOR SCALAR_IDENT PAREN expr PAREN_END BRACKET lines BRACKET_END cont {()} -| FOR PAREN expr PAREN_END BRACKET lines BRACKET_END cont {()} -| FOR PAREN expr_or_empty SEMI_COLON expr_or_empty SEMI_COLON expr_or_empty PAREN_END BRACKET lines BRACKET_END {()} +| WHILE PAREN expr PAREN_END BRACKET lines BRACKET_END cont {sp_p($1); sp_n($2); sp_0($3); sp_0_or_cr($4); sp_p($5); sp_p($6); sp_p($7); Call_op("while", fst $3 :: fst $6), snd $1} +| UNTIL PAREN expr PAREN_END BRACKET lines BRACKET_END cont {sp_p($1); sp_n($2); sp_0($3); sp_0_or_cr($4); sp_p($5); sp_p($6); sp_p($7); Call_op("until", fst $3 :: fst $6), snd $1} +| FOR MY SCALAR_IDENT PAREN expr PAREN_END BRACKET lines BRACKET_END cont {sp_p($1); check_foreach($1); sp_n($4); sp_0($5); sp_0_or_cr($6); sp_p($7); sp_p($8); sp_p($9); Call_op("foreach my", to_Ident $3 :: fst $5 :: fst $8), snd $1} +| FOR SCALAR_IDENT PAREN expr PAREN_END BRACKET lines BRACKET_END cont { die_rule "don't use for without \"my\"ing the iteration variable" } +| FOR PAREN expr PAREN_END BRACKET lines BRACKET_END cont {sp_p($1); sp_n($2); sp_0($3); sp_0_or_cr($4); sp_p($5); sp_p($6); sp_p($7); check_foreach($1); Call_op("foreach", fst $3 :: fst $6), snd $1} +| FOR PAREN expr_or_empty semi_colon expr_or_empty semi_colon expr_or_empty PAREN_END BRACKET lines BRACKET_END {sp_p($1); sp_n($2); sp_0($3); check_for($1); Call_op("for", fst $3 :: fst $5 :: fst $7 :: fst $10), snd $1} cont: /* Continue blocks */ -| {()} -| CONTINUE BRACKET lines BRACKET_END {()} +| {(), (Space_none, bpos)} +| CONTINUE BRACKET lines BRACKET_END {sp_p($1); sp_n($2); sp_p($4); (), snd $1} sideff: /* An expression which may have a side-effect */ -| error {()} -| expr {()} -| expr IF expr {()} -| expr UNLESS expr {()} -| expr WHILE expr {()} -| expr UNTIL expr {()} -| expr FOR expr {()} +| expr {$1} +| expr IF expr {let f = "if" in sp_p($2); sp_p($3); check_no_paren f $3; Call_op(f ^ " infix", [ fst $1 ; fst $3 ]), snd $1} +| expr UNLESS expr {let f = "unless" in sp_p($2); sp_p($3); check_no_paren f $3; Call_op(f ^ " infix", [ fst $1 ; fst $3 ]), snd $1} +| expr WHILE expr {let f = "while" in sp_p($2); sp_p($3); check_no_paren f $3; Call_op(f ^ " infix", [ fst $1 ; fst $3 ]), snd $1} +| expr UNTIL expr {let f = "until" in sp_p($2); sp_p($3); check_no_paren f $3; Call_op(f ^ " infix", [ fst $1 ; fst $3 ]), snd $1} +| expr FOR expr {let f = "for" in sp_p($2); sp_p($3); check_foreach($2); check_no_paren f $3; Call_op(f ^ " infix", [ fst $1 ; fst $3 ]), snd $1} decl: -| FORMAT BAREWORD ASSIGN {()} -| FORMAT ASSIGN {()} -| func_decl SEMI_COLON {()} -| func_decl BRACKET lines BRACKET_END {()} -| func_decl BRACKET BRACKET expr BRACKET_END BRACKET_END {()} -| func_decl BRACKET BRACKET expr BRACKET_END SEMI_COLON BRACKET_END {()} -| PACKAGE word SEMI_COLON {()} -| BEGIN BRACKET lines BRACKET_END {()} -| END BRACKET lines BRACKET_END {()} -| use {()} +| FORMAT BAREWORD ASSIGN {Too_complex, snd $1} +| FORMAT ASSIGN {Too_complex, snd $1} +| func_decl semi_colon {die_rule (if snd (fst $1) = "" then "there is no need to pre-declare in Perl!" else "please don't use prototype pre-declaration") } +| func_decl BRACKET BRACKET_END {sp_n($2); sp_0_or_cr($3); let name, proto = fst $1 in sub_declaration (name, proto) [], snd $1} +| func_decl BRACKET lines BRACKET_END {sp_n($2); sp_p($3); sp_p($4); sub_declaration (fst $1) (fst $3), snd $1} +| func_decl BRACKET BRACKET expr BRACKET_END BRACKET_END {sp_n($2); sp_p($3); sp_p($4); sp_p($5); sp_p($6); sub_declaration (fst $1) [Ref(I_hash, fst $4)], snd $1} +| func_decl BRACKET BRACKET expr BRACKET_END semi_colon BRACKET_END {sp_n($2); sp_p($3); sp_p($4); sp_p($5); sp_p($7); sub_declaration (fst $1) [Ref(I_hash, fst $4)], snd $1} +| PACKAGE word semi_colon {sp_0_or_cr($1); Package(fst $2), snd $1} +| BEGIN BRACKET lines BRACKET_END {sp_0_or_cr($1); sp_1($2); sp_p($3); sp_p($4); Sub_declaration(Ident(None, "BEGIN", get_pos $1), "", fst $3), snd $1} +| END BRACKET lines BRACKET_END {sp_0_or_cr($1); sp_1($2); sp_p($3); sp_p($4); Sub_declaration(Ident(None, "END", get_pos $1), "", fst $3), snd $1} +| use {$1} use: -| use_word listexpr SEMI_COLON {()} +| use_word listexpr semi_colon {Use(fst $1, fst $2), snd $1} use_word: -| use_revision word COLON {()} -| use_revision word {()} -| use_revision {()} +| use_revision word comma {fst $2, snd $1} +| use_revision word {fst $2, snd $1} +| use_revision {Ident(None, "", get_pos $1), snd $1} use_revision: -| USE REVISION COLON {()} -| USE REVISION {()} -| USE {()} +| USE REVISION comma {$1} +| USE REVISION {$1} +| USE {$1} func_decl: -| SUB word {()} -| FUNC_DECL_WITH_PROTO {()} +| SUB word {(fst $2, ""), snd $1} +| FUNC_DECL_WITH_PROTO {(Ident(None, fst(fst $1), get_pos $1), snd(fst $1)), snd $1} listexpr: /* Basic list expressions */ -| %prec PREC_LOW {()} -| argexpr %prec PREC_LOW {()} +| %prec PREC_LOW {[], (Space_none, bpos)} +| argexpr %prec PREC_LOW {$1} expr: /* Ordinary expressions; logical combinations */ -| expr AND expr {()} -| expr OR expr {()} -| argexpr %prec PREC_LOW {()} +| expr AND expr {sp_p($2); sp_p($3); Call_op("and", [ fst $1; fst $3 ]), snd $1} +| expr OR expr {sp_p($2); sp_p($3); Call_op("or", [ fst $1; fst $3 ]), snd $1} +| argexpr %prec PREC_LOW {List(fst $1), snd $1} argexpr: /* Expressions are a list of terms joined by commas */ -| argexpr comma {()} -| argexpr comma term {()} -| argexpr comma BRACKET expr BRACKET_END {()} -| term %prec PREC_LOW {()} +| argexpr comma {fst $1, snd $1} +| argexpr comma term {fst $1 @ [fst $3], snd $1} +| argexpr comma BRACKET expr BRACKET_END {sp_p($3); sp_p($5); fst $1 @ [ Ref(I_hash, fst $4) ], snd $1} +| term %prec PREC_LOW {[fst $1], snd $1} /********************************************************************************/ term: -| term binop term {()} -| term binop BRACKET expr BRACKET_END {()} -| term LT term {()} -| term LT BRACKET expr BRACKET_END {()} -| term GT term {()} -| term GT BRACKET expr BRACKET_END {()} +| term binop term {Call_op(fst $2, [fst $1 ; fst $3]), snd $1} +| term binop BRACKET expr BRACKET_END {sp_p($3); sp_p($5); Call_op(fst $2, [fst $1 ; Ref(I_hash, fst $4)]), snd $1} +| term LT term {Call_op("<", [fst $1 ; fst $3]), snd $1} +| term GT term {Call_op(">", [fst $1 ; fst $3]), snd $1} + +| term PATTERN_MATCH PATTERN {Call_op("m//", fst $1 :: from_PATTERN $3), snd $1} +| term PATTERN_MATCH_NOT PATTERN {Call_op("!m//", fst $1 :: from_PATTERN $3), snd $1} +| term PATTERN_MATCH PATTERN_SUBST {Call_op("s///", fst $1 :: from_PATTERN_SUBST $3), snd $1} + +| term PATTERN_MATCH scalar { Too_complex, snd $1 } +| term PATTERN_MATCH_NOT scalar { Too_complex, snd $1 } + +| term PATTERN_MATCH STRING {failwith (msg_with_pos (snd (snd $3)) "use a regexp, not a string")} +| term PATTERN_MATCH_NOT STRING {failwith (msg_with_pos (snd (snd $3)) "use a regexp, not a string")} /* Unary operators and terms */ -| MINUS term %prec UNARY_MINUS {()} -| TIGHT_NOT term {()} -| BIT_NEG term {()} -| INCR term {()} -| DECR term {()} -| term INCR {()} -| term DECR {()} +| MINUS term %prec UNARY_MINUS {Call_op("- unary", [fst $2]), snd $1} +| TIGHT_NOT term {Call_op("not", [fst $2]), snd $1} +| BIT_NEG term {Call_op("~", [fst $2]), snd $1} +| INCR term {Call_op("++", [fst $2]), snd $1} +| DECR term {Call_op("--", [fst $2]), snd $1} +| term INCR {sp_0($2); Call_op("++ post", [fst $1]), snd $1} +| term DECR {sp_0($2); Call_op("-- post", [fst $1]), snd $1} -| NOT argexpr {()} +| NOT argexpr {Call_op("not", fst $2), snd $1} /* Constructors for anonymous data */ -| arrayref {()} /* [ 1, 2 ] */ -| BRACKET BRACKET_END {()} /* empty hash */ -| BRACKET_HASHREF expr_or_empty BRACKET_END %prec PAREN {()} /* { foo => "Bar" } */ -| SUB BRACKET lines BRACKET_END %prec PAREN {()} -| termdo {()} -| term question_mark_ colon_ {()} -| REF term {()} /* \$x, \@y, \%z */ -| REF BRACKET expr BRACKET_END {()} /* \$x, \@y, \%z */ -| my %prec UNIOP {()} -| LOCAL term %prec UNIOP {()} +| ARRAYREF ARRAYREF_END {sp_0($2); Ref(I_array, List[]), snd $1} +| arrayref_start ARRAYREF_END {Ref(I_array, List(fst $1)), snd $1} +| arrayref_start expr ARRAYREF_END {Ref(I_array, List(fst $1 @ [fst $2])), snd $1} +| arrayref_start BRACKET expr BRACKET_END ARRAYREF_END {Ref(I_array, List(fst $1 @ [Ref(I_hash, fst $3)])), snd $1} -| parenthesized {()} /* (1, 2) */ -| parenthesized arrayref {()} /* list slice */ +| BRACKET BRACKET_END {Ref(I_hash, List []), snd $1} /* empty hash */ +| BRACKET_HASHREF expr BRACKET_END %prec PREC_HIGH {sp_p($3); Ref(I_hash, fst $2), snd $1} /* { foo => "Bar" } */ +| SUB BRACKET BRACKET_END %prec PREC_HIGH {sp_n($2); sp_0($3); Anonymous_sub(Block[]), snd $1} +| SUB BRACKET lines BRACKET_END %prec PREC_HIGH {sp_n($2); sp_p($4); Anonymous_sub(Block(fst $3)), snd $1} -| variable {()} +| termdo {$1} +| term question_mark_ colon_ { Call_op("?:", [ fst $1 ; fst $2; fst $3]), snd $1} +| REF term { Ref(I_scalar, fst $2), snd $1} /* \$x, \@y, \%z */ +| my %prec UNIOP {List(fst $1), snd $1} +| LOCAL term %prec UNIOP {Local(fst $2), snd $1} -| subscripted {()} +| parenthesized {List(fst $1), snd $1} /* (1, 2) */ +| parenthesized arrayref {Deref_with(I_array, List(fst $1), List(fst $2)), snd $1} /* list slice */ -| array arrayref {()} /* array slice */ -| array BRACKET expr BRACKET_END {()} /* @hash{@keys} */ +| variable {$1} +| subscripted {$1} -/* function_calls */ -| func parenthesized {()} /* &foo(@args) */ -| word argexpr {()} /* foo(@args) */ -| word BRACKET lines BRACKET_END listexpr %prec LSTOP {()} /* map { foo } @bar */ -| word BRACKET BRACKET expr BRACKET_END BRACKET_END listexpr %prec LSTOP {()} /* map { foo } @bar */ -| word BRACKET BRACKET expr BRACKET_END SEMI_COLON BRACKET_END listexpr %prec LSTOP {()} /* map { foo } @bar */ - -| term ARROW word_or_scalar parenthesized {warn "term->word_or_scalar(expr_or_empty) -> function_call"} /* $foo->bar(list) */ -| term ARROW word_or_scalar {warn "term->word_or_scalar -> function_call"} /* $foo->bar */ - -| NEW word listexpr {()} /* new Class @args */ -| print listexpr {()} - -| word {()} - -| NUM {()} -| STRING {()} -| REVISION {()} -| COMMAND_STRING {()} -| QUOTEWORDS {()} -| HERE_DOC {()} -| PATTERN {()} -| PATTERN_SUBST {()} -| diamond {()} +| array arrayref { Deref_with(I_array, fst $1, List(fst $2)), snd $1} /* array slice: @array[vals] */ +| array BRACKET expr BRACKET_END {sp_0($2); sp_0($4); Deref_with(I_hash, array_ident_to_hash_ident $1, fst $3), snd $1} /* hash slice: @hash{@keys} */ -diamond: -| LT GT {()} -| LT term GT {()} -print: -| PRINT bareword COLON {()} -| PRINT bareword {()} /* print FH @args */ -| PRINT scalar COLON {()} -| PRINT scalar {()} /* print $fh @args */ -| PRINT {()} +/* function_calls */ +| func parenthesized {Call(fst $1, fst $2), snd $1} /* &foo(@args) */ +| word argexpr {check_parenthesized_first_argexpr (string_of_Ident (fst $1)) $2; Call(fst $1, fst $2), snd $1} /* foo $a, $b */ +| word_paren parenthesized {Call(fst $1, fst $2), snd $1} /* foo(@args) */ +| word BRACKET lines BRACKET_END listexpr %prec LSTOP {sp_n($2); sp_p($4); Call(fst $1, Anonymous_sub(Block(fst $3)) :: fst $5), snd $1} /* map { foo } @bar */ +| word BRACKET BRACKET expr BRACKET_END BRACKET_END listexpr %prec LSTOP {sp_n($2); sp_p($3); sp_p($5); sp_p($6); Call(fst $1, Anonymous_sub(Ref(I_hash, fst $4)) :: fst $7), snd $1} /* map { { foo } } @bar */ +| word BRACKET BRACKET expr BRACKET_END semi_colon BRACKET_END listexpr %prec LSTOP {sp_n($2); sp_p($3); sp_p($5); sp_p($7); Call(fst $1, Anonymous_sub(Ref(I_hash, fst $4)) :: fst $8), snd $1} /* map { { foo }; } @bar */ + +| term ARROW word_or_scalar parenthesized {sp_0($2); Method_call(fst $1, fst $3, fst $4), snd $1} /* $foo->bar(list) */ +| term ARROW word_or_scalar {sp_0($2); Method_call(fst $1, fst $3, []), snd $1} /* $foo->bar */ + +| NEW word listexpr { Method_call(fst $2, Ident(None, "new", get_pos $1), fst $3), snd $1} /* new Class @args */ + +| PRINT { Call_op("print", var_STDOUT :: [ var_dollar_ ]), snd $1 } +| PRINT argexpr {check_parenthesized_first_argexpr (fst $1) $2; Call_op("print", var_STDOUT :: fst $2), snd $1 } +| PRINT_TO_STAR argexpr { Call_op("print", Deref(I_star, Ident(None, fst $1, get_pos $1)) :: fst $2), snd $1 } +| PRINT_TO_SCALAR { Call_op("print", var_STDOUT :: [ Deref(I_scalar, Ident(None, fst $1, get_pos $1)) ]), snd $1 } +| PRINT_TO_SCALAR argexpr { Call_op("print", Deref(I_scalar, Ident(None, fst $1, get_pos $1)) :: fst $2), snd $1 } + +| word {$1} + +| NUM {Num(fst $1, get_pos $1), snd $1} +| STRING {to_String $1, snd $1} +| REVISION {to_String $1, snd $1} +| COMMAND_STRING {Call_op("``", [to_String $1]), snd $1} +| QUOTEWORDS {Call_op("qw", [to_String $1]), snd $1} +| HERE_DOC {String(fst!(fst $1), get_pos $1), snd $1} +| PATTERN {Call_op("m//", var_dollar_ :: from_PATTERN $1), snd $1} +| PATTERN_SUBST {Call_op("s///", var_dollar_ :: from_PATTERN_SUBST $1), snd $1} +| diamond {$1} -| PRINT bareword BRACKET {die "use parentheses around print"} +diamond: +| LT GT {sp_0($2); Call_op("<>", []), snd $1} +| LT term GT {sp_0($3); Call_op("<>", [fst $2]), snd $1} subscripted: /* Some kind of subscripted expression */ -| variable PKG_SCOPE bracket_subscript {()} /* *main::{something} */ -| scalar bracket_subscript {()} /* $foo{bar} */ -| scalar arrayref {()} /* $array[$element] */ -| term ARROW bracket_subscript {()} /* somehref->{bar} */ -| term ARROW arrayref {()} /* somearef->[$element] */ -| term ARROW parenthesized {()} /* $subref->(@args) */ -| subscripted bracket_subscript {()} /* $foo->[bar]{baz} */ -| subscripted arrayref {()} /* $foo->[$bar][$baz] */ -| subscripted parenthesized {()} /* $foo->{bar}(@args) */ +| variable PKG_SCOPE bracket_subscript {sp_0($2); Too_complex, snd $1} /* $foo::{something} */ +| scalar bracket_subscript {Deref_with(I_hash , fst $1, fst $2), snd $1} /* $foo{bar} */ +| scalar arrayref {Deref_with(I_array, fst $1, only_one $2), snd $1} /* $array[$element] */ +| term ARROW bracket_subscript {sp_0($2); Deref_with(I_hash , fst $1, fst $3), snd $1} /* somehref->{bar} */ +| term ARROW arrayref {sp_0($2); Deref_with(I_array, fst $1, only_one $3), snd $1} /* somearef->[$element] */ +| term ARROW parenthesized {sp_0($2); Deref_with(I_func , fst $1, List(fst $3)), snd $1} /* $subref->(@args) */ +| subscripted bracket_subscript {Deref_with(I_hash , fst $1, fst $2), snd $1} /* $foo->[bar]{baz} */ +| subscripted arrayref {Deref_with(I_array, fst $1, only_one $2), snd $1} /* $foo->[$bar][$baz] */ +| subscripted parenthesized {Deref_with(I_func , fst $1, List(fst $2)), snd $1} /* $foo->{bar}(@args) */ arrayref: -| arrayref_start ARRAYREF_END {()} -| arrayref_start expr ARRAYREF_END {()} -| arrayref_start BRACKET expr BRACKET_END ARRAYREF_END {()} +| arrayref_start ARRAYREF_END {sp_0($2); fst $1, snd $1} +| arrayref_start expr ARRAYREF_END {sp_0($3); fst $1 @ [fst $2], snd $1} +| arrayref_start BRACKET expr BRACKET_END ARRAYREF_END {sp_p($2); sp_p($4); sp_0($5); fst $1 @ [Ref(I_hash, fst $3)], snd $1} parenthesized: -| parenthesized_start PAREN_END {()} -| parenthesized_start expr PAREN_END {()} -| parenthesized_start BRACKET expr BRACKET_END PAREN_END {()} +| parenthesized_start PAREN_END {sp_0_or_cr($2); fst $1, snd $1} +| parenthesized_start expr PAREN_END {sp_0_or_cr($3); fst $1 @ [fst $2], snd $1} +| parenthesized_start BRACKET expr BRACKET_END PAREN_END {sp_p($4); sp_0_or_cr($5); fst $1 @ [Ref(I_hash, fst $3)], snd $1} arrayref_start: -| ARRAYREF {()} -| arrayref_start expr comma {()} -| arrayref_start BRACKET expr BRACKET_END comma {()} +| ARRAYREF {[], snd $1} +| arrayref_start BRACKET expr BRACKET_END comma {sp_p($4); fst $1 @ [Ref(I_hash, fst $3)], snd $1} parenthesized_start: -| PAREN {()} -| parenthesized_start expr comma {()} -| parenthesized_start BRACKET expr BRACKET_END comma {()} +| PAREN {[], snd $1} +| parenthesized_start BRACKET expr BRACKET_END comma {sp_p($4); fst $1 @ [Ref(I_hash, fst $3)], snd $1} my: /* Things that can be "my"'d */ -| MY parenthesized {()} -| MY scalar {()} -| MY hash {()} -| MY array {()} +| MY parenthesized {List.map (fun e -> My e) (fst $2), snd $1} +| MY scalar {[My(fst $2)], snd $1} +| MY hash {[My(fst $2)], snd $1} +| MY array {[My(fst $2)], snd $1} termdo: /* Things called with "do" */ -| DO term %prec UNIOP {()} /* do $filename */ -| DO BRACKET lines BRACKET_END %prec PAREN {()} /* do { code */ +| DO term %prec UNIOP { die_rule "\"do EXPR\" not allowed" } /* do $filename */ +| DO BRACKET lines BRACKET_END %prec PREC_HIGH {sp_n($2); sp_p($4); Block(fst $3), snd $1} /* do { code */ question_mark_: -| QUESTION_MARK term {()} -| QUESTION_MARK BRACKET expr BRACKET_END {()} +| QUESTION_MARK term {sp_n($1); fst $2, snd $1} +| QUESTION_MARK BRACKET expr BRACKET_END {sp_n($1); sp_p($2); sp_p($4); Ref(I_hash, fst $3), snd $1} colon_: -| COLON term {()} -| COLON BRACKET expr BRACKET_END {()} +| COLON term {sp_n($1); fst $2, snd $1} +| COLON BRACKET expr BRACKET_END {sp_n($1); sp_p($2); sp_p($4); Ref(I_hash, fst $3), snd $1} bracket_subscript: -| BRACKET expr BRACKET_END {()} -| COMPACT_HASH_SUBSCRIPT {()} +| BRACKET expr BRACKET_END {sp_0($1); sp_0($3); only_one_in_List $2, snd $1} +| COMPACT_HASH_SUBSCRIPT {sp_0($1); to_String $1, snd $1} binop: -| ASSIGN {()} -| POWER {()} -| MULT {()} | DIVISION {()} | MODULO {()} | REPLICATE {()} -| PLUS {()} | MINUS {()} | CONCAT {()} -| BIT_SHIFT_LEFT {()} | BIT_SHIFT_RIGHT {()} -| COMPARE_OP {()} -| EQ_OP {()} -| BIT_AND {()} -| BIT_OR {()} | BIT_XOR {()} -| DOTDOT {()} | DOTDOTDOT {()} -| AND_TIGHT {()} -| OR_TIGHT {()} | XOR {()} -| PATTERN_MATCH {()} | PATTERN_MATCH_NOT {()} +| ASSIGN {"=", snd $1} +| POWER {"**", snd $1} +| MULT {"*", snd $1} | DIVISION {"/", snd $1} | MODULO {"%", snd $1} | REPLICATE {"x", snd $1} +| PLUS {"+", snd $1} | MINUS {"-", snd $1} | CONCAT {".", snd $1} +| BIT_SHIFT_LEFT {"<<", snd $1} | BIT_SHIFT_RIGHT {">>", snd $1} +| COMPARE_OP {fst $1, snd $1} +| EQ_OP {fst $1, snd $1} +| BIT_AND {"&", snd $1} +| BIT_OR {"|", snd $1} | BIT_XOR {"^", snd $1} +| DOTDOT {"..", snd $1} | DOTDOTDOT {"...", snd $1} +| AND_TIGHT {"&&", snd $1} +| OR_TIGHT {"||", snd $1} | XOR {"xor", snd $1} variable: -| scalar %prec PAREN {()} -| star %prec PAREN {()} -| hash %prec PAREN {()} -| array %prec PAREN {()} -| arraylen %prec PAREN {()} /* $#x, $#{ something } */ -| func %prec PAREN {()} /* &foo; */ +| scalar %prec PREC_HIGH {$1} +| star %prec PREC_HIGH {$1} +| hash %prec PREC_HIGH {$1} +| array %prec PREC_HIGH {$1} +| arraylen %prec PREC_HIGH {$1} /* $#x, $#{ something } */ +| func %prec PREC_HIGH {$1} /* &foo; */ word: -| bareword { fst $1 } -| RAW_IDENT { - match $1 with - | None, name, _ -> name - | Some s, name, _ -> s ^ "::" ^ name - } +| bareword { $1 } +| RAW_IDENT { to_Ident $1, snd $1 } + +comma: COMMA {$1} | RIGHT_ARROW {sp_p($1); $1} -comma: COMMA {()} | RIGHT_ARROW {()} +semi_colon: SEMI_COLON {sp_0($1); $1} word_or_scalar: -| word {()} -| scalar {()} +| word {$1} +| scalar {$1} +| word_paren {$1} bareword: -| NEW { "new", $1 } -| FORMAT { "format", $1 } -| BAREWORD { $1 } - -arraylen: ARRAYLEN_IDENT {()} | ARRAYLEN scalar {()} | ARRAYLEN BRACKET lines BRACKET_END {()} -scalar: SCALAR_IDENT {()} | DOLLAR scalar {()} | DOLLAR BRACKET lines BRACKET_END {()} | DOLLAR BRACKET BRACKET expr BRACKET_END BRACKET_END {()} -func: FUNC_IDENT {()} | AMPERSAND scalar {()} | AMPERSAND BRACKET lines BRACKET_END {()} -array: ARRAY_IDENT {()} | AT scalar {()} | AT BRACKET lines BRACKET_END {()} -hash: HASH_IDENT {()} | PERCENT scalar {()} | PERCENT BRACKET lines BRACKET_END {()} -star: STAR_IDENT {()} | STAR scalar {()} | STAR BRACKET lines BRACKET_END {()} - -expr_or_empty: {()} | expr {()} - +| NEW { Ident(None, "new", get_pos $1), snd $1 } +| FORMAT { Ident(None, "format", get_pos $1), snd $1 } +| BAREWORD { Ident(None, fst $1, get_pos $1), snd $1 } + +word_paren: +| BAREWORD_PAREN { Ident(None, fst $1, get_pos $1), snd $1 } +| RAW_IDENT_PAREN { to_Ident $1, snd $1 } + +arraylen: ARRAYLEN_IDENT {Deref(I_arraylen, to_Ident $1), snd $1} | ARRAYLEN scalar {Deref(I_arraylen, fst $2), snd $1} | ARRAYLEN BRACKET lines BRACKET_END {sp_0($2); Deref(I_arraylen, Block(fst $3)), snd $1} +scalar: SCALAR_IDENT {Deref(I_scalar , to_Ident $1), snd $1} | DOLLAR scalar {Deref(I_scalar , fst $2), snd $1} | DOLLAR BRACKET lines BRACKET_END {sp_0($2); Deref(I_scalar , Block(fst $3)), snd $1} | DOLLAR BRACKET BRACKET expr BRACKET_END BRACKET_END {sp_0($2); sp_0($3); sp_p($5); sp_0($6); Deref(I_scalar, Ref(I_hash, fst $4)), snd $1} +func: FUNC_IDENT {Deref(I_func , to_Ident $1), snd $1} | AMPERSAND scalar {Deref(I_func , fst $2), snd $1} | AMPERSAND BRACKET lines BRACKET_END {sp_0($2); Deref(I_func , Block(fst $3)), snd $1} +array: ARRAY_IDENT {Deref(I_array , to_Ident $1), snd $1} | AT scalar {Deref(I_array , fst $2), snd $1} | AT BRACKET lines BRACKET_END {sp_0($2); Deref(I_array , Block(fst $3)), snd $1} +hash: HASH_IDENT {Deref(I_hash , to_Ident $1), snd $1} | PERCENT scalar {Deref(I_hash , fst $2), snd $1} | PERCENT BRACKET lines BRACKET_END {sp_0($2); Deref(I_hash , Block(fst $3)), snd $1} +star: STAR_IDENT {Deref(I_star , to_Ident $1), snd $1} | STAR scalar {Deref(I_star , fst $2), snd $1} | STAR BRACKET lines BRACKET_END {sp_0($2); Deref(I_star , Block(fst $3)), snd $1} + +expr_or_empty: {Block [], (Space_none, bpos)} | expr {$1} diff --git a/perl_checker.src/parser_helper.ml b/perl_checker.src/parser_helper.ml new file mode 100644 index 0000000..cc91c83 --- /dev/null +++ b/perl_checker.src/parser_helper.ml @@ -0,0 +1,125 @@ +open Types +open Common + +let bpos = -1, -1 +let msg_with_pos (start, end_) msg = Info.pos2sfull_current start end_ ^ msg +let die_with_pos raw_pos msg = failwith (msg_with_pos raw_pos msg) +let warn raw_pos msg = prerr_endline (msg_with_pos raw_pos msg) + +let die_rule msg = die_with_pos (Parsing.symbol_start(), Parsing.symbol_end()) msg +let debug msg = if false then prerr_endline msg + +let raw_pos2pos(a, b) = !Info.current_file, a, b +let get_pos (_, (_, pos)) = raw_pos2pos pos + +let warn_too_many_space start = warn (start, start) "you should have only one space here" +let warn_no_space start = warn (start, start) "you should have a space here" +let warn_cr start = warn (start, start) "you should not have a carriage-return (\\n) here" +let warn_space start = warn (start, start) "you should not have a space here" + +let sp_0(_, (spaces, (start, _))) = + match spaces with + | Space_none -> () + | Space_0 -> () + | Space_1 + | Space_n -> warn_space start + | Space_cr -> warn_cr start + +let sp_0_or_cr(_, (spaces, (start, _))) = + match spaces with + | Space_none -> () + | Space_0 -> () + | Space_1 + | Space_n -> warn_space start + | Space_cr -> () + +let sp_1(_, (spaces, (start, _))) = + match spaces with + | Space_none -> () + | Space_0 -> warn_no_space start + | Space_1 -> () + | Space_n -> warn_too_many_space start + | Space_cr -> warn_cr start + +let sp_n(_, (spaces, (start, _))) = + match spaces with + | Space_none -> () + | Space_0 -> warn_no_space start + | Space_1 -> () + | Space_n -> () + | Space_cr -> warn_cr start + +let sp_p(_, (spaces, (start, _))) = + match spaces with + | Space_none -> () + | Space_0 -> warn_no_space start + | Space_1 -> () + | Space_n -> () + | Space_cr -> () + +let sp_cr(_, (spaces, (start, _))) = + match spaces with + | Space_none -> () + | Space_0 + | Space_1 + | Space_n -> warn (start, start) "you should have a carriage-return (\\n) here" + | Space_cr -> () + +let not_complex = function + | Call_op("?:", _) -> false + | _ -> true + +let string_of_Ident = function + | Ident(None, s, _) -> s + | Ident(Some fq, s, _) -> fq ^ "::" ^ s + | _ -> internal_error "string_of_Ident" + +let check_parenthesized_first_argexpr word (e, (_, (start, _)) as ex) = + let want_space = word.[0] = '-' in + match e with + | List[List[_]] :: l -> + if want_space then + if l = [] then sp_n(ex) else die_with_pos (start, start) "can't handle this nicely" + else + if l = [] then sp_0(ex) else die_with_pos (start, start) "you must not have a space here" + | _ -> sp_p(ex) + +let check_foreach (s, (_, pos)) = if s = "for" then warn pos "write \"foreach\" instead of \"for\"" +let check_for (s, (_, pos)) = if s = "foreach" then warn pos "write \"for\" instead of \"foreach\"" + +let check_no_paren f_name (e, (_, pos)) = + match e with + | List[List[List[e]]] when not_complex e -> warn pos (Printf.sprintf "''... %s (...)'' can be written ''... %s ...''" f_name f_name) + | _ -> () + +let to_Ident ((fq, name), (_, pos)) = Ident(fq, name, raw_pos2pos pos) +let to_String (s, (_, pos)) = String(s, raw_pos2pos pos) + +let rec only_one (l, (spaces, pos)) = + match l with + | [List l'] -> only_one (l', (spaces, pos)) + | [e] -> e + | [] -> die_with_pos pos "you must give one argument" + | _ -> die_with_pos pos "you must give only one argument" + +let only_one_in_List (e, both) = + match e with + | List l -> only_one(l, both) + | _ -> e + +let array_ident_to_hash_ident (e, (_, pos)) = + match e with + | Deref(I_array, e) -> Deref(I_hash, e) + | _ -> die_with_pos pos "internal error (array_ident_to_hash_ident)" + +let from_PATTERN ((s, opts), (_, pos)) = [ String(s, raw_pos2pos pos) ; String(opts, raw_pos2pos pos) ] +let from_PATTERN_SUBST ((s1, s2, opts), (_, pos)) = [ String(s1, raw_pos2pos pos) ; String(s2, raw_pos2pos pos) ; String(opts, raw_pos2pos pos) ] + +let to_List = function + | [e] -> e + | l -> List l + +let sub_declaration (name, proto) body = Sub_declaration(name, proto, body) + +let var_dollar_ = Deref(I_scalar, Ident(None, "_", raw_pos2pos bpos)) +let var_STDOUT = Deref(I_star, Ident(None, "STDOUT", raw_pos2pos bpos)) diff --git a/perl_checker.src/parser_helper.mli b/perl_checker.src/parser_helper.mli new file mode 100644 index 0000000..5d1c47b --- /dev/null +++ b/perl_checker.src/parser_helper.mli @@ -0,0 +1,42 @@ +val bpos : int * int +val msg_with_pos : int * int -> string -> string +val die_with_pos : int * int -> string -> 'a +val warn : int * int -> string -> unit +val die_rule : string -> 'a +val debug : string -> unit +val raw_pos2pos : 'a * 'b -> string * 'a * 'b +val get_pos : 'a * ('b * ('c * 'd)) -> string * 'c * 'd +val warn_too_many_space : int -> unit +val warn_no_space : int -> unit +val warn_cr : int -> unit +val warn_space : int -> unit +val sp_0 : 'a * (Types.spaces * (int * 'b)) -> unit +val sp_0_or_cr : 'a * (Types.spaces * (int * 'b)) -> unit +val sp_1 : 'a * (Types.spaces * (int * 'b)) -> unit +val sp_n : 'a * (Types.spaces * (int * 'b)) -> unit +val sp_p : 'a * (Types.spaces * (int * 'b)) -> unit +val sp_cr : 'a * (Types.spaces * (int * 'b)) -> unit +val not_complex : Types.fromparser -> bool +val string_of_Ident : Types.fromparser -> string +val check_parenthesized_first_argexpr : + string -> Types.fromparser list * (Types.spaces * (int * 'a)) -> unit +val check_foreach : string * ('a * (int * int)) -> unit +val check_for : string * ('a * (int * int)) -> unit +val check_no_paren : string -> Types.fromparser * ('a * (int * int)) -> unit +val to_Ident : + (string option * string) * ('a * (int * int)) -> Types.fromparser +val to_String : string * ('a * (int * int)) -> Types.fromparser +val only_one : Types.fromparser list * ('a * (int * int)) -> Types.fromparser +val only_one_in_List : + Types.fromparser * ('a * (int * int)) -> Types.fromparser +val array_ident_to_hash_ident : + Types.fromparser * ('a * (int * int)) -> Types.fromparser +val from_PATTERN : + (string * string) * ('a * (int * int)) -> Types.fromparser list +val from_PATTERN_SUBST : + (string * string * string) * ('a * (int * int)) -> Types.fromparser list +val to_List : Types.fromparser list -> Types.fromparser +val sub_declaration : + Types.fromparser * string -> Types.fromparser list -> Types.fromparser +val var_dollar_ : Types.fromparser +val var_STDOUT : Types.fromparser diff --git a/perl_checker.src/perl_checker.ml b/perl_checker.src/perl_checker.ml index fbb3f4d..79b3ac9 100644 --- a/perl_checker.src/perl_checker.ml +++ b/perl_checker.src/perl_checker.ml @@ -2,21 +2,21 @@ open Types let _ = let args = List.tl (Array.to_list Sys.argv) in + let args = if args = [] then ["/tmp/t.pl"] else args in List.iter (fun file -> try - let lexbuf = Lexing.from_channel (open_in file) in - let _ = - try - Info.start_a_new_file file ; - if false then - let t = Lexer.lexbuf2list Lexer.token lexbuf in - let _,_ = t, t in "" - else - Parser.prog Lexer.token lexbuf - with Failure s -> ( - prerr_endline s ; - exit 1 - ) in - () + let lexbuf = Lexing.from_channel (Unix.open_process_in (Printf.sprintf "expand \"%s\"" file)) in + try + Info.start_a_new_file file ; + if false then + let t = Lexer.lexbuf2list (Lexer.concat_bareword_paren (Lexer.concat_spaces Lexer.token)) lexbuf in + let _,_ = t, t in () + else + let t = Parser.prog (Lexer.concat_bareword_paren (Lexer.concat_spaces Lexer.token)) lexbuf in + let _,_ = t, t in () + with Failure s -> ( + prerr_endline s ; + exit 1 + ) with _ -> prerr_endline ("bad file " ^ file) ) args diff --git a/perl_checker.src/types.mli b/perl_checker.src/types.mli index 570da0f..5567eb9 100644 --- a/perl_checker.src/types.mli +++ b/perl_checker.src/types.mli @@ -1,16 +1,44 @@ exception TooMuchRParen +type raw_pos = int * int + type pos = string * int * int -type ident_type = I_scalar | I_hash | I_array | I_func | I_raw | I_star +type spaces = + | Space_0 + | Space_1 + | Space_n + | Space_cr + | Space_none -type fromparser = - | Ident of ident_type * string option * string * pos +type context = I_scalar | I_hash | I_array | I_func | I_raw | I_star | I_arraylen - | Num of float * pos +type fromparser = + | Ident of string option * string * pos + | Num of string * pos | String of string * pos - | Nil - | Binary of string * fromparser * fromparser + | Ref of context * fromparser + | Deref of context * fromparser + | Deref_with of context * fromparser * fromparser + + | Diamond of fromparser option + | Binop of string * fromparser * fromparser | If_then_else of string * (fromparser * fromparser) list * fromparser option + | List of fromparser list + | Block of fromparser list + + | Call of fromparser * fromparser list + | Call_op of string * fromparser list + | Method_call of fromparser * fromparser * fromparser list + + | Anonymous_sub of fromparser + | My of fromparser + | Local of fromparser + | Use of fromparser * fromparser list + | Sub_declaration of fromparser * string * fromparser list (* name, prototype, body *) + | Package of fromparser + | Label of string + + | Too_complex |