Diffstat (limited to 'perl_checker.src')
-rw-r--r--  perl_checker.src/common.ml         |   2
-rw-r--r--  perl_checker.src/common.mli        |   1
-rw-r--r--  perl_checker.src/lexer.mll         | 489
-rw-r--r--  perl_checker.src/parser.mly        |  66
-rw-r--r--  perl_checker.src/parser_helper.ml  |  39
-rw-r--r--  perl_checker.src/parser_helper.mli |  27
-rw-r--r--  perl_checker.src/perl_checker.ml   |   9
-rw-r--r--  perl_checker.src/types.mli         |   3
8 files changed, 407 insertions, 229 deletions
diff --git a/perl_checker.src/common.ml b/perl_checker.src/common.ml
index 45e6ec1..c4600ff 100644
--- a/perl_checker.src/common.ml
+++ b/perl_checker.src/common.ml
@@ -186,6 +186,8 @@ let rec stack2list s =
Stack.iter (fun e -> l := e :: !l) s ;
!l
+let rec queue2list q = rev (Queue.fold (fun b a -> a :: b) [] q)
+
let rec fix_point f p =
let p' = f p in
if p = p' then p else fix_point f p'
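
Note: the new queue2list helper converts a Queue.t to a list while preserving front-to-back order (the unqualified rev presumably comes from an open List in common.ml). A minimal, self-contained sketch of its behaviour, using only the standard library:

    (* self-contained equivalent of the new helper; Queue.fold visits elements
       front to back, so reversing the accumulator restores insertion order *)
    let queue2list q = List.rev (Queue.fold (fun acc e -> e :: acc) [] q)

    let () =
      let q = Queue.create () in
      List.iter (fun x -> Queue.push x q) [ "a"; "b"; "c" ];
      assert (queue2list q = [ "a"; "b"; "c" ])
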
diff --git a/perl_checker.src/common.mli b/perl_checker.src/common.mli
index ec9c8ce..5398092 100644
--- a/perl_checker.src/common.mli
+++ b/perl_checker.src/common.mli
@@ -42,6 +42,7 @@ val fold_right1 : ('a -> 'a -> 'a) -> 'a list -> 'a
val for_all2_ : ('a -> 'b -> bool) -> 'a list -> 'b list -> bool
val maxl : 'a list -> 'a
val stack2list : 'a Stack.t -> 'a list
+val queue2list : 'a Queue.t -> 'a list
val fix_point : ('a -> 'a) -> 'a -> 'a
val fix_point_withenv : ('a -> 'b -> 'b * 'a) -> 'a -> 'b -> 'b * 'a
val fix_point_ : int -> ('a -> 'a) -> 'a -> 'a * int
diff --git a/perl_checker.src/lexer.mll b/perl_checker.src/lexer.mll
index 07aa48a..3ebce72 100644
--- a/perl_checker.src/lexer.mll
+++ b/perl_checker.src/lexer.mll
@@ -4,31 +4,37 @@ open Types
open Lexing
open Info
+let bpos = -1,-1
+
type raw_token =
| EOF of raw_pos
| SPACE of int
| CR
| NUM of (string * raw_pos)
- | STRING of (string * raw_pos)
+ | RAW_STRING of (string * raw_pos)
+ | STRING of (raw_interpolated_string * raw_pos)
+ | PATTERN of (raw_interpolated_string * string * raw_pos)
+ | PATTERN_SUBST of (raw_interpolated_string * raw_interpolated_string * string * raw_pos)
| BAREWORD of (string * raw_pos)
+ | BAREWORD_PAREN of (string * raw_pos)
| REVISION of (string * raw_pos)
| COMMENT of (string * raw_pos)
| POD of (string * raw_pos)
| LABEL of (string * raw_pos)
- | COMMAND_STRING of (string * raw_pos)
+ | COMMAND_STRING of (raw_interpolated_string * raw_pos)
| PRINT_TO_STAR of (string * raw_pos)
| PRINT_TO_SCALAR of (string * raw_pos)
| QUOTEWORDS of (string * raw_pos)
| COMPACT_HASH_SUBSCRIPT of (string * raw_pos)
- | HERE_DOC of ((string * raw_pos) ref * raw_pos)
- | PATTERN of (string * string * raw_pos)
- | PATTERN_SUBST of (string * string * string * raw_pos)
+ | RAW_HERE_DOC of ((string * raw_pos) ref * raw_pos)
+ | HERE_DOC of (raw_interpolated_string * raw_pos) ref * raw_pos
| SCALAR_IDENT of (string option * string * raw_pos)
| ARRAY_IDENT of (string option * string * raw_pos)
| HASH_IDENT of (string option * string * raw_pos)
| FUNC_IDENT of (string option * string * raw_pos)
| STAR_IDENT of (string option * string * raw_pos)
| RAW_IDENT of (string option * string * raw_pos)
+ | RAW_IDENT_PAREN of (string option * string * raw_pos)
| ARRAYLEN_IDENT of (string option * string * raw_pos)
| FUNC_DECL_WITH_PROTO of (string * string * raw_pos)
@@ -43,150 +49,152 @@ type raw_token =
| BIT_AND of raw_pos | BIT_OR of raw_pos | BIT_XOR of raw_pos | AND_TIGHT of raw_pos | OR_TIGHT of raw_pos | DOTDOT of (string * raw_pos)
| QUESTION_MARK of raw_pos | COLON of raw_pos | ASSIGN of (string * raw_pos) | COMMA of raw_pos | RIGHT_ARROW of raw_pos | NOT of raw_pos | AND of raw_pos | OR of raw_pos | XOR of raw_pos
-let saved_token = ref None
-
-let concat_bareword_paren get_token lexbuf =
- let token = match !saved_token with
- | None -> get_token lexbuf
- | Some t -> t
- in
- let token, next =
- match token with
- | Parser.PRINT(s, both)
- | Parser.BAREWORD(s, both) ->
- let next_token = get_token lexbuf in
- (match next_token with Parser.PAREN(_, (Space_0, _)) -> Parser.BAREWORD_PAREN(s, both) | _ -> token), Some next_token
- | Parser.RAW_IDENT(ident, both) ->
- let next_token = get_token lexbuf in
- (match next_token with Parser.PAREN(_, (Space_0, _)) -> Parser.RAW_IDENT_PAREN(ident, both) | _ -> token), Some next_token
- | _ -> token, None
- in
- saved_token := next ; token
-
-let rec concat_spaces get_token lexbuf =
- let rec get_spaces spaces lexbuf =
- match get_token lexbuf with
- | CR -> get_spaces Space_cr lexbuf
- | SPACE n ->
- let spaces' =
- match spaces with
- | Space_cr -> Space_cr
- | Space_0 -> if n = 1 then Space_1 else Space_n
- | _ -> Space_n
- in
- get_spaces spaces' lexbuf
- | token -> token, spaces
- in
- let token, spaces = get_spaces Space_0 lexbuf in
- match token with
- | NUM(s, pos) -> Parser.NUM(s, (spaces, pos))
- | STRING(s, pos) -> Parser.STRING(s, (spaces, pos))
- | BAREWORD(s, pos) -> Parser.BAREWORD(s, (spaces, pos))
- | REVISION(s, pos) -> Parser.REVISION(s, (spaces, pos))
- | COMMENT(s, pos) -> Parser.COMMENT(s, (spaces, pos))
- | POD(s, pos) -> Parser.POD(s, (spaces, pos))
- | LABEL(s, pos) -> Parser.LABEL(s, (spaces, pos))
- | COMMAND_STRING(s, pos) -> Parser.COMMAND_STRING(s, (spaces, pos))
- | PRINT(s, pos) -> Parser.PRINT(s, (spaces, pos))
- | PRINT_TO_STAR(s, pos) -> Parser.PRINT_TO_STAR(s, (spaces, pos))
- | PRINT_TO_SCALAR(s, pos) -> Parser.PRINT_TO_SCALAR(s, (spaces, pos))
- | QUOTEWORDS(s, pos) -> Parser.QUOTEWORDS(s, (spaces, pos))
- | COMPACT_HASH_SUBSCRIPT(s, pos) -> Parser.COMPACT_HASH_SUBSCRIPT(s, (spaces, pos))
- | HERE_DOC(r, pos) -> Parser.HERE_DOC(r, (spaces, pos))
- | PATTERN(s, opts, pos) -> Parser.PATTERN((s, opts), (spaces, pos))
- | PATTERN_SUBST(from, to_, opts, pos) -> Parser.PATTERN_SUBST((from, to_, opts), (spaces, pos))
- | SCALAR_IDENT(kind, name, pos) -> Parser.SCALAR_IDENT((kind, name), (spaces, pos))
- | ARRAY_IDENT(kind, name, pos) -> Parser.ARRAY_IDENT((kind, name), (spaces, pos))
- | HASH_IDENT(kind, name, pos) -> Parser.HASH_IDENT((kind, name), (spaces, pos))
- | FUNC_IDENT(kind, name, pos) -> Parser.FUNC_IDENT((kind, name), (spaces, pos))
- | STAR_IDENT(kind, name, pos) -> Parser.STAR_IDENT((kind, name), (spaces, pos))
- | RAW_IDENT(kind, name, pos) -> Parser.RAW_IDENT((kind, name), (spaces, pos))
- | ARRAYLEN_IDENT(kind, name, pos) -> Parser.ARRAYLEN_IDENT((kind, name), (spaces, pos))
- | FUNC_DECL_WITH_PROTO(name, proto, pos) -> Parser.FUNC_DECL_WITH_PROTO((name, proto), (spaces, pos))
-
- | NEW(pos) -> Parser.NEW((), (spaces, pos))
- | FORMAT(pos) -> Parser.FORMAT((), (spaces, pos))
- | COMPARE_OP(s, pos) -> Parser.COMPARE_OP(s, (spaces, pos))
- | EQ_OP(s, pos) -> Parser.EQ_OP(s, (spaces, pos))
- | ASSIGN(s, pos) -> Parser.ASSIGN(s, (spaces, pos))
- | FOR(s, pos) -> Parser.FOR(s, (spaces, pos))
-
- | DOTDOT(s, pos) -> Parser.DOTDOT(s, (spaces, pos))
- | MULT(s, pos) -> Parser.MULT(s, (spaces, pos))
- | BIT_SHIFT(s, pos) -> Parser.BIT_SHIFT(s, (spaces, pos))
- | PLUS(s, pos) -> Parser.PLUS(s, (spaces, pos))
- | ONE_SCALAR_PARA(s, pos) -> Parser.ONE_SCALAR_PARA(s, (spaces, pos))
-
- | EOF (pos) -> Parser.EOF ((), (spaces, pos))
- | IF (pos) -> Parser.IF ((), (spaces, pos))
- | ELSIF (pos) -> Parser.ELSIF ((), (spaces, pos))
- | ELSE (pos) -> Parser.ELSE ((), (spaces, pos))
- | UNLESS (pos) -> Parser.UNLESS ((), (spaces, pos))
- | DO (pos) -> Parser.DO ((), (spaces, pos))
- | WHILE (pos) -> Parser.WHILE ((), (spaces, pos))
- | UNTIL (pos) -> Parser.UNTIL ((), (spaces, pos))
- | MY (pos) -> Parser.MY ((), (spaces, pos))
- | CONTINUE (pos) -> Parser.CONTINUE ((), (spaces, pos))
- | SUB (pos) -> Parser.SUB ((), (spaces, pos))
- | LOCAL (pos) -> Parser.LOCAL ((), (spaces, pos))
- | USE (pos) -> Parser.USE ((), (spaces, pos))
- | PACKAGE (pos) -> Parser.PACKAGE ((), (spaces, pos))
- | BEGIN (pos) -> Parser.BEGIN ((), (spaces, pos))
- | END (pos) -> Parser.END ((), (spaces, pos))
- | AT (pos) -> Parser.AT ((), (spaces, pos))
- | DOLLAR (pos) -> Parser.DOLLAR ((), (spaces, pos))
- | PERCENT (pos) -> Parser.PERCENT ((), (spaces, pos))
- | AMPERSAND (pos) -> Parser.AMPERSAND ((), (spaces, pos))
- | STAR (pos) -> Parser.STAR ((), (spaces, pos))
- | ARRAYLEN (pos) -> Parser.ARRAYLEN ((), (spaces, pos))
- | SEMI_COLON (pos) -> Parser.SEMI_COLON ((), (spaces, pos))
- | PKG_SCOPE (pos) -> Parser.PKG_SCOPE ((), (spaces, pos))
- | PAREN (pos) -> Parser.PAREN ((), (spaces, pos))
- | PAREN_END (pos) -> Parser.PAREN_END ((), (spaces, pos))
- | BRACKET (pos) -> Parser.BRACKET ((), (spaces, pos))
- | BRACKET_END (pos) -> Parser.BRACKET_END ((), (spaces, pos))
- | BRACKET_HASHREF (pos) -> Parser.BRACKET_HASHREF ((), (spaces, pos))
- | ARRAYREF (pos) -> Parser.ARRAYREF ((), (spaces, pos))
- | ARRAYREF_END (pos) -> Parser.ARRAYREF_END ((), (spaces, pos))
- | ARROW (pos) -> Parser.ARROW ((), (spaces, pos))
- | INCR (pos) -> Parser.INCR ((), (spaces, pos))
- | DECR (pos) -> Parser.DECR ((), (spaces, pos))
- | POWER (pos) -> Parser.POWER ((), (spaces, pos))
- | TIGHT_NOT (pos) -> Parser.TIGHT_NOT ((), (spaces, pos))
- | BIT_NEG (pos) -> Parser.BIT_NEG ((), (spaces, pos))
- | REF (pos) -> Parser.REF ((), (spaces, pos))
- | PATTERN_MATCH (pos) -> Parser.PATTERN_MATCH ((), (spaces, pos))
- | PATTERN_MATCH_NOT(pos) -> Parser.PATTERN_MATCH_NOT((), (spaces, pos))
- | LT (pos) -> Parser.LT ((), (spaces, pos))
- | GT (pos) -> Parser.GT ((), (spaces, pos))
- | BIT_AND (pos) -> Parser.BIT_AND ((), (spaces, pos))
- | BIT_OR (pos) -> Parser.BIT_OR ((), (spaces, pos))
- | BIT_XOR (pos) -> Parser.BIT_XOR ((), (spaces, pos))
- | AND_TIGHT (pos) -> Parser.AND_TIGHT ((), (spaces, pos))
- | OR_TIGHT (pos) -> Parser.OR_TIGHT ((), (spaces, pos))
- | QUESTION_MARK (pos) -> Parser.QUESTION_MARK ((), (spaces, pos))
- | COLON (pos) -> Parser.COLON ((), (spaces, pos))
- | COMMA (pos) -> Parser.COMMA ((), (spaces, pos))
- | RIGHT_ARROW (pos) -> Parser.RIGHT_ARROW ((), (spaces, pos))
- | NOT (pos) -> Parser.NOT ((), (spaces, pos))
- | AND (pos) -> Parser.AND ((), (spaces, pos))
- | OR (pos) -> Parser.OR ((), (spaces, pos))
- | XOR (pos) -> Parser.XOR ((), (spaces, pos))
+and raw_interpolated_string = (string * raw_token list) list
+
+let rec concat_bareword_paren = function
+ | PRINT(s, pos1) :: PAREN(pos2) :: l
+ | BAREWORD(s, pos1) :: PAREN(pos2) :: l ->
+ BAREWORD_PAREN(s, pos1) :: PAREN(pos2) :: concat_bareword_paren l
+ | RAW_IDENT(kind, ident, pos1) :: PAREN(pos2) :: l ->
+ RAW_IDENT_PAREN(kind, ident, pos1) :: PAREN(pos2) :: concat_bareword_paren l
+ | [] -> []
+ | e :: l -> e :: concat_bareword_paren l
+
+let rec raw_token_to_pos_and_token spaces = function
+ | NUM(s, pos) -> pos, Parser.NUM(s, (spaces, pos))
+ | RAW_STRING(s, pos) -> pos, Parser.RAW_STRING(s, (spaces, pos))
+ | RAW_HERE_DOC(r, pos) -> pos, Parser.RAW_HERE_DOC(!r, (spaces, pos))
+ | STRING(l, pos) -> pos, Parser.STRING(raw_interpolated_string_to_tokens l, (spaces, pos))
+ | COMMAND_STRING(l, pos) -> pos, Parser.COMMAND_STRING(raw_interpolated_string_to_tokens l, (spaces, pos))
+ | PATTERN(s, opts, pos) -> pos, Parser.PATTERN((raw_interpolated_string_to_tokens s, opts), (spaces, pos))
+ | PATTERN_SUBST(from, to_, opts, pos) -> pos, Parser.PATTERN_SUBST((raw_interpolated_string_to_tokens from, raw_interpolated_string_to_tokens to_, opts), (spaces, pos))
+ | HERE_DOC(l, pos) -> pos, Parser.HERE_DOC((raw_interpolated_string_to_tokens (fst !l), snd !l), (spaces, pos))
+ | BAREWORD(s, pos) -> pos, Parser.BAREWORD(s, (spaces, pos))
+ | BAREWORD_PAREN(s, pos) -> pos, Parser.BAREWORD_PAREN(s, (spaces, pos))
+ | REVISION(s, pos) -> pos, Parser.REVISION(s, (spaces, pos))
+ | COMMENT(s, pos) -> pos, Parser.COMMENT(s, (spaces, pos))
+ | POD(s, pos) -> pos, Parser.POD(s, (spaces, pos))
+ | LABEL(s, pos) -> pos, Parser.LABEL(s, (spaces, pos))
+ | PRINT(s, pos) -> pos, Parser.PRINT(s, (spaces, pos))
+ | PRINT_TO_STAR(s, pos) -> pos, Parser.PRINT_TO_STAR(s, (spaces, pos))
+ | PRINT_TO_SCALAR(s, pos) -> pos, Parser.PRINT_TO_SCALAR(s, (spaces, pos))
+ | QUOTEWORDS(s, pos) -> pos, Parser.QUOTEWORDS(s, (spaces, pos))
+ | COMPACT_HASH_SUBSCRIPT(s, pos) -> pos, Parser.COMPACT_HASH_SUBSCRIPT(s, (spaces, pos))
+ | SCALAR_IDENT(kind, name, pos) -> pos, Parser.SCALAR_IDENT((kind, name), (spaces, pos))
+ | ARRAY_IDENT(kind, name, pos) -> pos, Parser.ARRAY_IDENT((kind, name), (spaces, pos))
+ | HASH_IDENT(kind, name, pos) -> pos, Parser.HASH_IDENT((kind, name), (spaces, pos))
+ | FUNC_IDENT(kind, name, pos) -> pos, Parser.FUNC_IDENT((kind, name), (spaces, pos))
+ | STAR_IDENT(kind, name, pos) -> pos, Parser.STAR_IDENT((kind, name), (spaces, pos))
+ | RAW_IDENT(kind, name, pos) -> pos, Parser.RAW_IDENT((kind, name), (spaces, pos))
+ | RAW_IDENT_PAREN(kind, name, pos) -> pos, Parser.RAW_IDENT_PAREN((kind, name), (spaces, pos))
+ | ARRAYLEN_IDENT(kind, name, pos) -> pos, Parser.ARRAYLEN_IDENT((kind, name), (spaces, pos))
+ | FUNC_DECL_WITH_PROTO(name, proto, pos) -> pos, Parser.FUNC_DECL_WITH_PROTO((name, proto), (spaces, pos))
+
+ | NEW(pos) -> pos, Parser.NEW((), (spaces, pos))
+ | FORMAT(pos) -> pos, Parser.FORMAT((), (spaces, pos))
+ | COMPARE_OP(s, pos) -> pos, Parser.COMPARE_OP(s, (spaces, pos))
+ | EQ_OP(s, pos) -> pos, Parser.EQ_OP(s, (spaces, pos))
+ | ASSIGN(s, pos) -> pos, Parser.ASSIGN(s, (spaces, pos))
+ | FOR(s, pos) -> pos, Parser.FOR(s, (spaces, pos))
+
+ | DOTDOT(s, pos) -> pos, Parser.DOTDOT(s, (spaces, pos))
+ | MULT(s, pos) -> pos, Parser.MULT(s, (spaces, pos))
+ | BIT_SHIFT(s, pos) -> pos, Parser.BIT_SHIFT(s, (spaces, pos))
+ | PLUS(s, pos) -> pos, Parser.PLUS(s, (spaces, pos))
+ | ONE_SCALAR_PARA(s, pos) -> pos, Parser.ONE_SCALAR_PARA(s, (spaces, pos))
+
+ | EOF (pos) -> pos, Parser.EOF ((), (spaces, pos))
+ | IF (pos) -> pos, Parser.IF ((), (spaces, pos))
+ | ELSIF (pos) -> pos, Parser.ELSIF ((), (spaces, pos))
+ | ELSE (pos) -> pos, Parser.ELSE ((), (spaces, pos))
+ | UNLESS (pos) -> pos, Parser.UNLESS ((), (spaces, pos))
+ | DO (pos) -> pos, Parser.DO ((), (spaces, pos))
+ | WHILE (pos) -> pos, Parser.WHILE ((), (spaces, pos))
+ | UNTIL (pos) -> pos, Parser.UNTIL ((), (spaces, pos))
+ | MY (pos) -> pos, Parser.MY ((), (spaces, pos))
+ | CONTINUE (pos) -> pos, Parser.CONTINUE ((), (spaces, pos))
+ | SUB (pos) -> pos, Parser.SUB ((), (spaces, pos))
+ | LOCAL (pos) -> pos, Parser.LOCAL ((), (spaces, pos))
+ | USE (pos) -> pos, Parser.USE ((), (spaces, pos))
+ | PACKAGE (pos) -> pos, Parser.PACKAGE ((), (spaces, pos))
+ | BEGIN (pos) -> pos, Parser.BEGIN ((), (spaces, pos))
+ | END (pos) -> pos, Parser.END ((), (spaces, pos))
+ | AT (pos) -> pos, Parser.AT ((), (spaces, pos))
+ | DOLLAR (pos) -> pos, Parser.DOLLAR ((), (spaces, pos))
+ | PERCENT (pos) -> pos, Parser.PERCENT ((), (spaces, pos))
+ | AMPERSAND (pos) -> pos, Parser.AMPERSAND ((), (spaces, pos))
+ | STAR (pos) -> pos, Parser.STAR ((), (spaces, pos))
+ | ARRAYLEN (pos) -> pos, Parser.ARRAYLEN ((), (spaces, pos))
+ | SEMI_COLON (pos) -> pos, Parser.SEMI_COLON ((), (spaces, pos))
+ | PKG_SCOPE (pos) -> pos, Parser.PKG_SCOPE ((), (spaces, pos))
+ | PAREN (pos) -> pos, Parser.PAREN ((), (spaces, pos))
+ | PAREN_END (pos) -> pos, Parser.PAREN_END ((), (spaces, pos))
+ | BRACKET (pos) -> pos, Parser.BRACKET ((), (spaces, pos))
+ | BRACKET_END (pos) -> pos, Parser.BRACKET_END ((), (spaces, pos))
+ | BRACKET_HASHREF (pos) -> pos, Parser.BRACKET_HASHREF ((), (spaces, pos))
+ | ARRAYREF (pos) -> pos, Parser.ARRAYREF ((), (spaces, pos))
+ | ARRAYREF_END (pos) -> pos, Parser.ARRAYREF_END ((), (spaces, pos))
+ | ARROW (pos) -> pos, Parser.ARROW ((), (spaces, pos))
+ | INCR (pos) -> pos, Parser.INCR ((), (spaces, pos))
+ | DECR (pos) -> pos, Parser.DECR ((), (spaces, pos))
+ | POWER (pos) -> pos, Parser.POWER ((), (spaces, pos))
+ | TIGHT_NOT (pos) -> pos, Parser.TIGHT_NOT ((), (spaces, pos))
+ | BIT_NEG (pos) -> pos, Parser.BIT_NEG ((), (spaces, pos))
+ | REF (pos) -> pos, Parser.REF ((), (spaces, pos))
+ | PATTERN_MATCH (pos) -> pos, Parser.PATTERN_MATCH ((), (spaces, pos))
+ | PATTERN_MATCH_NOT(pos) -> pos, Parser.PATTERN_MATCH_NOT((), (spaces, pos))
+ | LT (pos) -> pos, Parser.LT ((), (spaces, pos))
+ | GT (pos) -> pos, Parser.GT ((), (spaces, pos))
+ | BIT_AND (pos) -> pos, Parser.BIT_AND ((), (spaces, pos))
+ | BIT_OR (pos) -> pos, Parser.BIT_OR ((), (spaces, pos))
+ | BIT_XOR (pos) -> pos, Parser.BIT_XOR ((), (spaces, pos))
+ | AND_TIGHT (pos) -> pos, Parser.AND_TIGHT ((), (spaces, pos))
+ | OR_TIGHT (pos) -> pos, Parser.OR_TIGHT ((), (spaces, pos))
+ | QUESTION_MARK (pos) -> pos, Parser.QUESTION_MARK ((), (spaces, pos))
+ | COLON (pos) -> pos, Parser.COLON ((), (spaces, pos))
+ | COMMA (pos) -> pos, Parser.COMMA ((), (spaces, pos))
+ | RIGHT_ARROW (pos) -> pos, Parser.RIGHT_ARROW ((), (spaces, pos))
+ | NOT (pos) -> pos, Parser.NOT ((), (spaces, pos))
+ | AND (pos) -> pos, Parser.AND ((), (spaces, pos))
+ | OR (pos) -> pos, Parser.OR ((), (spaces, pos))
+ | XOR (pos) -> pos, Parser.XOR ((), (spaces, pos))
| SPACE _ | CR -> internal_error "raw_token_to_token"
-let rec lexbuf2list t lexbuf =
- let rec f () =
- match t lexbuf with
- | Parser.EOF _ -> []
- | e -> e :: f()
- in
- let l = f() in
- l
+and raw_token_to_token spaces raw_token =
+ let _, token = raw_token_to_pos_and_token spaces raw_token in
+ token
+
+and raw_interpolated_string_to_tokens l =
+ List.map (fun (s, rtok) -> s, concat_spaces Space_0 rtok) l
+
+and concat_spaces spaces = function
+ | CR :: l -> concat_spaces Space_cr l
+ | SPACE n :: l ->
+ let spaces' =
+ match spaces with
+ | Space_cr -> Space_cr
+ | Space_0 -> if n = 1 then Space_1 else Space_n
+ | _ -> Space_n
+ in
+ concat_spaces spaces' l
+ | [] -> []
+ | token :: l -> raw_token_to_pos_and_token spaces token :: concat_spaces Space_0 l
+
+let rec lexbuf2list accu t lexbuf =
+ match t lexbuf with
+ | EOF pos -> List.rev (EOF pos :: accu)
+ | e -> lexbuf2list (e :: accu) t lexbuf
+
+let get_token token lexbuf =
+ let tokens = lexbuf2list [] token lexbuf in
+ let tokens = concat_bareword_paren tokens in
+ let tokens = concat_spaces Space_0 tokens in
+ tokens
+
+let next_rule = Stack.create()
-let next_rule = ref None
-let bpos = -1,-1
let pos lexbuf = lexeme_start lexbuf, lexeme_end lexbuf
let pos2sfull_with start end_ = Info.pos2sfull (!current_file, start, end_)
let pos2sfull lexbuf = pos2sfull_with (lexeme_start lexbuf) (lexeme_end lexbuf)
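
Note: the removed saved_token lookahead machinery is replaced by pure list rewrites over the fully materialized token stream; concat_bareword_paren above is exactly this kind of peephole pass. A toy sketch of the pattern (the token names here are illustrative, not the lexer's own):

    type tok = Bareword of string | Bareword_paren of string | Paren | Other

    (* mark a bareword as a call when it is immediately followed by "(" *)
    let rec mark_calls = function
      | Bareword s :: Paren :: l -> Bareword_paren s :: Paren :: mark_calls l
      | [] -> []
      | t :: l -> t :: mark_calls l

    (* mark_calls [ Bareword "foo"; Paren; Other ]
         = [ Bareword_paren "foo"; Paren; Other ] *)
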
@@ -197,37 +205,72 @@ let add_a_new_line raw_pos =
lpush current_file_lines_starts raw_pos
let here_docs = Queue.create()
+let raw_here_docs = Queue.create()
let current_here_doc_mark = ref ""
-let here_doc_next_line mark interpolate =
+let here_doc_next_line mark =
+ let here_doc_ref = ref([], bpos) in
+ Queue.push (mark, here_doc_ref) here_docs ;
+ here_doc_ref
+let raw_here_doc_next_line mark =
let here_doc_ref = ref("", bpos) in
- Queue.push (interpolate, mark, here_doc_ref) here_docs ;
+ Queue.push (mark, here_doc_ref) raw_here_docs ;
here_doc_ref
let delimit_char = ref '/'
let not_ok_for_match = ref (-1)
let string_nestness = ref 0
-let building_current_string = ref ""
+let building_current_interpolated_string = Stack.create()
+let building_current_string = Stack.create()
let current_string_start_pos = ref 0
let current_string_start_line = ref 0
-let die lexbuf err = failwith (pos2sfull_with !current_string_start_pos (lexeme_end lexbuf) ^ err)
+let warn lexbuf err = prerr_endline (pos2sfull_with (lexeme_start lexbuf) (lexeme_end lexbuf) ^ err)
+let die lexbuf err = failwith (pos2sfull_with (lexeme_start lexbuf) (lexeme_end lexbuf) ^ err)
+let die_in_string lexbuf err = failwith (pos2sfull_with !current_string_start_pos (lexeme_end lexbuf) ^ err)
+
+let raw_ins t lexbuf =
+ Stack.push (ref "") building_current_string;
+ current_string_start_pos := lexeme_start lexbuf;
+ t lexbuf ;
+ !(Stack.pop building_current_string), (!current_string_start_pos, lexeme_end lexbuf)
+let raw_ins_to_string t lexbuf =
+ let s, pos = raw_ins t lexbuf in
+ not_ok_for_match := lexeme_end lexbuf;
+ RAW_STRING(s, pos)
+
+let next_interpolated toks =
+ let r = Stack.top building_current_string in
+ Queue.push (!r, toks) (Stack.top building_current_interpolated_string) ;
+ r := ""
-let ins t lexbuf =
- building_current_string := "";
+let ins t lexbuf =
+ Stack.push (Queue.create()) building_current_interpolated_string ;
+ Stack.push (ref "") building_current_string;
current_string_start_pos := lexeme_start lexbuf;
t lexbuf ;
- !building_current_string, (!current_string_start_pos, lexeme_end lexbuf)
+ next_interpolated [] ;
+ let _ = Stack.pop building_current_string in
+ queue2list (Stack.pop building_current_interpolated_string), (!current_string_start_pos, lexeme_end lexbuf)
let ins_to_string t lexbuf =
let s, pos = ins t lexbuf in
not_ok_for_match := lexeme_end lexbuf;
STRING(s, pos)
let next_s s t lexbuf =
- building_current_string := !building_current_string ^ s ;
+ let r = Stack.top building_current_string in r := !r ^ s ;
t lexbuf
let next t lexbuf = next_s (lexeme lexbuf) t lexbuf
+let string_interpolate token pre lexbuf =
+ let s = lexeme lexbuf in
+ let local_lexbuf = Lexing.from_string (pre ^ s ^ " ") in (* add a space to help tokenizing "xxx$$" *)
+ local_lexbuf.lex_abs_pos <- lexeme_start lexbuf ;
+ let l = lexbuf2list [] token local_lexbuf in
+ let l = concat_bareword_paren l in
+ next_interpolated l;
+ (Stack.pop next_rule) lexbuf
+
let ident_type_from_char fq name lexbuf c =
not_ok_for_match := lexeme_end lexbuf;
match c with
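
Note: building_current_string becomes a stack of buffers, presumably because string lexing can now re-enter itself (an interpolated chunk handled by string_interpolate may itself contain another quoted construct). A minimal sketch of that idiom, with hypothetical names:

    let buffers : string ref Stack.t = Stack.create ()

    let start_string () = Stack.push (ref "") buffers       (* enter a nesting level *)
    let add_text s = let r = Stack.top buffers in r := !r ^ s
    let finish_string () = !(Stack.pop buffers)              (* leave it, return the text *)
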
@@ -292,9 +335,14 @@ rule token = parse
| '\n' {
add_a_new_line(lexeme_end lexbuf);
(try
- let (interpolate, mark, r) = Queue.pop here_docs in
+ let (mark, r) = Queue.pop here_docs in
+ current_here_doc_mark := mark ;
+ r := ins here_doc lexbuf
+ with Queue.Empty ->
+ try
+ let (mark, r) = Queue.pop raw_here_docs in
current_here_doc_mark := mark ;
- r := ins (if interpolate then here_doc else raw_here_doc) lexbuf
+ r := raw_ins raw_here_doc lexbuf
with Queue.Empty -> ());
CR
}
@@ -357,7 +405,7 @@ rule token = parse
| "END" { END(pos lexbuf) }
| "print" { PRINT(lexeme lexbuf, pos lexbuf) }
| "new" { NEW(pos lexbuf) }
-| "format" { let _ = here_doc_next_line "." false in FORMAT(pos lexbuf) }
+| "format" { let _ = raw_here_doc_next_line "." in FORMAT(pos lexbuf) }
| "defined" { ONE_SCALAR_PARA(lexeme lexbuf, pos lexbuf) }
| "split"
@@ -408,7 +456,7 @@ rule token = parse
delimit_char := '/' ;
current_string_start_line := !current_file_current_line;
let s, pos = ins delimited_string lexbuf in
- let opts, _ = ins pattern_options lexbuf in
+ let opts, _ = raw_ins pattern_options lexbuf in
check_multi_line_delimited_string (Some opts) pos ;
PATTERN(s, opts, pos)
)
@@ -420,7 +468,7 @@ rule token = parse
putback lexbuf 1 ;
delimit_char := '/' ;
let s, pos = ins delimited_string lexbuf in
- let opts, _ = ins pattern_options lexbuf in
+ let opts, _ = raw_ins pattern_options lexbuf in
PATTERN(s, opts, pos)
)
}
@@ -429,7 +477,7 @@ rule token = parse
delimit_char := lexeme_char lexbuf 1 ;
current_string_start_line := !current_file_current_line;
let s, pos = ins delimited_string lexbuf in
- let opts, _ = ins pattern_options lexbuf in
+ let opts, _ = raw_ins pattern_options lexbuf in
check_multi_line_delimited_string (Some opts) pos ;
PATTERN(s, opts, pos)
}
@@ -438,7 +486,7 @@ rule token = parse
delimit_char := lexeme_char lexbuf 2 ;
current_string_start_line := !current_file_current_line;
let s, pos = ins delimited_string lexbuf in
- let opts, _ = ins pattern_options lexbuf in
+ let opts, _ = raw_ins pattern_options lexbuf in
check_multi_line_delimited_string (Some opts) pos ;
PATTERN(s, opts, pos)
}
@@ -448,7 +496,7 @@ rule token = parse
current_string_start_line := !current_file_current_line;
let s1, (start, _) = ins delimited_string lexbuf in
let s2, (_, end_) = ins delimited_string lexbuf in
- let opts, _ = ins pattern_options lexbuf in
+ let opts, _ = raw_ins pattern_options lexbuf in
let pos = start, end_ in
check_multi_line_delimited_string (Some opts) pos ;
PATTERN_SUBST(s1, s2, opts, pos)
@@ -459,7 +507,7 @@ rule token = parse
current_string_start_line := !current_file_current_line;
let s1, (start, _) = ins delimited_string lexbuf in
let s2, (_, end_) = ins delimited_string lexbuf in
- let opts, _ = ins pattern_options lexbuf in
+ let opts, _ = raw_ins pattern_options lexbuf in
let pos = start, end_ in
check_multi_line_delimited_string None pos ;
PATTERN_SUBST(s1, s2, opts, pos)
@@ -467,11 +515,11 @@ rule token = parse
| "<<" ident {
not_ok_for_match := lexeme_end lexbuf;
- HERE_DOC(here_doc_next_line (skip_n_char 2 (lexeme lexbuf)) true, pos lexbuf)
+ HERE_DOC(here_doc_next_line (skip_n_char 2 (lexeme lexbuf)), pos lexbuf)
}
| "<<'" ident "'" {
not_ok_for_match := lexeme_end lexbuf;
- HERE_DOC(here_doc_next_line (skip_n_char_ 3 1 (lexeme lexbuf)) false, pos lexbuf)
+ RAW_HERE_DOC(raw_here_doc_next_line (skip_n_char_ 3 1 (lexeme lexbuf)), pos lexbuf)
}
| "<<" ' '+ "'"
| "<<" ' '+ ident {
@@ -543,32 +591,37 @@ rule token = parse
COMMAND_STRING(s, pos) }
| "q(" { ins_to_string qstring lexbuf }
| "qq(" { ins_to_string qqstring lexbuf }
-| "qw(" { let s, pos = ins qstring lexbuf in QUOTEWORDS(s, pos) }
+| "qw(" { let s, pos = raw_ins qstring lexbuf in QUOTEWORDS(s, pos) }
| eof { EOF(pos lexbuf) }
| _ { failwith (Printf.sprintf "%serror tokenizing <<%s>>" (pos2sfull lexbuf) (lexeme lexbuf)) }
and string = parse
- '"' { () }
-| '\\' { next_rule := Some string ; string_escape lexbuf }
+| '"' { () }
+| '\\' { Stack.push string next_rule ; string_escape lexbuf }
+| '$' { Stack.push string next_rule ; string_interpolate_scalar lexbuf }
+| '@' { Stack.push string next_rule ; string_interpolate_array lexbuf }
| '\n' {
add_a_new_line(lexeme_end lexbuf);
next string lexbuf
}
-| [^ '\n' '\\' '"']+ { next string lexbuf }
-| eof { die lexbuf "Unterminated_string" }
+
+| [^ '\n' '\\' '"' '$' '@']+ { next string lexbuf }
+| eof { die_in_string lexbuf "Unterminated_string" }
and delimited_string = parse
-| '\\' { next_rule := Some delimited_string ; string_escape lexbuf }
+| '\\' { Stack.push delimited_string next_rule ; string_escape lexbuf }
+| '$' { Stack.push delimited_string next_rule ; delimited_string_interpolate_scalar lexbuf }
+| '@' { Stack.push delimited_string next_rule ; string_interpolate_array lexbuf }
| '\n' {
add_a_new_line(lexeme_end lexbuf);
next delimited_string lexbuf
}
-| eof { die lexbuf "Unterminated_delimited_string" }
-| [ ^ '\\' '\n' ] { if lexeme_char lexbuf 0 <> !delimit_char then next delimited_string lexbuf }
+| eof { die_in_string lexbuf "Unterminated_delimited_string" }
+| [ ^ '\n' '\\' '$' '@'] { if lexeme_char lexbuf 0 <> !delimit_char then next delimited_string lexbuf }
and rawstring = parse
- ''' { () }
+| ''' { () }
| '\n' {
add_a_new_line(lexeme_end lexbuf);
next rawstring lexbuf
@@ -576,23 +629,25 @@ and rawstring = parse
| '\\' { next rawstring lexbuf }
| "\\'" { next_s "'" rawstring lexbuf }
| [^ '\n' ''' '\\']+ { next rawstring lexbuf }
-| eof { die lexbuf "Unterminated_rawstring" }
+| eof { die_in_string lexbuf "Unterminated_rawstring" }
and qqstring = parse
- ')' {
+| ')' {
if !string_nestness <> 0 then (decr string_nestness; next qqstring lexbuf)
}
| '(' {
incr string_nestness;
next qqstring lexbuf
}
-| '\\' { next_rule := Some qqstring ; string_escape lexbuf }
+| '\\' { Stack.push qqstring next_rule ; string_escape lexbuf }
+| '$' { Stack.push qqstring next_rule ; string_interpolate_scalar lexbuf }
+| '@' { Stack.push qqstring next_rule ; string_interpolate_array lexbuf }
| '\n' {
add_a_new_line(lexeme_end lexbuf);
next qqstring lexbuf
}
-| [^ '\n' '(' ')' '\\']+ { next qqstring lexbuf }
-| eof { die lexbuf "Unterminated_qqstring" }
+| [^ '\n' '(' ')' '\\' '$' '@']+ { next qqstring lexbuf }
+| eof { die_in_string lexbuf "Unterminated_qqstring" }
and qstring = parse
| ')' {
@@ -607,11 +662,13 @@ and qstring = parse
next qstring lexbuf
}
| [^ '\n' '(' ')']+ { next qstring lexbuf }
-| eof { die lexbuf "Unterminated_qstring" }
+| eof { die_in_string lexbuf "Unterminated_qstring" }
and here_doc = parse
-| '\\' { next_rule := Some here_doc ; string_escape lexbuf }
-| [ ^ '\n' '\\' ]* {
+| '\\' { Stack.push here_doc next_rule ; string_escape lexbuf }
+| '$' { Stack.push here_doc next_rule ; string_interpolate_scalar lexbuf }
+| '@' { Stack.push here_doc next_rule ; string_interpolate_array lexbuf }
+| [ ^ '\n' '\\' '$' '@' ]* {
let s = lexeme lexbuf in
if chomps s <> !current_here_doc_mark
then next_s s here_doc lexbuf
@@ -621,7 +678,7 @@ and here_doc = parse
add_a_new_line(lexeme_end lexbuf);
next here_doc lexbuf
}
-| eof { die lexbuf "Unterminated_here_doc" }
+| eof { die_in_string lexbuf "Unterminated_here_doc" }
and raw_here_doc = parse
| [ ^ '\n' ]* {
@@ -634,22 +691,72 @@ and raw_here_doc = parse
add_a_new_line(lexeme_end lexbuf);
next raw_here_doc lexbuf
}
-| eof { die lexbuf "Unterminated_raw_here_doc" }
+| eof { die_in_string lexbuf "Unterminated_raw_here_doc" }
and string_escape = parse
-| '0' { next_s "\000" (some !next_rule) lexbuf }
-| '"' { next_s "\"" (some !next_rule) lexbuf }
-| ''' { next_s "'" (some !next_rule) lexbuf }
-| 'n' { next_s "\n" (some !next_rule) lexbuf }
-| 't' { next_s "\t" (some !next_rule) lexbuf }
+| '0' { next_s "\000" (Stack.pop next_rule) lexbuf }
+| '"' { next_s "\"" (Stack.pop next_rule) lexbuf }
+| ''' { next_s "'" (Stack.pop next_rule) lexbuf }
+| 'n' { next_s "\n" (Stack.pop next_rule) lexbuf }
+| 't' { next_s "\t" (Stack.pop next_rule) lexbuf }
| 'x' _ _ {
try
let s = String.make 1 (Char.chr (int_of_string ("0" ^ lexeme lexbuf))) in
- next_s s (some !next_rule) lexbuf
- with Failure("int_of_string") -> die lexbuf ("Bad_hex_in_string \"" ^ lexeme lexbuf ^ "\"")
+ next_s s (Stack.pop next_rule) lexbuf
+ with Failure("int_of_string") -> die_in_string lexbuf ("Bad_hex_in_string \"" ^ lexeme lexbuf ^ "\"")
+ }
+| _ { next_s ("\\" ^ lexeme lexbuf) (Stack.pop next_rule) lexbuf }
+
+
+and string_interpolate_scalar = parse
+| '$' ident
+| ['0'-'9']
+| '{' [^ '{' '}']* '}'
+| (ident | (ident? ("::" ident)+)) "->"? (('{' [^ '{' '}' '\n']* '}') | ('[' [^ '[' ']' '\n']* ']'))*
+| [^ '{' '}' ' ' '\n' '"'] { (* eg: $! $$ *)
+ string_interpolate token "$" lexbuf
+ }
+
+| "{"
+| ident "->"? '{'
+| eof { next_s "$" (Stack.pop next_rule) lexbuf }
+| _ { warn lexbuf (Printf.sprintf "weird \"%s\" in string" (lexeme lexbuf)); next_s ("$" ^ lexeme lexbuf) (Stack.pop next_rule) lexbuf }
+
+and delimited_string_interpolate_scalar = parse (* needed for delimited string like m!foo$! where $! should not be taken as is... *)
+| '$' ident
+| ['0'-'9']
+| '{' [^ '{' '}']* '}'
+| (ident | (ident? ("::" ident)+)) "->"? ('{' [^ '{' '}' '\n']* '}')*
+| (ident | (ident? ("::" ident)+)) "->"? (('{' [^ '{' '}' '\n']* '}') | ('[' ('$' ident | ['0'-'9']+) ']'))*
+ {
+ string_interpolate token "$" lexbuf
}
-| _ { next_s ("\\" ^ lexeme lexbuf) (some !next_rule) lexbuf }
+
+| (ident | (ident? ("::" ident)+)) "->"? (('{' [^ '{' '}' '\n']* '}') | ('[' ['$' '0'-'9'] [^ '[' ']' '\n']* ']'))*
+ {
+ die lexbuf (Printf.sprintf "I really can't handle this, [xxx] can be indexing or not based on stellar position :-(")
+ }
+
+
+| "{"
+| ident "->"? '{'
+| eof { next_s "$" (Stack.pop next_rule) lexbuf }
+| _ {
+ let c = lexeme_char lexbuf 0 in
+ if c <> !delimit_char && c <> '|' && c<>')' && c<>'/' then warn lexbuf (Printf.sprintf "weird \"%s\" in string" (lexeme lexbuf));
+ putback lexbuf 1;
+ (Stack.pop next_rule) lexbuf
+ }
+
+and string_interpolate_array = parse
+| '$' ident
+| '{' [^ '{' '}']* '}'
+| (ident | (ident? ("::" ident)+)) { string_interpolate token "@" lexbuf }
+
+| [ '*' '<' '>' ']' '.' '('] { next_s ("@" ^ lexeme lexbuf) (Stack.pop next_rule) lexbuf }
+| eof { next_s "$" (Stack.pop next_rule) lexbuf }
+| _ { warn lexbuf (Printf.sprintf "weird \"%s\" in string" (lexeme lexbuf)); next_s ("$" ^ lexeme lexbuf) (Stack.pop next_rule) lexbuf }
and pattern_options = parse
| [ 'g' 'i' 'm' 'o' 's' 'x' 'e' 'd' ] { next pattern_options lexbuf }
@@ -661,7 +768,7 @@ and pod_command = parse
let command = String.sub s 0 (try String.index s ' ' with Not_found -> String.length s) in
match command with
| "cut" ->
- if !building_current_string = "" then
+ if !(Stack.top building_current_string) = "" then
failwith(pos2sfull lexbuf ^ "found POD command \"=cut\" but it is not a POD block")
| "head1" | "head2" | "head3" | "head4" | "over" | "item" | "back" | "pod" | "begin" | "end" | "for" ->
next pod lexbuf
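
Note: the central change in lexer.mll is the representation of interpolated strings. STRING, COMMAND_STRING, PATTERN and HERE_DOC now carry a raw_interpolated_string, i.e. (string * raw_token list) list, pairing each literal chunk with the tokens of the interpolation that follows it; a final pair with an empty token list holds the trailing literal. Roughly, with a toy token type for illustration:

    type toy_token = SCALAR_IDENT of string

    (* what the lexer would build for the Perl string "hello $name!" *)
    let example : (string * toy_token list) list =
      [ "hello ", [ SCALAR_IDENT "name" ];
        "!",      [] ]
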
diff --git a/perl_checker.src/parser.mly b/perl_checker.src/parser.mly
index 8793b9e..05bdfe4 100644
--- a/perl_checker.src/parser.mly
+++ b/perl_checker.src/parser.mly
@@ -4,17 +4,22 @@
open Parser_helper
let parse_error msg = die_rule msg
-
+ let prog_ref = ref None
+ let to_String e = Parser_helper.to_String (some !prog_ref) e
+ let from_PATTERN e = Parser_helper.from_PATTERN (some !prog_ref) e
+ let from_PATTERN_SUBST e = Parser_helper.from_PATTERN_SUBST (some !prog_ref) e
%}
%token <unit * (Types.spaces * Types.raw_pos)> EOF
-%token <string * (Types.spaces * Types.raw_pos)> NUM STRING BAREWORD BAREWORD_PAREN REVISION COMMENT POD LABEL PRINT_TO_STAR PRINT_TO_SCALAR ONE_SCALAR_PARA
-%token <string * (Types.spaces * Types.raw_pos)> COMMAND_STRING QUOTEWORDS COMPACT_HASH_SUBSCRIPT
+%token <string * (Types.spaces * Types.raw_pos)> NUM RAW_STRING BAREWORD BAREWORD_PAREN REVISION COMMENT POD LABEL PRINT_TO_STAR PRINT_TO_SCALAR ONE_SCALAR_PARA
+%token <string * (Types.spaces * Types.raw_pos)> QUOTEWORDS COMPACT_HASH_SUBSCRIPT
+%token <(string * Types.raw_pos) * (Types.spaces * Types.raw_pos)> RAW_HERE_DOC
+%token <(string * ((int * int) * token) list) list * (Types.spaces * Types.raw_pos)> STRING COMMAND_STRING
+%token <((string * ((int * int) * token) list) list * Types.raw_pos) * (Types.spaces * Types.raw_pos)> HERE_DOC
-%token <(string * Types.raw_pos) ref * (Types.spaces * Types.raw_pos)> HERE_DOC
-%token <(string * string) * (Types.spaces * Types.raw_pos)> PATTERN
-%token <(string * string * string) * (Types.spaces * Types.raw_pos)> PATTERN_SUBST
+%token <((string * ((int * int) * token) list) list * string) * (Types.spaces * Types.raw_pos)> PATTERN
+%token <((string * ((int * int) * token) list) list * (string * ((int * int) * token) list) list * string) * (Types.spaces * Types.raw_pos)> PATTERN_SUBST
%token <(string option * string) * (Types.spaces * Types.raw_pos)> SCALAR_IDENT ARRAY_IDENT HASH_IDENT FUNC_IDENT STAR_IDENT RAW_IDENT RAW_IDENT_PAREN ARRAYLEN_IDENT
%token <(string * string) * (Types.spaces * Types.raw_pos)> FUNC_DECL_WITH_PROTO
@@ -84,14 +89,15 @@
%left PAREN PREC_HIGH
%left ARRAYREF BRACKET
-%type <Types.fromparser list> prog
-%type <(Types.priority * Types.fromparser) * (Types.spaces * Types.raw_pos)> expr
+%type <Types.fromparser list> prog inside
+%type <(Types.priority * Types.fromparser) * (Types.spaces * Types.raw_pos)> expr term
-%start prog
+%start prog inside
%%
prog: lines EOF {check_package (fst $1); fst $1}
+inside: lines EOF {fst $1}
lines: /* A collection of "lines" in the program */
| {[], (Space_none, bpos)}
@@ -215,6 +221,8 @@ term:
| term PATTERN_MATCH scalar { (P_expr, Too_complex), pos_range $1 $3}
| term PATTERN_MATCH_NOT scalar { (P_expr, Too_complex), pos_range $1 $3}
+| term PATTERN_MATCH RAW_STRING {die_with_pos (sndsnd $3) "use a regexp, not a string"}
+| term PATTERN_MATCH_NOT RAW_STRING {die_with_pos (sndsnd $3) "use a regexp, not a string"}
| term PATTERN_MATCH STRING {die_with_pos (sndsnd $3) "use a regexp, not a string"}
| term PATTERN_MATCH_NOT STRING {die_with_pos (sndsnd $3) "use a regexp, not a string"}
@@ -235,9 +243,10 @@ term:
| term DECR {sp_0($2); (P_tight, Call_op("-- post", [sndfst $1])), pos_range $1 $2}
| NOT argexpr {(P_and, Call_op("not", sndfst $2)), pos_range $1 $2}
+| ONE_SCALAR_PARA RAW_STRING {call_one_scalar_para $1 [to_Raw_string $2], pos_range $1 $2}
| ONE_SCALAR_PARA STRING {call_one_scalar_para $1 [to_String $2], pos_range $1 $2}
| ONE_SCALAR_PARA variable {call_one_scalar_para $1 [fst $2], pos_range $1 $2}
-| ONE_SCALAR_PARA subscripted {call_one_scalar_para $1 [fst $2], pos_range $1 $2}
+| ONE_SCALAR_PARA restricted_subscripted {call_one_scalar_para $1 [fst $2], pos_range $1 $2}
| ONE_SCALAR_PARA parenthesized {call_one_scalar_para $1 (sndfst $2), pos_range $1 $2}
| ONE_SCALAR_PARA word_paren parenthesized {call_one_scalar_para $1 [Call(fst $2, sndfst $3)], pos_range $1 $3}
@@ -295,10 +304,12 @@ term:
| NUM {(P_tok, Num(fst $1, get_pos $1)), snd $1}
| STRING {(P_tok, to_String $1), snd $1}
-| REVISION {(P_tok, to_String $1), snd $1}
+| RAW_STRING {(P_tok, to_Raw_string $1), snd $1}
+| REVISION {(P_tok, to_Raw_string $1), snd $1}
| COMMAND_STRING {(P_expr, Call_op("``", [to_String $1])), snd $1}
-| QUOTEWORDS {(P_tok, Call_op("qw", [to_String $1])), snd $1}
-| HERE_DOC {(P_tok, String(fst!(fst $1), get_pos $1)), snd $1}
+| QUOTEWORDS {(P_tok, Call_op("qw", [to_Raw_string $1])), snd $1}
+| HERE_DOC {(P_tok, String([], raw_pos2pos (sndfst $1))), snd $1}
+| RAW_HERE_DOC {(P_tok, Raw_string(fstfst $1, raw_pos2pos (sndfst $1))), snd $1}
| PATTERN {(P_expr, Call_op("m//", var_dollar_ :: from_PATTERN $1)), snd $1}
| PATTERN_SUBST {(P_expr, Call_op("s///", var_dollar_ :: from_PATTERN_SUBST $1)), snd $1}
| diamond {(P_expr, fst $1), snd $1}
@@ -318,6 +329,13 @@ subscripted: /* Some kind of subscripted expression */
| subscripted arrayref {sp_0($2); Deref_with(I_array, fst $1, only_one $2), pos_range $1 $2} /* $foo->[$bar][$baz] */
| subscripted parenthesized {sp_0($2); Deref_with(I_func , fst $1, List(sndfst $2)), pos_range $1 $2} /* $foo->{bar}(@args) */
+restricted_subscripted: /* Some kind of subscripted expression */
+| scalar bracket_subscript {sp_0($2); Deref_with(I_hash , fst $1, fst $2), pos_range $1 $2} /* $foo{bar} */
+| scalar arrayref {sp_0($2); Deref_with(I_array, fst $1, only_one $2), pos_range $1 $2} /* $array[$element] */
+| restricted_subscripted bracket_subscript {sp_0($2); Deref_with(I_hash , fst $1, fst $2), pos_range $1 $2} /* $foo->[bar]{baz} */
+| restricted_subscripted arrayref {sp_0($2); Deref_with(I_array, fst $1, only_one $2), pos_range $1 $2} /* $foo->[$bar][$baz] */
+| restricted_subscripted parenthesized {sp_0($2); Deref_with(I_func , fst $1, List(sndfst $2)), pos_range $1 $2} /* $foo->{bar}(@args) */
+
arrayref:
| arrayref_start ARRAYREF_END {sp_0($2); fst $1, pos_range $1 $2}
| arrayref_start expr ARRAYREF_END {sp_0($3); fst $1 @ [sndfst $2], pos_range $1 $3}
@@ -345,8 +363,8 @@ termdo: /* Things called with "do" */
| DO BRACKET lines BRACKET_END %prec PREC_HIGH {sp_n($2); check_block_sub $3 $4; Block(fst $3), pos_range $1 $4} /* do { code */
bracket_subscript:
-| BRACKET expr BRACKET_END {sp_0($1); sp_0($2); sp_0($3); only_one_in_List $2, pos_range $1 $3}
-| COMPACT_HASH_SUBSCRIPT {sp_0($1); to_String $1, snd $1}
+| BRACKET expr BRACKET_END {sp_0($1); sp_same $2 $3; only_one_in_List $2, pos_range $1 $3}
+| COMPACT_HASH_SUBSCRIPT {sp_0($1); to_Raw_string $1, snd $1}
variable:
| scalar %prec PREC_HIGH {$1}
@@ -378,11 +396,17 @@ word_paren:
| BAREWORD_PAREN { Ident(None, fst $1, get_pos $1), snd $1}
| RAW_IDENT_PAREN { to_Ident $1, snd $1}
-arraylen: ARRAYLEN_IDENT {Deref(I_arraylen, to_Ident $1), snd $1} | ARRAYLEN scalar {sp_0($2); Deref(I_arraylen, fst $2), snd $1} | ARRAYLEN BRACKET lines BRACKET_END {sp_0($2); check_block_ref $3 $4; sp_same $3 $4; Deref(I_arraylen, Block(fst $3)), pos_range $1 $4}
-scalar: SCALAR_IDENT {Deref(I_scalar , to_Ident $1), snd $1} | DOLLAR scalar {sp_0($2); Deref(I_scalar , fst $2), snd $1} | DOLLAR BRACKET lines BRACKET_END {sp_0($2); check_block_ref $3 $4; sp_same $3 $4; Deref(I_scalar , Block(fst $3)), pos_range $1 $4} | DOLLAR BRACKET BRACKET expr BRACKET_END BRACKET_END {sp_0($2); sp_0($3); sp_p($5); sp_0($6); Deref(I_scalar, Ref(I_hash, sndfst $4)), pos_range $1 $6}
-func: FUNC_IDENT {Deref(I_func , to_Ident $1), snd $1} | AMPERSAND scalar {sp_0($2); Deref(I_func , fst $2), snd $1} | AMPERSAND BRACKET lines BRACKET_END {sp_0($2); check_block_ref $3 $4; sp_same $3 $4; Deref(I_func , Block(fst $3)), pos_range $1 $4}
-array: ARRAY_IDENT {Deref(I_array , to_Ident $1), snd $1} | AT scalar {sp_0($2); Deref(I_array , fst $2), snd $1} | AT BRACKET lines BRACKET_END {sp_0($2); check_block_ref $3 $4; sp_same $3 $4; Deref(I_array , Block(fst $3)), pos_range $1 $4}
-hash: HASH_IDENT {Deref(I_hash , to_Ident $1), snd $1} | PERCENT scalar {sp_0($2); Deref(I_hash , fst $2), snd $1} | PERCENT BRACKET lines BRACKET_END {sp_0($2); check_block_ref $3 $4; sp_same $3 $4; Deref(I_hash , Block(fst $3)), pos_range $1 $4}
-star: STAR_IDENT {Deref(I_star , to_Ident $1), snd $1} | STAR scalar {sp_0($2); Deref(I_star , fst $2), snd $1} | STAR BRACKET lines BRACKET_END {sp_0($2); check_block_ref $3 $4; sp_same $3 $4; Deref(I_star , Block(fst $3)), pos_range $1 $4}
+arraylen: ARRAYLEN_IDENT {Deref(I_arraylen, to_Ident $1), snd $1} | ARRAYLEN scalar {sp_0($2); Deref(I_arraylen, fst $2), snd $1} | ARRAYLEN bracket_subscript {Deref(I_arraylen, fst $2), pos_range $1 $2}
+scalar: SCALAR_IDENT {Deref(I_scalar , to_Ident $1), snd $1} | DOLLAR scalar {sp_0($2); Deref(I_scalar , fst $2), snd $1} | DOLLAR bracket_subscript {Deref(I_scalar , fst $2), pos_range $1 $2} | DOLLAR BRACKET BRACKET expr BRACKET_END BRACKET_END {sp_0($2); sp_0($3); sp_p($5); sp_0($6); Deref(I_scalar, Ref(I_hash, sndfst $4)), pos_range $1 $6}
+func: FUNC_IDENT {Deref(I_func , to_Ident $1), snd $1} | AMPERSAND scalar {sp_0($2); Deref(I_func , fst $2), snd $1} | AMPERSAND bracket_subscript {Deref(I_func , fst $2), pos_range $1 $2}
+array: ARRAY_IDENT {Deref(I_array , to_Ident $1), snd $1} | AT scalar {sp_0($2); Deref(I_array , fst $2), snd $1} | AT bracket_subscript {Deref(I_array , fst $2), pos_range $1 $2}
+hash: HASH_IDENT {Deref(I_hash , to_Ident $1), snd $1} | PERCENT scalar {sp_0($2); Deref(I_hash , fst $2), snd $1} | PERCENT bracket_subscript {Deref(I_hash , fst $2), pos_range $1 $2}
+star: STAR_IDENT {Deref(I_star , to_Ident $1), snd $1} | STAR scalar {sp_0($2); Deref(I_star , fst $2), snd $1} | STAR bracket_subscript {Deref(I_star , fst $2), pos_range $1 $2}
expr_or_empty: {Block [], (Space_none, bpos)} | expr {sndfst $1, snd $1}
+
+%%
+
+;;
+prog_ref := Some inside
+;;
diff --git a/perl_checker.src/parser_helper.ml b/perl_checker.src/parser_helper.ml
index ebd5cfd..c97c51d 100644
--- a/perl_checker.src/parser_helper.ml
+++ b/perl_checker.src/parser_helper.ml
@@ -46,7 +46,7 @@ let warn raw_pos msg = prerr_endline (msg_with_pos raw_pos msg)
let die_rule msg = die_with_pos (Parsing.symbol_start(), Parsing.symbol_end()) msg
let warn_rule msg = warn (Parsing.symbol_start(), Parsing.symbol_end()) msg
-let debug msg = if false then prerr_endline msg
+let debug msg = if true then prerr_endline msg
let warn_too_many_space start = warn (start, start) "you should have only one space here"
let warn_no_space start = warn (start, start) "you should have a space here"
@@ -225,7 +225,7 @@ let check_block_ref (l, (_, (_, end_)) as ter_lines) (_, (space, _) as ter_BRACK
let to_Ident ((fq, name), (_, pos)) = Ident(fq, name, raw_pos2pos pos)
-let to_String (s, (_, pos)) = String(s, raw_pos2pos pos)
+let to_Raw_string (s, (_, pos)) = Raw_string(s, raw_pos2pos pos)
let op prio s (_, both) = prio, (((), both), s)
let op_p prio s e = sp_p e ; op prio s e
@@ -251,9 +251,6 @@ let array_ident_to_hash_ident (e, (_, pos)) =
match e with
| Deref(I_array, e) -> Deref(I_hash, e)
| _ -> die_with_pos pos "internal error (array_ident_to_hash_ident)"
-
-let from_PATTERN ((s, opts), (_, pos)) = [ String(s, raw_pos2pos pos) ; String(opts, raw_pos2pos pos) ]
-let from_PATTERN_SUBST ((s1, s2, opts), (_, pos)) = [ String(s1, raw_pos2pos pos) ; String(s2, raw_pos2pos pos) ; String(opts, raw_pos2pos pos) ]
let to_List = function
| [e] -> e
@@ -267,6 +264,7 @@ let call(e, para) =
(match para with
| [ Ident _ ] -> ()
| [ String _ ] -> ()
+ | [ Raw_string _ ] -> ()
| _ -> die_rule "use either \"require PACKAGE\" or \"require 'PACKAGE.pm'\"")
| Ident(None, "N", _) ->
(match para with
@@ -282,3 +280,34 @@ let call_one_scalar_para (e, (_, pos)) para =
| _ -> P_add
in
pri, Call(Ident(None, e, raw_pos2pos pos), para)
+
+let (current_lexbuf : Lexing.lexbuf option ref) = ref None
+
+
+let rec list2tokens l =
+ let rl = ref l in
+ fun lexbuf ->
+ match !rl with
+ | [] -> internal_error "list2tokens"
+ | ((start, end_), e) :: l ->
+ lexbuf.Lexing.lex_abs_pos <- 0 ;
+ lexbuf.Lexing.lex_start_pos <- start ;
+ lexbuf.Lexing.lex_curr_pos <- end_ ;
+ rl := l ; e
+
+let parse_tokens parse tokens lexbuf_opt =
+ if lexbuf_opt <> None then current_lexbuf := lexbuf_opt ;
+ if tokens = [] then [] else
+ parse (list2tokens tokens) (some !current_lexbuf)
+
+let parse_interpolated parse l = List.map (fun (s, tokens) -> s, to_List(parse_tokens parse tokens None)) l
+
+let to_String parse (l, (_, pos)) = String(parse_interpolated parse l, raw_pos2pos pos)
+
+let from_PATTERN parse ((s, opts), (_, pos)) =
+ [ String(parse_interpolated parse s, raw_pos2pos pos) ;
+ Raw_string(opts, raw_pos2pos pos) ]
+let from_PATTERN_SUBST parse ((s1, s2, opts), (_, pos)) =
+ [ String(parse_interpolated parse s1, raw_pos2pos pos) ;
+ String(parse_interpolated parse s2, raw_pos2pos pos) ;
+ Raw_string(opts, raw_pos2pos pos) ]
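
Note: parse_tokens bridges the pre-tokenized representation back to the ocamlyacc interface, which expects a function of type Lexing.lexbuf -> token. list2tokens wraps the token list in a closure that hands out one token per call while patching the lexbuf positions so error messages still point into the original source. The general technique, sketched here without the position bookkeeping:

    let tokens_as_lexer tokens =
      let remaining = ref tokens in
      fun (_ : Lexing.lexbuf) ->
        match !remaining with
        | [] -> failwith "tokens_as_lexer: no more tokens"
        | t :: rest -> remaining := rest; t

    (* usage, e.g.:  Parser.prog (tokens_as_lexer tokens) lexbuf *)
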
diff --git a/perl_checker.src/parser_helper.mli b/perl_checker.src/parser_helper.mli
index db59ee5..4655810 100644
--- a/perl_checker.src/parser_helper.mli
+++ b/perl_checker.src/parser_helper.mli
@@ -58,7 +58,7 @@ val check_block_ref :
'a * (Types.spaces * (int * 'b)) -> unit
val to_Ident :
(string option * string) * ('a * (int * int)) -> Types.fromparser
-val to_String : string * ('a * (int * int)) -> Types.fromparser
+val to_Raw_string : string * ('a * (int * int)) -> Types.fromparser
val op : 'a -> 'b -> 'c * 'd -> 'a * ((unit * 'd) * 'b)
val op_p :
'a ->
@@ -74,10 +74,6 @@ val only_one_in_List :
('a * Types.fromparser) * ('b * (int * int)) -> Types.fromparser
val array_ident_to_hash_ident :
Types.fromparser * ('a * (int * int)) -> Types.fromparser
-val from_PATTERN :
- (string * string) * ('a * (int * int)) -> Types.fromparser list
-val from_PATTERN_SUBST :
- (string * string * string) * ('a * (int * int)) -> Types.fromparser list
val to_List : Types.fromparser list -> Types.fromparser
val sub_declaration :
Types.fromparser * string -> Types.fromparser list -> Types.fromparser
@@ -85,3 +81,24 @@ val call : Types.fromparser * Types.fromparser list -> Types.fromparser
val call_one_scalar_para :
string * ('a * (int * int)) ->
Types.fromparser list -> Types.priority * Types.fromparser
+val current_lexbuf : Lexing.lexbuf option ref
+val list2tokens : ((int * int) * 'a) list -> Lexing.lexbuf -> 'a
+val parse_tokens :
+ ((Lexing.lexbuf -> 'a) -> Lexing.lexbuf -> 'b list) ->
+ ((int * int) * 'a) list -> Lexing.lexbuf option -> 'b list
+val parse_interpolated :
+ ((Lexing.lexbuf -> 'a) -> Lexing.lexbuf -> Types.fromparser list) ->
+ ('b * ((int * int) * 'a) list) list -> ('b * Types.fromparser) list
+val to_String :
+ ((Lexing.lexbuf -> 'a) -> Lexing.lexbuf -> Types.fromparser list) ->
+ (string * ((int * int) * 'a) list) list * ('b * (int * int)) ->
+ Types.fromparser
+val from_PATTERN :
+ ((Lexing.lexbuf -> 'a) -> Lexing.lexbuf -> Types.fromparser list) ->
+ ((string * ((int * int) * 'a) list) list * string) * ('b * (int * int)) ->
+ Types.fromparser list
+val from_PATTERN_SUBST :
+ ((Lexing.lexbuf -> 'a) -> Lexing.lexbuf -> Types.fromparser list) ->
+ ((string * ((int * int) * 'a) list) list *
+ (string * ((int * int) * 'a) list) list * string) *
+ ('b * (int * int)) -> Types.fromparser list
diff --git a/perl_checker.src/perl_checker.ml b/perl_checker.src/perl_checker.ml
index 79b3ac9..7e951a8 100644
--- a/perl_checker.src/perl_checker.ml
+++ b/perl_checker.src/perl_checker.ml
@@ -8,12 +8,9 @@ let _ =
let lexbuf = Lexing.from_channel (Unix.open_process_in (Printf.sprintf "expand \"%s\"" file)) in
try
Info.start_a_new_file file ;
- if false then
- let t = Lexer.lexbuf2list (Lexer.concat_bareword_paren (Lexer.concat_spaces Lexer.token)) lexbuf in
- let _,_ = t, t in ()
- else
- let t = Parser.prog (Lexer.concat_bareword_paren (Lexer.concat_spaces Lexer.token)) lexbuf in
- let _,_ = t, t in ()
+ let tokens = Lexer.get_token Lexer.token lexbuf in
+ let t = Parser_helper.parse_tokens Parser.prog tokens (Some lexbuf) in
+ let _,_ = t, t in ()
with Failure s -> (
prerr_endline s ;
exit 1
diff --git a/perl_checker.src/types.mli b/perl_checker.src/types.mli
index 567c0c5..ceb5804 100644
--- a/perl_checker.src/types.mli
+++ b/perl_checker.src/types.mli
@@ -16,7 +16,8 @@ type context = I_scalar | I_hash | I_array | I_func | I_raw | I_star | I_arrayle
type fromparser =
| Ident of string option * string * pos
| Num of string * pos
- | String of string * pos
+ | Raw_string of string * pos
+ | String of (string * fromparser) list * pos
| Ref of context * fromparser
| Deref of context * fromparser