| field | value | date |
|---|---|---|
| author | Pascal Rigaux <pixel@mandriva.com> | 2002-11-20 00:53:48 +0000 |
| committer | Pascal Rigaux <pixel@mandriva.com> | 2002-11-20 00:53:48 +0000 |
| commit | 131207a1f99f85d2b8d272e7b47b058076b5c1cf (patch) | |
| tree | ccc59e6e5ce5197eecd70944a0f723f143edf031 | |
| parent | 659ced82c2465f81c3f14a1fa601d7df9ce2d2da (diff) | |
*** empty log message ***
| -rw-r--r-- | perl_checker.src/common.ml | 2 |
| -rw-r--r-- | perl_checker.src/common.mli | 1 |
| -rw-r--r-- | perl_checker.src/lexer.mll | 489 |
| -rw-r--r-- | perl_checker.src/parser.mly | 66 |
| -rw-r--r-- | perl_checker.src/parser_helper.ml | 39 |
| -rw-r--r-- | perl_checker.src/parser_helper.mli | 27 |
| -rw-r--r-- | perl_checker.src/perl_checker.ml | 9 |
| -rw-r--r-- | perl_checker.src/types.mli | 3 |
8 files changed, 407 insertions, 229 deletions
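
One of the small supporting changes below: common.ml gains a `queue2list` helper, which the reworked lexer uses to turn its queue of interpolated-string chunks into a list. A minimal standalone sketch of the same idea (spelling out `List.rev` and the stdlib `Queue` explicitly; the real module relies on its own opens):

```ocaml
(* Sketch of the queue2list helper added in common.ml: fold the queue
   front-to-back, consing onto an accumulator, then reverse to recover
   insertion order. *)
let queue2list (q : 'a Queue.t) : 'a list =
  List.rev (Queue.fold (fun acc x -> x :: acc) [] q)

let () =
  let q = Queue.create () in
  List.iter (fun x -> Queue.push x q) ["a"; "b"; "c"];
  assert (queue2list q = ["a"; "b"; "c"])
```
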
| diff --git a/perl_checker.src/common.ml b/perl_checker.src/common.ml index 45e6ec1..c4600ff 100644 --- a/perl_checker.src/common.ml +++ b/perl_checker.src/common.ml @@ -186,6 +186,8 @@ let rec stack2list s =    Stack.iter (fun e -> l := e :: !l) s ;    !l +let rec queue2list q = rev (Queue.fold (fun b a -> a :: b) [] q) +  let rec fix_point f p =    let p' = f p in    if p = p' then p else fix_point f p' diff --git a/perl_checker.src/common.mli b/perl_checker.src/common.mli index ec9c8ce..5398092 100644 --- a/perl_checker.src/common.mli +++ b/perl_checker.src/common.mli @@ -42,6 +42,7 @@ val fold_right1 : ('a -> 'a -> 'a) -> 'a list -> 'a  val for_all2_ : ('a -> 'b -> bool) -> 'a list -> 'b list -> bool  val maxl : 'a list -> 'a  val stack2list : 'a Stack.t -> 'a list +val queue2list : 'a Queue.t -> 'a list  val fix_point : ('a -> 'a) -> 'a -> 'a  val fix_point_withenv : ('a -> 'b -> 'b * 'a) -> 'a -> 'b -> 'b * 'a  val fix_point_ : int -> ('a -> 'a) -> 'a -> 'a * int diff --git a/perl_checker.src/lexer.mll b/perl_checker.src/lexer.mll index 07aa48a..3ebce72 100644 --- a/perl_checker.src/lexer.mll +++ b/perl_checker.src/lexer.mll @@ -4,31 +4,37 @@ open Types  open Lexing  open Info +let bpos = -1,-1 +  type raw_token =    | EOF of raw_pos    | SPACE of int    | CR    | NUM of (string * raw_pos) -  | STRING of (string * raw_pos) +  | RAW_STRING of (string * raw_pos) +  | STRING of (raw_interpolated_string * raw_pos) +  | PATTERN of (raw_interpolated_string * string * raw_pos) +  | PATTERN_SUBST of (raw_interpolated_string * raw_interpolated_string * string * raw_pos)    | BAREWORD of (string * raw_pos) +  | BAREWORD_PAREN of (string * raw_pos)    | REVISION of (string * raw_pos)    | COMMENT of (string * raw_pos)    | POD of (string * raw_pos)    | LABEL of (string * raw_pos) -  | COMMAND_STRING of (string * raw_pos) +  | COMMAND_STRING of (raw_interpolated_string * raw_pos)    | PRINT_TO_STAR of (string * raw_pos)    | PRINT_TO_SCALAR of (string * raw_pos)    | QUOTEWORDS of (string * raw_pos)    | COMPACT_HASH_SUBSCRIPT of (string * raw_pos) -  | HERE_DOC of ((string * raw_pos) ref * raw_pos) -  | PATTERN of (string * string * raw_pos) -  | PATTERN_SUBST of (string * string * string * raw_pos) +  | RAW_HERE_DOC of ((string * raw_pos) ref * raw_pos) +  | HERE_DOC of (raw_interpolated_string * raw_pos) ref * raw_pos    | SCALAR_IDENT of (string option * string * raw_pos)    | ARRAY_IDENT of (string option * string * raw_pos)    | HASH_IDENT of (string option * string * raw_pos)    | FUNC_IDENT of (string option * string * raw_pos)    | STAR_IDENT of (string option * string * raw_pos)    | RAW_IDENT of (string option * string * raw_pos) +  | RAW_IDENT_PAREN of (string option * string * raw_pos)    | ARRAYLEN_IDENT of (string option * string * raw_pos)    | FUNC_DECL_WITH_PROTO of (string * string * raw_pos) @@ -43,150 +49,152 @@ type raw_token =    | BIT_AND of raw_pos | BIT_OR of raw_pos | BIT_XOR of raw_pos | AND_TIGHT of raw_pos | OR_TIGHT of raw_pos | DOTDOT of (string * raw_pos)    | QUESTION_MARK of raw_pos | COLON of raw_pos | ASSIGN of (string * raw_pos) | COMMA of raw_pos | RIGHT_ARROW of raw_pos | NOT of raw_pos | AND of raw_pos | OR of raw_pos | XOR of raw_pos -let saved_token = ref None - -let concat_bareword_paren get_token lexbuf = -  let token = match !saved_token with -  | None -> get_token lexbuf -  | Some t -> t -  in -  let token, next = -    match token with -    | Parser.PRINT(s, both) -    | Parser.BAREWORD(s, both) -> -	let next_token = get_token lexbuf in -	(match 
next_token with Parser.PAREN(_, (Space_0, _)) -> Parser.BAREWORD_PAREN(s, both) | _ -> token), Some next_token -    | Parser.RAW_IDENT(ident, both) -> -	let next_token = get_token lexbuf in -	(match next_token with Parser.PAREN(_, (Space_0, _)) -> Parser.RAW_IDENT_PAREN(ident, both) | _ -> token), Some next_token -    | _ -> token, None -  in -  saved_token := next ; token - -let rec concat_spaces get_token lexbuf = -  let rec get_spaces spaces lexbuf = -    match get_token lexbuf with -    | CR -> get_spaces Space_cr lexbuf -    | SPACE n -> -	let spaces' =  -	  match spaces with -	  | Space_cr -> Space_cr -	  | Space_0 -> if n = 1 then Space_1 else Space_n -	  | _ -> Space_n -	in -	get_spaces spaces' lexbuf -    | token -> token, spaces -  in -  let token, spaces = get_spaces Space_0 lexbuf in -  match token with -  | NUM(s, pos) -> Parser.NUM(s, (spaces, pos)) -  | STRING(s, pos) -> Parser.STRING(s, (spaces, pos)) -  | BAREWORD(s, pos) -> Parser.BAREWORD(s, (spaces, pos)) -  | REVISION(s, pos) -> Parser.REVISION(s, (spaces, pos)) -  | COMMENT(s, pos) -> Parser.COMMENT(s, (spaces, pos)) -  | POD(s, pos) -> Parser.POD(s, (spaces, pos)) -  | LABEL(s, pos) -> Parser.LABEL(s, (spaces, pos)) -  | COMMAND_STRING(s, pos) -> Parser.COMMAND_STRING(s, (spaces, pos)) -  | PRINT(s, pos) -> Parser.PRINT(s, (spaces, pos)) -  | PRINT_TO_STAR(s, pos) -> Parser.PRINT_TO_STAR(s, (spaces, pos)) -  | PRINT_TO_SCALAR(s, pos) -> Parser.PRINT_TO_SCALAR(s, (spaces, pos)) -  | QUOTEWORDS(s, pos) -> Parser.QUOTEWORDS(s, (spaces, pos)) -  | COMPACT_HASH_SUBSCRIPT(s, pos) -> Parser.COMPACT_HASH_SUBSCRIPT(s, (spaces, pos)) -  | HERE_DOC(r, pos) -> Parser.HERE_DOC(r, (spaces, pos)) -  | PATTERN(s, opts, pos) -> Parser.PATTERN((s, opts), (spaces, pos)) -  | PATTERN_SUBST(from, to_, opts, pos) -> Parser.PATTERN_SUBST((from, to_, opts), (spaces, pos)) -  | SCALAR_IDENT(kind, name, pos) -> Parser.SCALAR_IDENT((kind, name), (spaces, pos)) -  | ARRAY_IDENT(kind, name, pos) -> Parser.ARRAY_IDENT((kind, name), (spaces, pos)) -  | HASH_IDENT(kind, name, pos) -> Parser.HASH_IDENT((kind, name), (spaces, pos)) -  | FUNC_IDENT(kind, name, pos) -> Parser.FUNC_IDENT((kind, name), (spaces, pos)) -  | STAR_IDENT(kind, name, pos) -> Parser.STAR_IDENT((kind, name), (spaces, pos)) -  | RAW_IDENT(kind, name, pos) -> Parser.RAW_IDENT((kind, name), (spaces, pos)) -  | ARRAYLEN_IDENT(kind, name, pos) -> Parser.ARRAYLEN_IDENT((kind, name), (spaces, pos)) -  | FUNC_DECL_WITH_PROTO(name, proto, pos) -> Parser.FUNC_DECL_WITH_PROTO((name, proto), (spaces, pos)) - -  | NEW(pos) -> Parser.NEW((), (spaces, pos)) -  | FORMAT(pos) -> Parser.FORMAT((), (spaces, pos)) -  | COMPARE_OP(s, pos) -> Parser.COMPARE_OP(s, (spaces, pos)) -  | EQ_OP(s, pos) -> Parser.EQ_OP(s, (spaces, pos)) -  | ASSIGN(s, pos) -> Parser.ASSIGN(s, (spaces, pos)) -  | FOR(s, pos) -> Parser.FOR(s, (spaces, pos)) - -  | DOTDOT(s, pos) -> Parser.DOTDOT(s, (spaces, pos)) -  | MULT(s, pos) -> Parser.MULT(s, (spaces, pos)) -  | BIT_SHIFT(s, pos) -> Parser.BIT_SHIFT(s, (spaces, pos)) -  | PLUS(s, pos) -> Parser.PLUS(s, (spaces, pos)) -  | ONE_SCALAR_PARA(s, pos) -> Parser.ONE_SCALAR_PARA(s, (spaces, pos)) - -  | EOF              (pos) -> Parser.EOF              ((), (spaces, pos)) -  | IF               (pos) -> Parser.IF               ((), (spaces, pos)) -  | ELSIF            (pos) -> Parser.ELSIF            ((), (spaces, pos)) -  | ELSE             (pos) -> Parser.ELSE             ((), (spaces, pos)) -  | UNLESS           (pos) -> Parser.UNLESS           ((), (spaces, pos)) -  | DO     
          (pos) -> Parser.DO               ((), (spaces, pos)) -  | WHILE            (pos) -> Parser.WHILE            ((), (spaces, pos)) -  | UNTIL            (pos) -> Parser.UNTIL            ((), (spaces, pos)) -  | MY               (pos) -> Parser.MY               ((), (spaces, pos)) -  | CONTINUE         (pos) -> Parser.CONTINUE         ((), (spaces, pos)) -  | SUB              (pos) -> Parser.SUB              ((), (spaces, pos)) -  | LOCAL            (pos) -> Parser.LOCAL            ((), (spaces, pos)) -  | USE              (pos) -> Parser.USE              ((), (spaces, pos)) -  | PACKAGE          (pos) -> Parser.PACKAGE          ((), (spaces, pos)) -  | BEGIN            (pos) -> Parser.BEGIN            ((), (spaces, pos)) -  | END              (pos) -> Parser.END              ((), (spaces, pos)) -  | AT               (pos) -> Parser.AT               ((), (spaces, pos)) -  | DOLLAR           (pos) -> Parser.DOLLAR           ((), (spaces, pos)) -  | PERCENT          (pos) -> Parser.PERCENT          ((), (spaces, pos)) -  | AMPERSAND        (pos) -> Parser.AMPERSAND        ((), (spaces, pos)) -  | STAR             (pos) -> Parser.STAR             ((), (spaces, pos)) -  | ARRAYLEN         (pos) -> Parser.ARRAYLEN         ((), (spaces, pos)) -  | SEMI_COLON       (pos) -> Parser.SEMI_COLON       ((), (spaces, pos)) -  | PKG_SCOPE        (pos) -> Parser.PKG_SCOPE        ((), (spaces, pos)) -  | PAREN            (pos) -> Parser.PAREN            ((), (spaces, pos)) -  | PAREN_END        (pos) -> Parser.PAREN_END        ((), (spaces, pos)) -  | BRACKET          (pos) -> Parser.BRACKET          ((), (spaces, pos)) -  | BRACKET_END      (pos) -> Parser.BRACKET_END      ((), (spaces, pos)) -  | BRACKET_HASHREF  (pos) -> Parser.BRACKET_HASHREF  ((), (spaces, pos)) -  | ARRAYREF         (pos) -> Parser.ARRAYREF         ((), (spaces, pos)) -  | ARRAYREF_END     (pos) -> Parser.ARRAYREF_END     ((), (spaces, pos)) -  | ARROW            (pos) -> Parser.ARROW            ((), (spaces, pos)) -  | INCR             (pos) -> Parser.INCR             ((), (spaces, pos)) -  | DECR             (pos) -> Parser.DECR             ((), (spaces, pos)) -  | POWER            (pos) -> Parser.POWER            ((), (spaces, pos)) -  | TIGHT_NOT        (pos) -> Parser.TIGHT_NOT        ((), (spaces, pos)) -  | BIT_NEG          (pos) -> Parser.BIT_NEG          ((), (spaces, pos)) -  | REF              (pos) -> Parser.REF              ((), (spaces, pos)) -  | PATTERN_MATCH    (pos) -> Parser.PATTERN_MATCH    ((), (spaces, pos)) -  | PATTERN_MATCH_NOT(pos) -> Parser.PATTERN_MATCH_NOT((), (spaces, pos)) -  | LT               (pos) -> Parser.LT               ((), (spaces, pos)) -  | GT               (pos) -> Parser.GT               ((), (spaces, pos)) -  | BIT_AND          (pos) -> Parser.BIT_AND          ((), (spaces, pos)) -  | BIT_OR           (pos) -> Parser.BIT_OR           ((), (spaces, pos)) -  | BIT_XOR          (pos) -> Parser.BIT_XOR          ((), (spaces, pos)) -  | AND_TIGHT        (pos) -> Parser.AND_TIGHT        ((), (spaces, pos)) -  | OR_TIGHT         (pos) -> Parser.OR_TIGHT         ((), (spaces, pos)) -  | QUESTION_MARK    (pos) -> Parser.QUESTION_MARK    ((), (spaces, pos)) -  | COLON            (pos) -> Parser.COLON            ((), (spaces, pos)) -  | COMMA            (pos) -> Parser.COMMA            ((), (spaces, pos)) -  | RIGHT_ARROW      (pos) -> Parser.RIGHT_ARROW      ((), (spaces, pos)) -  | NOT              (pos) -> Parser.NOT              ((), (spaces, pos)) -  | AND              (pos) -> Parser.AND 
             ((), (spaces, pos)) -  | OR               (pos) -> Parser.OR               ((), (spaces, pos)) -  | XOR              (pos) -> Parser.XOR              ((), (spaces, pos)) +and raw_interpolated_string = (string * raw_token list) list + +let rec concat_bareword_paren = function +  | PRINT(s, pos1) :: PAREN(pos2) :: l +  | BAREWORD(s, pos1) :: PAREN(pos2) :: l -> +      BAREWORD_PAREN(s, pos1) :: PAREN(pos2) :: concat_bareword_paren l +  | RAW_IDENT(kind, ident, pos1) :: PAREN(pos2) :: l ->  +      RAW_IDENT_PAREN(kind, ident, pos1) :: PAREN(pos2) :: concat_bareword_paren l +  | [] -> [] +  | e :: l -> e :: concat_bareword_paren l + +let rec raw_token_to_pos_and_token spaces = function +  | NUM(s, pos) -> pos, Parser.NUM(s, (spaces, pos)) +  | RAW_STRING(s, pos) -> pos, Parser.RAW_STRING(s, (spaces, pos)) +  | RAW_HERE_DOC(r, pos) -> pos, Parser.RAW_HERE_DOC(!r, (spaces, pos)) +  | STRING(l, pos) -> pos, Parser.STRING(raw_interpolated_string_to_tokens l, (spaces, pos)) +  | COMMAND_STRING(l, pos) -> pos, Parser.COMMAND_STRING(raw_interpolated_string_to_tokens l, (spaces, pos)) +  | PATTERN(s, opts, pos) -> pos, Parser.PATTERN((raw_interpolated_string_to_tokens s, opts), (spaces, pos)) +  | PATTERN_SUBST(from, to_, opts, pos) -> pos, Parser.PATTERN_SUBST((raw_interpolated_string_to_tokens from, raw_interpolated_string_to_tokens to_, opts), (spaces, pos)) +  | HERE_DOC(l, pos) -> pos, Parser.HERE_DOC((raw_interpolated_string_to_tokens (fst !l), snd !l), (spaces, pos)) +  | BAREWORD(s, pos) -> pos, Parser.BAREWORD(s, (spaces, pos)) +  | BAREWORD_PAREN(s, pos) -> pos, Parser.BAREWORD_PAREN(s, (spaces, pos)) +  | REVISION(s, pos) -> pos, Parser.REVISION(s, (spaces, pos)) +  | COMMENT(s, pos) -> pos, Parser.COMMENT(s, (spaces, pos)) +  | POD(s, pos) -> pos, Parser.POD(s, (spaces, pos)) +  | LABEL(s, pos) -> pos, Parser.LABEL(s, (spaces, pos)) +  | PRINT(s, pos) -> pos, Parser.PRINT(s, (spaces, pos)) +  | PRINT_TO_STAR(s, pos) -> pos, Parser.PRINT_TO_STAR(s, (spaces, pos)) +  | PRINT_TO_SCALAR(s, pos) -> pos, Parser.PRINT_TO_SCALAR(s, (spaces, pos)) +  | QUOTEWORDS(s, pos) -> pos, Parser.QUOTEWORDS(s, (spaces, pos)) +  | COMPACT_HASH_SUBSCRIPT(s, pos) -> pos, Parser.COMPACT_HASH_SUBSCRIPT(s, (spaces, pos)) +  | SCALAR_IDENT(kind, name, pos) -> pos, Parser.SCALAR_IDENT((kind, name), (spaces, pos)) +  | ARRAY_IDENT(kind, name, pos) -> pos, Parser.ARRAY_IDENT((kind, name), (spaces, pos)) +  | HASH_IDENT(kind, name, pos) -> pos, Parser.HASH_IDENT((kind, name), (spaces, pos)) +  | FUNC_IDENT(kind, name, pos) -> pos, Parser.FUNC_IDENT((kind, name), (spaces, pos)) +  | STAR_IDENT(kind, name, pos) -> pos, Parser.STAR_IDENT((kind, name), (spaces, pos)) +  | RAW_IDENT(kind, name, pos) -> pos, Parser.RAW_IDENT((kind, name), (spaces, pos)) +  | RAW_IDENT_PAREN(kind, name, pos) -> pos, Parser.RAW_IDENT_PAREN((kind, name), (spaces, pos)) +  | ARRAYLEN_IDENT(kind, name, pos) -> pos, Parser.ARRAYLEN_IDENT((kind, name), (spaces, pos)) +  | FUNC_DECL_WITH_PROTO(name, proto, pos) -> pos, Parser.FUNC_DECL_WITH_PROTO((name, proto), (spaces, pos)) + +  | NEW(pos) -> pos, Parser.NEW((), (spaces, pos)) +  | FORMAT(pos) -> pos, Parser.FORMAT((), (spaces, pos)) +  | COMPARE_OP(s, pos) -> pos, Parser.COMPARE_OP(s, (spaces, pos)) +  | EQ_OP(s, pos) -> pos, Parser.EQ_OP(s, (spaces, pos)) +  | ASSIGN(s, pos) -> pos, Parser.ASSIGN(s, (spaces, pos)) +  | FOR(s, pos) -> pos, Parser.FOR(s, (spaces, pos)) + +  | DOTDOT(s, pos) -> pos, Parser.DOTDOT(s, (spaces, pos)) +  | MULT(s, pos) -> pos, Parser.MULT(s, (spaces, pos)) 
+  | BIT_SHIFT(s, pos) -> pos, Parser.BIT_SHIFT(s, (spaces, pos)) +  | PLUS(s, pos) -> pos, Parser.PLUS(s, (spaces, pos)) +  | ONE_SCALAR_PARA(s, pos) -> pos, Parser.ONE_SCALAR_PARA(s, (spaces, pos)) + +  | EOF              (pos) -> pos, Parser.EOF              ((), (spaces, pos)) +  | IF               (pos) -> pos, Parser.IF               ((), (spaces, pos)) +  | ELSIF            (pos) -> pos, Parser.ELSIF            ((), (spaces, pos)) +  | ELSE             (pos) -> pos, Parser.ELSE             ((), (spaces, pos)) +  | UNLESS           (pos) -> pos, Parser.UNLESS           ((), (spaces, pos)) +  | DO               (pos) -> pos, Parser.DO               ((), (spaces, pos)) +  | WHILE            (pos) -> pos, Parser.WHILE            ((), (spaces, pos)) +  | UNTIL            (pos) -> pos, Parser.UNTIL            ((), (spaces, pos)) +  | MY               (pos) -> pos, Parser.MY               ((), (spaces, pos)) +  | CONTINUE         (pos) -> pos, Parser.CONTINUE         ((), (spaces, pos)) +  | SUB              (pos) -> pos, Parser.SUB              ((), (spaces, pos)) +  | LOCAL            (pos) -> pos, Parser.LOCAL            ((), (spaces, pos)) +  | USE              (pos) -> pos, Parser.USE              ((), (spaces, pos)) +  | PACKAGE          (pos) -> pos, Parser.PACKAGE          ((), (spaces, pos)) +  | BEGIN            (pos) -> pos, Parser.BEGIN            ((), (spaces, pos)) +  | END              (pos) -> pos, Parser.END              ((), (spaces, pos)) +  | AT               (pos) -> pos, Parser.AT               ((), (spaces, pos)) +  | DOLLAR           (pos) -> pos, Parser.DOLLAR           ((), (spaces, pos)) +  | PERCENT          (pos) -> pos, Parser.PERCENT          ((), (spaces, pos)) +  | AMPERSAND        (pos) -> pos, Parser.AMPERSAND        ((), (spaces, pos)) +  | STAR             (pos) -> pos, Parser.STAR             ((), (spaces, pos)) +  | ARRAYLEN         (pos) -> pos, Parser.ARRAYLEN         ((), (spaces, pos)) +  | SEMI_COLON       (pos) -> pos, Parser.SEMI_COLON       ((), (spaces, pos)) +  | PKG_SCOPE        (pos) -> pos, Parser.PKG_SCOPE        ((), (spaces, pos)) +  | PAREN            (pos) -> pos, Parser.PAREN            ((), (spaces, pos)) +  | PAREN_END        (pos) -> pos, Parser.PAREN_END        ((), (spaces, pos)) +  | BRACKET          (pos) -> pos, Parser.BRACKET          ((), (spaces, pos)) +  | BRACKET_END      (pos) -> pos, Parser.BRACKET_END      ((), (spaces, pos)) +  | BRACKET_HASHREF  (pos) -> pos, Parser.BRACKET_HASHREF  ((), (spaces, pos)) +  | ARRAYREF         (pos) -> pos, Parser.ARRAYREF         ((), (spaces, pos)) +  | ARRAYREF_END     (pos) -> pos, Parser.ARRAYREF_END     ((), (spaces, pos)) +  | ARROW            (pos) -> pos, Parser.ARROW            ((), (spaces, pos)) +  | INCR             (pos) -> pos, Parser.INCR             ((), (spaces, pos)) +  | DECR             (pos) -> pos, Parser.DECR             ((), (spaces, pos)) +  | POWER            (pos) -> pos, Parser.POWER            ((), (spaces, pos)) +  | TIGHT_NOT        (pos) -> pos, Parser.TIGHT_NOT        ((), (spaces, pos)) +  | BIT_NEG          (pos) -> pos, Parser.BIT_NEG          ((), (spaces, pos)) +  | REF              (pos) -> pos, Parser.REF              ((), (spaces, pos)) +  | PATTERN_MATCH    (pos) -> pos, Parser.PATTERN_MATCH    ((), (spaces, pos)) +  | PATTERN_MATCH_NOT(pos) -> pos, Parser.PATTERN_MATCH_NOT((), (spaces, pos)) +  | LT               (pos) -> pos, Parser.LT               ((), (spaces, pos)) +  | GT               (pos) -> pos, Parser.GT               ((), 
(spaces, pos)) +  | BIT_AND          (pos) -> pos, Parser.BIT_AND          ((), (spaces, pos)) +  | BIT_OR           (pos) -> pos, Parser.BIT_OR           ((), (spaces, pos)) +  | BIT_XOR          (pos) -> pos, Parser.BIT_XOR          ((), (spaces, pos)) +  | AND_TIGHT        (pos) -> pos, Parser.AND_TIGHT        ((), (spaces, pos)) +  | OR_TIGHT         (pos) -> pos, Parser.OR_TIGHT         ((), (spaces, pos)) +  | QUESTION_MARK    (pos) -> pos, Parser.QUESTION_MARK    ((), (spaces, pos)) +  | COLON            (pos) -> pos, Parser.COLON            ((), (spaces, pos)) +  | COMMA            (pos) -> pos, Parser.COMMA            ((), (spaces, pos)) +  | RIGHT_ARROW      (pos) -> pos, Parser.RIGHT_ARROW      ((), (spaces, pos)) +  | NOT              (pos) -> pos, Parser.NOT              ((), (spaces, pos)) +  | AND              (pos) -> pos, Parser.AND              ((), (spaces, pos)) +  | OR               (pos) -> pos, Parser.OR               ((), (spaces, pos)) +  | XOR              (pos) -> pos, Parser.XOR              ((), (spaces, pos))    | SPACE _ | CR -> internal_error "raw_token_to_token" -let rec lexbuf2list t lexbuf = -  let rec f () = -    match t lexbuf with -    | Parser.EOF _ -> [] -    | e -> e :: f() -  in -  let l = f() in -  l +and raw_token_to_token spaces raw_token =  +  let _, token = raw_token_to_pos_and_token spaces raw_token in +  token + +and raw_interpolated_string_to_tokens l = +  List.map (fun (s, rtok) -> s, concat_spaces Space_0 rtok) l + +and concat_spaces spaces = function +  | CR :: l -> concat_spaces Space_cr l +  | SPACE n :: l -> +      let spaces' =  +	match spaces with +	| Space_cr -> Space_cr +	| Space_0 -> if n = 1 then Space_1 else Space_n +	| _ -> Space_n +      in +      concat_spaces spaces' l +  | [] -> [] +  | token :: l -> raw_token_to_pos_and_token spaces token :: concat_spaces Space_0 l + +let rec lexbuf2list accu t lexbuf = +  match t lexbuf with +  | EOF pos -> List.rev (EOF pos :: accu) +  | e -> lexbuf2list (e :: accu) t lexbuf + +let get_token token lexbuf =  +  let tokens = lexbuf2list [] token lexbuf in +  let tokens = concat_bareword_paren tokens in +  let tokens = concat_spaces Space_0 tokens in +  tokens + +let next_rule = Stack.create() -let next_rule = ref None -let bpos = -1,-1  let pos lexbuf = lexeme_start lexbuf, lexeme_end lexbuf  let pos2sfull_with start end_ = Info.pos2sfull (!current_file, start, end_)  let pos2sfull lexbuf = pos2sfull_with (lexeme_start lexbuf) (lexeme_end lexbuf) @@ -197,37 +205,72 @@ let add_a_new_line raw_pos =     lpush current_file_lines_starts raw_pos  let here_docs = Queue.create() +let raw_here_docs = Queue.create()  let current_here_doc_mark = ref "" -let here_doc_next_line mark interpolate = +let here_doc_next_line mark = +  let here_doc_ref = ref([], bpos) in +  Queue.push (mark, here_doc_ref) here_docs ; +  here_doc_ref +let raw_here_doc_next_line mark =    let here_doc_ref = ref("", bpos) in -  Queue.push (interpolate, mark, here_doc_ref) here_docs ; +  Queue.push (mark, here_doc_ref) raw_here_docs ;    here_doc_ref  let delimit_char = ref '/'  let not_ok_for_match = ref (-1)  let string_nestness = ref 0 -let building_current_string = ref "" +let building_current_interpolated_string = Stack.create() +let building_current_string = Stack.create()  let current_string_start_pos = ref 0  let current_string_start_line = ref 0 -let die lexbuf err = failwith (pos2sfull_with !current_string_start_pos (lexeme_end lexbuf) ^ err) +let warn lexbuf err = prerr_endline (pos2sfull_with (lexeme_start lexbuf) 
(lexeme_end lexbuf) ^ err) +let die lexbuf err = failwith (pos2sfull_with (lexeme_start lexbuf) (lexeme_end lexbuf) ^ err) +let die_in_string lexbuf err = failwith (pos2sfull_with !current_string_start_pos (lexeme_end lexbuf) ^ err) + +let raw_ins t lexbuf =  +  Stack.push (ref "") building_current_string;  +  current_string_start_pos := lexeme_start lexbuf; +  t lexbuf ; +  !(Stack.pop building_current_string), (!current_string_start_pos, lexeme_end lexbuf) +let raw_ins_to_string t lexbuf = +  let s, pos = raw_ins t lexbuf in +  not_ok_for_match := lexeme_end lexbuf;  +  RAW_STRING(s, pos) + +let next_interpolated toks = +  let r = Stack.top building_current_string in +  Queue.push (!r, toks) (Stack.top building_current_interpolated_string) ; +  r := "" -let ins t lexbuf =  -  building_current_string := "";  +let ins t lexbuf = +  Stack.push (Queue.create()) building_current_interpolated_string ; +  Stack.push (ref "") building_current_string;     current_string_start_pos := lexeme_start lexbuf;    t lexbuf ; -  !building_current_string, (!current_string_start_pos, lexeme_end lexbuf) +  next_interpolated [] ; +  let _ = Stack.pop building_current_string in +  queue2list (Stack.pop building_current_interpolated_string), (!current_string_start_pos, lexeme_end lexbuf)  let ins_to_string t lexbuf =    let s, pos = ins t lexbuf in    not_ok_for_match := lexeme_end lexbuf;     STRING(s, pos)  let next_s s t lexbuf = -  building_current_string := !building_current_string ^ s ; +  let r = Stack.top building_current_string in r := !r ^ s ;    t lexbuf  let next t lexbuf = next_s (lexeme lexbuf) t lexbuf +let string_interpolate token pre lexbuf = +   let s = lexeme lexbuf in +   let local_lexbuf = Lexing.from_string (pre ^ s ^ " ") in (* add a space to help tokenizing "xxx$$" *) +   local_lexbuf.lex_abs_pos <- lexeme_start lexbuf ; +   let l = lexbuf2list [] token local_lexbuf in +   let l = concat_bareword_paren l in +   next_interpolated l;  +   (Stack.pop next_rule) lexbuf +  let ident_type_from_char fq name lexbuf c =     not_ok_for_match := lexeme_end lexbuf;     match c with @@ -292,9 +335,14 @@ rule token = parse  | '\n' {       add_a_new_line(lexeme_end lexbuf);      (try -      let (interpolate, mark, r) = Queue.pop here_docs in +      let (mark, r) = Queue.pop here_docs in +      current_here_doc_mark := mark ; +      r := ins here_doc lexbuf +    with Queue.Empty ->  +     try +      let (mark, r) = Queue.pop raw_here_docs in        current_here_doc_mark := mark ; -      r := ins (if interpolate then here_doc else raw_here_doc) lexbuf +      r := raw_ins raw_here_doc lexbuf      with Queue.Empty -> ());      CR    } @@ -357,7 +405,7 @@ rule token = parse  | "END"      { END(pos lexbuf) }  | "print"    { PRINT(lexeme lexbuf, pos lexbuf) }  | "new"      { NEW(pos lexbuf) } -| "format"   { let _ = here_doc_next_line "." false in FORMAT(pos lexbuf) } +| "format"   { let _ = raw_here_doc_next_line "." 
in FORMAT(pos lexbuf) }  | "defined"  { ONE_SCALAR_PARA(lexeme lexbuf, pos lexbuf) }  | "split" @@ -408,7 +456,7 @@ rule token = parse        delimit_char := '/' ;        current_string_start_line := !current_file_current_line;        let s, pos = ins delimited_string lexbuf in -      let opts, _ = ins pattern_options lexbuf in +      let opts, _ = raw_ins pattern_options lexbuf in        check_multi_line_delimited_string (Some opts) pos ;        PATTERN(s, opts, pos)      )  @@ -420,7 +468,7 @@ rule token = parse        putback lexbuf 1 ;        delimit_char := '/' ;        let s, pos = ins delimited_string lexbuf in -      let opts, _ = ins pattern_options lexbuf in +      let opts, _ = raw_ins pattern_options lexbuf in        PATTERN(s, opts, pos)      )     } @@ -429,7 +477,7 @@ rule token = parse    delimit_char := lexeme_char lexbuf 1 ;    current_string_start_line := !current_file_current_line;    let s, pos = ins delimited_string lexbuf in -  let opts, _ = ins pattern_options lexbuf in +  let opts, _ = raw_ins pattern_options lexbuf in    check_multi_line_delimited_string (Some opts) pos ;    PATTERN(s, opts, pos)  } @@ -438,7 +486,7 @@ rule token = parse    delimit_char := lexeme_char lexbuf 2 ;    current_string_start_line := !current_file_current_line;    let s, pos = ins delimited_string lexbuf in -  let opts, _ = ins pattern_options lexbuf in +  let opts, _ = raw_ins pattern_options lexbuf in    check_multi_line_delimited_string (Some opts) pos ;    PATTERN(s, opts, pos)  } @@ -448,7 +496,7 @@ rule token = parse    current_string_start_line := !current_file_current_line;    let s1, (start, _) = ins delimited_string lexbuf in     let s2, (_, end_)  = ins delimited_string lexbuf in  -  let opts, _ = ins pattern_options lexbuf in +  let opts, _ = raw_ins pattern_options lexbuf in    let pos = start, end_ in    check_multi_line_delimited_string (Some opts) pos ;    PATTERN_SUBST(s1, s2, opts, pos) @@ -459,7 +507,7 @@ rule token = parse    current_string_start_line := !current_file_current_line;    let s1, (start, _) = ins delimited_string lexbuf in     let s2, (_, end_)  = ins delimited_string lexbuf in  -  let opts, _ = ins pattern_options lexbuf in +  let opts, _ = raw_ins pattern_options lexbuf in    let pos = start, end_ in    check_multi_line_delimited_string None pos ;    PATTERN_SUBST(s1, s2, opts, pos) @@ -467,11 +515,11 @@ rule token = parse  | "<<" ident {       not_ok_for_match := lexeme_end lexbuf;  -    HERE_DOC(here_doc_next_line (skip_n_char 2 (lexeme lexbuf)) true, pos lexbuf) +    HERE_DOC(here_doc_next_line (skip_n_char 2 (lexeme lexbuf)), pos lexbuf)    }  | "<<'" ident "'" {       not_ok_for_match := lexeme_end lexbuf;  -    HERE_DOC(here_doc_next_line (skip_n_char_ 3 1 (lexeme lexbuf)) false, pos lexbuf) +    RAW_HERE_DOC(raw_here_doc_next_line (skip_n_char_ 3 1 (lexeme lexbuf)), pos lexbuf)    }  | "<<" ' '+ "'"  | "<<" ' '+ ident {  @@ -543,32 +591,37 @@ rule token = parse  	  COMMAND_STRING(s, pos) }  | "q("  { ins_to_string qstring lexbuf }  | "qq(" { ins_to_string qqstring lexbuf } -| "qw(" { let s, pos = ins qstring lexbuf in QUOTEWORDS(s, pos) } +| "qw(" { let s, pos = raw_ins qstring lexbuf in QUOTEWORDS(s, pos) }  | eof   { EOF(pos lexbuf) }  | _ { failwith (Printf.sprintf "%serror tokenizing <<%s>>" (pos2sfull lexbuf) (lexeme lexbuf)) }  and string = parse -  '"' { () } -| '\\' { next_rule := Some string ; string_escape lexbuf } +| '"' { () } +| '\\' { Stack.push string next_rule ; string_escape lexbuf } +| '$'  { Stack.push string next_rule ; 
string_interpolate_scalar lexbuf } +| '@'  { Stack.push string next_rule ; string_interpolate_array lexbuf }  | '\n' {       add_a_new_line(lexeme_end lexbuf);      next string lexbuf    } -| [^ '\n' '\\' '"']+ { next string lexbuf } -| eof { die lexbuf "Unterminated_string" } + +| [^ '\n' '\\' '"' '$' '@']+ { next string lexbuf } +| eof { die_in_string lexbuf "Unterminated_string" }  and delimited_string = parse -| '\\' { next_rule := Some delimited_string ; string_escape lexbuf } +| '\\' { Stack.push delimited_string next_rule ; string_escape lexbuf } +| '$'  { Stack.push delimited_string next_rule ; delimited_string_interpolate_scalar lexbuf } +| '@'  { Stack.push delimited_string next_rule ; string_interpolate_array lexbuf }  | '\n' {       add_a_new_line(lexeme_end lexbuf);      next delimited_string lexbuf    } -| eof { die lexbuf "Unterminated_delimited_string" } -| [ ^ '\\' '\n' ] { if lexeme_char lexbuf 0 <> !delimit_char then next delimited_string lexbuf } +| eof { die_in_string lexbuf "Unterminated_delimited_string" } +| [ ^ '\n' '\\' '$' '@'] { if lexeme_char lexbuf 0 <> !delimit_char then next delimited_string lexbuf }  and rawstring = parse -  ''' { () } +| ''' { () }  | '\n' {       add_a_new_line(lexeme_end lexbuf);      next rawstring lexbuf @@ -576,23 +629,25 @@ and rawstring = parse  | '\\' { next rawstring lexbuf }  | "\\'" { next_s "'" rawstring lexbuf }  | [^ '\n' ''' '\\']+ { next rawstring lexbuf } -| eof { die lexbuf "Unterminated_rawstring" } +| eof { die_in_string lexbuf "Unterminated_rawstring" }  and qqstring = parse -  ')' {  +| ')' {       if !string_nestness <> 0 then (decr string_nestness; next qqstring lexbuf)    }  | '(' {      incr string_nestness;      next qqstring lexbuf    } -| '\\' { next_rule := Some qqstring ; string_escape lexbuf } +| '\\' { Stack.push qqstring next_rule ; string_escape lexbuf } +| '$'  { Stack.push qqstring next_rule ; string_interpolate_scalar lexbuf } +| '@'  { Stack.push qqstring next_rule ; string_interpolate_array lexbuf }  | '\n' {       add_a_new_line(lexeme_end lexbuf);      next qqstring lexbuf    } -| [^ '\n' '(' ')' '\\']+ { next qqstring lexbuf } -| eof { die lexbuf "Unterminated_qqstring" } +| [^ '\n' '(' ')' '\\' '$' '@']+ { next qqstring lexbuf } +| eof { die_in_string lexbuf "Unterminated_qqstring" }  and qstring = parse  | ')' {  @@ -607,11 +662,13 @@ and qstring = parse      next qstring lexbuf    }  | [^ '\n' '(' ')']+ { next qstring lexbuf } -| eof { die lexbuf "Unterminated_qstring" } +| eof { die_in_string lexbuf "Unterminated_qstring" }  and here_doc = parse -| '\\' { next_rule := Some here_doc ; string_escape lexbuf } -| [ ^ '\n' '\\' ]* { +| '\\' { Stack.push here_doc next_rule ; string_escape lexbuf } +| '$'  { Stack.push here_doc next_rule ; string_interpolate_scalar lexbuf } +| '@'  { Stack.push here_doc next_rule ; string_interpolate_array lexbuf } +| [ ^ '\n' '\\' '$' '@' ]* {      let s = lexeme lexbuf in      if chomps s <> !current_here_doc_mark      then next_s s here_doc lexbuf  @@ -621,7 +678,7 @@ and here_doc = parse      add_a_new_line(lexeme_end lexbuf);      next here_doc lexbuf    } -| eof { die lexbuf "Unterminated_here_doc" } +| eof { die_in_string lexbuf "Unterminated_here_doc" }  and raw_here_doc = parse  | [ ^ '\n' ]* { @@ -634,22 +691,72 @@ and raw_here_doc = parse      add_a_new_line(lexeme_end lexbuf);      next raw_here_doc lexbuf    } -| eof { die lexbuf "Unterminated_raw_here_doc" } +| eof { die_in_string lexbuf "Unterminated_raw_here_doc" }  and string_escape = parse -| '0' { 
next_s "\000" (some !next_rule) lexbuf } -| '"' { next_s "\"" (some !next_rule) lexbuf } -| ''' { next_s "'"  (some !next_rule) lexbuf } -| 'n' { next_s "\n" (some !next_rule) lexbuf } -| 't' { next_s "\t" (some !next_rule) lexbuf } +| '0' { next_s "\000" (Stack.pop next_rule) lexbuf } +| '"' { next_s "\"" (Stack.pop next_rule) lexbuf } +| ''' { next_s "'"  (Stack.pop next_rule) lexbuf } +| 'n' { next_s "\n" (Stack.pop next_rule) lexbuf } +| 't' { next_s "\t" (Stack.pop next_rule) lexbuf }  | 'x' _ _ {     try      let s = String.make 1 (Char.chr (int_of_string ("0" ^ lexeme lexbuf))) in -    next_s s (some !next_rule) lexbuf  -  with Failure("int_of_string") -> die lexbuf ("Bad_hex_in_string \"" ^ lexeme lexbuf ^ "\"") +    next_s s (Stack.pop next_rule) lexbuf  +  with Failure("int_of_string") -> die_in_string lexbuf ("Bad_hex_in_string \"" ^ lexeme lexbuf ^ "\"") +  } +| _ { next_s ("\\" ^ lexeme lexbuf) (Stack.pop next_rule) lexbuf } + + +and string_interpolate_scalar = parse +| '$' ident +| ['0'-'9'] +| '{' [^ '{' '}']* '}' +| (ident | (ident? ("::" ident)+)) "->"? (('{' [^ '{' '}' '\n']* '}') | ('[' [^ '[' ']' '\n']* ']'))* +| [^ '{' '}' ' ' '\n' '"'] { (* eg: $! $$ *) +      string_interpolate token "$" lexbuf +  } + +| "{" +| ident "->"? '{' +| eof { next_s "$" (Stack.pop next_rule) lexbuf } +| _ { warn lexbuf (Printf.sprintf "weird \"%s\" in string" (lexeme lexbuf)); next_s ("$" ^ lexeme lexbuf) (Stack.pop next_rule) lexbuf } + +and delimited_string_interpolate_scalar = parse (* needed for delimited string like m!foo$! where $! should not be taken as is... *) +| '$' ident +| ['0'-'9'] +| '{' [^ '{' '}']* '}' +| (ident | (ident? ("::" ident)+)) "->"? ('{' [^ '{' '}' '\n']* '}')* +| (ident | (ident? ("::" ident)+)) "->"? (('{' [^ '{' '}' '\n']* '}') | ('[' ('$' ident | ['0'-'9']+) ']'))* +  { +      string_interpolate token "$" lexbuf    } -| _ { next_s ("\\" ^ lexeme lexbuf) (some !next_rule) lexbuf } + +| (ident | (ident? ("::" ident)+)) "->"? (('{' [^ '{' '}' '\n']* '}') | ('[' ['$' '0'-'9'] [^ '[' ']' '\n']* ']'))* +  { +   die lexbuf (Printf.sprintf "I really can't handle this, [xxx] can be indexing or not based on stellar position :-(") +  } + + +| "{" +| ident "->"? '{' +| eof { next_s "$" (Stack.pop next_rule) lexbuf } +| _ {  +    let c = lexeme_char lexbuf 0 in +    if c <> !delimit_char && c <> '|' && c<>')' && c<>'/' then warn lexbuf (Printf.sprintf "weird \"%s\" in string" (lexeme lexbuf));  +    putback lexbuf 1; +    (Stack.pop next_rule) lexbuf  +  } + +and string_interpolate_array = parse +| '$' ident +| '{' [^ '{' '}']* '}' +| (ident | (ident? ("::" ident)+)) { string_interpolate token "@" lexbuf } + +| [ '*' '<' '>' ']' '.' 
'('] { next_s ("@" ^ lexeme lexbuf) (Stack.pop next_rule) lexbuf } +| eof { next_s "$" (Stack.pop next_rule) lexbuf } +| _ { warn lexbuf (Printf.sprintf "weird \"%s\" in string" (lexeme lexbuf)); next_s ("$" ^ lexeme lexbuf) (Stack.pop next_rule) lexbuf }  and pattern_options = parse  | [ 'g' 'i' 'm' 'o' 's' 'x' 'e' 'd' ] { next pattern_options lexbuf } @@ -661,7 +768,7 @@ and pod_command = parse    let command = String.sub s 0 (try String.index s ' ' with Not_found -> String.length s) in    match command with    | "cut" -> -    if !building_current_string = "" then +    if !(Stack.top building_current_string) = "" then        failwith(pos2sfull lexbuf ^ "found POD command \"=cut\" but it is not a POD block")    | "head1" | "head2" | "head3" | "head4" | "over" | "item" | "back" | "pod" | "begin" | "end" | "for" ->        next pod lexbuf diff --git a/perl_checker.src/parser.mly b/perl_checker.src/parser.mly index 8793b9e..05bdfe4 100644 --- a/perl_checker.src/parser.mly +++ b/perl_checker.src/parser.mly @@ -4,17 +4,22 @@    open Parser_helper    let parse_error msg = die_rule msg - +  let prog_ref = ref None +  let to_String e = Parser_helper.to_String (some !prog_ref) e +  let from_PATTERN e = Parser_helper.from_PATTERN (some !prog_ref) e +  let from_PATTERN_SUBST e = Parser_helper.from_PATTERN_SUBST (some !prog_ref) e  %}  %token <unit   * (Types.spaces * Types.raw_pos)> EOF -%token <string * (Types.spaces * Types.raw_pos)> NUM STRING BAREWORD BAREWORD_PAREN REVISION COMMENT POD LABEL PRINT_TO_STAR PRINT_TO_SCALAR ONE_SCALAR_PARA -%token <string * (Types.spaces * Types.raw_pos)> COMMAND_STRING QUOTEWORDS COMPACT_HASH_SUBSCRIPT +%token <string * (Types.spaces * Types.raw_pos)> NUM RAW_STRING BAREWORD BAREWORD_PAREN REVISION COMMENT POD LABEL PRINT_TO_STAR PRINT_TO_SCALAR ONE_SCALAR_PARA +%token <string * (Types.spaces * Types.raw_pos)> QUOTEWORDS COMPACT_HASH_SUBSCRIPT +%token <(string * Types.raw_pos) * (Types.spaces * Types.raw_pos)> RAW_HERE_DOC +%token <(string * ((int * int) * token) list) list * (Types.spaces * Types.raw_pos)> STRING COMMAND_STRING +%token <((string * ((int * int) * token) list) list * Types.raw_pos) * (Types.spaces * Types.raw_pos)> HERE_DOC -%token <(string * Types.raw_pos) ref * (Types.spaces * Types.raw_pos)> HERE_DOC -%token <(string * string) * (Types.spaces * Types.raw_pos)> PATTERN -%token <(string * string * string) * (Types.spaces * Types.raw_pos)> PATTERN_SUBST +%token <((string * ((int * int) * token) list) list * string) * (Types.spaces * Types.raw_pos)> PATTERN +%token <((string * ((int * int) * token) list) list * (string * ((int * int) * token) list) list * string) * (Types.spaces * Types.raw_pos)> PATTERN_SUBST  %token <(string option * string) * (Types.spaces * Types.raw_pos)> SCALAR_IDENT ARRAY_IDENT HASH_IDENT FUNC_IDENT STAR_IDENT RAW_IDENT RAW_IDENT_PAREN ARRAYLEN_IDENT  %token <(string * string) * (Types.spaces * Types.raw_pos)> FUNC_DECL_WITH_PROTO @@ -84,14 +89,15 @@  %left PAREN PREC_HIGH  %left ARRAYREF BRACKET -%type <Types.fromparser list> prog -%type <(Types.priority * Types.fromparser) * (Types.spaces * Types.raw_pos)> expr +%type <Types.fromparser list> prog inside +%type <(Types.priority * Types.fromparser) * (Types.spaces * Types.raw_pos)> expr term -%start prog +%start prog inside  %%  prog: lines EOF {check_package (fst $1); fst $1} +inside: lines EOF {fst $1}  lines: /* A collection of "lines" in the program */  | {[], (Space_none, bpos)} @@ -215,6 +221,8 @@ term:  | term PATTERN_MATCH     scalar { (P_expr, Too_complex), pos_range 
$1 $3}  | term PATTERN_MATCH_NOT scalar { (P_expr, Too_complex), pos_range $1 $3} +| term PATTERN_MATCH     RAW_STRING {die_with_pos (sndsnd $3) "use a regexp, not a string"} +| term PATTERN_MATCH_NOT RAW_STRING {die_with_pos (sndsnd $3) "use a regexp, not a string"}  | term PATTERN_MATCH     STRING {die_with_pos (sndsnd $3) "use a regexp, not a string"}  | term PATTERN_MATCH_NOT STRING {die_with_pos (sndsnd $3) "use a regexp, not a string"} @@ -235,9 +243,10 @@ term:  | term DECR    {sp_0($2); (P_tight, Call_op("-- post", [sndfst $1])), pos_range $1 $2}  | NOT argexpr  {(P_and, Call_op("not", sndfst $2)), pos_range $1 $2} +| ONE_SCALAR_PARA RAW_STRING               {call_one_scalar_para $1 [to_Raw_string $2], pos_range $1 $2}  | ONE_SCALAR_PARA STRING                   {call_one_scalar_para $1 [to_String $2], pos_range $1 $2}  | ONE_SCALAR_PARA variable                 {call_one_scalar_para $1 [fst $2], pos_range $1 $2} -| ONE_SCALAR_PARA subscripted              {call_one_scalar_para $1 [fst $2], pos_range $1 $2} +| ONE_SCALAR_PARA restricted_subscripted   {call_one_scalar_para $1 [fst $2], pos_range $1 $2}  | ONE_SCALAR_PARA parenthesized            {call_one_scalar_para $1 (sndfst $2), pos_range $1 $2}  | ONE_SCALAR_PARA word_paren parenthesized {call_one_scalar_para $1 [Call(fst $2, sndfst $3)], pos_range $1 $3} @@ -295,10 +304,12 @@ term:  | NUM {(P_tok, Num(fst $1, get_pos $1)), snd $1}  | STRING {(P_tok, to_String $1), snd $1} -| REVISION {(P_tok, to_String $1), snd $1} +| RAW_STRING {(P_tok, to_Raw_string $1), snd $1} +| REVISION {(P_tok, to_Raw_string $1), snd $1}  | COMMAND_STRING {(P_expr, Call_op("``", [to_String $1])), snd $1} -| QUOTEWORDS {(P_tok, Call_op("qw", [to_String $1])), snd $1} -| HERE_DOC {(P_tok, String(fst!(fst $1), get_pos $1)), snd $1} +| QUOTEWORDS {(P_tok, Call_op("qw", [to_Raw_string $1])), snd $1} +| HERE_DOC {(P_tok, String([], raw_pos2pos (sndfst $1))), snd $1} +| RAW_HERE_DOC {(P_tok, Raw_string(fstfst $1, raw_pos2pos (sndfst $1))), snd $1}  | PATTERN {(P_expr, Call_op("m//", var_dollar_ :: from_PATTERN $1)), snd $1}  | PATTERN_SUBST {(P_expr, Call_op("s///", var_dollar_ :: from_PATTERN_SUBST $1)), snd $1}  | diamond {(P_expr, fst $1), snd $1} @@ -318,6 +329,13 @@ subscripted: /* Some kind of subscripted expression */  | subscripted arrayref                 {sp_0($2); Deref_with(I_array, fst $1, only_one $2), pos_range $1 $2} /* $foo->[$bar][$baz] */  | subscripted parenthesized            {sp_0($2); Deref_with(I_func , fst $1, List(sndfst $2)), pos_range $1 $2} /* $foo->{bar}(@args) */ +restricted_subscripted: /* Some kind of subscripted expression */ +| scalar bracket_subscript             {sp_0($2); Deref_with(I_hash , fst $1, fst      $2), pos_range $1 $2} /* $foo{bar} */ +| scalar arrayref                      {sp_0($2); Deref_with(I_array, fst $1, only_one $2), pos_range $1 $2} /* $array[$element] */ +| restricted_subscripted bracket_subscript        {sp_0($2); Deref_with(I_hash , fst $1, fst      $2), pos_range $1 $2} /* $foo->[bar]{baz} */ +| restricted_subscripted arrayref                 {sp_0($2); Deref_with(I_array, fst $1, only_one $2), pos_range $1 $2} /* $foo->[$bar][$baz] */ +| restricted_subscripted parenthesized            {sp_0($2); Deref_with(I_func , fst $1, List(sndfst $2)), pos_range $1 $2} /* $foo->{bar}(@args) */ +  arrayref:  | arrayref_start ARRAYREF_END {sp_0($2); fst $1, pos_range $1 $2}  | arrayref_start expr ARRAYREF_END {sp_0($3); fst $1 @ [sndfst $2], pos_range $1 $3} @@ -345,8 +363,8 @@ termdo: /* Things called with 
"do" */  | DO BRACKET lines BRACKET_END %prec PREC_HIGH {sp_n($2); check_block_sub $3 $4; Block(fst $3), pos_range $1 $4} /* do { code */  bracket_subscript: -| BRACKET expr BRACKET_END {sp_0($1); sp_0($2); sp_0($3); only_one_in_List $2, pos_range $1 $3} -| COMPACT_HASH_SUBSCRIPT {sp_0($1); to_String $1, snd $1} +| BRACKET expr BRACKET_END {sp_0($1); sp_same $2 $3; only_one_in_List $2, pos_range $1 $3} +| COMPACT_HASH_SUBSCRIPT {sp_0($1); to_Raw_string $1, snd $1}  variable:  | scalar   %prec PREC_HIGH {$1} @@ -378,11 +396,17 @@ word_paren:  | BAREWORD_PAREN { Ident(None, fst $1, get_pos $1), snd $1}  | RAW_IDENT_PAREN { to_Ident $1, snd $1} -arraylen: ARRAYLEN_IDENT {Deref(I_arraylen, to_Ident $1), snd $1} | ARRAYLEN  scalar {sp_0($2); Deref(I_arraylen, fst $2), snd $1} | ARRAYLEN  BRACKET lines BRACKET_END {sp_0($2); check_block_ref $3 $4; sp_same $3 $4; Deref(I_arraylen, Block(fst $3)), pos_range $1 $4} -scalar:   SCALAR_IDENT   {Deref(I_scalar  , to_Ident $1), snd $1} | DOLLAR    scalar {sp_0($2); Deref(I_scalar  , fst $2), snd $1} | DOLLAR    BRACKET lines BRACKET_END {sp_0($2); check_block_ref $3 $4; sp_same $3 $4; Deref(I_scalar  , Block(fst $3)), pos_range $1 $4} | DOLLAR BRACKET BRACKET expr BRACKET_END BRACKET_END {sp_0($2); sp_0($3); sp_p($5); sp_0($6); Deref(I_scalar, Ref(I_hash, sndfst $4)), pos_range $1 $6} -func:     FUNC_IDENT     {Deref(I_func    , to_Ident $1), snd $1} | AMPERSAND scalar {sp_0($2); Deref(I_func    , fst $2), snd $1} | AMPERSAND BRACKET lines BRACKET_END {sp_0($2); check_block_ref $3 $4; sp_same $3 $4; Deref(I_func    , Block(fst $3)), pos_range $1 $4} -array:    ARRAY_IDENT    {Deref(I_array   , to_Ident $1), snd $1} | AT        scalar {sp_0($2); Deref(I_array   , fst $2), snd $1} | AT        BRACKET lines BRACKET_END {sp_0($2); check_block_ref $3 $4; sp_same $3 $4; Deref(I_array   , Block(fst $3)), pos_range $1 $4} -hash:     HASH_IDENT     {Deref(I_hash    , to_Ident $1), snd $1} | PERCENT   scalar {sp_0($2); Deref(I_hash    , fst $2), snd $1} | PERCENT   BRACKET lines BRACKET_END {sp_0($2); check_block_ref $3 $4; sp_same $3 $4; Deref(I_hash    , Block(fst $3)), pos_range $1 $4} -star:     STAR_IDENT     {Deref(I_star    , to_Ident $1), snd $1} | STAR      scalar {sp_0($2); Deref(I_star    , fst $2), snd $1} | STAR      BRACKET lines BRACKET_END {sp_0($2); check_block_ref $3 $4; sp_same $3 $4; Deref(I_star    , Block(fst $3)), pos_range $1 $4} +arraylen: ARRAYLEN_IDENT {Deref(I_arraylen, to_Ident $1), snd $1} | ARRAYLEN  scalar {sp_0($2); Deref(I_arraylen, fst $2), snd $1} | ARRAYLEN  bracket_subscript {Deref(I_arraylen, fst $2), pos_range $1 $2} +scalar:   SCALAR_IDENT   {Deref(I_scalar  , to_Ident $1), snd $1} | DOLLAR    scalar {sp_0($2); Deref(I_scalar  , fst $2), snd $1} | DOLLAR    bracket_subscript {Deref(I_scalar  , fst $2), pos_range $1 $2} | DOLLAR BRACKET BRACKET expr BRACKET_END BRACKET_END {sp_0($2); sp_0($3); sp_p($5); sp_0($6); Deref(I_scalar, Ref(I_hash, sndfst $4)), pos_range $1 $6} +func:     FUNC_IDENT     {Deref(I_func    , to_Ident $1), snd $1} | AMPERSAND scalar {sp_0($2); Deref(I_func    , fst $2), snd $1} | AMPERSAND bracket_subscript {Deref(I_func    , fst $2), pos_range $1 $2} +array:    ARRAY_IDENT    {Deref(I_array   , to_Ident $1), snd $1} | AT        scalar {sp_0($2); Deref(I_array   , fst $2), snd $1} | AT        bracket_subscript {Deref(I_array   , fst $2), pos_range $1 $2} +hash:     HASH_IDENT     {Deref(I_hash    , to_Ident $1), snd $1} | PERCENT   scalar {sp_0($2); Deref(I_hash    , fst $2), snd $1} | PERCENT   
bracket_subscript {Deref(I_hash    , fst $2), pos_range $1 $2} +star:     STAR_IDENT     {Deref(I_star    , to_Ident $1), snd $1} | STAR      scalar {sp_0($2); Deref(I_star    , fst $2), snd $1} | STAR      bracket_subscript {Deref(I_star    , fst $2), pos_range $1 $2}  expr_or_empty: {Block [], (Space_none, bpos)} | expr {sndfst $1, snd $1} + +%% + +;; +prog_ref := Some inside +;; diff --git a/perl_checker.src/parser_helper.ml b/perl_checker.src/parser_helper.ml index ebd5cfd..c97c51d 100644 --- a/perl_checker.src/parser_helper.ml +++ b/perl_checker.src/parser_helper.ml @@ -46,7 +46,7 @@ let warn         raw_pos msg = prerr_endline (msg_with_pos raw_pos msg)  let die_rule msg = die_with_pos (Parsing.symbol_start(), Parsing.symbol_end()) msg  let warn_rule msg = warn (Parsing.symbol_start(), Parsing.symbol_end()) msg -let debug msg = if false then prerr_endline msg +let debug msg = if true then prerr_endline msg  let warn_too_many_space start = warn (start, start) "you should have only one space here"  let warn_no_space	start = warn (start, start) "you should have a space here" @@ -225,7 +225,7 @@ let check_block_ref (l, (_, (_, end_)) as ter_lines) (_, (space, _) as ter_BRACK  let to_Ident ((fq, name), (_, pos)) = Ident(fq, name, raw_pos2pos pos) -let to_String (s, (_, pos)) = String(s, raw_pos2pos pos) +let to_Raw_string (s, (_, pos)) = Raw_string(s, raw_pos2pos pos)  let op prio s (_, both) = prio, (((), both), s)  let op_p prio s e = sp_p e ; op prio s e @@ -251,9 +251,6 @@ let array_ident_to_hash_ident (e, (_, pos)) =    match e with    | Deref(I_array, e) -> Deref(I_hash, e)    | _ -> die_with_pos pos "internal error (array_ident_to_hash_ident)" - -let from_PATTERN ((s, opts), (_, pos)) = [ String(s, raw_pos2pos pos) ; String(opts, raw_pos2pos pos) ] -let from_PATTERN_SUBST ((s1, s2, opts), (_, pos)) = [ String(s1, raw_pos2pos pos) ; String(s2, raw_pos2pos pos) ; String(opts, raw_pos2pos pos) ]  let to_List = function    | [e] -> e @@ -267,6 +264,7 @@ let call(e, para) =        (match para with        | [ Ident _ ] -> ()        | [ String _ ] -> () +      | [ Raw_string _ ] -> ()        | _ -> die_rule "use either \"require PACKAGE\" or \"require 'PACKAGE.pm'\"")    | Ident(None, "N", _) ->        (match para with @@ -282,3 +280,34 @@ let call_one_scalar_para (e, (_, pos)) para =      | _ -> P_add    in    pri, Call(Ident(None, e, raw_pos2pos pos), para) + +let (current_lexbuf : Lexing.lexbuf option ref) = ref None + + +let rec list2tokens l = +  let rl = ref l in +  fun lexbuf -> +    match !rl with +    | [] -> internal_error "list2tokens" +    | ((start, end_), e) :: l ->  +	lexbuf.Lexing.lex_abs_pos <- 0 ; +	lexbuf.Lexing.lex_start_pos <- start ; +	lexbuf.Lexing.lex_curr_pos <- end_ ; +	rl := l ; e + +let parse_tokens parse tokens lexbuf_opt = +  if lexbuf_opt <> None then current_lexbuf := lexbuf_opt ; +  if tokens = [] then [] else +  parse (list2tokens tokens) (some !current_lexbuf) + +let parse_interpolated parse l = List.map (fun (s, tokens) -> s, to_List(parse_tokens parse tokens None)) l + +let to_String parse (l, (_, pos)) = String(parse_interpolated parse l, raw_pos2pos pos) + +let from_PATTERN parse ((s, opts), (_, pos)) =  +  [ String(parse_interpolated parse s, raw_pos2pos pos) ;  +    Raw_string(opts, raw_pos2pos pos) ] +let from_PATTERN_SUBST parse ((s1, s2, opts), (_, pos)) =  +  [ String(parse_interpolated parse s1, raw_pos2pos pos) ;  +    String(parse_interpolated parse s2, raw_pos2pos pos) ;  +    Raw_string(opts, raw_pos2pos pos) ] diff --git 
a/perl_checker.src/parser_helper.mli b/perl_checker.src/parser_helper.mli index db59ee5..4655810 100644 --- a/perl_checker.src/parser_helper.mli +++ b/perl_checker.src/parser_helper.mli @@ -58,7 +58,7 @@ val check_block_ref :    'a * (Types.spaces * (int * 'b)) -> unit  val to_Ident :    (string option * string) * ('a * (int * int)) -> Types.fromparser -val to_String : string * ('a * (int * int)) -> Types.fromparser +val to_Raw_string : string * ('a * (int * int)) -> Types.fromparser  val op : 'a -> 'b -> 'c * 'd -> 'a * ((unit * 'd) * 'b)  val op_p :    'a -> @@ -74,10 +74,6 @@ val only_one_in_List :    ('a * Types.fromparser) * ('b * (int * int)) -> Types.fromparser  val array_ident_to_hash_ident :    Types.fromparser * ('a * (int * int)) -> Types.fromparser -val from_PATTERN : -  (string * string) * ('a * (int * int)) -> Types.fromparser list -val from_PATTERN_SUBST : -  (string * string * string) * ('a * (int * int)) -> Types.fromparser list  val to_List : Types.fromparser list -> Types.fromparser  val sub_declaration :    Types.fromparser * string -> Types.fromparser list -> Types.fromparser @@ -85,3 +81,24 @@ val call : Types.fromparser * Types.fromparser list -> Types.fromparser  val call_one_scalar_para :    string * ('a * (int * int)) ->    Types.fromparser list -> Types.priority * Types.fromparser +val current_lexbuf : Lexing.lexbuf option ref +val list2tokens : ((int * int) * 'a) list -> Lexing.lexbuf -> 'a +val parse_tokens : +  ((Lexing.lexbuf -> 'a) -> Lexing.lexbuf -> 'b list) -> +  ((int * int) * 'a) list -> Lexing.lexbuf option -> 'b list +val parse_interpolated : +  ((Lexing.lexbuf -> 'a) -> Lexing.lexbuf -> Types.fromparser list) -> +  ('b * ((int * int) * 'a) list) list -> ('b * Types.fromparser) list +val to_String : +  ((Lexing.lexbuf -> 'a) -> Lexing.lexbuf -> Types.fromparser list) -> +  (string * ((int * int) * 'a) list) list * ('b * (int * int)) -> +  Types.fromparser +val from_PATTERN : +  ((Lexing.lexbuf -> 'a) -> Lexing.lexbuf -> Types.fromparser list) -> +  ((string * ((int * int) * 'a) list) list * string) * ('b * (int * int)) -> +  Types.fromparser list +val from_PATTERN_SUBST : +  ((Lexing.lexbuf -> 'a) -> Lexing.lexbuf -> Types.fromparser list) -> +  ((string * ((int * int) * 'a) list) list * +   (string * ((int * int) * 'a) list) list * string) * +  ('b * (int * int)) -> Types.fromparser list diff --git a/perl_checker.src/perl_checker.ml b/perl_checker.src/perl_checker.ml index 79b3ac9..7e951a8 100644 --- a/perl_checker.src/perl_checker.ml +++ b/perl_checker.src/perl_checker.ml @@ -8,12 +8,9 @@ let _ =        let lexbuf = Lexing.from_channel (Unix.open_process_in (Printf.sprintf "expand \"%s\"" file)) in        try  	Info.start_a_new_file file ; -	if false then -	  let t = Lexer.lexbuf2list (Lexer.concat_bareword_paren (Lexer.concat_spaces Lexer.token)) lexbuf in -	  let _,_ = t, t in () -	else -	  let t = Parser.prog (Lexer.concat_bareword_paren (Lexer.concat_spaces Lexer.token)) lexbuf in -	  let _,_ = t, t in () +	let tokens = Lexer.get_token Lexer.token lexbuf in +	let t = Parser_helper.parse_tokens Parser.prog tokens (Some lexbuf) in +	let _,_ = t, t in ()        with Failure s -> (  	prerr_endline s ;  	exit 1 diff --git a/perl_checker.src/types.mli b/perl_checker.src/types.mli index 567c0c5..ceb5804 100644 --- a/perl_checker.src/types.mli +++ b/perl_checker.src/types.mli @@ -16,7 +16,8 @@ type context = I_scalar | I_hash | I_array | I_func | I_raw | I_star | I_arrayle  type fromparser =      | Ident of string option * string * pos     | Num of 
string * pos -   | String of string * pos +   | Raw_string of string * pos +   | String of (string * fromparser) list * pos     | Ref of context * fromparser     | Deref of context * fromparser | 

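
For context on the new interpolated-string representation introduced in lexer.mll (`raw_interpolated_string = (string * raw_token list) list`): each literal chunk of a double-quoted string is paired with the tokens lexed from the interpolation that follows it. The sketch below is illustrative only, with a stub constructor standing in for the lexer's much larger `raw_token` type and invented positions:

```ocaml
type raw_pos = int * int

(* Stub standing in for the lexer's full raw_token type. *)
type raw_token = SCALAR_IDENT of string option * string * raw_pos

(* Mirrors the new declaration in lexer.mll. *)
type raw_interpolated_string = (string * raw_token list) list

(* A string such as "foo $x bar" would come out roughly as: the literal
   chunk "foo " paired with the tokens lexed for $x, then the trailing
   chunk " bar" with no interpolation. Positions here are made up. *)
let example : raw_interpolated_string =
  [ ("foo ", [ SCALAR_IDENT (None, "x", (4, 6)) ]);
    (" bar", []) ]
```
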