| | | |
|---|---|---|
| author | Pascal Rigaux <pixel@mandriva.com> | 2002-11-12 14:07:28 +0000 |
| committer | Pascal Rigaux <pixel@mandriva.com> | 2002-11-12 14:07:28 +0000 |
| commit | 695c5b66f3a8da169c48e7989edb4db2a894e5aa (patch) | |
| tree | 8ac6e0561f382c2636ab41c0be90b11da2033f7c /perl_checker.src/lexer.mll | |
| parent | 7fff947a462837c0cc7ea4cbf4da146b796ac349 (diff) | |
| download | perl-MDK-Common-695c5b66f3a8da169c48e7989edb4db2a894e5aa.{tar, tar.gz, tar.bz2, tar.xz, zip} | |
*** empty log message ***
Diffstat (limited to 'perl_checker.src/lexer.mll')
-rw-r--r-- | perl_checker.src/lexer.mll | 427 |
1 file changed, 304 insertions, 123 deletions
```diff
diff --git a/perl_checker.src/lexer.mll b/perl_checker.src/lexer.mll
index 6abbbc2..8450aa9 100644
--- a/perl_checker.src/lexer.mll
+++ b/perl_checker.src/lexer.mll
@@ -1,12 +1,202 @@
 { (* -*- caml -*- *)
-open Parser
 open Common
+open Types
 open Lexing
 open Info
+type raw_token =
+  | EOF of raw_pos
+  | SPACE of int
+  | CR
+  | NUM of (string * raw_pos)
+  | STRING of (string * raw_pos)
+  | BAREWORD of (string * raw_pos)
+  | REVISION of (string * raw_pos)
+  | COMMENT of (string * raw_pos)
+  | POD of (string * raw_pos)
+  | LABEL of (string * raw_pos)
+  | COMMAND_STRING of (string * raw_pos)
+  | PRINT_TO_STAR of (string * raw_pos)
+  | PRINT_TO_SCALAR of (string * raw_pos)
+  | QUOTEWORDS of (string * raw_pos)
+  | COMPACT_HASH_SUBSCRIPT of (string * raw_pos)
+  | HERE_DOC of ((string * raw_pos) ref * raw_pos)
+  | PATTERN of (string * string * raw_pos)
+  | PATTERN_SUBST of (string * string * string * raw_pos)
+  | SCALAR_IDENT of (string option * string * raw_pos)
+  | ARRAY_IDENT of (string option * string * raw_pos)
+  | HASH_IDENT of (string option * string * raw_pos)
+  | FUNC_IDENT of (string option * string * raw_pos)
+  | STAR_IDENT of (string option * string * raw_pos)
+  | RAW_IDENT of (string option * string * raw_pos)
+  | ARRAYLEN_IDENT of (string option * string * raw_pos)
+  | FUNC_DECL_WITH_PROTO of (string * string * raw_pos)
+
+  | IF of raw_pos | ELSIF of raw_pos | ELSE of raw_pos | UNLESS of raw_pos | DO of raw_pos | WHILE of raw_pos | UNTIL of raw_pos | MY of raw_pos | CONTINUE of raw_pos | SUB of raw_pos
+  | LOCAL of raw_pos | FOR of (string * raw_pos) | USE of raw_pos | PACKAGE of raw_pos | BEGIN of raw_pos | END of raw_pos | PRINT of (string * raw_pos)
+  | NEW of (raw_pos) | FORMAT of (raw_pos) | AT of raw_pos | DOLLAR of raw_pos | PERCENT of raw_pos | AMPERSAND of raw_pos
+  | STAR of raw_pos | ARRAYLEN of raw_pos | SEMI_COLON of raw_pos | PKG_SCOPE of raw_pos | PAREN of raw_pos | PAREN_END of raw_pos | BRACKET of raw_pos
+  | BRACKET_END of raw_pos | BRACKET_HASHREF of raw_pos | ARRAYREF of raw_pos | ARRAYREF_END of raw_pos | ARROW of raw_pos | INCR of raw_pos | DECR of raw_pos
+  | POWER of raw_pos | TIGHT_NOT of raw_pos | BIT_NEG of raw_pos | REF of raw_pos | PATTERN_MATCH of raw_pos | PATTERN_MATCH_NOT of raw_pos | MULT of raw_pos
+  | DIVISION of raw_pos | MODULO of raw_pos | REPLICATE of raw_pos | PLUS of raw_pos | MINUS of raw_pos | CONCAT of raw_pos | BIT_SHIFT_LEFT of raw_pos
+  | BIT_SHIFT_RIGHT of raw_pos | LT of raw_pos | GT of raw_pos | COMPARE_OP of (string * raw_pos) | EQ_OP of (string * raw_pos)
+  | BIT_AND of raw_pos | BIT_OR of raw_pos | BIT_XOR of raw_pos | AND_TIGHT of raw_pos | OR_TIGHT of raw_pos | DOTDOT of raw_pos | DOTDOTDOT of raw_pos
+  | QUESTION_MARK of raw_pos | COLON of raw_pos | ASSIGN of (string * raw_pos) | COMMA of raw_pos | RIGHT_ARROW of raw_pos | NOT of raw_pos | AND of raw_pos | OR of raw_pos | XOR of raw_pos
+
+let saved_token = ref None
+
+let concat_bareword_paren get_token lexbuf =
+  let token = match !saved_token with
+  | None -> get_token lexbuf
+  | Some t -> t
+  in
+  let token, next =
+    match token with
+    | Parser.PRINT(s, both)
+    | Parser.BAREWORD(s, both) ->
+        let next_token = get_token lexbuf in
+        (match next_token with Parser.PAREN(_, (Space_0, _)) -> Parser.BAREWORD_PAREN(s, both) | _ -> token), Some next_token
+    | Parser.RAW_IDENT(ident, both) ->
+        let next_token = get_token lexbuf in
+        (match next_token with Parser.PAREN(_, (Space_0, _)) -> Parser.RAW_IDENT_PAREN(ident, both) | _ -> token), Some next_token
+    | _ -> token, None
+  in
+  saved_token := next ; token
+
+let rec concat_spaces get_token lexbuf =
+  let rec get_spaces spaces lexbuf =
+    match get_token lexbuf with
+    | CR -> get_spaces Space_cr lexbuf
+    | SPACE n ->
+        let spaces' =
+          match spaces with
+          | Space_cr -> Space_cr
+          | Space_0 -> if n = 1 then Space_1 else Space_n
+          | _ -> Space_n
+        in
+        get_spaces spaces' lexbuf
+    | token -> token, spaces
+  in
+  let token, spaces = get_spaces Space_0 lexbuf in
+  match token with
+  | NUM(s, pos) -> Parser.NUM(s, (spaces, pos))
+  | STRING(s, pos) -> Parser.STRING(s, (spaces, pos))
+  | BAREWORD(s, pos) -> Parser.BAREWORD(s, (spaces, pos))
+  | REVISION(s, pos) -> Parser.REVISION(s, (spaces, pos))
+  | COMMENT(s, pos) -> Parser.COMMENT(s, (spaces, pos))
+  | POD(s, pos) -> Parser.POD(s, (spaces, pos))
+  | LABEL(s, pos) -> Parser.LABEL(s, (spaces, pos))
+  | COMMAND_STRING(s, pos) -> Parser.COMMAND_STRING(s, (spaces, pos))
+  | PRINT(s, pos) -> Parser.PRINT(s, (spaces, pos))
+  | PRINT_TO_STAR(s, pos) -> Parser.PRINT_TO_STAR(s, (spaces, pos))
+  | PRINT_TO_SCALAR(s, pos) -> Parser.PRINT_TO_SCALAR(s, (spaces, pos))
+  | QUOTEWORDS(s, pos) -> Parser.QUOTEWORDS(s, (spaces, pos))
+  | COMPACT_HASH_SUBSCRIPT(s, pos) -> Parser.COMPACT_HASH_SUBSCRIPT(s, (spaces, pos))
+  | HERE_DOC(r, pos) -> Parser.HERE_DOC(r, (spaces, pos))
+  | PATTERN(s, opts, pos) -> Parser.PATTERN((s, opts), (spaces, pos))
+  | PATTERN_SUBST(from, to_, opts, pos) -> Parser.PATTERN_SUBST((from, to_, opts), (spaces, pos))
+  | SCALAR_IDENT(kind, name, pos) -> Parser.SCALAR_IDENT((kind, name), (spaces, pos))
+  | ARRAY_IDENT(kind, name, pos) -> Parser.ARRAY_IDENT((kind, name), (spaces, pos))
+  | HASH_IDENT(kind, name, pos) -> Parser.HASH_IDENT((kind, name), (spaces, pos))
+  | FUNC_IDENT(kind, name, pos) -> Parser.FUNC_IDENT((kind, name), (spaces, pos))
+  | STAR_IDENT(kind, name, pos) -> Parser.STAR_IDENT((kind, name), (spaces, pos))
+  | RAW_IDENT(kind, name, pos) -> Parser.RAW_IDENT((kind, name), (spaces, pos))
+  | ARRAYLEN_IDENT(kind, name, pos) -> Parser.ARRAYLEN_IDENT((kind, name), (spaces, pos))
+  | FUNC_DECL_WITH_PROTO(name, proto, pos) -> Parser.FUNC_DECL_WITH_PROTO((name, proto), (spaces, pos))
+
+  | NEW(pos) -> Parser.NEW((), (spaces, pos))
+  | FORMAT(pos) -> Parser.FORMAT((), (spaces, pos))
+  | COMPARE_OP(s, pos) -> Parser.COMPARE_OP(s, (spaces, pos))
+  | EQ_OP(s, pos) -> Parser.EQ_OP(s, (spaces, pos))
+  | ASSIGN(s, pos) -> Parser.ASSIGN(s, (spaces, pos))
+  | FOR(s, pos) -> Parser.FOR(s, (spaces, pos))
+
+  | EOF (pos) -> Parser.EOF ((), (spaces, pos))
+  | IF (pos) -> Parser.IF ((), (spaces, pos))
+  | ELSIF (pos) -> Parser.ELSIF ((), (spaces, pos))
+  | ELSE (pos) -> Parser.ELSE ((), (spaces, pos))
+  | UNLESS (pos) -> Parser.UNLESS ((), (spaces, pos))
+  | DO (pos) -> Parser.DO ((), (spaces, pos))
+  | WHILE (pos) -> Parser.WHILE ((), (spaces, pos))
+  | UNTIL (pos) -> Parser.UNTIL ((), (spaces, pos))
+  | MY (pos) -> Parser.MY ((), (spaces, pos))
+  | CONTINUE (pos) -> Parser.CONTINUE ((), (spaces, pos))
+  | SUB (pos) -> Parser.SUB ((), (spaces, pos))
+  | LOCAL (pos) -> Parser.LOCAL ((), (spaces, pos))
+  | USE (pos) -> Parser.USE ((), (spaces, pos))
+  | PACKAGE (pos) -> Parser.PACKAGE ((), (spaces, pos))
+  | BEGIN (pos) -> Parser.BEGIN ((), (spaces, pos))
+  | END (pos) -> Parser.END ((), (spaces, pos))
+  | AT (pos) -> Parser.AT ((), (spaces, pos))
+  | DOLLAR (pos) -> Parser.DOLLAR ((), (spaces, pos))
+  | PERCENT (pos) -> Parser.PERCENT ((), (spaces, pos))
+  | AMPERSAND (pos) -> Parser.AMPERSAND ((), (spaces, pos))
+  | STAR (pos) -> Parser.STAR ((), (spaces, pos))
+  | ARRAYLEN (pos) -> Parser.ARRAYLEN ((), (spaces, pos))
+  | SEMI_COLON (pos) -> Parser.SEMI_COLON ((), (spaces, pos))
+  | PKG_SCOPE (pos) -> Parser.PKG_SCOPE ((), (spaces, pos))
+  | PAREN (pos) -> Parser.PAREN ((), (spaces, pos))
+  | PAREN_END (pos) -> Parser.PAREN_END ((), (spaces, pos))
+  | BRACKET (pos) -> Parser.BRACKET ((), (spaces, pos))
+  | BRACKET_END (pos) -> Parser.BRACKET_END ((), (spaces, pos))
+  | BRACKET_HASHREF (pos) -> Parser.BRACKET_HASHREF ((), (spaces, pos))
+  | ARRAYREF (pos) -> Parser.ARRAYREF ((), (spaces, pos))
+  | ARRAYREF_END (pos) -> Parser.ARRAYREF_END ((), (spaces, pos))
+  | ARROW (pos) -> Parser.ARROW ((), (spaces, pos))
+  | INCR (pos) -> Parser.INCR ((), (spaces, pos))
+  | DECR (pos) -> Parser.DECR ((), (spaces, pos))
+  | POWER (pos) -> Parser.POWER ((), (spaces, pos))
+  | TIGHT_NOT (pos) -> Parser.TIGHT_NOT ((), (spaces, pos))
+  | BIT_NEG (pos) -> Parser.BIT_NEG ((), (spaces, pos))
+  | REF (pos) -> Parser.REF ((), (spaces, pos))
+  | PATTERN_MATCH (pos) -> Parser.PATTERN_MATCH ((), (spaces, pos))
+  | PATTERN_MATCH_NOT(pos) -> Parser.PATTERN_MATCH_NOT((), (spaces, pos))
+  | MULT (pos) -> Parser.MULT ((), (spaces, pos))
+  | DIVISION (pos) -> Parser.DIVISION ((), (spaces, pos))
+  | MODULO (pos) -> Parser.MODULO ((), (spaces, pos))
+  | REPLICATE (pos) -> Parser.REPLICATE ((), (spaces, pos))
+  | PLUS (pos) -> Parser.PLUS ((), (spaces, pos))
+  | MINUS (pos) -> Parser.MINUS ((), (spaces, pos))
+  | CONCAT (pos) -> Parser.CONCAT ((), (spaces, pos))
+  | BIT_SHIFT_LEFT (pos) -> Parser.BIT_SHIFT_LEFT ((), (spaces, pos))
+  | BIT_SHIFT_RIGHT (pos) -> Parser.BIT_SHIFT_RIGHT ((), (spaces, pos))
+  | LT (pos) -> Parser.LT ((), (spaces, pos))
+  | GT (pos) -> Parser.GT ((), (spaces, pos))
+  | BIT_AND (pos) -> Parser.BIT_AND ((), (spaces, pos))
+  | BIT_OR (pos) -> Parser.BIT_OR ((), (spaces, pos))
+  | BIT_XOR (pos) -> Parser.BIT_XOR ((), (spaces, pos))
+  | AND_TIGHT (pos) -> Parser.AND_TIGHT ((), (spaces, pos))
+  | OR_TIGHT (pos) -> Parser.OR_TIGHT ((), (spaces, pos))
+  | DOTDOT (pos) -> Parser.DOTDOT ((), (spaces, pos))
+  | DOTDOTDOT (pos) -> Parser.DOTDOTDOT ((), (spaces, pos))
+  | QUESTION_MARK (pos) -> Parser.QUESTION_MARK ((), (spaces, pos))
+  | COLON (pos) -> Parser.COLON ((), (spaces, pos))
+  | COMMA (pos) -> Parser.COMMA ((), (spaces, pos))
+  | RIGHT_ARROW (pos) -> Parser.RIGHT_ARROW ((), (spaces, pos))
+  | NOT (pos) -> Parser.NOT ((), (spaces, pos))
+  | AND (pos) -> Parser.AND ((), (spaces, pos))
+  | OR (pos) -> Parser.OR ((), (spaces, pos))
+  | XOR (pos) -> Parser.XOR ((), (spaces, pos))
+
+  | SPACE _ | CR -> internal_error "raw_token_to_token"
+
+let rec lexbuf2list t lexbuf =
+  let rec f () =
+    match t lexbuf with
+    | Parser.EOF _ -> []
+    | e -> e :: f()
+  in
+  let l = f() in
+  l
 let next_rule = ref None
+let bpos = -1,-1
+let pos lexbuf = lexeme_start lexbuf, lexeme_end lexbuf
+let pos2sfull_with start end_ = Info.pos2sfull (!current_file, start, end_)
+let pos2sfull lexbuf = pos2sfull_with (lexeme_start lexbuf) (lexeme_end lexbuf)
+let putback lexbuf nb = lexbuf.Lexing.lex_curr_pos <- lexbuf.Lexing.lex_curr_pos - nb
+
 let add_a_new_line raw_pos =
   incr current_file_current_line ;
   lpush current_file_lines_starts raw_pos
@@ -26,12 +216,13 @@ let string_nestness = ref 0
 let building_current_string = ref ""
 let current_string_start_pos = ref 0
 let current_string_start_line = ref 0
+let die lexbuf err = failwith (pos2sfull_with !current_string_start_pos (lexeme_end lexbuf) ^ err)
 
 let ins t lexbuf =
   building_current_string := "";
   current_string_start_pos := lexeme_start lexbuf;
   t lexbuf ;
-  !building_current_string, (!current_file, !current_string_start_pos, lexeme_end lexbuf)
+  !building_current_string, (!current_string_start_pos, lexeme_end lexbuf)
 let ins_to_string t lexbuf =
   let s, pos = ins t lexbuf in
   not_ok_for_match := lexeme_end lexbuf;
@@ -42,23 +233,6 @@ let next_s s t lexbuf =
   t lexbuf
 let next t lexbuf = next_s (lexeme lexbuf) t lexbuf
 
-let pos lexbuf = !current_file, lexeme_start lexbuf, lexeme_end lexbuf
-let pos2sfull_with start end_ = Info.pos2sfull (!current_file, start, end_)
-let pos2sfull lexbuf = pos2sfull_with (lexeme_start lexbuf) (lexeme_end lexbuf)
-
-let putback lexbuf nb = lexbuf.Lexing.lex_curr_pos <- lexbuf.Lexing.lex_curr_pos - nb
-
-let die lexbuf err = failwith (pos2sfull_with !current_string_start_pos (lexeme_end lexbuf) ^ err)
-
-let rec lexbuf2list t lexbuf =
-  let rec f () =
-    match t lexbuf with
-    | EOF -> []
-    | e -> e :: f()
-  in
-  let l = f() in
-  l
-
 let ident_type_from_char fq name lexbuf c =
   not_ok_for_match := lexeme_end lexbuf;
   match c with
@@ -92,7 +266,7 @@ let arraylen_fqident_from_lexbuf lexbuf =
   let fq, name = split_at_two_colons (skip_n_char 2 s) in
   ARRAYLEN_IDENT(Some fq, name, pos lexbuf)
 
-let check_multi_line_delimited_string opts (_, start, end_) =
+let check_multi_line_delimited_string opts (start, end_) =
   let check =
     match opts with
     | None -> true
@@ -102,19 +276,18 @@ let check_multi_line_delimited_string opts (_, start, end_) =
     failwith (pos2sfull_with start end_ ^ "multi-line patterns are not allowed (or use /x modifier)")
 }
 
-let space = [' ' '\t']
 let stash = [ '$' '@' '%' '&' '*' ]
 let ident_start = ['a'-'z' 'A'-'Z' '_']
 let ident = ident_start ['0'-'9' 'A'-'Z' 'a'-'z' '_'] *
 let pattern_separator = [ '/' '!' ',' '|' ]
 
 rule token = parse
-| space+ {
+| ' '+ {
     (* propagate not_ok_for_match when it was set by the previous token *)
    if lexeme_start lexbuf = !not_ok_for_match then not_ok_for_match := lexeme_end lexbuf;
-    (*SPACE(pos lexbuf) *) token lexbuf
+    SPACE(lexeme_end lexbuf - lexeme_start lexbuf)
   }
-| '#' [^ '\n']* { (*COMMENT(lexeme lexbuf, pos lexbuf)*) token lexbuf }
+| '#' [^ '\n']* { SPACE(1) }
 
 | "\n=" {
     add_a_new_line(lexeme_end lexbuf - 1);
@@ -128,76 +301,85 @@ rule token = parse
       current_here_doc_mark := mark ;
       r := ins (if interpolate then here_doc else raw_here_doc) lexbuf
     with Queue.Empty -> ());
-    token lexbuf
-  }
-| "->" { ARROW }
-| "++" { INCR }
-| "--" { DECR }
-| "**" { POWER }
-| "!" { TIGHT_NOT }
-| "~" { BIT_NEG }
-| "=~" { PATTERN_MATCH }
-| "!~" { PATTERN_MATCH_NOT }
-| "*" { MULT }
-| "%" { MODULO }
-| "x" { REPLICATE }
-| "+" { PLUS }
-| "-" { MINUS }
-| "." { CONCAT }
-| "<<" { BIT_SHIFT_LEFT }
-| ">>" { BIT_SHIFT_RIGHT }
-| "<" { LT }
-| ">" { GT }
-| "<=" | ">=" | "lt" | "gt" | "le" | "ge" { COMPARE_OP(lexeme lexbuf) }
-| "==" | "!=" | "<=>" | "eq" | "ne" | "cmp" { EQ_OP(lexeme lexbuf) }
-| "&" { BIT_AND }
-| "|" { BIT_OR }
-| "^" { BIT_XOR }
-| "&&" { AND_TIGHT }
-| "||" { OR_TIGHT }
-| ".." { DOTDOT }
-| "..." { DOTDOTDOT }
-| "?" { QUESTION_MARK }
-| ":" { COLON }
-| "::" { PKG_SCOPE }
-
-| "=" | "+=" | "-=" | "*=" | ".=" | "|=" | "&=" | "^=" | "||=" | "&&=" { ASSIGN(lexeme lexbuf) }
-
-| "," { COMMA }
-| "=>" { RIGHT_ARROW }
-| "not" { NOT }
-| "and" { AND }
-| "or" { OR }
-| "xor" { XOR }
-
-| "if" { IF }
-| "else" { ELSE }
-| "elsif" { ELSIF }
-| "unless" { UNLESS }
-| "do" { DO }
-| "while" { WHILE }
-| "until" { UNTIL }
-| "foreach" { FOR("foreach") }
-| "for" { FOR("for") }
-| "my" { MY }
-| "local" { LOCAL }
-| "continue" { CONTINUE }
-| "sub" { SUB }
-| "package" { PACKAGE }
-| "use" { USE }
-| "BEGIN" { BEGIN }
-| "END" { END }
-| "print" { PRINT(pos lexbuf) }
+    CR
+  }
+| "->" { ARROW(pos lexbuf) }
+| "++" { INCR(pos lexbuf) }
+| "--" { DECR(pos lexbuf) }
+| "**" { POWER(pos lexbuf) }
+| "!" { TIGHT_NOT(pos lexbuf) }
+| "~" { BIT_NEG(pos lexbuf) }
+| "=~" { PATTERN_MATCH(pos lexbuf) }
+| "!~" { PATTERN_MATCH_NOT(pos lexbuf) }
+| "*" { MULT(pos lexbuf) }
+| "%" { MODULO(pos lexbuf) }
+| "x" { REPLICATE(pos lexbuf) }
+| "+" { PLUS(pos lexbuf) }
+| "-" { MINUS(pos lexbuf) }
+| "." { CONCAT(pos lexbuf) }
+| "<<" { BIT_SHIFT_LEFT(pos lexbuf) }
+| ">>" { BIT_SHIFT_RIGHT(pos lexbuf) }
+| "<" { LT(pos lexbuf) }
+| ">" { GT(pos lexbuf) }
+| "<=" | ">=" | "lt" | "gt" | "le" | "ge" { COMPARE_OP(lexeme lexbuf, pos lexbuf) }
+| "==" | "!=" | "<=>" | "eq" | "ne" | "cmp" { EQ_OP(lexeme lexbuf, pos lexbuf) }
+| "&" { BIT_AND(pos lexbuf) }
+| "|" { BIT_OR(pos lexbuf) }
+| "^" { BIT_XOR(pos lexbuf) }
+| "&&" { AND_TIGHT(pos lexbuf) }
+| "||" { OR_TIGHT(pos lexbuf) }
+| ".." { DOTDOT(pos lexbuf) }
+| "..." { DOTDOTDOT(pos lexbuf) }
+| "?" { QUESTION_MARK(pos lexbuf) }
+| ":" { COLON(pos lexbuf) }
+| "::" { PKG_SCOPE(pos lexbuf) }
+
+| "=" | "+=" | "-=" | "*=" | ".=" | "|=" | "&=" | "^=" | "||=" | "&&=" { ASSIGN(lexeme lexbuf, pos lexbuf) }
+
+| "," { COMMA(pos lexbuf) }
+| "=>" { RIGHT_ARROW(pos lexbuf) }
+| "not" { NOT(pos lexbuf) }
+| "and" { AND(pos lexbuf) }
+| "or" { OR(pos lexbuf) }
+| "xor" { XOR(pos lexbuf) }
+
+| "if" { IF(pos lexbuf) }
+| "else" { ELSE(pos lexbuf) }
+| "elsif" { ELSIF(pos lexbuf) }
+| "unless" { UNLESS(pos lexbuf) }
+| "do" { DO(pos lexbuf) }
+| "while" { WHILE(pos lexbuf) }
+| "until" { UNTIL(pos lexbuf) }
+| "foreach" { FOR(lexeme lexbuf, pos lexbuf) }
+| "for" { FOR(lexeme lexbuf, pos lexbuf) }
+| "my" { MY(pos lexbuf) }
+| "local" { LOCAL(pos lexbuf) }
+| "continue" { CONTINUE(pos lexbuf) }
+| "sub" { SUB(pos lexbuf) }
+| "package" { PACKAGE(pos lexbuf) }
+| "use" { USE(pos lexbuf) }
+| "BEGIN" { BEGIN(pos lexbuf) }
+| "END" { END(pos lexbuf) }
+| "print" { PRINT(lexeme lexbuf, pos lexbuf) }
 | "new" { NEW(pos lexbuf) }
 | "format" { let _ = here_doc_next_line "." false in FORMAT(pos lexbuf) }
 | "split" | "grep" { (* ok_for_match! *) BAREWORD(lexeme lexbuf, pos lexbuf) }
-| ident space* "=>" {    (* needed so that (if => 1) works *)
+| "print " ident ' ' {
+    putback lexbuf 1;
+    PRINT_TO_STAR(skip_n_char_ 6 1 (lexeme lexbuf), pos lexbuf);
+  }
+| "print $" ident ' ' {
+    putback lexbuf 1;
+    PRINT_TO_SCALAR(skip_n_char_ 7 1 (lexeme lexbuf), pos lexbuf);
+  }
+
+| ident ' '* "=>" {    (* needed so that (if => 1) works *)
     let s = lexeme lexbuf in
     let end_ = String.length s - 1 in
-    let ident_end = rindex_non_spaces_from (end_ - 2) s in
+    let ident_end = non_rindex_from s (end_ - 2) ' ' in
     putback lexbuf (end_ - ident_end);
    BAREWORD(String.sub s 0 (ident_end+1), pos lexbuf)
  }
@@ -207,25 +389,25 @@ rule token = parse
    COMPACT_HASH_SUBSCRIPT(lexeme lexbuf, pos lexbuf)
  }
 
-| '@' { AT }
-| '$' { DOLLAR }
-| '$' '#' { ARRAYLEN }
-| '%' ['$' '{'] { putback lexbuf 1; PERCENT }
-| '&' ['$' '{'] { putback lexbuf 1; AMPERSAND }
-| '*' ['$' '{'] { putback lexbuf 1; if lexeme_start lexbuf = !not_ok_for_match then MULT else STAR }
+| '@' { AT(pos lexbuf) }
+| '$' { DOLLAR(pos lexbuf) }
+| '$' '#' { ARRAYLEN(pos lexbuf) }
+| '%' ['$' '{'] { putback lexbuf 1; PERCENT(pos lexbuf) }
+| '&' ['$' '{'] { putback lexbuf 1; AMPERSAND(pos lexbuf) }
+| '*' ['$' '{'] { putback lexbuf 1; if lexeme_start lexbuf = !not_ok_for_match then MULT(pos lexbuf) else STAR(pos lexbuf) }
 
-| ';' { SEMI_COLON }
-| '(' { PAREN }
-| '{' { BRACKET }
-| "+{"{ BRACKET_HASHREF }
-| '[' { ARRAYREF }
-| ')' { not_ok_for_match := lexeme_end lexbuf; PAREN_END }
-| '}' { not_ok_for_match := lexeme_end lexbuf; BRACKET_END }
-| ']' { not_ok_for_match := lexeme_end lexbuf; ARRAYREF_END }
+| ';' { SEMI_COLON(pos lexbuf) }
+| '(' { PAREN(pos lexbuf) }
+| '{' { BRACKET(pos lexbuf) }
+| "+{"{ BRACKET_HASHREF(pos lexbuf) }
+| '[' { ARRAYREF(pos lexbuf) }
+| ')' { not_ok_for_match := lexeme_end lexbuf; PAREN_END(pos lexbuf) }
+| '}' { not_ok_for_match := lexeme_end lexbuf; BRACKET_END(pos lexbuf) }
+| ']' { not_ok_for_match := lexeme_end lexbuf; ARRAYREF_END(pos lexbuf) }
 
 | "/" {
-    if lexeme_start lexbuf = !not_ok_for_match then DIVISION
+    if lexeme_start lexbuf = !not_ok_for_match then DIVISION(pos lexbuf)
     else (
       delimit_char := '/' ;
       current_string_start_line := !current_file_current_line;
@@ -237,7 +419,7 @@ rule token = parse
  }
 
 | "/=" {
-    if lexeme_start lexbuf = !not_ok_for_match then ASSIGN(lexeme lexbuf)
+    if lexeme_start lexbuf = !not_ok_for_match then ASSIGN(lexeme lexbuf, pos lexbuf)
     else (
      putback lexbuf 1 ;
      delimit_char := '/' ;
@@ -268,10 +450,10 @@
 | "s" pattern_separator {
    delimit_char := lexeme_char lexbuf 1 ;
    current_string_start_line := !current_file_current_line;
-    let s1, (_, start, _) = ins delimited_string lexbuf in
-    let s2, (_, _, end_) = ins delimited_string lexbuf in
+    let s1, (start, _) = ins delimited_string lexbuf in
+    let s2, (_, end_) = ins delimited_string lexbuf in
    let opts, _ = ins pattern_options lexbuf in
-    let pos = !current_file, start, end_ in
+    let pos = start, end_ in
    check_multi_line_delimited_string (Some opts) pos ;
    PATTERN_SUBST(s1, s2, opts, pos)
  }
@@ -279,43 +461,43 @@
 | "tr" pattern_separator {
    delimit_char := lexeme_char lexbuf 2 ;
    current_string_start_line := !current_file_current_line;
-    let s1, (_, start, _) = ins delimited_string lexbuf in
-    let s2, (_, _, end_) = ins delimited_string lexbuf in
+    let s1, (start, _) = ins delimited_string lexbuf in
+    let s2, (_, end_) = ins delimited_string lexbuf in
    let opts, _ = ins pattern_options lexbuf in
-    let pos = !current_file, start, end_ in
+    let pos = start, end_ in
    check_multi_line_delimited_string None pos ;
    PATTERN_SUBST(s1, s2, opts, pos)
  }
 
 | "<<" ident {
    not_ok_for_match := lexeme_end lexbuf;
-    HERE_DOC(here_doc_next_line (skip_n_char 2 (lexeme lexbuf)) true)
+    HERE_DOC(here_doc_next_line (skip_n_char 2 (lexeme lexbuf)) true, pos lexbuf)
  }
 | "<<'" ident "'" {
    not_ok_for_match := lexeme_end lexbuf;
-    HERE_DOC(here_doc_next_line (skip_n_char_ 3 1 (lexeme lexbuf)) false)
+    HERE_DOC(here_doc_next_line (skip_n_char_ 3 1 (lexeme lexbuf)) false, pos lexbuf)
  }
-| "<<" space+ "'"
-| "<<" space+ ident {
+| "<<" ' '+ "'"
+| "<<" ' '+ ident {
    failwith (pos2sfull_with (lexeme_start lexbuf + 2) (lexeme_end lexbuf) ^ "No space allowed between \"<<\" and the marker")
  }
-| "<<" space* '"' {
+| "<<" ' '* '"' {
    failwith (pos2sfull_with (lexeme_start lexbuf + 2) (lexeme_end lexbuf) ^ "Don't use <<\"MARK\", use <<MARK instead")
  }
 
 | "\\" stash
 | "\\" ['0'-'9' 'A'-'Z' 'a'-'z']
-| "\\" space* '('
-  { putback lexbuf 1; REF }
+| "\\" ' '* '('
+  { putback lexbuf 1; REF(pos lexbuf) }
 
-| "sub" space+ ident space* '(' [ '$' '@' '\\' '&' ';' ]* ')' {
+| "sub" ' '+ ident ' '* '(' [ '$' '@' '\\' '&' ';' ]* ')' {
    (* bloody prototypes, must be caught especially otherwise "($)" is badly tokenized *)
    (* and alas "($@)" is both valid as an expression and a prototype *)
    let s = lexeme lexbuf in
-    let ident_start = index_non_spaces_from 3 s in
+    let ident_start = non_index_from s 3 ' ' in
    let proto_start = String.index_from s ident_start '(' in
-    let ident_end = rindex_non_spaces_from (proto_start-1) s in
+    let ident_end = non_rindex_from s (proto_start-1) ' ' in
    let ident = String.sub s ident_start (ident_end - ident_start + 1) in
    let prototype = skip_n_char_ (proto_start + 1) 1 s in
@@ -327,8 +509,8 @@
 | stash ident? ("::" ident)+ { typed_fqident_from_lexbuf lexbuf }
 
 | stash ident
-| '$' [^ '{' ' ' '\t' '\n' '$']
-| "$^" [^ '{' ' ' '\t' '\n'] { typed_ident_from_lexbuf lexbuf }
+| '$' [^ '{' ' ' '\n' '$']
+| "$^" [^ '{' ' ' '\n'] { typed_ident_from_lexbuf lexbuf }
 
 | "$$" [^ 'a'-'z' 'A'-'Z' '_' '{'] { putback lexbuf 1; SCALAR_IDENT(None, "$$", pos lexbuf) }
@@ -339,7 +521,7 @@
 | ident ":" { LABEL(lexeme lexbuf, pos lexbuf) }
 
-| '-' [ 'a'-'z' 'A'-'Z' ] [ ' ' '\t' '(' ] { putback lexbuf 1; BAREWORD(lexeme lexbuf, pos lexbuf) }
+| '-' [ 'a'-'z' 'A'-'Z' ] [ ' ' '(' ] { putback lexbuf 1; BAREWORD(lexeme lexbuf, pos lexbuf) }
 
 | ['0'-'9'] ['0'-'9' '_']* ('.' ['0'-'9'] ['0'-'9' '_']*)+
 | 'v' ['0'-'9'] ['0'-'9' '_']* ('.' ['0'-'9'] ['0'-'9' '_']*)*
@@ -367,7 +549,7 @@
 | "qq(" { ins_to_string qqstring lexbuf }
 | "qw(" { let s, pos = ins qstring lexbuf in QUOTEWORDS(s, pos) }
 
-| eof { EOF }
+| eof { EOF(pos lexbuf) }
 | _ { failwith (Printf.sprintf "%serror tokenizing <<%s>>" (pos2sfull lexbuf) (lexeme lexbuf)) }
 
 and string = parse
@@ -480,7 +662,6 @@ and pattern_options = parse
 and pod_command = parse
 | [^ '\n' ]+ {
    let s = lexeme lexbuf in
-    if String.contains s '\t' then failwith(pos2sfull lexbuf ^ "tabulations not accepted in POD commands") else
    let command = String.sub s 0 (try String.index s ' ' with Not_found -> String.length s) in
    match command with
    | "cut" ->
```
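Note on the change: the patch splits tokenizing into two passes. `rule token` now returns lightweight `raw_token` values that carry only `(start, end)` positions, plus explicit `SPACE`/`CR` markers, and `concat_spaces` folds those markers into a `Space_0`/`Space_1`/`Space_n`/`Space_cr` tag attached to the following `Parser` token, while `concat_bareword_paren` peeks one token ahead so that a bareword immediately followed by `(` (i.e. tagged `Space_0`) can become `BAREWORD_PAREN`. The sketch below illustrates the space-folding idea in isolation; the toy types (`raw_tok`, `cooked`) and the canned token stream are hypothetical stand-ins, not the project's actual `Parser`/`Types` modules.

```ocaml
(* Minimal sketch of the two-pass scheme, assuming a token source given as a
   [unit -> raw_tok] function. All type and value names here are illustrative. *)

type raw_pos = int * int

type spaces = Space_0 | Space_1 | Space_n | Space_cr

type raw_tok =
  | SPACE of int              (* a run of blanks of length n *)
  | CR                        (* a newline *)
  | WORD of string * raw_pos  (* any "real" token *)
  | EOF of raw_pos

type cooked =
  | Word of string * (spaces * raw_pos)
  | Eof of (spaces * raw_pos)

(* Swallow SPACE/CR tokens, summarise them as a [spaces] tag, and attach the
   tag to the next real token -- the same idea as concat_spaces in the patch. *)
let concat_spaces next_raw =
  let rec get_spaces spaces =
    match next_raw () with
    | CR -> get_spaces Space_cr
    | SPACE n ->
        let spaces' =
          match spaces with
          | Space_cr -> Space_cr
          | Space_0 -> if n = 1 then Space_1 else Space_n
          | _ -> Space_n
        in
        get_spaces spaces'
    | tok -> tok, spaces
  in
  match get_spaces Space_0 with
  | WORD (s, pos), spaces -> Word (s, (spaces, pos))
  | EOF pos, spaces -> Eof (spaces, pos)
  | (SPACE _ | CR), _ -> assert false   (* get_spaces never returns these *)

let () =
  (* canned stream: "foo" <one space> "bar" <newline> eof *)
  let stream = ref [WORD ("foo", (0, 3)); SPACE 1; WORD ("bar", (4, 7)); CR; EOF (8, 8)] in
  let next_raw () =
    match !stream with
    | [] -> EOF (0, 0)
    | t :: rest -> stream := rest; t
  in
  match concat_spaces next_raw, concat_spaces next_raw with
  | Word ("foo", (Space_0, _)), Word ("bar", (Space_1, _)) -> print_endline "ok"
  | _ -> print_endline "unexpected"
```

Keeping the spacing as data on each token is what lets a later pass distinguish `foo (...)` from `foo(...)` without re-reading the source, which is exactly the `Space_0` test used for `BAREWORD_PAREN`/`RAW_IDENT_PAREN` in the patch.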