{ (* -*- caml -*- *)
open Common
open Types
open Lexing
open Info

let bpos = -1, -1

type raw_token =
  | EOF of raw_pos
  | SPACE of int
  | CR
  | INT of (string * raw_pos)
  | FLOAT of (string * raw_pos)
  | RAW_STRING of (string * raw_pos)
  | STRING of (raw_interpolated_string * raw_pos)
  | PATTERN of (raw_interpolated_string * string * raw_pos)
  | QR_PATTERN of (raw_interpolated_string * string * raw_pos)
  | PATTERN_SUBST of (raw_interpolated_string * raw_interpolated_string * string * raw_pos)
  | BAREWORD of (string * raw_pos)
  | BAREWORD_PAREN of (string * raw_pos)
  | REVISION of (string * raw_pos)
  | PERL_CHECKER_COMMENT of (string * raw_pos)
  | PO_COMMENT of (string * raw_pos)
  | POD of (string * raw_pos)
  | LABEL of (string * raw_pos)
  | COMMAND_STRING of (raw_interpolated_string * raw_pos)
  | PRINT_TO_STAR of ((string * string) * raw_pos)
  | PRINT_TO_SCALAR of ((string * string) * raw_pos)
  | QUOTEWORDS of (string * raw_pos)
  | COMPACT_HASH_SUBSCRIPT of (string * raw_pos)
  | RAW_HERE_DOC of ((string * raw_pos) ref * raw_pos)
  | HERE_DOC of (raw_interpolated_string * raw_pos) ref * raw_pos
  | SCALAR_IDENT of (string option * string * raw_pos)
  | ARRAY_IDENT of (string option * string * raw_pos)
  | HASH_IDENT of (string option * string * raw_pos)
  | FUNC_IDENT of (string option * string * raw_pos)
  | STAR_IDENT of (string option * string * raw_pos)
  | RAW_IDENT of (string option * string * raw_pos)
  | RAW_IDENT_PAREN of (string option * string * raw_pos)
  | ARRAYLEN_IDENT of (string option * string * raw_pos)
  | FUNC_DECL_WITH_PROTO of (string * string * raw_pos)
  | IF of raw_pos
  | ELSIF of raw_pos
  | ELSE of raw_pos
  | UNLESS of raw_pos
  | DO of raw_pos
  | WHILE of raw_pos
  | UNTIL of raw_pos
  | MY_OUR of (string * raw_pos)
  | CONTINUE of raw_pos
  | SUB of raw_pos
  | LOCAL of raw_pos
  | FOR of (string * raw_pos)
  | USE of raw_pos
  | PACKAGE of raw_pos
  | BEGIN of raw_pos
  | END of raw_pos
  | PRINT of (string * raw_pos)
  | NEW of raw_pos
  | FORMAT of raw_pos
  | AT of raw_pos
  | DOLLAR of raw_pos
  | PERCENT of raw_pos
  | AMPERSAND of raw_pos
  | STAR of raw_pos
  | ARRAYLEN of raw_pos
  | SEMI_COLON of raw_pos
  | PKG_SCOPE of raw_pos
  | PAREN of raw_pos
  | PAREN_END of raw_pos
  | BRACKET of raw_pos
  | BRACKET_END of raw_pos
  | BRACKET_HASHREF of raw_pos
  | ARRAYREF of raw_pos
  | ARRAYREF_END of raw_pos
  | ARROW of raw_pos
  | INCR of raw_pos
  | DECR of raw_pos
  | CONCAT of raw_pos
  | POWER of raw_pos
  | TIGHT_NOT of raw_pos
  | BIT_NEG of raw_pos
  | REF of raw_pos
  | ONE_SCALAR_PARA of (string * raw_pos)
  | PATTERN_MATCH of raw_pos
  | PATTERN_MATCH_NOT of raw_pos
  | MULT of (string * raw_pos)
  | MULT_L_STR of raw_pos
  | PLUS of (string * raw_pos)
  | BIT_SHIFT of (string * raw_pos)
  | LT of raw_pos
  | GT of raw_pos
  | COMPARE_OP of (string * raw_pos)
  | COMPARE_OP_STR of (string * raw_pos)
  | EQ_OP of (string * raw_pos)
  | EQ_OP_STR of (string * raw_pos)
  | BIT_AND of raw_pos
  | BIT_OR of raw_pos
  | BIT_XOR of raw_pos
  | AND_TIGHT of raw_pos
  | OR_TIGHT of raw_pos
  | DOTDOT of (string * raw_pos)
  | QUESTION_MARK of raw_pos
  | COLON of raw_pos
  | ASSIGN of (string * raw_pos)
  | COMMA of raw_pos
  | RIGHT_ARROW of raw_pos
  | NOT of raw_pos
  | AND of raw_pos
  | OR of raw_pos
  | XOR of raw_pos

and raw_interpolated_string = (string * raw_token list) list

let new_any mcontext any spaces pos =
  { mcontext = mcontext ; any = any ; spaces = spaces ; pos = pos }

(* merge a bareword (or "print", or a raw identifier) followed by "(" into the
   dedicated *_PAREN tokens, so the parser can distinguish calls with parentheses *)
let rec concat_bareword_paren accu = function
  | PRINT(s, pos1) :: PAREN(pos2) :: l
  | BAREWORD(s, pos1) :: PAREN(pos2) :: l ->
      concat_bareword_paren (PAREN(pos2) :: BAREWORD_PAREN(s, pos1) :: accu) l
  | RAW_IDENT(kind, ident, pos1) :: PAREN(pos2)
      :: l ->
      concat_bareword_paren (PAREN(pos2) :: RAW_IDENT_PAREN(kind, ident, pos1) :: accu) l
  | [] -> List.rev accu
  | e :: l -> concat_bareword_paren (e :: accu) l

(* map a raw_token to its position together with the corresponding Parser token *)
let rec raw_token_to_pos_and_token spaces = function
  | INT(s, pos) -> pos, Parser.NUM(new_any M_int s spaces pos)
  | FLOAT(s, pos) -> pos, Parser.NUM(new_any M_float s spaces pos)
  | RAW_STRING(s, pos) -> pos, Parser.RAW_STRING(new_any M_string s spaces pos)
  | RAW_HERE_DOC(r, pos) -> pos, Parser.RAW_HERE_DOC(new_any M_string !r spaces pos)
  | STRING(l, pos) -> pos, Parser.STRING(new_any M_string (raw_interpolated_string_to_tokens l) spaces pos)
  | COMMAND_STRING(l, pos) -> pos, Parser.COMMAND_STRING(new_any (M_mixed (M_string, M_array)) (raw_interpolated_string_to_tokens l) spaces pos)
  | QR_PATTERN(s, opts, pos) -> pos, Parser.QR_PATTERN(new_any M_special (raw_interpolated_string_to_tokens s, opts) spaces pos)
  | PATTERN(s, opts, pos) -> pos, Parser.PATTERN(new_any M_special (raw_interpolated_string_to_tokens s, opts) spaces pos)
  | PATTERN_SUBST(from, to_, opts, pos) -> pos, Parser.PATTERN_SUBST(new_any M_special (raw_interpolated_string_to_tokens from, raw_interpolated_string_to_tokens to_, opts) spaces pos)
  | HERE_DOC(l, pos) -> pos, Parser.HERE_DOC(new_any M_string (raw_interpolated_string_to_tokens (fst !l), snd !l) spaces pos)
  | BAREWORD(s, pos) -> pos, Parser.BAREWORD(new_any M_special s spaces pos)
  | BAREWORD_PAREN(s, pos) -> pos, Parser.BAREWORD_PAREN(new_any M_special s spaces pos)
  | REVISION(s, pos) -> pos, Parser.REVISION(new_any M_revision s spaces pos)
  | PERL_CHECKER_COMMENT(s, pos) -> pos, Parser.PERL_CHECKER_COMMENT(new_any M_special s spaces pos)
  | PO_COMMENT(s, pos) -> pos, Parser.PO_COMMENT(new_any M_special s spaces pos)
  | POD(s, pos) -> pos, Parser.POD(new_any M_special s spaces pos)
  | LABEL(s, pos) -> pos, Parser.LABEL(new_any M_special s spaces pos)
  | PRINT(s, pos) -> pos, Parser.PRINT(new_any M_special s spaces pos)
  | PRINT_TO_STAR(s, pos) -> pos, Parser.PRINT_TO_STAR(new_any M_special s spaces pos)
  | PRINT_TO_SCALAR(s, pos) -> pos, Parser.PRINT_TO_SCALAR(new_any M_special s spaces pos)
  | QUOTEWORDS(s, pos) -> pos, Parser.QUOTEWORDS(new_any M_array s spaces pos)
  | COMPACT_HASH_SUBSCRIPT(s, pos) -> pos, Parser.COMPACT_HASH_SUBSCRIPT(new_any M_special s spaces pos)
  | SCALAR_IDENT(kind, name, pos) -> pos, Parser.SCALAR_IDENT(new_any M_special (kind, name) spaces pos)
  | ARRAY_IDENT(kind, name, pos) -> pos, Parser.ARRAY_IDENT(new_any M_special (kind, name) spaces pos)
  | HASH_IDENT(kind, name, pos) -> pos, Parser.HASH_IDENT(new_any M_special (kind, name) spaces pos)
  | FUNC_IDENT(kind, name, pos) -> pos, Parser.FUNC_IDENT(new_any M_special (kind, name) spaces pos)
  | STAR_IDENT(kind, name, pos) -> pos, Parser.STAR_IDENT(new_any M_special (kind, name) spaces pos)
  | RAW_IDENT(kind, name, pos) -> pos, Parser.RAW_IDENT(new_any M_special (kind, name) spaces pos)
  | RAW_IDENT_PAREN(kind, name, pos) -> pos, Parser.RAW_IDENT_PAREN(new_any M_special (kind, name) spaces pos)
  | ARRAYLEN_IDENT(kind, name, pos) -> pos, Parser.ARRAYLEN_IDENT(new_any M_special (kind, name) spaces pos)
  | FUNC_DECL_WITH_PROTO(name, proto, pos) -> pos, Parser.FUNC_DECL_WITH_PROTO(new_any M_special (name, proto) spaces pos)
  | NEW(pos) -> pos, Parser.NEW(new_any M_special () spaces pos)
  | FORMAT(pos) -> pos, Parser.FORMAT(new_any M_special () spaces pos)
  | COMPARE_OP(s, pos) -> pos, Parser.COMPARE_OP(new_any M_special s spaces pos)
  | COMPARE_OP_STR(s, pos) -> pos, Parser.COMPARE_OP_STR(new_any M_special s spaces pos)
  | EQ_OP(s, pos) -> pos,
      Parser.EQ_OP(new_any M_special s spaces pos)
  | EQ_OP_STR(s, pos) -> pos, Parser.EQ_OP_STR(new_any M_special s spaces pos)
  | ASSIGN(s, pos) -> pos, Parser.ASSIGN(new_any M_special s spaces pos)
  | FOR(s, pos) -> pos, Parser.FOR(new_any M_special s spaces pos)
  | DOTDOT(s, pos) -> pos, Parser.DOTDOT(new_any M_special s spaces pos)
  | MULT(s, pos) -> pos, Parser.MULT(new_any M_special s spaces pos)
  | BIT_SHIFT(s, pos) -> pos, Parser.BIT_SHIFT(new_any M_special s spaces pos)
  | PLUS(s, pos) -> pos, Parser.PLUS(new_any M_special s spaces pos)
  | ONE_SCALAR_PARA(s, pos) -> pos, Parser.ONE_SCALAR_PARA(new_any M_special s spaces pos)
  | MY_OUR(s, pos) -> pos, Parser.MY_OUR(new_any M_special s spaces pos)
  | EOF (pos) -> pos, Parser.EOF (new_any M_special () spaces pos)
  | IF (pos) -> pos, Parser.IF (new_any M_special () spaces pos)
  | ELSIF (pos) -> pos, Parser.ELSIF (new_any M_special () spaces pos)
  | ELSE (pos) -> pos, Parser.ELSE (new_any M_special () spaces pos)
  | UNLESS (pos) -> pos, Parser.UNLESS (new_any M_special () spaces pos)
  | DO (pos) -> pos, Parser.DO (new_any M_special () spaces pos)
  | WHILE (pos) -> pos, Parser.WHILE (new_any M_special () spaces pos)
  | UNTIL (pos) -> pos, Parser.UNTIL (new_any M_special () spaces pos)
  | CONTINUE (pos) -> pos, Parser.CONTINUE (new_any M_special () spaces pos)
  | SUB (pos) -> pos, Parser.SUB (new_any M_special () spaces pos)
  | LOCAL (pos) -> pos, Parser.LOCAL (new_any M_special () spaces pos)
  | USE (pos) -> pos, Parser.USE (new_any M_special () spaces pos)
  | PACKAGE (pos) -> pos, Parser.PACKAGE (new_any M_special () spaces pos)
  | BEGIN (pos) -> pos, Parser.BEGIN (new_any M_special () spaces pos)
  | END (pos) -> pos, Parser.END (new_any M_special () spaces pos)
  | AT (pos) -> pos, Parser.AT (new_any M_special () spaces pos)
  | DOLLAR (pos) -> pos, Parser.DOLLAR (new_any M_special () spaces pos)
  | PERCENT (pos) -> pos, Parser.PERCENT (new_any M_special () spaces pos)
  | AMPERSAND (pos) -> pos, Parser.AMPERSAND (new_any M_special () spaces pos)
  | STAR (pos) -> pos, Parser.STAR (new_any M_special () spaces pos)
  | ARRAYLEN (pos) -> pos, Parser.ARRAYLEN (new_any M_special () spaces pos)
  | SEMI_COLON (pos) -> pos, Parser.SEMI_COLON (new_any M_special () spaces pos)
  | PKG_SCOPE (pos) -> pos, Parser.PKG_SCOPE (new_any M_special () spaces pos)
  | PAREN (pos) -> pos, Parser.PAREN (new_any M_special () spaces pos)
  | PAREN_END (pos) -> pos, Parser.PAREN_END (new_any M_special () spaces pos)
  | BRACKET (pos) -> pos, Parser.BRACKET (new_any M_special () spaces pos)
  | BRACKET_END (pos) -> pos, Parser.BRACKET_END (new_any M_special () spaces pos)
  | BRACKET_HASHREF (pos) -> pos, Parser.BRACKET_HASHREF (new_any M_special () spaces pos)
  | ARRAYREF (pos) -> pos, Parser.ARRAYREF (new_any M_special () spaces pos)
  | ARRAYREF_END (pos) -> pos, Parser.ARRAYREF_END (new_any M_special () spaces pos)
  | ARROW (pos) -> pos, Parser.ARROW (new_any M_special () spaces pos)
  | INCR (pos) -> pos, Parser.INCR (new_any M_special () spaces pos)
  | DECR (pos) -> pos, Parser.DECR (new_any M_special () spaces pos)
  | POWER (pos) -> pos, Parser.POWER (new_any M_special () spaces pos)
  | TIGHT_NOT (pos) -> pos, Parser.TIGHT_NOT (new_any M_special () spaces pos)
  | BIT_NEG (pos) -> pos, Parser.BIT_NEG (new_any M_special () spaces pos)
  | REF (pos) -> pos, Parser.REF (new_any M_special () spaces pos)
  | PATTERN_MATCH (pos) -> pos, Parser.PATTERN_MATCH (new_any M_special () spaces pos)
  | PATTERN_MATCH_NOT(pos) -> pos, Parser.PATTERN_MATCH_NOT(new_any M_special () spaces pos)
  | LT (pos) -> pos, Parser.LT (new_any
      M_special () spaces pos)
  | GT (pos) -> pos, Parser.GT (new_any M_special () spaces pos)
  | BIT_AND (pos) -> pos, Parser.BIT_AND (new_any M_special () spaces pos)
  | BIT_OR (pos) -> pos, Parser.BIT_OR (new_any M_special () spaces pos)
  | BIT_XOR (pos) -> pos, Parser.BIT_XOR (new_any M_special () spaces pos)
  | AND_TIGHT (pos) -> pos, Parser.AND_TIGHT (new_any M_special () spaces pos)
  | OR_TIGHT (pos) -> pos, Parser.OR_TIGHT (new_any M_special () spaces pos)
  | QUESTION_MARK (pos) -> pos, Parser.QUESTION_MARK (new_any M_special () spaces pos)
  | COLON (pos) -> pos, Parser.COLON (new_any M_special () spaces pos)
  | COMMA (pos) -> pos, Parser.COMMA (new_any M_special () spaces pos)
  | CONCAT (pos) -> pos, Parser.CONCAT (new_any M_special () spaces pos)
  | MULT_L_STR (pos) -> pos, Parser.MULT_L_STR (new_any M_special () spaces pos)
  | RIGHT_ARROW (pos) -> pos, Parser.RIGHT_ARROW (new_any M_special () spaces pos)
  | NOT (pos) -> pos, Parser.NOT (new_any M_special () spaces pos)
  | AND (pos) -> pos, Parser.AND (new_any M_special () spaces pos)
  | OR (pos) -> pos, Parser.OR (new_any M_special () spaces pos)
  | XOR (pos) -> pos, Parser.XOR (new_any M_special () spaces pos)
  | SPACE _ | CR -> internal_error "raw_token_to_token"

and raw_token_to_token spaces raw_token =
  let _, token = raw_token_to_pos_and_token spaces raw_token in token

and raw_interpolated_string_to_tokens l =
  List.map (fun (s, rtok) -> s, concat_spaces Space_0 rtok) l

(* fold SPACE and CR pseudo-tokens into the "spaces" information carried by the following token *)
and concat_spaces spaces = function
  | CR :: l -> concat_spaces Space_cr l
  | SPACE n :: l ->
      let spaces' =
        match spaces with
        | Space_cr -> Space_cr
        | Space_0 -> if n = 1 then Space_1 else Space_n
        | _ -> Space_n
      in
      concat_spaces spaces' l
  | [] -> []
  | token :: l -> raw_token_to_pos_and_token spaces token :: concat_spaces Space_0 l

let rec lexbuf2list accu t lexbuf =
  match t lexbuf with
  | EOF pos -> List.rev (EOF pos :: accu)
  | e -> lexbuf2list (e :: accu) t lexbuf

let get_token token lexbuf =
  let tokens = lexbuf2list [] token lexbuf in
  let tokens = concat_bareword_paren [] tokens in
  let tokens = concat_spaces Space_0 tokens in
  tokens

let next_rule = Stack.create()

let pos lexbuf = lexeme_start lexbuf, lexeme_end lexbuf
let pos2sfull_with start end_ = Info.pos2sfull (!current_file, start, end_)
let pos2sfull lexbuf = pos2sfull_with (lexeme_start lexbuf) (lexeme_end lexbuf)
let putback lexbuf nb = lexbuf.Lexing.lex_curr_pos <- lexbuf.Lexing.lex_curr_pos - nb
let add_a_new_line raw_pos = incr current_file_current_line ; lpush current_file_lines_starts raw_pos

(* here-document bodies are read when the end of the current line is reached:
   the mark is queued here together with a ref that will receive the body *)
let here_docs = Queue.create()
let raw_here_docs = Queue.create()
let current_here_doc_mark = ref ""
let here_doc_next_line mark =
  let here_doc_ref = ref([], bpos) in
  Queue.push (mark, here_doc_ref) here_docs ;
  here_doc_ref
let raw_here_doc_next_line mark =
  let here_doc_ref = ref("", bpos) in
  Queue.push (mark, here_doc_ref) raw_here_docs ;
  here_doc_ref

let delimit_char = ref '/'
type string_escape_kinds = Double_quote | Qq | Delimited | Here_doc
let string_escape_kind = ref Double_quote
let not_ok_for_match = ref (-1)
let string_nestness = ref 0

let building_current_interpolated_string = Stack.create()
let building_current_string = Stack.create()
let current_string_start_pos = ref 0
let current_string_start_line = ref 0

let warn lexbuf err = prerr_endline (pos2sfull_with (lexeme_start lexbuf) (lexeme_end lexbuf) ^ err)
let die lexbuf err = failwith (pos2sfull_with (lexeme_start lexbuf) (lexeme_end lexbuf) ^ err)
let die_in_string lexbuf err = failwith (pos2sfull_with !current_string_start_pos (lexeme_end lexbuf) ^ err)
let warn_escape_unneeded lexbuf c = warn lexbuf ("you can replace \\" ^ c ^ " with " ^ c)

let next_interpolated toks =
  let r = Stack.top building_current_string in
  Queue.push (!r, toks) (Stack.top building_current_interpolated_string) ;
  r := ""

(* raw_ins runs the sub-lexer "t" and returns the raw text it accumulated, with its position;
   ins does the same for interpolated strings, returning the list of (text, tokens) pieces *)
let raw_ins t lexbuf =
  Stack.push (ref "") building_current_string;
  current_string_start_pos := lexeme_start lexbuf;
  t lexbuf ;
  !(Stack.pop building_current_string), (!current_string_start_pos, lexeme_end lexbuf)

let ins t lexbuf =
  Stack.push (Queue.create()) building_current_interpolated_string ;
  Stack.push (ref "") building_current_string;
  current_string_start_pos := lexeme_start lexbuf;
  t lexbuf ;
  next_interpolated [] ;
  let _ = Stack.pop building_current_string in
  queue2list (Stack.pop building_current_interpolated_string), (!current_string_start_pos, lexeme_end lexbuf)

let raw_ins_to_string t lexbuf =
  let s, pos = raw_ins t lexbuf in
  not_ok_for_match := lexeme_end lexbuf;
  RAW_STRING(s, pos)

let ins_to_string t lexbuf =
  let s, pos = ins t lexbuf in
  not_ok_for_match := lexeme_end lexbuf;
  STRING(s, pos)

let next_s s t lexbuf =
  let r = Stack.top building_current_string in
  r := !r ^ s ;
  t lexbuf
let next t lexbuf = next_s (lexeme lexbuf) t lexbuf

let string_interpolate token pre lexbuf =
  let s = lexeme lexbuf in
  let local_lexbuf = Lexing.from_string (pre ^ s ^ " ") in (* add a space to help tokenizing "xxx$$" *)
  local_lexbuf.lex_abs_pos <- lexeme_start lexbuf ;
  let l = lexbuf2list [] token local_lexbuf in
  let l = concat_bareword_paren [] l in
  next_interpolated l;
  (Stack.pop next_rule) lexbuf

let ident_type_from_char fq name lexbuf c =
  not_ok_for_match := lexeme_end lexbuf;
  match c with
  | '$' -> SCALAR_IDENT(fq, name, pos lexbuf)
  | '@' -> ARRAY_IDENT (fq, name, pos lexbuf)
  | '%' -> HASH_IDENT (fq, name, pos lexbuf)
  | '&' -> FUNC_IDENT (fq, name, pos lexbuf)
  | '*' -> STAR_IDENT (fq, name, pos lexbuf)
  | _ -> internal_error "ident_type_from_char"

let split_at_two_colons s =
  let i_fq = String.rindex s ':' in
  String.sub s 0 (i_fq - 1), skip_n_char (i_fq + 1) s

let ident_from_lexbuf lexbuf =
  let fq, name = split_at_two_colons (lexeme lexbuf) in
  RAW_IDENT(Some fq, name, pos lexbuf)

let typed_ident_from_lexbuf lexbuf =
  let s = lexeme lexbuf in
  ident_type_from_char None (skip_n_char 1 s) lexbuf s.[0]

let typed_fqident_from_lexbuf lexbuf =
  let s = lexeme lexbuf in
  let fq, name = split_at_two_colons (skip_n_char 1 s) in
  ident_type_from_char (Some fq) name lexbuf s.[0]

let arraylen_ident_from_lexbuf lexbuf =
  not_ok_for_match := lexeme_end lexbuf;
  let s = lexeme lexbuf in
  ARRAYLEN_IDENT(None, skip_n_char 2 s, pos lexbuf)

let arraylen_fqident_from_lexbuf lexbuf =
  let s = lexeme lexbuf in
  let fq, name = split_at_two_colons (skip_n_char 2 s) in
  ARRAYLEN_IDENT(Some fq, name, pos lexbuf)

let check_multi_line_delimited_string opts (start, end_) =
  let check =
    match opts with
    | None -> true
    | Some s -> not (String.contains s 'x')
  in
  if check then
    if !current_file_current_line <> !current_string_start_line then
      failwith (pos2sfull_with start end_ ^ "multi-line patterns are not allowed (or use /x modifier)")

let hex_in_string lexbuf next_rule s =
  let i =
    try int_of_string ("0x" ^ s)
    with Failure("int_of_string") -> die_in_string lexbuf ("Bad_hex_in_string \"" ^ lexeme lexbuf ^ "\"")
  in
  let s = if i < 256 then String.make 1 (Char.chr i) else "\\x{" ^ s ^ "}" in
  next_s s (Stack.pop next_rule) lexbuf
}

let stash = [ '$' '@' '%' '&' '*' ]
let ident_start = ['a'-'z' 'A'-'Z' '_']
let ident = ident_start ['0'-'9' 'A'-'Z' 'a'-'z' '_'] *
let pattern_separator = [ '/' '!'
  ',' '|' ]

rule token = parse
| [' ' '\t']+ {
    (* propagate not_ok_for_match when it was set by the previous token *)
    if lexeme_start lexbuf = !not_ok_for_match then not_ok_for_match := lexeme_end lexbuf;
    SPACE(lexeme_end lexbuf - lexeme_start lexbuf)
  }
| "# perl_checker: " [^ '\n']* { PERL_CHECKER_COMMENT(skip_n_char 16 (lexeme lexbuf), pos lexbuf) }
| "#-PO: " [^ '\n']* { PO_COMMENT(skip_n_char 1 (lexeme lexbuf), pos lexbuf) }
| '#' [^ '\n']* { SPACE(1) }

| "\n=" { add_a_new_line(lexeme_end lexbuf - 1); let _ = ins pod_command lexbuf in token lexbuf }
| '\n' {
    add_a_new_line(lexeme_end lexbuf);
    (try
      let (mark, r) = Queue.pop here_docs in
      current_here_doc_mark := mark ;
      r := ins here_doc lexbuf
    with Queue.Empty ->
    try
      let (mark, r) = Queue.pop raw_here_docs in
      current_here_doc_mark := mark ;
      r := raw_ins raw_here_doc lexbuf
    with Queue.Empty -> ());
    CR
  }

| "->" { ARROW(pos lexbuf) }
| "++" { INCR(pos lexbuf) }
| "--" { DECR(pos lexbuf) }
| "**" { POWER(pos lexbuf) }
| "!" { TIGHT_NOT(pos lexbuf) }
| "~" { BIT_NEG(pos lexbuf) }
| "=~" { PATTERN_MATCH(pos lexbuf) }
| "!~" { PATTERN_MATCH_NOT(pos lexbuf) }
| "*" { MULT(lexeme lexbuf, pos lexbuf) }
| "%" { MULT(lexeme lexbuf, pos lexbuf) }
| "x" { MULT_L_STR(pos lexbuf) }
| "+" { PLUS(lexeme lexbuf, pos lexbuf) }
| "-" { PLUS(lexeme lexbuf, pos lexbuf) }
| "." { CONCAT(pos lexbuf) }
| "<<" { BIT_SHIFT(lexeme lexbuf, pos lexbuf) }
| ">>" { BIT_SHIFT(lexeme lexbuf, pos lexbuf) }
| "<" { LT(pos lexbuf) }
| ">" { GT(pos lexbuf) }
| "<=" | ">=" { COMPARE_OP(lexeme lexbuf, pos lexbuf) }
| "lt" | "gt" | "le" | "ge" { COMPARE_OP_STR(lexeme lexbuf, pos lexbuf) }
| "==" | "!=" | "<=>" { EQ_OP(lexeme lexbuf, pos lexbuf) }
| "eq" | "ne" | "cmp" { EQ_OP_STR(lexeme lexbuf, pos lexbuf) }
| "&" { BIT_AND(pos lexbuf) }
| "|" { BIT_OR(pos lexbuf) }
| "^" { BIT_XOR(pos lexbuf) }
| "&&" { AND_TIGHT(pos lexbuf) }
| "||" { OR_TIGHT(pos lexbuf) }
| ".." { DOTDOT(lexeme lexbuf, pos lexbuf) }
| "..." { DOTDOT(lexeme lexbuf, pos lexbuf) }
| "?" { QUESTION_MARK(pos lexbuf) }
| ":" { COLON(pos lexbuf) }
| "::" { PKG_SCOPE(pos lexbuf) }
| "=" | "+=" | "-=" | "*=" | ".=" | "|=" | "&=" | "^=" | "||=" | "&&=" { ASSIGN(lexeme lexbuf, pos lexbuf) }
| "," { COMMA(pos lexbuf) }
| "=>" { RIGHT_ARROW(pos lexbuf) }

| "not" { NOT(pos lexbuf) }
| "and" { AND(pos lexbuf) }
| "or" { OR(pos lexbuf) }
| "xor" { XOR(pos lexbuf) }
| "if" { IF(pos lexbuf) }
| "else" { ELSE(pos lexbuf) }
| "elsif" { ELSIF(pos lexbuf) }
| "unless" { UNLESS(pos lexbuf) }
| "do" { DO(pos lexbuf) }
| "while" { WHILE(pos lexbuf) }
| "until" { UNTIL(pos lexbuf) }
| "foreach" { FOR(lexeme lexbuf, pos lexbuf) }
| "for" { FOR(lexeme lexbuf, pos lexbuf) }
| "my" { MY_OUR(lexeme lexbuf, pos lexbuf) }
| "our" { MY_OUR(lexeme lexbuf, pos lexbuf) }
| "local" { LOCAL(pos lexbuf) }
| "continue" { CONTINUE(pos lexbuf) }
| "sub" { SUB(pos lexbuf) }
| "package" { PACKAGE(pos lexbuf) }
| "use" { USE(pos lexbuf) }
| "BEGIN" { BEGIN(pos lexbuf) }
| "END" { END(pos lexbuf) }
| "print" { PRINT(lexeme lexbuf, pos lexbuf) }
| "printf" { PRINT(lexeme lexbuf, pos lexbuf) }
| "new" { NEW(pos lexbuf) }
| "format" { let _ = raw_here_doc_next_line "." in FORMAT(pos lexbuf) }
| "defined" | "length" | "keys" | "exists" | "ref" { ONE_SCALAR_PARA(lexeme lexbuf, pos lexbuf) }
| "split" | "grep" { (* ok_for_match!
    *) BAREWORD(lexeme lexbuf, pos lexbuf) }

| "print " ['A'-'Z'] ['A'-'Z' '0'-'9']* ['\n' ' '] { putback lexbuf 1; PRINT_TO_STAR(("print", skip_n_char 6 (lexeme lexbuf)), pos lexbuf) }
| "print $" ident ['\n' ' '] { putback lexbuf 1; PRINT_TO_SCALAR(("print", skip_n_char 7 (lexeme lexbuf)), pos lexbuf) }
| "printf " ['A'-'Z'] ['A'-'Z' '0'-'9']* ['\n' ' '] { putback lexbuf 1; PRINT_TO_STAR(("printf", skip_n_char 7 (lexeme lexbuf)), pos lexbuf) }
| "printf $" ident ['\n' ' '] { putback lexbuf 1; PRINT_TO_SCALAR(("printf", skip_n_char 8 (lexeme lexbuf)), pos lexbuf) }

| ident ' '* "=>" {
    (* needed so that (if => 1) works *)
    let s = lexeme lexbuf in
    let end_ = String.length s - 1 in
    let ident_end = non_rindex_from s (end_ - 2) ' ' in
    putback lexbuf (end_ - ident_end);
    BAREWORD(String.sub s 0 (ident_end+1), pos lexbuf)
  }
| "{" ident "}" {
    (* needed so that $h{if} works *)
    not_ok_for_match := lexeme_end lexbuf;
    COMPACT_HASH_SUBSCRIPT(skip_n_char_ 1 1 (lexeme lexbuf), pos lexbuf)
  }

| '@' { AT(pos lexbuf) }
| '$' { DOLLAR(pos lexbuf) }
| '$' '#' { ARRAYLEN(pos lexbuf) }
| '%' ['$' '{'] { putback lexbuf 1; PERCENT(pos lexbuf) }
| '&' ['$' '{'] { putback lexbuf 1; AMPERSAND(pos lexbuf) }
| '*' ['$' '{'] { putback lexbuf 1; if lexeme_start lexbuf = !not_ok_for_match then MULT("*", pos lexbuf) else STAR(pos lexbuf) }

| ';' { SEMI_COLON(pos lexbuf) }
| '(' { PAREN(pos lexbuf) }
| '{' { BRACKET(pos lexbuf) }
| "+{" { BRACKET_HASHREF(pos lexbuf) }
| '[' { ARRAYREF(pos lexbuf) }
| ')' { not_ok_for_match := lexeme_end lexbuf; PAREN_END(pos lexbuf) }
| '}' { not_ok_for_match := lexeme_end lexbuf; BRACKET_END(pos lexbuf) }
| ']' { not_ok_for_match := lexeme_end lexbuf; ARRAYREF_END(pos lexbuf) }

| "/" {
    if lexeme_start lexbuf = !not_ok_for_match then MULT("/", pos lexbuf)
    else (
      delimit_char := '/' ;
      current_string_start_line := !current_file_current_line;
      let s, pos = ins re_delimited_string lexbuf in
      let opts, _ = raw_ins pattern_options lexbuf in
      check_multi_line_delimited_string (Some opts) pos ;
      PATTERN(s, opts, pos)
    )
  }
| "/=" {
    if lexeme_start lexbuf = !not_ok_for_match then ASSIGN(lexeme lexbuf, pos lexbuf)
    else (
      putback lexbuf 1 ;
      delimit_char := '/' ;
      let s, pos = ins re_delimited_string lexbuf in
      let opts, _ = raw_ins pattern_options lexbuf in
      PATTERN(s, opts, pos)
    )
  }
| "m" pattern_separator {
    delimit_char := lexeme_char lexbuf 1 ;
    current_string_start_line := !current_file_current_line;
    let s, pos = ins re_delimited_string lexbuf in
    let opts, _ = raw_ins pattern_options lexbuf in
    check_multi_line_delimited_string (Some opts) pos ;
    PATTERN(s, opts, pos)
  }
| "qr" pattern_separator {
    delimit_char := lexeme_char lexbuf 2 ;
    current_string_start_line := !current_file_current_line;
    let s, pos = ins re_delimited_string lexbuf in
    let opts, _ = raw_ins pattern_options lexbuf in
    check_multi_line_delimited_string (Some opts) pos ;
    QR_PATTERN(s, opts, pos)
  }
| "s" pattern_separator {
    delimit_char := lexeme_char lexbuf 1 ;
    current_string_start_line := !current_file_current_line;
    let s1, (start, _) = ins re_delimited_string lexbuf in
    let s2, (_, end_) = ins delimited_string lexbuf in
    let opts, _ = raw_ins pattern_options lexbuf in
    let pos = start, end_ in
    check_multi_line_delimited_string (Some opts) pos ;
    PATTERN_SUBST(s1, s2, opts, pos)
  }
| "tr" pattern_separator {
    delimit_char := lexeme_char lexbuf 2 ;
    current_string_start_line := !current_file_current_line;
    let s1, (start, _) = ins delimited_string lexbuf in
    let s2, (_, end_) = ins delimited_string lexbuf in
    let opts, _ = raw_ins pattern_options lexbuf in
    let pos = start, end_ in
    check_multi_line_delimited_string None pos ;
    PATTERN_SUBST(s1, s2, opts, pos)
  }

| "<<" ident { not_ok_for_match := lexeme_end lexbuf; HERE_DOC(here_doc_next_line (skip_n_char 2 (lexeme lexbuf)), pos lexbuf) }
| "<<'" ident "'" { not_ok_for_match := lexeme_end lexbuf; RAW_HERE_DOC(raw_here_doc_next_line (skip_n_char_ 3 1 (lexeme lexbuf)), pos lexbuf) }
| "<<" ' '+ "'"
| "<<" ' '+ ident { failwith (pos2sfull_with (lexeme_start lexbuf + 2) (lexeme_end lexbuf) ^ "No space allowed between \"<<\" and the marker") }
| "<<" ' '* '"' { failwith (pos2sfull_with (lexeme_start lexbuf + 2) (lexeme_end lexbuf) ^ "Don't use <<\"MARK\", use <>" (pos2sfull lexbuf) (lexeme lexbuf)) }

and string = parse
| '"' { () }
| '\\' { Stack.push string next_rule ; string_escape_kind := Double_quote; string_escape lexbuf }
| '$' { Stack.push string next_rule ; string_interpolate_scalar lexbuf }
| '@' { Stack.push string next_rule ; string_interpolate_array lexbuf }
| '\n' { add_a_new_line(lexeme_end lexbuf); next string lexbuf }
| [^ '\n' '\\' '"' '$' '@']+ { next string lexbuf }
| eof { die_in_string lexbuf "Unterminated_string" }

and delimited_string = parse
| '\\' { Stack.push delimited_string next_rule ; string_escape_kind := Delimited; string_escape lexbuf }
| '$' { Stack.push delimited_string next_rule ; delimited_string_interpolate_scalar lexbuf }
| '@' { Stack.push delimited_string next_rule ; delimited_string_interpolate_array lexbuf }
| '\n' { add_a_new_line(lexeme_end lexbuf); next delimited_string lexbuf }
| eof { die_in_string lexbuf "Unterminated_delimited_string" }
| [ ^ '\n' '\\' '$' '@'] { if lexeme_char lexbuf 0 <> !delimit_char then next delimited_string lexbuf }

and re_delimited_string = parse
| '\\' { Stack.push re_delimited_string next_rule ; re_string_escape lexbuf }
| '$' { Stack.push re_delimited_string next_rule ; delimited_string_interpolate_scalar lexbuf }
| '@' { Stack.push re_delimited_string next_rule ; delimited_string_interpolate_array lexbuf }
| '\n' { add_a_new_line(lexeme_end lexbuf); next re_delimited_string lexbuf }
| eof { die_in_string lexbuf "Unterminated_delimited_string" }
| [ ^ '\n' '\\' '$' '@'] { if lexeme_char lexbuf 0 <> !delimit_char then next re_delimited_string lexbuf }

and rawstring = parse
| ''' { () }
| '\n' { add_a_new_line(lexeme_end lexbuf); next rawstring lexbuf }
| '\\' { next rawstring lexbuf }
| "\\'" { next_s "'" rawstring lexbuf }
| [^ '\n' ''' '\\']+ { next rawstring lexbuf }
| eof { die_in_string lexbuf "Unterminated_rawstring" }

and qqstring = parse
| ')' { if !string_nestness <> 0 then (decr string_nestness; next qqstring lexbuf) }
| '(' { incr string_nestness; next qqstring lexbuf }
| '\\' { Stack.push qqstring next_rule ; string_escape_kind := Qq; string_escape lexbuf }
| '$' { Stack.push qqstring next_rule ; string_interpolate_scalar lexbuf }
| '@' { Stack.push qqstring next_rule ; string_interpolate_array lexbuf }
| '\n' { add_a_new_line(lexeme_end lexbuf); next qqstring lexbuf }
| [^ '\n' '(' ')' '\\' '$' '@']+ { next qqstring lexbuf }
| eof { die_in_string lexbuf "Unterminated_qqstring" }

and qstring = parse
| ')' { if !string_nestness <> 0 then (decr string_nestness ; next qstring lexbuf) }
| '(' { incr string_nestness; next qstring lexbuf }
| '\n' { add_a_new_line(lexeme_end lexbuf); next qstring lexbuf }
| [^ '\n' '(' ')']+ { next qstring lexbuf }
| eof { die_in_string lexbuf "Unterminated_qstring" }

and here_doc = parse
| '\\' { Stack.push here_doc next_rule ; string_escape_kind := Here_doc; string_escape lexbuf }
| '$' { Stack.push here_doc next_rule ; string_interpolate_scalar lexbuf }
| '@' { Stack.push here_doc next_rule ; string_interpolate_array lexbuf }
| [ ^ '\n' '\\' '$' '@' ]* {
    let s = lexeme lexbuf in
    if chomps s <> !current_here_doc_mark then next_s s here_doc lexbuf
    else if s <> !current_here_doc_mark then
      Printf.eprintf "%sTrailing spaces after HERE-document mark\n" (pos2sfull lexbuf)
  }
| '\n' { add_a_new_line(lexeme_end lexbuf); next here_doc lexbuf }
| eof { die_in_string lexbuf "Unterminated_here_doc" }

and raw_here_doc = parse
| [ ^ '\n' ]* {
    let s = lexeme lexbuf in
    if chomps s <> !current_here_doc_mark then next_s s raw_here_doc lexbuf
    else if s <> !current_here_doc_mark then
      Printf.eprintf "%sTrailing spaces after HERE-document mark\n" (pos2sfull lexbuf)
  }
| '\n' { add_a_new_line(lexeme_end lexbuf); next raw_here_doc lexbuf }
| eof { die_in_string lexbuf "Unterminated_raw_here_doc" }

and string_escape = parse
| ['0'-'9'] { next_s (String.make 1 (Char.chr (int_of_string (lexeme lexbuf)))) (Stack.pop next_rule) lexbuf }
| '\\' { next_s "\\" (Stack.pop next_rule) lexbuf }
| 'n' { next_s "\n" (Stack.pop next_rule) lexbuf }
| 't' { next_s "\t" (Stack.pop next_rule) lexbuf }
| "x{" [^ '}']* '}' { hex_in_string lexbuf next_rule (skip_n_char_ 2 1 (lexeme lexbuf)) }
| 'x' [^ '{'] _ { hex_in_string lexbuf next_rule (skip_n_char 1 (lexeme lexbuf)) }
| '\n' { die lexbuf "do not use \"\\\" before end-of-line, it's useless and generally bad" }
| ['b' 'f' '$' '@' '%' 'a' 'r'] { next_s ("\\" ^ lexeme lexbuf) (Stack.pop next_rule) lexbuf }
| _ {
    let c = lexeme lexbuf in
    (match !string_escape_kind with
    | Double_quote ->
        if c = "\"" then
          (* don't warn since it's used a lot, esp. in N("xxx \"xxx\" xxx") *)
          (*warn lexbuf "you can replace \"xxx\\\"xxx\" with qq(xxx\"xxx), that way you don't need to escape <\">"*)
          ()
        else warn_escape_unneeded lexbuf c
    | Qq -> if c <> "(" && c <> ")" then warn_escape_unneeded lexbuf c
    | Here_doc -> warn_escape_unneeded lexbuf c
    | Delimited ->
        if c = String.make 1 !delimit_char then
          warn lexbuf ("change the delimit character " ^ String.make 1 !delimit_char ^ " to get rid of this escape")
        else warn_escape_unneeded lexbuf c);
    next_s ("\\" ^ lexeme lexbuf) (Stack.pop next_rule) lexbuf
  }

and re_string_escape = parse
| ['0'-'9'] { next_s (String.make 1 (Char.chr (int_of_string (lexeme lexbuf)))) (Stack.pop next_rule) lexbuf }
| '\\' { next_s "\\" (Stack.pop next_rule) lexbuf }
| 'n' { next_s "\n" (Stack.pop next_rule) lexbuf }
| 't' { next_s "\t" (Stack.pop next_rule) lexbuf }
| "x{" [^ '}']* '}' { hex_in_string lexbuf next_rule (skip_n_char_ 2 1 (lexeme lexbuf)) }
| 'x' [^ '{'] _ { hex_in_string lexbuf next_rule (skip_n_char 1 (lexeme lexbuf)) }
| '\n' { die lexbuf "do not use \"\\\" before end-of-line, it's useless and generally bad" }
| ['b' 'f' '$' '@' '%' '"' 's' 'S' 'd' 'D' 'w' 'W' 'Q' 'E' 'b' '.' '*' '+' '?' '[' ']' '(' ')' '|' '{' '}' '-'] { next (Stack.pop next_rule) lexbuf }
| _ {
    let c = lexeme lexbuf in
    if c = String.make 1 !delimit_char then
      warn lexbuf ("change the delimit character " ^ String.make 1 !delimit_char ^ " to get rid of this escape")
    else warn_escape_unneeded lexbuf c ;
    next_s ("\\" ^ lexeme lexbuf) (Stack.pop next_rule) lexbuf
  }

and string_interpolate_scalar = parse
| '$' ident
| ['0'-'9']
| '{' [^ '{' '}']* '}'
| (ident | (ident? ("::" ident)+)) "->"? (('{' [^ '{' '}' '\n']* '}') | ('[' [^ '[' ']' '\n']* ']'))*
| [^ '{' '}' ' ' '\n' '"'] { (* eg: $! $$ *) string_interpolate token "$" lexbuf }
| "{"
| ident "->"?
    '{'
| '"' { putback lexbuf 1; next_s "$" (Stack.pop next_rule) lexbuf }
| eof { next_s "$" (Stack.pop next_rule) lexbuf }
| _ { warn lexbuf (Printf.sprintf "weird \"%s\" in string" (lexeme lexbuf)); next_s ("$" ^ lexeme lexbuf) (Stack.pop next_rule) lexbuf }

and delimited_string_interpolate_scalar = parse (* needed for delimited string like m!foo$! where $! should not be taken as is... *)
| '$' ident
| ['0'-'9']
| '{' [^ '{' '}']* '}'
| (ident | (ident? ("::" ident)+)) "->"? ('{' [^ '{' '}' '\n']* '}')*
| (ident | (ident? ("::" ident)+)) "->"? (('{' [^ '{' '}' '\n']* '}') | ('[' ('$' ident | ['0'-'9']+) ']'))* { string_interpolate token "$" lexbuf }

| (ident | (ident? ("::" ident)+)) "->"? (('{' [^ '{' '}' '\n']* '}') | ('[' ['$' '0'-'9'] [^ '[' ']' '\n']* ']'))* { die lexbuf (Printf.sprintf "I really can't handle this, [xxx] can be indexing or not based on stellar position :-(") }

| "{"
| ident "->"? '{'
| eof { next_s "$" (Stack.pop next_rule) lexbuf }
| _ {
    let c = lexeme_char lexbuf 0 in
    if c <> !delimit_char && c <> '|' && c <> ')' && c <> '/' && c <> ' ' then
      warn lexbuf (Printf.sprintf "weird \"%s\" in string" (lexeme lexbuf));
    putback lexbuf 1;
    next_s "$" (Stack.pop next_rule) lexbuf
  }

and string_interpolate_array = parse
| '$' ident
| '{' [^ '{' '}']* '}'
| (ident | (ident? ("::" ident)+)) { string_interpolate token "@" lexbuf }
| [ '@' '*' '<' '>' ']' '.' '(' ' ' ] { next_s ("@" ^ lexeme lexbuf) (Stack.pop next_rule) lexbuf }
| '"' { putback lexbuf 1; next_s "@" (Stack.pop next_rule) lexbuf }
| eof { next_s "@" (Stack.pop next_rule) lexbuf }
| _ { warn lexbuf (Printf.sprintf "weird \"%s\" in string" (lexeme lexbuf)); next_s ("@" ^ lexeme lexbuf) (Stack.pop next_rule) lexbuf }

and delimited_string_interpolate_array = parse
| '$' ident
| '{' [^ '{' '}']* '}'
| (ident | (ident? ("::" ident)+)) { string_interpolate token "@" lexbuf }
| [ '@' '*' '<' '>' ']' '.' '(' ' ' ] { next_s ("@" ^ lexeme lexbuf) (Stack.pop next_rule) lexbuf }
| eof { next_s "@" (Stack.pop next_rule) lexbuf }
| _ {
    let c = lexeme_char lexbuf 0 in
    if c <> !delimit_char then warn lexbuf (Printf.sprintf "weird \"%s\" in string" (lexeme lexbuf));
    putback lexbuf 1;
    next_s "@" (Stack.pop next_rule) lexbuf
  }

and pattern_options = parse
| [ 'g' 'i' 'm' 'o' 's' 'x' 'e' 'd' ] { next pattern_options lexbuf }
| _ { putback lexbuf 1; () }

and pod_command = parse
| [^ '\n' ]+ {
    let s = lexeme lexbuf in
    let command = String.sub s 0 (try String.index s ' ' with Not_found -> String.length s) in
    match command with
    | "cut" ->
        if !(Stack.top building_current_string) = "" then
          failwith(pos2sfull lexbuf ^ "found POD command \"=cut\" but it is not a POD block")
    | "head1" | "head2" | "head3" | "head4" | "over" | "item" | "back" | "pod" | "begin" | "end" | "for" -> next pod lexbuf
    | s -> failwith(pos2sfull lexbuf ^ "unknown POD command \"" ^ s ^ "\"")
  }
| _ { failwith(pos2sfull lexbuf ^ "POD command expected") }

and pod = parse
| "\n=" { add_a_new_line(lexeme_end lexbuf - 1); next pod_command lexbuf }
| "\n" [^ '=' '\n'] [^ '\n']*
| "\n" { add_a_new_line(lexeme_end lexbuf); next pod lexbuf }
| eof
| _ { failwith(pos2sfull_with !current_string_start_pos (lexeme_end lexbuf) ^ "POD block still open") }