{ (* -*- caml -*- *) open Common open Types open Lexing open Info let bpos = -1,-1 type raw_token = | EOF of raw_pos | SPACE of int | CR | NUM of (string * raw_pos) | RAW_STRING of (string * raw_pos) | STRING of (raw_interpolated_string * raw_pos) | PATTERN of (raw_interpolated_string * string * raw_pos) | QR_PATTERN of (raw_interpolated_string * string * raw_pos) | PATTERN_SUBST of (raw_interpolated_string * raw_interpolated_string * string * raw_pos) | BAREWORD of (string * raw_pos) | BAREWORD_PAREN of (string * raw_pos) | REVISION of (string * raw_pos) | PERL_CHECKER_COMMENT of (string * raw_pos) | PO_COMMENT of (string * raw_pos) | POD of (string * raw_pos) | LABEL of (string * raw_pos) | COMMAND_STRING of (raw_interpolated_string * raw_pos) | PRINT_TO_STAR of ((string * string) * raw_pos) | PRINT_TO_SCALAR of ((string * string) * raw_pos) | QUOTEWORDS of (string * raw_pos) | COMPACT_HASH_SUBSCRIPT of (string * raw_pos) | RAW_HERE_DOC of ((string * raw_pos) ref * raw_pos) | HERE_DOC of (raw_interpolated_string * raw_pos) ref * raw_pos | SCALAR_IDENT of (string option * string * raw_pos) | ARRAY_IDENT of (string option * string * raw_pos) | HASH_IDENT of (string option * string * raw_pos) | FUNC_IDENT of (string option * string * raw_pos) | STAR_IDENT of (string option * string * raw_pos) | RAW_IDENT of (string option * string * raw_pos) | RAW_IDENT_PAREN of (string option * string * raw_pos) | ARRAYLEN_IDENT of (string option * string * raw_pos) | FUNC_DECL_WITH_PROTO of (string * string * raw_pos) | IF of raw_pos | ELSIF of raw_pos | ELSE of raw_pos | UNLESS of raw_pos | DO of raw_pos | WHILE of raw_pos | UNTIL of raw_pos | MY_OUR of (string * raw_pos) | CONTINUE of raw_pos | SUB of raw_pos | LOCAL of raw_pos | FOR of (string * raw_pos) | USE of raw_pos | PACKAGE of raw_pos | BEGIN of raw_pos | END of raw_pos | PRINT of (string * raw_pos) | NEW of (raw_pos) | FORMAT of (raw_pos) | AT of raw_pos | DOLLAR of raw_pos | PERCENT of raw_pos | AMPERSAND of raw_pos | STAR of raw_pos | ARRAYLEN of raw_pos | SEMI_COLON of raw_pos | PKG_SCOPE of raw_pos | PAREN of raw_pos | PAREN_END of raw_pos | BRACKET of raw_pos | BRACKET_END of raw_pos | BRACKET_HASHREF of raw_pos | ARRAYREF of raw_pos | ARRAYREF_END of raw_pos | ARROW of raw_pos | INCR of raw_pos | DECR of raw_pos | POWER of raw_pos | TIGHT_NOT of raw_pos | BIT_NEG of raw_pos | REF of raw_pos | ONE_SCALAR_PARA of (string * raw_pos) | PATTERN_MATCH of raw_pos | PATTERN_MATCH_NOT of raw_pos | MULT of (string * raw_pos) | PLUS of (string * raw_pos) | BIT_SHIFT of (string * raw_pos) | LT of raw_pos | GT of raw_pos | COMPARE_OP of (string * raw_pos) | EQ_OP of (string * raw_pos) | BIT_AND of raw_pos | BIT_OR of raw_pos | BIT_XOR of raw_pos | AND_TIGHT of raw_pos | OR_TIGHT of raw_pos | DOTDOT of (string * raw_pos) | QUESTION_MARK of raw_pos | COLON of raw_pos | ASSIGN of (string * raw_pos) | COMMA of raw_pos | RIGHT_ARROW of raw_pos | NOT of raw_pos | AND of raw_pos | OR of raw_pos | XOR of raw_pos and raw_interpolated_string = (string * raw_token list) list let rec concat_bareword_paren accu = function | PRINT(s, pos1) :: PAREN(pos2) :: l | BAREWORD(s, pos1) :: PAREN(pos2) :: l -> concat_bareword_paren (PAREN(pos2) :: BAREWORD_PAREN(s, pos1) :: accu) l | RAW_IDENT(kind, ident, pos1) :: PAREN(pos2) :: l -> concat_bareword_paren (PAREN(pos2) :: RAW_IDENT_PAREN(kind, ident, pos1) :: accu) l | [] -> List.rev accu | e :: l -> concat_bareword_paren (e :: accu) l let rec raw_token_to_pos_and_token spaces = function | NUM(s, pos) -> pos, Parser.NUM(s, (spaces, pos)) | RAW_STRING(s, pos) -> pos, Parser.RAW_STRING(s, (spaces, pos)) | RAW_HERE_DOC(r, pos) -> pos, Parser.RAW_HERE_DOC(!r, (spaces, pos)) | STRING(l, pos) -> pos, Parser.STRING(raw_interpolated_string_to_tokens l, (spaces, pos)) | COMMAND_STRING(l, pos) -> pos, Parser.COMMAND_STRING(raw_interpolated_string_to_tokens l, (spaces, pos)) | QR_PATTERN(s, opts, pos) -> pos, Parser.QR_PATTERN((raw_interpolated_string_to_tokens s, opts), (spaces, pos)) | PATTERN(s, opts, pos) -> pos, Parser.PATTERN((raw_interpolated_string_to_tokens s, opts), (spaces, pos)) | PATTERN_SUBST(from, to_, opts, pos) -> pos, Parser.PATTERN_SUBST((raw_interpolated_string_to_tokens from, raw_interpolated_string_to_tokens to_, opts), (spaces, pos)) | HERE_DOC(l, pos) -> pos, Parser.HERE_DOC((raw_interpolated_string_to_tokens (fst !l), snd !l), (spaces, pos)) | BAREWORD(s, pos) -> pos, Parser.BAREWORD(s, (spaces, pos)) | BAREWORD_PAREN(s, pos) -> pos, Parser.BAREWORD_PAREN(s, (spaces, pos)) | REVISION(s, pos) -> pos, Parser.REVISION(s, (spaces, pos)) | PERL_CHECKER_COMMENT(s, pos) -> pos, Parser.PERL_CHECKER_COMMENT(s, (spaces, pos)) | PO_COMMENT(s, pos) -> pos, Parser.PO_COMMENT(s, (spaces, pos)) | POD(s, pos) -> pos, Parser.POD(s, (spaces, pos)) | LABEL(s, pos) -> pos, Parser.LABEL(s, (spaces, pos)) | PRINT(s, pos) -> pos, Parser.PRINT(s, (spaces, pos)) | PRINT_TO_STAR(s, pos) -> pos, Parser.PRINT_TO_STAR(s, (spaces, pos)) | PRINT_TO_SCALAR(s, pos) -> pos, Parser.PRINT_TO_SCALAR(s, (spaces, pos)) | QUOTEWORDS(s, pos) -> pos, Parser.QUOTEWORDS(s, (spaces, pos)) | COMPACT_HASH_SUBSCRIPT(s, pos) -> pos, Parser.COMPACT_HASH_SUBSCRIPT(s, (spaces, pos)) | SCALAR_IDENT(kind, name, pos) -> pos, Parser.SCALAR_IDENT((kind, name), (spaces, pos)) | ARRAY_IDENT(kind, name, pos) -> pos, Parser.ARRAY_IDENT((kind, name), (spaces, pos)) | HASH_IDENT(kind, name, pos) -> pos, Parser.HASH_IDENT((kind, name), (spaces, pos)) | FUNC_IDENT(kind, name, pos) -> pos, Parser.FUNC_IDENT((kind, name), (spaces, pos)) | STAR_IDENT(kind, name, pos) -> pos, Parser.STAR_IDENT((kind, name), (spaces, pos)) | RAW_IDENT(kind, name, pos) -> pos, Parser.RAW_IDENT((kind, name), (spaces, pos)) | RAW_IDENT_PAREN(kind, name, pos) -> pos, Parser.RAW_IDENT_PAREN((kind, name), (spaces, pos)) | ARRAYLEN_IDENT(kind, name, pos) -> pos, Parser.ARRAYLEN_IDENT((kind, name), (spaces, pos)) | FUNC_DECL_WITH_PROTO(name, proto, pos) -> pos, Parser.FUNC_DECL_WITH_PROTO((name, proto), (spaces, pos)) | NEW(pos) -> pos, Parser.NEW((), (spaces, pos)) | FORMAT(pos) -> pos, Parser.FORMAT((), (spaces, pos)) | COMPARE_OP(s, pos) -> pos, Parser.COMPARE_OP(s, (spaces, pos)) | EQ_OP(s, pos) -> pos, Parser.EQ_OP(s, (spaces, pos)) | ASSIGN(s, pos) -> pos, Parser.ASSIGN(s, (spaces, pos)) | FOR(s, pos) -> pos, Parser.FOR(s, (spaces, pos)) | DOTDOT(s, pos) -> pos, Parser.DOTDOT(s, (spaces, pos)) | MULT(s, pos) -> pos, Parser.MULT(s, (spaces, pos)) | BIT_SHIFT(s, pos) -> pos, Parser.BIT_SHIFT(s, (spaces, pos)) | PLUS(s, pos) -> pos, Parser.PLUS(s, (spaces, pos)) | ONE_SCALAR_PARA(s, pos) -> pos, Parser.ONE_SCALAR_PARA(s, (spaces, pos)) | MY_OUR(s, pos) -> pos, Parser.MY_OUR(s, (spaces, pos)) | EOF (pos) -> pos, Parser.EOF ((), (spaces, pos)) | IF (pos) -> pos, Parser.IF ((), (spaces, pos)) | ELSIF (pos) -> pos, Parser.ELSIF ((), (spaces, pos)) | ELSE (pos) -> pos, Parser.ELSE ((), (spaces, pos)) | UNLESS (pos) -> pos, Parser.UNLESS ((), (spaces, pos)) | DO (pos) -> pos, Parser.DO ((), (spaces, pos)) | WHILE (pos) -> pos, Parser.WHILE ((), (spaces, pos)) | UNTIL (pos) -> pos, Parser.UNTIL ((), (spaces, pos)) | CONTINUE (pos) -> pos, Parser.CONTINUE ((), (spaces, pos)) | SUB (pos) -> pos, Parser.SUB ((), (spaces, pos)) | LOCAL (pos) -> pos, Parser.LOCAL ((), (spaces, pos)) | USE (pos) -> pos, Parser.USE ((), (spaces, pos)) | PACKAGE (pos) -> pos, Parser.PACKAGE ((), (spaces, pos)) | BEGIN (pos) -> pos, Parser.BEGIN ((), (spaces, pos)) | END (pos) -> pos, Parser.END ((), (spaces, pos)) | AT (pos) -> pos, Parser.AT ((), (spaces, pos)) | DOLLAR (pos) -> pos, Parser.DOLLAR ((), (spaces, pos)) | PERCENT (pos) -> pos, Parser.PERCENT ((), (spaces, pos)) | AMPERSAND (pos) -> pos, Parser.AMPERSAND ((), (spaces, pos)) | STAR (pos) -> pos, Parser.STAR ((), (spaces, pos)) | ARRAYLEN (pos) -> pos, Parser.ARRAYLEN ((), (spaces, pos)) | SEMI_COLON (pos) -> pos, Parser.SEMI_COLON ((), (spaces, pos)) | PKG_SCOPE (pos) -> pos, Parser.PKG_SCOPE ((), (spaces, pos)) | PAREN (pos) -> pos, Parser.PAREN ((), (spaces, pos)) | PAREN_END (pos) -> pos, Parser.PAREN_END ((), (spaces, pos)) | BRACKET (pos) -> pos, Parser.BRACKET ((), (spaces, pos)) | BRACKET_END (pos) -> pos, Parser.BRACKET_END ((), (spaces, pos)) | BRACKET_HASHREF (pos) -> pos, Parser.BRACKET_HASHREF ((), (spaces, pos)) | ARRAYREF (pos) -> pos, Parser.ARRAYREF ((), (spaces, pos)) | ARRAYREF_END (pos) -> pos, Parser.ARRAYREF_END ((), (spaces, pos)) | ARROW (pos) -> pos, Parser.ARROW ((), (spaces, pos)) | INCR (pos) -> pos, Parser.INCR ((), (spaces, pos)) | DECR (pos) -> pos, Parser.DECR ((), (spaces, pos)) | POWER (pos) -> pos, Parser.POWER ((), (spaces, pos)) | TIGHT_NOT (pos) -> pos, Parser.TIGHT_NOT ((), (spaces, pos)) | BIT_NEG (pos) -> pos, Parser.BIT_NEG ((), (spaces, pos)) | REF (pos) -> pos, Parser.REF ((), (spaces, pos)) | PATTERN_MATCH (pos) -> pos, Parser.PATTERN_MATCH ((), (spaces, pos)) | PATTERN_MATCH_NOT(pos) -> pos, Parser.PATTERN_MATCH_NOT((), (spaces, pos)) | LT (pos) -> pos, Parser.LT ((), (spaces, pos)) | GT (pos) -> pos, Parser.GT ((), (spaces, pos)) | BIT_AND (pos) -> pos, Parser.BIT_AND ((), (spaces, pos)) | BIT_OR (pos) -> pos, Parser.BIT_OR ((), (spaces, pos)) | BIT_XOR (pos) -> pos, Parser.BIT_XOR ((), (spaces, pos)) | AND_TIGHT (pos) -> pos, Parser.AND_TIGHT ((), (spaces, pos)) | OR_TIGHT (pos) -> pos, Parser.OR_TIGHT ((), (spaces, pos)) | QUESTION_MARK (pos) -> pos, Parser.QUESTION_MARK ((), (spaces, pos)) | COLON (pos) -> pos, Parser.COLON ((), (spaces, pos)) | COMMA (pos) -> pos, Parser.COMMA ((), (spaces, pos)) | RIGHT_ARROW (pos) -> pos, Parser.RIGHT_ARROW ((), (spaces, pos)) | NOT (pos) -> pos, Parser.NOT ((), (spaces, pos)) | AND (pos) -> pos, Parser.AND ((), (spaces, pos)) | OR (pos) -> pos, Parser.OR ((), (spaces, pos)) | XOR (pos) -> pos, Parser.XOR ((), (spaces, pos)) | SPACE _ | CR -> internal_error "raw_token_to_token" and raw_token_to_token spaces raw_token = let _, token = raw_token_to_pos_and_token spaces raw_token in token and raw_interpolated_string_to_tokens l = List.map (fun (s, rtok) -> s, concat_spaces Space_0 rtok) l and concat_spaces spaces = function | CR :: l -> concat_spaces Space_cr l | SPACE n :: l -> let spaces' = match spaces with | Space_cr -> Space_cr | Space_0 -> if n = 1 then Space_1 else Space_n | _ -> Space_n in concat_spaces spaces' l | [] -> [] | token :: l -> raw_token_to_pos_and_token spaces token :: concat_spaces Space_0 l let rec lexbuf2list accu t lexbuf = match t lexbuf with | EOF pos -> List.rev (EOF pos :: accu) | e -> lexbuf2list (e :: accu) t lexbuf let get_token token lexbuf = let tokens = lexbuf2list [] token lexbuf in let tokens = concat_bareword_paren [] tokens in let tokens = concat_spaces Space_0 tokens in tokens let next_rule = Stack.create() let pos lexbuf = lexeme_start lexbuf, lexeme_end lexbuf let pos2sfull_with start end_ = Info.pos2sfull (!current_file, start, end_) let pos2sfull lexbuf = pos2sfull_with (lexeme_start lexbuf) (lexeme_end lexbuf) let putback lexbuf nb = lexbuf.Lexing.lex_curr_pos <- lexbuf.Lexing.lex_curr_pos - nb let add_a_new_line raw_pos = incr current_file_current_line ; lpush current_file_lines_starts raw_pos let here_docs = Queue.create() let raw_here_docs = Queue.create() let current_here_doc_mark = ref "" let here_doc_next_line mark = let here_doc_ref = ref([], bpos) in Queue.push (mark, here_doc_ref) here_docs ; here_doc_ref let raw_here_doc_next_line mark = let here_doc_ref = ref("", bpos) in Queue.push (mark, here_doc_ref) raw_here_docs ; here_doc_ref let delimit_char = ref '/' let not_ok_for_match = ref (-1) let string_nestness = ref 0 let building_current_interpolated_string = Stack.create() let building_current_string = Stack.create() let current_string_start_pos = ref 0 let current_string_start_line = ref 0 let warn lexbuf err = prerr_endline (pos2sfull_with (lexeme_start lexbuf) (lexeme_end lexbuf) ^ err) let die lexbuf err = failwith (pos2sfull_with (lexeme_start lexbuf) (lexeme_end lexbuf) ^ err) let die_in_string lexbuf err = failwith (pos2sfull_with !current_string_start_pos (lexeme_end lexbuf) ^ err) let next_interpolated toks = let r = Stack.top building_current_string in Queue.push (!r, toks) (Stack.top building_current_interpolated_string) ; r := "" let raw_ins t lexbuf = Stack.push (ref "") building_current_string; current_string_start_pos := lexeme_start lexbuf; t lexbuf ; !(Stack.pop building_current_string), (!current_string_start_pos, lexeme_end lexbuf) let ins t lexbuf = Stack.push (Queue.create()) building_current_interpolated_string ; Stack.push (ref "") building_current_string; current_string_start_pos := lexeme_start lexbuf; t lexbuf ; next_interpolated [] ; let _ = Stack.pop building_current_string in queue2list (Stack.pop building_current_interpolated_string), (!current_string_start_pos, lexeme_end lexbuf) let raw_ins_to_string t lexbuf = let s, pos = raw_ins t lexbuf in not_ok_for_match := lexeme_end lexbuf; RAW_STRING(s, pos) let ins_to_string t lexbuf = let s, pos = ins t lexbuf in not_ok_for_match := lexeme_end lexbuf; STRING(s, pos) let next_s s t lexbuf = let r = Stack.top building_current_string in r := !r ^ s ; t lexbuf let next t lexbuf = next_s (lexeme lexbuf) t lexbuf let string_interpolate token pre lexbuf = let s = lexeme lexbuf in let local_lexbuf = Lexing.from_string (pre ^ s ^ " ") in (* add a space to help tokenizing "xxx$$" *) local_lexbuf.lex_abs_pos <- lexeme_start lexbuf ; let l = lexbuf2list [] token local_lexbuf in let l = concat_bareword_paren [] l in next_interpolated l; (Stack.pop next_rule) lexbuf let ident_type_from_char fq name lexbuf c = not_ok_for_match := lexeme_end lexbuf; match c with | '$' -> SCALAR_IDENT(fq, name, pos lexbuf) | '@' -> ARRAY_IDENT (fq, name, pos lexbuf) | '%' -> HASH_IDENT (fq, name, pos lexbuf) | '&' -> FUNC_IDENT (fq, name, pos lexbuf) | '*' -> STAR_IDENT (fq, name, pos lexbuf) | _ -> internal_error "ident_type_from_char" let split_at_two_colons s = let i_fq = String.rindex s ':' in String.sub s 0 (i_fq - 1), skip_n_char (i_fq + 1) s let ident_from_lexbuf lexbuf = let fq, name = split_at_two_colons (lexeme lexbuf) in RAW_IDENT(Some fq, name, pos lexbuf) let typed_ident_from_lexbuf lexbuf = let s = lexeme lexbuf in ident_type_from_char None (skip_n_char 1 s) lexbuf s.[0] let typed_fqident_from_lexbuf lexbuf = let s = lexeme lexbuf in let fq, name = split_at_two_colons (skip_n_char 1 s) in ident_type_from_char (Some fq) name lexbuf s.[0] let arraylen_ident_from_lexbuf lexbuf = not_ok_for_match := lexeme_end lexbuf; let s = lexeme lexbuf in ARRAYLEN_IDENT(None, skip_n_char 2 s, pos lexbuf) let arraylen_fqident_from_lexbuf lexbuf = let s = lexeme lexbuf in let fq, name = split_at_two_colons (skip_n_char 2 s) in ARRAYLEN_IDENT(Some fq, name, pos lexbuf) let check_multi_line_delimited_string opts (start, end_) = let check = match opts with | None -> true | Some s -> not (String.contains s 'x') in if check then if !current_file_current_line <> !current_string_start_line then failwith (pos2sfull_with start end_ ^ "multi-line patterns are not allowed (or use /x modifier)") let hex_in_string lexbuf next_rule s = let i = try int_of_string ("0x" ^ s) with Failure("int_of_string") -> die_in_string lexbuf ("Bad_hex_in_string \"" ^ lexeme lexbuf ^ "\"") in let s = if i < 256 then String.make 1 (Char.chr i) else "\\x{" ^ s ^ "}" in next_s s (Stack.pop next_rule) lexbuf } let stash = [ '$' '@' '%' '&' '*' ] let ident_start = ['a'-'z' 'A'-'Z' '_'] let ident = ident_start ['0'-'9' 'A'-'Z' 'a'-'z' '_'] * let pattern_separator = [ '/' '!' ',' '|' ] rule token = parse | [' ' '\t']+ { (* propagate not_ok_for_match when it was set by the previous token *) if lexeme_start lexbuf = !not_ok_for_match then not_ok_for_match := lexeme_end lexbuf; SPACE(lexeme_end lexbuf - lexeme_start lexbuf) } | "# perl_checker: " [^ '\n']* { PERL_CHECKER_COMMENT(skip_n_char 16 (lexeme lexbuf), pos lexbuf) } | "#-PO: " [^ '\n']* { PO_COMMENT(skip_n_char 1 (lexeme lexbuf), pos lexbuf) } | '#' [^ '\n']* { SPACE(1) } | "\n=" { add_a_new_line(lexeme_end lexbuf - 1); let _ = ins pod_command lexbuf in token lexbuf } | '\n' { add_a_new_line(lexeme_end lexbuf); (try let (mark, r) = Queue.pop here_docs in current_here_doc_mark := mark ; r := ins here_doc lexbuf with Queue.Empty -> try let (mark, r) = Queue.pop raw_here_docs in current_here_doc_mark := mark ; r := raw_ins raw_here_doc lexbuf with Queue.Empty -> ()); CR } | "->" { ARROW(pos lexbuf) } | "++" { INCR(pos lexbuf) } | "--" { DECR(pos lexbuf) } | "**" { POWER(pos lexbuf) } | "!" { TIGHT_NOT(pos lexbuf) } | "~" { BIT_NEG(pos lexbuf) } | "=~" { PATTERN_MATCH(pos lexbuf) } | "!~" { PATTERN_MATCH_NOT(pos lexbuf) } | "*" { MULT(lexeme lexbuf, pos lexbuf) } | "%" { MULT(lexeme lexbuf, pos lexbuf) } | "x" { MULT(lexeme lexbuf, pos lexbuf) } | "+" { PLUS(lexeme lexbuf, pos lexbuf) } | "-" { PLUS(lexeme lexbuf, pos lexbuf) } | "." { PLUS(lexeme lexbuf, pos lexbuf) } | "<<" { BIT_SHIFT(lexeme lexbuf, pos lexbuf) } | ">>" { BIT_SHIFT(lexeme lexbuf, pos lexbuf) } | "<" { LT(pos lexbuf) } | ">" { GT(pos lexbuf) } | "<=" | ">=" | "lt" | "gt" | "le" | "ge" { COMPARE_OP(lexeme lexbuf, pos lexbuf) } | "==" | "!=" | "<=>" | "eq" | "ne" | "cmp" { EQ_OP(lexeme lexbuf, pos lexbuf) } | "&" { BIT_AND(pos lexbuf) } | "|" { BIT_OR(pos lexbuf) } | "^" { BIT_XOR(pos lexbuf) } | "&&" { AND_TIGHT(pos lexbuf) } | "||" { OR_TIGHT(pos lexbuf) } | ".." { DOTDOT(lexeme lexbuf, pos lexbuf) } | "..." { DOTDOT(lexeme lexbuf, pos lexbuf) } | "?" { QUESTION_MARK(pos lexbuf) } | ":" { COLON(pos lexbuf) } | "::" { PKG_SCOPE(pos lexbuf) } | "=" | "+=" | "-=" | "*=" | ".=" | "|=" | "&=" | "^=" | "||=" | "&&=" { ASSIGN(lexeme lexbuf, pos lexbuf) } | "," { COMMA(pos lexbuf) } | "=>" { RIGHT_ARROW(pos lexbuf) } | "not" { NOT(pos lexbuf) } | "and" { AND(pos lexbuf) } | "or" { OR(pos lexbuf) } | "xor" { XOR(pos lexbuf) } | "if" { IF(pos lexbuf) } | "else" { ELSE(pos lexbuf) } | "elsif" { ELSIF(pos lexbuf) } | "unless" { UNLESS(pos lexbuf) } | "do" { DO(pos lexbuf) } | "while" { WHILE(pos lexbuf) } | "until" { UNTIL(pos lexbuf) } | "foreach" { FOR(lexeme lexbuf, pos lexbuf) } | "for" { FOR(lexeme lexbuf, pos lexbuf) } | "my" { MY_OUR(lexeme lexbuf, pos lexbuf) } | "our" { MY_OUR(lexeme lexbuf, pos lexbuf) } | "local" { LOCAL(pos lexbuf) } | "continue" { CONTINUE(pos lexbuf) } | "sub" { SUB(pos lexbuf) } | "package" { PACKAGE(pos lexbuf) } | "use" { USE(pos lexbuf) } | "BEGIN" { BEGIN(pos lexbuf) } | "END" { END(pos lexbuf) } | "print" { PRINT(lexeme lexbuf, pos lexbuf) } | "printf" { PRINT(lexeme lexbuf, pos lexbuf) } | "new" { NEW(pos lexbuf) } | "format" { let _ = raw_here_doc_next_line "." in FORMAT(pos lexbuf) } | "defined" { ONE_SCALAR_PARA(lexeme lexbuf, pos lexbuf) } | "split" | "grep" { (* ok_for_match! *) BAREWORD(lexeme lexbuf, pos lexbuf) } | "print " ['A'-'Z'] ['A'-'Z' '0'-'9']* ['\n' ' '] { putback lexbuf 1; PRINT_TO_STAR(("print", skip_n_char 6 (lexeme lexbuf)), pos lexbuf) } | "print $" ident ['\n' ' '] { putback lexbuf 1; PRINT_TO_SCALAR(("print", skip_n_char 7 (lexeme lexbuf)), pos lexbuf); } | "printf " ['A'-'Z'] ['A'-'Z' '0'-'9']* ['\n' ' '] { putback lexbuf 1; PRINT_TO_STAR(("printf", skip_n_char 7 (lexeme lexbuf)), pos lexbuf) } | "printf $" ident ['\n' ' '] { putback lexbuf 1; PRINT_TO_SCALAR(("printf", skip_n_char 8 (lexeme lexbuf)), pos lexbuf); } | ident ' '* "=>" { (* needed so that (if => 1) works *) let s = lexeme lexbuf in let end_ = String.length s - 1 in let ident_end = non_rindex_from s (end_ - 2) ' ' in putback lexbuf (end_ - ident_end); BAREWORD(String.sub s 0 (ident_end+1), pos lexbuf) } | "{" ident "}" { (* needed so that $h{if} works *) not_ok_for_match := lexeme_end lexbuf; COMPACT_HASH_SUBSCRIPT(skip_n_char_ 1 1 (lexeme lexbuf), pos lexbuf) } | '@' { AT(pos lexbuf) } | '$' { DOLLAR(pos lexbuf) } | '$' '#' { ARRAYLEN(pos lexbuf) } | '%' ['$' '{'] { putback lexbuf 1; PERCENT(pos lexbuf) } | '&' ['$' '{'] { putback lexbuf 1; AMPERSAND(pos lexbuf) } | '*' ['$' '{'] { putback lexbuf 1; if lexeme_start lexbuf = !not_ok_for_match then MULT("*", pos lexbuf) else STAR(pos lexbuf) } | ';' { SEMI_COLON(pos lexbuf) } | '(' { PAREN(pos lexbuf) } | '{' { BRACKET(pos lexbuf) } | "+{"{ BRACKET_HASHREF(pos lexbuf) } | '[' { ARRAYREF(pos lexbuf) } | ')' { not_ok_for_match := lexeme_end lexbuf; PAREN_END(pos lexbuf) } | '}' { not_ok_for_match := lexeme_end lexbuf; BRACKET_END(pos lexbuf) } | ']' { not_ok_for_match := lexeme_end lexbuf; ARRAYREF_END(pos lexbuf) } | "/" { if lexeme_start lexbuf = !not_ok_for_match then MULT("/", pos lexbuf) else ( delimit_char := '/' ; current_string_start_line := !current_file_current_line; let s, pos = ins delimited_string lexbuf in let opts, _ = raw_ins pattern_options lexbuf in check_multi_line_delimited_string (Some opts) pos ; PATTERN(s, opts, pos) ) } | "/=" { if lexeme_start lexbuf = !not_ok_for_match then ASSIGN(lexeme lexbuf, pos lexbuf) else ( putback lexbuf 1 ; delimit_char := '/' ; let s, pos = ins delimited_string lexbuf in let opts, _ = raw_ins pattern_options lexbuf in PATTERN(s, opts, pos) ) } | "m" pattern_separator { delimit_char := lexeme_char lexbuf 1 ; current_string_start_line := !current_file_current_line; let s, pos = ins delimited_string lexbuf in let opts, _ = raw_ins pattern_options lexbuf in check_multi_line_delimited_string (Some opts) pos ; PATTERN(s, opts, pos) } | "qr" pattern_separator { delimit_char := lexeme_char lexbuf 2 ; current_string_start_line := !current_file_current_line; let s, pos = ins delimited_string lexbuf in let opts, _ = raw_ins pattern_options lexbuf in check_multi_line_delimited_string (Some opts) pos ; QR_PATTERN(s, opts, pos) } | "s" pattern_separator { delimit_char := lexeme_char lexbuf 1 ; current_string_start_line := !current_file_current_line; let s1, (start, _) = ins delimited_string lexbuf in let s2, (_, end_) = ins delimited_string lexbuf in let opts, _ = raw_ins pattern_options lexbuf in let pos = start, end_ in check_multi_line_delimited_string (Some opts) pos ; PATTERN_SUBST(s1, s2, opts, pos) } | "tr" pattern_separator { delimit_char := lexeme_char lexbuf 2 ; current_string_start_line := !current_file_current_line; let s1, (start, _) = ins delimited_string lexbuf in let s2, (_, end_) = ins delimited_string lexbuf in let opts, _ = raw_ins pattern_options lexbuf in let pos = start, end_ in check_multi_line_delimited_string None pos ; PATTERN_SUBST(s1, s2, opts, pos) } | "<<" ident { not_ok_for_match := lexeme_end lexbuf; HERE_DOC(here_doc_next_line (skip_n_char 2 (lexeme lexbuf)), pos lexbuf) } | "<<'" ident "'" { not_ok_for_match := lexeme_end lexbuf; RAW_HERE_DOC(raw_here_doc_next_line (skip_n_char_ 3 1 (lexeme lexbuf)), pos lexbuf) } | "<<" ' '+ "'" | "<<" ' '+ ident { failwith (pos2sfull_with (lexeme_start lexbuf + 2) (lexeme_end lexbuf) ^ "No space allowed between \"<<\" and the marker") } | "<<" ' '* '"' { failwith (pos2sfull_with (lexeme_start lexbuf + 2) (lexeme_end lexbuf) ^ "Don't use <<\"MARK\", use <>" (pos2sfull lexbuf) (lexeme lexbuf)) } and string = parse | '"' { () } | '\\' { Stack.push string next_rule ; string_escape lexbuf } | '$' { Stack.push string next_rule ; string_interpolate_scalar lexbuf } | '@' { Stack.push string next_rule ; string_interpolate_array lexbuf } | '\n' { add_a_new_line(lexeme_end lexbuf); next string lexbuf } | [^ '\n' '\\' '"' '$' '@']+ { next string lexbuf } | eof { die_in_string lexbuf "Unterminated_string" } and delimited_string = parse | '\\' { Stack.push delimited_string next_rule ; string_escape lexbuf } | '$' { Stack.push delimited_string next_rule ; delimited_string_interpolate_scalar lexbuf } | '@' { Stack.push delimited_string next_rule ; delimited_string_interpolate_array lexbuf } | '\n' { add_a_new_line(lexeme_end lexbuf); next delimited_string lexbuf } | eof { die_in_string lexbuf "Unterminated_delimited_string" } | [ ^ '\n' '\\' '$' '@'] { if lexeme_char lexbuf 0 <> !delimit_char then next delimited_string lexbuf } and rawstring = parse | ''' { () } | '\n' { add_a_new_line(lexeme_end lexbuf); next rawstring lexbuf } | '\\' { next rawstring lexbuf } | "\\'" { next_s "'" rawstring lexbuf } | [^ '\n' ''' '\\']+ { next rawstring lexbuf } | eof { die_in_string lexbuf "Unterminated_rawstring" } and qqstring = parse | ')' { if !string_nestness <> 0 then (decr string_nestness; next qqstring lexbuf) } | '(' { incr string_nestness; next qqstring lexbuf } | '\\' { Stack.push qqstring next_rule ; string_escape lexbuf } | '$' { Stack.push qqstring next_rule ; string_interpolate_scalar lexbuf } | '@' { Stack.push qqstring next_rule ; string_interpolate_array lexbuf } | '\n' { add_a_new_line(lexeme_end lexbuf); next qqstring lexbuf } | [^ '\n' '(' ')' '\\' '$' '@']+ { next qqstring lexbuf } | eof { die_in_string lexbuf "Unterminated_qqstring" } and qstring = parse | ')' { if !string_nestness <> 0 then (decr string_nestness ; next qstring lexbuf) } | '(' { incr string_nestness; next qstring lexbuf } | '\n' { add_a_new_line(lexeme_end lexbuf); next qstring lexbuf } | [^ '\n' '(' ')']+ { next qstring lexbuf } | eof { die_in_string lexbuf "Unterminated_qstring" } and here_doc = parse | '\\' { Stack.push here_doc next_rule ; string_escape lexbuf } | '$' { Stack.push here_doc next_rule ; string_interpolate_scalar lexbuf } | '@' { Stack.push here_doc next_rule ; string_interpolate_array lexbuf } | [ ^ '\n' '\\' '$' '@' ]* { let s = lexeme lexbuf in if chomps s <> !current_here_doc_mark then next_s s here_doc lexbuf else if s <> !current_here_doc_mark then Printf.eprintf "%sTrailing spaces after HERE-document mark\n" (pos2sfull lexbuf) } | '\n' { add_a_new_line(lexeme_end lexbuf); next here_doc lexbuf } | eof { die_in_string lexbuf "Unterminated_here_doc" } and raw_here_doc = parse | [ ^ '\n' ]* { let s = lexeme lexbuf in if chomps s <> !current_here_doc_mark then next_s s raw_here_doc lexbuf else if s <> !current_here_doc_mark then Printf.eprintf "%sTrailing spaces after HERE-document mark\n" (pos2sfull lexbuf) } | '\n' { add_a_new_line(lexeme_end lexbuf); next raw_here_doc lexbuf } | eof { die_in_string lexbuf "Unterminated_raw_here_doc" } and string_escape = parse | '0' { next_s "\000" (Stack.pop next_rule) lexbuf } | '"' { next_s "\"" (Stack.pop next_rule) lexbuf } | ''' { next_s "'" (Stack.pop next_rule) lexbuf } | ':' { next_s ":" (Stack.pop next_rule) lexbuf } | '\\'{ next_s "\\" (Stack.pop next_rule) lexbuf } | 'n' { next_s "\n" (Stack.pop next_rule) lexbuf } | 't' { next_s "\t" (Stack.pop next_rule) lexbuf } | "x{" [^ '}']* '}' { hex_in_string lexbuf next_rule (skip_n_char_ 2 1 (lexeme lexbuf)) } | 'x' [^ '{'] _ { hex_in_string lexbuf next_rule (skip_n_char 1 (lexeme lexbuf)) } | '\n' { die lexbuf "do not use \"\\\" before end-of-line, it's useless and generally bad" } | _ { next_s ("\\" ^ lexeme lexbuf) (Stack.pop next_rule) lexbuf } and string_interpolate_scalar = parse | '$' ident | ['0'-'9'] | '{' [^ '{' '}']* '}' | (ident | (ident? ("::" ident)+)) "->"? (('{' [^ '{' '}' '\n']* '}') | ('[' [^ '[' ']' '\n']* ']'))* | [^ '{' '}' ' ' '\n' '"'] { (* eg: $! $$ *) string_interpolate token "$" lexbuf } | "{" | ident "->"? '{' | '"' { putback lexbuf 1; next_s "$" (Stack.pop next_rule) lexbuf } | eof { next_s "$" (Stack.pop next_rule) lexbuf } | _ { warn lexbuf (Printf.sprintf "weird \"%s\" in string" (lexeme lexbuf)); next_s ("$" ^ lexeme lexbuf) (Stack.pop next_rule) lexbuf } and delimited_string_interpolate_scalar = parse (* needed for delimited string like m!foo$! where $! should not be taken as is... *) | '$' ident | ['0'-'9'] | '{' [^ '{' '}']* '}' | (ident | (ident? ("::" ident)+)) "->"? ('{' [^ '{' '}' '\n']* '}')* | (ident | (ident? ("::" ident)+)) "->"? (('{' [^ '{' '}' '\n']* '}') | ('[' ('$' ident | ['0'-'9']+) ']'))* { string_interpolate token "$" lexbuf } | (ident | (ident? ("::" ident)+)) "->"? (('{' [^ '{' '}' '\n']* '}') | ('[' ['$' '0'-'9'] [^ '[' ']' '\n']* ']'))* { die lexbuf (Printf.sprintf "I really can't handle this, [xxx] can be indexing or not based on stellar position :-(") } | "{" | ident "->"? '{' | eof { next_s "$" (Stack.pop next_rule) lexbuf } | _ { let c = lexeme_char lexbuf 0 in if c <> !delimit_char && c <> '|' && c<>')' && c<>'/' && c<>' ' then warn lexbuf (Printf.sprintf "weird \"%s\" in string" (lexeme lexbuf)); putback lexbuf 1; next_s "$" (Stack.pop next_rule) lexbuf } and string_interpolate_array = parse | '$' ident | '{' [^ '{' '}']* '}' | (ident | (ident? ("::" ident)+)) { string_interpolate token "@" lexbuf } | [ '@' '*' '<' '>' ']' '.' '(' ' ' ] { next_s ("@" ^ lexeme lexbuf) (Stack.pop next_rule) lexbuf } | '"' { putback lexbuf 1; next_s "@" (Stack.pop next_rule) lexbuf } | eof { next_s "@" (Stack.pop next_rule) lexbuf } | _ { warn lexbuf (Printf.sprintf "weird \"%s\" in string" (lexeme lexbuf)); next_s ("@" ^ lexeme lexbuf) (Stack.pop next_rule) lexbuf } and delimited_string_interpolate_array = parse | '$' ident | '{' [^ '{' '}']* '}' | (ident | (ident? ("::" ident)+)) { string_interpolate token "@" lexbuf } | [ '@' '*' '<' '>' ']' '.' '(' ' ' ] { next_s ("@" ^ lexeme lexbuf) (Stack.pop next_rule) lexbuf } | eof { next_s "@" (Stack.pop next_rule) lexbuf } | _ { let c = lexeme_char lexbuf 0 in if c <> !delimit_char then warn lexbuf (Printf.sprintf "weird \"%s\" in string" (lexeme lexbuf)); putback lexbuf 1; next_s "@" (Stack.pop next_rule) lexbuf } and pattern_options = parse | [ 'g' 'i' 'm' 'o' 's' 'x' 'e' 'd' ] { next pattern_options lexbuf } | _ { putback lexbuf 1; () } and pod_command = parse | [^ '\n' ]+ { let s = lexeme lexbuf in let command = String.sub s 0 (try String.index s ' ' with Not_found -> String.length s) in match command with | "cut" -> if !(Stack.top building_current_string) = "" then failwith(pos2sfull lexbuf ^ "found POD command \"=cut\" but it is not a POD block") | "head1" | "head2" | "head3" | "head4" | "over" | "item" | "back" | "pod" | "begin" | "end" | "for" -> next pod lexbuf | s -> failwith(pos2sfull lexbuf ^ "unknown POD command \"" ^ s ^ "\"") } | _ { failwith(pos2sfull lexbuf ^ "POD command expected") } and pod = parse | "\n=" { add_a_new_line(lexeme_end lexbuf - 1); next pod_command lexbuf } | "\n" [^ '=' '\n'] [^ '\n']* | "\n" { add_a_new_line(lexeme_end lexbuf); next pod lexbuf } | eof | _ { failwith(pos2sfull_with !current_string_start_pos (lexeme_end lexbuf) ^ "POD block still open") }