Rename folder to match history.

This is a Synthesized commit to combine perl-MDK-Common and perl_checker repository history.
author: Mageia SVN-Git Migration <svn-git-migration@mageia.org> 2007-04-25 15:16:21 +0000
committer: Mageia SVN-Git Migration <svn-git-migration@mageia.org> 2007-04-25 15:16:21 +0000
commit: be4fff49f0164e606d4b2f76f64d4d108895f236 (patch)
tree: a46bc8c23de0b885f8a2962a9069930b48836fd9 /src/lexer.mll
parent: 4746e8e79a5b3cdf3f72400a5a5d6742f6a76a8c (diff)
download: perl_checker-be4fff49f0164e606d4b2f76f64d4d108895f236.tar
perl_checker-be4fff49f0164e606d4b2f76f64d4d108895f236.tar.gz
perl_checker-be4fff49f0164e606d4b2f76f64d4d108895f236.tar.bz2
perl_checker-be4fff49f0164e606d4b2f76f64d4d108895f236.tar.xz
perl_checker-be4fff49f0164e606d4b2f76f64d4d108895f236.zip
1 files changed, 1057 insertions, 0 deletions
diff --git a/src/lexer.mll b/src/lexer.mll
new file mode 100644
index 0000000..f416499
--- /dev/null
+++ b/src/lexer.mll
@@ -0,0 +1,1057 @@
+{ (* -*- caml -*- *)
+open Common
+open Types
+open Lexing
+open Info
+
+let bpos = -1,-1
+
+type raw_token =
+  | EOF of raw_pos
+  | SPACE of int
+  | CR
+  | INT of (string * raw_pos)
+  | FLOAT of (string * raw_pos)
+  | RAW_STRING of (string * raw_pos)
+  | STRING of (raw_interpolated_string * raw_pos)
+  | PATTERN of (raw_interpolated_string * string * raw_pos)
+  | QR_PATTERN of (raw_interpolated_string * string * raw_pos)
+  | PATTERN_SUBST of (raw_interpolated_string * raw_interpolated_string * string * raw_pos)
+  | BAREWORD of (string * raw_pos)
+  | BAREWORD_PAREN of (string * raw_pos)
+  | REVISION of (string * raw_pos)
+  | PERL_CHECKER_COMMENT of (string * raw_pos)
+  | PO_COMMENT of (string * raw_pos)
+  | POD of (string * raw_pos)
+  | LABEL of (string * raw_pos)
+  | COMMAND_STRING of (raw_interpolated_string * raw_pos)
+  | PRINT_TO_STAR of ((string * string) * raw_pos)
+  | PRINT_TO_SCALAR of ((string * string) * raw_pos)
+  | QUOTEWORDS of (string * raw_pos)
+  | COMPACT_HASH_SUBSCRIPT of (string * raw_pos)
+  | RAW_HERE_DOC of ((string * raw_pos) ref * raw_pos)
+  | HERE_DOC of (raw_interpolated_string * raw_pos) ref * raw_pos
+  | FORMAT of (raw_interpolated_string * raw_pos) ref * raw_pos
+  | SCALAR_IDENT of (string option * string * raw_pos)
+  | ARRAY_IDENT of (string option * string * raw_pos)
+  | HASH_IDENT of (string option * string * raw_pos)
+  | FUNC_IDENT of (string option * string * raw_pos)
+  | STAR_IDENT of (string option * string * raw_pos)
+  | RAW_IDENT of (string option * string * raw_pos)
+  | RAW_IDENT_PAREN of (string option * string * raw_pos)
+  | ARRAYLEN_IDENT of (string option * string * raw_pos)
+  | SUB_WITH_PROTO of (string * raw_pos)
+  | FUNC_DECL_WITH_PROTO of (string option * string * string * raw_pos)
+
+  | IF of raw_pos | ELSIF of raw_pos | ELSE of raw_pos | UNLESS of raw_pos | DO of raw_pos | WHILE of raw_pos | UNTIL of raw_pos | MY_OUR of (string * raw_pos) | CONTINUE of raw_pos | SUB of raw_pos
+  | LOCAL of raw_pos | FOR of (string * raw_pos) | USE of raw_pos | PACKAGE of raw_pos | BEGIN of raw_pos | END of raw_pos | PRINT of (string * raw_pos) 
+  | NEW of (raw_pos) | AT of raw_pos | DOLLAR of raw_pos | PERCENT of raw_pos | AMPERSAND of raw_pos
+  | STAR of raw_pos | ARRAYLEN of raw_pos | SEMI_COLON of raw_pos | PKG_SCOPE of raw_pos | PAREN of raw_pos | PAREN_END of raw_pos | BRACKET of raw_pos
+  | BRACKET_END of raw_pos | BRACKET_HASHREF of raw_pos | ARRAYREF of raw_pos | ARRAYREF_END of raw_pos | ARROW of raw_pos | INCR of raw_pos | DECR of raw_pos
+  | CONCAT of raw_pos | POWER of raw_pos | TIGHT_NOT of raw_pos | BIT_NEG of raw_pos | REF of raw_pos | ONE_SCALAR_PARA of (string * raw_pos) | PATTERN_MATCH of raw_pos | PATTERN_MATCH_NOT of raw_pos | MULT of (string * raw_pos) | MULT_L_STR of raw_pos
+  | PLUS of (string * raw_pos) | BIT_SHIFT of (string * raw_pos)
+  | LT of raw_pos | GT of raw_pos | COMPARE_OP of (string * raw_pos) | COMPARE_OP_STR of (string * raw_pos) | EQ_OP of (string * raw_pos) | EQ_OP_STR of (string * raw_pos)
+  | BIT_AND of raw_pos | BIT_OR of raw_pos | BIT_XOR of raw_pos | AND_TIGHT of raw_pos | OR_TIGHT of raw_pos | DOTDOT of (string * raw_pos)
+  | QUESTION_MARK of raw_pos | COLON of raw_pos | ASSIGN of (string * raw_pos) | COMMA of raw_pos | RIGHT_ARROW of raw_pos | NOT of raw_pos | AND of raw_pos | OR of raw_pos | XOR of raw_pos
+
+and raw_interpolated_string = (string * raw_token list) list
+
+let new_any mcontext any spaces pos = { mcontext = mcontext ; any = any ; spaces = spaces ; pos = pos }
+
+let pos lexbuf = lexeme_start lexbuf, lexeme_end lexbuf
+let pos2sfull_with start end_ = Info.pos2sfull (!current_file, start, end_)
+let pos2sfull lexbuf = pos2sfull_with (lexeme_start lexbuf) (lexeme_end lexbuf)
+
+let warn_with_pos warn_types (start, end_) err = if Flags.are_warning_types_set warn_types then print_endline_flush (pos2sfull_with start end_ ^ err)
+let warn warn_types lexbuf err = warn_with_pos warn_types (pos lexbuf) err
+let die lexbuf err = failwith (pos2sfull_with (lexeme_start lexbuf) (lexeme_end lexbuf) ^ err)
+
+let rec concat_bareword_paren accu = function
+  | PRINT(s, pos1) :: PAREN(pos2) :: l
+  | BAREWORD(s, pos1) :: PAREN(pos2) :: l ->
+      concat_bareword_paren (PAREN(pos2) :: BAREWORD_PAREN(s, pos1) :: accu) l
+  | RAW_IDENT(kind, ident, pos1) :: PAREN(pos2) :: l -> 
+      concat_bareword_paren (PAREN(pos2) :: RAW_IDENT_PAREN(kind, ident, pos1) :: accu) l	
+  | PO_COMMENT(_, pos) as e :: l ->
+      let l = drop_while (function CR | SPACE _ -> true | _ -> false) l in
+      (match l with
+      | PO_COMMENT _ :: _
+	  (* the check will be done on this PO_COMMENT *)
+      | BAREWORD("N", _) :: PAREN(_) :: _
+      | BAREWORD("N_", _) :: PAREN(_) :: _ ->
+	  concat_bareword_paren (e :: accu) l
+      | _ -> 
+	  warn_with_pos [Warn_MDK_Common] pos "N(...) must follow the #-PO: comment, with nothing in between" ;
+	  concat_bareword_paren accu l)
+  | [] -> List.rev accu
+  | e :: l -> 
+      concat_bareword_paren (e :: accu) l
+
+let rec bracket_bareword_is_hashref accu = function
+  | (pos, Parser.BRACKET bracket) :: (_, Parser.BAREWORD _ as bareword) :: (_, Parser.RIGHT_ARROW _ as right_arrow) :: l ->
+      bracket_bareword_is_hashref (right_arrow :: bareword :: (pos, Parser.BRACKET_HASHREF bracket) :: accu) l
+  | [] -> List.rev accu
+  | e :: l -> 
+      bracket_bareword_is_hashref (e :: accu) l
+
+
+let rec raw_token_to_pos_and_token spaces = function
+  | INT(s, pos) -> pos, Parser.NUM(new_any M_int s spaces pos)
+  | FLOAT(s, pos) -> pos, Parser.NUM(new_any M_float s spaces pos)
+  | RAW_STRING(s, pos) -> pos, Parser.RAW_STRING(new_any M_string s spaces pos)
+  | RAW_HERE_DOC(r, pos) -> pos, Parser.RAW_HERE_DOC(new_any M_string !r spaces pos)
+  | STRING(l, pos) -> pos, Parser.STRING(new_any M_string (raw_interpolated_string_to_tokens l) spaces pos)
+  | COMMAND_STRING(l, pos) -> pos, Parser.COMMAND_STRING(new_any (M_mixed [M_string; M_array]) (raw_interpolated_string_to_tokens l) spaces pos)
+  | QR_PATTERN(s, opts, pos) -> pos, Parser.QR_PATTERN(new_any M_special (raw_interpolated_string_to_tokens s, opts) spaces pos)
+  | PATTERN(s, opts, pos) -> pos, Parser.PATTERN(new_any M_special (raw_interpolated_string_to_tokens s, opts) spaces pos)
+  | PATTERN_SUBST(from, to_, opts, pos) -> pos, Parser.PATTERN_SUBST(new_any M_special (raw_interpolated_string_to_tokens from, raw_interpolated_string_to_tokens to_, opts) spaces pos)
+  | HERE_DOC(l, pos) -> pos, Parser.HERE_DOC(new_any M_string (raw_interpolated_string_to_tokens (fst !l), snd !l) spaces pos)
+  | FORMAT(l, pos) -> pos, Parser.FORMAT(new_any M_string (raw_interpolated_string_to_tokens (fst !l), snd !l) spaces pos)
+  | BAREWORD(s, pos) -> pos, Parser.BAREWORD(new_any M_special s spaces pos)
+  | BAREWORD_PAREN(s, pos) -> pos, Parser.BAREWORD_PAREN(new_any M_special s spaces pos)
+  | REVISION(s, pos) -> pos, Parser.REVISION(new_any M_revision s spaces pos)
+  | PERL_CHECKER_COMMENT(s, pos) -> pos, Parser.PERL_CHECKER_COMMENT(new_any M_none s spaces pos)
+  | PO_COMMENT(s, pos) -> pos, Parser.PO_COMMENT(new_any M_special s spaces pos)
+  | POD(s, pos) -> pos, Parser.POD(new_any M_special s spaces pos)
+  | LABEL(s, pos) -> pos, Parser.LABEL(new_any M_none s spaces pos)
+  | PRINT(s, pos) -> pos, Parser.PRINT(new_any M_special s spaces pos)
+  | PRINT_TO_STAR(s, pos) -> pos, Parser.PRINT_TO_STAR(new_any M_special s spaces pos)
+  | PRINT_TO_SCALAR(s, pos) -> pos, Parser.PRINT_TO_SCALAR(new_any M_special s spaces pos)
+  | QUOTEWORDS(s, pos) -> pos, Parser.QUOTEWORDS(new_any M_array s spaces pos)
+  | COMPACT_HASH_SUBSCRIPT(s, pos) -> pos, Parser.COMPACT_HASH_SUBSCRIPT(new_any M_special s spaces pos)
+  | SCALAR_IDENT(kind, name, pos) -> pos, Parser.SCALAR_IDENT(new_any M_special (kind, name) spaces pos)
+  | ARRAY_IDENT(kind, name, pos) -> pos, Parser.ARRAY_IDENT(new_any M_special (kind, name) spaces pos)
+  | HASH_IDENT(kind, name, pos) -> pos, Parser.HASH_IDENT(new_any M_special (kind, name) spaces pos)
+  | FUNC_IDENT(kind, name, pos) -> pos, Parser.FUNC_IDENT(new_any M_special (kind, name) spaces pos)
+  | STAR_IDENT(kind, name, pos) -> pos, Parser.STAR_IDENT(new_any M_special (kind, name) spaces pos)
+  | RAW_IDENT(kind, name, pos) -> pos, Parser.RAW_IDENT(new_any M_special (kind, name) spaces pos)
+  | RAW_IDENT_PAREN(kind, name, pos) -> pos, Parser.RAW_IDENT_PAREN(new_any M_special (kind, name) spaces pos)
+  | ARRAYLEN_IDENT(kind, name, pos) -> pos, Parser.ARRAYLEN_IDENT(new_any M_special (kind, name) spaces pos)
+  | SUB_WITH_PROTO(proto, pos) -> pos, Parser.SUB_WITH_PROTO(new_any M_special proto spaces pos)
+  | FUNC_DECL_WITH_PROTO(fq, name, proto, pos) -> pos, Parser.FUNC_DECL_WITH_PROTO(new_any M_special (fq, name, proto) spaces pos)
+
+  | NEW(pos) -> pos, Parser.NEW(new_any M_special () spaces pos)
+  | COMPARE_OP(s, pos) -> pos, Parser.COMPARE_OP(new_any M_special s spaces pos)
+  | COMPARE_OP_STR(s, pos) -> pos, Parser.COMPARE_OP_STR(new_any M_special s spaces pos)
+  | EQ_OP(s, pos) -> pos, Parser.EQ_OP(new_any M_special s spaces pos)
+  | EQ_OP_STR(s, pos) -> pos, Parser.EQ_OP_STR(new_any M_special s spaces pos)
+  | ASSIGN(s, pos) -> pos, Parser.ASSIGN(new_any M_special s spaces pos)
+  | FOR(s, pos) -> pos, Parser.FOR(new_any M_special s spaces pos)
+
+  | DOTDOT(s, pos) -> pos, Parser.DOTDOT(new_any M_special s spaces pos)
+  | MULT(s, pos) -> pos, Parser.MULT(new_any M_special s spaces pos)
+  | BIT_SHIFT(s, pos) -> pos, Parser.BIT_SHIFT(new_any M_special s spaces pos)
+  | PLUS(s, pos) -> pos, Parser.PLUS(new_any M_special s spaces pos)
+  | ONE_SCALAR_PARA(s, pos) -> pos, Parser.ONE_SCALAR_PARA(new_any M_special s spaces pos)
+  | MY_OUR(s, pos) -> pos, Parser.MY_OUR(new_any M_special s spaces pos)
+
+  | EOF              (pos) -> pos, Parser.EOF              (new_any M_special () spaces pos)
+  | IF               (pos) -> pos, Parser.IF               (new_any M_special () spaces pos)
+  | ELSIF            (pos) -> pos, Parser.ELSIF            (new_any M_special () spaces pos)
+  | ELSE             (pos) -> pos, Parser.ELSE             (new_any M_special () spaces pos)
+  | UNLESS           (pos) -> pos, Parser.UNLESS           (new_any M_special () spaces pos)
+  | DO               (pos) -> pos, Parser.DO               (new_any M_special () spaces pos)
+  | WHILE            (pos) -> pos, Parser.WHILE            (new_any M_special () spaces pos)
+  | UNTIL            (pos) -> pos, Parser.UNTIL            (new_any M_special () spaces pos)
+  | CONTINUE         (pos) -> pos, Parser.CONTINUE         (new_any M_special () spaces pos)
+  | SUB              (pos) -> pos, Parser.SUB              (new_any M_special () spaces pos)
+  | LOCAL            (pos) -> pos, Parser.LOCAL            (new_any M_special () spaces pos)
+  | USE              (pos) -> pos, Parser.USE              (new_any M_special () spaces pos)
+  | PACKAGE          (pos) -> pos, Parser.PACKAGE          (new_any M_special () spaces pos)
+  | BEGIN            (pos) -> pos, Parser.BEGIN            (new_any M_special () spaces pos)
+  | END              (pos) -> pos, Parser.END              (new_any M_special () spaces pos)
+  | AT               (pos) -> pos, Parser.AT               (new_any M_special () spaces pos)
+  | DOLLAR           (pos) -> pos, Parser.DOLLAR           (new_any M_special () spaces pos)
+  | PERCENT          (pos) -> pos, Parser.PERCENT          (new_any M_special () spaces pos)
+  | AMPERSAND        (pos) -> pos, Parser.AMPERSAND        (new_any M_special () spaces pos)
+  | STAR             (pos) -> pos, Parser.STAR             (new_any M_special () spaces pos)
+  | ARRAYLEN         (pos) -> pos, Parser.ARRAYLEN         (new_any M_special () spaces pos)
+  | SEMI_COLON       (pos) -> pos, Parser.SEMI_COLON       (new_any M_none    () spaces pos)
+  | PKG_SCOPE        (pos) -> pos, Parser.PKG_SCOPE        (new_any M_special () spaces pos)
+  | PAREN            (pos) -> pos, Parser.PAREN            (new_any M_special () spaces pos)
+  | PAREN_END        (pos) -> pos, Parser.PAREN_END        (new_any M_special () spaces pos)
+  | BRACKET          (pos) -> pos, Parser.BRACKET          (new_any M_special () spaces pos)
+  | BRACKET_END      (pos) -> pos, Parser.BRACKET_END      (new_any M_special () spaces pos)
+  | BRACKET_HASHREF  (pos) -> pos, Parser.BRACKET_HASHREF  (new_any M_special () spaces pos)
+  | ARRAYREF         (pos) -> pos, Parser.ARRAYREF         (new_any M_special () spaces pos)
+  | ARRAYREF_END     (pos) -> pos, Parser.ARRAYREF_END     (new_any M_special () spaces pos)
+  | ARROW            (pos) -> pos, Parser.ARROW            (new_any M_special () spaces pos)
+  | INCR             (pos) -> pos, Parser.INCR             (new_any M_special () spaces pos)
+  | DECR             (pos) -> pos, Parser.DECR             (new_any M_special () spaces pos)
+  | POWER            (pos) -> pos, Parser.POWER            (new_any M_special () spaces pos)
+  | TIGHT_NOT        (pos) -> pos, Parser.TIGHT_NOT        (new_any M_special () spaces pos)
+  | BIT_NEG          (pos) -> pos, Parser.BIT_NEG          (new_any M_special () spaces pos)
+  | REF              (pos) -> pos, Parser.REF              (new_any M_special () spaces pos)
+  | PATTERN_MATCH    (pos) -> pos, Parser.PATTERN_MATCH    (new_any M_special () spaces pos)
+  | PATTERN_MATCH_NOT(pos) -> pos, Parser.PATTERN_MATCH_NOT(new_any M_special () spaces pos)
+  | LT               (pos) -> pos, Parser.LT               (new_any M_special () spaces pos)
+  | GT               (pos) -> pos, Parser.GT               (new_any M_special () spaces pos)
+  | BIT_AND          (pos) -> pos, Parser.BIT_AND          (new_any M_special () spaces pos)
+  | BIT_OR           (pos) -> pos, Parser.BIT_OR           (new_any M_special () spaces pos)
+  | BIT_XOR          (pos) -> pos, Parser.BIT_XOR          (new_any M_special () spaces pos)
+  | AND_TIGHT        (pos) -> pos, Parser.AND_TIGHT        (new_any M_special () spaces pos)
+  | OR_TIGHT         (pos) -> pos, Parser.OR_TIGHT         (new_any M_special () spaces pos)
+  | QUESTION_MARK    (pos) -> pos, Parser.QUESTION_MARK    (new_any M_special () spaces pos)
+  | COLON            (pos) -> pos, Parser.COLON            (new_any M_special () spaces pos)
+  | COMMA            (pos) -> pos, Parser.COMMA            (new_any M_special () spaces pos)
+  | CONCAT           (pos) -> pos, Parser.CONCAT           (new_any M_special () spaces pos)
+  | MULT_L_STR       (pos) -> pos, Parser.MULT_L_STR       (new_any M_special () spaces pos)
+  | RIGHT_ARROW      (pos) -> pos, Parser.RIGHT_ARROW      (new_any M_special () spaces pos)
+  | NOT              (pos) -> pos, Parser.NOT              (new_any M_special () spaces pos)
+  | AND              (pos) -> pos, Parser.AND              (new_any M_special () spaces pos)
+  | OR               (pos) -> pos, Parser.OR               (new_any M_special () spaces pos)
+  | XOR              (pos) -> pos, Parser.XOR              (new_any M_special () spaces pos)
+
+  | SPACE _ | CR -> internal_error "raw_token_to_token"
+
+and raw_token_to_token spaces raw_token = 
+  let _, token = raw_token_to_pos_and_token spaces raw_token in
+  token
+
+and raw_interpolated_string_to_tokens l =
+  List.map (fun (s, rtok) -> s, concat_spaces [] Space_0 rtok) l
+
+and concat_spaces ret spaces = function
+  | CR :: l -> concat_spaces ret Space_cr l
+  | SPACE n :: l ->
+      let spaces' = 
+	match spaces with
+	| Space_cr -> Space_cr
+	| Space_0 -> if n = 1 then Space_1 else Space_n
+	| _ -> Space_n
+      in
+      concat_spaces ret spaces' l
+  | [] -> List.rev ret
+  | token :: l -> concat_spaces (raw_token_to_pos_and_token spaces token :: ret) Space_0 l
+
+let rec lexbuf2list accu t lexbuf =
+  match t lexbuf with
+  | EOF pos -> List.rev (EOF pos :: accu)
+  | e -> lexbuf2list (e :: accu) t lexbuf
+
+let get_token token lexbuf = 
+  let tokens = lexbuf2list [] token lexbuf in
+  let tokens = concat_bareword_paren [] tokens in
+  let tokens = concat_spaces [] Space_0 tokens in
+  let tokens = bracket_bareword_is_hashref [] tokens in
+  tokens
+
+let next_rule = Stack.create()
+
+
+let putback lexbuf nb = lexbuf.Lexing.lex_curr_pos <- lexbuf.Lexing.lex_curr_pos - nb
+
+let add_a_new_line raw_pos = 
+   incr current_file_current_line ;
+   lpush current_file_lines_starts raw_pos
+
+let here_docs = Queue.create()
+let raw_here_docs = Queue.create()
+let current_here_doc_mark = ref ""
+
+let here_doc_next_line mark =
+  let here_doc_ref = ref([], bpos) in
+  Queue.push (mark, here_doc_ref) here_docs ;
+  here_doc_ref
+let raw_here_doc_next_line mark =
+  let here_doc_ref = ref("", bpos) in
+  Queue.push (mark, here_doc_ref) raw_here_docs ;
+  here_doc_ref
+
+let delimit_char = ref '/'
+let delimit_char_open = ref '('
+let delimit_char_close = ref ')'
+type string_escape_kinds = Double_quote | Qq | Delimited | Here_doc
+let string_escape_kind = ref Double_quote
+let string_quote_escape = ref false
+let string_escape_useful = ref (Left false)
+let not_ok_for_match = ref (-1)
+let string_nestness = ref 0
+let string_is_i18n = ref false
+
+let building_current_interpolated_string = Stack.create()
+let building_current_string = Stack.create()
+let current_string_start_pos = ref 0
+let current_string_start_line = ref 0
+
+let die_in_string lexbuf err = failwith (pos2sfull_with !current_string_start_pos (lexeme_end lexbuf) ^ err)
+let warn_escape_unneeded lexbuf c = 
+  let s = String.make 1 c in warn [Warn_suggest_simpler] lexbuf ("you can replace \\" ^ s ^ " with " ^ s)
+let next_interpolated toks =
+  let r = Stack.top building_current_string in
+  Queue.push (!r, toks) (Stack.top building_current_interpolated_string) ;
+  r := ""
+
+let raw_ins t lexbuf = 
+  Stack.push (ref "") building_current_string; 
+  current_string_start_pos := lexeme_start lexbuf;
+  t lexbuf ;
+  !(Stack.pop building_current_string), (!current_string_start_pos, lexeme_end lexbuf)
+
+let ins t lexbuf =
+  Stack.push (Queue.create()) building_current_interpolated_string ;
+  Stack.push (ref "") building_current_string; 
+  current_string_start_pos := lexeme_start lexbuf;
+  t lexbuf ;
+  next_interpolated [] ;
+  let _ = Stack.pop building_current_string in
+  queue2list (Stack.pop building_current_interpolated_string), (!current_string_start_pos, lexeme_end lexbuf)
+
+let raw_ins_to_string t lexbuf =
+  let s, pos = raw_ins t lexbuf in
+  not_ok_for_match := lexeme_end lexbuf; 
+  RAW_STRING(s, pos)
+let ins_to_string t lexbuf =
+  string_escape_useful := Left false ;
+  string_quote_escape := false ;
+  let s, pos = ins t lexbuf in
+
+  if not !string_is_i18n then
+    (match !string_escape_useful, s with
+    | Right c, [ _, [] ] ->
+	let s = String.make 1 c in
+	warn_with_pos [Warn_suggest_simpler] pos ("you can replace \"xxx\\" ^ s ^ "xxx\" with 'xxx" ^ s ^ "xxx', that way you don't need to escape <" ^ s ^ ">")
+    | _ -> 
+	if !string_quote_escape then
+	  let full_s = String.concat "" (List.map fst s) in
+	  let nb = string_fold_left (fun nb c ->
+	    if nb < 0 then nb else
+	    if c = '(' then nb + 1 else
+	    if c = ')' then nb - 1 else nb
+	  ) 0 full_s in
+	  if nb = 0 then
+	    warn_with_pos [Warn_suggest_simpler] pos "you can replace \"xxx\\\"xxx\" with qq(xxx\"xxx), that way you don't need to escape <\">"
+    );
+
+  not_ok_for_match := lexeme_end lexbuf; 
+  string_is_i18n := false ;
+  STRING(s, pos)
+
+let next_s s t lexbuf =
+  let r = Stack.top building_current_string in r := !r ^ s ;
+  t lexbuf
+let next t lexbuf = next_s (lexeme lexbuf) t lexbuf
+
+let ins_re re_delimited_string lexbuf =
+  let s, pos = ins re_delimited_string lexbuf in
+  List.iter (fun (s, _) ->
+    if str_contains s "[^\\s]" then warn [Warn_suggest_simpler] lexbuf "you can replace [^\\s] with \\S";
+    if str_contains s "[^\\w]" then warn [Warn_suggest_simpler] lexbuf "you can replace [^\\w] with \\W"
+  ) s ;
+  s, pos
+
+let string_interpolate token pre lexbuf =
+   let s = lexeme lexbuf in
+   let local_lexbuf = Lexing.from_string (pre ^ s ^ " ") in (* add a space to help tokenizing "xxx$$" *)
+   local_lexbuf.lex_start_p <- lexbuf.lex_start_p ;
+   local_lexbuf.lex_curr_p <- lexbuf.lex_start_p ;
+   local_lexbuf.lex_abs_pos <- lexeme_start lexbuf ;
+   let l = lexbuf2list [] token local_lexbuf in
+   let l = concat_bareword_paren [] l in
+   next_interpolated l; 
+   (Stack.pop next_rule) lexbuf
+
+let ident_type_from_char fq name lexbuf c = 
+  not_ok_for_match := lexeme_end lexbuf; 
+  match c with
+  | '$' -> SCALAR_IDENT(fq, name, pos lexbuf)
+  | '@' -> ARRAY_IDENT (fq, name, pos lexbuf)
+  | '%' -> HASH_IDENT  (fq, name, pos lexbuf)
+  | '&' -> FUNC_IDENT  (fq, name, pos lexbuf)
+  | '*' -> STAR_IDENT  (fq, name, pos lexbuf)
+  | _ -> internal_error "ident_type_from_char"
+
+let split_at_two_colons s =
+  let i_fq = String.rindex s ':' in
+  String.sub s 0 (i_fq - 1), skip_n_char (i_fq + 1) s
+
+let ident_from_lexbuf lexbuf = 
+  let fq, name = split_at_two_colons (lexeme lexbuf) in
+  RAW_IDENT(Some fq, name, pos lexbuf)
+  
+let typed_ident_from_lexbuf lexbuf = 
+  let s = lexeme lexbuf in
+  ident_type_from_char None (skip_n_char 1 s) lexbuf s.[0]
+
+let typed_fqident_from_lexbuf lexbuf = 
+  let s = lexeme lexbuf in
+  let fq, name = split_at_two_colons (skip_n_char 1 s) in
+  ident_type_from_char (Some fq) name lexbuf s.[0]
+  
+let arraylen_ident_from_lexbuf lexbuf = 
+  not_ok_for_match := lexeme_end lexbuf; 
+  let s = lexeme lexbuf in
+  ARRAYLEN_IDENT(None, skip_n_char 2 s, pos lexbuf)
+
+let arraylen_fqident_from_lexbuf lexbuf = 
+  let s = lexeme lexbuf in
+  let fq, name = split_at_two_colons (skip_n_char 2 s) in
+  ARRAYLEN_IDENT(Some fq, name, pos lexbuf)
+
+let check_multi_line_delimited_string opts (start, end_) =
+   let check =
+     match opts with
+     | None -> true
+     | Some s -> not (String.contains s 'x') in
+   if check then
+     if !current_file_current_line <> !current_string_start_line then
+     failwith (pos2sfull_with start end_ ^ "multi-line patterns are not allowed (or use /x modifier)")
+
+let hex_in_string lexbuf next_rule s =
+  let i = 
+    try int_of_string ("0x" ^ s)
+    with Failure("int_of_string") -> die_in_string lexbuf ("Bad_hex_in_string \"" ^ lexeme lexbuf ^ "\"")
+  in
+  let s =
+    if i < 256 then
+      String.make 1 (Char.chr i)
+    else
+      "\\x{" ^ s ^ "}" in
+  next_s s (Stack.pop next_rule) lexbuf 
+
+let set_delimit_char lexbuf op = 
+  let c = lexeme_char lexbuf (String.length op) in
+  delimit_char := c;
+  match c with
+  | '@' -> warn [Warn_complex_expressions] lexbuf ("don't use " ^ op ^ "@...@, replace @ with / ! , or |")
+  | ':' -> warn [Warn_complex_expressions] lexbuf ("don't use " ^ op ^ ":...:, replace : with / ! , or |")
+  | _ -> ()
+
+let set_delimit_char_open lexbuf op =
+  let char_open = lexeme_char lexbuf (String.length op) in
+  let char_close =
+    match char_open with
+    | '(' -> ')'
+    | '{' -> '}'
+    | _ -> internal_error "set_delimit_char_open"
+  in
+  if op = "qx" then 
+    warn [Warn_complex_expressions] lexbuf (Printf.sprintf "don't use qx%c...%c, use `...` instead" char_open char_close)
+  else if char_open = '{' then
+    warn [Warn_complex_expressions] lexbuf ("don't use " ^ op ^ "{...}, use " ^ op ^ "(...) instead");
+  delimit_char_open := char_open;
+  delimit_char_close := char_close
+}
+
+let stash = [ '$' '@' '%' '&' '*' ]
+let ident_start = ['a'-'z' 'A'-'Z' '_']
+let ident = ident_start ['0'-'9' 'A'-'Z' 'a'-'z' '_'] *
+let pattern_separator = [ '/' '!' ',' '|' '@' ':' ] 
+let pattern_open = [ '(' '{' ]
+let pattern_close = [ ')' '}' ]
+
+let in_string_expr = (ident | (ident? ("::" ident)+)) "->"? (('{' [^ '{' '}' '\n']* '}') | ('[' [^ '[' ']' '\n']* ']'))*
+
+rule token = parse
+| [' ' '\t']+ { 
+    (* propagate not_ok_for_match when it was set by the previous token *)
+    if lexeme_start lexbuf = !not_ok_for_match then not_ok_for_match := lexeme_end lexbuf; 
+    SPACE(lexeme_end lexbuf - lexeme_start lexbuf)
+  }
+| "# perl_checker: " [^ '\n']* { PERL_CHECKER_COMMENT(skip_n_char 16 (lexeme lexbuf), pos lexbuf) }
+| "#-PO: " [^ '\n']* { PO_COMMENT(skip_n_char 1 (lexeme lexbuf), pos lexbuf) }
+| '#' [^ '\n']* { SPACE(1) }
+
+| "\n=" { 
+    add_a_new_line(lexeme_end lexbuf - 1);
+    let _ = ins pod_command lexbuf in token lexbuf 
+  }
+
+| '\n' { 
+    add_a_new_line(lexeme_end lexbuf);
+    (try
+      let (mark, r) = Queue.pop here_docs in
+      current_here_doc_mark := mark ;
+      r := ins here_doc lexbuf
+    with Queue.Empty -> 
+     try
+      let (mark, r) = Queue.pop raw_here_docs in
+      current_here_doc_mark := mark ;
+      r := raw_ins raw_here_doc lexbuf
+    with Queue.Empty -> ());
+    CR
+  }
+| "->" { ARROW(pos lexbuf) }
+| "++" { INCR(pos lexbuf) }
+| "--" { DECR(pos lexbuf) }
+| "**" { POWER(pos lexbuf) }
+| "!" { TIGHT_NOT(pos lexbuf) }
+| "~" { BIT_NEG(pos lexbuf) }
+| "=~" { PATTERN_MATCH(pos lexbuf) }
+| "!~" { PATTERN_MATCH_NOT(pos lexbuf) }
+| "*" { MULT(lexeme lexbuf, pos lexbuf) }
+| "%" { MULT(lexeme lexbuf, pos lexbuf) }
+| "x" { MULT_L_STR(pos lexbuf) }
+| "+" { PLUS(lexeme lexbuf, pos lexbuf) }
+| "-" { PLUS(lexeme lexbuf, pos lexbuf) }
+| "." { CONCAT(pos lexbuf) }
+| "<<" { BIT_SHIFT(lexeme lexbuf, pos lexbuf) }
+| ">>" { BIT_SHIFT(lexeme lexbuf, pos lexbuf) }
+| "<" { LT(pos lexbuf) }
+| ">" { GT(pos lexbuf) }
+| "<=" | ">="  { COMPARE_OP(lexeme lexbuf, pos lexbuf) }
+| "lt" | "gt" | "le" | "ge" { COMPARE_OP_STR(lexeme lexbuf, pos lexbuf) }
+| "==" | "!=" | "<=>" { EQ_OP(lexeme lexbuf, pos lexbuf) }
+| "eq" | "ne" | "cmp" { EQ_OP_STR(lexeme lexbuf, pos lexbuf) }
+| "&" { BIT_AND(pos lexbuf) }
+| "|" { BIT_OR(pos lexbuf) }
+| "^" { BIT_XOR(pos lexbuf) }
+| "&&" { AND_TIGHT(pos lexbuf) }
+| "||" { OR_TIGHT(pos lexbuf) }
+| ".." { DOTDOT(lexeme lexbuf, pos lexbuf) }
+| "..." { DOTDOT(lexeme lexbuf, pos lexbuf) }
+| "?" { QUESTION_MARK(pos lexbuf) }
+| ":" { COLON(pos lexbuf) }
+| "::" { PKG_SCOPE(pos lexbuf) }
+
+| "=" | "+=" | "-=" | "*=" | ".=" | "|=" | "&=" | "^=" | "||=" | "&&=" { ASSIGN(lexeme lexbuf, pos lexbuf) }
+
+| "<<=" | ">>=" | "**=" { 
+    warn [Warn_complex_expressions] lexbuf (Printf.sprintf "don't use \"%s\", use the expanded version instead" (lexeme lexbuf)) ;
+    ASSIGN(lexeme lexbuf, pos lexbuf) 
+  }
+
+| "," { COMMA(pos lexbuf) }
+| "=>" { RIGHT_ARROW(pos lexbuf) }
+| "not" { NOT(pos lexbuf) }
+| "and" { AND(pos lexbuf) }
+| "or" { OR(pos lexbuf) }
+| "xor" { XOR(pos lexbuf) }
+
+| "if"       { IF(pos lexbuf) }
+| "else"     { ELSE(pos lexbuf) }
+| "elsif"    { ELSIF(pos lexbuf) }
+| "unless"   { UNLESS(pos lexbuf) }
+| "do"       { DO(pos lexbuf) }
+| "while"    { WHILE(pos lexbuf) }
+| "until"    { UNTIL(pos lexbuf) }
+| "foreach"  { FOR(lexeme lexbuf, pos lexbuf) }
+| "for"      { FOR(lexeme lexbuf, pos lexbuf) }
+| "my"       { MY_OUR(lexeme lexbuf, pos lexbuf) }
+| "our"      { MY_OUR(lexeme lexbuf, pos lexbuf) }
+| "local"    { LOCAL(pos lexbuf) }
+| "continue" { CONTINUE(pos lexbuf) }
+| "sub"      { SUB(pos lexbuf) }
+| "package"  { PACKAGE(pos lexbuf) }
+| "use"      { USE(pos lexbuf) }
+| "BEGIN"    { BEGIN(pos lexbuf) }
+| "END"      { END(pos lexbuf) }
+| "print"    { PRINT(lexeme lexbuf, pos lexbuf) }
+| "printf"   { PRINT(lexeme lexbuf, pos lexbuf) }
+| "new"      { NEW(pos lexbuf) }
+| "format"   { let pos = pos lexbuf in FORMAT(here_doc_next_line ".", pos) }
+| "delete"
+| "defined"
+| "length" 
+| "keys" 
+| "exists" 
+| "shift"
+| "pop"
+| "eval"
+| "ref"      { ONE_SCALAR_PARA(lexeme lexbuf, pos lexbuf) }
+
+| "split"
+| "grep"  { (* ok_for_match! *) BAREWORD(lexeme lexbuf, pos lexbuf) }
+
+| "print " ['A'-'Z'] ['A'-'Z' '0'-'9']* ['\n' ' '] { 
+    putback lexbuf 1;
+    PRINT_TO_STAR(("print", skip_n_char 6 (lexeme lexbuf)), pos lexbuf)
+  }
+| "print $" ident ['\n' ' '] { 
+    putback lexbuf 1; 
+    PRINT_TO_SCALAR(("print", skip_n_char 7 (lexeme lexbuf)), pos lexbuf);
+  }
+| "printf " ['A'-'Z'] ['A'-'Z' '0'-'9']* ['\n' ' '] { 
+    putback lexbuf 1;
+    PRINT_TO_STAR(("printf", skip_n_char 7 (lexeme lexbuf)), pos lexbuf)
+  }
+| "printf $" ident ['\n' ' '] { 
+    putback lexbuf 1; 
+    PRINT_TO_SCALAR(("printf", skip_n_char 8 (lexeme lexbuf)), pos lexbuf);
+  }
+
+| ident ' '* "=>" { (* needed so that (if => 1) works *)
+    let s = lexeme lexbuf in
+    let end_ = String.length s - 1 in
+    let ident_end = non_rindex_from s (end_ - 2) ' ' in
+    putback lexbuf (end_ - ident_end); 
+    BAREWORD(String.sub s 0 (ident_end+1), pos lexbuf)
+  }
+
+| "{" ident "}" { (* needed so that $h{if} works *)
+    not_ok_for_match := lexeme_end lexbuf;
+    COMPACT_HASH_SUBSCRIPT(skip_n_char_ 1 1 (lexeme lexbuf), pos lexbuf)
+  }
+
+| '@' { AT(pos lexbuf) }
+| '$' { DOLLAR(pos lexbuf) }
+| '$' '#' { ARRAYLEN(pos lexbuf) }
+| '%' ['$' '{'] { putback lexbuf 1; PERCENT(pos lexbuf) }
+| '&' ['$' '{'] { putback lexbuf 1; AMPERSAND(pos lexbuf) }
+| '*' ['$' '{'] { putback lexbuf 1; if lexeme_start lexbuf = !not_ok_for_match then MULT("*", pos lexbuf) else STAR(pos lexbuf) }
+
+
+| ';' { SEMI_COLON(pos lexbuf) }
+| '(' { PAREN(pos lexbuf) }
+| '{' { BRACKET(pos lexbuf) }
+| "+{"{ BRACKET_HASHREF(pos lexbuf) }
+| '[' { ARRAYREF(pos lexbuf) }
+| ')' { not_ok_for_match := lexeme_end lexbuf; PAREN_END(pos lexbuf) }
+| '}' { not_ok_for_match := lexeme_end lexbuf; BRACKET_END(pos lexbuf) }
+| ']' { not_ok_for_match := lexeme_end lexbuf; ARRAYREF_END(pos lexbuf) }
+
+| "/" {
+    if lexeme_start lexbuf = !not_ok_for_match then MULT("/", pos lexbuf)
+    else (
+      delimit_char := '/' ;
+      current_string_start_line := !current_file_current_line;
+      let s, pos = ins_re re_delimited_string lexbuf in
+      let opts, _ = raw_ins pattern_options lexbuf in
+      check_multi_line_delimited_string (Some opts) pos ;
+      PATTERN(s, opts, pos)
+    ) 
+  }
+
+| "/=" {
+    if lexeme_start lexbuf = !not_ok_for_match then ASSIGN(lexeme lexbuf, pos lexbuf)
+    else (
+      putback lexbuf 1 ;
+      delimit_char := '/' ;
+      let s, pos = ins_re re_delimited_string lexbuf in
+      let opts, _ = raw_ins pattern_options lexbuf in
+      PATTERN(s, opts, pos)
+    ) 
+  }
+
+| "m" pattern_separator {
+  set_delimit_char lexbuf "m" ;
+  current_string_start_line := !current_file_current_line;
+  let s, pos = ins_re re_delimited_string lexbuf in
+  let opts, _ = raw_ins pattern_options lexbuf in
+  check_multi_line_delimited_string (Some opts) pos ;
+  PATTERN(s, opts, pos)
+}
+
+| "qr" pattern_separator {
+  set_delimit_char lexbuf "qr" ;
+  current_string_start_line := !current_file_current_line;
+  let s, pos = ins_re re_delimited_string lexbuf in
+  let opts, _ = raw_ins pattern_options lexbuf in
+  check_multi_line_delimited_string (Some opts) pos ;
+  QR_PATTERN(s, opts, pos)
+}
+
+| "qw" pattern_separator {
+  set_delimit_char lexbuf "qw" ;
+  current_string_start_line := !current_file_current_line;
+  let s, pos = raw_ins delimited_string lexbuf in
+  warn_with_pos [Warn_complex_expressions] pos (Printf.sprintf "don't use qw%c...%c, use qw(...) instead" !delimit_char !delimit_char) ;
+  QUOTEWORDS(s, pos)
+}
+
+| "s" pattern_separator {
+  set_delimit_char lexbuf "s" ;
+  current_string_start_line := !current_file_current_line;
+  let s1, (start, _) = ins_re re_delimited_string lexbuf in 
+  let s2, (_, end_)  = ins delimited_string lexbuf in 
+  let opts, _ = raw_ins pattern_options lexbuf in
+  let pos = start, end_ in
+  if String.contains opts 'e' && sum (List.map (fun (s, _) -> count_chars_in_string s '"') s2) > 2 then
+    die lexbuf ("do not write so complicated things in the eval part of s///,\n" ^
+		"i generate wrong warnings for things like s/xxx/die \"yyy \\\"zzz\\\" \"/") ;
+  check_multi_line_delimited_string (Some opts) pos ;
+  PATTERN_SUBST(s1, s2, opts, pos)
+}
+
+| "tr" pattern_separator {
+  set_delimit_char lexbuf "tr" ;
+  current_string_start_line := !current_file_current_line;
+  let s1, (start, _) = ins delimited_string lexbuf in 
+  let s2, (_, end_)  = ins delimited_string lexbuf in 
+  let opts, _ = raw_ins pattern_options lexbuf in
+  let pos = start, end_ in
+  check_multi_line_delimited_string None pos ;
+  PATTERN_SUBST(s1, s2, opts, pos)
+}
+    
+| "<<" ident { 
+    not_ok_for_match := lexeme_end lexbuf; 
+    HERE_DOC(here_doc_next_line (skip_n_char 2 (lexeme lexbuf)), pos lexbuf)
+  }
+| "<<\"" ident "\"" { 
+    warn_with_pos [Warn_suggest_simpler] (lexeme_start lexbuf + 2, lexeme_end lexbuf) "Don't use <<\"MARK\", use <<MARK instead" ;
+    not_ok_for_match := lexeme_end lexbuf; 
+    HERE_DOC(here_doc_next_line (skip_n_char_ 3 1 (lexeme lexbuf)), pos lexbuf)
+  }
+| "<<'" ident "'" { 
+    not_ok_for_match := lexeme_end lexbuf; 
+    RAW_HERE_DOC(raw_here_doc_next_line (skip_n_char_ 3 1 (lexeme lexbuf)), pos lexbuf)
+  }
+| "<<" ' '+ "'"
+| "<<" ' '+ ident
+| "<<" ' '* '"' { 
+    failwith (pos2sfull_with (lexeme_start lexbuf + 2) (lexeme_end lexbuf) ^ "No space allowed between \"<<\" and the marker")
+  }
+
+| "\\"+ stash
+| "\\" ['0'-'9' 'A'-'Z' 'a'-'z']
+| "\\" ' '* '('
+    { lexbuf.lex_curr_pos <- lexbuf.lex_start_pos + 1; REF(pos lexbuf) }
+
+| "sub(" [ '$' '@' '\\' '&' ';' '%' ]* ')' {
+    SUB_WITH_PROTO(skip_n_char_ 4 1 (lexeme lexbuf), pos lexbuf)
+  }
+
+| "sub" ' '+ ident ' '* '(' [ '$' '@' '\\' '&' ';' '%' ]* ')' {
+    (* bloody prototypes, must be caught especially otherwise "($)" is badly tokenized *)
+    (* and alas "($@)" is both valid as an expression and a prototype *)
+    let s = lexeme lexbuf in
+    let ident_start = non_index_from s 3 ' ' in
+
+    let proto_start = String.index_from s ident_start '(' in
+    let ident_end = non_rindex_from s (proto_start-1) ' ' in
+    let ident = String.sub s ident_start (ident_end - ident_start + 1) in
+    let prototype = skip_n_char_ (proto_start + 1) 1 s in
+
+    FUNC_DECL_WITH_PROTO(None, ident, prototype, pos lexbuf)
+  }
+
+| "sub" ' '+ ident ("::" ident)+ ' '* '(' [ '$' '@' '\\' '&' ';' '%' ]* ')' {
+    (* bloody prototypes, must be caught especially otherwise "($)" is badly tokenized *)
+    (* and alas "($@)" is both valid as an expression and a prototype *)
+    let s = lexeme lexbuf in
+    let ident_start = non_index_from s 3 ' ' in
+
+    let proto_start = String.index_from s ident_start '(' in
+    let ident_end = non_rindex_from s (proto_start-1) ' ' in
+    let ident = String.sub s ident_start (ident_end - ident_start + 1) in
+    let prototype = skip_n_char_ (proto_start + 1) 1 s in
+
+    let fq, name = split_at_two_colons ident in
+    FUNC_DECL_WITH_PROTO(Some fq, name, prototype, pos lexbuf)
+  }
+
+| "$#" ident? ("::" ident)+ { arraylen_fqident_from_lexbuf lexbuf }
+| "$#" ident { arraylen_ident_from_lexbuf lexbuf }
+
+| stash ident? ("::" ident)+ { typed_fqident_from_lexbuf lexbuf }
+| stash ident
+| '$' [^ '{' ' ' '\n' '$']
+| "$^" [^ '{' ' ' '\n'] { typed_ident_from_lexbuf lexbuf }
+
+| "$$" [^ 'a'-'z' 'A'-'Z' '_' '{'] { putback lexbuf 1; SCALAR_IDENT(None, "$", pos lexbuf) }
+
+| stash "::" { putback lexbuf 2; ident_type_from_char None "main" lexbuf (lexeme_char lexbuf 0) }
+
+| ident? ("::" ident)+ { ident_from_lexbuf lexbuf }
+| ident { not_ok_for_match := lexeme_end lexbuf; 
+	  let word = lexeme lexbuf in
+	  if word = "qx" then die lexbuf "don't use qx{...}, use `...` instead" else
+	  BAREWORD(word, pos lexbuf) }
+
+| ident ":" { LABEL(lexeme lexbuf, pos lexbuf) }
+
+| '-' [ 'a'-'z' 'A'-'Z' ] [ ' ' '(' ';' ] { putback lexbuf 1; ONE_SCALAR_PARA(lexeme lexbuf, pos lexbuf) }
+
+| ['0'-'9'] ['0'-'9' '_']* '.' ['0'-'9'] ['0'-'9' '_']* ('.' ['0'-'9'] ['0'-'9' '_']*)+ 
+| 'v' ['0'-'9'] ['0'-'9' '_']* ('.' ['0'-'9'] ['0'-'9' '_']*)* 
+  { 
+    not_ok_for_match := lexeme_end lexbuf; 
+    REVISION(lexeme lexbuf, pos lexbuf)
+  }
+
+| ['0'-'9']* '.' ['0'-'9']+ (['e' 'E']['-' '+']?['0'-'9']+)? { 
+    not_ok_for_match := lexeme_end lexbuf; 
+    FLOAT(lexeme lexbuf, pos lexbuf)
+  }
+| ['0'-'9'] ['0'-'9' '_']*  (['e' 'E']['-' '+']?['0'-'9']+)?
+| "0x" ['0'-'9' 'a'-'f' 'A'-'F']+ { 
+    not_ok_for_match := lexeme_end lexbuf; 
+    INT(lexeme lexbuf, pos lexbuf)
+  }
+
+| 'N' '_'? "(\"" { string_is_i18n := true ; putback lexbuf 2 ; BAREWORD(lexeme lexbuf, pos lexbuf) }
+
+| '"'   { ins_to_string string lexbuf }
+| "'"   { raw_ins_to_string rawstring lexbuf }
+| '`'   { delimit_char := '`'; 
+	  current_string_start_line := !current_file_current_line;
+	  not_ok_for_match := lexeme_end lexbuf; 
+	  let s, pos = ins delimited_string lexbuf in
+	  check_multi_line_delimited_string None pos ;
+	  COMMAND_STRING(s, pos) }
+| "q" pattern_open { set_delimit_char_open lexbuf "q"; raw_ins_to_string qstring lexbuf }
+| "qq" pattern_open { set_delimit_char_open lexbuf "qq"; ins_to_string qqstring lexbuf }
+| "qx" pattern_open { set_delimit_char_open lexbuf "qx"; ins_to_string qqstring lexbuf }
+| "qw" pattern_open { set_delimit_char_open lexbuf "qw"; let s, pos = raw_ins qstring lexbuf in QUOTEWORDS(s, pos) }
+
+| "\n__END__" [^ '0'-'9' 'A'-'Z' 'a'-'z' '_'] 
+| "\n__DATA__" [^ '0'-'9' 'A'-'Z' 'a'-'z' '_']
+| eof   { EOF(pos lexbuf) }
+| _ { failwith (Printf.sprintf "%serror tokenizing <<%s>>" (pos2sfull lexbuf) (lexeme lexbuf)) }
+
+and string = parse
+| '"' { () }
+| '\\' { Stack.push string next_rule ; string_escape_kind := Double_quote; string_escape lexbuf }
+| '$'  { Stack.push string next_rule ; string_interpolate_scalar lexbuf }
+| '@'  { Stack.push string next_rule ; string_interpolate_array lexbuf }
+| '\n' { 
+    add_a_new_line(lexeme_end lexbuf);
+    next string lexbuf
+  }
+| "'" { string_escape_useful := Left true ; next string lexbuf }
+| [^ '\n' '\\' '"' '$' '@']+ { next string lexbuf }
+| eof { die_in_string lexbuf "Unterminated_string" }
+
+and delimited_string = parse
+| '\\' { Stack.push delimited_string next_rule ; string_escape_kind := Delimited; string_escape lexbuf }
+| '$'  { Stack.push delimited_string next_rule ; delimited_string_interpolate_scalar lexbuf }
+| '@'  { Stack.push delimited_string next_rule ; delimited_string_interpolate_array lexbuf }
+| '\n' { 
+    add_a_new_line(lexeme_end lexbuf);
+    next delimited_string lexbuf
+  }
+| eof { die_in_string lexbuf "Unterminated_delimited_string" }
+| [ ^ '\n' '\\' '$' '@'] { if lexeme_char lexbuf 0 <> !delimit_char then next delimited_string lexbuf }
+
+and re_delimited_string = parse
+| '\\' { Stack.push re_delimited_string next_rule ; re_string_escape lexbuf }
+| '$'  { Stack.push re_delimited_string next_rule ; delimited_string_interpolate_scalar lexbuf }
+| '@'  { if lexeme_char lexbuf 0 <> !delimit_char then 
+        (Stack.push re_delimited_string next_rule ; delimited_string_interpolate_array lexbuf) }
+| '\n' { 
+    add_a_new_line(lexeme_end lexbuf);
+    next re_delimited_string lexbuf
+  }
+| eof { die_in_string lexbuf "Unterminated_delimited_string" }
+| [ ^ '\n' '\\' '$' '@'] { if lexeme_char lexbuf 0 <> !delimit_char then next re_delimited_string lexbuf }
+
+and rawstring = parse
+| ''' { () }
+| '\n' { 
+    add_a_new_line(lexeme_end lexbuf);
+    next rawstring lexbuf
+  }
+| '\\' { next rawstring lexbuf }
+| "\\'" { next_s "'" rawstring lexbuf }
+| [^ '\n' ''' '\\']+ { next rawstring lexbuf }
+| eof { die_in_string lexbuf "Unterminated_rawstring" }
+
+and qqstring = parse
+| pattern_close { 
+    if lexeme_char lexbuf 0 = !delimit_char_close then
+      if !string_nestness <> 0 then (decr string_nestness; next qqstring lexbuf)
+      else ()
+    else next qstring lexbuf
+  }
+| pattern_open {
+    if lexeme_char lexbuf 0 = !delimit_char_open then incr string_nestness;
+    next qqstring lexbuf
+  }
+| '\\' { Stack.push qqstring next_rule ; string_escape_kind := Qq; string_escape lexbuf }
+| '$'  { Stack.push qqstring next_rule ; string_interpolate_scalar lexbuf }
+| '@'  { Stack.push qqstring next_rule ; string_interpolate_array lexbuf }
+| '\n' { 
+    add_a_new_line(lexeme_end lexbuf);
+    next qqstring lexbuf
+  }
+| [^ '\n' '(' ')' '{' '}' '\\' '$' '@']+ { next qqstring lexbuf }
+| eof { die_in_string lexbuf "Unterminated_qqstring" }
+
+and qstring = parse
+| pattern_close { 
+    if lexeme_char lexbuf 0 = !delimit_char_close then
+      if !string_nestness <> 0 then (decr string_nestness ; next qstring lexbuf) 
+      else ()
+    else next qstring lexbuf
+  }
+| pattern_open {
+    if lexeme_char lexbuf 0 = !delimit_char_open then incr string_nestness;
+    next qstring lexbuf
+  }
+| '\n' { 
+    add_a_new_line(lexeme_end lexbuf);
+    next qstring lexbuf
+  }
+| [^ '\n' '(' ')' '{' '}']+ { next qstring lexbuf }
+| eof { die_in_string lexbuf "Unterminated_qstring" }
+
+and here_doc = parse
+| '\\' { Stack.push here_doc next_rule ; string_escape_kind := Here_doc; string_escape lexbuf }
+| '$'  { Stack.push here_doc next_rule ; string_interpolate_scalar lexbuf }
+| '@'  { Stack.push here_doc next_rule ; string_interpolate_array lexbuf }
+| [ ^ '\n' '\\' '$' '@' ]* {
+    let s = lexeme lexbuf in
+    if chomps s <> !current_here_doc_mark
+    then next_s s here_doc lexbuf 
+    else if s <> !current_here_doc_mark then warn_with_pos [Warn_traps] (pos lexbuf) "Trailing spaces after HERE-document mark"
+  }
+| '\n' { 
+    add_a_new_line(lexeme_end lexbuf);
+    next here_doc lexbuf
+  }
+| eof { die_in_string lexbuf "Unterminated_here_doc" }
+
+and raw_here_doc = parse
+| [ ^ '\n' ]* {
+    let s = lexeme lexbuf in
+    if chomps s <> !current_here_doc_mark
+    then next_s s raw_here_doc lexbuf 
+    else if s <> !current_here_doc_mark then warn_with_pos [Warn_traps] (pos lexbuf) "Trailing spaces after HERE-document mark"
+  }
+| '\n' { 
+    add_a_new_line(lexeme_end lexbuf);
+    next raw_here_doc lexbuf
+  }
+| eof { die_in_string lexbuf "Unterminated_raw_here_doc" }
+
+
+and string_escape = parse
+| ['0'-'9']         { string_escape_useful := Left true; next_s (String.make 1 (Char.chr (int_of_string (lexeme lexbuf)))) (Stack.pop next_rule) lexbuf }
+| 'n'               { string_escape_useful := Left true; next_s "\n" (Stack.pop next_rule) lexbuf }
+| 't'               { string_escape_useful := Left true; next_s "\t" (Stack.pop next_rule) lexbuf }
+| "x{" [^ '}']* '}' { string_escape_useful := Left true; hex_in_string lexbuf next_rule (skip_n_char_ 2 1 (lexeme lexbuf)) }
+| 'x' [^ '{'] _     { string_escape_useful := Left true; hex_in_string lexbuf next_rule (skip_n_char 1 (lexeme lexbuf)) }
+| '\n' { die lexbuf "do not use \"\\\" before end-of-line, it's useless and generally bad" }
+| '\\'{ next_s "\\" (Stack.pop next_rule) lexbuf }
+| 'Q' { 
+	warn [Warn_complex_expressions] lexbuf ("don't use \\Q, use quotemeta instead");
+	string_escape_useful := Left true; next_s ("\\" ^ lexeme lexbuf) (Stack.pop next_rule) lexbuf }
+| ['b' 'f' 'a' 'r'] { string_escape_useful := Left true; next_s ("\\" ^ lexeme lexbuf) (Stack.pop next_rule) lexbuf }
+| ['$' '@' '%' '{' '[' ':'] { 
+	if !string_escape_useful = Left false then string_escape_useful := Right (lexeme_char lexbuf 0) ;
+	next_s (lexeme lexbuf) (Stack.pop next_rule) lexbuf 
+  }
+| _   { 
+    let c = lexeme_char lexbuf 0 in
+    (match !string_escape_kind with
+    | Double_quote -> 
+	if c <> '"' then
+	  warn_escape_unneeded lexbuf c
+	else (
+	  if !string_escape_useful = Left false then string_escape_useful := Right c ;
+	  string_quote_escape := true
+        )
+    | Qq -> if c <> !delimit_char_open && c <> !delimit_char_close then warn_escape_unneeded lexbuf c
+    | Here_doc -> warn_escape_unneeded lexbuf c
+    | Delimited -> if c = !delimit_char then 
+          warn [Warn_suggest_simpler] lexbuf ("change the delimit character " ^ String.make 1 !delimit_char ^ " to get rid of this escape")
+        else warn_escape_unneeded lexbuf c);
+    let s = if c = '"' then String.make 1 c else "\\" ^ String.make 1 c in
+    next_s s (Stack.pop next_rule) lexbuf 
+  }
+
+and re_string_escape = parse
+| ['0'-'9'] { next_s (String.make 1 (Char.chr (int_of_string (lexeme lexbuf)))) (Stack.pop next_rule) lexbuf }
+| '\\'{ next_s "\\" (Stack.pop next_rule) lexbuf }
+| 'n' { next_s "\n" (Stack.pop next_rule) lexbuf }
+| 't' { next_s "\t" (Stack.pop next_rule) lexbuf }
+| "x{" [^ '}']* '}' { hex_in_string lexbuf next_rule (skip_n_char_ 2 1 (lexeme lexbuf)) }
+| 'x' [^ '{'] _ { hex_in_string lexbuf next_rule (skip_n_char 1 (lexeme lexbuf)) }
+| '\n' { die lexbuf "do not use \"\\\" before end-of-line, it's useless and generally bad" }
+| ['r' 'b' 'f' '$' '@' '%' 's' 'S' 'd' 'D' 'w' 'W' 'Q' 'E' 'b' 'Z' 'z' '^' '.' '*' '+' '?' '[' ']' '(' ')' '|' '{' '}' '-' ':'] { 
+     next_s ("\\" ^ lexeme lexbuf) (Stack.pop next_rule) lexbuf 
+  }
+| _  {
+     let c = lexeme_char lexbuf 0 in 
+     if c = !delimit_char then 
+       warn [Warn_suggest_simpler] lexbuf ("change the delimit character " ^ String.make 1 !delimit_char ^ " to get rid of this escape")
+     else warn_escape_unneeded lexbuf c ;
+     next_s ("\\" ^ lexeme lexbuf) (Stack.pop next_rule) lexbuf 
+  }
+
+and string_interpolate_scalar = parse
+| '$' ident
+| ['0'-'9']
+| '{' [^ '{' '}']* '}'
+| in_string_expr
+| [^ '{' '}' ' ' '\n' '"'] { (* eg: $! $$ *)
+      string_interpolate token "$" lexbuf
+  }
+
+| "{"
+| ident "->"? '{'
+| '"' { putback lexbuf 1; next_s "$" (Stack.pop next_rule) lexbuf }
+| eof {                   next_s "$" (Stack.pop next_rule) lexbuf }
+| _ { warn [Warn_strange] lexbuf (Printf.sprintf "weird \"%s\" in string" (lexeme lexbuf)); next_s ("$" ^ lexeme lexbuf) (Stack.pop next_rule) lexbuf }
+
+and delimited_string_interpolate_scalar = parse (* needed for delimited string like m!foo$! where $! should not be taken as is... *)
+| '$' ident
+| ['0'-'9']
+| '{' [^ '{' '}']* '}'
+| (ident | (ident? ("::" ident)+)) "->"? ('{' [^ '{' '}' '\n']* '}')*
+| (ident | (ident? ("::" ident)+)) "->"? (('{' [^ '{' '}' '\n']* '}') | ('[' ('$' ident | ['0'-'9']+) ']'))*
+  {
+      string_interpolate token "$" lexbuf
+  }
+
+| (ident | (ident? ("::" ident)+)) "->"? (('{' [^ '{' '}' '\n']* '}') | ('[' ['$' '0'-'9'] [^ '[' ']' '\n']* ']'))*
+  {
+   die lexbuf (Printf.sprintf "I really can't handle this, [xxx] can be indexing or not based on stellar position :-(")
+  }
+
+| "{"
+| ident "->"? '{'
+| eof { next_s "$" (Stack.pop next_rule) lexbuf }
+| _ { 
+    let c = lexeme_char lexbuf 0 in
+    if c <> !delimit_char && c <> '|' && c<>')' && c<>'/' && c<>' ' then warn [Warn_strange] lexbuf (Printf.sprintf "weird \"%s\" in string" (lexeme lexbuf)); 
+    putback lexbuf 1;
+    next_s "$" (Stack.pop next_rule) lexbuf
+  }
+
+and string_interpolate_array = parse
+| '$' ident
+| '{' [^ '{' '}']* '}'
+| in_string_expr { string_interpolate token "@" lexbuf }
+
+| [ '@' '*' '<' '>' ']' '.' '(' ' ' ] { next_s ("@" ^ lexeme lexbuf) (Stack.pop next_rule) lexbuf }
+| '"' { putback lexbuf 1; next_s "@" (Stack.pop next_rule) lexbuf }
+| eof {                   next_s "@" (Stack.pop next_rule) lexbuf }
+| _ { warn [Warn_strange] lexbuf (Printf.sprintf "weird \"%s\" in string" (lexeme lexbuf)); next_s ("@" ^ lexeme lexbuf) (Stack.pop next_rule) lexbuf }
+
+and delimited_string_interpolate_array = parse
+| '$' ident
+| '{' [^ '{' '}']* '}'
+| in_string_expr
+    { string_interpolate token "@" lexbuf }
+
+| [ '@' '*' '<' '>' ']' '.' '(' ' ' ] { next_s ("@" ^ lexeme lexbuf) (Stack.pop next_rule) lexbuf }
+| eof { next_s "@" (Stack.pop next_rule) lexbuf }
+| _ { 
+    let c = lexeme_char lexbuf 0 in
+    if c <> !delimit_char then warn [Warn_strange] lexbuf (Printf.sprintf "weird \"%s\" in string" (lexeme lexbuf));
+    putback lexbuf 1;
+    next_s "@" (Stack.pop next_rule) lexbuf
+  }
+
+and pattern_options = parse
+| [ 'g' 'i' 'm' 'o' 's' 'x' 'e' 'd' ] { next pattern_options lexbuf }
+| _ { putback lexbuf 1; () }
+
+and pod_command = parse
+| [^ '\n' ]+ {
+  let s = lexeme lexbuf in
+  let command = String.sub s 0 (try String.index s ' ' with Not_found -> String.length s) in
+  match command with
+  | "cut" ->
+    if !(Stack.top building_current_string) = "" then
+      failwith(pos2sfull lexbuf ^ "found POD command \"=cut\" but it is not a POD block")
+  | "head1" | "head2" | "head3" | "head4" | "over" | "item" | "back" | "pod" | "begin" | "end" | "for" ->
+      next pod lexbuf
+  | s -> failwith(pos2sfull lexbuf ^ "unknown POD command \"" ^ s ^ "\"")
+ }
+| _ { failwith(pos2sfull lexbuf ^ "POD command expected") }
+
+and pod = parse
+| "\n=" { 
+    add_a_new_line(lexeme_end lexbuf - 1);
+    next pod_command lexbuf 
+  }
+| "\n" [^ '=' '\n'] [^ '\n']*
+| "\n" { 
+    add_a_new_line(lexeme_end lexbuf);
+    next pod lexbuf 
+  }
+| eof
+| _ { failwith(pos2sfull_with !current_string_start_pos (lexeme_end lexbuf) ^ "POD block still open") }
author	Mageia SVN-Git Migration <svn-git-migration@mageia.org>	2007-04-25 15:16:21 +0000
committer	Mageia SVN-Git Migration <svn-git-migration@mageia.org>	2007-04-25 15:16:21 +0000
commit	be4fff49f0164e606d4b2f76f64d4d108895f236 (patch)
tree	a46bc8c23de0b885f8a2962a9069930b48836fd9 /src/lexer.mll
parent	4746e8e79a5b3cdf3f72400a5a5d6742f6a76a8c (diff)
download	perl_checker-be4fff49f0164e606d4b2f76f64d4d108895f236.tar perl_checker-be4fff49f0164e606d4b2f76f64d4d108895f236.tar.gz perl_checker-be4fff49f0164e606d4b2f76f64d4d108895f236.tar.bz2 perl_checker-be4fff49f0164e606d4b2f76f64d4d108895f236.tar.xz perl_checker-be4fff49f0164e606d4b2f76f64d4d108895f236.zip