| | | |
|---|---|---|
| author | Pascal Rigaux <pixel@mandriva.com> | 2002-11-12 14:07:28 +0000 |
| committer | Pascal Rigaux <pixel@mandriva.com> | 2002-11-12 14:07:28 +0000 |
| commit | 695c5b66f3a8da169c48e7989edb4db2a894e5aa (patch) | |
| tree | 8ac6e0561f382c2636ab41c0be90b11da2033f7c /perl_checker.src/lexer.mll | |
| parent | 7fff947a462837c0cc7ea4cbf4da146b796ac349 (diff) | |
| download | perl-MDK-Common-695c5b66f3a8da169c48e7989edb4db2a894e5aa.{tar, tar.gz, tar.bz2, tar.xz, zip} | |
*** empty log message ***
Diffstat (limited to 'perl_checker.src/lexer.mll')
-rw-r--r-- | perl_checker.src/lexer.mll | 427 |
1 file changed, 304 insertions, 123 deletions
```diff
diff --git a/perl_checker.src/lexer.mll b/perl_checker.src/lexer.mll
index 6abbbc2..8450aa9 100644
--- a/perl_checker.src/lexer.mll
+++ b/perl_checker.src/lexer.mll
@@ -1,12 +1,202 @@
 { (* -*- caml -*- *)
-open Parser
 open Common
+open Types
 open Lexing
 open Info
+type raw_token =
+  | EOF of raw_pos
+  | SPACE of int
+  | CR
+  | NUM of (string * raw_pos)
+  | STRING of (string * raw_pos)
+  | BAREWORD of (string * raw_pos)
+  | REVISION of (string * raw_pos)
+  | COMMENT of (string * raw_pos)
+  | POD of (string * raw_pos)
+  | LABEL of (string * raw_pos)
+  | COMMAND_STRING of (string * raw_pos)
+  | PRINT_TO_STAR of (string * raw_pos)
+  | PRINT_TO_SCALAR of (string * raw_pos)
+  | QUOTEWORDS of (string * raw_pos)
+  | COMPACT_HASH_SUBSCRIPT of (string * raw_pos)
+  | HERE_DOC of ((string * raw_pos) ref * raw_pos)
+  | PATTERN of (string * string * raw_pos)
+  | PATTERN_SUBST of (string * string * string * raw_pos)
+  | SCALAR_IDENT of (string option * string * raw_pos)
+  | ARRAY_IDENT of (string option * string * raw_pos)
+  | HASH_IDENT of (string option * string * raw_pos)
+  | FUNC_IDENT of (string option * string * raw_pos)
+  | STAR_IDENT of (string option * string * raw_pos)
+  | RAW_IDENT of (string option * string * raw_pos)
+  | ARRAYLEN_IDENT of (string option * string * raw_pos)
+  | FUNC_DECL_WITH_PROTO of (string * string * raw_pos)
+
+  | IF of raw_pos | ELSIF of raw_pos | ELSE of raw_pos | UNLESS of raw_pos | DO of raw_pos | WHILE of raw_pos | UNTIL of raw_pos | MY of raw_pos | CONTINUE of raw_pos | SUB of raw_pos
+  | LOCAL of raw_pos | FOR of (string * raw_pos) | USE of raw_pos | PACKAGE of raw_pos | BEGIN of raw_pos | END of raw_pos | PRINT of (string * raw_pos)
+  | NEW of (raw_pos) | FORMAT of (raw_pos) | AT of raw_pos | DOLLAR of raw_pos | PERCENT of raw_pos | AMPERSAND of raw_pos
+  | STAR of raw_pos | ARRAYLEN of raw_pos | SEMI_COLON of raw_pos | PKG_SCOPE of raw_pos | PAREN of raw_pos | PAREN_END of raw_pos | BRACKET of raw_pos
+  | BRACKET_END of raw_pos | BRACKET_HASHREF of raw_pos | ARRAYREF of raw_pos | ARRAYREF_END of raw_pos | ARROW of raw_pos | INCR of raw_pos | DECR of raw_pos
+  | POWER of raw_pos | TIGHT_NOT of raw_pos | BIT_NEG of raw_pos | REF of raw_pos | PATTERN_MATCH of raw_pos | PATTERN_MATCH_NOT of raw_pos | MULT of raw_pos
+  | DIVISION of raw_pos | MODULO of raw_pos | REPLICATE of raw_pos | PLUS of raw_pos | MINUS of raw_pos | CONCAT of raw_pos | BIT_SHIFT_LEFT of raw_pos
+  | BIT_SHIFT_RIGHT of raw_pos | LT of raw_pos | GT of raw_pos | COMPARE_OP of (string * raw_pos) | EQ_OP of (string * raw_pos)
+  | BIT_AND of raw_pos | BIT_OR of raw_pos | BIT_XOR of raw_pos | AND_TIGHT of raw_pos | OR_TIGHT of raw_pos | DOTDOT of raw_pos | DOTDOTDOT of raw_pos
+  | QUESTION_MARK of raw_pos | COLON of raw_pos | ASSIGN of (string * raw_pos) | COMMA of raw_pos | RIGHT_ARROW of raw_pos | NOT of raw_pos | AND of raw_pos | OR of raw_pos | XOR of raw_pos
+
+let saved_token = ref None
+
+let concat_bareword_paren get_token lexbuf =
+  let token = match !saved_token with
+  | None -> get_token lexbuf
+  | Some t -> t
+  in
+  let token, next =
+    match token with
+    | Parser.PRINT(s, both)
+    | Parser.BAREWORD(s, both) ->
+        let next_token = get_token lexbuf in
+        (match next_token with Parser.PAREN(_, (Space_0, _)) -> Parser.BAREWORD_PAREN(s, both) | _ -> token), Some next_token
+    | Parser.RAW_IDENT(ident, both) ->
+        let next_token = get_token lexbuf in
+        (match next_token with Parser.PAREN(_, (Space_0, _)) -> Parser.RAW_IDENT_PAREN(ident, both) | _ -> token), Some next_token
+    | _ -> token, None
+  in
+  saved_token := next ; token
+
+let rec concat_spaces get_token lexbuf =
+  let rec get_spaces spaces lexbuf =
+    match get_token lexbuf with
+    | CR -> get_spaces Space_cr lexbuf
+    | SPACE n ->
+        let spaces' =
+          match spaces with
+          | Space_cr -> Space_cr
+          | Space_0 -> if n = 1 then Space_1 else Space_n
+          | _ -> Space_n
+        in
+        get_spaces spaces' lexbuf
+    | token -> token, spaces
+  in
+  let token, spaces = get_spaces Space_0 lexbuf in
+  match token with
+  | NUM(s, pos) -> Parser.NUM(s, (spaces, pos))
+  | STRING(s, pos) -> Parser.STRING(s, (spaces, pos))
+  | BAREWORD(s, pos) -> Parser.BAREWORD(s, (spaces, pos))
+  | REVISION(s, pos) -> Parser.REVISION(s, (spaces, pos))
+  | COMMENT(s, pos) -> Parser.COMMENT(s, (spaces, pos))
+  | POD(s, pos) -> Parser.POD(s, (spaces, pos))
+  | LABEL(s, pos) -> Parser.LABEL(s, (spaces, pos))
+  | COMMAND_STRING(s, pos) -> Parser.COMMAND_STRING(s, (spaces, pos))
+  | PRINT(s, pos) -> Parser.PRINT(s, (spaces, pos))
+  | PRINT_TO_STAR(s, pos) -> Parser.PRINT_TO_STAR(s, (spaces, pos))
+  | PRINT_TO_SCALAR(s, pos) -> Parser.PRINT_TO_SCALAR(s, (spaces, pos))
+  | QUOTEWORDS(s, pos) -> Parser.QUOTEWORDS(s, (spaces, pos))
+  | COMPACT_HASH_SUBSCRIPT(s, pos) -> Parser.COMPACT_HASH_SUBSCRIPT(s, (spaces, pos))
+  | HERE_DOC(r, pos) -> Parser.HERE_DOC(r, (spaces, pos))
+  | PATTERN(s, opts, pos) -> Parser.PATTERN((s, opts), (spaces, pos))
+  | PATTERN_SUBST(from, to_, opts, pos) -> Parser.PATTERN_SUBST((from, to_, opts), (spaces, pos))
+  | SCALAR_IDENT(kind, name, pos) -> Parser.SCALAR_IDENT((kind, name), (spaces, pos))
+  | ARRAY_IDENT(kind, name, pos) -> Parser.ARRAY_IDENT((kind, name), (spaces, pos))
+  | HASH_IDENT(kind, name, pos) -> Parser.HASH_IDENT((kind, name), (spaces, pos))
+  | FUNC_IDENT(kind, name, pos) -> Parser.FUNC_IDENT((kind, name), (spaces, pos))
+  | STAR_IDENT(kind, name, pos) -> Parser.STAR_IDENT((kind, name), (spaces, pos))
+  | RAW_IDENT(kind, name, pos) -> Parser.RAW_IDENT((kind, name), (spaces, pos))
+  | ARRAYLEN_IDENT(kind, name, pos) -> Parser.ARRAYLEN_IDENT((kind, name), (spaces, pos))
+  | FUNC_DECL_WITH_PROTO(name, proto, pos) -> Parser.FUNC_DECL_WITH_PROTO((name, proto), (spaces, pos))
+
+  | NEW(pos) -> Parser.NEW((), (spaces, pos))
+  | FORMAT(pos) -> Parser.FORMAT((), (spaces, pos))
+  | COMPARE_OP(s, pos) -> Parser.COMPARE_OP(s, (spaces, pos))
+  | EQ_OP(s, pos) -> Parser.EQ_OP(s, (spaces, pos))
+  | ASSIGN(s, pos) -> Parser.ASSIGN(s, (spaces, pos))
+  | FOR(s, pos) -> Parser.FOR(s, (spaces, pos))
+
+  | EOF (pos) -> Parser.EOF ((), (spaces, pos))
+  | IF (pos) -> Parser.IF ((), (spaces, pos))
+  | ELSIF (pos) -> Parser.ELSIF ((), (spaces, pos))
+  | ELSE (pos) -> Parser.ELSE ((), (spaces, pos))
+  | UNLESS (pos) -> Parser.UNLESS ((), (spaces, pos))
+  | DO (pos) -> Parser.DO ((), (spaces, pos))
+  | WHILE (pos) -> Parser.WHILE ((), (spaces, pos))
+  | UNTIL (pos) -> Parser.UNTIL ((), (spaces, pos))
+  | MY (pos) -> Parser.MY ((), (spaces, pos))
+  | CONTINUE (pos) -> Parser.CONTINUE ((), (spaces, pos))
+  | SUB (pos) -> Parser.SUB ((), (spaces, pos))
+  | LOCAL (pos) -> Parser.LOCAL ((), (spaces, pos))
+  | USE (pos) -> Parser.USE ((), (spaces, pos))
+  | PACKAGE (pos) -> Parser.PACKAGE ((), (spaces, pos))
+  | BEGIN (pos) -> Parser.BEGIN ((), (spaces, pos))
+  | END (pos) -> Parser.END ((), (spaces, pos))
+  | AT (pos) -> Parser.AT ((), (spaces, pos))
+  | DOLLAR (pos) -> Parser.DOLLAR ((), (spaces, pos))
+  | PERCENT (pos) -> Parser.PERCENT ((), (spaces, pos))
+  | AMPERSAND (pos) -> Parser.AMPERSAND ((), (spaces, pos))
+  | STAR (pos) -> Parser.STAR ((), (spaces, pos))
+  | ARRAYLEN (pos) -> Parser.ARRAYLEN ((), (spaces, pos))
+  | SEMI_COLON (pos) -> Parser.SEMI_COLON ((), (spaces, pos))
+  | PKG_SCOPE (pos) -> Parser.PKG_SCOPE ((), (spaces, pos))
+  | PAREN (pos) -> Parser.PAREN ((), (spaces, pos))
+  | PAREN_END (pos) -> Parser.PAREN_END ((), (spaces, pos))
+  | BRACKET (pos) -> Parser.BRACKET ((), (spaces, pos))
+  | BRACKET_END (pos) -> Parser.BRACKET_END ((), (spaces, pos))
+  | BRACKET_HASHREF (pos) -> Parser.BRACKET_HASHREF ((), (spaces, pos))
+  | ARRAYREF (pos) -> Parser.ARRAYREF ((), (spaces, pos))
+  | ARRAYREF_END (pos) -> Parser.ARRAYREF_END ((), (spaces, pos))
+  | ARROW (pos) -> Parser.ARROW ((), (spaces, pos))
+  | INCR (pos) -> Parser.INCR ((), (spaces, pos))
+  | DECR (pos) -> Parser.DECR ((), (spaces, pos))
+  | POWER (pos) -> Parser.POWER ((), (spaces, pos))
+  | TIGHT_NOT (pos) -> Parser.TIGHT_NOT ((), (spaces, pos))
+  | BIT_NEG (pos) -> Parser.BIT_NEG ((), (spaces, pos))
+  | REF (pos) -> Parser.REF ((), (spaces, pos))
+  | PATTERN_MATCH (pos) -> Parser.PATTERN_MATCH ((), (spaces, pos))
+  | PATTERN_MATCH_NOT(pos) -> Parser.PATTERN_MATCH_NOT((), (spaces, pos))
+  | MULT (pos) -> Parser.MULT ((), (spaces, pos))
+  | DIVISION (pos) -> Parser.DIVISION ((), (spaces, pos))
+  | MODULO (pos) -> Parser.MODULO ((), (spaces, pos))
+  | REPLICATE (pos) -> Parser.REPLICATE ((), (spaces, pos))
+  | PLUS (pos) -> Parser.PLUS ((), (spaces, pos))
+  | MINUS (pos) -> Parser.MINUS ((), (spaces, pos))
+  | CONCAT (pos) -> Parser.CONCAT ((), (spaces, pos))
+  | BIT_SHIFT_LEFT (pos) -> Parser.BIT_SHIFT_LEFT ((), (spaces, pos))
+  | BIT_SHIFT_RIGHT (pos) -> Parser.BIT_SHIFT_RIGHT ((), (spaces, pos))
+  | LT (pos) -> Parser.LT ((), (spaces, pos))
+  | GT (pos) -> Parser.GT ((), (spaces, pos))
+  | BIT_AND (pos) -> Parser.BIT_AND ((), (spaces, pos))
+  | BIT_OR (pos) -> Parser.BIT_OR ((), (spaces, pos))
+  | BIT_XOR (pos) -> Parser.BIT_XOR ((), (spaces, pos))
+  | AND_TIGHT (pos) -> Parser.AND_TIGHT ((), (spaces, pos))
+  | OR_TIGHT (pos) -> Parser.OR_TIGHT ((), (spaces, pos))
+  | DOTDOT (pos) -> Parser.DOTDOT ((), (spaces, pos))
+  | DOTDOTDOT (pos) -> Parser.DOTDOTDOT ((), (spaces, pos))
+  | QUESTION_MARK (pos) -> Parser.QUESTION_MARK ((), (spaces, pos))
+  | COLON (pos) -> Parser.COLON ((), (spaces, pos))
+  | COMMA (pos) -> Parser.COMMA ((), (spaces, pos))
+  | RIGHT_ARROW (pos) -> Parser.RIGHT_ARROW ((), (spaces, pos))
+  | NOT (pos) -> Parser.NOT ((), (spaces, pos))
+  | AND (pos) -> Parser.AND ((), (spaces, pos))
+  | OR (pos) -> Parser.OR ((), (spaces, pos))
+  | XOR (pos) -> Parser.XOR ((), (spaces, pos))
+
+  | SPACE _ | CR -> internal_error "raw_token_to_token"
+
+let rec lexbuf2list t lexbuf =
+  let rec f () =
+    match t lexbuf with
+    | Parser.EOF _ -> []
+    | e -> e :: f()
+  in
+  let l = f() in
+  l
 let next_rule = ref None
+let bpos = -1,-1
+let pos lexbuf = lexeme_start lexbuf, lexeme_end lexbuf
+let pos2sfull_with start end_ = Info.pos2sfull (!current_file, start, end_)
+let pos2sfull lexbuf = pos2sfull_with (lexeme_start lexbuf) (lexeme_end lexbuf)
+let putback lexbuf nb = lexbuf.Lexing.lex_curr_pos <- lexbuf.Lexing.lex_curr_pos - nb
+
 let add_a_new_line raw_pos =
   incr current_file_current_line ;
   lpush current_file_lines_starts raw_pos
@@ -26,12 +216,13 @@ let string_nestness = ref 0
 let building_current_string = ref ""
 let current_string_start_pos = ref 0
 let current_string_start_line = ref 0
+let die lexbuf err = failwith (pos2sfull_with !current_string_start_pos (lexeme_end lexbuf) ^ err)
 
 let ins t lexbuf =
   building_current_string := "";
   current_string_start_pos := lexeme_start lexbuf;
   t lexbuf ;
-  !building_current_string, (!current_file, !current_string_start_pos, lexeme_end lexbuf)
+  !building_current_string, (!current_string_start_pos, lexeme_end lexbuf)
 let ins_to_string t lexbuf =
   let s, pos = ins t lexbuf in
   not_ok_for_match := lexeme_end lexbuf;
@@ -42,23 +233,6 @@ let next_s s t lexbuf =
   t lexbuf
 let next t lexbuf = next_s (lexeme lexbuf) t lexbuf
 
-let pos lexbuf = !current_file, lexeme_start lexbuf, lexeme_end lexbuf
-let pos2sfull_with start end_ = Info.pos2sfull (!current_file, start, end_)
-let pos2sfull lexbuf = pos2sfull_with (lexeme_start lexbuf) (lexeme_end lexbuf)
-
-let putback lexbuf nb = lexbuf.Lexing.lex_curr_pos <- lexbuf.Lexing.lex_curr_pos - nb
-
-let die lexbuf err = failwith (pos2sfull_with !current_string_start_pos (lexeme_end lexbuf) ^ err)
-
-let rec lexbuf2list t lexbuf =
-  let rec f () =
-    match t lexbuf with
-    | EOF -> []
-    | e -> e :: f()
-  in
-  let l = f() in
-  l
-
 let ident_type_from_char fq name lexbuf c =
   not_ok_for_match := lexeme_end lexbuf;
   match c with
@@ -92,7 +266,7 @@ let arraylen_fqident_from_lexbuf lexbuf =
   let fq, name = split_at_two_colons (skip_n_char 2 s) in
   ARRAYLEN_IDENT(Some fq, name, pos lexbuf)
 
-let check_multi_line_delimited_string opts (_, start, end_) =
+let check_multi_line_delimited_string opts (start, end_) =
   let check =
     match opts with
     | None -> true
@@ -102,19 +276,18 @@ let check_multi_line_delimited_string opts (_, start, end_) =
     failwith (pos2sfull_with start end_ ^ "multi-line patterns are not allowed (or use /x modifier)")
 }
 
-let space = [' ' '\t']
 let stash = [ '$' '@' '%' '&' '*' ]
 let ident_start = ['a'-'z' 'A'-'Z' '_']
 let ident = ident_start ['0'-'9' 'A'-'Z' 'a'-'z' '_'] *
 let pattern_separator = [ '/' '!' ',' '|' ]
 
 rule token = parse
-| space+ {
+| ' '+ {
     (* propagate not_ok_for_match when it was set by the previous token *)
    if lexeme_start lexbuf = !not_ok_for_match then not_ok_for_match := lexeme_end lexbuf;
-    (*SPACE(pos lexbuf) *) token lexbuf
+    SPACE(lexeme_end lexbuf - lexeme_start lexbuf)
   }
-| '#' [^ '\n']* { (*COMMENT(lexeme lexbuf, pos lexbuf)*) token lexbuf }
+| '#' [^ '\n']* { SPACE(1) }
 
 | "\n=" {
     add_a_new_line(lexeme_end lexbuf - 1);
@@ -128,76 +301,85 @@ rule token = parse
       current_here_doc_mark := mark ;
       r := ins (if interpolate then here_doc else raw_here_doc) lexbuf
     with Queue.Empty -> ());
-    token lexbuf
-  }
-| "->" { ARROW }
-| "++" { INCR }
-| "--" { DECR }
-| "**" { POWER }
-| "!" { TIGHT_NOT }
-| "~" { BIT_NEG }
-| "=~" { PATTERN_MATCH }
-| "!~" { PATTERN_MATCH_NOT }
-| "*" { MULT }
-| "%" { MODULO }
-| "x" { REPLICATE }
-| "+" { PLUS }
-| "-" { MINUS }
-| "." { CONCAT }
-| "<<" { BIT_SHIFT_LEFT }
-| ">>" { BIT_SHIFT_RIGHT }
-| "<" { LT }
-| ">" { GT }
-| "<=" | ">=" | "lt" | "gt" | "le" | "ge" { COMPARE_OP(lexeme lexbuf) }
-| "==" | "!=" | "<=>" | "eq" | "ne" | "cmp" { EQ_OP(lexeme lexbuf) }
-| "&" { BIT_AND }
-| "|" { BIT_OR }
-| "^" { BIT_XOR }
-| "&&" { AND_TIGHT }
-| "||" { OR_TIGHT }
-| ".." { DOTDOT }
-| "..." { DOTDOTDOT }
-| "?" { QUESTION_MARK }
-| ":" { COLON }
-| "::" { PKG_SCOPE }
-
-| "=" | "+=" | "-=" | "*=" | ".=" | "|=" | "&=" | "^=" | "||=" | "&&=" { ASSIGN(lexeme lexbuf) }
-
-| "," { COMMA }
-| "=>" { RIGHT_ARROW }
-| "not" { NOT }
-| "and" { AND }
-| "or" { OR }
-| "xor" { XOR }
-
-| "if" { IF }
-| "else" { ELSE }
-| "elsif" { ELSIF }
-| "unless" { UNLESS }
-| "do" { DO }
-| "while" { WHILE }
-| "until" { UNTIL }
-| "foreach" { FOR("foreach") }
-| "for" { FOR("for") }
-| "my" { MY }
-| "local" { LOCAL }
-| "continue" { CONTINUE }
-| "sub" { SUB }
-| "package" { PACKAGE }
-| "use" { USE }
-| "BEGIN" { BEGIN }
-| "END" { END }
-| "print" { PRINT(pos lexbuf) }
+    CR
+  }
+| "->" { ARROW(pos lexbuf) }
+| "++" { INCR(pos lexbuf) }
+| "--" { DECR(pos lexbuf) }
+| "**" { POWER(pos lexbuf) }
+| "!" { TIGHT_NOT(pos lexbuf) }
+| "~" { BIT_NEG(pos lexbuf) }
+| "=~" { PATTERN_MATCH(pos lexbuf) }
+| "!~" { PATTERN_MATCH_NOT(pos lexbuf) }
+| "*" { MULT(pos lexbuf) }
+| "%" { MODULO(pos lexbuf) }
+| "x" { REPLICATE(pos lexbuf) }
+| "+" { PLUS(pos lexbuf) }
+| "-" { MINUS(pos lexbuf) }
+| "." { CONCAT(pos lexbuf) }
+| "<<" { BIT_SHIFT_LEFT(pos lexbuf) }
+| ">>" { BIT_SHIFT_RIGHT(pos lexbuf) }
+| "<" { LT(pos lexbuf) }
+| ">" { GT(pos lexbuf) }
+| "<=" | ">=" | "lt" | "gt" | "le" | "ge" { COMPARE_OP(lexeme lexbuf, pos lexbuf) }
+| "==" | "!=" | "<=>" | "eq" | "ne" | "cmp" { EQ_OP(lexeme lexbuf, pos lexbuf) }
+| "&" { BIT_AND(pos lexbuf) }
+| "|" { BIT_OR(pos lexbuf) }
+| "^" { BIT_XOR(pos lexbuf) }
+| "&&" { AND_TIGHT(pos lexbuf) }
+| "||" { OR_TIGHT(pos lexbuf) }
+| ".." { DOTDOT(pos lexbuf) }
+| "..." { DOTDOTDOT(pos lexbuf) }
+| "?" { QUESTION_MARK(pos lexbuf) }
+| ":" { COLON(pos lexbuf) }
+| "::" { PKG_SCOPE(pos lexbuf) }
+
+| "=" | "+=" | "-=" | "*=" | ".=" | "|=" | "&=" | "^=" | "||=" | "&&=" { ASSIGN(lexeme lexbuf, pos lexbuf) }
+
+| "," { COMMA(pos lexbuf) }
+| "=>" { RIGHT_ARROW(pos lexbuf) }
+| "not" { NOT(pos lexbuf) }
+| "and" { AND(pos lexbuf) }
+| "or" { OR(pos lexbuf) }
+| "xor" { XOR(pos lexbuf) }
+
+| "if" { IF(pos lexbuf) }
+| "else" { ELSE(pos lexbuf) }
+| "elsif" { ELSIF(pos lexbuf) }
+| "unless" { UNLESS(pos lexbuf) }
+| "do" { DO(pos lexbuf) }
+| "while" { WHILE(pos lexbuf) }
+| "until" { UNTIL(pos lexbuf) }
+| "foreach" { FOR(lexeme lexbuf, pos lexbuf) }
+| "for" { FOR(lexeme lexbuf, pos lexbuf) }
+| "my" { MY(pos lexbuf) }
+| "local" { LOCAL(pos lexbuf) }
+| "continue" { CONTINUE(pos lexbuf) }
+| "sub" { SUB(pos lexbuf) }
+| "package" { PACKAGE(pos lexbuf) }
+| "use" { USE(pos lexbuf) }
+| "BEGIN" { BEGIN(pos lexbuf) }
+| "END" { END(pos lexbuf) }
+| "print" { PRINT(lexeme lexbuf, pos lexbuf) }
 | "new" { NEW(pos lexbuf) }
 | "format" { let _ = here_doc_next_line "." false in FORMAT(pos lexbuf) }
 | "split" | "grep" { (* ok_for_match! *) BAREWORD(lexeme lexbuf, pos lexbuf) }
-| ident space* "=>" {    (* needed so that (if => 1) works *)
+| "print " ident ' ' {
+    putback lexbuf 1;
+    PRINT_TO_STAR(skip_n_char_ 6 1 (lexeme lexbuf), pos lexbuf);
+  }
+| "print $" ident ' ' {
+    putback lexbuf 1;
+    PRINT_TO_SCALAR(skip_n_char_ 7 1 (lexeme lexbuf), pos lexbuf);
+  }
+
+| ident ' '* "=>" {    (* needed so that (if => 1) works *)
     let s = lexeme lexbuf in
     let end_ = String.length s - 1 in
-    let ident_end = rindex_non_spaces_from (end_ - 2) s in
+    let ident_end = non_rindex_from s (end_ - 2) ' ' in
     putback lexbuf (end_ - ident_end);
    BAREWORD(String.sub s 0 (ident_end+1), pos lexbuf)
  }
@@ -207,25 +389,25 @@ rule token = parse
    COMPACT_HASH_SUBSCRIPT(lexeme lexbuf, pos lexbuf)
  }
 
-| '@' { AT }
-| '$' { DOLLAR }
-| '$' '#' { ARRAYLEN }
-| '%' ['$' '{'] { putback lexbuf 1; PERCENT }
-| '&' ['$' '{'] { putback lexbuf 1; AMPERSAND }
-| '*' ['$' '{'] { putback lexbuf 1; if lexeme_start lexbuf = !not_ok_for_match then MULT else STAR }
+| '@' { AT(pos lexbuf) }
+| '$' { DOLLAR(pos lexbuf) }
+| '$' '#' { ARRAYLEN(pos lexbuf) }
+| '%' ['$' '{'] { putback lexbuf 1; PERCENT(pos lexbuf) }
+| '&' ['$' '{'] { putback lexbuf 1; AMPERSAND(pos lexbuf) }
+| '*' ['$' '{'] { putback lexbuf 1; if lexeme_start lexbuf = !not_ok_for_match then MULT(pos lexbuf) else STAR(pos lexbuf) }
 
-| ';' { SEMI_COLON }
-| '(' { PAREN }
-| '{' { BRACKET }
-| "+{"{ BRACKET_HASHREF }
-| '[' { ARRAYREF }
-| ')' { not_ok_for_match := lexeme_end lexbuf; PAREN_END }
-| '}' { not_ok_for_match := lexeme_end lexbuf; BRACKET_END }
-| ']' { not_ok_for_match := lexeme_end lexbuf; ARRAYREF_END }
+| ';' { SEMI_COLON(pos lexbuf) }
+| '(' { PAREN(pos lexbuf) }
+| '{' { BRACKET(pos lexbuf) }
+| "+{"{ BRACKET_HASHREF(pos lexbuf) }
+| '[' { ARRAYREF(pos lexbuf) }
+| ')' { not_ok_for_match := lexeme_end lexbuf; PAREN_END(pos lexbuf) }
+| '}' { not_ok_for_match := lexeme_end lexbuf; BRACKET_END(pos lexbuf) }
+| ']' { not_ok_for_match := lexeme_end lexbuf; ARRAYREF_END(pos lexbuf) }
 
 | "/" {
-    if lexeme_start lexbuf = !not_ok_for_match then DIVISION
+    if lexeme_start lexbuf = !not_ok_for_match then DIVISION(pos lexbuf)
     else (
       delimit_char := '/' ;
       current_string_start_line := !current_file_current_line;
@@ -237,7 +419,7 @@ rule token = parse
  }
 
 | "/=" {
-    if lexeme_start lexbuf = !not_ok_for_match then ASSIGN(lexeme lexbuf)
+    if lexeme_start lexbuf = !not_ok_for_match then ASSIGN(lexeme lexbuf, pos lexbuf)
     else (
      putback lexbuf 1 ;
      delimit_char := '/' ;
@@ -268,10 +450,10 @@
 | "s" pattern_separator {
    delimit_char := lexeme_char lexbuf 1 ;
    current_string_start_line := !current_file_current_line;
-    let s1, (_, start, _) = ins delimited_string lexbuf in
-    let s2, (_, _, end_) = ins delimited_string lexbuf in
+    let s1, (start, _) = ins delimited_string lexbuf in
+    let s2, (_, end_) = ins delimited_string lexbuf in
    let opts, _ = ins pattern_options lexbuf in
-    let pos = !current_file, start, end_ in
+    let pos = start, end_ in
    check_multi_line_delimited_string (Some opts) pos ;
    PATTERN_SUBST(s1, s2, opts, pos)
  }
@@ -279,43 +461,43 @@
 | "tr" pattern_separator {
    delimit_char := lexeme_char lexbuf 2 ;
    current_string_start_line := !current_file_current_line;
-    let s1, (_, start, _) = ins delimited_string lexbuf in
-    let s2, (_, _, end_) = ins delimited_string lexbuf in
+    let s1, (start, _) = ins delimited_string lexbuf in
+    let s2, (_, end_) = ins delimited_string lexbuf in
    let opts, _ = ins pattern_options lexbuf in
-    let pos = !current_file, start, end_ in
+    let pos = start, end_ in
    check_multi_line_delimited_string None pos ;
    PATTERN_SUBST(s1, s2, opts, pos)
  }
 
 | "<<" ident {
    not_ok_for_match := lexeme_end lexbuf;
-    HERE_DOC(here_doc_next_line (skip_n_char 2 (lexeme lexbuf)) true)
+    HERE_DOC(here_doc_next_line (skip_n_char 2 (lexeme lexbuf)) true, pos lexbuf)
  }
 | "<<'" ident "'" {
    not_ok_for_match := lexeme_end lexbuf;
-    HERE_DOC(here_doc_next_line (skip_n_char_ 3 1 (lexeme lexbuf)) false)
+    HERE_DOC(here_doc_next_line (skip_n_char_ 3 1 (lexeme lexbuf)) false, pos lexbuf)
  }
-| "<<" space+ "'"
-| "<<" space+ ident {
+| "<<" ' '+ "'"
+| "<<" ' '+ ident {
    failwith (pos2sfull_with (lexeme_start lexbuf + 2) (lexeme_end lexbuf) ^ "No space allowed between \"<<\" and the marker")
  }
-| "<<" space* '"' {
+| "<<" ' '* '"' {
    failwith (pos2sfull_with (lexeme_start lexbuf + 2) (lexeme_end lexbuf) ^ "Don't use <<\"MARK\", use <<MARK instead")
  }
 
 | "\\" stash
 | "\\" ['0'-'9' 'A'-'Z' 'a'-'z']
-| "\\" space* '('
-  { putback lexbuf 1; REF }
+| "\\" ' '* '('
+  { putback lexbuf 1; REF(pos lexbuf) }
 
-| "sub" space+ ident space* '(' [ '$' '@' '\\' '&' ';' ]* ')' {
+| "sub" ' '+ ident ' '* '(' [ '$' '@' '\\' '&' ';' ]* ')' {
    (* bloody prototypes, must be caught especially otherwise "($)" is badly tokenized *)
    (* and alas "($@)" is both valid as an expression and a prototype *)
    let s = lexeme lexbuf in
-    let ident_start = index_non_spaces_from 3 s in
+    let ident_start = non_index_from s 3 ' ' in
    let proto_start = String.index_from s ident_start '(' in
-    let ident_end = rindex_non_spaces_from (proto_start-1) s in
+    let ident_end = non_rindex_from s (proto_start-1) ' ' in
    let ident = String.sub s ident_start (ident_end - ident_start + 1) in
    let prototype = skip_n_char_ (proto_start + 1) 1 s in
@@ -327,8 +509,8 @@
 | stash ident? ("::" ident)+ { typed_fqident_from_lexbuf lexbuf }
 
 | stash ident
-| '$' [^ '{' ' ' '\t' '\n' '$']
-| "$^" [^ '{' ' ' '\t' '\n'] { typed_ident_from_lexbuf lexbuf }
+| '$' [^ '{' ' ' '\n' '$']
+| "$^" [^ '{' ' ' '\n'] { typed_ident_from_lexbuf lexbuf }
 
 | "$$" [^ 'a'-'z' 'A'-'Z' '_' '{'] { putback lexbuf 1; SCALAR_IDENT(None, "$$", pos lexbuf) }
@@ -339,7 +521,7 @@
 | ident ":" { LABEL(lexeme lexbuf, pos lexbuf) }
 
-| '-' [ 'a'-'z' 'A'-'Z' ] [ ' ' '\t' '(' ] { putback lexbuf 1; BAREWORD(lexeme lexbuf, pos lexbuf) }
+| '-' [ 'a'-'z' 'A'-'Z' ] [ ' ' '(' ] { putback lexbuf 1; BAREWORD(lexeme lexbuf, pos lexbuf) }
 
 | ['0'-'9'] ['0'-'9' '_']* ('.' ['0'-'9'] ['0'-'9' '_']*)+
 | 'v' ['0'-'9'] ['0'-'9' '_']* ('.' ['0'-'9'] ['0'-'9' '_']*)*
@@ -367,7 +549,7 @@
 | "qq(" { ins_to_string qqstring lexbuf }
 | "qw(" { let s, pos = ins qstring lexbuf in QUOTEWORDS(s, pos) }
 
-| eof { EOF }
+| eof { EOF(pos lexbuf) }
 | _ { failwith (Printf.sprintf "%serror tokenizing <<%s>>" (pos2sfull lexbuf) (lexeme lexbuf)) }
 
 and string = parse
@@ -480,7 +662,6 @@ and pattern_options = parse
 and pod_command = parse
 | [^ '\n' ]+ {
    let s = lexeme lexbuf in
-    if String.contains s '\t' then failwith(pos2sfull lexbuf ^ "tabulations not accepted in POD commands") else
    let command = String.sub s 0 (try String.index s ' ' with Not_found -> String.length s) in
    match command with
    | "cut" ->
```
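Note on the change: the patch splits tokenizing into two passes. `rule token` now returns lightweight `raw_token` values that carry only `(start, end)` positions, plus explicit `SPACE`/`CR` markers, and `concat_spaces` folds those markers into a `Space_0`/`Space_1`/`Space_n`/`Space_cr` tag attached to the following `Parser` token, while `concat_bareword_paren` peeks one token ahead so that a bareword immediately followed by `(` (i.e. tagged `Space_0`) can become `BAREWORD_PAREN`. The sketch below illustrates the space-folding idea in isolation; the toy types (`raw_tok`, `cooked`) and the canned token stream are hypothetical stand-ins, not the project's actual `Parser`/`Types` modules.

```ocaml
(* Minimal sketch of the two-pass scheme, assuming a token source given as a
   [unit -> raw_tok] function. All type and value names here are illustrative. *)

type raw_pos = int * int

type spaces = Space_0 | Space_1 | Space_n | Space_cr

type raw_tok =
  | SPACE of int              (* a run of blanks of length n *)
  | CR                        (* a newline *)
  | WORD of string * raw_pos  (* any "real" token *)
  | EOF of raw_pos

type cooked =
  | Word of string * (spaces * raw_pos)
  | Eof of (spaces * raw_pos)

(* Swallow SPACE/CR tokens, summarise them as a [spaces] tag, and attach the
   tag to the next real token -- the same idea as concat_spaces in the patch. *)
let concat_spaces next_raw =
  let rec get_spaces spaces =
    match next_raw () with
    | CR -> get_spaces Space_cr
    | SPACE n ->
        let spaces' =
          match spaces with
          | Space_cr -> Space_cr
          | Space_0 -> if n = 1 then Space_1 else Space_n
          | _ -> Space_n
        in
        get_spaces spaces'
    | tok -> tok, spaces
  in
  match get_spaces Space_0 with
  | WORD (s, pos), spaces -> Word (s, (spaces, pos))
  | EOF pos, spaces -> Eof (spaces, pos)
  | (SPACE _ | CR), _ -> assert false   (* get_spaces never returns these *)

let () =
  (* canned stream: "foo" <one space> "bar" <newline> eof *)
  let stream = ref [WORD ("foo", (0, 3)); SPACE 1; WORD ("bar", (4, 7)); CR; EOF (8, 8)] in
  let next_raw () =
    match !stream with
    | [] -> EOF (0, 0)
    | t :: rest -> stream := rest; t
  in
  match concat_spaces next_raw, concat_spaces next_raw with
  | Word ("foo", (Space_0, _)), Word ("bar", (Space_1, _)) -> print_endline "ok"
  | _ -> print_endline "unexpected"
```

Keeping the spacing as data on each token is what lets a later pass distinguish `foo (...)` from `foo(...)` without re-reading the source, which is exactly the `Space_0` test used for `BAREWORD_PAREN`/`RAW_IDENT_PAREN` in the patch.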