diff options
| -rw-r--r-- | perl_checker.src/.cvsignore | 8 | ||||
| -rw-r--r-- | perl_checker.src/Makefile | 7 | ||||
| -rw-r--r-- | perl_checker.src/common.ml | 12 | ||||
| -rw-r--r-- | perl_checker.src/common.mli | 6 | ||||
| -rw-r--r-- | perl_checker.src/info.ml | 7 | ||||
| -rw-r--r-- | perl_checker.src/info.mli | 2 | ||||
| -rw-r--r-- | perl_checker.src/lexer.mll | 187 | ||||
| -rw-r--r-- | perl_checker.src/parser.mly | 126 | ||||
| -rw-r--r-- | perl_checker.src/perl_checker.ml | 4 | 
9 files changed, 248 insertions, 111 deletions
| diff --git a/perl_checker.src/.cvsignore b/perl_checker.src/.cvsignore new file mode 100644 index 0000000..fe1e303 --- /dev/null +++ b/perl_checker.src/.cvsignore @@ -0,0 +1,8 @@ +.depend +perl_checker +perl_checker_debug +gmon.out +*.cmi +*.cmo +*.cmx +parser.ml diff --git a/perl_checker.src/Makefile b/perl_checker.src/Makefile index 0dc65c2..fcabe3c 100644 --- a/perl_checker.src/Makefile +++ b/perl_checker.src/Makefile @@ -12,7 +12,7 @@ YACC_FILES = $(wildcard *.mly)  TMP_MLFILES = $(YACC_FILES:%.mly=%.ml) $(LEX_FILES:%.mll=%.ml)  TMP_MLIFILES = $(YACC_FILES:%.mly=%.mli) -ALL_PROGS = perl_checker +ALL_PROGS = perl_checker_debug perl_checker  PROG_OBJS_WITH_CMI = parser.cmo print.cmo perl_checker.cmo  PROG_OBJS = common.cmo flags.cmo info.cmo $(LEX_FILES:%.mll=%.cmo) $(PROG_OBJS_WITH_CMI) @@ -29,11 +29,10 @@ default: .compiling TAGS $(ALL_PROGS)  all: perl_checker -perl_checker: .depend $(PROG_OBJS) +perl_checker_debug: .depend $(PROG_OBJS)  	$(CSLC) -custom $(CSLFLAGS) $(LIBDIRS) -o $@ $(CMA_FILES) $(PROG_OBJS) -	cp -f perl_checker perl_checker_debug -perl_checker_opt: .depend $(PROG_OBJX) +perl_checker: .depend $(PROG_OBJX)  	$(CSLOPT) $(CSLOPTFLAGS) $(LIBDIRS) -o $@ $(CMXA_FILES) $(PROG_OBJX)  .compiling: diff --git a/perl_checker.src/common.ml b/perl_checker.src/common.ml index 4b33f8f..0f20e7a 100644 --- a/perl_checker.src/common.ml +++ b/perl_checker.src/common.ml @@ -653,6 +653,18 @@ let skip_n_char_ beg end_ s =    String.sub s beg (String.length s - beg - end_)  let skip_n_char n s = skip_n_char_ n 0 s +let rec index_spaces_from beg s = +  if s.[beg] = ' ' || s.[beg] = '\t' then beg else index_spaces_from (beg+1) s +let index_spaces s = index_spaces_from 0 s + +let rec index_non_spaces_from beg s = +  if s.[beg] = ' ' || s.[beg] = '\t' then index_non_spaces_from (beg+1) s else beg +let index_non_spaces s = index_non_spaces_from 0 s + +let rec rindex_non_spaces_from beg s = +  if s.[beg] = ' ' || s.[beg] = '\t' then rindex_non_spaces_from (beg-1) s else beg +let rindex_non_spaces s = rindex_non_spaces_from (String.length s - 1) s +  let rec explode_string = function    | "" -> []    | s -> (String.get s 0) :: explode_string (String.sub s 1 (String.length s - 1)) diff --git a/perl_checker.src/common.mli b/perl_checker.src/common.mli index 5fe8ade..353a2a3 100644 --- a/perl_checker.src/common.mli +++ b/perl_checker.src/common.mli @@ -175,6 +175,12 @@ val chop : string -> string  val chomps : string -> string  val skip_n_char_ : int -> int -> string -> string  val skip_n_char : int -> string -> string +val index_spaces_from : int -> string -> int +val index_spaces : string -> int +val index_non_spaces_from : int -> string -> int +val index_non_spaces : string -> int +val rindex_non_spaces_from : int -> string -> int +val rindex_non_spaces : string -> int  val explode_string : string -> char list  val is_uppercase : char -> bool  val is_lowercase : char -> bool diff --git a/perl_checker.src/info.ml b/perl_checker.src/info.ml index 5c002ae..8e27bd3 100644 --- a/perl_checker.src/info.ml +++ b/perl_checker.src/info.ml @@ -4,6 +4,7 @@ open Common  let (lines_starts : (string * int list ref) list ref) = ref []  let current_file_lines_starts = ref [] +let current_file_current_line = ref 0  let current_file = ref ""  let start_a_new_file file =  @@ -11,9 +12,13 @@ let start_a_new_file file =    current_file_lines_starts := [0] ;    lines_starts := (file, current_file_lines_starts) :: !lines_starts -let pos2line (file, a, b) =  +let raw_pos2raw_line file a =    let starts = map_index (fun a b -> a,b) (rev !(assoc file !lines_starts)) in    let ((offset, line), _) = find (fun (_,(e,_)) -> e > a) (combine starts (tl starts @ [99999, 99999])) in +  line, offset + +let pos2line (file, a, b) =  +  let line, offset = raw_pos2raw_line file a in    file, line, a - offset, b - offset  let pos2sfull pos =  diff --git a/perl_checker.src/info.mli b/perl_checker.src/info.mli index 5751d2b..ed9455e 100644 --- a/perl_checker.src/info.mli +++ b/perl_checker.src/info.mli @@ -1,7 +1,9 @@  val lines_starts : (string * int list ref) list ref  val current_file_lines_starts : int list ref +val current_file_current_line : int ref  val current_file : string ref  val start_a_new_file : string -> unit +val raw_pos2raw_line : string -> int -> int * int  val pos2line : string * int * int -> string * int * int * int  val pos2sfull : string * int * int -> string  val pos2sfull_current : int -> int -> string diff --git a/perl_checker.src/lexer.mll b/perl_checker.src/lexer.mll index 73c40bb..49f5102 100644 --- a/perl_checker.src/lexer.mll +++ b/perl_checker.src/lexer.mll @@ -7,21 +7,31 @@ open Info  let next_rule = ref None +let add_a_new_line raw_pos =  +   incr current_file_current_line ; +   lpush current_file_lines_starts raw_pos +  let here_docs = Queue.create()  let current_here_doc_mark = ref "" +let here_doc_next_line mark interpolate = +  let here_doc_ref = ref("", bpos) in +  Queue.push (interpolate, mark, here_doc_ref) here_docs ; +  here_doc_ref +  let delimit_char = ref '/'  let not_ok_for_match = ref (-1)  let string_nestness = ref 0  let building_current_string = ref ""  let current_string_start_pos = ref 0 +let current_string_start_line = ref 0 -let ins_with_offset offset t lexbuf =  -  building_current_string := ""; current_string_start_pos := lexeme_start lexbuf + offset; +let ins t lexbuf =  +  building_current_string := "";  +  current_string_start_pos := lexeme_start lexbuf;    t lexbuf ;    !building_current_string, (!current_file, !current_string_start_pos, lexeme_end lexbuf) -let ins t lexbuf = ins_with_offset 0 t lexbuf  let ins_to_string t lexbuf =    let s, pos = ins t lexbuf in    not_ok_for_match := lexeme_end lexbuf;  @@ -33,11 +43,11 @@ let next_s s t lexbuf =  let next t lexbuf = next_s (lexeme lexbuf) t lexbuf  let pos lexbuf = !current_file, lexeme_start lexbuf, lexeme_end lexbuf -  let pos2sfull_with start end_ = Info.pos2sfull (!current_file, start, end_) -  let pos2sfull lexbuf = pos2sfull_with (lexeme_start lexbuf) (lexeme_end lexbuf) +let putback lexbuf nb = lexbuf.Lexing.lex_curr_pos <- lexbuf.Lexing.lex_curr_pos - nb +  let die lexbuf err = failwith (pos2sfull_with !current_string_start_pos (lexeme_end lexbuf) ^ err)  let rec lexbuf2list t lexbuf = @@ -82,6 +92,14 @@ let arraylen_fqident_from_lexbuf lexbuf =    let fq, name = split_at_two_colons (skip_n_char 2 s) in    ARRAYLEN_IDENT(Some fq, name, pos lexbuf) +let check_multi_line_delimited_string opts (_, start, end_) = +   let check = +     match opts with +     | None -> true +     | Some s -> not (String.contains s 'x') in +   if check then +     if !current_file_current_line <> !current_string_start_line then +     failwith (pos2sfull_with start end_ ^ "multi-line patterns are not allowed (or use /x modifier)")  }  let space = [' ' '\t'] @@ -99,10 +117,12 @@ rule token = parse  | '#' [^ '\n']* { (*COMMENT(lexeme lexbuf, pos lexbuf)*) token lexbuf }  | "\n=" {  -  let s, pos = ins_with_offset 1 pod_command lexbuf in POD(s, pos) } +    add_a_new_line(lexeme_end lexbuf - 1); +    let _ = ins pod_command lexbuf in token lexbuf  +  }  | '\n' {  -    lpush current_file_lines_starts (lexeme_end lexbuf); +    add_a_new_line(lexeme_end lexbuf);      (try        let (interpolate, mark, r) = Queue.pop here_docs in        current_here_doc_mark := mark ; @@ -126,7 +146,9 @@ rule token = parse  | "." { CONCAT }  | "<<" { BIT_SHIFT_LEFT }  | ">>" { BIT_SHIFT_RIGHT } -| "<" | ">" | "<=" | ">=" | "lt" | "gt" | "le" | "ge" { COMPARE_OP(lexeme lexbuf) } +| "<" { LT } +| ">" { GT } +| "<=" | ">=" | "lt" | "gt" | "le" | "ge" { COMPARE_OP(lexeme lexbuf) }  | "==" | "!=" | "<=>" | "eq" | "ne" | "cmp" { EQ_OP(lexeme lexbuf) }  | "&" { BIT_AND }  | "|" { BIT_OR } @@ -139,7 +161,7 @@ rule token = parse  | ":" { COLON }  | "::" { PKG_SCOPE } -| "=" | "+=" | "-=" | "*=" | "/=" | ".=" | "|=" | "&=" | "^=" | "||=" | "&&=" { ASSIGN(lexeme lexbuf) } +| "=" | "+=" | "-=" | "*=" | ".=" | "|=" | "&=" | "^=" | "||=" | "&&=" { ASSIGN(lexeme lexbuf) }  | "," { COMMA }  | "=>" { RIGHT_ARROW } @@ -149,6 +171,8 @@ rule token = parse  | "xor" { XOR }  | "if"       { IF } +| "else"     { ELSE } +| "elsif"    { ELSIF }  | "unless"   { UNLESS }  | "do"       { DO }  | "while"    { WHILE } @@ -159,37 +183,64 @@ rule token = parse  | "local"    { LOCAL }  | "continue" { CONTINUE }  | "sub"      { SUB } -| "format"   { FORMAT }  | "package"  { PACKAGE }  | "use"      { USE } +| "BEGIN"    { BEGIN } +| "END"      { END }  | "print"    { PRINT(pos lexbuf) }  | "new"      { NEW(pos lexbuf) } +| "format"   { let _ = here_doc_next_line "." false in FORMAT(pos lexbuf) } + +| "split" +| "grep"  { (* ok_for_match! *) BAREWORD(lexeme lexbuf, pos lexbuf) } + +| ident space* "=>" { (* needed so that (if => 1) works *) +    let s = lexeme lexbuf in +    let end_ = String.length s - 1 in +    let ident_end = rindex_non_spaces_from (end_ - 2) s in +    putback lexbuf (end_ - ident_end);  +    BAREWORD(String.sub s 0 (ident_end+1), pos lexbuf) +  } + +| "{" ident "}" { (* needed so that $h{if} works *) +    not_ok_for_match := lexeme_end lexbuf; +    COMPACT_HASH_SUBSCRIPT(lexeme lexbuf, pos lexbuf) +  }  | '@' { AT }  | '$' { DOLLAR } -| '%' { PERCENT } -| '&' { AMPERSAND } -| '*' { STAR } -| "$#" { ARRAYLEN } +| '$' '#' { ARRAYLEN } +| '%' ['$' '{'] { putback lexbuf 1; PERCENT } +| '&' ['$' '{'] { putback lexbuf 1; AMPERSAND } +| '*' ['$' '{'] { putback lexbuf 1; if lexeme_start lexbuf = !not_ok_for_match then MULT else STAR }  | ';' { SEMI_COLON }  | '(' { PAREN }  | '{' { BRACKET } +| "+{"{ BRACKET_HASHREF }  | '[' { ARRAYREF }  | ')' { not_ok_for_match := lexeme_end lexbuf; PAREN_END }  | '}' { not_ok_for_match := lexeme_end lexbuf; BRACKET_END }  | ']' { not_ok_for_match := lexeme_end lexbuf; ARRAYREF_END } -| '(' [ '$' '@' '\\' '&' ';' ]+ ')' { -    (* bloody prototypes, must be caught especially otherwise "($)" is badly tokenized *) -    PROTOTYPE(lexeme lexbuf, pos lexbuf) -  } -  | "/" {      if lexeme_start lexbuf = !not_ok_for_match then DIVISION       else (        delimit_char := '/' ; +      current_string_start_line := !current_file_current_line; +      let s, pos = ins delimited_string lexbuf in +      let opts, _ = ins pattern_options lexbuf in +      check_multi_line_delimited_string (Some opts) pos ; +      PATTERN(s, opts, pos) +    )  +  } + +| "/=" { +    if lexeme_start lexbuf = !not_ok_for_match then ASSIGN(lexeme lexbuf) +    else ( +      putback lexbuf 1 ; +      delimit_char := '/' ;        let s, pos = ins delimited_string lexbuf in        let opts, _ = ins pattern_options lexbuf in        PATTERN(s, opts, pos) @@ -198,44 +249,51 @@ rule token = parse  | "m" pattern_separator {    delimit_char := lexeme_char lexbuf 1 ; +  current_string_start_line := !current_file_current_line;    let s, pos = ins delimited_string lexbuf in    let opts, _ = ins pattern_options lexbuf in +  check_multi_line_delimited_string (Some opts) pos ;    PATTERN(s, opts, pos)  }  | "qr" pattern_separator {    delimit_char := lexeme_char lexbuf 2 ; +  current_string_start_line := !current_file_current_line;    let s, pos = ins delimited_string lexbuf in    let opts, _ = ins pattern_options lexbuf in +  check_multi_line_delimited_string (Some opts) pos ;    PATTERN(s, opts, pos)  }  | "s" pattern_separator {    delimit_char := lexeme_char lexbuf 1 ; +  current_string_start_line := !current_file_current_line;    let s1, (_, start, _) = ins delimited_string lexbuf in     let s2, (_, _, end_)  = ins delimited_string lexbuf in     let opts, _ = ins pattern_options lexbuf in -  PATTERN_SUBST(s1, s2, opts, (!current_file, start, end_)) +  let pos = !current_file, start, end_ in +  check_multi_line_delimited_string (Some opts) pos ; +  PATTERN_SUBST(s1, s2, opts, pos)  }  | "tr" pattern_separator {    delimit_char := lexeme_char lexbuf 2 ; +  current_string_start_line := !current_file_current_line;    let s1, (_, start, _) = ins delimited_string lexbuf in     let s2, (_, _, end_)  = ins delimited_string lexbuf in     let opts, _ = ins pattern_options lexbuf in -  PATTERN_SUBST(s1, s2, opts, (!current_file, start, end_)) +  let pos = !current_file, start, end_ in +  check_multi_line_delimited_string None pos ; +  PATTERN_SUBST(s1, s2, opts, pos)  }  | "<<" ident {  -    let here_doc_ref = ref("", bpos) in -    Queue.push (true, skip_n_char 2 (lexeme lexbuf), here_doc_ref) here_docs ; -    HERE_DOC here_doc_ref +    not_ok_for_match := lexeme_end lexbuf;  +    HERE_DOC(here_doc_next_line (skip_n_char 2 (lexeme lexbuf)) true)    }  | "<<'" ident "'" {       not_ok_for_match := lexeme_end lexbuf;  -    let here_doc_ref = ref("", bpos) in -    Queue.push (false, skip_n_char_ 3 1 (lexeme lexbuf), here_doc_ref) here_docs ; -    HERE_DOC here_doc_ref +    HERE_DOC(here_doc_next_line (skip_n_char_ 3 1 (lexeme lexbuf)) false)    }  | "<<" space+ "'"  | "<<" space+ ident {  @@ -248,20 +306,41 @@ rule token = parse  | "\\" stash  | "\\" ['0'-'9' 'A'-'Z' 'a'-'z']  | "\\" space* '(' -    { lexbuf.Lexing.lex_curr_pos <- lexbuf.Lexing.lex_curr_pos - 1; REF } +    { putback lexbuf 1; REF } + +| "sub" space+ ident space* '(' [ '$' '@' '\\' '&' ';' ]* ')' { +    (* bloody prototypes, must be caught especially otherwise "($)" is badly tokenized *) +    (* and alas "($@)" is both valid as an expression and a prototype *) +    let s = lexeme lexbuf in +    let ident_start = index_non_spaces_from 3 s in +       +    let proto_start = String.index_from s ident_start '(' in +    let ident_end = rindex_non_spaces_from (proto_start-1) s in +    let ident = String.sub s ident_start (ident_end - ident_start + 1) in +    let prototype = skip_n_char_ (proto_start + 1) 1 s in + +    FUNC_DECL_WITH_PROTO(ident, prototype, pos lexbuf) +  }  | "$#" ident? ("::" ident)+ { arraylen_fqident_from_lexbuf lexbuf }  | "$#" ident { arraylen_ident_from_lexbuf lexbuf }  | stash ident? ("::" ident)+ { typed_fqident_from_lexbuf lexbuf }  | stash ident -| stash '^'? [^ '{' ' ' '\t' '\n'] { typed_ident_from_lexbuf lexbuf } +| '$' [^ '{' ' ' '\t' '\n' '$'] +| "$^" [^ '{' ' ' '\t' '\n'] { typed_ident_from_lexbuf lexbuf } + +| "$$" [^ 'a'-'z' 'A'-'Z' '_' '{'] { putback lexbuf 1; SCALAR_IDENT(None, "$$", pos lexbuf) } + +| stash "::" { putback lexbuf 2; ident_type_from_char None "main" lexbuf (lexeme_char lexbuf 0) }  | ident? ("::" ident)+ { ident_from_lexbuf lexbuf } -| ident { BAREWORD(lexeme lexbuf, pos lexbuf) } +| ident { not_ok_for_match := lexeme_end lexbuf; BAREWORD(lexeme lexbuf, pos lexbuf) }  | ident ":" { LABEL(lexeme lexbuf, pos lexbuf) } +| '-' [ 'a'-'z' 'A'-'Z' ] [ ' ' '\t' '(' ] { putback lexbuf 1; BAREWORD(lexeme lexbuf, pos lexbuf) } +  | ['0'-'9'] ['0'-'9' '_']* ('.' ['0'-'9'] ['0'-'9' '_']*)+   | 'v' ['0'-'9'] ['0'-'9' '_']* ('.' ['0'-'9'] ['0'-'9' '_']*)*     {  @@ -269,14 +348,8 @@ rule token = parse      REVISION(lexeme lexbuf, pos lexbuf)    } -| ['0'-'9']* '.' ['0'-'9']+ (['e' 'E']['-' '+']?['0'-'9']+)? {  -    not_ok_for_match := lexeme_end lexbuf;  -    NUM(lexeme lexbuf, pos lexbuf)  -  } -| ['0'-'9'] ['0'-'9' '_']* {  -    not_ok_for_match := lexeme_end lexbuf;  -    NUM(lexeme lexbuf, pos lexbuf) -  } +| ['0'-'9']* '.' ['0'-'9']+ (['e' 'E']['-' '+']?['0'-'9']+)?  +| ['0'-'9'] ['0'-'9' '_']*  (['e' 'E']['-' '+']?['0'-'9']+)?  | "0x" ['0'-'9' 'a'-'f' 'A'-'F']+ {       not_ok_for_match := lexeme_end lexbuf;       NUM(lexeme lexbuf, pos lexbuf) @@ -285,8 +358,11 @@ rule token = parse  | '"'   { ins_to_string string lexbuf }  | "'"   { ins_to_string rawstring lexbuf }  | '`'   { delimit_char := '`';  +	  current_string_start_line := !current_file_current_line;  	  not_ok_for_match := lexeme_end lexbuf;  -	  let s, pos = ins delimited_string lexbuf in COMMAND_STRING(s, pos) } +	  let s, pos = ins delimited_string lexbuf in +	  check_multi_line_delimited_string None pos ; +	  COMMAND_STRING(s, pos) }  | "q("  { ins_to_string qstring lexbuf }  | "qq(" { ins_to_string qqstring lexbuf }  | "qw(" { let s, pos = ins qstring lexbuf in QUOTEWORDS(s, pos) } @@ -298,7 +374,7 @@ and string = parse    '"' { () }  | '\\' { next_rule := Some string ; string_escape lexbuf }  | '\n' {  -    lpush current_file_lines_starts (lexeme_end lexbuf); +    add_a_new_line(lexeme_end lexbuf);      next string lexbuf    }  | [^ '\n' '\\' '"']+ { next string lexbuf } @@ -307,7 +383,7 @@ and string = parse  and delimited_string = parse  | '\\' { next_rule := Some delimited_string ; string_escape lexbuf }  | '\n' {  -    lpush current_file_lines_starts (lexeme_end lexbuf); +    add_a_new_line(lexeme_end lexbuf);      next delimited_string lexbuf    }  | eof { die lexbuf "Unterminated_delimited_string" } @@ -316,11 +392,12 @@ and delimited_string = parse  and rawstring = parse    ''' { () }  | '\n' {  -    lpush current_file_lines_starts (lexeme_end lexbuf); +    add_a_new_line(lexeme_end lexbuf);      next rawstring lexbuf    } -| "\\'" -| [^ '\n' ''']+ { next rawstring lexbuf } +| '\\' { next rawstring lexbuf } +| "\\'" { next_s "'" rawstring lexbuf } +| [^ '\n' ''' '\\']+ { next rawstring lexbuf }  | eof { die lexbuf "Unterminated_rawstring" }  and qqstring = parse @@ -333,7 +410,7 @@ and qqstring = parse    }  | '\\' { next_rule := Some qqstring ; string_escape lexbuf }  | '\n' {  -    lpush current_file_lines_starts (lexeme_end lexbuf); +    add_a_new_line(lexeme_end lexbuf);      next qqstring lexbuf    }  | [^ '\n' '(' ')' '\\']+ { next qqstring lexbuf } @@ -348,7 +425,7 @@ and qstring = parse      next qstring lexbuf    }  | '\n' {  -    lpush current_file_lines_starts (lexeme_end lexbuf); +    add_a_new_line(lexeme_end lexbuf);      next qstring lexbuf    }  | [^ '\n' '(' ')']+ { next qstring lexbuf } @@ -363,7 +440,7 @@ and here_doc = parse      else if s <> !current_here_doc_mark then Printf.eprintf "%sTrailing spaces after HERE-document mark\n" (pos2sfull lexbuf)    }  | '\n' {  -    lpush current_file_lines_starts (lexeme_end lexbuf); +    add_a_new_line(lexeme_end lexbuf);      next here_doc lexbuf    }  | eof { die lexbuf "Unterminated_here_doc" } @@ -376,7 +453,7 @@ and raw_here_doc = parse      else if s <> !current_here_doc_mark then Printf.eprintf "%sTrailing spaces after HERE-document mark\n" (pos2sfull lexbuf)    }  | '\n' {  -    lpush current_file_lines_starts (lexeme_end lexbuf); +    add_a_new_line(lexeme_end lexbuf);      next raw_here_doc lexbuf    }  | eof { die lexbuf "Unterminated_raw_here_doc" } @@ -397,8 +474,8 @@ and string_escape = parse  | _ { next_s ("\\" ^ lexeme lexbuf) (some !next_rule) lexbuf }  and pattern_options = parse -| [ 'g' 'i' 'm' 'o' 's' 'x' 'e' ] { next pattern_options lexbuf } -| _ { lexbuf.Lexing.lex_curr_pos <- lexbuf.Lexing.lex_curr_pos - 1; () } +| [ 'g' 'i' 'm' 'o' 's' 'x' 'e' 'd' ] { next pattern_options lexbuf } +| _ { putback lexbuf 1; () }  and pod_command = parse  | [^ '\n' ]+ { @@ -416,8 +493,14 @@ and pod_command = parse  | _ { failwith(pos2sfull lexbuf ^ "POD command expected") }  and pod = parse -| "\n=" { next pod_command lexbuf } +| "\n=" {  +    add_a_new_line(lexeme_end lexbuf - 1); +    next pod_command lexbuf  +  }  | "\n" [^ '=' '\n'] [^ '\n']* -| "\n" { next pod lexbuf } +| "\n" {  +    add_a_new_line(lexeme_end lexbuf); +    next pod lexbuf  +  }  | eof  | _ { failwith(pos2sfull_with !current_string_start_pos (lexeme_end lexbuf) ^ "POD block still open") } diff --git a/perl_checker.src/parser.mly b/perl_checker.src/parser.mly index 0f5f34b..37ec3ce 100644 --- a/perl_checker.src/parser.mly +++ b/perl_checker.src/parser.mly @@ -2,8 +2,8 @@    open Types    open Common -  let parse_error _ = -   failwith (Info.pos2sfull_current (Parsing.symbol_start()) (Parsing.symbol_end()) ^ "parse error")    +  let parse_error msg = +   failwith (Info.pos2sfull_current (Parsing.symbol_start()) (Parsing.symbol_end()) ^ msg)    let to_Ident = function     | BAREWORD(name, pos) -> Ident(I_raw, None, name, pos) @@ -19,22 +19,23 @@  %token EOF  %token <Types.pos> SPACE -%token <string * Types.pos> NUM STRING BAREWORD PROTOTYPE REVISION COMMENT POD LABEL -%token <string * Types.pos> COMMAND_STRING QUOTEWORDS +%token <string * Types.pos> NUM STRING BAREWORD REVISION COMMENT POD LABEL +%token <string * Types.pos> COMMAND_STRING QUOTEWORDS COMPACT_HASH_SUBSCRIPT  %token <(string * Types.pos) ref> HERE_DOC  %token <string * string * Types.pos> PATTERN  %token <string * string * string * Types.pos> PATTERN_SUBST  %token <string option * string * Types.pos> SCALAR_IDENT ARRAY_IDENT HASH_IDENT FUNC_IDENT STAR_IDENT RAW_IDENT ARRAYLEN_IDENT +%token <string * string * Types.pos> FUNC_DECL_WITH_PROTO  %token IF ELSIF ELSE UNLESS DO WHILE UNTIL MY CONTINUE SUB LOCAL  %token <string> FOR -%token USE PACKAGE FORMAT -%token <Types.pos> PRINT NEW +%token USE PACKAGE BEGIN END +%token <Types.pos> PRINT NEW FORMAT  %token AT DOLLAR PERCENT AMPERSAND STAR ARRAYLEN  %token SEMI_COLON PKG_SCOPE  %token PAREN PAREN_END -%token BRACKET BRACKET_END +%token BRACKET BRACKET_END BRACKET_HASHREF  %token ARRAYREF ARRAYREF_END @@ -46,6 +47,7 @@  %token MULT DIVISION MODULO REPLICATE  %token PLUS MINUS CONCAT  %token BIT_SHIFT_LEFT BIT_SHIFT_RIGHT +%token LT GT  %token <string> COMPARE_OP EQ_OP  %token BIT_AND  %token BIT_OR BIT_XOR @@ -76,7 +78,7 @@  %left       BIT_OR BIT_XOR  %left       BIT_AND  %nonassoc   EQ_OP -%nonassoc   COMPARE_OP +%nonassoc   LT GT COMPARE_OP  %nonassoc   UNIOP  %left       BIT_SHIFT_LEFT BIT_SHIFT_RIGHT  %left       PLUS MINUS CONCAT @@ -104,7 +106,9 @@ block: BRACKET lineseq BRACKET_END { $2 }  lineseq: /* A collection of "lines" in the program */  |              {[]}  | decl lineseq {[]} -| label line {[]} +| line {[]} +| LABEL lineseq {[]} +| line {[]}  line:  | if_then_else lineseq { [] } @@ -133,7 +137,7 @@ loop:  | FOR MY SCALAR_IDENT PAREN expr PAREN_END block cont {[]}  | FOR SCALAR_IDENT PAREN expr PAREN_END block cont {[]}  | FOR PAREN expr PAREN_END block cont {[]} -| FOR PAREN expr_or_empty ';' expr_or_empty ';' expr_or_empty PAREN_END block {[]} +| FOR PAREN expr_or_empty SEMI_COLON expr_or_empty SEMI_COLON expr_or_empty PAREN_END block {[]}  | block cont {[]} /* a block is a loop that happens once */  cont: /* Continue blocks */ @@ -150,9 +154,12 @@ sideff: /* An expression which may have a side-effect */  | expr  FOR    expr { [ (*Binary($2,        $1, $3)*) ] }  decl: -| FORMAT formname block {[]} -| SUB word prototype_or_empty subbody {[]} +| FORMAT bareword_or_empty ASSIGN {[]} +| SUB word subbody {[]} +| FUNC_DECL_WITH_PROTO subbody {[]}  | PACKAGE word SEMI_COLON {[]} +| BEGIN block {[]} +| END block {[]}  | USE word_or_empty revision_or_empty listexpr SEMI_COLON {[]}  formname: {[]} | BAREWORD {[]} @@ -184,11 +191,7 @@ term:  | LOCAL term    %prec UNIOP {[]}  | PAREN expr_or_empty PAREN_END {[]} -| scalar        %prec PAREN {[]} -| star          %prec PAREN {[]} -| hash          %prec PAREN {[]} -| array         %prec PAREN {[]} -| arraylen      %prec PAREN {[]} /* $#x, $#{ something } */ +| variable {[]}  | subscripted {[]} @@ -199,15 +202,9 @@ term:  | function_call {[]}  | word {[]} -| NUM {[]} -| STRING {[]} -| REVISION {[]} -| COMMAND_STRING {[]} -| QUOTEWORDS {[]} -| HERE_DOC {[]} +| value {[]}  function_call: -| func {[]} /* &foo; */  | func PAREN expr_or_empty PAREN_END {[]} /* &foo(@args) */  | word argexpr {[]} /* foo(@args) */  | word block listexpr %prec LSTOP {[]} /* map { foo } @bar */ @@ -216,6 +213,7 @@ function_call:  | term ARROW word_or_scalar {[]} /* $foo->bar */  | NEW word listexpr {[]} /* new Class @args */ +| PRINT argexpr {[]} /* print $fh @args */  | PRINT word_or_scalar argexpr {[]} /* print $fh @args */  termdo: /* Things called with "do" */ @@ -240,15 +238,25 @@ myterm: /* Things that can be "my"'d */  | array {[]}  subscripted: /* Some kind of subscripted expression */ -| star PKG_SCOPE BRACKET expr BRACKET_END {[]} /* *main::{something} */ -| scalar ARRAYREF expr ARRAYREF_END         {[]} /* $array[$element] */ -| scalar BRACKET expr BRACKET_END           {[]} /* $foo{bar} */ -| term ARROW ARRAYREF expr ARRAYREF_END     {[]} /* somearef->[$element] */ -| term ARROW BRACKET expr BRACKET_END       {[]} /* somehref->{bar} */ -| term ARROW PAREN expr_or_empty PAREN_END  {[]} /* $subref->(@args) */ -| subscripted ARRAYREF expr ARRAYREF_END    {[]} /* $foo->[$bar][$baz] */ -| subscripted BRACKET expr BRACKET_END      {[]} /* $foo->[bar]{baz;} */ -| subscripted PAREN expr_or_empty PAREN_END {[]} /* $foo->{bar}(@args) */ +| variable PKG_SCOPE bracket_subscript        {[]} /* *main::{something} */ +| scalar bracket_subscript                    {[]} /* $foo{bar} */ +| scalar ARRAYREF expr ARRAYREF_END           {[]} /* $array[$element] */ +| term ARROW bracket_subscript                {[]} /* somehref->{bar} */ +| term ARROW ARRAYREF expr ARRAYREF_END       {[]} /* somearef->[$element] */ +| term ARROW PAREN expr_or_empty PAREN_END    {[]} /* $subref->(@args) */ +| subscripted bracket_subscript               {[]} /* $foo->[bar]{baz;} */ +| subscripted ARRAYREF expr ARRAYREF_END      {[]} /* $foo->[$bar][$baz] */ +| subscripted PAREN expr_or_empty PAREN_END   {[]} /* $foo->{bar}(@args) */ + +bracket_subscript: +| BRACKET expr BRACKET_END {[]} +| COMPACT_HASH_SUBSCRIPT {[]} + +anonymous: /* Constructors for anonymous data */ +| ARRAYREF expr_or_empty ARRAYREF_END {[]} +| BRACKET expr_or_empty BRACKET_END %prec PAREN {[]} /* { foo => "Bar" } */ +| BRACKET_HASHREF expr_or_empty BRACKET_END %prec PAREN {[]} /* { foo => "Bar" } */ +| SUB block %prec PAREN {[]}  binop:  | ASSIGN {[]} @@ -256,7 +264,7 @@ binop:  | MULT {[]} | DIVISION {[]} | MODULO {[]} | REPLICATE {[]}  | PLUS {[]} | MINUS {[]} | CONCAT {[]}  | BIT_SHIFT_LEFT {[]} | BIT_SHIFT_RIGHT {[]} -| COMPARE_OP {[]} +| LT {[]} | GT {[]} | COMPARE_OP {[]}  | EQ_OP {[]}  | BIT_AND {[]}  | BIT_OR {[]} | BIT_XOR {[]} @@ -265,14 +273,25 @@ binop:  | OR_TIGHT {[]} | XOR {[]}  | PATTERN_MATCH {[]} | PATTERN_MATCH_NOT {[]} -anonymous: /* Constructors for anonymous data */ -| ARRAYREF expr_or_empty ARRAYREF_END {[]} -| BRACKET expr_or_empty BRACKET_END %prec PAREN {[]} /* { foo => "Bar" } */ -| SUB prototype_or_empty block %prec PAREN {[]} - -label: -|                 { None } -| BAREWORD COLON { Some $1 } +value: +| NUM {[]} +| STRING {[]} +| REVISION {[]} +| COMMAND_STRING {[]} +| QUOTEWORDS {[]} +| HERE_DOC {[]} +| PATTERN {[]} +| PATTERN_SUBST {[]} +| LT GT {[]} +| LT term GT {[]} + +variable: +| scalar   %prec PAREN {[]} +| star     %prec PAREN {[]} +| hash     %prec PAREN {[]} +| array    %prec PAREN {[]} +| arraylen %prec PAREN {[]} /* $#x, $#{ something } */ +| func     %prec PAREN {[]} /* &foo; */  word:  | bareword { fst $1 } @@ -285,23 +304,26 @@ word:  comma: COMMA {[]} | RIGHT_ARROW {[]}  word_or_scalar: -| bareword     { [] } -| RAW_IDENT    { [] } -| SCALAR_IDENT { [] } +| bareword  { [] } +| RAW_IDENT { [] } +| scalar    { [] } + +block_or_scalar: block {[]} | scalar {[]}  bareword:  | NEW { "new", $1 }  | PRINT { "print", $1 } +| FORMAT { "format", $1 }  | BAREWORD { $1 } -arraylen: ARRAYLEN_IDENT {[]} | ARRAYLEN block {[]} -scalar: SCALAR_IDENT {[]} | DOLLAR    block {[]} -func:   FUNC_IDENT   {[]} | AMPERSAND block {[]} -array:  ARRAY_IDENT  {[]} | AT        block {[]} -hash:   HASH_IDENT   {[]} | PERCENT   block {[]} -star:   STAR_IDENT   {[]} | STAR      block {[]} +arraylen: ARRAYLEN_IDENT {[]} | ARRAYLEN block_or_scalar {[]} +scalar: SCALAR_IDENT {[]} | DOLLAR    block_or_scalar {[]} +func:   FUNC_IDENT   {[]} | AMPERSAND block_or_scalar {[]} +array:  ARRAY_IDENT  {[]} | AT        block_or_scalar {[]} +hash:   HASH_IDENT   {[]} | PERCENT   block_or_scalar {[]} +star:   STAR_IDENT   {[]} | STAR      block_or_scalar {[]}  expr_or_empty: {[]} | expr {[]}  word_or_empty: {[]} | word {[]} -prototype_or_empty: {[]} | PROTOTYPE {[]} +bareword_or_empty: {[]} | bareword {[]}  revision_or_empty: {[]} | REVISION {[]} diff --git a/perl_checker.src/perl_checker.ml b/perl_checker.src/perl_checker.ml index dd7b75e..cc5bb19 100644 --- a/perl_checker.src/perl_checker.ml +++ b/perl_checker.src/perl_checker.ml @@ -7,8 +7,8 @@ let _ =      try        Info.start_a_new_file file ;        if false then -	let tokens = Lexer.lexbuf2list Lexer.token lexbuf in -	let _,_ = tokens, tokens in "" +	let t = Lexer.lexbuf2list Lexer.token lexbuf in +	let _,_ = t, t in ""        else  	Parser.prog Lexer.token lexbuf      with Failure s -> ( | 
