diff options
author | Mageia SVN-Git Migration <svn-git-migration@mageia.org> | 2007-04-25 15:16:21 +0000 |
---|---|---|
committer | Mageia SVN-Git Migration <svn-git-migration@mageia.org> | 2007-04-25 15:16:21 +0000 |
commit | be4fff49f0164e606d4b2f76f64d4d108895f236 (patch) | |
tree | a46bc8c23de0b885f8a2962a9069930b48836fd9 /src | |
parent | 4746e8e79a5b3cdf3f72400a5a5d6742f6a76a8c (diff) | |
download | perl_checker-be4fff49f0164e606d4b2f76f64d4d108895f236.tar perl_checker-be4fff49f0164e606d4b2f76f64d4d108895f236.tar.gz perl_checker-be4fff49f0164e606d4b2f76f64d4d108895f236.tar.bz2 perl_checker-be4fff49f0164e606d4b2f76f64d4d108895f236.tar.xz perl_checker-be4fff49f0164e606d4b2f76f64d4d108895f236.zip |
Rename folder to match history.
This is a Synthesized commit to combine perl-MDK-Common and perl_checker repository
history.
Diffstat (limited to 'src')
38 files changed, 7882 insertions, 0 deletions
diff --git a/src/.cvsignore b/src/.cvsignore new file mode 100644 index 0000000..8c0f1f4 --- /dev/null +++ b/src/.cvsignore @@ -0,0 +1,15 @@ +._bcdi +._d +._ncdi +*.cmi +*.cmo +*.cmx +perl_checker +perl_checker.html +perl_checker_debug +gmon.out +lexer.ml +parser.ml +parser.mli +parser.output +build.ml diff --git a/src/Makefile b/src/Makefile new file mode 100644 index 0000000..22a45a6 --- /dev/null +++ b/src/Makefile @@ -0,0 +1,34 @@ +# OCAMLC = ocamlcp -p a +OCAMLBCFLAGS = -w A -w e +YFLAGS = -v +TRASH = parser.output perl_checker.html TAGS +RESULT = perl_checker +BCSUFFIX = _debug +SOURCES = types.mli build.ml common.ml flags.ml config_file.ml info.ml parser_helper.ml parser.mly lexer.mll tree.ml global_checks.ml perl_checker.ml +LIBS = unix +VENDORLIB = $(shell dirname `pwd`) +DEBUG = 1 + +default: TAGS build_ml build.ml debug-code native-code perl_checker.html + +build_ml: + rm -f build.ml + $(MAKE) build.ml + +build.ml: + date '+let date = "%s"' > $@ + echo 'let fake_packages_dir = "'$(VENDORLIB)'/perl_checker_fake_packages"' >> $@ + echo 'let debugging = $(DEBUG) > 0' >> $@ + +%.html: %.html.pl + rm -f $@ + perl $< > $@ + chmod a-w $@ + +tags: + ocamltags *.ml + +TAGS: + ocamltags *.ml + +-include OCamlMakefile diff --git a/src/OCamlMakefile b/src/OCamlMakefile new file mode 100644 index 0000000..95df83f --- /dev/null +++ b/src/OCamlMakefile @@ -0,0 +1,912 @@ +########################################################################### +# OCamlMakefile +# Copyright (C) 1999-2002 Markus Mottl +# +# For updates see: +# http://www.oefai.at/~markus/ocaml_sources +# +# $Id$ +# +########################################################################### + +# Set these variables to the names of the sources to be processed and +# the result variable. Order matters during linkage! + +ifndef SOURCES + SOURCES := foo.ml +endif +export SOURCES + +ifndef RES_CLIB_SUF + RES_CLIB_SUF := _stubs +endif +export RES_CLIB_SUF + +ifndef RESULT + RESULT := foo +endif +export RESULT + +ifndef DOC_FILES + DOC_FILES := $(filter %.mli, $(SOURCES)) +endif +export DOC_FILES + +export BCSUFFIX +export NCSUFFIX + +ifndef TOPSUFFIX + TOPSUFFIX := .top +endif + +export TOPSUFFIX + +# Eventually set include- and library-paths, libraries to link, +# additional compilation-, link- and ocamlyacc-flags +# Path- and library information needs not be written with "-I" and such... +# Define THREADS if you need it, otherwise leave it unset (same for +# USE_CAMLP4)! + +export THREADS +export USE_CAMLP4 + +export INCDIRS +export LIBDIRS +export EXTLIBDIRS +export OCAML_DEFAULT_DIRS +export OCAML_LIB_INSTALL + +export LIBS +export CLIBS + +export OCAMLFLAGS +export OCAMLNCFLAGS +export OCAMLBCFLAGS + +export OCAMLLDFLAGS +export OCAMLNLDFLAGS +export OCAMLBLDFLAGS + +ifndef OCAMLCPFLAGS + OCAMLCPFLAGS := a +endif + +export OCAMLCPFLAGS + +export YFLAGS +export IDLFLAGS + +export OCAMLDOCFLAGS + +export DVIPSFLAGS + +export STATIC + +# Add a list of optional trash files that should be deleted by "make clean" +export TRASH + +#################### variables depending on your OCaml-installation + +ifdef MINGW + export MINGW + WIN32 := 1 +endif +ifdef MSVC + export MSVC + WIN32 := 1 + EXT_OBJ := obj + EXT_LIB := lib + ifeq ($(CC),gcc) + # work around GNU Make default value + ifdef THREADS + CC := cl /MT + else + CC := cl + endif + endif + ifeq ($(CXX),g++) + # work around GNU Make default value + CXX := $(CC) + endif + CFLAG_O := -Fo +endif +ifdef WIN32 + EXT_CXX := cpp + EXE := .exe +endif + +ifndef EXT_OBJ + EXT_OBJ := o +endif +ifndef EXT_LIB + EXT_LIB := a +endif +ifndef EXT_CXX + EXT_CXX := cc +endif +ifndef EXE + EXE := # empty +endif +ifndef CFLAG_O + CFLAG_O := -o # do not delete this comment (preserves trailing whitespace)! +endif + +export CC +export CXX +export CFLAGS +export CXXFLAGS +export LDFLAGS + +BCRESULT := $(addsuffix $(BCSUFFIX), $(RESULT)) +NCRESULT := $(addsuffix $(NCSUFFIX), $(RESULT)) +TOPRESULT := $(addsuffix $(TOPSUFFIX), $(RESULT)) + +ifndef OCAMLC + OCAMLC := ocamlc +endif + +export OCAMLC + +ifndef OCAMLOPT + OCAMLOPT := ocamlopt +endif + +export OCAMLOPT + +ifndef OCAMLMKTOP + OCAMLMKTOP := ocamlmktop +endif + +export OCAMLMKTOP + +ifndef OCAMLCP + OCAMLCP := ocamlcp +endif + +export OCAMLCP + +ifndef OCAMLDEP + OCAMLDEP := ocamldep +endif + +export OCAMLDEP + +ifndef OCAMLLEX + OCAMLLEX := ocamllex +endif + +export OCAMLLEX + +ifndef OCAMLYACC + OCAMLYACC := ocamlyacc +endif + +export OCAMLYACC + +ifndef CAMELEON_REPORT + CAMELEON_REPORT := report +endif + +ifndef CAMELEON_REPORT_FLAGS + CAMELEON_REPORT_FLAGS := +endif + +ifndef CAMELEON_ZOGGY + CAMELEON_ZOGGY := camlp4o pa_zog.cma pr_o.cmo +endif + +ifndef CAMELEON_ZOGGY_FLAGS + CAMELEON_ZOGGY_FLAGS := +endif + +ifndef CAMLIDL + CAMLIDL := camlidl +endif + +export CAMLIDL + +ifndef CAMLIDLDLL + CAMLIDLDLL := camlidldll +endif + +export CAMLIDLDLL + +ifndef NOIDLHEADER + MAYBE_IDL_HEADER := -header +endif + +export NOIDLHEADER + +ifndef CAMLP4 + CAMLP4 := camlp4 +endif + +export CAMLP4 + +ifndef OCAMLDOC + OCAMLDOC := ocamldoc +endif + +export OCAMLDOC + +ifndef LATEX + LATEX := latex +endif + +export LATEX + +ifndef DVIPS + DVIPS := dvips +endif + +export DVIPS + +ifndef PS2PDF + PS2PDF := ps2pdf +endif + +export PS2PDF + +ifndef OCAMLMAKEFILE + OCAMLMAKEFILE := OCamlMakefile +endif + +export OCAMLMAKEFILE + +ifndef OCAMLLIBPATH + OCAMLLIBPATH := \ + $(shell $(OCAMLC) 2>/dev/null -where || echo /usr/local/lib/ocaml) +endif + +export OCAMLLIBPATH + +ifndef OCAML_LIB_INSTALL + OCAML_LIB_INSTALL := $(OCAMLLIBPATH)/contrib +endif + +export OCAML_LIB_INSTALL + +########################################################################### + +#################### change following sections only if +#################### you know what you are doing! + +# delete target files when a build command fails +.PHONY: .DELETE_ON_ERROR +.DELETE_ON_ERROR: + +# for pedants using "--warn-undefined-variables" +export MAYBE_IDL +export REAL_RESULT +export CAMLIDLFLAGS +export THREAD_FLAG +export RES_CLIB +export MAKEDLL + +SHELL := /bin/sh + +MLDEPDIR := ._d +BCDIDIR := ._bcdi +NCDIDIR := ._ncdi + +FILTER_EXTNS := %.mli %.ml %.mll %.mly %.idl %.c %.$(EXT_CXX) %.rep %.zog + +FILTERED := $(filter $(FILTER_EXTNS), $(SOURCES)) +SOURCE_DIRS := $(filter-out ./, $(sort $(dir $(FILTERED)))) + +FILTERED_REP := $(filter %.rep, $(FILTERED)) +DEP_REP := $(FILTERED_REP:%.rep=$(MLDEPDIR)/%.d) +AUTO_REP := $(FILTERED_REP:.rep=.ml) + +FILTERED_ZOG := $(filter %.zog, $(FILTERED)) +DEP_ZOG := $(FILTERED_ZOG:%.zog=$(MLDEPDIR)/%.d) +AUTO_ZOG := $(FILTERED_ZOG:.zog=.ml) + +FILTERED_ML := $(filter %.ml, $(FILTERED)) +DEP_ML := $(FILTERED_ML:%.ml=$(MLDEPDIR)/%.d) + +FILTERED_MLI := $(filter %.mli, $(FILTERED)) +DEP_MLI := $(FILTERED_MLI:.mli=.di) + +FILTERED_MLL := $(filter %.mll, $(FILTERED)) +DEP_MLL := $(FILTERED_MLL:%.mll=$(MLDEPDIR)/%.d) +AUTO_MLL := $(FILTERED_MLL:.mll=.ml) + +FILTERED_MLY := $(filter %.mly, $(FILTERED)) +DEP_MLY := $(FILTERED_MLY:%.mly=$(MLDEPDIR)/%.d) $(FILTERED_MLY:.mly=.di) +AUTO_MLY := $(FILTERED_MLY:.mly=.mli) $(FILTERED_MLY:.mly=.ml) + +FILTERED_IDL := $(filter %.idl, $(FILTERED)) +DEP_IDL := $(FILTERED_IDL:%.idl=$(MLDEPDIR)/%.d) $(FILTERED_IDL:.idl=.di) +C_IDL := $(FILTERED_IDL:%.idl=%_stubs.c) $(FILTERED_IDL:.idl=.h) +OBJ_C_IDL := $(FILTERED_IDL:%.idl=%_stubs.$(EXT_OBJ)) +AUTO_IDL := $(FILTERED_IDL:.idl=.mli) $(FILTERED_IDL:.idl=.ml) $(C_IDL) + +FILTERED_C_CXX := $(filter %.c %.$(EXT_CXX), $(FILTERED)) +OBJ_C_CXX := $(FILTERED_C_CXX:.c=.$(EXT_OBJ)) +OBJ_C_CXX := $(OBJ_C_CXX:.$(EXT_CXX)=.$(EXT_OBJ)) + +PRE_TARGETS += $(AUTO_MLL) $(AUTO_MLY) $(AUTO_IDL) $(AUTO_ZOG) $(AUTO_REP) + +ALL_DEPS := $(DEP_ML) $(DEP_MLI) $(DEP_MLL) $(DEP_MLY) $(DEP_IDL) $(DEP_ZOG) $(DEP_REP) + +MLDEPS := $(filter %.d, $(ALL_DEPS)) +MLIDEPS := $(filter %.di, $(ALL_DEPS)) +BCDEPIS := $(MLIDEPS:%.di=$(BCDIDIR)/%.di) +NCDEPIS := $(MLIDEPS:%.di=$(NCDIDIR)/%.di) + +ALLML := $(filter %.mli %.ml %.mll %.mly %.idl %.rep %.zog, $(FILTERED)) + +IMPLO_INTF := $(ALLML:%.mli=%.mli.__) +IMPLO_INTF := $(foreach file, $(IMPLO_INTF), \ + $(basename $(file)).cmi $(basename $(file)).cmo) +IMPLO_INTF := $(filter-out %.mli.cmo, $(IMPLO_INTF)) +IMPLO_INTF := $(IMPLO_INTF:%.mli.cmi=%.cmi) + +IMPLX_INTF := $(IMPLO_INTF:.cmo=.cmx) + +INTF := $(filter %.cmi, $(IMPLO_INTF)) +IMPL_CMO := $(filter %.cmo, $(IMPLO_INTF)) +IMPL_CMX := $(IMPL_CMO:.cmo=.cmx) + +OBJ_LINK := $(OBJ_C_IDL) $(OBJ_C_CXX) +OBJ_FILES := $(IMPL_CMO:.cmo=.$(EXT_OBJ)) $(OBJ_LINK) + +EXECS := $(addsuffix $(EXE), \ + $(sort $(TOPRESULT) $(BCRESULT) $(NCRESULT))) +ifdef WIN32 + EXECS += $(BCRESULT).dll $(NCRESULT).dll +endif + +CLIB_BASE := $(RESULT)$(RES_CLIB_SUF) +ifneq ($(strip $(OBJ_LINK)),) + RES_CLIB := lib$(CLIB_BASE).$(EXT_LIB) +endif + +ifndef MSVC +DLLSONAME := dll$(CLIB_BASE).so +endif + +NONEXECS := $(INTF) $(IMPL_CMO) $(IMPL_CMX) $(OBJ_FILES) $(PRE_TARGETS) \ + $(BCRESULT).cma $(NCRESULT).cmxa $(NCRESULT).$(EXT_LIB) \ + $(BCRESULT).cmi $(BCRESULT).cmo \ + $(NCRESULT).cmi $(NCRESULT).cmx $(NCRESULT).o \ + $(RES_CLIB) + +ifndef MSVC + NONEXECS += $(DLLSONAME) +endif + +ifndef LIBINSTALL_FILES + LIBINSTALL_FILES := $(RESULT).mli $(RESULT).cmi $(RESULT).cma \ + $(RESULT).cmxa $(RESULT).a $(RES_CLIB) +endif + +ifndef MSVC + LIBINSTALL_FILES += $(DLLSONAME) +endif + +export LIBINSTALL_FILES + +ifdef WIN32 + # some extra stuff is created while linking DLLs + NONEXECS += $(BCRESULT).$(EXT_LIB) $(BCRESULT).exp $(NCRESULT).exp +endif + +TARGETS := $(EXECS) $(NONEXECS) + +# handle ocamlfind +ifdef USING_OCAMLFIND + PACKOPT := -pack +else + PACKOPT := -passopt "-pack" +endif + +# If there are IDL-files +ifneq ($(strip $(FILTERED_IDL)),) + MAYBE_IDL := -cclib -lcamlidl +endif + +ifdef USE_CAMLP4 + CAMLP4PATH := \ + $(shell $(CAMLP4) -where 2>/dev/null || echo /usr/local/lib/camlp4) + INCFLAGS := -I $(CAMLP4PATH) + CINCFLAGS := -I$(CAMLP4PATH) +endif + +INCFLAGS += $(SOURCE_DIRS:%=-I %) $(INCDIRS:%=-I %) $(OCAML_DEFAULT_DIRS:%=-I %) +CINCFLAGS += $(SOURCE_DIRS:%=-I%) $(INCDIRS:%=-I%) $(OCAML_DEFAULT_DIRS:%=-I%) +CLIBFLAGS += $(SOURCE_DIRS:%=-L%) $(LIBDIRS:%=-L%) \ + $(EXTLIBDIRS:%=-L%) $(EXTLIBDIRS:%=-R%) \ + $(OCAML_DEFAULT_DIRS:%=-L%) + +ifndef PROFILING + INTF_OCAMLC := $(OCAMLC) +else + ifndef THREADS + INTF_OCAMLC := $(OCAMLCP) -p $(OCAMLCPFLAGS) + else + # OCaml does not support profiling byte code + # with threads (yet), therefore we force an error. + ifndef REAL_OCAMLC + $(error Profiling of multithreaded byte code not yet supported by OCaml) + endif + endif +endif + +ifndef MSVC + COMMON_LDFLAGS := $(LDFLAGS:%=-ccopt %) $(SOURCE_DIRS:%=-ccopt -L%) \ + $(LIBDIRS:%=-ccopt -L%) \ + $(EXTLIBDIRS:%=-ccopt -L%) $(EXTLIBDIRS:%=-ccopt -R%) \ + $(OCAML_DEFAULT_DIRS:%=-ccopt -L%) +else + # currenly MSVC-build ocamlc/ocamlopt cannot pass any option to C linker :-( + COMMON_LDFLAGS := +endif + +ifndef MSVC + CLIBS_OPTS := $(CLIBS:%=-cclib -l%) +else + # MSVC libraries do not have 'lib' prefix + CLIBS_OPTS := $(CLIBS:%=-ccopt %) +endif +ifneq ($(strip $(OBJ_LINK)),) + ifdef CREATE_LIB + OBJS_LIBS := -cclib -l$(CLIB_BASE) $(CLIBS_OPTS) $(MAYBE_IDL) + else + OBJS_LIBS := $(OBJ_LINK) $(CLIBS_OPTS) $(MAYBE_IDL) + endif +else + OBJS_LIBS := $(CLIBS_OPTS) $(MAYBE_IDL) +endif + +# If we have to make byte-code +ifndef REAL_OCAMLC + # EXTRADEPS is added dependencies we have to insert for all + # executable files we generate. Ideally it should be all of the + # libraries we use, but it's hard to find the ones that get searched on + # the path since I don't know the paths built into the compiler, so + # just include the ones with slashes in their names. + EXTRADEPS := $(addsuffix .cma,$(foreach i,$(LIBS),$(if $(findstring /,$(i)),$(i)))) + SPECIAL_OCAMLFLAGS := $(OCAMLBCFLAGS) + + REAL_OCAMLC := $(INTF_OCAMLC) + + REAL_IMPL := $(IMPL_CMO) + REAL_IMPL_INTF := $(IMPLO_INTF) + IMPL_SUF := .cmo + + DEPFLAGS := + MAKE_DEPS := $(MLDEPS) $(BCDEPIS) + + ifdef CREATE_LIB + ifndef STATIC + ifneq ($(strip $(OBJ_LINK)),) + MAKEDLL := $(DLLSONAME) + ALL_LDFLAGS := -dllib $(DLLSONAME) + endif + endif + endif + + ifndef NO_CUSTOM + ifneq "$(strip $(OBJ_LINK) $(THREADS) $(MAYBE_IDL) $(CLIBS))" "" + ALL_LDFLAGS += -custom + endif + endif + + ALL_LDFLAGS += $(INCFLAGS) $(OCAMLLDFLAGS) $(OCAMLBLDFLAGS) \ + $(COMMON_LDFLAGS) $(LIBS:%=%.cma) + CAMLIDLDLLFLAGS := + + ifdef THREADS + ALL_LDFLAGS := -thread $(ALL_LDFLAGS) + ifndef CREATE_LIB + ALL_LDFLAGS := unix.cma threads.cma $(ALL_LDFLAGS) + endif + THREAD_FLAG := -thread + endif + +# we have to make native-code +else + EXTRADEPS := $(addsuffix .cmxa,$(foreach i,$(LIBS),$(if $(findstring /,$(i)),$(i)))) + ifndef PROFILING + SPECIAL_OCAMLFLAGS := $(OCAMLNCFLAGS) + PLDFLAGS := + else + SPECIAL_OCAMLFLAGS := -p $(OCAMLNCFLAGS) + PLDFLAGS := -p + endif + + REAL_IMPL := $(IMPL_CMX) + REAL_IMPL_INTF := $(IMPLX_INTF) + IMPL_SUF := .cmx + + CFLAGS := -DNATIVE_CODE $(CFLAGS) + + DEPFLAGS := -native + MAKE_DEPS := $(MLDEPS) $(NCDEPIS) + + ALL_LDFLAGS := $(PLDFLAGS) $(INCFLAGS) $(OCAMLLDFLAGS) \ + $(OCAMLNLDFLAGS) $(COMMON_LDFLAGS) + CAMLIDLDLLFLAGS := -opt + + ifndef CREATE_LIB + ALL_LDFLAGS += $(LIBS:%=%.cmxa) + endif + + ifdef THREADS + ALL_LDFLAGS := -thread $(ALL_LDFLAGS) + ifndef CREATE_LIB + ALL_LDFLAGS := unix.cmxa threads.cmxa $(ALL_LDFLAGS) + endif + THREAD_FLAG := -thread + endif +endif + +export MAKE_DEPS + +ALL_OCAMLCFLAGS := $(THREAD_FLAG) $(OCAMLFLAGS) \ + $(INCFLAGS) $(SPECIAL_OCAMLFLAGS) + +ifdef make_deps + -include $(MAKE_DEPS) + PRE_TARGETS := +endif + +########################################################################### +# USER RULES + +# Call "OCamlMakefile QUIET=" to get rid of all of the @'s. +QUIET=@ + +# generates byte-code (default) +byte-code: $(PRE_TARGETS) + $(QUIET)$(MAKE) -r -f $(OCAMLMAKEFILE) $(BCRESULT) \ + REAL_RESULT="$(BCRESULT)" make_deps=yes +bc: byte-code + +byte-code-nolink: $(PRE_TARGETS) + $(QUIET)$(MAKE) -r -f $(OCAMLMAKEFILE) nolink \ + REAL_RESULT="$(BCRESULT)" make_deps=yes +bcnl: byte-code-nolink + +top: $(PRE_TARGETS) + $(QUIET)$(MAKE) -r -f $(OCAMLMAKEFILE) $(TOPRESULT) \ + REAL_RESULT="$(BCRESULT)" make_deps=yes + +# generates native-code + +native-code: $(PRE_TARGETS) + $(QUIET)$(MAKE) -r -f $(OCAMLMAKEFILE) $(NCRESULT) \ + REAL_RESULT="$(NCRESULT)" \ + REAL_OCAMLC="$(OCAMLOPT)" \ + make_deps=yes +nc: native-code + +native-code-nolink: $(PRE_TARGETS) + $(QUIET)$(MAKE) -r -f $(OCAMLMAKEFILE) nolink \ + REAL_RESULT="$(NCRESULT)" \ + REAL_OCAMLC="$(OCAMLOPT)" \ + make_deps=yes +ncnl: native-code-nolink + +# generates byte-code libraries +byte-code-library: $(PRE_TARGETS) + $(QUIET)$(MAKE) -r -f $(OCAMLMAKEFILE) \ + $(RES_CLIB) $(BCRESULT).cma \ + REAL_RESULT="$(BCRESULT)" \ + CREATE_LIB=yes \ + make_deps=yes +bcl: byte-code-library + +# generates native-code libraries +native-code-library: $(PRE_TARGETS) + $(QUIET)$(MAKE) -r -f $(OCAMLMAKEFILE) \ + $(RES_CLIB) $(NCRESULT).cmxa \ + REAL_RESULT="$(NCRESULT)" \ + REAL_OCAMLC="$(OCAMLOPT)" \ + CREATE_LIB=yes \ + make_deps=yes +ncl: native-code-library + +ifdef WIN32 +# generates byte-code dll +byte-code-dll: $(PRE_TARGETS) + $(QUIET)$(MAKE) -r -f $(OCAMLMAKEFILE) \ + $(RES_CLIB) $(BCRESULT).dll \ + REAL_RESULT="$(BCRESULT)" \ + make_deps=yes +bcd: byte-code-dll + +# generates native-code dll +native-code-dll: $(PRE_TARGETS) + $(QUIET)$(MAKE) -r -f $(OCAMLMAKEFILE) \ + $(RES_CLIB) $(NCRESULT).dll \ + REAL_RESULT="$(NCRESULT)" \ + REAL_OCAMLC="$(OCAMLOPT)" \ + make_deps=yes +ncd: native-code-dll +endif + +# generates byte-code with debugging information +debug-code: $(PRE_TARGETS) + $(QUIET)$(MAKE) -r -f $(OCAMLMAKEFILE) $(BCRESULT) \ + REAL_RESULT="$(BCRESULT)" make_deps=yes \ + OCAMLFLAGS="-g $(OCAMLFLAGS)" \ + OCAMLLDFLAGS="-g $(OCAMLLDFLAGS)" +dc: debug-code + +# generates byte-code libraries with debugging information +debug-code-library: $(PRE_TARGETS) + $(QUIET)$(MAKE) -r -f $(OCAMLMAKEFILE) \ + $(RES_CLIB) $(BCRESULT).cma \ + REAL_RESULT="$(BCRESULT)" make_deps=yes \ + CREATE_LIB=yes \ + OCAMLFLAGS="-g $(OCAMLFLAGS)" \ + OCAMLLDFLAGS="-g $(OCAMLLDFLAGS)" +dcl: debug-code-library + +# generates byte-code for profiling +profiling-byte-code: $(PRE_TARGETS) + $(QUIET)$(MAKE) -r -f $(OCAMLMAKEFILE) $(BCRESULT) \ + REAL_RESULT="$(BCRESULT)" PROFILING="y" \ + make_deps=yes +pbc: profiling-byte-code + +# generates native-code + +profiling-native-code: $(PRE_TARGETS) + $(QUIET)$(MAKE) -r -f $(OCAMLMAKEFILE) $(NCRESULT) \ + REAL_RESULT="$(NCRESULT)" \ + REAL_OCAMLC="$(OCAMLOPT)" \ + PROFILING="y" \ + make_deps=yes +pnc: profiling-native-code + +# generates byte-code libraries +profiling-byte-code-library: $(PRE_TARGETS) + $(QUIET)$(MAKE) -r -f $(OCAMLMAKEFILE) \ + $(RES_CLIB) $(BCRESULT).cma \ + REAL_RESULT="$(BCRESULT)" PROFILING="y" \ + CREATE_LIB=yes \ + make_deps=yes +pbcl: profiling-byte-code-library + +# generates native-code libraries +profiling-native-code-library: $(PRE_TARGETS) + $(QUIET)$(MAKE) -r -f $(OCAMLMAKEFILE) \ + $(RES_CLIB) $(NCRESULT).cmxa \ + REAL_RESULT="$(NCRESULT)" PROFILING="y" \ + REAL_OCAMLC="$(OCAMLOPT)" \ + CREATE_LIB=yes \ + make_deps=yes +pncl: profiling-native-code-library + +# packs byte-code objects +pack-byte-code: $(PRE_TARGETS) + $(QUIET)$(MAKE) -r -f $(OCAMLMAKEFILE) $(BCRESULT).cmo \ + REAL_RESULT="$(BCRESULT)" \ + PACK_LIB=yes make_deps=yes +pabc: pack-byte-code + +# packs native-code objects +pack-native-code: $(PRE_TARGETS) + $(QUIET)$(MAKE) -r -f $(OCAMLMAKEFILE) \ + $(NCRESULT).cmx $(NCRESULT).o \ + REAL_RESULT="$(NCRESULT)" \ + REAL_OCAMLC="$(OCAMLOPT)" \ + PACK_LIB=yes make_deps=yes +panc: pack-native-code + +# generates HTML-documentation +htdoc: doc/html + +# generates Latex-documentation +ladoc: doc/latex + +# generates PostScript-documentation +psdoc: doc/latex/doc.ps + +# generates PDF-documentation +pdfdoc: doc/latex/doc.pdf + +# generates all supported forms of documentation +doc: htdoc ladoc psdoc pdfdoc + +########################################################################### +# LOW LEVEL RULES + +$(REAL_RESULT): $(REAL_IMPL_INTF) $(OBJ_LINK) $(EXTRADEPS) + $(REAL_OCAMLC) $(ALL_LDFLAGS) $(OBJS_LIBS) -o $@$(EXE) \ + $(REAL_IMPL) +ifdef MSVC +# work around the bug in ocamlc -- it should delete this file itself + rm -f camlprim?.$(EXT_OBJ) +endif + +nolink: $(REAL_IMPL_INTF) $(OBJ_LINK) + +ifdef WIN32 +$(REAL_RESULT).dll: $(REAL_IMPL_INTF) $(OBJ_LINK) + $(CAMLIDLDLL) $(CAMLIDLDLLFLAGS) $(OBJ_LINK) $(CLIBS) \ + -o $@ $(REAL_IMPL) +endif + +%$(TOPSUFFIX): $(REAL_IMPL_INTF) $(OBJ_LINK) $(EXTRADEPS) + $(OCAMLMKTOP) $(ALL_LDFLAGS) $(OBJS_LIBS) -o $@$(EXE) \ + $(REAL_IMPL) +ifdef MSVC +# work around the bug in ocamltop -- it should delete this file itself + rm -f camlprim?.$(EXT_OBJ) +endif + +.SUFFIXES: .mli .ml .cmi .cmo .cmx .cma .cmxa .$(EXT_OBJ) \ + .mly .di .d .$(EXT_LIB) .idl .c .$(EXT_CXX) .h .so \ + .rep .zog +ifndef MSVC +$(DLLSONAME): $(OBJ_LINK) + $(CC) -shared $(CINCFLAGS) $(CLIBFLAGS) \ + -o $@ $(OBJ_LINK) $(CLIBS:%=-l%) +endif + +$(RESULT).cma: $(REAL_IMPL_INTF) $(MAKEDLL) $(EXTRADEPS) + $(REAL_OCAMLC) -a $(ALL_LDFLAGS) \ + $(OBJS_LIBS) -o $@ $(OCAMLBLDFLAGS) $(REAL_IMPL) + +$(RESULT).cmxa $(RESULT).$(EXT_LIB): $(REAL_IMPL_INTF) $(EXTRADEPS) + $(OCAMLOPT) -a $(ALL_LDFLAGS) $(OBJS_LIBS) \ + $(OCAMLNLDFLAGS) -o $@ $(REAL_IMPL) + +$(RES_CLIB): $(OBJ_LINK) +ifndef MSVC + ifneq ($(strip $(OBJ_LINK)),) + ar rc $@ $(OBJ_LINK) + ranlib $@ + endif +else + ifneq ($(strip $(OBJ_LINK)),) + lib /nologo /debugtype:cv /out:$(RES_CLIB) $(OBJ_LINK) + endif +endif + +.mli.cmi: $(EXTRADEPS) + $(QUIET)pp=`sed -n -e 's/(\*pp \([^*]*\) \*)/\1/p;q' $<`; \ + if [ -z "$$pp" ]; then \ + echo $(INTF_OCAMLC) -c $(THREAD_FLAG) \ + $(OCAMLFLAGS) $(INCFLAGS) $<; \ + $(INTF_OCAMLC) -c $(THREAD_FLAG) $(OCAMLFLAGS) \ + $(INCFLAGS) $<; \ + else \ + echo $(INTF_OCAMLC) -c -pp \"$$pp\" $(THREAD_FLAG) \ + $(OCAMLFLAGS) $(INCFLAGS) $<; \ + $(INTF_OCAMLC) -c -pp "$$pp" $(THREAD_FLAG) \ + $(OCAMLFLAGS) $(INCFLAGS) $<; \ + fi + +.ml.cmi .ml.$(EXT_OBJ) .ml.cmx .ml.cmo: $(EXTRADEPS) + $(QUIET)pp=`sed -n -e 's/(\*pp \([^*]*\) \*)/\1/p;q' $<`; \ + if [ -z "$$pp" ]; then \ + echo $(REAL_OCAMLC) -c $(ALL_OCAMLCFLAGS) $<; \ + $(REAL_OCAMLC) -c $(ALL_OCAMLCFLAGS) $<; \ + else \ + echo $(REAL_OCAMLC) -c -pp \"$$pp\" \ + $(ALL_OCAMLCFLAGS) $<; \ + $(REAL_OCAMLC) -c -pp "$$pp" $(ALL_OCAMLCFLAGS) $<; \ + fi + +ifdef PACK_LIB +$(REAL_RESULT).cmo $(REAL_RESULT).cmx $(REAL_RESULT).o: $(REAL_IMPL_INTF) $(OBJ_LINK) $(EXTRADEPS) + $(REAL_OCAMLC) $(PACKOPT) $(ALL_LDFLAGS) \ + $(OBJS_LIBS) -o $@ $(REAL_IMPL) +endif + +.PRECIOUS: %.ml +%.ml: %.mll + $(OCAMLLEX) $< + +.PRECIOUS: %.ml %.mli +%.ml %.mli: %.mly + $(OCAMLYACC) $(YFLAGS) $< + +.PRECIOUS: %.ml +%.ml : %.rep + $(CAMELEON_REPORT) $(CAMELEON_REPORT_FLAGS) -gen $< + +.PRECIOUS: %.ml +%.ml : %.zog + $(CAMELEON_ZOGGY) $(CAMELEON_ZOGGY_FLAGS) -impl $< > $@ + +.PRECIOUS: %.ml %.mli %_stubs.c %.h +%.ml %.mli %_stubs.c %.h: %.idl + $(CAMLIDL) $(MAYBE_IDL_HEADER) $(IDLFLAGS) \ + $(CAMLIDLFLAGS) $< + $(QUIET)if [ $(NOIDLHEADER) ]; then touch $*.h; fi + +.c.$(EXT_OBJ): + $(CC) -c $(CFLAGS) $(CINCFLAGS) -I$(OCAMLLIBPATH) \ + $< $(CFLAG_O)$@ + +.$(EXT_CXX).$(EXT_OBJ): + $(CXX) -c $(CXXFLAGS) $(CINCFLAGS) -I$(OCAMLLIBPATH) \ + $< $(CFLAG_O)$@ + +$(MLDEPDIR)/%.d: %.ml + $(QUIET)echo making $@ from $< + $(QUIET)if [ ! -d $(@D) ]; then mkdir -p $(@D); fi + $(QUIET)pp=`sed -n -e 's/(\*pp \([^*]*\) \*)/\1/p;q' $<`; \ + if [ -z "$$pp" ]; then \ + $(OCAMLDEP) $(INCFLAGS) $< > $@; \ + else \ + $(OCAMLDEP) -pp "$$pp" $(INCFLAGS) $< > $@; \ + fi + +$(BCDIDIR)/%.di $(NCDIDIR)/%.di: %.mli + $(QUIET)echo making $@ from $< + $(QUIET)if [ ! -d $(@D) ]; then mkdir -p $(@D); fi + $(QUIET)pp=`sed -n -e 's/(\*pp \([^*]*\) \*)/\1/p;q' $<`; \ + if [ -z "$$pp" ]; then \ + $(OCAMLDEP) $(DEPFLAGS) $(INCFLAGS) $< > $@; \ + else \ + $(OCAMLDEP) $(DEPFLAGS) \ + -pp "$$pp" $(INCFLAGS) $< > $@; \ + fi + +doc/html: $(DOC_FILES) + rm -rf $@ + mkdir -p $@ + $(OCAMLDOC) -html -d $@ $(OCAMLDOCFLAGS) $(DOC_FILES) + +doc/latex: $(DOC_FILES) + rm -rf $@ + mkdir -p $@ + $(OCAMLDOC) -latex -d $@ $(OCAMLDOCFLAGS) $(DOC_FILES) -o doc.tex + +doc/latex/doc.ps: doc/latex + cd doc/latex && \ + $(LATEX) doc.tex && \ + $(LATEX) doc.tex && \ + $(DVIPS) $(DVIPSFLAGS) doc.dvi -o $(@F) + +doc/latex/doc.pdf: doc/latex/doc.ps + cd doc/latex && $(PS2PDF) $(<F) + +########################################################################### +# (UN)INSTALL RULES FOR LIBRARIES + +.PHONY: libinstall +libinstall: all + $(QUIET)printf "\nInstalling library to: $(OCAML_LIB_INSTALL)\n" + -install -d $(OCAML_LIB_INSTALL) + for i in $(LIBINSTALL_FILES); do \ + if [ -f $$i ]; then \ + install -c -m 0644 $$i $(OCAML_LIB_INSTALL); \ + fi; \ + done + $(QUIET)printf "\nInstallation successful.\n" + +.PHONY: libuninstall +libuninstall: + $(QUIET)printf "\nUninstalling library from: $(OCAML_LIB_INSTALL)\n" + cd $(OCAML_LIB_INSTALL); rm $(notdir $(LIBINSTALL_FILES)) + $(QUIET)printf "\nUninstallation successful.\n" + +########################################################################### +# MAINTAINANCE RULES + +.PHONY: clean +clean: + rm -f $(TARGETS) $(TRASH) + rm -rf $(BCDIDIR) $(NCDIDIR) $(MLDEPDIR) + +.PHONY: cleanup +cleanup: + rm -f $(NONEXECS) $(TRASH) + rm -rf $(BCDIDIR) $(NCDIDIR) $(MLDEPDIR) + +.PHONY: clean-doc +clean-doc: + rm -rf doc + +.PHONY: nobackup +nobackup: + rm -f *.bak *~ *.dup diff --git a/src/build.mli b/src/build.mli new file mode 100644 index 0000000..716b843 --- /dev/null +++ b/src/build.mli @@ -0,0 +1,3 @@ +val date : string +val fake_packages_dir : string +val debugging : bool diff --git a/src/common.ml b/src/common.ml new file mode 100644 index 0000000..dd2f6b1 --- /dev/null +++ b/src/common.ml @@ -0,0 +1,1005 @@ +open Stack +open List + +exception Found +exception Not_comparable +exception GraphSort_circular_deps + +type ('a, 'b) either = Left of 'a | Right of 'b +type ('a, 'b) or_option = Or_some of 'a | Or_error of 'b + +(**********************************************************************************) + +let internal_error s = failwith ("internal error: " ^ s) + +let id x = x +let double a = a,a +let swap (x,y) = (y,x) +let safe_tl l = try tl l with _ -> [] +let fstfst ((e, _), _) = e +let sndfst ((_, e), _) = e +let fstsnd (_, (e, _)) = e +let sndsnd (_, (_, e)) = e + +let fst3 (e, _, _) = e +let snd3 (_, e, _) = e +let ter3 (_, _, e) = e +let sndter3 (_, a, b) = (a, b) + +let o f g x = f (g x) +let curry f x y = f (x,y) +let uncurry f (x, y) = f x y + +let is_int n = ceil n = n + +let uncons = function + | [] -> failwith "uncons" + | e::l -> e,l + +let has_env var = + try + let _ = Sys.getenv var in true + with Not_found -> false + +let some = function + | Some e -> e + | None -> failwith "some" + +let some_or = function + | None -> id + | Some e -> fun _ -> e + +let option2l = function + | None -> [] + | Some e -> [e] + +let prefer_some f a b = + match a, b with + | Some a, Some b -> Some (f a b) + | None, _ -> b + | _, None -> a + +let rec collect_accu f accu = function + | [] -> accu + | e::l -> collect_accu f (rev_append (f e) accu) l + +let collect f l = rev (collect_accu f [] l) + +let merge_some merge a b = + match a,b with + | None, None -> None + | _, None -> a + | None, _ -> b + | Some(a), Some(b) -> Some(merge a b) + +let rec uniq = function + | [] -> [] + | e::l -> if mem e l then uniq l else e :: uniq l + +let rec uniq_ eq = function + | [] -> [] + | e::l -> + try + let _ = find (eq e) l in + uniq_ eq l + with Not_found -> e :: uniq_ eq l + +let rec non_uniq = function + | [] -> [] + | e::l -> if mem e l then e :: non_uniq l else non_uniq l + +let rec member_ eq e = function + | [] -> false + | e'::l -> if eq e e' then true else member_ eq e l + +let rec find_some p = function + | [] -> raise Not_found + | x :: l -> + match p x with + | Some v -> v + | None -> find_some p l + +let fold_left1 f = function + | [] -> failwith "fold_left1" + | e :: l -> fold_left f e l + +let find_index e l = + let rec find_index_ i = function + | [] -> raise Not_found + | e'::l -> if e=e' then i else find_index_ (i+1) l + in + find_index_ 0 l + +let rec find_some_ p = function + | [] -> None + | x :: l -> + match p x with + | Some v -> Some v + | None -> find_some_ p l + +let rec fpartition p l = + let rec part yes no = function + | [] -> (rev yes, rev no) + | x :: l -> + (match p x with + | None -> part yes (x :: no) l + | Some v -> part (v :: yes) no l) in + part [] [] l + +let partition_either f l = + let rec part_either left right = function + | [] -> (rev left, rev right) + | x :: l -> + (match f x with + | Left e -> part_either (e :: left) right l + | Right e -> part_either left (e :: right) l) in + part_either [] [] l + +let rec keep_best f = + let rec partition e = function + | [] -> e, [] + | e' :: l -> + match f(e,e') with + | None -> let (e'', l') = partition e l in e'', e' :: l' + | Some e'' -> partition e'' l + in function + | [] -> [] + | e::l -> + let (e', l') = partition e l in + e' :: keep_best f l' + +let rec keep_bests f l = + let rec once e unchanged = function + | [] -> None + | e' :: l -> + match f(e,e') with + | None -> once e (e' :: unchanged) l + | Some e'' -> Some(e'', unchanged @ l) + in + let rec as_many_as_possible e l = + match once e [] l with + | None -> None + | Some(e', l') -> Some(some_or (as_many_as_possible e' l') (e', l')) + in + let rec try_with e l_done l_next = + match as_many_as_possible e l_next with + | None -> try_with_next (e :: l_done) l_next + | Some(e2, l_next2) -> + match as_many_as_possible e2 l_done with + | None -> try_with_next (e2 :: l_done) l_next2 + | Some(e3, l_done2) -> try_with e3 l_done2 l_next2 + and try_with_next l_done = function + | [] -> rev l_done + | e::l_next -> try_with e l_done l_next + in + try_with_next [] l + +let rec fold_right1 f = function + | [] -> failwith "fold_right1" + | [e] -> e + | e::l -> f e (fold_right1 f l) + +let rec for_all2_ p l1 l2 = + match (l1, l2) with + ([], []) -> true + | (a1::l1, a2::l2) -> p a1 a2 && for_all2_ p l1 l2 + | (_, _) -> false + +let rec for_all2_true p l1 l2 = + match (l1, l2) with + | (a1::l1, a2::l2) -> p a1 a2 && for_all2_true p l1 l2 + | (_, _) -> true + +let maxl l = fold_right1 max l + +let rec stack2list s = + let l = ref [] in + Stack.iter (fun e -> l := e :: !l) s ; + !l + +let rec stack_exists f s = + try + Stack.iter (fun e -> if f e then raise Found) s ; + false + with Found -> true + +let rec queue2list q = rev (Queue.fold (fun b a -> a :: b) [] q) + +let rec fix_point f p = + let p' = f p in + if p = p' then p else fix_point f p' + +let rec fix_point_withenv f env p = + let p', env' = f env p in + if p = p' then (p, env') else fix_point_withenv f env' p' + +let rec fix_point_ nb f p = + let p' = f p in + if p = p' then p, nb else fix_point_ (nb+1) f p' + +let rec group_by_2 = function + | [] -> [] + | a :: b :: l -> (a, b) :: group_by_2 l + | _ -> failwith "group_by_2" + +(* +let rec lfix_point f e = + let e' = f(e) in + if e = e' then e :: lfix_point f e' else [e] +*) + +let fluid_let ref value f = + let previous_val = !ref in + ref := value ; + let v = f() in + ref := previous_val ; + v + +let do0_withenv doit f env l = + let r_env = ref env in + doit (fun e -> r_env := f !r_env e) l ; + !r_env + +let do0_withenv2 doit f env l = + let r_env = ref env in + doit (fun e e' -> r_env := f !r_env e e') l ; + !r_env + +let do_withenv doit f env l = + let r_env = ref env in + let l' = doit (fun e -> + let e', env' = f !r_env e in + r_env := env' ; e' + ) l in + l', !r_env + +let do2_withenv doit f env l1 l2 = + let r_env = ref env in + let l' = doit (fun e1 e2 -> + let e', env' = f !r_env e1 e2 in + r_env := env' ; e' + ) l1 l2 in + l', !r_env + +let do_collect doit f l1 = + let l = ref [] in + doit (fun i t -> l := f i t @ !l) l1 ; + !l + +let map_withitself f l = + let rec map_withitself_ done_ = function + | [] -> done_ + | e :: l -> + let e' = f (done_ @ e :: l) e in + map_withitself_ (done_ @ [ e' ]) l + in map_withitself_ [] l + +let map_t2 f (x,y) = f x, f y +let map_t3 f (x,y,z) = f x, f y, f z +let map_option f = function + | Some e -> Some (f e) + | None -> None +let map_optionoption f = function + | Some e -> f e + | None -> None +let t2_option2option_t2 = function + | (Some x, Some y) -> Some(x,y) + | _ -> None +let rec l_option2option_l = function + | [] -> Some [] + | None :: _l -> None + | Some e :: l -> map_option (fun l -> e :: l) (l_option2option_l l) +let map_option_env f (e, env) = map_option f e, env + +let t2_to_list (a,b) = [ a ; b ] +let t3_to_list (a,b,c) = [ a ; b ; c ] + +let if_some bool val_ = if bool then Some val_ else None + +let rec fold_left_option f val_ = function + | [] -> Some val_ + | e::l -> + match f val_ e with + | None -> None + | Some val_' -> fold_left_option f val_' l + +let collect_some_withenv f env l = + let rec collect accu env = function + | [] -> rev accu, env + | e::l -> + let e', env' = f env e in + let accu' = + match e' with + | Some e' -> e'::accu + | None -> accu in + collect accu' env' l + in collect [] env l + +let for_all_option_withenv remap f env l = + let rec for_all env accu = function + | [] -> Some(remap (rev accu)), env + | e::l -> + (match f env e with + | None, env' -> None, env' + | Some e', env' -> for_all env' (e' :: accu) l) + in + for_all env [] l + +let for_all2_option_withenv remap f env la lb = + let rec for_all env accu = function + | [], [] -> Some(remap (rev accu)), env + | a::la, b::lb -> + (match f env a b with + | None, env' -> None, env' + | Some ab, env' -> for_all env' (ab :: accu) (la, lb)) + | _ -> None, env + in + for_all env [] (la, lb) + +let map_or_option f = function + | Or_some e -> Or_some (f e) + | Or_error err -> Or_error err + +let map_index f l = + let rec map_ n = function + | [] -> [] + | e::l -> f e n :: map_ (n+1) l + in map_ 0 l + +let filter_index f l = + let rec filter_ n = function + | [] -> [] + | e::l -> + let l' = filter_ (n+1) l in + if f e n then e :: l' else l' + in filter_ 0 l + +let iter_index f l = + let rec iter_ n = function + | [] -> () + | e::l -> f e n ; iter_ (n+1) l + in iter_ 0 l + +let map_fst f (x, y) = f x, y +let map_snd f (x, y) = x, f y + +let map_withenv f env e = do_withenv map f env e +let find_withenv f env e = do_withenv find f env e +let filter_withenv f env e = do_withenv filter f env e +let exists_withenv f env e = do_withenv exists f env e +let map_t2_withenv f env e = do_withenv map_t2 f env e +let for_all_withenv f env e = do_withenv for_all f env e +let collect_withenv f env e = do_withenv collect f env e +let partition_either_withenv f env e = do_withenv partition_either f env e + +let map2_withenv f env l1 l2 = do2_withenv map2 f env l1 l2 +let for_all2_withenv f env l1 l2 = do2_withenv for_all2 f env l1 l2 + +let rec take n l = + if n = 0 then [] + else match l with + | [] -> raise Not_found + | e::l -> e :: take (n-1) l +let last_n n l = rev (take n (rev l)) +let last l = hd (last_n 1 l) + +let rec skipfirst e = function + | [] -> [] + | e'::l when e = e' -> skipfirst e l + | l -> l + +let rec removelast = function + | [] -> failwith "removelast" + | [_] -> [] + | e::l -> e :: removelast l + +let rec split_last l = + let rec spl accu = function + | [] -> failwith "split_last" + | [e] -> rev accu, e + | e::l -> spl (e :: accu) l + in spl [] l + +let iter_assoc_val f l = iter (fun (_,v) -> f v) l +let map_assoc_val f l = map (fun (k,v) -> k, f v) l + +let assoc_or_fail e l = + try assoc e l with Not_found -> failwith "assoc failed" + +let assoc_by is_same e l = + find_some (fun (a,b) -> if is_same e a then Some b else None) l + +let rec update_assoc_by is_same f e = function + | [] -> raise Not_found + | (a,b) :: l when is_same e a -> (a, f b) :: l + | (a,b) :: l -> (a,b) :: update_assoc_by is_same f e l + +let update_assoc f e = update_assoc_by (=) f e + +let rec update_assoc_by_with_default default is_same f e = function + | [] -> [ e, f default ] + | (a,b) :: l when is_same e a -> (a, f b) :: l + | (a,b) :: l -> (a,b) :: update_assoc_by_with_default default is_same f e l + +let update_all_assoc_by is_same f e l = + map (fun (a,b) -> a, if is_same e a then f b else b) l + +let rec rassoc e = function + | [] -> raise Not_found + | (k,v) :: l -> if e = v then k else rassoc e l + +let rec all_assoc e = function + | [] -> [] + | (e',v) :: l when e=e' -> v :: all_assoc e l + | _ :: l -> all_assoc e l + +let rec all_assoc_by is_same e = function + | [] -> [] + | (e',v) :: l when is_same e e' -> v :: all_assoc_by is_same e l + | _ :: l -> all_assoc_by is_same e l + +let prepare_want_all_assoc l = + map (fun n -> n, uniq (all_assoc n l)) (uniq (map fst l)) + +let prepare_want_all_assoc_by is_same l = + map (fun n -> n, uniq_ is_same (all_assoc_by is_same n l)) (uniq_ is_same (map fst l)) + +let prepare_want_all_assoc_by_ is_same_a is_same_b l = + map (fun n -> n, uniq_ is_same_b (all_assoc_by is_same_a n l)) (uniq_ is_same_a (map fst l)) + +let rec count_uniq = function + | [] -> [] + | e::l -> + let has, l' = partition ((=) e) l in + (e, length has + 1) :: count_uniq l' + +let rec repeat e = function + | 0 -> [] + | n -> e :: repeat e (n-1) + +let rec inits = function + | [] -> [[]] + | e::l -> [] :: map (fun l -> e::l) (inits l) +let rec tails = function + | [] -> [[]] + | (_::xs) as xxs -> xxs :: tails xs + +let apply f x = f x;; + +let rec map3 f l1 l2 l3 = + match (l1, l2, l3) with + ([], [], []) -> [] + | (a1::l1, a2::l2, a3::l3) -> let r = f a1 a2 a3 in r :: map3 f l1 l2 l3 + | (_, _, _) -> invalid_arg "map3" + +let filter2 f l1 l2 = + split (filter f (combine l1 l2)) + +let break_at f l = + let rec b l1 = function + | [] -> l1, [] + | e::l2 -> if f e then (l1, e :: l2) else b (l1 @ [e]) l2 + in b [] l +let break v l = break_at ((=) v) l + +let drop_while f l = snd (break_at (fun e -> not (f e)) l) + +(* break_at_indice 0 [1;2] gives [], [1;2] + break_at_indice 1 [1;2] gives [1], [2] + *) +let rec break_at_indice i l = + if i = 0 then [], l else + match l with + | [] -> raise Not_found + | e::l2 -> + let a, b = break_at_indice (i-1) l2 in + e::a, b + +let rev_nth e l = + let rec rev_nth' i = function + | [] -> raise Not_found + | e'::_ when e'=e -> i + | _::l -> rev_nth' (i+1) l + in rev_nth' 0 l + +let rec getset_nth l i f = + match l, i with + | e::l', 0 -> f e :: l' + | [], _ -> failwith "getset_nth" + | e::l', _ -> e :: getset_nth l' (i - 1) f + +let set_nth l i v = getset_nth l i (fun _ -> v) + +let adjustModDown m n = n - (n mod m) +let adjustModUp m n = adjustModDown m (n + m - 1) + + +let hashtbl_find f h = + let r = ref None in + Hashtbl.iter (fun v c -> if f v c then r := Some v) h ; + match !r with + | Some v -> v + | None -> raise Not_found + +let hashtbl_map f h = Hashtbl.iter (fun v c -> Hashtbl.replace h v (f v c)) h + +let hashtbl_values h = Hashtbl.fold (fun _ v l -> v :: l) h [] +let hashtbl_keys h = Hashtbl.fold (fun k _ l -> k :: l) h [] +let hashtbl_to_list h = Hashtbl.fold (fun k v l -> (k,v) :: l) h [] + +let hashtbl_collect f h = + rev (Hashtbl.fold (fun k v l -> rev_append (f k v) l) h []) + +let hashtbl_exists f h = + try + Hashtbl.iter (fun v c -> if f v c then raise Found) h ; + false + with Found -> true + +let memoize f = + let hash = Hashtbl.create 16 in + fun k -> + try Hashtbl.find hash k + with Not_found -> + let v = f k in + Hashtbl.add hash k v ; v + +let array_shift a = Array.sub a 1 (Array.length a - 1) +let array_last_n n a = + let len = Array.length a in + Array.sub a (len - n) n + +let array_collect f a = Array.fold_left (fun l e -> f e @ l) [] a + +let rec lvector_product = + let rec vector_product a b = match a with + | [] -> [] + | e::l -> map (fun e' -> e :: e') b :: vector_product l b + in function + | [] -> [] + | [e] -> map (fun e -> [e]) e + | e::l -> flatten (vector_product e (lvector_product l)) + +let vector_product2 a b = + map (function + | [a;b] -> a,b + | _ -> failwith "vector_product2" + ) (lvector_product [ a ; b ]) + +let rec transpose = function + | [] :: _ -> [] + | ll -> + let l, ll' = split (map (function e::l -> e,l | _ -> raise Not_found) ll) in + l :: transpose ll' + +let rec range min max = + if min >= max then [] else min :: range (min + 1) max + +let sum l = List.fold_left (+) 0 l + +let rec filter_some_with f = function + | [] -> [] + | e :: l -> + match f e with + | None -> filter_some_with f l + | Some e' -> e' :: filter_some_with f l + +let rec filter_some = function + | [] -> [] + | None :: l -> filter_some l + | Some e :: l -> e :: filter_some l + +let rec difference l = function + | [] -> l + | e::l' -> difference (filter ((<>) e) l) l' + +let rec difference_ eq l = function + | [] -> l + | e::l' -> + let l2 = filter (fun e' -> not (eq e e')) l in + difference_ eq l2 l' + +let intersection_by is_same l1 l2 = filter (fun e -> exists (is_same e) l2) l1 + +let intersection_and_differences eq l1 l2 = + let rec both inter l2_only = function + | [], l2 -> inter, [], rev l2_only @ l2 + | l1, [] -> inter, l1, rev l2_only + | l1, e2 :: l2' -> + match partition (eq e2) l1 with + | [], _ -> both inter (e2 :: l2_only) (l1, l2') + | _, l1' -> both (e2 :: inter) l2_only (l1', l2') + in both [] [] (l1, l2) + +let rec triangularize = function + | [] -> [] + | e::l -> (e,l) :: triangularize l + +let diagonalize l = + map_index (fun a i -> + a, filter_index (fun _ j -> i <> j) l + ) l + +let rec list_of_nonempty_sublists = function + | [] -> [] + | e :: l -> + let l' = list_of_nonempty_sublists l in + [e] :: l' @ map (fun l -> e :: l) l' + +let rec graph_is_sorted_by eq = function + | [] -> true + | (_,deps) :: l -> + for_all (fun e -> try let _ = assoc_by eq e l in false with Not_found -> true) deps && graph_is_sorted_by eq l + +let graph_closure_by eq graph = + let err = ref None in + try + let graph_rev = collect (fun (i, l) -> map (fun e -> (e, i)) l) graph in + let bothway = map (fun (i,l) -> i, (l, all_assoc_by eq i graph_rev)) graph in + let closed = fold_left (fun graph j -> + let next, prev = assoc_by eq j graph in + let graph2 = fold_left (fun graph i -> + if member_ eq i next then (err := Some(j,i); raise GraphSort_circular_deps) else + update_assoc_by eq (fun (i_next,i_prev) -> i_next @ next, i_prev) i graph + ) graph (filter (fun a -> not (eq a j)) prev) in + let graph3 = fold_left (fun graph k -> + if member_ eq k prev then (err := Some(j,k); raise GraphSort_circular_deps) else + update_assoc_by eq (fun (k_next,k_prev) -> k_next, k_prev @ prev) k graph + ) graph2 (filter (fun a -> not (eq a j)) next) in + graph3 + ) bothway (map fst bothway) in + Or_some (map (fun (e,(next,_)) -> e, uniq_ eq next) closed) + with GraphSort_circular_deps -> + Or_error (some !err) + +let rec graph_sort_by eq l = + let cmp (_, deps_a) (b, _) = if member_ eq b deps_a then 1 else -1 in + let rec sort_it = function + | [] -> [] + | [e] -> [e] + | e::l -> + let l' = sort_it l in + let gt, lt = break_at (fun ((_, deps) as e') -> deps = [] or cmp e e' = 1) l' in + gt @ [e] @ lt + in + map_or_option (fun l' -> + let l_sorted = rev (sort_it l') in + if not (graph_is_sorted_by eq l_sorted) then internal_error "graph_sort failed" else + l_sorted + ) (graph_closure_by eq l) + +let int_sort l = sort (fun a b -> a - b) l + +let str_begins_with prefix s = + String.sub s 0 (min (String.length s) (String.length prefix)) = prefix + +let rec strstr s subs = + let len_s, len_subs = String.length s, String.length subs in + let rec rec_ i = + let i' = String.index_from s i subs.[0] in + if i' + len_subs <= len_s then + if String.sub s i' len_subs = subs then + i' + else + rec_ (i' + 1) + else + raise Not_found + in + rec_ 0 + +let str_contains s subs = + try + let _ = strstr s subs in true + with Not_found -> false + +let str_ends_with s suffix = + let len = min (String.length s) (String.length suffix) in + String.sub s (String.length s - len) len = suffix + +let chop = function + | "" -> "" + | s -> String.sub s 0 (String.length s - 1) + +let chomps s = + let i = ref (String.length s - 1) in + while !i >= 0 && (s.[!i] = ' ' || s.[!i] = '\t') do decr i done ; + String.sub s 0 (!i+1) + +let rec times e = function + | 0 -> [] + | n -> e :: times e (n-1) + +let skip_n_char_ beg end_ s = + let full_len = String.length s in + if beg < full_len && full_len - beg - end_ > 0 + then String.sub s beg (full_len - beg - end_) + else "" +let skip_n_char n s = skip_n_char_ n 0 s + +let rec non_index_from s beg c = + if s.[beg] = c then non_index_from s (beg+1) c else beg +let non_index s c = non_index_from s 0 c + +let rec non_rindex_from s beg c = + if s.[beg] = c then non_rindex_from s (beg-1) c else beg +let non_rindex s c = non_rindex_from s (String.length s - 1) c + +let rec explode_string = function + | "" -> [] + | s -> (String.get s 0) :: explode_string (String.sub s 1 (String.length s - 1)) + +let count_matching_char s c = + let rec count_matching_char_ nb i = + try + let i' = String.index_from s i c in + count_matching_char_ (nb+1) (i'+1) + with Not_found -> nb + in + count_matching_char_ 0 0 + +let is_uppercase c = Char.lowercase c <> c +let is_lowercase c = Char.uppercase c <> c + +let char_is_alphanumerical c = + let i = Char.code c in + Char.code 'a' <= i && i <= Char.code 'z' || + Char.code 'A' <= i && i <= Char.code 'Z' || + Char.code '0' <= i && i <= Char.code '9' + +let char_is_alphanumerical_ c = + let i = Char.code c in + Char.code 'a' <= i && i <= Char.code 'z' || + Char.code 'A' <= i && i <= Char.code 'Z' || + Char.code '0' <= i && i <= Char.code '9' || c = '_' + +let char_is_alpha c = + let i = Char.code c in + Char.code 'a' <= i && i <= Char.code 'z' || + Char.code 'A' <= i && i <= Char.code 'Z' + +let char_is_number c = + let i = Char.code c in + Char.code '0' <= i && i <= Char.code '9' + +let count_chars_in_string s c = + let rec rec_count_chars_in_string from = + try + let from' = String.index_from s from c in + 1 + rec_count_chars_in_string (from' + 1) + with + Not_found -> 0 + in rec_count_chars_in_string 0 + +let rec string_fold_left f val_ s = + let val_ = ref val_ in + for i = 0 to String.length s - 1 do + val_ := f !val_ s.[i] + done ; + !val_ + +(* +let rec string_forall_with f i s = + try + f s.[i] && string_forall_with f (i+1) s + with Invalid_argument _ -> true +*) +let string_forall_with f i s = + let len = String.length s in + let rec string_forall_with_ i = + i >= len || f s.[i] && string_forall_with_ (i+1) + in string_forall_with_ i + +let starts_with_non_lowercase s = s <> "" && s.[0] <> '_' && not (is_lowercase s.[0]) + +let rec fold_lines f init chan = + try + let line = input_line chan in + fold_lines f (f init line) chan + with End_of_file -> init +let readlines chan = List.rev (fold_lines (fun l e -> e::l) [] chan) + +let split_at c s = + let rec split_at_ accu i = + try + let i' = String.index_from s i c in + split_at_ (String.sub s i (i' - i) :: accu) (i'+1) + with Not_found -> rev (skip_n_char i s :: accu) + in + split_at_ [] 0 + +let split_at2 c1 c2 s = + let rec split_at2_ accu i i2 = + try + let i3 = String.index_from s i2 c1 in + if s.[i3+1] = c2 then split_at2_ (String.sub s i (i3 - i) :: accu) (i3+2) (i3+2) else + split_at2_ accu i i3 + with Not_found | Invalid_argument _ -> rev (skip_n_char i s :: accu) + in + split_at2_ [] 0 0 + +let words s = + let rec words_ accu i s = + try + let i2 = non_index_from s i ' ' in + try + let i3 = String.index_from s i2 ' ' in + words_ (String.sub s i2 (i3 - i2) :: accu) (i3+1) s + with Not_found -> rev (skip_n_char i2 s :: accu) + with Invalid_argument _ -> rev accu + in + collect (words_ [] 0) (split_at '\n' s) + +let to_CamelCase s_ = + let l = ref [] in + let s = String.copy s_ in + for i = 1 to String.length s - 1 do + if is_uppercase (String.unsafe_get s i) && is_lowercase (String.unsafe_get s (i-1)) then ( + String.set s i (Char.lowercase (String.get s i)) ; + l := i :: !l + ) + done ; + if !l = [] then None else + let offset, s' = fold_left (fun (offset, s') i -> + i, s' ^ String.sub s offset (i-offset) ^ "_" + ) (0, "") (rev !l) in + Some (s' ^ String.sub s offset (String.length s - offset)) + +let concat_symlink file link = + if str_begins_with "..//" link then (* ..//foo => /foo *) + skip_n_char 3 link + else + let file = if str_ends_with file "/" then chop file else file in (* s|/$|| *) + let rec reduce file link = + if str_begins_with "../" link then + let file = String.sub file 0 (String.rindex file '/') in (* s|/[^/]+$|| *) + reduce file (skip_n_char 3 link) + else + file ^ "/" ^ link + in + reduce file link + +let expand_symlinks file = + match split_at '/' file with + | "" :: l -> + let rec remove_dotdot accu nb = function + | [] -> if nb = 0 then accu else failwith "remove_dotdot" + | ".." :: l -> remove_dotdot accu (nb + 1) l + | e :: l -> if nb > 0 then remove_dotdot accu (nb - 1) l else remove_dotdot (e :: accu) nb l + in + let l = remove_dotdot [] 0 (List.rev l) in + List.fold_left (fun file piece -> + fix_point (fun file -> + try concat_symlink file ("../" ^ Unix.readlink file) + with _ -> file + ) (file ^ "/" ^ piece)) "" l + | _ -> internal_error (Printf.sprintf "expand_symlinks: %s is relative\n" file) + +let mtime f = (Unix.stat f).Unix.st_mtime + +let rec updir dir nb = + if nb = 0 then dir else + match dir with + | "." -> String.concat "/" (times ".." nb) + | _ -> + if Filename.basename dir = ".." then + dir ^ "/" ^ String.concat "/" (times ".." nb) + else + updir (Filename.dirname dir) (nb-1) + +let (string_of_ref : 'a ref -> string) = fun r -> + Printf.sprintf "0x%x" (Obj.magic r : int) + +let print_endline_flush s = print_endline s ; flush stdout + +let is_int n = n = floor n + +(* total order *) +let rec compare_lists cmp l1 l2 = + match l1, l2 with + | [], [] -> 0 + | [], _ -> -1 + | _, [] -> 1 + | e1::l1, e2::l2 -> + match cmp e1 e2 with + | 0 -> compare_lists cmp l1 l2 + | v -> v + +let compare_best a b = + match a, b with + | 0, 0 -> 0 + | 1, 1 | 1, 0 | 0, 1 -> 1 + | -1, -1 | -1, 0 | 0, -1 -> -1 + | 1, -1 | -1, 1 -> raise Not_comparable + | _ -> failwith "uh?" + +(* partial order *) +let combine_comparison_list l = + fold_left compare_best 0 l + +let min_with_cmp less_than a b = + if less_than a b then a + else if less_than b a then b + else raise Not_comparable + +let max_with_cmp less_than a b = + if less_than a b then b + else if less_than b a then a + else raise Not_comparable + +let rec fold_left2_compare f e l1 l2 = + match l1, l2 with + | [], [] -> e + | e1::l1, e2::l2 -> fold_left2_compare f (f e e1 e2) l1 l2 + | _ -> raise Not_comparable + +let rec exists_compare cmp = function + | [] -> raise Not_comparable + | e :: l -> try cmp e with Not_comparable -> exists_compare cmp l + +let forall_compare cmp = fold_left (fun n e -> compare_best n (cmp e)) 0 +let forall2_compare cmp = fold_left2_compare (fun n e1 e2 -> compare_best n (cmp e1 e2)) 0 + +let exists2_compare left_dropping cmp l1 l2 = + let rec forall_compare_ n = function + | [], [] -> n + | _, [] -> compare_best n left_dropping + | [], _ -> compare_best n (-left_dropping) + | e1::l1, e2::l2 -> + match try Some (cmp e1 e2) with Not_comparable -> None with + | Some n' -> forall_compare_ (compare_best n n') (l1, l2) + | None -> + if n = left_dropping then + forall_compare_ left_dropping (l1, e2::l2) + else if n = -left_dropping then + forall_compare_ (-left_dropping) (e1::l1, l2) + else + (* need to try both *) + try forall_compare_ left_dropping (l1, e2::l2) + with Not_comparable -> forall_compare_ (-left_dropping) (e1::l1, l2) + in forall_compare_ 0 (l1, l2) + + +let rec compare_sorted_sets is_same l1 l2 = + match l1, l2 with + | [], [] -> 0 + | [], _ -> -1 + | _, [] -> 1 + | e1::l1, e2::l2 -> if is_same e1 e2 then compare_sorted_sets is_same l1 l2 else raise Not_found + +let scan_list_while_modifying f l = + let rec scan_list_while_modifying_ prev = function + | [] -> prev + | e :: next -> + let prev', next' = some_or (f prev next e) (prev @ [e], next) in + scan_list_while_modifying_ prev' next' + in scan_list_while_modifying_ [] l + +let bools2compare = function + | true, true -> 0 + | true, false -> -1 + | false, true -> 1 + | _ -> raise Not_comparable + +let lpush l e = l := e :: !l + +(* +let is_greater2compare is_greater a b = + match is_greater a b, is_greater b a with + + *) + +module OrderedString = + struct + type t = string + let compare = compare + end;; + +module StringSet = Set.Make(OrderedString);; + +let stringSet_to_list = StringSet.elements +let stringSet_add set e = StringSet.add e set +let stringSet_difference = StringSet.diff +let list_to_StringSet l = fold_left stringSet_add StringSet.empty l + +(* this character messes emacs caml mode *) +let char_quote = '"' diff --git a/src/common.mli b/src/common.mli new file mode 100644 index 0000000..86a13cd --- /dev/null +++ b/src/common.mli @@ -0,0 +1,276 @@ +exception Found +exception Not_comparable +exception GraphSort_circular_deps +type ('a, 'b) either = Left of 'a | Right of 'b +type ('a, 'b) or_option = Or_some of 'a | Or_error of 'b +val internal_error : string -> 'a +val id : 'a -> 'a +val double : 'a -> 'a * 'a +val swap : 'a * 'b -> 'b * 'a +val safe_tl : 'a list -> 'a list +val fstfst : ('a * 'b) * 'c -> 'a +val sndfst : ('a * 'b) * 'c -> 'b +val fstsnd : 'a * ('b * 'c) -> 'b +val sndsnd : 'a * ('b * 'c) -> 'c +val fst3 : 'a * 'b * 'c -> 'a +val snd3 : 'a * 'b * 'c -> 'b +val ter3 : 'a * 'b * 'c -> 'c +val sndter3 : 'a * 'b * 'c -> 'b * 'c +val o : ('a -> 'b) -> ('c -> 'a) -> 'c -> 'b +val curry : ('a * 'b -> 'c) -> 'a -> 'b -> 'c +val uncurry : ('a -> 'b -> 'c) -> 'a * 'b -> 'c +val uncons : 'a list -> 'a * 'a list +val has_env : string -> bool +val some : 'a option -> 'a +val some_or : 'a option -> 'a -> 'a +val option2l : 'a option -> 'a list +val prefer_some : ('a -> 'a -> 'a) -> 'a option -> 'a option -> 'a option +val collect_accu : ('a -> 'b list) -> 'b list -> 'a list -> 'b list +val collect : ('a -> 'b list) -> 'a list -> 'b list +val merge_some : ('a -> 'a -> 'a) -> 'a option -> 'a option -> 'a option +val uniq : 'a list -> 'a list +val uniq_ : ('a -> 'a -> bool) -> 'a list -> 'a list +val non_uniq : 'a list -> 'a list +val member_ : ('a -> 'b -> bool) -> 'a -> 'b list -> bool +val find_some : ('a -> 'b option) -> 'a list -> 'b +val fold_left1 : ('a -> 'a -> 'a) -> 'a list -> 'a +val find_index : 'a -> 'a list -> int +val find_some_ : ('a -> 'b option) -> 'a list -> 'b option +val fpartition : ('a -> 'b option) -> 'a list -> 'b list * 'a list +val partition_either : + ('a -> ('b, 'c) either) -> 'a list -> 'b list * 'c list +val keep_best : ('a * 'a -> 'a option) -> 'a list -> 'a list +val keep_bests : ('a * 'a -> 'a option) -> 'a list -> 'a list +val fold_right1 : ('a -> 'a -> 'a) -> 'a list -> 'a +val for_all2_ : ('a -> 'b -> bool) -> 'a list -> 'b list -> bool +val for_all2_true : ('a -> 'b -> bool) -> 'a list -> 'b list -> bool +val maxl : 'a list -> 'a +val stack2list : 'a Stack.t -> 'a list +val stack_exists : ('a -> bool) -> 'a Stack.t -> bool +val queue2list : 'a Queue.t -> 'a list +val fix_point : ('a -> 'a) -> 'a -> 'a +val fix_point_withenv : ('a -> 'b -> 'b * 'a) -> 'a -> 'b -> 'b * 'a +val fix_point_ : int -> ('a -> 'a) -> 'a -> 'a * int +val group_by_2 : 'a list -> ('a * 'a) list +val fluid_let : 'a ref -> 'a -> (unit -> 'b) -> 'b +val do0_withenv : + (('a -> unit) -> 'b -> 'c) -> ('d -> 'a -> 'd) -> 'd -> 'b -> 'd +val do0_withenv2 : + (('a -> 'b -> unit) -> 'c -> 'd) -> + ('e -> 'a -> 'b -> 'e) -> 'e -> 'c -> 'e +val do_withenv : + (('a -> 'b) -> 'c -> 'd) -> ('e -> 'a -> 'b * 'e) -> 'e -> 'c -> 'd * 'e +val do2_withenv : + (('a -> 'b -> 'c) -> 'd -> 'e -> 'f) -> + ('g -> 'a -> 'b -> 'c * 'g) -> 'g -> 'd -> 'e -> 'f * 'g +val do_collect : + (('a -> 'b -> unit) -> 'c -> 'd) -> ('a -> 'b -> 'e list) -> 'c -> 'e list +val map_withitself : ('a list -> 'a -> 'a) -> 'a list -> 'a list +val map_t2 : ('a -> 'b) -> 'a * 'a -> 'b * 'b +val map_t3 : ('a -> 'b) -> 'a * 'a * 'a -> 'b * 'b * 'b +val map_option : ('a -> 'b) -> 'a option -> 'b option +val map_optionoption : ('a -> 'b option) -> 'a option -> 'b option +val t2_option2option_t2 : 'a option * 'b option -> ('a * 'b) option +val l_option2option_l : 'a option list -> 'a list option +val map_option_env : ('a -> 'b) -> 'a option * 'c -> 'b option * 'c +val t2_to_list : 'a * 'a -> 'a list +val t3_to_list : 'a * 'a * 'a -> 'a list +val if_some : bool -> 'a -> 'a option +val fold_left_option : ('a -> 'b -> 'a option) -> 'a -> 'b list -> 'a option +val collect_some_withenv : + ('a -> 'b -> 'c option * 'a) -> 'a -> 'b list -> 'c list * 'a +val for_all_option_withenv : + ('a list -> 'b) -> + ('c -> 'd -> 'a option * 'c) -> 'c -> 'd list -> 'b option * 'c +val for_all2_option_withenv : + ('a list -> 'b) -> + ('c -> 'd -> 'e -> 'a option * 'c) -> + 'c -> 'd list -> 'e list -> 'b option * 'c +val map_or_option : ('a -> 'b) -> ('a, 'c) or_option -> ('b, 'c) or_option +val map_index : ('a -> int -> 'b) -> 'a list -> 'b list +val filter_index : ('a -> int -> bool) -> 'a list -> 'a list +val iter_index : ('a -> int -> 'b) -> 'a list -> unit +val map_fst : ('a -> 'b) -> 'a * 'c -> 'b * 'c +val map_snd : ('a -> 'b) -> 'c * 'a -> 'c * 'b +val map_withenv : ('a -> 'b -> 'c * 'a) -> 'a -> 'b list -> 'c list * 'a +val find_withenv : ('a -> 'b -> bool * 'a) -> 'a -> 'b list -> 'b * 'a +val filter_withenv : ('a -> 'b -> bool * 'a) -> 'a -> 'b list -> 'b list * 'a +val exists_withenv : ('a -> 'b -> bool * 'a) -> 'a -> 'b list -> bool * 'a +val map_t2_withenv : ('a -> 'b -> 'c * 'a) -> 'a -> 'b * 'b -> ('c * 'c) * 'a +val for_all_withenv : ('a -> 'b -> bool * 'a) -> 'a -> 'b list -> bool * 'a +val collect_withenv : + ('a -> 'b -> 'c list * 'a) -> 'a -> 'b list -> 'c list * 'a +val partition_either_withenv : + ('a -> 'b -> ('c, 'd) either * 'a) -> + 'a -> 'b list -> ('c list * 'd list) * 'a +val map2_withenv : + ('a -> 'b -> 'c -> 'd * 'a) -> 'a -> 'b list -> 'c list -> 'd list * 'a +val for_all2_withenv : + ('a -> 'b -> 'c -> bool * 'a) -> 'a -> 'b list -> 'c list -> bool * 'a +val take : int -> 'a list -> 'a list +val last_n : int -> 'a list -> 'a list +val last : 'a list -> 'a +val skipfirst : 'a -> 'a list -> 'a list +val removelast : 'a list -> 'a list +val split_last : 'a list -> 'a list * 'a +val iter_assoc_val : ('a -> unit) -> ('b * 'a) list -> unit +val map_assoc_val : ('a -> 'b) -> ('c * 'a) list -> ('c * 'b) list +val assoc_or_fail : 'a -> ('a * 'b) list -> 'b +val assoc_by : ('a -> 'b -> bool) -> 'a -> ('b * 'c) list -> 'c +val update_assoc_by : + ('a -> 'b -> bool) -> ('c -> 'c) -> 'a -> ('b * 'c) list -> ('b * 'c) list +val update_assoc : ('a -> 'a) -> 'b -> ('b * 'a) list -> ('b * 'a) list +val update_assoc_by_with_default : + 'a -> + ('b -> 'b -> bool) -> ('a -> 'a) -> 'b -> ('b * 'a) list -> ('b * 'a) list +val update_all_assoc_by : + ('a -> 'b -> bool) -> ('c -> 'c) -> 'a -> ('b * 'c) list -> ('b * 'c) list +val rassoc : 'a -> ('b * 'a) list -> 'b +val all_assoc : 'a -> ('a * 'b) list -> 'b list +val all_assoc_by : ('a -> 'b -> bool) -> 'a -> ('b * 'c) list -> 'c list +val prepare_want_all_assoc : ('a * 'b) list -> ('a * 'b list) list +val prepare_want_all_assoc_by : + ('a -> 'a -> bool) -> ('a * 'a) list -> ('a * 'a list) list +val prepare_want_all_assoc_by_ : + ('a -> 'a -> bool) -> + ('b -> 'b -> bool) -> ('a * 'b) list -> ('a * 'b list) list +val count_uniq : 'a list -> ('a * int) list +val repeat : 'a -> int -> 'a list +val inits : 'a list -> 'a list list +val tails : 'a list -> 'a list list +val apply : ('a -> 'b) -> 'a -> 'b +val map3 : ('a -> 'b -> 'c -> 'd) -> 'a list -> 'b list -> 'c list -> 'd list +val filter2 : ('a * 'b -> bool) -> 'a list -> 'b list -> 'a list * 'b list +val break_at : ('a -> bool) -> 'a list -> 'a list * 'a list +val break : 'a -> 'a list -> 'a list * 'a list +val drop_while : ('a -> bool) -> 'a list -> 'a list +val break_at_indice : int -> 'a list -> 'a list * 'a list +val rev_nth : 'a -> 'a list -> int +val getset_nth : 'a list -> int -> ('a -> 'a) -> 'a list +val set_nth : 'a list -> int -> 'a -> 'a list +val adjustModDown : int -> int -> int +val adjustModUp : int -> int -> int +val hashtbl_find : ('a -> 'b -> bool) -> ('a, 'b) Hashtbl.t -> 'a +val hashtbl_map : ('a -> 'b -> 'b) -> ('a, 'b) Hashtbl.t -> unit +val hashtbl_values : ('a, 'b) Hashtbl.t -> 'b list +val hashtbl_keys : ('a, 'b) Hashtbl.t -> 'a list +val hashtbl_to_list : ('a, 'b) Hashtbl.t -> ('a * 'b) list +val hashtbl_collect : ('a -> 'b -> 'c list) -> ('a, 'b) Hashtbl.t -> 'c list +val hashtbl_exists : ('a -> 'b -> bool) -> ('a, 'b) Hashtbl.t -> bool +val memoize : ('a -> 'b) -> 'a -> 'b +val array_shift : 'a array -> 'a array +val array_last_n : int -> 'a array -> 'a array +val array_collect : ('a -> 'b list) -> 'a array -> 'b list +val lvector_product : 'a list list -> 'a list list +val vector_product2 : 'a list -> 'a list -> ('a * 'a) list +val transpose : 'a list list -> 'a list list +val range : int -> int -> int list +val sum : int list -> int +val filter_some_with : ('a -> 'b option) -> 'a list -> 'b list +val filter_some : 'a option list -> 'a list +val difference : 'a list -> 'a list -> 'a list +val difference_ : ('a -> 'b -> bool) -> 'b list -> 'a list -> 'b list +val intersection_by : ('a -> 'b -> bool) -> 'a list -> 'b list -> 'a list +val intersection_and_differences : + ('a -> 'b -> bool) -> 'b list -> 'a list -> 'a list * 'b list * 'a list +val triangularize : 'a list -> ('a * 'a list) list +val diagonalize : 'a list -> ('a * 'a list) list +val list_of_nonempty_sublists : 'a list -> 'a list list +val graph_is_sorted_by : ('a -> 'b -> bool) -> ('b * 'a list) list -> bool +val graph_closure_by : + ('a -> 'a -> bool) -> + ('a * 'a list) list -> (('a * 'a list) list, 'a * 'a) or_option +val graph_sort_by : + ('a -> 'a -> bool) -> + ('a * 'a list) list -> (('a * 'a list) list, 'a * 'a) or_option +val int_sort : int list -> int list +val str_begins_with : string -> string -> bool +val strstr : string -> string -> int +val str_contains : string -> string -> bool +val str_ends_with : string -> string -> bool +val chop : string -> string +val chomps : string -> string +val times : 'a -> int -> 'a list +val skip_n_char_ : int -> int -> string -> string +val skip_n_char : int -> string -> string +val non_index_from : string -> int -> char -> int +val non_index : string -> char -> int +val non_rindex_from : string -> int -> char -> int +val non_rindex : string -> char -> int +val explode_string : string -> char list +val count_matching_char : string -> char -> int +val is_uppercase : char -> bool +val is_lowercase : char -> bool +val char_is_alphanumerical : char -> bool +val char_is_alphanumerical_ : char -> bool +val char_is_alpha : char -> bool +val char_is_number : char -> bool +val count_chars_in_string : string -> char -> int +val string_fold_left : ('a -> char -> 'a) -> 'a -> string -> 'a +val string_forall_with : (char -> bool) -> int -> string -> bool +val starts_with_non_lowercase : string -> bool +val fold_lines : ('a -> string -> 'a) -> 'a -> in_channel -> 'a +val readlines : in_channel -> string list +val split_at : char -> string -> string list +val split_at2 : char -> char -> string -> string list +val words : string -> string list +val to_CamelCase : string -> string option +val concat_symlink : string -> string -> string +val expand_symlinks : string -> string +val mtime : string -> float +val updir : string -> int -> string +val string_of_ref : 'a ref -> string +val print_endline_flush : string -> unit +val is_int : float -> bool +val compare_lists : ('a -> 'b -> int) -> 'a list -> 'b list -> int +val compare_best : int -> int -> int +val combine_comparison_list : int list -> int +val min_with_cmp : ('a -> 'a -> bool) -> 'a -> 'a -> 'a +val max_with_cmp : ('a -> 'a -> bool) -> 'a -> 'a -> 'a +val fold_left2_compare : + ('a -> 'b -> 'c -> 'a) -> 'a -> 'b list -> 'c list -> 'a +val exists_compare : ('a -> 'b) -> 'a list -> 'b +val forall_compare : ('a -> int) -> 'a list -> int +val forall2_compare : ('a -> 'b -> int) -> 'a list -> 'b list -> int +val exists2_compare : int -> ('a -> 'b -> int) -> 'a list -> 'b list -> int +val compare_sorted_sets : ('a -> 'b -> bool) -> 'a list -> 'b list -> int +val scan_list_while_modifying : + ('a list -> 'a list -> 'a -> ('a list * 'a list) option) -> + 'a list -> 'a list +val bools2compare : bool * bool -> int +val lpush : 'a list ref -> 'a -> unit +module OrderedString : sig type t = string val compare : 'a -> 'a -> int end +module StringSet : + sig + type elt = OrderedString.t + type t = Set.Make(OrderedString).t + val empty : t + val is_empty : t -> bool + val mem : elt -> t -> bool + val add : elt -> t -> t + val singleton : elt -> t + val remove : elt -> t -> t + val union : t -> t -> t + val inter : t -> t -> t + val diff : t -> t -> t + val compare : t -> t -> int + val equal : t -> t -> bool + val subset : t -> t -> bool + val iter : (elt -> unit) -> t -> unit + val fold : (elt -> 'a -> 'a) -> t -> 'a -> 'a + val for_all : (elt -> bool) -> t -> bool + val exists : (elt -> bool) -> t -> bool + val filter : (elt -> bool) -> t -> t + val partition : (elt -> bool) -> t -> t * t + val cardinal : t -> int + val elements : t -> elt list + val min_elt : t -> elt + val max_elt : t -> elt + val choose : t -> elt + val split : elt -> t -> t * bool * t + end +val stringSet_to_list : StringSet.t -> StringSet.elt list +val stringSet_add : StringSet.t -> StringSet.elt -> StringSet.t +val stringSet_difference : StringSet.t -> StringSet.t -> StringSet.t +val list_to_StringSet : StringSet.elt list -> StringSet.t +val char_quote : char diff --git a/src/config_file.ml b/src/config_file.ml new file mode 100644 index 0000000..a5ee94f --- /dev/null +++ b/src/config_file.ml @@ -0,0 +1,40 @@ +open Common + +type config_file = { + basedir : int option ; + } + +let ignored_packages = ref [] + +let default = { basedir = None } + + +let config_cache = Hashtbl.create 16 + +let read dir = + try Hashtbl.find config_cache dir with Not_found -> + try + let file_name = dir ^ "/.perl_checker" in + let fh = open_in file_name in + let config = + fold_lines (fun config line -> + match words line with + | [ "Basedir"; ".." ] -> { config with basedir = Some 1 } + | [ "Basedir"; "../.." ] -> { config with basedir = Some 2 } + | [] -> config (* blank line *) + | [ "Ignore"; pkg ] + | [ pkg ] (* the deprecated form *) + -> lpush ignored_packages pkg; config + | _ -> prerr_endline (Printf.sprintf "bad line \"%s\" in %s" line file_name); config + ) default fh + in + Hashtbl.add config_cache dir config ; + if !Flags.verbose then print_endline_flush ("reading config file " ^ file_name); + config + with Sys_error _ -> default + + +let rec read_any dir depth = + if depth = 0 then () else + let _ = read dir in + read_any (updir dir 1) (depth - 1) diff --git a/src/config_file.mli b/src/config_file.mli new file mode 100644 index 0000000..d5ad2f2 --- /dev/null +++ b/src/config_file.mli @@ -0,0 +1,6 @@ +type config_file = { basedir : int option; } +val ignored_packages : string list ref +val default : config_file +val config_cache : (string, config_file) Hashtbl.t +val read : string -> config_file +val read_any : string -> int -> unit diff --git a/src/flags.ml b/src/flags.ml new file mode 100644 index 0000000..187c140 --- /dev/null +++ b/src/flags.ml @@ -0,0 +1,43 @@ +open Common +open Types + +let verbose = ref false +let quiet = ref false +let generate_pot = ref false +let expand_tabs = ref (Some 8) +let no_cache = ref false + +let check_unused_global_vars = ref false +let check_white_space = ref true +let check_suggest_simpler = ref true +let check_void = ref true +let check_context = ref true +let check_strange = ref true +let check_traps = ref true +let check_complex_expressions = ref true +let normalized_expressions = ref true +let check_help_perl_checker = ref true +let suggest_functional = ref true +let check_prototypes = ref true +let check_names = ref true +let check_import_export = ref true +let allow_MDK_Common = ref true + +let is_warning_type_set = function + | Warn_white_space -> !check_white_space + | Warn_suggest_simpler -> !check_suggest_simpler + | Warn_unused_global_vars -> !check_unused_global_vars + | Warn_void -> !check_void + | Warn_context -> !check_context + | Warn_strange -> !check_strange + | Warn_traps -> !check_traps + | Warn_complex_expressions -> !check_complex_expressions + | Warn_normalized_expressions -> !normalized_expressions + | Warn_suggest_functional -> !suggest_functional + | Warn_prototypes -> !check_prototypes + | Warn_names -> !check_names + | Warn_import_export -> !check_import_export + | Warn_MDK_Common -> !allow_MDK_Common + | Warn_help_perl_checker -> !check_help_perl_checker + +let are_warning_types_set l = not !quiet && List.for_all is_warning_type_set l diff --git a/src/flags.mli b/src/flags.mli new file mode 100644 index 0000000..2dc3b26 --- /dev/null +++ b/src/flags.mli @@ -0,0 +1,22 @@ +val verbose : bool ref +val quiet : bool ref +val generate_pot : bool ref +val expand_tabs : int option ref +val no_cache : bool ref +val check_unused_global_vars : bool ref +val check_white_space : bool ref +val check_suggest_simpler : bool ref +val check_void : bool ref +val check_context : bool ref +val check_strange : bool ref +val check_traps : bool ref +val check_complex_expressions : bool ref +val normalized_expressions : bool ref +val check_help_perl_checker : bool ref +val suggest_functional : bool ref +val check_prototypes : bool ref +val check_names : bool ref +val check_import_export : bool ref +val allow_MDK_Common : bool ref +val is_warning_type_set : Types.warning -> bool +val are_warning_types_set : Types.warning list -> bool diff --git a/src/global_checks.ml b/src/global_checks.ml new file mode 100644 index 0000000..a63e652 --- /dev/null +++ b/src/global_checks.ml @@ -0,0 +1,639 @@ +open Types +open Common +open Printf +open Config_file +open Parser_helper +open Tree + +type state = { + per_files : (string, per_file) Hashtbl.t ; + per_packages : (string, per_package) Hashtbl.t ; + methods : (string, (string * variable_used ref * prototype option) list) Hashtbl.t ; + global_vars_used : ((context * string * string) * pos) list ref ; + packages_being_classes : (string, unit) Hashtbl.t ; + packages_dependencies : (string * string, unit) Hashtbl.t ; + packages_dependencies_maybe : (string * string * string, unit) Hashtbl.t ; + } + +type vars = { + my_vars : ((context * string) * (pos * variable_used ref * prototype option)) list list ; + our_vars : ((context * string) * (pos * variable_used ref * prototype option)) list list ; + locally_imported : ((context * string) * (string * variable_used ref * prototype option)) list ; + required_vars : (context * string * string) list ; + current_package : per_package ; + is_toplevel : bool ; + write_only : bool ; + state : state ; + } + + +let rec get_imported state current_package (package_name, (imports, pos)) = + try + let package_used = Hashtbl.find state.per_packages package_name in + let exports = package_used.exports in + let get_var_by_name var = + let (b, prototype) = + try sndter3 (Hashtbl.find package_used.vars_declared var) + with Not_found -> + try + sndter3 (List.assoc var (get_imports state package_used)) + with Not_found -> + warn_with_pos [Warn_import_export] pos (sprintf "name %s is not defined in package %s" (variable2s var) package_name) ; + ref Access_various, None + in + var, (package_name, b, prototype) + in + match imports with + | None -> + let re = match exports.special_export with + | Some Re_export_all -> get_imports state package_used + | Some Fake_export_all -> + (* HACK: if package exporting-all is ignored, ignore package importing *) + if List.mem package_name !ignored_packages then Tree.ignore_package current_package.package_name; + + Hashtbl.fold (fun var (_pos, b, proto) l -> (var, (package_name, b, proto)) :: l) package_used.vars_declared [] + | _ -> [] in + let l = List.map get_var_by_name exports.export_auto in + re @ l + | Some l -> + let imports_vars = + collect (function + | I_raw, tag -> + (try + List.assoc tag exports.export_tags + with Not_found -> warn_with_pos [Warn_import_export] pos (sprintf "package %s doesn't export tag %s" package_name tag) ; []) + | variable -> + if List.mem variable exports.export_ok || List.mem variable exports.export_auto then + [ variable ] + else + (warn_with_pos [Warn_import_export] pos (sprintf "package %s doesn't export %s" package_name (variable2s variable)) ; []) + ) l + in + List.map get_var_by_name imports_vars + with Not_found -> [] + +and get_imports state package = + match !(package.imported) with + | Some l -> l + | None -> + let l = collect (get_imported state package) package.uses in + package.imported := Some l ; + l + +let do_para_comply_with_prototype para proto = + match proto with + | Some proto -> + (match para with + | [] as paras + | [List [List paras]] + | [List paras] -> + if List.exists is_not_a_scalar paras then 0 else + let len = List.length paras in + if len < proto.proto_nb_min then -1 + else (match proto.proto_nb_max with + | Some max -> if len > max then 1 else 0 + | None -> 0) + | _ -> 0) + | _ -> 0 + +let check_para_comply_with_prototype para proto = + match para with + | None -> () + | Some(pos, para) -> + match do_para_comply_with_prototype para proto with + | -1 -> warn_with_pos [Warn_prototypes] pos "not enough parameters" + | 1 -> warn_with_pos [Warn_prototypes] pos "too many parameters" + | _ -> () + +let is_anonymous_variable_name s = String.length s > 1 && s.[0] = '_' + +let add_to_packages_really_used state current_package used_name = + Hashtbl.replace state.packages_dependencies (current_package.package_name, used_name) () ; + (*List.iter (fun (p1, p2) -> prerr_endline (Printf.sprintf "%s -> %s" p1 p2)) (hashtbl_keys state.packages_dependencies);*) + () + +let add_to_packages_maybe_used state current_package used_name method_name = + Hashtbl.replace state.packages_dependencies_maybe (current_package.package_name, used_name, method_name) () ; + (*List.iter (fun (p1, p2) -> prerr_endline (Printf.sprintf "%s -> %s" p1 p2)) (hashtbl_keys state.packages_dependencies_maybe);*) + () + +let variable_used write_only used = + if !used != Access_various then + used := if write_only then Access_write_only else Access_various + +let is_my_declared vars t = + List.exists (fun l -> + List.mem_assoc t l && (variable_used vars.write_only (snd3 (List.assoc t l)) ; true) + ) vars.my_vars +let is_our_declared vars t = + List.exists (fun l -> + List.mem_assoc t l && (variable_used vars.write_only (snd3 (List.assoc t l)) ; true) + ) vars.our_vars + +let is_var_declared_raw write_only state package var para = + match + try + let _, used, proto = Hashtbl.find package.vars_declared var in + Some(used, proto) + with Not_found -> try + let package_name, used, proto = List.assoc var (get_imports state package) in + add_to_packages_really_used state package package_name ; + Some(used, proto) + with Not_found -> + None + with + | Some (used, proto) -> + check_para_comply_with_prototype para proto ; + variable_used write_only used ; + true + | None -> + false + +let is_var_declared vars var para = + List.mem_assoc var vars.locally_imported || + is_var_declared_raw vars.write_only vars.state vars.current_package var para + +let is_global_var_declared vars (context, fq, name) para = + try + let package = Hashtbl.find vars.state.per_packages fq in + add_to_packages_really_used vars.state vars.current_package package.package_name ; + is_var_declared_raw vars.write_only vars.state package (context, name) para + with Not_found -> false + + +let is_global_var context ident = + match context with + | I_scalar -> + (match ident with + | "@" | "!" | ">" | "\\" | "$" | "^A" | "'" | "/" | "?" | "<" | "^W" | "|" | "^I" | "&" | "." + | "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" -> true + | _ -> false) + | I_array -> + (match ident with + | "ARGV" | "INC" -> true + | _ -> false) + | I_hash -> + (match ident with + | "ENV" | "SIG" -> true + | _ -> false) + | I_star -> + (match ident with + | "STDIN" | "STDOUT" | "STDERR" | "DATA" + | "__FILE__" | "__LINE__" | "undef" -> true + | _ -> false) + | I_func -> + (match ident with + | "-b" | "-c" | "-d" | "-e" | "-f" | "-l" | "-r" | "-s" | "-w" | "-x" + | "abs" | "alarm" | "atan2" | "bless" + | "caller" | "chdir" | "chmod" | "chomp" | "chop" | "chown" | "chr" | "chroot" | "close" | "closedir" | "cos" | "crypt" + | "defined" | "delete" | "die" + | "each" | "endpwent" | "eof" | "eval" | "exec" | "exists" | "exit" + | "fcntl" | "fileno" | "flock" | "formline" | "fork" + | "gethostbyaddr" | "gethostbyname" | "getgrent" | "getgrnam" | "getgrgid" | "getppid" | "getpwent" | "getpwnam" | "getpwuid" | "getservbyname" | "glob" | "gmtime" | "goto" | "grep" | "hex" + | "index" | "int" | "ioctl" | "join" | "keys" | "kill" + | "last" | "lc" | "lcfirst" | "length" | "link" | "localtime" | "log" | "lstat" + | "map" | "mkdir" | "next" | "no" | "oct" | "open" | "opendir" | "ord" + | "pack" | "pipe" | "pop" | "print" | "printf" | "push" | "quotemeta" + | "rand" | "read" | "readdir" | "readlink" | "redo" | "ref" | "rename" | "require" | "return" | "reverse" | "rindex" | "rmdir" + | "scalar" | "seek" | "select" | "setpwent" | "shift" | "sin" | "sleep" | "sort" | "splice" | "split" | "sprintf" | "sqrt" | "stat" | "substr" + | "symlink" | "syscall" | "sysopen" | "sysread" | "sysseek" | "system" | "syswrite" | "tie" | "time" + | "uc" | "ucfirst" | "umask" | "undef" | "unlink" | "unpack" | "unshift" | "utime" | "values" | "vec" | "wait" | "waitpid" | "wantarray" | "warn" | "write" + -> true + + | _ -> false) + | _ -> false + +let check_variable (context, var) vars para = + match var with + | Ident(_, s, pos) when context <> I_func && is_anonymous_variable_name s && s <> "__FILE__" && s <> "__LINE__" -> + warn_with_pos [Warn_normalized_expressions] pos (sprintf "variable %s must not be used\n (variable with name _XXX are reserved for unused variables)" (variable2s(context, string_of_fromparser var))) + | Ident(Some pkg, _, _) when uses_external_package pkg || List.mem pkg !ignored_packages -> () + | Ident(None, ident, pos) -> + if is_my_declared vars (context, ident) || is_our_declared vars (context, ident) || is_var_declared vars (context, ident) para || is_global_var context ident + then () + else warn_with_pos [Warn_names] pos (if context = I_func then "unknown function " ^ ident else "undeclared variable " ^ variable2s(context, ident)) + | Ident(Some fq, name, pos) -> + if (fq = "CORE") && is_global_var context name || is_global_var_declared vars (context, fq, name) para + then () + else + if context = I_func then + warn_with_pos [Warn_names] pos ("unknown function " ^ string_of_fromparser var) + else + lpush vars.state.global_vars_used ((context, fq, name), pos) + | _ -> () + +let declare_My vars (mys, pos) = + let l_new = List.filter (fun (context, ident) -> + if context = I_raw then + if ident = "undef" then false else die_with_pos pos (sprintf "bad ident %s in my" ident) + else true + ) mys in + let l_pre = List.hd vars.my_vars in + List.iter (fun v -> + if List.mem_assoc v l_pre then warn_with_pos [Warn_names] pos (sprintf "redeclared variable %s" (variable2s v)) + ) l_new ; + { vars with my_vars = (List.map (fun v -> v, (pos, ref Access_none, None)) l_new @ l_pre) :: List.tl vars.my_vars } + +let declare_Our vars (ours, pos) = + match vars.our_vars with + | [] -> vars (* we're at the toplevel, already declared in vars_declared *) + | l_pre :: other -> + List.iter (fun v -> + if List.mem_assoc v l_pre && v <> (I_scalar, "_") then warn_with_pos [Warn_names] pos (sprintf "redeclared variable %s" (variable2s v)) + ) ours ; + { vars with our_vars = (List.map (fun v -> v, (pos, ref Access_none, None)) ours @ l_pre) :: other } + +let declare_My_our vars (my_or_our, l, pos) = + match my_or_our with + | "my" -> declare_My vars (l, pos) + | "local" + | "our" -> declare_Our vars (l, pos) + | _ -> internal_error "declare_My_our" + +let un_parenthesize_one_elt_List = function + | [List l] -> l + | l -> l + +let check_unused_local_variables vars = + List.iter (fun ((context, s as v), (pos, used, _proto)) -> + if !used != Access_various then + match s with + | "BEGIN" | "END" | "DESTROY" -> () + | "_" when context = I_array -> + warn_with_pos [Warn_normalized_expressions] pos "if the function doesn't take any parameters, please use the empty prototype.\nexample \"sub foo() { ... }\"" + | _ -> + if s.[0] != '_' || s = "_" then + let msg = if !used = Access_write_only then sprintf "variable %s assigned, but not read" else sprintf "unused variable %s" in + warn_with_pos [Warn_names] pos (msg (variable2s v)) + ) (List.hd vars.my_vars) + +let check_variables vars t = + let rec check_variables_ vars t = fold_tree check vars t + and check vars = function + | Block l -> + let vars' = { vars with my_vars = [] :: vars.my_vars ; our_vars = [] :: vars.our_vars } in + let vars' = List.fold_left check_variables_ vars' l in + check_unused_local_variables vars' ; + Some vars + | Call(Deref(I_func, Ident(None, "sort", _)), (Anonymous_sub(_, Block f, pos) :: l)) -> + let vars = List.fold_left check_variables_ vars l in + let vars' = { vars with my_vars = [ (I_scalar, "a"), (pos, ref Access_various, None) ; (I_scalar, "b"), (pos, ref Access_various, None) ] :: vars.my_vars ; our_vars = [] :: vars.our_vars } in + let vars' = List.fold_left check_variables_ vars' f in + check_unused_local_variables vars' ; + Some vars + + | Call(Deref(I_func, Ident(None, func, func_pos)), Anonymous_sub(_, Block f, pos) :: l) + when List.mem func [ "grep" ; "map" ; "substInFile" ; "map_index" ; "each_index" ; "partition" ; "find_index" ; "grep_index" ; "find" ; "any" ; "every" ; "uniq_" ] -> + let vars = List.fold_left check_variables_ vars l in + let vars' = { vars with my_vars = [] :: vars.my_vars ; our_vars = [(I_scalar, "_"), (pos, ref Access_various, None)] :: vars.our_vars } in + let vars' = List.fold_left check_variables_ vars' f in + check_unused_local_variables vars' ; + check_variable (I_func, Ident(None, func, func_pos)) vars None ; + Some vars + + | Call(Deref(I_func, (Ident _ as ident)), [ Deref(I_star, (Ident(None, "_", _))) ]) -> + (* the &f case: allow access to @_ *) + check_variable (I_func, ident) vars None ; + let _ = is_my_declared vars (I_array, "_") in + Some vars + + | Call(Deref(I_func, (Ident _ as ident)), [ List [ Deref(I_array, (Ident(None, "_", pos))) ] ]) -> + (* special warning if @_ is unbound *) + check_variable (I_func, ident) vars None ; + if not (is_my_declared vars (I_array, "_")) then + warn_with_pos [Warn_suggest_simpler] pos (sprintf "replace %s(@_) with &%s" (string_of_fromparser ident) (string_of_fromparser ident)) ; + Some vars + + | Call(Deref(I_func, Ident(None, "require", _)), [Ident _]) -> Some vars + + | Call(Deref(I_func, Ident(None, "shift", pos)) as var, []) + | Call(Deref(I_func, Ident(None, "pop", pos)) as var, []) -> + check vars (Call(var, [ Deref(I_array, Ident(None, (if vars.is_toplevel then "ARGV" else "_"), pos)) ])) + + | Call(Deref(context, (Ident(_, _, pos) as var)), para) -> + check_variable (context, var) vars (Some(pos, para)) ; + let vars = List.fold_left check_variables_ vars para in + Some vars + +(* | Call_op("=", -> List.fold_left (fold_tree f) env l*) + + | Call_op("while infix", [ expr ; (List [ Call_op("<>", _, _) ] as l) ], pos) + | Call_op("for infix", [ expr ; l ], pos) -> + let vars = check_variables_ vars l in + let vars' = { vars with my_vars = [] :: vars.my_vars ; our_vars = [(I_scalar, "_"), (pos, ref Access_various, None)] :: vars.our_vars } in + let vars' = check_variables_ vars' expr in + if List.hd(vars'.my_vars) <> [] then warn_with_pos [Warn_traps] pos "you can't declare variables in foreach postfix"; + Some vars + + | Call_op("foreach my", [my; expr; Block block], _) -> + let vars = check_variables_ vars expr in + let vars = check_variables_ vars (Block (my :: block)) in + Some vars + | Call_op(op, l, _) when op = "if" || op = "while" || op = "unless" || op = "until" -> + let vars' = { vars with my_vars = [] :: vars.my_vars ; our_vars = [] :: vars.our_vars } in + let vars' = List.fold_left check_variables_ vars' l in + check_unused_local_variables vars' ; + Some vars + + | Sub_declaration(Ident(fq, name, pos) as ident, perl_proto, Block body, kind) -> + let vars = declare_Our vars ([ I_func, string_of_fromparser ident ], pos) in + + let my_vars, l = + match has_proto perl_proto (Block body) with + | Some(mys, mys_pos, body) -> + [], My_our ("my", mys, mys_pos) :: body + | _ -> + let dont_check_use = + kind = Glob_assign || + fq = None && List.mem name ["DESTROY"] || + Hashtbl.mem vars.state.packages_being_classes (some_or fq vars.current_package.package_name) + in + [(I_array, "_"), (pos, ref (if dont_check_use then Access_various else Access_none), None)], body + in + let local_vars = + if fq = None && name = "AUTOLOAD" + then [ (I_scalar, "AUTOLOAD"), (pos, ref Access_various, None) ] + else [] in + + let vars' = { vars with my_vars = my_vars :: vars.my_vars ; our_vars = local_vars :: vars.our_vars ; is_toplevel = false } in + let vars' = List.fold_left check_variables_ vars' l in + check_unused_local_variables vars' ; + Some vars + + | Anonymous_sub(_, Block l, pos) -> + let vars' = { vars with my_vars = [(I_array, "_"), (pos, ref Access_various, None)] :: vars.my_vars ; is_toplevel = false } in + let vars' = List.fold_left check_variables_ vars' l in + check_unused_local_variables vars' ; + Some vars + + | Call_op("foreach", [ expr ; Block l ], pos) -> + let vars = check_variables_ vars expr in + let vars' = { vars with my_vars = [] :: vars.my_vars ; our_vars = [(I_scalar, "_"), (pos, ref Access_various, None)] :: vars.our_vars } in + let vars' = List.fold_left check_variables_ vars' l in + check_unused_local_variables vars' ; + Some vars + + | Anonymous_sub _ + | Sub_declaration _ -> internal_error "check_variables" + + | Ident _ as var -> + check_variable (I_star, var) vars None ; + Some vars + + | My_our(my_or_our, mys, pos) -> Some(declare_My_our vars (my_or_our, mys, pos)) + | Deref(context, (Ident _ as var)) -> + check_variable (context, var) vars None ; + Some vars + | Deref_with(context, _, (Ident _ as var), para) -> + let vars = check_variables_ vars para in + check_variable (context, var) vars None ; + Some vars + + | Call_op("=", [My_our(my_or_our, mys, pos); e], _) -> + (* check e first *) + let vars = check_variables_ vars e in + List.iter (fun (context, var) -> + if non_scalar_context context then warn_with_pos [Warn_prototypes] pos (sprintf "%s takes all the arguments, %s is undef in any case" (variable2s (context, var)) (variable2s (last mys))) + ) (removelast mys) ; (* mys is never empty *) + Some(declare_My_our vars (my_or_our, mys, pos)) + + | Call_op("if infix", [List [My_our _]; List [Num("0", _)]], _) -> None (* special allowed case *) + | Call_op(op, List (My_our _ :: _) :: _, pos) + | Call_op(op, My_our _ :: _, pos) + | Call_op(op, Call_op("local", _, _) :: _, pos) -> + if op <> "=" then warn_with_pos [Warn_traps] pos (sprintf "applying %s on a new initialized variable is wrong" op); + None + + | Call_op("=", [ Deref(context, (Ident _ as var)) ; para], _) -> + check_variable (context, var) { vars with write_only = true } None ; + Some (check_variables_ vars para) + + | Call_op("=", [ List [ List l ] ; para], _) -> + let vars = List.fold_left (fun vars -> function + | Deref(context, (Ident _ as var)) -> + check_variable (context, var) { vars with write_only = true } None ; + vars + | e -> check_variables_ vars e + ) vars l in + let vars = check_variables_ vars para in + Some vars + + | Method_call(Raw_string(package_name, pos), Raw_string ("import", _), para) -> + let args = + match para with + | [] -> None + | [ List [v] ] -> Some(from_qw v) + | _ -> die_with_pos pos "bad import statement" in + let l = get_imported vars.state vars.current_package (package_name, (args, pos)) in + let vars = + if vars.is_toplevel then ( + vars.current_package.imported := Some (get_imports vars.state vars.current_package @ l) ; + vars + ) else + { vars with locally_imported = l @ vars.locally_imported } in + Some vars + + | Method_call(Raw_string(pkg, _) as class_, Raw_string(method_, pos), para) -> + let vars = List.fold_left check_variables_ vars para in + let rec search pkg = + if is_global_var_declared vars (I_func, pkg, method_) (Some(pos, [ List (class_ :: un_parenthesize_one_elt_List para) ])) then true + else + let package = Hashtbl.find vars.state.per_packages pkg in + List.exists search (List.map fst (some_or package.isa [])) + in + (try + if not (uses_external_package pkg || List.mem pkg !ignored_packages || search pkg || method_ = "bootstrap") then + warn_with_pos [Warn_import_export] pos (sprintf "unknown method %s starting in package %s" method_ pkg); + with Not_found -> warn_with_pos [Warn_import_export] pos (sprintf "unknown package %s" pkg)); + Some vars + + | Method_call(o, Raw_string(method_, pos), para) -> + let vars = check_variables_ vars o in + let vars = List.fold_left check_variables_ vars para in + (try + let l = Hashtbl.find vars.state.methods method_ in + let l_and = List.map (fun (pkg_name, used, proto) -> pkg_name, used, do_para_comply_with_prototype [ List (o :: un_parenthesize_one_elt_List para) ] proto) l in + let l_and' = + match List.filter (fun (_, _, n) -> n = 0) l_and with + | [] -> + (match uniq (List.map ter3 l_and) with + | [-1] -> warn_with_pos [Warn_prototypes] pos "not enough parameters" + | [ 1] -> warn_with_pos [Warn_prototypes] pos "too many parameters" + | _ -> warn_with_pos [Warn_prototypes] pos "not enough or too many parameters") ; + l_and + | l -> l + in + List.iter (fun (pkg_name, _, _) -> add_to_packages_maybe_used vars.state vars.current_package pkg_name method_) l_and' ; + List.iter (fun (_, used, _) -> used := Access_various) l_and' + with Not_found -> + if not (List.mem method_ [ "isa"; "can" ]) then + warn_with_pos [Warn_names] pos ("unknown method " ^ method_)) ; + Some vars + + | _ -> None + in + let vars = List.fold_left check_variables_ { vars with my_vars = [[]] } t in + vars + +let check_tree state package = + let vars = { my_vars = [[]]; our_vars = []; locally_imported = []; required_vars = []; current_package = package; state = state; is_toplevel = true; write_only = false } in + if !Flags.verbose then print_endline_flush ("checking package " ^ package.package_name) ; + let vars = check_variables vars package.body in + check_unused_local_variables vars ; + () + +let imported_add i1 i2 = if i1 = None && i2 = None then None else Some (some_or i1 [] @ some_or i2 []) + +let add_package_to_state state package = + let package = + try + let existing_package = Hashtbl.find state.per_packages package.package_name in + (*print_endline_flush (existing_package.file_name ^ " vs " ^ package.file_name); *) + let vars_declared = existing_package.vars_declared in + Hashtbl.iter (fun var pos -> Hashtbl.replace vars_declared var pos) package.vars_declared ; + let p = { + package_name = package.package_name ; has_package_name = package.has_package_name ; + isa = if existing_package.isa = None then package.isa else existing_package.isa ; + body = existing_package.body @ package.body ; + uses = existing_package.uses @ package.uses ; + required_packages = existing_package.required_packages @ package.required_packages ; + vars_declared = vars_declared ; + imported = ref (imported_add !(existing_package.imported) !(package.imported)) ; + exports = { export_ok = existing_package.exports.export_ok @ package.exports.export_ok ; + export_auto = existing_package.exports.export_auto @ package.exports.export_auto ; + export_tags = existing_package.exports.export_tags @ package.exports.export_tags ; + special_export = None } + } in + Hashtbl.replace state.per_packages package.package_name p ; + p + with Not_found -> package + in + Hashtbl.replace state.per_packages package.package_name package + +let add_file_to_files per_files file = + Hashtbl.replace per_files file.file_name file + +let check_unused_vars package = + Hashtbl.iter (fun (context, name) (pos, is_used, _proto) -> + if !is_used != Access_various && not (List.mem name ["BEGIN"; "END"; "DESTROY"; "ISA"; "AUTOLOAD"; "EXPORT"; "EXPORT_OK"; "EXPORT_TAGS"]) then + warn_with_pos [Warn_unused_global_vars] pos (sprintf "unused %s%s::%s" (if context = I_func then "function " else "variable " ^ context2s context) package.package_name name) + ) package.vars_declared + +let arrange_global_vars_declared global_vars_declared state = + Hashtbl.iter (fun (context, fq, name) (pos, proto) -> + let package = + try + Hashtbl.find state.per_packages fq + with Not_found -> + (* creating a new shadow package *) + let package = + { + package_name = fq; + has_package_name = true ; + exports = empty_exports ; + imported = ref None ; + vars_declared = Hashtbl.create 16 ; + uses = [] ; + required_packages = [] ; + body = [] ; + isa = None ; + } in + Hashtbl.add state.per_packages fq package ; + package + in + if not (Hashtbl.mem package.vars_declared (context, name)) then + Hashtbl.add package.vars_declared (context, name) (pos, ref Access_none, proto) + (* otherwise dropping this second declaration *) + ) global_vars_declared ; + state + +let get_methods_available state = + let classes = uniq ( + hashtbl_collect (fun _ package -> + match package.isa with + | None -> + if Hashtbl.mem package.vars_declared (I_func, "new") then [package] else [] + | Some l -> + package :: List.map (fun (pkg, pos) -> + try + Hashtbl.find state.per_packages pkg + with Not_found -> die_with_pos pos ("bad package " ^ pkg) + ) l + ) state.per_packages + ) in + List.iter (fun pkg -> + Hashtbl.replace state.packages_being_classes pkg.package_name () ; + Hashtbl.iter (fun (context, v) (_pos, is_used, proto) -> + if context = I_func then + let l = try Hashtbl.find state.methods v with Not_found -> [] in + Hashtbl.replace state.methods v ((pkg.package_name, is_used, proto) :: l) + ) pkg.vars_declared + ) classes ; + state + + +let default_per_files() = Hashtbl.create 16 +let default_state per_files = { + per_files = per_files; + per_packages = Hashtbl.create 16; + methods = Hashtbl.create 256; + global_vars_used = ref []; + packages_being_classes = Hashtbl.create 16; + packages_dependencies = Hashtbl.create 16; + packages_dependencies_maybe = Hashtbl.create 16 +} + +let cache_cache = Hashtbl.create 16 + +let pkgs2s prefix l = + let l = List.sort compare (List.map (fun pkg -> pkg.file_name) l) in + String.concat "" (List.map (fun s -> prefix ^ s ^ "\n") l) + +let read_packages_from_cache per_files dir = + if !Flags.no_cache || Hashtbl.mem cache_cache dir then () else + try + Hashtbl.add cache_cache dir (); + let file = dir ^ "/.perl_checker.cache" in + let fh = open_in file in + let magic = input_line fh in + if magic <> "perl_checker cache " ^ Build.date then () else + let l = Marshal.from_channel fh in + close_in fh ; + + let l = List.filter (fun file -> + not (Hashtbl.mem per_files file.file_name) && + (try file.build_time > mtime file.file_name with _ -> false) + ) l in + + if !Flags.verbose then print_endline_flush (sprintf "using cached files\n%sfrom %s" (pkgs2s " " l) file) ; + + List.iter (fun file -> + Info.add_a_file file.file_name file.lines_starts ; + add_file_to_files per_files file + ) l + with Sys_error _ | End_of_file -> () + +let write_packages_cache per_files dir = + try + let l = List.filter (fun per_file -> per_file.require_name <> None) (hashtbl_values per_files) in + let file = dir ^ "/.perl_checker.cache" in + let fh = open_out file in + output_string fh ("perl_checker cache " ^ Build.date ^ "\n") ; + Marshal.to_channel fh l [] ; + close_out fh ; + if !Flags.verbose then print_endline_flush (sprintf "saving cached files\n%sin %s" (pkgs2s " " l) file) + with Sys_error _ -> () + +let generate_package_dependencies_graph state file = + let fh = open_out file in + + List.iter (fun (p1, p2) -> + output_string fh (p1 ^ " -> " ^ p2 ^ "\n") + ) (List.sort compare (hashtbl_keys state.packages_dependencies)) ; + + let l = Hashtbl.fold (fun (p1, p2, method_) _ l -> ((p1, method_), p2) :: l) state.packages_dependencies_maybe [] in + List.iter (fun ((p1, method_), l) -> + output_string fh (p1 ^ " ?-> " ^ String.concat " " l ^ " (" ^ method_ ^ ")\n") + ) (List.sort compare (prepare_want_all_assoc l)); + + close_out fh diff --git a/src/global_checks.mli b/src/global_checks.mli new file mode 100644 index 0000000..9edacbf --- /dev/null +++ b/src/global_checks.mli @@ -0,0 +1,26 @@ +open Types +open Tree + +type state = { + per_files : (string, per_file) Hashtbl.t ; + per_packages : (string, per_package) Hashtbl.t ; + methods : (string, (string * variable_used ref * prototype option) list) Hashtbl.t ; + global_vars_used : ((context * string * string) * pos) list ref ; + packages_being_classes : (string, unit) Hashtbl.t ; + packages_dependencies : (string * string, unit) Hashtbl.t ; + packages_dependencies_maybe : (string * string * string, unit) Hashtbl.t ; + } + +val default_per_files : unit -> (string, per_file) Hashtbl.t +val default_state : (string, per_file) Hashtbl.t -> state +val check_tree : state -> per_package -> unit +val add_file_to_files : (string, per_file) Hashtbl.t -> per_file -> unit +val add_package_to_state : state -> per_package -> unit +val check_unused_vars : per_package -> unit +val arrange_global_vars_declared : (context * string * string, pos * Tree.prototype option) Hashtbl.t -> state -> state +val get_methods_available : state -> state + +val read_packages_from_cache : (string, per_file) Hashtbl.t -> string -> unit +val write_packages_cache : (string, per_file) Hashtbl.t -> string -> unit + +val generate_package_dependencies_graph : state -> string -> unit diff --git a/src/info.ml b/src/info.ml new file mode 100644 index 0000000..ab76b9f --- /dev/null +++ b/src/info.ml @@ -0,0 +1,76 @@ +open List +open Printf +open Common + +let (lines_starts : (string, int list) Hashtbl.t) = Hashtbl.create 4 +let current_file_lines_starts = ref [] +let current_file_current_line = ref 0 +let current_file = ref "" + +let start_a_new_file file = + if !current_file <> "" then Hashtbl.add lines_starts !current_file !current_file_lines_starts ; + current_file := file ; + current_file_lines_starts := [0] + +let add_a_file file file_lines_starts = Hashtbl.replace lines_starts file file_lines_starts + +let get_lines_starts_for_file file = + if file = !current_file then !current_file_lines_starts else Hashtbl.find lines_starts file + +let cwd = expand_symlinks (Unix.getcwd()) + +let file_to_absolute_file file = + let abs_file = + if file.[0] = '/' then file else + if file = "." then cwd else cwd ^ "/" ^ file + in + expand_symlinks abs_file + +let absolute_file_to_file = + let s1 = Filename.dirname cwd in + if String.length s1 < 4 then (fun x -> x) else + let short_cwd = + let s2 = Filename.dirname s1 in + if String.length s2 < 4 then s1 else + let s3 = Filename.dirname s2 in (* allow up to ../../../xxx *) + if String.length s3 < 4 then s2 else s3 in + memoize (fun abs_file -> + if str_begins_with (short_cwd ^ "/") abs_file then + let rec to_file rel cwd = + if str_begins_with (cwd ^ "/") abs_file then + rel ^ skip_n_char_ (String.length cwd + 1) 0 abs_file + else + to_file ("../" ^ rel) (Filename.dirname cwd) + in + to_file "" cwd + else + abs_file) + +let raw_pos2raw_line file a = + let starts = map_index (fun a b -> a,b) (rev (get_lines_starts_for_file file)) in + let ((offset, line), _) = find (fun (_,(e,_)) -> e > a) (combine starts (tl starts @ [999999999, 999999999])) in + line, offset + +let pos2line (file, a, b) = + let line, offset = raw_pos2raw_line file a in + file, line, a - offset + 1, b - offset + 1 + +let pos2s (file, a, b) = sprintf "(%s, %d, %d)" file a b + +let pos2sfull pos = + try + let file, line, n1, n2 = pos2line pos in + sprintf "File \"%s\", line %d, character %d-%d\n" (absolute_file_to_file file) (line + 1) n1 n2 + with Not_found -> failwith ("bad position " ^ pos2s pos) + +let pos2s_for_po pos = + let file, line, _, _ = pos2line pos in + absolute_file_to_file file ^ ":" ^ string_of_int (line + 1) + +let is_on_same_line file (a,b) = + let line_a, _ = raw_pos2raw_line file a in + let line_b, _ = raw_pos2raw_line file b in + line_a = line_b + +let is_on_same_line_current (a,b) = is_on_same_line !current_file (a,b) +let pos2sfull_current a b = pos2sfull (!current_file, a, b) diff --git a/src/info.mli b/src/info.mli new file mode 100644 index 0000000..d337316 --- /dev/null +++ b/src/info.mli @@ -0,0 +1,17 @@ +val lines_starts : (string, int list) Hashtbl.t +val current_file_lines_starts : int list ref +val current_file_current_line : int ref +val current_file : string ref +val start_a_new_file : string -> unit +val add_a_file : string -> int list -> unit +val get_lines_starts_for_file : string -> int list +val file_to_absolute_file : string -> string +val absolute_file_to_file : string -> string +val raw_pos2raw_line : string -> int -> int * int +val pos2line : string * int * int -> string * int * int * int +val pos2s : string * int * int -> string +val pos2sfull : string * int * int -> string +val pos2s_for_po : string * int * int -> string +val is_on_same_line : string -> int * int -> bool +val is_on_same_line_current : int * int -> bool +val pos2sfull_current : int -> int -> string diff --git a/src/lexer.mll b/src/lexer.mll new file mode 100644 index 0000000..f416499 --- /dev/null +++ b/src/lexer.mll @@ -0,0 +1,1057 @@ +{ (* -*- caml -*- *) +open Common +open Types +open Lexing +open Info + +let bpos = -1,-1 + +type raw_token = + | EOF of raw_pos + | SPACE of int + | CR + | INT of (string * raw_pos) + | FLOAT of (string * raw_pos) + | RAW_STRING of (string * raw_pos) + | STRING of (raw_interpolated_string * raw_pos) + | PATTERN of (raw_interpolated_string * string * raw_pos) + | QR_PATTERN of (raw_interpolated_string * string * raw_pos) + | PATTERN_SUBST of (raw_interpolated_string * raw_interpolated_string * string * raw_pos) + | BAREWORD of (string * raw_pos) + | BAREWORD_PAREN of (string * raw_pos) + | REVISION of (string * raw_pos) + | PERL_CHECKER_COMMENT of (string * raw_pos) + | PO_COMMENT of (string * raw_pos) + | POD of (string * raw_pos) + | LABEL of (string * raw_pos) + | COMMAND_STRING of (raw_interpolated_string * raw_pos) + | PRINT_TO_STAR of ((string * string) * raw_pos) + | PRINT_TO_SCALAR of ((string * string) * raw_pos) + | QUOTEWORDS of (string * raw_pos) + | COMPACT_HASH_SUBSCRIPT of (string * raw_pos) + | RAW_HERE_DOC of ((string * raw_pos) ref * raw_pos) + | HERE_DOC of (raw_interpolated_string * raw_pos) ref * raw_pos + | FORMAT of (raw_interpolated_string * raw_pos) ref * raw_pos + | SCALAR_IDENT of (string option * string * raw_pos) + | ARRAY_IDENT of (string option * string * raw_pos) + | HASH_IDENT of (string option * string * raw_pos) + | FUNC_IDENT of (string option * string * raw_pos) + | STAR_IDENT of (string option * string * raw_pos) + | RAW_IDENT of (string option * string * raw_pos) + | RAW_IDENT_PAREN of (string option * string * raw_pos) + | ARRAYLEN_IDENT of (string option * string * raw_pos) + | SUB_WITH_PROTO of (string * raw_pos) + | FUNC_DECL_WITH_PROTO of (string option * string * string * raw_pos) + + | IF of raw_pos | ELSIF of raw_pos | ELSE of raw_pos | UNLESS of raw_pos | DO of raw_pos | WHILE of raw_pos | UNTIL of raw_pos | MY_OUR of (string * raw_pos) | CONTINUE of raw_pos | SUB of raw_pos + | LOCAL of raw_pos | FOR of (string * raw_pos) | USE of raw_pos | PACKAGE of raw_pos | BEGIN of raw_pos | END of raw_pos | PRINT of (string * raw_pos) + | NEW of (raw_pos) | AT of raw_pos | DOLLAR of raw_pos | PERCENT of raw_pos | AMPERSAND of raw_pos + | STAR of raw_pos | ARRAYLEN of raw_pos | SEMI_COLON of raw_pos | PKG_SCOPE of raw_pos | PAREN of raw_pos | PAREN_END of raw_pos | BRACKET of raw_pos + | BRACKET_END of raw_pos | BRACKET_HASHREF of raw_pos | ARRAYREF of raw_pos | ARRAYREF_END of raw_pos | ARROW of raw_pos | INCR of raw_pos | DECR of raw_pos + | CONCAT of raw_pos | POWER of raw_pos | TIGHT_NOT of raw_pos | BIT_NEG of raw_pos | REF of raw_pos | ONE_SCALAR_PARA of (string * raw_pos) | PATTERN_MATCH of raw_pos | PATTERN_MATCH_NOT of raw_pos | MULT of (string * raw_pos) | MULT_L_STR of raw_pos + | PLUS of (string * raw_pos) | BIT_SHIFT of (string * raw_pos) + | LT of raw_pos | GT of raw_pos | COMPARE_OP of (string * raw_pos) | COMPARE_OP_STR of (string * raw_pos) | EQ_OP of (string * raw_pos) | EQ_OP_STR of (string * raw_pos) + | BIT_AND of raw_pos | BIT_OR of raw_pos | BIT_XOR of raw_pos | AND_TIGHT of raw_pos | OR_TIGHT of raw_pos | DOTDOT of (string * raw_pos) + | QUESTION_MARK of raw_pos | COLON of raw_pos | ASSIGN of (string * raw_pos) | COMMA of raw_pos | RIGHT_ARROW of raw_pos | NOT of raw_pos | AND of raw_pos | OR of raw_pos | XOR of raw_pos + +and raw_interpolated_string = (string * raw_token list) list + +let new_any mcontext any spaces pos = { mcontext = mcontext ; any = any ; spaces = spaces ; pos = pos } + +let pos lexbuf = lexeme_start lexbuf, lexeme_end lexbuf +let pos2sfull_with start end_ = Info.pos2sfull (!current_file, start, end_) +let pos2sfull lexbuf = pos2sfull_with (lexeme_start lexbuf) (lexeme_end lexbuf) + +let warn_with_pos warn_types (start, end_) err = if Flags.are_warning_types_set warn_types then print_endline_flush (pos2sfull_with start end_ ^ err) +let warn warn_types lexbuf err = warn_with_pos warn_types (pos lexbuf) err +let die lexbuf err = failwith (pos2sfull_with (lexeme_start lexbuf) (lexeme_end lexbuf) ^ err) + +let rec concat_bareword_paren accu = function + | PRINT(s, pos1) :: PAREN(pos2) :: l + | BAREWORD(s, pos1) :: PAREN(pos2) :: l -> + concat_bareword_paren (PAREN(pos2) :: BAREWORD_PAREN(s, pos1) :: accu) l + | RAW_IDENT(kind, ident, pos1) :: PAREN(pos2) :: l -> + concat_bareword_paren (PAREN(pos2) :: RAW_IDENT_PAREN(kind, ident, pos1) :: accu) l + | PO_COMMENT(_, pos) as e :: l -> + let l = drop_while (function CR | SPACE _ -> true | _ -> false) l in + (match l with + | PO_COMMENT _ :: _ + (* the check will be done on this PO_COMMENT *) + | BAREWORD("N", _) :: PAREN(_) :: _ + | BAREWORD("N_", _) :: PAREN(_) :: _ -> + concat_bareword_paren (e :: accu) l + | _ -> + warn_with_pos [Warn_MDK_Common] pos "N(...) must follow the #-PO: comment, with nothing in between" ; + concat_bareword_paren accu l) + | [] -> List.rev accu + | e :: l -> + concat_bareword_paren (e :: accu) l + +let rec bracket_bareword_is_hashref accu = function + | (pos, Parser.BRACKET bracket) :: (_, Parser.BAREWORD _ as bareword) :: (_, Parser.RIGHT_ARROW _ as right_arrow) :: l -> + bracket_bareword_is_hashref (right_arrow :: bareword :: (pos, Parser.BRACKET_HASHREF bracket) :: accu) l + | [] -> List.rev accu + | e :: l -> + bracket_bareword_is_hashref (e :: accu) l + + +let rec raw_token_to_pos_and_token spaces = function + | INT(s, pos) -> pos, Parser.NUM(new_any M_int s spaces pos) + | FLOAT(s, pos) -> pos, Parser.NUM(new_any M_float s spaces pos) + | RAW_STRING(s, pos) -> pos, Parser.RAW_STRING(new_any M_string s spaces pos) + | RAW_HERE_DOC(r, pos) -> pos, Parser.RAW_HERE_DOC(new_any M_string !r spaces pos) + | STRING(l, pos) -> pos, Parser.STRING(new_any M_string (raw_interpolated_string_to_tokens l) spaces pos) + | COMMAND_STRING(l, pos) -> pos, Parser.COMMAND_STRING(new_any (M_mixed [M_string; M_array]) (raw_interpolated_string_to_tokens l) spaces pos) + | QR_PATTERN(s, opts, pos) -> pos, Parser.QR_PATTERN(new_any M_special (raw_interpolated_string_to_tokens s, opts) spaces pos) + | PATTERN(s, opts, pos) -> pos, Parser.PATTERN(new_any M_special (raw_interpolated_string_to_tokens s, opts) spaces pos) + | PATTERN_SUBST(from, to_, opts, pos) -> pos, Parser.PATTERN_SUBST(new_any M_special (raw_interpolated_string_to_tokens from, raw_interpolated_string_to_tokens to_, opts) spaces pos) + | HERE_DOC(l, pos) -> pos, Parser.HERE_DOC(new_any M_string (raw_interpolated_string_to_tokens (fst !l), snd !l) spaces pos) + | FORMAT(l, pos) -> pos, Parser.FORMAT(new_any M_string (raw_interpolated_string_to_tokens (fst !l), snd !l) spaces pos) + | BAREWORD(s, pos) -> pos, Parser.BAREWORD(new_any M_special s spaces pos) + | BAREWORD_PAREN(s, pos) -> pos, Parser.BAREWORD_PAREN(new_any M_special s spaces pos) + | REVISION(s, pos) -> pos, Parser.REVISION(new_any M_revision s spaces pos) + | PERL_CHECKER_COMMENT(s, pos) -> pos, Parser.PERL_CHECKER_COMMENT(new_any M_none s spaces pos) + | PO_COMMENT(s, pos) -> pos, Parser.PO_COMMENT(new_any M_special s spaces pos) + | POD(s, pos) -> pos, Parser.POD(new_any M_special s spaces pos) + | LABEL(s, pos) -> pos, Parser.LABEL(new_any M_none s spaces pos) + | PRINT(s, pos) -> pos, Parser.PRINT(new_any M_special s spaces pos) + | PRINT_TO_STAR(s, pos) -> pos, Parser.PRINT_TO_STAR(new_any M_special s spaces pos) + | PRINT_TO_SCALAR(s, pos) -> pos, Parser.PRINT_TO_SCALAR(new_any M_special s spaces pos) + | QUOTEWORDS(s, pos) -> pos, Parser.QUOTEWORDS(new_any M_array s spaces pos) + | COMPACT_HASH_SUBSCRIPT(s, pos) -> pos, Parser.COMPACT_HASH_SUBSCRIPT(new_any M_special s spaces pos) + | SCALAR_IDENT(kind, name, pos) -> pos, Parser.SCALAR_IDENT(new_any M_special (kind, name) spaces pos) + | ARRAY_IDENT(kind, name, pos) -> pos, Parser.ARRAY_IDENT(new_any M_special (kind, name) spaces pos) + | HASH_IDENT(kind, name, pos) -> pos, Parser.HASH_IDENT(new_any M_special (kind, name) spaces pos) + | FUNC_IDENT(kind, name, pos) -> pos, Parser.FUNC_IDENT(new_any M_special (kind, name) spaces pos) + | STAR_IDENT(kind, name, pos) -> pos, Parser.STAR_IDENT(new_any M_special (kind, name) spaces pos) + | RAW_IDENT(kind, name, pos) -> pos, Parser.RAW_IDENT(new_any M_special (kind, name) spaces pos) + | RAW_IDENT_PAREN(kind, name, pos) -> pos, Parser.RAW_IDENT_PAREN(new_any M_special (kind, name) spaces pos) + | ARRAYLEN_IDENT(kind, name, pos) -> pos, Parser.ARRAYLEN_IDENT(new_any M_special (kind, name) spaces pos) + | SUB_WITH_PROTO(proto, pos) -> pos, Parser.SUB_WITH_PROTO(new_any M_special proto spaces pos) + | FUNC_DECL_WITH_PROTO(fq, name, proto, pos) -> pos, Parser.FUNC_DECL_WITH_PROTO(new_any M_special (fq, name, proto) spaces pos) + + | NEW(pos) -> pos, Parser.NEW(new_any M_special () spaces pos) + | COMPARE_OP(s, pos) -> pos, Parser.COMPARE_OP(new_any M_special s spaces pos) + | COMPARE_OP_STR(s, pos) -> pos, Parser.COMPARE_OP_STR(new_any M_special s spaces pos) + | EQ_OP(s, pos) -> pos, Parser.EQ_OP(new_any M_special s spaces pos) + | EQ_OP_STR(s, pos) -> pos, Parser.EQ_OP_STR(new_any M_special s spaces pos) + | ASSIGN(s, pos) -> pos, Parser.ASSIGN(new_any M_special s spaces pos) + | FOR(s, pos) -> pos, Parser.FOR(new_any M_special s spaces pos) + + | DOTDOT(s, pos) -> pos, Parser.DOTDOT(new_any M_special s spaces pos) + | MULT(s, pos) -> pos, Parser.MULT(new_any M_special s spaces pos) + | BIT_SHIFT(s, pos) -> pos, Parser.BIT_SHIFT(new_any M_special s spaces pos) + | PLUS(s, pos) -> pos, Parser.PLUS(new_any M_special s spaces pos) + | ONE_SCALAR_PARA(s, pos) -> pos, Parser.ONE_SCALAR_PARA(new_any M_special s spaces pos) + | MY_OUR(s, pos) -> pos, Parser.MY_OUR(new_any M_special s spaces pos) + + | EOF (pos) -> pos, Parser.EOF (new_any M_special () spaces pos) + | IF (pos) -> pos, Parser.IF (new_any M_special () spaces pos) + | ELSIF (pos) -> pos, Parser.ELSIF (new_any M_special () spaces pos) + | ELSE (pos) -> pos, Parser.ELSE (new_any M_special () spaces pos) + | UNLESS (pos) -> pos, Parser.UNLESS (new_any M_special () spaces pos) + | DO (pos) -> pos, Parser.DO (new_any M_special () spaces pos) + | WHILE (pos) -> pos, Parser.WHILE (new_any M_special () spaces pos) + | UNTIL (pos) -> pos, Parser.UNTIL (new_any M_special () spaces pos) + | CONTINUE (pos) -> pos, Parser.CONTINUE (new_any M_special () spaces pos) + | SUB (pos) -> pos, Parser.SUB (new_any M_special () spaces pos) + | LOCAL (pos) -> pos, Parser.LOCAL (new_any M_special () spaces pos) + | USE (pos) -> pos, Parser.USE (new_any M_special () spaces pos) + | PACKAGE (pos) -> pos, Parser.PACKAGE (new_any M_special () spaces pos) + | BEGIN (pos) -> pos, Parser.BEGIN (new_any M_special () spaces pos) + | END (pos) -> pos, Parser.END (new_any M_special () spaces pos) + | AT (pos) -> pos, Parser.AT (new_any M_special () spaces pos) + | DOLLAR (pos) -> pos, Parser.DOLLAR (new_any M_special () spaces pos) + | PERCENT (pos) -> pos, Parser.PERCENT (new_any M_special () spaces pos) + | AMPERSAND (pos) -> pos, Parser.AMPERSAND (new_any M_special () spaces pos) + | STAR (pos) -> pos, Parser.STAR (new_any M_special () spaces pos) + | ARRAYLEN (pos) -> pos, Parser.ARRAYLEN (new_any M_special () spaces pos) + | SEMI_COLON (pos) -> pos, Parser.SEMI_COLON (new_any M_none () spaces pos) + | PKG_SCOPE (pos) -> pos, Parser.PKG_SCOPE (new_any M_special () spaces pos) + | PAREN (pos) -> pos, Parser.PAREN (new_any M_special () spaces pos) + | PAREN_END (pos) -> pos, Parser.PAREN_END (new_any M_special () spaces pos) + | BRACKET (pos) -> pos, Parser.BRACKET (new_any M_special () spaces pos) + | BRACKET_END (pos) -> pos, Parser.BRACKET_END (new_any M_special () spaces pos) + | BRACKET_HASHREF (pos) -> pos, Parser.BRACKET_HASHREF (new_any M_special () spaces pos) + | ARRAYREF (pos) -> pos, Parser.ARRAYREF (new_any M_special () spaces pos) + | ARRAYREF_END (pos) -> pos, Parser.ARRAYREF_END (new_any M_special () spaces pos) + | ARROW (pos) -> pos, Parser.ARROW (new_any M_special () spaces pos) + | INCR (pos) -> pos, Parser.INCR (new_any M_special () spaces pos) + | DECR (pos) -> pos, Parser.DECR (new_any M_special () spaces pos) + | POWER (pos) -> pos, Parser.POWER (new_any M_special () spaces pos) + | TIGHT_NOT (pos) -> pos, Parser.TIGHT_NOT (new_any M_special () spaces pos) + | BIT_NEG (pos) -> pos, Parser.BIT_NEG (new_any M_special () spaces pos) + | REF (pos) -> pos, Parser.REF (new_any M_special () spaces pos) + | PATTERN_MATCH (pos) -> pos, Parser.PATTERN_MATCH (new_any M_special () spaces pos) + | PATTERN_MATCH_NOT(pos) -> pos, Parser.PATTERN_MATCH_NOT(new_any M_special () spaces pos) + | LT (pos) -> pos, Parser.LT (new_any M_special () spaces pos) + | GT (pos) -> pos, Parser.GT (new_any M_special () spaces pos) + | BIT_AND (pos) -> pos, Parser.BIT_AND (new_any M_special () spaces pos) + | BIT_OR (pos) -> pos, Parser.BIT_OR (new_any M_special () spaces pos) + | BIT_XOR (pos) -> pos, Parser.BIT_XOR (new_any M_special () spaces pos) + | AND_TIGHT (pos) -> pos, Parser.AND_TIGHT (new_any M_special () spaces pos) + | OR_TIGHT (pos) -> pos, Parser.OR_TIGHT (new_any M_special () spaces pos) + | QUESTION_MARK (pos) -> pos, Parser.QUESTION_MARK (new_any M_special () spaces pos) + | COLON (pos) -> pos, Parser.COLON (new_any M_special () spaces pos) + | COMMA (pos) -> pos, Parser.COMMA (new_any M_special () spaces pos) + | CONCAT (pos) -> pos, Parser.CONCAT (new_any M_special () spaces pos) + | MULT_L_STR (pos) -> pos, Parser.MULT_L_STR (new_any M_special () spaces pos) + | RIGHT_ARROW (pos) -> pos, Parser.RIGHT_ARROW (new_any M_special () spaces pos) + | NOT (pos) -> pos, Parser.NOT (new_any M_special () spaces pos) + | AND (pos) -> pos, Parser.AND (new_any M_special () spaces pos) + | OR (pos) -> pos, Parser.OR (new_any M_special () spaces pos) + | XOR (pos) -> pos, Parser.XOR (new_any M_special () spaces pos) + + | SPACE _ | CR -> internal_error "raw_token_to_token" + +and raw_token_to_token spaces raw_token = + let _, token = raw_token_to_pos_and_token spaces raw_token in + token + +and raw_interpolated_string_to_tokens l = + List.map (fun (s, rtok) -> s, concat_spaces [] Space_0 rtok) l + +and concat_spaces ret spaces = function + | CR :: l -> concat_spaces ret Space_cr l + | SPACE n :: l -> + let spaces' = + match spaces with + | Space_cr -> Space_cr + | Space_0 -> if n = 1 then Space_1 else Space_n + | _ -> Space_n + in + concat_spaces ret spaces' l + | [] -> List.rev ret + | token :: l -> concat_spaces (raw_token_to_pos_and_token spaces token :: ret) Space_0 l + +let rec lexbuf2list accu t lexbuf = + match t lexbuf with + | EOF pos -> List.rev (EOF pos :: accu) + | e -> lexbuf2list (e :: accu) t lexbuf + +let get_token token lexbuf = + let tokens = lexbuf2list [] token lexbuf in + let tokens = concat_bareword_paren [] tokens in + let tokens = concat_spaces [] Space_0 tokens in + let tokens = bracket_bareword_is_hashref [] tokens in + tokens + +let next_rule = Stack.create() + + +let putback lexbuf nb = lexbuf.Lexing.lex_curr_pos <- lexbuf.Lexing.lex_curr_pos - nb + +let add_a_new_line raw_pos = + incr current_file_current_line ; + lpush current_file_lines_starts raw_pos + +let here_docs = Queue.create() +let raw_here_docs = Queue.create() +let current_here_doc_mark = ref "" + +let here_doc_next_line mark = + let here_doc_ref = ref([], bpos) in + Queue.push (mark, here_doc_ref) here_docs ; + here_doc_ref +let raw_here_doc_next_line mark = + let here_doc_ref = ref("", bpos) in + Queue.push (mark, here_doc_ref) raw_here_docs ; + here_doc_ref + +let delimit_char = ref '/' +let delimit_char_open = ref '(' +let delimit_char_close = ref ')' +type string_escape_kinds = Double_quote | Qq | Delimited | Here_doc +let string_escape_kind = ref Double_quote +let string_quote_escape = ref false +let string_escape_useful = ref (Left false) +let not_ok_for_match = ref (-1) +let string_nestness = ref 0 +let string_is_i18n = ref false + +let building_current_interpolated_string = Stack.create() +let building_current_string = Stack.create() +let current_string_start_pos = ref 0 +let current_string_start_line = ref 0 + +let die_in_string lexbuf err = failwith (pos2sfull_with !current_string_start_pos (lexeme_end lexbuf) ^ err) +let warn_escape_unneeded lexbuf c = + let s = String.make 1 c in warn [Warn_suggest_simpler] lexbuf ("you can replace \\" ^ s ^ " with " ^ s) +let next_interpolated toks = + let r = Stack.top building_current_string in + Queue.push (!r, toks) (Stack.top building_current_interpolated_string) ; + r := "" + +let raw_ins t lexbuf = + Stack.push (ref "") building_current_string; + current_string_start_pos := lexeme_start lexbuf; + t lexbuf ; + !(Stack.pop building_current_string), (!current_string_start_pos, lexeme_end lexbuf) + +let ins t lexbuf = + Stack.push (Queue.create()) building_current_interpolated_string ; + Stack.push (ref "") building_current_string; + current_string_start_pos := lexeme_start lexbuf; + t lexbuf ; + next_interpolated [] ; + let _ = Stack.pop building_current_string in + queue2list (Stack.pop building_current_interpolated_string), (!current_string_start_pos, lexeme_end lexbuf) + +let raw_ins_to_string t lexbuf = + let s, pos = raw_ins t lexbuf in + not_ok_for_match := lexeme_end lexbuf; + RAW_STRING(s, pos) +let ins_to_string t lexbuf = + string_escape_useful := Left false ; + string_quote_escape := false ; + let s, pos = ins t lexbuf in + + if not !string_is_i18n then + (match !string_escape_useful, s with + | Right c, [ _, [] ] -> + let s = String.make 1 c in + warn_with_pos [Warn_suggest_simpler] pos ("you can replace \"xxx\\" ^ s ^ "xxx\" with 'xxx" ^ s ^ "xxx', that way you don't need to escape <" ^ s ^ ">") + | _ -> + if !string_quote_escape then + let full_s = String.concat "" (List.map fst s) in + let nb = string_fold_left (fun nb c -> + if nb < 0 then nb else + if c = '(' then nb + 1 else + if c = ')' then nb - 1 else nb + ) 0 full_s in + if nb = 0 then + warn_with_pos [Warn_suggest_simpler] pos "you can replace \"xxx\\\"xxx\" with qq(xxx\"xxx), that way you don't need to escape <\">" + ); + + not_ok_for_match := lexeme_end lexbuf; + string_is_i18n := false ; + STRING(s, pos) + +let next_s s t lexbuf = + let r = Stack.top building_current_string in r := !r ^ s ; + t lexbuf +let next t lexbuf = next_s (lexeme lexbuf) t lexbuf + +let ins_re re_delimited_string lexbuf = + let s, pos = ins re_delimited_string lexbuf in + List.iter (fun (s, _) -> + if str_contains s "[^\\s]" then warn [Warn_suggest_simpler] lexbuf "you can replace [^\\s] with \\S"; + if str_contains s "[^\\w]" then warn [Warn_suggest_simpler] lexbuf "you can replace [^\\w] with \\W" + ) s ; + s, pos + +let string_interpolate token pre lexbuf = + let s = lexeme lexbuf in + let local_lexbuf = Lexing.from_string (pre ^ s ^ " ") in (* add a space to help tokenizing "xxx$$" *) + local_lexbuf.lex_start_p <- lexbuf.lex_start_p ; + local_lexbuf.lex_curr_p <- lexbuf.lex_start_p ; + local_lexbuf.lex_abs_pos <- lexeme_start lexbuf ; + let l = lexbuf2list [] token local_lexbuf in + let l = concat_bareword_paren [] l in + next_interpolated l; + (Stack.pop next_rule) lexbuf + +let ident_type_from_char fq name lexbuf c = + not_ok_for_match := lexeme_end lexbuf; + match c with + | '$' -> SCALAR_IDENT(fq, name, pos lexbuf) + | '@' -> ARRAY_IDENT (fq, name, pos lexbuf) + | '%' -> HASH_IDENT (fq, name, pos lexbuf) + | '&' -> FUNC_IDENT (fq, name, pos lexbuf) + | '*' -> STAR_IDENT (fq, name, pos lexbuf) + | _ -> internal_error "ident_type_from_char" + +let split_at_two_colons s = + let i_fq = String.rindex s ':' in + String.sub s 0 (i_fq - 1), skip_n_char (i_fq + 1) s + +let ident_from_lexbuf lexbuf = + let fq, name = split_at_two_colons (lexeme lexbuf) in + RAW_IDENT(Some fq, name, pos lexbuf) + +let typed_ident_from_lexbuf lexbuf = + let s = lexeme lexbuf in + ident_type_from_char None (skip_n_char 1 s) lexbuf s.[0] + +let typed_fqident_from_lexbuf lexbuf = + let s = lexeme lexbuf in + let fq, name = split_at_two_colons (skip_n_char 1 s) in + ident_type_from_char (Some fq) name lexbuf s.[0] + +let arraylen_ident_from_lexbuf lexbuf = + not_ok_for_match := lexeme_end lexbuf; + let s = lexeme lexbuf in + ARRAYLEN_IDENT(None, skip_n_char 2 s, pos lexbuf) + +let arraylen_fqident_from_lexbuf lexbuf = + let s = lexeme lexbuf in + let fq, name = split_at_two_colons (skip_n_char 2 s) in + ARRAYLEN_IDENT(Some fq, name, pos lexbuf) + +let check_multi_line_delimited_string opts (start, end_) = + let check = + match opts with + | None -> true + | Some s -> not (String.contains s 'x') in + if check then + if !current_file_current_line <> !current_string_start_line then + failwith (pos2sfull_with start end_ ^ "multi-line patterns are not allowed (or use /x modifier)") + +let hex_in_string lexbuf next_rule s = + let i = + try int_of_string ("0x" ^ s) + with Failure("int_of_string") -> die_in_string lexbuf ("Bad_hex_in_string \"" ^ lexeme lexbuf ^ "\"") + in + let s = + if i < 256 then + String.make 1 (Char.chr i) + else + "\\x{" ^ s ^ "}" in + next_s s (Stack.pop next_rule) lexbuf + +let set_delimit_char lexbuf op = + let c = lexeme_char lexbuf (String.length op) in + delimit_char := c; + match c with + | '@' -> warn [Warn_complex_expressions] lexbuf ("don't use " ^ op ^ "@...@, replace @ with / ! , or |") + | ':' -> warn [Warn_complex_expressions] lexbuf ("don't use " ^ op ^ ":...:, replace : with / ! , or |") + | _ -> () + +let set_delimit_char_open lexbuf op = + let char_open = lexeme_char lexbuf (String.length op) in + let char_close = + match char_open with + | '(' -> ')' + | '{' -> '}' + | _ -> internal_error "set_delimit_char_open" + in + if op = "qx" then + warn [Warn_complex_expressions] lexbuf (Printf.sprintf "don't use qx%c...%c, use `...` instead" char_open char_close) + else if char_open = '{' then + warn [Warn_complex_expressions] lexbuf ("don't use " ^ op ^ "{...}, use " ^ op ^ "(...) instead"); + delimit_char_open := char_open; + delimit_char_close := char_close +} + +let stash = [ '$' '@' '%' '&' '*' ] +let ident_start = ['a'-'z' 'A'-'Z' '_'] +let ident = ident_start ['0'-'9' 'A'-'Z' 'a'-'z' '_'] * +let pattern_separator = [ '/' '!' ',' '|' '@' ':' ] +let pattern_open = [ '(' '{' ] +let pattern_close = [ ')' '}' ] + +let in_string_expr = (ident | (ident? ("::" ident)+)) "->"? (('{' [^ '{' '}' '\n']* '}') | ('[' [^ '[' ']' '\n']* ']'))* + +rule token = parse +| [' ' '\t']+ { + (* propagate not_ok_for_match when it was set by the previous token *) + if lexeme_start lexbuf = !not_ok_for_match then not_ok_for_match := lexeme_end lexbuf; + SPACE(lexeme_end lexbuf - lexeme_start lexbuf) + } +| "# perl_checker: " [^ '\n']* { PERL_CHECKER_COMMENT(skip_n_char 16 (lexeme lexbuf), pos lexbuf) } +| "#-PO: " [^ '\n']* { PO_COMMENT(skip_n_char 1 (lexeme lexbuf), pos lexbuf) } +| '#' [^ '\n']* { SPACE(1) } + +| "\n=" { + add_a_new_line(lexeme_end lexbuf - 1); + let _ = ins pod_command lexbuf in token lexbuf + } + +| '\n' { + add_a_new_line(lexeme_end lexbuf); + (try + let (mark, r) = Queue.pop here_docs in + current_here_doc_mark := mark ; + r := ins here_doc lexbuf + with Queue.Empty -> + try + let (mark, r) = Queue.pop raw_here_docs in + current_here_doc_mark := mark ; + r := raw_ins raw_here_doc lexbuf + with Queue.Empty -> ()); + CR + } +| "->" { ARROW(pos lexbuf) } +| "++" { INCR(pos lexbuf) } +| "--" { DECR(pos lexbuf) } +| "**" { POWER(pos lexbuf) } +| "!" { TIGHT_NOT(pos lexbuf) } +| "~" { BIT_NEG(pos lexbuf) } +| "=~" { PATTERN_MATCH(pos lexbuf) } +| "!~" { PATTERN_MATCH_NOT(pos lexbuf) } +| "*" { MULT(lexeme lexbuf, pos lexbuf) } +| "%" { MULT(lexeme lexbuf, pos lexbuf) } +| "x" { MULT_L_STR(pos lexbuf) } +| "+" { PLUS(lexeme lexbuf, pos lexbuf) } +| "-" { PLUS(lexeme lexbuf, pos lexbuf) } +| "." { CONCAT(pos lexbuf) } +| "<<" { BIT_SHIFT(lexeme lexbuf, pos lexbuf) } +| ">>" { BIT_SHIFT(lexeme lexbuf, pos lexbuf) } +| "<" { LT(pos lexbuf) } +| ">" { GT(pos lexbuf) } +| "<=" | ">=" { COMPARE_OP(lexeme lexbuf, pos lexbuf) } +| "lt" | "gt" | "le" | "ge" { COMPARE_OP_STR(lexeme lexbuf, pos lexbuf) } +| "==" | "!=" | "<=>" { EQ_OP(lexeme lexbuf, pos lexbuf) } +| "eq" | "ne" | "cmp" { EQ_OP_STR(lexeme lexbuf, pos lexbuf) } +| "&" { BIT_AND(pos lexbuf) } +| "|" { BIT_OR(pos lexbuf) } +| "^" { BIT_XOR(pos lexbuf) } +| "&&" { AND_TIGHT(pos lexbuf) } +| "||" { OR_TIGHT(pos lexbuf) } +| ".." { DOTDOT(lexeme lexbuf, pos lexbuf) } +| "..." { DOTDOT(lexeme lexbuf, pos lexbuf) } +| "?" { QUESTION_MARK(pos lexbuf) } +| ":" { COLON(pos lexbuf) } +| "::" { PKG_SCOPE(pos lexbuf) } + +| "=" | "+=" | "-=" | "*=" | ".=" | "|=" | "&=" | "^=" | "||=" | "&&=" { ASSIGN(lexeme lexbuf, pos lexbuf) } + +| "<<=" | ">>=" | "**=" { + warn [Warn_complex_expressions] lexbuf (Printf.sprintf "don't use \"%s\", use the expanded version instead" (lexeme lexbuf)) ; + ASSIGN(lexeme lexbuf, pos lexbuf) + } + +| "," { COMMA(pos lexbuf) } +| "=>" { RIGHT_ARROW(pos lexbuf) } +| "not" { NOT(pos lexbuf) } +| "and" { AND(pos lexbuf) } +| "or" { OR(pos lexbuf) } +| "xor" { XOR(pos lexbuf) } + +| "if" { IF(pos lexbuf) } +| "else" { ELSE(pos lexbuf) } +| "elsif" { ELSIF(pos lexbuf) } +| "unless" { UNLESS(pos lexbuf) } +| "do" { DO(pos lexbuf) } +| "while" { WHILE(pos lexbuf) } +| "until" { UNTIL(pos lexbuf) } +| "foreach" { FOR(lexeme lexbuf, pos lexbuf) } +| "for" { FOR(lexeme lexbuf, pos lexbuf) } +| "my" { MY_OUR(lexeme lexbuf, pos lexbuf) } +| "our" { MY_OUR(lexeme lexbuf, pos lexbuf) } +| "local" { LOCAL(pos lexbuf) } +| "continue" { CONTINUE(pos lexbuf) } +| "sub" { SUB(pos lexbuf) } +| "package" { PACKAGE(pos lexbuf) } +| "use" { USE(pos lexbuf) } +| "BEGIN" { BEGIN(pos lexbuf) } +| "END" { END(pos lexbuf) } +| "print" { PRINT(lexeme lexbuf, pos lexbuf) } +| "printf" { PRINT(lexeme lexbuf, pos lexbuf) } +| "new" { NEW(pos lexbuf) } +| "format" { let pos = pos lexbuf in FORMAT(here_doc_next_line ".", pos) } +| "delete" +| "defined" +| "length" +| "keys" +| "exists" +| "shift" +| "pop" +| "eval" +| "ref" { ONE_SCALAR_PARA(lexeme lexbuf, pos lexbuf) } + +| "split" +| "grep" { (* ok_for_match! *) BAREWORD(lexeme lexbuf, pos lexbuf) } + +| "print " ['A'-'Z'] ['A'-'Z' '0'-'9']* ['\n' ' '] { + putback lexbuf 1; + PRINT_TO_STAR(("print", skip_n_char 6 (lexeme lexbuf)), pos lexbuf) + } +| "print $" ident ['\n' ' '] { + putback lexbuf 1; + PRINT_TO_SCALAR(("print", skip_n_char 7 (lexeme lexbuf)), pos lexbuf); + } +| "printf " ['A'-'Z'] ['A'-'Z' '0'-'9']* ['\n' ' '] { + putback lexbuf 1; + PRINT_TO_STAR(("printf", skip_n_char 7 (lexeme lexbuf)), pos lexbuf) + } +| "printf $" ident ['\n' ' '] { + putback lexbuf 1; + PRINT_TO_SCALAR(("printf", skip_n_char 8 (lexeme lexbuf)), pos lexbuf); + } + +| ident ' '* "=>" { (* needed so that (if => 1) works *) + let s = lexeme lexbuf in + let end_ = String.length s - 1 in + let ident_end = non_rindex_from s (end_ - 2) ' ' in + putback lexbuf (end_ - ident_end); + BAREWORD(String.sub s 0 (ident_end+1), pos lexbuf) + } + +| "{" ident "}" { (* needed so that $h{if} works *) + not_ok_for_match := lexeme_end lexbuf; + COMPACT_HASH_SUBSCRIPT(skip_n_char_ 1 1 (lexeme lexbuf), pos lexbuf) + } + +| '@' { AT(pos lexbuf) } +| '$' { DOLLAR(pos lexbuf) } +| '$' '#' { ARRAYLEN(pos lexbuf) } +| '%' ['$' '{'] { putback lexbuf 1; PERCENT(pos lexbuf) } +| '&' ['$' '{'] { putback lexbuf 1; AMPERSAND(pos lexbuf) } +| '*' ['$' '{'] { putback lexbuf 1; if lexeme_start lexbuf = !not_ok_for_match then MULT("*", pos lexbuf) else STAR(pos lexbuf) } + + +| ';' { SEMI_COLON(pos lexbuf) } +| '(' { PAREN(pos lexbuf) } +| '{' { BRACKET(pos lexbuf) } +| "+{"{ BRACKET_HASHREF(pos lexbuf) } +| '[' { ARRAYREF(pos lexbuf) } +| ')' { not_ok_for_match := lexeme_end lexbuf; PAREN_END(pos lexbuf) } +| '}' { not_ok_for_match := lexeme_end lexbuf; BRACKET_END(pos lexbuf) } +| ']' { not_ok_for_match := lexeme_end lexbuf; ARRAYREF_END(pos lexbuf) } + +| "/" { + if lexeme_start lexbuf = !not_ok_for_match then MULT("/", pos lexbuf) + else ( + delimit_char := '/' ; + current_string_start_line := !current_file_current_line; + let s, pos = ins_re re_delimited_string lexbuf in + let opts, _ = raw_ins pattern_options lexbuf in + check_multi_line_delimited_string (Some opts) pos ; + PATTERN(s, opts, pos) + ) + } + +| "/=" { + if lexeme_start lexbuf = !not_ok_for_match then ASSIGN(lexeme lexbuf, pos lexbuf) + else ( + putback lexbuf 1 ; + delimit_char := '/' ; + let s, pos = ins_re re_delimited_string lexbuf in + let opts, _ = raw_ins pattern_options lexbuf in + PATTERN(s, opts, pos) + ) + } + +| "m" pattern_separator { + set_delimit_char lexbuf "m" ; + current_string_start_line := !current_file_current_line; + let s, pos = ins_re re_delimited_string lexbuf in + let opts, _ = raw_ins pattern_options lexbuf in + check_multi_line_delimited_string (Some opts) pos ; + PATTERN(s, opts, pos) +} + +| "qr" pattern_separator { + set_delimit_char lexbuf "qr" ; + current_string_start_line := !current_file_current_line; + let s, pos = ins_re re_delimited_string lexbuf in + let opts, _ = raw_ins pattern_options lexbuf in + check_multi_line_delimited_string (Some opts) pos ; + QR_PATTERN(s, opts, pos) +} + +| "qw" pattern_separator { + set_delimit_char lexbuf "qw" ; + current_string_start_line := !current_file_current_line; + let s, pos = raw_ins delimited_string lexbuf in + warn_with_pos [Warn_complex_expressions] pos (Printf.sprintf "don't use qw%c...%c, use qw(...) instead" !delimit_char !delimit_char) ; + QUOTEWORDS(s, pos) +} + +| "s" pattern_separator { + set_delimit_char lexbuf "s" ; + current_string_start_line := !current_file_current_line; + let s1, (start, _) = ins_re re_delimited_string lexbuf in + let s2, (_, end_) = ins delimited_string lexbuf in + let opts, _ = raw_ins pattern_options lexbuf in + let pos = start, end_ in + if String.contains opts 'e' && sum (List.map (fun (s, _) -> count_chars_in_string s '"') s2) > 2 then + die lexbuf ("do not write so complicated things in the eval part of s///,\n" ^ + "i generate wrong warnings for things like s/xxx/die \"yyy \\\"zzz\\\" \"/") ; + check_multi_line_delimited_string (Some opts) pos ; + PATTERN_SUBST(s1, s2, opts, pos) +} + +| "tr" pattern_separator { + set_delimit_char lexbuf "tr" ; + current_string_start_line := !current_file_current_line; + let s1, (start, _) = ins delimited_string lexbuf in + let s2, (_, end_) = ins delimited_string lexbuf in + let opts, _ = raw_ins pattern_options lexbuf in + let pos = start, end_ in + check_multi_line_delimited_string None pos ; + PATTERN_SUBST(s1, s2, opts, pos) +} + +| "<<" ident { + not_ok_for_match := lexeme_end lexbuf; + HERE_DOC(here_doc_next_line (skip_n_char 2 (lexeme lexbuf)), pos lexbuf) + } +| "<<\"" ident "\"" { + warn_with_pos [Warn_suggest_simpler] (lexeme_start lexbuf + 2, lexeme_end lexbuf) "Don't use <<\"MARK\", use <<MARK instead" ; + not_ok_for_match := lexeme_end lexbuf; + HERE_DOC(here_doc_next_line (skip_n_char_ 3 1 (lexeme lexbuf)), pos lexbuf) + } +| "<<'" ident "'" { + not_ok_for_match := lexeme_end lexbuf; + RAW_HERE_DOC(raw_here_doc_next_line (skip_n_char_ 3 1 (lexeme lexbuf)), pos lexbuf) + } +| "<<" ' '+ "'" +| "<<" ' '+ ident +| "<<" ' '* '"' { + failwith (pos2sfull_with (lexeme_start lexbuf + 2) (lexeme_end lexbuf) ^ "No space allowed between \"<<\" and the marker") + } + +| "\\"+ stash +| "\\" ['0'-'9' 'A'-'Z' 'a'-'z'] +| "\\" ' '* '(' + { lexbuf.lex_curr_pos <- lexbuf.lex_start_pos + 1; REF(pos lexbuf) } + +| "sub(" [ '$' '@' '\\' '&' ';' '%' ]* ')' { + SUB_WITH_PROTO(skip_n_char_ 4 1 (lexeme lexbuf), pos lexbuf) + } + +| "sub" ' '+ ident ' '* '(' [ '$' '@' '\\' '&' ';' '%' ]* ')' { + (* bloody prototypes, must be caught especially otherwise "($)" is badly tokenized *) + (* and alas "($@)" is both valid as an expression and a prototype *) + let s = lexeme lexbuf in + let ident_start = non_index_from s 3 ' ' in + + let proto_start = String.index_from s ident_start '(' in + let ident_end = non_rindex_from s (proto_start-1) ' ' in + let ident = String.sub s ident_start (ident_end - ident_start + 1) in + let prototype = skip_n_char_ (proto_start + 1) 1 s in + + FUNC_DECL_WITH_PROTO(None, ident, prototype, pos lexbuf) + } + +| "sub" ' '+ ident ("::" ident)+ ' '* '(' [ '$' '@' '\\' '&' ';' '%' ]* ')' { + (* bloody prototypes, must be caught especially otherwise "($)" is badly tokenized *) + (* and alas "($@)" is both valid as an expression and a prototype *) + let s = lexeme lexbuf in + let ident_start = non_index_from s 3 ' ' in + + let proto_start = String.index_from s ident_start '(' in + let ident_end = non_rindex_from s (proto_start-1) ' ' in + let ident = String.sub s ident_start (ident_end - ident_start + 1) in + let prototype = skip_n_char_ (proto_start + 1) 1 s in + + let fq, name = split_at_two_colons ident in + FUNC_DECL_WITH_PROTO(Some fq, name, prototype, pos lexbuf) + } + +| "$#" ident? ("::" ident)+ { arraylen_fqident_from_lexbuf lexbuf } +| "$#" ident { arraylen_ident_from_lexbuf lexbuf } + +| stash ident? ("::" ident)+ { typed_fqident_from_lexbuf lexbuf } +| stash ident +| '$' [^ '{' ' ' '\n' '$'] +| "$^" [^ '{' ' ' '\n'] { typed_ident_from_lexbuf lexbuf } + +| "$$" [^ 'a'-'z' 'A'-'Z' '_' '{'] { putback lexbuf 1; SCALAR_IDENT(None, "$", pos lexbuf) } + +| stash "::" { putback lexbuf 2; ident_type_from_char None "main" lexbuf (lexeme_char lexbuf 0) } + +| ident? ("::" ident)+ { ident_from_lexbuf lexbuf } +| ident { not_ok_for_match := lexeme_end lexbuf; + let word = lexeme lexbuf in + if word = "qx" then die lexbuf "don't use qx{...}, use `...` instead" else + BAREWORD(word, pos lexbuf) } + +| ident ":" { LABEL(lexeme lexbuf, pos lexbuf) } + +| '-' [ 'a'-'z' 'A'-'Z' ] [ ' ' '(' ';' ] { putback lexbuf 1; ONE_SCALAR_PARA(lexeme lexbuf, pos lexbuf) } + +| ['0'-'9'] ['0'-'9' '_']* '.' ['0'-'9'] ['0'-'9' '_']* ('.' ['0'-'9'] ['0'-'9' '_']*)+ +| 'v' ['0'-'9'] ['0'-'9' '_']* ('.' ['0'-'9'] ['0'-'9' '_']*)* + { + not_ok_for_match := lexeme_end lexbuf; + REVISION(lexeme lexbuf, pos lexbuf) + } + +| ['0'-'9']* '.' ['0'-'9']+ (['e' 'E']['-' '+']?['0'-'9']+)? { + not_ok_for_match := lexeme_end lexbuf; + FLOAT(lexeme lexbuf, pos lexbuf) + } +| ['0'-'9'] ['0'-'9' '_']* (['e' 'E']['-' '+']?['0'-'9']+)? +| "0x" ['0'-'9' 'a'-'f' 'A'-'F']+ { + not_ok_for_match := lexeme_end lexbuf; + INT(lexeme lexbuf, pos lexbuf) + } + +| 'N' '_'? "(\"" { string_is_i18n := true ; putback lexbuf 2 ; BAREWORD(lexeme lexbuf, pos lexbuf) } + +| '"' { ins_to_string string lexbuf } +| "'" { raw_ins_to_string rawstring lexbuf } +| '`' { delimit_char := '`'; + current_string_start_line := !current_file_current_line; + not_ok_for_match := lexeme_end lexbuf; + let s, pos = ins delimited_string lexbuf in + check_multi_line_delimited_string None pos ; + COMMAND_STRING(s, pos) } +| "q" pattern_open { set_delimit_char_open lexbuf "q"; raw_ins_to_string qstring lexbuf } +| "qq" pattern_open { set_delimit_char_open lexbuf "qq"; ins_to_string qqstring lexbuf } +| "qx" pattern_open { set_delimit_char_open lexbuf "qx"; ins_to_string qqstring lexbuf } +| "qw" pattern_open { set_delimit_char_open lexbuf "qw"; let s, pos = raw_ins qstring lexbuf in QUOTEWORDS(s, pos) } + +| "\n__END__" [^ '0'-'9' 'A'-'Z' 'a'-'z' '_'] +| "\n__DATA__" [^ '0'-'9' 'A'-'Z' 'a'-'z' '_'] +| eof { EOF(pos lexbuf) } +| _ { failwith (Printf.sprintf "%serror tokenizing <<%s>>" (pos2sfull lexbuf) (lexeme lexbuf)) } + +and string = parse +| '"' { () } +| '\\' { Stack.push string next_rule ; string_escape_kind := Double_quote; string_escape lexbuf } +| '$' { Stack.push string next_rule ; string_interpolate_scalar lexbuf } +| '@' { Stack.push string next_rule ; string_interpolate_array lexbuf } +| '\n' { + add_a_new_line(lexeme_end lexbuf); + next string lexbuf + } +| "'" { string_escape_useful := Left true ; next string lexbuf } +| [^ '\n' '\\' '"' '$' '@']+ { next string lexbuf } +| eof { die_in_string lexbuf "Unterminated_string" } + +and delimited_string = parse +| '\\' { Stack.push delimited_string next_rule ; string_escape_kind := Delimited; string_escape lexbuf } +| '$' { Stack.push delimited_string next_rule ; delimited_string_interpolate_scalar lexbuf } +| '@' { Stack.push delimited_string next_rule ; delimited_string_interpolate_array lexbuf } +| '\n' { + add_a_new_line(lexeme_end lexbuf); + next delimited_string lexbuf + } +| eof { die_in_string lexbuf "Unterminated_delimited_string" } +| [ ^ '\n' '\\' '$' '@'] { if lexeme_char lexbuf 0 <> !delimit_char then next delimited_string lexbuf } + +and re_delimited_string = parse +| '\\' { Stack.push re_delimited_string next_rule ; re_string_escape lexbuf } +| '$' { Stack.push re_delimited_string next_rule ; delimited_string_interpolate_scalar lexbuf } +| '@' { if lexeme_char lexbuf 0 <> !delimit_char then + (Stack.push re_delimited_string next_rule ; delimited_string_interpolate_array lexbuf) } +| '\n' { + add_a_new_line(lexeme_end lexbuf); + next re_delimited_string lexbuf + } +| eof { die_in_string lexbuf "Unterminated_delimited_string" } +| [ ^ '\n' '\\' '$' '@'] { if lexeme_char lexbuf 0 <> !delimit_char then next re_delimited_string lexbuf } + +and rawstring = parse +| ''' { () } +| '\n' { + add_a_new_line(lexeme_end lexbuf); + next rawstring lexbuf + } +| '\\' { next rawstring lexbuf } +| "\\'" { next_s "'" rawstring lexbuf } +| [^ '\n' ''' '\\']+ { next rawstring lexbuf } +| eof { die_in_string lexbuf "Unterminated_rawstring" } + +and qqstring = parse +| pattern_close { + if lexeme_char lexbuf 0 = !delimit_char_close then + if !string_nestness <> 0 then (decr string_nestness; next qqstring lexbuf) + else () + else next qstring lexbuf + } +| pattern_open { + if lexeme_char lexbuf 0 = !delimit_char_open then incr string_nestness; + next qqstring lexbuf + } +| '\\' { Stack.push qqstring next_rule ; string_escape_kind := Qq; string_escape lexbuf } +| '$' { Stack.push qqstring next_rule ; string_interpolate_scalar lexbuf } +| '@' { Stack.push qqstring next_rule ; string_interpolate_array lexbuf } +| '\n' { + add_a_new_line(lexeme_end lexbuf); + next qqstring lexbuf + } +| [^ '\n' '(' ')' '{' '}' '\\' '$' '@']+ { next qqstring lexbuf } +| eof { die_in_string lexbuf "Unterminated_qqstring" } + +and qstring = parse +| pattern_close { + if lexeme_char lexbuf 0 = !delimit_char_close then + if !string_nestness <> 0 then (decr string_nestness ; next qstring lexbuf) + else () + else next qstring lexbuf + } +| pattern_open { + if lexeme_char lexbuf 0 = !delimit_char_open then incr string_nestness; + next qstring lexbuf + } +| '\n' { + add_a_new_line(lexeme_end lexbuf); + next qstring lexbuf + } +| [^ '\n' '(' ')' '{' '}']+ { next qstring lexbuf } +| eof { die_in_string lexbuf "Unterminated_qstring" } + +and here_doc = parse +| '\\' { Stack.push here_doc next_rule ; string_escape_kind := Here_doc; string_escape lexbuf } +| '$' { Stack.push here_doc next_rule ; string_interpolate_scalar lexbuf } +| '@' { Stack.push here_doc next_rule ; string_interpolate_array lexbuf } +| [ ^ '\n' '\\' '$' '@' ]* { + let s = lexeme lexbuf in + if chomps s <> !current_here_doc_mark + then next_s s here_doc lexbuf + else if s <> !current_here_doc_mark then warn_with_pos [Warn_traps] (pos lexbuf) "Trailing spaces after HERE-document mark" + } +| '\n' { + add_a_new_line(lexeme_end lexbuf); + next here_doc lexbuf + } +| eof { die_in_string lexbuf "Unterminated_here_doc" } + +and raw_here_doc = parse +| [ ^ '\n' ]* { + let s = lexeme lexbuf in + if chomps s <> !current_here_doc_mark + then next_s s raw_here_doc lexbuf + else if s <> !current_here_doc_mark then warn_with_pos [Warn_traps] (pos lexbuf) "Trailing spaces after HERE-document mark" + } +| '\n' { + add_a_new_line(lexeme_end lexbuf); + next raw_here_doc lexbuf + } +| eof { die_in_string lexbuf "Unterminated_raw_here_doc" } + + +and string_escape = parse +| ['0'-'9'] { string_escape_useful := Left true; next_s (String.make 1 (Char.chr (int_of_string (lexeme lexbuf)))) (Stack.pop next_rule) lexbuf } +| 'n' { string_escape_useful := Left true; next_s "\n" (Stack.pop next_rule) lexbuf } +| 't' { string_escape_useful := Left true; next_s "\t" (Stack.pop next_rule) lexbuf } +| "x{" [^ '}']* '}' { string_escape_useful := Left true; hex_in_string lexbuf next_rule (skip_n_char_ 2 1 (lexeme lexbuf)) } +| 'x' [^ '{'] _ { string_escape_useful := Left true; hex_in_string lexbuf next_rule (skip_n_char 1 (lexeme lexbuf)) } +| '\n' { die lexbuf "do not use \"\\\" before end-of-line, it's useless and generally bad" } +| '\\'{ next_s "\\" (Stack.pop next_rule) lexbuf } +| 'Q' { + warn [Warn_complex_expressions] lexbuf ("don't use \\Q, use quotemeta instead"); + string_escape_useful := Left true; next_s ("\\" ^ lexeme lexbuf) (Stack.pop next_rule) lexbuf } +| ['b' 'f' 'a' 'r'] { string_escape_useful := Left true; next_s ("\\" ^ lexeme lexbuf) (Stack.pop next_rule) lexbuf } +| ['$' '@' '%' '{' '[' ':'] { + if !string_escape_useful = Left false then string_escape_useful := Right (lexeme_char lexbuf 0) ; + next_s (lexeme lexbuf) (Stack.pop next_rule) lexbuf + } +| _ { + let c = lexeme_char lexbuf 0 in + (match !string_escape_kind with + | Double_quote -> + if c <> '"' then + warn_escape_unneeded lexbuf c + else ( + if !string_escape_useful = Left false then string_escape_useful := Right c ; + string_quote_escape := true + ) + | Qq -> if c <> !delimit_char_open && c <> !delimit_char_close then warn_escape_unneeded lexbuf c + | Here_doc -> warn_escape_unneeded lexbuf c + | Delimited -> if c = !delimit_char then + warn [Warn_suggest_simpler] lexbuf ("change the delimit character " ^ String.make 1 !delimit_char ^ " to get rid of this escape") + else warn_escape_unneeded lexbuf c); + let s = if c = '"' then String.make 1 c else "\\" ^ String.make 1 c in + next_s s (Stack.pop next_rule) lexbuf + } + +and re_string_escape = parse +| ['0'-'9'] { next_s (String.make 1 (Char.chr (int_of_string (lexeme lexbuf)))) (Stack.pop next_rule) lexbuf } +| '\\'{ next_s "\\" (Stack.pop next_rule) lexbuf } +| 'n' { next_s "\n" (Stack.pop next_rule) lexbuf } +| 't' { next_s "\t" (Stack.pop next_rule) lexbuf } +| "x{" [^ '}']* '}' { hex_in_string lexbuf next_rule (skip_n_char_ 2 1 (lexeme lexbuf)) } +| 'x' [^ '{'] _ { hex_in_string lexbuf next_rule (skip_n_char 1 (lexeme lexbuf)) } +| '\n' { die lexbuf "do not use \"\\\" before end-of-line, it's useless and generally bad" } +| ['r' 'b' 'f' '$' '@' '%' 's' 'S' 'd' 'D' 'w' 'W' 'Q' 'E' 'b' 'Z' 'z' '^' '.' '*' '+' '?' '[' ']' '(' ')' '|' '{' '}' '-' ':'] { + next_s ("\\" ^ lexeme lexbuf) (Stack.pop next_rule) lexbuf + } +| _ { + let c = lexeme_char lexbuf 0 in + if c = !delimit_char then + warn [Warn_suggest_simpler] lexbuf ("change the delimit character " ^ String.make 1 !delimit_char ^ " to get rid of this escape") + else warn_escape_unneeded lexbuf c ; + next_s ("\\" ^ lexeme lexbuf) (Stack.pop next_rule) lexbuf + } + +and string_interpolate_scalar = parse +| '$' ident +| ['0'-'9'] +| '{' [^ '{' '}']* '}' +| in_string_expr +| [^ '{' '}' ' ' '\n' '"'] { (* eg: $! $$ *) + string_interpolate token "$" lexbuf + } + +| "{" +| ident "->"? '{' +| '"' { putback lexbuf 1; next_s "$" (Stack.pop next_rule) lexbuf } +| eof { next_s "$" (Stack.pop next_rule) lexbuf } +| _ { warn [Warn_strange] lexbuf (Printf.sprintf "weird \"%s\" in string" (lexeme lexbuf)); next_s ("$" ^ lexeme lexbuf) (Stack.pop next_rule) lexbuf } + +and delimited_string_interpolate_scalar = parse (* needed for delimited string like m!foo$! where $! should not be taken as is... *) +| '$' ident +| ['0'-'9'] +| '{' [^ '{' '}']* '}' +| (ident | (ident? ("::" ident)+)) "->"? ('{' [^ '{' '}' '\n']* '}')* +| (ident | (ident? ("::" ident)+)) "->"? (('{' [^ '{' '}' '\n']* '}') | ('[' ('$' ident | ['0'-'9']+) ']'))* + { + string_interpolate token "$" lexbuf + } + +| (ident | (ident? ("::" ident)+)) "->"? (('{' [^ '{' '}' '\n']* '}') | ('[' ['$' '0'-'9'] [^ '[' ']' '\n']* ']'))* + { + die lexbuf (Printf.sprintf "I really can't handle this, [xxx] can be indexing or not based on stellar position :-(") + } + +| "{" +| ident "->"? '{' +| eof { next_s "$" (Stack.pop next_rule) lexbuf } +| _ { + let c = lexeme_char lexbuf 0 in + if c <> !delimit_char && c <> '|' && c<>')' && c<>'/' && c<>' ' then warn [Warn_strange] lexbuf (Printf.sprintf "weird \"%s\" in string" (lexeme lexbuf)); + putback lexbuf 1; + next_s "$" (Stack.pop next_rule) lexbuf + } + +and string_interpolate_array = parse +| '$' ident +| '{' [^ '{' '}']* '}' +| in_string_expr { string_interpolate token "@" lexbuf } + +| [ '@' '*' '<' '>' ']' '.' '(' ' ' ] { next_s ("@" ^ lexeme lexbuf) (Stack.pop next_rule) lexbuf } +| '"' { putback lexbuf 1; next_s "@" (Stack.pop next_rule) lexbuf } +| eof { next_s "@" (Stack.pop next_rule) lexbuf } +| _ { warn [Warn_strange] lexbuf (Printf.sprintf "weird \"%s\" in string" (lexeme lexbuf)); next_s ("@" ^ lexeme lexbuf) (Stack.pop next_rule) lexbuf } + +and delimited_string_interpolate_array = parse +| '$' ident +| '{' [^ '{' '}']* '}' +| in_string_expr + { string_interpolate token "@" lexbuf } + +| [ '@' '*' '<' '>' ']' '.' '(' ' ' ] { next_s ("@" ^ lexeme lexbuf) (Stack.pop next_rule) lexbuf } +| eof { next_s "@" (Stack.pop next_rule) lexbuf } +| _ { + let c = lexeme_char lexbuf 0 in + if c <> !delimit_char then warn [Warn_strange] lexbuf (Printf.sprintf "weird \"%s\" in string" (lexeme lexbuf)); + putback lexbuf 1; + next_s "@" (Stack.pop next_rule) lexbuf + } + +and pattern_options = parse +| [ 'g' 'i' 'm' 'o' 's' 'x' 'e' 'd' ] { next pattern_options lexbuf } +| _ { putback lexbuf 1; () } + +and pod_command = parse +| [^ '\n' ]+ { + let s = lexeme lexbuf in + let command = String.sub s 0 (try String.index s ' ' with Not_found -> String.length s) in + match command with + | "cut" -> + if !(Stack.top building_current_string) = "" then + failwith(pos2sfull lexbuf ^ "found POD command \"=cut\" but it is not a POD block") + | "head1" | "head2" | "head3" | "head4" | "over" | "item" | "back" | "pod" | "begin" | "end" | "for" -> + next pod lexbuf + | s -> failwith(pos2sfull lexbuf ^ "unknown POD command \"" ^ s ^ "\"") + } +| _ { failwith(pos2sfull lexbuf ^ "POD command expected") } + +and pod = parse +| "\n=" { + add_a_new_line(lexeme_end lexbuf - 1); + next pod_command lexbuf + } +| "\n" [^ '=' '\n'] [^ '\n']* +| "\n" { + add_a_new_line(lexeme_end lexbuf); + next pod lexbuf + } +| eof +| _ { failwith(pos2sfull_with !current_string_start_pos (lexeme_end lexbuf) ^ "POD block still open") } diff --git a/src/parser.mly b/src/parser.mly new file mode 100644 index 0000000..a9bf396 --- /dev/null +++ b/src/parser.mly @@ -0,0 +1,500 @@ +%{ (* -*- caml -*- *) + open Types + open Common + open Parser_helper + + let parse_error msg = die_rule msg + let prog_ref = ref None + let to_String e = Parser_helper.to_String (some !prog_ref) e + let from_PATTERN e = Parser_helper.from_PATTERN (some !prog_ref) e + let from_PATTERN_SUBST e = Parser_helper.from_PATTERN_SUBST (some !prog_ref) e +%} + + +%token <unit Types.any_spaces_pos> EOF +%token <string Types.any_spaces_pos> NUM RAW_STRING BAREWORD BAREWORD_PAREN REVISION COMMENT POD LABEL PO_COMMENT PERL_CHECKER_COMMENT ONE_SCALAR_PARA +%token <(string * string) Types.any_spaces_pos> PRINT_TO_STAR PRINT_TO_SCALAR +%token <string Types.any_spaces_pos> QUOTEWORDS COMPACT_HASH_SUBSCRIPT +%token <(string * Types.raw_pos) Types.any_spaces_pos> RAW_HERE_DOC +%token <(string * ((int * int) * token) list) list Types.any_spaces_pos> STRING COMMAND_STRING +%token <((string * ((int * int) * token) list) list * Types.raw_pos) Types.any_spaces_pos> HERE_DOC FORMAT + +%token <((string * ((int * int) * token) list) list * string) Types.any_spaces_pos> PATTERN QR_PATTERN +%token <((string * ((int * int) * token) list) list * (string * ((int * int) * token) list) list * string) Types.any_spaces_pos> PATTERN_SUBST + +%token <(string option * string) Types.any_spaces_pos> SCALAR_IDENT ARRAY_IDENT HASH_IDENT FUNC_IDENT STAR_IDENT RAW_IDENT RAW_IDENT_PAREN ARRAYLEN_IDENT +%token <string Types.any_spaces_pos> SUB_WITH_PROTO +%token <(string option * string * string) Types.any_spaces_pos> FUNC_DECL_WITH_PROTO + +%token <string Types.any_spaces_pos> FOR PRINT +%token <unit Types.any_spaces_pos> NEW +%token <string Types.any_spaces_pos> COMPARE_OP COMPARE_OP_STR EQ_OP EQ_OP_STR +%token <string Types.any_spaces_pos> ASSIGN MY_OUR + +%token <unit Types.any_spaces_pos> IF ELSIF ELSE UNLESS DO WHILE UNTIL CONTINUE SUB LOCAL +%token <unit Types.any_spaces_pos> USE PACKAGE BEGIN END +%token <unit Types.any_spaces_pos> AT DOLLAR PERCENT AMPERSAND STAR ARRAYLEN +%token <unit Types.any_spaces_pos> SEMI_COLON PKG_SCOPE +%token <unit Types.any_spaces_pos> PAREN PAREN_END +%token <unit Types.any_spaces_pos> BRACKET BRACKET_END BRACKET_HASHREF +%token <unit Types.any_spaces_pos> ARRAYREF ARRAYREF_END + +%token <unit Types.any_spaces_pos> ARROW +%token <unit Types.any_spaces_pos> INCR DECR +%token <unit Types.any_spaces_pos> POWER +%token <unit Types.any_spaces_pos> TIGHT_NOT BIT_NEG REF +%token <unit Types.any_spaces_pos> PATTERN_MATCH PATTERN_MATCH_NOT +%token <string Types.any_spaces_pos> MULT +%token <string Types.any_spaces_pos> PLUS +%token <string Types.any_spaces_pos> BIT_SHIFT +%token <unit Types.any_spaces_pos> LT GT CONCAT MULT_L_STR +%token <unit Types.any_spaces_pos> BIT_AND +%token <unit Types.any_spaces_pos> BIT_OR BIT_XOR +%token <unit Types.any_spaces_pos> AND_TIGHT +%token <unit Types.any_spaces_pos> OR_TIGHT +%token <string Types.any_spaces_pos> DOTDOT +%token <unit Types.any_spaces_pos> QUESTION_MARK COLON +%token <unit Types.any_spaces_pos> COMMA RIGHT_ARROW +%token <unit Types.any_spaces_pos> NOT +%token <unit Types.any_spaces_pos> AND +%token <unit Types.any_spaces_pos> OR XOR + +%nonassoc PREC_LOW +%nonassoc LOOPEX + +%right OR XOR +%right AND +%right NOT +%nonassoc LSTOP +%left COMMA RIGHT_ARROW + +%right ASSIGN +%right QUESTION_MARK COLON +%nonassoc DOTDOT +%left OR_TIGHT +%left AND_TIGHT +%left BIT_OR BIT_XOR +%left BIT_AND +%nonassoc EQ_OP EQ_OP_STR +%nonassoc LT GT COMPARE_OP COMPARE_OP_STR +%nonassoc UNIOP ONE_SCALAR_PARA +%left BIT_SHIFT +%left PLUS CONCAT +%left MULT MULT_L_STR +%left PATTERN_MATCH PATTERN_MATCH_NOT +%right TIGHT_NOT BIT_NEG REF UNARY_MINUS +%right POWER +%nonassoc INCR DECR +%left ARROW + +%nonassoc PAREN_END +%left PAREN PREC_HIGH +%left ARRAYREF BRACKET + +%type <Types.fromparser list> prog +%type <prio_expr_spaces_pos> expr term +%type <fromparser any_spaces_pos> scalar bracket_subscript variable restricted_subscripted + +%start prog + + +%% +prog: lines EOF {fst $1.any} + +lines: /* A collection of "lines" in the program */ +| { default_esp ([], true) } +| sideff { new_1esp ([$1.any], false) $1 } +| line lines { if fst $2.any <> [] then mcontext_check_none "value is dropped" $1.any $1; new_esp $2.mcontext ($1.any @ fst $2.any, snd $2.any) $1 $2 } + +line: +| decl { new_1esp [$1.any] $1 } +| if_then_else { new_1esp [$1.any] $1 } +| loop { new_1esp [$1.any] $1 } +| LABEL { sp_cr($1); new_1esp [Label $1.any] $1 } +| PERL_CHECKER_COMMENT {sp_p($1); new_1esp [Perl_checker_comment($1.any, get_pos $1)] $1 } +| semi_colon {warn_rule [Warn_white_space] "unneeded \";\""; new_1esp [Semi_colon] $1 } +| sideff semi_colon {new_1esp [$1.any ; Semi_colon] $1 } +| BRACKET lines BRACKET_END {new_esp $2.mcontext [lines_to_Block $2 $3] $1 $3} + +if_then_else: /* Real conditional expressions */ +| IF PAREN expr PAREN_END BRACKET lines BRACKET_END elsif else_ {sp_p($1); sp_n($2); sp_0($3); sp_0_or_cr($4); sp_p($5); mcontext_check M_bool $3; to_Call_op (if $9.any = [] then M_none else mcontext_lmerge ($6.mcontext :: mcontext_lmaybe $8 @ [$9.mcontext])) "if" (prio_lo P_loose $3 :: lines_to_Block $6 $7 :: $8.any @ $9.any) $1 $9} +| UNLESS PAREN expr PAREN_END BRACKET lines BRACKET_END elsif else_ {sp_p($1); sp_n($2); sp_0($3); sp_0_or_cr($4); sp_p($5); mcontext_check M_bool $3; check_unless_else $8 $9; to_Call_op M_none "unless" (prio_lo P_loose $3 :: lines_to_Block $6 $7 :: $8.any @ $9.any) $1 $9} + +elsif: +| {default_esp []} +| ELSIF PAREN expr PAREN_END BRACKET lines BRACKET_END elsif {sp_p($1); sp_n($2); sp_0($3); sp_0_or_cr($4); sp_p($5); mcontext_check M_bool $3; new_esp (mcontext_lmerge ($6.mcontext :: mcontext_lmaybe $8)) (prio_lo P_loose $3 :: lines_to_Block $6 $7 :: $8.any) $1 $8} + +else_: +| { default_esp [] } +| ELSE BRACKET lines BRACKET_END {sp_p($1); sp_n($2); new_esp $3.mcontext [lines_to_Block $3 $4] $1 $4} + +loop: +| WHILE PAREN expr PAREN_END BRACKET lines BRACKET_END cont {sp_p($1); sp_n($2); sp_0($3); sp_0_or_cr($4); sp_p($5); mcontext_check M_bool $3; to_Call_op M_none "while" [ prio_lo P_loose $3; lines_to_Block $6 $7 ] $1 $8} +| UNTIL PAREN expr PAREN_END BRACKET lines BRACKET_END cont {sp_p($1); sp_n($2); sp_0($3); sp_0_or_cr($4); sp_p($5); mcontext_check M_bool $3; to_Call_op M_none "until" [ prio_lo P_loose $3; lines_to_Block $6 $7 ] $1 $8} +| FOR PAREN expr_or_empty semi_colon expr_or_empty semi_colon expr_or_empty PAREN_END BRACKET lines BRACKET_END {sp_p($1); check_for($1); sp_n($2); sp_0($3); sp_p($5); sp_p($7); sp_0($8); sp_n($9); to_Call_op M_none "for" [ $3.any; $5.any; $7.any; lines_to_Block $10 $11 ] $1 $11} +| FOR SCALAR_IDENT PAREN expr PAREN_END BRACKET lines BRACKET_END cont { warn_rule [Warn_normalized_expressions] "don't use for without \"my\"ing the iteration variable"; sp_p($1); sp_0($4); sp_0_or_cr($5); sp_p($6); mcontext_check M_list $4; to_Call_op M_none "foreach" [ prio_lo P_loose $4; lines_to_Block $7 $8 ] $1 $9} +| FOR PAREN expr PAREN_END BRACKET lines BRACKET_END cont {sp_p($1); sp_n($2); sp_0($3); sp_0_or_cr($4); sp_p($5); mcontext_check M_list $3; check_for_foreach $1 $3; to_Call_op M_none "foreach" [ prio_lo P_loose $3; lines_to_Block $6 $7 ] $1 $8} +| for_my lines BRACKET_END cont { to_Call_op M_none "foreach my" ($1.any @ [ lines_to_Block $2 $3 ]) $1 $4} + +for_my: +| FOR MY_OUR SCALAR_IDENT PAREN expr PAREN_END BRACKET {sp_p($1); check_my($2); check_foreach($1); sp_n($4); sp_0($5); sp_0_or_cr($6); sp_p($7); new_esp M_none [ My_our($2.any, [I_scalar, snd $3.any], get_pos $3); prio_lo P_loose $5 ] $1 $7} + + +cont: /* Continue blocks */ +| {default_esp ()} +| CONTINUE BRACKET lines BRACKET_END {sp_p($1); sp_n($2); check_block_lines $3 $4; new_esp $3.mcontext () $1 $4} + +sideff: /* An expression which may have a side-effect */ +| expr { new_1esp $1.any.expr $1 } +| expr IF expr {sp_p($2); sp_p($3); mcontext_check M_bool $3; call_op_if_infix (prio_lo P_loose $1) (prio_lo P_loose $3) $1 $3} +| expr UNLESS expr {sp_p($2); sp_p($3); mcontext_check M_bool $3; call_op_unless_infix (prio_lo P_loose $1) (prio_lo P_loose $3) $1 $3} +| expr WHILE expr {sp_p($2); sp_p($3); mcontext_check M_bool $3; to_Call_op M_none "while infix" [ prio_lo P_loose $1 ; prio_lo P_loose $3 ] $1 $3} +| expr UNTIL expr {sp_p($2); sp_p($3); mcontext_check M_bool $3; to_Call_op M_none "until infix" [ prio_lo P_loose $1 ; prio_lo P_loose $3 ] $1 $3} +| expr FOR expr {sp_p($2); sp_p($3); mcontext_check M_list $3; check_foreach($2); to_Call_op M_none "for infix" [ prio_lo P_loose $1 ; prio_lo P_loose $3 ] $1 $3} + +decl: +| FORMAT BAREWORD ASSIGN {to_Call_op M_none "format" [Raw_string($2.any, get_pos $2) ; to_String false (new_1esp (fst $1.any) $1)] $1 $3} +| FORMAT ASSIGN {new_esp M_none Too_complex $1 $2} +| func_decl semi_colon {if snd $1.any = None then die_rule "there is no need to pre-declare in Perl!" else (warn_rule [Warn_normalized_expressions] "please don't use prototype pre-declaration" ; new_esp M_special Too_complex $1 $2) } +| func_decl BRACKET BRACKET_END {sp_n($2); sp_0_or_cr($3); let name, proto = $1.any in new_esp M_none (sub_declaration (name, proto) [] Real_sub_declaration) $1 $3} +| func_decl BRACKET lines BRACKET_END {sp_n($2); check_block_lines $3 $4; new_esp M_none (sub_declaration $1.any (fst $3.any) Real_sub_declaration) $1 $4} +| func_decl BRACKET BRACKET expr BRACKET_END BRACKET_END {sp_n($2); sp_p($3); sp_p($4); sp_p($5); check_block_expr false Undef $5 $6; new_esp M_none (sub_declaration $1.any [hash_ref $4] Real_sub_declaration) $1 $6} +| func_decl BRACKET BRACKET expr BRACKET_END semi_colon BRACKET_END {sp_n($2); sp_p($3); sp_p($4); sp_p($5); check_block_expr true Semi_colon $6 $7; new_esp M_none (sub_declaration $1.any [hash_ref $4; Semi_colon] Real_sub_declaration) $1 $7} +| PACKAGE word semi_colon {sp_0_or_cr($1); sp_1($2); new_esp M_none (Package $2.any) $1 $3} +| BEGIN BRACKET lines BRACKET_END {sp_0_or_cr($1); sp_1($2); new_esp M_none (Sub_declaration(Ident(None, "BEGIN", get_pos $1), None, lines_to_Block $3 $4, Glob_assign)) $1 $4} +| END BRACKET lines BRACKET_END {sp_0_or_cr($1); sp_1($2); new_esp M_none (Sub_declaration(Ident(None, "END", get_pos $1), None, lines_to_Block $3 $4, Glob_assign)) $1 $4} +| use {$1} + +use: +| use_word listexpr semi_colon {sp_n($2); new_esp M_none (Use($1.any, $2.any.expr)) $1 $3} +| use_revision word_paren PAREN listexpr PAREN_END {sp_0($4); sp_0_or_cr($5); new_esp M_none (Use($2.any, $4.any.expr)) $1 $5} + +use_word: +| use_revision word comma {new_esp M_none $2.any $1 $3} +| use_revision word {new_esp M_none $2.any $1 $2} +| use_revision {new_1esp Undef $1 } + +use_revision: +| USE REVISION comma {$1} +| USE REVISION {$1} +| USE {$1} + +func_decl: +| SUB word { new_esp M_none ($2.any, None) $1 $2} +| SUB ONE_SCALAR_PARA { new_esp M_none (Ident(None, $2.any, get_pos $2), None) $1 $2} +| SUB BAREWORD_PAREN PAREN PAREN_END { warn_rule [Warn_white_space] "remove carriage return between \"sub\" and the function name"; new_esp M_none (Ident(None, $2.any, get_pos $2), Some "") $1 $4 } +| FUNC_DECL_WITH_PROTO {new_1esp (Ident(fst3 $1.any, snd3 $1.any, get_pos $1), Some (ter3 $1.any)) $1 } + +listexpr: /* Basic list expressions */ +| %prec PREC_LOW { default_pesp P_tok []} +| argexpr %prec PREC_LOW {$1} + +expr: /* Ordinary expressions; logical combinations */ +| expr AND expr {sp_p($2); sp_p($3); mcontext_check M_bool $1; mcontext_check_none "value should be dropped" [$3.any.expr] $3; to_Call_op_ M_none P_and "and" [ prio_lo P_and $1; prio_lo_after P_and $3 ] $1 $3} +| expr OR expr {sp_p($2); sp_p($3); mcontext_check M_bool $1; mcontext_check_none "value should be dropped" [$3.any.expr] $3; to_Call_op_ M_none P_or "or" [ prio_lo P_or $1; prio_lo_after P_or $3 ] $1 $3} +| argexpr %prec PREC_LOW { new_1pesp $1.any.priority (List $1.any.expr) $1 } + +argexpr: /* Expressions are a list of terms joined by commas */ +| argexpr comma { new_pesp $1.mcontext P_comma $1.any.expr $1 $2} +| bareword RIGHT_ARROW term {if not_simple ($3.any.expr) then sp_p($3); new_pesp (mtuple_context_concat M_string $3.mcontext) P_comma (followed_by_comma [$1.any] false @ [$3.any.expr]) $1 $3} +| bareword RIGHT_ARROW BRACKET expr BRACKET_END {sp_p($3); sp_p($5); new_pesp (mtuple_context_concat M_string (M_ref M_hash)) P_comma (followed_by_comma [$1.any] false @ [ hash_ref $4 ]) $1 $5} +| argexpr comma term {prio_lo_check P_comma $1.any.priority $1.pos (last $1.any.expr); if not_simple ($3.any.expr) then sp_p($3); new_pesp (mtuple_context_concat $1.mcontext $3.mcontext) P_comma (followed_by_comma $1.any.expr $2.any @ [$3.any.expr]) $1 $3} +| argexpr comma BRACKET expr BRACKET_END {sp_p($3); sp_p($5); new_pesp (mtuple_context_concat $1.mcontext (M_ref M_hash)) P_comma (followed_by_comma $1.any.expr $2.any @ [ hash_ref $4 ]) $1 $5} +| term %prec PREC_LOW { new_1pesp $1.any.priority [$1.any.expr] $1 } + +/********************************************************************************/ +term: +| term + COMPARE_OP_STR term {sp_p $2; symops P_cmp M_string M_bool $2.any $1 $2 $3} +| term COMPARE_OP term {sp_p $2; symops P_cmp M_float M_bool $2.any $1 $2 $3} +| term LT term {sp_p $2; symops P_cmp M_float M_bool "<" $1 $2 $3} +| term GT term {sp_p $2; symops P_cmp M_float M_bool ">" $1 $2 $3} +| term EQ_OP term {sp_p $2; symops P_eq M_float M_bool $2.any $1 $2 $3} +| term EQ_OP_STR term {sp_p $2; symops P_eq M_string M_bool $2.any $1 $2 $3} + +| term BIT_AND term {sp_p $2; symops P_bit M_int M_int "&" $1 $2 $3} +| term BIT_OR term { symops P_bit M_int M_int "|" $1 $2 $3} +| term BIT_XOR term {sp_p $2; symops P_bit M_int M_int "^" $1 $2 $3} + +| term POWER term { symops P_tight M_float (mcontext_float_or_int [$1.mcontext; $3.mcontext]) "**" $1 $2 $3} +| term PLUS term { symops P_add M_float (mcontext_float_or_int [$1.mcontext; $3.mcontext]) $2.any $1 $2 $3} +| term CONCAT term {sp_p $2; symops P_add M_string M_string "." $1 $2 $3} +| term BIT_SHIFT term { symops (P_paren_wanted P_tight) M_int M_int $2.any $1 $2 $3} +| term XOR term {sp_p $2; symops (P_paren_wanted P_expr) M_bool M_bool "xor" $1 $2 $3} +| term DOTDOT term { symops (P_paren_wanted P_expr) M_unknown_scalar M_string $2.any $1 $2 $3} + +| term AND_TIGHT term {sp_p $2; sp_same $2 $3; mcontext_check M_bool $1; let pri = P_tight_and in to_Call_op_ (mcontext_to_scalar $3.mcontext) pri "&&" [prio_lo pri $1; prio_lo_after pri $3] $1 $3} +| term OR_TIGHT term {sp_p $2; sp_same $2 $3; mcontext_check M_bool $1; let pri = P_tight_or in to_Call_op_ (mcontext_to_scalar (mcontext_merge $1.mcontext $3.mcontext)) pri "||" [prio_lo pri $1; prio_lo_after pri $3] $1 $3} + +| term MULT term {sp_same $2 $3; let pri = P_mul in to_Call_op_ (mcontext_float_or_int [$1.mcontext; $3.mcontext]) pri $2.any [prio_lo_concat $1; prio_lo_after pri $3] $1 $3} +| term MULT_L_STR term {sp_same $2 $3; mcontext_check M_int $3; let pri = P_mul in to_Call_op_ (if mcontext_lower $1.mcontext M_string then M_string else M_list) pri "x" + [prio_lo_concat $1; prio_lo_after pri $3] $1 $3} + +| term ASSIGN term {sp_same $2 $3; let pri = P_assign in to_Call_assign_op_ (mcontext_op_assign $1 $3) pri $2.any ($1.any.expr) (prio_lo_after pri $3) $1 $3} + +| term ASSIGN BRACKET expr_bracket_end {sp_p($2); sp_p($3); sp_p($4); to_Call_assign_op_ (M_mixed [M_ref M_hash; M_none]) P_assign $2.any (prio_lo P_assign $1) $4.any $1 $4} +| term AND_TIGHT BRACKET expr_bracket_end {sp_p($2); sp_p($3); sp_p($4); to_Call_op_ M_bool P_tight_and "&&" [prio_lo P_assign $1; $4.any] $1 $4} +| term OR_TIGHT BRACKET expr_bracket_end {sp_p($2); sp_p($3); sp_p($4); to_Call_op_ M_bool P_tight_or "||" [prio_lo P_assign $1; $4.any] $1 $4} + + +| term PATTERN_MATCH PATTERN {sp_n($2); sp_p($3); check_unneeded_var_dollar_ ($1); mcontext_check M_string $1; to_Call_op_ M_array P_expr "m//" ($1.any.expr :: from_PATTERN $3) $1 $3} +| term PATTERN_MATCH_NOT PATTERN {sp_n($2); sp_p($3); check_unneeded_var_dollar_not($1); mcontext_check M_string $1; to_Call_op_ M_int P_expr "!m//" ($1.any.expr :: from_PATTERN $3) $1 $3} +| term PATTERN_MATCH PATTERN_SUBST {sp_n($2); sp_p($3); check_unneeded_var_dollar_s ($1); to_Call_op_ (M_mixed[M_none; M_int]) P_expr "s///" ($1.any.expr :: from_PATTERN_SUBST $3) $1 $3} +| term PATTERN_MATCH_NOT PATTERN_SUBST {die_with_rawpos $2.pos "use =~ instead of !~ and negate the return value"} + +| term PATTERN_MATCH QR_PATTERN {warn [Warn_traps] $3.pos "use m/.../ or /.../ instead of qr/.../ when you do a pattern matching"; to_Call_op_ M_array P_expr "m//" ($1.any.expr :: from_PATTERN $3) $1 $3} +| term PATTERN_MATCH_NOT QR_PATTERN {warn [Warn_traps] $3.pos "use m/.../ or /.../ instead of qr/.../ when you do a pattern matching"; to_Call_op_ M_int P_expr "!m//" ($1.any.expr :: from_PATTERN $3) $1 $3} +| term PATTERN_MATCH scalar { new_pesp M_array P_expr (Call(Too_complex, [$1.any.expr ; $3.any ])) $1 $3} +| term PATTERN_MATCH_NOT scalar { new_pesp M_int P_expr (Call(Too_complex, [$1.any.expr ; $3.any ])) $1 $3} + +| term PATTERN_MATCH RAW_STRING {warn [Warn_complex_expressions] $3.pos "use a regexp, not a string"; to_Call_op_ M_array P_expr "m//" [ $1.any.expr; to_Raw_string $3 ] $1 $3} +| term PATTERN_MATCH_NOT RAW_STRING {warn [Warn_complex_expressions] $3.pos "use a regexp, not a string"; to_Call_op_ M_int P_expr "!m//" [ $1.any.expr; to_Raw_string $3 ] $1 $3} +| term PATTERN_MATCH STRING {warn [Warn_complex_expressions] $3.pos "use a regexp, not a string"; to_Call_op_ M_array P_expr "m//" [ $1.any.expr; to_String false $3 ] $1 $3} +| term PATTERN_MATCH_NOT STRING {warn [Warn_complex_expressions] $3.pos "use a regexp, not a string"; to_Call_op_ M_int P_expr "!m//" [ $1.any.expr; to_String false $3 ] $1 $3} + + +| term QUESTION_MARK term COLON term {sp_p($2); sp_p($3); sp_p($4); sp_p($5); mcontext_check M_bool $1; to_Call_op_ (mcontext_merge $3.mcontext $5.mcontext) P_ternary "?:" (check_ternary_paras(prio_lo P_ternary $1, prio_lo_after P_ternary $3, prio_lo_after P_ternary $5)) $1 $5} +| term QUESTION_MARK term COLON BRACKET expr BRACKET_END {sp_p($2); sp_p($3); sp_p($4); sp_p($5); sp_p($6); sp_p($7); mcontext_check M_bool $1; to_Call_op_ (mcontext_merge $3.mcontext (M_ref M_hash)) P_ternary "?:" (check_ternary_paras(prio_lo P_ternary $1, prio_lo_after P_ternary $3, hash_ref $6)) $1 $7} +| term QUESTION_MARK BRACKET expr BRACKET_END COLON term {sp_p($2); sp_p($3); sp_p($4); sp_p($5); sp_p($6); sp_p($7); mcontext_check M_bool $1; to_Call_op_ (mcontext_merge $7.mcontext (M_ref M_hash)) P_ternary "?:" (check_ternary_paras(prio_lo P_ternary $1, hash_ref $4, prio_lo_after P_ternary $7)) $1 $7} +| term QUESTION_MARK BRACKET expr BRACKET_END COLON BRACKET expr BRACKET_END {sp_p($2); sp_p($3); sp_p($4); sp_p($5); sp_p($6); sp_p($7); sp_p($8); sp_p($9); mcontext_check M_bool $1; to_Call_op_ (M_ref M_hash) P_ternary "?:" (check_ternary_paras(prio_lo P_ternary $1, hash_ref $4, hash_ref $8)) $1 $9} + +/* Unary operators and terms */ +| PLUS term %prec UNARY_MINUS { + sp_0($2); + match $1.any with + | "+" -> + warn_rule [Warn_normalized_expressions] "don't use unary +" ; + to_Call_op_ (mcontext_float_or_int [$2.mcontext]) P_tight "+ unary" [$2.any.expr] $1 $2 + | "-" -> + (match $2.any.expr with + | Ident(_, _, pos) when $2.spaces = Space_0 -> + let s = "-" ^ string_of_fromparser $2.any.expr in + warn_rule [Warn_complex_expressions] (Printf.sprintf "don't use %s, use '%s' instead" s s); + new_pesp M_string P_tok (Raw_string(s, pos)) $1 $2 + | _ -> to_Call_op_ (mcontext_float_or_int [$2.mcontext]) P_tight "- unary" [$2.any.expr] $1 $2) + | _ -> die_rule "syntax error" +} +| TIGHT_NOT term {check_negatable_expr $2; mcontext_check M_bool $2; to_Call_op_ M_bool P_tight "not" [$2.any.expr] $1 $2} +| BIT_NEG term { mcontext_check M_int $2; to_Call_op_ M_int P_expr "~" [$2.any.expr] $1 $2} +| INCR term {sp_0($2); mcontext_check M_int $2; to_Call_op_ (M_mixed [M_int ; M_none]) P_tight "++" [$2.any.expr] $1 $2} +| DECR term {sp_0($2); mcontext_check M_int $2; to_Call_op_ (M_mixed [M_int ; M_none]) P_tight "--" [$2.any.expr] $1 $2} +| term INCR {sp_0($2); mcontext_check M_int $1; to_Call_op_ (M_mixed [M_int ; M_none]) P_tight "++ post" [$1.any.expr] $1 $2} +| term DECR {sp_0($2); mcontext_check M_int $1; to_Call_op_ (M_mixed [M_int ; M_none]) P_tight "-- post" [$1.any.expr] $1 $2} +| NOT argexpr {warn_rule [Warn_normalized_expressions] "don't use \"not\", use \"!\" instead"; mcontext_check_unop_l M_bool $2; to_Call_op_ M_bool P_and "not" ($2.any.expr) $1 $2} + +/* Constructors for anonymous data */ + +| ARRAYREF ARRAYREF_END {sp_0($2); new_pesp (M_ref M_array) P_expr (Ref(I_array, List[])) $1 $2} +| arrayref_start ARRAYREF_END {(if $1.any = [] then sp_0 else sp_p)($2) ; new_pesp (M_ref M_array) P_expr (Ref(I_array, List $1.any)) $1 $2} +| arrayref_start expr ARRAYREF_END {sp_same $2 $3; new_pesp (M_ref M_array) P_expr (Ref(I_array, List($1.any @ [$2.any.expr]))) $1 $3} +| arrayref_start BRACKET expr BRACKET_END ARRAYREF_END {sp_same $2 $5; new_pesp (M_ref M_array) P_expr (Ref(I_array, List($1.any @ [hash_ref $3]))) $1 $5} + +| BRACKET BRACKET_END {new_pesp (M_ref M_hash) P_expr (Ref(I_hash, List [])) $1 $2} /* empty hash */ +| BRACKET_HASHREF expr BRACKET_END %prec PREC_HIGH {sp_p($3); new_pesp (M_ref M_hash) P_expr (hash_ref $2) $1 $3} /* { foo => "Bar" } */ +| SUB BRACKET BRACKET_END %prec PREC_HIGH {sp_n($2); sp_0($3); new_pesp (M_ref M_sub) P_expr (Anonymous_sub(None, Block [], pos_range $2 $3)) $1 $3} +| SUB_WITH_PROTO BRACKET BRACKET_END %prec PREC_HIGH {sp_n($2); sp_0($3); new_pesp (M_ref M_sub) P_expr (Anonymous_sub(Some $1.any, Block [], pos_range $2 $3)) $1 $3} +| SUB BRACKET lines BRACKET_END %prec PREC_HIGH {sp_n($2); new_pesp (M_ref M_sub) P_expr (anonymous_sub None $3 $4) $1 $4} +| SUB_WITH_PROTO BRACKET lines BRACKET_END %prec PREC_HIGH {sp_n($2); new_pesp (M_ref M_sub) P_expr (anonymous_sub (Some $1.any) $3 $4) $1 $4} + +| termdo {new_1pesp P_tok $1.any $1} +| REF term {new_pesp (M_ref $2.mcontext) P_expr (Ref(I_scalar, remove_call_with_same_para_special $2.any.expr)) $1 $2} /* \$x, \@y, \%z */ +| my_our %prec UNIOP {new_1pesp P_expr $1.any $1} +| LOCAL term %prec UNIOP {sp_n($2); new_pesp (M_mixed [ $2.mcontext ; M_none ]) P_expr (to_Local $2) $1 $2} + +| parenthesized {new_1pesp $1.any.priority (List $1.any.expr) $1} /* (1, 2) */ +| parenthesized arrayref {sp_0($2); let is_slice = not (is_only_one_in_List $2.any) in new_pesp (if is_slice then M_list else M_unknown_scalar) P_tok (to_Deref_with(I_array, (if is_slice then I_array else I_scalar), List $1.any.expr, List $2.any)) $1 $2} /* list indexing or slicing */ + +| variable { + let e = + match $1.any with + | Deref(I_func, Ident _) -> + call_with_same_para_special $1.any (* not the same as f(@_) *) + | e -> e in + new_1pesp P_tok e $1 + } + +| subscripted {new_1pesp P_tok $1.any $1} + +| array arrayref {new_pesp M_list P_expr (to_Deref_with(I_array, I_array, from_array $1, List $2.any)) $1 $2} /* array slice: @array[vals] */ +| array BRACKET expr BRACKET_END {sp_0($2); sp_0($3); sp_0($4); new_pesp M_list P_expr (to_Deref_with(I_hash, I_array, from_array $1, $3.any.expr)) $1 $4} /* hash slice: @hash{@keys} */ + +/* function_calls */ +| ONE_SCALAR_PARA RAW_STRING {call_one_scalar_para P_uniop $1 [to_Raw_string $2] $1 $2} +| ONE_SCALAR_PARA STRING {call_one_scalar_para P_uniop $1 [to_String true $2] $1 $2} +| ONE_SCALAR_PARA variable {call_one_scalar_para P_uniop $1 [$2.any] $1 $2} +| ONE_SCALAR_PARA restricted_subscripted {call_one_scalar_para P_uniop $1 [$2.any] $1 $2} +| ONE_SCALAR_PARA parenthesized {call_one_scalar_para P_tok $1 $2.any.expr $1 $2} +| ONE_SCALAR_PARA BRACKET lines BRACKET_END {sp_n($2); new_pesp M_unknown P_uniop (call(Deref(I_func, Ident(None, $1.any, raw_pos2pos $1.pos)), [anonymous_sub None $3 $4])) $1 $4} /* eval { foo } */ +| ONE_SCALAR_PARA diamond {call_one_scalar_para P_uniop $1 [$2.any] $1 $2} +| ONE_SCALAR_PARA %prec PREC_LOW {call_one_scalar_para P_tok $1 [] $1 $1} +| ONE_SCALAR_PARA word argexpr {check_parenthesized_first_argexpr_with_Ident $2.any $3; call_one_scalar_para P_uniop $1 [call(Deref(I_func, $2.any), $3.any.expr)] $1 $3} /* ref foo $a, $b */ +| ONE_SCALAR_PARA hash PKG_SCOPE {sp_0($3); call_one_scalar_para P_uniop $1 [ Call(Too_complex, [$2.any]) ] $1 $3} /* keys %main:: */ +| ONE_SCALAR_PARA BAREWORD {if $2.any = "_" && $1.any.[0] = '-' then new_pesp M_bool P_uniop Too_complex $1 $2 else die_rule "syntax error"} /* -e "foo" && -f _ */ + +| ONE_SCALAR_PARA array arrayref {call_one_scalar_para P_uniop $1 [to_Deref_with(I_array, I_array, from_array $2, List $3.any)] $1 $3} /* array slice: @array[vals] */ +| ONE_SCALAR_PARA array BRACKET expr BRACKET_END {sp_0($3); sp_0($4); sp_0($5); call_one_scalar_para P_uniop $1 [to_Deref_with(I_hash, I_array, from_array $2, $4.any.expr)] $1 $5} /* hash slice: @hash{@keys} */ + +| func parenthesized {sp_0($2); call_func $1 $2} /* &foo(@args) */ +| word argexpr {check_parenthesized_first_argexpr_with_Ident $1.any $2; call_no_paren $1 $2} /* foo $a, $b */ +| word BRACKET lines BRACKET_END MULT { die_with_rawpos $5.pos "I can't handle this correctly, please add parentheses" } +| word BRACKET lines BRACKET_END COMMA argexpr %prec LSTOP {sp_n($2); new_pesp M_unknown P_call_no_paren (call(Deref(I_func, $1.any), Ref(I_hash, List (fst $3.any)) :: $6.any.expr)) $1 $6} /* bless { foo }, $bar */ +| word_paren parenthesized {sp_0($2); call_with_paren $1 $2} /* foo(@args) */ +| word BRACKET lines BRACKET_END listexpr %prec LSTOP {sp_n($2); call_and_context(Deref(I_func, $1.any), anonymous_sub None $3 $4 :: $5.any.expr) false (if $5.any.expr = [] then P_tok else P_call_no_paren) $1 $5} /* map { foo } @bar */ +| word BRACKET BRACKET expr BRACKET_END BRACKET_END listexpr %prec LSTOP {sp_n($2); sp_p($3); sp_p($4); sp_p($5); sp_p($6); new_pesp M_unknown (if $7.any.expr = [] then P_tok else P_call_no_paren) (call(Deref(I_func, $1.any), anonymous_sub None (new_esp (M_ref M_hash) ([ hash_ref $4 ], false) $3 $5) $6 :: $7.any.expr)) $1 $7} /* map { { foo } } @bar */ +| word BRACKET BRACKET expr BRACKET_END semi_colon BRACKET_END listexpr %prec LSTOP {sp_n($2); sp_p($3); sp_p($4); sp_p($5); sp_p($7); new_pesp M_unknown (if $8.any.expr = [] then P_tok else P_call_no_paren) (call(Deref(I_func, $1.any), anonymous_sub None (new_esp (M_ref M_hash) ([ hash_ref $4; Semi_colon ], true) $3 $6) $7 :: $8.any.expr)) $1 $8} /* map { { foo }; } @bar */ + +| term ARROW word_or_scalar parenthesized {sp_0($2); sp_0($3); sp_0($4); if $4.any.expr = [] then warn [Warn_suggest_simpler] $4.pos "remove these unneeded parentheses"; new_pesp M_unknown P_tok (to_Method_call($1.any.expr, $3.any, $4.any.expr)) $1 $4} /* $foo->bar(list) */ +| term ARROW word_or_scalar {sp_0($2); sp_0($3); new_pesp M_unknown P_tok (to_Method_call($1.any.expr, $3.any, [])) $1 $3} /* $foo->bar */ + +| NEW word { sp_n($2); new_pesp (M_ref M_unknown) P_expr (to_Method_call ($2.any, Ident(None, "new", get_pos $1), [])) $1 $2} /* new Class */ +| NEW word_paren parenthesized { sp_n($2); sp_0($3); new_pesp (M_ref M_unknown) P_expr (to_Method_call($2.any, Ident(None, "new", get_pos $1), $3.any.expr)) $1 $3} /* new Class(...) */ +| NEW word terminal { die_rule "you must parenthesize parameters: \"new Class(...)\" instead of \"new Class ...\"" } +| NEW word variable { die_rule "you must parenthesize parameters: \"new Class(...)\" instead of \"new Class ...\"" } + +| PRINT { to_Call_op_ (M_mixed [M_int; M_none]) P_call_no_paren $1.any (var_STDOUT :: [ var_dollar_ (get_pos $1) ]) $1 $1} +| PRINT argexpr {check_parenthesized_first_argexpr $1.any $2; to_Call_op_ (M_mixed [M_int; M_none]) P_call_no_paren $1.any (var_STDOUT :: $2.any.expr) $1 $2} +| PRINT_TO_SCALAR { to_Call_op_ (M_mixed [M_int; M_none]) P_call_no_paren (fst $1.any) (var_STDOUT :: [ Deref(I_scalar, Ident(None, snd $1.any, get_pos $1)) ]) $1 $1} +| PRINT_TO_SCALAR argexpr { to_Call_op_ (M_mixed [M_int; M_none]) P_call_no_paren (fst $1.any) (Deref(I_scalar, Ident(None, snd $1.any, get_pos $1)) :: $2.any.expr) $1 $2} +| PRINT_TO_STAR { to_Call_op_ (M_mixed [M_int; M_none]) P_call_no_paren (fst $1.any) (Deref(I_star, Ident(None, snd $1.any, get_pos $1)) :: [ var_dollar_ (get_pos $1) ]) $1 $1} +| PRINT_TO_STAR argexpr { to_Call_op_ (M_mixed [M_int; M_none]) P_call_no_paren (fst $1.any) (Deref(I_star, Ident(None, snd $1.any, get_pos $1)) :: $2.any.expr) $1 $2} + +| hash PKG_SCOPE {sp_0($2); new_pesp M_hash P_tok (Call(Too_complex, [$1.any])) $1 $2} /* %main:: */ + +| terminal {$1} + +expr_bracket_end: +| expr BRACKET_END { sp_p($2); new_esp (M_ref M_hash) (hash_ref $1) $1 $2 } +| expr BRACKET_END ARROW bracket_subscript {sp_p($2); sp_0($3); new_esp M_unknown_scalar (to_Deref_with(I_hash, I_scalar, hash_ref $1, $4.any)) $1 $4} /* { foo }->{Bar} */ + +terminal: +| word {word_alone $1} +| NUM {new_1pesp P_tok (Num($1.any, get_pos $1)) $1} +| STRING {new_1pesp P_tok (to_String true $1) $1} +| RAW_STRING {new_1pesp P_tok (to_Raw_string $1) $1} +| REVISION {new_1pesp P_tok (to_Raw_string $1) $1} +| COMMAND_STRING {to_Call_op_ (M_mixed[M_string; M_list]) P_tok "``" [to_String false $1] $1 $1} +| QUOTEWORDS {let l = List.map (fun s -> Raw_string(s, raw_pos2pos $1.pos)) (words $1.any) in new_pesp (M_tuple (repeat M_string (List.length l))) P_tok (List [ List l ]) $1 $1} +| HERE_DOC {new_1pesp P_tok (to_String false (new_1esp (fst $1.any) $1)) $1 } +| RAW_HERE_DOC {new_1pesp P_tok (Raw_string(fst $1.any, raw_pos2pos (snd $1.any))) $1} +| QR_PATTERN {to_Call_op_ M_string P_tok "qr//" (from_PATTERN $1) $1 $1} +| PATTERN {to_Call_op_ M_array P_expr "m//" (var_dollar_ (get_pos $1) :: from_PATTERN $1) $1 $1} +| PATTERN_SUBST {to_Call_op_ (M_mixed[M_none; M_int]) P_expr "s///" (var_dollar_ (get_pos $1) :: from_PATTERN_SUBST $1) $1 $1} +| diamond {new_1pesp P_expr $1.any $1} + +diamond: +| LT GT {sp_0($2); to_Call_op (M_mixed[M_string; M_list]) "<>" [] $1 $2} +| LT term GT {sp_0($2); sp_0($3); to_Call_op (M_mixed[M_string; M_list]) "<>" [$2.any.expr] $1 $3} + +subscripted: /* Some kind of subscripted expression */ +| variable PKG_SCOPE bracket_subscript {sp_0($2); sp_0($3); new_esp M_unknown (Call(Too_complex, [$3.any])) $1 $3} /* $foo::{something} */ +| scalar bracket_subscript {sp_0($2); check_scalar_subscripted $1; new_esp M_unknown_scalar (to_Deref_with(I_hash , I_scalar, from_scalar $1, $2.any )) $1 $2} /* $foo{bar} */ +| scalar arrayref {sp_0($2); check_scalar_subscripted $1; new_esp M_unknown_scalar (to_Deref_with(I_array, I_scalar, from_scalar $1, only_one_array_ref $2)) $1 $2} /* $array[$element] */ +| term ARROW simple_subscript {sp_0($2); sp_0($3); new_esp $3.mcontext (to_Deref_with_arrow $2 (fst $3.any, I_scalar, $1.any.expr, snd $3.any)) $1 $3} +| subscripted simple_subscript {sp_0($2); new_esp $2.mcontext (to_Deref_with(fst $2.any, I_scalar, $1.any, snd $2.any)) $1 $2} + +restricted_subscripted: /* Some kind of subscripted expression */ +| variable PKG_SCOPE bracket_subscript {sp_0($2); sp_0($3); new_esp M_unknown (Call(Too_complex, [$3.any])) $1 $3} /* $foo::{something} */ +| word_paren parenthesized {new_esp M_unknown (call(Deref(I_func, $1.any), $2.any.expr)) $1 $2} +| scalar bracket_subscript {sp_0($2); check_scalar_subscripted $1; new_esp M_unknown_scalar (to_Deref_with(I_hash , I_scalar, from_scalar $1, $2.any )) $1 $2} /* $foo{bar} */ +| scalar arrayref {sp_0($2); check_scalar_subscripted $1; new_esp M_unknown_scalar (to_Deref_with(I_array, I_scalar, from_scalar $1, only_one_array_ref $2)) $1 $2} /* $array[$element] */ +| scalar ARROW simple_subscript {sp_0($2); sp_0($3); new_esp $3.mcontext (to_Deref_with_arrow $2 (fst $3.any, I_scalar, $1.any, snd $3.any)) $1 $3} +| restricted_subscripted ARROW simple_subscript {sp_0($2); sp_0($3); new_esp $3.mcontext (to_Deref_with_arrow $2 (fst $3.any, I_scalar, $1.any, snd $3.any)) $1 $3} /* somehref->{bar} */ +| restricted_subscripted simple_subscript {sp_0($2); new_esp $2.mcontext (to_Deref_with(fst $2.any, I_scalar, $1.any, snd $2.any)) $1 $2} + +| restricted_subscripted ARROW word_or_scalar parenthesized {sp_0($2); sp_0($3); sp_0($4); if $4.any.expr = [] then warn [Warn_suggest_simpler] $4.pos "remove these unneeded parentheses"; new_esp M_unknown (to_Method_call($1.any, $3.any, $4.any.expr)) $1 $4} /* $foo->bar(list) */ +| restricted_subscripted ARROW word_or_scalar {sp_0($2); sp_0($3); new_esp M_unknown (to_Method_call($1.any, $3.any, [])) $1 $3} /* $foo->bar */ + +simple_subscript: +| bracket_subscript {new_esp M_unknown_scalar (I_hash, $1.any) $1 $1} +| arrayref {new_esp M_unknown_scalar (I_array, only_one_array_ref $1) $1 $1} +| parenthesized {new_esp M_unknown (I_func , List($1.any.expr)) $1 $1} + + +arrayref: +| arrayref_start ARRAYREF_END {sp_0($2); new_esp (M_ref M_array) $1.any $1 $2} +| arrayref_start expr ARRAYREF_END {sp_0($3); new_esp (M_ref M_array) ($1.any @ [$2.any.expr]) $1 $3} +| arrayref_start BRACKET expr BRACKET_END ARRAYREF_END {sp_p($2); sp_p($4); sp_0($5); new_esp (M_ref M_hash) ($1.any @ [hash_ref $3]) $1 $5} +parenthesized: +| parenthesized_start PAREN_END {sp_0_or_cr($2); new_pesp (if $1.any = [] then M_list else $1.mcontext) (if $1.any = [] then P_tok else P_paren P_comma) $1.any $1 $2} +| parenthesized_start expr PAREN_END {sp_0_or_cr($3); (if $1.any = [] then sp_0_or_cr else sp_p)($2); new_pesp (if $1.any = [] then $2.mcontext else M_list) (P_paren (if $1.any = [] then $2.any.priority else P_comma)) ($1.any @ [(if $1.any = [] then prio_lo P_loose else prio_lo_after P_comma) $2]) $1 $3} +| parenthesized_start BRACKET expr BRACKET_END PAREN_END {sp_p($4); sp_0_or_cr($5); new_pesp (if $1.any = [] then M_ref M_hash else M_list) (P_paren (if $1.any = [] then P_expr else P_comma)) ($1.any @ [hash_ref $3]) $1 $5} + +arrayref_start: +| ARRAYREF {new_1esp [] $1 } +| arrayref_start BRACKET expr BRACKET_END comma {sp_p($2); sp_p($3); sp_p($4); new_esp M_special ($1.any @ [hash_ref $3]) $1 $5} +parenthesized_start: +| PAREN {new_1esp [] $1 } +| parenthesized_start BRACKET expr BRACKET_END comma {(if $1.any = [] then sp_0_or_cr else sp_p)($2); sp_p($3); sp_p($4); new_esp (M_ref M_hash) ($1.any @ [hash_ref $3]) $1 $5} + +my_our: /* Things that can be "my"'d */ +| my_our_paren PAREN_END {sp_0($2); new_esp (M_mixed [ $1.mcontext ; M_none ]) (My_our(sndfst $1.any, snd $1.any, get_pos $1)) $1 $2} +| my_our_paren SCALAR_IDENT PAREN_END {check_my_our_paren $1 $2; new_esp (M_mixed [ mtuple_context_concat $1.mcontext M_unknown_scalar; M_none ]) (My_our(sndfst $1.any, snd $1.any @ [I_scalar, snd $2.any], pos_range $1 $3)) $1 $3} +| my_our_paren HASH_IDENT PAREN_END {check_my_our_paren $1 $2; new_esp (M_mixed [ M_list ; M_none ]) (My_our(sndfst $1.any, snd $1.any @ [I_hash, snd $2.any], pos_range $1 $3)) $1 $3} +| my_our_paren ARRAY_IDENT PAREN_END {check_my_our_paren $1 $2; new_esp (M_mixed [ M_list ; M_none ]) (My_our(sndfst $1.any, snd $1.any @ [I_array, snd $2.any], pos_range $1 $3)) $1 $3} +| MY_OUR SCALAR_IDENT {new_esp (M_mixed [M_unknown_scalar; M_none]) (My_our($1.any, [I_scalar, snd $2.any], get_pos $2)) $1 $2} +| MY_OUR HASH_IDENT {new_esp (M_mixed [M_hash ; M_none]) (My_our($1.any, [I_hash, snd $2.any], get_pos $2)) $1 $2} +| MY_OUR ARRAY_IDENT {new_esp (M_mixed [M_array ; M_none]) (My_our($1.any, [I_array, snd $2.any], get_pos $2)) $1 $2} + +my_our_paren: +| MY_OUR PAREN {sp_1($2); new_esp (M_tuple []) ((true, $1.any), []) $1 $2} +| my_our_paren comma {if fstfst $1.any then die_rule "syntax error"; new_esp $1.mcontext ((true, sndfst $1.any), snd $1.any) $1 $2} +| my_our_paren BAREWORD {check_my_our_paren $1 $2; if $2.any <> "undef" then die_rule "scalar expected"; new_esp (mtuple_context_concat $1.mcontext M_none) ((false, sndfst $1.any), snd $1.any @ [I_raw, $2.any]) $1 $2} +| my_our_paren SCALAR_IDENT {check_my_our_paren $1 $2; new_esp (mtuple_context_concat $1.mcontext M_unknown_scalar) ((false, sndfst $1.any), snd $1.any @ [I_scalar, snd $2.any]) $1 $2} +| my_our_paren HASH_IDENT {check_my_our_paren $1 $2; new_esp M_list ((false, sndfst $1.any), snd $1.any @ [I_hash, snd $2.any]) $1 $2} +| my_our_paren ARRAY_IDENT {check_my_our_paren $1 $2; new_esp M_list ((false, sndfst $1.any), snd $1.any @ [I_array, snd $2.any]) $1 $2} + +termdo: /* Things called with "do" */ +| DO term %prec UNIOP { die_rule "\"do EXPR\" not allowed" } /* do $filename */ +| DO BRACKET lines BRACKET_END %prec PREC_HIGH {sp_n($2); new_esp $3.mcontext (lines_to_Block $3 $4) $1 $4} /* do { code */ + +bracket_subscript: +| BRACKET expr BRACKET_END {sp_0($1); sp_same $2 $3; check_hash_subscript $2; new_esp M_special (only_one_in_List $2) $1 $3} +| COMPACT_HASH_SUBSCRIPT {sp_0($1); new_1esp (to_Raw_string $1) $1 } + +variable: +| scalar {$1} +| star {$1} +| hash {$1} +| array {$1} +| arraylen {$1} /* $#x, $#{ something } */ +| func {$1} /* &foo; */ + +word: +| bareword { $1 } +| RAW_IDENT { new_1esp (to_Ident $1) $1 } + +comma: COMMA {new_esp M_special true $1 $1} | RIGHT_ARROW {sp_p($1); new_1esp false $1 } + +semi_colon: SEMI_COLON {sp_0($1); $1} + +word_or_scalar: +| word {$1} +| scalar {$1} +| word_paren {$1} +| MULT_L_STR { new_1esp (Ident(None, "x", get_pos $1)) $1 } +| FOR { new_1esp (Ident(None, $1.any, get_pos $1)) $1 } +| ONE_SCALAR_PARA { new_1esp (Ident(None, $1.any, get_pos $1)) $1 } + +bareword: +| NEW { new_1esp (Ident(None, "new", get_pos $1)) $1 } +| BAREWORD { new_1esp (Ident(None, $1.any, get_pos $1)) $1 } + +word_paren: +| BAREWORD_PAREN { new_1esp (Ident(None, $1.any, get_pos $1)) $1 } +| RAW_IDENT_PAREN { new_1esp (to_Ident $1) $1 } +| PO_COMMENT word_paren { po_comment($1); new_esp M_special $2.any $1 $2 } + + +arraylen: ARRAYLEN_IDENT {new_esp M_int (deref_arraylen (to_Ident $1)) $1 $1} | ARRAYLEN scalar {sp_0($2); new_esp M_int (deref_arraylen $2.any ) $1 $1 } | ARRAYLEN bracket_subscript {new_esp M_int (deref_arraylen $2.any) $1 $2} +scalar: SCALAR_IDENT {new_esp M_unknown_scalar (Deref(I_scalar, to_Ident $1)) $1 $1} | DOLLAR scalar {sp_0($2); new_esp M_unknown_scalar (Deref(I_scalar, $2.any)) $1 $1 } | DOLLAR bracket_subscript {new_esp M_unknown_scalar (deref_raw I_scalar $2.any) $1 $2} | DOLLAR BRACKET BRACKET expr BRACKET_END BRACKET_END {sp_0($2); sp_0($3); sp_p($5); sp_0($6); new_esp M_unknown_scalar (Deref(I_scalar, hash_ref $4)) $1 $6} +func: FUNC_IDENT {new_esp M_unknown (Deref(I_func , to_Ident $1)) $1 $1} | AMPERSAND scalar {sp_0($2); new_esp M_unknown (Deref(I_func , $2.any)) $1 $1 } | AMPERSAND bracket_subscript {new_esp M_unknown (deref_raw I_func $2.any) $1 $2} +array: ARRAY_IDENT {new_esp M_array (Deref(I_array , to_Ident $1)) $1 $1} | AT scalar {sp_0($2); new_esp M_array (Deref(I_array , $2.any)) $1 $1 } | AT bracket_subscript {new_esp M_array (deref_raw I_array $2.any) $1 $2} +hash: HASH_IDENT {new_esp M_hash (Deref(I_hash , to_Ident $1)) $1 $1} | PERCENT scalar {sp_0($2); new_esp M_hash (Deref(I_hash , $2.any)) $1 $1 } | PERCENT bracket_subscript {new_esp M_hash (deref_raw I_hash $2.any) $1 $2} +star: STAR_IDENT {new_esp M_unknown (Deref(I_star , to_Ident $1)) $1 $1} | STAR scalar {sp_0($2); new_esp M_unknown (Deref(I_star , $2.any)) $1 $1 } | STAR bracket_subscript {new_esp M_unknown (deref_raw I_star $2.any) $1 $2} + +expr_or_empty: {default_esp (Block [])} | expr {new_1esp $1.any.expr $1 } + +%% + +prog_ref := Some prog +;; diff --git a/src/parser_helper.ml b/src/parser_helper.ml new file mode 100644 index 0000000..43d60a4 --- /dev/null +++ b/src/parser_helper.ml @@ -0,0 +1,1409 @@ +open Types +open Common +open Printf + +let bpos = -1, -1 + +let raw_pos2pos(a, b) = !Info.current_file, a, b +let raw_pos_range { pos = (a1, b1) } { pos = (a2, b2) } = (if a1 = -1 then a2 else a1), (if b2 = -1 then b1 else b2) +let pos_range esp1 esp2 = raw_pos2pos (raw_pos_range esp1 esp2) +let get_pos pesp = raw_pos2pos pesp.pos +let get_pos_start { pos = (start, _) } = start +let get_pos_end { pos = (_, end_) } = end_ +let var_dollar_ pos = Deref(I_scalar, Ident(None, "_", pos)) +let var_STDOUT = Deref(I_star, Ident(None, "STDOUT", raw_pos2pos bpos)) + +let new_any mcontext any spaces pos = { mcontext = mcontext ; any = any ; spaces = spaces ; pos = pos } +let new_any_ any spaces pos = new_any M_unknown any spaces pos +let new_esp mcontext e esp_start esp_end = new_any mcontext e esp_start.spaces (raw_pos_range esp_start esp_end) +let new_1esp e esp = new_any esp.mcontext e esp.spaces esp.pos +let new_pesp mcontext prio e esp_start esp_end = new_any mcontext { priority = prio ; expr = e } esp_start.spaces (raw_pos_range esp_start esp_end) +let new_1pesp prio e esp = new_any esp.mcontext { priority = prio ; expr = e } esp.spaces esp.pos +let default_esp e = new_any M_unknown e Space_none bpos +let default_pesp prio e = new_any M_unknown { priority = prio ; expr = e } Space_none bpos + +let split_name_or_fq_name full_ident = + match split_at2 ':'':' full_ident with + | [] -> internal_error "split_ident" + | [ident] -> None, ident + | l -> + let fql, name = split_last l in + let fq = String.concat "::" fql in + Some fq, name + +let is_var_dollar_ = function + | Deref(I_scalar, Ident(None, "_", _)) -> true + | _ -> false +let is_var_number_match = function + | Deref(I_scalar, Ident(None, s, _)) -> String.length s = 1 && s.[0] <> '0' && char_is_number s.[0] + | _ -> false + +let non_scalar_context context = context = I_hash || context = I_array +let is_scalar_context context = context = I_scalar + +let rec is_not_a_scalar = function + | Deref_with(_, context, _, _) + | Deref(context, _) -> non_scalar_context context + | List [] + | List(_ :: _ :: _) -> true + | Call(Deref(I_func, Ident(None, "map", _)), _) + | Call(Deref(I_func, Ident(None, "grep", _)), _) -> true + | Call_op("?:", [ _cond ; a; b ], _) -> is_not_a_scalar a || is_not_a_scalar b + | _ -> false + +let is_a_scalar = function + | Ref _ + | Num _ + | Raw_string _ + | String _ + | Call(Deref(I_func, Ident(None, "N", _)), _) -> true + | My_our(_, [ context, _ ], _) + | Deref_with(_, context, _, _) + | Deref(context, _) -> is_scalar_context context + | _ -> false + +let is_a_string = function + | String _ | Raw_string _ -> true + | _ -> false + +let is_parenthesized = function + | List[] + | List[List _] -> true + | _ -> false + +let un_parenthesize = function + | List[List[e]] -> e + | List[e] -> e + | _ -> internal_error "un_parenthesize" + +let rec un_parenthesize_full = function + | List[e] -> un_parenthesize_full e + | e -> e + +let rec un_parenthesize_full_l = function + | [ List l ] -> un_parenthesize_full_l l + | l -> l + +let is_always_true = function + | Num(n, _) -> float_of_string n <> 0. + | Raw_string(s, _) -> s <> "" + | String(l, _) -> l <> [] + | Ref _ -> true + | _ -> false + +let is_always_false = function + | Num(n, _) -> float_of_string n = 0. + | Raw_string(s, _) -> s = "" + | String(l, _) -> l = [] + | List [] -> true + | Ident(None, "undef", _) -> true + | _ -> false + +let rec is_lvalue = function + | Call(Deref(I_func, Ident(None, f, _)), _) -> List.mem f [ "substr" ] + + | Call_op("?:", [ _ ; a ; b ], _) -> is_lvalue a && is_lvalue b + + | Call_op("local", l, _) + | List [ List l ] + -> List.for_all is_lvalue l + + | My_our _ + | Deref(_, _) + | Deref_with(_, _, _, _) + | Ident(None, "undef", _) + -> true + + | _ -> false + +let not_complex e = + if is_parenthesized e then true else + let rec not_complex_ op = function + | Call_op("?:", _, _) -> false + | Call_op(op', l, _) -> op <> op' && List.for_all (not_complex_ op') l + | e -> not (is_parenthesized e) + in not_complex_ "" (un_parenthesize_full e) + +let not_simple = function + | Num _ | Ident _ | Deref(_, Ident _) -> false + | _ -> true + +let context2s = function + | I_scalar -> "$" + | I_hash -> "%" + | I_array -> "@" + | I_func -> "&" + | I_raw -> "" + | I_star -> "*" +let variable2s(context, ident) = context2s context ^ ident + +let rec string_of_fromparser = function + | Semi_colon -> ";" + | Undef -> "undef" + | Num(num, _) -> num + + | Raw_string(s, _) -> "\"" ^ s ^ "\"" + | String(l, _) -> + let l' = List.map (fun (s, e) -> + s ^ if e = List[] then "" else string_of_fromparser e + ) l in + "\"" ^ String.concat "" l' ^ "\"" + + | Ident(None, s, _) -> s + | Ident(Some fq, s, _) -> fq ^ "::" ^ s + | My_our(myour, l, _) -> myour ^ "(" ^ String.concat "," (List.map (fun (context, s) -> context2s context ^ s) l) ^ ")" + + | Anonymous_sub(_, e, _) -> "sub { " ^ string_of_fromparser e ^ " }" + | Ref(_, e) -> "\\" ^ string_of_fromparser e + | Deref(context, e) -> context2s context ^ string_of_fromparser e + + | Diamond(None) -> "<>" + | Diamond(Some e) -> "<" ^ string_of_fromparser e ^ ">" + + | Sub_declaration(name, _prototype, body, Real_sub_declaration) -> + "sub " ^ string_of_fromparser name ^ " { " ^ string_of_fromparser body ^ " }" + + | Sub_declaration(name, _prototype, body, Glob_assign) -> + "*" ^ string_of_fromparser name ^ " = sub { " ^ string_of_fromparser body ^ " };" + + | Deref_with(_, _, _e1, _e2) -> + internal_error "todo" + + | Package(p) -> "package " ^ string_of_fromparser p + + | Use(e, []) -> "use " ^ string_of_fromparser e + | Use(e, l) -> "use " ^ string_of_fromparser e ^ "(" ^ lstring_of_fromparser l + + | List l -> lstring_of_fromparser_parentheses l + | Block l -> "{ " ^ lstring_of_fromparser l ^ " }" + | Call_op(op, l, _) -> op ^ lstring_of_fromparser_parentheses l + + | Call(e, l) -> string_of_fromparser e ^ lstring_of_fromparser l + + | Method_call(obj, meth, l) -> + let para = if l = [] then "" else lstring_of_fromparser_parentheses l in + string_of_fromparser obj ^ "->" ^ string_of_fromparser meth ^ para + + | Label(e) -> e ^ ": " + + | Perl_checker_comment _ -> "" + | Too_complex -> "XXX" + +and lstring_of_fromparser l = String.concat ", " (List.map string_of_fromparser l) +and lstring_of_fromparser_parentheses l = "(" ^ lstring_of_fromparser l ^ ")" + +let rec is_same_fromparser a b = + match a, b with + | Undef, Undef -> true + | Ident(fq1, s1, _), Ident(fq2, s2, _) -> fq1 = fq2 && s1 = s2 + | Num(s1, _), Num(s2, _) + | Raw_string(s1, _), Raw_string(s2, _) -> s1 = s2 + + | String(l1, _), String(l2, _) -> + for_all2_ (fun (s1, e1) (s2, e2) -> s1 = s2 && is_same_fromparser e1 e2) l1 l2 + + | Ref(c1, e1), Ref(c2, e2) + | Deref(c1, e1), Deref(c2, e2) -> c1 = c2 && is_same_fromparser e1 e2 + + | Deref_with(c1, c_1, e1, e_1), Deref_with(c2, c_2, e2, e_2) -> c1 = c2 && c_1 = c_2 && is_same_fromparser e1 e2 && is_same_fromparser e_1 e_2 + + | Diamond(None), Diamond(None) -> true + | Diamond(Some e1), Diamond(Some e2) -> is_same_fromparser e1 e2 + + | List(l1), List(l2) -> for_all2_ is_same_fromparser l1 l2 + + | Call_op(op1, l1, _), Call_op(op2, l2, _) -> op1 = op2 && for_all2_ is_same_fromparser l1 l2 + | Call(e1, l1), Call(e2, l2) -> is_same_fromparser e1 e2 && for_all2_ is_same_fromparser l1 l2 + + | Method_call(e1, m1, l1), Method_call(e2, m2, l2) -> + is_same_fromparser e1 e2 && is_same_fromparser m1 m2 && for_all2_ is_same_fromparser l1 l2 + + | _ -> false + +let from_scalar esp = + match esp.any with + | Deref(I_scalar, ident) -> ident + | _ -> internal_error "from_scalar" + +let from_array esp = + match esp.any with + | Deref(I_array, ident) -> ident + | _ -> internal_error "from_array" + +let rec get_pos_from_expr = function + | Anonymous_sub(_, _, pos) + | String(_, pos) + | Call_op(_, _, pos) + | Perl_checker_comment(_, pos) + | My_our(_, _, pos) + | Raw_string(_, pos) + | Num(_, pos) + | Ident(_, _, pos) + -> pos + + | Package e + | Ref(_, e) + | Deref(_, e) + | Sub_declaration(e, _, _, _) + | Deref_with(_, _, e, _) + | Use(e, _) + | Call(e, _) + | Method_call(_, e, _) + -> get_pos_from_expr e + + | Diamond(option_e) + -> if option_e = None then raw_pos2pos bpos else get_pos_from_expr (some option_e) + + | List l + | Block l + -> if l = [] then raw_pos2pos bpos else get_pos_from_expr (List.hd l) + + | Semi_colon + | Too_complex + | Undef + | Label _ + -> raw_pos2pos bpos + +let msg_with_rawpos (start, end_) msg = Info.pos2sfull_current start end_ ^ msg +let die_with_rawpos raw_pos msg = failwith (msg_with_rawpos raw_pos msg) +let warn warn_types raw_pos msg = if Flags.are_warning_types_set warn_types then print_endline_flush (msg_with_rawpos raw_pos msg) + +let die_rule msg = die_with_rawpos (Parsing.symbol_start(), Parsing.symbol_end()) msg +let warn_rule warn_types msg = warn warn_types (Parsing.symbol_start(), Parsing.symbol_end()) msg + +let warn_verb warn_types pos msg = if not !Flags.quiet then warn warn_types (pos, pos) msg +let warn_too_many_space start = warn_verb [Warn_white_space] start "you should have only one space here" +let warn_no_space start = warn_verb [Warn_white_space] start "you should have a space here" +let warn_cr start = warn_verb [Warn_white_space] start "you should not have a carriage-return (\\n) here" +let warn_space start = warn_verb [Warn_white_space] start "you should not have a space here" + +let rec prio_less = function + | P_none, _ | _, P_none -> internal_error "prio_less" + + | P_paren_wanted prio1, prio2 + | prio1, P_paren_wanted prio2 -> prio_less(prio1, prio2) + + | P_ternary, P_or -> false + | P_ternary, P_and -> false + + | _, P_loose -> true + | P_loose, _ -> false + | _, P_or -> true + | P_or, _ -> false + + | _, P_and -> true + | P_and, _ -> false + | _, P_call_no_paren -> true + | P_call_no_paren, _ -> false + | _, P_comma -> true + | P_comma, _ -> false + | _, P_assign -> true + | P_assign, _ -> false + | _, P_ternary -> true + | P_ternary, _ -> false + + | _, P_tight_or -> true + | P_tight_or, _ -> false + | _, P_tight_and -> true + | P_tight_and, _ -> false + + | P_bit, P_bit -> true + | P_bit, _ -> false + + | _, P_expr -> true + | P_expr, _ -> false + + | _, P_eq -> true + | P_eq, _ -> false + | _, P_cmp -> true + | P_cmp, _ -> false + | _, P_uniop -> true + | P_uniop, _ -> false + | _, P_add -> true + | P_add, _ -> false + | _, P_mul -> true + | P_mul, _ -> false + | _, P_tight -> true + | P_tight, _ -> false + + | _, P_paren _ -> true + | P_paren _, _ -> true + | P_tok, _ -> true + +let prio_lo_check pri_out pri_in pos expr = + if prio_less(pri_in, pri_out) then + (match pri_in with + | P_paren (P_paren_wanted _) -> () + | P_paren pri_in' -> + if pri_in' <> pri_out && + prio_less(pri_in', pri_out) && not_complex (un_parenthesize expr) then + warn [Warn_suggest_simpler] pos "unneeded parentheses" + | _ -> ()) + else + (match expr with + | Call(Deref(I_func, Ident(None, f, _)), _) when f <> "delete" && pri_in = P_uniop && pri_out = P_add + -> () (* ugly special case since we don't parse uniop correctly (eg: -d $_ . "foo" *) + | Call_op ("print", [Deref (I_star, Ident (None, "STDOUT", _)); (Deref(I_scalar, _) as ident)], _) -> + warn [Warn_traps] pos (sprintf "use parentheses: replace \"print %s ...\" with \"print(%s ...)\"" (string_of_fromparser ident) (string_of_fromparser ident)) + | _ -> warn [Warn_traps] pos "missing parentheses (needed for clarity)") + +let prio_lo pri_out in_ = prio_lo_check pri_out in_.any.priority in_.pos in_.any.expr ; in_.any.expr + +let prio_lo_after pri_out in_ = + if in_.any.priority = P_call_no_paren then in_.any.expr else prio_lo pri_out in_ + +let prio_lo_concat esp = prio_lo P_mul { esp with any = { esp.any with priority = P_paren_wanted esp.any.priority } } + +let hash_ref esp = Ref(I_hash, prio_lo P_loose esp) + +let sp_0 esp = + match esp.spaces with + | Space_none -> () + | Space_0 -> () + | Space_1 + | Space_n -> warn_space (get_pos_start esp) + | Space_cr -> warn_cr (get_pos_start esp) + +let sp_0_or_cr esp = + match esp.spaces with + | Space_none -> () + | Space_0 -> () + | Space_1 + | Space_n -> warn_space (get_pos_start esp) + | Space_cr -> () + +let sp_1 esp = + match esp.spaces with + | Space_none -> () + | Space_0 -> warn_no_space (get_pos_start esp) + | Space_1 -> () + | Space_n -> warn_too_many_space (get_pos_start esp) + | Space_cr -> warn_cr (get_pos_start esp) + +let sp_n esp = + match esp.spaces with + | Space_none -> () + | Space_0 -> warn_no_space (get_pos_start esp) + | Space_1 -> () + | Space_n -> () + | Space_cr -> warn_cr (get_pos_start esp) + +let sp_p esp = + match esp.spaces with + | Space_none -> () + | Space_0 -> warn_no_space (get_pos_start esp) + | Space_1 -> () + | Space_n -> () + | Space_cr -> () + +let sp_cr esp = + match esp.spaces with + | Space_none -> () + | Space_0 + | Space_1 + | Space_n -> warn_verb [Warn_white_space] (get_pos_start esp) "you should have a carriage-return (\\n) here" + | Space_cr -> () + +let sp_same esp1 esp2 = + if esp1.spaces <> Space_0 then sp_p esp2 + else if esp2.spaces <> Space_0 then sp_p esp1 + +let function_to_context word_alone = function + | "map" | "grep" | "grep_index" | "map_index" | "uniq" | "uniq_" -> M_array + | "partition" -> M_tuple [ M_ref M_array ; M_ref M_array ] + | "find" -> M_unknown_scalar + | "any" | "every" -> M_bool + | "find_index" -> M_int + | "each_index" -> M_none + | "N" | "N_" -> M_string + + | "chop" | "chomp" | "push" | "unshift" -> M_none + | "hex" | "length" | "time" | "fork" | "getppid" -> M_int + | "eof" | "wantarray" -> M_int + | "stat" | "lstat" -> M_list + | "arch" | "quotemeta" | "join" | "lc" | "lcfirst" | "uc" | "ucfirst" -> M_string + + | "split" -> M_array + | "shift" | "pop" -> M_unknown_scalar + | "die" | "return" | "redo" | "next" | "last" -> M_unknown + | "caller" -> M_mixed [M_string ; M_list] + + | "ref" -> M_ref M_unknown_scalar + | "undef" -> if word_alone then M_undef else M_none + | _ -> M_unknown + +let word_alone esp = + let word = esp.any in + let mcontext, e = match word with + | Ident(None, f, pos) -> + let e = match f with + | "length" | "stat" | "lstat" | "chop" | "chomp" | "quotemeta" | "lc" | "lcfirst" | "uc" | "ucfirst" -> + Call(Deref(I_func, word), [var_dollar_ pos]) + + | "split" -> Call(Deref(I_func, word), [ Raw_string(" ", pos) ; var_dollar_ pos ]) + | "die" -> Call(Deref(I_func, word), [ Deref(I_scalar, Ident(None, "@", raw_pos2pos bpos)) ]) + | "return" | "eof" | "caller" + | "redo" | "next" | "last" -> + Deref(I_func, word) + + | "hex" | "ref" -> + warn_rule [Warn_complex_expressions] (sprintf "please use \"%s $_\" instead of \"%s\"" f f) ; + Call(Deref(I_func, word), [ Raw_string(" ", pos) ; var_dollar_ pos ]) + | "time" | "wantarray" | "fork" | "getppid" | "arch" -> + warn_rule [Warn_complex_expressions] (sprintf "please use %s() instead of %s" f f) ; + Deref(I_func, word) + | _ -> word + in + function_to_context true f, e + | _ -> M_unknown, word + in + new_pesp mcontext P_tok e esp esp + +let check_parenthesized_first_argexpr word esp = + let want_space = word.[0] = '-' in + if word = "return" then () else + match esp.any.expr with + | [ Call_op(_, (e' :: l), _) ] + | e' :: l -> + if is_parenthesized e' then + if l = [] then + (if want_space then sp_n else sp_0) esp + else + (* eg: join (" ", @l) . "\n" *) + die_with_rawpos (get_pos_start esp, get_pos_start esp) "please remove the space before the function call" + else + sp_p esp + | _ -> + if word = "time" then die_rule "please use time() instead of time"; + sp_p esp + +let check_parenthesized_first_argexpr_with_Ident ident esp = + if esp.any.priority = P_tok then (); + (match ident with + | Ident(Some _, _, _) -> + (match esp.any.expr with + | [e] when is_parenthesized e -> () + | _ -> warn_rule [Warn_suggest_simpler] "use parentheses around argument (otherwise it might cause syntax errors if the package is \"require\"d and not \"use\"d") + | Ident(None, word, _) when List.mem word ["ref" ; "readlink"] -> + if esp.any.priority <> P_tok then warn_rule [Warn_complex_expressions] "use parentheses around argument" + | _ -> ()); + check_parenthesized_first_argexpr (string_of_fromparser ident) esp + +let check_hash_subscript esp = + let can_be_raw_string = function + | "" | "x" | "y" -> false (* special case for {'y'} otherwise the emacs mode goes wild, special case for {'x'} to have the same as {'y'} (since they usually go together) *) + | s -> + char_is_alpha s.[0] && (String.length s = 1 || string_forall_with char_is_alphanumerical_ 1 s) + in + match esp.any.expr with + | List [String ([(s, List [])], _)] when can_be_raw_string s -> warn [Warn_suggest_simpler] esp.pos (sprintf "{\"%s\"} can be written {%s}" s s) + | List [Raw_string(s, _)] when can_be_raw_string s -> warn [Warn_suggest_simpler] esp.pos (sprintf "{'%s'} can be written {%s}" s s) + | _ -> () + +let check_arrow_needed arrow = function + | Deref_with(I_array, I_scalar, List [List [Call _]], _) -> () (* "->" needed for (f())[0]->{XX} *) + | Deref_with _ -> warn [Warn_suggest_simpler] arrow.pos "the arrow \"->\" is unneeded" + | _ -> () + +let check_scalar_subscripted esp = + match esp.any with + | Deref(I_scalar, Deref _) -> warn_rule [Warn_complex_expressions] "for complex dereferencing, use \"->\"" + | _ -> () + +let negatable_ops = collect (fun (a, b) -> [ a, b ; b, a ]) [ + "==", "!=" ; + "eq", "ne" ; +] + +let check_negatable_expr esp = + match un_parenthesize_full esp.any.expr with + | Call_op("m//", var :: _, _) when not (is_var_dollar_ var) -> + warn_rule [Warn_suggest_simpler] "!($var =~ /.../) is better written $var !~ /.../" + | Call_op("!m//", var :: _, _) when not (is_var_dollar_ var) -> + warn_rule [Warn_suggest_simpler] "!($var !~ /.../) is better written $var =~ /.../" + | Call_op(op, _, _) -> + (try + let neg_op = List.assoc op negatable_ops in + warn_rule [Warn_suggest_simpler] (Printf.sprintf "!($foo %s $bar) is better written $foo %s $bar" op neg_op) + with Not_found -> ()) + | _ -> () + +let check_ternary_paras(cond, a, b) = + let rec dont_need_short_circuit_rec = function + | Num _ + | Raw_string _ + | String ([(_, List [])], _) + -> true + | Call(Deref(I_func, Ident(None, "N", _)), [ List(String _ :: l) ]) + | Call_op(".", l, _) + | Ref(I_hash, List l) + | List l -> List.for_all dont_need_short_circuit_rec l + | _ -> false + in + let rec dont_need_short_circuit = function + | Ref(_, Deref(_, Ident _)) + | Deref(_, Ident _) -> true + | Ref(I_hash, List l) + | List l -> List.for_all dont_need_short_circuit l + | e -> dont_need_short_circuit_rec e + in + let check_ternary_para = function + | List [] -> warn_rule [Warn_suggest_simpler; Warn_MDK_Common] "you may use if_() here\n beware that the short-circuit semantic of ?: is not kept\n if you want to keep the short-circuit behaviour, replace () with @{[]} and there will be no warning anymore" + | _ -> () + in + if dont_need_short_circuit a || is_same_fromparser cond a then check_ternary_para b; + if dont_need_short_circuit b || is_same_fromparser cond b then check_ternary_para a; + if is_same_fromparser cond a && is_a_scalar a && is_a_scalar b then warn_rule [Warn_suggest_simpler] "you can replace \"$foo ? $foo : $bar\" with \"$foo || $bar\""; + [ cond; a; b ] + +let check_unneeded_var_dollar_ esp = + if is_var_dollar_ esp.any.expr then warn [Warn_suggest_simpler] esp.pos "\"$_ =~ /regexp/\" can be written \"/regexp/\"" else + if is_var_number_match esp.any.expr then warn [Warn_complex_expressions] esp.pos "do not use the result of a match (eg: $1) to match another pattern" +let check_unneeded_var_dollar_not esp = + if is_var_dollar_ esp.any.expr then warn [Warn_suggest_simpler] esp.pos "\"$_ !~ /regexp/\" can be written \"!/regexp/\"" else + if is_var_number_match esp.any.expr then warn [Warn_complex_expressions] esp.pos "do not use the result of a match (eg: $1) to match another pattern" +let check_unneeded_var_dollar_s esp = + let expr = esp.any.expr in + if is_var_dollar_ expr then warn [Warn_suggest_simpler] esp.pos "\"$_ =~ s/regexp/.../\" can be written \"s/regexp/.../\"" else + if is_var_number_match expr then warn [Warn_traps] esp.pos "do not modify the result of a match (eg: $1)" else + let expr = match expr with + | List [List [Call_op("=", [ expr; _], _)]] -> expr (* check $xx in ($xx = ...) =~ ... *) + | _ -> expr in + if is_a_string expr || not (is_a_scalar expr) then warn [Warn_complex_expressions] esp.pos "you can only use s/// on a variable" + +let check_my esp = if esp.any <> "my" then die_rule "syntax error" +let check_foreach esp = if esp.any = "for" then warn [Warn_normalized_expressions] esp.pos "write \"foreach\" instead of \"for\"" +let check_for esp = if esp.any = "foreach" then warn [Warn_normalized_expressions] esp.pos "write \"for\" instead of \"foreach\"" +let check_for_foreach esp arg = + match arg.any.expr with + | List [ Deref(I_scalar, _) ] -> + if esp.any = "foreach" then warn [Warn_normalized_expressions] esp.pos "you are using the special trick to locally set $_ with a value, for this please use \"for\" instead of \"foreach\"" + | List [ Deref_with(context, I_scalar, _, _) ] when context <> I_func -> + if esp.any = "foreach" then warn [Warn_normalized_expressions] esp.pos "you are using the special trick to locally set $_ with a value, for this please use \"for\" instead of \"foreach\"" + | List [ Deref(I_hash, _) ] -> + warn [Warn_traps] esp.pos "foreach with a hash is usually an error" + | _ -> + if esp.any = "for" then warn [Warn_normalized_expressions] esp.pos "write \"foreach\" instead of \"for\"" + +let check_block_expr has_semi_colon last_expr esp_last esp_BRACKET_END = + sp_p esp_BRACKET_END ; + + if esp_BRACKET_END.spaces = Space_cr then + (if not has_semi_colon then warn_verb [Warn_white_space] (get_pos_end esp_last) "missing \";\"") + else + (if last_expr = Semi_colon then warn_verb [Warn_white_space] (get_pos_end esp_last) "spurious \";\" before closing block") + +let check_block_lines esp_lines esp_BRACKET_END = + match fst esp_lines.any with + | [] -> + sp_0_or_cr esp_BRACKET_END + | l -> + (if List.hd l = Semi_colon then sp_0 else sp_p) esp_lines ; + check_block_expr (snd esp_lines.any) (last l) esp_lines esp_BRACKET_END + +let check_unless_else elsif else_ = + if elsif.any <> [] then warn [Warn_complex_expressions] elsif.pos "don't use \"elsif\" with \"unless\" (replace \"unless\" with \"if\")"; + if else_.any <> [] then warn [Warn_complex_expressions] else_.pos "don't use \"else\" with \"unless\" (replace \"unless\" with \"if\")" + +let check_my_our_paren { any = ((comma_closed, _), l) } after_esp = + (if l = [] then sp_0 else sp_1) after_esp ; + if not comma_closed then die_rule "syntax error" + +let check_simple_pattern = function + | [ String([ st, List [] ], _); Raw_string("", _) ] -> + if String.length st > 2 && + st.[0] = '^' && st.[String.length st - 1] = '$' then + let st = skip_n_char_ 1 1 st in + if string_forall_with char_is_alphanumerical_ 0 st then + warn_rule [Warn_suggest_simpler] (sprintf "\"... =~ /^%s$/\" is better written \"... eq '%s'\"" st st) + | _ -> () + +let rec only_one esp = + match esp.any with + | [List l'] -> only_one { esp with any = l' } + | [e] -> e + | [] -> die_with_rawpos esp.pos "you must give one argument" + | _ -> die_with_rawpos esp.pos "you must give only one argument" + +let only_one_array_ref esp = + let e = only_one esp in + (match e with + | Call_op("last_array_index", [Deref(I_array, e)], _) -> + warn [Warn_suggest_simpler] esp.pos (sprintf "you can replace $#%s with -1" (string_of_fromparser e)) + | _ -> ()); + e + +let only_one_in_List esp = + match esp.any.expr with + | List l -> only_one { esp with any = l } + | e -> e + +let rec is_only_one_in_List = function + | [List l] -> is_only_one_in_List l + | [_] -> true + | _ -> false + +let maybe_to_Raw_string = function + | Ident(None, s, pos) -> Raw_string(s, pos) + | Ident(Some fq, s, pos) -> Raw_string(fq ^ "::" ^ s, pos) + | e -> e + +let to_List = function + | [e] -> e + | l -> List l + +let deref_arraylen e = Call_op("last_array_index", [Deref(I_array, e)], raw_pos2pos bpos) +let deref_raw context e = + let e = match e with + | Raw_string(s, pos) -> + let fq, ident = split_name_or_fq_name s in + Ident(fq, ident, pos) + | Deref(I_scalar, (Ident _ as ident)) -> + warn_rule [Warn_suggest_simpler] (sprintf "%s{$%s} can be written %s$%s" (context2s context) (string_of_fromparser ident) (context2s context) (string_of_fromparser ident)); + e + | _ -> e + in Deref(context, e) + +let to_Ident { any = (fq, name); pos = pos } = Ident(fq, name, raw_pos2pos pos) +let to_Raw_string { any = s; pos = pos } = Raw_string(s, raw_pos2pos pos) +let to_Method_call (object_, method_, para) = + match method_ with + | Ident(Some "SUPER", name, pos) -> Method_call(maybe_to_Raw_string object_, Raw_string(name, pos), para) + | Ident(Some _, _, _) -> Call(Deref(I_func, method_), maybe_to_Raw_string object_ :: para) + | _ -> Method_call(maybe_to_Raw_string object_, maybe_to_Raw_string method_, para) +let to_Deref_with(from_context, to_context, ref_, para) = + if is_not_a_scalar ref_ then warn_rule [] "bad deref"; + Deref_with(from_context, to_context, ref_, para) + +let to_Deref_with_arrow arrow (from_context, to_context, ref_, para) = + if from_context != I_func then check_arrow_needed arrow ref_ ; + to_Deref_with(from_context, to_context, ref_, para) + +let lines_to_Block esp_lines esp_BRACKET_END = + check_block_lines esp_lines esp_BRACKET_END; + Block (fst esp_lines.any) + +let to_Local esp = + let l = + match esp.any.expr with + | List[List l] -> l + | e -> [e] + in + let local_vars, local_exprs = fpartition (function + | Deref(I_star as context, Ident(None, ident, _)) + | Deref(I_scalar as context, Ident(None, ("_" as ident), _)) -> + Some(context, ident) + | Deref(I_scalar, Ident _) + | Deref(I_array, Ident _) + | Deref(I_star, Ident _) + | Deref_with(I_hash, I_scalar, Ident _, _) + | Deref_with(I_hash, I_scalar, Deref(I_scalar, _), _) + | Deref_with(I_hash, I_scalar, Deref_with(I_hash, I_scalar, Ident _, _), _) + | Deref_with(I_hash, I_scalar, Deref_with(I_hash, I_scalar, Deref(I_scalar, Ident _), _), _) -> + None + | _ -> die_with_rawpos esp.pos "bad argument to \"local\"" + ) l in + if local_vars = [] then Call_op("local", local_exprs, raw_pos2pos esp.pos) + else if local_exprs = [] then My_our("local", local_vars, raw_pos2pos esp.pos) + else die_with_rawpos esp.pos "bad argument to \"local\"" + +let sub_declaration (name, proto) body sub_kind = Sub_declaration(name, proto, Block body, sub_kind) +let anonymous_sub proto lines bracket_end = Anonymous_sub (proto, lines_to_Block lines bracket_end, raw_pos2pos lines.pos) +let call_with_same_para_special f = Call(f, [Deref(I_star, (Ident(None, "_", raw_pos2pos bpos)))]) +let remove_call_with_same_para_special = function + | Call(f, [Deref(I_star, (Ident(None, "_", _)))]) -> f + | e -> e + +let check_My_under_condition msg = function + | List [ My_our("my", _, _) ] -> + warn_rule [Warn_traps] "this is stupid" + | List [ Call_op("=", [ My_our("my", _, _); _ ], _) ] -> + warn_rule [Warn_traps] msg + | _ -> () + +let cook_call_op op para pos = + (match op with + | "le" | "ge" | "eq" | "ne" | "gt" | "lt" | "cmp" -> + if List.exists (function Num _ -> true | _ -> false) para then + warn_rule [Warn_traps] (sprintf "you should use a number operator, not the string operator \"%s\" (or replace the number with a string)" op) + | "." -> + if List.exists (function Call(Deref(I_func, Ident(None, "N_", _)), _) -> true | _ -> false) para then + warn_rule [Warn_MDK_Common; Warn_traps] "N_(\"xxx\") . \"yyy\" is dumb since the string \"xxx\" will never get translated" + | _ -> ()); + + (match op, para with + | "if", List [Call_op ("=", [ _; e ], _)] :: _ when is_always_true e || is_always_false e -> + warn_rule [Warn_traps] "are you sure you did not mean \"==\" instead of \"=\"?" + + | "foreach", [ _; Block [ expr ; Semi_colon ] ] + | "foreach", [ _; Block [ expr ] ] -> + (match expr with + | Call_op("if infix", [ List [ Call(Deref(I_func, Ident(None, "push", _)), [ Deref(I_array, Ident _) as l ; Deref(I_scalar, Ident(None, "_", _)) ]) ] ; _ ], _) -> + let l = string_of_fromparser l in + warn_rule [Warn_suggest_functional] (sprintf "use \"push %s, grep { ... } ...\" instead of \"foreach (...) { push %s, $_ if ... }\"\n or sometimes \"%s = grep { ... } ...\"" l l l) + | Call_op("if infix", [ List [ Call(Deref(I_func, Ident(None, "push", _)), [ Deref(I_array, Ident _) as l; _ ]) ] ; _ ], _) -> + let l = string_of_fromparser l in + warn_rule [Warn_suggest_functional] (sprintf "use \"push %s, map { ... ? ... : () } ...\" instead of \"foreach (...) { push %s, ... if ... }\"\n or sometimes \"%s = map { ... ? ... : () } ...\"\n or sometimes \"%s = map { if_(..., ...) } ...\"" l l l l) + + | Call_op ("if", [ _; Block [ List [ Call_op("=", [Deref(I_scalar, _) as ret; Deref(I_scalar, Ident(None, "_", _)) ], _) ]; + Semi_colon; + List [ Deref(I_func, Ident(None, "last", _)) ]; + Semi_colon ] ], _) -> + warn_rule [Warn_suggest_functional; Warn_MDK_Common] (sprintf "use \"%s = find { ... } ...\"" (string_of_fromparser ret)) + + | List [ Call(Deref(I_func, Ident(None, "push", _)), [ Deref(I_array, Ident _) as l; _ ]) ] -> + let l = string_of_fromparser l in + warn_rule [Warn_suggest_functional] (sprintf "use \"push %s, map { ... } ...\" instead of \"foreach (...) { push %s, ... }\"\n or sometimes \"%s = map { ... } ...\"" l l l) + | _ -> ()) + + | "=", [My_our _; Ident(None, "undef", _)] -> + warn [Warn_suggest_simpler] pos "no need to initialize variable, it's done by default" + | "=", [My_our _; List[]] -> + if Info.is_on_same_line_current pos then warn [Warn_suggest_simpler] pos "no need to initialize variables, it's done by default" + + | "=", [ Deref_with(I_array, I_scalar, id, Deref(I_array, id_)); _ ] when is_same_fromparser id id_ -> + warn_rule [Warn_suggest_simpler] "\"$a[@a] = ...\" is better written \"push @a, ...\"" + + | "=", [ Deref(I_star, String ([(sf1, List [])], _)); _ ] -> + warn_rule [Warn_help_perl_checker] (sprintf "write *{'%s'} instead of *{\"%s\"}" sf1 sf1) + + | "||=", List [ List _ ] :: _ + | "&&=", List [ List _ ] :: _ -> warn_rule [Warn_complex_expressions] "remove the parentheses" + | "||=", e :: _ + | "&&=", e :: _ -> if is_not_a_scalar e then warn_rule [Warn_traps] (sprintf "\"%s\" is only useful with a scalar" op) + + | "==", [Call_op("last_array_index", _, _); Num(n, _)] -> + warn_rule [Warn_suggest_simpler] (sprintf "$#x == %s is better written @x == %d" n (1 + int_of_string n)) + | "==", [Call_op("last_array_index", _, _); Call_op("- unary", [Num (n, _)], _)] -> + warn_rule [Warn_suggest_simpler] (sprintf "$#x == -%s is better written @x == %d" n (1 - int_of_string n)) + + + | "||", e :: _ when is_always_true e -> warn_rule [Warn_strange] "<constant> || ... is the same as <constant>" + | "&&", e :: _ when is_always_false e -> warn_rule [Warn_strange] "<constant> && ... is the same as <constant>" + | "||", e :: _ when is_always_false e -> warn_rule [Warn_strange] "<constant> || ... is the same as ..." + | "&&", e :: _ when is_always_true e -> warn_rule [Warn_strange] "<constant> && ... is the same as ..." + + | "or", e :: _ when is_always_true (un_parenthesize_full e) -> warn_rule [Warn_strange] "<constant> or ... is the same as <constant>" + | "and", e :: _ when is_always_false (un_parenthesize_full e) -> warn_rule [Warn_strange] "<constant> and ... is the same as <constant>" + | "or", e :: _ when is_always_false (un_parenthesize_full e) -> warn_rule [Warn_strange] "<constant> or ... is the same as ..." + | "and", e :: _ when is_always_true (un_parenthesize_full e) -> warn_rule [Warn_strange] "<constant> and ... is the same as ..." + + | "or", [ List [ Deref(I_scalar, id) ]; List [ Call_op("=", [ Deref(I_scalar, id_); _], _) ] ] when is_same_fromparser id id_ -> + warn_rule [Warn_suggest_simpler] "\"$foo or $foo = ...\" can be written \"$foo ||= ...\"" + + | "and", [ _cond ; expr ] -> check_My_under_condition "replace \"<cond> and my $foo = ...\" with \"my $foo = <cond> && ...\"" expr + | "or", [ _cond ; expr ] -> check_My_under_condition "replace \"<cond> or my $foo = ...\" with \"my $foo = !<cond> && ...\"" expr + + | _ -> ()); + + match op, para with + | "=", [ Deref(I_star, (Ident _ as f1)); Deref(I_star, (Ident _ as f2)) ] -> + let s1, s2 = string_of_fromparser f1, string_of_fromparser f2 in + warn [Warn_complex_expressions] pos (sprintf "\"*%s = *%s\" is better written \"*%s = \\&%s\"" s1 s2 s1 s2) ; + sub_declaration (f1, None) [ call_with_same_para_special(Deref(I_func, f2)) ] Glob_assign + | "=", [ Deref(I_star, Raw_string(sf1, pos_f1)); Deref(I_star, (Ident _ as f2)) ] -> + let s2 = string_of_fromparser f2 in + warn [Warn_help_perl_checker] pos (sprintf "\"*{'%s'} = *%s\" is better written \"*{'%s'} = \\&%s\"" sf1 s2 sf1 s2) ; + sub_declaration (Ident(None, sf1, pos_f1), None) [ call_with_same_para_special(Deref(I_func, f2)) ] Glob_assign + + | "=", [ Deref(I_star, (Ident _ as f1)); Ref(I_scalar, Deref(I_func, (Ident _ as f2))) ] -> + sub_declaration (f1, None) [ call_with_same_para_special(Deref(I_func, f2)) ] Glob_assign + | "=", [ Deref(I_star, Raw_string(sf1, pos_f1)); Ref(I_scalar, Deref(I_func, (Ident _ as f2))) ] -> + sub_declaration (Ident(None, sf1, pos_f1), None) [ call_with_same_para_special(Deref(I_func, f2)) ] Glob_assign + + | "=", [ Deref(I_star, (Ident _ as f1)); (Anonymous_sub(proto, sub, _)) ] -> + sub_declaration (f1, proto) [ sub ] Glob_assign + + | _ -> Call_op(op, para, raw_pos2pos pos) + +let to_Call_op mcontext op para esp_start esp_end = + let pos = raw_pos_range esp_start esp_end in + new_any mcontext (cook_call_op op para pos) esp_start.spaces pos +let to_Call_op_ mcontext prio op para esp_start esp_end = + let pos = raw_pos_range esp_start esp_end in + new_any mcontext { priority = prio ; expr = cook_call_op op para pos } esp_start.spaces pos +let to_Call_assign_op_ mcontext prio op left right esp_left esp_end = + if not (is_lvalue left) then warn [Warn_strange] esp_left.pos "invalid lvalue"; + to_Call_op_ mcontext prio op [ left ; right ] esp_left esp_end + +let followed_by_comma expr true_comma = + if true_comma then expr else + match split_last expr with + | l, Ident(None, s, pos) -> l @ [Raw_string(s, pos)] + | _ -> expr + + +let pot_strings = Hashtbl.create 16 +let po_comments = ref [] +let po_comment esp = lpush po_comments esp.any + +let check_format_a_la_printf s pos = + let rec check_format_a_la_printf_ contexts i = + try + let i' = String.index_from s i '%' in + try + let contexts = + match s.[i' + 1] with + | '%' -> contexts + | 'd' -> M_int :: contexts + | 's' | 'c' -> M_string :: contexts + | c -> warn [Warn_strange] (pos + i', pos + i') (sprintf "invalid command %%%c" c); contexts + in + check_format_a_la_printf_ contexts (i' + 2) + with Invalid_argument _ -> warn [Warn_strange] (pos + i', pos + i') "invalid command %" ; contexts + with Not_found -> contexts + in check_format_a_la_printf_ [] 0 + +let generate_pot file = + let fd = open_out file in + output_string fd +("# SOME DESCRIPTIVE TITLE. +# Copyright (C) YEAR Free Software Foundation, Inc. +# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR. +# +#, fuzzy +msgid \"\" +msgstr \"\" +\"Project-Id-Version: PACKAGE VERSION\\n\" +\"POT-Creation-Date: " ^ input_line (Unix.open_process_in "date '+%Y-%m-%d %H:%M%z'") ^ "\\n\" +\"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\\n\" +\"Last-Translator: FULL NAME <EMAIL@ADDRESS>\\n\" +\"Language-Team: LANGUAGE <LL@li.org>\\n\" +\"MIME-Version: 1.0\\n\" +\"Content-Type: text/plain; charset=CHARSET\\n\" +\"Content-Transfer-Encoding: 8-bit\\n\" + +") ; + + let rec print_formatted_char = function + | '"' -> output_char fd '\\'; output_char fd '"' + | '\t' -> output_char fd '\\'; output_char fd 't' + | '\\' -> output_char fd '\\'; output_char fd '\\' + | '\n' -> output_string fd "\\n\"\n\"" + | c -> output_char fd c + in + let sorted_pot_strings = List.sort (fun (_, pos_a) (_, pos_b) -> compare pos_a pos_b) + (Hashtbl.fold (fun k (v, _) l -> (k,v) :: l) pot_strings [] ) in + List.iter (fun (s, _) -> + match Hashtbl.find_all pot_strings s with + | [] -> () + | l -> + List.iter (fun _ -> Hashtbl.remove pot_strings s) l ; + + List.iter (fun po_comment -> output_string fd ("#. " ^ po_comment ^ "\n")) (collect snd l); + + let pos_l = List.sort compare (List.map fst l) in + fprintf fd "#: %s\n" (String.concat " " (List.map Info.pos2s_for_po pos_l)) ; + output_string fd "#, c-format\n" ; + + output_string fd (if String.contains s '\n' then "msgid \"\"\n\"" else "msgid \"") ; + String.iter print_formatted_char s ; + output_string fd "\"\n" ; + output_string fd "msgstr \"\"\n\n" + ) sorted_pot_strings ; + close_out fd + +let check_system_call = function + | "mkdir" :: l -> + let has_p = List.exists (str_begins_with "-p") l in + let has_m = List.exists (str_begins_with "-m") l in + if has_p && has_m then () + else if has_p then warn_rule [Warn_suggest_simpler; Warn_MDK_Common] "you can replace system(\"mkdir -p ...\") with mkdir_p(...)" + else if has_m then warn_rule [Warn_suggest_simpler; Warn_MDK_Common] "you can replace system(\"mkdir -m <mode> ...\") with mkdir(..., <mode>)" + else warn_rule [Warn_suggest_simpler; Warn_MDK_Common] "you can replace system(\"mkdir ...\") with mkdir(...)" + | _ -> () + +let call_raw force_non_builtin_func (e, para) = + let check_anonymous_block f = function + | [ Anonymous_sub _ ; Deref (I_hash, _) ] -> + warn_rule [Warn_strange] ("a hash is not a valid parameter to function " ^ f) + + | Anonymous_sub _ :: _ -> () + | _ -> warn_rule [Warn_complex_expressions] (sprintf "always use \"%s\" with a block (eg: %s { ... } @list)" f f) + in + + match e with + | Deref(I_func, Ident(None, f, _)) -> + (match f with + | "join" -> + (match un_parenthesize_full_l para with + | e :: _ when not (is_a_scalar e) -> warn_rule [Warn_traps] "first argument of join() must be a scalar"; + | [_] -> warn_rule [Warn_traps] "not enough parameters" + | [_; e] when is_a_scalar e -> warn_rule [Warn_traps] "join('...', $foo) is the same as $foo" + | _ -> ()) + + | "length" -> + if para = [] then warn_rule [Warn_complex_expressions] (sprintf "please use \"%s $_\" instead of \"%s\"" f f) else + if is_not_a_scalar (List.hd para) then warn_rule [Warn_traps] "never use \"length @l\", it returns the length of the string int(@l)" ; + + | "open" -> + (match para with + | [ List(Ident(None, name, _) :: _) ] + | Ident(None, name, _) :: _ -> + if not (List.mem name [ "STDIN" ; "STDOUT" ; "STDERR" ]) then + warn_rule [Warn_complex_expressions] (sprintf "use a scalar instead of a bareword (eg: occurrences of %s with $%s)" name name) + | _ -> ()) + + | "N" | "N_" -> + (match para with + | [ List(String([ s, List [] ], (_, pos_offset, _ as pos)) :: para) ] -> + if !Flags.generate_pot then ( + Hashtbl.add pot_strings s (pos, !po_comments) ; + po_comments := [] + ) ; + let contexts = check_format_a_la_printf s pos_offset in + if f = "N" then + if List.length para < List.length contexts then + warn_rule [Warn_traps; Warn_MDK_Common] "not enough parameters" + else if List.length para > List.length contexts then + warn_rule [Warn_traps; Warn_MDK_Common] "too many parameters" ; + (*if String.contains s '\t' then warn_rule "tabulation in translated string must be written \\\\t";*) + (*if count_matching_char s '\n' > 10 then warn_rule "long string";*) + | [ List(String _ :: _) ] -> die_rule "don't use interpolated translated string, use %s or %d instead" + | _ -> die_rule (sprintf "%s() must be used with a string" f)) + + | "if_" -> + (match para with + | [ List [ _ ] ] -> warn_rule [Warn_traps; Warn_MDK_Common] "not enough parameters"; + | _ -> ()) + + | "map" -> + (match para with + + | Anonymous_sub(None, Block [ List [ Call(Deref(I_func, Ident(None, "if_", _)), + [ List [ _ ; Deref(I_scalar, Ident(None, "_", _)) ] ]) ] ], _) :: _ -> + warn_rule [Warn_suggest_simpler; Warn_MDK_Common] "you can replace \"map { if_(..., $_) }\" with \"grep { ... }\"" + | _ -> check_anonymous_block f para) + + | "grep" -> + (match para with + | [ Anonymous_sub(None, Block [ List [ Call_op("not", [ Call(Deref(I_func, Ident(None, "member", _)), [ List(Deref(I_scalar, Ident(None, "_", _)) :: _) ]) ], _) ] ], _); _ ] -> + warn_rule [Warn_suggest_simpler; Warn_MDK_Common] "you can replace \"grep { !member($_, ...) } @l\" with \"difference2([ @l ], [ ... ])\"" + | [ Anonymous_sub(None, Block [ List [ Call(Deref(I_func, Ident(None, "member", _)), [ List(Deref(I_scalar, Ident(None, "_", _)) :: _) ]) ] ], _); _ ] -> + warn_rule [Warn_suggest_simpler; Warn_MDK_Common] "you can replace \"grep { member($_, ...) } @l\" with \"intersection([ @l ], [ ... ])\"" + | _ -> check_anonymous_block f para) + + | "any" -> + (match para with + [Anonymous_sub (None, Block + [ List [ Call_op("eq", [Deref(I_scalar, Ident(None, "_", _)); _ ], _) ] ], + _); _ ] -> + warn_rule [Warn_suggest_simpler; Warn_MDK_Common] "you can replace \"any { $_ eq ... } @l\" with \"member(..., @l)\"" + | _ -> check_anonymous_block f para) + + | "grep_index" | "map_index" | "partition" | "uniq_" + | "find" + | "every" + | "find_index" + | "each_index" -> check_anonymous_block f para + + | "member" -> + (match para with + [ List [ _; Call(Deref(I_func, Ident(None, "keys", _)), _) ] ] -> + warn_rule [Warn_suggest_simpler; Warn_MDK_Common] "you can replace \"member($xxx, keys %yyy)\" with \"exists $yyy{$xxx}\"" + | _ -> ()) + + | "pop" | "shift" -> + (match para with + | [] + | [ Deref(I_array, _) ] + | [ List [ Deref(I_array, _) ] ] -> () + | _ -> warn_rule [Warn_traps] (f ^ " is expecting an array and nothing else")) + + | "push" | "unshift" -> + (match para with + | Deref(I_array, _) :: l + | [ List (Deref(I_array, _) :: l) ] -> + if l = [] then warn_rule [Warn_traps] ("you must give some arguments to " ^ f) + | _ -> warn_rule [Warn_traps] (f ^ " is expecting an array")) + + | "system" -> + let fake_string_option_from_expr = function + | String(l, _) -> Some(String.concat "" (List.map fst l)) + | Raw_string(s, _) -> Some s + | _ -> None + in + (match un_parenthesize_full_l para with + | [ e ] -> + (match fake_string_option_from_expr e with + | Some s -> + if List.exists (String.contains s) [ '\'' ; char_quote ] && + not (List.exists (String.contains s) [ '<' ; '>' ; '&' ; ';']) then + warn_rule [Warn_complex_expressions] "instead of quoting parameters you should give a list of arguments"; + check_system_call (split_at ' ' s) + | None -> ()) + | l -> + let l' = filter_some_with fake_string_option_from_expr l in + check_system_call l') + | _ -> () + ); + + let para' = match f with + | "no" -> + (match para with + | [ Ident(_, _, pos) as s ] -> Some [ Raw_string(string_of_fromparser s, pos) ] + | [ Call(Deref(I_func, (Ident(_, _, pos) as s)), l) ] -> Some(Raw_string(string_of_fromparser s, pos) :: l) + | _ -> die_rule "use \"no PACKAGE <para>\"") + | "undef" -> + (match para with + | [ Deref(I_star, ident) ] -> Some [ Deref(I_func, ident) ] + | _ -> None) + + | "goto" -> + (match para with + | [ Ident(None, s, pos) ] -> Some [ Raw_string(s, pos) ] + | _ -> None) + + | "last" | "next" | "redo" when not force_non_builtin_func -> + (match para with + | [ Ident(None, s, pos) ] -> Some [ Raw_string(s, pos) ] + | _ -> die_rule (sprintf "%s must be used with a raw string" f)) + + | "split" -> + (match para with + | [ List(Call_op("m//", Deref(I_scalar, Ident(None, "_", _)) :: pattern, pos) :: l) ] + | Call_op("m//", Deref(I_scalar, Ident(None, "_", _)) :: pattern, pos) :: l -> + Some(Call_op("qr//", pattern, pos) :: l) + | _ -> None) + + | _ -> None + in Call(e, some_or para' para) + | _ -> Call(e, para) + +let call(e, para) = call_raw false (e, para) + +let check_return esp_func esp_para = + match esp_func.any with + | Ident(None, "return", _) -> + prio_lo_check P_call_no_paren esp_para.any.priority esp_para.pos (List esp_para.any.expr) + | _ -> () + +let call_and_context(e, para) force_non_builtin_func priority esp_start esp_end = + let context = + match e with + | Deref(I_func, Ident(None, f, _)) -> function_to_context false f + | _ -> M_unknown + in + new_pesp context priority (call_raw force_non_builtin_func (e, para)) esp_start esp_end + +let call_no_paren esp_func esp_para = check_return esp_func esp_para; call_and_context(Deref(I_func, esp_func.any), esp_para.any.expr) false P_call_no_paren esp_func esp_para +let call_with_paren esp_func esp_para = check_return esp_func esp_para; call_and_context (Deref(I_func, esp_func.any), esp_para.any.expr) false P_tok esp_func esp_para + +let call_func esp_func esp_para = + call_and_context(esp_func.any, esp_para.any.expr) true P_tok esp_func esp_para + +let call_one_scalar_para prio { any = e ; pos = pos } para esp_start esp_end = + let para' = + match para with + | [] -> + if e = "shift" || e = "pop" then + [] (* can't decide here *) + else + (if not (List.mem e [ "length" ]) then warn_rule [Warn_complex_expressions] (sprintf "please use \"%s $_\" instead of \"%s\"" e e) ; + [var_dollar_ (raw_pos2pos pos)]) + | _ -> para + in + new_pesp M_unknown prio (call(Deref(I_func, Ident(None, e, raw_pos2pos pos)), para')) esp_start esp_end + + +let (current_lexbuf : Lexing.lexbuf option ref) = ref None + +let rec list2tokens l = + let rl = ref l in + fun lexbuf -> + match !rl with + | [] -> internal_error "list2tokens" + | ((start, end_), e) :: l -> + (* HACK: fake a normal lexbuf *) + lexbuf.Lexing.lex_start_p <- { Lexing.dummy_pos with Lexing.pos_cnum = start } ; + lexbuf.Lexing.lex_curr_p <- { Lexing.dummy_pos with Lexing.pos_cnum = end_ } ; + rl := l ; e + +let parse_tokens parse tokens lexbuf_opt = + if lexbuf_opt <> None then current_lexbuf := lexbuf_opt ; + if tokens = [] then [] else + parse (list2tokens tokens) (some !current_lexbuf) + +let parse_interpolated parse l = + let l' = List.map (fun (s, tokens) -> s, to_List(parse_tokens parse tokens None)) l in + match split_last l' with + | pl, ("", List []) -> pl + | _ -> l' + +let to_String parse strict { any = l ; pos = pos } = + let l' = parse_interpolated parse l in + (match l' with + | [ "", List [Deref(I_scalar, Ident(None, ident, _))]] -> + if ident <> "!" && strict then warn [Warn_suggest_simpler] pos (sprintf "%s is better written without the double quotes" (variable2s(I_scalar, ident))) + | [ "", List [Deref(I_hash, _)]] -> + warn [Warn_traps] pos "don't use a hash in string context" + | [ "", List [Deref(I_array, _)]] + | [ "", List [Deref_with(I_array, I_array, _, _)]] -> (* for slices like: "@m3[1..$#m3]" *) + () + | [("", _)] -> + if strict then warn [Warn_suggest_simpler] pos "double quotes are unneeded" + | _ -> ()); + String(l', raw_pos2pos pos) + +let from_PATTERN parse { any = (s, opts) ; pos = pos } = + let re = parse_interpolated parse s in + (match List.rev re with + | (s, List []) :: _ -> + if str_ends_with s ".*" then + warn_rule [Warn_suggest_simpler] (sprintf "you can remove \"%s\" at the end of your regexp" ".*") + else if str_ends_with s ".*$" then + warn_rule [Warn_suggest_simpler] (sprintf "you can remove \"%s\" at the end of your regexp" ".*$") + | _ -> ()); + let pattern = [ String(re, raw_pos2pos pos) ; Raw_string(opts, raw_pos2pos pos) ] in + check_simple_pattern pattern; + pattern + +let from_PATTERN_SUBST parse { any = (s1, s2, opts) ; pos = pos } = + [ String(parse_interpolated parse s1, raw_pos2pos pos) ; + String(parse_interpolated parse s2, raw_pos2pos pos) ; + Raw_string(opts, raw_pos2pos pos) ] + + +let rec mcontext2s = function + | M_none -> "()" + + | M_bool -> "bool" + + | M_int -> "int" + | M_float -> "float" + | M_string -> "string" + | M_ref c -> "ref(" ^ mcontext2s c ^ ")" + | M_revision -> "revision" + | M_undef -> "undef" + | M_sub -> "sub" + | M_unknown_scalar -> "scalar" + + | M_tuple l -> "tuple(" ^ String.concat ", " (List.map mcontext2s l) ^ ")" + | M_list -> "list" + | M_array -> "array" + | M_hash -> "hash" + + | M_special -> "special" + | M_unknown -> "unknown" + | M_mixed l -> String.concat " | " (List.map mcontext2s l) + +let rec mcontext_lower c1 c2 = + match c1, c2 with + | M_special, _ | _, M_special -> internal_error "M_special in mcontext_compare" + + | M_unknown, _ + | _, M_unknown -> true + + | M_mixed l, c -> List.exists (fun a -> mcontext_lower a c) l + | c, M_mixed l -> List.exists (mcontext_lower c) l + + | M_none, M_none | M_sub, M_sub | M_hash, M_hash | M_hash, M_bool -> true + | M_none, _ | M_sub, _ | M_hash, _ -> false + + | _, M_list -> true + + | M_list, M_bool + | M_list, M_tuple _ + + (* M_unknown_scalar is M_mixed [ M_int ; M_float ; M_string ; M_bool ; M_ref _ ; M_revision ; M_undef ] *) + | M_unknown_scalar, M_int | M_unknown_scalar, M_float | M_unknown_scalar, M_string | M_unknown_scalar, M_bool + | M_unknown_scalar, M_ref _ | M_unknown_scalar, M_revision | M_unknown_scalar, M_undef | M_unknown_scalar, M_unknown_scalar + + | M_array, M_array | M_array, M_int | M_array, M_float | M_array, M_bool | M_array, M_unknown_scalar | M_array, M_tuple _ + | M_int, M_int | M_int, M_float | M_int, M_string | M_int, M_bool | M_int, M_unknown_scalar + | M_float, M_float | M_float, M_string | M_float, M_bool | M_float, M_unknown_scalar + | M_string, M_string | M_string, M_bool | M_string, M_unknown_scalar + | M_bool, M_bool | M_bool, M_unknown_scalar + + | M_ref _, M_unknown_scalar + | M_revision, M_revision | M_revision, M_unknown_scalar + | M_undef, M_undef | M_undef, M_unknown_scalar + + -> true + + | M_tuple t1, M_tuple t2 -> + List.length t1 = List.length t2 && for_all2_true mcontext_lower t1 t2 + + | M_tuple [c], M_int | M_tuple [c], M_float | M_tuple [c], M_string | M_tuple [c], M_bool + | M_tuple [c], M_ref _ | M_tuple [c], M_revision | M_tuple [c], M_undef | M_tuple [c], M_unknown_scalar + -> mcontext_lower c c2 + +(* | M_ref a, M_ref b -> mcontext_lower a b *) + + | _ -> false + +let mcontext_is_scalar = function + | M_unknown -> false + | c -> mcontext_lower c M_unknown_scalar + +let mcontext_to_scalar = function + | M_array -> M_int + | c -> if mcontext_is_scalar c then c else M_unknown_scalar + +let mcontext_merge_raw c1 c2 = + match c1, c2 with + | M_unknown, _ | _, M_unknown -> Some M_unknown + | M_unknown_scalar, c when mcontext_is_scalar c -> Some M_unknown_scalar + | c, M_unknown_scalar when mcontext_is_scalar c -> Some M_unknown_scalar + | M_mixed _, _ | _, M_mixed _ -> internal_error "mcontext_merge_raw" + | _ -> + if mcontext_lower c1 c2 then Some c2 else + if mcontext_lower c2 c1 then Some c1 else + if c1 = c2 then Some c1 else + None + +let rec mcontext_lmerge_add l = function + | M_mixed l2 -> List.fold_left mcontext_lmerge_add [] (l2 @ l) + | c -> + let rec add_to = function + | [] -> [c] + | M_mixed subl :: l -> add_to (subl @ l) + | c2 :: l -> + match mcontext_merge_raw c c2 with + | Some c' -> c' :: l + | None -> c2 :: add_to l + in add_to l + +let mcontext_lmerge l = + match List.fold_left mcontext_lmerge_add [] l with + | [] -> internal_error "mcontext_lmerge" + | [c] -> c + | l -> M_mixed l + +let mcontext_merge c1 c2 = mcontext_lmerge [ c1 ; c2 ] + +let mcontext_lmaybe esp = if esp.any = [] then [] else [esp.mcontext] + +let mcontext_check_raw wanted_mcontext mcontext = + if not (mcontext_lower mcontext wanted_mcontext) then + warn_rule [Warn_context] (sprintf "context %s is not compatible with context %s" (mcontext2s mcontext) (mcontext2s wanted_mcontext)) + +let mcontext_check wanted_mcontext esp = + (match wanted_mcontext with + | M_list | M_array | M_float | M_mixed [M_array; M_none] | M_tuple _ -> () + | _ -> + match un_parenthesize_full esp.any.expr with + | Call(Deref(I_func, Ident(None, "grep", _)), _) -> + warn_rule [Warn_suggest_simpler; Warn_help_perl_checker] (if wanted_mcontext = M_bool then + "in boolean context, use \"any\" instead of \"grep\"" else + "you may use \"find\" instead of \"grep\"") + | _ -> ()); + mcontext_check_raw wanted_mcontext esp.mcontext + +let mcontext_check_unop_l wanted_mcontext esp = + mcontext_check wanted_mcontext { esp with any = { esp.any with expr = List esp.any.expr } } + +let mcontext_check_non_none esp = + if esp.mcontext = M_none then warn_rule [Warn_context] "() context not accepted here" + +let mcontext_check_none msg expr esp = + let rec mcontext_check_none_rec msg expr = function + | M_none | M_unknown -> () + | M_mixed l when List.exists (fun c -> c = M_none) l -> () + | M_tuple l -> + (match expr with + | [Block [List l_expr]] + | [List l_expr] + | [List l_expr ; Semi_colon] -> + let rec iter = function + | e::l_expr, mcontext::l -> + mcontext_check_none_rec (if l = [] then msg else "value is dropped") [e] mcontext ; + iter (l_expr, l) + | [], [] -> () + | _ -> internal_error "mcontext_check_none" + in iter (un_parenthesize_full_l l_expr, l) + | _ -> internal_error "mcontext_check_none") + | _ -> + match expr with + | [List [Num("1", _)]; Semi_colon] -> () (* allow "1;" for package return value. It would be much better to check we are at toplevel, but hell i don't want to wire this information up to here *) + | [List [Call_op ("<>", [Ident (None, "STDIN", _)], _)]; Semi_colon] -> () (* allow <STDIN> to ask "press return" *) + | [List [Call(Deref(I_func, Ident(None, "map", _)), _)]; Semi_colon] -> warn_rule [Warn_void] "if you don't use the return value, use \"foreach\" instead of \"map\"" + | _ -> warn [Warn_void] esp.pos msg + in + mcontext_check_none_rec msg expr esp.mcontext + +(* only returns M_float when there is at least one float *) +let mcontext_float_or_int l = + List.iter (mcontext_check_raw M_float) l; + if List.mem M_float l then M_float else M_int + +let mcontext_op_assign left right = + mcontext_check_non_none right; + + let left_mcontext = + match left.mcontext with + | M_mixed [ c ; M_none ] -> c + | c -> c + in + + let wanted_mcontext = match left_mcontext with + | M_array -> M_list + | M_hash -> M_mixed [ M_hash ; M_list ] + | m -> m + in + mcontext_check wanted_mcontext right; + + let return_mcontext = + match left_mcontext with + | M_tuple _ -> M_array + | c -> c + in + mcontext_merge return_mcontext M_none + +let mtuple_context_concat c1 c2 = + match c1, c2 with + | M_array, _ | _, M_array + | M_hash, _ | _, M_hash -> M_list + | M_tuple l, _ -> M_tuple (l @ [c2]) + | _ -> M_tuple [c1 ; c2] + +let call_op_if_infix left right esp_start esp_end = + (match left, right with + | List [Call_op("=", [Deref(context, _); _], _)], _ when non_scalar_context context -> () + | List [Call_op("=", [v; _], _)], + List [Call_op("not", [v'], _)] when is_same_fromparser v v' -> + warn_rule [Warn_suggest_simpler] "\"$foo = ... if !$foo\" can be written \"$foo ||= ...\"" + | _ -> ()); + + mcontext_check_none "value is dropped" [left] esp_start; + (match right with + | List [ Num("0", _)] -> () (* allow my $x if 0 *) + | _ -> check_My_under_condition "replace \"my $foo = ... if <cond>\" with \"my $foo = <cond> && ...\"" left); + + let pos = raw_pos_range esp_start esp_end in + new_any M_none (Call_op("if infix", [ left ; right], raw_pos2pos pos)) esp_start.spaces pos + +let call_op_unless_infix left right esp_start esp_end = + (match left, right with + | List [Call_op("=", [Deref(context, _); _], _)], _ when non_scalar_context context -> () + | List [Call_op("=", [v; _], _)], List [v'] when is_same_fromparser v v' -> + warn_rule [Warn_suggest_simpler] "\"$foo = ... unless $foo\" can be written \"$foo ||= ...\"" + | _ -> ()); + (match right with + | List [Call_op(op, _, _)] -> + (match op with + | "&&" | "||" | "not" | "ne" | "?:" -> warn_rule [Warn_complex_expressions] "don't use \"unless\" when the condition is complex, use \"if\" instead" + | _ -> ()); + | _ -> ()); + + mcontext_check_none "value is dropped" [left] esp_start; + check_My_under_condition "replace \"my $foo = ... unless <cond>\" with \"my $foo = !<cond> && ...\"" left; + + let pos = raw_pos_range esp_start esp_end in + new_any M_none (Call_op("unless infix", [ left ; right], raw_pos2pos pos)) esp_start.spaces pos + +let symops pri para_context return_context op_str left op right = + sp_same op right; + let skip_context_check = + (op_str = "==" || op_str = "!=") && (match left.any.expr, right.any.expr with + | Deref(I_array, _), List [] -> true (* allow @l == () and @l != () *) + | _ -> false) + in + if op_str <> "==" && op_str <> "!=" && para_context = M_float then + (match un_parenthesize_full left.any.expr with + | Call_op("last_array_index", _, _) -> warn_rule [Warn_complex_expressions] "change your expression to use @xxx instead of $#xxx" + | _ -> ()); + + if not skip_context_check then + (mcontext_check para_context left ; mcontext_check para_context right) ; + to_Call_op_ return_context pri op_str [prio_lo pri left; prio_lo_after pri right] left right diff --git a/src/parser_helper.mli b/src/parser_helper.mli new file mode 100644 index 0000000..e820703 --- /dev/null +++ b/src/parser_helper.mli @@ -0,0 +1,314 @@ +val bpos : int * int +val raw_pos2pos : 'a * 'b -> string * 'a * 'b +val raw_pos_range : + 'a Types.any_spaces_pos -> 'b Types.any_spaces_pos -> int * int +val pos_range : + 'a Types.any_spaces_pos -> 'b Types.any_spaces_pos -> string * int * int +val get_pos : 'a Types.any_spaces_pos -> string * int * int +val get_pos_start : 'a Types.any_spaces_pos -> int +val get_pos_end : 'a Types.any_spaces_pos -> int +val var_dollar_ : Types.pos -> Types.fromparser +val var_STDOUT : Types.fromparser +val new_any : + Types.maybe_context -> + 'a -> Types.spaces -> int * int -> 'a Types.any_spaces_pos +val new_any_ : 'a -> Types.spaces -> int * int -> 'a Types.any_spaces_pos +val new_esp : + Types.maybe_context -> + 'a -> + 'b Types.any_spaces_pos -> + 'c Types.any_spaces_pos -> 'a Types.any_spaces_pos +val new_1esp : 'a -> 'b Types.any_spaces_pos -> 'a Types.any_spaces_pos +val new_pesp : + Types.maybe_context -> + Types.priority -> + 'a -> + 'b Types.any_spaces_pos -> + 'c Types.any_spaces_pos -> 'a Types.prio_anyexpr Types.any_spaces_pos +val new_1pesp : + Types.priority -> + 'a -> 'b Types.any_spaces_pos -> 'a Types.prio_anyexpr Types.any_spaces_pos +val default_esp : 'a -> 'a Types.any_spaces_pos +val default_pesp : + Types.priority -> 'a -> 'a Types.prio_anyexpr Types.any_spaces_pos +val split_name_or_fq_name : string -> string option * string +val is_var_dollar_ : Types.fromparser -> bool +val is_var_number_match : Types.fromparser -> bool +val non_scalar_context : Types.context -> bool +val is_scalar_context : Types.context -> bool +val is_not_a_scalar : Types.fromparser -> bool +val is_a_scalar : Types.fromparser -> bool +val is_a_string : Types.fromparser -> bool +val is_parenthesized : Types.fromparser -> bool +val un_parenthesize : Types.fromparser -> Types.fromparser +val un_parenthesize_full : Types.fromparser -> Types.fromparser +val un_parenthesize_full_l : Types.fromparser list -> Types.fromparser list +val is_always_true : Types.fromparser -> bool +val is_always_false : Types.fromparser -> bool +val is_lvalue : Types.fromparser -> bool +val not_complex : Types.fromparser -> bool +val not_simple : Types.fromparser -> bool +val context2s : Types.context -> string +val variable2s : Types.context * string -> string +val string_of_fromparser : Types.fromparser -> string +val lstring_of_fromparser : Types.fromparser list -> string +val lstring_of_fromparser_parentheses : Types.fromparser list -> string +val is_same_fromparser : Types.fromparser -> Types.fromparser -> bool +val from_scalar : Types.fromparser Types.any_spaces_pos -> Types.fromparser +val from_array : Types.fromparser Types.any_spaces_pos -> Types.fromparser +val get_pos_from_expr : Types.fromparser -> Types.pos +val msg_with_rawpos : int * int -> string -> string +val die_with_rawpos : int * int -> string -> 'a +val warn : Types.warning list -> int * int -> string -> unit +val die_rule : string -> 'a +val warn_rule : Types.warning list -> string -> unit +val warn_verb : Types.warning list -> int -> string -> unit +val warn_too_many_space : int -> unit +val warn_no_space : int -> unit +val warn_cr : int -> unit +val warn_space : int -> unit +val prio_less : Types.priority * Types.priority -> bool +val prio_lo_check : + Types.priority -> Types.priority -> int * int -> Types.fromparser -> unit +val prio_lo : + Types.priority -> + Types.fromparser Types.prio_anyexpr Types.any_spaces_pos -> + Types.fromparser +val prio_lo_after : + Types.priority -> + Types.fromparser Types.prio_anyexpr Types.any_spaces_pos -> + Types.fromparser +val prio_lo_concat : + Types.fromparser Types.prio_anyexpr Types.any_spaces_pos -> + Types.fromparser +val hash_ref : + Types.fromparser Types.prio_anyexpr Types.any_spaces_pos -> + Types.fromparser +val sp_0 : 'a Types.any_spaces_pos -> unit +val sp_0_or_cr : 'a Types.any_spaces_pos -> unit +val sp_1 : 'a Types.any_spaces_pos -> unit +val sp_n : 'a Types.any_spaces_pos -> unit +val sp_p : 'a Types.any_spaces_pos -> unit +val sp_cr : 'a Types.any_spaces_pos -> unit +val sp_same : 'a Types.any_spaces_pos -> 'b Types.any_spaces_pos -> unit +val function_to_context : bool -> string -> Types.maybe_context +val word_alone : + Types.fromparser Types.any_spaces_pos -> + Types.fromparser Types.prio_anyexpr Types.any_spaces_pos +val check_parenthesized_first_argexpr : + string -> + Types.fromparser list Types.prio_anyexpr Types.any_spaces_pos -> unit +val check_parenthesized_first_argexpr_with_Ident : + Types.fromparser -> + Types.fromparser list Types.prio_anyexpr Types.any_spaces_pos -> unit +val check_hash_subscript : + Types.fromparser Types.prio_anyexpr Types.any_spaces_pos -> unit +val check_arrow_needed : 'a Types.any_spaces_pos -> Types.fromparser -> unit +val check_scalar_subscripted : Types.fromparser Types.any_spaces_pos -> unit +val negatable_ops : (string * string) list +val check_negatable_expr : + Types.fromparser Types.prio_anyexpr Types.any_spaces_pos -> unit +val check_ternary_paras : + Types.fromparser * Types.fromparser * Types.fromparser -> + Types.fromparser list +val check_unneeded_var_dollar_ : + Types.fromparser Types.prio_anyexpr Types.any_spaces_pos -> unit +val check_unneeded_var_dollar_not : + Types.fromparser Types.prio_anyexpr Types.any_spaces_pos -> unit +val check_unneeded_var_dollar_s : + Types.fromparser Types.prio_anyexpr Types.any_spaces_pos -> unit +val check_my : string Types.any_spaces_pos -> unit +val check_foreach : string Types.any_spaces_pos -> unit +val check_for : string Types.any_spaces_pos -> unit +val check_for_foreach : + string Types.any_spaces_pos -> + Types.fromparser Types.prio_anyexpr Types.any_spaces_pos -> unit +val check_block_expr : + bool -> + Types.fromparser -> + 'a Types.any_spaces_pos -> 'b Types.any_spaces_pos -> unit +val check_block_lines : + (Types.fromparser list * bool) Types.any_spaces_pos -> + 'a Types.any_spaces_pos -> unit +val check_unless_else : + 'a list Types.any_spaces_pos -> 'b list Types.any_spaces_pos -> unit +val check_my_our_paren : + ((bool * 'a) * 'b list) Types.any_spaces_pos -> + 'c Types.any_spaces_pos -> unit +val check_simple_pattern : Types.fromparser list -> unit +val only_one : Types.fromparser list Types.any_spaces_pos -> Types.fromparser +val only_one_array_ref : + Types.fromparser list Types.any_spaces_pos -> Types.fromparser +val only_one_in_List : + Types.fromparser Types.prio_anyexpr Types.any_spaces_pos -> + Types.fromparser +val is_only_one_in_List : Types.fromparser list -> bool +val maybe_to_Raw_string : Types.fromparser -> Types.fromparser +val to_List : Types.fromparser list -> Types.fromparser +val deref_arraylen : Types.fromparser -> Types.fromparser +val deref_raw : Types.context -> Types.fromparser -> Types.fromparser +val to_Ident : + (string option * string) Types.any_spaces_pos -> Types.fromparser +val to_Raw_string : string Types.any_spaces_pos -> Types.fromparser +val to_Method_call : + Types.fromparser * Types.fromparser * Types.fromparser list -> + Types.fromparser +val to_Deref_with : + Types.context * Types.context * Types.fromparser * Types.fromparser -> + Types.fromparser +val to_Deref_with_arrow : + 'a Types.any_spaces_pos -> + Types.context * Types.context * Types.fromparser * Types.fromparser -> + Types.fromparser +val lines_to_Block : + (Types.fromparser list * bool) Types.any_spaces_pos -> + 'a Types.any_spaces_pos -> Types.fromparser +val to_Local : + Types.fromparser Types.prio_anyexpr Types.any_spaces_pos -> + Types.fromparser +val sub_declaration : + Types.fromparser * string option -> + Types.fromparser list -> Types.sub_declaration_kind -> Types.fromparser +val anonymous_sub : + string option -> + (Types.fromparser list * bool) Types.any_spaces_pos -> + 'a Types.any_spaces_pos -> Types.fromparser +val call_with_same_para_special : Types.fromparser -> Types.fromparser +val remove_call_with_same_para_special : Types.fromparser -> Types.fromparser +val check_My_under_condition : string -> Types.fromparser -> unit +val cook_call_op : + string -> Types.fromparser list -> int * int -> Types.fromparser +val to_Call_op : + Types.maybe_context -> + string -> + Types.fromparser list -> + 'a Types.any_spaces_pos -> + 'b Types.any_spaces_pos -> Types.fromparser Types.any_spaces_pos +val to_Call_op_ : + Types.maybe_context -> + Types.priority -> + string -> + Types.fromparser list -> + 'a Types.any_spaces_pos -> + 'b Types.any_spaces_pos -> + Types.fromparser Types.prio_anyexpr Types.any_spaces_pos +val to_Call_assign_op_ : + Types.maybe_context -> + Types.priority -> + string -> + Types.fromparser -> + Types.fromparser -> + 'a Types.any_spaces_pos -> + 'b Types.any_spaces_pos -> + Types.fromparser Types.prio_anyexpr Types.any_spaces_pos +val followed_by_comma : + Types.fromparser list -> bool -> Types.fromparser list +val pot_strings : (string, (string * int * int) * string list) Hashtbl.t +val po_comments : string list ref +val po_comment : string Types.any_spaces_pos -> unit +val check_format_a_la_printf : string -> int -> Types.maybe_context list +val generate_pot : string -> unit +val check_system_call : string list -> unit +val call_raw : + bool -> Types.fromparser * Types.fromparser list -> Types.fromparser +val call : Types.fromparser * Types.fromparser list -> Types.fromparser +val check_return : + Types.fromparser Types.any_spaces_pos -> + Types.fromparser list Types.prio_anyexpr Types.any_spaces_pos -> unit +val call_and_context : + Types.fromparser * Types.fromparser list -> + bool -> + Types.priority -> + 'a Types.any_spaces_pos -> + 'b Types.any_spaces_pos -> + Types.fromparser Types.prio_anyexpr Types.any_spaces_pos +val call_no_paren : + Types.fromparser Types.any_spaces_pos -> + Types.fromparser list Types.prio_anyexpr Types.any_spaces_pos -> + Types.fromparser Types.prio_anyexpr Types.any_spaces_pos +val call_with_paren : + Types.fromparser Types.any_spaces_pos -> + Types.fromparser list Types.prio_anyexpr Types.any_spaces_pos -> + Types.fromparser Types.prio_anyexpr Types.any_spaces_pos +val call_func : + Types.fromparser Types.any_spaces_pos -> + Types.fromparser list Types.prio_anyexpr Types.any_spaces_pos -> + Types.fromparser Types.prio_anyexpr Types.any_spaces_pos +val call_one_scalar_para : + Types.priority -> + string Types.any_spaces_pos -> + Types.fromparser list -> + 'a Types.any_spaces_pos -> + 'b Types.any_spaces_pos -> + Types.fromparser Types.prio_anyexpr Types.any_spaces_pos +val current_lexbuf : Lexing.lexbuf option ref +val list2tokens : ((int * int) * 'a) list -> Lexing.lexbuf -> 'a +val parse_tokens : + ((Lexing.lexbuf -> 'a) -> Lexing.lexbuf -> 'b list) -> + ((int * int) * 'a) list -> Lexing.lexbuf option -> 'b list +val parse_interpolated : + ((Lexing.lexbuf -> 'a) -> Lexing.lexbuf -> Types.fromparser list) -> + (string * ((int * int) * 'a) list) list -> (string * Types.fromparser) list +val to_String : + ((Lexing.lexbuf -> 'a) -> Lexing.lexbuf -> Types.fromparser list) -> + bool -> + (string * ((int * int) * 'a) list) list Types.any_spaces_pos -> + Types.fromparser +val from_PATTERN : + ((Lexing.lexbuf -> 'a) -> Lexing.lexbuf -> Types.fromparser list) -> + ((string * ((int * int) * 'a) list) list * string) Types.any_spaces_pos -> + Types.fromparser list +val from_PATTERN_SUBST : + ((Lexing.lexbuf -> 'a) -> Lexing.lexbuf -> Types.fromparser list) -> + ((string * ((int * int) * 'a) list) list * + (string * ((int * int) * 'a) list) list * string) + Types.any_spaces_pos -> Types.fromparser list +val mcontext2s : Types.maybe_context -> string +val mcontext_lower : Types.maybe_context -> Types.maybe_context -> bool +val mcontext_is_scalar : Types.maybe_context -> bool +val mcontext_to_scalar : Types.maybe_context -> Types.maybe_context +val mcontext_merge_raw : + Types.maybe_context -> Types.maybe_context -> Types.maybe_context option +val mcontext_lmerge_add : + Types.maybe_context list -> Types.maybe_context -> Types.maybe_context list +val mcontext_lmerge : Types.maybe_context list -> Types.maybe_context +val mcontext_merge : + Types.maybe_context -> Types.maybe_context -> Types.maybe_context +val mcontext_lmaybe : + 'a list Types.any_spaces_pos -> Types.maybe_context list +val mcontext_check_raw : Types.maybe_context -> Types.maybe_context -> unit +val mcontext_check : + Types.maybe_context -> + Types.fromparser Types.prio_anyexpr Types.any_spaces_pos -> unit +val mcontext_check_unop_l : + Types.maybe_context -> + Types.fromparser list Types.prio_anyexpr Types.any_spaces_pos -> unit +val mcontext_check_non_none : 'a Types.any_spaces_pos -> unit +val mcontext_check_none : + string -> Types.fromparser list -> 'a Types.any_spaces_pos -> unit +val mcontext_float_or_int : Types.maybe_context list -> Types.maybe_context +val mcontext_op_assign : + 'a Types.any_spaces_pos -> + Types.fromparser Types.prio_anyexpr Types.any_spaces_pos -> + Types.maybe_context +val mtuple_context_concat : + Types.maybe_context -> Types.maybe_context -> Types.maybe_context +val call_op_if_infix : + Types.fromparser -> + Types.fromparser -> + 'a Types.any_spaces_pos -> + 'b Types.any_spaces_pos -> Types.fromparser Types.any_spaces_pos +val call_op_unless_infix : + Types.fromparser -> + Types.fromparser -> + 'a Types.any_spaces_pos -> + 'b Types.any_spaces_pos -> Types.fromparser Types.any_spaces_pos +val symops : + Types.priority -> + Types.maybe_context -> + Types.maybe_context -> + string -> + Types.fromparser Types.prio_anyexpr Types.any_spaces_pos -> + 'a Types.any_spaces_pos -> + Types.fromparser Types.prio_anyexpr Types.any_spaces_pos -> + Types.fromparser Types.prio_anyexpr Types.any_spaces_pos diff --git a/src/perl_checker.html.pl b/src/perl_checker.html.pl new file mode 100644 index 0000000..e90d2eb --- /dev/null +++ b/src/perl_checker.html.pl @@ -0,0 +1,168 @@ +$s = <<'EOF'; +<head><title>perl_checker</title></head> +<h1>Goals of perl_checker</h1> + +<ul> +<li> for beginners in perl: + based on what the programmer is writing, + <ul> + <li> suggest better or more standard ways to do the same + <li> detect wrong code + <br> + => a kind of automatic teacher + </ul> + +<li> for senior programmers: + detect typos, unused variables, check number + of parameters, global analysis to check method calls... + +<li> enforce the same perl style by enforcing a subset of perl of features. + In perl <a href="http://c2.com/cgi/wiki?ThereIsMoreThanOneWayToDoIt">There is more than one way to do it</a>. + In perl_checker's subset of Perl, there is not too many ways to do it. + This is especially useful for big projects. + (NB: the subset is chosen to keep a good expressivity) +</ul> + +<h1>Compared to <a href="http://perlcritic.tigris.org/">Perl-Critic</a> + +<ul> +<li>perl_checker use its own OCaml-written perl parser, which is in no way as robust as <a href="http://www.perl.com/pub/a/2005/06/09/ppi.html">PPI</a>. + A PPI require is to be able to parse non finished perl documents. + perl_checker is a checker, and it is not a big deal to die horribly on a weird perl expression, telling the programmer what to write instead. + +<li>perl_checker is <b>much</b> faster (more than 100 times) (ML pattern matching rulez) + +<li>perl_checker checks a lot more things than perlcritic: undeclared variables, unknown functions, unknown methods... + +<li>and of course perl_checker checks are different from the Conways's <a href="http://www.oreilly.com/catalog/perlbp/">Perl Best Practices</a> +</ul> + +<h1>Get it</h1> + +<a href="http://cvs.mandriva.com/cgi-bin/cvsweb.cgi/soft/perl-MDK-Common/perl_checker.src/">CVS source</a> + +<h1>Implemented features</h1> + +<dl> + <dt>white space normalization + <dd>enforce a similar coding style. In many languages you can find a coding + style document (eg: <a href="http://www.gnu.org/prep/standards_23.html">the GNU one</a>). + + TESTS=force_layout.t + + </dd> + <dt>disallow <i>complex</i> expressions + <dd>perl_checker try to ban some weird-not-used-a-lot features. + + TESTS=syntax_restrictions.t + + </dd> + <dt>suggest simpler expressions + <dd>when there is a simpler way to write an expression, suggest it. It can + also help detecting errors. + + TESTS=suggest_better.t + + </dd> + <dt>context checks + <dd>Perl has types associated with variables names, the so-called "context". + Some expressions mixing contexts are stupid, perl_checker detects them. + + TESTS=context.t + + </dd> + <dt>function call check + <dd>detection of unknown functions or mismatching prototypes (warning: since + perl is a dynamic language, some spurious warnings may occur when a function + is defined using stashes). + + TESTS=prototype.t + + </dd> + <dt>method call check + <dd>detection of unknown methods or mismatching prototypes. perl_checker + doesn't have any idea what the object type is, it simply checks if a method + with that name and that number of parameters exists. + + TESTS=method.t + + </dd> + <dt>return value check + <dd>dropping the result of a functionnally <i>pure</i> function is stupid. + using the result of a function returning void is stupid too. + + TESTS=return_value.t + + </dd> + <dt>detect some Perl traps + <dd>some Perl expressions are stupid, and one gets a warning when running + them with <tt>perl -w</tt>. The drawback are <tt>perl -w</tt> is the lack of + code coverage, it only detects expressions which are evaluated. + + TESTS=various_errors.t + +</dl> + +<h1>Todo</h1> + +Functionalities that would be nice: +<ul> + <li> add flow analysis + <li> maybe a "soft typing" type analysis + <li> detect places where imperative code can be replaced with + functional code (already done for some <b>simple</b> loops) + <li> check the number of returned values when checking prototype compliance +</ul> +EOF + +my $_rationale = <<'EOF'; +<h1>Rationale</h1> + +Perl is a big language, there is <a +href="http://c2.com/cgi/wiki?ThereIsMoreThanOneWayToDoIt">ThereIsMoreThanOneWayToDoIt</a>. +It has advantages but also some drawbacks for team project: +<ul> + <li> it is hard to learn every special rules. Automatically enforced syntax + coding rules help learning incrementally +EOF + +use lib ('test', '..'); +use read_t; +sub get_example { + my ($file) = @_; + my @tests = read_t::read_t("test/$file"); + $file =~ s|test/||; + qq(<p><a name="$file"><table border=1 cellpadding=3>\n) . + join('', map { + my $lines = join("<br>", map { "<tt>" . html_quote($_) . "</tt>" } @{$_->{lines}}); + my $logs = join("<br>", map { html_quote($_) } @{$_->{logs}}); + " <tr><td>\n", $lines, "</td><td>", $logs, "</td></tr>\n"; + } @tests) . + "</table></a>\n"; +} + +sub anchor_to_examples { + my ($s) = @_; + $s =~ s!TESTS=(\S+)!(<a href="#$1">examples</a>)!g; + $s; +} +sub fill_in_examples { + my ($s) = @_; + $s =~ s!TESTS=(\S+)!get_example($1)!ge; + $s; +} + +$s =~ s!<h1>Implemented features</h1>(.*)<h1>! + "<h1>Implemented features</h1>" . anchor_to_examples($1) . + "<h1>Examples</h1>" . fill_in_examples($1) . + "<h1>"!se; + +print $s; + +sub html_quote { + local $_ = $_[0]; + s/</</g; + s/>/>/g; + s/^(\s*)/" " x length($1)/e; + $_; +} diff --git a/src/perl_checker.ml b/src/perl_checker.ml new file mode 100644 index 0000000..4459e30 --- /dev/null +++ b/src/perl_checker.ml @@ -0,0 +1,183 @@ +open Types +open Common +open Tree +open Global_checks + +let search_basedir file_name nb = + let dir = Filename.dirname file_name in + let config = Config_file.read dir in + let nb = some_or config.Config_file.basedir nb in + updir dir nb + +let basedir = ref "" +let set_basedir per_files file = + if !basedir = "" then + let nb = List.length (split_at2 ':'':' (List.hd file.packages).package_name) - 1 in + let dir = search_basedir file.file_name nb in + lpush Tree.use_lib dir ; + Config_file.read_any dir 1 ; + read_packages_from_cache per_files dir ; + if !Flags.verbose then print_endline_flush ("basedir is " ^ dir); + basedir := dir + +let rec parse_file from_basedir require_name per_files file = + try + if !Flags.verbose then print_endline_flush ("parsing " ^ file) ; + let build_time = Unix.time() in + let command = + match !Flags.expand_tabs with + | Some width -> "expand -t " ^ string_of_int width + | None -> "cat" in + let channel = Unix.open_process_in (Printf.sprintf "%s \"%s\"" command file) in + let lexbuf = Lexing.from_channel channel in + try + Info.start_a_new_file file ; + let tokens = Lexer.get_token Lexer.token lexbuf in + if not Build.debugging then ignore (Unix.close_process_in channel) ; + let t = Parser_helper.parse_tokens Parser.prog tokens (Some lexbuf) in + let per_file = get_global_info_from_package from_basedir require_name build_time t in + set_basedir per_files per_file ; + Global_checks.add_file_to_files per_files per_file ; + + let required_packages = collect (fun package -> package.required_packages) per_file.packages in + required_packages, per_files + with Failure s -> ( + print_endline_flush s ; + exit 1 + ) + with + | Not_found -> internal_error "runaway Not_found" + +and parse_package_if_needed per_files (package_name, pos) = + if List.mem package_name !Config_file.ignored_packages then [], per_files else + let splitted = split_at2 ':'':' package_name in + let rel_file = String.concat "/" splitted ^ ".pm" in + + (*print_endline_flush ("wondering about " ^ package_name) ;*) + try + let dir = findfile (Build.fake_packages_dir :: !use_lib) rel_file in + let file = Info.file_to_absolute_file (dir ^ "/" ^ rel_file) in + Config_file.read_any (Filename.dirname file) (List.length splitted) ; + let already_done = + try + let per_file = Hashtbl.find per_files file in + Some (collect (fun pkg -> pkg.required_packages) per_file.packages) + with Not_found -> None in + match already_done with + | Some required_packages -> required_packages, per_files + | None -> parse_file (dir = !basedir) (Some package_name) per_files file + with Not_found -> + print_endline_flush (Info.pos2sfull pos ^ Printf.sprintf "can't find package %s" package_name) ; + [], per_files + +let rec parse_required_packages state already_done = function + | [] -> state, already_done + | e :: l -> + if List.mem e already_done then + parse_required_packages state already_done l + else + let el, state = parse_package_if_needed state e in + parse_required_packages state (e :: already_done) (el @ l) + + +let parse_options = + let args_r = ref [] in + let restrict_to_files = ref false in + + let pot_file = ref "" in + let package_dependencies_graph_file = ref "" in + let generate_pot_chosen file = + Flags.generate_pot := true ; + Flags.expand_tabs := None ; + pot_file := file + in + let options = [ + "-v", Arg.Set Flags.verbose, " be verbose" ; + "-q", Arg.Set Flags.quiet, " be quiet" ; + "-t", Arg.Int (fun i -> Flags.expand_tabs := Some i), " set the tabulation width (default is 8)" ; + "--restrict-to-files", Arg.Set restrict_to_files, " only display warnings concerning the file(s) given on command line" ; + "--no-cache", Arg.Set Flags.no_cache, " do not use cache" ; + "--generate-pot", Arg.String generate_pot_chosen, "" ; + "--generate-package-dependencies-graph", Arg.String (fun f -> package_dependencies_graph_file := f), + "\n" ; + + "--check-unused-global-vars", Arg.Set Flags.check_unused_global_vars, " disable unused global functions & variables check" ^ + "\nBasic warnings:"; + "--no-check-white-space", Arg.Clear Flags.check_white_space, " disable white space check" ; + "--no-suggest-simpler", Arg.Clear Flags.check_suggest_simpler, " disable simpler code suggestion" ; + "--no-suggest-functional", Arg.Clear Flags.suggest_functional, " disable Functional Programming suggestions" ^ + "\nNormalisation warnings:"; + "--no-check-strange", Arg.Clear Flags.check_strange, " disable strange code check" ; + "--no-check-complex-expressions", Arg.Clear Flags.check_complex_expressions, " disable complex expressions check" ; + "--no-check-normalized-expressions", Arg.Clear Flags.normalized_expressions, " don't warn about non normalized expressions" ; + "--no-help-perl-checker", Arg.Clear Flags.check_help_perl_checker, " beware, perl_checker doesn't understand all perl expressions, so those warnings *are* important" ^ + "\nCommon warnings:"; + "--no-check-void", Arg.Clear Flags.check_void, " disable dropped value check" ; + "--no-check-names", Arg.Clear Flags.check_names, " disable variable & function usage check" ; + "--no-check-prototypes", Arg.Clear Flags.check_prototypes, " disable prototypes check" ; + "--no-check-import-export", Arg.Clear Flags.check_import_export, " disable inter modules check" ^ + "\nImportant warnings:"; + "--no-check-context", Arg.Clear Flags.check_context, " disable context check" ; + "--no-check-traps", Arg.Clear Flags.check_traps, " disable traps (errors) check" ^ + "\n"; + + ] in + let usage = "Usage: perl_checker [<options>] <files>\nOptions are:" in + Arg.parse options (lpush args_r) usage; + + let files = if !args_r = [] && Build.debugging then ["../t.pl"] else !args_r in + let files = List.map Info.file_to_absolute_file files in + + let required_packages, per_files = collect_withenv (parse_file true None) (default_per_files()) files in + let required_packages = uniq_ (fun (a,_) (b,_) -> a = b) required_packages in + + if !Flags.generate_pot then Parser_helper.generate_pot !pot_file else ( + + let per_files, required_packages = + fluid_let Flags.quiet (!restrict_to_files || !Flags.quiet) + (fun () -> + parse_required_packages per_files [] required_packages) in + let l_required_packages = List.map fst required_packages in + + write_packages_cache per_files !basedir ; + + (* removing non needed files from per_files (those files come from the cache) *) + List.iter (fun k -> + let per_file = Hashtbl.find per_files k in + if per_file.require_name <> None && not (List.mem (some per_file.require_name) l_required_packages) && not (List.mem per_file.file_name files) then + Hashtbl.remove per_files k + ) (hashtbl_keys per_files); + + let state = default_state per_files in + + Hashtbl.iter (fun _ per_file -> List.iter (add_package_to_state state) per_file.packages) per_files ; + + let state = + let global_vars_declared = Hashtbl.create 16 in + let package_name_to_file_name = hashtbl_collect (fun _ per_file -> List.map (fun pkg -> pkg.package_name, per_file.file_name) per_file.packages) per_files in + Hashtbl.iter (fun _ pkg -> + let file_name = List.assoc pkg.package_name package_name_to_file_name in + fluid_let Flags.quiet (!restrict_to_files || !Flags.quiet) + (fun () -> get_vars_declaration global_vars_declared file_name pkg) + ) state.per_packages ; + arrange_global_vars_declared global_vars_declared state + in + + let state = Global_checks.get_methods_available state in + + let l = hashtbl_values per_files in + let l = if !restrict_to_files then List.filter (fun file -> List.mem file.file_name files) l else l in + + let l = uniq (collect (fun file -> List.map (fun pkg -> pkg.package_name) file.packages) l) in + let l = List.map (Hashtbl.find state.per_packages) l in + + (* HACK: skip ignored_packages. Some package may have appeared in ignored_packages due to the xs bootstrap hack *) + let l = List.filter (fun pkg -> not (List.mem pkg.package_name !Config_file.ignored_packages)) l in + + List.iter (Global_checks.check_tree state) l; + + if !Flags.check_unused_global_vars then List.iter Global_checks.check_unused_vars l; + + if !package_dependencies_graph_file <> "" then generate_package_dependencies_graph state !package_dependencies_graph_file + + ) diff --git a/src/perl_checker.mli b/src/perl_checker.mli new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/src/perl_checker.mli @@ -0,0 +1 @@ + diff --git a/src/print.ml b/src/print.ml new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/src/print.ml diff --git a/src/print.mli b/src/print.mli new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/src/print.mli @@ -0,0 +1 @@ + diff --git a/src/test/.cvsignore b/src/test/.cvsignore new file mode 100644 index 0000000..9f6633c --- /dev/null +++ b/src/test/.cvsignore @@ -0,0 +1,2 @@ +.pl +.perl_checker.cache diff --git a/src/test/Makefile b/src/test/Makefile new file mode 100644 index 0000000..abe816c --- /dev/null +++ b/src/test/Makefile @@ -0,0 +1,3 @@ + +test: + for i in *.t; do ./test_it $$i || exit 1; done diff --git a/src/test/context.t b/src/test/context.t new file mode 100644 index 0000000..081abcc --- /dev/null +++ b/src/test/context.t @@ -0,0 +1,41 @@ +foreach (%h) {} context hash is not compatible with context list + foreach with a hash is usually an error + +map { 'xxx' } %h a hash is not a valid parameter to function map + +$xxx = ('yyy', 'zzz') context tuple(string, string) is not compatible with context scalar + +@l ||= 'xxx' "||=" is only useful with a scalar + +length @l never use "length @l", it returns the length of the string int(@l) + +%h . 'yyy' context hash is not compatible with context string + +'xxx' > 'yyy' context string is not compatible with context float + context string is not compatible with context float + + +1 cmp 2 you should use a number operator, not the string operator "cmp" (or replace the number with a string) + +$xxx == undef context undef is not compatible with context float + +my ($xxx) = 1 context int is not compatible with context tuple(scalar) + +($xxx, $yyy) = 1 context int is not compatible with context tuple(scalar, scalar) + +($xxx, $yyy) = (1, 2, 3) context tuple(int, int, int) is not compatible with context tuple(scalar, scalar) + +@l eq '3' context array is not compatible with context string + +qw(a b) > 2 context tuple(string, string) is not compatible with context float + +%h > 0 context hash is not compatible with context float + +%h eq 0 context hash is not compatible with context string + you should use a number operator, not the string operator "eq" (or replace the number with a string) + +@l == () + +$xxx = { xxx() }->{xxx}; + +$xxx = { xxx() }->{$xxx}; diff --git a/src/test/force_layout.t b/src/test/force_layout.t new file mode 100644 index 0000000..bb5494e --- /dev/null +++ b/src/test/force_layout.t @@ -0,0 +1,23 @@ +sub xxx you should not have a carriage-return (\n) here +{} + +xxx you should not have a carriage-return (\n) here + ($xxx); + +xxx( $xxx) you should not have a space here + +$xxx ++ you should not have a space here + +my($_xxx, $_yyy) you should have a space here + +xxx ($xxx) you should not have a space here + +'foo'.'bar' you should have a space here + +if ($xxx) { missing ";" + xxx() +} + +if ($xxx) { unneeded ";" + xxx(); +}; diff --git a/src/test/method.t b/src/test/method.t new file mode 100644 index 0000000..e59e858 --- /dev/null +++ b/src/test/method.t @@ -0,0 +1,11 @@ +bad->yyy unknown package bad + +pkg->bad unknown method bad starting in package pkg + +$xxx->bad unknown method bad + +$xxx->m1 not enough parameters + +$xxx->m0('zzz') too many parameters + +$xxx->m0_or_2('zzz') not enough or too many parameters diff --git a/src/test/prototype.t b/src/test/prototype.t new file mode 100644 index 0000000..6e56aae --- /dev/null +++ b/src/test/prototype.t @@ -0,0 +1,23 @@ + +sub xxx { 'yyy' } if the function doesn't take any parameters, please use the empty prototype. + example "sub foo() { ... }" + +sub xxx { an non-optional argument must not follow an optional argument + my ($o_xxx, $yyy) = @_; + ($o_xxx, $yyy); +} + +sub xxx { an array must be the last variable in a prototype + my (@xxx, $yyy) = @_; + @xxx, $yyy; +} + +bad() unknown function bad + +sub f0() {} too many parameters +f0('yyy') + +sub f2 { my ($x, $_y) = @_; $x } not enough parameters +f2('yyy') + +N("xxx %s yyy") not enough parameters diff --git a/src/test/read_t.pm b/src/test/read_t.pm new file mode 100644 index 0000000..a07c041 --- /dev/null +++ b/src/test/read_t.pm @@ -0,0 +1,28 @@ +package read_t; + +use lib '../..'; +use MDK::Common; + +sub read_t { + my ($file) = @_; + + my @tests; + my ($column_width, $line_number, @lines, @logs); + foreach (cat_($file), "\n") { + if (/^$/) { + push @tests, { line_number => $line_number, lines => [ @lines ], logs => [ @logs ] } if @lines; + @lines = @logs = (); + } else { + $column_width ||= length(first(/(.{20}\s+)/)); + my ($line, $log) = $column_width > 25 && /(.{$column_width})(.*)/ ? (chomp_($1) . "\n", $2) : ($_, ''); + $line =~ s/[ \t]*$//; + push @lines, $line; + push @logs, $log; + } + $line_number++; + } + @tests; +} + +1; + diff --git a/src/test/return_value.t b/src/test/return_value.t new file mode 100644 index 0000000..b4786f5 --- /dev/null +++ b/src/test/return_value.t @@ -0,0 +1,23 @@ +if ($xxx or $yyy) {} value should be dropped + context () is not compatible with context bool + +if ($xxx and $yyy) {} value should be dropped + context () is not compatible with context bool + +$xxx && yyy(); value is dropped + +`xxx`; value is dropped + +/(.*)/; value is dropped + +'xxx'; value is dropped + +'xxx' if $xxx; value is dropped + +map { xxx($_) } @l; if you don't use the return value, use "foreach" instead of "map" + +$xxx = chomp; () context not accepted here + context () is not compatible with context scalar + +$xxx = push @l, 1 () context not accepted here + context () is not compatible with context scalar diff --git a/src/test/suggest_better.t b/src/test/suggest_better.t new file mode 100644 index 0000000..d76abeb --- /dev/null +++ b/src/test/suggest_better.t @@ -0,0 +1,112 @@ +@{$xxx} @{$xxx} can be written @$xxx + +$h{"yyy"} {"yyy"} can be written {yyy} + +"$xxx" $xxx is better written without the double quotes + +$xxx->{yyy}->{zzz} the arrow "->" is unneeded + +"xxx\$xxx" you can replace "xxx\$xxx" with 'xxx$xxx', that way you don't need to escape <$> + +"xxx\$xxx" you can replace "xxx\$xxx" with 'xxx$xxx', that way you don't need to escape <$> + +"xxx\"$xxx" you can replace "xxx\"xxx" with qq(xxx"xxx), that way you don't need to escape <"> + +/xxx\'xxx/ you can replace \' with ' + +/xxx\;xxx/ you can replace \; with ; + +/\// change the delimit character / to get rid of this escape + +{ nop(); } spurious ";" before closing block + ++1 don't use unary + + +return ($xxx) unneeded parentheses + +if (($xxx eq $yyy) || $zzz) {} unneeded parentheses + +if (($xxx =~ /yyy/) || $zzz) {} unneeded parentheses + +nop() foreach ($xxx, $yyy); unneeded parentheses + +($xxx) ||= 'xxx' remove the parentheses + +$o->m0() remove these unneeded parentheses + +$o = xxx() if !$o; "$foo = ... if !$foo" can be written "$foo ||= ..." + +$o = xxx() unless $o; "$foo = ... unless $foo" can be written "$foo ||= ..." + +$o or $o = xxx(); "$foo or $foo = ..." can be written "$foo ||= ..." + +$_ =~ s/xxx/yyy/ "$_ =~ s/regexp/.../" can be written "s/regexp/.../" + +$xxx =~ /^yyy$/ "... =~ /^yyy$/" is better written "... eq 'yyy'" + +/xxx.*/ you can remove ".*" at the end of your regexp + +/xxx.*$/ you can remove ".*$" at the end of your regexp + +/[^\s]/ you can replace [^\s] with \S + +/[^\w]/ you can replace [^\w] with \W + +$xxx ? $xxx : $yyy you can replace "$foo ? $foo : $bar" with "$foo || $bar" + +my @l = (); no need to initialize variables, it's done by default + +$l[$#l] you can replace $#l with -1 + +$#l == 0 $#x == 0 is better written @x == 1 + +$#l == -1 $#x == -1 is better written @x == 0 + +$#l < 0 change your expression to use @xxx instead of $#xxx + +$l[@l] = 1 "$a[@a] = ..." is better written "push @a, ..." + +xxx(@_) replace xxx(@_) with &xxx + +member($xxx, keys %h) you can replace "member($xxx, keys %yyy)" with "exists $yyy{$xxx}" + +!($xxx =~ /.../) !($var =~ /.../) is better written $var !~ /.../ + +!($xxx == 1) !($foo == $bar) is better written $foo != $bar + +!($xxx eq 'foo') !($foo eq $bar) is better written $foo ne $bar + +grep { !member($_, qw(a b c)) } @l you can replace "grep { !member($_, ...) } @l" with "difference2([ @l ], [ ... ])" + +any { $_ eq 'foo' } @l you can replace "any { $_ eq ... } @l" with "member(..., @l)" + +foreach (@l) { use "push @l2, grep { ... } ..." instead of "foreach (...) { push @l2, $_ if ... }" + push @l2, $_ if yyy($_); or sometimes "@l2 = grep { ... } ..." +} + +foreach (@l) { use "push @l2, map { ... } ..." instead of "foreach (...) { push @l2, ... }" + push @l2, yyy($_); or sometimes "@l2 = map { ... } ..." +} + +foreach (@l) { use "push @l2, map { ... ? ... : () } ..." instead of "foreach (...) { push @l2, ... if ... }" + push @l2, yyy($_) if zzz($_); or sometimes "@l2 = map { ... ? ... : () } ..." +} or sometimes "@l2 = map { if_(..., ...) } ..." + +foreach (@l) { use "$xxx = find { ... } ..." + if (xxx($_)) { + $xxx = $_; + last; + } +} + +if (grep { xxx() } @l) {} in boolean context, use "any" instead of "grep" + +$xxx = grep { xxx() } @l; you may use "find" instead of "grep" + +$xxx ? $yyy : () you may use if_() here + beware that the short-circuit semantic of ?: is not kept + if you want to keep the short-circuit behaviour, replace () with @{[]} and there will be no warning anymore + +system(qq(foo "$xxx")) instead of quoting parameters you should give a list of arguments + +system("mkdir", $xxx) you can replace system("mkdir ...") with mkdir(...) diff --git a/src/test/syntax_restrictions.t b/src/test/syntax_restrictions.t new file mode 100644 index 0000000..de7bf77 --- /dev/null +++ b/src/test/syntax_restrictions.t @@ -0,0 +1,70 @@ +$xxx <<= 2 don't use "<<=", use the expanded version instead + +m@xxx@ don't use m@...@, replace @ with / ! , or | + +s:xxx:yyy: don't use s:...:, replace : with / ! , or | + +qw/a b c/ don't use qw/.../, use qw(...) instead + +qw{a b c} don't use qw{...}, use qw(...) instead + +q{xxx} don't use q{...}, use q(...) instead + +qq{xxx} don't use qq{...}, use qq(...) instead + +qx(xxx) don't use qx(...), use `...` instead + +-xxx don't use -xxx, use '-xxx' instead + +not $xxx don't use "not", use "!" instead + +$1 =~ /xxx/ do not use the result of a match (eg: $1) to match another pattern + +$xxx =~ "yyy" use a regexp, not a string + +xxx() =~ s/xxx/yyy/ you can only use s/// on a variable + +$1 =~ /xxx/ do not use the result of a match (eg: $1) to match another pattern + +grep /xxx/, @l always use "grep" with a block (eg: grep { ... } @list) + +for (@l) {} write "foreach" instead of "for" + +foreach ($xxx = 0; $xxx < 9; $xxx++) {} write "for" instead of "foreach" + +foreach $xxx (@l) {} don't use for without "my"ing the iteration variable + +foreach ($xxx) {} you are using the special trick to locally set $_ with a value, for this please use "for" instead of "foreach" + +unless ($xxx) {} else {} don't use "else" with "unless" (replace "unless" with "if") + +unless ($xxx) {} elsif ($yyy) {} don't use "elsif" with "unless" (replace "unless" with "if") + +zzz() unless $xxx || $yyy; don't use "unless" when the condition is complex, use "if" instead + +$$xxx{yyy} for complex dereferencing, use "->" + +wantarray please use wantarray() instead of wantarray + +eval please use "eval $_" instead of "eval" + +local *F; open F, "foo"; use a scalar instead of a bareword (eg: occurrences of F with $F) + +$xxx !~ s/xxx/yyy/ use =~ instead of !~ and negate the return value + +pkg::nop $xxx; use parentheses around argument (otherwise it might cause syntax errors if the package is "require"d and not "use"d + +new foo $xxx you must parenthesize parameters: "new Class(...)" instead of "new Class ..." + +*xxx = *yyy "*xxx = *yyy" is better written "*xxx = \&yyy" + +$_xxx = 1 variable $_xxx must not be used + (variable with name _XXX are reserved for unused variables) + +sub f2 { my ($x, $_y) = @_; $x } not enough parameters +f2(@l); # ok +f2(xxx()); # bad + +$xxx = <<"EOF"; Don't use <<"MARK", use <<MARK instead +foo +EOF diff --git a/src/test/test_it b/src/test/test_it new file mode 100755 index 0000000..a89c2c5 --- /dev/null +++ b/src/test/test_it @@ -0,0 +1,113 @@ +#!/usr/bin/perl + +use lib '../..'; +use MDK::Common; +use read_t; + +my ($file) = @ARGV; +my @tests = read_t::read_t($file); + +output('pkg3.pm', <<'EOF'); +package pkg3; +our @ISA = qw(Exporter); +our %EXPORT_TAGS = ( + missing_fs => [ qw(f f0) ], +); +our @EXPORT_OK = qw(f); +EOF + +my $header = <<'EOF'; +package pkg; +use lib "../.."; +sub new {} +sub m0 { my ($_o) = @_; 0 } +sub m1 { my ($_o, $a) = @_; $a } +sub m2 { my ($_o, $_a, $b) = @_; $b } +sub m0_or_2 { my ($_o, $_a, $b) = @_; $b } +package pkg2; +sub new {} +sub m0_or_2 { my ($_o) = @_; 0 } + +package my_pkg; +sub nop {} +sub xxx { @_ } +sub yyy { @_ } +sub zzz { @_ } +sub pkg::nop {} +sub N { $_[0] } +sub N_ { $_[0] } +my ($xxx, $yyy, $zzz, $o, @l, @l2, %h); +xxx($xxx, $yyy, $zzz, $o, @l, @l2, %h); +use MDK::Common; + +EOF + +my $oo_header = <<'EOF'; +EOF + +my $local = <<'EOF'; +{ + local $_; +EOF + +my $local_trailer = <<'EOF'; + + xxx($xxx, $yyy, $zzz, $o, @l, @l2, %h); +} +EOF + +my $new; +foreach my $test (@tests) { + my @l = @{$test->{lines}}; + + pop @l while $l[-1] =~ /^\s*$/; + if (@l == 1) { + if ($l[-1] !~ /(;|[\s{]\})\s*$/) { + $l[-1] =~ s/^(.*?)(\s*$)/xxx($1);$2/; + } else { + # no comma for: + # - prefix for/foreach/... + # - already a comma + # - a block { ... } + my $no_comma = $l[-1] =~ /(^\s*(for|foreach|if|unless|while|sub)\s)|(;\s+$)|(^{.*}\s*$)/; + my $opt_comma = $no_comma ? '' : ';'; + $l[-1] =~ s/(\s+$)/$opt_comma nop();$1/; + } + } + if (! any { /^(sub|use) / } @l) { + @l = ($local, @l, $local_trailer); + } + if (any { /->\w/ } @l) { + @l = ($oo_header, $header, @l); + } else { + @l = ($header, @l); + } + output('.pl', @l); + my @raw_log = `../perl_checker .pl`; + die "@raw_log in .pl ($file):\n" . join('', @{$test->{lines}}) if any { /^syntax error$/ } @raw_log; + + my $f; + my @log = grep { + if (/^File "(.*)", line /) { + $f = $1; + 0; + } else { + $f eq '.pl'; + } + } @raw_log; + + foreach my $i (0 .. max(int @{$test->{lines}}, int @log) - 1) { + my $s = $test->{lines}[$i]; + $s =~ s/\s+$//; + $new .= sprintf "%-40s %s", $s, $log[$i] || "\n"; + } + $new .= "\n"; +} +output("$file.new", $new); +if (system('diff', '-buB', $file, "$file.new") == 0) { + unlink "$file.new", '.pl', 'pkg3.pm'; + exit 0; +} else { + warn "*" x 80, "\nnot same\n"; + exit 1; +} diff --git a/src/test/various_errors.t b/src/test/various_errors.t new file mode 100644 index 0000000..48a8ece --- /dev/null +++ b/src/test/various_errors.t @@ -0,0 +1,61 @@ +local $xxx ||= $yyy applying ||= on a new initialized variable is wrong + +$1 =~ s/xxx/yyy/ do not modify the result of a match (eg: $1) + +$xxx[1, 2] you must give only one argument + +$xxx[] you must give one argument + +my $_x = 'xxx' if $xxx; replace "my $foo = ... if <cond>" with "my $foo = <cond> && ..." + +$xxx or my $_x = 'xxx'; replace "<cond> or my $foo = ..." with "my $foo = !<cond> && ..." + +'' || 'xxx' <constant> || ... is the same as ... + +if ($xxx = '') {} are you sure you did not mean "==" instead of "="? + +N("xxx$yyy") don't use interpolated translated string, use %s or %d instead + +if ($xxx && $yyy = xxx()) {} invalid lvalue + +1 + 2 >> 3 missing parentheses (needed for clarity) + +$xxx ? $yyy = 1 : $zzz = 2; missing parentheses (needed for clarity) + invalid lvalue + +N_("xxx") . 'yyy' N_("xxx") . "yyy" is dumb since the string "xxx" will never get translated + +join(@l) first argument of join() must be a scalar + +join(',', 'foo') join('...', $foo) is the same as $foo + +if_($xxx) not enough parameters + +push @l you must give some arguments to push + +push $xxx, 1 push is expecting an array + +pop $xxx pop is expecting an array and nothing else + +my (@l2, $xxx) = @l; @l2 takes all the arguments, $xxx is undef in any case + +$bad undeclared variable $bad + +{ my $a } unused variable $a + +my $xxx; yyy($xxx); my $xxx; redeclared variable $xxx + +{ my $xxx; $xxx = 1 } variable $xxx assigned, but not read + +$a undeclared variable $a + +use bad; can't find package bad + +use pkg3 ':bad'; package pkg3 doesn't export tag :bad +bad(); unknown function bad + +use pkg3 ':missing_fs'; name &f is not defined in package pkg3 +f(); name &f0 is not defined in package pkg3 + +use pkg3 'f'; name &f is not defined in package pkg3 +f(); diff --git a/src/tree.ml b/src/tree.ml new file mode 100644 index 0000000..16fd0e4 --- /dev/null +++ b/src/tree.ml @@ -0,0 +1,443 @@ +open Types +open Common +open Printf +open Config_file +open Parser_helper + +type special_export = Re_export_all | Fake_export_all + +type exports = { + export_ok : (context * string) list ; + export_auto : (context * string) list ; + export_tags : (string * (context * string) list) list ; + special_export : special_export option ; + } + +type uses = (string * ((context * string) list option * pos)) list + +type prototype = { + proto_nb_min : int ; + proto_nb_max : int option ; + } + +type variable_used = Access_none | Access_write_only | Access_various + +type per_package = { + package_name : string ; has_package_name : bool ; + vars_declared : (context * string, pos * variable_used ref * prototype option) Hashtbl.t; + imported : ((context * string) * (string * variable_used ref * prototype option)) list option ref; + exports : exports ; + uses : uses ; + required_packages : (string * pos) list ; + body : fromparser list; + isa : (string * pos) list option ; + } + +type per_file = { + file_name : string ; + require_name : string option ; + lines_starts : int list ; + build_time : float ; + packages : per_package list ; + from_basedir : bool ; + } + +let anonymous_package_count = ref 0 +let empty_exports = { export_ok = []; export_auto = []; export_tags = []; special_export = None } +let use_lib = ref (List.map Info.file_to_absolute_file (readlines (Unix.open_process_in "perl -le 'print foreach @INC'"))) + +let ignore_package pkg = + if !Flags.verbose then print_endline_flush ("ignoring package " ^ pkg); + lpush ignored_packages pkg + +let die_with_pos pos msg = failwith (Info.pos2sfull pos ^ msg) +let warn_with_pos warn_types pos msg = if Flags.are_warning_types_set warn_types then print_endline_flush (Info.pos2sfull pos ^ msg) + +let s2context s = + match s.[0] with + | '$' -> I_scalar, skip_n_char 1 s + | '%' -> I_hash , skip_n_char 1 s + | '@' -> I_array , skip_n_char 1 s + | '&' -> I_func , skip_n_char 1 s + | '*' -> I_star , skip_n_char 1 s + | _ -> I_raw, s + + +let get_current_package t = + match t with + | Package(Ident _ as ident) :: body -> + let rec bundled_packages packages current_package found_body = function + | [] -> List.rev ((Some current_package, List.rev found_body) :: packages) + | Package(Ident _ as ident) :: body -> + let packages = (Some current_package, List.rev found_body) :: packages in + bundled_packages packages (string_of_fromparser ident) [] body + | instr :: body -> + bundled_packages packages current_package (instr :: found_body) body + in + bundled_packages [] (string_of_fromparser ident) [] body + | _ -> + if str_ends_with !Info.current_file ".pm" then warn_with_pos [Warn_normalized_expressions] (!Info.current_file, 0, 0) (sprintf "module %s does not have \"package xxxx;\" on its first line" (Info.absolute_file_to_file !Info.current_file)) ; + [ None, t ] + +let from_qw_raw = function + | String([s, List []], pos) -> [ s, pos ] + | String(_, pos) -> + warn_with_pos [] pos "not recognised yet" ; + [] + | Raw_string(s, pos) -> + [ s, pos ] + | List [] -> [] + | List [ List l ] -> + some_or (l_option2option_l (List.map (function + | String([s, List []], pos) + | Raw_string(s, pos) -> Some(s, pos) + | Ident(_, _, pos) as ident -> Some(string_of_fromparser ident, pos) + | e -> warn_with_pos [] (get_pos_from_expr e) "not recognised yet"; None + ) l)) [] + | e -> warn_with_pos [] (get_pos_from_expr e) "not recognised yet"; [] + +let from_qw e = + List.map (fun (s, pos) -> + let context, s' = s2context s in + let context = + match context with + | I_raw -> if s'.[0] = ':' then I_raw else I_func + | I_func -> warn_with_pos [Warn_import_export] pos "weird, exported name with a function context especially given"; I_func + | _ -> context + in context, s' + ) (from_qw_raw e) + +let get_exported t = + List.fold_left (fun exports e -> + match e with + | List [ Call_op("=", [ Deref(I_array, Ident(None, "EXPORT", _)); Call _ ], pos) ] + | List [ Call_op("=", [ My_our("our", [(I_array, "EXPORT")], _); Call _ ], pos) ] -> + if exports.special_export = None then warn_with_pos [Warn_import_export] pos "unrecognised @EXPORT" ; + exports + + | List [ Call_op("=", [ Deref(I_array, Ident(None, "EXPORT", _)); v ], pos)] + | List [ Call_op("=", [ My_our("our", [(I_array, "EXPORT")], _); v ], pos)] -> + if exports.export_auto <> [] then warn_with_pos [Warn_import_export] pos "weird, @EXPORT set twice" ; + { exports with export_auto = from_qw v } + + | Perl_checker_comment("RE-EXPORT-ALL", _) -> { exports with special_export = Some Re_export_all } + | Perl_checker_comment("EXPORT-ALL", _) -> { exports with special_export = Some Fake_export_all } + + | List [ Call_op("=", [ Deref(I_array, Ident(None, "EXPORT_OK", _)); v ], pos)] + | List [ Call_op("=", [ My_our("our", [(I_array, "EXPORT_OK")], _); v ], pos)] -> + if exports.export_ok <> [] then warn_with_pos [Warn_import_export] pos "weird, @EXPORT_OK set twice" ; + (match v with + | Call(Deref(I_func, Ident(None, "map", _)), + [ Anonymous_sub(_, Block [List [Deref(I_array, Deref(I_scalar, Ident (None, "_", _)))]], _); + Call(Deref(I_func, Ident(None, "values", _)), [ Deref(I_hash, Ident(None, "EXPORT_TAGS", _))])]) -> + { exports with export_ok = collect snd exports.export_tags } + | _ -> { exports with export_ok = from_qw v }) + + | List [ Call_op("=", [ Deref(I_hash, Ident(None, "EXPORT_TAGS", _)); v ], pos)] + | List [ Call_op("=", [ My_our("our", [(I_hash, "EXPORT_TAGS")], _); v ], pos)] -> + (try + let export_tags = + match v with + | List [ List l ] -> + List.map (function + | Raw_string(tag, _), Ref(I_array, List [List [v]]) -> + let para = + match v with + | Deref(I_array, Ident(None, "EXPORT_OK", _)) -> exports.export_ok + | _ -> from_qw v + in + ":" ^ tag, para + | _ -> raise Not_found + ) (group_by_2 l) + | _ -> raise Not_found + in + if exports.export_tags <> [] then warn_with_pos [Warn_import_export] pos "weird, %EXPORT_TAGS set twice" ; + { exports with export_tags = export_tags } + with _ -> + warn_with_pos [Warn_import_export] pos "unrecognised %EXPORT_TAGS" ; + exports) + + (* $EXPORT_TAGS{all} = [ map { @$_ } values %EXPORT_TAGS ]; *) + | List [Call_op("=", [ + Deref_with(I_hash, I_scalar, Ident(None, "EXPORT_TAGS", _), Raw_string("all", _)); + Ref(I_array, + List[List[ + Call(Deref(I_func, Ident(None, "map", _)), + [Anonymous_sub(_, Block [List [Deref(I_array, Deref(I_scalar, Ident(None, "_", _)))]], _); + Call(Deref(I_func, Ident(None, "values", _)), [Deref(I_hash, Ident(None, "EXPORT_TAGS", _))])]) + ]]) + ], _)] -> + { exports with export_tags = (":all", collect snd exports.export_tags) :: exports.export_tags } + + | List (My_our _ :: _) -> + let _,_ = e,e in + exports + | _ -> exports + ) empty_exports t + +let uses_external_package = function + | "vars" | "Exporter" | "diagnostics" | "strict" | "warnings" | "lib" | "POSIX" | "Gtk" | "Storable" + | "Config" | "Socket" | "IO::Socket" | "DynaLoader" | "Data::Dumper" | "Time::localtime" | "Expect" -> true + | _ -> false + +let get_uses t = + List.fold_left (fun uses e -> + match e with + | Use(Ident(None, "lib", _), [libs]) -> + use_lib := List.map Info.file_to_absolute_file (List.map snd (from_qw libs)) @ !use_lib ; + uses + | Use(Ident(None, "base", _), classes) -> + let l = List.map (fun (pkg, pos) -> (pkg, (None, pos))) (collect from_qw_raw classes) in + l @ uses + | Use(Ident(_, _, pos) as pkg, l) -> + let package = string_of_fromparser pkg in + if uses_external_package package then + uses + else + let para = match l with + | [] -> None + | [ Num(_, _) ] -> None (* don't care about the version number *) + | _ -> Some(collect from_qw l) + in + (package, (para, pos)) :: uses + | _ -> uses + ) [] t + +let get_isa t = + List.fold_left (fun (isa, exporter) e -> + match e with + | Use(Ident(None, "base", pos), classes) -> + if isa <> None || exporter <> None then die_with_pos pos "\"use base\" and \"@ISA\" must be used once only"; + Some (collect from_qw_raw classes), None + | List [ Call_op("=", [ Deref(I_array, Ident(None, "ISA", pos)) ; classes ], _) ] + | List [ Call_op("=", [ My_our("our", [ I_array, "ISA" ], pos) ; classes ], _) ] -> + if isa <> None || exporter <> None then die_with_pos pos "\"use base\" and \"@ISA\" must be used once only"; + let special, l = List.partition (fun (s, _) -> s = "DynaLoader" || s = "Exporter") (from_qw_raw classes) in + let exporter = if List.mem_assoc "Exporter" special then Some pos else None in + let isa = if l = [] && special <> [] then None else Some l in + isa, exporter + | _ -> isa, exporter + ) (None, None) t + +let read_xs_extension_from_c global_vars_declared file_name package pos = + try + let cfile = Filename.chop_extension file_name ^ ".c" in + let prefix = "newXS(\"" ^ package.package_name ^ "::" in + ignore (fold_lines (fun in_bootstrap s -> + if in_bootstrap then + (try + let offset = strstr s prefix + String.length prefix in + let end_ = String.index_from s offset '"' in + let ident = String.sub s offset (end_ - offset) in + match split_name_or_fq_name ident with + | None, ident -> Hashtbl.replace package.vars_declared (I_func, ident) (pos, ref Access_none, None) + | Some fq, ident -> + let fq = package.package_name ^ "::" ^ fq in + Hashtbl.replace global_vars_declared (I_func, fq, ident) (pos, None) + with Not_found -> ()); + in_bootstrap || str_contains s "XS_VERSION_BOOTCHECK" + ) false (open_in cfile)); + if !Flags.verbose then print_endline_flush (sprintf "using xs symbols from %s" cfile) ; + true + with Invalid_argument _ | Sys_error _ -> false + +let findfile dirs f = List.find (fun dir -> Sys.file_exists (dir ^ "/" ^ f)) dirs + +let read_xs_extension_from_so global_vars_declared package pos = + try + let splitted = split_at2 ':'':' package.package_name in + let rel_file = String.concat "/" ("auto" :: splitted @ [ last splitted ]) ^ ".so" in + let so = (findfile !use_lib rel_file) ^ "/" ^ rel_file in + let channel = Unix.open_process_in (Printf.sprintf "nm --defined-only -D \"%s\"" so) in + if !Flags.verbose then print_endline_flush (sprintf "using shared-object symbols from %s" so) ; + fold_lines (fun () s -> + let s = skip_n_char 11 s in + if str_begins_with "XS_" s then + let s = skip_n_char 3 s in + let len = String.length s in + let rec find_package_name accu i = + try + let i' = String.index_from s i '_' in + let accu = String.sub s i (i'-i) :: accu in + if i' + 1 < len && s.[i'+1] = '_' then + find_package_name accu (i' + 2) + else + List.rev accu, skip_n_char (i'+1) s + with Not_found -> List.rev accu, skip_n_char i s + in + let fq, name = find_package_name [] 0 in + Hashtbl.replace global_vars_declared (I_func, String.concat "::" fq, name) (pos, None) + ) () channel; + if not Build.debugging then ignore (Unix.close_process_in channel) ; + true + with Not_found -> false + +let has_proto perl_proto body = + match perl_proto with + | Some "" -> Some([], raw_pos2pos bpos, [body]) + | _ -> + match body with + | Block [] -> + Some([ I_array, "_empty" ], raw_pos2pos bpos, []) + | Block (List [Call_op ("=", [My_our ("my", mys, mys_pos); Deref(I_array, Ident(None, "_", _))], _pos)] :: body) -> + Some(mys, mys_pos, body) + | _ -> None + +let get_proto perl_proto body = + map_option (fun (mys, pos, _) -> + let scalars, others = break_at (fun (context, _) -> context <> I_scalar) mys in + (match others with + | (I_array, _) :: _ :: _ -> warn_with_pos [Warn_prototypes] pos "an array must be the last variable in a prototype" + | (I_hash, _) :: _ :: _ -> warn_with_pos [Warn_prototypes] pos "an hash must be the last variable in a prototype" + | _ -> ()); + let is_optional (_, s) = + String.length s > 2 && (s.[0] = 'o' || s.[0] = 'b') && s.[1] = '_' || + String.length s > 3 && s.[0] = '_' && (s.[1] = 'o' || s.[1] = 'b') && s.[2] = '_' + in + let must_have, optional = break_at is_optional scalars in + if not (List.for_all is_optional optional) then + warn_with_pos [Warn_prototypes] pos "an non-optional argument must not follow an optional argument"; + let min = List.length must_have in + { proto_nb_min = min; proto_nb_max = if others = [] then Some(min + List.length optional) else None } + ) (has_proto perl_proto body) + +let get_vars_declaration global_vars_declared file_name package = + List.iter (function + | Sub_declaration(Ident(None, name, pos), perl_proto, body, _) -> + Hashtbl.replace package.vars_declared (I_func, name) (pos, ref Access_none, get_proto perl_proto body) + | Sub_declaration(Ident(Some fq, name, pos), perl_proto, body, _) -> + Hashtbl.replace global_vars_declared (I_func, fq, name) (pos, get_proto perl_proto body) + + | List [ Call_op("=", [My_our("our", ours, pos); _], _) ] + | List [ Call_op("=", [My_our("local", ([ I_scalar, "_" ] as ours), pos); _], _) ] + | List [ My_our("our", ours, pos) ] + | My_our("our", ours, pos) -> + List.iter (fun (context, name) -> Hashtbl.replace package.vars_declared (context, name) (pos, ref Access_none, None)) ours + + | Use(Ident(None, "vars", pos), [ours]) -> + List.iter (fun (context, name) -> Hashtbl.replace package.vars_declared (context, name) (pos, ref Access_none, None)) (from_qw ours) + | Use(Ident(None, "vars", pos), _) -> + die_with_pos pos "usage: use vars qw($var func)" + + | List [ Method_call(Raw_string(pkg, pos), Raw_string("bootstrap", _), _) ] -> + if pkg <> package.package_name then + warn_with_pos [Warn_import_export] pos "strange bootstrap (the package name is not the same as the current package)" + else + if not (read_xs_extension_from_c global_vars_declared file_name package pos) then + if not (read_xs_extension_from_so global_vars_declared package pos) then + ignore_package pkg + | _ -> () + ) package.body + +let rec fold_tree f env e = + match f env e with + | Some env -> env + | None -> + match e with + | Anonymous_sub(_, e', _) + | Ref(_, e') + | Deref(_, e') + -> fold_tree f env e' + + | Diamond(e') + -> fold_tree_option f env e' + + | String(l, _) + -> List.fold_left (fun env (_, e) -> fold_tree f env e) env l + + | Sub_declaration(e1, _, e2, _) + | Deref_with(_, _, e1, e2) + -> + let env = fold_tree f env e1 in + let env = fold_tree f env e2 in + env + + | Use(_, l) + | List l + | Block l + | Call_op(_, l, _) + -> List.fold_left (fold_tree f) env l + + | Call(e', l) + -> + let env = fold_tree f env e' in + List.fold_left (fold_tree f) env l + + | Method_call(e1, e2, l) + -> + let env = fold_tree f env e1 in + let env = fold_tree f env e2 in + List.fold_left (fold_tree f) env l + + | _ -> env + +and fold_tree_option f env = function + | None -> env + | Some e -> fold_tree f env e + + +let get_global_info_from_package from_basedir require_name build_time t = + let current_packages = get_current_package t in + let packages = List.map (fun (current_package, t) -> + let exports = get_exported t in + let exporting_something() = exports.export_ok <> [] || exports.export_auto <> [] || exports.export_tags <> [] || exports.special_export = Some Re_export_all in + + let package_name = + match current_package with + | None -> + if exporting_something() then + die_with_pos (!Info.current_file, 0, 0) "file with no package name wants to export!" + else + (incr anonymous_package_count ; sprintf "anonymous%d" !anonymous_package_count) + | Some name -> name + in + let isa, exporter = get_isa t in + (match exporter with + | None -> + if exporting_something() then warn_with_pos [Warn_import_export] (!Info.current_file, 0, 0) "you must have \"@ISA = qw(Exporter)\" to EXPORT something" + | Some pos -> + if not (exporting_something()) then warn_with_pos [Warn_import_export] pos "Inheritating from Exporter without EXPORTing anything"); + + let uses = List.rev (get_uses t) in + let required_packages = List.map (fun (s, (_, pos)) -> s, pos) uses in + let required_packages = List.fold_left (fold_tree (fun l -> + function + | Perl_checker_comment(s, pos) when str_begins_with "require " s -> + Some((skip_n_char 8 s, pos) :: l) + | Call(Deref(I_func, Ident (None, "require", pos)), [Ident _ as pkg]) -> + let package = string_of_fromparser pkg in + if uses_external_package package then None else Some((package, pos) :: l) + | Call(Deref(I_func, Ident (None, "require", pos)), [Raw_string(pkg, _)]) + when not (String.contains pkg '/') && Filename.check_suffix pkg ".pm" -> + let package = Filename.chop_suffix pkg ".pm" in + if uses_external_package package then None else Some((package, pos) :: l) + | _ -> None) + ) required_packages t in + { + package_name = package_name; + has_package_name = current_package <> None ; + exports = exports ; + imported = ref None ; + vars_declared = Hashtbl.create 16 ; + uses = uses ; + required_packages = required_packages ; + body = t ; + isa = isa ; + } + ) current_packages in + + let require_name = match require_name with + | Some require_name -> Some require_name + | None -> match packages with + | [ pkg ] when pkg.has_package_name -> Some pkg.package_name + | _ -> None + in + { + file_name = !Info.current_file ; + require_name = require_name ; + lines_starts = !Info.current_file_lines_starts ; + build_time = build_time ; + packages = packages ; + from_basedir = from_basedir ; + } + diff --git a/src/tree.mli b/src/tree.mli new file mode 100644 index 0000000..3cdf219 --- /dev/null +++ b/src/tree.mli @@ -0,0 +1,57 @@ +open Types + +type special_export = Re_export_all | Fake_export_all + +type exports = { + export_ok : (context * string) list; + export_auto : (context * string) list; + export_tags : (string * (context * string) list) list; + special_export : special_export option; +} + + +type uses = (string * ((context * string) list option * pos)) list + +type prototype = { + proto_nb_min : int ; + proto_nb_max : int option ; + } + +type variable_used = Access_none | Access_write_only | Access_various + +type per_package = { + package_name : string ; has_package_name : bool ; + vars_declared : (context * string, pos * variable_used ref * prototype option) Hashtbl.t; + imported : ((context * string) * (string * variable_used ref * prototype option)) list option ref; + exports : exports ; + uses : uses ; + required_packages : (string * pos) list ; + body : fromparser list; + isa : (string * pos) list option ; + } + +type per_file = { + file_name : string ; + require_name : string option ; + lines_starts : int list ; + build_time : float ; + packages : per_package list ; + from_basedir : bool ; + } + +val empty_exports : exports +val ignore_package : string -> unit +val use_lib : string list ref +val uses_external_package : string -> bool +val findfile : string list -> string -> string + +val get_global_info_from_package : bool -> string option -> float -> fromparser list -> per_file + +val has_proto : string option -> fromparser -> ((context * string) list * pos * fromparser list) option +val get_vars_declaration : (context * string * string, pos * prototype option) Hashtbl.t -> string -> per_package -> unit + +val die_with_pos : string * int * int -> string -> 'a +val warn_with_pos : Types.warning list -> string * int * int -> string -> unit + +val fold_tree : ('a -> fromparser -> 'a option) -> 'a -> fromparser -> 'a +val from_qw : fromparser -> (context * string) list diff --git a/src/types.mli b/src/types.mli new file mode 100644 index 0000000..5f23d3a --- /dev/null +++ b/src/types.mli @@ -0,0 +1,125 @@ +exception TooMuchRParen + +type raw_pos = int * int + +type pos = string * int * int + +type spaces = + | Space_0 + | Space_1 + | Space_n + | Space_cr + | Space_none + +type context = I_scalar | I_hash | I_array | I_func | I_raw | I_star + +type maybe_context = + | M_none + + (* scalars *) + | M_bool | M_int | M_float + | M_revision + | M_string + | M_ref of maybe_context + | M_undef + | M_unknown_scalar + + | M_tuple of maybe_context list + | M_list + | M_array + | M_hash + | M_sub + + | M_special + | M_unknown + | M_mixed of maybe_context list + +type sub_declaration_kind = Real_sub_declaration | Glob_assign + +type fromparser = + | Undef + | Ident of string option * string * pos + | Num of string * pos + | Raw_string of string * pos + | String of (string * fromparser) list * pos + + | Ref of context * fromparser + | Deref of context * fromparser + | Deref_with of context * context * fromparser * fromparser (* from_context, to_context, ref, para *) + + | Diamond of fromparser option + + | List of fromparser list + | Block of fromparser list + + | Call_op of string * fromparser list * pos + | Call of fromparser * fromparser list + | Method_call of fromparser * fromparser * fromparser list + + | Anonymous_sub of string option * fromparser * pos (* prototype, expr, pos *) + | My_our of string * (context * string) list * pos + | Use of fromparser * fromparser list + | Sub_declaration of fromparser * string option * fromparser * sub_declaration_kind (* name, prototype, body, kind *) + | Package of fromparser + | Label of string + | Perl_checker_comment of string * pos + + | Too_complex + | Semi_colon + +type priority = +| P_tok +| P_tight +| P_mul +| P_add +| P_uniop +| P_cmp +| P_eq +| P_expr +| P_bit +| P_tight_and +| P_tight_or +| P_ternary +| P_assign +| P_comma +| P_call_no_paren +| P_and +| P_or +| P_loose + +| P_paren_wanted of priority +| P_paren of priority + +| P_none + +type 'a any_spaces_pos = { + any : 'a ; + spaces : spaces ; + pos : int * int ; + mcontext : maybe_context ; + } + +type 'a prio_anyexpr = { + priority : priority ; + expr : 'a + } + +type prio_expr_spaces_pos = fromparser prio_anyexpr any_spaces_pos +type prio_lexpr_spaces_pos = fromparser list prio_anyexpr any_spaces_pos + +type warning = + | Warn_white_space + | Warn_suggest_simpler + | Warn_unused_global_vars + | Warn_void + | Warn_context + | Warn_strange + | Warn_traps + | Warn_complex_expressions + | Warn_normalized_expressions + | Warn_suggest_functional + | Warn_prototypes + | Warn_import_export + | Warn_names + | Warn_MDK_Common + | Warn_help_perl_checker |