From de47eb59bb829423b1d0f47ba13099073999b3cb Mon Sep 17 00:00:00 2001 From: Nicolas Planel Date: Wed, 29 Oct 2003 16:07:11 +0000 Subject: Corporate Server 2.1.1 release --- mdk-stage1/dietlibc/libregex/rx.c | 128 ++++++++++++++++++++++++++++---------- 1 file changed, 94 insertions(+), 34 deletions(-) (limited to 'mdk-stage1/dietlibc/libregex') diff --git a/mdk-stage1/dietlibc/libregex/rx.c b/mdk-stage1/dietlibc/libregex/rx.c index b28358c2e..9bab3e22c 100644 --- a/mdk-stage1/dietlibc/libregex/rx.c +++ b/mdk-stage1/dietlibc/libregex/rx.c @@ -1,8 +1,11 @@ +#define NDEBUG #include #include +#include #include #include #include +#include /* this is ugly. * the idea is to build a parse tree, then do some poor man's OOP with a @@ -40,7 +43,7 @@ struct regex { struct atom { matcher m; void* next; - enum { EMPTY, REGEX, BRACKET, ANY, LINESTART, LINEEND, WORDSTART, WORDEND, CHAR, } type; + enum { ILLEGAL, EMPTY, REGEX, BRACKET, ANY, LINESTART, LINEEND, WORDSTART, WORDEND, CHAR, } type; int bnum; union { struct regex r; @@ -64,7 +67,7 @@ struct branch { }; static void clearcc(unsigned int* x) { - memset(x,0,sizeof(*x)); + memset(x,0,sizeof(struct bracketed)); } static void setcc(unsigned int* x,unsigned int bit) { @@ -84,9 +87,9 @@ static const char* parsebracketed(struct bracketed*__restrict__ b,const char*__r if (*t=='^') { negflag=1; ++t; } do { if (*t==0) return s; - setcc(b->cc,rx->cflags®_ICASE?*t:tolower(*t)); + setcc(b->cc,rx->cflags®_ICASE?tolower(*t):*t); if (t[1]=='-' && t[2]!=']') { - for (i=*t+1; i<=t[2]; ++i) setcc(b->cc,rx->cflags®_ICASE?i:tolower(i)); + for (i=*t+1; i<=t[2]; ++i) setcc(b->cc,rx->cflags®_ICASE?tolower(i):i); t+=2; } ++t; @@ -97,55 +100,79 @@ static const char* parsebracketed(struct bracketed*__restrict__ b,const char*__r static const char* parseregex(struct regex* r,const char* s,regex_t* rx); -static int matchatom(void*__restrict__ x,const char*__restrict__ s,int ofs,struct __regex_t*__restrict__ preg,int plus,int eflags) { +static int matchatom(void*__restrict__ x,const unsigned char*__restrict__ s,int ofs,struct __regex_t*__restrict__ preg,int plus,int eflags) { register struct atom* a=(struct atom*)x; int matchlen=0; + assert(a->type!=ILLEGAL); switch (a->type) { case EMPTY: -// printf("matching EMPTY against \"%s\"\n",s); - preg->l[a->bnum].rm_so=preg->l[a->bnum].rm_eo=ofs; +#ifdef DEBUG + printf("matching atom EMPTY against \"%s\"\n",s); + printf("a->bnum is %d\n",a->bnum); +#endif + if (a->bnum>=0) preg->l[a->bnum].rm_so=preg->l[a->bnum].rm_eo=ofs; goto match; case REGEX: -// printf("matching REGEX against \"%s\"\n",s); - if ((matchlen=a->u.r.m(a,s,ofs,preg,0,eflags))>=0) { +#ifdef DEBUG + printf("matching atom REGEX against \"%s\"\n",s); + printf("a->bnum is %d\n",a->bnum); +#endif + if ((matchlen=a->u.r.m(&a->u.r,s,ofs,preg,0,eflags))>=0) { + assert(a->bnum>=0); preg->l[a->bnum].rm_so=ofs; preg->l[a->bnum].rm_eo=ofs+matchlen; goto match; } break; case BRACKET: -// printf("matching BRACKET against \"%s\"\n",s); +#ifdef DEBUG + printf("matching atom BRACKET against \"%s\"\n",s); +#endif matchlen=1; if (*s=='\n' && (preg->cflags®_NEWLINE)) break; if (*s && issetcc(a->u.b.cc,(preg->cflags®_ICASE?tolower(*s):*s))) goto match; break; case ANY: -// printf("matching ANY against \"%s\"\n",s); +#ifdef DEBUG + printf("matching atom ANY against \"%s\"\n",s); +#endif if (*s=='\n' && (preg->cflags®_NEWLINE)) break; matchlen=1; if (*s) goto match; break; case LINESTART: -// printf("matching LINESTART against \"%s\"\n",s); +#ifdef DEBUG + printf("matching atom LINESTART against \"%s\"\n",s); +#endif if (ofs==0 && (eflags®_NOTBOL)==0) { goto match; } break; case LINEEND: -// printf("matching LINEEND against \"%s\"\n",s); - if ((*s && *s!='\n') || (eflags®_NOTEOL)==0) break; +#ifdef DEBUG + printf("matching atom LINEEND against \"%s\"\n",s); +#endif + if ((*s && *s!='\n') || (eflags®_NOTEOL)) break; goto match; case WORDSTART: - if ((ofs==0 || isspace(s[-1])) && !isspace(*s)) +#ifdef DEBUG + printf("matching atom WORDSTART against \"%s\"\n",s); +#endif + if ((ofs==0 || !isalnum(s[-1])) && isalnum(*s)) goto match; break; case WORDEND: - if (ofs>0 && !isspace(s[-1]) && isspace(*s)) +#ifdef DEBUG + printf("matching atom WORDEND against \"%s\"\n",s); +#endif + if (ofs>0 && isalnum(s[-1]) && !isalnum(*s)) goto match; break; case CHAR: -// printf("matching CHAR %c against \"%s\"\n",a->u.c,s); +#ifdef DEBUG + printf("matching atom CHAR %c against \"%s\"\n",a->u.c,s); +#endif matchlen=1; if (((preg->cflags®_ICASE)?tolower(*s):*s)==a->u.c) goto match; break; @@ -160,7 +187,7 @@ match: static const char* parseatom(struct atom*__restrict__ a,const char*__restrict__ s,regex_t*__restrict__ rx) { const char *tmp; - a->m=matchatom; + a->m=(matcher)matchatom; a->bnum=-1; switch (*s) { case '(': @@ -203,7 +230,7 @@ static const char* parseatom(struct atom*__restrict__ a,const char*__restrict__ } default: a->type=CHAR; - a->u.c=rx->cflags®_ICASE?*s:tolower(*s); + a->u.c=rx->cflags®_ICASE?tolower(*s):*s; break; } return s+1; @@ -213,9 +240,15 @@ static const char* parseatom(struct atom*__restrict__ a,const char*__restrict__ static int matchpiece(void*__restrict__ x,const char*__restrict__ s,int ofs,struct __regex_t*__restrict__ preg,int plus,int eflags) { register struct piece* a=(struct piece*)x; int matchlen=0; - int tmp,num=0; - unsigned int *offsets=alloca(sizeof(int)*a->max); + int tmp=0,num=0; + unsigned int *offsets; + assert(a->max>0 && a->max<1000); +#ifdef DEBUG + printf("alloca(%d)\n",sizeof(int)*a->max); +#endif + offsets=alloca(sizeof(int)*a->max); offsets[0]=0; +// printf("allocating %d offsets...\n",a->max); // printf("matchpiece \"%s\"...\n",s); /* first, try to match the atom as often as possible, up to a->max times */ if (a->max == 1 && a->min == 1) @@ -227,6 +260,7 @@ static int matchpiece(void*__restrict__ x,const char*__restrict__ s,int ofs,stru a->a.next=save; ++num; matchlen+=tmp; +// printf("setting offsets[%d] to %d\n",num,tmp); offsets[num]=tmp; } else { a->a.next=save; @@ -235,12 +269,13 @@ static int matchpiece(void*__restrict__ x,const char*__restrict__ s,int ofs,stru } if (nummin) return -1; /* already at minimum matches; signal mismatch */ /* then, while the rest does not match, back off */ - for (;;) { + for (;num>=0;) { if (a->next) tmp=((struct atom*)(a->next))->m(a->next,s+matchlen,ofs+matchlen,preg,plus+matchlen,eflags); else tmp=plus+matchlen; if (tmp>=0) break; /* it did match; don't back off any further */ +// printf("using offsets[%d] (%d)\n",num,offsets[num]); matchlen-=offsets[num]; --num; } @@ -279,6 +314,11 @@ static const char* parsepiece(struct piece*__restrict__ p,const char*__restrict_ static int matchbranch(void*__restrict__ x,const char*__restrict__ s,int ofs,struct __regex_t*__restrict__ preg,int plus,int eflags) { register struct branch* a=(struct branch*)x; int tmp; +#ifdef DEBUG + printf("%08p matching branch against \"%s\"\n",a,s); + printf("%p %p\n",&a->p->m,a->p->m); +#endif + assert(a->p->m==matchpiece); tmp=a->p->m(a->p,s,ofs,preg,plus,eflags); if (tmp>=0) { if (a->next) @@ -291,7 +331,7 @@ static int matchbranch(void*__restrict__ x,const char*__restrict__ s,int ofs,str static const char* parsebranch(struct branch*__restrict__ b,const char*__restrict__ s,regex_t*__restrict__ rx,int*__restrict__ pieces) { struct piece p; - const char *tmp; + const char *tmp; /* the gcc warning here is bogus */ b->m=matchbranch; b->num=0; b->p=0; for (;;) { @@ -305,9 +345,11 @@ static const char* parsebranch(struct branch*__restrict__ b,const char*__restric tmp=parsepiece(&p,s,rx); if (tmp==s) return s; } +// printf("b->p from %p to ",b->p); if (!(b->p=realloc(b->p,++b->num*sizeof(p)))) return s; +// printf("%p (size %d)\n",b->p,b->num*sizeof(p)); b->p[b->num-1]=p; - if (*s=='|') { ++tmp; break; } + if (*tmp=='|') { break; } s=tmp; } *pieces+=b->num; @@ -318,7 +360,11 @@ static const char* parsebranch(struct branch*__restrict__ b,const char*__restric static int matchregex(void*__restrict__ x,const char*__restrict__ s,int ofs,struct __regex_t*__restrict__ preg,int plus,int eflags) { register struct regex* a=(struct regex*)x; int i,tmp; +#ifdef DEBUG + printf("%08p matching regex against \"%s\"\n",a,s); +#endif for (i=0; inum; ++i) { + assert(a->b[i].m==matchbranch); tmp=a->b[i].m(&a->b[i],s,ofs,preg,plus,eflags); if (tmp>=0) { if (a->next) @@ -335,12 +381,15 @@ static const char* parseregex(struct regex*__restrict__ r,const char*__restrict_ const char *tmp; r->m=matchregex; r->num=0; r->b=0; r->pieces=0; + p->brackets=0; for (;;) { tmp=parsebranch(&b,s,p,&r->pieces); if (tmp==s) return s; +// printf("r->b from %p to ",r->b); if (!(r->b=realloc(r->b,++r->num*sizeof(b)))) return s; +// printf("%p (size %d)\n",r->b,r->num*sizeof(b)); r->b[r->num-1]=b; - s=tmp; + s=tmp; if (*s=='|') ++s; } return tmp; } @@ -355,7 +404,7 @@ static void regex_putnext(struct regex* r,void* next); static void atom_putnext(struct atom*__restrict__ a,void*__restrict__ next) { a->next=next; if (a->type==REGEX) - regex_putnext(&a->u.r,next); + regex_putnext(&a->u.r,0); } static void piece_putnext(struct piece*__restrict__ p,void*__restrict__ next) { @@ -365,8 +414,12 @@ static void piece_putnext(struct piece*__restrict__ p,void*__restrict__ next) { static void branch_putnext(struct branch*__restrict__ b,void*__restrict__ next) { int i; - for (i=0; inum-1; ++i) - piece_putnext(&b->p[i],&b->p[i+1]); + for (i=0; inum-1; ++i) { + if (b->p[i+1].min==1 && b->p[i+1].max==1) + piece_putnext(&b->p[i],&b->p[i+1].a); + else + piece_putnext(&b->p[i],&b->p[i+1]); + } piece_putnext(&b->p[i],0); b->next=next; } @@ -381,21 +434,29 @@ static void regex_putnext(struct regex*__restrict__ r,void*__restrict__ next) { int regcomp(regex_t*__restrict__ preg, const char*__restrict__ regex, int cflags) { - const char* t=parseregex(&preg->r,regex,preg); + const char* t; + preg->cflags=cflags; + t=parseregex(&preg->r,regex,preg); if (t==regex) return -1; regex_putnext(&preg->r,0); - preg->cflags=cflags; return 0; } int regexec(const regex_t*__restrict__ preg, const char*__restrict__ string, size_t nmatch, regmatch_t pmatch[], int eflags) { int matched; const char *orig=string; - ((regex_t*)preg)->l=alloca(sizeof(regmatch_t)*(preg->brackets+1)); + assert(preg->brackets+1>0 && preg->brackets<1000); +#ifdef DEBUG + printf("alloca(%d)\n",sizeof(regmatch_t)*(preg->brackets+3)); +#endif + ((regex_t*)preg)->l=alloca(sizeof(regmatch_t)*(preg->brackets+3)); while (*string) { matched=preg->r.m((void*)&preg->r,string,string-orig,(regex_t*)preg,0,eflags); +// printf("ebp on stack = %x\n",stack[1]); if (matched>=0) { - if ((preg->cflags®_NOSUB)==0) memmove(pmatch,preg->l,nmatch*sizeof(regmatch_t)); + preg->l[0].rm_so=string-orig; + preg->l[0].rm_eo=string-orig+matched; + if ((preg->cflags®_NOSUB)==0) memcpy(pmatch,preg->l,nmatch*sizeof(regmatch_t)); return 0; } ++string; eflags|=REG_NOTBOL; @@ -407,10 +468,9 @@ int regexec(const regex_t*__restrict__ preg, const char*__restrict__ string, siz void regfree(regex_t* preg) { int i; - for (i=0; ir.num; ++i) { + for (i=0; ir.num; ++i) free(preg->r.b[i].p); free(preg->r.b); - } } size_t regerror(int errcode, const regex_t*__restrict__ preg, char*__restrict__ errbuf, size_t errbuf_size) { -- cgit v1.2.1