summaryrefslogtreecommitdiffstats
path: root/mdk-stage1/dietlibc/libregex/rx.c
diff options
context:
space:
mode:
Diffstat (limited to 'mdk-stage1/dietlibc/libregex/rx.c')
-rw-r--r--mdk-stage1/dietlibc/libregex/rx.c128
1 files changed, 94 insertions, 34 deletions
diff --git a/mdk-stage1/dietlibc/libregex/rx.c b/mdk-stage1/dietlibc/libregex/rx.c
index b28358c2e..9bab3e22c 100644
--- a/mdk-stage1/dietlibc/libregex/rx.c
+++ b/mdk-stage1/dietlibc/libregex/rx.c
@@ -1,8 +1,11 @@
+#define NDEBUG
#include <regex.h>
#include <stdlib.h>
+#include <string.h>
#include <ctype.h>
#include <sys/types.h>
#include <string.h>
+#include <assert.h>
/* this is ugly.
* the idea is to build a parse tree, then do some poor man's OOP with a
@@ -40,7 +43,7 @@ struct regex {
struct atom {
matcher m;
void* next;
- enum { EMPTY, REGEX, BRACKET, ANY, LINESTART, LINEEND, WORDSTART, WORDEND, CHAR, } type;
+ enum { ILLEGAL, EMPTY, REGEX, BRACKET, ANY, LINESTART, LINEEND, WORDSTART, WORDEND, CHAR, } type;
int bnum;
union {
struct regex r;
@@ -64,7 +67,7 @@ struct branch {
};
static void clearcc(unsigned int* x) {
- memset(x,0,sizeof(*x));
+ memset(x,0,sizeof(struct bracketed));
}
static void setcc(unsigned int* x,unsigned int bit) {
@@ -84,9 +87,9 @@ static const char* parsebracketed(struct bracketed*__restrict__ b,const char*__r
if (*t=='^') { negflag=1; ++t; }
do {
if (*t==0) return s;
- setcc(b->cc,rx->cflags&REG_ICASE?*t:tolower(*t));
+ setcc(b->cc,rx->cflags&REG_ICASE?tolower(*t):*t);
if (t[1]=='-' && t[2]!=']') {
- for (i=*t+1; i<=t[2]; ++i) setcc(b->cc,rx->cflags&REG_ICASE?i:tolower(i));
+ for (i=*t+1; i<=t[2]; ++i) setcc(b->cc,rx->cflags&REG_ICASE?tolower(i):i);
t+=2;
}
++t;
@@ -97,55 +100,79 @@ static const char* parsebracketed(struct bracketed*__restrict__ b,const char*__r
static const char* parseregex(struct regex* r,const char* s,regex_t* rx);
-static int matchatom(void*__restrict__ x,const char*__restrict__ s,int ofs,struct __regex_t*__restrict__ preg,int plus,int eflags) {
+static int matchatom(void*__restrict__ x,const unsigned char*__restrict__ s,int ofs,struct __regex_t*__restrict__ preg,int plus,int eflags) {
register struct atom* a=(struct atom*)x;
int matchlen=0;
+ assert(a->type!=ILLEGAL);
switch (a->type) {
case EMPTY:
-// printf("matching EMPTY against \"%s\"\n",s);
- preg->l[a->bnum].rm_so=preg->l[a->bnum].rm_eo=ofs;
+#ifdef DEBUG
+ printf("matching atom EMPTY against \"%s\"\n",s);
+ printf("a->bnum is %d\n",a->bnum);
+#endif
+ if (a->bnum>=0) preg->l[a->bnum].rm_so=preg->l[a->bnum].rm_eo=ofs;
goto match;
case REGEX:
-// printf("matching REGEX against \"%s\"\n",s);
- if ((matchlen=a->u.r.m(a,s,ofs,preg,0,eflags))>=0) {
+#ifdef DEBUG
+ printf("matching atom REGEX against \"%s\"\n",s);
+ printf("a->bnum is %d\n",a->bnum);
+#endif
+ if ((matchlen=a->u.r.m(&a->u.r,s,ofs,preg,0,eflags))>=0) {
+ assert(a->bnum>=0);
preg->l[a->bnum].rm_so=ofs;
preg->l[a->bnum].rm_eo=ofs+matchlen;
goto match;
}
break;
case BRACKET:
-// printf("matching BRACKET against \"%s\"\n",s);
+#ifdef DEBUG
+ printf("matching atom BRACKET against \"%s\"\n",s);
+#endif
matchlen=1;
if (*s=='\n' && (preg->cflags&REG_NEWLINE)) break;
if (*s && issetcc(a->u.b.cc,(preg->cflags&REG_ICASE?tolower(*s):*s)))
goto match;
break;
case ANY:
-// printf("matching ANY against \"%s\"\n",s);
+#ifdef DEBUG
+ printf("matching atom ANY against \"%s\"\n",s);
+#endif
if (*s=='\n' && (preg->cflags&REG_NEWLINE)) break;
matchlen=1;
if (*s) goto match;
break;
case LINESTART:
-// printf("matching LINESTART against \"%s\"\n",s);
+#ifdef DEBUG
+ printf("matching atom LINESTART against \"%s\"\n",s);
+#endif
if (ofs==0 && (eflags&REG_NOTBOL)==0) {
goto match;
}
break;
case LINEEND:
-// printf("matching LINEEND against \"%s\"\n",s);
- if ((*s && *s!='\n') || (eflags&REG_NOTEOL)==0) break;
+#ifdef DEBUG
+ printf("matching atom LINEEND against \"%s\"\n",s);
+#endif
+ if ((*s && *s!='\n') || (eflags&REG_NOTEOL)) break;
goto match;
case WORDSTART:
- if ((ofs==0 || isspace(s[-1])) && !isspace(*s))
+#ifdef DEBUG
+ printf("matching atom WORDSTART against \"%s\"\n",s);
+#endif
+ if ((ofs==0 || !isalnum(s[-1])) && isalnum(*s))
goto match;
break;
case WORDEND:
- if (ofs>0 && !isspace(s[-1]) && isspace(*s))
+#ifdef DEBUG
+ printf("matching atom WORDEND against \"%s\"\n",s);
+#endif
+ if (ofs>0 && isalnum(s[-1]) && !isalnum(*s))
goto match;
break;
case CHAR:
-// printf("matching CHAR %c against \"%s\"\n",a->u.c,s);
+#ifdef DEBUG
+ printf("matching atom CHAR %c against \"%s\"\n",a->u.c,s);
+#endif
matchlen=1;
if (((preg->cflags&REG_ICASE)?tolower(*s):*s)==a->u.c) goto match;
break;
@@ -160,7 +187,7 @@ match:
static const char* parseatom(struct atom*__restrict__ a,const char*__restrict__ s,regex_t*__restrict__ rx) {
const char *tmp;
- a->m=matchatom;
+ a->m=(matcher)matchatom;
a->bnum=-1;
switch (*s) {
case '(':
@@ -203,7 +230,7 @@ static const char* parseatom(struct atom*__restrict__ a,const char*__restrict__
}
default:
a->type=CHAR;
- a->u.c=rx->cflags&REG_ICASE?*s:tolower(*s);
+ a->u.c=rx->cflags&REG_ICASE?tolower(*s):*s;
break;
}
return s+1;
@@ -213,9 +240,15 @@ static const char* parseatom(struct atom*__restrict__ a,const char*__restrict__
static int matchpiece(void*__restrict__ x,const char*__restrict__ s,int ofs,struct __regex_t*__restrict__ preg,int plus,int eflags) {
register struct piece* a=(struct piece*)x;
int matchlen=0;
- int tmp,num=0;
- unsigned int *offsets=alloca(sizeof(int)*a->max);
+ int tmp=0,num=0;
+ unsigned int *offsets;
+ assert(a->max>0 && a->max<1000);
+#ifdef DEBUG
+ printf("alloca(%d)\n",sizeof(int)*a->max);
+#endif
+ offsets=alloca(sizeof(int)*a->max);
offsets[0]=0;
+// printf("allocating %d offsets...\n",a->max);
// printf("matchpiece \"%s\"...\n",s);
/* first, try to match the atom as often as possible, up to a->max times */
if (a->max == 1 && a->min == 1)
@@ -227,6 +260,7 @@ static int matchpiece(void*__restrict__ x,const char*__restrict__ s,int ofs,stru
a->a.next=save;
++num;
matchlen+=tmp;
+// printf("setting offsets[%d] to %d\n",num,tmp);
offsets[num]=tmp;
} else {
a->a.next=save;
@@ -235,12 +269,13 @@ static int matchpiece(void*__restrict__ x,const char*__restrict__ s,int ofs,stru
}
if (num<a->min) return -1; /* already at minimum matches; signal mismatch */
/* then, while the rest does not match, back off */
- for (;;) {
+ for (;num>=0;) {
if (a->next)
tmp=((struct atom*)(a->next))->m(a->next,s+matchlen,ofs+matchlen,preg,plus+matchlen,eflags);
else
tmp=plus+matchlen;
if (tmp>=0) break; /* it did match; don't back off any further */
+// printf("using offsets[%d] (%d)\n",num,offsets[num]);
matchlen-=offsets[num];
--num;
}
@@ -279,6 +314,11 @@ static const char* parsepiece(struct piece*__restrict__ p,const char*__restrict_
static int matchbranch(void*__restrict__ x,const char*__restrict__ s,int ofs,struct __regex_t*__restrict__ preg,int plus,int eflags) {
register struct branch* a=(struct branch*)x;
int tmp;
+#ifdef DEBUG
+ printf("%08p matching branch against \"%s\"\n",a,s);
+ printf("%p %p\n",&a->p->m,a->p->m);
+#endif
+ assert(a->p->m==matchpiece);
tmp=a->p->m(a->p,s,ofs,preg,plus,eflags);
if (tmp>=0) {
if (a->next)
@@ -291,7 +331,7 @@ static int matchbranch(void*__restrict__ x,const char*__restrict__ s,int ofs,str
static const char* parsebranch(struct branch*__restrict__ b,const char*__restrict__ s,regex_t*__restrict__ rx,int*__restrict__ pieces) {
struct piece p;
- const char *tmp;
+ const char *tmp; /* the gcc warning here is bogus */
b->m=matchbranch;
b->num=0; b->p=0;
for (;;) {
@@ -305,9 +345,11 @@ static const char* parsebranch(struct branch*__restrict__ b,const char*__restric
tmp=parsepiece(&p,s,rx);
if (tmp==s) return s;
}
+// printf("b->p from %p to ",b->p);
if (!(b->p=realloc(b->p,++b->num*sizeof(p)))) return s;
+// printf("%p (size %d)\n",b->p,b->num*sizeof(p));
b->p[b->num-1]=p;
- if (*s=='|') { ++tmp; break; }
+ if (*tmp=='|') { break; }
s=tmp;
}
*pieces+=b->num;
@@ -318,7 +360,11 @@ static const char* parsebranch(struct branch*__restrict__ b,const char*__restric
static int matchregex(void*__restrict__ x,const char*__restrict__ s,int ofs,struct __regex_t*__restrict__ preg,int plus,int eflags) {
register struct regex* a=(struct regex*)x;
int i,tmp;
+#ifdef DEBUG
+ printf("%08p matching regex against \"%s\"\n",a,s);
+#endif
for (i=0; i<a->num; ++i) {
+ assert(a->b[i].m==matchbranch);
tmp=a->b[i].m(&a->b[i],s,ofs,preg,plus,eflags);
if (tmp>=0) {
if (a->next)
@@ -335,12 +381,15 @@ static const char* parseregex(struct regex*__restrict__ r,const char*__restrict_
const char *tmp;
r->m=matchregex;
r->num=0; r->b=0; r->pieces=0;
+ p->brackets=0;
for (;;) {
tmp=parsebranch(&b,s,p,&r->pieces);
if (tmp==s) return s;
+// printf("r->b from %p to ",r->b);
if (!(r->b=realloc(r->b,++r->num*sizeof(b)))) return s;
+// printf("%p (size %d)\n",r->b,r->num*sizeof(b));
r->b[r->num-1]=b;
- s=tmp;
+ s=tmp; if (*s=='|') ++s;
}
return tmp;
}
@@ -355,7 +404,7 @@ static void regex_putnext(struct regex* r,void* next);
static void atom_putnext(struct atom*__restrict__ a,void*__restrict__ next) {
a->next=next;
if (a->type==REGEX)
- regex_putnext(&a->u.r,next);
+ regex_putnext(&a->u.r,0);
}
static void piece_putnext(struct piece*__restrict__ p,void*__restrict__ next) {
@@ -365,8 +414,12 @@ static void piece_putnext(struct piece*__restrict__ p,void*__restrict__ next) {
static void branch_putnext(struct branch*__restrict__ b,void*__restrict__ next) {
int i;
- for (i=0; i<b->num-1; ++i)
- piece_putnext(&b->p[i],&b->p[i+1]);
+ for (i=0; i<b->num-1; ++i) {
+ if (b->p[i+1].min==1 && b->p[i+1].max==1)
+ piece_putnext(&b->p[i],&b->p[i+1].a);
+ else
+ piece_putnext(&b->p[i],&b->p[i+1]);
+ }
piece_putnext(&b->p[i],0);
b->next=next;
}
@@ -381,21 +434,29 @@ static void regex_putnext(struct regex*__restrict__ r,void*__restrict__ next) {
int regcomp(regex_t*__restrict__ preg, const char*__restrict__ regex, int cflags) {
- const char* t=parseregex(&preg->r,regex,preg);
+ const char* t;
+ preg->cflags=cflags;
+ t=parseregex(&preg->r,regex,preg);
if (t==regex) return -1;
regex_putnext(&preg->r,0);
- preg->cflags=cflags;
return 0;
}
int regexec(const regex_t*__restrict__ preg, const char*__restrict__ string, size_t nmatch, regmatch_t pmatch[], int eflags) {
int matched;
const char *orig=string;
- ((regex_t*)preg)->l=alloca(sizeof(regmatch_t)*(preg->brackets+1));
+ assert(preg->brackets+1>0 && preg->brackets<1000);
+#ifdef DEBUG
+ printf("alloca(%d)\n",sizeof(regmatch_t)*(preg->brackets+3));
+#endif
+ ((regex_t*)preg)->l=alloca(sizeof(regmatch_t)*(preg->brackets+3));
while (*string) {
matched=preg->r.m((void*)&preg->r,string,string-orig,(regex_t*)preg,0,eflags);
+// printf("ebp on stack = %x\n",stack[1]);
if (matched>=0) {
- if ((preg->cflags&REG_NOSUB)==0) memmove(pmatch,preg->l,nmatch*sizeof(regmatch_t));
+ preg->l[0].rm_so=string-orig;
+ preg->l[0].rm_eo=string-orig+matched;
+ if ((preg->cflags&REG_NOSUB)==0) memcpy(pmatch,preg->l,nmatch*sizeof(regmatch_t));
return 0;
}
++string; eflags|=REG_NOTBOL;
@@ -407,10 +468,9 @@ int regexec(const regex_t*__restrict__ preg, const char*__restrict__ string, siz
void regfree(regex_t* preg) {
int i;
- for (i=0; i<preg->r.num; ++i) {
+ for (i=0; i<preg->r.num; ++i)
free(preg->r.b[i].p);
free(preg->r.b);
- }
}
size_t regerror(int errcode, const regex_t*__restrict__ preg, char*__restrict__ errbuf, size_t errbuf_size) {