| /**************************************************************** |
| Copyright (C) Lucent Technologies 1997 |
| All Rights Reserved |
| |
| Permission to use, copy, modify, and distribute this software and |
| its documentation for any purpose and without fee is hereby |
| granted, provided that the above copyright notice appear in all |
| copies and that both that the copyright notice and this |
| permission notice and warranty disclaimer appear in supporting |
| documentation, and that the name Lucent Technologies or any of |
| its entities not be used in advertising or publicity pertaining |
| to distribution of the software without specific, written prior |
| permission. |
| |
| LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, |
| INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. |
| IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY |
| SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES |
| WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER |
| IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, |
| ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF |
| THIS SOFTWARE. |
| ****************************************************************/ |
| |
| /* lasciate ogne speranza, voi ch'intrate. */ |
| |
| #define DEBUG |
| |
| #include <ctype.h> |
| #include <stdio.h> |
| #include <string.h> |
| #include <stdlib.h> |
| #include "awk.h" |
| #include "ytab.h" |
| |
| #define HAT (NCHARS+2) /* matches ^ in regular expr */ |
| /* NCHARS is 2**n */ |
| #define MAXLIN 22 |
| |
| #define type(v) (v)->nobj /* badly overloaded here */ |
| #define info(v) (v)->ntype /* badly overloaded here */ |
| #define left(v) (v)->narg[0] |
| #define right(v) (v)->narg[1] |
| #define parent(v) (v)->nnext |
| |
| #define LEAF case CCL: case NCCL: case CHAR: case DOT: case FINAL: case ALL: |
| #define ELEAF case EMPTYRE: /* empty string in regexp */ |
| #define UNARY case STAR: case PLUS: case QUEST: |
| |
| /* encoding in tree Nodes: |
| leaf (CCL, NCCL, CHAR, DOT, FINAL, ALL, EMPTYRE): |
| left is index, right contains value or pointer to value |
| unary (STAR, PLUS, QUEST): left is child, right is null |
| binary (CAT, OR): left and right are children |
| parent contains pointer to parent |
| */ |
| |
| |
| int *setvec; |
| int *tmpset; |
| int maxsetvec = 0; |
| |
| int rtok; /* next token in current re */ |
| int rlxval; |
| static uschar *rlxstr; |
| static uschar *prestr; /* current position in current re */ |
| static uschar *lastre; /* origin of last re */ |
| |
| static int setcnt; |
| static int poscnt; |
| |
| char *patbeg; |
| int patlen; |
| |
| #define NFA 20 /* cache this many dynamic fa's */ |
| fa *fatab[NFA]; |
| int nfatab = 0; /* entries in fatab */ |
| |
| fa *makedfa(const char *s, int anchor) /* returns dfa for reg expr s */ |
| { |
| int i, use, nuse; |
| fa *pfa; |
| static int now = 1; |
| |
| if (setvec == 0) { /* first time through any RE */ |
| maxsetvec = MAXLIN; |
| setvec = (int *) malloc(maxsetvec * sizeof(int)); |
| tmpset = (int *) malloc(maxsetvec * sizeof(int)); |
| if (setvec == 0 || tmpset == 0) |
| overflo("out of space initializing makedfa"); |
| } |
| |
| if (compile_time) /* a constant for sure */ |
| return mkdfa(s, anchor); |
| for (i = 0; i < nfatab; i++) /* is it there already? */ |
| if (fatab[i]->anchor == anchor |
| && strcmp((const char *) fatab[i]->restr, s) == 0) { |
| fatab[i]->use = now++; |
| return fatab[i]; |
| } |
| pfa = mkdfa(s, anchor); |
| if (nfatab < NFA) { /* room for another */ |
| fatab[nfatab] = pfa; |
| fatab[nfatab]->use = now++; |
| nfatab++; |
| return pfa; |
| } |
| use = fatab[0]->use; /* replace least-recently used */ |
| nuse = 0; |
| for (i = 1; i < nfatab; i++) |
| if (fatab[i]->use < use) { |
| use = fatab[i]->use; |
| nuse = i; |
| } |
| freefa(fatab[nuse]); |
| fatab[nuse] = pfa; |
| pfa->use = now++; |
| return pfa; |
| } |
| |
| fa *mkdfa(const char *s, int anchor) /* does the real work of making a dfa */ |
| /* anchor = 1 for anchored matches, else 0 */ |
| { |
| Node *p, *p1; |
| fa *f; |
| |
| p = reparse(s); |
| p1 = op2(CAT, op2(STAR, op2(ALL, NIL, NIL), NIL), p); |
| /* put ALL STAR in front of reg. exp. */ |
| p1 = op2(CAT, p1, op2(FINAL, NIL, NIL)); |
| /* put FINAL after reg. exp. */ |
| |
| poscnt = 0; |
| penter(p1); /* enter parent pointers and leaf indices */ |
| if ((f = (fa *) calloc(1, sizeof(fa) + poscnt*sizeof(rrow))) == NULL) |
| overflo("out of space for fa"); |
| f->accept = poscnt-1; /* penter has computed number of positions in re */ |
| cfoll(f, p1); /* set up follow sets */ |
| freetr(p1); |
| if ((f->posns[0] = (int *) calloc(1, *(f->re[0].lfollow)*sizeof(int))) == NULL) |
| overflo("out of space in makedfa"); |
| if ((f->posns[1] = (int *) calloc(1, sizeof(int))) == NULL) |
| overflo("out of space in makedfa"); |
| *f->posns[1] = 0; |
| f->initstat = makeinit(f, anchor); |
| f->anchor = anchor; |
| f->restr = (uschar *) tostring(s); |
| return f; |
| } |
| |
| int makeinit(fa *f, int anchor) |
| { |
| int i, k; |
| |
| f->curstat = 2; |
| f->out[2] = 0; |
| f->reset = 0; |
| k = *(f->re[0].lfollow); |
| xfree(f->posns[2]); |
| if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL) |
| overflo("out of space in makeinit"); |
| for (i=0; i <= k; i++) { |
| (f->posns[2])[i] = (f->re[0].lfollow)[i]; |
| } |
| if ((f->posns[2])[1] == f->accept) |
| f->out[2] = 1; |
| for (i=0; i < NCHARS; i++) |
| f->gototab[2][i] = 0; |
| f->curstat = cgoto(f, 2, HAT); |
| if (anchor) { |
| *f->posns[2] = k-1; /* leave out position 0 */ |
| for (i=0; i < k; i++) { |
| (f->posns[0])[i] = (f->posns[2])[i]; |
| } |
| |
| f->out[0] = f->out[2]; |
| if (f->curstat != 2) |
| --(*f->posns[f->curstat]); |
| } |
| return f->curstat; |
| } |
| |
| void penter(Node *p) /* set up parent pointers and leaf indices */ |
| { |
| switch (type(p)) { |
| ELEAF |
| LEAF |
| info(p) = poscnt; |
| poscnt++; |
| break; |
| UNARY |
| penter(left(p)); |
| parent(left(p)) = p; |
| break; |
| case CAT: |
| case OR: |
| penter(left(p)); |
| penter(right(p)); |
| parent(left(p)) = p; |
| parent(right(p)) = p; |
| break; |
| default: /* can't happen */ |
| FATAL("can't happen: unknown type %d in penter", type(p)); |
| break; |
| } |
| } |
| |
| void freetr(Node *p) /* free parse tree */ |
| { |
| switch (type(p)) { |
| ELEAF |
| LEAF |
| xfree(p); |
| break; |
| UNARY |
| freetr(left(p)); |
| xfree(p); |
| break; |
| case CAT: |
| case OR: |
| freetr(left(p)); |
| freetr(right(p)); |
| xfree(p); |
| break; |
| default: /* can't happen */ |
| FATAL("can't happen: unknown type %d in freetr", type(p)); |
| break; |
| } |
| } |
| |
| /* in the parsing of regular expressions, metacharacters like . have */ |
| /* to be seen literally; \056 is not a metacharacter. */ |
| |
| int hexstr(char **pp) /* find and eval hex string at pp, return new p */ |
| { /* only pick up one 8-bit byte (2 chars) */ |
| uschar *p; |
| int n = 0; |
| int i; |
| |
| for (i = 0, p = (uschar *) *pp; i < 2 && isxdigit(*p); i++, p++) { |
| if (isdigit(*p)) |
| n = 16 * n + *p - '0'; |
| else if (*p >= 'a' && *p <= 'f') |
| n = 16 * n + *p - 'a' + 10; |
| else if (*p >= 'A' && *p <= 'F') |
| n = 16 * n + *p - 'A' + 10; |
| } |
| *pp = (char *) p; |
| return n; |
| } |
| |
| #define isoctdigit(c) ((c) >= '0' && (c) <= '7') /* multiple use of arg */ |
| |
| int quoted(char **pp) /* pick up next thing after a \\ */ |
| /* and increment *pp */ |
| { |
| char *p = *pp; |
| int c; |
| |
| if ((c = *p++) == 't') |
| c = '\t'; |
| else if (c == 'n') |
| c = '\n'; |
| else if (c == 'f') |
| c = '\f'; |
| else if (c == 'r') |
| c = '\r'; |
| else if (c == 'b') |
| c = '\b'; |
| else if (c == '\\') |
| c = '\\'; |
| else if (c == 'x') { /* hexadecimal goo follows */ |
| c = hexstr(&p); /* this adds a null if number is invalid */ |
| } else if (isoctdigit(c)) { /* \d \dd \ddd */ |
| int n = c - '0'; |
| if (isoctdigit(*p)) { |
| n = 8 * n + *p++ - '0'; |
| if (isoctdigit(*p)) |
| n = 8 * n + *p++ - '0'; |
| } |
| c = n; |
| } /* else */ |
| /* c = c; */ |
| *pp = p; |
| return c; |
| } |
| |
| char *cclenter(const char *argp) /* add a character class */ |
| { |
| int i, c, c2; |
| uschar *p = (uschar *) argp; |
| uschar *op, *bp; |
| static uschar *buf = 0; |
| static int bufsz = 100; |
| |
| op = p; |
| if (buf == 0 && (buf = (uschar *) malloc(bufsz)) == NULL) |
| FATAL("out of space for character class [%.10s...] 1", p); |
| bp = buf; |
| for (i = 0; (c = *p++) != 0; ) { |
| if (c == '\\') { |
| c = quoted((char **) &p); |
| } else if (c == '-' && i > 0 && bp[-1] != 0) { |
| if (*p != 0) { |
| c = bp[-1]; |
| c2 = *p++; |
| if (c2 == '\\') |
| c2 = quoted((char **) &p); |
| if (c > c2) { /* empty; ignore */ |
| bp--; |
| i--; |
| continue; |
| } |
| while (c < c2) { |
| if (!adjbuf((char **) &buf, &bufsz, bp-buf+2, 100, (char **) &bp, "cclenter1")) |
| FATAL("out of space for character class [%.10s...] 2", p); |
| *bp++ = ++c; |
| i++; |
| } |
| continue; |
| } |
| } |
| if (!adjbuf((char **) &buf, &bufsz, bp-buf+2, 100, (char **) &bp, "cclenter2")) |
| FATAL("out of space for character class [%.10s...] 3", p); |
| *bp++ = c; |
| i++; |
| } |
| *bp = 0; |
| dprintf( ("cclenter: in = |%s|, out = |%s|\n", op, buf) ); |
| xfree(op); |
| return (char *) tostring((char *) buf); |
| } |
| |
| void overflo(const char *s) |
| { |
| FATAL("regular expression too big: %.30s...", s); |
| } |
| |
| void cfoll(fa *f, Node *v) /* enter follow set of each leaf of vertex v into lfollow[leaf] */ |
| { |
| int i; |
| int *p; |
| |
| switch (type(v)) { |
| ELEAF |
| LEAF |
| f->re[info(v)].ltype = type(v); |
| f->re[info(v)].lval.np = right(v); |
| while (f->accept >= maxsetvec) { /* guessing here! */ |
| maxsetvec *= 4; |
| setvec = (int *) realloc(setvec, maxsetvec * sizeof(int)); |
| tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int)); |
| if (setvec == 0 || tmpset == 0) |
| overflo("out of space in cfoll()"); |
| } |
| for (i = 0; i <= f->accept; i++) |
| setvec[i] = 0; |
| setcnt = 0; |
| follow(v); /* computes setvec and setcnt */ |
| if ((p = (int *) calloc(1, (setcnt+1)*sizeof(int))) == NULL) |
| overflo("out of space building follow set"); |
| f->re[info(v)].lfollow = p; |
| *p = setcnt; |
| for (i = f->accept; i >= 0; i--) |
| if (setvec[i] == 1) |
| *++p = i; |
| break; |
| UNARY |
| cfoll(f,left(v)); |
| break; |
| case CAT: |
| case OR: |
| cfoll(f,left(v)); |
| cfoll(f,right(v)); |
| break; |
| default: /* can't happen */ |
| FATAL("can't happen: unknown type %d in cfoll", type(v)); |
| } |
| } |
| |
| int first(Node *p) /* collects initially active leaves of p into setvec */ |
| /* returns 0 if p matches empty string */ |
| { |
| int b, lp; |
| |
| switch (type(p)) { |
| ELEAF |
| LEAF |
| lp = info(p); /* look for high-water mark of subscripts */ |
| while (setcnt >= maxsetvec || lp >= maxsetvec) { /* guessing here! */ |
| maxsetvec *= 4; |
| setvec = (int *) realloc(setvec, maxsetvec * sizeof(int)); |
| tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int)); |
| if (setvec == 0 || tmpset == 0) |
| overflo("out of space in first()"); |
| } |
| if (type(p) == EMPTYRE) { |
| setvec[lp] = 0; |
| return(0); |
| } |
| if (setvec[lp] != 1) { |
| setvec[lp] = 1; |
| setcnt++; |
| } |
| if (type(p) == CCL && (*(char *) right(p)) == '\0') |
| return(0); /* empty CCL */ |
| else return(1); |
| case PLUS: |
| if (first(left(p)) == 0) return(0); |
| return(1); |
| case STAR: |
| case QUEST: |
| first(left(p)); |
| return(0); |
| case CAT: |
| if (first(left(p)) == 0 && first(right(p)) == 0) return(0); |
| return(1); |
| case OR: |
| b = first(right(p)); |
| if (first(left(p)) == 0 || b == 0) return(0); |
| return(1); |
| } |
| FATAL("can't happen: unknown type %d in first", type(p)); /* can't happen */ |
| return(-1); |
| } |
| |
| void follow(Node *v) /* collects leaves that can follow v into setvec */ |
| { |
| Node *p; |
| |
| if (type(v) == FINAL) |
| return; |
| p = parent(v); |
| switch (type(p)) { |
| case STAR: |
| case PLUS: |
| first(v); |
| follow(p); |
| return; |
| |
| case OR: |
| case QUEST: |
| follow(p); |
| return; |
| |
| case CAT: |
| if (v == left(p)) { /* v is left child of p */ |
| if (first(right(p)) == 0) { |
| follow(p); |
| return; |
| } |
| } else /* v is right child */ |
| follow(p); |
| return; |
| } |
| } |
| |
| int member(int c, const char *sarg) /* is c in s? */ |
| { |
| uschar *s = (uschar *) sarg; |
| |
| while (*s) |
| if (c == *s++) |
| return(1); |
| return(0); |
| } |
| |
| int match(fa *f, const char *p0) /* shortest match ? */ |
| { |
| int s, ns; |
| uschar *p = (uschar *) p0; |
| |
| s = f->reset ? makeinit(f,0) : f->initstat; |
| if (f->out[s]) |
| return(1); |
| do { |
| /* assert(*p < NCHARS); */ |
| if ((ns = f->gototab[s][*p]) != 0) |
| s = ns; |
| else |
| s = cgoto(f, s, *p); |
| if (f->out[s]) |
| return(1); |
| } while (*p++ != 0); |
| return(0); |
| } |
| |
| int pmatch(fa *f, const char *p0) /* longest match, for sub */ |
| { |
| int s, ns; |
| uschar *p = (uschar *) p0; |
| uschar *q; |
| int i, k; |
| |
| /* s = f->reset ? makeinit(f,1) : f->initstat; */ |
| if (f->reset) { |
| f->initstat = s = makeinit(f,1); |
| } else { |
| s = f->initstat; |
| } |
| patbeg = (char *) p; |
| patlen = -1; |
| do { |
| q = p; |
| do { |
| if (f->out[s]) /* final state */ |
| patlen = q-p; |
| /* assert(*q < NCHARS); */ |
| if ((ns = f->gototab[s][*q]) != 0) |
| s = ns; |
| else |
| s = cgoto(f, s, *q); |
| if (s == 1) { /* no transition */ |
| if (patlen >= 0) { |
| patbeg = (char *) p; |
| return(1); |
| } |
| else |
| goto nextin; /* no match */ |
| } |
| } while (*q++ != 0); |
| if (f->out[s]) |
| patlen = q-p-1; /* don't count $ */ |
| if (patlen >= 0) { |
| patbeg = (char *) p; |
| return(1); |
| } |
| nextin: |
| s = 2; |
| if (f->reset) { |
| for (i = 2; i <= f->curstat; i++) |
| xfree(f->posns[i]); |
| k = *f->posns[0]; |
| if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL) |
| overflo("out of space in pmatch"); |
| for (i = 0; i <= k; i++) |
| (f->posns[2])[i] = (f->posns[0])[i]; |
| f->initstat = f->curstat = 2; |
| f->out[2] = f->out[0]; |
| for (i = 0; i < NCHARS; i++) |
| f->gototab[2][i] = 0; |
| } |
| } while (*p++ != 0); |
| return (0); |
| } |
| |
| int nematch(fa *f, const char *p0) /* non-empty match, for sub */ |
| { |
| int s, ns; |
| uschar *p = (uschar *) p0; |
| uschar *q; |
| int i, k; |
| |
| /* s = f->reset ? makeinit(f,1) : f->initstat; */ |
| if (f->reset) { |
| f->initstat = s = makeinit(f,1); |
| } else { |
| s = f->initstat; |
| } |
| patlen = -1; |
| while (*p) { |
| q = p; |
| do { |
| if (f->out[s]) /* final state */ |
| patlen = q-p; |
| /* assert(*q < NCHARS); */ |
| if ((ns = f->gototab[s][*q]) != 0) |
| s = ns; |
| else |
| s = cgoto(f, s, *q); |
| if (s == 1) { /* no transition */ |
| if (patlen > 0) { |
| patbeg = (char *) p; |
| return(1); |
| } else |
| goto nnextin; /* no nonempty match */ |
| } |
| } while (*q++ != 0); |
| if (f->out[s]) |
| patlen = q-p-1; /* don't count $ */ |
| if (patlen > 0 ) { |
| patbeg = (char *) p; |
| return(1); |
| } |
| nnextin: |
| s = 2; |
| if (f->reset) { |
| for (i = 2; i <= f->curstat; i++) |
| xfree(f->posns[i]); |
| k = *f->posns[0]; |
| if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL) |
| overflo("out of state space"); |
| for (i = 0; i <= k; i++) |
| (f->posns[2])[i] = (f->posns[0])[i]; |
| f->initstat = f->curstat = 2; |
| f->out[2] = f->out[0]; |
| for (i = 0; i < NCHARS; i++) |
| f->gototab[2][i] = 0; |
| } |
| p++; |
| } |
| return (0); |
| } |
| |
| Node *reparse(const char *p) /* parses regular expression pointed to by p */ |
| { /* uses relex() to scan regular expression */ |
| Node *np; |
| |
| dprintf( ("reparse <%s>\n", p) ); |
| lastre = prestr = (uschar *) p; /* prestr points to string to be parsed */ |
| rtok = relex(); |
| /* GNU compatibility: an empty regexp matches anything */ |
| if (rtok == '\0') { |
| /* FATAL("empty regular expression"); previous */ |
| return(op2(EMPTYRE, NIL, NIL)); |
| } |
| np = regexp(); |
| if (rtok != '\0') |
| FATAL("syntax error in regular expression %s at %s", lastre, prestr); |
| return(np); |
| } |
| |
| Node *regexp(void) /* top-level parse of reg expr */ |
| { |
| return (alt(concat(primary()))); |
| } |
| |
| Node *primary(void) |
| { |
| Node *np; |
| |
| switch (rtok) { |
| case CHAR: |
| np = op2(CHAR, NIL, itonp(rlxval)); |
| rtok = relex(); |
| return (unary(np)); |
| case ALL: |
| rtok = relex(); |
| return (unary(op2(ALL, NIL, NIL))); |
| case EMPTYRE: |
| rtok = relex(); |
| return (unary(op2(ALL, NIL, NIL))); |
| case DOT: |
| rtok = relex(); |
| return (unary(op2(DOT, NIL, NIL))); |
| case CCL: |
| np = op2(CCL, NIL, (Node*) cclenter((char *) rlxstr)); |
| rtok = relex(); |
| return (unary(np)); |
| case NCCL: |
| np = op2(NCCL, NIL, (Node *) cclenter((char *) rlxstr)); |
| rtok = relex(); |
| return (unary(np)); |
| case '^': |
| rtok = relex(); |
| return (unary(op2(CHAR, NIL, itonp(HAT)))); |
| case '$': |
| rtok = relex(); |
| return (unary(op2(CHAR, NIL, NIL))); |
| case '(': |
| rtok = relex(); |
| if (rtok == ')') { /* special pleading for () */ |
| rtok = relex(); |
| return unary(op2(CCL, NIL, (Node *) tostring(""))); |
| } |
| np = regexp(); |
| if (rtok == ')') { |
| rtok = relex(); |
| return (unary(np)); |
| } |
| else |
| FATAL("syntax error in regular expression %s at %s", lastre, prestr); |
| default: |
| FATAL("illegal primary in regular expression %s at %s", lastre, prestr); |
| } |
| return 0; /*NOTREACHED*/ |
| } |
| |
| Node *concat(Node *np) |
| { |
| switch (rtok) { |
| case CHAR: case DOT: case ALL: case EMPTYRE: case CCL: case NCCL: case '$': case '(': |
| return (concat(op2(CAT, np, primary()))); |
| } |
| return (np); |
| } |
| |
| Node *alt(Node *np) |
| { |
| if (rtok == OR) { |
| rtok = relex(); |
| return (alt(op2(OR, np, concat(primary())))); |
| } |
| return (np); |
| } |
| |
| Node *unary(Node *np) |
| { |
| switch (rtok) { |
| case STAR: |
| rtok = relex(); |
| return (unary(op2(STAR, np, NIL))); |
| case PLUS: |
| rtok = relex(); |
| return (unary(op2(PLUS, np, NIL))); |
| case QUEST: |
| rtok = relex(); |
| return (unary(op2(QUEST, np, NIL))); |
| default: |
| return (np); |
| } |
| } |
| |
| /* |
| * Character class definitions conformant to the POSIX locale as |
| * defined in IEEE P1003.1 draft 7 of June 2001, assuming the source |
| * and operating character sets are both ASCII (ISO646) or supersets |
| * thereof. |
| * |
| * Note that to avoid overflowing the temporary buffer used in |
| * relex(), the expanded character class (prior to range expansion) |
| * must be less than twice the size of their full name. |
| */ |
| |
| /* Because isblank doesn't show up in any of the header files on any |
| * system i use, it's defined here. if some other locale has a richer |
| * definition of "blank", define HAS_ISBLANK and provide your own |
| * version. |
| * the parentheses here are an attempt to find a path through the maze |
| * of macro definition and/or function and/or version provided. thanks |
| * to nelson beebe for the suggestion; let's see if it works everywhere. |
| */ |
| |
| #ifndef HAS_ISBLANK |
| |
| int (my_isblank)(int c) |
| { |
| return c==' ' || c=='\t'; |
| } |
| #undef my_isblank |
| #define isblank my_isblank |
| |
| #endif |
| |
| struct charclass { |
| const char *cc_name; |
| int cc_namelen; |
| int (*cc_func)(int); |
| } charclasses[] = { |
| { "alnum", 5, isalnum }, |
| { "alpha", 5, isalpha }, |
| { "blank", 5, isblank }, |
| { "cntrl", 5, iscntrl }, |
| { "digit", 5, isdigit }, |
| { "graph", 5, isgraph }, |
| { "lower", 5, islower }, |
| { "print", 5, isprint }, |
| { "punct", 5, ispunct }, |
| { "space", 5, isspace }, |
| { "upper", 5, isupper }, |
| { "xdigit", 6, isxdigit }, |
| { NULL, 0, NULL }, |
| }; |
| |
| |
| int relex(void) /* lexical analyzer for reparse */ |
| { |
| int c, n; |
| int cflag; |
| static uschar *buf = 0; |
| static int bufsz = 100; |
| uschar *bp; |
| struct charclass *cc; |
| int i; |
| |
| switch (c = *prestr++) { |
| case '|': return OR; |
| case '*': return STAR; |
| case '+': return PLUS; |
| case '?': return QUEST; |
| case '.': return DOT; |
| case '\0': prestr--; return '\0'; |
| case '^': |
| case '$': |
| case '(': |
| case ')': |
| return c; |
| case '\\': |
| rlxval = quoted((char **) &prestr); |
| return CHAR; |
| default: |
| rlxval = c; |
| return CHAR; |
| case '[': |
| if (buf == 0 && (buf = (uschar *) malloc(bufsz)) == NULL) |
| FATAL("out of space in reg expr %.10s..", lastre); |
| bp = buf; |
| if (*prestr == '^') { |
| cflag = 1; |
| prestr++; |
| } |
| else |
| cflag = 0; |
| n = 2 * strlen((const char *) prestr)+1; |
| if (!adjbuf((char **) &buf, &bufsz, n, n, (char **) &bp, "relex1")) |
| FATAL("out of space for reg expr %.10s...", lastre); |
| for (; ; ) { |
| if ((c = *prestr++) == '\\') { |
| *bp++ = '\\'; |
| if ((c = *prestr++) == '\0') |
| FATAL("nonterminated character class %.20s...", lastre); |
| *bp++ = c; |
| /* } else if (c == '\n') { */ |
| /* FATAL("newline in character class %.20s...", lastre); */ |
| } else if (c == '[' && *prestr == ':') { |
| /* POSIX char class names, Dag-Erling Smorgrav, des@ofug.org */ |
| for (cc = charclasses; cc->cc_name; cc++) |
| if (strncmp((const char *) prestr + 1, (const char *) cc->cc_name, cc->cc_namelen) == 0) |
| break; |
| if (cc->cc_name != NULL && prestr[1 + cc->cc_namelen] == ':' && |
| prestr[2 + cc->cc_namelen] == ']') { |
| prestr += cc->cc_namelen + 3; |
| for (i = 0; i < NCHARS; i++) { |
| if (!adjbuf((char **) &buf, &bufsz, bp-buf+1, 100, (char **) &bp, "relex2")) |
| FATAL("out of space for reg expr %.10s...", lastre); |
| if (cc->cc_func(i)) { |
| *bp++ = i; |
| n++; |
| } |
| } |
| } else |
| *bp++ = c; |
| } else if (c == '\0') { |
| FATAL("nonterminated character class %.20s", lastre); |
| } else if (bp == buf) { /* 1st char is special */ |
| *bp++ = c; |
| } else if (c == ']') { |
| *bp++ = 0; |
| rlxstr = (uschar *) tostring((char *) buf); |
| if (cflag == 0) |
| return CCL; |
| else |
| return NCCL; |
| } else |
| *bp++ = c; |
| } |
| } |
| } |
| |
| int cgoto(fa *f, int s, int c) |
| { |
| int i, j, k; |
| int *p, *q; |
| |
| assert(c == HAT || c < NCHARS); |
| while (f->accept >= maxsetvec) { /* guessing here! */ |
| maxsetvec *= 4; |
| setvec = (int *) realloc(setvec, maxsetvec * sizeof(int)); |
| tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int)); |
| if (setvec == 0 || tmpset == 0) |
| overflo("out of space in cgoto()"); |
| } |
| for (i = 0; i <= f->accept; i++) |
| setvec[i] = 0; |
| setcnt = 0; |
| /* compute positions of gototab[s,c] into setvec */ |
| p = f->posns[s]; |
| for (i = 1; i <= *p; i++) { |
| if ((k = f->re[p[i]].ltype) != FINAL) { |
| if ((k == CHAR && c == ptoi(f->re[p[i]].lval.np)) |
| || (k == DOT && c != 0 && c != HAT) |
| || (k == ALL && c != 0) |
| || (k == EMPTYRE && c != 0) |
| || (k == CCL && member(c, (char *) f->re[p[i]].lval.up)) |
| || (k == NCCL && !member(c, (char *) f->re[p[i]].lval.up) && c != 0 && c != HAT)) { |
| q = f->re[p[i]].lfollow; |
| for (j = 1; j <= *q; j++) { |
| if (q[j] >= maxsetvec) { |
| maxsetvec *= 4; |
| setvec = (int *) realloc(setvec, maxsetvec * sizeof(int)); |
| tmpset = (int *) realloc(setvec, maxsetvec * sizeof(int)); |
| if (setvec == 0 || tmpset == 0) |
| overflo("cgoto overflow"); |
| } |
| if (setvec[q[j]] == 0) { |
| setcnt++; |
| setvec[q[j]] = 1; |
| } |
| } |
| } |
| } |
| } |
| /* determine if setvec is a previous state */ |
| tmpset[0] = setcnt; |
| j = 1; |
| for (i = f->accept; i >= 0; i--) |
| if (setvec[i]) { |
| tmpset[j++] = i; |
| } |
| /* tmpset == previous state? */ |
| for (i = 1; i <= f->curstat; i++) { |
| p = f->posns[i]; |
| if ((k = tmpset[0]) != p[0]) |
| goto different; |
| for (j = 1; j <= k; j++) |
| if (tmpset[j] != p[j]) |
| goto different; |
| /* setvec is state i */ |
| f->gototab[s][c] = i; |
| return i; |
| different:; |
| } |
| |
| /* add tmpset to current set of states */ |
| if (f->curstat >= NSTATES-1) { |
| f->curstat = 2; |
| f->reset = 1; |
| for (i = 2; i < NSTATES; i++) |
| xfree(f->posns[i]); |
| } else |
| ++(f->curstat); |
| for (i = 0; i < NCHARS; i++) |
| f->gototab[f->curstat][i] = 0; |
| xfree(f->posns[f->curstat]); |
| if ((p = (int *) calloc(1, (setcnt+1)*sizeof(int))) == NULL) |
| overflo("out of space in cgoto"); |
| |
| f->posns[f->curstat] = p; |
| f->gototab[s][c] = f->curstat; |
| for (i = 0; i <= setcnt; i++) |
| p[i] = tmpset[i]; |
| if (setvec[f->accept]) |
| f->out[f->curstat] = 1; |
| else |
| f->out[f->curstat] = 0; |
| return f->curstat; |
| } |
| |
| |
| void freefa(fa *f) /* free a finite automaton */ |
| { |
| int i; |
| |
| if (f == NULL) |
| return; |
| for (i = 0; i <= f->curstat; i++) |
| xfree(f->posns[i]); |
| for (i = 0; i <= f->accept; i++) { |
| xfree(f->re[i].lfollow); |
| if (f->re[i].ltype == CCL || f->re[i].ltype == NCCL) |
| xfree((f->re[i].lval.np)); |
| } |
| xfree(f->restr); |
| xfree(f); |
| } |