/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│ │ vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi │ ╚──────────────────────────────────────────────────────────────────────────────╝ │ │ │ Copyright (C) Lucent Technologies 1997 │ │ All Rights Reserved │ │ │ │ Permission to use, copy, modify, and distribute this software and │ │ its documentation for any purpose and without fee is hereby │ │ granted, provided that the above copyright notice appear in all │ │ copies and that both that the copyright notice and this │ │ permission notice and warranty disclaimer appear in supporting │ │ documentation, and that the name Lucent Technologies or any of │ │ its entities not be used in advertising or publicity pertaining │ │ to distribution of the software without specific, written prior │ │ permission. │ │ │ │ LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, │ │ INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. │ │ IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY │ │ SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES │ │ WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER │ │ IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, │ │ ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF │ │ THIS SOFTWARE. │ │ │ ╚─────────────────────────────────────────────────────────────────────────────*/ #define DEBUG #include "libc/calls/calls.h" #include "libc/calls/weirdtypes.h" #include "libc/errno.h" #include "libc/fmt/conv.h" #include "libc/mem/mem.h" #include "libc/runtime/runtime.h" #include "libc/str/str.h" #include "libc/sysv/consts/f.h" #include "libc/sysv/consts/fd.h" #include "libc/time/time.h" #include "third_party/awk/awk.h" #include "third_party/awk/awkgram.tab.h" #include "third_party/libcxx/math.h" static void stdinit(void); static void flush_all(void); #if 1 #define tempfree(x) do { if (istemp(x)) tfree(x); } while (/*CONSTCOND*/0) #else void tempfree(Cell *p) { if (p->ctype == OCELL && (p->csub < CUNK || p->csub > CFREE)) { WARNING("bad csub %d in Cell %d %s", p->csub, p->ctype, p->sval); } if (istemp(p)) tfree(p); } #endif jmp_buf env; extern Awkfloat srand_seed; Node *winner = NULL; /* root of parse tree */ Cell *tmps; /* free temporary cells for execution */ static Cell truecell ={ OBOOL, BTRUE, 0, 0, 1.0, NUM, NULL, NULL }; Cell *True = &truecell; static Cell falsecell ={ OBOOL, BFALSE, 0, 0, 0.0, NUM, NULL, NULL }; Cell *False = &falsecell; static Cell breakcell ={ OJUMP, JBREAK, 0, 0, 0.0, NUM, NULL, NULL }; Cell *jbreak = &breakcell; static Cell contcell ={ OJUMP, JCONT, 0, 0, 0.0, NUM, NULL, NULL }; Cell *jcont = &contcell; static Cell nextcell ={ OJUMP, JNEXT, 0, 0, 0.0, NUM, NULL, NULL }; Cell *jnext = &nextcell; static Cell nextfilecell ={ OJUMP, JNEXTFILE, 0, 0, 0.0, NUM, NULL, NULL }; Cell *jnextfile = &nextfilecell; static Cell exitcell ={ OJUMP, JEXIT, 0, 0, 0.0, NUM, NULL, NULL }; Cell *jexit = &exitcell; static Cell retcell ={ OJUMP, JRET, 0, 0, 0.0, NUM, NULL, NULL }; Cell *jret = &retcell; static Cell tempcell ={ OCELL, CTEMP, 0, EMPTY, 0.0, NUM|STR|DONTFREE, NULL, NULL }; Node *curnode = NULL; /* the node being executed, for debugging */ /* buffer memory management */ int adjbuf(char **pbuf, int *psiz, int minlen, int quantum, char **pbptr, const char *whatrtn) /* pbuf: address of pointer to buffer being managed * psiz: address of buffer size variable * minlen: minimum length of buffer needed * quantum: buffer size quantum * pbptr: address of movable pointer into buffer, or 0 if none * whatrtn: name of the calling routine if failure should cause fatal error * * return 0 for realloc failure, !=0 for success */ { if (minlen > *psiz) { char *tbuf; int rminlen = quantum ? minlen % quantum : 0; int boff = pbptr ? *pbptr - *pbuf : 0; /* round up to next multiple of quantum */ if (rminlen) minlen += quantum - rminlen; tbuf = (char *) realloc(*pbuf, minlen); DPRINTF("adjbuf %s: %d %d (pbuf=%p, tbuf=%p)\n", whatrtn, *psiz, minlen, (void*)*pbuf, (void*)tbuf); if (tbuf == NULL) { if (whatrtn) FATAL("out of memory in %s", whatrtn); return 0; } *pbuf = tbuf; *psiz = minlen; if (pbptr) *pbptr = tbuf + boff; } return 1; } void run(Node *a) /* execution of parse tree starts here */ { stdinit(); execute(a); closeall(); } Cell *execute(Node *u) /* execute a node of the parse tree */ { Cell *(*proc)(Node **, int); Cell *x; Node *a; if (u == NULL) return(True); for (a = u; ; a = a->nnext) { curnode = a; if (isvalue(a)) { x = (Cell *) (a->narg[0]); if (isfld(x) && !donefld) fldbld(); else if (isrec(x) && !donerec) recbld(); return(x); } if (notlegal(a->nobj)) /* probably a Cell* but too risky to print */ FATAL("illegal statement"); proc = proctab[a->nobj-FIRSTTOKEN]; x = (*proc)(a->narg, a->nobj); if (isfld(x) && !donefld) fldbld(); else if (isrec(x) && !donerec) recbld(); if (isexpr(a)) return(x); if (isjump(x)) return(x); if (a->nnext == NULL) return(x); tempfree(x); } } Cell *program(Node **a, int n) /* execute an awk program */ { /* a[0] = BEGIN, a[1] = body, a[2] = END */ Cell *x; if (setjmp(env) != 0) goto ex; if (a[0]) { /* BEGIN */ x = execute(a[0]); if (isexit(x)) return(True); if (isjump(x)) FATAL("illegal break, continue, next or nextfile from BEGIN"); tempfree(x); } if (a[1] || a[2]) while (getrec(&record, &recsize, true) > 0) { x = execute(a[1]); if (isexit(x)) break; tempfree(x); } ex: if (setjmp(env) != 0) /* handles exit within END */ goto ex1; if (a[2]) { /* END */ x = execute(a[2]); if (isbreak(x) || isnext(x) || iscont(x)) FATAL("illegal break, continue, next or nextfile from END"); tempfree(x); } ex1: return(True); } struct Frame { /* stack frame for awk function calls */ int nargs; /* number of arguments in this call */ Cell *fcncell; /* pointer to Cell for function */ Cell **args; /* pointer to array of arguments after execute */ Cell *retval; /* return value */ }; #define NARGS 50 /* max args in a call */ struct Frame *frame = NULL; /* base of stack frames; dynamically allocated */ int nframe = 0; /* number of frames allocated */ struct Frame *frp = NULL; /* frame pointer. bottom level unused */ Cell *call(Node **a, int n) /* function call. very kludgy and fragile */ { static const Cell newcopycell = { OCELL, CCOPY, 0, EMPTY, 0.0, NUM|STR|DONTFREE, NULL, NULL }; int i, ncall, ndef; int freed = 0; /* handles potential double freeing when fcn & param share a tempcell */ Node *x; Cell *args[NARGS], *oargs[NARGS]; /* BUG: fixed size arrays */ Cell *y, *z, *fcn; char *s; fcn = execute(a[0]); /* the function itself */ s = fcn->nval; if (!isfcn(fcn)) FATAL("calling undefined function %s", s); if (frame == NULL) { frp = frame = (struct Frame *) calloc(nframe += 100, sizeof(*frame)); if (frame == NULL) FATAL("out of space for stack frames calling %s", s); } for (ncall = 0, x = a[1]; x != NULL; x = x->nnext) /* args in call */ ncall++; ndef = (int) fcn->fval; /* args in defn */ DPRINTF("calling %s, %d args (%d in defn), frp=%d\n", s, ncall, ndef, (int) (frp-frame)); if (ncall > ndef) WARNING("function %s called with %d args, uses only %d", s, ncall, ndef); if (ncall + ndef > NARGS) FATAL("function %s has %d arguments, limit %d", s, ncall+ndef, NARGS); for (i = 0, x = a[1]; x != NULL; i++, x = x->nnext) { /* get call args */ DPRINTF("evaluate args[%d], frp=%d:\n", i, (int) (frp-frame)); y = execute(x); oargs[i] = y; DPRINTF("args[%d]: %s %f <%s>, t=%o\n", i, NN(y->nval), y->fval, isarr(y) ? "(array)" : NN(y->sval), y->tval); if (isfcn(y)) FATAL("can't use function %s as argument in %s", y->nval, s); if (isarr(y)) args[i] = y; /* arrays by ref */ else args[i] = copycell(y); tempfree(y); } for ( ; i < ndef; i++) { /* add null args for ones not provided */ args[i] = gettemp(); *args[i] = newcopycell; } frp++; /* now ok to up frame */ if (frp >= frame + nframe) { int dfp = frp - frame; /* old index */ frame = (struct Frame *) realloc(frame, (nframe += 100) * sizeof(*frame)); if (frame == NULL) FATAL("out of space for stack frames in %s", s); frp = frame + dfp; } frp->fcncell = fcn; frp->args = args; frp->nargs = ndef; /* number defined with (excess are locals) */ frp->retval = gettemp(); DPRINTF("start exec of %s, frp=%d\n", s, (int) (frp-frame)); y = execute((Node *)(fcn->sval)); /* execute body */ DPRINTF("finished exec of %s, frp=%d\n", s, (int) (frp-frame)); for (i = 0; i < ndef; i++) { Cell *t = frp->args[i]; if (isarr(t)) { if (t->csub == CCOPY) { if (i >= ncall) { freesymtab(t); t->csub = CTEMP; tempfree(t); } else { oargs[i]->tval = t->tval; oargs[i]->tval &= ~(STR|NUM|DONTFREE); oargs[i]->sval = t->sval; tempfree(t); } } } else if (t != y) { /* kludge to prevent freeing twice */ t->csub = CTEMP; tempfree(t); } else if (t == y && t->csub == CCOPY) { t->csub = CTEMP; tempfree(t); freed = 1; } } tempfree(fcn); if (isexit(y) || isnext(y)) return y; if (freed == 0) { tempfree(y); /* don't free twice! */ } z = frp->retval; /* return value */ DPRINTF("%s returns %g |%s| %o\n", s, getfval(z), getsval(z), z->tval); frp--; return(z); } Cell *copycell(Cell *x) /* make a copy of a cell in a temp */ { Cell *y; /* copy is not constant or field */ y = gettemp(); y->tval = x->tval & ~(CON|FLD|REC); y->csub = CCOPY; /* prevents freeing until call is over */ y->nval = x->nval; /* BUG? */ if (isstr(x) /* || x->ctype == OCELL */) { y->sval = tostring(x->sval); y->tval &= ~DONTFREE; } else y->tval |= DONTFREE; y->fval = x->fval; return y; } Cell *arg(Node **a, int n) /* nth argument of a function */ { n = ptoi(a[0]); /* argument number, counting from 0 */ DPRINTF("arg(%d), frp->nargs=%d\n", n, frp->nargs); if (n+1 > frp->nargs) FATAL("argument #%d of function %s was not supplied", n+1, frp->fcncell->nval); return frp->args[n]; } Cell *jump(Node **a, int n) /* break, continue, next, nextfile, return */ { Cell *y; switch (n) { case EXIT: if (a[0] != NULL) { y = execute(a[0]); errorflag = (int) getfval(y); tempfree(y); } longjmp(env, 1); case RETURN: if (a[0] != NULL) { y = execute(a[0]); if ((y->tval & (STR|NUM)) == (STR|NUM)) { setsval(frp->retval, getsval(y)); frp->retval->fval = getfval(y); frp->retval->tval |= NUM; } else if (y->tval & STR) setsval(frp->retval, getsval(y)); else if (y->tval & NUM) setfval(frp->retval, getfval(y)); else /* can't happen */ FATAL("bad type variable %d", y->tval); tempfree(y); } return(jret); case NEXT: return(jnext); case NEXTFILE: nextfile(); return(jnextfile); case BREAK: return(jbreak); case CONTINUE: return(jcont); default: /* can't happen */ FATAL("illegal jump type %d", n); } return 0; /* not reached */ } Cell *awkgetline(Node **a, int n) /* get next line from specific input */ { /* a[0] is variable, a[1] is operator, a[2] is filename */ Cell *r, *x; extern Cell **fldtab; FILE *fp; char *buf; int bufsize = recsize; int mode; bool newflag; double result; if ((buf = (char *) malloc(bufsize)) == NULL) FATAL("out of memory in getline"); fflush(stdout); /* in case someone is waiting for a prompt */ r = gettemp(); if (a[1] != NULL) { /* getline < file */ x = execute(a[2]); /* filename */ mode = ptoi(a[1]); if (mode == '|') /* input pipe */ mode = LE; /* arbitrary flag */ fp = openfile(mode, getsval(x), &newflag); tempfree(x); if (fp == NULL) n = -1; else n = readrec(&buf, &bufsize, fp, newflag); if (n <= 0) { ; } else if (a[0] != NULL) { /* getline var sval, & result)) { x->fval = result; x->tval |= NUM; } tempfree(x); } else { /* getline sval, & result)) { fldtab[0]->fval = result; fldtab[0]->tval |= NUM; } } } else { /* bare getline; use current input */ if (a[0] == NULL) /* getline */ n = getrec(&record, &recsize, true); else { /* getline var */ n = getrec(&buf, &bufsize, false); if (n > 0) { x = execute(a[0]); setsval(x, buf); if (is_number(x->sval, & result)) { x->fval = result; x->tval |= NUM; } tempfree(x); } } } setfval(r, (Awkfloat) n); free(buf); return r; } Cell *getnf(Node **a, int n) /* get NF */ { if (!donefld) fldbld(); return (Cell *) a[0]; } static char * makearraystring(Node *p, const char *func) { char *buf; int bufsz = recsize; size_t blen; if ((buf = (char *) malloc(bufsz)) == NULL) { FATAL("%s: out of memory", func); } blen = 0; buf[blen] = '\0'; for (; p; p = p->nnext) { Cell *x = execute(p); /* expr */ char *s = getsval(x); size_t seplen = strlen(getsval(subseploc)); size_t nsub = p->nnext ? seplen : 0; size_t slen = strlen(s); size_t tlen = blen + slen + nsub; if (!adjbuf(&buf, &bufsz, tlen + 1, recsize, 0, func)) { FATAL("%s: out of memory %s[%s...]", func, x->nval, buf); } memcpy(buf + blen, s, slen); if (nsub) { memcpy(buf + blen + slen, *SUBSEP, nsub); } buf[tlen] = '\0'; blen = tlen; tempfree(x); } return buf; } Cell *array(Node **a, int n) /* a[0] is symtab, a[1] is list of subscripts */ { Cell *x, *z; char *buf; x = execute(a[0]); /* Cell* for symbol table */ buf = makearraystring(a[1], __func__); if (!isarr(x)) { DPRINTF("making %s into an array\n", NN(x->nval)); if (freeable(x)) xfree(x->sval); x->tval &= ~(STR|NUM|DONTFREE); x->tval |= ARR; x->sval = (char *) makesymtab(NSYMTAB); } z = setsymtab(buf, "", 0.0, STR|NUM, (Array *) x->sval); z->ctype = OCELL; z->csub = CVAR; tempfree(x); free(buf); return(z); } Cell *awkdelete(Node **a, int n) /* a[0] is symtab, a[1] is list of subscripts */ { Cell *x; x = execute(a[0]); /* Cell* for symbol table */ if (x == symtabloc) { FATAL("cannot delete SYMTAB or its elements"); } if (!isarr(x)) return True; if (a[1] == NULL) { /* delete the elements, not the table */ freesymtab(x); x->tval &= ~STR; x->tval |= ARR; x->sval = (char *) makesymtab(NSYMTAB); } else { char *buf = makearraystring(a[1], __func__); freeelem(x, buf); free(buf); } tempfree(x); return True; } Cell *intest(Node **a, int n) /* a[0] is index (list), a[1] is symtab */ { Cell *ap, *k; char *buf; ap = execute(a[1]); /* array name */ if (!isarr(ap)) { DPRINTF("making %s into an array\n", ap->nval); if (freeable(ap)) xfree(ap->sval); ap->tval &= ~(STR|NUM|DONTFREE); ap->tval |= ARR; ap->sval = (char *) makesymtab(NSYMTAB); } buf = makearraystring(a[0], __func__); k = lookup(buf, (Array *) ap->sval); tempfree(ap); free(buf); if (k == NULL) return(False); else return(True); } Cell *matchop(Node **a, int n) /* ~ and match() */ { Cell *x, *y; char *s, *t; int i; fa *pfa; int (*mf)(fa *, const char *) = match, mode = 0; if (n == MATCHFCN) { mf = pmatch; mode = 1; } x = execute(a[1]); /* a[1] = target text */ s = getsval(x); if (a[0] == NULL) /* a[1] == 0: already-compiled reg expr */ i = (*mf)((fa *) a[2], s); else { y = execute(a[2]); /* a[2] = regular expr */ t = getsval(y); pfa = makedfa(t, mode); i = (*mf)(pfa, s); tempfree(y); } tempfree(x); if (n == MATCHFCN) { int start = patbeg - s + 1; if (patlen < 0) start = 0; setfval(rstartloc, (Awkfloat) start); setfval(rlengthloc, (Awkfloat) patlen); x = gettemp(); x->tval = NUM; x->fval = start; return x; } else if ((n == MATCH && i == 1) || (n == NOTMATCH && i == 0)) return(True); else return(False); } Cell *boolop(Node **a, int n) /* a[0] || a[1], a[0] && a[1], !a[0] */ { Cell *x, *y; int i; x = execute(a[0]); i = istrue(x); tempfree(x); switch (n) { case BOR: if (i) return(True); y = execute(a[1]); i = istrue(y); tempfree(y); if (i) return(True); else return(False); case AND: if ( !i ) return(False); y = execute(a[1]); i = istrue(y); tempfree(y); if (i) return(True); else return(False); case NOT: if (i) return(False); else return(True); default: /* can't happen */ FATAL("unknown boolean operator %d", n); } return 0; /*NOTREACHED*/ } Cell *relop(Node **a, int n) /* a[0 < a[1], etc. */ { int i; Cell *x, *y; Awkfloat j; x = execute(a[0]); y = execute(a[1]); if (x->tval&NUM && y->tval&NUM) { j = x->fval - y->fval; i = j<0? -1: (j>0? 1: 0); } else { i = strcmp(getsval(x), getsval(y)); } tempfree(x); tempfree(y); switch (n) { case LT: if (i<0) return(True); else return(False); case LE: if (i<=0) return(True); else return(False); case NE: if (i!=0) return(True); else return(False); case EQ: if (i == 0) return(True); else return(False); case GE: if (i>=0) return(True); else return(False); case GT: if (i>0) return(True); else return(False); default: /* can't happen */ FATAL("unknown relational operator %d", n); } return 0; /*NOTREACHED*/ } void tfree(Cell *a) /* free a tempcell */ { if (freeable(a)) { DPRINTF("freeing %s %s %o\n", NN(a->nval), NN(a->sval), a->tval); xfree(a->sval); } if (a == tmps) FATAL("tempcell list is curdled"); a->cnext = tmps; tmps = a; } Cell *gettemp(void) /* get a tempcell */ { int i; Cell *x; if (!tmps) { tmps = (Cell *) calloc(100, sizeof(*tmps)); if (!tmps) FATAL("out of space for temporaries"); for (i = 1; i < 100; i++) tmps[i-1].cnext = &tmps[i]; tmps[i-1].cnext = NULL; } x = tmps; tmps = x->cnext; *x = tempcell; return(x); } Cell *indirect(Node **a, int n) /* $( a[0] ) */ { Awkfloat val; Cell *x; int m; char *s; x = execute(a[0]); val = getfval(x); /* freebsd: defend against super large field numbers */ if ((Awkfloat)INT_MAX < val) FATAL("trying to access out of range field %s", x->nval); m = (int) val; if (m == 0 && !is_number(s = getsval(x), NULL)) /* suspicion! */ FATAL("illegal field $(%s), name \"%s\"", s, x->nval); /* BUG: can x->nval ever be null??? */ tempfree(x); x = fieldadr(m); x->ctype = OCELL; /* BUG? why are these needed? */ x->csub = CFLD; return(x); } Cell *substr(Node **a, int nnn) /* substr(a[0], a[1], a[2]) */ { int k, m, n; char *s; int temp; Cell *x, *y, *z = NULL; x = execute(a[0]); y = execute(a[1]); if (a[2] != NULL) z = execute(a[2]); s = getsval(x); k = strlen(s) + 1; if (k <= 1) { tempfree(x); tempfree(y); if (a[2] != NULL) { tempfree(z); } x = gettemp(); setsval(x, ""); return(x); } m = (int) getfval(y); if (m <= 0) m = 1; else if (m > k) m = k; tempfree(y); if (a[2] != NULL) { n = (int) getfval(z); tempfree(z); } else n = k - 1; if (n < 0) n = 0; else if (n > k - m) n = k - m; DPRINTF("substr: m=%d, n=%d, s=%s\n", m, n, s); y = gettemp(); temp = s[n+m-1]; /* with thanks to John Linderman */ s[n+m-1] = '\0'; setsval(y, s + m - 1); s[n+m-1] = temp; tempfree(x); return(y); } Cell *sindex(Node **a, int nnn) /* index(a[0], a[1]) */ { Cell *x, *y, *z; char *s1, *s2, *p1, *p2, *q; Awkfloat v = 0.0; x = execute(a[0]); s1 = getsval(x); y = execute(a[1]); s2 = getsval(y); z = gettemp(); for (p1 = s1; *p1 != '\0'; p1++) { for (q = p1, p2 = s2; *p2 != '\0' && *q == *p2; q++, p2++) continue; if (*p2 == '\0') { v = (Awkfloat) (p1 - s1 + 1); /* origin 1 */ break; } } tempfree(x); tempfree(y); setfval(z, v); return(z); } #define MAXNUMSIZE 50 int format(char **pbuf, int *pbufsize, const char *s, Node *a) /* printf-like conversions */ { char *fmt; char *p, *t; const char *os; Cell *x; int flag = 0, n; int fmtwd; /* format width */ int fmtsz = recsize; char *buf = *pbuf; int bufsize = *pbufsize; #define FMTSZ(a) (fmtsz - ((a) - fmt)) #define BUFSZ(a) (bufsize - ((a) - buf)) static bool first = true; static bool have_a_format = false; if (first) { char xbuf[100]; snprintf(xbuf, sizeof(xbuf), "%a", 42.0); have_a_format = (strcmp(xbuf, "0x1.5p+5") == 0); first = false; } os = s; p = buf; if ((fmt = (char *) malloc(fmtsz)) == NULL) FATAL("out of memory in format()"); while (*s) { adjbuf(&buf, &bufsize, MAXNUMSIZE+1+p-buf, recsize, &p, "format1"); if (*s != '%') { *p++ = *s++; continue; } if (*(s+1) == '%') { *p++ = '%'; s += 2; continue; } /* have to be real careful in case this is a huge number, eg, %100000d */ fmtwd = atoi(s+1); if (fmtwd < 0) fmtwd = -fmtwd; adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format2"); for (t = fmt; (*t++ = *s) != '\0'; s++) { if (!adjbuf(&fmt, &fmtsz, MAXNUMSIZE+1+t-fmt, recsize, &t, "format3")) FATAL("format item %.30s... ran format() out of memory", os); /* Ignore size specifiers */ if (strchr("hjLlqtz", *s) != NULL) { /* the ansi panoply */ t--; continue; } if (isalpha((uschar)*s)) break; if (*s == '$') { FATAL("'$' not permitted in awk formats"); } if (*s == '*') { if (a == NULL) { FATAL("not enough args in printf(%s)", os); } x = execute(a); a = a->nnext; snprintf(t - 1, FMTSZ(t - 1), "%d", fmtwd=(int) getfval(x)); if (fmtwd < 0) fmtwd = -fmtwd; adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format"); t = fmt + strlen(fmt); tempfree(x); } } *t = '\0'; if (fmtwd < 0) fmtwd = -fmtwd; adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format4"); switch (*s) { case 'a': case 'A': if (have_a_format) flag = *s; else flag = 'f'; break; case 'f': case 'e': case 'g': case 'E': case 'G': flag = 'f'; break; case 'd': case 'i': case 'o': case 'x': case 'X': case 'u': flag = (*s == 'd' || *s == 'i') ? 'd' : 'u'; *(t-1) = 'j'; *t = *s; *++t = '\0'; break; case 's': flag = 's'; break; case 'c': flag = 'c'; break; default: WARNING("weird printf conversion %s", fmt); flag = '?'; break; } if (a == NULL) FATAL("not enough args in printf(%s)", os); x = execute(a); a = a->nnext; n = MAXNUMSIZE; if (fmtwd > n) n = fmtwd; adjbuf(&buf, &bufsize, 1+n+p-buf, recsize, &p, "format5"); switch (flag) { case '?': snprintf(p, BUFSZ(p), "%s", fmt); /* unknown, so dump it too */ t = getsval(x); n = strlen(t); if (fmtwd > n) n = fmtwd; adjbuf(&buf, &bufsize, 1+strlen(p)+n+p-buf, recsize, &p, "format6"); p += strlen(p); snprintf(p, BUFSZ(p), "%s", t); break; case 'a': case 'A': case 'f': snprintf(p, BUFSZ(p), fmt, getfval(x)); break; case 'd': snprintf(p, BUFSZ(p), fmt, (intmax_t) getfval(x)); break; case 'u': snprintf(p, BUFSZ(p), fmt, (uintmax_t) getfval(x)); break; case 's': t = getsval(x); n = strlen(t); if (fmtwd > n) n = fmtwd; if (!adjbuf(&buf, &bufsize, 1+n+p-buf, recsize, &p, "format7")) FATAL("huge string/format (%d chars) in printf %.30s... ran format() out of memory", n, t); snprintf(p, BUFSZ(p), fmt, t); break; case 'c': if (isnum(x)) { if ((int)getfval(x)) snprintf(p, BUFSZ(p), fmt, (int) getfval(x)); else { *p++ = '\0'; /* explicit null byte */ *p = '\0'; /* next output will start here */ } } else snprintf(p, BUFSZ(p), fmt, getsval(x)[0]); break; default: FATAL("can't happen: bad conversion %c in format()", flag); } tempfree(x); p += strlen(p); s++; } *p = '\0'; free(fmt); for ( ; a; a = a->nnext) /* evaluate any remaining args */ execute(a); *pbuf = buf; *pbufsize = bufsize; return p - buf; } Cell *awksprintf(Node **a, int n) /* sprintf(a[0]) */ { Cell *x; Node *y; char *buf; int bufsz=3*recsize; if ((buf = (char *) malloc(bufsz)) == NULL) FATAL("out of memory in awksprintf"); y = a[0]->nnext; x = execute(a[0]); if (format(&buf, &bufsz, getsval(x), y) == -1) FATAL("sprintf string %.30s... too long. can't happen.", buf); tempfree(x); x = gettemp(); x->sval = buf; x->tval = STR; return(x); } Cell *awkprintf(Node **a, int n) /* printf */ { /* a[0] is list of args, starting with format string */ /* a[1] is redirection operator, a[2] is redirection file */ FILE *fp; Cell *x; Node *y; char *buf; int len; int bufsz=3*recsize; if ((buf = (char *) malloc(bufsz)) == NULL) FATAL("out of memory in awkprintf"); y = a[0]->nnext; x = execute(a[0]); if ((len = format(&buf, &bufsz, getsval(x), y)) == -1) FATAL("printf string %.30s... too long. can't happen.", buf); tempfree(x); if (a[1] == NULL) { /* fputs(buf, stdout); */ fwrite(buf, len, 1, stdout); if (ferror(stdout)) FATAL("write error on stdout"); } else { fp = redirect(ptoi(a[1]), a[2]); /* fputs(buf, fp); */ fwrite(buf, len, 1, fp); fflush(fp); if (ferror(fp)) FATAL("write error on %s", filename(fp)); } free(buf); return(True); } Cell *arith(Node **a, int n) /* a[0] + a[1], etc. also -a[0] */ { Awkfloat i, j = 0; double v; Cell *x, *y, *z; x = execute(a[0]); i = getfval(x); tempfree(x); if (n != UMINUS && n != UPLUS) { y = execute(a[1]); j = getfval(y); tempfree(y); } z = gettemp(); switch (n) { case ADD: i += j; break; case MINUS: i -= j; break; case MULT: i *= j; break; case DIVIDE: if (j == 0) FATAL("division by zero"); i /= j; break; case MOD: if (j == 0) FATAL("division by zero in mod"); modf(i/j, &v); i = i - j * v; break; case UMINUS: i = -i; break; case UPLUS: /* handled by getfval(), above */ break; case POWER: if (j >= 0 && modf(j, &v) == 0.0) /* pos integer exponent */ i = ipow(i, (int) j); else { errno = 0; i = errcheck(pow(i, j), "pow"); } break; default: /* can't happen */ FATAL("illegal arithmetic operator %d", n); } setfval(z, i); return(z); } double ipow(double x, int n) /* x**n. ought to be done by pow, but isn't always */ { double v; if (n <= 0) return 1; v = ipow(x, n/2); if (n % 2 == 0) return v * v; else return x * v * v; } Cell *incrdecr(Node **a, int n) /* a[0]++, etc. */ { Cell *x, *z; int k; Awkfloat xf; x = execute(a[0]); xf = getfval(x); k = (n == PREINCR || n == POSTINCR) ? 1 : -1; if (n == PREINCR || n == PREDECR) { setfval(x, xf + k); return(x); } z = gettemp(); setfval(z, xf); setfval(x, xf + k); tempfree(x); return(z); } Cell *assign(Node **a, int n) /* a[0] = a[1], a[0] += a[1], etc. */ { /* this is subtle; don't muck with it. */ Cell *x, *y; Awkfloat xf, yf; double v; y = execute(a[1]); x = execute(a[0]); if (n == ASSIGN) { /* ordinary assignment */ if (x == y && !(x->tval & (FLD|REC)) && x != nfloc) ; /* self-assignment: leave alone unless it's a field or NF */ else if ((y->tval & (STR|NUM)) == (STR|NUM)) { setsval(x, getsval(y)); x->fval = getfval(y); x->tval |= NUM; } else if (isstr(y)) setsval(x, getsval(y)); else if (isnum(y)) setfval(x, getfval(y)); else funnyvar(y, "read value of"); tempfree(y); return(x); } xf = getfval(x); yf = getfval(y); switch (n) { case ADDEQ: xf += yf; break; case SUBEQ: xf -= yf; break; case MULTEQ: xf *= yf; break; case DIVEQ: if (yf == 0) FATAL("division by zero in /="); xf /= yf; break; case MODEQ: if (yf == 0) FATAL("division by zero in %%="); modf(xf/yf, &v); xf = xf - yf * v; break; case POWEQ: if (yf >= 0 && modf(yf, &v) == 0.0) /* pos integer exponent */ xf = ipow(xf, (int) yf); else { errno = 0; xf = errcheck(pow(xf, yf), "pow"); } break; default: FATAL("illegal assignment operator %d", n); break; } tempfree(y); setfval(x, xf); return(x); } Cell *cat(Node **a, int q) /* a[0] cat a[1] */ { Cell *x, *y, *z; int n1, n2; char *s = NULL; int ssz = 0; x = execute(a[0]); n1 = strlen(getsval(x)); adjbuf(&s, &ssz, n1, recsize, 0, "cat1"); memcpy(s, x->sval, n1); y = execute(a[1]); n2 = strlen(getsval(y)); adjbuf(&s, &ssz, n1 + n2 + 1, recsize, 0, "cat2"); memcpy(s + n1, y->sval, n2); s[n1 + n2] = '\0'; tempfree(x); tempfree(y); z = gettemp(); z->sval = s; z->tval = STR; return(z); } Cell *pastat(Node **a, int n) /* a[0] { a[1] } */ { Cell *x; if (a[0] == NULL) x = execute(a[1]); else { x = execute(a[0]); if (istrue(x)) { tempfree(x); x = execute(a[1]); } } return x; } Cell *dopa2(Node **a, int n) /* a[0], a[1] { a[2] } */ { Cell *x; int pair; pair = ptoi(a[3]); if (pairstack[pair] == 0) { x = execute(a[0]); if (istrue(x)) pairstack[pair] = 1; tempfree(x); } if (pairstack[pair] == 1) { x = execute(a[1]); if (istrue(x)) pairstack[pair] = 0; tempfree(x); x = execute(a[2]); return(x); } return(False); } Cell *split(Node **a, int nnn) /* split(a[0], a[1], a[2]); a[3] is type */ { Cell *x = NULL, *y, *ap; const char *s, *origs, *t; const char *fs = NULL; char *origfs = NULL; int sep; char temp, num[50]; int n, tempstat, arg3type; double result; y = execute(a[0]); /* source string */ origs = s = strdup(getsval(y)); arg3type = ptoi(a[3]); if (a[2] == NULL) /* fs string */ fs = getsval(fsloc); else if (arg3type == STRING) { /* split(str,arr,"string") */ x = execute(a[2]); fs = origfs = strdup(getsval(x)); tempfree(x); } else if (arg3type == REGEXPR) fs = "(regexpr)"; /* split(str,arr,/regexpr/) */ else FATAL("illegal type of split"); sep = *fs; ap = execute(a[1]); /* array name */ freesymtab(ap); DPRINTF("split: s=|%s|, a=%s, sep=|%s|\n", s, NN(ap->nval), fs); ap->tval &= ~STR; ap->tval |= ARR; ap->sval = (char *) makesymtab(NSYMTAB); n = 0; if (arg3type == REGEXPR && strlen((char*)((fa*)a[2])->restr) == 0) { /* split(s, a, //); have to arrange that it looks like empty sep */ arg3type = 0; fs = ""; sep = 0; } if (*s != '\0' && (strlen(fs) > 1 || arg3type == REGEXPR)) { /* reg expr */ fa *pfa; if (arg3type == REGEXPR) { /* it's ready already */ pfa = (fa *) a[2]; } else { pfa = makedfa(fs, 1); } if (nematch(pfa,s)) { tempstat = pfa->initstat; pfa->initstat = 2; do { n++; snprintf(num, sizeof(num), "%d", n); temp = *patbeg; setptr(patbeg, '\0'); if (is_number(s, & result)) setsymtab(num, s, result, STR|NUM, (Array *) ap->sval); else setsymtab(num, s, 0.0, STR, (Array *) ap->sval); setptr(patbeg, temp); s = patbeg + patlen; if (*(patbeg+patlen-1) == '\0' || *s == '\0') { n++; snprintf(num, sizeof(num), "%d", n); setsymtab(num, "", 0.0, STR, (Array *) ap->sval); pfa->initstat = tempstat; goto spdone; } } while (nematch(pfa,s)); pfa->initstat = tempstat; /* bwk: has to be here to reset */ /* cf gsub and refldbld */ } n++; snprintf(num, sizeof(num), "%d", n); if (is_number(s, & result)) setsymtab(num, s, result, STR|NUM, (Array *) ap->sval); else setsymtab(num, s, 0.0, STR, (Array *) ap->sval); spdone: pfa = NULL; } else if (sep == ' ') { for (n = 0; ; ) { #define ISWS(c) ((c) == ' ' || (c) == '\t' || (c) == '\n') while (ISWS(*s)) s++; if (*s == '\0') break; n++; t = s; do s++; while (*s != '\0' && !ISWS(*s)); temp = *s; setptr(s, '\0'); snprintf(num, sizeof(num), "%d", n); if (is_number(t, & result)) setsymtab(num, t, result, STR|NUM, (Array *) ap->sval); else setsymtab(num, t, 0.0, STR, (Array *) ap->sval); setptr(s, temp); if (*s != '\0') s++; } } else if (sep == 0) { /* new: split(s, a, "") => 1 char/elem */ for (n = 0; *s != '\0'; s++) { char buf[2]; n++; snprintf(num, sizeof(num), "%d", n); buf[0] = *s; buf[1] = '\0'; if (isdigit((uschar)buf[0])) setsymtab(num, buf, atof(buf), STR|NUM, (Array *) ap->sval); else setsymtab(num, buf, 0.0, STR, (Array *) ap->sval); } } else if (*s != '\0') { for (;;) { n++; t = s; while (*s != sep && *s != '\n' && *s != '\0') s++; temp = *s; setptr(s, '\0'); snprintf(num, sizeof(num), "%d", n); if (is_number(t, & result)) setsymtab(num, t, result, STR|NUM, (Array *) ap->sval); else setsymtab(num, t, 0.0, STR, (Array *) ap->sval); setptr(s, temp); if (*s++ == '\0') break; } } tempfree(ap); tempfree(y); xfree(origs); xfree(origfs); x = gettemp(); x->tval = NUM; x->fval = n; return(x); } Cell *condexpr(Node **a, int n) /* a[0] ? a[1] : a[2] */ { Cell *x; x = execute(a[0]); if (istrue(x)) { tempfree(x); x = execute(a[1]); } else { tempfree(x); x = execute(a[2]); } return(x); } Cell *ifstat(Node **a, int n) /* if (a[0]) a[1]; else a[2] */ { Cell *x; x = execute(a[0]); if (istrue(x)) { tempfree(x); x = execute(a[1]); } else if (a[2] != NULL) { tempfree(x); x = execute(a[2]); } return(x); } Cell *whilestat(Node **a, int n) /* while (a[0]) a[1] */ { Cell *x; for (;;) { x = execute(a[0]); if (!istrue(x)) return(x); tempfree(x); x = execute(a[1]); if (isbreak(x)) { x = True; return(x); } if (isnext(x) || isexit(x) || isret(x)) return(x); tempfree(x); } } Cell *dostat(Node **a, int n) /* do a[0]; while(a[1]) */ { Cell *x; for (;;) { x = execute(a[0]); if (isbreak(x)) return True; if (isnext(x) || isexit(x) || isret(x)) return(x); tempfree(x); x = execute(a[1]); if (!istrue(x)) return(x); tempfree(x); } } Cell *forstat(Node **a, int n) /* for (a[0]; a[1]; a[2]) a[3] */ { Cell *x; x = execute(a[0]); tempfree(x); for (;;) { if (a[1]!=NULL) { x = execute(a[1]); if (!istrue(x)) return(x); else tempfree(x); } x = execute(a[3]); if (isbreak(x)) /* turn off break */ return True; if (isnext(x) || isexit(x) || isret(x)) return(x); tempfree(x); x = execute(a[2]); tempfree(x); } } Cell *instat(Node **a, int n) /* for (a[0] in a[1]) a[2] */ { Cell *x, *vp, *arrayp, *cp, *ncp; Array *tp; int i; vp = execute(a[0]); arrayp = execute(a[1]); if (!isarr(arrayp)) { return True; } tp = (Array *) arrayp->sval; tempfree(arrayp); for (i = 0; i < tp->size; i++) { /* this routine knows too much */ for (cp = tp->tab[i]; cp != NULL; cp = ncp) { setsval(vp, cp->nval); ncp = cp->cnext; x = execute(a[2]); if (isbreak(x)) { tempfree(vp); return True; } if (isnext(x) || isexit(x) || isret(x)) { tempfree(vp); return(x); } tempfree(x); } } return True; } static char *nawk_convert(const char *s, int (*fun_c)(int), wint_t (*fun_wc)(wint_t)) { char *buf = NULL; char *pbuf = NULL; const char *ps = NULL; size_t n = 0; wchar_t wc; size_t sz = MB_CUR_MAX; if (sz == 1) { buf = tostring(s); for (pbuf = buf; *pbuf; pbuf++) *pbuf = fun_c((uschar)*pbuf); return buf; } else { /* upper/lower character may be shorter/longer */ buf = tostringN(s, strlen(s) * sz + 1); (void) mbtowc(NULL, NULL, 0); /* reset internal state */ /* * Reset internal state here too. * Assign result to avoid a compiler warning. (Casting to void * doesn't work.) * Increment said variable to avoid a different warning. */ int unused = wctomb(NULL, L'\0'); unused++; ps = s; pbuf = buf; while (n = mbtowc(&wc, ps, sz), n > 0 && n != (size_t)-1 && n != (size_t)-2) { ps += n; n = wctomb(pbuf, fun_wc(wc)); if (n == (size_t)-1) FATAL("illegal wide character %s", s); pbuf += n; } *pbuf = '\0'; if (n) FATAL("illegal byte sequence %s", s); return buf; } } #ifdef __DJGPP__ static wint_t towupper(wint_t wc) { if (wc >= 0 && wc < 256) return toupper(wc & 0xFF); return wc; } static wint_t towlower(wint_t wc) { if (wc >= 0 && wc < 256) return tolower(wc & 0xFF); return wc; } #endif static char *nawk_toupper(const char *s) { return nawk_convert(s, toupper, towupper); } static char *nawk_tolower(const char *s) { return nawk_convert(s, tolower, towlower); } Cell *bltin(Node **a, int n) /* builtin functions. a[0] is type, a[1] is arg list */ { Cell *x, *y; Awkfloat u; int t; Awkfloat tmp; char *buf; Node *nextarg; FILE *fp; int status = 0; t = ptoi(a[0]); x = execute(a[1]); nextarg = a[1]->nnext; switch (t) { case FLENGTH: if (isarr(x)) u = ((Array *) x->sval)->nelem; /* GROT. should be function*/ else u = strlen(getsval(x)); break; case FLOG: errno = 0; u = errcheck(log(getfval(x)), "log"); break; case FINT: modf(getfval(x), &u); break; case FEXP: errno = 0; u = errcheck(exp(getfval(x)), "exp"); break; case FSQRT: errno = 0; u = errcheck(sqrt(getfval(x)), "sqrt"); break; case FSIN: u = sin(getfval(x)); break; case FCOS: u = cos(getfval(x)); break; case FATAN: if (nextarg == NULL) { WARNING("atan2 requires two arguments; returning 1.0"); u = 1.0; } else { y = execute(a[1]->nnext); u = atan2(getfval(x), getfval(y)); tempfree(y); nextarg = nextarg->nnext; } break; case FSYSTEM: fflush(stdout); /* in case something is buffered already */ status = system(getsval(x)); u = status; if (status != -1) { if (WIFEXITED(status)) { u = WEXITSTATUS(status); } else if (WIFSIGNALED(status)) { u = WTERMSIG(status) + 256; if (WCOREDUMP(status)) u += 256; } else /* something else?!? */ u = 0; } break; case FRAND: /* random() returns numbers in [0..2^31-1] * in order to get a number in [0, 1), divide it by 2^31 */ u = (Awkfloat) random() / (0x7fffffffL + 0x1UL); break; case FSRAND: if (isrec(x)) /* no argument provided */ u = time((time_t *)0); else u = getfval(x); tmp = u; srandom((unsigned long) u); u = srand_seed; srand_seed = tmp; break; case FTOUPPER: case FTOLOWER: if (t == FTOUPPER) buf = nawk_toupper(getsval(x)); else buf = nawk_tolower(getsval(x)); tempfree(x); x = gettemp(); setsval(x, buf); free(buf); return x; case FFLUSH: if (isrec(x) || strlen(getsval(x)) == 0) { flush_all(); /* fflush() or fflush("") -> all */ u = 0; } else if ((fp = openfile(FFLUSH, getsval(x), NULL)) == NULL) u = EOF; else u = fflush(fp); break; default: /* can't happen */ FATAL("illegal function type %d", t); break; } tempfree(x); x = gettemp(); setfval(x, u); if (nextarg != NULL) { WARNING("warning: function has too many arguments"); for ( ; nextarg; nextarg = nextarg->nnext) execute(nextarg); } return(x); } Cell *printstat(Node **a, int n) /* print a[0] */ { Node *x; Cell *y; FILE *fp; if (a[1] == NULL) /* a[1] is redirection operator, a[2] is file */ fp = stdout; else fp = redirect(ptoi(a[1]), a[2]); for (x = a[0]; x != NULL; x = x->nnext) { y = execute(x); fputs(getpssval(y), fp); tempfree(y); if (x->nnext == NULL) fputs(getsval(orsloc), fp); else fputs(getsval(ofsloc), fp); } if (a[1] != NULL) fflush(fp); if (ferror(fp)) FATAL("write error on %s", filename(fp)); return(True); } Cell *nullproc(Node **a, int n) { return 0; } FILE *redirect(int a, Node *b) /* set up all i/o redirections */ { FILE *fp; Cell *x; char *fname; x = execute(b); fname = getsval(x); fp = openfile(a, fname, NULL); if (fp == NULL) FATAL("can't open file %s", fname); tempfree(x); return fp; } struct files { FILE *fp; const char *fname; int mode; /* '|', 'a', 'w' => LE/LT, GT */ } *files; size_t nfiles; static void stdinit(void) /* in case stdin, etc., are not constants */ { nfiles = FOPEN_MAX; files = (struct files *) calloc(nfiles, sizeof(*files)); if (files == NULL) FATAL("can't allocate file memory for %zu files", nfiles); files[0].fp = stdin; files[0].fname = tostring("/dev/stdin"); files[0].mode = LT; files[1].fp = stdout; files[1].fname = tostring("/dev/stdout"); files[1].mode = GT; files[2].fp = stderr; files[2].fname = tostring("/dev/stderr"); files[2].mode = GT; } FILE *openfile(int a, const char *us, bool *pnewflag) { const char *s = us; size_t i; int m; FILE *fp = NULL; if (*s == '\0') FATAL("null file name in print or getline"); for (i = 0; i < nfiles; i++) if (files[i].fname && strcmp(s, files[i].fname) == 0 && (a == files[i].mode || (a==APPEND && files[i].mode==GT) || a == FFLUSH)) { if (pnewflag) *pnewflag = false; return files[i].fp; } if (a == FFLUSH) /* didn't find it, so don't create it! */ return NULL; for (i = 0; i < nfiles; i++) if (files[i].fp == NULL) break; if (i >= nfiles) { struct files *nf; size_t nnf = nfiles + FOPEN_MAX; nf = (struct files *) realloc(files, nnf * sizeof(*nf)); if (nf == NULL) FATAL("cannot grow files for %s and %zu files", s, nnf); memset(&nf[nfiles], 0, FOPEN_MAX * sizeof(*nf)); nfiles = nnf; files = nf; } fflush(stdout); /* force a semblance of order */ m = a; if (a == GT) { fp = fopen(s, "w"); } else if (a == APPEND) { fp = fopen(s, "a"); m = GT; /* so can mix > and >> */ } else if (a == '|') { /* output pipe */ fp = popen(s, "w"); } else if (a == LE) { /* input pipe */ fp = popen(s, "r"); } else if (a == LT) { /* getline sval, files[i].fname) != 0) continue; if (files[i].mode == GT || files[i].mode == '|') fflush(files[i].fp); if (ferror(files[i].fp)) { if ((files[i].mode == GT && files[i].fp != stderr) || files[i].mode == '|') FATAL("write error on %s", files[i].fname); else WARNING("i/o error occurred on %s", files[i].fname); } if (files[i].fp == stdin || files[i].fp == stdout || files[i].fp == stderr) stat = freopen("/dev/null", "r+", files[i].fp) == NULL; else if (files[i].mode == '|' || files[i].mode == LE) stat = pclose(files[i].fp) == -1; else stat = fclose(files[i].fp) == EOF; if (stat) WARNING("i/o error occurred closing %s", files[i].fname); xfree(files[i].fname); files[i].fname = NULL; /* watch out for ref thru this */ files[i].fp = NULL; break; } tempfree(x); x = gettemp(); setfval(x, (Awkfloat) (stat ? -1 : 0)); return(x); } void closeall(void) { size_t i; bool stat = false; for (i = 0; i < nfiles; i++) { if (! files[i].fp) continue; if (files[i].mode == GT || files[i].mode == '|') fflush(files[i].fp); if (ferror(files[i].fp)) { if ((files[i].mode == GT && files[i].fp != stderr) || files[i].mode == '|') FATAL("write error on %s", files[i].fname); else WARNING("i/o error occurred on %s", files[i].fname); } if (files[i].fp == stdin || files[i].fp == stdout || files[i].fp == stderr) continue; if (files[i].mode == '|' || files[i].mode == LE) stat = pclose(files[i].fp) == -1; else stat = fclose(files[i].fp) == EOF; if (stat) WARNING("i/o error occurred while closing %s", files[i].fname); } } static void flush_all(void) { size_t i; for (i = 0; i < nfiles; i++) if (files[i].fp) fflush(files[i].fp); } void backsub(char **pb_ptr, const char **sptr_ptr); Cell *sub(Node **a, int nnn) /* substitute command */ { const char *sptr, *q; Cell *x, *y, *result; char *t, *buf, *pb; fa *pfa; int bufsz = recsize; if ((buf = (char *) malloc(bufsz)) == NULL) FATAL("out of memory in sub"); x = execute(a[3]); /* target string */ t = getsval(x); if (a[0] == NULL) /* 0 => a[1] is already-compiled regexpr */ pfa = (fa *) a[1]; /* regular expression */ else { y = execute(a[1]); pfa = makedfa(getsval(y), 1); tempfree(y); } y = execute(a[2]); /* replacement string */ result = False; if (pmatch(pfa, t)) { sptr = t; adjbuf(&buf, &bufsz, 1+patbeg-sptr, recsize, 0, "sub"); pb = buf; while (sptr < patbeg) *pb++ = *sptr++; sptr = getsval(y); while (*sptr != '\0') { adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "sub"); if (*sptr == '\\') { backsub(&pb, &sptr); } else if (*sptr == '&') { sptr++; adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "sub"); for (q = patbeg; q < patbeg+patlen; ) *pb++ = *q++; } else *pb++ = *sptr++; } *pb = '\0'; if (pb > buf + bufsz) FATAL("sub result1 %.30s too big; can't happen", buf); sptr = patbeg + patlen; if ((patlen == 0 && *patbeg) || (patlen && *(sptr-1))) { adjbuf(&buf, &bufsz, 1+strlen(sptr)+pb-buf, 0, &pb, "sub"); while ((*pb++ = *sptr++) != '\0') continue; } if (pb > buf + bufsz) FATAL("sub result2 %.30s too big; can't happen", buf); setsval(x, buf); /* BUG: should be able to avoid copy */ result = True; } tempfree(x); tempfree(y); free(buf); return result; } Cell *gsub(Node **a, int nnn) /* global substitute */ { Cell *x, *y; char *rptr, *pb; const char *q, *t, *sptr; char *buf; fa *pfa; int mflag, tempstat, num; int bufsz = recsize; if ((buf = (char *) malloc(bufsz)) == NULL) FATAL("out of memory in gsub"); mflag = 0; /* if mflag == 0, can replace empty string */ num = 0; x = execute(a[3]); /* target string */ t = getsval(x); if (a[0] == NULL) /* 0 => a[1] is already-compiled regexpr */ pfa = (fa *) a[1]; /* regular expression */ else { y = execute(a[1]); pfa = makedfa(getsval(y), 1); tempfree(y); } y = execute(a[2]); /* replacement string */ if (pmatch(pfa, t)) { tempstat = pfa->initstat; pfa->initstat = 2; pb = buf; rptr = getsval(y); do { if (patlen == 0 && *patbeg != '\0') { /* matched empty string */ if (mflag == 0) { /* can replace empty */ num++; sptr = rptr; while (*sptr != '\0') { adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gsub"); if (*sptr == '\\') { backsub(&pb, &sptr); } else if (*sptr == '&') { sptr++; adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gsub"); for (q = patbeg; q < patbeg+patlen; ) *pb++ = *q++; } else *pb++ = *sptr++; } } if (*t == '\0') /* at end */ goto done; adjbuf(&buf, &bufsz, 2+pb-buf, recsize, &pb, "gsub"); *pb++ = *t++; if (pb > buf + bufsz) /* BUG: not sure of this test */ FATAL("gsub result0 %.30s too big; can't happen", buf); mflag = 0; } else { /* matched nonempty string */ num++; sptr = t; adjbuf(&buf, &bufsz, 1+(patbeg-sptr)+pb-buf, recsize, &pb, "gsub"); while (sptr < patbeg) *pb++ = *sptr++; sptr = rptr; while (*sptr != '\0') { adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gsub"); if (*sptr == '\\') { backsub(&pb, &sptr); } else if (*sptr == '&') { sptr++; adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gsub"); for (q = patbeg; q < patbeg+patlen; ) *pb++ = *q++; } else *pb++ = *sptr++; } t = patbeg + patlen; if (patlen == 0 || *t == '\0' || *(t-1) == '\0') goto done; if (pb > buf + bufsz) FATAL("gsub result1 %.30s too big; can't happen", buf); mflag = 1; } } while (pmatch(pfa,t)); sptr = t; adjbuf(&buf, &bufsz, 1+strlen(sptr)+pb-buf, 0, &pb, "gsub"); while ((*pb++ = *sptr++) != '\0') continue; done: if (pb < buf + bufsz) *pb = '\0'; else if (*(pb-1) != '\0') FATAL("gsub result2 %.30s truncated; can't happen", buf); setsval(x, buf); /* BUG: should be able to avoid copy + free */ pfa->initstat = tempstat; } tempfree(x); tempfree(y); x = gettemp(); x->tval = NUM; x->fval = num; free(buf); return(x); } void backsub(char **pb_ptr, const char **sptr_ptr) /* handle \\& variations */ { /* sptr[0] == '\\' */ char *pb = *pb_ptr; const char *sptr = *sptr_ptr; static bool first = true; static bool do_posix = false; if (first) { first = false; do_posix = (getenv("POSIXLY_CORRECT") != NULL); } if (sptr[1] == '\\') { if (sptr[2] == '\\' && sptr[3] == '&') { /* \\\& -> \& */ *pb++ = '\\'; *pb++ = '&'; sptr += 4; } else if (sptr[2] == '&') { /* \\& -> \ + matched */ *pb++ = '\\'; sptr += 2; } else if (do_posix) { /* \\x -> \x */ sptr++; *pb++ = *sptr++; } else { /* \\x -> \\x */ *pb++ = *sptr++; *pb++ = *sptr++; } } else if (sptr[1] == '&') { /* literal & */ sptr++; *pb++ = *sptr++; } else /* literal \ */ *pb++ = *sptr++; *pb_ptr = pb; *sptr_ptr = sptr; }