From f0bf9f3389f6924448cf7b8ba7c428d26e2d1cf4 Mon Sep 17 00:00:00 2001 From: Justine Tunney Date: Sun, 21 Aug 2022 20:35:38 -0700 Subject: [PATCH] Import NetBSD sed --- third_party/sed/README.cosmo | 12 + third_party/sed/compile.c | 1087 ++++++++++++++++++++++++++++++++++ third_party/sed/defs.h | 151 +++++ third_party/sed/extern.h | 61 ++ third_party/sed/main.c | 559 +++++++++++++++++ third_party/sed/misc.c | 119 ++++ third_party/sed/process.c | 804 +++++++++++++++++++++++++ third_party/sed/sed.1 | 640 ++++++++++++++++++++ 8 files changed, 3433 insertions(+) create mode 100644 third_party/sed/README.cosmo create mode 100644 third_party/sed/compile.c create mode 100644 third_party/sed/defs.h create mode 100644 third_party/sed/extern.h create mode 100644 third_party/sed/main.c create mode 100644 third_party/sed/misc.c create mode 100644 third_party/sed/process.c create mode 100644 third_party/sed/sed.1 diff --git a/third_party/sed/README.cosmo b/third_party/sed/README.cosmo new file mode 100644 index 000000000..b990fd640 --- /dev/null +++ b/third_party/sed/README.cosmo @@ -0,0 +1,12 @@ +DESCRIPTION + + sed is a stream editor program + +ORIGIN + + https://github.com/netbsd/src + 6348f192439798ea49b672bb9a00490240376889 + +LOCAL CHANGES + + None. diff --git a/third_party/sed/compile.c b/third_party/sed/compile.c new file mode 100644 index 000000000..cec6684b8 --- /dev/null +++ b/third_party/sed/compile.c @@ -0,0 +1,1087 @@ +/* $NetBSD: compile.c,v 1.48 2019/10/05 20:23:55 christos Exp $ */ + +/*- + * Copyright (c) 1992 Diomidis Spinellis. + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Diomidis Spinellis of Imperial College, University of London. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#if HAVE_NBTOOL_CONFIG_H +#include "nbtool_config.h" +#endif + +#include +__RCSID("$NetBSD: compile.c,v 1.48 2019/10/05 20:23:55 christos Exp $"); +#ifdef __FBSDID +__FBSDID("$FreeBSD: head/usr.bin/sed/compile.c 259132 2013-12-09 18:57:20Z eadler $"); +#endif + +#if 0 +static const char sccsid[] = "@(#)compile.c 8.1 (Berkeley) 6/6/93"; +#endif + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "defs.h" +#include "extern.h" + +#define LHSZ 128 +#define LHMASK (LHSZ - 1) +static struct labhash { + struct labhash *lh_next; + u_int lh_hash; + struct s_command *lh_cmd; + int lh_ref; +} *labels[LHSZ]; + +static char *compile_addr(char *, struct s_addr *); +static char *compile_ccl(char **, char *); +static char *compile_delimited(char *, char *, int); +static char *compile_flags(char *, struct s_subst *); +static regex_t *compile_re(char *, int); +static char *compile_subst(char *, struct s_subst *); +static char *compile_text(void); +static char *compile_tr(char *, struct s_tr **); +static struct s_command + **compile_stream(struct s_command **); +static char *duptoeol(char *, const char *); +static void enterlabel(struct s_command *); +static struct s_command + *findlabel(char *); +static void fixuplabel(struct s_command *, struct s_command *); +static void uselabel(void); +static void parse_escapes(char *); + +/* + * Command specification. This is used to drive the command parser. + */ +struct s_format { + char code; /* Command code */ + int naddr; /* Number of address args */ + enum e_args args; /* Argument type */ +}; + +static struct s_format cmd_fmts[] = { + {'{', 2, GROUP}, + {'}', 0, ENDGROUP}, + {'a', 1, TEXT}, + {'b', 2, BRANCH}, + {'c', 2, TEXT}, + {'d', 2, EMPTY}, + {'D', 2, EMPTY}, + {'g', 2, EMPTY}, + {'G', 2, EMPTY}, + {'h', 2, EMPTY}, + {'H', 2, EMPTY}, + {'i', 1, TEXT}, + {'l', 2, EMPTY}, + {'n', 2, EMPTY}, + {'N', 2, EMPTY}, + {'p', 2, EMPTY}, + {'P', 2, EMPTY}, + {'q', 1, EMPTY}, + {'r', 1, RFILE}, + {'s', 2, SUBST}, + {'t', 2, BRANCH}, + {'w', 2, WFILE}, + {'x', 2, EMPTY}, + {'y', 2, TR}, + {'!', 2, NONSEL}, + {':', 0, LABEL}, + {'#', 0, COMMENT}, + {'=', 1, EMPTY}, + {'\0', 0, COMMENT}, +}; + +/* The compiled program. */ +struct s_command *prog; + +/* + * Compile the program into prog. + * Initialise appends. + */ +void +compile(void) +{ + *compile_stream(&prog) = NULL; + fixuplabel(prog, NULL); + uselabel(); + if (appendnum > 0) + appends = xmalloc(sizeof(struct s_appends) * appendnum); + match = xmalloc((maxnsub + 1) * sizeof(regmatch_t)); +} + +#define EATSPACE() do { \ + if (p) \ + while (*p && isspace((unsigned char)*p)) \ + p++; \ + } while (0) + +static struct s_command ** +compile_stream(struct s_command **link) +{ + char *p; + static char lbuf[_POSIX2_LINE_MAX + 1]; /* To save stack */ + struct s_command *cmd, *cmd2, *stack; + struct s_format *fp; + char re[_POSIX2_LINE_MAX + 1]; + int naddr; /* Number of addresses */ + + stack = 0; + for (;;) { + if ((p = cu_fgets(lbuf, sizeof(lbuf), NULL)) == NULL) { + if (stack != 0) + errx(1, "%lu: %s: unexpected EOF (pending }'s)", + linenum, fname); + return (link); + } + +semicolon: EATSPACE(); + if (p) { + if (*p == '#' || *p == '\0') + continue; + else if (*p == ';') { + p++; + goto semicolon; + } + } + *link = cmd = xmalloc(sizeof(struct s_command)); + link = &cmd->next; + cmd->startline = cmd->nonsel = 0; + /* First parse the addresses */ + naddr = 0; + +/* Valid characters to start an address */ +#define addrchar(c) (strchr("0123456789/\\$", (c))) + if (addrchar(*p)) { + naddr++; + cmd->a1 = xmalloc(sizeof(struct s_addr)); + p = compile_addr(p, cmd->a1); + EATSPACE(); /* EXTENSION */ + if (*p == ',') { + p++; + EATSPACE(); /* EXTENSION */ + naddr++; + cmd->a2 = xmalloc(sizeof(struct s_addr)); + p = compile_addr(p, cmd->a2); + EATSPACE(); + } else + cmd->a2 = 0; + } else + cmd->a1 = cmd->a2 = 0; + +nonsel: /* Now parse the command */ + if (!*p) + errx(1, "%lu: %s: command expected", linenum, fname); + cmd->code = *p; + for (fp = cmd_fmts; fp->code; fp++) + if (fp->code == *p) + break; + if (!fp->code) + errx(1, "%lu: %s: invalid command code %c", linenum, fname, *p); + if (naddr > fp->naddr) + errx(1, + "%lu: %s: command %c expects up to %d address(es), found %d", + linenum, fname, *p, fp->naddr, naddr); + switch (fp->args) { + case NONSEL: /* ! */ + p++; + EATSPACE(); + cmd->nonsel = ! cmd->nonsel; + goto nonsel; + case GROUP: /* { */ + p++; + EATSPACE(); + cmd->next = stack; + stack = cmd; + link = &cmd->u.c; + if (*p) + goto semicolon; + break; + case ENDGROUP: + /* + * Short-circuit command processing, since end of + * group is really just a noop. + */ + cmd->nonsel = 1; + if (stack == 0) + errx(1, "%lu: %s: unexpected }", linenum, fname); + cmd2 = stack; + stack = cmd2->next; + cmd2->next = cmd; + /*FALLTHROUGH*/ + case EMPTY: /* d D g G h H l n N p P q x = \0 */ + p++; + EATSPACE(); + switch (*p) { + case ';': + p++; + link = &cmd->next; + goto semicolon; + case '}': + goto semicolon; + case '\0': + break; + default: + errx(1, "%lu: %s: extra characters at the end of %c command", + linenum, fname, cmd->code); + } + break; + case TEXT: /* a c i */ + p++; + EATSPACE(); + if (*p != '\\') + errx(1, +"%lu: %s: command %c expects \\ followed by text", linenum, fname, cmd->code); + p++; + EATSPACE(); + if (*p) + errx(1, + "%lu: %s: extra characters after \\ at the end of %c command", + linenum, fname, cmd->code); + cmd->t = compile_text(); + break; + case COMMENT: /* \0 # */ + break; + case WFILE: /* w */ + p++; + EATSPACE(); + if (*p == '\0') + errx(1, "%lu: %s: filename expected", linenum, fname); + cmd->t = duptoeol(p, "w command"); + if (aflag) + cmd->u.fd = -1; + else if ((cmd->u.fd = open(p, + O_WRONLY|O_APPEND|O_CREAT|O_TRUNC, + DEFFILEMODE)) == -1) + err(1, "%s", p); + break; + case RFILE: /* r */ + p++; + EATSPACE(); + if (*p == '\0') + errx(1, "%lu: %s: filename expected", linenum, fname); + else + cmd->t = duptoeol(p, "read command"); + break; + case BRANCH: /* b t */ + p++; + EATSPACE(); + if (*p == '\0') + cmd->t = NULL; + else + cmd->t = duptoeol(p, "branch"); + break; + case LABEL: /* : */ + p++; + EATSPACE(); + cmd->t = duptoeol(p, "label"); + if (strlen(p) == 0) + errx(1, "%lu: %s: empty label", linenum, fname); + enterlabel(cmd); + break; + case SUBST: /* s */ + p++; + if (*p == '\0' || *p == '\\') + errx(1, +"%lu: %s: substitute pattern can not be delimited by newline or backslash", + linenum, fname); + cmd->u.s = xcalloc(1, sizeof(struct s_subst)); + p = compile_delimited(p, re, 0); + if (p == NULL) + errx(1, + "%lu: %s: unterminated substitute pattern", linenum, fname); + + /* Compile RE with no case sensitivity temporarily */ + if (*re == '\0') + cmd->u.s->re = NULL; + else + cmd->u.s->re = compile_re(re, 0); + --p; + p = compile_subst(p, cmd->u.s); + p = compile_flags(p, cmd->u.s); + + /* Recompile RE with case sensitivity from "I" flag if any */ + if (*re == '\0') + cmd->u.s->re = NULL; + else + cmd->u.s->re = compile_re(re, cmd->u.s->icase); + EATSPACE(); + if (*p == ';') { + p++; + link = &cmd->next; + goto semicolon; + } + break; + case TR: /* y */ + p++; + p = compile_tr(p, &cmd->u.y); + EATSPACE(); + switch (*p) { + case ';': + p++; + link = &cmd->next; + goto semicolon; + case '}': + goto semicolon; + case '\0': + break; + default: + errx(1, +"%lu: %s: extra text at the end of a transform command", linenum, fname); + } + if (*p) + break; + } + } +} + +/* + * Get a delimited string. P points to the delimeter of the string; d points + * to a buffer area. Newline and delimiter escapes are processed; other + * escapes are ignored. + * + * Returns a pointer to the first character after the final delimiter or NULL + * in the case of a non-terminated string. The character array d is filled + * with the processed string. + */ +static char * +compile_delimited(char *p, char *d, int is_tr) +{ + char c; + + c = *p++; + if (c == '\0') + return (NULL); + else if (c == '\\') + errx(1, "%lu: %s: \\ can not be used as a string delimiter", + linenum, fname); + else if (c == '\n') + errx(1, "%lu: %s: newline can not be used as a string delimiter", + linenum, fname); + while (*p) { + if (*p == '[' && *p != c) { + if ((d = compile_ccl(&p, d)) == NULL) + errx(1, "%lu: %s: unbalanced brackets ([])", linenum, fname); + continue; + } else if (*p == '\\' && p[1] == '[') { + *d++ = *p++; + } else if (*p == '\\' && p[1] == c) + p++; + else if (*p == '\\' && p[1] == 'n') { + *d++ = '\n'; + p += 2; + continue; + } else if (*p == '\\' && p[1] == '\\') { + if (is_tr) + p++; + else + *d++ = *p++; + } else if (*p == c) { + *d = '\0'; + return (p + 1); + } + *d++ = *p++; + } + return (NULL); +} + + +/* compile_ccl: expand a POSIX character class */ +static char * +compile_ccl(char **sp, char *t) +{ + int c, d; + char *s = *sp; + + *t++ = *s++; + if (*s == '^') + *t++ = *s++; + if (*s == ']') + *t++ = *s++; + for (; *s && (*t = *s) != ']'; s++, t++) + if (*s == '[' && ((d = *(s+1)) == '.' || d == ':' || d == '=')) { + *++t = *++s, t++, s++; + for (c = *s; (*t = *s) != ']' || c != d; s++, t++) + if ((c = *s) == '\0') + return NULL; + } + return (*s == ']') ? *sp = ++s, ++t : NULL; +} + +/* + * Compiles the regular expression in RE and returns a pointer to the compiled + * regular expression. + * Cflags are passed to regcomp. + */ +static regex_t * +compile_re(char *re, int case_insensitive) +{ + regex_t *rep; + int eval, flags; + + + flags = rflags; + if (case_insensitive) + flags |= REG_ICASE; + rep = xmalloc(sizeof(regex_t)); + parse_escapes(re); + if ((eval = regcomp(rep, re, flags)) != 0) + errx(1, "%lu: %s: RE error: %s", + linenum, fname, strregerror(eval, rep)); + if (maxnsub < rep->re_nsub) + maxnsub = rep->re_nsub; + return (rep); +} + +static char +cton(char c, int base) +{ + switch (c) { + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': + return (char)(c - '0'); + case '8': case '9': + return base == 8 ? '?' : (char)(c - '0'); + case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': + return base == 16 ? (char)(c - 'a' + 10) : '?'; + case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': + return base == 16 ? (char)(c - 'A' + 10) : '?'; + default: + return '?'; + } +} + +static int +ston(char **pp, char *sp, int base) +{ + char *p = *pp, n; + int r = cton(p[1], base); + + if (r == '?') + return 0; + + p++; + while ((n = cton(p[1], base)) != '?' && r < 255) { + r = r * base + n; + p++; + } + *sp = (char)r; + *pp = p; + return 1; +} + +static int +unescape(char **pp, char **spp) +{ + char *p = *pp; + char *sp = *spp; + + switch (*p) { + case 'o': + if (!ston(&p, sp, 8)) + return 0; + break; + case 'd': + if (!ston(&p, sp, 10)) + return 0; + break; + case 'x': + if (!ston(&p, sp, 16)) + return 0; + break; + case 'a': + *sp = '\a'; + p++; + break; +#if 0 + // No, \b RE + case 'b': + *sp = '\b'; + break; +#endif + case 'f': + *sp = '\f'; + break; + case 'n': + *sp = '\n'; + break; + case 'r': + *sp = '\r'; + break; + case 'v': + *sp = '\v'; + break; + default: + return 0; + } + *spp = sp + 1; + *pp = p; + return 1; +} + +static void +parse_escapes(char *buf) +{ + char bracket = '\0'; + char *p, *q; + + p = q = buf; + + for (p = q = buf; *p; p++) { + if (*p == '\\' && p[1] && !bracket) { + p++; + if (unescape(&p, &q)) + continue; + *q++ = '\\'; + } + switch (*p) { + case '[': + if (!bracket) + bracket = *p; + break; + case '.': + case ':': + case '=': + if (bracket == '[' && p[-1] == '[') + bracket = *p; + break; + case ']': + if (!bracket) + break; + if (bracket == '[') + bracket = '\0'; + else if (p[-2] != bracket && p[-1] == bracket) + bracket = '['; + break; + default: + break; + } + *q++ = *p; + } + *q = '\0'; +} + +/* + * Compile the substitution string of a regular expression and set res to + * point to a saved copy of it. Nsub is the number of parenthesized regular + * expressions. + */ +static char * +compile_subst(char *p, struct s_subst *s) +{ + static char lbuf[_POSIX2_LINE_MAX + 1]; + size_t asize, size; + u_char ref; + char c, *text, *op, *sp; + int more = 1, sawesc = 0; + + c = *p++; /* Terminator character */ + if (c == '\0') + return (NULL); + + s->maxbref = 0; + s->linenum = linenum; + asize = 2 * _POSIX2_LINE_MAX + 1; + text = xmalloc(asize); + size = 0; + do { + op = sp = text + size; + for (; *p; p++) { + if (*p == '\\' || sawesc) { + /* + * If this is a continuation from the last + * buffer, we won't have a character to + * skip over. + */ + if (sawesc) + sawesc = 0; + else + p++; + + switch (*p) { + case '\0': + /* + * This escaped character is continued + * in the next part of the line. Note + * this fact, then cause the loop to + * exit w/ normal EOL case and reenter + * above with the new buffer. + */ + sawesc = 1; + p--; + continue; + case '0': case '1': case '2': case '3': + case '4': case '5': case '6': case '7': + case '8': case '9': + *sp++ = '\\'; + ref = (u_char)(*p - '0'); + if (s->re != NULL && + ref > s->re->re_nsub) + errx(1, "%lu: %s: \\%c not defined in the RE", + linenum, fname, *p); + if (s->maxbref < ref) + s->maxbref = ref; + break; + case '&': + case '\\': + *sp++ = '\\'; + break; + default: + if (unescape(&p, &sp)) + continue; + break; + } + } else if (*p == c) { + if (*++p == '\0' && more) { + if (cu_fgets(lbuf, sizeof(lbuf), &more)) + p = lbuf; + } + *sp++ = '\0'; + size += (size_t)(sp - op); + s->new = xrealloc(text, size); + return (p); + } else if (*p == '\n') { + errx(1, +"%lu: %s: unescaped newline inside substitute pattern", linenum, fname); + /* NOTREACHED */ + } + *sp++ = *p; + } + size += (size_t)(sp - op); + if (asize - size < _POSIX2_LINE_MAX + 1) { + asize *= 2; + text = xrealloc(text, asize); + } + } while (cu_fgets(p = lbuf, sizeof(lbuf), &more)); + errx(1, "%lu: %s: unterminated substitute in regular expression", + linenum, fname); + /* NOTREACHED */ +} + +/* + * Compile the flags of the s command + */ +static char * +compile_flags(char *p, struct s_subst *s) +{ + int gn; /* True if we have seen g or n */ + unsigned long nval; + char wfile[_POSIX2_LINE_MAX + 1], *q; + + s->n = 1; /* Default */ + s->p = 0; + s->wfile = NULL; + s->wfd = -1; + s->icase = 0; + for (gn = 0;;) { + EATSPACE(); /* EXTENSION */ + switch (*p) { + case 'g': + if (gn) + errx(1, +"%lu: %s: more than one number or 'g' in substitute flags", linenum, fname); + gn = 1; + s->n = 0; + break; + case '\0': + case '\n': + case ';': + return (p); + case 'p': + s->p = 1; + break; + case 'i': + case 'I': + s->icase = 1; + break; + case '1': case '2': case '3': + case '4': case '5': case '6': + case '7': case '8': case '9': + if (gn) + errx(1, +"%lu: %s: more than one number or 'g' in substitute flags", linenum, fname); + gn = 1; + errno = 0; + nval = strtoul(p, &p, 10); + if (errno == ERANGE || nval > INT_MAX) + errx(1, +"%lu: %s: overflow in the 'N' substitute flag", linenum, fname); + s->n = (int)nval; + p--; + break; + case 'w': + p++; +#ifdef HISTORIC_PRACTICE + if (*p != ' ') { + warnx("%lu: %s: space missing before w wfile", linenum, fname); + return (p); + } +#endif + EATSPACE(); + q = wfile; + while (*p) { + if (*p == '\n') + break; + *q++ = *p++; + } + *q = '\0'; + if (q == wfile) + errx(1, "%lu: %s: no wfile specified", linenum, fname); + s->wfile = strdup(wfile); + if (!aflag && (s->wfd = open(wfile, + O_WRONLY|O_APPEND|O_CREAT|O_TRUNC, + DEFFILEMODE)) == -1) + err(1, "%s", wfile); + return (p); + default: + errx(1, "%lu: %s: bad flag in substitute command: '%c'", + linenum, fname, *p); + break; + } + p++; + } +} + +/* + * Compile a translation set of strings into a lookup table. + */ +static char * +compile_tr(char *p, struct s_tr **py) +{ + struct s_tr *y; + size_t i; + const char *op, *np; + char old[_POSIX2_LINE_MAX + 1]; + char new[_POSIX2_LINE_MAX + 1]; + size_t oclen, oldlen, nclen, newlen; + mbstate_t mbs1, mbs2; + + *py = y = xmalloc(sizeof(*y)); + y->multis = NULL; + y->nmultis = 0; + + if (*p == '\0' || *p == '\\') + errx(1, + "%lu: %s: transform pattern can not be delimited by newline or backslash", + linenum, fname); + p = compile_delimited(p, old, 1); + if (p == NULL) + errx(1, "%lu: %s: unterminated transform source string", + linenum, fname); + p = compile_delimited(p - 1, new, 1); + if (p == NULL) + errx(1, "%lu: %s: unterminated transform target string", + linenum, fname); + EATSPACE(); + op = old; + oldlen = mbsrtowcs(NULL, &op, 0, NULL); + if (oldlen == (size_t)-1) + err(1, NULL); + np = new; + newlen = mbsrtowcs(NULL, &np, 0, NULL); + if (newlen == (size_t)-1) + err(1, NULL); + if (newlen != oldlen) + errx(1, "%lu: %s: transform strings are not the same length", + linenum, fname); + if (MB_CUR_MAX == 1) { + /* + * The single-byte encoding case is easy: generate a + * lookup table. + */ + for (i = 0; i <= UCHAR_MAX; i++) + y->bytetab[i] = (u_char)i; + for (; *op; op++, np++) + y->bytetab[(u_char)*op] = (u_char)*np; + } else { + /* + * Multi-byte encoding case: generate a lookup table as + * above, but only for single-byte characters. The first + * bytes of multi-byte characters have their lookup table + * entries set to 0, which causes do_tr() to search through + * an auxiliary vector of multi-byte mappings. + */ + memset(&mbs1, 0, sizeof(mbs1)); + memset(&mbs2, 0, sizeof(mbs2)); + for (i = 0; i <= UCHAR_MAX; i++) + y->bytetab[i] = (u_char)((btowc((int)i) != WEOF) ? i : 0); + while (*op != '\0') { + oclen = mbrlen(op, MB_LEN_MAX, &mbs1); + if (oclen == (size_t)-1 || oclen == (size_t)-2) + errc(1, EILSEQ, NULL); + nclen = mbrlen(np, MB_LEN_MAX, &mbs2); + if (nclen == (size_t)-1 || nclen == (size_t)-2) + errc(1, EILSEQ, NULL); + if (oclen == 1 && nclen == 1) + y->bytetab[(u_char)*op] = (u_char)*np; + else { + y->bytetab[(u_char)*op] = 0; + y->multis = xrealloc(y->multis, + (y->nmultis + 1) * sizeof(*y->multis)); + i = y->nmultis++; + y->multis[i].fromlen = oclen; + memcpy(y->multis[i].from, op, oclen); + y->multis[i].tolen = nclen; + memcpy(y->multis[i].to, np, nclen); + } + op += oclen; + np += nclen; + } + } + return (p); +} + +/* + * Compile the text following an a or i command. + */ +static char * +compile_text(void) +{ + size_t asize, size; + int esc_nl; + char *text, *p, *op, *s; + char lbuf[_POSIX2_LINE_MAX + 1]; + + asize = 2 * _POSIX2_LINE_MAX + 1; + text = xmalloc(asize); + size = 0; + while (cu_fgets(lbuf, sizeof(lbuf), NULL)) { + op = s = text + size; + p = lbuf; + for (esc_nl = 0; *p != '\0'; p++) { + if (*p == '\\' && p[1] != '\0' && *++p == '\n') + esc_nl = 1; + *s++ = *p; + } + size += (size_t)(s - op); + if (!esc_nl) { + *s = '\0'; + break; + } + if (asize - size < _POSIX2_LINE_MAX + 1) { + asize *= 2; + text = xrealloc(text, asize); + } + } + text[size] = '\0'; + p = xrealloc(text, size + 1); + return (p); +} + +/* + * Get an address and return a pointer to the first character after + * it. Fill the structure pointed to according to the address. + */ +static char * +compile_addr(char *p, struct s_addr *a) +{ + char *end, re[_POSIX2_LINE_MAX + 1]; + int icase; + + icase = 0; + + a->type = 0; + switch (*p) { + case '\\': /* Context address */ + ++p; + /* FALLTHROUGH */ + case '/': /* Context address */ + p = compile_delimited(p, re, 0); + if (p == NULL) + errx(1, "%lu: %s: unterminated regular expression", linenum, fname); + /* Check for case insensitive regexp flag */ + if (*p == 'I') { + icase = 1; + p++; + } + if (*re == '\0') + a->u.r = NULL; + else + a->u.r = compile_re(re, icase); + a->type = AT_RE; + return (p); + + case '$': /* Last line */ + a->type = AT_LAST; + return (p + 1); + + case '+': /* Relative line number */ + a->type = AT_RELLINE; + p++; + /* FALLTHROUGH */ + /* Line number */ + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + if (a->type == 0) + a->type = AT_LINE; + a->u.l = strtoul(p, &end, 10); + return (end); + default: + errx(1, "%lu: %s: expected context address", linenum, fname); + return (NULL); + } +} + +/* + * duptoeol -- + * Return a copy of all the characters up to \n or \0. + */ +static char * +duptoeol(char *s, const char *ctype) +{ + size_t len; + int ws; + char *p, *start; + + ws = 0; + for (start = s; *s != '\0' && *s != '\n'; ++s) + ws = isspace((unsigned char)*s); + *s = '\0'; + if (ws) + warnx("%lu: %s: whitespace after %s", linenum, fname, ctype); + len = (size_t)(s - start + 1); + p = xmalloc(len); + return (memmove(p, start, len)); +} + +/* + * Convert goto label names to addresses, and count a and r commands, in + * the given subset of the script. Free the memory used by labels in b + * and t commands (but not by :). + * + * TODO: Remove } nodes + */ +static void +fixuplabel(struct s_command *cp, struct s_command *end) +{ + + for (; cp != end; cp = cp->next) + switch (cp->code) { + case 'a': + case 'r': + appendnum++; + break; + case 'b': + case 't': + /* Resolve branch target. */ + if (cp->t == NULL) { + cp->u.c = NULL; + break; + } + if ((cp->u.c = findlabel(cp->t)) == NULL) + errx(1, "%lu: %s: undefined label '%s'", linenum, fname, cp->t); + free(cp->t); + break; + case '{': + /* Do interior commands. */ + fixuplabel(cp->u.c, cp->next); + break; + } +} + +/* + * Associate the given command label for later lookup. + */ +static void +enterlabel(struct s_command *cp) +{ + struct labhash **lhp, *lh; + u_char *p; + u_int h, c; + + for (h = 0, p = (u_char *)cp->t; (c = *p) != 0; p++) + h = (h << 5) + h + c; + lhp = &labels[h & LHMASK]; + for (lh = *lhp; lh != NULL; lh = lh->lh_next) + if (lh->lh_hash == h && strcmp(cp->t, lh->lh_cmd->t) == 0) + errx(1, "%lu: %s: duplicate label '%s'", linenum, fname, cp->t); + lh = xmalloc(sizeof *lh); + lh->lh_next = *lhp; + lh->lh_hash = h; + lh->lh_cmd = cp; + lh->lh_ref = 0; + *lhp = lh; +} + +/* + * Find the label contained in the command l in the command linked + * list cp. L is excluded from the search. Return NULL if not found. + */ +static struct s_command * +findlabel(char *name) +{ + struct labhash *lh; + u_char *p; + u_int h, c; + + for (h = 0, p = (u_char *)name; (c = *p) != 0; p++) + h = (h << 5) + h + c; + for (lh = labels[h & LHMASK]; lh != NULL; lh = lh->lh_next) { + if (lh->lh_hash == h && strcmp(name, lh->lh_cmd->t) == 0) { + lh->lh_ref = 1; + return (lh->lh_cmd); + } + } + return (NULL); +} + +/* + * Warn about any unused labels. As a side effect, release the label hash + * table space. + */ +static void +uselabel(void) +{ + struct labhash *lh, *next; + int i; + + for (i = 0; i < LHSZ; i++) { + for (lh = labels[i]; lh != NULL; lh = next) { + next = lh->lh_next; + if (!lh->lh_ref) + warnx("%lu: %s: unused label '%s'", + linenum, fname, lh->lh_cmd->t); + free(lh); + } + } +} diff --git a/third_party/sed/defs.h b/third_party/sed/defs.h new file mode 100644 index 000000000..3963a5e3a --- /dev/null +++ b/third_party/sed/defs.h @@ -0,0 +1,151 @@ +/* $NetBSD: defs.h,v 1.13 2020/05/15 22:39:54 christos Exp $ */ + +/*- + * Copyright (c) 1992 Diomidis Spinellis. + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Diomidis Spinellis of Imperial College, University of London. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)defs.h 8.1 (Berkeley) 6/6/93 + * $FreeBSD: head/usr.bin/sed/defs.h 192732 2009-05-25 06:45:33Z brian $ + */ + +/* + * Types of address specifications + */ +enum e_atype { + AT_RE = 1, /* Line that match RE */ + AT_LINE, /* Specific line */ + AT_RELLINE, /* Relative line */ + AT_LAST /* Last line */ +}; + +/* + * Format of an address + */ +struct s_addr { + enum e_atype type; /* Address type */ + union { + u_long l; /* Line number */ + regex_t *r; /* Regular expression */ + } u; +}; + +/* + * Substitution command + */ +struct s_subst { + int n; /* Occurrence to subst. */ + int p; /* True if p flag */ + int icase; /* True if I flag */ + char *wfile; /* NULL if no wfile */ + int wfd; /* Cached file descriptor */ + regex_t *re; /* Regular expression */ + unsigned int maxbref; /* Largest backreference. */ + u_long linenum; /* Line number. */ + char *new; /* Replacement text */ +}; + +/* + * Translate command. + */ +struct s_tr { + unsigned char bytetab[256]; + struct trmulti { + size_t fromlen; + char from[MB_LEN_MAX]; + size_t tolen; + char to[MB_LEN_MAX]; + } *multis; + size_t nmultis; +}; + +/* + * An internally compiled command. + * Initialy, label references are stored in t, on a second pass they + * are updated to pointers. + */ +struct s_command { + struct s_command *next; /* Pointer to next command */ + struct s_addr *a1, *a2; /* Start and end address */ + u_long startline; /* Start line number or zero */ + char *t; /* Text for : a c i r w */ + union { + struct s_command *c; /* Command(s) for b t { */ + struct s_subst *s; /* Substitute command */ + struct s_tr *y; /* Replace command array */ + int fd; /* File descriptor for w */ + } u; + char code; /* Command code */ + u_int nonsel:1; /* True if ! */ +}; + +/* + * Types of command arguments recognised by the parser + */ +enum e_args { + EMPTY, /* d D g G h H l n N p P q x = \0 */ + TEXT, /* a c i */ + NONSEL, /* ! */ + GROUP, /* { */ + ENDGROUP, /* } */ + COMMENT, /* # */ + BRANCH, /* b t */ + LABEL, /* : */ + RFILE, /* r */ + WFILE, /* w */ + SUBST, /* s */ + TR /* y */ +}; + +/* + * Structure containing things to append before a line is read + */ +struct s_appends { + enum {AP_STRING, AP_FILE} type; + char *s; + size_t len; +}; + +enum e_spflag { + APPEND, /* Append to the contents. */ + REPLACE /* Replace the contents. */ +}; + +/* + * Structure for a space (process, hold, otherwise). + */ +typedef struct { + char *space; /* Current space pointer. */ + size_t len; /* Current length. */ + int deleted; /* If deleted. */ + int append_newline; /* If originally terminated by \n. */ + char *back; /* Backing memory. */ + size_t blen; /* Backing memory length. */ +} SPACE; diff --git a/third_party/sed/extern.h b/third_party/sed/extern.h new file mode 100644 index 000000000..0d28591f3 --- /dev/null +++ b/third_party/sed/extern.h @@ -0,0 +1,61 @@ +/* $NetBSD: extern.h,v 1.20 2015/03/12 12:40:41 christos Exp $ */ + +/*- + * Copyright (c) 1992 Diomidis Spinellis. + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Diomidis Spinellis of Imperial College, University of London. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)extern.h 8.1 (Berkeley) 6/6/93 + * $FreeBSD: head/usr.bin/sed/extern.h 170608 2007-06-12 12:05:24Z yar $ + */ + +extern struct s_command *prog; +extern struct s_appends *appends; +extern regmatch_t *match; +extern size_t maxnsub; +extern u_long linenum; +extern size_t appendnum; +extern int aflag, eflag, nflag; +extern const char *fname, *outfname; +extern FILE *infile, *outfile; +extern int rflags; /* regex flags to use */ + +void cfclose(struct s_command *, struct s_command *); +void compile(void); +void cspace(SPACE *, const char *, size_t, enum e_spflag); +char *cu_fgets(char *, int, int *); +int mf_fgets(SPACE *, enum e_spflag); +int lastline(void); +void process(void); +void resetstate(void); +char *strregerror(int, regex_t *); +void *xmalloc(size_t); +void *xrealloc(void *, size_t); +void *xcalloc(size_t, size_t); diff --git a/third_party/sed/main.c b/third_party/sed/main.c new file mode 100644 index 000000000..8221a1140 --- /dev/null +++ b/third_party/sed/main.c @@ -0,0 +1,559 @@ +/* $NetBSD: main.c,v 1.36 2020/05/15 22:39:54 christos Exp $ */ + +/*- + * Copyright (c) 2013 Johann 'Myrkraverk' Oskarsson. + * Copyright (c) 1992 Diomidis Spinellis. + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Diomidis Spinellis of Imperial College, University of London. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#if HAVE_NBTOOL_CONFIG_H +#include "nbtool_config.h" +#endif + +#include +__RCSID("$NetBSD: main.c,v 1.36 2020/05/15 22:39:54 christos Exp $"); +#ifdef __FBSDID +__FBSDID("$FreeBSD: head/usr.bin/sed/main.c 252231 2013-06-26 04:14:19Z pfg $"); +#endif + +#ifndef lint +__COPYRIGHT("@(#) Copyright (c) 1992, 1993\ + The Regents of the University of California. All rights reserved."); +#endif + +#if 0 +static const char sccsid[] = "@(#)main.c 8.2 (Berkeley) 1/3/94"; +#endif + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#define _WITH_GETLINE +#include +#include +#include +#include + +#include "defs.h" +#include "extern.h" + +/* + * Linked list of units (strings and files) to be compiled + */ +struct s_compunit { + struct s_compunit *next; + enum e_cut {CU_FILE, CU_STRING} type; + char *s; /* Pointer to string or fname */ +}; + +/* + * Linked list pointer to compilation units and pointer to current + * next pointer. + */ +static struct s_compunit *script, **cu_nextp = &script; + +/* + * Linked list of files to be processed + */ +struct s_flist { + char *fname; + struct s_flist *next; +}; + +/* + * Linked list pointer to files and pointer to current + * next pointer. + */ +static struct s_flist *files, **fl_nextp = &files; + +FILE *infile; /* Current input file */ +FILE *outfile; /* Current output file */ + +int aflag, eflag, nflag; +int rflags = 0; +static int rval; /* Exit status */ + +static int ispan; /* Whether inplace editing spans across files */ + +/* + * Current file and line number; line numbers restart across compilation + * units, but span across input files. The latter is optional if editing + * in place. + */ +const char *fname; /* File name. */ +const char *outfname; /* Output file name */ +static char oldfname[PATH_MAX]; /* Old file name (for in-place editing) */ +static char tmpfname[PATH_MAX]; /* Temporary file name (for in-place editing) */ +static const char *inplace; /* Inplace edit file extension. */ +u_long linenum; + +static void add_compunit(enum e_cut, char *); +static void add_file(char *); +static void usage(void) __dead; + +int +main(int argc, char *argv[]) +{ + int c, fflag; + char *temp_arg; + + setprogname(argv[0]); + (void) setlocale(LC_ALL, ""); + + fflag = 0; + inplace = NULL; + + while ((c = getopt(argc, argv, "EI::ae:f:i::lnru")) != -1) + switch (c) { + case 'r': /* Gnu sed compat */ + case 'E': + rflags = REG_EXTENDED; + break; + case 'I': + inplace = optarg ? optarg : __UNCONST(""); + ispan = 1; /* span across input files */ + break; + case 'a': + aflag = 1; + break; + case 'e': + eflag = 1; + temp_arg = xmalloc(strlen(optarg) + 2); + strcpy(temp_arg, optarg); + strcat(temp_arg, "\n"); + add_compunit(CU_STRING, temp_arg); + break; + case 'f': + fflag = 1; + add_compunit(CU_FILE, optarg); + break; + case 'i': + inplace = optarg ? optarg : __UNCONST(""); + ispan = 0; /* don't span across input files */ + break; + case 'l': +#ifdef _IOLBF + c = setvbuf(stdout, NULL, _IOLBF, 0); +#else + c = setlinebuf(stdout); +#endif + if (c) + warn("setting line buffered output failed"); + break; + case 'n': + nflag = 1; + break; + case 'u': +#ifdef _IONBF + c = setvbuf(stdout, NULL, _IONBF, 0); +#else + c = -1; + errno = EOPNOTSUPP; +#endif + if (c) + warn("setting unbuffered output failed"); + break; + default: + case '?': + usage(); + } + argc -= optind; + argv += optind; + + /* First usage case; script is the first arg */ + if (!eflag && !fflag && *argv) { + add_compunit(CU_STRING, *argv); + argv++; + } + + compile(); + + /* Continue with first and start second usage */ + if (*argv) + for (; *argv; argv++) + add_file(*argv); + else + add_file(NULL); + process(); + cfclose(prog, NULL); + if (fclose(stdout)) + err(1, "stdout"); + exit(rval); +} + +static void +usage(void) +{ + (void)fprintf(stderr, + "Usage: %s [-aElnru] command [file ...]\n" + "\t%s [-aElnru] [-e command] [-f command_file] [-I[extension]]\n" + "\t [-i[extension]] [file ...]\n", getprogname(), getprogname()); + exit(1); +} + +/* + * Like fgets, but go through the chain of compilation units chaining them + * together. Empty strings and files are ignored. + */ +char * +cu_fgets(char *buf, int n, int *more) +{ + static enum {ST_EOF, ST_FILE, ST_STRING} state = ST_EOF; + static FILE *f; /* Current open file */ + static char *s; /* Current pointer inside string */ + static char string_ident[30]; + char *p; + +again: + switch (state) { + case ST_EOF: + if (script == NULL) { + if (more != NULL) + *more = 0; + return (NULL); + } + linenum = 0; + switch (script->type) { + case CU_FILE: + if ((f = fopen(script->s, "r")) == NULL) + err(1, "%s", script->s); + fname = script->s; + state = ST_FILE; + goto again; + case CU_STRING: + if (((size_t)snprintf(string_ident, + sizeof(string_ident), "\"%s\"", script->s)) >= + sizeof(string_ident) - 1) + (void)strcpy(string_ident + + sizeof(string_ident) - 6, " ...\""); + fname = string_ident; + s = script->s; + state = ST_STRING; + goto again; + default: + abort(); + } + case ST_FILE: + if ((p = fgets(buf, n, f)) != NULL) { + linenum++; + if (linenum == 1 && buf[0] == '#' && buf[1] == 'n') + nflag = 1; + if (more != NULL) + *more = !feof(f); + return (p); + } + script = script->next; + (void)fclose(f); + state = ST_EOF; + goto again; + case ST_STRING: + if (linenum == 0 && s[0] == '#' && s[1] == 'n') + nflag = 1; + p = buf; + for (;;) { + if (n-- <= 1) { + *p = '\0'; + linenum++; + if (more != NULL) + *more = 1; + return (buf); + } + switch (*s) { + case '\0': + state = ST_EOF; + if (s == script->s) { + script = script->next; + goto again; + } else { + script = script->next; + *p = '\0'; + linenum++; + if (more != NULL) + *more = 0; + return (buf); + } + case '\n': + *p++ = '\n'; + *p = '\0'; + s++; + linenum++; + if (more != NULL) + *more = 0; + return (buf); + default: + *p++ = *s++; + } + } + } + /* NOTREACHED */ + return (NULL); +} + +/* + * Like fgets, but go through the list of files chaining them together. + * Set len to the length of the line. + */ +int +mf_fgets(SPACE *sp, enum e_spflag spflag) +{ + struct stat sb; + size_t len; + static char *p = NULL; + static size_t plen = 0; + int c; + static int firstfile; + + if (infile == NULL) { + /* stdin? */ + if (files->fname == NULL) { + if (inplace != NULL) + errx(1, "-I or -i may not be used with stdin"); + infile = stdin; + fname = "stdin"; + outfile = stdout; + outfname = "stdout"; + } + firstfile = 1; + } + + for (;;) { + if (infile != NULL && (c = getc(infile)) != EOF) { + (void)ungetc(c, infile); + break; + } + /* If we are here then either eof or no files are open yet */ + if (infile == stdin) { + sp->len = 0; + return (0); + } + if (infile != NULL) { + fclose(infile); + if (*oldfname != '\0') { + /* if there was a backup file, remove it */ + unlink(oldfname); + /* + * Backup the original. Note that hard links + * are not supported on all filesystems. + */ + if ((link(fname, oldfname) != 0) && + (rename(fname, oldfname) != 0)) { + warn("rename()"); + if (*tmpfname) + unlink(tmpfname); + exit(1); + } + *oldfname = '\0'; + } + if (*tmpfname != '\0') { + if (outfile != NULL && outfile != stdout) + if (fclose(outfile) != 0) { + warn("fclose()"); + unlink(tmpfname); + exit(1); + } + outfile = NULL; + if (rename(tmpfname, fname) != 0) { + /* this should not happen really! */ + warn("rename()"); + unlink(tmpfname); + exit(1); + } + *tmpfname = '\0'; + } + outfname = NULL; + } + if (firstfile == 0) + files = files->next; + else + firstfile = 0; + if (files == NULL) { + sp->len = 0; + return (0); + } + fname = files->fname; + if (inplace != NULL) { + if (lstat(fname, &sb) != 0) + err(1, "%s", fname); + if (!(sb.st_mode & S_IFREG)) + errx(1, "%s: %s %s", fname, + "in-place editing only", + "works for regular files"); + if (*inplace != '\0') { + strlcpy(oldfname, fname, + sizeof(oldfname)); + len = strlcat(oldfname, inplace, + sizeof(oldfname)); + if (len > sizeof(oldfname)) + errx(1, "%s: name too long", fname); + } + char d_name[PATH_MAX], f_name[PATH_MAX]; + (void)strlcpy(d_name, fname, sizeof(d_name)); + (void)strlcpy(f_name, fname, sizeof(f_name)); + len = (size_t)snprintf(tmpfname, sizeof(tmpfname), + "%s/.!%ld!%s", dirname(d_name), (long)getpid(), + basename(f_name)); + if (len >= sizeof(tmpfname)) + errx(1, "%s: name too long", fname); + unlink(tmpfname); + if (outfile != NULL && outfile != stdout) + fclose(outfile); + if ((outfile = fopen(tmpfname, "w")) == NULL) + err(1, "%s", fname); + fchown(fileno(outfile), sb.st_uid, sb.st_gid); + fchmod(fileno(outfile), sb.st_mode & ALLPERMS); + outfname = tmpfname; + if (!ispan) { + linenum = 0; + resetstate(); + } + } else { + outfile = stdout; + outfname = "stdout"; + } + if ((infile = fopen(fname, "r")) == NULL) { + warn("%s", fname); + rval = 1; + continue; + } + } + /* + * We are here only when infile is open and we still have something + * to read from it. + * + * Use getline() so that we can handle essentially infinite input + * data. The p and plen are static so each invocation gives + * getline() the same buffer which is expanded as needed. + */ + ssize_t slen = getline(&p, &plen, infile); + if (slen == -1) + err(1, "%s", fname); + if (slen != 0 && p[slen - 1] == '\n') { + sp->append_newline = 1; + slen--; + } else if (!lastline()) { + sp->append_newline = 1; + } else { + sp->append_newline = 0; + } + cspace(sp, p, (size_t)slen, spflag); + + linenum++; + + return (1); +} + +/* + * Add a compilation unit to the linked list + */ +static void +add_compunit(enum e_cut type, char *s) +{ + struct s_compunit *cu; + + cu = xmalloc(sizeof(struct s_compunit)); + cu->type = type; + cu->s = s; + cu->next = NULL; + *cu_nextp = cu; + cu_nextp = &cu->next; +} + +/* + * Add a file to the linked list + */ +static void +add_file(char *s) +{ + struct s_flist *fp; + + fp = xmalloc(sizeof(struct s_flist)); + fp->next = NULL; + *fl_nextp = fp; + fp->fname = s; + fl_nextp = &fp->next; +} + +static int +next_files_have_lines(void) +{ + struct s_flist *file; + FILE *file_fd; + int ch; + + file = files; + while ((file = file->next) != NULL) { + if ((file_fd = fopen(file->fname, "r")) == NULL) + continue; + + if ((ch = getc(file_fd)) != EOF) { + /* + * This next file has content, therefore current + * file doesn't contains the last line. + */ + ungetc(ch, file_fd); + fclose(file_fd); + return (1); + } + + fclose(file_fd); + } + + return (0); +} + +int +lastline(void) +{ + int ch; + + if (feof(infile)) { + return !( + (inplace == NULL || ispan) && + next_files_have_lines()); + } + if ((ch = getc(infile)) == EOF) { + return !( + (inplace == NULL || ispan) && + next_files_have_lines()); + } + ungetc(ch, infile); + return (0); +} diff --git a/third_party/sed/misc.c b/third_party/sed/misc.c new file mode 100644 index 000000000..bd0ecdff4 --- /dev/null +++ b/third_party/sed/misc.c @@ -0,0 +1,119 @@ +/* $NetBSD: misc.c,v 1.15 2014/06/26 02:14:32 christos Exp $ */ + +/*- + * Copyright (c) 1992 Diomidis Spinellis. + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Diomidis Spinellis of Imperial College, University of London. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#if HAVE_NBTOOL_CONFIG_H +#include "nbtool_config.h" +#endif + +#include +__RCSID("$NetBSD: misc.c,v 1.15 2014/06/26 02:14:32 christos Exp $"); +#ifdef __FBSDID +__FBSDID("$FreeBSD: head/usr.bin/sed/misc.c 200462 2009-12-13 03:14:06Z delphij $"); +#endif + +#if 0 +static const char sccsid[] = "@(#)misc.c 8.1 (Berkeley) 6/6/93"; +#endif + +#include + +#include +#include +#include +#include +#include +#include + +#include "defs.h" +#include "extern.h" + +/* + * malloc with result test + */ +void * +xmalloc(size_t size) +{ + void *p; + + if ((p = malloc(size)) == NULL) + err(1, "malloc(%zu)", size); + return p; +} + +/* + * realloc with result test + */ +void * +xrealloc(void *p, size_t size) +{ + if (p == NULL) /* Compatibility hack. */ + return (xmalloc(size)); + + if ((p = realloc(p, size)) == NULL) + err(1, "realloc(%zu)", size); + return p; +} + +/* + * realloc with result test + */ +void * +xcalloc(size_t c, size_t n) +{ + void *p; + + if ((p = calloc(c, n)) == NULL) + err(1, "calloc(%zu, %zu)", c, n); + return p; +} +/* + * Return a string for a regular expression error passed. This is overkill, + * because of the silly semantics of regerror (we can never know the size of + * the buffer). + */ +char * +strregerror(int errcode, regex_t *preg) +{ + char buf[1]; + static char *oe; + size_t s; + + if (oe != NULL) + free(oe); + s = regerror(errcode, preg, buf, 0); + oe = xmalloc(s); + (void)regerror(errcode, preg, oe, s); + return (oe); +} diff --git a/third_party/sed/process.c b/third_party/sed/process.c new file mode 100644 index 000000000..a3c29a1a7 --- /dev/null +++ b/third_party/sed/process.c @@ -0,0 +1,804 @@ +/* $NetBSD: process.c,v 1.53 2020/05/15 22:39:54 christos Exp $ */ + +/*- + * Copyright (c) 1992 Diomidis Spinellis. + * Copyright (c) 1992, 1993, 1994 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Diomidis Spinellis of Imperial College, University of London. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#if HAVE_NBTOOL_CONFIG_H +#include "nbtool_config.h" +#endif + +#include +__RCSID("$NetBSD: process.c,v 1.53 2020/05/15 22:39:54 christos Exp $"); +#ifdef __FBSDID +__FBSDID("$FreeBSD: head/usr.bin/sed/process.c 192732 2009-05-25 06:45:33Z brian $"); +#endif + +#if 0 +static const char sccsid[] = "@(#)process.c 8.6 (Berkeley) 4/20/94"; +#endif + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "defs.h" +#include "extern.h" + +static SPACE HS, PS, SS, YS; +#define pd PS.deleted +#define ps PS.space +#define psl PS.len +#define psanl PS.append_newline +#define hs HS.space +#define hsl HS.len + +static __inline int applies(struct s_command *); +static void do_tr(struct s_tr *); +static void flush_appends(void); +static void lputs(char *, size_t); +static __inline int regexec_e(regex_t *, const char *, int, int, size_t); +static void regsub(SPACE *, char *, char *); +static int substitute(struct s_command *); + +struct s_appends *appends; /* Array of pointers to strings to append. */ +static size_t appendx; /* Index into appends array. */ +size_t appendnum; /* Size of appends array. */ + +static int lastaddr; /* Set by applies if last address of a range. */ +static int sdone; /* If any substitutes since last line input. */ + /* Iov structure for 'w' commands. */ +static regex_t *defpreg; +size_t maxnsub; +regmatch_t *match; + +#define OUT() do { \ + fwrite(ps, 1, psl, outfile); \ + if (psanl) fputc('\n', outfile); \ +} while (0) + +void +process(void) +{ + struct s_command *cp; + SPACE tspace; + size_t oldpsl = 0; + char *p; + int oldpsanl; + + p = NULL; + + for (linenum = 0; mf_fgets(&PS, REPLACE);) { + pd = 0; +top: + cp = prog; +redirect: + while (cp != NULL) { + if (!applies(cp)) { + cp = cp->next; + continue; + } + switch (cp->code) { + case '{': + cp = cp->u.c; + goto redirect; + case 'a': + if (appendx >= appendnum) + appends = xrealloc(appends, + sizeof(struct s_appends) * + (appendnum *= 2)); + appends[appendx].type = AP_STRING; + appends[appendx].s = cp->t; + appends[appendx].len = strlen(cp->t); + appendx++; + break; + case 'b': + cp = cp->u.c; + goto redirect; + case 'c': + pd = 1; + psl = 0; + if (cp->a2 == NULL || lastaddr || lastline()) + (void)fprintf(outfile, "%s", cp->t); + goto new; + case 'd': + pd = 1; + goto new; + case 'D': + if (pd) + goto new; + if (psl == 0 || + (p = memchr(ps, '\n', psl - 1)) == NULL) { + pd = 1; + goto new; + } else { + psl -= (size_t)((p + 1) - ps); + memmove(ps, p + 1, psl); + goto top; + } + case 'g': + cspace(&PS, hs, hsl, REPLACE); + break; + case 'G': + cspace(&PS, "\n", 1, APPEND); + cspace(&PS, hs, hsl, APPEND); + break; + case 'h': + cspace(&HS, ps, psl, REPLACE); + break; + case 'H': + cspace(&HS, "\n", 1, APPEND); + cspace(&HS, ps, psl, APPEND); + break; + case 'i': + (void)fprintf(outfile, "%s", cp->t); + break; + case 'l': + lputs(ps, psl); + break; + case 'n': + if (!nflag && !pd) + OUT(); + flush_appends(); + if (!mf_fgets(&PS, REPLACE)) + exit(0); + pd = 0; + break; + case 'N': + flush_appends(); + cspace(&PS, "\n", 1, APPEND); + if (!mf_fgets(&PS, APPEND)) + exit(0); + break; + case 'p': + if (pd) + break; + OUT(); + break; + case 'P': + if (pd) + break; + if ((p = memchr(ps, '\n', psl - 1)) != NULL) { + oldpsl = psl; + oldpsanl = psanl; + psl = (size_t)(p - ps); + psanl = 1; + } + OUT(); + if (p != NULL) { + psl = oldpsl; + psanl = oldpsanl; + } + break; + case 'q': + if (!nflag && !pd) + OUT(); + flush_appends(); + exit(0); + case 'r': + if (appendx >= appendnum) + appends = xrealloc(appends, + sizeof(struct s_appends) * + (appendnum *= 2)); + appends[appendx].type = AP_FILE; + appends[appendx].s = cp->t; + appends[appendx].len = strlen(cp->t); + appendx++; + break; + case 's': + sdone |= substitute(cp); + break; + case 't': + if (sdone) { + sdone = 0; + cp = cp->u.c; + goto redirect; + } + break; + case 'w': + if (pd) + break; + if (cp->u.fd == -1 && (cp->u.fd = open(cp->t, + O_WRONLY|O_APPEND|O_CREAT|O_TRUNC, + DEFFILEMODE)) == -1) + err(1, "%s", cp->t); + if (write(cp->u.fd, ps, psl) != (ssize_t)psl || + write(cp->u.fd, "\n", 1) != 1) + err(1, "%s", cp->t); + break; + case 'x': + /* + * If the hold space is null, make it empty + * but not null. Otherwise the pattern space + * will become null after the swap, which is + * an abnormal condition. + */ + if (hs == NULL) + cspace(&HS, "", 0, REPLACE); + tspace = PS; + PS = HS; + psanl = tspace.append_newline; + HS = tspace; + break; + case 'y': + if (pd || psl == 0) + break; + do_tr(cp->u.y); + break; + case ':': + case '}': + break; + case '=': + (void)fprintf(outfile, "%lu\n", linenum); + } + cp = cp->next; + } /* for all cp */ + +new: if (!nflag && !pd) + OUT(); + flush_appends(); + } /* for all lines */ +} + +/* + * TRUE if the address passed matches the current program state + * (lastline, linenumber, ps). + */ +#define MATCH(a) \ + ((a)->type == AT_RE ? regexec_e((a)->u.r, ps, 0, 1, psl) : \ + (a)->type == AT_LINE ? linenum == (a)->u.l : lastline()) + +/* + * Return TRUE if the command applies to the current line. Sets the start + * line for process ranges. Interprets the non-select (``!'') flag. + */ +static __inline int +applies(struct s_command *cp) +{ + int r; + + lastaddr = 0; + if (cp->a1 == NULL && cp->a2 == NULL) + r = 1; + else if (cp->a2) + if (cp->startline > 0) { + switch (cp->a2->type) { + case AT_RELLINE: + if (linenum - cp->startline <= cp->a2->u.l) + r = 1; + else { + cp->startline = 0; + r = 0; + } + break; + default: + if (MATCH(cp->a2)) { + cp->startline = 0; + lastaddr = 1; + r = 1; + } else if (cp->a2->type == AT_LINE && + linenum > cp->a2->u.l) { + /* + * We missed the 2nd address due to a + * branch, so just close the range and + * return false. + */ + cp->startline = 0; + r = 0; + } else + r = 1; + } + } else if (cp->a1 && MATCH(cp->a1)) { + /* + * If the second address is a number less than or + * equal to the line number first selected, only + * one line shall be selected. + * -- POSIX 1003.2 + * Likewise if the relative second line address is zero. + */ + if ((cp->a2->type == AT_LINE && + linenum >= cp->a2->u.l) || + (cp->a2->type == AT_RELLINE && cp->a2->u.l == 0)) + lastaddr = 1; + else { + cp->startline = linenum; + } + r = 1; + } else + r = 0; + else + r = MATCH(cp->a1); + return (cp->nonsel ? ! r : r); +} + +/* + * Reset the sed processor to its initial state. + */ +void +resetstate(void) +{ + struct s_command *cp; + + /* + * Reset all in-range markers. + */ + for (cp = prog; cp; cp = cp->code == '{' ? cp->u.c : cp->next) + if (cp->a2) + cp->startline = 0; + + /* + * Clear out the hold space. + */ + cspace(&HS, "", 0, REPLACE); +} + +/* + * substitute -- + * Do substitutions in the pattern space. Currently, we build a + * copy of the new pattern space in the substitute space structure + * and then swap them. + */ +static int +substitute(struct s_command *cp) +{ + SPACE tspace; + regex_t *re; + regoff_t re_off, slen; + int lastempty, n; + char *s; + + s = ps; + re = cp->u.s->re; + if (re == NULL) { + if (defpreg != NULL && cp->u.s->maxbref > defpreg->re_nsub) { + linenum = cp->u.s->linenum; + errx(1, "%lu: %s: \\%u not defined in the RE", + linenum, fname, cp->u.s->maxbref); + } + } + if (!regexec_e(re, s, 0, 0, psl)) + return (0); + + SS.len = 0; /* Clean substitute space. */ + slen = (regoff_t)psl; + n = cp->u.s->n; + lastempty = 1; + + switch (n) { + case 0: /* Global */ + do { + if (lastempty || match[0].rm_so != match[0].rm_eo) { + /* Locate start of replaced string. */ + re_off = match[0].rm_so; + /* Copy leading retained string. */ + cspace(&SS, s, (size_t)re_off, APPEND); + /* Add in regular expression. */ + regsub(&SS, s, cp->u.s->new); + } + + /* Move past this match. */ + if (match[0].rm_so != match[0].rm_eo) { + s += match[0].rm_eo; + slen -= match[0].rm_eo; + lastempty = 0; + } else { + if (match[0].rm_so < slen) + cspace(&SS, s + match[0].rm_so, 1, + APPEND); + s += match[0].rm_so + 1; + slen -= match[0].rm_so + 1; + lastempty = 1; + } + } while (slen >= 0 && regexec_e(re, s, REG_NOTBOL, 0, (size_t)slen)); + /* Copy trailing retained string. */ + if (slen > 0) + cspace(&SS, s, (size_t)slen, APPEND); + break; + default: /* Nth occurrence */ + while (--n) { + if (match[0].rm_eo == match[0].rm_so) + match[0].rm_eo = match[0].rm_so + 1; + s += match[0].rm_eo; + slen -= match[0].rm_eo; + if (slen < 0) + return (0); + if (!regexec_e(re, s, REG_NOTBOL, 0, (size_t)slen)) + return (0); + } + /* FALLTHROUGH */ + case 1: /* 1st occurrence */ + /* Locate start of replaced string. */ + re_off = match[0].rm_so + (s - ps); + /* Copy leading retained string. */ + cspace(&SS, ps, (size_t)re_off, APPEND); + /* Add in regular expression. */ + regsub(&SS, s, cp->u.s->new); + /* Copy trailing retained string. */ + s += match[0].rm_eo; + slen -= match[0].rm_eo; + cspace(&SS, s, (size_t)slen, APPEND); + break; + } + + /* + * Swap the substitute space and the pattern space, and make sure + * that any leftover pointers into stdio memory get lost. + */ + tspace = PS; + PS = SS; + psanl = tspace.append_newline; + SS = tspace; + SS.space = SS.back; + + /* Handle the 'p' flag. */ + if (cp->u.s->p) + OUT(); + + /* Handle the 'w' flag. */ + if (cp->u.s->wfile && !pd) { + if (cp->u.s->wfd == -1 && (cp->u.s->wfd = open(cp->u.s->wfile, + O_WRONLY|O_APPEND|O_CREAT|O_TRUNC, DEFFILEMODE)) == -1) + err(1, "%s", cp->u.s->wfile); + if (write(cp->u.s->wfd, ps, psl) != (ssize_t)psl || + write(cp->u.s->wfd, "\n", 1) != 1) + err(1, "%s", cp->u.s->wfile); + } + return (1); +} + +/* + * do_tr -- + * Perform translation ('y' command) in the pattern space. + */ +static void +do_tr(struct s_tr *y) +{ + SPACE tmp; + char c, *p; + size_t clen, left; + size_t i; + + if (MB_CUR_MAX == 1) { + /* + * Single-byte encoding: perform in-place translation + * of the pattern space. + */ + for (p = ps; p < &ps[psl]; p++) + *p = (char)y->bytetab[(u_char)*p]; + } else { + /* + * Multi-byte encoding: perform translation into the + * translation space, then swap the translation and + * pattern spaces. + */ + /* Clean translation space. */ + YS.len = 0; + for (p = ps, left = psl; left > 0; p += clen, left -= clen) { + if ((c = (char)y->bytetab[(u_char)*p]) != '\0') { + cspace(&YS, &c, 1, APPEND); + clen = 1; + continue; + } + for (i = 0; i < y->nmultis; i++) + if (left >= y->multis[i].fromlen && + memcmp(p, y->multis[i].from, + y->multis[i].fromlen) == 0) + break; + if (i < y->nmultis) { + cspace(&YS, y->multis[i].to, + y->multis[i].tolen, APPEND); + clen = y->multis[i].fromlen; + } else { + cspace(&YS, p, 1, APPEND); + clen = 1; + } + } + /* Swap the translation space and the pattern space. */ + tmp = PS; + PS = YS; + psanl = tmp.append_newline; + YS = tmp; + YS.space = YS.back; + } +} + +/* + * Flush append requests. Always called before reading a line, + * therefore it also resets the substitution done (sdone) flag. + */ +static void +flush_appends(void) +{ + FILE *f; + size_t count, i; + char buf[8 * 1024]; + + for (i = 0; i < appendx; i++) + switch (appends[i].type) { + case AP_STRING: + fwrite(appends[i].s, sizeof(char), appends[i].len, + outfile); + break; + case AP_FILE: + /* + * Read files probably shouldn't be cached. Since + * it's not an error to read a non-existent file, + * it's possible that another program is interacting + * with the sed script through the filesystem. It + * would be truly bizarre, but possible. It's probably + * not that big a performance win, anyhow. + */ + if ((f = fopen(appends[i].s, "r")) == NULL) + break; + while ((count = fread(buf, sizeof(char), sizeof(buf), f))) + (void)fwrite(buf, sizeof(char), count, outfile); + (void)fclose(f); + break; + } + if (ferror(outfile)) + errx(1, "%s: %s", outfname, strerror(errno ? errno : EIO)); + appendx = 0; + sdone = 0; +} + +static void +lputs(char *s, size_t len) +{ + static const char escapes[] = "\\\a\b\f\r\t\v"; + int c; + size_t col, width; + const char *p; +#ifdef TIOCGWINSZ + struct winsize win; +#endif + static size_t termwidth = (size_t)-1; + size_t clen, i; + wchar_t wc; + mbstate_t mbs; + + if (outfile != stdout) + termwidth = 60; + if (termwidth == (size_t)-1) { + if ((p = getenv("COLUMNS")) && *p != '\0') + termwidth = (size_t)atoi(p); +#ifdef TIOCGWINSZ + else if (ioctl(STDOUT_FILENO, TIOCGWINSZ, &win) == 0 && + win.ws_col > 0) + termwidth = win.ws_col; +#endif + else + termwidth = 60; + } + if (termwidth == 0) + termwidth = 1; + + memset(&mbs, 0, sizeof(mbs)); + col = 0; + while (len != 0) { + clen = mbrtowc(&wc, s, len, &mbs); + if (clen == 0) + clen = 1; + if (clen == (size_t)-1 || clen == (size_t)-2) { + wc = (unsigned char)*s; + clen = 1; + memset(&mbs, 0, sizeof(mbs)); + } + if (wc == '\n') { + if (col + 1 >= termwidth) + fprintf(outfile, "\\\n"); + fputc('$', outfile); + fputc('\n', outfile); + col = 0; + } else if (iswprint(wc)) { + width = (size_t)wcwidth(wc); + if (col + width >= termwidth) { + fprintf(outfile, "\\\n"); + col = 0; + } + fwrite(s, 1, clen, outfile); + col += width; + } else if (wc != L'\0' && (c = wctob(wc)) != EOF && + (p = strchr(escapes, c)) != NULL) { + if (col + 2 >= termwidth) { + fprintf(outfile, "\\\n"); + col = 0; + } + fprintf(outfile, "\\%c", "\\abfrtv"[p - escapes]); + col += 2; + } else { + if (col + 4 * clen >= termwidth) { + fprintf(outfile, "\\\n"); + col = 0; + } + for (i = 0; i < clen; i++) + fprintf(outfile, "\\%03o", + (int)(unsigned char)s[i]); + col += 4 * clen; + } + s += clen; + len -= clen; + } + if (col + 1 >= termwidth) + fprintf(outfile, "\\\n"); + (void)fputc('$', outfile); + (void)fputc('\n', outfile); + if (ferror(outfile)) + errx(1, "%s: %s", outfname, strerror(errno ? errno : EIO)); +} + +static __inline int +regexec_e(regex_t *preg, const char *string, int eflags, int nomatch, + size_t slen) +{ + int eval; +#ifndef REG_STARTEND + char *buf; +#endif + + if (preg == NULL) { + if (defpreg == NULL) + errx(1, "first RE may not be empty"); + } else + defpreg = preg; + + /* Set anchors */ +#ifndef REG_STARTEND + buf = xmalloc(slen + 1); + (void)memcpy(buf, string, slen); + buf[slen] = '\0'; + eval = regexec(defpreg, buf, + nomatch ? 0 : maxnsub + 1, match, eflags); + free(buf); +#else + match[0].rm_so = 0; + match[0].rm_eo = (regoff_t)slen; + eval = regexec(defpreg, string, + nomatch ? 0 : maxnsub + 1, match, eflags | REG_STARTEND); +#endif + switch(eval) { + case 0: + return (1); + case REG_NOMATCH: + return (0); + } + errx(1, "RE error: %s", strregerror(eval, defpreg)); + /* NOTREACHED */ +} + +/* + * regsub - perform substitutions after a regexp match + * Based on a routine by Henry Spencer + */ +static void +regsub(SPACE *sp, char *string, char *src) +{ + size_t len; + int no; + char c, *dst; + +#define NEEDSP(reqlen) \ + /* XXX What is the +1 for? */ \ + if (sp->len + (reqlen) + 1 >= sp->blen) { \ + sp->blen += (reqlen) + 1024; \ + sp->space = sp->back = xrealloc(sp->back, sp->blen); \ + dst = sp->space + sp->len; \ + } + + dst = sp->space + sp->len; + while ((c = *src++) != '\0') { + if (c == '&') + no = 0; + else if (c == '\\' && isdigit((unsigned char)*src)) + no = *src++ - '0'; + else + no = -1; + if (no < 0) { /* Ordinary character. */ + if (c == '\\' && (*src == '\\' || *src == '&')) + c = *src++; + NEEDSP(1); + *dst++ = c; + ++sp->len; + } else if (match[no].rm_so != -1 && match[no].rm_eo != -1) { + len = (size_t)(match[no].rm_eo - match[no].rm_so); + NEEDSP(len); + memmove(dst, string + match[no].rm_so, len); + dst += len; + sp->len += len; + } + } + NEEDSP(1); + *dst = '\0'; +} + +/* + * cspace -- + * Concatenate space: append the source space to the destination space, + * allocating new space as necessary. + */ +void +cspace(SPACE *sp, const char *p, size_t len, enum e_spflag spflag) +{ + size_t tlen; + + /* Make sure SPACE has enough memory and ramp up quickly. */ + tlen = sp->len + len + 1; + if (tlen > sp->blen) { + sp->blen = tlen + 1024; + sp->space = sp->back = xrealloc(sp->back, sp->blen); + } + + if (spflag == REPLACE) + sp->len = 0; + + memmove(sp->space + sp->len, p, len); + + sp->space[sp->len += len] = '\0'; +} + +/* + * Close all cached opened files and report any errors + */ +void +cfclose(struct s_command *cp, struct s_command *end) +{ + + for (; cp != end; cp = cp->next) + switch(cp->code) { + case 's': + if (cp->u.s->wfd != -1 && close(cp->u.s->wfd)) + err(1, "%s", cp->u.s->wfile); + cp->u.s->wfd = -1; + break; + case 'w': + if (cp->u.fd != -1 && close(cp->u.fd)) + err(1, "%s", cp->t); + cp->u.fd = -1; + break; + case '{': + cfclose(cp->u.c, cp->next); + break; + } +} diff --git a/third_party/sed/sed.1 b/third_party/sed/sed.1 new file mode 100644 index 000000000..82b3cbd0c --- /dev/null +++ b/third_party/sed/sed.1 @@ -0,0 +1,640 @@ +.\" $NetBSD: sed.1,v 1.40 2014/06/25 02:05:58 uwe Exp $ +.\" Copyright (c) 1992, 1993 +.\" The Regents of the University of California. All rights reserved. +.\" +.\" This code is derived from software contributed to Berkeley by +.\" the Institute of Electrical and Electronics Engineers, Inc. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" 3. Neither the name of the University nor the names of its contributors +.\" may be used to endorse or promote products derived from this software +.\" without specific prior written permission. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.\" @(#)sed.1 8.2 (Berkeley) 12/30/93 +.\" $FreeBSD: head/usr.bin/sed/sed.1 259132 2013-12-09 18:57:20Z eadler $ +.\" +.Dd June 18, 2014 +.Dt SED 1 +.Os +.Sh NAME +.Nm sed +.Nd stream editor +.Sh SYNOPSIS +.Nm +.Op Fl aElnru +.Ar command +.Op Ar +.Nm +.Op Fl aElnru +.Op Fl e Ar command +.Op Fl f Ar command_file +.Op Fl I Ns Op Ar extension +.Op Fl i Ns Op Ar extension +.Op Ar +.Sh DESCRIPTION +The +.Nm +utility reads the specified files, or the standard input if no files +are specified, modifying the input as specified by a list of commands. +The input is then written to the standard output. +.Pp +A single command may be specified as the first argument to +.Nm . +Multiple commands may be specified by using the +.Fl e +or +.Fl f +options. +All commands are applied to the input in the order they are specified +regardless of their origin. +.Pp +The following options are available: +.Bl -tag -width indent +.It Fl a +The files listed as parameters for the +.Dq w +functions are created (or truncated) before any processing begins, +by default. +The +.Fl a +option causes +.Nm +to delay opening each file until a command containing the related +.Dq w +function is applied to a line of input. +.It Fl E +Interpret regular expressions as extended (modern) regular expressions +rather than basic regular expressions (BRE's). +The +.Xr re_format 7 +manual page fully describes both formats. +.It Fl e Ar command +Append the editing commands specified by the +.Ar command +argument +to the list of commands. +.It Fl f Ar command_file +Append the editing commands found in the file +.Ar command_file +to the list of commands. +The editing commands should each be listed on a separate line. +.It Fl I Ns Op Ar extension +Edit files in-place, saving backups with the specified +.Ar extension . +If no +.Ar extension +is given, no backup will be saved. +It is not recommended to give a zero-length +.Ar extension +when in-place editing files, as you risk corruption or partial content +in situations where disk space is exhausted, etc. +.Pp +Note that in-place editing with +.Fl I +still takes place in a single continuous line address space covering +all files, although each file preserves its individuality instead of +forming one output stream. +The line counter is never reset between files, address ranges can span +file boundaries, and the +.Dq $ +address matches only the last line of the last file. +(See +.Sx "Sed Addresses" . ) +That can lead to unexpected results in many cases of in-place editing, +where using +.Fl i +is desired. +.It Fl i Ns Op Ar extension +Edit files in-place similarly to +.Fl I , +but treat each file independently from other files. +In particular, line numbers in each file start at 1, +the +.Dq $ +address matches the last line of the current file, +and address ranges are limited to the current file. +(See +.Sx "Sed Addresses" . ) +The net result is as though each file were edited by a separate +.Nm +instance. +.It Fl l +Make output line buffered. +.It Fl n +By default, each line of input is echoed to the standard output after +all of the commands have been applied to it. +The +.Fl n +option suppresses this behavior. +.It Fl r +Same as +.Fl E +for compatibility with GNU sed. +.It Fl u +Make output unbuffered. +.El +.Pp +The form of a +.Nm +command is as follows: +.Pp +.Dl [address[,address]]function[arguments] +.Pp +Whitespace may be inserted before the first address and the function +portions of the command. +.Pp +Normally, +.Nm +cyclically copies a line of input, not including its terminating newline +character, into a +.Em "pattern space" , +(unless there is something left after a +.Dq D +function), +applies all of the commands with addresses that select that pattern space, +copies the pattern space to the standard output, appending a newline, and +deletes the pattern space. +.Pp +Some of the functions use a +.Em "hold space" +to save all or part of the pattern space for subsequent retrieval. +.Ss "Sed Addresses" +An address is not required, but if specified must have one of the +following formats: +.Bl -bullet -offset indent +.It +a number that counts +input lines +cumulatively across input files (or in each file independently +if a +.Fl i +option is in effect); +.It +a dollar +.Pq Dq $ +character that addresses the last line of input (or the last line +of the current file if a +.Fl i +option was specified); +.It +a context address +that consists of a regular expression preceded and followed by a +delimiter. +The closing delimiter can also optionally be followed by the +.Dq i +character, to indicate that the regular expression is to be matched +in a case-insensitive way. +.El +.Pp +A command line with no addresses selects every pattern space. +.Pp +A command line with one address selects all of the pattern spaces +that match the address. +.Pp +A command line with two addresses selects an inclusive range. +This +range starts with the first pattern space that matches the first +address. +The end of the range is the next following pattern space +that matches the second address. +If the second address is a number +less than or equal to the line number first selected, only that +line is selected. +The number in the second address may be prefixed with a +.Pq Dq \&+ +to specify the number of lines to match after the first pattern. +In the case when the second address is a context +address, +.Nm +does not re-match the second address against the +pattern space that matched the first address. +Starting at the +first line following the selected range, +.Nm +starts looking again for the first address. +.Pp +Editing commands can be applied to non-selected pattern spaces by use +of the exclamation character +.Pq Dq \&! +function. +.Ss "Sed Regular Expressions" +The regular expressions used in +.Nm , +by default, are basic regular expressions (BREs, see +.Xr re_format 7 +for more information), but extended (modern) regular expressions can be used +instead if the +.Fl E +flag is given. +In addition, +.Nm +has the following two additions to regular expressions: +.Pp +.Bl -enum -compact +.It +In a context address, any character other than a backslash +.Pq Dq \e +or newline character may be used to delimit the regular expression. +The opening delimiter needs to be preceded by a backslash +unless it is a slash. +For example, the context address +.Li \exabcx +is equivalent to +.Li /abc/ . +Also, putting a backslash character before the delimiting character +within the regular expression causes the character to be treated literally. +For example, in the context address +.Li \exabc\exdefx , +the RE delimiter is an +.Dq x +and the second +.Dq x +stands for itself, so that the regular expression is +.Dq abcxdef . +.Pp +.It +The escape sequence \en matches a newline character embedded in the +pattern space. +You cannot, however, use a literal newline character in an address or +in the substitute command. +.El +.Pp +One special feature of +.Nm +regular expressions is that they can default to the last regular +expression used. +If a regular expression is empty, i.e., just the delimiter characters +are specified, the last regular expression encountered is used instead. +The last regular expression is defined as the last regular expression +used as part of an address or substitute command, and at run-time, not +compile-time. +For example, the command +.Dq /abc/s//XXX/ +will substitute +.Dq XXX +for the pattern +.Dq abc . +.Ss "Sed Functions" +In the following list of commands, the maximum number of permissible +addresses for each command is indicated by [0addr], [1addr], or [2addr], +representing zero, one, or two addresses. +.Pp +The argument +.Em text +consists of one or more lines. +To embed a newline in the text, precede it with a backslash. +Other backslashes in text are deleted and the following character +taken literally. +.Pp +The +.Dq r +and +.Dq w +functions take an optional file parameter, which should be separated +from the function letter by white space. +Each file given as an argument to +.Nm +is created (or its contents truncated) before any input processing begins. +.Pp +The +.Dq b , +.Dq r , +.Dq s , +.Dq t , +.Dq w , +.Dq y , +.Dq \&! , +and +.Dq \&: +functions all accept additional arguments. +The following synopses indicate which arguments have to be separated from +the function letters by white space characters. +.Pp +Two of the functions take a function-list. +This is a list of +.Nm +functions separated by newlines, as follows: +.Bd -literal -offset indent +{ function + function + ... + function +} +.Ed +.Pp +The +.Dq { +can be preceded by white space and can be followed by white space. +The function can be preceded by white space. +The terminating +.Dq } +must be preceded by a newline, and may also be preceded by white space. +.Pp +.Bl -tag -width "XXXXXX" -compact +.It [2addr] function-list +Execute function-list only when the pattern space is selected. +.Pp +.It [1addr]a\e +.It text +Write +.Em text +to standard output immediately before each attempt to read a line of input, +whether by executing the +.Dq N +function or by beginning a new cycle. +.Pp +.It [2addr]b[label] +Branch to the +.Dq \&: +function with the specified label. +If the label is not specified, branch to the end of the script. +.Pp +.It [2addr]c\e +.It text +Delete the pattern space. +With 0 or 1 address or at the end of a 2-address range, +.Em text +is written to the standard output. +.Pp +.It [2addr]d +Delete the pattern space and start the next cycle. +.Pp +.It [2addr]D +Delete the initial segment of the pattern space through the first +newline character and start the next cycle. +.Pp +.It [2addr]g +Replace the contents of the pattern space with the contents of the +hold space. +.Pp +.It [2addr]G +Append a newline character followed by the contents of the hold space +to the pattern space. +.Pp +.It [2addr]h +Replace the contents of the hold space with the contents of the +pattern space. +.Pp +.It [2addr]H +Append a newline character followed by the contents of the pattern space +to the hold space. +.Pp +.It [1addr]i\e +.It text +Write +.Em text +to the standard output. +.Pp +.It [2addr]l +(The letter ell.) +Write the pattern space to the standard output in a visually unambiguous +form. +This form is as follows: +.Pp +.Bl -tag -width "carriage-returnXX" -offset indent -compact +.It backslash +\e\e +.It alert +\ea +.It form-feed +\ef +.It carriage-return +\er +.It tab +\et +.It vertical tab +\ev +.El +.Pp +Nonprintable characters are written as three-digit octal numbers (with a +preceding backslash) for each byte in the character (most significant byte +first). +Long lines are folded, with the point of folding indicated by displaying +a backslash followed by a newline. +The end of each line is marked with a +.Dq $ . +.Pp +.It [2addr]n +Write the pattern space to the standard output if the default output has +not been suppressed, and replace the pattern space with the next line of +input. +.Pp +.It [2addr]N +Append the next line of input to the pattern space, using an embedded +newline character to separate the appended material from the original +contents. +Note that the current line number changes. +.Pp +.It [2addr]p +Write the pattern space to standard output. +.Pp +.It [2addr]P +Write the pattern space, up to the first newline character to the +standard output. +.Pp +.It [1addr]q +Branch to the end of the script and quit without starting a new cycle. +.Pp +.It [1addr]r file +Copy the contents of +.Em file +to the standard output immediately before the next attempt to read a +line of input. +If +.Em file +cannot be read for any reason, it is silently ignored and no error +condition is set. +.Pp +.It [2addr]s/regular expression/replacement/flags +Substitute the replacement string for the first instance of the regular +expression in the pattern space. +Any character other than backslash or newline can be used instead of +a slash to delimit the RE and the replacement. +Within the RE and the replacement, the RE delimiter itself can be used as +a literal character if it is preceded by a backslash. +.Pp +An ampersand +.Pq Dq & +appearing in the replacement is replaced by the string matching the RE. +The special meaning of +.Dq & +in this context can be suppressed by preceding it by a backslash. +The string +.Dq \e# , +where +.Dq # +is a digit, is replaced by the text matched +by the corresponding backreference expression (see +.Xr re_format 7 ) . +.Pp +A line can be split by substituting a newline character into it. +To specify a newline character in the replacement string, precede it with +a backslash. +.Pp +The value of +.Em flags +in the substitute function is zero or more of the following: +.Bl -tag -width "XXXXXX" -offset indent +.It Ar N +Make the substitution only for the +.Ar N Ns 'th +occurrence of the regular expression in the pattern space. +.It g +Make the substitution for all non-overlapping matches of the +regular expression, not just the first one. +.It p +Write the pattern space to standard output if a replacement was made. +If the replacement string is identical to that which it replaces, it +is still considered to have been a replacement. +.It w Em file +Append the pattern space to +.Em file +if a replacement was made. +If the replacement string is identical to that which it replaces, it +is still considered to have been a replacement. +.It i or I +Match the regular expression in a case-insensitive way. +.El +.Pp +.It [2addr]t [label] +Branch to the +.Dq \&: +function bearing the label if any substitutions have been made since the +most recent reading of an input line or execution of a +.Dq t +function. +If no label is specified, branch to the end of the script. +.Pp +.It [2addr]w Em file +Append the pattern space to the +.Em file . +.Pp +.It [2addr]x +Swap the contents of the pattern and hold spaces. +.Pp +.It [2addr]y/string1/string2/ +Replace all occurrences of characters in +.Em string1 +in the pattern space with the corresponding characters from +.Em string2 . +Any character other than a backslash or newline can be used instead of +a slash to delimit the strings. +Within +.Em string1 +and +.Em string2 , +a backslash followed by any character other than a newline is that literal +character, and a backslash followed by an ``n'' is replaced by a newline +character. +.Pp +.It [2addr]!function +.It [2addr]!function-list +Apply the function or function-list only to the lines that are +.Em not +selected by the address(es). +.Pp +.It [0addr]:label +This function does nothing; it bears a label to which the +.Dq b +and +.Dq t +commands may branch. +.Pp +.It [1addr]= +Write the line number to the standard output followed by a newline +character. +.Pp +.It [0addr] +Empty lines are ignored. +.Pp +.It [0addr]# +The +.Dq # +and the remainder of the line are ignored (treated as a comment), with +the single exception that if the first two characters in the file are +.Dq #n , +the default output is suppressed. +This is the same as specifying the +.Fl n +option on the command line. +.El +.Sh ENVIRONMENT +The +.Ev COLUMNS , LANG , LC_ALL , LC_CTYPE +and +.Ev LC_COLLATE +environment variables affect the execution of +.Nm +as described in +.Xr environ 7 . +.Sh EXIT STATUS +.Ex -std +.Sh SEE ALSO +.Xr awk 1 , +.Xr ed 1 , +.Xr grep 1 , +.Xr regex 3 , +.Xr re_format 7 +.Sh STANDARDS +The +.Nm +utility is expected to be a superset of the +.St -p1003.2 +specification. +.Pp +The +.Fl a , E , I , +and +.Fl i +options, the prefixing +.Dq \&+ +in the second member of an address range, +as well as the +.Dq I +flag to the address regular expression and substitution command are +non-standard +.Fx +extensions and may not be available on other operating systems. +.Sh HISTORY +A +.Nm +command, written by +.An L. E. McMahon , +appeared in +.At v7 . +.Sh AUTHORS +.An "Diomidis D. Spinellis" Aq dds@FreeBSD.org +.Sh BUGS +Multibyte characters containing a byte with value 0x5C +.Tn ( ASCII +.Ql \e ) +may be incorrectly treated as line continuation characters in arguments to the +.Dq a , +.Dq c +and +.Dq i +commands. +Multibyte characters cannot be used as delimiters with the +.Dq s +and +.Dq y +commands.