1 /* sed.c - stream editor. Thing that does s/// and other stuff.
3 * Copyright 2014 Rob Landley <rob@landley.net>
5 * See http://pubs.opengroup.org/onlinepubs/9699919799/utilities/sed.html
7 * TODO: lines > 2G could signed int wrap length counters. Not just getline()
10 USE_SED(NEWTOY(sed, "(version)e*f*inr", TOYFLAG_USR|TOYFLAG_BIN|TOYFLAG_LOCALE))
16 usage: sed [-inr] [-e SCRIPT]...|SCRIPT [-f SCRIPT_FILE]... [FILE...]
18 Stream editor. Apply one or more editing SCRIPTs to each line of input
19 (from FILE or stdin) producing output (by default to stdout).
22 -f add contents of SCRIPT_FILE to list
23 -i Edit each file in place.
24 -n No default output. (Use the p command to output matched lines.)
25 -r Use extended regular expression syntax.
26 -s Treat input files separately (implied by -i)
28 A SCRIPT is a series of one or more COMMANDs separated by newlines or
29 semicolons. All -e SCRIPTs are concatenated together as if separated
30 by newlines, followed by all lines from -f SCRIPT_FILEs, in order.
31 If no -e or -f SCRIPTs are specified, the first argument is the SCRIPT.
33 Each COMMAND may be preceded by an address which limits the command to
34 apply only to the specified line(s). Commands without an address apply to
35 every line. Addresses are of the form:
37 [ADDRESS[,ADDRESS]]COMMAND
39 The ADDRESS may be a decimal line number (starting at 1), a /regular
40 expression/ within a pair of forward slashes, or the character "$" which
41 matches the last line of input. (In -s or -i mode this matches the last
42 line of each file, otherwise just the last line of the last file.) A single
43 address matches one line, a pair of comma separated addresses match
44 everything from the first address to the second address (inclusive). If
45 both addresses are regular expressions, more than one range of lines in
48 REGULAR EXPRESSIONS in sed are started and ended by the same character
49 (traditionally / but anything except a backslash or a newline works).
50 Backslashes may be used to escape the delimiter if it occurs in the
51 regex, and for the usual printf escapes (\abcefnrtv and octal, hex,
52 and unicode). An empty regex repeats the previous one. ADDRESS regexes
53 (above) require the first delimiter to be escaped with a backslash when
54 it isn't a forward slash (to distinguish it from the COMMANDs below).
56 Sed mostly operates on individual lines one at a time. It reads each line,
57 processes it, and either writes it to the output or discards it before
58 reading the next line. Sed can remember one additional line in a separate
59 buffer (using the h, H, g, G, and x commands), and can read the next line
60 of input early (using the n and N command), but other than that command
61 scripts operate on individual lines of text.
63 Each COMMAND starts with a single character. The following commands take
66 { Start a new command block, continuing until a corresponding "}".
67 Command blocks may nest. If the block has an address, commands within
68 the block are only run for lines within the block's address range.
70 } End command block (this command cannot have an address)
72 d Delete this line and move on to the next one
73 (ignores remaining COMMANDs)
75 D Delete one line of input and restart command SCRIPT (same as "d"
76 unless you've glued lines together with "N" or similar)
78 g Get remembered line (overwriting current line)
80 G Get remembered line (appending to current line)
82 h Remember this line (overwriting remembered line)
84 H Remember this line (appending to remembered line, if any)
86 l Print line, escaping \abfrtv (but not newline), octal escaping other
87 nonprintable characters, wrapping lines to terminal width with a
88 backslash, and appending $ to actual end of line.
90 n Print default output and read next line, replacing current line
91 (If no next line available, quit processing script)
93 N Append next line of input to this line, separated by a newline
94 (This advances the line counter for address matching and "=", if no
95 next line available quit processing script without default output)
99 P Print this line up to first newline (from "N")
101 q Quit (print default output, no more commands processed or lines read)
103 x Exchange this line with remembered line (overwrite in both directions)
105 = Print the current line number (followed by a newline)
107 The following commands (may) take an argument. The "text" arguments (to
108 the "a", "i", and "c" commands) may end with an unescaped "\" to append
109 the next line (for which leading whitespace is not skipped), and also
110 treat ";" as a literal character (use "\;" instead).
112 a [text] Append text to output before attempting to read next line
114 b [label] Branch, jumps to :label (or with no label, to end of SCRIPT)
116 c [text] Delete line, output text at end of matching address range
117 (ignores remaining COMMANDs)
121 r [file] Append contents of file to output before attempting to read
124 s/S/R/F Search for regex S, replace matched text with R using flags F.
125 The first character after the "s" (anything but newline or
126 backslash) is the delimiter, escape with \ to use normally.
128 The replacement text may contain "&" to substitute the matched
129 text (escape it with backslash for a literal &), or \1 through
130 \9 to substitute a parenthetical subexpression in the regex.
131 You can also use the normal backslash escapes such as \n and
132 a backslash at the end of the line appends the next line.
136 [0-9] A number, substitute only that occurrence of pattern
137 g Global, substitute all occurrences of pattern
138 i Ignore case when matching
139 p Print the line if match was found and replaced
140 w [file] Write (append) line to file if match replaced
142 t [label] Test, jump to :label only if an "s" command found a match in
143 this line since last test (replacing with same text counts)
145 T [label] Test false, jump only if "s" hasn't found a match.
147 w [file] Write (append) line to file
149 y/old/new/ Change each character in 'old' to corresponding character
150 in 'new' (with standard backslash escapes, delimiter can be
151 any repeated character except \ or \n)
153 : [label] Labeled target for jump commands
155 # Comment, ignore rest of this line of SCRIPT
157 Deviations from posix: allow extended regular expressions with -r,
158 editing in place with -i, separate with -s, printf escapes in text, line
159 continuations, semicolons after all commands, 2-address anywhere an
160 address is allowed, "T" command, multiline continuations for [aci],
161 \; to end [aci] argument before end of line.
171 // processed pattern list
172 struct double_list *pattern;
174 char *nextline, *remember;
175 void *restart, *lastregex;
176 long nextlen, rememberlen, count;
182 struct step *next, *prev;
184 // Begin and end of each match
186 int rmatch[2], arg1, arg2, w; // offsets because realloc()
187 unsigned not, hit, sflags;
191 // Write out line with potential embedded NUL, handling eol/noeol
192 static int emit(char *line, long len, int eol)
194 int l, old = line[len];
196 if (TT.noeol && !writeall(TT.fdout, "\n", 1)) return 1;
197 if (eol) line[len++] = '\n';
199 TT.noeol = len && !eol;
200 l = writeall(TT.fdout, line, len);
201 if (eol) line[len-1] = old;
203 perror_msg("short write");
211 // Do regex matching handling embedded NUL bytes in string. Note that
212 // neither the pattern nor the match can currently include NUL bytes
213 // (even with wildcards) and string must be null terminated.
214 static int ghostwheel(regex_t *preg, char *string, long len, int nmatch,
215 regmatch_t pmatch[], int eflags)
227 while (s[ll] && ll<len) ll++;
229 rc = regexec(preg, s, nmatch, pmatch, eflags);
231 for (rc = 0; rc<nmatch && pmatch[rc].rm_so!=-1; rc++) {
232 pmatch[rc].rm_so += s-string;
233 pmatch[rc].rm_eo += s-string;
238 if (ll==len) return rc;
245 // Extend allocation to include new string, with newline between if newlen<0
247 static char *extend_string(char **old, char *new, int oldlen, int newlen)
249 int newline = newlen < 0;
252 if (newline) newlen = -newlen;
253 s = *old = xrealloc(*old, oldlen+newlen+newline+1);
254 if (newline) s[oldlen++] = '\n';
255 memcpy(s+oldlen, new, newlen);
256 s[oldlen+newlen] = 0;
258 return s+oldlen+newlen+1;
261 // An empty regex repeats the previous one
262 void *get_regex(void *trump, int offset)
265 if (!TT.lastregex) error_exit("no previous regex");
269 return TT.lastregex = offset+(char *)trump;
272 // Apply pattern to line from input file
273 static void walk_pattern(char **pline, long plen)
276 struct append *next, *prev;
280 char *line = TT.nextline;
281 long len = TT.nextlen;
283 int eol = 0, tea = 0;
285 // Grab next line for deferred processing (EOF detection: we get a NULL
286 // pline at EOF to flush last line). Note that only end of _last_ input
287 // file matches $ (unless we're doing -i).
291 TT.nextline = *pline;
296 if (!line || !len) return;
297 if (line[len-1] == '\n') line[--len] = eol++;
300 logrus = TT.restart ? TT.restart : (void *)TT.pattern;
304 char *str, c = logrus->c;
306 // Have we got a line or regex matching range for this rule?
307 if (*logrus->lmatch || *logrus->rmatch) {
311 // In a match that might end?
313 if (!(lm = logrus->lmatch[1])) {
314 if (!logrus->rmatch[1]) logrus->hit = 0;
316 void *rm = get_regex(logrus, logrus->rmatch[1]);
318 // regex match end includes matching line, so defer deactivation
319 if (line && !ghostwheel(rm, line, len, 0, 0, 0)) miss = 1;
321 } else if (lm > 0 && lm < TT.count) logrus->hit = 0;
323 // Start a new match?
325 if (!(lm = *logrus->lmatch)) {
326 void *rm = get_regex(logrus, *logrus->rmatch);
328 if (line && !ghostwheel(rm, line, len, 0, 0, 0)) logrus->hit++;
329 } else if (lm == TT.count || (lm == -1 && !pline)) logrus->hit++;
331 if (!logrus->lmatch[1] && !logrus->rmatch[1]) miss = 1;
335 lm = !(logrus->hit ^ logrus->not);
337 // Deferred disable from regex end match
338 if (miss || logrus->lmatch[1] == TT.count) logrus->hit = 0;
341 // Handle skipping curly bracket command group
346 logrus = logrus->next;
347 if (logrus->c == '{') curly++;
348 if (logrus->c == '}') curly--;
351 logrus = logrus->next;
356 // A deleted line can still update line match state for later commands
358 logrus = logrus->next;
364 if (c=='a' || c=='r') {
365 struct append *a = xzalloc(sizeof(struct append));
366 a->str = logrus->arg1+(char *)logrus;
368 dlist_add_nomalloc((void *)&append, (void *)a);
369 } else if (c=='b' || c=='t' || c=='T') {
372 if (c != 'b') tea = 0;
373 if (c=='b' || t^(c=='T')) {
374 if (!logrus->arg1) break;
375 str = logrus->arg1+(char *)logrus;
376 for (logrus = (void *)TT.pattern; logrus; logrus = logrus->next)
377 if (logrus->c == ':' && !strcmp(logrus->arg1+(char *)logrus, str))
379 if (!logrus) error_exit("no :%s", str);
382 str = logrus->arg1+(char *)logrus;
383 if (!logrus->hit) emit(str, strlen(str), 1);
392 // Delete up to \n or end of buffer
394 while ((str-line)<len) if (*(str++) == '\n') break;
396 memmove(line, str, len);
398 // if "delete" blanks line, disable further processing
399 // otherwise trim and restart script
405 logrus = (void *)TT.pattern;
410 line = xstrdup(TT.remember);
411 len = TT.rememberlen;
413 line = xrealloc(line, len+TT.rememberlen+2);
415 memcpy(line+len, TT.remember, TT.rememberlen);
416 line[len += TT.rememberlen] = 0;
419 TT.remember = xstrdup(line);
420 TT.rememberlen = len;
422 TT.remember = xrealloc(TT.remember, TT.rememberlen+len+2);
423 TT.remember[TT.rememberlen++] = '\n';
424 memcpy(TT.remember+TT.rememberlen, line, len);
425 TT.remember[TT.rememberlen += len] = 0;
427 str = logrus->arg1+(char *)logrus;
428 emit(str, strlen(str), 1);
433 terminal_size(&TT.xx, 0);
434 if (!TT.xx) TT.xx = 80;
435 if (TT.xx > sizeof(toybuf)-10) TT.xx = sizeof(toybuf)-10;
436 if (TT.xx > 4) TT.xx -= 4;
439 for (i = off = 0; i<len; i++) {
441 toybuf[off++] = '\\';
442 emit(toybuf, off, 1);
445 x = stridx("\\\a\b\f\r\t\v", line[i]);
447 toybuf[off++] = '\\';
448 toybuf[off++] = "\\abfrtv"[x];
449 } else if (line[i] >= ' ') toybuf[off++] = line[i];
450 else off += sprintf(toybuf+off, "\\%03o", line[i]);
453 emit(toybuf, off, 1);
455 TT.restart = logrus->next;
459 // Can't just grab next line because we could have multiple N and
460 // we need to actually read ahead to get N;$p EOF detection right.
462 TT.restart = logrus->next;
463 extend_string(&line, TT.nextline, len, -TT.nextlen);
466 TT.nextlen += len + 1;
470 // Pending append goes out right after N
472 } else if (c=='p' || c=='P') {
473 char *l = (c=='P') ? strchr(line, '\n') : 0;
475 if (emit(line, l ? l-line : len, eol)) break;
477 if (pline) *pline = (void *)1;
484 char *rline = line, *new = logrus->arg2 + (char *)logrus, *swap, *rswap;
485 regmatch_t *match = (void *)toybuf;
486 regex_t *reg = get_regex(logrus, logrus->arg1);
487 int mflags = 0, count = 0, zmatch = 1, rlen = len, mlen, off, newlen;
489 // Find match in remaining line (up to remaining len)
490 while (!ghostwheel(reg, rline, rlen, 10, match, mflags)) {
493 // Zero length matches don't count immediately after a previous match
494 mlen = match[0].rm_eo-match[0].rm_so;
495 if (!mlen && !zmatch) {
502 // If we're replacing only a specific match, skip if this isn't it
503 off = logrus->sflags>>3;
504 if (off && off != ++count) {
505 rline += match[0].rm_eo;
506 rlen -= match[0].rm_eo;
510 // The fact getline() can allocate unbounded amounts of memory is
511 // a bigger issue, but while we're here check for integer overflow
512 if (match[0].rm_eo > INT_MAX) perror_exit(0);
514 // newlen = strlen(new) but with \1 and & and printf escapes
515 for (off = newlen = 0; new[off]; off++) {
518 if (new[off] == '&') cc = 0;
519 else if (new[off] == '\\') cc = new[++off] - '0';
520 if (cc < 0 || cc > 9) {
524 newlen += match[cc].rm_eo-match[cc].rm_so;
527 // Allocate new size, copy start/end around match. (Can't extend in
528 // place because backrefs may refer to text after it's overwritten.)
530 swap = xmalloc(len+1);
531 rswap = swap+(rline-line)+match[0].rm_so;
532 memcpy(swap, line, (rline-line)+match[0].rm_so);
533 memcpy(rswap+newlen, rline+match[0].rm_eo, (rlen -= match[0].rm_eo)+1);
535 // copy in new replacement text
536 for (off = mlen = 0; new[off]; off++) {
539 if (new[off] == '\\') {
540 cc = new[++off] - '0';
542 if (!(rswap[mlen++] = unescape(new[off])))
543 rswap[mlen-1] = new[off];
546 } else if (match[cc].rm_so == -1) error_exit("no s//\\%d/", cc);
547 } else if (new[off] != '&') {
548 rswap[mlen++] = new[off];
553 ll = match[cc].rm_eo-match[cc].rm_so;
554 memcpy(rswap+mlen, rline+match[cc].rm_so, ll);
558 rline = rswap+newlen;
562 // Stop after first substitution unless we have flag g
563 if (!(logrus->sflags & 2)) break;
568 if (logrus->sflags & 4) emit(line, len, eol);
571 if (logrus->w) goto writenow;
578 // Swap out emit() context
582 // We save filehandle and newline status before filename
583 name = logrus->w + (char *)logrus;
584 memcpy(&TT.fdout, name, 4);
586 TT.noeol = *(name++);
588 // write, then save/restore context
589 if (emit(line, len, eol))
590 perror_exit("w '%s'", logrus->arg1+(char *)logrus);
591 *(--name) = TT.noeol;
595 long swap = TT.rememberlen;
600 TT.rememberlen = len;
603 char *from, *to = (char *)logrus;
606 from = to+logrus->arg1;
609 for (i = 0; i < len; i++) {
610 j = stridx(from, line[i]);
611 if (j != -1) line[i] = to[j];
614 sprintf(toybuf, "%ld", TT.count);
615 emit(toybuf, strlen(toybuf), 1);
618 logrus = logrus->next;
621 if (line && !(toys.optflags & FLAG_n)) emit(line, len, eol);
626 if (dlist_terminate(append)) while (append) {
627 struct append *a = append->next;
630 int fd = open(append->str, O_RDONLY);
632 // Force newline if noeol pending
634 if (TT.noeol) xwrite(TT.fdout, "\n", 1);
636 xsendfile(fd, TT.fdout);
639 } else emit(append->str, strlen(append->str), 1);
645 // Genericish function, can probably get moved to lib.c
647 // Iterate over lines in file, calling function. Function can write 0 to
648 // the line pointer if they want to keep it, or 1 to terminate processing,
649 // otherwise line is freed. Passed file descriptor is closed at the end.
650 static void do_lines(int fd, char *name, void (*call)(char **pline, long len))
652 FILE *fp = fd ? xfdopen(fd, "r") : stdin;
658 len = getline(&line, (void *)&len, fp);
661 if (line == (void *)1) break;
669 static void do_sed(int fd, char *name)
671 int i = toys.optflags & FLAG_i;
677 if (!fd && *name=='-') {
678 error_msg("-i on stdin");
681 TT.fdout = copy_tempfile(fd, name, &tmp);
683 for (primal = (void *)TT.pattern; primal; primal = primal->next)
686 do_lines(fd, name, walk_pattern);
689 replace_tempfile(-1, TT.fdout, &tmp);
692 TT.nextlen = TT.noeol = 0;
696 // Copy chunk of string between two delimiters, converting printf escapes.
697 // returns processed copy of string (0 if error), *pstr advances to next
698 // unused char. if delim (or *delim) is 0 uses/saves starting char as delimiter
699 // if regex, ignore delimiter in [ranges]
700 static char *unescape_delimited_string(char **pstr, char *delim, int regex)
702 char *to, *from, mode = 0, d;
705 if (!delim || !*delim) {
706 if (!(d = *(from++))) return 0;
707 if (d == '\\') d = *(from++);
708 if (!d || d == '\\') return 0;
709 if (delim) *delim = d;
711 to = delim = xmalloc(strlen(*pstr)+1);
713 while (mode || *from != d) {
720 // delimiter in regex character range doesn't count
723 if (from[1] == ']') *(to++) = *(from++);
724 } else if (mode && *from == ']') mode = 0;
725 else if (*from == '\\') {
726 if (!from[1]) return 0;
728 // Check escaped end delimiter before printf style escapes.
729 if (from[1] == d) from++;
730 else if (from[1]=='\\') *(to++) = *(from++);
732 char c = unescape(from[1]);
738 } else if (from[1]) *(to++) = *(from++);
749 // Translate primal pattern into walkable form.
750 static void jewel_of_judgement(char **pline, long len)
752 struct step *corwin = (void *)TT.pattern;
753 char *line, *reg, c, *errstart;
756 line = errstart = pline ? *pline : "";
758 // Append additional line to pattern argument string?
759 // We temporarily repurpose "hit" to indicate line continuations
760 if (corwin && corwin->prev->hit) {
761 if (!*pline) error_exit("unfinished %c", corwin->prev->c);;
762 // Remove half-finished entry from list so realloc() doesn't confuse it
763 TT.pattern = TT.pattern->prev;
764 corwin = dlist_pop(&TT.pattern);
766 reg = (char *)corwin;
767 reg += corwin->arg1 + strlen(reg + corwin->arg1);
769 // Resume parsing for 'a' or 's' command
770 if (corwin->hit < 256) goto resume_s;
774 // Loop through commands in line
778 if (corwin) dlist_add_nomalloc(&TT.pattern, (void *)corwin);
781 while (isspace(*line) || *line == ';') line++;
782 if (*line == '#') while (*line && *line != '\n') line++;
788 memset(toybuf, 0, sizeof(struct step));
789 corwin = (void *)toybuf;
790 reg = toybuf + sizeof(struct step);
792 // Parse address range (if any)
793 for (i = 0; i < 2; i++) {
794 if (*line == ',') line++;
797 if (isdigit(*line)) corwin->lmatch[i] = strtol(line, &line, 0);
798 else if (*line == '$') {
799 corwin->lmatch[i] = -1;
801 } else if (*line == '/' || *line == '\\') {
804 if (!(s = unescape_delimited_string(&line, 0, 1))) goto brand;
805 if (!*s) corwin->rmatch[i] = 0;
807 xregcomp((void *)reg, s, (toys.optflags & FLAG_r)*REG_EXTENDED);
808 corwin->rmatch[i] = reg-toybuf;
809 reg += sizeof(regex_t);
815 while (isspace(*line)) line++;
818 while (*line == '!') {
822 while (isspace(*line)) line++;
824 c = corwin->c = *(line++);
825 if (strchr("}:", c) && i) break;
826 if (strchr("aiqr=", c) && i>1) break;
828 // Add step to pattern
829 corwin = xmalloc(reg-toybuf);
830 memcpy(corwin, toybuf, reg-toybuf);
831 reg = (reg-toybuf) + (char *)corwin;
833 // Parse arguments by command type
834 if (c == '{') TT.nextlen++;
836 if (!TT.nextlen--) break;
837 } else if (c == 's') {
838 char *fiona, delim = 0;
840 // s/pattern/replacement/flags
842 // line continuations use arg1, so we fill out arg2 first (since the
843 // regex part can't be multiple lines) and swap them back later.
845 // get pattern (just record, we parse it later)
846 corwin->arg2 = reg - (char *)corwin;
847 if (!(TT.remember = unescape_delimited_string(&line, &delim, 1)))
850 reg += sizeof(regex_t);
851 corwin->arg1 = reg-(char *)corwin;
854 // get replacement - don't replace escapes because \1 and \& need
855 // processing later, after we replace \\ with \ we can't tell \\1 from \1
857 while (*fiona != corwin->hit) {
859 if (*fiona++ == '\\') {
860 if (!*fiona || *fiona == '\n') {
868 reg = extend_string((void *)&corwin, line, reg-(char *)corwin,fiona-line);
870 // line continuation? (note: '\n' can't be a valid delim).
871 if (*line == corwin->hit) corwin->hit = 0;
873 if (!*line) continue;
879 // swap arg1/arg2 so they're back in order arguments occur.
881 corwin->arg1 = corwin->arg2;
885 for (line++; *line; line++) {
888 if (isspace(*line) && *line != '\n') continue;
890 if (0 <= (l = stridx("igp", *line))) corwin->sflags |= 1<<l;
891 else if (!(corwin->sflags>>3) && 0<(l = strtol(line, &line, 10))) {
892 corwin->sflags |= l << 3;
897 // We deferred actually parsing the regex until we had the s///i flag
898 // allocating the space was done by extend_string() above
899 if (!*TT.remember) corwin->arg1 = 0;
900 else xregcomp((void *)(corwin->arg1 + (char *)corwin), TT.remember,
901 ((toys.optflags & FLAG_r)*REG_EXTENDED)|((corwin->sflags&1)*REG_ICASE));
908 } else if (c == 'w') {
912 // Since s/// uses arg1 and arg2, and w needs a persistent filehandle and
913 // eol status, and to retain the filename for error messages, we'd need
914 // to go up to arg5 just for this. Compromise: dynamically allocate the
915 // filehandle and eol status.
918 while (isspace(*line)) line++;
919 if (!*line) goto brand;
920 for (cc = line; *cc; cc++) if (*cc == '\\' && cc[1] == ';') break;
923 fd = xcreate(line, O_WRONLY|O_CREAT|O_TRUNC, 0644);
926 corwin->w = reg - (char *)corwin;
927 corwin = xrealloc(corwin, corwin->w+(cc-line)+6);
928 reg = corwin->w + (char *)corwin;
933 memcpy(reg, line, delim);
938 if (delim) line += 2;
939 } else if (c == 'y') {
943 if (!(s = unescape_delimited_string(&line, &delim, 0))) goto brand;
944 corwin->arg1 = reg-(char *)corwin;
946 reg = extend_string((void *)&corwin, s, reg-(char *)corwin, len);
948 corwin->arg2 = reg-(char *)corwin;
949 if (!(s = unescape_delimited_string(&line, &delim, 0))) goto brand;
950 if (len != strlen(s)) goto brand;
951 reg = extend_string((void *)&corwin, s, reg-(char*)corwin, len);
953 } else if (strchr("abcirtTw:", c)) {
956 while (isspace(*line) && *line != '\n') line++;
958 // Resume logic differs from 's' case because we don't add a newline
959 // unless it's after something, so we add it on return instead.
963 // Trim whitespace from "b ;" and ": blah " but only first space in "w x "
964 if (!(end = strcspn(line, strchr("btT:", c) ? "; \t\r\n\v\f" : "\n"))) {
965 if (strchr("btT", c)) continue;
966 else if (!corwin->arg1) break;
969 // Extend allocation to include new string. We use offsets instead of
970 // pointers so realloc() moving stuff doesn't break things. Do it
971 // here instead of toybuf so there's no maximum size.
972 if (!corwin->arg1) corwin->arg1 = reg - (char*)corwin;
973 else if ((corwin+1) != (void *)reg) *(reg++) = '\n';
974 reg = extend_string((void *)&corwin, line, reg - (char *)corwin, end);
978 // Line continuation? (Two slightly different input methods, -e with
979 // embedded newline vs -f line by line. Must parse both correctly.)
980 if (!strchr("btT:", c) && line[-1] == '\\') {
981 // backslash only matters if we have an odd number of them
982 for (i = 0; i<end; i++) if (line[-i-1] != '\\') break;
984 // reg is next available space, so reg[-1] is the null terminator
986 if (*line && line[1]) {
990 } else corwin->hit = 256;
994 // Commands that take no arguments
995 } else if (!strchr("{dDgGhHlnNpPqx=", c)) break;
999 // Reminisce about chestnut trees.
1000 error_exit("bad pattern '%s'@%ld (%c)", errstart, line-errstart+1, *line);
1005 struct arg_list *dworkin;
1006 char **args = toys.optargs;
1008 // Lie to autoconf when it asks stupid questions, so configure regexes
1009 // that look for "GNU sed version %f" greater than some old buggy number
1010 // don't fail us for not matching their narrow expectations.
1011 if (toys.optflags & FLAG_version) {
1012 xprintf("This is not GNU sed version 9.0\n");
1016 // Need a pattern. If no unicorns about, fight serpent and take its eye.
1017 if (!TT.e && !TT.f) {
1018 if (!*toys.optargs) error_exit("no pattern");
1019 (TT.e = xzalloc(sizeof(struct arg_list)))->arg = *(args++);
1022 // Option parsing infrastructure can't interlace "-e blah -f blah -e blah"
1023 // so handle all -e, then all -f. (At least the behavior's consistent.)
1025 for (dworkin = TT.e; dworkin; dworkin = dworkin->next)
1026 jewel_of_judgement(&dworkin->arg, strlen(dworkin->arg));
1027 for (dworkin = TT.f; dworkin; dworkin = dworkin->next)
1028 do_lines(xopen(dworkin->arg, O_RDONLY), dworkin->arg, jewel_of_judgement);
1029 jewel_of_judgement(0, 0);
1030 dlist_terminate(TT.pattern);
1031 if (TT.nextlen) error_exit("no }");
1034 TT.remember = xstrdup("");
1036 // Inflict pattern upon input files
1037 loopfiles_rw(args, O_RDONLY, 0, 0, do_sed);
1039 if (!(toys.optflags & FLAG_i)) walk_pattern(0, 0);
1041 // todo: need to close fd when done for TOYBOX_FREE?