1 /* sed.c - stream editor. Thing that does s/// and other stuff.
3 * Copyright 2014 Rob Landley <rob@landley.net>
5 * See http://pubs.opengroup.org/onlinepubs/9699919799/utilities/sed.html
7 * TODO: lines > 2G could wrap signed int length counters. Not just getline()
9 * TODO: make y// handle unicode
10 * TODO: handle error return from emit(), error_msg/exit consistently
11 * What's the right thing to do for -i when write fails? Skip to next?
13 USE_SED(NEWTOY(sed, "(version)e*f*inEr[+Er]", TOYFLAG_USR|TOYFLAG_BIN|TOYFLAG_LOCALE))
19 usage: sed [-inrE] [-e SCRIPT]...|SCRIPT [-f SCRIPT_FILE]... [FILE...]
21 Stream editor. Apply one or more editing SCRIPTs to each line of input
22 (from FILE or stdin) producing output (by default to stdout).
25 -f add contents of SCRIPT_FILE to list
26 -i Edit each file in place.
27 -n No default output. (Use the p command to output matched lines.)
28 -r Use extended regular expression syntax.
30 -s Treat input files separately (implied by -i)
32 A SCRIPT is a series of one or more COMMANDs separated by newlines or
33 semicolons. All -e SCRIPTs are concatenated together as if separated
34 by newlines, followed by all lines from -f SCRIPT_FILEs, in order.
35 If no -e or -f SCRIPTs are specified, the first argument is the SCRIPT.
37 Each COMMAND may be preceded by an address which limits the command to
38 apply only to the specified line(s). Commands without an address apply to
39 every line. Addresses are of the form:
41 [ADDRESS[,ADDRESS]]COMMAND
43 The ADDRESS may be a decimal line number (starting at 1), a /regular
44 expression/ within a pair of forward slashes, or the character "$" which
45 matches the last line of input. (In -s or -i mode this matches the last
46 line of each file, otherwise just the last line of the last file.) A single
47 address matches one line, a pair of comma separated addresses match
48 everything from the first address to the second address (inclusive). If
49 both addresses are regular expressions, more than one range of lines in
52 REGULAR EXPRESSIONS in sed are started and ended by the same character
53 (traditionally / but anything except a backslash or a newline works).
54 Backslashes may be used to escape the delimiter if it occurs in the
55 regex, and for the usual printf escapes (\abcefnrtv and octal, hex,
56 and unicode). An empty regex repeats the previous one. ADDRESS regexes
57 (above) require the first delimiter to be escaped with a backslash when
58 it isn't a forward slash (to distinguish it from the COMMANDs below).
60 Sed mostly operates on individual lines one at a time. It reads each line,
61 processes it, and either writes it to the output or discards it before
62 reading the next line. Sed can remember one additional line in a separate
63 buffer (using the h, H, g, G, and x commands), and can read the next line
64 of input early (using the n and N commands), but other than that command
65 scripts operate on individual lines of text.
67 Each COMMAND starts with a single character. The following commands take
70 { Start a new command block, continuing until a corresponding "}".
71 Command blocks may nest. If the block has an address, commands within
72 the block are only run for lines within the block's address range.
74 } End command block (this command cannot have an address)
76 d Delete this line and move on to the next one
77 (ignores remaining COMMANDs)
79 D Delete one line of input and restart command SCRIPT (same as "d"
80 unless you've glued lines together with "N" or similar)
82 g Get remembered line (overwriting current line)
84 G Get remembered line (appending to current line)
86 h Remember this line (overwriting remembered line)
88 H Remember this line (appending to remembered line, if any)
90 l Print line, escaping \abfrtv (but not newline), octal escaping other
91 nonprintable characters, wrapping lines to terminal width with a
92 backslash, and appending $ to actual end of line.
94 n Print default output and read next line, replacing current line
95 (If no next line available, quit processing script)
97 N Append next line of input to this line, separated by a newline
98 (This advances the line counter for address matching and "=", if no
99 next line available quit processing script without default output)
103 P Print this line up to first newline (from "N")
105 q Quit (print default output, no more commands processed or lines read)
107 x Exchange this line with remembered line (overwrite in both directions)
109 = Print the current line number (followed by a newline)
111 The following commands (may) take an argument. The "text" arguments (to
112 the "a", "i", and "c" commands) may end with an unescaped "\" to append
113 the next line (for which leading whitespace is not skipped), and also
114 treat ";" as a literal character (use "\;" instead).
116 a [text] Append text to output before attempting to read next line
118 b [label] Branch, jumps to :label (or with no label, to end of SCRIPT)
120 c [text] Delete line, output text at end of matching address range
121 (ignores remaining COMMANDs)
125 r [file] Append contents of file to output before attempting to read
128 s/S/R/F Search for regex S, replace matched text with R using flags F.
129 The first character after the "s" (anything but newline or
130 backslash) is the delimiter, escape with \ to use normally.
132 The replacement text may contain "&" to substitute the matched
133 text (escape it with backslash for a literal &), or \1 through
134 \9 to substitute a parenthetical subexpression in the regex.
135 You can also use the normal backslash escapes such as \n and
136 a backslash at the end of the line appends the next line.
140 [0-9] A number, substitute only that occurrence of pattern
141 g Global, substitute all occurrences of pattern
142 i Ignore case when matching
143 p Print the line if match was found and replaced
144 w [file] Write (append) line to file if match replaced
146 t [label] Test, jump to :label only if an "s" command found a match in
147 this line since last test (replacing with same text counts)
149 T [label] Test false, jump only if "s" hasn't found a match.
151 w [file] Write (append) line to file
153 y/old/new/ Change each character in 'old' to corresponding character
154 in 'new' (with standard backslash escapes, delimiter can be
155 any repeated character except \ or \n)
157 : [label] Labeled target for jump commands
159 # Comment, ignore rest of this line of SCRIPT
161 Deviations from posix: allow extended regular expressions with -r,
162 editing in place with -i, separate with -s, printf escapes in text, line
163 continuations, semicolons after all commands, 2-address anywhere an
164 address is allowed, "T" command, multiline continuations for [abc],
165 \; to end [abc] argument before end of line.
175 // processed pattern list (holds struct sedcmd entries; code below casts it and walks ->next)
176 struct double_list *pattern;
178 char *nextline, *remember; // nextline: read-ahead input line handed over on the next process_line() call; remember: the "remembered line" buffer (also borrowed during parsing to hold the s/// regex text)
179 void *restart, *lastregex; // restart: command to resume at, stored +1 so a valid value is never NULL; lastregex: most recently used regex, reused when a regex is empty
180 long nextlen, rememberlen, count; // lengths of nextline/remember; nextlen doubles as the open "{" depth while parsing; count is the line number compared against line addresses
185 // Linked list of parsed sed commands. Offset fields indicate location where
186 // regex or string starts, ala offset+(char *)struct, because we realloc()
187 // these to expand them for multiline inputs, and pointers would have to be
188 // individually adjusted. An offset of 0 means "not present".
191 struct sedcmd *next, *prev;
193 // Begin and end of each match
194 long lmatch[2]; // line number of match (-1 is stored for the "$" address, 0 when unused)
195 int rmatch[2]; // offset of regex struct for prefix matches (/abc/,/def/p)
196 int arg1, arg2, w; // offset of two arguments per command, plus s//w filename
198 unsigned sflags; // s///flag bits: i=1, g=2, p=4; the "replace only occurrence N" number is stored shifted left by 3
202 // Write out line with potential embedded NUL, handling eol/noeol. Writes len bytes of line to TT.fdout; when eol is nonzero a '\n' is written too, by temporarily storing it at line[len] (so that byte must be writable) and restoring the old byte afterwards. Returns 1 if writing a pending newline fails.
203 static int emit(char *line, long len, int eol)
205 int l, old = line[len]; // remember the byte that the temporary newline may overwrite
207 if (TT.noeol && !writeall(TT.fdout, "\n", 1)) return 1; // a newline is still pending from earlier output (TT.noeol): write it first
209 if (eol) line[len++] = '\n';
211 l = writeall(TT.fdout, line, len);
212 if (eol) line[len-1] = old; // put back the byte replaced by the newline
214 perror_msg("short write");
222 // Do regex matching handling embedded NUL bytes in string. Note that
223 // neither the pattern nor the match can currently include NUL bytes
224 // (even with wildcards) and string must be null terminated at string[len].
225 // But this can find a match after the first NUL. Callers in this file treat a 0 return as "matched". Offsets stored in pmatch[] are adjusted to be relative to string, not to the NUL-free segment that was searched.
226 static int regex_null(regex_t *preg, char *string, long len, int nmatch,
227 regmatch_t pmatch[], int eflags)
239 while (s[ll] && ll<len) ll++; // advance ll to the next NUL byte, or to len if there is none
241 rc = regexec(preg, s, nmatch, pmatch, eflags);
243 for (rc = 0; rc<nmatch && pmatch[rc].rm_so!=-1; rc++) { // rc is reused as the index over the filled-in match slots
244 pmatch[rc].rm_so += s-string; // rebase from segment start (s) to the start of string
245 pmatch[rc].rm_eo += s-string;
250 if (ll==len) return rc; // this segment ran to the end of the data, so there is nothing further to search
257 // Extend allocation to include new string, with newline between if newlen<0. *old is reallocated (and updated) to hold the first oldlen bytes, the optional '\n', then |newlen| bytes copied from new, then a NUL. Returns a pointer just past that NUL, which is one past the end of the size requested from xrealloc().
259 static char *extend_string(char **old, char *new, int oldlen, int newlen)
261 int newline = newlen < 0; // a negative length is how callers ask for the '\n' separator
264 if (newline) newlen = -newlen;
265 s = *old = xrealloc(*old, oldlen+newlen+newline+1); // +1 is for the NUL terminator
266 if (newline) s[oldlen++] = '\n';
267 memcpy(s+oldlen, new, newlen);
268 s[oldlen+newlen] = 0;
270 return s+oldlen+newlen+1;
273 // An empty regex repeats the previous one. trump is the command structure and offset is the byte offset of a compiled regex inside it (callers pass rmatch[] or arg1 values, where 0 was stored for an empty regex). NOTE(review): the condition selecting reuse of TT.lastregex is not visible in this extract -- presumably offset == 0; confirm.
274 static void *get_regex(void *trump, int offset)
277 if (!TT.lastregex) error_exit("no previous regex");
281 return TT.lastregex = offset+(char *)trump; // record this regex as the "previous" one for later empty regexes
284 // Apply pattern to line from input file. Processing is deferred by one line: each call works on the line saved by the previous call (TT.nextline/TT.nextlen) and stores *pline as the new read-ahead. A NULL pline marks EOF and flushes the last line.
285 static void process_line(char **pline, long plen)
288 struct append *next, *prev;
292 char *line = TT.nextline;
293 long len = TT.nextlen;
294 struct sedcmd *command;
295 int eol = 0, tea = 0; // eol: the line ended in '\n'; tea: cleared by t/T below (NOTE(review): presumably the "s matched" state they test; its setter is not in this extract)
297 // Grab next line for deferred processing (EOF detection: we get a NULL
298 // pline at EOF to flush last line). Note that only end of _last_ input
299 // file matches $ (unless we're doing -i).
303 TT.nextline = *pline;
308 if (!line || !len) return; // no previously saved line to process yet
309 if (line[len-1] == '\n') line[--len] = eol++; // strip the trailing newline (eol is 0 here, so this stores a NUL) and note that it was present
312 // The restart-1 is because we added one to make sure it wasn't NULL,
313 // otherwise N as last command would restart script
314 command = TT.restart ? ((struct sedcmd *)TT.restart)-1 : (void *)TT.pattern;
318 char *str, c = command->c;
320 // Have we got a line or regex matching range for this rule?
321 if (*command->lmatch || *command->rmatch) {
325 // In a match that might end?
327 if (!(lm = command->lmatch[1])) { // no end line number
328 if (!command->rmatch[1]) command->hit = 0; // and no end regex: single-address command, so the match does not carry over
330 void *rm = get_regex(command, command->rmatch[1]);
332 // regex match end includes matching line, so defer deactivation
333 if (line && !regex_null(rm, line, len, 0, 0, 0)) miss = 1;
335 } else if (lm > 0 && lm < TT.count) command->hit = 0; // numeric end line is already behind us
337 // Start a new match?
339 if (!(lm = *command->lmatch)) { // start address is a regex rather than a line number
340 void *rm = get_regex(command, *command->rmatch);
342 if (line && !regex_null(rm, line, len, 0, 0, 0)) command->hit++;
343 } else if (lm == TT.count || (lm == -1 && !pline)) command->hit++; // -1 encodes "$": it only matches on the EOF flush call, where pline is NULL
345 if (!command->lmatch[1] && !command->rmatch[1]) miss = 1; // one-address form: deactivate again after this line
349 lm = !(command->hit ^ command->not); // lm is reused as a flag: 1 when (hit XOR not) is zero
351 // Deferred disable from regex end match
352 if (miss || command->lmatch[1] == TT.count) command->hit = 0;
355 // Handle skipping curly bracket command group
360 command = command->next;
361 if (command->c == '{') curly++; // track nesting depth while stepping over the group
362 if (command->c == '}') curly--;
365 command = command->next;
370 // A deleted line can still update line match state for later commands
372 command = command->next;
378 if (c=='a' || c=='r') {
379 struct append *a = xzalloc(sizeof(struct append)); // queue a record; queued records are written out by the loop at the end of this function
380 if (command->arg1) a->str = command->arg1+(char *)command;
382 dlist_add_nomalloc((void *)&append, (void *)a);
383 } else if (c=='b' || c=='t' || c=='T') {
386 if (c != 'b') tea = 0; // t and T reset the tested state
387 if (c=='b' || t^(c=='T')) {
388 if (!command->arg1) break; // no label given: stop running commands for this line
389 str = command->arg1+(char *)command;
390 for (command = (void *)TT.pattern; command; command = command->next) // search the whole script for the matching ':' label
391 if (command->c == ':' && !strcmp(command->arg1+(char *)command, str))
393 if (!command) error_exit("no :%s", str);
396 str = command->arg1+(char *)command;
397 if (!command->hit) emit(str, strlen(str), 1); // text is emitted only when this command's address match is no longer active
406 // Delete up to \n or end of buffer
408 while ((str-line)<len) if (*(str++) == '\n') break;
410 memmove(line, str, len); // NOTE(review): len is presumably already reduced by the deleted prefix on a line not shown here
412 // if "delete" blanks line, disable further processing
413 // otherwise trim and restart script
419 command = (void *)TT.pattern;
424 line = xstrdup(TT.remember); // NOTE(review): xstrdup copies only up to the first NUL while len below is the full remembered length -- check remembered data containing NUL bytes
425 len = TT.rememberlen;
427 line = xrealloc(line, len+TT.rememberlen+2); // room for both pieces; the +2 presumably covers a '\n' separator and the NUL
429 memcpy(line+len, TT.remember, TT.rememberlen);
430 line[len += TT.rememberlen] = 0;
433 TT.remember = xstrdup(line); // NOTE(review): same embedded-NUL caveat as the xstrdup above
434 TT.rememberlen = len;
436 TT.remember = xrealloc(TT.remember, TT.rememberlen+len+2); // +2: '\n' separator and NUL
437 TT.remember[TT.rememberlen++] = '\n';
438 memcpy(TT.remember+TT.rememberlen, line, len);
439 TT.remember[TT.rememberlen += len] = 0;
441 str = command->arg1+(char *)command;
442 emit(str, strlen(str), 1);
447 terminal_size(&TT.xx, 0);
448 if (!TT.xx) TT.xx = 80; // width unknown: use 80 columns
449 if (TT.xx > sizeof(toybuf)-10) TT.xx = sizeof(toybuf)-10; // clamp the wrap width to what toybuf can hold
450 if (TT.xx > 4) TT.xx -= 4;
453 for (i = off = 0; i<len; i++) {
455 toybuf[off++] = '\\'; // wrapped output line ends with a backslash
456 emit(toybuf, off, 1);
459 x = stridx("\\\a\b\f\r\t\v", line[i]);
461 toybuf[off++] = '\\';
462 toybuf[off++] = "\\abfrtv"[x]; // letter of the escape, at the same index as the character found above
463 } else if (line[i] >= ' ') toybuf[off++] = line[i];
464 else off += sprintf(toybuf+off, "\\%03o", line[i]); // NOTE(review): line[i] is a plain char; where char is signed, bytes >= 0x80 compare below ' ' and reach here negative, so %03o would print the sign-extended value -- confirm
467 emit(toybuf, off, 1);
469 TT.restart = command->next+1; // resume at the following command on the next call (+1 keeps it non-NULL)
473 // Can't just grab next line because we could have multiple N and
474 // we need to actually read ahead to get N;$p EOF detection right.
476 TT.restart = command->next+1;
477 extend_string(&line, TT.nextline, len, -TT.nextlen); // negative length makes extend_string() put a '\n' between the two lines
480 TT.nextlen += len + 1; // combined length: this line + '\n' + read-ahead line
484 // Pending append goes out right after N
486 } else if (c=='p' || c=='P') {
487 char *l = (c=='P') ? strchr(line, '\n') : 0; // P prints only up to the first embedded newline
489 if (emit(line, l ? l-line : len, eol)) break;
491 if (pline) *pline = (void *)1; // (void *)1 is the value do_lines() checks to stop reading input
498 char *rline = line, *new = command->arg2 + (char *)command, *swap, *rswap;
499 regmatch_t *match = (void *)toybuf; // toybuf provides storage for the 10 match slots requested below
500 regex_t *reg = get_regex(command, command->arg1);
501 int mflags = 0, count = 0, zmatch = 1, rlen = len, mlen, off, newlen;
503 // Find match in remaining line (up to remaining len)
504 while (!regex_null(reg, rline, rlen, 10, match, mflags)) {
507 // Zero length matches don't count immediately after a previous match
508 mlen = match[0].rm_eo-match[0].rm_so;
509 if (!mlen && !zmatch) {
516 // If we're replacing only a specific match, skip if this isn't it
517 off = command->sflags>>3; // requested occurrence number, 0 if none was given
518 if (off && off != ++count) {
519 rline += match[0].rm_eo;
520 rlen -= match[0].rm_eo;
524 // The fact getline() can allocate unbounded amounts of memory is
525 // a bigger issue, but while we're here check for integer overflow
526 if (match[0].rm_eo > INT_MAX) perror_exit(0);
528 // newlen = strlen(new) but with \1 and & and printf escapes
529 for (off = newlen = 0; new[off]; off++) {
532 if (new[off] == '&') cc = 0; // & stands for the whole match, i.e. slot 0
533 else if (new[off] == '\\') cc = new[++off] - '0';
534 if (cc < 0 || cc > 9) {
538 newlen += match[cc].rm_eo-match[cc].rm_so;
541 // Allocate new size, copy start/end around match. (Can't extend in
542 // place because backrefs may refer to text after it's overwritten.)
544 swap = xmalloc(len+1); // NOTE(review): len is presumably adjusted for the replacement size on a line not shown here
545 rswap = swap+(rline-line)+match[0].rm_so; // where the replacement text starts in the new buffer
546 memcpy(swap, line, (rline-line)+match[0].rm_so);
547 memcpy(rswap+newlen, rline+match[0].rm_eo, (rlen -= match[0].rm_eo)+1); // copy the tail after the match; the +1 copies the byte at its end too
549 // copy in new replacement text
550 for (off = mlen = 0; new[off]; off++) {
553 if (new[off] == '\\') {
554 cc = new[++off] - '0';
556 if (!(rswap[mlen++] = unescape(new[off]))) // unescape() returned 0: keep the escaped character itself
557 rswap[mlen-1] = new[off];
560 } else if (match[cc].rm_so == -1) error_exit("no s//\\%d/", cc);
561 } else if (new[off] != '&') {
562 rswap[mlen++] = new[off];
567 ll = match[cc].rm_eo-match[cc].rm_so;
568 memcpy(rswap+mlen, rline+match[cc].rm_so, ll);
572 rline = rswap+newlen; // continue searching after the text just inserted
576 // Stop after first substitution unless we have flag g
577 if (!(command->sflags & 2)) break;
582 if (command->sflags & 4) emit(line, len, eol); // the s///p flag
585 if (command->w) goto writenow;
592 // Swap out emit() context
596 // We save filehandle and newline status before filename
597 name = command->w + (char *)command;
598 memcpy(&TT.fdout, name, 4); // the first 4 bytes at this offset hold the file descriptor
600 TT.noeol = *(name++);
602 // write, then save/restore context
603 if (emit(line, len, eol))
604 perror_exit("w '%s'", command->arg1+(char *)command); // NOTE(review): this names the file via the arg1 offset, but the w data was located through command->w above -- verify it prints the intended filename
605 *(--name) = TT.noeol; // store the updated pending-newline state back into the command
609 long swap = TT.rememberlen;
614 TT.rememberlen = len;
617 char *from, *to = (char *)command;
620 from = to+command->arg1;
623 for (i = 0; i < len; i++) {
624 j = stridx(from, line[i]);
625 if (j != -1) line[i] = to[j]; // replace with the character at the same index of the "to" set
628 sprintf(toybuf, "%ld", TT.count);
629 emit(toybuf, strlen(toybuf), 1);
632 command = command->next;
635 if (line && !(toys.optflags & FLAG_n)) emit(line, len, eol); // default output, suppressed by -n
638 if (dlist_terminate(append)) while (append) {
639 struct append *a = append->next;
642 int fd = open(append->str, O_RDONLY);
644 // Force newline if noeol pending
646 if (TT.noeol) xwrite(TT.fdout, "\n", 1);
648 xsendfile(fd, TT.fdout);
651 } else if (append->str) emit(append->str, strlen(append->str), 1);
652 else emit(line, 0, 0); // zero-length write with no newline: emit() still writes a pending newline if TT.noeol is set
659 // Genericish function, can probably get moved to lib.c
661 // Iterate over lines in file, calling function. Function can write 0 to
662 // the line pointer if they want to keep it, or 1 to terminate processing,
663 // otherwise line is freed. Passed file descriptor is closed at the end.
664 static void do_lines(int fd, void (*call)(char **pline, long len))
666 FILE *fp = fd ? xfdopen(fd, "r") : stdin; // fd 0 reuses the existing stdin stream instead of opening a new one
672 len = getline(&line, (void *)&len, fp); // len is passed as getline()'s size argument and then overwritten with its return value
675 if (line == (void *)1) break; // the callback stored 1 in the line pointer: stop reading
683 // Callback called on each input file (passed to loopfiles_rw() by the main function). Runs process_line() over every line of fd; with -i the output goes to a temp file that then replaces name.
684 static void do_sed(int fd, char *name)
686 int i = toys.optflags & FLAG_i; // nonzero when editing in place
690 struct sedcmd *command;
692 if (!fd && !strcmp(name, "-")) {
693 error_msg("-i on stdin");
696 TT.fdout = copy_tempfile(fd, name, &tmp); // send output to a temporary file
698 for (command = (void *)TT.pattern; command; command = command->next) // visits every parsed command (the loop body is not visible in this extract)
701 do_lines(fd, process_line);
704 replace_tempfile(-1, TT.fdout, &tmp);
707 TT.nextlen = TT.noeol = 0; // clear the read-ahead length and the pending-newline flag
711 // Copy chunk of string between two delimiters, converting printf escapes.
712 // returns processed copy of string (0 if error), *pstr advances to next
713 // unused char. if delim (or *delim) is 0 uses/saves starting char as delimiter
714 // if regex, ignore delimiter in [ranges]. NOTE(review): the error returns after the allocation below do not appear to free the output buffer in the lines visible here.
715 static char *unescape_delimited_string(char **pstr, char *delim)
717 char *to, *from, mode = 0, d;
720 if (!delim || !*delim) {
721 if (!(d = *(from++))) return 0; // end of input where a delimiter was expected
722 if (d == '\\') d = *(from++); // a leading backslash means the next character is the delimiter
723 if (!d || d == '\\') return 0; // NUL and backslash cannot be delimiters
724 if (delim) *delim = d; // hand the chosen delimiter back to the caller
726 to = delim = xmalloc(strlen(*pstr)+1); // from here on delim is reused to point at the start of the output buffer
728 while (mode || *from != d) {
729 if (!*from) return 0; // hit end of input before the closing delimiter
731 // delimiter in regex character range doesn't count
732 if (!mode && *from == '[') {
734 if (from[1]=='-' || from[1]==']') *(to++) = *(from++); // a '-' or ']' directly after '[' is special-cased: copy the '[' here
735 } else if (mode && *from == ']') mode = 0;
736 // Length 1 range (X-X with same X) is "undefined" and makes regcomp err,
737 // but the perl build does it, so we need to filter it out.
738 else if (mode && *from == '-' && from[-1] == from[1]) {
741 } else if (*from == '\\') {
742 if (!from[1]) return 0; // backslash as the last character is an error
744 // Check escaped end delimiter before printf style escapes.
745 if (from[1] == d) from++;
746 else if (from[1]=='\\') *(to++) = *(from++);
748 char c = unescape(from[1]);
754 } else if (!mode) *(to++) = *(from++);
765 // Translate pattern strings into command structures. Each command structure
766 // is a single allocation (which requires some math and realloc at times). Called once per script line; pline may be NULL, in which case an empty line is parsed. Parse failures end in error_exit() with the offending position.
767 static void parse_pattern(char **pline, long len)
769 struct sedcmd *command = (void *)TT.pattern;
770 char *line, *reg, c, *errstart;
773 line = errstart = pline ? *pline : "";
774 if (len && line[len-1]=='\n') line[--len] = 0; // drop the trailing newline in place
776 // Append this line to previous multiline command? (hit indicates type.)
777 // During parsing "hit" stores data about line continuations, but in
778 // process_line() it means the match range attached to this command
779 // is active, so processing the continuation must zero it again.
780 if (command && command->prev->hit) {
781 // Remove half-finished entry from list so realloc() doesn't confuse it
782 TT.pattern = TT.pattern->prev;
783 command = dlist_pop(&TT.pattern);
785 reg = (char *)command;
786 reg += command->arg1 + strlen(reg + command->arg1); // reg now points at the NUL ending the text collected so far
788 // Resume parsing for 'a' or 's' command. (Only two that can do this.)
789 // TODO: using 256 to indicate 'a' means our s/// delimiter can't be
790 // a unicode character.
791 if (command->hit < 256) goto resume_s; // values below 256 are a saved s/// delimiter character
795 // Loop through commands in this line.
799 if (command) dlist_add_nomalloc(&TT.pattern, (void *)command);
801 // If there's no more data on this line, return.
803 while (isspace(*line) || *line == ';') line++;
804 if (*line == '#') while (*line && *line != '\n') line++; // comment: skip to end of line
809 // We start by writing data into toybuf. Later we'll allocate the command's own copy (the xmemdup() call below).
813 memset(toybuf, 0, sizeof(struct sedcmd));
814 command = (void *)toybuf;
815 reg = toybuf + sizeof(struct sedcmd); // variable-length data (compiled regexes, strings) follows the fixed struct
817 // Parse address range (if any)
818 for (i = 0; i < 2; i++) {
819 if (*line == ',') line++;
822 if (isdigit(*line)) command->lmatch[i] = strtol(line, &line, 0); // NOTE(review): base 0 lets a leading 0 or 0x select octal/hex, unlike the base-10 parse used for the s/// occurrence number below -- confirm intended
823 else if (*line == '$') {
824 command->lmatch[i] = -1; // -1 encodes the "$" (last line) address
826 } else if (*line == '/' || *line == '\\') {
829 if (!(s = unescape_delimited_string(&line, 0))) goto error;
830 if (!*s) command->rmatch[i] = 0; // empty regex: store no offset
832 xregcomp((void *)reg, s, (toys.optflags & FLAG_r)*REG_EXTENDED); // NOTE(review): the multiply yields REG_EXTENDED only if FLAG_r has the value 1 -- confirm
833 command->rmatch[i] = reg-toybuf;
834 reg += sizeof(regex_t);
840 while (isspace(*line)) line++;
843 while (*line == '!') {
847 while (isspace(*line)) line++;
849 c = command->c = *(line++);
850 if (strchr("}:", c) && i) break; // NOTE(review): i presumably counts the addresses parsed; these two commands accept none
851 if (strchr("aiqr=", c) && i>1) break; // these are rejected when i exceeds 1
853 // Add step to pattern
854 command = xmemdup(toybuf, reg-toybuf); // copy the struct plus any compiled address regexes out of toybuf
855 reg = (reg-toybuf) + (char *)command; // same offset, now inside the new allocation
857 // Parse arguments by command type
858 if (c == '{') TT.nextlen++; // TT.nextlen is borrowed as the brace nesting depth during parsing
860 if (!TT.nextlen--) break; // no open "{" to match
861 } else if (c == 's') {
862 char *end, delim = 0;
864 // s/pattern/replacement/flags
866 // line continuations use arg1 (back at the start of the function),
867 // so let's fill out arg2 first (since the regex part can't be multiple
868 // lines) and swap them back later.
870 // get pattern (just record, we parse it later)
871 command->arg2 = reg - (char *)command;
872 if (!(TT.remember = unescape_delimited_string(&line, &delim))) // TT.remember is borrowed to hold the regex text until it is compiled below
875 reg += sizeof(regex_t); // reserve space for the compiled regex
876 command->arg1 = reg-(char *)command;
877 command->hit = delim; // remember the delimiter in case the replacement continues on the next line
879 // get replacement - don't replace escapes yet because \1 and \& need
880 // processing later, after we replace \\ with \ we can't tell \\1 from \1
882 while (*end != command->hit) {
883 if (!*end) goto error;
884 if (*end++ == '\\') {
885 if (!*end || *end == '\n') {
893 reg = extend_string((void *)&command, line, reg-(char *)command,end-line);
895 // line continuation? (note: '\n' can't be a valid delim).
896 if (*line == command->hit) command->hit = 0; // reached the closing delimiter: nothing left to resume
898 if (!*line) continue;
904 // swap arg1/arg2 so they're back in order arguments occur.
906 command->arg1 = command->arg2;
910 for (line++; *line; line++) {
913 if (isspace(*line) && *line != '\n') continue;
915 if (0 <= (l = stridx("igp", *line))) command->sflags |= 1<<l; // i, g, p map to bits 1, 2, 4
916 else if (!(command->sflags>>3) && 0<(l = strtol(line, &line, 10))) {
917 command->sflags |= l << 3; // occurrence number is stored above the three flag bits
922 // We deferred actually parsing the regex until we had the s///i flag
923 // allocating the space was done by extend_string() above
924 if (!*TT.remember) command->arg1 = 0; // empty regex: offset 0, nothing compiled
925 else xregcomp((void *)(command->arg1 + (char *)command), TT.remember,
926 ((toys.optflags & FLAG_r)*REG_EXTENDED)|((command->sflags&1)*REG_ICASE));
933 } else if (c == 'w') {
937 // Since s/// uses arg1 and arg2, and w needs a persistent filehandle and
938 // eol status, and to retain the filename for error messages, we'd need
939 // to go up to arg5 just for this. Compromise: dynamically allocate the
940 // filehandle and eol status.
943 while (isspace(*line)) line++;
944 if (!*line) goto error; // a filename is required
945 for (cc = line; *cc; cc++) if (*cc == '\\' && cc[1] == ';') break; // the name ends at "\;" or at end of line
948 fd = xcreate(line, O_WRONLY|O_CREAT|O_TRUNC, 0644); // the file is created/truncated here, at parse time
951 command->w = reg - (char *)command;
952 command = xrealloc(command, command->w+(cc-line)+6); // room for the name plus the 4-byte fd and 1-byte newline status that process_line() reads from this offset (the sixth byte is presumably a terminator)
953 reg = command->w + (char *)command;
958 memcpy(reg, line, delim);
963 if (delim) line += 2;
964 } else if (c == 'y') {
968 if (!(s = unescape_delimited_string(&line, &delim))) goto error;
969 command->arg1 = reg-(char *)command;
971 reg = extend_string((void *)&command, s, reg-(char *)command, len);
973 command->arg2 = reg-(char *)command;
974 if (!(s = unescape_delimited_string(&line, &delim))) goto error;
975 if (len != strlen(s)) goto error; // both character sets must have the same length
976 reg = extend_string((void *)&command, s, reg-(char*)command, len);
978 } else if (strchr("abcirtTw:", c)) {
981 // trim leading spaces
982 while (isspace(*line) && *line != '\n') line++;
984 // Resume logic differs from 's' case because we don't add a newline
985 // unless it's after something, so we add it on return instead.
989 // btT: end with space or semicolon, aicrw continue to newline.
990 if (!(end = strcspn(line, strchr(":btT", c) ? "; \t\r\n\v\f" : "\n"))) {
991 // Argument's optional for btT
992 if (strchr("btT", c)) continue;
993 else if (!command->arg1) break; // other commands here need an argument
996 // Extend allocation to include new string. We use offsets instead of
997 // pointers so realloc() moving stuff doesn't break things. Ok to write
998 // \n over NUL terminator because call to extend_string() adds it back.
999 if (!command->arg1) command->arg1 = reg - (char*)command;
1000 else if (*(command->arg1+(char *)command)) *(reg++) = '\n';
1005 reg = extend_string((void *)&command, line, reg - (char *)command, end);
1007 // Recopy data to remove escape sequences and handle line continuation.
1008 if (strchr("aci", c)) {
1010 for (i = end; i; i--) {
1011 if ((*reg++ = *line++)=='\\') {
1013 // escape at end of line: resume if -e escaped literal newline,
1014 // else request callback and resume with next line
1024 if (!(reg[-1] = unescape(*line))) reg[-1] = *line; // unescape() returned 0: keep the escaped character itself
1031 // Commands that take no arguments
1032 } else if (!strchr("{dDgGhHlnNpPqx=", c)) break;
1036 error_exit("bad pattern '%s'@%ld (%c)", errstart, line-errstart+1L, *line); // reports the script line, the 1-based position and the offending character
1041 struct arg_list *al;
1042 char **args = toys.optargs;
1044 // Lie to autoconf when it asks stupid questions, so configure regexes
1045 // that look for "GNU sed version %f" greater than some old buggy number
1046 // don't fail us for not matching their narrow expectations.
1047 if (toys.optflags & FLAG_version) {
1048 xprintf("This is not GNU sed version 9.0\n");
1052 // Parse pattern into commands.
1054 // If no -e or -f, first argument is the pattern.
1055 if (!TT.e && !TT.f) {
1056 if (!*toys.optargs) error_exit("no pattern");
1057 (TT.e = xzalloc(sizeof(struct arg_list)))->arg = *(args++); // consume the first argument as the script; the remaining args are the input files
1060 // Option parsing infrastructure can't interlace "-e blah -f blah -e blah"
1061 // so handle all -e, then all -f. (At least the behavior's consistent.)
1063 for (al = TT.e; al; al = al->next) parse_pattern(&al->arg, strlen(al->arg));
1064 for (al = TT.f; al; al = al->next)
1065 do_lines(strcmp(al->arg, "-") ? xopen(al->arg, O_RDONLY) : 0,parse_pattern); // a script file named "-" is read from fd 0
1066 parse_pattern(0, 0); // NULL pline makes parse_pattern() parse an empty line
1067 dlist_terminate(TT.pattern);
1068 if (TT.nextlen) error_exit("no }"); // TT.nextlen counted unclosed "{" during parsing
1071 TT.remember = xstrdup(""); // parsing borrowed TT.remember; start processing with an empty remembered line
1073 // Inflict pattern upon input files
1074 loopfiles_rw(args, O_RDONLY, 0, 0, do_sed);
1076 if (!(toys.optflags & FLAG_i)) process_line(0, 0); // NULL pline signals EOF so the last deferred line is processed
1078 // todo: need to close fd when done for TOYBOX_FREE?