1 /* sed.c - stream editor. Thing that does s/// and other stuff.
3 * Copyright 2014 Rob Landley <rob@landley.net>
5 * See http://pubs.opengroup.org/onlinepubs/9699919799/utilities/sed.html
7 * TODO: lines > 2G could wrap signed int length counters. Not just getline()
9 * TODO: make y// handle unicode
10 * TODO: handle error return from emit(), error_msg/exit consistently
11 * What's the right thing to do for -i when write fails? Skip to next?
13 USE_SED(NEWTOY(sed, "(version)e*f*inEr[+Er]", TOYFLAG_USR|TOYFLAG_BIN|TOYFLAG_LOCALE))
19 usage: sed [-inrE] [-e SCRIPT]...|SCRIPT [-f SCRIPT_FILE]... [FILE...]
21 Stream editor. Apply one or more editing SCRIPTs to each line of input
22 (from FILE or stdin) producing output (by default to stdout).
25 -f add contents of SCRIPT_FILE to list
26 -i Edit each file in place.
27 -n No default output. (Use the p command to output matched lines.)
28 -r Use extended regular expression syntax.
30 -s Treat input files separately (implied by -i)
32 A SCRIPT is a series of one or more COMMANDs separated by newlines or
33 semicolons. All -e SCRIPTs are concatenated together as if separated
34 by newlines, followed by all lines from -f SCRIPT_FILEs, in order.
35 If no -e or -f SCRIPTs are specified, the first argument is the SCRIPT.
37 Each COMMAND may be preceded by an address which limits the command to
38 apply only to the specified line(s). Commands without an address apply to
39 every line. Addresses are of the form:
41 [ADDRESS[,ADDRESS]]COMMAND
43 The ADDRESS may be a decimal line number (starting at 1), a /regular
44 expression/ within a pair of forward slashes, or the character "$" which
45 matches the last line of input. (In -s or -i mode this matches the last
46 line of each file, otherwise just the last line of the last file.) A single
47 address matches one line, a pair of comma separated addresses match
48 everything from the first address to the second address (inclusive). If
49 both addresses are regular expressions, more than one range of lines in
52 REGULAR EXPRESSIONS in sed are started and ended by the same character
53 (traditionally / but anything except a backslash or a newline works).
54 Backslashes may be used to escape the delimiter if it occurs in the
55 regex, and for the usual printf escapes (\abcefnrtv and octal, hex,
56 and unicode). An empty regex repeats the previous one. ADDRESS regexes
57 (above) require the first delimiter to be escaped with a backslash when
58 it isn't a forward slash (to distinguish it from the COMMANDs below).
60 Sed mostly operates on individual lines one at a time. It reads each line,
61 processes it, and either writes it to the output or discards it before
62 reading the next line. Sed can remember one additional line in a separate
63 buffer (using the h, H, g, G, and x commands), and can read the next line
64 of input early (using the n and N commands), but other than that command
65 scripts operate on individual lines of text.
67 Each COMMAND starts with a single character. The following commands take
70 { Start a new command block, continuing until a corresponding "}".
71 Command blocks may nest. If the block has an address, commands within
72 the block are only run for lines within the block's address range.
74 } End command block (this command cannot have an address)
76 d Delete this line and move on to the next one
77 (ignores remaining COMMANDs)
79 D Delete one line of input and restart command SCRIPT (same as "d"
80 unless you've glued lines together with "N" or similar)
82 g Get remembered line (overwriting current line)
84 G Get remembered line (appending to current line)
86 h Remember this line (overwriting remembered line)
88 H Remember this line (appending to remembered line, if any)
90 l Print line, escaping \abfrtv (but not newline), octal escaping other
91 nonprintable characters, wrapping lines to terminal width with a
92 backslash, and appending $ to actual end of line.
94 n Print default output and read next line, replacing current line
95 (If no next line available, quit processing script)
97 N Append next line of input to this line, separated by a newline
98 (This advances the line counter for address matching and "=", if no
99 next line available quit processing script without default output)
103 P Print this line up to first newline (from "N")
105 q Quit (print default output, no more commands processed or lines read)
107 x Exchange this line with remembered line (overwrite in both directions)
109 = Print the current line number (followed by a newline)
111 The following commands (may) take an argument. The "text" arguments (to
112 the "a", "c", and "i" commands) may end with an unescaped "\" to append
113 the next line (for which leading whitespace is not skipped), and also
114 treat ";" as a literal character (use "\;" instead).
116 a [text] Append text to output before attempting to read next line
118 b [label] Branch, jumps to :label (or with no label, to end of SCRIPT)
120 c [text] Delete line, output text at end of matching address range
121 (ignores remaining COMMANDs)
125 r [file] Append contents of file to output before attempting to read
128 s/S/R/F Search for regex S, replace matched text with R using flags F.
129 The first character after the "s" (anything but newline or
130 backslash) is the delimiter, escape with \ to use normally.
132 The replacement text may contain "&" to substitute the matched
133 text (escape it with backslash for a literal &), or \1 through
134 \9 to substitute a parenthetical subexpression in the regex.
135 You can also use the normal backslash escapes such as \n and
136 a backslash at the end of the line appends the next line.
140 [0-9] A number, substitute only that occurrence of pattern
141 g Global, substitute all occurrences of pattern
142 i Ignore case when matching
143 p Print the line if match was found and replaced
144 w [file] Write (append) line to file if match replaced
146 t [label] Test, jump to :label only if an "s" command found a match in
147 this line since last test (replacing with same text counts)
149 T [label] Test false, jump only if "s" hasn't found a match.
151 w [file] Write (append) line to file
153 y/old/new/ Change each character in 'old' to corresponding character
154 in 'new' (with standard backslash escapes, delimiter can be
155 any repeated character except \ or \n)
157 : [label] Labeled target for jump commands
159 # Comment, ignore rest of this line of SCRIPT
161 Deviations from posix: allow extended regular expressions with -r,
162 editing in place with -i, separate with -s, printf escapes in text, line
163 continuations, semicolons after all commands, 2-address anywhere an
164 address is allowed, "T" command, multiline continuations for [abc],
165 \; to end [abc] argument before end of line.
175 // processed pattern list
176 struct double_list *pattern;
178 char *nextline, *remember;
179 void *restart, *lastregex;
180 long nextlen, rememberlen, count;
185 // Linked list of parsed sed commands. Offset fields indicate location where
186 // regex or string starts, ala offset+(char *)struct, because we realloc()
187 // these to expand them for multiline inputs, and pointers would have to be
188 // individually adjusted.
191 struct sedcmd *next, *prev;
193 // Begin and end of each match
194 long lmatch[2]; // line number of match
195 int rmatch[2]; // offset of regex struct for prefix matches (/abc/,/def/p)
196 int arg1, arg2, w; // offset of two arguments per command, plus s//w filename
198 unsigned sflags; // s///flag bits: i=1, g=2, p=4
202 // Write out line with potential embedded NUL, handling eol/noeol
203 static int emit(char *line, long len, int eol)
205 int l, old = line[len];
207 if (TT.noeol && !writeall(TT.fdout, "\n", 1)) return 1;
209 if (eol) line[len++] = '\n';
211 l = writeall(TT.fdout, line, len);
212 if (eol) line[len-1] = old;
214 perror_msg("short write");
222 // Extend allocation to include new string, with newline between if newlen<0
224 static char *extend_string(char **old, char *new, int oldlen, int newlen)
226 int newline = newlen < 0;
229 if (newline) newlen = -newlen;
230 s = *old = xrealloc(*old, oldlen+newlen+newline+1);
231 if (newline) s[oldlen++] = '\n';
232 memcpy(s+oldlen, new, newlen);
233 s[oldlen+newlen] = 0;
235 return s+oldlen+newlen+1;
238 // An empty regex repeats the previous one
239 static void *get_regex(void *trump, int offset)
242 if (!TT.lastregex) error_exit("no previous regex");
246 return TT.lastregex = offset+(char *)trump;
249 // Apply pattern to line from input file
250 static void process_line(char **pline, long plen)
253 struct append *next, *prev;
257 char *line = TT.nextline;
258 long len = TT.nextlen;
259 struct sedcmd *command;
260 int eol = 0, tea = 0;
262 // Grab next line for deferred processing (EOF detection: we get a NULL
263 // pline at EOF to flush last line). Note that only end of _last_ input
264 // file matches $ (unless we're doing -i).
268 TT.nextline = *pline;
273 if (!line || !len) return;
274 if (line[len-1] == '\n') line[--len] = eol++;
277 // The restart-1 is because we added one to make sure it wasn't NULL,
278 // otherwise N as last command would restart script
279 command = TT.restart ? ((struct sedcmd *)TT.restart)-1 : (void *)TT.pattern;
283 char *str, c = command->c;
285 // Have we got a line or regex matching range for this rule?
286 if (*command->lmatch || *command->rmatch) {
290 // In a match that might end?
292 if (!(lm = command->lmatch[1])) {
293 if (!command->rmatch[1]) command->hit = 0;
295 void *rm = get_regex(command, command->rmatch[1]);
297 // regex match end includes matching line, so defer deactivation
298 if (line && !regexec0(rm, line, len, 0, 0, 0)) miss = 1;
300 } else if (lm > 0 && lm < TT.count) command->hit = 0;
302 // Start a new match?
304 if (!(lm = *command->lmatch)) {
305 void *rm = get_regex(command, *command->rmatch);
307 if (line && !regexec0(rm, line, len, 0, 0, 0)) command->hit++;
308 } else if (lm == TT.count || (lm == -1 && !pline)) command->hit++;
310 if (!command->lmatch[1] && !command->rmatch[1]) miss = 1;
314 lm = !(command->hit ^ command->not);
316 // Deferred disable from regex end match
317 if (miss || command->lmatch[1] == TT.count) command->hit = 0;
320 // Handle skipping curly bracket command group
325 command = command->next;
326 if (command->c == '{') curly++;
327 if (command->c == '}') curly--;
330 command = command->next;
335 // A deleted line can still update line match state for later commands
337 command = command->next;
343 if (c=='a' || c=='r') {
344 struct append *a = xzalloc(sizeof(struct append));
345 if (command->arg1) a->str = command->arg1+(char *)command;
347 dlist_add_nomalloc((void *)&append, (void *)a);
348 } else if (c=='b' || c=='t' || c=='T') {
351 if (c != 'b') tea = 0;
352 if (c=='b' || t^(c=='T')) {
353 if (!command->arg1) break;
354 str = command->arg1+(char *)command;
355 for (command = (void *)TT.pattern; command; command = command->next)
356 if (command->c == ':' && !strcmp(command->arg1+(char *)command, str))
358 if (!command) error_exit("no :%s", str);
361 str = command->arg1+(char *)command;
362 if (!command->hit) emit(str, strlen(str), 1);
371 // Delete up to \n or end of buffer
373 while ((str-line)<len) if (*(str++) == '\n') break;
375 memmove(line, str, len);
377 // if "delete" blanks line, disable further processing
378 // otherwise trim and restart script
384 command = (void *)TT.pattern;
389 line = xstrdup(TT.remember);
390 len = TT.rememberlen;
392 line = xrealloc(line, len+TT.rememberlen+2);
394 memcpy(line+len, TT.remember, TT.rememberlen);
395 line[len += TT.rememberlen] = 0;
398 TT.remember = xstrdup(line);
399 TT.rememberlen = len;
401 TT.remember = xrealloc(TT.remember, TT.rememberlen+len+2);
402 TT.remember[TT.rememberlen++] = '\n';
403 memcpy(TT.remember+TT.rememberlen, line, len);
404 TT.remember[TT.rememberlen += len] = 0;
406 str = command->arg1+(char *)command;
407 emit(str, strlen(str), 1);
412 terminal_size(&TT.xx, 0);
413 if (!TT.xx) TT.xx = 80;
414 if (TT.xx > sizeof(toybuf)-10) TT.xx = sizeof(toybuf)-10;
415 if (TT.xx > 4) TT.xx -= 4;
418 for (i = off = 0; i<len; i++) {
420 toybuf[off++] = '\\';
421 emit(toybuf, off, 1);
424 x = stridx("\\\a\b\f\r\t\v", line[i]);
426 toybuf[off++] = '\\';
427 toybuf[off++] = "\\abfrtv"[x];
428 } else if (line[i] >= ' ') toybuf[off++] = line[i];
429 else off += sprintf(toybuf+off, "\\%03o", line[i]);
432 emit(toybuf, off, 1);
434 TT.restart = command->next+1;
438 // Can't just grab next line because we could have multiple N and
439 // we need to actually read ahead to get N;$p EOF detection right.
441 TT.restart = command->next+1;
442 extend_string(&line, TT.nextline, len, -TT.nextlen);
445 TT.nextlen += len + 1;
449 // Pending append goes out right after N
451 } else if (c=='p' || c=='P') {
452 char *l = (c=='P') ? strchr(line, '\n') : 0;
454 if (emit(line, l ? l-line : len, eol)) break;
456 if (pline) *pline = (void *)1;
463 char *rline = line, *new = command->arg2 + (char *)command, *swap, *rswap;
464 regmatch_t *match = (void *)toybuf;
465 regex_t *reg = get_regex(command, command->arg1);
466 int mflags = 0, count = 0, zmatch = 1, rlen = len, mlen, off, newlen;
468 // Find match in remaining line (up to remaining len)
469 while (!regexec0(reg, rline, rlen, 10, match, mflags)) {
472 // Zero length matches don't count immediately after a previous match
473 mlen = match[0].rm_eo-match[0].rm_so;
474 if (!mlen && !zmatch) {
481 // If we're replacing only a specific match, skip if this isn't it
482 off = command->sflags>>3;
483 if (off && off != ++count) {
484 rline += match[0].rm_eo;
485 rlen -= match[0].rm_eo;
489 // The fact getline() can allocate unbounded amounts of memory is
490 // a bigger issue, but while we're here check for integer overflow
491 if (match[0].rm_eo > INT_MAX) perror_exit(0);
493 // newlen = strlen(new) but with \1 and & and printf escapes
494 for (off = newlen = 0; new[off]; off++) {
497 if (new[off] == '&') cc = 0;
498 else if (new[off] == '\\') cc = new[++off] - '0';
499 if (cc < 0 || cc > 9) {
503 newlen += match[cc].rm_eo-match[cc].rm_so;
506 // Allocate new size, copy start/end around match. (Can't extend in
507 // place because backrefs may refer to text after it's overwritten.)
509 swap = xmalloc(len+1);
510 rswap = swap+(rline-line)+match[0].rm_so;
511 memcpy(swap, line, (rline-line)+match[0].rm_so);
512 memcpy(rswap+newlen, rline+match[0].rm_eo, (rlen -= match[0].rm_eo)+1);
514 // copy in new replacement text
515 for (off = mlen = 0; new[off]; off++) {
518 if (new[off] == '\\') {
519 cc = new[++off] - '0';
521 if (!(rswap[mlen++] = unescape(new[off])))
522 rswap[mlen-1] = new[off];
525 } else if (match[cc].rm_so == -1) error_exit("no s//\\%d/", cc);
526 } else if (new[off] != '&') {
527 rswap[mlen++] = new[off];
532 ll = match[cc].rm_eo-match[cc].rm_so;
533 memcpy(rswap+mlen, rline+match[cc].rm_so, ll);
537 rline = rswap+newlen;
541 // Stop after first substitution unless we have flag g
542 if (!(command->sflags & 2)) break;
547 if (command->sflags & 4) emit(line, len, eol);
550 if (command->w) goto writenow;
557 // Swap out emit() context
561 // We save filehandle and newline status before filename
562 name = command->w + (char *)command;
563 memcpy(&TT.fdout, name, 4);
565 TT.noeol = *(name++);
567 // write, then save/restore context
568 if (emit(line, len, eol))
569 perror_exit("w '%s'", command->arg1+(char *)command);
570 *(--name) = TT.noeol;
574 long swap = TT.rememberlen;
579 TT.rememberlen = len;
582 char *from, *to = (char *)command;
585 from = to+command->arg1;
588 for (i = 0; i < len; i++) {
589 j = stridx(from, line[i]);
590 if (j != -1) line[i] = to[j];
593 sprintf(toybuf, "%ld", TT.count);
594 emit(toybuf, strlen(toybuf), 1);
597 command = command->next;
600 if (line && !(toys.optflags & FLAG_n)) emit(line, len, eol);
603 if (dlist_terminate(append)) while (append) {
604 struct append *a = append->next;
607 int fd = open(append->str, O_RDONLY);
609 // Force newline if noeol pending
611 if (TT.noeol) xwrite(TT.fdout, "\n", 1);
613 xsendfile(fd, TT.fdout);
616 } else if (append->str) emit(append->str, strlen(append->str), 1);
617 else emit(line, 0, 0);
624 // Callback called on each input file
625 static void do_sed(int fd, char *name)
627 int i = toys.optflags & FLAG_i;
631 struct sedcmd *command;
633 if (!fd && !strcmp(name, "-")) {
634 error_msg("-i on stdin");
637 TT.fdout = copy_tempfile(fd, name, &tmp);
639 for (command = (void *)TT.pattern; command; command = command->next)
642 do_lines(fd, process_line);
645 replace_tempfile(-1, TT.fdout, &tmp);
648 TT.nextlen = TT.noeol = 0;
652 // Copy chunk of string between two delimiters, converting printf escapes.
653 // returns processed copy of string (0 if error), *pstr advances to next
654 // unused char. if delim (or *delim) is 0 uses/saves starting char as delimiter
655 // if regex, ignore delimiter in [ranges]
656 static char *unescape_delimited_string(char **pstr, char *delim)
658 char *to, *from, mode = 0, d;
661 if (!delim || !*delim) {
662 if (!(d = *(from++))) return 0;
663 if (d == '\\') d = *(from++);
664 if (!d || d == '\\') return 0;
665 if (delim) *delim = d;
667 to = delim = xmalloc(strlen(*pstr)+1);
669 while (mode || *from != d) {
670 if (!*from) return 0;
672 // delimiter in regex character range doesn't count
673 if (!mode && *from == '[') {
675 if (from[1]=='-' || from[1]==']') *(to++) = *(from++);
676 } else if (mode && *from == ']') mode = 0;
677 // Length 1 range (X-X with same X) is "undefined" and makes regcomp err,
678 // but the perl build does it, so we need to filter it out.
679 else if (mode && *from == '-' && from[-1] == from[1]) {
682 } else if (*from == '\\') {
683 if (!from[1]) return 0;
685 // Check escaped end delimiter before printf style escapes.
686 if (from[1] == d) from++;
687 else if (from[1]=='\\') *(to++) = *(from++);
689 char c = unescape(from[1]);
695 } else if (!mode) *(to++) = *(from++);
706 // Translate pattern strings into command structures. Each command structure
707 // is a single allocation (which requires some math and remalloc at times).
708 static void parse_pattern(char **pline, long len)
710 struct sedcmd *command = (void *)TT.pattern;
711 char *line, *reg, c, *errstart;
714 line = errstart = pline ? *pline : "";
715 if (len && line[len-1]=='\n') line[--len] = 0;
717 // Append this line to previous multiline command? (hit indicates type.)
718 // During parsing "hit" stores data about line continuations, but in
719 // process_line() it means the match range attached to this command
720 // is active, so processing the continuation must zero it again.
721 if (command && command->prev->hit) {
722 // Remove half-finished entry from list so remalloc() doesn't confuse it
723 TT.pattern = TT.pattern->prev;
724 command = dlist_pop(&TT.pattern);
726 reg = (char *)command;
727 reg += command->arg1 + strlen(reg + command->arg1);
729 // Resume parsing for 'a' or 's' command. (Only two that can do this.)
730 // TODO: using 256 to indicate 'a' means our s/// delimiter can't be
731 // a unicode character.
732 if (command->hit < 256) goto resume_s;
736 // Loop through commands in this line.
740 if (command) dlist_add_nomalloc(&TT.pattern, (void *)command);
742 // If there's no more data on this line, return.
744 while (isspace(*line) || *line == ';') line++;
745 if (*line == '#') while (*line && *line != '\n') line++;
750 // We start by writing data into toybuf. Later we'll allocate the
754 memset(toybuf, 0, sizeof(struct sedcmd));
755 command = (void *)toybuf;
756 reg = toybuf + sizeof(struct sedcmd);
758 // Parse address range (if any)
759 for (i = 0; i < 2; i++) {
760 if (*line == ',') line++;
763 if (isdigit(*line)) command->lmatch[i] = strtol(line, &line, 0);
764 else if (*line == '$') {
765 command->lmatch[i] = -1;
767 } else if (*line == '/' || *line == '\\') {
770 if (!(s = unescape_delimited_string(&line, 0))) goto error;
771 if (!*s) command->rmatch[i] = 0;
773 xregcomp((void *)reg, s, (toys.optflags & FLAG_r)*REG_EXTENDED);
774 command->rmatch[i] = reg-toybuf;
775 reg += sizeof(regex_t);
781 while (isspace(*line)) line++;
784 while (*line == '!') {
788 while (isspace(*line)) line++;
790 c = command->c = *(line++);
791 if (strchr("}:", c) && i) break;
792 if (strchr("aiqr=", c) && i>1) break;
794 // Add step to pattern
795 command = xmemdup(toybuf, reg-toybuf);
796 reg = (reg-toybuf) + (char *)command;
798 // Parse arguments by command type
799 if (c == '{') TT.nextlen++;
801 if (!TT.nextlen--) break;
802 } else if (c == 's') {
803 char *end, delim = 0;
805 // s/pattern/replacement/flags
807 // line continuations use arg1 (back at the start of the function),
808 // so let's fill out arg2 first (since the regex part can't be multiple
809 // lines) and swap them back later.
811 // get pattern (just record, we parse it later)
812 command->arg2 = reg - (char *)command;
813 if (!(TT.remember = unescape_delimited_string(&line, &delim)))
816 reg += sizeof(regex_t);
817 command->arg1 = reg-(char *)command;
818 command->hit = delim;
820 // get replacement - don't replace escapes yet because \1 and \& need
821 // processing later, after we replace \\ with \ we can't tell \\1 from \1
823 while (*end != command->hit) {
824 if (!*end) goto error;
825 if (*end++ == '\\') {
826 if (!*end || *end == '\n') {
834 reg = extend_string((void *)&command, line, reg-(char *)command,end-line);
836 // line continuation? (note: '\n' can't be a valid delim).
837 if (*line == command->hit) command->hit = 0;
839 if (!*line) continue;
845 // swap arg1/arg2 so they're back in order arguments occur.
847 command->arg1 = command->arg2;
851 for (line++; *line; line++) {
854 if (isspace(*line) && *line != '\n') continue;
856 if (0 <= (l = stridx("igp", *line))) command->sflags |= 1<<l;
857 else if (!(command->sflags>>3) && 0<(l = strtol(line, &line, 10))) {
858 command->sflags |= l << 3;
863 // We deferred actually parsing the regex until we had the s///i flag
864 // allocating the space was done by extend_string() above
865 if (!*TT.remember) command->arg1 = 0;
866 else xregcomp((void *)(command->arg1 + (char *)command), TT.remember,
867 ((toys.optflags & FLAG_r)*REG_EXTENDED)|((command->sflags&1)*REG_ICASE));
874 } else if (c == 'w') {
878 // Since s/// uses arg1 and arg2, and w needs a persistent filehandle and
879 // eol status, and to retain the filename for error messages, we'd need
880 // to go up to arg5 just for this. Compromise: dynamically allocate the
881 // filehandle and eol status.
884 while (isspace(*line)) line++;
885 if (!*line) goto error;
886 for (cc = line; *cc; cc++) if (*cc == '\\' && cc[1] == ';') break;
889 fd = xcreate(line, O_WRONLY|O_CREAT|O_TRUNC, 0644);
892 command->w = reg - (char *)command;
893 command = xrealloc(command, command->w+(cc-line)+6);
894 reg = command->w + (char *)command;
899 memcpy(reg, line, delim);
904 if (delim) line += 2;
905 } else if (c == 'y') {
909 if (!(s = unescape_delimited_string(&line, &delim))) goto error;
910 command->arg1 = reg-(char *)command;
912 reg = extend_string((void *)&command, s, reg-(char *)command, len);
914 command->arg2 = reg-(char *)command;
915 if (!(s = unescape_delimited_string(&line, &delim))) goto error;
916 if (len != strlen(s)) goto error;
917 reg = extend_string((void *)&command, s, reg-(char*)command, len);
919 } else if (strchr("abcirtTw:", c)) {
922 // trim leading spaces
923 while (isspace(*line) && *line != '\n') line++;
925 // Resume logic differs from 's' case because we don't add a newline
926 // unless it's after something, so we add it on return instead.
930 // btT: end with space or semicolon, aicrw continue to newline.
931 if (!(end = strcspn(line, strchr(":btT", c) ? "; \t\r\n\v\f" : "\n"))) {
932 // Argument's optional for btT
933 if (strchr("btT", c)) continue;
934 else if (!command->arg1) break;
937 // Extend allocation to include new string. We use offsets instead of
938 // pointers so realloc() moving stuff doesn't break things. Ok to write
939 // \n over NUL terminator because call to extend_string() adds it back.
940 if (!command->arg1) command->arg1 = reg - (char*)command;
941 else if (*(command->arg1+(char *)command)) *(reg++) = '\n';
946 reg = extend_string((void *)&command, line, reg - (char *)command, end);
948 // Recopy data to remove escape sequences and handle line continuation.
949 if (strchr("aci", c)) {
951 for (i = end; i; i--) {
952 if ((*reg++ = *line++)=='\\') {
954 // escape at end of line: resume if -e escaped literal newline,
955 // else request callback and resume with next line
965 if (!(reg[-1] = unescape(*line))) reg[-1] = *line;
972 // Commands that take no arguments
973 } else if (!strchr("{dDgGhHlnNpPqx=", c)) break;
977 error_exit("bad pattern '%s'@%ld (%c)", errstart, line-errstart+1L, *line);
983 char **args = toys.optargs;
985 // Lie to autoconf when it asks stupid questions, so configure regexes
986 // that look for "GNU sed version %f" greater than some old buggy number
987 // don't fail us for not matching their narrow expectations.
988 if (toys.optflags & FLAG_version) {
989 xprintf("This is not GNU sed version 9.0\n");
993 // Parse pattern into commands.
995 // If no -e or -f, first argument is the pattern.
996 if (!TT.e && !TT.f) {
997 if (!*toys.optargs) error_exit("no pattern");
998 (TT.e = xzalloc(sizeof(struct arg_list)))->arg = *(args++);
1001 // Option parsing infrastructure can't interlace "-e blah -f blah -e blah"
1002 // so handle all -e, then all -f. (At least the behavior's consistent.)
1004 for (al = TT.e; al; al = al->next) parse_pattern(&al->arg, strlen(al->arg));
1005 for (al = TT.f; al; al = al->next) do_lines(xopenro(al->arg), parse_pattern);
1006 parse_pattern(0, 0);
1007 dlist_terminate(TT.pattern);
1008 if (TT.nextlen) error_exit("no }");
1011 TT.remember = xstrdup("");
1013 // Inflict pattern upon input files
1014 loopfiles_rw(args, O_RDONLY, 0, 0, do_sed);
1016 if (!(toys.optflags & FLAG_i)) process_line(0, 0);
1018 // todo: need to close fd when done for TOYBOX_FREE?