1 /* sed.c - stream editor. Thing that does s/// and other stuff.
3 * Copyright 2014 Rob Landley <rob@landley.net>
5 * See http://pubs.opengroup.org/onlinepubs/9699919799/utilities/sed.html
7 * TODO: lines > 2G could wrap signed int length counters. Not just getline()
9 * TODO: make y// handle unicode
10 * TODO: handle error return from emit(), error_msg/exit consistently
11 * What's the right thing to do for -i when write fails? Skip to next?
13 USE_SED(NEWTOY(sed, "(version)e*f*inEr[+Er]", TOYFLAG_USR|TOYFLAG_BIN|TOYFLAG_LOCALE))
19 usage: sed [-inrE] [-e SCRIPT]...|SCRIPT [-f SCRIPT_FILE]... [FILE...]
21 Stream editor. Apply one or more editing SCRIPTs to each line of input
22 (from FILE or stdin) producing output (by default to stdout).
25 -f add contents of SCRIPT_FILE to list
26 -i Edit each file in place.
27 -n No default output. (Use the p command to output matched lines.)
28 -r Use extended regular expression syntax.
30 -s Treat input files separately (implied by -i)
32 A SCRIPT is a series of one or more COMMANDs separated by newlines or
33 semicolons. All -e SCRIPTs are concatenated together as if separated
34 by newlines, followed by all lines from -f SCRIPT_FILEs, in order.
35 If no -e or -f SCRIPTs are specified, the first argument is the SCRIPT.
37 Each COMMAND may be preceded by an address which limits the command to
38 apply only to the specified line(s). Commands without an address apply to
39 every line. Addresses are of the form:
41 [ADDRESS[,ADDRESS]]COMMAND
43 The ADDRESS may be a decimal line number (starting at 1), a /regular
44 expression/ within a pair of forward slashes, or the character "$" which
45 matches the last line of input. (In -s or -i mode this matches the last
46 line of each file, otherwise just the last line of the last file.) A single
47 address matches one line, a pair of comma separated addresses match
48 everything from the first address to the second address (inclusive). If
49 both addresses are regular expressions, more than one range of lines in
52 REGULAR EXPRESSIONS in sed are started and ended by the same character
53 (traditionally / but anything except a backslash or a newline works).
54 Backslashes may be used to escape the delimiter if it occurs in the
55 regex, and for the usual printf escapes (\abcefnrtv and octal, hex,
56 and unicode). An empty regex repeats the previous one. ADDRESS regexes
57 (above) require the first delimiter to be escaped with a backslash when
58 it isn't a forward slash (to distinguish it from the COMMANDs below).
60 Sed mostly operates on individual lines one at a time. It reads each line,
61 processes it, and either writes it to the output or discards it before
62 reading the next line. Sed can remember one additional line in a separate
63 buffer (using the h, H, g, G, and x commands), and can read the next line
64 of input early (using the n and N commands), but other than that command
65 scripts operate on individual lines of text.
67 Each COMMAND starts with a single character. The following commands take
70 { Start a new command block, continuing until a corresponding "}".
71 Command blocks may nest. If the block has an address, commands within
72 the block are only run for lines within the block's address range.
74 } End command block (this command cannot have an address)
76 d Delete this line and move on to the next one
77 (ignores remaining COMMANDs)
79 D Delete one line of input and restart command SCRIPT (same as "d"
80 unless you've glued lines together with "N" or similar)
82 g Get remembered line (overwriting current line)
84 G Get remembered line (appending to current line)
86 h Remember this line (overwriting remembered line)
88 H Remember this line (appending to remembered line, if any)
90 l Print line, escaping \abfrtv (but not newline), octal escaping other
91 nonprintable characters, wrapping lines to terminal width with a
92 backslash, and appending $ to actual end of line.
94 n Print default output and read next line, replacing current line
95 (If no next line available, quit processing script)
97 N Append next line of input to this line, separated by a newline
98 (This advances the line counter for address matching and "=", if no
99 next line available quit processing script without default output)
103 P Print this line up to first newline (from "N")
105 q Quit (print default output, no more commands processed or lines read)
107 x Exchange this line with remembered line (overwrite in both directions)
109 = Print the current line number (followed by a newline)
111 The following commands (may) take an argument. The "text" arguments (to
112 the "a", "b", and "c" commands) may end with an unescaped "\" to append
113 the next line (for which leading whitespace is not skipped), and also
114 treat ";" as a literal character (use "\;" instead).
116 a [text] Append text to output before attempting to read next line
118 b [label] Branch, jumps to :label (or with no label, to end of SCRIPT)
120 c [text] Delete line, output text at end of matching address range
121 (ignores remaining COMMANDs)
125 r [file] Append contents of file to output before attempting to read
128 s/S/R/F Search for regex S, replace matched text with R using flags F.
129 The first character after the "s" (anything but newline or
130 backslash) is the delimiter, escape with \ to use normally.
132 The replacement text may contain "&" to substitute the matched
133 text (escape it with backslash for a literal &), or \1 through
134 \9 to substitute a parenthetical subexpression in the regex.
135 You can also use the normal backslash escapes such as \n and
136 a backslash at the end of the line appends the next line.
140 [0-9] A number, substitute only that occurrence of pattern
141 g Global, substitute all occurrences of pattern
142 i Ignore case when matching
143 p Print the line if match was found and replaced
144 w [file] Write (append) line to file if match replaced
146 t [label] Test, jump to :label only if an "s" command found a match in
147 this line since last test (replacing with same text counts)
149 T [label] Test false, jump only if "s" hasn't found a match.
151 w [file] Write (append) line to file
153 y/old/new/ Change each character in 'old' to corresponding character
154 in 'new' (with standard backslash escapes, delimiter can be
155 any repeated character except \ or \n)
157 : [label] Labeled target for jump commands
159 # Comment, ignore rest of this line of SCRIPT
161 Deviations from posix: allow extended regular expressions with -r,
162 editing in place with -i, separate with -s, printf escapes in text, line
163 continuations, semicolons after all commands, 2-address anywhere an
164 address is allowed, "T" command, multiline continuations for [abc],
165 \; to end [abc] argument before end of line.
// NOTE(review): these look like the fields accessed elsewhere as TT.* ; the
// opening of the enclosing declaration is not visible here -- confirm.
175 // processed pattern list
176 struct double_list *pattern;
// nextline: input line held back by process_line() for deferred processing.
// remember: the "remembered line" buffer; parse_pattern() also borrows it as
// scratch storage for the not-yet-compiled s/// pattern text.
178 char *nextline, *remember;
// restart: position in the command list to resume at, stored as command+1 so
// a valid value is never NULL.  lastregex: regex most recently returned by
// get_regex().
179 void *restart, *lastregex;
// nextlen/rememberlen: byte lengths of nextline/remember (nextlen is also
// used as the open "{" depth counter while parsing).  count: value compared
// against line-number addresses in process_line().
180 long nextlen, rememberlen, count;
185 // Linked list of parsed sed commands. Offset fields indicate location where
186 // regex or string starts, ala offset+(char *)struct, because we remalloc()
187 // these to expand them for multiline inputs, and pointers would have to be
188 // individually adjusted.
// NOTE(review): other members used by the code (hit, not, c) are declared on
// lines not visible in this excerpt.
191 struct sedcmd *next, *prev;
193 // Begin and end of each match
// lmatch[] holds -1 for the "$" address and 0 when no line number was given.
194 long lmatch[2]; // line number of match
195 int rmatch[2]; // offset of regex struct for prefix matches (/abc/,/def/p)
196 int arg1, arg2, w; // offset of two arguments per command, plus s//w filename
// Bits above the low three hold the "replace only Nth occurrence" number
// (stored as N<<3 by the parser, read back with sflags>>3).
198 unsigned sflags; // s///flag bits: i=1, g=2, p=4
202 // Write out line with potential embedded NUL, handling eol/noeol
// line: buffer to write; line[len] must be writable, because when eol is set
//       it is temporarily overwritten with '\n' and then restored.
// len:  number of bytes of line to write.
// eol:  nonzero to write a trailing newline after the data.
// Returns 1 when writing a pending newline fails; the remaining return paths
// are on lines not visible in this excerpt.
203 static int emit(char *line, long len, int eol)
// Save the byte that the appended newline would clobber.
205 int l, old = line[len];
// TT.noeol set means a newline is still owed to the output: write it first.
207 if (TT.noeol && !writeall(TT.fdout, "\n", 1)) return 1;
// Append the newline in place so data and newline go out in one write.
209 if (eol) line[len++] = '\n';
211 l = writeall(TT.fdout, line, len);
// Put the saved byte back so the caller's buffer is unchanged.
212 if (eol) line[len-1] = old;
214 perror_msg("short write");
222 // Do regex matching handling embedded NUL bytes in string. Note that
223 // neither the pattern nor the match can currently include NUL bytes
224 // (even with wildcards) and string must be null terminated at string[len].
225 // But this can find a match after the first NUL.
// preg/nmatch/pmatch/eflags are handed to regexec() unchanged; len is the
// number of data bytes in string.  Match offsets written to pmatch[] are
// adjusted to be relative to string rather than to the segment searched.
226 static int regex_null(regex_t *preg, char *string, long len, int nmatch,
227 regmatch_t pmatch[], int eflags)
// Measure the current NUL-free segment: advance ll to the next NUL or to len.
239 while (s[ll] && ll<len) ll++;
// regexec() stops at the first NUL, so it only sees this segment.
241 rc = regexec(preg, s, nmatch, pmatch, eflags);
// Rebase every filled-in match slot from "offset within s" to "offset within
// string".  (rc is reused as the loop index here.)
243 for (rc = 0; rc<nmatch && pmatch[rc].rm_so!=-1; rc++) {
244 pmatch[rc].rm_so += s-string;
245 pmatch[rc].rm_eo += s-string;
// The segment ran to the end of the data: nothing further to search.
250 if (ll==len) return rc;
257 // Extend allocation to include new string, with newline between if newlen<0
// old:    address of the allocation to grow; updated with the xrealloc() result.
// new:    bytes to append (copied with memcpy, so it need not be terminated).
// oldlen: number of bytes of *old to keep in front of the appended data.
// newlen: number of bytes to append; pass the negated length to request a
//         '\n' separator between the old and new data.
// Returns a pointer just past the NUL terminator written after the new data.
259 static char *extend_string(char **old, char *new, int oldlen, int newlen)
261 int newline = newlen < 0;
264 if (newline) newlen = -newlen;
// Room for old data, new data, the optional separator and the terminator.
265 s = *old = xrealloc(*old, oldlen+newlen+newline+1);
266 if (newline) s[oldlen++] = '\n';
267 memcpy(s+oldlen, new, newlen);
268 s[oldlen+newlen] = 0;
270 return s+oldlen+newlen+1;
273 // An empty regex repeats the previous one
// trump:  base address of the structure containing the regex.
// offset: byte offset of the regex within trump.
// Returns the regex at trump+offset and records it in TT.lastregex.
// NOTE(review): the condition selecting the "reuse previous regex" path is on
// a line not visible here -- presumably offset == 0; confirm.
274 static void *get_regex(void *trump, int offset)
// Asked to repeat the previous regex but none has been used yet: fatal.
277 if (!TT.lastregex) error_exit("no previous regex");
281 return TT.lastregex = offset+(char *)trump;
284 // Apply pattern to line from input file
// pline: address of the line just read, or NULL at EOF.  The line processed
//        by this call is the one previously saved in TT.nextline/TT.nextlen;
//        *pline is saved for the following call.  Writing (void *)1 to
//        *pline asks do_lines() to stop reading.
// plen:  length of *pline (its use is on lines not visible in this excerpt).
285 static void process_line(char **pline, long plen)
288 struct append *next, *prev;
292 char *line = TT.nextline;
293 long len = TT.nextlen;
294 struct sedcmd *command;
295 int eol = 0, tea = 0;
297 // Grab next line for deferred processing (EOF detection: we get a NULL
298 // pline at EOF to flush last line). Note that only end of _last_ input
299 // file matches $ (unless we're doing -i).
303 TT.nextline = *pline;
// No held-back line yet (or an empty one): nothing to run the script on.
308 if (!line || !len) return;
// Strip the trailing newline (stores 0, since eol is 0 here) and remember in
// eol that one was present.
309 if (line[len-1] == '\n') line[--len] = eol++;
312 // The restart-1 is because we added one to make sure it wasn't NULL,
313 // otherwise N as last command would restart script
314 command = TT.restart ? ((struct sedcmd *)TT.restart)-1 : (void *)TT.pattern;
318 char *str, c = command->c;
320 // Have we got a line or regex matching range for this rule?
321 if (*command->lmatch || *command->rmatch) {
325 // In a match that might end?
327 if (!(lm = command->lmatch[1])) {
// No second address at all: the match does not carry over.
328 if (!command->rmatch[1]) command->hit = 0;
330 void *rm = get_regex(command, command->rmatch[1]);
332 // regex match end includes matching line, so defer deactivation
333 if (line && !regex_null(rm, line, len, 0, 0, 0)) miss = 1;
// Numeric end address already passed: range is over.
335 } else if (lm > 0 && lm < TT.count) command->hit = 0;
337 // Start a new match?
339 if (!(lm = *command->lmatch)) {
340 void *rm = get_regex(command, *command->rmatch);
342 if (line && !regex_null(rm, line, len, 0, 0, 0)) command->hit++;
// Line-number address matches the current count, or "$" (-1) at EOF.
343 } else if (lm == TT.count || (lm == -1 && !pline)) command->hit++;
// Single-address command: the match lasts for this line only.
345 if (!command->lmatch[1] && !command->rmatch[1]) miss = 1;
// lm becomes 1 when hit and not are equal (treating both as 0/1 flags).
// NOTE(review): presumably "skip this command"; the test using lm is not
// visible here.
349 lm = !(command->hit ^ command->not);
351 // Deferred disable from regex end match
352 if (miss || command->lmatch[1] == TT.count) command->hit = 0;
355 // Handle skipping curly bracket command group
360 command = command->next;
// Track nesting depth so the matching "}" is found.
361 if (command->c == '{') curly++;
362 if (command->c == '}') curly--;
365 command = command->next;
370 // A deleted line can still update line match state for later commands
372 command = command->next;
// a/r: queue an append record for later output; str stays NULL (xzalloc)
// when the command has no argument.
378 if (c=='a' || c=='r') {
379 struct append *a = xzalloc(sizeof(struct append));
380 if (command->arg1) a->str = command->arg1+(char *)command;
382 dlist_add_nomalloc((void *)&append, (void *)a);
383 } else if (c=='b' || c=='t' || c=='T') {
// t and T consume the "substitution happened" flag.
386 if (c != 'b') tea = 0;
// b always jumps; t jumps when t is set, T when it is clear.
387 if (c=='b' || t^(c=='T')) {
// No label: leave the command loop (branch to end of script).
388 if (!command->arg1) break;
389 str = command->arg1+(char *)command;
// Linear search of the whole command list for a ':' command with this label.
390 for (command = (void *)TT.pattern; command; command = command->next)
391 if (command->c == ':' && !strcmp(command->arg1+(char *)command, str))
393 if (!command) error_exit("no :%s", str);
396 str = command->arg1+(char *)command;
397 if (!command->hit) emit(str, strlen(str), 1);
406 // Delete up to \n or end of buffer
408 while ((str-line)<len) if (*(str++) == '\n') break;
410 memmove(line, str, len);
412 // if "delete" blanks line, disable further processing
413 // otherwise trim and restart script
419 command = (void *)TT.pattern;
// Replace the current line with a copy of the remembered buffer.
424 line = xstrdup(TT.remember);
425 len = TT.rememberlen;
// Append the remembered buffer to the current line.
427 line = xrealloc(line, len+TT.rememberlen+2);
429 memcpy(line+len, TT.remember, TT.rememberlen);
430 line[len += TT.rememberlen] = 0;
// Replace the remembered buffer with a copy of the current line.
433 TT.remember = xstrdup(line);
434 TT.rememberlen = len;
// Append a newline and the current line to the remembered buffer.
436 TT.remember = xrealloc(TT.remember, TT.rememberlen+len+2);
437 TT.remember[TT.rememberlen++] = '\n';
438 memcpy(TT.remember+TT.rememberlen, line, len);
439 TT.remember[TT.rememberlen += len] = 0;
441 str = command->arg1+(char *)command;
442 emit(str, strlen(str), 1);
// Wrap width: terminal width, defaulting to 80, clamped so an escape sequence
// still fits in toybuf, minus 4 when the width is larger than 4.
447 terminal_size(&TT.xx, 0);
448 if (!TT.xx) TT.xx = 80;
449 if (TT.xx > sizeof(toybuf)-10) TT.xx = sizeof(toybuf)-10;
450 if (TT.xx > 4) TT.xx -= 4;
453 for (i = off = 0; i<len; i++) {
// Output line is full: end it with a backslash and flush it.
455 toybuf[off++] = '\\';
456 emit(toybuf, off, 1);
// Index of this byte among the characters that get a letter escape.
459 x = stridx("\\\a\b\f\r\t\v", line[i]);
461 toybuf[off++] = '\\';
462 toybuf[off++] = "\\abfrtv"[x];
// Bytes that compare >= ' ' are copied as-is, the rest become 3-digit octal.
463 } else if (line[i] >= ' ') toybuf[off++] = line[i];
464 else off += sprintf(toybuf+off, "\\%03o", line[i]);
467 emit(toybuf, off, 1);
// Record where to resume (+1 keeps the value non-NULL).
469 TT.restart = command->next+1;
473 // Can't just grab next line because we could have multiple N and
474 // we need to actually read ahead to get N;$p EOF detection right.
476 TT.restart = command->next+1;
// Negative length asks extend_string() to insert a '\n' separator.
477 extend_string(&line, TT.nextline, len, -TT.nextlen);
480 TT.nextlen += len + 1;
484 // Pending append goes out right after N
486 } else if (c=='p' || c=='P') {
// P prints only up to the first embedded newline, if there is one.
487 char *l = (c=='P') ? strchr(line, '\n') : 0;
489 if (emit(line, l ? l-line : len, eol)) break;
// Tell do_lines() to stop reading input.
491 if (pline) *pline = (void *)1;
// rline/rlen track the not-yet-searched tail of the line; new is the
// replacement template; match[] lives in toybuf.
498 char *rline = line, *new = command->arg2 + (char *)command, *swap, *rswap;
499 regmatch_t *match = (void *)toybuf;
500 regex_t *reg = get_regex(command, command->arg1);
501 int mflags = 0, count = 0, zmatch = 1, rlen = len, mlen, off, newlen;
503 // Find match in remaining line (up to remaining len)
504 while (!regex_null(reg, rline, rlen, 10, match, mflags)) {
507 // Zero length matches don't count immediately after a previous match
508 mlen = match[0].rm_eo-match[0].rm_so;
509 if (!mlen && !zmatch) {
516 // If we're replacing only a specific match, skip if this isn't it
// The requested occurrence number is stored above the three flag bits.
517 off = command->sflags>>3;
518 if (off && off != ++count) {
519 rline += match[0].rm_eo;
520 rlen -= match[0].rm_eo;
524 // The fact getline() can allocate unbounded amounts of memory is
525 // a bigger issue, but while we're here check for integer overflow
526 if (match[0].rm_eo > INT_MAX) perror_exit(0);
528 // newlen = strlen(new) but with \1 and & and printf escapes
529 for (off = newlen = 0; new[off]; off++) {
// cc selects the match group: 0 for '&', the digit after a backslash otherwise.
532 if (new[off] == '&') cc = 0;
533 else if (new[off] == '\\') cc = new[++off] - '0';
534 if (cc < 0 || cc > 9) {
538 newlen += match[cc].rm_eo-match[cc].rm_so;
541 // Allocate new size, copy start/end around match. (Can't extend in
542 // place because backrefs may refer to text after it's overwritten.)
544 swap = xmalloc(len+1);
// rswap points at where the replacement text will start in the new buffer.
545 rswap = swap+(rline-line)+match[0].rm_so;
546 memcpy(swap, line, (rline-line)+match[0].rm_so);
// Copy the tail after the match (including terminator); rlen shrinks to it.
547 memcpy(rswap+newlen, rline+match[0].rm_eo, (rlen -= match[0].rm_eo)+1);
549 // copy in new replacement text
550 for (off = mlen = 0; new[off]; off++) {
553 if (new[off] == '\\') {
554 cc = new[++off] - '0';
// Not a backreference: store the unescaped character, or the character
// itself when unescape() returns 0.
556 if (!(rswap[mlen++] = unescape(new[off])))
557 rswap[mlen-1] = new[off];
560 } else if (match[cc].rm_so == -1) error_exit("no s//\\%d/", cc);
561 } else if (new[off] != '&') {
562 rswap[mlen++] = new[off];
// Copy the text of match group cc from the original line.
567 ll = match[cc].rm_eo-match[cc].rm_so;
568 memcpy(rswap+mlen, rline+match[cc].rm_so, ll);
// Continue searching after the text just inserted.
572 rline = rswap+newlen;
576 // Stop after first substitution unless we have flag g
577 if (!(command->sflags & 2)) break;
// p flag: print the resulting line.
582 if (command->sflags & 4) emit(line, len, eol);
585 if (command->w) goto writenow;
592 // Swap out emit() context
596 // We save filehandle and newline status before filename
597 name = command->w + (char *)command;
// The first 4 bytes at the w offset hold the saved file descriptor.
598 memcpy(&TT.fdout, name, 4);
600 TT.noeol = *(name++);
602 // write, then save/restore context
603 if (emit(line, len, eol))
604 perror_exit("w '%s'", command->arg1+(char *)command);
// Store the updated pending-newline state back next to the descriptor.
605 *(--name) = TT.noeol;
609 long swap = TT.rememberlen;
614 TT.rememberlen = len;
617 char *from, *to = (char *)command;
620 from = to+command->arg1;
// Replace each byte found in from with the byte at the same index in to.
// NOTE(review): to is presumably advanced by arg2 on a line not shown.
623 for (i = 0; i < len; i++) {
624 j = stridx(from, line[i]);
625 if (j != -1) line[i] = to[j];
// Print TT.count in decimal on its own line.
628 sprintf(toybuf, "%ld", TT.count);
629 emit(toybuf, strlen(toybuf), 1);
632 command = command->next;
// Default output of the (possibly modified) line unless -n was given.
635 if (line && !(toys.optflags & FLAG_n)) emit(line, len, eol);
// Flush the queued append records in order.
640 if (dlist_terminate(append)) while (append) {
641 struct append *a = append->next;
// Entry naming a file: open it and copy its contents to the output.
644 int fd = open(append->str, O_RDONLY);
646 // Force newline if noeol pending
648 if (TT.noeol) xwrite(TT.fdout, "\n", 1);
650 xsendfile(fd, TT.fdout);
// Entry holding literal text: write it followed by a newline.
653 } else if (append->str) emit(append->str, strlen(append->str), 1);
// Entry with no string: zero-length emit.
654 else emit(line, 0, 0);
660 // Genericish function, can probably get moved to lib.c
662 // Iterate over lines in file, calling function. Function can write 0 to
663 // the line pointer if they want to keep it, or 1 to terminate processing,
664 // otherwise line is freed. Passed file descriptor is closed at the end.
// fd:   descriptor to read from; 0 means use the existing stdin stream.
// call: invoked with the address of the line pointer and a length.
665 static void do_lines(int fd, void (*call)(char **pline, long len))
667 FILE *fp = fd ? xfdopen(fd, "r") : stdin;
// len is passed (cast) as getline()'s buffer-size argument and then
// overwritten with getline()'s return value.
673 len = getline(&line, (void *)&len, fp);
// The callback stored (void *)1 in the line pointer: stop reading.
676 if (line == (void *)1) break;
684 // Callback called on each input file
// fd:   open descriptor for the input (0 together with name "-" is stdin).
// name: input file name.
685 static void do_sed(int fd, char *name)
// Nonzero when -i (edit in place) was requested.
687 int i = toys.optflags & FLAG_i;
691 struct sedcmd *command;
// In-place editing of standard input is reported as an error.
693 if (!fd && !strcmp(name, "-")) {
694 error_msg("-i on stdin");
// Redirect output to a temporary copy of the file being edited.
697 TT.fdout = copy_tempfile(fd, name, &tmp);
// Walk every parsed command (the per-command action is on a line not shown).
699 for (command = (void *)TT.pattern; command; command = command->next)
// Run the script over every line of this input.
702 do_lines(fd, process_line);
// Hand the temporary output to replace_tempfile() for the in-place update.
705 replace_tempfile(-1, TT.fdout, &tmp);
// Clear the held-line length and pending-newline state.
708 TT.nextlen = TT.noeol = 0;
712 // Copy chunk of string between two delimiters, converting printf escapes.
713 // returns processed copy of string (0 if error), *pstr advances to next
714 // unused char. if delim (or *delim) is 0 uses/saves starting char as delimiter
715 // if regex, ignore delimiter in [ranges]
716 static char *unescape_delimited_string(char **pstr, char *delim)
// mode is nonzero while inside a [bracket expression]; d is the delimiter.
718 char *to, *from, mode = 0, d;
// No delimiter supplied: take it from the start of the input.
721 if (!delim || !*delim) {
722 if (!(d = *(from++))) return 0;
// A leading backslash introduces the delimiter character that follows it.
723 if (d == '\\') d = *(from++);
// Neither end of string nor a backslash may serve as the delimiter.
724 if (!d || d == '\\') return 0;
// Report the chosen delimiter back to the caller.
725 if (delim) *delim = d;
// Output buffer sized to hold all of *pstr plus a terminator; note that
// delim is reused here to remember the start of that buffer.
727 to = delim = xmalloc(strlen(*pstr)+1);
// Copy until an unbracketed delimiter is reached.
729 while (mode || *from != d) {
// Ran out of input before the closing delimiter: error.
730 if (!*from) return 0;
732 // delimiter in regex character range doesn't count
733 if (!mode && *from == '[') {
// A '-' or ']' directly after '[' is copied as part of the opening.
735 if (from[1]=='-' || from[1]==']') *(to++) = *(from++);
736 } else if (mode && *from == ']') mode = 0;
737 // Length 1 range (X-X with same X) is "undefined" and makes regcomp err,
738 // but the perl build does it, so we need to filter it out.
739 else if (mode && *from == '-' && from[-1] == from[1]) {
742 } else if (*from == '\\') {
// A backslash as the last character of the input is an error.
743 if (!from[1]) return 0;
745 // Check escaped end delimiter before printf style escapes.
// Escaped delimiter: step past the backslash.
746 if (from[1] == d) from++;
// Escaped backslash: copy the first backslash here.
747 else if (from[1]=='\\') *(to++) = *(from++);
749 char c = unescape(from[1]);
755 } else if (!mode) *(to++) = *(from++);
766 // Translate pattern strings into command structures. Each command structure
767 // is a single allocation (which requires some math and remalloc at times).
// pline: address of one line of script text, or NULL (treated as an empty
//        line).
// len:   length of that line; a trailing newline is stripped below.
768 static void parse_pattern(char **pline, long len)
770 struct sedcmd *command = (void *)TT.pattern;
771 char *line, *reg, c, *errstart;
// errstart keeps the start of the line for the error message.
774 line = errstart = pline ? *pline : "";
775 if (len && line[len-1]=='\n') line[--len] = 0;
777 // Append this line to previous multiline command? (hit indicates type.)
778 // During parsing "hit" stores data about line continuations, but in
779 // process_line() it means the match range attached to this command
780 // is active, so processing the continuation must zero it again.
781 if (command && command->prev->hit) {
782 // Remove half-finished entry from list so remalloc() doesn't confuse it
783 TT.pattern = TT.pattern->prev;
784 command = dlist_pop(&TT.pattern);
// Point reg at the end of the text already stored for arg1.
786 reg = (char *)command;
787 reg += command->arg1 + strlen(reg + command->arg1);
789 // Resume parsing for 'a' or 's' command. (Only two that can do this.)
790 // TODO: using 256 to indicate 'a' means our s/// delimiter can't be
791 // a unicode character.
// For s, hit holds the delimiter character, so values below 256 mean "s".
792 if (command->hit < 256) goto resume_s;
796 // Loop through commands in this line.
// Append the command finished on the previous iteration to the list.
800 if (command) dlist_add_nomalloc(&TT.pattern, (void *)command);
802 // If there's no more data on this line, return.
// Skip separators, then skip a '#' comment up to end of line.
804 while (isspace(*line) || *line == ';') line++;
805 if (*line == '#') while (*line && *line != '\n') line++;
810 // We start by writing data into toybuf. Later we'll allocate the
814 memset(toybuf, 0, sizeof(struct sedcmd));
815 command = (void *)toybuf;
// Variable-length data (compiled regexes, strings) follows the fixed struct.
816 reg = toybuf + sizeof(struct sedcmd);
818 // Parse address range (if any)
819 for (i = 0; i < 2; i++) {
820 if (*line == ',') line++;
// NOTE(review): strtol() base 0 also accepts 0x/0 prefixed (hex/octal) input.
823 if (isdigit(*line)) command->lmatch[i] = strtol(line, &line, 0);
824 else if (*line == '$') {
// -1 is the marker for the "$" address.
825 command->lmatch[i] = -1;
827 } else if (*line == '/' || *line == '\\') {
830 if (!(s = unescape_delimited_string(&line, 0))) goto error;
// Empty regex: offset 0.
831 if (!*s) command->rmatch[i] = 0;
// Compile into the scratch area and record its offset from toybuf.
833 xregcomp((void *)reg, s, (toys.optflags & FLAG_r)*REG_EXTENDED);
834 command->rmatch[i] = reg-toybuf;
835 reg += sizeof(regex_t);
841 while (isspace(*line)) line++;
844 while (*line == '!') {
848 while (isspace(*line)) line++;
850 c = command->c = *(line++);
// Leave the loop (towards the error report) when i is nonzero for "}" or
// ":", or greater than 1 for a, i, q, r or "=".
// NOTE(review): i presumably counts the addresses parsed above -- confirm.
851 if (strchr("}:", c) && i) break;
852 if (strchr("aiqr=", c) && i>1) break;
854 // Add step to pattern
// Move from the toybuf scratch copy to a heap copy of exactly the used size,
// and re-point reg into the new allocation.
855 command = xmemdup(toybuf, reg-toybuf);
856 reg = (reg-toybuf) + (char *)command;
858 // Parse arguments by command type
// TT.nextlen counts currently open "{" blocks while parsing.
859 if (c == '{') TT.nextlen++;
// A close with no open block pending is an error.
861 if (!TT.nextlen--) break;
862 } else if (c == 's') {
863 char *end, delim = 0;
865 // s/pattern/replacement/flags
867 // line continuations use arg1 (back at the start of the function),
868 // so let's fill out arg2 first (since the regex part can't be multiple
869 // lines) and swap them back later.
871 // get pattern (just record, we parse it later)
872 command->arg2 = reg - (char *)command;
// TT.remember temporarily holds the raw pattern text until it is compiled.
873 if (!(TT.remember = unescape_delimited_string(&line, &delim)))
// Reserve room for the compiled regex, then start the replacement text.
876 reg += sizeof(regex_t);
877 command->arg1 = reg-(char *)command;
// hit holds the delimiter while the replacement is still being read.
878 command->hit = delim;
880 // get replacement - don't replace escapes yet because \1 and \& need
881 // processing later, after we replace \\ with \ we can't tell \\1 from \1
// Scan for the closing delimiter, stepping over backslash-escaped characters.
883 while (*end != command->hit) {
884 if (!*end) goto error;
885 if (*end++ == '\\') {
886 if (!*end || *end == '\n') {
894 reg = extend_string((void *)&command, line, reg-(char *)command,end-line);
896 // line continuation? (note: '\n' can't be a valid delim).
// Closing delimiter reached: the replacement is complete.
897 if (*line == command->hit) command->hit = 0;
899 if (!*line) continue;
905 // swap arg1/arg2 so they're back in order arguments occur.
907 command->arg1 = command->arg2;
// Parse the flags that follow the closing delimiter.
911 for (line++; *line; line++) {
914 if (isspace(*line) && *line != '\n') continue;
// i, g, p map to bits 0, 1, 2; an occurrence number goes in the bits above,
// and only when none has been stored yet.
916 if (0 <= (l = stridx("igp", *line))) command->sflags |= 1<<l;
917 else if (!(command->sflags>>3) && 0<(l = strtol(line, &line, 10))) {
918 command->sflags |= l << 3;
923 // We deferred actually parsing the regex until we had the s///i flag
924 // allocating the space was done by extend_string() above
// Empty pattern: arg1 of 0.
925 if (!*TT.remember) command->arg1 = 0;
926 else xregcomp((void *)(command->arg1 + (char *)command), TT.remember,
927 ((toys.optflags & FLAG_r)*REG_EXTENDED)|((command->sflags&1)*REG_ICASE));
934 } else if (c == 'w') {
938 // Since s/// uses arg1 and arg2, and w needs a persistent filehandle and
939 // eol status, and to retain the filename for error messages, we'd need
940 // to go up to arg5 just for this. Compromise: dynamically allocate the
941 // filehandle and eol status.
944 while (isspace(*line)) line++;
945 if (!*line) goto error;
// The file name ends at "\;" or at end of line.
946 for (cc = line; *cc; cc++) if (*cc == '\\' && cc[1] == ';') break;
// The output file is created/truncated here, at parse time.
949 fd = xcreate(line, O_WRONLY|O_CREAT|O_TRUNC, 0644);
// Grow the command by the name length plus 6 more bytes (room for the
// descriptor and status stored before the name, plus a terminator).
952 command->w = reg - (char *)command;
953 command = xrealloc(command, command->w+(cc-line)+6);
954 reg = command->w + (char *)command;
959 memcpy(reg, line, delim);
// Step over the two-character "\;" terminator.
964 if (delim) line += 2;
965 } else if (c == 'y') {
// First delimited string is stored at arg1.
969 if (!(s = unescape_delimited_string(&line, &delim))) goto error;
970 command->arg1 = reg-(char *)command;
972 reg = extend_string((void *)&command, s, reg-(char *)command, len);
// Second delimited string is stored at arg2 and must have the same length.
974 command->arg2 = reg-(char *)command;
975 if (!(s = unescape_delimited_string(&line, &delim))) goto error;
976 if (len != strlen(s)) goto error;
977 reg = extend_string((void *)&command, s, reg-(char*)command, len);
979 } else if (strchr("abcirtTw:", c)) {
982 // trim leading spaces
983 while (isspace(*line) && *line != '\n') line++;
985 // Resume logic differs from 's' case because we don't add a newline
986 // unless it's after something, so we add it on return instead.
990 // btT: end with space or semicolon, aicrw continue to newline.
991 if (!(end = strcspn(line, strchr(":btT", c) ? "; \t\r\n\v\f" : "\n"))) {
992 // Argument's optional for btT
993 if (strchr("btT", c)) continue;
// Other commands must have an argument: leave the loop for the error report.
994 else if (!command->arg1) break;
997 // Extend allocation to include new string. We use offsets instead of
998 // pointers so realloc() moving stuff doesn't break things. Ok to write
999 // \n over NUL terminator because call to extend_string() adds it back.
1000 if (!command->arg1) command->arg1 = reg - (char*)command;
1001 else if (*(command->arg1+(char *)command)) *(reg++) = '\n';
1006 reg = extend_string((void *)&command, line, reg - (char *)command, end);
1008 // Recopy data to remove escape sequences and handle line continuation.
1009 if (strchr("aci", c)) {
1011 for (i = end; i; i--) {
1012 if ((*reg++ = *line++)=='\\') {
1014 // escape at end of line: resume if -e escaped literal newline,
1015 // else request callback and resume with next line
// Overwrite the copied backslash with the unescaped character, or with the
// character itself when unescape() returns 0.
1025 if (!(reg[-1] = unescape(*line))) reg[-1] = *line;
1032 // Commands that take no arguments
1033 } else if (!strchr("{dDgGhHlnNpPqx=", c)) break;
// Report the whole script line, the 1-based column and the offending char.
1037 error_exit("bad pattern '%s'@%ld (%c)", errstart, line-errstart+1L, *line);
// NOTE(review): the enclosing function's signature is on a line not visible
// in this excerpt; this looks like the body of the command's entry point.
1042 struct arg_list *al;
1043 char **args = toys.optargs;
1045 // Lie to autoconf when it asks stupid questions, so configure regexes
1046 // that look for "GNU sed version %f" greater than some old buggy number
1047 // don't fail us for not matching their narrow expectations.
1048 if (toys.optflags & FLAG_version) {
1049 xprintf("This is not GNU sed version 9.0\n");
1053 // Parse pattern into commands.
1055 // If no -e or -f, first argument is the pattern.
1056 if (!TT.e && !TT.f) {
1057 if (!*toys.optargs) error_exit("no pattern");
// Build a one-entry -e list from the first argument and consume it.
1058 (TT.e = xzalloc(sizeof(struct arg_list)))->arg = *(args++);
1061 // Option parsing infrastructure can't interlace "-e blah -f blah -e blah"
1062 // so handle all -e, then all -f. (At least the behavior's consistent.)
1064 for (al = TT.e; al; al = al->next) parse_pattern(&al->arg, strlen(al->arg));
// A script file named "-" is read from stdin (descriptor 0).
1065 for (al = TT.f; al; al = al->next)
1066 do_lines(strcmp(al->arg, "-") ? xopen(al->arg, O_RDONLY) : 0,parse_pattern);
// Final call with no line, made before the command list is terminated.
1067 parse_pattern(0, 0);
1068 dlist_terminate(TT.pattern);
// TT.nextlen counted open "{" blocks during parsing; nonzero means unclosed.
1069 if (TT.nextlen) error_exit("no }");
// Start with an empty remembered line.
1072 TT.remember = xstrdup("");
1074 // Inflict pattern upon input files
1075 loopfiles_rw(args, O_RDONLY, 0, 0, do_sed);
// Without -i, make one final call with a NULL line (the EOF signal
// process_line() expects) to flush the last held-back line.
1077 if (!(toys.optflags & FLAG_i)) process_line(0, 0);
1079 // todo: need to close fd when done for TOYBOX_FREE?