1 /* sed.c - stream editor. Thing that does s/// and other stuff.
3 * Copyright 2014 Rob Landley <rob@landley.net>
5 * See http://pubs.opengroup.org/onlinepubs/9699919799/utilities/sed.html
7 * TODO: lines > 2G could wrap signed int length counters. Not just getline()
9 * TODO: make y// handle unicode
10 * TODO: handle error return from emit(), error_msg/exit consistently
11 * What's the right thing to do for -i when write fails? Skip to next?
13 USE_SED(NEWTOY(sed, "(version)e*f*inEr[+Er]", TOYFLAG_USR|TOYFLAG_BIN|TOYFLAG_LOCALE))
19 usage: sed [-inrE] [-e SCRIPT]...|SCRIPT [-f SCRIPT_FILE]... [FILE...]
21 Stream editor. Apply one or more editing SCRIPTs to each line of input
22 (from FILE or stdin) producing output (by default to stdout).
25 -f add contents of SCRIPT_FILE to list
26 -i Edit each file in place.
27 -n No default output. (Use the p command to output matched lines.)
28 -r Use extended regular expression syntax.
30 -s Treat input files separately (implied by -i)
32 A SCRIPT is a series of one or more COMMANDs separated by newlines or
33 semicolons. All -e SCRIPTs are concatenated together as if separated
34 by newlines, followed by all lines from -f SCRIPT_FILEs, in order.
35 If no -e or -f SCRIPTs are specified, the first argument is the SCRIPT.
37 Each COMMAND may be preceded by an address which limits the command to
38 apply only to the specified line(s). Commands without an address apply to
39 every line. Addresses are of the form:
41 [ADDRESS[,ADDRESS]]COMMAND
43 The ADDRESS may be a decimal line number (starting at 1), a /regular
44 expression/ within a pair of forward slashes, or the character "$" which
45 matches the last line of input. (In -s or -i mode this matches the last
46 line of each file, otherwise just the last line of the last file.) A single
47 address matches one line, a pair of comma separated addresses match
48 everything from the first address to the second address (inclusive). If
49 both addresses are regular expressions, more than one range of lines in
52 REGULAR EXPRESSIONS in sed are started and ended by the same character
53 (traditionally / but anything except a backslash or a newline works).
54 Backslashes may be used to escape the delimiter if it occurs in the
55 regex, and for the usual printf escapes (\abcefnrtv and octal, hex,
56 and unicode). An empty regex repeats the previous one. ADDRESS regexes
57 (above) require the first delimiter to be escaped with a backslash when
58 it isn't a forward slash (to distinguish it from the COMMANDs below).
60 Sed mostly operates on individual lines one at a time. It reads each line,
61 processes it, and either writes it to the output or discards it before
62 reading the next line. Sed can remember one additional line in a separate
63 buffer (using the h, H, g, G, and x commands), and can read the next line
64 of input early (using the n and N commands), but other than that command
65 scripts operate on individual lines of text.
67 Each COMMAND starts with a single character. The following commands take
70 { Start a new command block, continuing until a corresponding "}".
71 Command blocks may nest. If the block has an address, commands within
72 the block are only run for lines within the block's address range.
74 } End command block (this command cannot have an address)
76 d Delete this line and move on to the next one
77 (ignores remaining COMMANDs)
79 D Delete one line of input and restart command SCRIPT (same as "d"
80 unless you've glued lines together with "N" or similar)
82 g Get remembered line (overwriting current line)
84 G Get remembered line (appending to current line)
86 h Remember this line (overwriting remembered line)
88 H Remember this line (appending to remembered line, if any)
90 l Print line, escaping \abfrtv (but not newline), octal escaping other
91 nonprintable characters, wrapping lines to terminal width with a
92 backslash, and appending $ to actual end of line.
94 n Print default output and read next line, replacing current line
95 (If no next line available, quit processing script)
97 N Append next line of input to this line, separated by a newline
98 (This advances the line counter for address matching and "=", if no
99 next line available quit processing script without default output)
103 P Print this line up to first newline (from "N")
105 q Quit (print default output, no more commands processed or lines read)
107 x Exchange this line with remembered line (overwrite in both directions)
109 = Print the current line number (followed by a newline)
111 The following commands (may) take an argument. The "text" arguments (to
112 the "a", "i", and "c" commands) may end with an unescaped "\" to append
113 the next line (for which leading whitespace is not skipped), and also
114 treat ";" as a literal character (use "\;" instead).
116 a [text] Append text to output before attempting to read next line
118 b [label] Branch, jumps to :label (or with no label, to end of SCRIPT)
120 c [text] Delete line, output text at end of matching address range
121 (ignores remaining COMMANDs)
125 r [file] Append contents of file to output before attempting to read
128 s/S/R/F Search for regex S, replace matched text with R using flags F.
129 The first character after the "s" (anything but newline or
130 backslash) is the delimiter, escape with \ to use normally.
132 The replacement text may contain "&" to substitute the matched
133 text (escape it with backslash for a literal &), or \1 through
134 \9 to substitute a parenthetical subexpression in the regex.
135 You can also use the normal backslash escapes such as \n and
136 a backslash at the end of the line appends the next line.
140 [0-9] A number, substitute only that occurrence of pattern
141 g Global, substitute all occurrences of pattern
142 i Ignore case when matching
143 p Print the line if match was found and replaced
144 w [file] Write (append) line to file if match replaced
146 t [label] Test, jump to :label only if an "s" command found a match in
147 this line since last test (replacing with same text counts)
149 T [label] Test false, jump only if "s" hasn't found a match.
151 w [file] Write (append) line to file
153 y/old/new/ Change each character in 'old' to corresponding character
154 in 'new' (with standard backslash escapes, delimiter can be
155 any repeated character except \ or \n)
157 : [label] Labeled target for jump commands
159 # Comment, ignore rest of this line of SCRIPT
161 Deviations from posix: allow extended regular expressions with -r,
162 editing in place with -i, separate with -s, printf escapes in text, line
163 continuations, semicolons after all commands, 2-address anywhere an
164 address is allowed, "T" command, multiline continuations for [aic],
165 \; to end [aic] argument before end of line.
175 // processed pattern list
176 struct double_list *pattern;
178 char *nextline, *remember;
179 void *restart, *lastregex;
180 long nextlen, rememberlen, count;
185 // Linked list of parsed sed commands. Offset fields indicate location where
186 // regex or string starts, ala offset+(char *)struct, because we realloc()
187 // these to expand them for multiline inputs, and pointers would have to be
188 // individually adjusted.
191 struct sedcmd *next, *prev;
193 // Begin and end of each match
194 long lmatch[2]; // line number of match
195 int rmatch[2]; // offset of regex struct for prefix matches (/abc/,/def/p)
196 int arg1, arg2, w; // offset of two arguments per command, plus s//w filename
198 unsigned sflags; // s///flag bits: i=1, g=2, p=4
202 // Write out line with potential embedded NUL, handling eol/noeol
203 static int emit(char *line, long len, int eol)
205 int l, old = line[len];
207 if (TT.noeol && !writeall(TT.fdout, "\n", 1)) return 1;
209 if (eol) line[len++] = '\n';
211 l = writeall(TT.fdout, line, len);
212 if (eol) line[len-1] = old;
214 perror_msg("short write");
222 // Extend allocation to include new string, with newline between if newlen<0
224 static char *extend_string(char **old, char *new, int oldlen, int newlen)
226 int newline = newlen < 0;
229 if (newline) newlen = -newlen;
230 s = *old = xrealloc(*old, oldlen+newlen+newline+1);
231 if (newline) s[oldlen++] = '\n';
232 memcpy(s+oldlen, new, newlen);
233 s[oldlen+newlen] = 0;
235 return s+oldlen+newlen+1;
238 // An empty regex repeats the previous one
239 static void *get_regex(void *trump, int offset)
242 if (!TT.lastregex) error_exit("no previous regex");
246 return TT.lastregex = offset+(char *)trump;
249 // Apply pattern to line from input file
250 static void process_line(char **pline, long plen)
253 struct append *next, *prev;
257 char *line = TT.nextline;
258 long len = TT.nextlen;
259 struct sedcmd *command;
260 int eol = 0, tea = 0;
262 // Grab next line for deferred processing (EOF detection: we get a NULL
263 // pline at EOF to flush last line). Note that only end of _last_ input
264 // file matches $ (unless we're doing -i).
268 TT.nextline = *pline;
273 if (!line || !len) return;
274 if (line[len-1] == '\n') line[--len] = eol++;
277 // The restart-1 is because we added one to make sure it wasn't NULL,
278 // otherwise N as last command would restart script
279 command = TT.restart ? ((struct sedcmd *)TT.restart)-1 : (void *)TT.pattern;
283 char *str, c = command->c;
285 // Have we got a line or regex matching range for this rule?
286 if (*command->lmatch || *command->rmatch) {
290 // In a match that might end?
292 if (!(lm = command->lmatch[1])) {
293 if (!command->rmatch[1]) command->hit = 0;
295 void *rm = get_regex(command, command->rmatch[1]);
297 // regex match end includes matching line, so defer deactivation
298 if (line && !regexec0(rm, line, len, 0, 0, 0)) miss = 1;
300 } else if (lm > 0 && lm < TT.count) command->hit = 0;
302 // Start a new match?
304 if (!(lm = *command->lmatch)) {
305 void *rm = get_regex(command, *command->rmatch);
307 if (line && !regexec0(rm, line, len, 0, 0, 0)) command->hit++;
308 } else if (lm == TT.count || (lm == -1 && !pline)) command->hit++;
310 if (!command->lmatch[1] && !command->rmatch[1]) miss = 1;
314 lm = !(command->hit ^ command->not);
316 // Deferred disable from regex end match
317 if (miss || command->lmatch[1] == TT.count) command->hit = 0;
320 // Handle skipping curly bracket command group
325 command = command->next;
326 if (command->c == '{') curly++;
327 if (command->c == '}') curly--;
330 command = command->next;
335 // A deleted line can still update line match state for later commands
337 command = command->next;
343 if (c=='a' || c=='r') {
344 struct append *a = xzalloc(sizeof(struct append));
345 if (command->arg1) a->str = command->arg1+(char *)command;
347 dlist_add_nomalloc((void *)&append, (void *)a);
348 } else if (c=='b' || c=='t' || c=='T') {
351 if (c != 'b') tea = 0;
352 if (c=='b' || t^(c=='T')) {
353 if (!command->arg1) break;
354 str = command->arg1+(char *)command;
355 for (command = (void *)TT.pattern; command; command = command->next)
356 if (command->c == ':' && !strcmp(command->arg1+(char *)command, str))
358 if (!command) error_exit("no :%s", str);
361 str = command->arg1+(char *)command;
362 if (!command->hit) emit(str, strlen(str), 1);
371 // Delete up to \n or end of buffer
373 while ((str-line)<len) if (*(str++) == '\n') break;
375 memmove(line, str, len);
377 // if "delete" blanks line, disable further processing
378 // otherwise trim and restart script
384 command = (void *)TT.pattern;
389 line = xstrdup(TT.remember);
390 len = TT.rememberlen;
392 line = xrealloc(line, len+TT.rememberlen+2);
394 memcpy(line+len, TT.remember, TT.rememberlen);
395 line[len += TT.rememberlen] = 0;
398 TT.remember = xstrdup(line);
399 TT.rememberlen = len;
401 TT.remember = xrealloc(TT.remember, TT.rememberlen+len+2);
402 TT.remember[TT.rememberlen++] = '\n';
403 memcpy(TT.remember+TT.rememberlen, line, len);
404 TT.remember[TT.rememberlen += len] = 0;
406 str = command->arg1+(char *)command;
407 emit(str, strlen(str), 1);
412 terminal_size(&TT.xx, 0);
413 if (!TT.xx) TT.xx = 80;
414 if (TT.xx > sizeof(toybuf)-10) TT.xx = sizeof(toybuf)-10;
415 if (TT.xx > 4) TT.xx -= 4;
418 for (i = off = 0; i<len; i++) {
420 toybuf[off++] = '\\';
421 emit(toybuf, off, 1);
424 x = stridx("\\\a\b\f\r\t\v", line[i]);
426 toybuf[off++] = '\\';
427 toybuf[off++] = "\\abfrtv"[x];
428 } else if (line[i] >= ' ') toybuf[off++] = line[i];
429 else off += sprintf(toybuf+off, "\\%03o", line[i]);
432 emit(toybuf, off, 1);
434 TT.restart = command->next+1;
438 // Can't just grab next line because we could have multiple N and
439 // we need to actually read ahead to get N;$p EOF detection right.
441 TT.restart = command->next+1;
442 extend_string(&line, TT.nextline, len, -TT.nextlen);
445 TT.nextlen += len + 1;
449 // Pending append goes out right after N
451 } else if (c=='p' || c=='P') {
452 char *l = (c=='P') ? strchr(line, '\n') : 0;
454 if (emit(line, l ? l-line : len, eol)) break;
456 if (pline) *pline = (void *)1;
463 char *rline = line, *new = command->arg2 + (char *)command, *swap, *rswap;
464 regmatch_t *match = (void *)toybuf;
465 regex_t *reg = get_regex(command, command->arg1);
466 int mflags = 0, count = 0, zmatch = 1, rlen = len, mlen, off, newlen;
468 // Find match in remaining line (up to remaining len)
469 while (!regexec0(reg, rline, rlen, 10, match, mflags)) {
472 // Zero length matches don't count immediately after a previous match
473 mlen = match[0].rm_eo-match[0].rm_so;
474 if (!mlen && !zmatch) {
481 // If we're replacing only a specific match, skip if this isn't it
482 off = command->sflags>>3;
483 if (off && off != ++count) {
484 rline += match[0].rm_eo;
485 rlen -= match[0].rm_eo;
489 // The fact getline() can allocate unbounded amounts of memory is
490 // a bigger issue, but while we're here check for integer overflow
491 if (match[0].rm_eo > INT_MAX) perror_exit(0);
493 // newlen = strlen(new) but with \1 and & and printf escapes
494 for (off = newlen = 0; new[off]; off++) {
497 if (new[off] == '&') cc = 0;
498 else if (new[off] == '\\') cc = new[++off] - '0';
499 if (cc < 0 || cc > 9) {
503 newlen += match[cc].rm_eo-match[cc].rm_so;
506 // Allocate new size, copy start/end around match. (Can't extend in
507 // place because backrefs may refer to text after it's overwritten.)
509 swap = xmalloc(len+1);
510 rswap = swap+(rline-line)+match[0].rm_so;
511 memcpy(swap, line, (rline-line)+match[0].rm_so);
512 memcpy(rswap+newlen, rline+match[0].rm_eo, (rlen -= match[0].rm_eo)+1);
514 // copy in new replacement text
515 for (off = mlen = 0; new[off]; off++) {
518 if (new[off] == '\\') {
519 cc = new[++off] - '0';
521 if (!(rswap[mlen++] = unescape(new[off])))
522 rswap[mlen-1] = new[off];
525 } else if (match[cc].rm_so == -1) error_exit("no s//\\%d/", cc);
526 } else if (new[off] != '&') {
527 rswap[mlen++] = new[off];
532 ll = match[cc].rm_eo-match[cc].rm_so;
533 memcpy(rswap+mlen, rline+match[cc].rm_so, ll);
537 rline = rswap+newlen;
541 // Stop after first substitution unless we have flag g
542 if (!(command->sflags & 2)) break;
547 if (command->sflags & 4) emit(line, len, eol);
550 if (command->w) goto writenow;
557 // Swap out emit() context
561 // We save filehandle and newline status before filename
562 name = command->w + (char *)command;
563 memcpy(&TT.fdout, name, 4);
565 TT.noeol = *(name++);
567 // write, then save/restore context
568 if (emit(line, len, eol))
569 perror_exit("w '%s'", command->arg1+(char *)command);
570 *(--name) = TT.noeol;
574 long swap = TT.rememberlen;
579 TT.rememberlen = len;
582 char *from, *to = (char *)command;
585 from = to+command->arg1;
588 for (i = 0; i < len; i++) {
589 j = stridx(from, line[i]);
590 if (j != -1) line[i] = to[j];
593 sprintf(toybuf, "%ld", TT.count);
594 emit(toybuf, strlen(toybuf), 1);
597 command = command->next;
600 if (line && !(toys.optflags & FLAG_n)) emit(line, len, eol);
603 if (dlist_terminate(append)) while (append) {
604 struct append *a = append->next;
607 int fd = open(append->str, O_RDONLY);
609 // Force newline if noeol pending
611 if (TT.noeol) xwrite(TT.fdout, "\n", 1);
613 xsendfile(fd, TT.fdout);
616 } else if (append->str) emit(append->str, strlen(append->str), 1);
617 else emit(line, 0, 0);
624 // Genericish function, can probably get moved to lib.c
626 // Iterate over lines in file, calling function. Function can write 0 to
627 // the line pointer if it wants to keep the line, or 1 to terminate processing,
628 // otherwise line is freed. Passed file descriptor is closed at the end.
629 static void do_lines(int fd, void (*call)(char **pline, long len))
631 FILE *fp = fd ? xfdopen(fd, "r") : stdin;
637 len = getline(&line, (void *)&len, fp);
640 if (line == (void *)1) break;
648 // Callback called on each input file
649 static void do_sed(int fd, char *name)
651 int i = toys.optflags & FLAG_i;
655 struct sedcmd *command;
657 if (!fd && !strcmp(name, "-")) {
658 error_msg("-i on stdin");
661 TT.fdout = copy_tempfile(fd, name, &tmp);
663 for (command = (void *)TT.pattern; command; command = command->next)
666 do_lines(fd, process_line);
669 replace_tempfile(-1, TT.fdout, &tmp);
672 TT.nextlen = TT.noeol = 0;
676 // Copy chunk of string between two delimiters, converting printf escapes.
677 // returns processed copy of string (0 if error), *pstr advances to next
678 // unused char. if delim (or *delim) is 0 uses/saves starting char as delimiter
679 // if regex, ignore delimiter in [ranges]
680 static char *unescape_delimited_string(char **pstr, char *delim)
682 char *to, *from, mode = 0, d;
685 if (!delim || !*delim) {
686 if (!(d = *(from++))) return 0;
687 if (d == '\\') d = *(from++);
688 if (!d || d == '\\') return 0;
689 if (delim) *delim = d;
691 to = delim = xmalloc(strlen(*pstr)+1);
693 while (mode || *from != d) {
694 if (!*from) return 0;
696 // delimiter in regex character range doesn't count
697 if (!mode && *from == '[') {
699 if (from[1]=='-' || from[1]==']') *(to++) = *(from++);
700 } else if (mode && *from == ']') mode = 0;
701 // Length 1 range (X-X with same X) is "undefined" and makes regcomp err,
702 // but the perl build does it, so we need to filter it out.
703 else if (mode && *from == '-' && from[-1] == from[1]) {
706 } else if (*from == '\\') {
707 if (!from[1]) return 0;
709 // Check escaped end delimiter before printf style escapes.
710 if (from[1] == d) from++;
711 else if (from[1]=='\\') *(to++) = *(from++);
713 char c = unescape(from[1]);
719 } else if (!mode) *(to++) = *(from++);
730 // Translate pattern strings into command structures. Each command structure
731 // is a single allocation (which requires some math and remalloc at times).
732 static void parse_pattern(char **pline, long len)
734 struct sedcmd *command = (void *)TT.pattern;
735 char *line, *reg, c, *errstart;
738 line = errstart = pline ? *pline : "";
739 if (len && line[len-1]=='\n') line[--len] = 0;
741 // Append this line to previous multiline command? (hit indicates type.)
742 // During parsing "hit" stores data about line continuations, but in
743 // process_line() it means the match range attached to this command
744 // is active, so processing the continuation must zero it again.
745 if (command && command->prev->hit) {
746 // Remove half-finished entry from list so remalloc() doesn't confuse it
747 TT.pattern = TT.pattern->prev;
748 command = dlist_pop(&TT.pattern);
750 reg = (char *)command;
751 reg += command->arg1 + strlen(reg + command->arg1);
753 // Resume parsing for 'a' or 's' command. (Only two that can do this.)
754 // TODO: using 256 to indicate 'a' means our s/// delimiter can't be
755 // a unicode character.
756 if (command->hit < 256) goto resume_s;
760 // Loop through commands in this line.
764 if (command) dlist_add_nomalloc(&TT.pattern, (void *)command);
766 // If there's no more data on this line, return.
768 while (isspace(*line) || *line == ';') line++;
769 if (*line == '#') while (*line && *line != '\n') line++;
774 // We start by writing data into toybuf. Later we'll allocate the
778 memset(toybuf, 0, sizeof(struct sedcmd));
779 command = (void *)toybuf;
780 reg = toybuf + sizeof(struct sedcmd);
782 // Parse address range (if any)
783 for (i = 0; i < 2; i++) {
784 if (*line == ',') line++;
787 if (isdigit(*line)) command->lmatch[i] = strtol(line, &line, 0);
788 else if (*line == '$') {
789 command->lmatch[i] = -1;
791 } else if (*line == '/' || *line == '\\') {
794 if (!(s = unescape_delimited_string(&line, 0))) goto error;
795 if (!*s) command->rmatch[i] = 0;
797 xregcomp((void *)reg, s, (toys.optflags & FLAG_r)*REG_EXTENDED);
798 command->rmatch[i] = reg-toybuf;
799 reg += sizeof(regex_t);
805 while (isspace(*line)) line++;
808 while (*line == '!') {
812 while (isspace(*line)) line++;
814 c = command->c = *(line++);
815 if (strchr("}:", c) && i) break;
816 if (strchr("aiqr=", c) && i>1) break;
818 // Add step to pattern
819 command = xmemdup(toybuf, reg-toybuf);
820 reg = (reg-toybuf) + (char *)command;
822 // Parse arguments by command type
823 if (c == '{') TT.nextlen++;
825 if (!TT.nextlen--) break;
826 } else if (c == 's') {
827 char *end, delim = 0;
829 // s/pattern/replacement/flags
831 // line continuations use arg1 (back at the start of the function),
832 // so let's fill out arg2 first (since the regex part can't be multiple
833 // lines) and swap them back later.
835 // get pattern (just record, we parse it later)
836 command->arg2 = reg - (char *)command;
837 if (!(TT.remember = unescape_delimited_string(&line, &delim)))
840 reg += sizeof(regex_t);
841 command->arg1 = reg-(char *)command;
842 command->hit = delim;
844 // get replacement - don't replace escapes yet because \1 and \& need
845 // processing later, after we replace \\ with \ we can't tell \\1 from \1
847 while (*end != command->hit) {
848 if (!*end) goto error;
849 if (*end++ == '\\') {
850 if (!*end || *end == '\n') {
858 reg = extend_string((void *)&command, line, reg-(char *)command,end-line);
860 // line continuation? (note: '\n' can't be a valid delim).
861 if (*line == command->hit) command->hit = 0;
863 if (!*line) continue;
869 // swap arg1/arg2 so they're back in order arguments occur.
871 command->arg1 = command->arg2;
875 for (line++; *line; line++) {
878 if (isspace(*line) && *line != '\n') continue;
880 if (0 <= (l = stridx("igp", *line))) command->sflags |= 1<<l;
881 else if (!(command->sflags>>3) && 0<(l = strtol(line, &line, 10))) {
882 command->sflags |= l << 3;
887 // We deferred actually parsing the regex until we had the s///i flag
888 // allocating the space was done by extend_string() above
889 if (!*TT.remember) command->arg1 = 0;
890 else xregcomp((void *)(command->arg1 + (char *)command), TT.remember,
891 ((toys.optflags & FLAG_r)*REG_EXTENDED)|((command->sflags&1)*REG_ICASE));
898 } else if (c == 'w') {
902 // Since s/// uses arg1 and arg2, and w needs a persistent filehandle and
903 // eol status, and to retain the filename for error messages, we'd need
904 // to go up to arg5 just for this. Compromise: dynamically allocate the
905 // filehandle and eol status.
908 while (isspace(*line)) line++;
909 if (!*line) goto error;
910 for (cc = line; *cc; cc++) if (*cc == '\\' && cc[1] == ';') break;
913 fd = xcreate(line, O_WRONLY|O_CREAT|O_TRUNC, 0644);
916 command->w = reg - (char *)command;
917 command = xrealloc(command, command->w+(cc-line)+6);
918 reg = command->w + (char *)command;
923 memcpy(reg, line, delim);
928 if (delim) line += 2;
929 } else if (c == 'y') {
933 if (!(s = unescape_delimited_string(&line, &delim))) goto error;
934 command->arg1 = reg-(char *)command;
936 reg = extend_string((void *)&command, s, reg-(char *)command, len);
938 command->arg2 = reg-(char *)command;
939 if (!(s = unescape_delimited_string(&line, &delim))) goto error;
940 if (len != strlen(s)) goto error;
941 reg = extend_string((void *)&command, s, reg-(char*)command, len);
943 } else if (strchr("abcirtTw:", c)) {
946 // trim leading spaces
947 while (isspace(*line) && *line != '\n') line++;
949 // Resume logic differs from 's' case because we don't add a newline
950 // unless it's after something, so we add it on return instead.
954 // btT: end with space or semicolon, aicrw continue to newline.
955 if (!(end = strcspn(line, strchr(":btT", c) ? "; \t\r\n\v\f" : "\n"))) {
956 // Argument's optional for btT
957 if (strchr("btT", c)) continue;
958 else if (!command->arg1) break;
961 // Extend allocation to include new string. We use offsets instead of
962 // pointers so realloc() moving stuff doesn't break things. Ok to write
963 // \n over NUL terminator because call to extend_string() adds it back.
964 if (!command->arg1) command->arg1 = reg - (char*)command;
965 else if (*(command->arg1+(char *)command)) *(reg++) = '\n';
970 reg = extend_string((void *)&command, line, reg - (char *)command, end);
972 // Recopy data to remove escape sequences and handle line continuation.
973 if (strchr("aci", c)) {
975 for (i = end; i; i--) {
976 if ((*reg++ = *line++)=='\\') {
978 // escape at end of line: resume if -e escaped literal newline,
979 // else request callback and resume with next line
989 if (!(reg[-1] = unescape(*line))) reg[-1] = *line;
996 // Commands that take no arguments
997 } else if (!strchr("{dDgGhHlnNpPqx=", c)) break;
1001 error_exit("bad pattern '%s'@%ld (%c)", errstart, line-errstart+1L, *line);
1006 struct arg_list *al;
1007 char **args = toys.optargs;
1009 // Lie to autoconf when it asks stupid questions, so configure regexes
1010 // that look for "GNU sed version %f" greater than some old buggy number
1011 // don't fail us for not matching their narrow expectations.
1012 if (toys.optflags & FLAG_version) {
1013 xprintf("This is not GNU sed version 9.0\n");
1017 // Parse pattern into commands.
1019 // If no -e or -f, first argument is the pattern.
1020 if (!TT.e && !TT.f) {
1021 if (!*toys.optargs) error_exit("no pattern");
1022 (TT.e = xzalloc(sizeof(struct arg_list)))->arg = *(args++);
1025 // Option parsing infrastructure can't interlace "-e blah -f blah -e blah"
1026 // so handle all -e, then all -f. (At least the behavior's consistent.)
1028 for (al = TT.e; al; al = al->next) parse_pattern(&al->arg, strlen(al->arg));
1029 for (al = TT.f; al; al = al->next)
1030 do_lines(strcmp(al->arg, "-") ? xopen(al->arg, O_RDONLY) : 0,parse_pattern);
1031 parse_pattern(0, 0);
1032 dlist_terminate(TT.pattern);
1033 if (TT.nextlen) error_exit("no }");
1036 TT.remember = xstrdup("");
1038 // Inflict pattern upon input files
1039 loopfiles_rw(args, O_RDONLY, 0, 0, do_sed);
1041 if (!(toys.optflags & FLAG_i)) process_line(0, 0);
1043 // todo: need to close fd when done for TOYBOX_FREE?