1 /* $OpenBSD: lex.c,v 1.47 2013/03/03 19:11:34 guenther Exp $ */
4 * Copyright (c) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
6 * Thorsten Glaser <tg@mirbsd.org>
8 * Provided that these terms and disclaimer and all copyright notices
9 * are retained or reproduced in an accompanying document, permission
10 * is granted to deal in this work without restriction, including un-
11 * limited rights to use, publicly perform, distribute, sell, modify,
12 * merge, give away, or sublicence.
14 * This work is provided "AS IS" and WITHOUT WARRANTY of any kind, to
15 * the utmost extent permitted by applicable law, neither express nor
16 * implied; without malicious intent or gross negligence. In no event
17 * may a licensor, author or contributor be held liable for indirect,
18 * direct, other damage, loss, or other issues arising in any way out
19 * of dealing in the work, even if advised of the possibility of such
20 * damage or existence of a defect, except proven that it results out
21 * of said person's immediate fault when using the work as intended.
26 __RCSID("$MirOS: src/bin/mksh/lex.c,v 1.188 2013/08/10 13:44:31 tg Exp $");
29 * states while lexing word
31 #define SBASE 0 /* outside any lexical constructs */
32 #define SWORD 1 /* implicit quoting for substitute() */
33 #define SLETPAREN 2 /* inside (( )), implicit quoting */
34 #define SSQUOTE 3 /* inside '' */
35 #define SDQUOTE 4 /* inside "" */
36 #define SEQUOTE 5 /* inside $'' */
37 #define SBRACE 6 /* inside ${} */
38 #define SQBRACE 7 /* inside "${}" */
39 #define SBQUOTE 8 /* inside `` */
40 #define SASPAREN 9 /* inside $(( )) */
41 #define SHEREDELIM 10 /* parsing <<,<<-,<<< delimiter */
42 #define SHEREDQUOTE 11 /* parsing " in <<,<<-,<<< delimiter */
43 #define SPATTERN 12 /* parsing *(...|...) pattern (*+?@!) */
44 #define SADELIM 13 /* like SBASE, looking for delimiter */
45 #define STBRACEKORN 14 /* parsing ${...[#%]...} !FSH */
46 #define STBRACEBOURNE 15 /* parsing ${...[#%]...} FSH */
47 #define SINVALID 255 /* invalid state */
49 struct sretrace_info {
50 struct sretrace_info *next;
56 * Structure to keep track of the lexing state and the various pieces of info
57 * needed for each particular state.
59 typedef struct lex_state {
61 /* point to the next state block */
62 struct lex_state *base;
63 /* marks start of state output in output string */
65 /* SBQUOTE: true if in double quotes: "`...`" */
66 /* SEQUOTE: got NUL, ignore rest of string */
68 /* SADELIM information */
70 /* character to search for */
71 unsigned char delimiter;
72 /* max. number of delimiters */
76 /* count open parentheses */
78 /* type of this state */
81 #define ls_base u.base
82 #define ls_start u.start
83 #define ls_bool u.abool
84 #define ls_adelim u.adelim
91 static void readhere(struct ioword *);
92 static void ungetsc(int);
93 static void ungetsc_i(int);
94 static int getsc_uu(void);
95 static void getsc_line(Source *);
96 static int getsc_bn(void);
97 static int s_get(void);
98 static void s_put(int);
99 static char *get_brace_var(XString *, char *);
100 static bool arraysub(char **);
101 static void gethere(bool);
102 static Lex_state *push_state_i(State_info *, Lex_state *);
103 static Lex_state *pop_state_i(State_info *, Lex_state *);
/*
 * Tested by o_getsc() and getsc_bn(): o_getsc() only takes its fast path
 * while this is zero.
 * NOTE(review): the meaning of the individual values is not visible in
 * this excerpt - confirm against getsc_bn().
 */
105 static int backslash_skip;
/*
 * Counter that the lexer raises and lowers in matching pairs. While it is
 * non-zero, getsc_bn() hands back o_getsc_u() directly instead of looking
 * for \newline sequences.
 */
106 static int ignore_backslash_newline;
108 /* optimised getsc_bn() */
/*
 * Fast path: when the next byte of source->str is neither NUL nor a
 * backslash and backslash_skip is zero, consume that byte directly;
 * in every other case defer to getsc_bn().
 */
109 #define o_getsc() (*source->str != '\0' && *source->str != '\\' && \
110 !backslash_skip ? *source->str++ : getsc_bn())
111 /* optimised getsc_uu() */
/*
 * Fast path: consume the next byte of source->str directly unless it is
 * NUL, in which case getsc_uu() is called to obtain the character.
 */
112 #define o_getsc_u() ((*source->str != '\0') ? *source->str++ : getsc_uu())
115 #define o_getsc_r(carg) { \
117 struct sretrace_info *rp = retrace_info; \
120 Xcheck(rp->xs, rp->xp); \
128 #if defined(MKSH_SMALL) && !defined(MKSH_SMALL_BUT_FAST)
129 static int getsc(void);
134 o_getsc_r(o_getsc());
137 static int getsc_r(int);
145 #define getsc() getsc_r(o_getsc())
148 #define STATE_BSIZE 8
/*
 * PUSH_STATE(s): enter lexer state (s).
 * Advances statep to the next slot; when that reaches state_info.end,
 * push_state_i() supplies the slot to use instead. Both the slot's type
 * and the caller's local "state" are then set to (s).
 * Expects statep, state and state_info to be in scope at the call site.
 */
150 #define PUSH_STATE(s) do { \
151 if (++statep == state_info.end) \
152 statep = push_state_i(&state_info, statep); \
153 state = statep->type = (s); \
154 } while (/* CONSTCOND */ 0)
/*
 * POP_STATE(): leave the current lexer state.
 * Steps statep back one slot; when that reaches state_info.base,
 * pop_state_i() supplies the slot to continue from. The caller's local
 * "state" is then reloaded from that slot's type.
 * Expects statep, state and state_info to be in scope at the call site.
 */
156 #define POP_STATE() do { \
157 if (--statep == state_info.base) \
158 statep = pop_state_i(&state_info, statep); \
159 state = statep->type; \
160 } while (/* CONSTCOND */ 0)
162 #define PUSH_SRETRACE() do { \
163 struct sretrace_info *ri; \
165 statep->ls_start = Xsavepos(ws, wp); \
166 ri = alloc(sizeof(struct sretrace_info), ATEMP); \
167 Xinit(ri->xs, ri->xp, 64, ATEMP); \
168 ri->next = retrace_info; \
170 } while (/* CONSTCOND */ 0)
172 #define POP_SRETRACE() do { \
173 wp = Xrestpos(ws, wp, statep->ls_start); \
174 *retrace_info->xp = '\0'; \
175 sp = Xstring(retrace_info->xs, retrace_info->xp); \
176 dp = (void *)retrace_info; \
177 retrace_info = retrace_info->next; \
179 } while (/* CONSTCOND */ 0)
184 * tokens are not regular expressions, they are LL(1).
185 * for example, "${var:-${PWD}}", and "$(size $(whence ksh))".
186 * hence the state stack. Note "$(...)" are now parsed recursively.
192 Lex_state states[STATE_BSIZE], *statep, *s2, *base;
193 State_info state_info;
196 XString ws; /* expandable output word */
197 char *wp; /* output word pointer */
201 states[0].type = SINVALID;
202 states[0].ls_base = NULL;
204 state_info.base = states;
205 state_info.end = &state_info.base[STATE_BSIZE];
207 Xinit(ws, wp, 64, ATEMP);
210 ignore_backslash_newline = 0;
214 else if (cf & LETEXPR) {
215 /* enclose arguments in (double) quotes */
221 state = (cf & HEREDELIM) ? SHEREDELIM : SBASE;
222 while ((c = getsc()) == ' ' || c == '\t')
225 ignore_backslash_newline++;
226 while ((c = getsc()) != '\0' && c != '\n')
228 ignore_backslash_newline--;
232 if (source->flags & SF_ALIAS) {
233 /* trailing ' ' in alias definition */
234 source->flags &= ~SF_ALIAS;
238 /* Initial state: one of SWORD SLETPAREN SHEREDELIM SBASE */
239 statep->type = state;
241 /* check for here string */
242 if (state == SHEREDELIM) {
246 while ((c = getsc()) == ' ' || c == '\t')
255 /* collect non-special or quoted characters to form word */
256 while (!((c = getsc()) == 0 ||
257 ((state == SBASE || state == SHEREDELIM) && ctype(c, C_LEX1)))) {
258 if (state == SBASE &&
259 subshell_nesting_type == /*{*/ '}' &&
261 /* possibly end ${ :;} */
271 else if (statep->nparen == 0 && (c == /*{*/ '}' ||
272 c == (int)statep->ls_adelim.delimiter)) {
275 if (c == /*{*/ '}' || --statep->ls_adelim.num == 0)
283 if (c == '[' && (cf & (VARASN|ARRAYVAR))) {
286 if (is_wdvarname(Xstring(ws, wp), false)) {
289 if (arraysub(&tmp)) {
292 for (p = tmp; *p; ) {
315 Sbase1: /* includes *(...|...) pattern (*+?@!) */
316 if (c == '*' || c == '@' || c == '+' || c == '?' ||
319 if (c2 == '(' /*)*/ ) {
322 PUSH_STATE(SPATTERN);
328 Sbase2: /* doesn't include *(...|...) pattern (*+?@!) */
333 /* trailing \ is lost */
339 open_ssquote_unless_heredoc:
343 ignore_backslash_newline++;
353 * processing of dollar sign belongs into
354 * Subst, except for those which can open
355 * a string: $'…' and $"…"
391 /* trailing \ is lost */
403 if (c == '(') /*)*/ {
405 if (c == '(') /*)*/ {
407 PUSH_STATE(SASPAREN);
410 *retrace_info->xp++ = '(';
423 } else if (c == '{') /*}*/ {
424 if ((c = getsc()) == '|') {
431 } else if (ctype(c, C_IFSWS)) {
434 * "command" substitution
442 wp = get_brace_var(&ws, wp);
444 /* allow :# and :% (ksh88 compat) */
456 statep->ls_adelim.delimiter = ':';
457 statep->ls_adelim.num = 1;
460 } else if (ksh_isdigit(c) ||
461 c == '('/*)*/ || c == ' ' ||
464 /* substring subst. */
472 statep->ls_adelim.delimiter = ':';
473 statep->ls_adelim.num = 2;
477 } else if (c == '/') {
480 if ((c = getsc()) == '/') {
487 statep->ls_adelim.delimiter = '/';
488 statep->ls_adelim.num = 1;
493 * If this is a trim operation,
494 * treat (,|,) specially in STBRACE.
496 if (ctype(c, C_SUBOP2)) {
499 PUSH_STATE(STBRACEBOURNE);
501 PUSH_STATE(STBRACEKORN);
504 if (state == SDQUOTE ||
510 } else if (ksh_isalphx(c)) {
517 } while (ksh_isalnux(c));
522 } else if (ctype(c, C_VAR1 | C_DIGIT)) {
541 * Need to know if we are inside double quotes
542 * since sh/AT&T-ksh translate the \" to " in
544 * This is not done in POSIX mode (section
545 * 3.2.3, Double Quotes: "The backquote shall
546 * retain its special meaning introducing the
547 * other form of command substitution (see
548 * 3.6.3). The portion of the quoted string
549 * from the initial backquote and the
550 * characters up to the next backquote that
551 * is not preceded by a backslash (having
552 * escape characters removed) defines that
553 * command whose output replaces `...` when
554 * the word is expanded."
555 * Section 3.6.3, Command Substitution:
556 * "Within the backquoted style of command
557 * substitution, backslash shall retain its
558 * literal meaning, except when followed by
561 statep->ls_bool = false;
563 base = state_info.base;
564 while (/* CONSTCOND */ 1) {
565 for (; s2 != base; s2--) {
566 if (s2->type == SDQUOTE) {
567 statep->ls_bool = true;
573 if (!(s2 = s2->ls_base))
575 base = s2-- - STATE_BSIZE;
596 ignore_backslash_newline--;
597 } else if (c == '\\') {
598 if ((c2 = unbksl(true, s_get, s_put)) == -1)
601 statep->ls_bool = true;
602 if (!statep->ls_bool) {
605 if ((unsigned int)c2 < 0x100) {
609 cz = utf_wctomb(ts, c2 - 0x100);
611 for (cz = 0; ts[cz]; ++cz) {
617 } else if (!statep->ls_bool) {
626 if ((cf & HEREDOC) || state == SQBRACE)
629 ignore_backslash_newline--;
650 if (statep->nparen == 1) {
655 if ((c2 = getsc()) == /*(*/ ')') {
658 memcpy(wp, sp + 1, cz);
668 * mismatched parenthesis -
669 * assume we were really
670 * parsing a $(...) expression
683 /* reuse existing state machine */
689 * perform POSIX "quote removal" if the back-
690 * slash is "special", i.e. same cases as the
691 * {case '\\':} in Subst: plus closing brace;
692 * in mksh code "quote removal" on '\c' means
693 * write QCHAR+c, otherwise CHAR+\+CHAR+c are
694 * emitted (in heredocquote:)
696 if ((c = getsc()) == '"' || c == '\\' ||
697 c == '$' || c == '`' || c == /*{*/'}')
705 goto open_ssquote_unless_heredoc;
712 goto subst_dollar_ex;
715 else if (c != /*{*/ '}')
722 /* Same as SBASE, except (,|,) treated specially */
730 PUSH_STATE(SPATTERN);
731 } else /* FALLTHROUGH */
733 if (c == /*{*/ '}') {
745 } else if (c == '\\') {
746 switch (c = getsc()) {
748 /* trailing \ is lost */
755 if (statep->ls_bool) {
773 /* LETEXPR: (( ... )) */
775 if (c == /*(*/ ')') {
776 if (statep->nparen > 0)
778 else if ((c2 = getsc()) == /*(*/ ')') {
787 * mismatched parenthesis -
788 * assume we were really
789 * parsing a (...) expression
792 sp = Xstring(ws, wp);
793 dp = wdstrip(sp, WDS_KEEPQ);
794 s = pushs(SREREAD, source->areap);
795 s->start = s->str = s->u.freeme = dp;
802 * parentheses inside quotes and
803 * backslashes are lost, but AT&T ksh
804 * doesn't count them either
809 /* <<, <<-, <<< delimiter */
812 * here delimiters need a special case since
813 * $ and `...` are not to be treated specially
818 /* trailing \ is lost */
824 goto open_ssquote_unless_heredoc;
826 if ((c2 = getsc()) == '\'') {
829 ignore_backslash_newline++;
831 statep->ls_bool = false;
833 } else if (c2 == '"') {
836 state = statep->type = SHEREDQUOTE;
848 /* " in <<, <<-, <<< delimiter */
854 /* remove the trailing double quote */
856 /* store the quoted string */
858 XcheckN(ws, wp, (dp - sp));
860 while ((c = *dp++)) {
862 switch ((c = *dp++)) {
879 state = statep->type = SHEREDELIM;
882 /* in *(...|...) pattern (*+?@!) */
884 if (c == /*(*/ ')') {
887 } else if (c == '|') {
889 } else if (c == '(') {
893 PUSH_STATE(SPATTERN);
901 if (statep != &states[1])
902 /* XXX figure out what is missing */
903 yyerror("no closing quote\n");
905 /* This is done to avoid tests for SHEREDELIM wherever SBASE is tested */
906 if (state == SHEREDELIM)
909 dp = Xstring(ws, wp);
910 if (state == SBASE && (
911 #ifndef MKSH_LEGACY_MODE
912 (c == '&' && !Flag(FSH) && !Flag(FPOSIX)) ||
914 c == '<' || c == '>')) {
915 struct ioword *iop = alloc(sizeof(struct ioword), ATEMP);
917 if (Xlength(ws, wp) == 0)
918 iop->unit = c == '<' ? 0 : 1;
919 else for (iop->unit = 0, c2 = 0; c2 < Xlength(ws, wp); c2 += 2) {
922 if (!ksh_isdigit(dp[c2 + 1]))
924 iop->unit = (iop->unit * 10) + dp[c2 + 1] - '0';
927 if (iop->unit >= FDBASE)
931 if ((c2 = getsc()) != '>') {
941 /* <<, >>, <> are ok, >< is not */
942 if (c == c2 || (c == '<' && c2 == '>')) {
943 iop->flag |= c == c2 ?
944 (c == '>' ? IOCAT : IOHERE) : IORDWR;
945 if (iop->flag == IOHERE) {
946 if ((c2 = getsc()) == '-') {
949 } else if (c2 == '<')
950 iop->flag |= IOHERESTR;
953 iop->flag |= IONDELIM;
955 } else if (c2 == '&')
956 iop->flag |= IODUP | (c == '<' ? IORDUP : 0);
958 iop->flag |= c == '>' ? IOWRITE : IOREAD;
959 if (c == '>' && c2 == '|')
976 if (wp == dp && state == SBASE) {
979 /* no word, process LEX1 character */
980 if ((c == '|') || (c == '&') || (c == ';') || (c == '('/*)*/)) {
981 if ((c2 = getsc()) == c)
982 c = (c == ';') ? BREAK :
984 (c == '&') ? LOGAND :
985 /* c == '(' ) */ MDPAREN;
986 else if (c == '|' && c2 == '&')
988 else if (c == ';' && c2 == '|')
990 else if (c == ';' && c2 == '&')
996 if ((c2 = getsc()) == '&')
1002 } else if (c == '\n') {
1006 } else if (c == '\0')
1007 /* need here strings at EOF */
1012 /* terminate word */
1014 yylval.cp = Xclose(ws, wp);
1015 if (state == SWORD || state == SLETPAREN
1019 /* unget terminator */
1023 * note: the alias-vs-function code below depends on several
1024 * interna: starting from here, source->str is not modified;
1025 * the way getsc() and ungetsc() operate; etc.
1028 /* copy word to unprefixed string ident */
1031 if ((cf & HEREDELIM) && (sp[1] == '<'))
1032 while ((dp - ident) < IDENT) {
1033 if ((c = *sp++) == CHAR)
1035 else if ((c != OQUOTE) && (c != CQUOTE))
1039 while ((dp - ident) < IDENT && (c = *sp++) == CHAR)
1041 /* Make sure the ident array stays '\0' padded */
1042 memset(dp, 0, (ident + IDENT) - dp + 1);
1044 /* word is not unquoted */
1047 if (*ident != '\0' && (cf & (KEYWORD | ALIAS))) {
1049 uint32_t h = hash(ident);
1051 if ((cf & KEYWORD) && (p = ktsearch(&keywords, ident, h)) &&
1052 (!(cf & ESACONLY) || p->val.i == ESAC ||
1053 p->val.i == /*{*/ '}')) {
1054 afree(yylval.cp, ATEMP);
1057 if ((cf & ALIAS) && (p = ktsearch(&aliases, ident, h)) &&
1058 (p->flag & ISSET)) {
1060 * this still points to the same character as the
1061 * ungetsc'd terminator from above
1063 const char *cp = source->str;
1065 /* prefer POSIX but not Korn functions over aliases */
1066 while (*cp == ' ' || *cp == '\t')
1068 * this is like getsc() without skipping
1069 * over Source boundaries (including not
1070 * parsing ungetsc'd characters that got
1071 * pushed into an SREREAD) which is what
1072 * we want here anyway: find out whether
1073 * the alias name is followed by a POSIX
1074 * function definition (only the opening
1075 * parenthesis is checked though)
1078 /* prefer functions over aliases */
1079 if (cp[0] != '(' || cp[1] != ')') {
1082 while (s && (s->flags & SF_HASALIAS))
1087 /* push alias expansion */
1088 s = pushs(SALIAS, source->areap);
1089 s->start = s->str = p->val.s;
1091 s->flags |= SF_HASALIAS;
1093 if (source->type == SEOF) {
1094 /* prevent infinite recursion at EOS */
1096 source->flags |= SF_HASALIAS;
1099 afree(yylval.cp, ATEMP);
1113 for (p = heres; p < herep; p++)
1114 if (iseof && !((*p)->flag & IOHERESTR))
1115 /* only here strings at EOF */
1123 * read "<<word" text into temp file
1127 readhere(struct ioword *iop)
1130 const char *eof, *eofp;
1135 if (iop->flag & IOHERESTR) {
1136 /* process the here string */
1137 iop->heredoc = xp = evalstr(iop->delim, DOBLANK);
1138 xpos = strlen(xp) - 1;
1139 memmove(xp, xp + 1, xpos);
1144 eof = iop->flag & IONDELIM ? "<<" : evalstr(iop->delim, 0);
1146 if (!(iop->flag & IOEVAL))
1147 ignore_backslash_newline++;
1149 Xinit(xs, xp, 256, ATEMP);
1152 /* beginning of line */
1154 xpos = Xsavepos(xs, xp);
1155 if (iop->flag & IOSKIP) {
1156 /* skip over leading tabs */
1157 while ((c = getsc()) == '\t')
1159 goto heredoc_parse_char;
1164 /* compare with here document marker */
1166 /* end of here document marker, what to do? */
1169 if (!subshell_nesting_type)
1171 * not allowed outside $(...) or (...)
1175 /* allow $(...) or (...) to close here */
1180 * Allow EOF here so commands without trailing
1181 * newlines (mksh -c '...') will work as well.
1184 /* Newline terminates here document marker */
1185 goto heredoc_found_terminator;
1187 } else if (c == *eofp++)
1188 /* store; then read and compare next character */
1189 goto heredoc_store_and_loop;
1190 /* nope, mismatch; read until end of line */
1193 /* oops, reached EOF */
1194 yyerror("%s '%s' unclosed\n", "here document", eof);
1195 /* store character */
1198 /* read next character */
1201 /* we read a newline as last character */
1202 heredoc_store_and_loop:
1203 /* store character */
1207 goto heredoc_read_line;
1208 goto heredoc_read_char;
1210 heredoc_found_terminator:
1211 /* jump back to saved beginning of line */
1212 xp = Xrestpos(xs, xp, xpos);
1213 /* terminate, close and store */
1215 iop->heredoc = Xclose(xs, xp);
1217 if (!(iop->flag & IOEVAL))
1218 ignore_backslash_newline--;
1222 yyerror(const char *fmt, ...)
1226 /* pop aliases and re-reads */
1227 while (source->type == SALIAS || source->type == SREREAD)
1228 source = source->next;
1229 /* zap pending input */
1234 shf_vfprintf(shl_out, fmt, va);
1240 * input for yylex with alias expansion
1244 pushs(int type, Area *areap)
1248 s = alloc(sizeof(Source), areap);
1249 memset(s, 0, sizeof(Source));
1253 if (type == SFILE || type == SSTDIN)
1254 XinitN(s->xs, 256, s->areap);
1264 while ((c = *s->str++) == 0) {
1265 /* return 0 for EOF by default */
1281 case SSTRINGCMDLINE:
1285 s->start = s->str = *s->u.strv++;
1290 if (*s->u.strv == NULL) {
1291 s->start = s->str = "\n";
1294 s->start = s->str = " ";
1300 if (s->flags & SF_ALIASEND) {
1301 /* pass on an unused SF_ALIAS flag */
1303 source->flags |= s->flags & SF_ALIAS;
1305 } else if (*s->u.tblp->val.s &&
1306 (c = strnul(s->u.tblp->val.s)[-1], ksh_isspace(c))) {
1307 /* pop source stack */
1308 source = s = s->next;
1310 * Note that this alias ended with a
1311 * space, enabling alias expansion on
1312 * the following word.
1314 s->flags |= SF_ALIAS;
1317 * At this point, we need to keep the current
1318 * alias in the source list so recursive
1319 * aliases can be detected and we also need to
1320 * return the next character. Do this by
1321 * temporarily popping the alias to get the
1322 * next character and then put it back in the
1323 * source list with the SF_ALIASEND flag set.
1325 /* pop source stack */
1327 source->flags |= s->flags & SF_ALIAS;
1330 s->flags |= SF_ALIASEND;
1331 s->ugbuf[0] = c; s->ugbuf[1] = '\0';
1332 s->start = s->str = s->ugbuf;
1337 /* avoid reading EOF twice */
1345 if (s->start != s->ugbuf)
1347 afree(s->u.freeme, ATEMP);
1348 source = s = s->next;
1351 if (s->str == NULL) {
1353 s->start = s->str = null;
1356 if (s->flags & SF_ECHO) {
1357 shf_puts(s->str, shl_out);
1365 getsc_line(Source *s)
1367 char *xp = Xstring(s->xs, xp), *cp;
1368 bool interactive = Flag(FTALKING) && s->type == SSTDIN;
1369 bool have_tty = tobool(interactive && (s->flags & SF_TTY));
1371 /* Done here to ensure nothing odd happens when a timeout occurs */
1372 XcheckN(s->xs, xp, LINE);
1374 s->start = s->str = xp;
1376 if (have_tty && ksh_tmout) {
1377 ksh_tmout_state = TMOUT_READING;
1382 #ifndef MKSH_NO_CMDLINE_EDITING
1387 Flag(FEMACS) || Flag(FGMACS))) {
1404 while (/* CONSTCOND */ 1) {
1405 char *p = shf_getse(xp, Xnleft(s->xs, xp), s->u.shf);
1407 if (!p && shf_error(s->u.shf) &&
1408 shf_errno(s->u.shf) == EINTR) {
1409 shf_clearerr(s->u.shf);
1414 if (!p || (xp = p, xp[-1] == '\n'))
1416 /* double buffer size */
1417 /* move past NUL so doubling works... */
1419 XcheckN(s->xs, xp, Xlength(s->xs, xp));
1420 /* ...and move back again */
1424 * flush any unwanted input so other programs/builtins
1425 * can read it. Not very optimal, but less error prone
1426 * than flushing elsewhere, dealing with redirections,
1428 * TODO: reduce size of shf buffer (~128?) if SSTDIN
1430 if (s->type == SSTDIN)
1431 shf_flush(s->u.shf);
1434 * XXX: temporary kludge to restore source after a
1435 * trap may have been executed.
1438 if (have_tty && ksh_tmout) {
1439 ksh_tmout_state = TMOUT_EXECUTING;
1442 cp = Xstring(s->xs, xp);
1443 s->start = s->str = cp;
1444 strip_nuls(Xstring(s->xs, xp), Xlength(s->xs, xp));
1445 /* Note: if input is all nulls, this is not eof */
1446 if (Xlength(s->xs, xp) == 0) {
1448 if (s->type == SFILE)
1449 shf_fdclose(s->u.shf);
1451 } else if (interactive && *s->str &&
1452 (cur_prompt != PS1 || !ctype(*s->str, C_IFS | C_IFSWS))) {
1453 histsave(&s->line, s->str, true, true);
1454 #if !defined(MKSH_SMALL) && HAVE_PERSISTENT_HISTORY
1455 } else if (interactive && cur_prompt == PS1) {
1456 cp = Xstring(s->xs, xp);
1457 while (*cp && ctype(*cp, C_IFSWS))
1464 set_prompt(PS2, NULL);
1468 set_prompt(int to, Source *s)
1476 * Substitute ! and !! here, before substitutions are done
1477 * so ! in expanded variables are not expanded.
1478 * NOTE: this is not what AT&T ksh does (it does it after
1479 * substitutions); POSIX doesn't say which is to be done.
1483 char * volatile ps1;
1486 ps1 = str_val(global("PS1"));
1487 shf = shf_sopen(NULL, strlen(ps1) * 2,
1488 SHF_WR | SHF_DYNAMIC, NULL);
1490 if (*ps1 != '!' || *++ps1 == '!')
1491 shf_putchar(*ps1++, shf);
1493 shf_fprintf(shf, "%d",
1494 s ? s->line + 1 : 0);
1495 ps1 = shf_sclose(shf);
1496 saved_atemp = ATEMP;
1498 if (kshsetjmp(e->jbuf)) {
1499 prompt = safe_prompt;
1501 * Don't print an error - assume it has already
1502 * been printed. Reason is we may have forked
1503 * to run a command and the child may be
1504 * unwinding its stack through this code as it
1508 char *cp = substitute(ps1, 0);
1509 strdupx(prompt, cp, saved_atemp);
1514 /* command continuation */
1516 prompt = str_val(global("PS2"));
1522 pprompt(const char *cp, int ntruncate)
1524 int columns = 0, lines = 0;
1525 bool indelimit = false;
1529 * Undocumented AT&T ksh feature:
1530 * If the second char in the prompt string is \r then the first
1531 * char is taken to be a non-printing delimiter and any chars
1532 * between two instances of the delimiter are not considered to
1533 * be part of the prompt length
1535 if (*cp && cp[1] == '\r') {
1540 if (indelimit && *cp != delimiter)
1542 else if (*cp == '\n' || *cp == '\r') {
1543 lines += columns / x_cols + ((*cp == '\n') ? 1 : 0);
1545 } else if (*cp == '\t') {
1546 columns = (columns | 7) + 1;
1547 } else if (*cp == '\b') {
1550 } else if (*cp == delimiter)
1551 indelimit = !indelimit;
1552 else if (UTFMODE && ((unsigned char)*cp > 0x7F)) {
1554 columns += utf_widthadj(cp, &cp2);
1556 (ntruncate < (x_cols * lines + columns)))
1557 shf_write(cp, cp2 - cp, shl_out);
1558 cp = cp2 - /* loop increment */ 1;
1562 if ((*cp != delimiter) &&
1563 (indelimit || (ntruncate < (x_cols * lines + columns))))
1564 shf_putc(*cp, shl_out);
1567 return (x_cols * lines + columns);
1571 * Read the variable part of a ${...} expression (i.e. up to but not
1572 * including the :[-+?=#%] or close-brace).
1575 get_brace_var(XString *wsp, char *wp)
1579 PS_INITIAL, PS_SAW_HASH, PS_IDENT,
1581 } state = PS_INITIAL;
1583 while (/* CONSTCOND */ 1) {
1585 /* State machine to figure out where the variable part ends. */
1588 if (c == '#' || c == '!' || c == '%') {
1589 state = PS_SAW_HASH;
1596 else if (ksh_isdigit(c))
1598 else if (c == '#') {
1599 if (state == PS_SAW_HASH) {
1604 if (c2 != /*{*/ '}') {
1610 } else if (ctype(c, C_VAR1))
1616 if (!ksh_isalnux(c)) {
1620 if (!arraysub(&tmp))
1621 yyerror("missing ]\n");
1623 for (p = tmp; *p; ) {
1635 if (!ksh_isdigit(c))
1645 /* end of variable part */
1652 * Save an array subscript - returns true if matching bracket found, false
1653 * if eof or newline was found.
1654 * (Returned string double null terminated)
1657 arraysub(char **strp)
1661 /* we are just past the initial [ */
1662 unsigned int depth = 1;
1664 Xinit(ws, wp, 32, ATEMP);
1674 } while (depth > 0 && c && c != '\n');
1677 *strp = Xclose(ws, wp);
1679 return (tobool(depth == 0));
1682 /* Unget a char: handles case when we are already at the start of the buffer */
1686 struct sretrace_info *rp = retrace_info;
1690 /* Don't unget EOF... */
1691 if (source->str == null && c == '\0')
1694 if (Xlength(rp->xs, rp->xp))
1703 if (source->str > source->start)
1708 s = pushs(SREREAD, source->areap);
1709 s->ugbuf[0] = c; s->ugbuf[1] = '\0';
1710 s->start = s->str = s->ugbuf;
1717 /* Called to get a char that isn't a \newline sequence. */
1723 if (ignore_backslash_newline)
1724 return (o_getsc_u());
1726 if (backslash_skip == 1) {
1728 return (o_getsc_u());
1733 while (/* CONSTCOND */ 1) {
1736 if ((c2 = o_getsc_u()) == '\n')
1737 /* ignore the \newline; get the next char... */
1751 if ((unsigned char)(c = o_getsc_u()) != 0xEF) {
1755 if ((unsigned char)(c = o_getsc_u()) != 0xBB) {
1760 if ((unsigned char)(c = o_getsc_u()) != 0xBF) {
1770 push_state_i(State_info *si, Lex_state *old_end)
1772 Lex_state *news = alloc2(STATE_BSIZE, sizeof(Lex_state), ATEMP);
1774 news[0].ls_base = old_end;
1775 si->base = &news[0];
1776 si->end = &news[STATE_BSIZE];
1781 pop_state_i(State_info *si, Lex_state *old_end)
1783 Lex_state *old_base = si->base;
1785 si->base = old_end->ls_base - STATE_BSIZE;
1786 si->end = old_end->ls_base;
1788 afree(old_base, ATEMP);
1790 return (si->base + STATE_BSIZE - 1);