#include "sh.h"
-__RCSID("$MirOS: src/bin/mksh/lex.c,v 1.234 2017/04/06 01:59:55 tg Exp $");
+__RCSID("$MirOS: src/bin/mksh/lex.c,v 1.239 2017/05/05 22:53:29 tg Exp $");
/*
* states while lexing word
}
#if defined(MKSH_SMALL) && !defined(MKSH_SMALL_BUT_FAST)
-#define getsc getsc_i
+#define getsc() ord(getsc_i())
#else
static int getsc_r(int);
o_getsc_r(c);
}
-#define getsc() getsc_r(o_getsc())
+#define getsc() ord(getsc_r(o_getsc()))
#endif
#define STATE_BSIZE 8
} else {
/* normal lexing */
state = (cf & HEREDELIM) ? SHEREDELIM : SBASE;
- while ((c = getsc()) == ' ' || c == '\t')
+ while (ctype((c = getsc()), C_BLANK))
;
if (c == '#') {
ignore_backslash_newline++;
- while ((c = getsc()) != '\0' && c != '\n')
+ while (!ctype((c = getsc()), C_NUL | C_LF))
;
ignore_backslash_newline--;
}
while (!((c = getsc()) == 0 ||
((state == SBASE || state == SHEREDELIM) && ctype(c, C_LEX1)))) {
if (state == SBASE &&
- subshell_nesting_type == /*{*/ '}' &&
- c == /*{*/ '}')
+ subshell_nesting_type == ord(/*{*/ '}') &&
+ c == ord(/*{*/ '}'))
/* possibly end ${ :;} */
break;
Xcheck(ws, wp);
switch (state) {
case SADELIM:
- if (c == '(')
+ if (c == ord('('))
statep->nparen++;
- else if (c == ')')
+ else if (c == ord(')'))
statep->nparen--;
- else if (statep->nparen == 0 && (c == /*{*/ '}' ||
+ else if (statep->nparen == 0 && (c == ord(/*{*/ '}') ||
c == (int)statep->ls_adelim.delimiter)) {
*wp++ = ADELIM;
*wp++ = c;
- if (c == /*{*/ '}' || --statep->ls_adelim.num == 0)
+ if (c == ord(/*{*/ '}') || --statep->ls_adelim.num == 0)
POP_STATE();
- if (c == /*{*/ '}')
+ if (c == ord(/*{*/ '}'))
POP_STATE();
break;
}
/* FALLTHROUGH */
case SBASE:
- if (c == '[' && (cf & CMDASN)) {
+ if (c == ord('[') && (cf & CMDASN)) {
/* temporary */
*wp = EOS;
if (is_wdvarname(Xstring(ws, wp), false)) {
}
/* FALLTHROUGH */
Sbase1: /* includes *(...|...) pattern (*+?@!) */
- if (c == '*' || c == '@' || c == '+' || c == '?' ||
- c == '!') {
+ if (ctype(c, C_PATMO)) {
c2 = getsc();
- if (c2 == '(' /*)*/ ) {
+ if (c2 == ord('(' /*)*/)) {
*wp++ = OPAT;
*wp++ = c;
PUSH_STATE(SPATTERN);
/* FALLTHROUGH */
Sbase2: /* doesn't include *(...|...) pattern (*+?@!) */
switch (c) {
- case '\\':
+ case ord('\\'):
getsc_qchar:
if ((c = getsc())) {
/* trailing \ is lost */
*wp++ = c;
}
break;
- case '\'':
+ case ord('\''):
open_ssquote_unless_heredoc:
if ((cf & HEREDOC))
goto store_char;
ignore_backslash_newline++;
PUSH_STATE(SSQUOTE);
break;
- case '"':
+ case ord('"'):
open_sdquote:
*wp++ = OQUOTE;
PUSH_STATE(SDQUOTE);
break;
- case '$':
+ case ord('$'):
/*
* processing of dollar sign belongs in
* Subst, except for those which can open
subst_dollar_ex:
c = getsc();
switch (c) {
- case '"':
+ case ord('"'):
goto open_sdquote;
- case '\'':
+ case ord('\''):
goto open_sequote;
default:
goto SubstS;
Subst:
switch (c) {
- case '\\':
+ case ord('\\'):
c = getsc();
switch (c) {
- case '"':
+ case ord('"'):
if ((cf & HEREDOC))
goto heredocquote;
/* FALLTHROUGH */
- case '\\':
- case '$': case '`':
+ case ord('\\'):
+ case ord('$'):
+ case ord('`'):
store_qchar:
*wp++ = QCHAR;
*wp++ = c;
break;
}
break;
- case '$':
+ case ord('$'):
c = getsc();
SubstS:
- if (c == '(') /*)*/ {
+ if (c == ord('(' /*)*/)) {
c = getsc();
- if (c == '(') /*)*/ {
+ if (c == ord('(' /*)*/)) {
*wp++ = EXPRSUB;
PUSH_SRETRACE(SASPAREN);
statep->nparen = 2;
memcpy(wp, sp, cz);
wp += cz;
}
- } else if (c == '{') /*}*/ {
- if ((c = getsc()) == '|') {
+ } else if (c == ord('{' /*}*/)) {
+ if ((c = getsc()) == ord('|')) {
/*
* non-subenvironment
* value substitution
}
ungetsc(c);
*wp++ = OSUBST;
- *wp++ = '{'; /*}*/
+ *wp++ = '{' /*}*/;
wp = get_brace_var(&ws, wp);
c = getsc();
/* allow :# and :% (ksh88 compat) */
- if (c == ':') {
+ if (c == ord(':')) {
*wp++ = CHAR;
*wp++ = c;
c = getsc();
- if (c == ':') {
+ if (c == ord(':')) {
*wp++ = CHAR;
*wp++ = '0';
*wp++ = ADELIM;
statep->ls_adelim.num = 1;
statep->nparen = 0;
break;
- } else if (ksh_isdigit(c) ||
- c == '('/*)*/ || c == ' ' ||
+ } else if (ctype(c, C_DIGIT | C_DOLAR | C_SPC) ||
/*XXX what else? */
- c == '$') {
+ c == '(' /*)*/) {
/* substring subst. */
if (c != ' ') {
*wp++ = CHAR;
parse_adelim_slash:
*wp++ = CHAR;
*wp++ = c;
- if ((c = getsc()) == '/') {
+ if ((c = getsc()) == ord('/')) {
*wp++ = c2;
*wp++ = c;
} else
} else if (c == '@') {
c2 = getsc();
ungetsc(c2);
- if (c2 == '/') {
+ if (c2 == ord('/')) {
c2 = CHAR;
goto parse_adelim_slash;
}
* If this is a trim operation,
* treat (,|,) specially in STBRACE.
*/
- if (ksh_issubop2(c)) {
+ if (ctype(c, C_SUB2)) {
ungetsc(c);
if (Flag(FSH))
PUSH_STATE(STBRACEBOURNE);
else
PUSH_STATE(SBRACE);
}
- } else if (ksh_isalphx(c)) {
+ } else if (ctype(c, C_ALPHX)) {
*wp++ = OSUBST;
*wp++ = 'X';
do {
Xcheck(ws, wp);
*wp++ = c;
c = getsc();
- } while (ksh_isalnux(c));
+ } while (ctype(c, C_ALNUX));
*wp++ = '\0';
*wp++ = CSUBST;
*wp++ = 'X';
ungetsc(c);
}
break;
- case '`':
+ case ord('`'):
subst_gravis:
PUSH_STATE(SBQUOTE);
*wp++ = COMASUB;
break;
case SEQUOTE:
- if (c == '\'') {
+ if (c == ord('\'')) {
POP_STATE();
*wp++ = CQUOTE;
ignore_backslash_newline--;
- } else if (c == '\\') {
+ } else if (c == ord('\\')) {
if ((c2 = unbksl(true, getsc_i, ungetsc)) == -1)
c2 = getsc();
if (c2 == 0)
break;
case SSQUOTE:
- if (c == '\'') {
+ if (c == ord('\'')) {
POP_STATE();
if ((cf & HEREDOC) || state == SQBRACE)
goto store_char;
break;
case SDQUOTE:
- if (c == '"') {
+ if (c == ord('"')) {
POP_STATE();
*wp++ = CQUOTE;
} else
/* $(( ... )) */
case SASPAREN:
- if (c == '(')
+ if (c == ord('('))
statep->nparen++;
- else if (c == ')') {
+ else if (c == ord(')')) {
statep->nparen--;
if (statep->nparen == 1) {
/* end of EXPRSUB */
POP_SRETRACE();
- if ((c2 = getsc()) == /*(*/ ')') {
+ if ((c2 = getsc()) == ord(/*(*/ ')')) {
cz = strlen(sp) - 2;
XcheckN(ws, wp, cz);
memcpy(wp, sp + 1, cz);
goto Sbase2;
case SQBRACE:
- if (c == '\\') {
+ if (c == ord('\\')) {
/*
* perform POSIX "quote removal" if the back-
* slash is "special", i.e. same cases as the
* write QCHAR+c, otherwise CHAR+\+CHAR+c are
* emitted (in heredocquote:)
*/
- if ((c = getsc()) == '"' || c == '\\' ||
- c == '$' || c == '`' || c == /*{*/'}')
+ if ((c = getsc()) == ord('"') || c == ord('\\') ||
+ ctype(c, C_DOLAR | C_GRAVE) || c == ord(/*{*/ '}'))
goto store_qchar;
goto heredocquote;
}
goto common_SQBRACE;
case SBRACE:
- if (c == '\'')
+ if (c == ord('\''))
goto open_ssquote_unless_heredoc;
- else if (c == '\\')
+ else if (c == ord('\\'))
goto getsc_qchar;
common_SQBRACE:
- if (c == '"')
+ if (c == ord('"'))
goto open_sdquote;
- else if (c == '$')
+ else if (c == ord('$'))
goto subst_dollar_ex;
- else if (c == '`')
+ else if (c == ord('`'))
goto subst_gravis;
- else if (c != /*{*/ '}')
+ else if (c != ord(/*{*/ '}'))
goto store_char;
POP_STATE();
*wp++ = CSUBST;
/* Same as SBASE, except (,|,) treated specially */
case STBRACEKORN:
- if (c == '|')
+ if (c == ord('|'))
*wp++ = SPAT;
- else if (c == '(') {
+ else if (c == ord('(')) {
*wp++ = OPAT;
/* simile for @ */
*wp++ = ' ';
PUSH_STATE(SPATTERN);
} else /* FALLTHROUGH */
case STBRACEBOURNE:
- if (c == /*{*/ '}') {
+ if (c == ord(/*{*/ '}')) {
POP_STATE();
*wp++ = CSUBST;
*wp++ = /*{*/ '}';
break;
case SBQUOTE:
- if (c == '`') {
+ if (c == ord('`')) {
*wp++ = 0;
POP_STATE();
- } else if (c == '\\') {
+ } else if (c == ord('\\')) {
switch (c = getsc()) {
case 0:
/* trailing \ is lost */
break;
- case '$':
- case '`':
- case '\\':
+ case ord('$'):
+ case ord('`'):
+ case ord('\\'):
*wp++ = c;
break;
- case '"':
+ case ord('"'):
if (statep->ls_bool) {
*wp++ = c;
break;
/* LETEXPR: (( ... )) */
case SLETPAREN:
- if (c == /*(*/ ')') {
+ if (c == ord(/*(*/ ')')) {
if (statep->nparen > 0)
--statep->nparen;
- else if ((c2 = getsc()) == /*(*/ ')') {
+ else if ((c2 = getsc()) == ord(/*(*/ ')')) {
c = 0;
*wp++ = CQUOTE;
goto Done;
s->start = s->str = s->u.freeme = dp;
s->next = source;
source = s;
- ungetsc('('/*)*/);
- return ('('/*)*/);
+ ungetsc('(' /*)*/);
+ return (ord('(' /*)*/));
}
- } else if (c == '(')
+ } else if (c == ord('('))
/*
* parentheses inside quotes and
* backslashes are lost, but AT&T ksh
* $ and `...` are not to be treated specially
*/
switch (c) {
- case '\\':
+ case ord('\\'):
if ((c = getsc())) {
/* trailing \ is lost */
*wp++ = QCHAR;
*wp++ = c;
}
break;
- case '\'':
+ case ord('\''):
goto open_ssquote_unless_heredoc;
- case '$':
- if ((c2 = getsc()) == '\'') {
+ case ord('$'):
+ if ((c2 = getsc()) == ord('\'')) {
open_sequote:
*wp++ = OQUOTE;
ignore_backslash_newline++;
PUSH_STATE(SEQUOTE);
statep->ls_bool = false;
break;
- } else if (c2 == '"') {
+ } else if (c2 == ord('"')) {
/* FALLTHROUGH */
- case '"':
+ case ord('"'):
PUSH_SRETRACE(SHEREDQUOTE);
break;
}
/* " in << or <<- delimiter */
case SHEREDQUOTE:
- if (c != '"')
+ if (c != ord('"'))
goto Subst;
POP_SRETRACE();
dp = strnul(sp) - 1;
while ((c = *dp++)) {
if (c == '\\') {
switch ((c = *dp++)) {
- case '\\':
- case '"':
- case '$':
- case '`':
+ case ord('\\'):
+ case ord('"'):
+ case ord('$'):
+ case ord('`'):
break;
default:
*wp++ = CHAR;
/* in *(...|...) pattern (*+?@!) */
case SPATTERN:
- if (c == /*(*/ ')') {
+ if (c == ord(/*(*/ ')')) {
*wp++ = CPAT;
POP_STATE();
- } else if (c == '|') {
+ } else if (c == ord('|')) {
*wp++ = SPAT;
- } else if (c == '(') {
+ } else if (c == ord('(')) {
*wp++ = OPAT;
/* simile for @ */
*wp++ = ' ';
dp = Xstring(ws, wp);
if (state == SBASE && (
(c == '&' && !Flag(FSH) && !Flag(FPOSIX)) ||
- c == '<' || c == '>') && ((c2 = Xlength(ws, wp)) == 0 ||
- (c2 == 2 && dp[0] == CHAR && ksh_isdigit(dp[1])))) {
+ ctype(c, C_ANGLE)) && ((c2 = Xlength(ws, wp)) == 0 ||
+ (c2 == 2 && dp[0] == CHAR && ctype(dp[1], C_DIGIT)))) {
struct ioword *iop = alloc(sizeof(struct ioword), ATEMP);
iop->unit = c2 == 2 ? ksh_numdig(dp[1]) : c == '<' ? 0 : 1;
if (c == '&') {
- if ((c2 = getsc()) != '>') {
+ if ((c2 = getsc()) != ord('>')) {
ungetsc(c2);
goto no_iop;
}
c2 = getsc();
/* <<, >>, <> are ok, >< is not */
- if (c == c2 || (c == '<' && c2 == '>')) {
+ if (c == c2 || (c == ord('<') && c2 == ord('>'))) {
iop->ioflag |= c == c2 ?
- (c == '>' ? IOCAT : IOHERE) : IORDWR;
+ (c == ord('>') ? IOCAT : IOHERE) : IORDWR;
if (iop->ioflag == IOHERE) {
- if ((c2 = getsc()) == '-')
+ if ((c2 = getsc()) == ord('-'))
iop->ioflag |= IOSKIP;
- else if (c2 == '<')
+ else if (c2 == ord('<'))
iop->ioflag |= IOHERESTR;
else
ungetsc(c2);
}
- } else if (c2 == '&')
- iop->ioflag |= IODUP | (c == '<' ? IORDUP : 0);
+ } else if (c2 == ord('&'))
+ iop->ioflag |= IODUP | (c == ord('<') ? IORDUP : 0);
else {
- iop->ioflag |= c == '>' ? IOWRITE : IOREAD;
- if (c == '>' && c2 == '|')
+ iop->ioflag |= c == ord('>') ? IOWRITE : IOREAD;
+ if (c == ord('>') && c2 == ord('|'))
iop->ioflag |= IOCLOB;
else
ungetsc(c2);
/* free word */
Xfree(ws, wp);
/* no word, process LEX1 character */
- if ((c == '|') || (c == '&') || (c == ';') || (c == '('/*)*/)) {
+ if ((c == ord('|')) || (c == ord('&')) || (c == ord(';')) ||
+ (c == ord('(' /*)*/))) {
if ((c2 = getsc()) == c)
- c = (c == ';') ? BREAK :
- (c == '|') ? LOGOR :
- (c == '&') ? LOGAND :
- /* c == '(' ) */ MDPAREN;
- else if (c == '|' && c2 == '&')
+ c = (c == ord(';')) ? BREAK :
+ (c == ord('|')) ? LOGOR :
+ (c == ord('&')) ? LOGAND :
+ /* c == ord('(' )) */ MDPAREN;
+ else if (c == ord('|') && c2 == ord('&'))
c = COPROC;
- else if (c == ';' && c2 == '|')
+ else if (c == ord(';') && c2 == ord('|'))
c = BRKEV;
- else if (c == ';' && c2 == '&')
+ else if (c == ord(';') && c2 == ord('&'))
c = BRKFT;
else
ungetsc(c2);
#ifndef MKSH_SMALL
if (c == BREAK) {
- if ((c2 = getsc()) == '&')
+ if ((c2 = getsc()) == ord('&'))
c = BRKEV;
else
ungetsc(c2);
}
#endif
- } else if (c == '\n') {
+ } else if (c == ord('\n')) {
if (cf & HEREDELIM)
ungetsc(c);
else {
if ((cf & KEYWORD) && (p = ktsearch(&keywords, ident, h)) &&
(!(cf & ESACONLY) || p->val.i == ESAC ||
- p->val.i == /*{*/ '}')) {
+ p->val.i == ord(/*{*/ '}'))) {
afree(yylval.cp, ATEMP);
return (p->val.i);
}
const char *cp = source->str;
/* prefer POSIX but not Korn functions over aliases */
- while (*cp == ' ' || *cp == '\t')
+ while (ctype(*cp, C_BLANK))
/*
* this is like getsc() without skipping
* over Source boundaries (including not
if (!*eofp) {
/* end of here document marker, what to do? */
switch (c) {
- case /*(*/ ')':
+ case ord(/*(*/ ')'):
if (!subshell_nesting_type)
/*-
* not allowed outside $(...) or (...)
* Allow EOF here so commands without trailing
* newlines (mksh -c '...') will work as well.
*/
- case '\n':
+ case ord('\n'):
/* Newline terminates here document marker */
goto heredoc_found_terminator;
}
Source *s = source;
int c;
- while ((c = *s->str++) == 0) {
+ while ((c = ord(*s->str++)) == 0) {
/* return 0 for EOF by default */
s->str = NULL;
switch (s->type) {
source->flags |= s->flags & SF_ALIAS;
s = source;
} else if (*s->u.tblp->val.s &&
- (c = strnul(s->u.tblp->val.s)[-1], ksh_isspace(c))) {
+ ctype((c = strnul(s->u.tblp->val.s)[-1]), C_SPACE)) {
/* pop source stack */
source = s = s->next;
/*
} else if (interactive && cur_prompt == PS1) {
check_for_sole_return:
cp = Xstring(s->xs, xp);
- while (*cp && ctype(*cp, C_IFSWS))
+ while (ctype(*cp, C_IFSWS))
++cp;
if (!*cp) {
histsave(&s->line, NULL, HIST_FLUSH, true);
for (; *cp; cp++) {
if (indelimit && *cp != delimiter)
;
- else if (*cp == '\n' || *cp == '\r') {
+ else if (ctype(*cp, C_CR | C_LF)) {
lines += columns / x_cols + ((*cp == '\n') ? 1 : 0);
columns = 0;
} else if (*cp == '\t') {
columns--;
} else if (*cp == delimiter)
indelimit = !indelimit;
- else if (UTFMODE && ((unsigned char)*cp > 0x7F)) {
+ else if (UTFMODE && (rtt2asc(*cp) > 0x7F)) {
const char *cp2;
columns += utf_widthadj(cp, &cp2);
if (doprint && (indelimit ||
c2 = getsc();
ungetsc(c2);
- if (c2 != /*{*/ '}') {
+ if (ord(c2) != ord(/*{*/ '}')) {
ungetsc(c);
goto out;
}
}
goto ps_common;
case PS_SAW_BANG:
- switch (c) {
- case '@':
- case '#':
- case '-':
- case '?':
+ switch (ord(c)) {
+ case ord('@'):
+ case ord('#'):
+ case ord('-'):
+ case ord('?'):
goto out;
}
goto ps_common;
case PS_INITIAL:
- switch (c) {
- case '%':
+ switch (ord(c)) {
+ case ord('%'):
state = PS_SAW_PERCENT;
goto next;
- case '#':
+ case ord('#'):
state = PS_SAW_HASH;
goto next;
- case '!':
+ case ord('!'):
state = PS_SAW_BANG;
goto next;
}
/* FALLTHROUGH */
case PS_SAW_PERCENT:
ps_common:
- if (ksh_isalphx(c))
+ if (ctype(c, C_ALPHX))
state = PS_IDENT;
- else if (ksh_isdigit(c))
+ else if (ctype(c, C_DIGIT))
state = PS_NUMBER;
else if (ctype(c, C_VAR1))
state = PS_VAR1;
goto out;
break;
case PS_IDENT:
- if (!ksh_isalnux(c)) {
- if (c == '[') {
+ if (!ctype(c, C_ALNUX)) {
+ if (ord(c) == ord('[')) {
char *tmp, *p;
if (!arraysub(&tmp))
yyerror("missing ]");
*wp++ = c;
- for (p = tmp; *p; ) {
+ p = tmp;
+ while (*p) {
Xcheck(*wsp, wp);
*wp++ = *p++;
}
next:
break;
case PS_NUMBER:
- if (!ksh_isdigit(c))
+ if (!ctype(c, C_DIGIT))
goto out;
break;
case PS_VAR1:
c = getsc();
Xcheck(ws, wp);
*wp++ = c;
- if (c == '[')
+ if (ord(c) == ord('['))
depth++;
- else if (c == ']')
+ else if (ord(c) == ord(']'))
depth--;
} while (depth > 0 && c && c != '\n');
{
int c;
- if ((unsigned char)(c = o_getsc_u()) != 0xEF) {
+ if (rtt2asc((c = o_getsc_u())) != 0xEF) {
ungetsc_i(c);
return;
}
- if ((unsigned char)(c = o_getsc_u()) != 0xBB) {
+ if (rtt2asc((c = o_getsc_u())) != 0xBB) {
ungetsc_i(c);
- ungetsc_i(0xEF);
+ ungetsc_i(asc2rtt(0xEF));
return;
}
- if ((unsigned char)(c = o_getsc_u()) != 0xBF) {
+ if (rtt2asc((c = o_getsc_u())) != 0xBF) {
ungetsc_i(c);
- ungetsc_i(0xBB);
- ungetsc_i(0xEF);
+ ungetsc_i(asc2rtt(0xBB));
+ ungetsc_i(asc2rtt(0xEF));
return;
}
UTFMODE |= 8;