1 /* $OpenBSD: expr.c,v 1.24 2014/12/08 14:26:31 otto Exp $ */
4 * Copyright (c) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
5 * 2011, 2012, 2013, 2014, 2016
6 * mirabilos <m@mirbsd.org>
8 * Provided that these terms and disclaimer and all copyright notices
9 * are retained or reproduced in an accompanying document, permission
10 * is granted to deal in this work without restriction, including un-
11 * limited rights to use, publicly perform, distribute, sell, modify,
12 * merge, give away, or sublicence.
14 * This work is provided "AS IS" and WITHOUT WARRANTY of any kind, to
15 * the utmost extent permitted by applicable law, neither express nor
16 * implied; without malicious intent or gross negligence. In no event
17 * may a licensor, author or contributor be held liable for indirect,
18 * direct, other damage, loss, or other issues arising in any way out
19 * of dealing in the work, even if advised of the possibility of such
20 * damage or existence of a defect, except proven that it results out
21 * of said person's immediate fault when using the work as intended.
26 __RCSID("$MirOS: src/bin/mksh/expr.c,v 1.90 2016/11/07 16:58:48 tg Exp $");
31 /* precedence levels; used to be enum prec but we do arithmetic on it */
32 #define P_PRIMARY 0 /* VAR, LIT, (), ! ~ ++ -- */
33 #define P_MULT 1 /* * / % */
34 #define P_ADD 2 /* + - */
35 #define P_SHIFT 3 /* ^< ^> << >> */
36 #define P_RELATION 4 /* < <= > >= */
37 #define P_EQUALITY 5 /* == != */
38 #define P_BAND 6 /* & */
39 #define P_BXOR 7 /* ^ */
40 #define P_BOR 8 /* | */
41 #define P_LAND 9 /* && */
42 #define P_LOR 10 /* || */
43 #define P_TERN 11 /* ?: */
44 /* = += -= *= /= %= ^<= ^>= <<= >>= &= ^= |= */
46 #define P_COMMA 13 /* , */
47 #define MAX_PREC P_COMMA
54 static const char opname[][4] = {
59 static const uint8_t oplen[] = {
64 static const uint8_t opprec[] = {
69 typedef struct expr_state {
70 /* expression being evaluated */
71 const char *expression;
72 /* lexical position */
74 /* value from token() */
76 /* variable that is being recursively expanded (EXPRINEVAL flag set) */
78 /* token from token() */
80 /* don't do assignments (for ?:, &&, ||) */
82 /* evaluating an $(()) expression? */
84 /* unsigned arithmetic calculation */
89 ET_UNEXPECTED, ET_BADLIT, ET_RECURSIVE,
90 ET_LVALUE, ET_RDONLY, ET_STR
93 static void evalerr(Expr_state *, enum error_type, const char *)
95 static struct tbl *evalexpr(Expr_state *, unsigned int);
96 static void exprtoken(Expr_state *);
97 static struct tbl *do_ppmm(Expr_state *, enum token, struct tbl *, bool);
98 static void assign_check(Expr_state *, enum token, struct tbl *);
99 static struct tbl *intvar(Expr_state *, struct tbl *);
102 * parse and evaluate expression
105 evaluate(const char *expr, mksh_ari_t *rval, int error_ok, bool arith)
110 v.flag = DEFINED | INTEGER;
112 ret = v_evaluate(&v, expr, error_ok, arith);
118 * parse and evaluate expression, storing result in vp.
121 v_evaluate(struct tbl *vp, const char *expr, volatile int error_ok,
126 Expr_state * const es = &curstate;
129 /* save state to allow recursive calls */
130 memset(&curstate, 0, sizeof(curstate));
131 curstate.expression = curstate.tokp = expr;
133 curstate.arith = arith;
136 if ((i = kshsetjmp(e->jbuf))) {
137 /* Clear EXPRINEVAL in any variables we were playing with */
138 if (curstate.evaling)
139 curstate.evaling->flag &= ~EXPRINEVAL;
142 if (error_ok == KSH_RETURN_ERROR)
151 if (es->tok == END) {
153 es->val = tempvar("");
155 v = intvar(es, evalexpr(es, MAX_PREC));
158 evalerr(es, ET_UNEXPECTED, NULL);
160 if (es->arith && es->natural)
162 if (vp->flag & INTEGER)
163 setint_v(vp, v, es->arith);
165 /* can fail if readonly */
166 setstr(vp, str_val(v), error_ok);
174 evalerr(Expr_state *es, enum error_type type, const char *str)
187 s = str_val(es->val);
190 s = "end of expression";
198 s = opname[(int)es->tok];
200 warningf(true, Tf_sD_s_qs, es->expression,
205 warningf(true, Tf_sD_s_qs, es->expression,
210 warningf(true, Tf_sD_s_qs, es->expression,
211 "expression recurses on parameter", str);
215 warningf(true, Tf_sD_s_s,
216 es->expression, str, "requires lvalue");
220 warningf(true, Tf_sD_s_s,
221 es->expression, str, "applied to read-only variable");
224 default: /* keep gcc happy */
226 warningf(true, Tf_sD_s, es->expression, str);
232 /* do a ++ or -- operation */
234 do_ppmm(Expr_state *es, enum token op, struct tbl *vasn, bool is_prefix)
239 assign_check(es, op, vasn);
241 vl = intvar(es, vasn);
243 if (op == O_PLUSPLUS)
248 if (vasn->flag & INTEGER)
249 setint_v(vasn, vl, es->arith);
251 setint(vasn, vl->val.i);
254 /* undo the increment/decrement */
261 evalexpr(Expr_state *es, unsigned int prec)
263 struct tbl *vl, *vr = NULL, *vasn;
265 mksh_uari_t res = 0, t1, t2, t3;
267 if (prec == P_PRIMARY) {
268 switch ((int)(op = es->tok)) {
274 vl = intvar(es, evalexpr(es, P_PRIMARY));
277 vl->val.u = ~vl->val.u;
280 vl->val.u = !vl->val.u;
283 vl->val.u = -vl->val.u;
293 vl = evalexpr(es, MAX_PREC);
294 if (es->tok != CLOSE_PAREN)
295 evalerr(es, ET_STR, "missing )");
302 vl = do_ppmm(es, op, es->val, true);
313 evalerr(es, ET_UNEXPECTED, NULL);
317 if (es->tok == O_PLUSPLUS || es->tok == O_MINUSMINUS) {
318 vl = do_ppmm(es, es->tok, vl, false);
323 /* prec == P_PRIMARY */
326 vl = evalexpr(es, prec - 1);
327 while ((int)(op = es->tok) >= (int)O_EQ && (int)op <= (int)O_COMMA &&
328 opprec[(int)op] == prec) {
340 /* vl may not have a value yet */
342 if (IS_ASSIGNOP(op)) {
344 assign_check(es, op, vasn);
345 vr = intvar(es, evalexpr(es, P_ASSIGN));
346 } else if (op == O_TERN) {
347 bool ev = vl->val.u != 0;
352 vl = evalexpr(es, MAX_PREC);
355 if (es->tok != CTERN)
356 evalerr(es, ET_STR, "missing :");
360 vr = evalexpr(es, P_TERN);
365 } else if (op != O_LAND && op != O_LOR)
366 vr = intvar(es, evalexpr(es, prec - 1));
368 /* common ops setup */
374 if (vr->val.u == 0) {
376 evalerr(es, ET_STR, "zero divisor");
379 /* calculate the absolute values */
380 t1 = vl->val.i < 0 ? -vl->val.u : vl->val.u;
381 t2 = vr->val.i < 0 ? -vr->val.u : vr->val.u;
383 #ifndef MKSH_LEGACY_MODE
407 #define cmpop(op) (es->natural ? \
408 (mksh_uari_t)(vl->val.u op vr->val.u) : \
409 (mksh_uari_t)(vl->val.i op vr->val.i) \
421 res = vl->val.u % vr->val.u;
424 goto signed_division;
428 res = vl->val.u / vr->val.u;
433 * a / b = abs(a) / abs(b) * sgn((u)a^(u)b)
436 #ifndef MKSH_LEGACY_MODE
437 res = ((vl->val.u ^ vr->val.u) & 0x80000000) ? -t3 : t3;
439 res = ((t1 == vl->val.u ? 0 : 1) ^
440 (t2 == vr->val.u ? 0 : 1)) ? -t3 : t3;
442 if (op == O_MOD || op == O_MODASN) {
444 * primitive modulo, to get the sign of
445 * the result correct:
446 * (a % b) = a - ((a / b) * b)
447 * the subtraction and multiplication
448 * are, amazingly enough, sign ignorant
450 res = vl->val.u - (res * vr->val.u);
461 #ifndef MKSH_LEGACY_MODE
464 res = (t1 << t2) | (t1 >> (32 - t2));
468 res = (t1 >> t2) | (t1 << (32 - t2));
477 res = es->natural || vl->val.i >= 0 ?
515 vr = intvar(es, evalexpr(es, prec - 1));
516 res = t1 && vr->val.u;
524 vr = intvar(es, evalexpr(es, prec - 1));
525 res = t1 || vr->val.u;
537 if (IS_ASSIGNOP(op)) {
540 if (vasn->flag & INTEGER)
541 setint_v(vasn, vr, es->arith);
543 setint(vasn, vr->val.i);
553 exprtoken(Expr_state *es)
555 const char *cp = es->tokp;
559 /* skip whitespace */
561 while ((c = *cp), ksh_isspace(c))
563 if (es->tokp == es->expression && c == '#') {
564 /* expression begins with # */
565 /* switch to unsigned */
574 else if (ksh_isalphx(c)) {
575 for (; ksh_isalnux(c); c = *cp)
580 len = array_ref_len(cp);
582 evalerr(es, ET_STR, "missing ]");
586 es->val = tempvar("");
587 es->val->flag |= EXPRLVALUE;
589 strndupx(tvar, es->tokp, cp - es->tokp, ATEMP);
590 es->val = global(tvar);
594 } else if (c == '1' && cp[1] == '#') {
597 cp += utf_ptradj(cp);
598 strndupx(tvar, es->tokp, cp - es->tokp, ATEMP);
601 } else if (c == '\'') {
604 evalerr(es, ET_UNEXPECTED, NULL);
606 cp += utf_ptradj(cp);
609 "multi-character character constant");
610 /* 'x' -> 1#x (x = one multibyte character) */
612 tvar = alloc(c + /* NUL */ 1, ATEMP);
615 memcpy(tvar + 2, es->tokp + 1, c - 2);
619 } else if (ksh_isdigit(c)) {
620 while (c != '_' && (ksh_isalnux(c) || c == '#'))
622 strndupx(tvar, es->tokp, --cp - es->tokp, ATEMP);
624 es->val = tempvar("");
625 es->val->flag &= ~INTEGER;
627 es->val->val.s = tvar;
628 if (setint_v(es->val, es->val, es->arith) == NULL)
629 evalerr(es, ET_BADLIT, tvar);
635 for (i = 0; (n0 = opname[i][0]); i++)
636 if (c == n0 && strncmp(cp, opname[i],
637 (size_t)oplen[i]) == 0) {
638 es->tok = (enum token)i;
649 assign_check(Expr_state *es, enum token op, struct tbl *vasn)
651 if (es->tok == END || !vasn ||
652 (vasn->name[0] == '\0' && !(vasn->flag & EXPRLVALUE)))
653 evalerr(es, ET_LVALUE, opname[(int)op]);
654 else if (vasn->flag & RDONLY)
655 evalerr(es, ET_RDONLY, opname[(int)op]);
659 tempvar(const char *vname)
664 vsize = strlen(vname) + 1;
665 vp = alloc(offsetof(struct tbl, name[0]) + vsize, ATEMP);
666 memcpy(vp->name, vname, vsize);
667 vp->flag = ISSET|INTEGER;
675 /* cast (string) variable to temporary integer variable */
677 intvar(Expr_state *es, struct tbl *vp)
681 /* try to avoid replacing a temp var with another temp var */
682 if (vp->name[0] == '\0' &&
683 (vp->flag & (ISSET|INTEGER|EXPRLVALUE)) == (ISSET|INTEGER))
687 if (setint_v(vq, vp, es->arith) == NULL) {
688 if (vp->flag & EXPRINEVAL)
689 evalerr(es, ET_RECURSIVE, vp->name);
691 vp->flag |= EXPRINEVAL;
692 v_evaluate(vq, str_val(vp), KSH_UNWIND_ERROR, es->arith);
693 vp->flag &= ~EXPRINEVAL;
701 * UTF-8 support code: high-level functions
705 utf_widthadj(const char *src, const char **dst)
711 if (!UTFMODE || (len = utf_mbtowc(&wc, src)) == (size_t)-1 ||
714 else if ((width = utf_wcwidth(wc)) < 0)
715 /* XXX use 2 for x_zotc3 here? */
724 utf_mbswidth(const char *s)
726 size_t len, width = 0;
734 if (((len = utf_mbtowc(&wc, s)) == (size_t)-1) ||
735 ((cw = utf_wcwidth(wc)) == -1)) {
746 utf_skipcols(const char *p, int cols, int *colp)
753 /* end of input; special handling for edit.c */
755 return (p + cols - c);
759 c += utf_widthadj(p, &p);
762 while (utf_widthadj(p, &q) == 0)
770 utf_ptradj(const char *src)
775 *(const unsigned char *)(src) < 0xC2 ||
776 (n = utf_mbtowc(NULL, src)) == (size_t)-1)
782 * UTF-8 support code: low-level functions
785 /* CESU-8 multibyte and wide character conversion crafted for mksh */
788 utf_mbtowc(unsigned int *dst, const char *src)
790 const unsigned char *s = (const unsigned char *)src;
793 if ((wc = *s++) < 0x80) {
797 return (wc ? ((const char *)s - src) : 0);
799 if (wc < 0xC2 || wc >= 0xF0)
800 /* < 0xC0: spurious second byte */
801 /* < 0xC2: non-minimalistic mapping error in 2-byte seqs */
802 /* > 0xEF: beyond BMP */
806 wc = (wc & 0x1F) << 6;
807 if (((c = *s++) & 0xC0) != 0x80)
813 wc = (wc & 0x0F) << 12;
815 if (((c = *s++) & 0xC0) != 0x80)
817 wc |= (c & 0x3F) << 6;
819 if (((c = *s++) & 0xC0) != 0x80)
823 /* Check for non-minimalistic mapping error in 3-byte seqs */
824 if (wc >= 0x0800 && wc <= 0xFFFD)
827 return ((size_t)(-1));
831 utf_wctomb(char *dst, unsigned int wc)
840 d = (unsigned char *)dst;
842 *d++ = (wc >> 6) | 0xC0;
844 *d++ = ((wc = wc > 0xFFFD ? 0xFFFD : wc) >> 12) | 0xE0;
845 *d++ = ((wc >> 6) & 0x3F) | 0x80;
847 *d++ = (wc & 0x3F) | 0x80;
848 return ((char *)d - dst);
852 * Wrapper around access(2) because it says root can execute everything
853 * on some operating systems. Does not set errno, no user needs it. Use
854 * this iff mode can have the X_OK bit set, access otherwise.
857 ksh_access(const char *fn, int mode)
862 if ((rv = access(fn, mode)) == 0 && kshuid == 0 && (mode & X_OK) &&
863 (rv = stat(fn, &sb)) == 0 && !S_ISDIR(sb.st_mode) &&
864 (sb.st_mode & (S_IXUSR|S_IXGRP|S_IXOTH)) == 0)
870 #ifndef MIRBSD_BOOTFLOPPY
871 /* From: X11/xc/programs/xterm/wcwidth.c,v 1.9 */
878 static int mb_ucsbsearch(const struct mb_ucsrange arr[], size_t elems,
879 unsigned int val) MKSH_A_PURE;
882 * Generated from the Unicode Character Database, Version 9.0.0, by
883 * MirOS: contrib/code/Snippets/eawparse,v 1.3 2014/11/16 12:16:24 tg Exp $
886 static const struct mb_ucsrange mb_ucs_combining[] = {
1098 static const struct mb_ucsrange mb_ucs_fullwidth[] = {
1145 /* simple binary search in ranges, with bounds optimisation */
1147 mb_ucsbsearch(const struct mb_ucsrange arr[], size_t elems, unsigned int val)
1149 size_t min = 0, mid, max = elems;
1151 if (val < arr[min].beg || val > arr[max - 1].end)
1155 mid = (min + max) / 2;
1157 if (val < arr[mid].beg)
1159 else if (val > arr[mid].end)
1167 /* Unix column width of a wide character (Unicode code point, really) */
1169 utf_wcwidth(unsigned int wc)
1171 /* except NUL, C0/C1 control characters and DEL yield -1 */
1172 if (wc < 0x20 || (wc >= 0x7F && wc < 0xA0))
1173 return (wc ? -1 : 0);
1175 /* combining characters use 0 screen columns */
1176 if (mb_ucsbsearch(mb_ucs_combining, NELEM(mb_ucs_combining), wc))
1179 /* all others use 1 or 2 screen columns */
1180 if (mb_ucsbsearch(mb_ucs_fullwidth, NELEM(mb_ucs_fullwidth), wc))