Upgrade to mksh R56b.

author Elliott Hughes <enh@google.com>

Fri, 22 Sep 2017 23:04:20 +0000 (16:04 -0700)

committer Elliott Hughes <enh@google.com>

Fri, 22 Sep 2017 23:21:10 +0000 (16:21 -0700)
author Elliott Hughes <enh@google.com>
Fri, 22 Sep 2017 23:04:20 +0000 (16:04 -0700)
committer Elliott Hughes <enh@google.com>
Fri, 22 Sep 2017 23:21:10 +0000 (16:21 -0700)
diff --git a/Android.mk b/Android.mk

index 829fc7b..5a53d9f 100644 (file)
--- a/Android.mk
+++ b/Android.mk
@@ -32,48 +32,89 @@ MKSH_SRC_FILES := \
  
  MKSH_INCLUDES := $(LOCAL_PATH)/src
  
+# Compiler flags...
  MKSH_CFLAGS += \
      -Wno-deprecated-declarations \
      -fno-asynchronous-unwind-tables \
      -fno-strict-aliasing \
      -fstack-protector -fwrapv \
  
-# ...and CPPFLAGS.
+# ...various options we choose...
  MKSH_CFLAGS += \
-    -DDEBUG_LEAKS -DMKSH_ASSUME_UTF8 \
+    -DDEBUG_LEAKS \
+    -DMKSH_ASSUME_UTF8 \
      -DMKSH_DONT_EMIT_IDSTRING \
+    -DKSH_VERSIONNAME_VENDOR_EXT=\"\ Android\" \
+
+# ...and the defines from Build.sh.
+MKSH_CFLAGS += \
      -DMKSH_BUILDSH \
-    -D_GNU_SOURCE -DSETUID_CAN_FAIL_WITH_EAGAIN \
-    -DHAVE_ATTRIBUTE_BOUNDED=0 -DHAVE_ATTRIBUTE_FORMAT=1 \
+    -D_GNU_SOURCE \
+    -DSETUID_CAN_FAIL_WITH_EAGAIN \
+    -DHAVE_STRING_POOLING=1 \
+    -DHAVE_ATTRIBUTE_BOUNDED=1 \
+    -DHAVE_ATTRIBUTE_FORMAT=1 \
      -DHAVE_ATTRIBUTE_NORETURN=1 \
      -DHAVE_ATTRIBUTE_PURE=1 \
      -DHAVE_ATTRIBUTE_UNUSED=1 \
-    -DHAVE_ATTRIBUTE_USED=1 -DHAVE_SYS_TIME_H=1 -DHAVE_TIME_H=1 \
-    -DHAVE_BOTH_TIME_H=1 -DHAVE_SYS_BSDTYPES_H=0 \
-    -DHAVE_SYS_FILE_H=1 -DHAVE_SYS_MKDEV_H=0 -DHAVE_SYS_MMAN_H=1 \
-    -DHAVE_SYS_PARAM_H=1 -DHAVE_SYS_RESOURCE_H=1 \
-    -DHAVE_SYS_SELECT_H=1 -DHAVE_SYS_SYSMACROS_H=1 \
-    -DHAVE_BSTRING_H=0 -DHAVE_GRP_H=1 -DHAVE_IO_H=0 -DHAVE_LIBGEN_H=1 \
-    -DHAVE_LIBUTIL_H=0 -DHAVE_PATHS_H=1 -DHAVE_STDINT_H=1 \
-    -DHAVE_STRINGS_H=1 -DHAVE_TERMIOS_H=1 -DHAVE_ULIMIT_H=0 \
-    -DHAVE_VALUES_H=0 -DHAVE_CAN_INTTYPES=1 -DHAVE_CAN_UCBINTS=1 \
-    -DHAVE_CAN_INT8TYPE=1 -DHAVE_CAN_UCBINT8=1 -DHAVE_RLIM_T=1 \
+    -DHAVE_ATTRIBUTE_USED=1 \
+    -DHAVE_SYS_TIME_H=1 \
+    -DHAVE_TIME_H=1 \
+    -DHAVE_BOTH_TIME_H=1 \
+    -DHAVE_SYS_BSDTYPES_H=0 \
+    -DHAVE_SYS_FILE_H=1 \
+    -DHAVE_SYS_MKDEV_H=0 \
+    -DHAVE_SYS_MMAN_H=1 \
+    -DHAVE_SYS_PARAM_H=1 \
+    -DHAVE_SYS_RESOURCE_H=1 \
+    -DHAVE_SYS_SELECT_H=1 \
+    -DHAVE_SYS_SYSMACROS_H=1 \
+    -DHAVE_BSTRING_H=0 \
+    -DHAVE_GRP_H=1 \
+    -DHAVE_IO_H=0 \
+    -DHAVE_LIBGEN_H=1 \
+    -DHAVE_LIBUTIL_H=0 \
+    -DHAVE_PATHS_H=1 \
+    -DHAVE_STDINT_H=1 \
+    -DHAVE_STRINGS_H=1 \
+    -DHAVE_TERMIOS_H=1 \
+    -DHAVE_ULIMIT_H=0 \
+    -DHAVE_VALUES_H=0 \
+    -DHAVE_CAN_INTTYPES=1 \
+    -DHAVE_CAN_UCBINTS=1 \
+    -DHAVE_CAN_INT8TYPE=1 \
+    -DHAVE_CAN_UCBINT8=1 \
+    -DHAVE_RLIM_T=1 \
      -DHAVE_SIG_T=1 \
-    -DHAVE_STRING_POOLING=1 \
-    -DHAVE_SYS_ERRLIST=0 -DHAVE_SYS_SIGNAME=1 \
-    -DHAVE_SYS_SIGLIST=1 -DHAVE_FLOCK=1 -DHAVE_LOCK_FCNTL=1 \
+    -DHAVE_SYS_ERRLIST=0 \
+    -DHAVE_SYS_SIGNAME=1 \
+    -DHAVE_SYS_SIGLIST=1 \
+    -DHAVE_FLOCK=1 \
+    -DHAVE_LOCK_FCNTL=1 \
      -DHAVE_GETRUSAGE=1 \
      -DHAVE_GETSID=1 \
      -DHAVE_GETTIMEOFDAY=1 \
-    -DHAVE_ISSETUGID=0 \
      -DHAVE_KILLPG=1 \
-    -DHAVE_MEMMOVE=1 -DHAVE_MKNOD=0 -DHAVE_MMAP=1 -DHAVE_NICE=1 \
-    -DHAVE_REVOKE=0 -DHAVE_SETLOCALE_CTYPE=0 \
-    -DHAVE_LANGINFO_CODESET=0 -DHAVE_SELECT=1 -DHAVE_SETRESUGID=1 \
-    -DHAVE_SETGROUPS=1 -DHAVE_STRERROR=1 -DHAVE_STRSIGNAL=0 \
-    -DHAVE_STRLCPY=1 -DHAVE_FLOCK_DECL=1 -DHAVE_REVOKE_DECL=1 \
-    -DHAVE_SYS_ERRLIST_DECL=0 -DHAVE_SYS_SIGLIST_DECL=1 \
-    -DHAVE_PERSISTENT_HISTORY=0 -DMKSH_BUILD_R=551
+    -DHAVE_MEMMOVE=1 \
+    -DHAVE_MKNOD=0 \
+    -DHAVE_MMAP=1 \
+    -DHAVE_FTRUNCATE=1 \
+    -DHAVE_NICE=1 \
+    -DHAVE_REVOKE=0 \
+    -DHAVE_SETLOCALE_CTYPE=1 \
+    -DHAVE_LANGINFO_CODESET=1 \
+    -DHAVE_SELECT=1 \
+    -DHAVE_SETRESUGID=1 \
+    -DHAVE_SETGROUPS=1 \
+    -DHAVE_STRERROR=1 \
+    -DHAVE_STRSIGNAL=0 \
+    -DHAVE_STRLCPY=1 \
+    -DHAVE_FLOCK_DECL=1 \
+    -DHAVE_REVOKE_DECL=1 \
+    -DHAVE_SYS_ERRLIST_DECL=0 \
+    -DHAVE_SYS_SIGLIST_DECL=1 \
+    -DHAVE_PERSISTENT_HISTORY=0 \
+    -DMKSH_BUILD_R=562 \
  
  LOCAL_SRC_FILES := $(MKSH_SRC_FILES)
  
diff --git a/Android.patch.txt b/Android.patch.txt

new file mode 100644 (file)

index 0000000..c3cf892
--- /dev/null
+++ b/Android.patch.txt
@@ -0,0 +1,48 @@
+--- mksh-R56b/funcs.c  2017-05-05 15:53:55.000000000 -0700
++++ src/funcs.c        2017-09-22 16:19:44.327000462 -0700
+@@ -103,7 +103,9 @@
+       {Tsgbreak, c_brkcont},
+       {T__builtin, c_builtin},
+       {Tbuiltin, c_builtin},
++#if !defined(__ANDROID__)
+       {Tbcat, c_cat},
++#endif
+       {Tcd, c_cd},
+       /* dash compatibility hack */
+       {"chdir", c_cd},
+@@ -126,7 +128,9 @@
+       {"pwd", c_pwd},
+       {Tread, c_read},
+       {Tdsgreadonly, c_typeset},
++#if !defined(__ANDROID__)
+       {"!realpath", c_realpath},
++#endif
+       {"~rename", c_rename},
+       {"*=return", c_exitreturn},
+       {Tsgset, c_set},
+@@ -160,8 +164,10 @@
+       {"~printf", c_printf},
+ #endif
+ #if HAVE_SELECT
++#if !defined(__ANDROID__)
+       {"sleep", c_sleep},
+ #endif
++#endif
+ #ifdef __MirBSD__
+       /* alias to "true" for historical reasons */
+       {"domainname", c_true},
+--- mksh-R56b/main.c   2017-04-28 04:14:14.000000000 -0700
++++ src/main.c 2017-09-22 15:58:14.134149037 -0700
+@@ -410,6 +410,12 @@
+               }
+       }
+ 
++      /* override default PATH regardless of environment */
++#ifdef MKSH_DEFPATH_OVERRIDE
++      vp = global(TPATH);
++      setstr(vp, MKSH_DEFPATH_OVERRIDE, KSH_RETURN_ERROR);
++#endif
++
+       /* for security */
+       typeset(TinitIFS, 0, 0, 0, 0);
+ 
diff --git a/src/Build.sh b/src/Build.sh

index ca88a06..78fe347 100644 (file)
--- a/src/Build.sh
+++ b/src/Build.sh
@@ -1,5 +1,5 @@
  #!/bin/sh
-srcversion='$MirOS: src/bin/mksh/Build.sh,v 1.716 2017/04/12 18:33:22 tg Exp $'
+srcversion='$MirOS: src/bin/mksh/Build.sh,v 1.727 2017/08/29 13:38:28 tg Exp $'
  #-
  # Copyright (c) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
  #              2011, 2012, 2013, 2014, 2015, 2016, 2017
@@ -53,6 +53,16 @@ alll=qwertyuiopasdfghjklzxcvbnm
  alln=0123456789
  alls=______________________________________________________________
  
+case `echo a | tr '\201' X` in
+X)
+       # EBCDIC build system
+       lfcr='\n\r'
+       ;;
+*)
+       lfcr='\012\015'
+       ;;
+esac
+
  genopt_die() {
         if test -n "$1"; then
                 echo >&2 "E: $*"
@@ -425,7 +435,7 @@ ac_header() {
                 na=0
         fi
         hf=$1; shift
-       hv=`echo "$hf" | tr -d '\012\015' | tr -c $alll$allu$alln $alls`
+       hv=`echo "$hf" | tr -d "$lfcr" | tr -c $alll$allu$alln $alls`
         echo "/* NeXTstep bug workaround */" >x
         for i
         do
@@ -496,6 +506,7 @@ last=
  tfn=
  legacy=0
  textmode=0
+ebcdic=false
  
  for i
  do
@@ -519,6 +530,9 @@ do
         :-c)
                 last=c
                 ;;
+       :-E)
+               ebcdic=true
+               ;;
         :-G)
                 echo "$me: Do not call me with '-G'!" >&2
                 exit 1
@@ -603,6 +617,10 @@ else
         add_cppflags -DMKSH_LEGACY_MODE
  fi
  
+if $ebcdic; then
+       add_cppflags -DMKSH_EBCDIC
+fi
+
  if test $textmode = 0; then
         check_categories="$check_categories shell:textmode-no shell:binmode-yes"
  else
@@ -765,7 +783,9 @@ GNU/kFreeBSD)
         add_cppflags -DSETUID_CAN_FAIL_WITH_EAGAIN
         ;;
  Haiku)
-       add_cppflags -DMKSH_ASSUME_UTF8; HAVE_ISSET_MKSH_ASSUME_UTF8=1
+       add_cppflags -DMKSH_ASSUME_UTF8
+       HAVE_ISSET_MKSH_ASSUME_UTF8=1
+       HAVE_ISOFF_MKSH_ASSUME_UTF8=0
         ;;
  Harvey)
         add_cppflags -D_POSIX_SOURCE
@@ -773,11 +793,14 @@ Harvey)
         add_cppflags -D_BSD_EXTENSION
         add_cppflags -D_SUSV2_SOURCE
         add_cppflags -D_GNU_SOURCE
-       add_cppflags -DMKSH_ASSUME_UTF8; HAVE_ISSET_MKSH_ASSUME_UTF8=1
+       add_cppflags -DMKSH_ASSUME_UTF8
+       HAVE_ISSET_MKSH_ASSUME_UTF8=1
+       HAVE_ISOFF_MKSH_ASSUME_UTF8=0
         add_cppflags -DMKSH_NO_CMDLINE_EDITING
         add_cppflags -DMKSH__NO_SETEUGID
         oswarn=' and will currently not work'
         add_cppflags -DMKSH_UNEMPLOYED
+       add_cppflags -DMKSH_NOPROSPECTOFWORK
         # these taken from Harvey-OS github and need re-checking
         add_cppflags -D_setjmp=setjmp -D_longjmp=longjmp
         : "${HAVE_CAN_NO_EH_FRAME=0}"
@@ -826,7 +849,9 @@ Minix3)
  MirBSD)
         ;;
  MSYS_*)
-       add_cppflags -DMKSH_ASSUME_UTF8=0; HAVE_ISSET_MKSH_ASSUME_UTF8=1
+       add_cppflags -DMKSH_ASSUME_UTF8=0
+       HAVE_ISSET_MKSH_ASSUME_UTF8=1
+       HAVE_ISOFF_MKSH_ASSUME_UTF8=1
         # almost same as CYGWIN* (from RT|Chatzilla)
         : "${HAVE_SETLOCALE_CTYPE=0}"
         # broken on this OE (from ir0nh34d)
@@ -860,7 +885,9 @@ OpenBSD)
         : "${HAVE_SETLOCALE_CTYPE=0}"
         ;;
  OS/2)
-       add_cppflags -DMKSH_ASSUME_UTF8=0; HAVE_ISSET_MKSH_ASSUME_UTF8=1
+       add_cppflags -DMKSH_ASSUME_UTF8=0
+       HAVE_ISSET_MKSH_ASSUME_UTF8=1
+       HAVE_ISOFF_MKSH_ASSUME_UTF8=1
         HAVE_TERMIOS_H=0
         HAVE_MKNOD=0    # setmode() incompatible
         oswarn="; it is being ported"
@@ -894,6 +921,16 @@ the mksh-os2 porter.
  ] incompatibilities with $y.
  "
         ;;
+OS/390)
+       add_cppflags -DMKSH_ASSUME_UTF8=0
+       HAVE_ISSET_MKSH_ASSUME_UTF8=1
+       HAVE_ISOFF_MKSH_ASSUME_UTF8=1
+       : "${CC=xlc}"
+       : "${SIZE=: size}"
+       add_cppflags -DMKSH_FOR_Z_OS
+       add_cppflags -D_ALL_SOURCE
+       oswarn='; EBCDIC support is incomplete'
+       ;;
  OSF1)
         HAVE_SIG_T=0    # incompatible
         add_cppflags -D_OSF_SOURCE
@@ -907,7 +944,9 @@ Plan9)
         add_cppflags -D_LIMITS_EXTENSION
         add_cppflags -D_BSD_EXTENSION
         add_cppflags -D_SUSV2_SOURCE
-       add_cppflags -DMKSH_ASSUME_UTF8; HAVE_ISSET_MKSH_ASSUME_UTF8=1
+       add_cppflags -DMKSH_ASSUME_UTF8
+       HAVE_ISSET_MKSH_ASSUME_UTF8=1
+       HAVE_ISOFF_MKSH_ASSUME_UTF8=0
         add_cppflags -DMKSH_NO_CMDLINE_EDITING
         add_cppflags -DMKSH__NO_SETEUGID
         oswarn=' and will currently not work'
@@ -1047,7 +1086,7 @@ $e $bi$me: Scanning for functions... please ignore any errors.$ao
  # - LLVM+clang defines __GNUC__ too
  # - nwcc defines __GNUC__ too
  CPP="$CC -E"
-$e ... which compiler seems to be used
+$e ... which compiler type seems to be used
  cat >conftest.c <<'EOF'
  const char *
  #if defined(__ICC) || defined(__INTEL_COMPILER)
@@ -1297,7 +1336,7 @@ unknown)
         # huh?
         ;;
  esac
-$e "$bi==> which compiler seems to be used...$ao $ui$ct$etd$ao"
+$e "$bi==> which compiler type seems to be used...$ao $ui$ct$etd$ao"
  rmf conftest.c conftest.o conftest a.out* a.exe* conftest.exe* vv.out
  
  #
@@ -1392,8 +1431,16 @@ watcom)
         DOWARN=-Wc,-we
         ;;
  xlc)
-       save_NOWARN=-qflag=i:e
-       DOWARN=-qflag=i:i
+       case $TARGET_OS in
+       OS/390)
+               save_NOWARN=-qflag=e
+               DOWARN=-qflag=i
+               ;;
+       *)
+               save_NOWARN=-qflag=i:e
+               DOWARN=-qflag=i:i
+               ;;
+       esac
         ;;
  *)
         test x"$save_NOWARN" = x"" && save_NOWARN=-Wno-error
@@ -1563,10 +1610,24 @@ tendra)
         ac_flags 1 extansi -Xa
         ;;
  xlc)
-       ac_flags 1 rodata "-qro -qroconst -qroptr"
-       ac_flags 1 rtcheck -qcheck=all
-       #ac_flags 1 rtchkc -qextchk     # reported broken
-       ac_flags 1 wformat "-qformat=all -qformat=nozln"
+       case $TARGET_OS in
+       OS/390)
+               # On IBM z/OS, the following are warnings by default:
+               # CCN3296: #include file <foo.h> not found.
+               # CCN3944: Attribute "__foo__" is not supported and is ignored.
+               # CCN3963: The attribute "foo" is not a valid variable attribute and is ignored.
+               ac_flags 1 halton '-qhaltonmsg=CCN3296 -qhaltonmsg=CCN3944 -qhaltonmsg=CCN3963'
+               # CCN3290: Unknown macro name FOO on #undef directive.
+               # CCN4108: The use of keyword '__attribute__' is non-portable.
+               ac_flags 1 supprss '-qsuppress=CCN3290 -qsuppress=CCN4108'
+               ;;
+       *)
+               ac_flags 1 rodata '-qro -qroconst -qroptr'
+               ac_flags 1 rtcheck -qcheck=all
+               #ac_flags 1 rtchkc -qextchk     # reported broken
+               ac_flags 1 wformat '-qformat=all -qformat=nozln'
+               ;;
+       esac
         #ac_flags 1 wp64 -qwarn64       # too verbose for now
         ;;
  esac
@@ -1705,6 +1766,10 @@ ac_ifcpp 'ifdef MKSH_NOPROSPECTOFWORK' isset_MKSH_NOPROSPECTOFWORK '' \
      check_categories="$check_categories arge nojsig"
  ac_ifcpp 'ifdef MKSH_ASSUME_UTF8' isset_MKSH_ASSUME_UTF8 '' \
      'if the default UTF-8 mode is specified' && : "${HAVE_SETLOCALE_CTYPE=0}"
+ac_ifcpp 'if !MKSH_ASSUME_UTF8' isoff_MKSH_ASSUME_UTF8 \
+    isset_MKSH_ASSUME_UTF8 0 \
+    'if the default UTF-8 mode is disabled' && \
+    check_categories="$check_categories noutf8"
  #ac_ifcpp 'ifdef MKSH_DISABLE_DEPRECATED' isset_MKSH_DISABLE_DEPRECATED '' \
  #    "if deprecated features are to be omitted" && \
  #    check_categories="$check_categories nodeprecated"
@@ -2025,6 +2090,11 @@ ac_test mmap lock_fcntl 0 'for mmap and munmap' <<-'EOF'
             munmap(NULL, 0)); }
  EOF
  
+ac_test ftruncate mmap 0 'for ftruncate' <<-'EOF'
+       #include <unistd.h>
+       int main(void) { return (ftruncate(0, 0)); }
+EOF
+
  ac_test nice <<-'EOF'
         #include <unistd.h>
         int main(void) { return (nice(4)); }
@@ -2179,8 +2249,8 @@ EOF
  # other checks
  #
  fd='if to use persistent history'
-ac_cache PERSISTENT_HISTORY || case $HAVE_MMAP$HAVE_FLOCK$HAVE_LOCK_FCNTL in
-11*|101) fv=1 ;;
+ac_cache PERSISTENT_HISTORY || case $HAVE_FTRUNCATE$HAVE_MMAP$HAVE_FLOCK$HAVE_LOCK_FCNTL in
+111*|1101) fv=1 ;;
  esac
  test 1 = $fv || check_categories="$check_categories no-histfile"
  ac_testdone
@@ -2339,7 +2409,7 @@ addsrcs '!' HAVE_STRLCPY strlcpy.c
  addsrcs USE_PRINTF_BUILTIN printf.c
  test 1 = "$USE_PRINTF_BUILTIN" && add_cppflags -DMKSH_PRINTF_BUILTIN
  test 1 = "$HAVE_CAN_VERB" && CFLAGS="$CFLAGS -verbose"
-add_cppflags -DMKSH_BUILD_R=551
+add_cppflags -DMKSH_BUILD_R=562
  
  $e $bi$me: Finished configuration testing, now producing output.$ao
  
@@ -2366,8 +2436,8 @@ cat >test.sh <<-EOF
         set -A check_categories -- $check_categories
         pflag='$curdir/$mkshexe'
         sflag='$srcdir/check.t'
-       usee=0 Pflag=0 Sflag=0 uset=0 vflag=1 xflag=0
-       while getopts "C:e:fPp:QSs:t:v" ch; do case \$ch {
+       usee=0 useU=0 Pflag=0 Sflag=0 uset=0 vflag=1 xflag=0
+       while getopts "C:e:fPp:QSs:t:U:v" ch; do case \$ch {
         (C)     check_categories[\${#check_categories[*]}]=\$OPTARG ;;
         (e)     usee=1; eflag=\$OPTARG ;;
         (f)     check_categories[\${#check_categories[*]}]=fastbox ;;
@@ -2380,6 +2450,7 @@ cat >test.sh <<-EOF
         (+S)    Sflag=0 ;;
         (s)     sflag=\$OPTARG ;;
         (t)     uset=1; tflag=\$OPTARG ;;
+       (U)     useU=1; Uflag=\$OPTARG ;;
         (v)     vflag=1 ;;
         (+v)    vflag=0 ;;
         (*)     xflag=1 ;;
@@ -2387,6 +2458,9 @@ cat >test.sh <<-EOF
         done
         shift \$((OPTIND - 1))
         set -A args -- '$srcdir/check.pl' -p "\$pflag"
+       if $ebcdic; then
+               args[\${#args[*]}]=-E
+       fi
         x=
         for y in "\${check_categories[@]}"; do
                 x=\$x,\$y
@@ -2404,6 +2478,10 @@ cat >test.sh <<-EOF
                 args[\${#args[*]}]=-t
                 args[\${#args[*]}]=\$tflag
         fi
+       if (( useU )); then
+               args[\${#args[*]}]=-U
+               args[\${#args[*]}]=\$Uflag
+       fi
         (( vflag )) && args[\${#args[*]}]=-v
         (( xflag )) && args[\${#args[*]}]=-x    # force usage by synerr
         if [[ -n \$TMPDIR && -d \$TMPDIR/. ]]; then
@@ -2647,7 +2725,7 @@ MKSH_A4PB                 force use of arc4random_pushb
  MKSH_ASSUME_UTF8               (0=disabled, 1=enabled; default: unset)
  MKSH_BINSHPOSIX                        if */sh or */-sh, enable set -o posix
  MKSH_BINSHREDUCED              if */sh or */-sh, enable set -o sh
-MKSH_CLS_STRING                        "\033[;H\033[J"
+MKSH_CLS_STRING                        KSH_ESC_STRING "[;H" KSH_ESC_STRING "[J"
  MKSH_DEFAULT_EXECSHELL         "/bin/sh" (do not change)
  MKSH_DEFAULT_PROFILEDIR                "/etc" (do not change)
  MKSH_DEFAULT_TMPDIR            "/tmp" (do not change)
diff --git a/src/check.pl b/src/check.pl

index a80d4e1..e9c2437 100644 (file)
--- a/src/check.pl
+++ b/src/check.pl
@@ -1,8 +1,8 @@
-# $MirOS: src/bin/mksh/check.pl,v 1.42 2015/11/29 17:05:00 tg Exp $
+# $MirOS: src/bin/mksh/check.pl,v 1.49 2017/05/05 21:17:31 tg Exp $
  # $OpenBSD: th,v 1.1 2013/12/02 20:39:44 millert Exp $
  #-
  # Copyright (c) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2011,
-#              2012, 2013, 2014, 2015
+#              2012, 2013, 2014, 2015, 2017
  #      mirabilos <m@mirbsd.org>
  #
  # Provided that these terms and disclaimer and all copyright notices
@@ -78,9 +78,9 @@
  #                                      the test harness).
  #                                      CYGWIN is set to nodosfilewarning.
  #                                      ENV is set to /nonexistant.
-#                                      PATHSEP is set to either : or ;.
  #                                      __progname is set to the -p argument.
  #                                      __perlname is set to $^X (perlexe).
+#                                      @utflocale@ is substituted from -U.
  #      file-setup              mps     Used to create files, directories
  #                                      and symlinks. First word is either
  #                                      file, dir or symlink; second word is
@@ -153,9 +153,15 @@
  #      p       tag takes parameters (used with m).
  #      s       tag can be used several times.
  
+# require Config only if it exists
  # pull EINTR from POSIX.pm or Errno.pm if they exist
  # otherwise just skip it
  BEGIN {
+       eval {
+               require Config;
+               import Config;
+               1;
+       };
         $EINTR = 0;
         eval {
                 require POSIX;
@@ -172,7 +178,6 @@ BEGIN {
  };
  
  use Getopt::Std;
-use Config;
  
  $os = defined $^O ? $^O : 'unknown';
  
@@ -180,7 +185,7 @@ $os = defined $^O ? $^O : 'unknown';
  
  $Usage = <<EOF ;
  Usage: $prog [-Pv] [-C cat] [-e e=v] [-p prog] [-s fn] [-T dir] \
-       [-t tmo] name ...
+       [-t tmo] [-U lcl] name ...
         -C c    Specify the comma separated list of categories the program
                 belongs to (see category field).
         -e e=v  Set the environment variable e to v for all tests
@@ -193,6 +198,7 @@ Usage: $prog [-Pv] [-C cat] [-e e=v] [-p prog] [-s fn] [-T dir] \
                 scaned for test files (which end in .t).
         -T dir  Use dir instead of /tmp to hold temporary files
         -t t    Use t as default time limit for tests (default is unlimited)
+       -U lcl  Use lcl as UTF-8 locale (e.g. C.UTF-8) instead of the default
         -v      Verbose mode: print reason test failed.
         name    specifies the name of the test(s) to run; if none are
                 specified, all tests are run.
@@ -241,7 +247,7 @@ $nxpassed = 0;
  
  %known_tests = ();
  
-if (!getopts('C:e:Pp:s:T:t:v')) {
+if (!getopts('C:Ee:Pp:s:T:t:U:v')) {
      print STDERR $Usage;
      exit 1;
  }
@@ -250,8 +256,10 @@ die "$prog: no program specified (use -p)\n" if !defined $opt_p;
  die "$prog: no test set specified (use -s)\n" if !defined $opt_s;
  $test_prog = $opt_p;
  $verbose = defined $opt_v && $opt_v;
+$is_ebcdic = defined $opt_E && $opt_E;
  $test_set = $opt_s;
  $temp_base = $opt_T || "/tmp";
+$utflocale = $opt_U || (($os eq "hpux") ? "en_US.utf8" : "en_US.UTF-8");
  if (defined $opt_t) {
      die "$prog: bad -t argument (should be number > 0): $opt_t\n"
         if $opt_t !~ /^\d+$/ || $opt_t <= 0;
@@ -259,6 +267,14 @@ if (defined $opt_t) {
  }
  $program_kludge = defined $opt_P ? $opt_P : 0;
  
+if ($is_ebcdic) {
+       $categories{'shell:ebcdic-yes'} = 1;
+       $categories{'shell:ascii-no'} = 1;
+} else {
+       $categories{'shell:ebcdic-no'} = 1;
+       $categories{'shell:ascii-yes'} = 1;
+}
+
  if (defined $opt_C) {
      foreach $c (split(',', $opt_C)) {
         $c =~ s/\s+//;
@@ -281,12 +297,24 @@ foreach $env (('HOME', 'LD_LIBRARY_PATH', 'LOCPATH', 'LOGNAME',
  }
  $new_env{'CYGWIN'} = 'nodosfilewarning';
  $new_env{'ENV'} = '/nonexistant';
-$new_env{'PATHSEP'} = $os eq 'os2' ? ';' : ':';
+
  if (($os eq 'VMS') || ($Config{perlpath} =~ m/$Config{_exe}$/i)) {
         $new_env{'__perlname'} = $Config{perlpath};
  } else {
         $new_env{'__perlname'} = $Config{perlpath} . $Config{_exe};
  }
+$new_env{'__perlname'} = $^X if ($new_env{'__perlname'} eq '') and -f $^X and -x $^X;
+if ($new_env{'__perlname'} eq '') {
+       foreach $pathelt (split /:/,$ENV{'PATH'}) {
+               chomp($pathelt = `pwd`) if $pathelt eq '';
+               my $x = $pathelt . '/' . $^X;
+               next unless -f $x and -x $x;
+               $new_env{'__perlname'} = $x;
+               last;
+       }
+}
+$new_env{'__perlname'} = $^X if ($new_env{'__perlname'} eq '');
+
  if (defined $opt_e) {
      # XXX need a way to allow many -e arguments...
      if ($opt_e =~ /^([a-zA-Z_]\w*)(|=(.*))$/) {
@@ -866,38 +894,50 @@ first_diff
             $char = 1;
         }
      }
-    return "first difference: line $lineno, char $char (wanted '"
-       . &format_char($ce) . "', got '"
-       . &format_char($cg) . "'";
+    return "first difference: line $lineno, char $char (wanted " .
+       &format_char($ce) . ", got " . &format_char($cg);
  }
  
  sub
  format_char
  {
-    local($ch, $s);
+    local($ch, $s, $q);
  
      $ch = ord($_[0]);
+    $q = "'";
+
+    if ($is_ebcdic) {
+       if ($ch == 0x15) {
+               return $q . '\n' . $q;
+       } elsif ($ch == 0x16) {
+               return $q . '\b' . $q;
+       } elsif ($ch == 0x05) {
+               return $q . '\t' . $q;
+       } elsif ($ch < 64 || $ch == 255) {
+               return sprintf("X'%02X'", $ch);
+       }
+       return sprintf("'%c' (X'%02X')", $ch, $ch);
+    }
+
+    $s = sprintf("0x%02X (", $ch);
      if ($ch == 10) {
-       return '\n';
+       return $s . $q . '\n' . $q . ')';
      } elsif ($ch == 13) {
-       return '\r';
+       return $s . $q . '\r' . $q . ')';
      } elsif ($ch == 8) {
-       return '\b';
+       return $s . $q . '\b' . $q . ')';
      } elsif ($ch == 9) {
-       return '\t';
+       return $s . $q . '\t' . $q . ')';
      } elsif ($ch > 127) {
-       $ch -= 127;
-       $s = "M-";
-    } else {
-       $s = '';
+       $ch -= 128;
+       $s .= "M-";
      }
      if ($ch < 32) {
-       $s .= '^';
-       $ch += ord('@');
+       return sprintf("%s^%c)", $s, $ch + ord('@'));
      } elsif ($ch == 127) {
-       return $s . "^?";
+       return $s . "^?)";
      }
-    return $s . sprintf("%c", $ch);
+    return sprintf("%s'%c')", $s, $ch);
  }
  
  sub
@@ -1159,6 +1199,8 @@ read_test
             print STDERR "$prog:$test{':long-name'}: env-setup field doesn't start and end with the same character\n";
             return undef;
         }
+
+       $test{'env-setup'} =~ s/\@utflocale\@/$utflocale/g;
      }
      if (defined $test{'expected-exit'}) {
         local($val) = $test{'expected-exit'};
diff --git a/src/check.t b/src/check.t

index 93c614f..4473c08 100644 (file)
--- a/src/check.t
+++ b/src/check.t
@@ -1,4 +1,4 @@
-# $MirOS: src/bin/mksh/check.t,v 1.775 2017/04/12 17:38:41 tg Exp $
+# $MirOS: src/bin/mksh/check.t,v 1.797 2017/08/29 13:38:29 tg Exp $
  # -*- mode: sh -*-
  #-
  # Copyright © 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
@@ -30,40 +30,62 @@
  # (2013/12/02 20:39:44) http://cvsweb.openbsd.org/cgi-bin/cvsweb/src/regress/bin/ksh/?sortby=date
  
  expected-stdout:
-       @(#)MIRBSD KSH R55 2017/04/12
+       @(#)MIRBSD KSH R56 2017/08/29
  description:
-       Check version of shell.
+       Check base version of full shell
  stdin:
-       echo $KSH_VERSION
+       echo ${KSH_VERSION%%' +'*}
  name: KSH_VERSION
-category: !shell:legacy-yes,!shell:textmode-yes
+category: !shell:legacy-yes
  ---
  expected-stdout:
-       @(#)LEGACY KSH R55 2017/04/12
+       @(#)LEGACY KSH R56 2017/08/29
  description:
-       Check version of legacy shell.
+       Check base version of legacy shell
  stdin:
-       echo $KSH_VERSION
+       echo ${KSH_VERSION%%' +'*}
  name: KSH_VERSION-legacy
-category: !shell:legacy-no,!shell:textmode-yes
+category: !shell:legacy-no
  ---
-expected-stdout:
-       @(#)MIRBSD KSH R55 2017/04/12 +TEXTMODE
+name: KSH_VERSION-ascii
  description:
-       Check version of shell.
+       Check that the shell version tag does not include EBCDIC
+category: !shell:ebcdic-yes
  stdin:
-       echo $KSH_VERSION
-name: KSH_VERSION-textmode
-category: !shell:legacy-yes,!shell:textmode-no
+       for x in $KSH_VERSION; do
+               [[ $x = '+EBCDIC' ]] && exit 1
+       done
+       exit 0
  ---
-expected-stdout:
-       @(#)LEGACY KSH R55 2017/04/12 +TEXTMODE
+name: KSH_VERSION-ebcdic
  description:
-       Check version of legacy shell.
+       Check that the shell version tag includes EBCDIC
+category: !shell:ebcdic-no
  stdin:
-       echo $KSH_VERSION
-name: KSH_VERSION-legacy-textmode
-category: !shell:legacy-no,!shell:textmode-no
+       for x in $KSH_VERSION; do
+               [[ $x = '+EBCDIC' ]] && exit 0
+       done
+       exit 1
+---
+name: KSH_VERSION-binmode
+description:
+       Check that the shell version tag does not include TEXTMODE
+category: !shell:textmode-yes
+stdin:
+       for x in $KSH_VERSION; do
+               [[ $x = '+TEXTMODE' ]] && exit 1
+       done
+       exit 0
+---
+name: KSH_VERSION-textmode
+description:
+       Check that the shell version tag includes TEXTMODE
+category: !shell:textmode-no
+stdin:
+       for x in $KSH_VERSION; do
+               [[ $x = '+TEXTMODE' ]] && exit 0
+       done
+       exit 1
  ---
  name: selftest-1
  description:
@@ -1334,7 +1356,7 @@ name: cd-pe
  description:
         Check package for cd -Pe
  need-pass: no
-# the mv command fails on Cygwin
+# the mv command fails on Cygwin and z/OS
  # Hurd aborts the testsuite (permission denied)
  # QNX does not find subdir to cd into
  category: !os:cygwin,!os:gnu,!os:msys,!os:nto,!os:os390,!nosymlink
@@ -1355,7 +1377,7 @@ file-setup: file 644 "x"
         cd -P$1 subdir
         echo 2=$?,${PWD#$bwd/}
         cd $bwd
-       chmod 755 renamed
+       chmod 755 noread renamed 2>/dev/null
         rm -rf noread link renamed
  stdin:
         export TSHELL="$__progname"
@@ -1944,15 +1966,12 @@ expected-stdout:
  name: eglob-bad-1
  description:
         Check that globbing isn't done when glob has syntax error
-file-setup: file 644 "abcx"
-file-setup: file 644 "abcz"
-file-setup: file 644 "bbc"
+category: !os:cygwin,!os:msys,!os:os2
+file-setup: file 644 "@(a[b|)c]foo"
  stdin:
-       echo !([*)*
-       echo +(a|b[)*
+       echo @(a[b|)c]*
  expected-stdout:
-       !([*)*
-       +(a|b[)*
+       @(a[b|)c]*
  ---
  name: eglob-bad-2
  description:
@@ -2039,9 +2058,11 @@ stdin:
         case foo in *(a|b[)) echo yes;; *) echo no;; esac
         case foo in *(a|b[)|f*) echo yes;; *) echo no;; esac
         case '*(a|b[)' in *(a|b[)) echo yes;; *) echo no;; esac
+       case 'aab[b[ab[a' in *(a|b[)) echo yes;; *) echo no;; esac
  expected-stdout:
         no
         yes
+       no
         yes
  ---
  name: eglob-trim-1
@@ -2305,6 +2326,7 @@ expected-stdout:
  name: eglob-utf8-1
  description:
         UTF-8 mode differences for eglobbing
+category: !shell:ebcdic-yes
  stdin:
         s=blöd
         set +U
@@ -2336,17 +2358,26 @@ expected-stdout:
  ---
  name: glob-bad-1
  description:
-       Check that globbing isn't done when glob has syntax error
+       Check that [ matches itself if it's not a valid bracket expr
+       but does not prevent globbing, while backslash-escaping does
  file-setup: dir 755 "[x"
  file-setup: file 644 "[x/foo"
  stdin:
         echo [*
         echo *[x
         echo [x/*
-expected-stdout:
-       [*
-       *[x
+       :>'ab[x'
+       :>'a[a-z][x'
+       echo a[a-z][*
+       echo a[a-z]*
+       echo a[a\-z]*
+expected-stdout:
+       [x
+       [x
         [x/foo
+       ab[x
+       ab[x
+       a[a-z]*
  ---
  name: glob-bad-2
  description:
@@ -2365,6 +2396,18 @@ expected-stdout:
         dir/abc
         dir/abc
  ---
+name: glob-bad-3
+description:
+       Check that the slash is parsed before the glob
+stdin:
+       mkdir a 'a[b'
+       (cd 'a[b'; echo ok >'c]d')
+       echo nok >abd
+       echo fail >a/d
+       cat a[b/c]d
+expected-stdout:
+       ok
+---
  name: glob-range-1
  description:
         Test range matching
@@ -2373,24 +2416,31 @@ file-setup: file 644 "abc"
  file-setup: file 644 "bbc"
  file-setup: file 644 "cbc"
  file-setup: file 644 "-bc"
+file-setup: file 644 "!bc"
+file-setup: file 644 "^bc"
+file-setup: file 644 "+bc"
+file-setup: file 644 ",bc"
+file-setup: file 644 "0bc"
+file-setup: file 644 "1bc"
  stdin:
         echo [ab-]*
         echo [-ab]*
         echo [!-ab]*
         echo [!ab]*
         echo []ab]*
-       :>'./!bc'
-       :>'./^bc'
         echo [^ab]*
-       echo [!ab]*
+       echo [+--]*
+       echo [--1]*
+
  expected-stdout:
         -bc abc bbc
         -bc abc bbc
-       cbc
-       -bc cbc
+       !bc +bc ,bc 0bc 1bc ^bc cbc
+       !bc +bc ,bc -bc 0bc 1bc ^bc cbc
         abc bbc
         ^bc abc bbc
-       !bc -bc ^bc cbc
+       +bc ,bc -bc
+       -bc 0bc 1bc
  ---
  name: glob-range-2
  description:
@@ -2408,7 +2458,7 @@ description:
  # breaks on Mac OSX (HFS+ non-standard Unicode canonical decomposition)
  # breaks on Cygwin 1.7 (files are now UTF-16 or something)
  # breaks on QNX 6.4.1 (says RT)
-category: !os:cygwin,!os:darwin,!os:msys,!os:nto,!os:os2
+category: !os:cygwin,!os:darwin,!os:msys,!os:nto,!os:os2,!os:os390
  need-pass: no
  file-setup: file 644 "aÂc"
  stdin:
@@ -2435,10 +2485,32 @@ file-setup: file 644 "cbc"
  file-setup: file 644 "dbc"
  file-setup: file 644 "ebc"
  file-setup: file 644 "-bc"
+file-setup: file 644 "@bc"
  stdin:
         echo [a-c-e]*
+       echo [a--@]*
  expected-stdout:
         -bc abc bbc cbc ebc
+       @bc
+---
+name: glob-word-1
+description:
+       Check BSD word boundary matches
+stdin:
+       t() { [[ $1 = *[[:\<:]]bar[[:\>:]]* ]]; echo =$?; }
+       t 'foo bar baz'
+       t 'foobar baz'
+       t 'foo barbaz'
+       t 'bar'
+       t '_bar'
+       t 'bar_'
+expected-stdout:
+       =0
+       =1
+       =1
+       =0
+       =1
+       =1
  ---
  name: glob-trim-1
  description:
@@ -2695,6 +2767,7 @@ expected-stdout:
  name: heredoc-10
  description:
         Check direct here document assignment
+category: !shell:ebcdic-yes
  stdin:
         x=u
         va=<<EOF
@@ -2747,6 +2820,62 @@ expected-stdout:
         } |
         | vapp1^vapp2^ |
  ---
+name: heredoc-10-ebcdic
+description:
+       Check direct here document assignment
+category: !shell:ebcdic-no
+stdin:
+       x=u
+       va=<<EOF
+       =a $x \x7C=
+       EOF
+       vb=<<'EOF'
+       =b $x \x7C=
+       EOF
+       function foo {
+               vc=<<-EOF
+                       =c $x \x7C=
+               EOF
+       }
+       fnd=$(typeset -f foo)
+       print -r -- "$fnd"
+       function foo {
+               echo blub
+       }
+       foo
+       eval "$fnd"
+       foo
+       # rather nonsensical, but…
+       vd=<<<"=d $x \x7C="
+       ve=<<<'=e $x \x7C='
+       vf=<<<$'=f $x \x7C='
+       # now check
+       print -r -- "| va={$va} vb={$vb} vc={$vc} vd={$vd} ve={$ve} vf={$vf} |"
+       # check append
+       v=<<-EOF
+               vapp1
+       EOF
+       v+=<<-EOF
+               vapp2
+       EOF
+       print -r -- "| ${v//$'\n'/^} |"
+expected-stdout:
+       function foo {
+               vc=<<-EOF 
+       =c $x \x7C=
+       EOF
+       
+       } 
+       blub
+       | va={=a u \x7C=
+       } vb={=b $x \x7C=
+       } vc={=c u \x7C=
+       } vd={=d u \x7C=
+       } ve={=e $x \x7C=
+       } vf={=f $x @=
+       } |
+       | vapp1^vapp2^ |
+---
  name: heredoc-11
  description:
         Check here documents with no or empty delimiter
@@ -5034,18 +5163,34 @@ expected-stdout:
         2 :10/8,16: .
         3 :10/10,16: .
  ---
-name: integer-base-check-numeric-from
+name: integer-base-check-numeric-from-1
+description:
+       Check behaviour for base one
+category: !shell:ebcdic-yes
+stdin:
+       echo 1:$((1#1))0.
+expected-stdout:
+       1:490.
+---
+name: integer-base-check-numeric-from-1-ebcdic
  description:
-       Check behaviour for base one to 36, and that 37 degrades to 10
+       Check behaviour for base one
+category: !shell:ebcdic-no
  stdin:
         echo 1:$((1#1))0.
+expected-stdout:
+       1:2410.
+---
+name: integer-base-check-numeric-from-2
+description:
+       Check behaviour for base two to 36, and that 37 degrades to 10
+stdin:
         i=1
         while (( ++i <= 37 )); do
                 eval 'echo '$i':$(('$i'#10)).'
         done
         echo 37:$($__progname -c 'echo $((37#10))').$?:
  expected-stdout:
-       1:490.
         2:2.
         3:3.
         4:4.
@@ -5084,18 +5229,41 @@ expected-stdout:
         37:10.
         37:10.0:
  ---
-name: integer-base-check-numeric-to
+name: integer-base-check-numeric-to-1
  description:
-       Check behaviour for base one to 36, and that 37 degrades to 10
+       Check behaviour for base one
+category: !shell:ebcdic-yes
  stdin:
-       i=0
+       i=1
+       typeset -Uui$i x=0x40
+       eval "typeset -i10 y=$x"
+       print $i:$x.$y.
+expected-stdout:
+       1:1#@.64.
+---
+name: integer-base-check-numeric-to-1-ebcdic
+description:
+       Check behaviour for base one
+category: !shell:ebcdic-no
+stdin:
+       i=1
+       typeset -Uui$i x=0x7C
+       eval "typeset -i10 y=$x"
+       print $i:$x.$y.
+expected-stdout:
+       1:1#@.124.
+---
+name: integer-base-check-numeric-to-2
+description:
+       Check behaviour for base two to 36, and that 37 degrades to 10
+stdin:
+       i=1
         while (( ++i <= 37 )); do
                 typeset -Uui$i x=0x40
                 eval "typeset -i10 y=$x"
                 print $i:$x.$y.
         done
  expected-stdout:
-       1:1#@.64.
         2:2#1000000.64.
         3:3#2101.64.
         4:4#1000.64.
@@ -6738,6 +6906,13 @@ expected-exit: e != 0
  expected-stderr-pattern:
         /read[ -]?only/
  ---
+name: readonly-5
+description:
+       Ensure readonly is idempotent
+stdin:
+       readonly x=1
+       readonly x
+---
  name: syntax-1
  description:
         Check that lone ampersand is a syntax error
@@ -6871,6 +7046,48 @@ expected-stdout:
         y1-
         x2-3- z1-
  ---
+name: exec-modern-korn-shell
+description:
+       Check that exec can execute any command that makes it
+       through syntax and parser
+stdin:
+       print '#!'"$__progname"'\necho tf' >lq
+       chmod +x lq
+       PATH=$PWD
+       exec 2>&1
+       foo() { print two; }
+       print =1
+       (exec print one)
+       print =2
+       (exec foo)
+       print =3
+       (exec ls)
+       print =4
+       (exec lq)
+expected-stdout-pattern:
+       /=1\none\n=2\ntwo\n=3\n.*: ls: not found\n=4\ntf\n/
+---
+name: exec-ksh88
+description:
+       Check that exec only executes after a PATH search
+arguments: !-o!posix!
+stdin:
+       print '#!'"$__progname"'\necho tf' >lq
+       chmod +x lq
+       PATH=$PWD
+       exec 2>&1
+       foo() { print two; }
+       print =1
+       (exec print one)
+       print =2
+       (exec foo)
+       print =3
+       (exec ls)
+       print =4
+       (exec lq)
+expected-stdout-pattern:
+       /=1\n.*: print: not found\n=2\n.*: foo: not found\n=3\n.*: ls: not found\n=4\ntf\n/
+---
  name: xxx-what-do-you-call-this-1
  stdin:
         echo "${foo:-"a"}*"
@@ -8233,7 +8450,7 @@ description:
         multibyte character of the shell input (with -c, from standard
         input, as file, or as eval argument), but nowhere else
  # breaks on Mac OSX (HFS+ non-standard Unicode canonical decomposition)
-category: !os:darwin
+category: !os:darwin,!shell:ebcdic-yes
  stdin:
         mkdir foo
         print '#!/bin/sh\necho ohne' >foo/fnord
@@ -8310,25 +8527,10 @@ expected-stdout:
  expected-stderr-pattern:
         /(Unrecognized character .... ignored at \..t4 line 1)*/
  ---
-name: utf8opt-1a
-description:
-       Check that the utf8-mode flag is not set at non-interactive startup
-category: !os:hpux
-env-setup: !PS1=!PS2=!LC_CTYPE=en_US.UTF-8!
-stdin:
-       if [[ $- = *U* ]]; then
-               echo is set
-       else
-               echo is not set
-       fi
-expected-stdout:
-       is not set
----
-name: utf8opt-1b
+name: utf8opt-1
  description:
         Check that the utf8-mode flag is not set at non-interactive startup
-category: os:hpux
-env-setup: !PS1=!PS2=!LC_CTYPE=en_US.utf8!
+env-setup: !PS1=!PS2=!LC_CTYPE=@utflocale@!
  stdin:
         if [[ $- = *U* ]]; then
                 echo is set
@@ -8338,37 +8540,15 @@ stdin:
  expected-stdout:
         is not set
  ---
-name: utf8opt-2a
+name: utf8opt-2
  description:
         Check that the utf8-mode flag is set at interactive startup.
-       -DMKSH_ASSUME_UTF8=0 => expected failure, please ignore
-       -DMKSH_ASSUME_UTF8=1 => not expected, please investigate
-       -UMKSH_ASSUME_UTF8 => not expected, but if your OS is old,
-        try passing HAVE_SETLOCALE_CTYPE=0 to Build.sh
+       If your OS is old, try passing HAVE_SETLOCALE_CTYPE=0 to Build.sh
  need-pass: no
-category: !os:hpux,!os:msys,!os:os2
-need-ctty: yes
-arguments: !-i!
-env-setup: !PS1=!PS2=!LC_CTYPE=en_US.UTF-8!
-stdin:
-       if [[ $- = *U* ]]; then
-               echo is set
-       else
-               echo is not set
-       fi
-expected-stdout:
-       is set
-expected-stderr-pattern:
-       /(# )*/
----
-name: utf8opt-2b
-description:
-       Check that the utf8-mode flag is set at interactive startup
-       Expected failure if -DMKSH_ASSUME_UTF8=0
-category: os:hpux
+category: !noutf8
  need-ctty: yes
  arguments: !-i!
-env-setup: !PS1=!PS2=!LC_CTYPE=en_US.utf8!
+env-setup: !PS1=!PS2=!LC_CTYPE=@utflocale@!
  stdin:
         if [[ $- = *U* ]]; then
                 echo is set
@@ -9348,6 +9528,7 @@ expected-stdout:
  name: varexpand-special-hash
  description:
         Check special ${var@x} expansion for x=hash
+category: !shell:ebcdic-yes
  stdin:
         typeset -i8 foo=10
         bar=baz
@@ -9356,9 +9537,22 @@ stdin:
  expected-stdout:
         9B15FBFB CFBDD32B 00000000 .
  ---
+name: varexpand-special-hash-ebcdic
+description:
+       Check special ${var@x} expansion for x=hash
+category: !shell:ebcdic-no
+stdin:
+       typeset -i8 foo=10
+       bar=baz
+       unset baz
+       print ${foo@#} ${bar@#} ${baz@#} .
+expected-stdout:
+       016AE33D 9769C4AF 00000000 .
+---
  name: varexpand-special-quote
  description:
         Check special ${var@Q} expansion for quoted strings
+category: !shell:faux-ebcdic
  stdin:
         set +U
         i=x
@@ -9378,6 +9572,29 @@ expected-stdout:
         typeset v='a b'
         typeset w=$'c\nd\240e\u20ACf'
  ---
+name: varexpand-special-quote-faux-EBCDIC
+description:
+       Check special ${var@Q} expansion for quoted strings
+category: shell:faux-ebcdic
+stdin:
+       set +U
+       i=x
+       j=a\ b
+       k=$'c
+       d\xA0''e€f'
+       print -r -- "<i=$i j=$j k=$k>"
+       s="u=${i@Q} v=${j@Q} w=${k@Q}"
+       print -r -- "s=\"$s\""
+       eval "$s"
+       typeset -p u v w
+expected-stdout:
+       <i=x j=a b k=c
+       d eâ\82¬f>
+       s="u=x v='a b' w=$'c\nd e\u20ACf'"
+       typeset u=x
+       typeset v='a b'
+       typeset w=$'c\nd e\u20ACf'
+---
  name: varexpand-null-1
  description:
         Ensure empty strings expand emptily
@@ -9718,7 +9935,7 @@ stdin:
             $'\J\K\L\M\N\O\P\Q\R\S\T\U1\V\W\X\Y\Z\[\\\]\^\_\`\a\b\d\e' \
             $'\f\g\h\i\j\k\l\m\n\o\p\q\r\s\t\u1\v\w\x1\y\z\{\|\}\~ $x' \
             $'\u20acd' $'\U20acd' $'\x123' $'fn\x0rd' $'\0234' $'\234' \
-           $'\2345' $'\ca' $'\c!' $'\c?' $'\câ\82¬' $'a\
+           $'\2345' $'\ca' $'\c!' $'\c?' $'\câ\80¦' $'a\
         b' | {
                 # integer-base-one-3As
                 typeset -Uui16 -Z11 pos=0
@@ -9760,7 +9977,7 @@ expected-stdout:
         00000050  68 69 6A 6B 6C 6D 0A 6F - 70 71 0D 73 09 01 0B 77  |hijklm.opq.s...w|
         00000060  01 79 7A 7B 7C 7D 7E 20 - 24 78 0A E2 82 AC 64 0A  |.yz{|}~ $x....d.|
         00000070  EF BF BD 0A C4 A3 0A 66 - 6E 0A 13 34 0A 9C 0A 9C  |.......fn..4....|
-       00000080  35 0A 01 0A 01 0A 7F 0A - 02 82 AC 0A 61 0A 62 0A  |5...........a.b.|
+       00000080  35 0A 01 0A 01 0A 7F 0A - 82 80 A6 0A 61 0A 62 0A  |5...........a.b.|
  ---
  name: dollar-quotes-in-heredocs-strings
  description:
@@ -10391,6 +10608,7 @@ expected-stdout:
  name: integer-base-one-5A
  description:
         Check to see that we’re NUL and Unicode safe
+category: !shell:ebcdic-yes
  stdin:
         set +U
         print 'a\0b\xfdz' >x
@@ -10401,6 +10619,20 @@ stdin:
  expected-stdout:
         16#61 16#0 16#62 16#FD 16#7A .
  ---
+name: integer-base-one-5E
+description:
+       Check to see that we’re NUL and Unicode safe
+category: !shell:ebcdic-no
+stdin:
+       set +U
+       print 'a\0b\xfdz' >x
+       read -a y <x
+       set -U
+       typeset -Uui16 y
+       print ${y[*]} .
+expected-stdout:
+       16#81 16#0 16#82 16#FD 16#A9 .
+---
  name: integer-base-one-5W
  description:
         Check to see that we’re NUL and Unicode safe
@@ -11486,19 +11718,19 @@ expected-stdout:
                 echo $(true) $((1+ 2)) ${  :;} ${| REPLY=x;}
         }
         inline_COMSUB_EXPRSUB_FUNSUB_VALSUB() {
-               \echo $(\true ) $((1+ 2)) ${ : ;} ${|REPLY=x ;} 
+               \echo $(\true ) $((1+ 2)) ${ \: ;} ${|REPLY=x ;} 
         } 
         function comsub_COMSUB_EXPRSUB_FUNSUB_VALSUB { x=$(
                 echo $(true) $((1+ 2)) ${  :;} ${| REPLY=x;}
         ); }
         function comsub_COMSUB_EXPRSUB_FUNSUB_VALSUB {
-               x=$(\echo $(\true ) $((1+ 2)) ${ : ;} ${|REPLY=x ;} ) 
+               x=$(\echo $(\true ) $((1+ 2)) ${ \: ;} ${|REPLY=x ;} ) 
         } 
         function reread_COMSUB_EXPRSUB_FUNSUB_VALSUB { x=$((
                 echo $(true) $((1+ 2)) ${  :;} ${| REPLY=x;}
         )|tr u x); }
         function reread_COMSUB_EXPRSUB_FUNSUB_VALSUB {
-               x=$( ( \echo $(\true ) $((1+ 2)) ${ : ;} ${|REPLY=x ;} ) | \tr u x ) 
+               x=$( ( \echo $(\true ) $((1+ 2)) ${ \: ;} ${|REPLY=x ;} ) | \tr u x ) 
         } 
         inline_QCHAR_OQUOTE_CQUOTE() {
                 echo fo\ob\"a\`r\'b\$az
@@ -12498,12 +12730,23 @@ expected-stdout:
  name: echo-test-1
  description:
         Test what the echo builtin does (mksh)
+category: !shell:ebcdic-yes
  stdin:
         echo -n 'foo\x40bar'
         echo -e '\tbaz'
  expected-stdout:
         foo@bar baz
  ---
+name: echo-test-1-ebcdic
+description:
+       Test what the echo builtin does (mksh)
+category: !shell:ebcdic-no
+stdin:
+       echo -n 'foo\x7Cbar'
+       echo -e '\tbaz'
+expected-stdout:
+       foo@bar baz
+---
  name: echo-test-2
  description:
         Test what the echo builtin does (POSIX)
@@ -12534,7 +12777,7 @@ expected-stdout:
  name: echo-test-3-normal
  description:
         Test what the echo builtin does, and test a compatibility flag.
-category: !mnbsdash
+category: !mnbsdash,!shell:ebcdic-yes
  stdin:
         "$__progname" -c 'echo -n 1=\\x40$1; echo -e \\x2E' -- foo bar
         "$__progname" -o posix -c 'echo -n 2=\\x40$1; echo -e \\x2E' -- foo bar
@@ -12544,6 +12787,19 @@ expected-stdout:
         2=\x40foo-e \x2E
         3=\x40foo-e \x2E
  ---
+name: echo-test-3-ebcdic
+description:
+       Test what the echo builtin does, and test a compatibility flag.
+category: !mnbsdash,!shell:ebcdic-no
+stdin:
+       "$__progname" -c 'echo -n 1=\\x7C$1; echo -e \\x4B' -- foo bar
+       "$__progname" -o posix -c 'echo -n 2=\\x7C$1; echo -e \\x4B' -- foo bar
+       "$__progname" -o sh -c 'echo -n 3=\\x7C$1; echo -e \\x4B' -- foo bar
+expected-stdout:
+       1=@foo.
+       2=\x7Cfoo-e \x4B
+       3=\x7Cfoo-e \x4B
+---
  name: utilities-getopts-1
  description:
         getopts sets OPTIND correctly for unparsed option
@@ -12979,6 +13235,7 @@ name: duffs-device
  description:
         Check that the compiler did not optimise-break them
         (lex.c has got a similar one in SHEREDELIM)
+category: !shell:faux-ebcdic,!shell:ebcdic-yes
  stdin:
         set +U
         s=
@@ -12991,6 +13248,38 @@ stdin:
  expected-stdout:
         typeset s=$'\001\002\003\004\005\006\a\b\t\n\v\f\r\016\017\020\021\022\023\024\025\026\027\030\031\032\E\034\035\036\037 !"#$%&\047()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\177\200\201\202\203\204\205\206\207\210\211\212\213\214\215\216\217\220\221\222\223\224\225\226\227\230\231\232\233\234\235\236\237\240\241\242\243\244\245\246\247\250\251\252\253\254\255\256\257\260\261\262\263\264\265\266\267\270\271\272\273\274\275\276\277\300\301\302\303\304\305\306\307\310\311\312\313\314\315\316\317\320\321\322\323\324\325\326\327\330\331\332\333\334\335\336\337\340\341\342\343\344\345\346\347\350\351\352\353\354\355\356\357\360\361\362\363\364\365\366\367\370\371\372\373\374\375\376\377\u00A0\u20AC\uFFFD\357\277\276\357\277\277\360\220\200\200.'
  ---
+name: duffs-device-ebcdic
+description:
+       Check that the compiler did not optimise-break them
+category: !shell:ebcdic-no
+stdin:
+       set +U
+       s=
+       typeset -i1 i=0
+       while (( ++i < 256 )); do
+               s+=${i#1#}
+       done
+       #s+=$'\xC2\xA0\xE2\x82\xAC\xEF\xBF\xBD\xEF\xBF\xBE\xEF\xBF\xBF\xF0\x90\x80\x80.' #XXX
+       typeset -p s
+expected-stdout:
+       typeset s=$'\001\002\003\004\t\006\007\010\011\012\v\f\r\016\017\020\021\022\023\024\n\b\027\030\031\032\033\034\035\036\037\040\041\042\043\044\045\046\E\050\051\052\053\054\055\056\a\060\061\062\063\064\065\066\067\070\071\072\073\074\075\076\077  âäàáãåçñ¢.<(+|&éêëèíîïìß!$*);^-/ÂÄÀÁÃÅÇÑ¦,%_>?øÉÊËÈÍÎÏÌ`:#@\175="Øabcdefghi«»ðýþ±°jklmnopqrªºæ¸Æ¤µ~stuvwxyz¡¿Ð[Þ®¬£¥·©§¶¼½¾Ý¨¯]´×{ABCDEFGHIôöòóõ}JKLMNOPQR¹ûüùúÿ\\÷STUVWXYZ²ÔÖÒÓÕ0123456789³ÛÜÙÚ\377'
+---
+name: duffs-device-faux-EBCDIC
+description:
+       Check that the compiler did not optimise-break them
+category: shell:faux-ebcdic
+stdin:
+       set +U
+       s=
+       typeset -i1 i=0
+       while (( ++i < 256 )); do
+               s+=${i#1#}
+       done
+       s+=$'\xC2\xA0\xE2\x82\xAC\xEF\xBF\xBD\xEF\xBF\xBE\xEF\xBF\xBF\xF0\x90\x80\x80.'
+       typeset -p s
+expected-stdout:
+       typeset s=$'\001\002\003\004\005\006\a\b\t\n\v\f\r\016\017\020\021\022\023\024\025\026\027\030\031\032\E\034\035\036\037 !"#$%&\047()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\177\200\201\202\203\204\205\206\207\210\211\212\213\214\215\216\217\220\221\222\223\224\225\226\227\230\231\232\233\234\235\236\237 ¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ\u00A0\u20AC\uFFFDï¿¾ï¿¿ð\220\200\200.'
+---
  name: stateptr-underflow
  description:
         This check overflows an Xrestpos stored in a short in R40
diff --git a/src/dot.mkshrc b/src/dot.mkshrc

index af55d7d..4a3dfea 100644 (file)
--- a/src/dot.mkshrc
+++ b/src/dot.mkshrc
@@ -1,5 +1,5 @@
  # $Id$
-# $MirOS: src/bin/mksh/dot.mkshrc,v 1.114 2017/03/19 22:31:26 tg Exp $
+# $MirOS: src/bin/mksh/dot.mkshrc,v 1.121 2017/08/08 21:10:21 tg Exp $
  #-
  # Copyright (c) 2002, 2003, 2004, 2006, 2007, 2008, 2009, 2010,
  #              2011, 2012, 2013, 2014, 2015, 2016, 2017
@@ -56,6 +56,13 @@ smores() (
         done
  )
  
+# customise your favourite editor here; the first one found is used
+for EDITOR in "${EDITOR:-}" jupp jstar mcedit ed vi; do
+       EDITOR=$(\\builtin whence -p "$EDITOR") || EDITOR=
+       [[ -n $EDITOR && -x $EDITOR ]] && break
+       EDITOR=
+done
+
  \\builtin alias ls=ls l='ls -F' la='l -a' ll='l -l' lo='l -alo'
  \: "${HOSTNAME:=$(\\builtin ulimit -c 0; \\builtin print -r -- $(hostname \
      2>/dev/null))}${EDITOR:=/bin/ed}${TERM:=vt100}${USER:=$(\\builtin ulimit \
@@ -73,6 +80,7 @@ fi
  \: "${MKSH:=$(\\builtin whence -p mksh)}${MKSH:=/bin/mksh}"
  \\builtin export MKSH
  
+# prompts
  PS4='[$EPOCHREALTIME] '; PS1='#'; (( USER_ID )) && PS1='$'; PS1=$'\001\r''${|
         \\builtin typeset e=$?
  
@@ -86,6 +94,8 @@ PS4='[$EPOCHREALTIME] '; PS1='#'; (( USER_ID )) && PS1='$'; PS1=$'\001\r''${|
  
         \\builtin return $e
  } '"$PS1 "
+
+# utilities
  \\builtin alias doch='sudo mksh -c "$(\\builtin fc -ln -1)"'
  \\builtin command -v rot13 >/dev/null || \\builtin alias rot13='tr \
      abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ \
@@ -99,42 +109,47 @@ elif \\builtin command -v hexdump >/dev/null; then
         }
  else
         function hd {
-               \\builtin typeset -Uui16 -Z11 pos=0
-               \\builtin typeset -Uui16 -Z5 hv=2147483647
-               \\builtin typeset dasc line i
-               \\builtin set +U
-
-               \\builtin cat "$@" | if \\builtin read -arN -1 line; then
-                       \\builtin typeset -i1 'line[*]'
-                       i=0
-                       while (( i < ${#line[*]} )); do
-                               hv=${line[i++]}
-                               if (( (pos & 15) == 0 )); then
-                                       (( pos )) && \
-                                           \\builtin print -r -- "$dasc|"
-                                       \\builtin print -nr "${pos#16#}  "
-                                       dasc=' |'
-                               fi
-                               \\builtin print -nr "${hv#16#} "
-                               #XXX EBCDIC, but we need [[:print:]] to fix this
-                               if (( (hv < 32) || (hv > 126) )); then
-                                       dasc+=.
-                               else
-                                       dasc+=${line[i-1]#1#}
-                               fi
-                               (( (pos++ & 15) == 7 )) && \
-                                   \\builtin print -nr -- '- '
-                       done
-                       while (( pos & 15 )); do
-                               \\builtin print -nr '   '
-                               (( (pos++ & 15) == 7 )) && \
-                                   \\builtin print -nr -- '- '
-                       done
-                       (( hv == 2147483647 )) || \\builtin print -r -- "$dasc|"
-               fi
+               \\builtin cat "$@" | hd_mksh "$@"
         }
  fi
  
+# NUL-safe and EBCDIC-safe hexdump (from stdin)
+function hd_mksh {
+       \\builtin typeset -Uui16 -Z11 pos=0
+       \\builtin typeset -Uui16 -Z5 hv=2147483647
+       \\builtin typeset dasc dn line i
+       \\builtin set +U
+
+       while \\builtin read -arn 512 line; do
+               \\builtin typeset -i1 'line[*]'
+               i=0
+               while (( i < ${#line[*]} )); do
+                       dn=
+                       (( (hv = line[i++]) != 0 )) && dn=${line[i-1]#1#}
+                       if (( (pos & 15) == 0 )); then
+                               (( pos )) && \
+                                   \\builtin print -r -- "$dasc|"
+                               \\builtin print -nr "${pos#16#}  "
+                               dasc=' |'
+                       fi
+                       \\builtin print -nr "${hv#16#} "
+                       if [[ $dn = [[:print:]] ]]; then
+                               dasc+=$dn
+                       else
+                               dasc+=.
+                       fi
+                       (( (pos++ & 15) == 7 )) && \
+                           \\builtin print -nr -- '- '
+               done
+       done
+       while (( pos & 15 )); do
+               \\builtin print -nr '   '
+               (( (pos++ & 15) == 7 )) && \
+                   \\builtin print -nr -- '- '
+       done
+       (( hv == 2147483647 )) || \\builtin print -r -- "$dasc|"
+}
+
  # Berkeley C shell compatible dirs, popd, and pushd functions
  # Z shell compatible chpwd() hook, used to update DIRSTACK[0]
  DIRSTACKBASE=$(\\builtin realpath ~/. 2>/dev/null || \
@@ -483,6 +498,7 @@ function enable {
         i_func[nfunc++]=setenv
         i_func[nfunc++]=smores
         i_func[nfunc++]=hd
+       i_func[nfunc++]=hd_mksh
         i_func[nfunc++]=chpwd
         i_func[nfunc++]=cd
         i_func[nfunc++]=cd_csh
@@ -588,6 +604,11 @@ function enable {
  
  \: place customisations below this line
  
+# some defaults follow — you are supposed to adjust these to your
+# liking; by default we add ~/.etc/bin and ~/bin (whichever exist)
+# to $PATH, set $SHELL to mksh, set some defaults for man and less
+# and show a few more possible things for users to begin moving in
+
  for p in ~/.etc/bin ~/bin; do
         [[ -d $p/. ]] || \\builtin continue
         [[ $PATHSEP$PATH$PATHSEP = *"$PATHSEP$p$PATHSEP"* ]] || \
diff --git a/src/edit.c b/src/edit.c

index 58eaf7f..8bccf13 100644 (file)
--- a/src/edit.c
+++ b/src/edit.c
@@ -28,16 +28,16 @@
  
  #ifndef MKSH_NO_CMDLINE_EDITING
  
-__RCSID("$MirOS: src/bin/mksh/edit.c,v 1.321 2017/04/12 16:46:20 tg Exp $");
+__RCSID("$MirOS: src/bin/mksh/edit.c,v 1.340 2017/08/27 23:33:50 tg Exp $");
  
  /*
   * in later versions we might use libtermcap for this, but since external
   * dependencies are problematic, this has not yet been decided on; another
- * good string is "\033c" except on hardware terminals like the DEC VT420
- * which do a full power cycle then...
+ * good string is KSH_ESC_STRING "c" except on hardware terminals like the
+ * DEC VT420 which do a full power cycle then...
   */
  #ifndef MKSH_CLS_STRING
-#define MKSH_CLS_STRING                "\033[;H\033[J"
+#define MKSH_CLS_STRING                KSH_ESC_STRING "[;H" KSH_ESC_STRING "[J"
  #endif
  
  /* tty driver characters we are interested in */
@@ -76,7 +76,7 @@ static int modified;                  /* buffer has been "modified" */
  static char *holdbufp;                 /* place to hold last edit buffer */
  
  /* 0=dumb 1=tmux (for now) */
-static bool x_term_mode;
+static uint8_t x_term_mode;
  
  static void x_adjust(void);
  static int x_getc(void);
@@ -97,6 +97,7 @@ static void x_init_prompt(bool);
  #if !MKSH_S_NOVI
  static int x_vi(char *);
  #endif
+static void x_intr(int, int) MKSH_A_NORETURN;
  
  #define x_flush()      shf_flush(shl_out)
  #if defined(MKSH_SMALL) && !defined(MKSH_SMALL_BUT_FAST)
@@ -309,14 +310,14 @@ x_glob_hlp_add_qchar(char *cp)
                          * empirically made list of chars to escape
                          * for globbing as well as QCHAR itself
                          */
-                       switch (ch) {
+                       switch (ord(ch)) {
                         case QCHAR:
-                       case '$':
-                       case '*':
-                       case '?':
-                       case '[':
-                       case '\\':
-                       case '`':
+                       case ord('$'):
+                       case ord('*'):
+                       case ord('?'):
+                       case ord('['):
+                       case ord('\\'):
+                       case ord('`'):
                                 *dp++ = QCHAR;
                                 break;
                         }
@@ -467,7 +468,7 @@ path_order_cmp(const void *aa, const void *bb)
         const struct path_order_info *b = (const struct path_order_info *)bb;
         int t;
  
-       if ((t = strcmp(a->word + a->base, b->word + b->base)))
+       if ((t = ascstrcmp(a->word + a->base, b->word + b->base)))
                 return (t);
         if (a->path_order > b->path_order)
                 return (1);
@@ -535,7 +536,7 @@ x_command_glob(int flags, char *toglob, char ***wordsp)
                 char **words = (char **)XPptrv(w);
                 size_t i, j;
  
-               qsort(words, nwords, sizeof(void *), xstrcmp);
+               qsort(words, nwords, sizeof(void *), ascpstrcmp);
                 for (i = j = 0; i < nwords - 1; i++) {
                         if (strcmp(words[i], words[i + 1]))
                                 words[j++] = words[i];
@@ -552,8 +553,7 @@ x_command_glob(int flags, char *toglob, char ***wordsp)
         return (nwords);
  }
  
-#define IS_WORDC(c)    (!ctype(c, C_LEX1) && (c) != '\'' && (c) != '"' && \
-                           (c) != '`' && (c) != '=' && (c) != ':')
+#define IS_WORDC(c)    (!ctype(c, C_EDNWC))
  
  static int
  x_locate_word(const char *buf, int buflen, int pos, int *startp,
@@ -588,9 +588,9 @@ x_locate_word(const char *buf, int buflen, int pos, int *startp,
                 int p = start - 1;
  
                 /* Figure out if this is a command */
-               while (p >= 0 && ksh_isspace(buf[p]))
+               while (p >= 0 && ctype(buf[p], C_SPACE))
                         p--;
-               iscmd = p < 0 || vstrchr(";|&()`", buf[p]);
+               iscmd = p < 0 || ctype(buf[p], C_EDCMD);
                 if (iscmd) {
                         /*
                          * If command has a /, path, etc. is not searched;
@@ -649,11 +649,12 @@ x_cf_glob(int *flagsp, const char *buf, int buflen, int pos, int *startp,
                 for (s = toglob; *s; s++) {
                         if (*s == '\\' && s[1])
                                 s++;
-                       else if (*s == '?' || *s == '*' || *s == '[' ||
-                           *s == '$' ||
+                       else if (ctype(*s, C_QUEST | C_DOLAR) ||
+                           ord(*s) == ord('*') || ord(*s) == ord('[') ||
                             /* ?() *() +() @() !() but two already checked */
-                           (s[1] == '(' /*)*/ &&
-                           (*s == '+' || *s == '@' || *s == '!'))) {
+                           (ord(s[1]) == ord('(' /*)*/) &&
+                           (ord(*s) == ord('+') || ord(*s) == ord('@') ||
+                           ord(*s) == ord('!')))) {
                                 /*
                                  * just expand based on the extglob
                                  * or parameter
@@ -714,8 +715,8 @@ x_longest_prefix(int nwords, char * const * words)
                                 break;
                         }
         /* false for nwords==1 as 0 = words[0][prefix_len] then */
-       if (UTFMODE && prefix_len && (words[0][prefix_len] & 0xC0) == 0x80)
-               while (prefix_len && (words[0][prefix_len] & 0xC0) != 0xC0)
+       if (UTFMODE && prefix_len && (rtt2asc(words[0][prefix_len]) & 0xC0) == 0x80)
+               while (prefix_len && (rtt2asc(words[0][prefix_len]) & 0xC0) != 0xC0)
                         --prefix_len;
         return (prefix_len);
  }
@@ -747,7 +748,7 @@ x_basename(const char *s, const char *se)
         const char *p;
  
         if (se == NULL)
-               se = s + strlen(s);
+               se = strnul(s);
         if (s == se)
                 return (0);
  
@@ -799,7 +800,7 @@ glob_path(int flags, const char *pat, XPtrV *wp, const char *lpath)
         while (sp) {
                 xp = Xstring(xs, xp);
                 if (!(p = cstrchr(sp, MKSH_PATHSEPC)))
-                       p = sp + strlen(sp);
+                       p = strnul(sp);
                 pathlen = p - sp;
                 if (pathlen) {
                         /*
@@ -858,8 +859,7 @@ x_escape(const char *s, size_t len, int (*putbuf_func)(const char *, size_t))
         int rval = 0;
  
         while (wlen - add > 0)
-               if (vstrchr("\"#$&'()*:;<=>?[\\`{|}", s[add]) ||
-                   ctype(s[add], C_IFS)) {
+               if (ctype(s[add], C_IFS | C_EDQ)) {
                         if (putbuf_func(s, add) != 0) {
                                 rval = -1;
                                 break;
@@ -908,11 +908,6 @@ struct x_defbindings {
  #define        XF_NOBIND       2       /* not allowed to bind to function */
  #define        XF_PREFIX       4       /* function sets prefix */
  
-/* Separator for completion */
-#define        is_cfs(c)       ((c) == ' ' || (c) == '\t' || (c) == '"' || (c) == '\'')
-/* Separator for motion */
-#define        is_mfs(c)       (!(ksh_isalnux(c) || (c) == '$' || ((c) & 0x80)))
-
  #define X_NTABS                4                       /* normal, meta1, meta2, pc */
  #define X_TABSZ                256                     /* size of keydef tables etc */
  
@@ -991,6 +986,7 @@ static void x_bs3(char **);
  static int x_size2(char *, char **);
  static void x_zots(char *);
  static void x_zotc3(char **);
+static void x_vi_zotc(int);
  static void x_load_hist(char **);
  static int x_search(char *, int, int);
  #ifndef MKSH_SMALL
@@ -1036,56 +1032,56 @@ static const struct x_ftab x_ftab[] = {
  };
  
  static struct x_defbindings const x_defbindings[] = {
-       { XFUNC_del_back,               0, CTRL('?')    },
-       { XFUNC_del_bword,              1, CTRL('?')    },
-       { XFUNC_eot_del,                0, CTRL('D')    },
-       { XFUNC_del_back,               0, CTRL('H')    },
-       { XFUNC_del_bword,              1, CTRL('H')    },
+       { XFUNC_del_back,               0,  CTRL_QM     },
+       { XFUNC_del_bword,              1,  CTRL_QM     },
+       { XFUNC_eot_del,                0,  CTRL_D      },
+       { XFUNC_del_back,               0,  CTRL_H      },
+       { XFUNC_del_bword,              1,  CTRL_H      },
         { XFUNC_del_bword,              1,      'h'     },
         { XFUNC_mv_bword,               1,      'b'     },
         { XFUNC_mv_fword,               1,      'f'     },
         { XFUNC_del_fword,              1,      'd'     },
-       { XFUNC_mv_back,                0, CTRL('B')    },
-       { XFUNC_mv_forw,                0, CTRL('F')    },
-       { XFUNC_search_char_forw,       0, CTRL(']')    },
-       { XFUNC_search_char_back,       1, CTRL(']')    },
-       { XFUNC_newline,                0, CTRL('M')    },
-       { XFUNC_newline,                0, CTRL('J')    },
-       { XFUNC_end_of_text,            0, CTRL('_')    },
-       { XFUNC_abort,                  0, CTRL('G')    },
-       { XFUNC_prev_com,               0, CTRL('P')    },
-       { XFUNC_next_com,               0, CTRL('N')    },
-       { XFUNC_nl_next_com,            0, CTRL('O')    },
-       { XFUNC_search_hist,            0, CTRL('R')    },
+       { XFUNC_mv_back,                0,  CTRL_B      },
+       { XFUNC_mv_forw,                0,  CTRL_F      },
+       { XFUNC_search_char_forw,       0,  CTRL_BC     },
+       { XFUNC_search_char_back,       1,  CTRL_BC     },
+       { XFUNC_newline,                0,  CTRL_M      },
+       { XFUNC_newline,                0,  CTRL_J      },
+       { XFUNC_end_of_text,            0,  CTRL_US     },
+       { XFUNC_abort,                  0,  CTRL_G      },
+       { XFUNC_prev_com,               0,  CTRL_P      },
+       { XFUNC_next_com,               0,  CTRL_N      },
+       { XFUNC_nl_next_com,            0,  CTRL_O      },
+       { XFUNC_search_hist,            0,  CTRL_R      },
         { XFUNC_beg_hist,               1,      '<'     },
         { XFUNC_end_hist,               1,      '>'     },
         { XFUNC_goto_hist,              1,      'g'     },
-       { XFUNC_mv_end,                 0, CTRL('E')    },
-       { XFUNC_mv_beg,                 0, CTRL('A')    },
-       { XFUNC_draw_line,              0, CTRL('L')    },
-       { XFUNC_cls,                    1, CTRL('L')    },
-       { XFUNC_meta1,                  0, CTRL('[')    },
-       { XFUNC_meta2,                  0, CTRL('X')    },
-       { XFUNC_kill,                   0, CTRL('K')    },
-       { XFUNC_yank,                   0, CTRL('Y')    },
+       { XFUNC_mv_end,                 0,  CTRL_E      },
+       { XFUNC_mv_beg,                 0,  CTRL_A      },
+       { XFUNC_draw_line,              0,  CTRL_L      },
+       { XFUNC_cls,                    1,  CTRL_L      },
+       { XFUNC_meta1,                  0,  CTRL_BO     },
+       { XFUNC_meta2,                  0,  CTRL_X      },
+       { XFUNC_kill,                   0,  CTRL_K      },
+       { XFUNC_yank,                   0,  CTRL_Y      },
         { XFUNC_meta_yank,              1,      'y'     },
-       { XFUNC_literal,                0, CTRL('^')    },
+       { XFUNC_literal,                0,  CTRL_CA     },
         { XFUNC_comment,                1,      '#'     },
-       { XFUNC_transpose,              0, CTRL('T')    },
-       { XFUNC_complete,               1, CTRL('[')    },
-       { XFUNC_comp_list,              0, CTRL('I')    },
+       { XFUNC_transpose,              0,  CTRL_T      },
+       { XFUNC_complete,               1,  CTRL_BO     },
+       { XFUNC_comp_list,              0,  CTRL_I      },
         { XFUNC_comp_list,              1,      '='     },
         { XFUNC_enumerate,              1,      '?'     },
         { XFUNC_expand,                 1,      '*'     },
-       { XFUNC_comp_file,              1, CTRL('X')    },
-       { XFUNC_comp_comm,              2, CTRL('[')    },
+       { XFUNC_comp_file,              1,  CTRL_X      },
+       { XFUNC_comp_comm,              2,  CTRL_BO     },
         { XFUNC_list_comm,              2,      '?'     },
-       { XFUNC_list_file,              2, CTRL('Y')    },
+       { XFUNC_list_file,              2,  CTRL_Y      },
         { XFUNC_set_mark,               1,      ' '     },
-       { XFUNC_kill_region,            0, CTRL('W')    },
-       { XFUNC_xchg_point_mark,        2, CTRL('X')    },
-       { XFUNC_literal,                0, CTRL('V')    },
-       { XFUNC_version,                1, CTRL('V')    },
+       { XFUNC_kill_region,            0,  CTRL_W      },
+       { XFUNC_xchg_point_mark,        2,  CTRL_X      },
+       { XFUNC_literal,                0,  CTRL_V      },
+       { XFUNC_version,                1,  CTRL_V      },
         { XFUNC_prev_histword,          1,      '.'     },
         { XFUNC_prev_histword,          1,      '_'     },
         { XFUNC_set_arg,                1,      '0'     },
@@ -1148,7 +1144,7 @@ static struct x_defbindings const x_defbindings[] = {
  #endif
  #ifndef MKSH_SMALL
         /* more non-standard ones */
-       { XFUNC_eval_region,            1, CTRL('E')    },
+       { XFUNC_eval_region,            1,  CTRL_E      },
         { XFUNC_edit_line,              2,      'e'     }
  #endif
  };
@@ -1191,17 +1187,19 @@ x_e_getmbc(char *sbuf)
         if (c == -1)
                 return (-1);
         if (UTFMODE) {
-               if ((buf[0] >= 0xC2) && (buf[0] < 0xF0)) {
+               if ((rtt2asc(buf[0]) >= (unsigned char)0xC2) &&
+                   (rtt2asc(buf[0]) < (unsigned char)0xF0)) {
                         c = x_e_getc();
                         if (c == -1)
                                 return (-1);
-                       if ((c & 0xC0) != 0x80) {
+                       if ((rtt2asc(c) & 0xC0) != 0x80) {
                                 x_e_ungetc(c);
                                 return (1);
                         }
                         buf[pos++] = c;
                 }
-               if ((buf[0] >= 0xE0) && (buf[0] < 0xF0)) {
+               if ((rtt2asc(buf[0]) >= (unsigned char)0xE0) &&
+                   (rtt2asc(buf[0]) < (unsigned char)0xF0)) {
                         /* XXX x_e_ungetc is one-octet only */
                         buf[pos++] = c = x_e_getc();
                         if (c == -1)
@@ -1299,9 +1297,7 @@ x_emacs(char *buf)
                         return (i);
                 case KINTR:
                         /* special case for interrupt */
-                       trapsig(SIGINT);
-                       x_mode(false);
-                       unwind(LSHELL);
+                       x_intr(SIGINT, c);
                 }
                 /* ad-hoc hack for fixing the cursor position */
                 x_goto(xcp);
@@ -1320,11 +1316,11 @@ x_insert(int c)
         if (c == 0) {
   invmbs:
                 left = 0;
-               x_e_putc2(7);
+               x_e_putc2(KSH_BEL);
                 return (KSTD);
         }
         if (UTFMODE) {
-               if (((c & 0xC0) == 0x80) && left) {
+               if (((rtt2asc(c) & 0xC0) == 0x80) && left) {
                         str[pos++] = c;
                         if (!--left) {
                                 str[pos] = '\0';
@@ -1382,7 +1378,7 @@ static int
  x_do_ins(const char *cp, size_t len)
  {
         if (xep + len >= xend) {
-               x_e_putc2(7);
+               x_e_putc2(KSH_BEL);
                 return (-1);
         }
         memmove(xcp + len, xcp, xep - xcp + 1);
@@ -1422,7 +1418,7 @@ x_del_back(int c MKSH_A_UNUSED)
         ssize_t i = 0;
  
         if (xcp == xbuf) {
-               x_e_putc2(7);
+               x_e_putc2(KSH_BEL);
                 return (KSTD);
         }
         do {
@@ -1448,7 +1444,7 @@ x_del_char(int c MKSH_A_UNUSED)
         }
  
         if (!i) {
-               x_e_putc2(7);
+               x_e_putc2(KSH_BEL);
                 return (KSTD);
         }
         x_delete(i, false);
@@ -1558,15 +1554,15 @@ x_bword(void)
         char *cp = xcp;
  
         if (cp == xbuf) {
-               x_e_putc2(7);
+               x_e_putc2(KSH_BEL);
                 return (0);
         }
         while (x_arg--) {
-               while (cp != xbuf && is_mfs(cp[-1])) {
+               while (cp != xbuf && ctype(cp[-1], C_MFS)) {
                         cp--;
                         nb++;
                 }
-               while (cp != xbuf && !is_mfs(cp[-1])) {
+               while (cp != xbuf && !ctype(cp[-1], C_MFS)) {
                         cp--;
                         nb++;
                 }
@@ -1582,13 +1578,13 @@ x_fword(bool move)
         char *cp = xcp;
  
         if (cp == xep) {
-               x_e_putc2(7);
+               x_e_putc2(KSH_BEL);
                 return (0);
         }
         while (x_arg--) {
-               while (cp != xep && is_mfs(*cp))
+               while (cp != xep && ctype(*cp, C_MFS))
                         cp++;
-               while (cp != xep && !is_mfs(*cp))
+               while (cp != xep && !ctype(*cp, C_MFS))
                         cp++;
         }
         nc = x_nb2nc(cp - xcp);
@@ -1621,7 +1617,7 @@ x_bs0(char *cp, char *lower_bound)
  {
         if (UTFMODE)
                 while ((!lower_bound || (cp > lower_bound)) &&
-                   ((*(unsigned char *)cp & 0xC0) == 0x80))
+                   ((rtt2asc(*cp) & 0xC0) == 0x80))
                         --cp;
         return (cp);
  }
@@ -1642,14 +1638,14 @@ x_size2(char *cp, char **dcp)
  {
         uint8_t c = *(unsigned char *)cp;
  
-       if (UTFMODE && (c > 0x7F))
+       if (UTFMODE && (rtt2asc(c) > 0x7F))
                 return (utf_widthadj(cp, (const char **)dcp));
         if (dcp)
                 *dcp = cp + 1;
         if (c == '\t')
                 /* Kludge, tabs are always four spaces. */
                 return (4);
-       if (ISCTRL(c) && /* but not C1 */ c < 0x80)
+       if (ksh_isctrl(c))
                 /* control unsigned char */
                 return (2);
         return (1);
@@ -1674,9 +1670,9 @@ x_zotc3(char **cp)
                 /* Kludge, tabs are always four spaces. */
                 x_e_puts(T4spaces);
                 (*cp)++;
-       } else if (ISCTRL(c) && /* but not C1 */ c < 0x80) {
+       } else if (ksh_isctrl(c)) {
                 x_e_putc2('^');
-               x_e_putc2(UNCTRL(c));
+               x_e_putc2(ksh_unctrl(c));
                 (*cp)++;
         } else
                 x_e_putc3((const char **)cp);
@@ -1686,7 +1682,7 @@ static int
  x_mv_back(int c MKSH_A_UNUSED)
  {
         if (xcp == xbuf) {
-               x_e_putc2(7);
+               x_e_putc2(KSH_BEL);
                 return (KSTD);
         }
         while (x_arg--) {
@@ -1703,7 +1699,7 @@ x_mv_forw(int c MKSH_A_UNUSED)
         char *cp = xcp, *cp2;
  
         if (xcp == xep) {
-               x_e_putc2(7);
+               x_e_putc2(KSH_BEL);
                 return (KSTD);
         }
         while (x_arg--) {
@@ -1724,13 +1720,13 @@ x_search_char_forw(int c MKSH_A_UNUSED)
  
         *xep = '\0';
         if (x_e_getmbc(tmp) < 0) {
-               x_e_putc2(7);
+               x_e_putc2(KSH_BEL);
                 return (KSTD);
         }
         while (x_arg--) {
                 if ((cp = (cp == xep) ? NULL : strstr(cp + 1, tmp)) == NULL &&
                     (cp = strstr(xbuf, tmp)) == NULL) {
-                       x_e_putc2(7);
+                       x_e_putc2(KSH_BEL);
                         return (KSTD);
                 }
         }
@@ -1745,7 +1741,7 @@ x_search_char_back(int c MKSH_A_UNUSED)
         bool b;
  
         if (x_e_getmbc(tmp) < 0) {
-               x_e_putc2(7);
+               x_e_putc2(KSH_BEL);
                 return (KSTD);
         }
         for (; x_arg--; cp = p)
@@ -1753,7 +1749,7 @@ x_search_char_back(int c MKSH_A_UNUSED)
                         if (p-- == xbuf)
                                 p = xep;
                         if (p == cp) {
-                               x_e_putc2(7);
+                               x_e_putc2(KSH_BEL);
                                 return (KSTD);
                         }
                         if ((tmp[1] && ((p+1) > xep)) ||
@@ -1789,7 +1785,7 @@ x_end_of_text(int c MKSH_A_UNUSED)
         unsigned char tmp[1], *cp = tmp;
  
         *tmp = isedchar(edchars.eof) ? (unsigned char)edchars.eof :
-           (unsigned char)CTRL('D');
+           (unsigned char)CTRL_D;
         x_zotc3((char **)&cp);
         x_putc('\r');
         x_putc('\n');
@@ -1849,7 +1845,7 @@ x_load_hist(char **hp)
                 sp = holdbufp;
                 modified = 0;
         } else if (hp < history || hp > histptr) {
-               x_e_putc2(7);
+               x_e_putc2(KSH_BEL);
                 return;
         }
         if (sp == NULL)
@@ -1859,7 +1855,7 @@ x_load_hist(char **hp)
                 strlcpy(holdbufp, xbuf, LINE);
         strlcpy(xbuf, sp, xend - xbuf);
         xbp = xbuf;
-       xep = xcp = xbuf + strlen(xbuf);
+       xep = xcp = strnul(xbuf);
         x_adjust();
         modified = 0;
  }
@@ -1904,13 +1900,13 @@ x_search_hist(int c)
                 if ((c = x_e_getc()) < 0)
                         return (KSTD);
                 f = x_tab[0][c];
-               if (c == CTRL('[')) {
+               if (c == CTRL_BO) {
                         if ((f & 0x7F) == XFUNC_meta1) {
                                 if ((c = x_e_getc()) < 0)
                                         return (KSTD);
                                 f = x_tab[1][c] & 0x7F;
                                 if (f == XFUNC_meta1 || f == XFUNC_meta2)
-                                       x_meta1(CTRL('['));
+                                       x_meta1(CTRL_BO);
                                 x_e_ungetc(c);
                         }
                         break;
@@ -1942,7 +1938,7 @@ x_search_hist(int c)
                         /* add char to pattern */
                         /* overflow check... */
                         if ((size_t)(p - pat) >= sizeof(pat) - 1) {
-                               x_e_putc2(7);
+                               x_e_putc2(KSH_BEL);
                                 continue;
                         }
                         *p++ = c, *p = '\0';
@@ -1988,7 +1984,7 @@ x_search(char *pat, int sameline, int offset)
                         return (i);
                 }
         }
-       x_e_putc2(7);
+       x_e_putc2(KSH_BEL);
         x_histp = histptr;
         return (-1);
  }
@@ -2094,7 +2090,7 @@ x_clrtoeol(int lastch, bool line_was_cleared)
         int col;
  
         if (lastch == ' ' && !line_was_cleared && x_term_mode == 1) {
-               shf_puts("\033[K", shl_out);
+               shf_puts(KSH_ESC_STRING "[K", shl_out);
                 line_was_cleared = true;
         }
         if (lastch == ' ' && line_was_cleared)
@@ -2168,11 +2164,11 @@ x_transpose(int c MKSH_A_UNUSED)
          * to the one they want.
          */
         if (xcp == xbuf) {
-               x_e_putc2(7);
+               x_e_putc2(KSH_BEL);
                 return (KSTD);
         } else if (xcp == xep || Flag(FGMACS)) {
                 if (xcp - xbuf == 1) {
-                       x_e_putc2(7);
+                       x_e_putc2(KSH_BEL);
                         return (KSTD);
                 }
                 /*
@@ -2181,12 +2177,12 @@ x_transpose(int c MKSH_A_UNUSED)
                  */
                 x_bs3(&xcp);
                 if (utf_mbtowc(&tmpa, xcp) == (size_t)-1) {
-                       x_e_putc2(7);
+                       x_e_putc2(KSH_BEL);
                         return (KSTD);
                 }
                 x_bs3(&xcp);
                 if (utf_mbtowc(&tmpb, xcp) == (size_t)-1) {
-                       x_e_putc2(7);
+                       x_e_putc2(KSH_BEL);
                         return (KSTD);
                 }
                 utf_wctomb(xcp, tmpa);
@@ -2199,12 +2195,12 @@ x_transpose(int c MKSH_A_UNUSED)
                  * cursor, move cursor position along one.
                  */
                 if (utf_mbtowc(&tmpa, xcp) == (size_t)-1) {
-                       x_e_putc2(7);
+                       x_e_putc2(KSH_BEL);
                         return (KSTD);
                 }
                 x_bs3(&xcp);
                 if (utf_mbtowc(&tmpb, xcp) == (size_t)-1) {
-                       x_e_putc2(7);
+                       x_e_putc2(KSH_BEL);
                         return (KSTD);
                 }
                 utf_wctomb(xcp, tmpa);
@@ -2313,21 +2309,35 @@ x_meta_yank(int c MKSH_A_UNUSED)
         return (KSTD);
  }
  
-static int
-x_abort(int c MKSH_A_UNUSED)
+/* fake receiving an interrupt */
+static void
+x_intr(int signo, int c)
  {
-       /* x_zotc(c); */
+       x_vi_zotc(c);
+       *xep = '\0';
+       strip_nuls(xbuf, xep - xbuf);
+       if (*xbuf)
+               histsave(&source->line, xbuf, HIST_STORE, true);
         xlp = xep = xcp = xbp = xbuf;
         xlp_valid = true;
         *xcp = 0;
         x_modified();
+       x_flush();
+       trapsig(signo);
+       x_mode(false);
+       unwind(LSHELL);
+}
+
+static int
+x_abort(int c MKSH_A_UNUSED)
+{
         return (KINTR);
  }
  
  static int
  x_error(int c MKSH_A_UNUSED)
  {
-       x_e_putc2(7);
+       x_e_putc2(KSH_BEL);
         return (KSTD);
  }
  
@@ -2387,19 +2397,18 @@ x_mapin(const char *cp, Area *ap)
         strdupx(news, cp, ap);
         op = news;
         while (*cp) {
-               /* XXX -- should handle \^ escape? */
-               if (*cp == '^') {
+               switch (*cp) {
+               case '^':
                         cp++;
-                       /*XXX or ^^ escape? this is ugly. */
-                       if (*cp >= '?')
-                               /* includes '?'; ASCII */
-                               *op++ = CTRL(*cp);
-                       else {
-                               *op++ = '^';
-                               cp--;
-                       }
-               } else
+                       *op++ = ksh_toctrl(*cp);
+                       break;
+               case '\\':
+                       if (cp[1] == '\\' || cp[1] == '^')
+                               ++cp;
+                       /* FALLTHROUGH */
+               default:
                         *op++ = *cp;
+               }
                 cp++;
         }
         *op = '\0';
@@ -2412,9 +2421,9 @@ x_mapout2(int c, char **buf)
  {
         char *p = *buf;
  
-       if (ISCTRL(c)) {
+       if (ksh_isctrl(c)) {
                 *p++ = '^';
-               *p++ = UNCTRL(c);
+               *p++ = ksh_unctrl(c);
         } else
                 *p++ = c;
         *p = 0;
@@ -2437,9 +2446,9 @@ x_print(int prefix, int key)
         int f = x_tab[prefix][key];
  
         if (prefix)
-               /* prefix == 1 || prefix == 2 */
-               shf_puts(x_mapout(prefix == 1 ? CTRL('[') :
-                   prefix == 2 ? CTRL('X') : 0), shl_stdout);
+               /* prefix == 1 || prefix == 2 || prefix == 3 */
+               shf_puts(x_mapout(prefix == 1 ? CTRL_BO :
+                   prefix == 2 ? CTRL_X : 0), shl_stdout);
  #ifdef MKSH_SMALL
         shprintf("%s = ", x_mapout(key));
  #else
@@ -2603,7 +2612,7 @@ x_kill_region(int c MKSH_A_UNUSED)
         char *xr;
  
         if (xmp == NULL) {
-               x_e_putc2(7);
+               x_e_putc2(KSH_BEL);
                 return (KSTD);
         }
         if (xmp > xcp) {
@@ -2625,7 +2634,7 @@ x_xchg_point_mark(int c MKSH_A_UNUSED)
         char *tmp;
  
         if (xmp == NULL) {
-               x_e_putc2(7);
+               x_e_putc2(KSH_BEL);
                 return (KSTD);
         }
         tmp = xmp;
@@ -2703,7 +2712,7 @@ x_expand(int c MKSH_A_UNUSED)
             &start, &end, &words);
  
         if (nwords == 0) {
-               x_e_putc2(7);
+               x_e_putc2(KSH_BEL);
                 return (KSTD);
         }
         x_goto(xbuf + start);
@@ -2713,7 +2722,7 @@ x_expand(int c MKSH_A_UNUSED)
         while (i < nwords) {
                 if (x_escape(words[i], strlen(words[i]), x_do_ins) < 0 ||
                     (++i < nwords && x_ins(T1space) < 0)) {
-                       x_e_putc2(7);
+                       x_e_putc2(KSH_BEL);
                         return (KSTD);
                 }
         }
@@ -2737,7 +2746,7 @@ do_complete(
             &start, &end, &words);
         /* no match */
         if (nwords == 0) {
-               x_e_putc2(7);
+               x_e_putc2(KSH_BEL);
                 return;
         }
         if (type == CT_LIST) {
@@ -2894,9 +2903,10 @@ x_e_putc2(int c)
  {
         int width = 1;
  
-       if (c == '\r' || c == '\n')
+       if (ctype(c, C_CR | C_LF))
                 x_col = 0;
         if (x_col < xx_cols) {
+#ifndef MKSH_EBCDIC
                 if (UTFMODE && (c > 0x7F)) {
                         char utf_tmp[3];
                         size_t x;
@@ -2911,9 +2921,10 @@ x_e_putc2(int c)
                                 x_putc(utf_tmp[2]);
                         width = utf_wcwidth(c);
                 } else
+#endif
                         x_putc(c);
                 switch (c) {
-               case 7:
+               case KSH_BEL:
                         break;
                 case '\r':
                 case '\n':
@@ -2935,7 +2946,7 @@ x_e_putc3(const char **cp)
  {
         int width = 1, c = **(const unsigned char **)cp;
  
-       if (c == '\r' || c == '\n')
+       if (ctype(c, C_CR | C_LF))
                 x_col = 0;
         if (x_col < xx_cols) {
                 if (UTFMODE && (c > 0x7F)) {
@@ -2944,7 +2955,13 @@ x_e_putc3(const char **cp)
                         width = utf_widthadj(*cp, (const char **)&cp2);
                         if (cp2 == *cp + 1) {
                                 (*cp)++;
+#ifdef MKSH_EBCDIC
+                               x_putc(asc2rtt(0xEF));
+                               x_putc(asc2rtt(0xBF));
+                               x_putc(asc2rtt(0xBD));
+#else
                                 shf_puts("\xEF\xBF\xBD", shl_out);
+#endif
                         } else
                                 while (*cp < cp2)
                                         x_putcf(*(*cp)++);
@@ -2953,7 +2970,7 @@ x_e_putc3(const char **cp)
                         x_putc(c);
                 }
                 switch (c) {
-               case 7:
+               case KSH_BEL:
                         break;
                 case '\r':
                 case '\n':
@@ -2997,7 +3014,7 @@ x_set_arg(int c)
  
         /* strip command prefix */
         c &= 255;
-       while (c >= 0 && ksh_isdigit(c)) {
+       while (c >= 0 && ctype(c, C_DIGIT)) {
                 n = n * 10 + ksh_numdig(c);
                 if (n > LINE)
                         /* upper bound for repeat */
@@ -3007,7 +3024,7 @@ x_set_arg(int c)
         }
         if (c < 0 || first) {
   x_set_arg_too_big:
-               x_e_putc2(7);
+               x_e_putc2(KSH_BEL);
                 x_arg = 1;
                 x_arg_defaulted = true;
         } else {
@@ -3026,7 +3043,7 @@ x_comment(int c MKSH_A_UNUSED)
         int ret = x_do_comment(xbuf, xend - xbuf, &len);
  
         if (ret < 0)
-               x_e_putc2(7);
+               x_e_putc2(KSH_BEL);
         else {
                 x_modified();
                 xep = xbuf + len;
@@ -3049,7 +3066,7 @@ x_version(int c MKSH_A_UNUSED)
         strdupx(v, KSH_VERSION, ATEMP);
  
         xbuf = xbp = xcp = v;
-       xend = xep = v + strlen(v);
+       xend = xep = strnul(v);
         x_redraw('\r');
         x_flush();
  
@@ -3077,7 +3094,7 @@ x_edit_line(int c MKSH_A_UNUSED)
  {
         if (x_arg_defaulted) {
                 if (xep == xbuf) {
-                       x_e_putc2(7);
+                       x_e_putc2(KSH_BEL);
                         return (KSTD);
                 }
                 if (modified) {
@@ -3092,7 +3109,7 @@ x_edit_line(int c MKSH_A_UNUSED)
                     "fc -e ${VISUAL:-${EDITOR:-vi}} --", x_arg);
         else
                 strlcpy(xbuf, "fc -e ${VISUAL:-${EDITOR:-vi}} --", xend - xbuf);
-       xep = xbuf + strlen(xbuf);
+       xep = strnul(xbuf);
         return (x_newline('\n'));
  }
  #endif
@@ -3132,7 +3149,7 @@ x_prev_histword(int c MKSH_A_UNUSED)
                 last_arg = x_arg_defaulted ? -1 : x_arg;
         xhp = histptr - (m - 1);
         if ((xhp < history) || !(cp = *xhp)) {
-               x_e_putc2(7);
+               x_e_putc2(KSH_BEL);
                 x_modified();
                 return (KSTD);
         }
@@ -3144,11 +3161,11 @@ x_prev_histword(int c MKSH_A_UNUSED)
                 /*
                  * ignore white-space after the last word
                  */
-               while (rcp > cp && is_cfs(*rcp))
+               while (rcp > cp && ctype(*rcp, C_CFS))
                         rcp--;
-               while (rcp > cp && !is_cfs(*rcp))
+               while (rcp > cp && !ctype(*rcp, C_CFS))
                         rcp--;
-               if (is_cfs(*rcp))
+               if (ctype(*rcp, C_CFS))
                         rcp++;
                 x_ins(rcp);
         } else {
@@ -3159,16 +3176,16 @@ x_prev_histword(int c MKSH_A_UNUSED)
                 /*
                  * ignore white-space at start of line
                  */
-               while (*rcp && is_cfs(*rcp))
+               while (*rcp && ctype(*rcp, C_CFS))
                         rcp++;
                 while (x_arg-- > 0) {
-                       while (*rcp && !is_cfs(*rcp))
+                       while (*rcp && !ctype(*rcp, C_CFS))
                                 rcp++;
-                       while (*rcp && is_cfs(*rcp))
+                       while (*rcp && ctype(*rcp, C_CFS))
                                 rcp++;
                 }
                 cp = rcp;
-               while (*rcp && !is_cfs(*rcp))
+               while (*rcp && !ctype(*rcp, C_CFS))
                         rcp++;
                 ch = *rcp;
                 *rcp = '\0';
@@ -3220,14 +3237,14 @@ x_fold_case(int c)
         char *cp = xcp;
  
         if (cp == xep) {
-               x_e_putc2(7);
+               x_e_putc2(KSH_BEL);
                 return (KSTD);
         }
         while (x_arg--) {
                 /*
                  * first skip over any white-space
                  */
-               while (cp != xep && is_mfs(*cp))
+               while (cp != xep && ctype(*cp, C_MFS))
                         cp++;
                 /*
                  * do the first char on its own since it may be
@@ -3245,7 +3262,7 @@ x_fold_case(int c)
                 /*
                  * now for the rest of the word
                  */
-               while (cp != xep && !is_mfs(*cp)) {
+               while (cp != xep && !ctype(*cp, C_MFS)) {
                         if (c == 'U')
                                 /* uppercase */
                                 *cp = ksh_toupper(*cp);
@@ -3324,17 +3341,17 @@ x_mode(bool onoff)
  #endif
  
                 if (!edchars.erase)
-                       edchars.erase = CTRL('H');
+                       edchars.erase = CTRL_H;
                 if (!edchars.kill)
-                       edchars.kill = CTRL('U');
+                       edchars.kill = CTRL_U;
                 if (!edchars.intr)
-                       edchars.intr = CTRL('C');
+                       edchars.intr = CTRL_C;
                 if (!edchars.quit)
-                       edchars.quit = CTRL('\\');
+                       edchars.quit = CTRL_BK;
                 if (!edchars.eof)
-                       edchars.eof = CTRL('D');
+                       edchars.eof = CTRL_D;
                 if (!edchars.werase)
-                       edchars.werase = CTRL('W');
+                       edchars.werase = CTRL_W;
  
                 if (isedchar(edchars.erase)) {
                         bind_if_not_bound(0, edchars.erase, XFUNC_del_back);
@@ -3368,6 +3385,7 @@ static int nextstate(int);
  static int vi_insert(int);
  static int vi_cmd(int, const char *);
  static int domove(int, const char *, int);
+static int domovebeg(void);
  static int redo_insert(int);
  static void yank_range(int, int);
  static int bracktype(int);
@@ -3394,12 +3412,10 @@ static void ed_mov_opt(int, char *);
  static int expand_word(int);
  static int complete_word(int, int);
  static int print_expansions(struct edstate *, int);
-#define char_len(c)    ((ISCTRL((unsigned char)c) && \
-                       /* but not C1 */ (unsigned char)c < 0x80) ? 2 : 1)
-static void x_vi_zotc(int);
  static void vi_error(void);
  static void vi_macro_reset(void);
  static int x_vi_putbuf(const char *, size_t);
+#define char_len(c) (ksh_isctrl(c) ? 2 : 1)
  
  #define vC     0x01            /* a valid command that isn't a vM, vE, vU */
  #define vM     0x02            /* movement command (h, l, etc.) */
@@ -3410,14 +3426,14 @@ static int x_vi_putbuf(const char *, size_t);
  #define vZ     0x40            /* repeat count defaults to 0 (not 1) */
  #define vS     0x80            /* search (/, ?) */
  
-#define is_bad(c)      (classify[(c)&0x7f]&vB)
-#define is_cmd(c)      (classify[(c)&0x7f]&(vM|vE|vC|vU))
-#define is_move(c)     (classify[(c)&0x7f]&vM)
-#define is_extend(c)   (classify[(c)&0x7f]&vE)
-#define is_long(c)     (classify[(c)&0x7f]&vX)
-#define is_undoable(c) (!(classify[(c)&0x7f]&vU))
-#define is_srch(c)     (classify[(c)&0x7f]&vS)
-#define is_zerocount(c)        (classify[(c)&0x7f]&vZ)
+#define is_bad(c)      (classify[rtt2asc(c) & 0x7F] & vB)
+#define is_cmd(c)      (classify[rtt2asc(c) & 0x7F] & (vM | vE | vC | vU))
+#define is_move(c)     (classify[rtt2asc(c) & 0x7F] & vM)
+#define is_extend(c)   (classify[rtt2asc(c) & 0x7F] & vE)
+#define is_long(c)     (classify[rtt2asc(c) & 0x7F] & vX)
+#define is_undoable(c) (!(classify[rtt2asc(c) & 0x7F] & vU))
+#define is_srch(c)     (classify[rtt2asc(c) & 0x7F] & vS)
+#define is_zerocount(c)        (classify[rtt2asc(c) & 0x7F] & vZ)
  
  static const unsigned char classify[128] = {
  /*      0      1       2       3       4       5       6       7       */
@@ -3587,13 +3603,14 @@ x_vi(char *buf)
                 if (state != VLIT) {
                         if (isched(c, edchars.intr) ||
                             isched(c, edchars.quit)) {
+                               /* shove input buffer away */
+                               xbuf = ebuf.cbuf;
+                               xep = xbuf;
+                               if (ebuf.linelen > 0)
+                                       xep += ebuf.linelen;
                                 /* pretend we got an interrupt */
-                               x_vi_zotc(c);
-                               x_flush();
-                               trapsig(isched(c, edchars.intr) ?
-                                   SIGINT : SIGQUIT);
-                               x_mode(false);
-                               unwind(LSHELL);
+                               x_intr(isched(c, edchars.intr) ?
+                                   SIGINT : SIGQUIT, c);
                         } else if (isched(c, edchars.eof) &&
                             state != VVERSION) {
                                 if (vs->linelen == 0) {
@@ -3646,7 +3663,7 @@ vi_hook(int ch)
                 default: ch = 0; goto vi_insert_failed;
                 }
                 if (insert != 0) {
-                       if (ch == CTRL('v')) {
+                       if (ch == CTRL_V) {
                                 state = VLIT;
                                 ch = '^';
                         }
@@ -3667,11 +3684,11 @@ vi_hook(int ch)
                                 return (1);
                         }
                 } else {
-                       if (ch == '\r' || ch == '\n')
+                       if (ctype(ch, C_CR | C_LF))
                                 return (1);
                         cmdlen = 0;
                         argc1 = 0;
-                       if (ch >= ord('1') && ch <= ord('9')) {
+                       if (ctype(ch, C_DIGIT) && ord(ch) != ord('0')) {
                                 argc1 = ksh_numdig(ch);
                                 state = VARG1;
                         } else {
@@ -3716,7 +3733,7 @@ vi_hook(int ch)
                 break;
  
         case VARG1:
-               if (ksh_isdigit(ch))
+               if (ctype(ch, C_DIGIT))
                         argc1 = argc1 * 10 + ksh_numdig(ch);
                 else {
                         curcmd[cmdlen++] = ch;
@@ -3726,7 +3743,7 @@ vi_hook(int ch)
  
         case VEXTCMD:
                 argc2 = 0;
-               if (ch >= ord('1') && ch <= ord('9')) {
+               if (ctype(ch, C_DIGIT) && ord(ch) != ord('0')) {
                         argc2 = ksh_numdig(ch);
                         state = VARG2;
                         return (0);
@@ -3742,7 +3759,7 @@ vi_hook(int ch)
                 break;
  
         case VARG2:
-               if (ksh_isdigit(ch))
+               if (ctype(ch, C_DIGIT))
                         argc2 = argc2 * 10 + ksh_numdig(ch);
                 else {
                         if (argc1 == 0)
@@ -3760,7 +3777,7 @@ vi_hook(int ch)
                 break;
  
         case VXCH:
-               if (ch == CTRL('['))
+               if (ch == CTRL_BO)
                         state = VNORMAL;
                 else {
                         curcmd[cmdlen++] = ch;
@@ -3769,7 +3786,7 @@ vi_hook(int ch)
                 break;
  
         case VSEARCH:
-               if (ch == '\r' || ch == '\n' /*|| ch == CTRL('[')*/ ) {
+               if (ctype(ch, C_CR | C_LF) /* || ch == CTRL_BO */ ) {
                         restore_cbuf();
                         /* Repeat last search? */
                         if (srchlen == 0) {
@@ -3784,7 +3801,7 @@ vi_hook(int ch)
                                 memcpy(srchpat, locpat, srchlen + 1);
                         }
                         state = VCMD;
-               } else if (isched(ch, edchars.erase) || ch == CTRL('h')) {
+               } else if (isched(ch, edchars.erase) || ch == CTRL_H) {
                         if (srchlen != 0) {
                                 srchlen--;
                                 vs->linelen -= char_len(locpat[srchlen]);
@@ -3825,12 +3842,12 @@ vi_hook(int ch)
                                 vi_error();
                         else {
                                 locpat[srchlen++] = ch;
-                               if (ISCTRL(ch) && /* but not C1 */ ch < 0x80) {
+                               if (ksh_isctrl(ch)) {
                                         if ((size_t)vs->linelen + 2 >
                                             (size_t)vs->cbufsize)
                                                 vi_error();
                                         vs->cbuf[vs->linelen++] = '^';
-                                       vs->cbuf[vs->linelen++] = UNCTRL(ch);
+                                       vs->cbuf[vs->linelen++] = ksh_unctrl(ch);
                                 } else {
                                         if (vs->linelen >= vs->cbufsize)
                                                 vi_error();
@@ -3903,8 +3920,8 @@ vi_hook(int ch)
                         break;
                 case 0:
                         if (insert != 0) {
-                               if (lastcmd[0] == 's' || lastcmd[0] == 'c' ||
-                                   lastcmd[0] == 'C') {
+                               if (lastcmd[0] == 's' ||
+                                   ksh_eq(lastcmd[0], 'C', 'c')) {
                                         if (redo_insert(1) != 0)
                                                 vi_error();
                                 } else {
@@ -3942,7 +3959,7 @@ nextstate(int ch)
                 return (VXCH);
         else if (ch == '.')
                 return (VREDO);
-       else if (ch == CTRL('v'))
+       else if (ch == CTRL_V)
                 return (VVERSION);
         else if (is_cmd(ch))
                 return (VCMD);
@@ -3955,7 +3972,7 @@ vi_insert(int ch)
  {
         int tcursor;
  
-       if (isched(ch, edchars.erase) || ch == CTRL('h')) {
+       if (isched(ch, edchars.erase) || ch == CTRL_H) {
                 if (insert == REPLACE) {
                         if (vs->cursor == undo->cursor) {
                                 vi_error();
@@ -4012,7 +4029,7 @@ vi_insert(int ch)
          * buffer (if user inserts & deletes char, ibuf gets trashed and
          * we don't want to use it)
          */
-       if (first_insert && ch != CTRL('['))
+       if (first_insert && ch != CTRL_BO)
                 saved_inslen = 0;
         switch (ch) {
         case '\0':
@@ -4022,7 +4039,7 @@ vi_insert(int ch)
         case '\n':
                 return (1);
  
-       case CTRL('['):
+       case CTRL_BO:
                 expanded = NONE;
                 if (first_insert) {
                         first_insert = false;
@@ -4033,26 +4050,25 @@ vi_insert(int ch)
                         lastcmd[0] = 'a';
                         lastac = 1;
                 }
-               if (lastcmd[0] == 's' || lastcmd[0] == 'c' ||
-                   lastcmd[0] == 'C')
+               if (lastcmd[0] == 's' || ksh_eq(lastcmd[0], 'C', 'c'))
                         return (redo_insert(0));
                 else
                         return (redo_insert(lastac - 1));
  
         /* { start nonstandard vi commands */
-       case CTRL('x'):
+       case CTRL_X:
                 expand_word(0);
                 break;
  
-       case CTRL('f'):
+       case CTRL_F:
                 complete_word(0, 0);
                 break;
  
-       case CTRL('e'):
+       case CTRL_E:
                 print_expansions(vs, 0);
                 break;
  
-       case CTRL('i'):
+       case CTRL_I:
                 if (Flag(FVITABCOMPLETE)) {
                         complete_word(0, 0);
                         break;
@@ -4105,14 +4121,14 @@ vi_cmd(int argcnt, const char *cmd)
                         lastac = argcnt;
                         memmove(lastcmd, cmd, MAXVICMD);
                 }
-               switch (*cmd) {
+               switch (ord(*cmd)) {
  
-               case CTRL('l'):
-               case CTRL('r'):
+               case CTRL_L:
+               case CTRL_R:
                         redraw_line(true);
                         break;
  
-               case '@':
+               case ord('@'):
                         {
                                 static char alias[] = "_\0";
                                 struct tbl *ap;
@@ -4153,7 +4169,7 @@ vi_cmd(int argcnt, const char *cmd)
                         }
                         break;
  
-               case 'a':
+               case ord('a'):
                         modified = 1;
                         hnum = hlast;
                         if (vs->linelen != 0)
@@ -4161,7 +4177,7 @@ vi_cmd(int argcnt, const char *cmd)
                         insert = INSERT;
                         break;
  
-               case 'A':
+               case ord('A'):
                         modified = 1;
                         hnum = hlast;
                         del_range(0, 0);
@@ -4169,36 +4185,35 @@ vi_cmd(int argcnt, const char *cmd)
                         insert = INSERT;
                         break;
  
-               case 'S':
-                       vs->cursor = domove(1, "^", 1);
+               case ord('S'):
+                       vs->cursor = domovebeg();
                         del_range(vs->cursor, vs->linelen);
                         modified = 1;
                         hnum = hlast;
                         insert = INSERT;
                         break;
  
-               case 'Y':
+               case ord('Y'):
                         cmd = "y$";
                         /* ahhhhhh... */
  
                         /* FALLTHROUGH */
-               case 'c':
-               case 'd':
-               case 'y':
+               case ord('c'):
+               case ord('d'):
+               case ord('y'):
                         if (*cmd == cmd[1]) {
-                               c1 = *cmd == 'c' ? domove(1, "^", 1) : 0;
+                               c1 = *cmd == 'c' ? domovebeg() : 0;
                                 c2 = vs->linelen;
                         } else if (!is_move(cmd[1]))
                                 return (-1);
                         else {
                                 if ((ncursor = domove(argcnt, &cmd[1], 1)) < 0)
                                         return (-1);
-                               if (*cmd == 'c' &&
-                                   (cmd[1] == 'w' || cmd[1] == 'W') &&
-                                   !ksh_isspace(vs->cbuf[vs->cursor])) {
+                               if (*cmd == 'c' && ksh_eq(cmd[1], 'W', 'w') &&
+                                   !ctype(vs->cbuf[vs->cursor], C_SPACE)) {
                                         do {
                                                 --ncursor;
-                                       } while (ksh_isspace(vs->cbuf[ncursor]));
+                                       } while (ctype(vs->cbuf[ncursor], C_SPACE));
                                         ncursor++;
                                 }
                                 if (ncursor > vs->cursor) {
@@ -4224,7 +4239,7 @@ vi_cmd(int argcnt, const char *cmd)
                         }
                         break;
  
-               case 'p':
+               case ord('p'):
                         modified = 1;
                         hnum = hlast;
                         if (vs->linelen != 0)
@@ -4238,7 +4253,7 @@ vi_cmd(int argcnt, const char *cmd)
                                 return (-1);
                         break;
  
-               case 'P':
+               case ord('P'):
                         modified = 1;
                         hnum = hlast;
                         any = 0;
@@ -4251,25 +4266,25 @@ vi_cmd(int argcnt, const char *cmd)
                                 return (-1);
                         break;
  
-               case 'C':
+               case ord('C'):
                         modified = 1;
                         hnum = hlast;
                         del_range(vs->cursor, vs->linelen);
                         insert = INSERT;
                         break;
  
-               case 'D':
+               case ord('D'):
                         yank_range(vs->cursor, vs->linelen);
                         del_range(vs->cursor, vs->linelen);
                         if (vs->cursor != 0)
                                 vs->cursor--;
                         break;
  
-               case 'g':
+               case ord('g'):
                         if (!argcnt)
                                 argcnt = hlast;
                         /* FALLTHROUGH */
-               case 'G':
+               case ord('G'):
                         if (!argcnt)
                                 argcnt = 1;
                         else
@@ -4282,22 +4297,22 @@ vi_cmd(int argcnt, const char *cmd)
                         }
                         break;
  
-               case 'i':
+               case ord('i'):
                         modified = 1;
                         hnum = hlast;
                         insert = INSERT;
                         break;
  
-               case 'I':
+               case ord('I'):
                         modified = 1;
                         hnum = hlast;
-                       vs->cursor = domove(1, "^", 1);
+                       vs->cursor = domovebeg();
                         insert = INSERT;
                         break;
  
-               case 'j':
-               case '+':
-               case CTRL('n'):
+               case ord('j'):
+               case ord('+'):
+               case CTRL_N:
                         if (grabhist(modified, hnum + argcnt) < 0)
                                 return (-1);
                         else {
@@ -4306,9 +4321,9 @@ vi_cmd(int argcnt, const char *cmd)
                         }
                         break;
  
-               case 'k':
-               case '-':
-               case CTRL('p'):
+               case ord('k'):
+               case ord('-'):
+               case CTRL_P:
                         if (grabhist(modified, hnum - argcnt) < 0)
                                 return (-1);
                         else {
@@ -4317,7 +4332,7 @@ vi_cmd(int argcnt, const char *cmd)
                         }
                         break;
  
-               case 'r':
+               case ord('r'):
                         if (vs->linelen == 0)
                                 return (-1);
                         modified = 1;
@@ -4335,13 +4350,13 @@ vi_cmd(int argcnt, const char *cmd)
                         }
                         break;
  
-               case 'R':
+               case ord('R'):
                         modified = 1;
                         hnum = hlast;
                         insert = REPLACE;
                         break;
  
-               case 's':
+               case ord('s'):
                         if (vs->linelen == 0)
                                 return (-1);
                         modified = 1;
@@ -4352,7 +4367,7 @@ vi_cmd(int argcnt, const char *cmd)
                         insert = INSERT;
                         break;
  
-               case 'v':
+               case ord('v'):
                         if (!argcnt) {
                                 if (vs->linelen == 0)
                                         return (-1);
@@ -4375,7 +4390,7 @@ vi_cmd(int argcnt, const char *cmd)
                         vs->linelen = strlen(vs->cbuf);
                         return (2);
  
-               case 'x':
+               case ord('x'):
                         if (vs->linelen == 0)
                                 return (-1);
                         modified = 1;
@@ -4386,7 +4401,7 @@ vi_cmd(int argcnt, const char *cmd)
                         del_range(vs->cursor, vs->cursor + argcnt);
                         break;
  
-               case 'X':
+               case ord('X'):
                         if (vs->cursor > 0) {
                                 modified = 1;
                                 hnum = hlast;
@@ -4399,13 +4414,13 @@ vi_cmd(int argcnt, const char *cmd)
                                 return (-1);
                         break;
  
-               case 'u':
+               case ord('u'):
                         t = vs;
                         vs = undo;
                         undo = t;
                         break;
  
-               case 'U':
+               case ord('U'):
                         if (!modified)
                                 return (-1);
                         if (grabhist(modified, ohnum) < 0)
@@ -4414,19 +4429,19 @@ vi_cmd(int argcnt, const char *cmd)
                         hnum = ohnum;
                         break;
  
-               case '?':
+               case ord('?'):
                         if (hnum == hlast)
                                 hnum = -1;
                         /* ahhh */
  
                         /* FALLTHROUGH */
-               case '/':
+               case ord('/'):
                         c3 = 1;
                         srchlen = 0;
                         lastsearch = *cmd;
                         /* FALLTHROUGH */
-               case 'n':
-               case 'N':
+               case ord('n'):
+               case ord('N'):
                         if (lastsearch == ' ')
                                 return (-1);
                         if (lastsearch == '?')
@@ -4453,7 +4468,7 @@ vi_cmd(int argcnt, const char *cmd)
                                 return (0);
                         }
                         break;
-               case '_':
+               case ord('_'):
                         {
                                 bool inspace;
                                 char *p, *sp;
@@ -4461,14 +4476,13 @@ vi_cmd(int argcnt, const char *cmd)
                                 if (histnum(-1) < 0)
                                         return (-1);
                                 p = *histpos();
-#define issp(c)                (ksh_isspace(c) || (c) == '\n')
                                 if (argcnt) {
-                                       while (*p && issp(*p))
+                                       while (ctype(*p, C_SPACE))
                                                 p++;
                                         while (*p && --argcnt) {
-                                               while (*p && !issp(*p))
+                                               while (*p && !ctype(*p, C_SPACE))
                                                         p++;
-                                               while (*p && issp(*p))
+                                               while (ctype(*p, C_SPACE))
                                                         p++;
                                         }
                                         if (!*p)
@@ -4478,7 +4492,7 @@ vi_cmd(int argcnt, const char *cmd)
                                         sp = p;
                                         inspace = false;
                                         while (*p) {
-                                               if (issp(*p))
+                                               if (ctype(*p, C_SPACE))
                                                         inspace = true;
                                                 else if (inspace) {
                                                         inspace = false;
@@ -4492,7 +4506,7 @@ vi_cmd(int argcnt, const char *cmd)
                                 hnum = hlast;
                                 if (vs->cursor != vs->linelen)
                                         vs->cursor++;
-                               while (*p && !issp(*p)) {
+                               while (*p && !ctype(*p, C_SPACE)) {
                                         argcnt++;
                                         p++;
                                 }
@@ -4506,7 +4520,7 @@ vi_cmd(int argcnt, const char *cmd)
                         }
                         break;
  
-               case '~':
+               case ord('~'):
                         {
                                 char *p;
                                 int i;
@@ -4515,11 +4529,11 @@ vi_cmd(int argcnt, const char *cmd)
                                         return (-1);
                                 for (i = 0; i < argcnt; i++) {
                                         p = &vs->cbuf[vs->cursor];
-                                       if (ksh_islower(*p)) {
+                                       if (ctype(*p, C_LOWER)) {
                                                 modified = 1;
                                                 hnum = hlast;
                                                 *p = ksh_toupper(*p);
-                                       } else if (ksh_isupper(*p)) {
+                                       } else if (ctype(*p, C_UPPER)) {
                                                 modified = 1;
                                                 hnum = hlast;
                                                 *p = ksh_tolower(*p);
@@ -4530,7 +4544,7 @@ vi_cmd(int argcnt, const char *cmd)
                                 break;
                         }
  
-               case '#':
+               case ord('#'):
                         {
                                 int ret = x_do_comment(vs->cbuf, vs->cbufsize,
                                     &vs->linelen);
@@ -4540,44 +4554,44 @@ vi_cmd(int argcnt, const char *cmd)
                         }
  
                 /* AT&T ksh */
-               case '=':
+               case ord('='):
                 /* Nonstandard vi/ksh */
-               case CTRL('e'):
+               case CTRL_E:
                         print_expansions(vs, 1);
                         break;
  
  
                 /* Nonstandard vi/ksh */
-               case CTRL('i'):
+               case CTRL_I:
                         if (!Flag(FVITABCOMPLETE))
                                 return (-1);
                         complete_word(1, argcnt);
                         break;
  
                 /* some annoying AT&T kshs */
-               case CTRL('['):
+               case CTRL_BO:
                         if (!Flag(FVIESCCOMPLETE))
                                 return (-1);
                         /* FALLTHROUGH */
                 /* AT&T ksh */
-               case '\\':
+               case ord('\\'):
                 /* Nonstandard vi/ksh */
-               case CTRL('f'):
+               case CTRL_F:
                         complete_word(1, argcnt);
                         break;
  
  
                 /* AT&T ksh */
-               case '*':
+               case ord('*'):
                 /* Nonstandard vi/ksh */
-               case CTRL('x'):
+               case CTRL_X:
                         expand_word(1);
                         break;
  
  
                 /* mksh: cursor movement */
-               case '[':
-               case 'O':
+               case ord('['):
+               case ord('O'):
                         state = VPREFIX2;
                         if (vs->linelen != 0)
                                 vs->cursor++;
@@ -4596,20 +4610,20 @@ domove(int argcnt, const char *cmd, int sub)
         int ncursor = 0, i = 0, t;
         unsigned int bcount;
  
-       switch (*cmd) {
-       case 'b':
+       switch (ord(*cmd)) {
+       case ord('b'):
                 if (!sub && vs->cursor == 0)
                         return (-1);
                 ncursor = backword(argcnt);
                 break;
  
-       case 'B':
+       case ord('B'):
                 if (!sub && vs->cursor == 0)
                         return (-1);
                 ncursor = Backword(argcnt);
                 break;
  
-       case 'e':
+       case ord('e'):
                 if (!sub && vs->cursor + 1 >= vs->linelen)
                         return (-1);
                 ncursor = endword(argcnt);
@@ -4617,7 +4631,7 @@ domove(int argcnt, const char *cmd, int sub)
                         ncursor++;
                 break;
  
-       case 'E':
+       case ord('E'):
                 if (!sub && vs->cursor + 1 >= vs->linelen)
                         return (-1);
                 ncursor = Endword(argcnt);
@@ -4625,18 +4639,18 @@ domove(int argcnt, const char *cmd, int sub)
                         ncursor++;
                 break;
  
-       case 'f':
-       case 'F':
-       case 't':
-       case 'T':
+       case ord('f'):
+       case ord('F'):
+       case ord('t'):
+       case ord('T'):
                 fsavecmd = *cmd;
                 fsavech = cmd[1];
                 /* FALLTHROUGH */
-       case ',':
-       case ';':
+       case ord(','):
+       case ord(';'):
                 if (fsavecmd == ' ')
                         return (-1);
-               i = fsavecmd == 'f' || fsavecmd == 'F';
+               i = ksh_eq(fsavecmd, 'F', 'f');
                 t = fsavecmd > 'a';
                 if (*cmd == ',')
                         t = !t;
@@ -4647,8 +4661,8 @@ domove(int argcnt, const char *cmd, int sub)
                         ncursor++;
                 break;
  
-       case 'h':
-       case CTRL('h'):
+       case ord('h'):
+       case CTRL_H:
                 if (!sub && vs->cursor == 0)
                         return (-1);
                 ncursor = vs->cursor - argcnt;
@@ -4656,8 +4670,8 @@ domove(int argcnt, const char *cmd, int sub)
                         ncursor = 0;
                 break;
  
-       case ' ':
-       case 'l':
+       case ord(' '):
+       case ord('l'):
                 if (!sub && vs->cursor + 1 >= vs->linelen)
                         return (-1);
                 if (vs->linelen != 0) {
@@ -4667,30 +4681,27 @@ domove(int argcnt, const char *cmd, int sub)
                 }
                 break;
  
-       case 'w':
+       case ord('w'):
                 if (!sub && vs->cursor + 1 >= vs->linelen)
                         return (-1);
                 ncursor = forwword(argcnt);
                 break;
  
-       case 'W':
+       case ord('W'):
                 if (!sub && vs->cursor + 1 >= vs->linelen)
                         return (-1);
                 ncursor = Forwword(argcnt);
                 break;
  
-       case '0':
+       case ord('0'):
                 ncursor = 0;
                 break;
  
-       case '^':
-               ncursor = 0;
-               while (ncursor < vs->linelen - 1 &&
-                   ksh_isspace(vs->cbuf[ncursor]))
-                       ncursor++;
+       case ord('^'):
+               ncursor = domovebeg();
                 break;
  
-       case '|':
+       case ord('|'):
                 ncursor = argcnt;
                 if (ncursor > vs->linelen)
                         ncursor = vs->linelen;
@@ -4698,14 +4709,14 @@ domove(int argcnt, const char *cmd, int sub)
                         ncursor--;
                 break;
  
-       case '$':
+       case ord('$'):
                 if (vs->linelen != 0)
                         ncursor = vs->linelen;
                 else
                         ncursor = 0;
                 break;
  
-       case '%':
+       case ord('%'):
                 ncursor = vs->cursor;
                 while (ncursor < vs->linelen &&
                     (i = bracktype(vs->cbuf[ncursor])) == 0)
@@ -4738,6 +4749,17 @@ domove(int argcnt, const char *cmd, int sub)
  }
  
  static int
+domovebeg(void)
+{
+       int ncursor = 0;
+
+       while (ncursor < vs->linelen - 1 &&
+           ctype(vs->cbuf[ncursor], C_SPACE))
+               ncursor++;
+       return (ncursor);
+}
+
+static int
  redo_insert(int count)
  {
         while (count-- > 0)
@@ -4760,24 +4782,24 @@ yank_range(int a, int b)
  static int
  bracktype(int ch)
  {
-       switch (ch) {
+       switch (ord(ch)) {
  
-       case '(':
+       case ord('('):
                 return (1);
  
-       case '[':
+       case ord('['):
                 return (2);
  
-       case '{':
+       case ord('{'):
                 return (3);
  
-       case ')':
+       case ord(')'):
                 return (-1);
  
-       case ']':
+       case ord(']'):
                 return (-2);
  
-       case '}':
+       case ord('}'):
                 return (-3);
  
         default:
@@ -4912,17 +4934,16 @@ forwword(int argcnt)
  
         ncursor = vs->cursor;
         while (ncursor < vs->linelen && argcnt--) {
-               if (ksh_isalnux(vs->cbuf[ncursor]))
+               if (ctype(vs->cbuf[ncursor], C_ALNUX))
                         while (ncursor < vs->linelen &&
-                           ksh_isalnux(vs->cbuf[ncursor]))
+                           ctype(vs->cbuf[ncursor], C_ALNUX))
                                 ncursor++;
-               else if (!ksh_isspace(vs->cbuf[ncursor]))
+               else if (!ctype(vs->cbuf[ncursor], C_SPACE))
                         while (ncursor < vs->linelen &&
-                           !ksh_isalnux(vs->cbuf[ncursor]) &&
-                           !ksh_isspace(vs->cbuf[ncursor]))
+                           !ctype(vs->cbuf[ncursor], C_ALNUX | C_SPACE))
                                 ncursor++;
                 while (ncursor < vs->linelen &&
-                   ksh_isspace(vs->cbuf[ncursor]))
+                   ctype(vs->cbuf[ncursor], C_SPACE))
                         ncursor++;
         }
         return (ncursor);
@@ -4935,17 +4956,16 @@ backword(int argcnt)
  
         ncursor = vs->cursor;
         while (ncursor > 0 && argcnt--) {
-               while (--ncursor > 0 && ksh_isspace(vs->cbuf[ncursor]))
+               while (--ncursor > 0 && ctype(vs->cbuf[ncursor], C_SPACE))
                         ;
                 if (ncursor > 0) {
-                       if (ksh_isalnux(vs->cbuf[ncursor]))
+                       if (ctype(vs->cbuf[ncursor], C_ALNUX))
                                 while (--ncursor >= 0 &&
-                                   ksh_isalnux(vs->cbuf[ncursor]))
+                                   ctype(vs->cbuf[ncursor], C_ALNUX))
                                         ;
                         else
                                 while (--ncursor >= 0 &&
-                                   !ksh_isalnux(vs->cbuf[ncursor]) &&
-                                   !ksh_isspace(vs->cbuf[ncursor]))
+                                   !ctype(vs->cbuf[ncursor], C_ALNUX | C_SPACE))
                                         ;
                         ncursor++;
                 }
@@ -4961,17 +4981,16 @@ endword(int argcnt)
         ncursor = vs->cursor;
         while (ncursor < vs->linelen && argcnt--) {
                 while (++ncursor < vs->linelen - 1 &&
-                   ksh_isspace(vs->cbuf[ncursor]))
+                   ctype(vs->cbuf[ncursor], C_SPACE))
                         ;
                 if (ncursor < vs->linelen - 1) {
-                       if (ksh_isalnux(vs->cbuf[ncursor]))
+                       if (ctype(vs->cbuf[ncursor], C_ALNUX))
                                 while (++ncursor < vs->linelen &&
-                                   ksh_isalnux(vs->cbuf[ncursor]))
+                                   ctype(vs->cbuf[ncursor], C_ALNUX))
                                         ;
                         else
                                 while (++ncursor < vs->linelen &&
-                                   !ksh_isalnux(vs->cbuf[ncursor]) &&
-                                   !ksh_isspace(vs->cbuf[ncursor]))
+                                   !ctype(vs->cbuf[ncursor], C_ALNUX | C_SPACE))
                                         ;
                         ncursor--;
                 }
@@ -4987,10 +5006,10 @@ Forwword(int argcnt)
         ncursor = vs->cursor;
         while (ncursor < vs->linelen && argcnt--) {
                 while (ncursor < vs->linelen &&
-                   !ksh_isspace(vs->cbuf[ncursor]))
+                   !ctype(vs->cbuf[ncursor], C_SPACE))
                         ncursor++;
                 while (ncursor < vs->linelen &&
-                   ksh_isspace(vs->cbuf[ncursor]))
+                   ctype(vs->cbuf[ncursor], C_SPACE))
                         ncursor++;
         }
         return (ncursor);
@@ -5003,9 +5022,9 @@ Backword(int argcnt)
  
         ncursor = vs->cursor;
         while (ncursor > 0 && argcnt--) {
-               while (--ncursor >= 0 && ksh_isspace(vs->cbuf[ncursor]))
+               while (--ncursor >= 0 && ctype(vs->cbuf[ncursor], C_SPACE))
                         ;
-               while (ncursor >= 0 && !ksh_isspace(vs->cbuf[ncursor]))
+               while (ncursor >= 0 && !ctype(vs->cbuf[ncursor], C_SPACE))
                         ncursor--;
                 ncursor++;
         }
@@ -5020,11 +5039,11 @@ Endword(int argcnt)
         ncursor = vs->cursor;
         while (ncursor < vs->linelen - 1 && argcnt--) {
                 while (++ncursor < vs->linelen - 1 &&
-                   ksh_isspace(vs->cbuf[ncursor]))
+                   ctype(vs->cbuf[ncursor], C_SPACE))
                         ;
                 if (ncursor < vs->linelen - 1) {
                         while (++ncursor < vs->linelen &&
-                           !ksh_isspace(vs->cbuf[ncursor]))
+                           !ctype(vs->cbuf[ncursor], C_SPACE))
                                 ;
                         ncursor--;
                 }
@@ -5187,10 +5206,10 @@ display(char *wb1, char *wb2, int leftside)
                                 *twb1++ = ' ';
                         } while (++col < winwidth && (col & 7) != 0);
                 else if (col < winwidth) {
-                       if (ISCTRL(ch) && /* but not C1 */ ch < 0x80) {
+                       if (ksh_isctrl(ch)) {
                                 *twb1++ = '^';
                                 if (++col < winwidth) {
-                                       *twb1++ = UNCTRL(ch);
+                                       *twb1++ = ksh_unctrl(ch);
                                         col++;
                                 }
                         } else {
@@ -5460,24 +5479,26 @@ print_expansions(struct edstate *est, int cmd MKSH_A_UNUSED)
         redraw_line(false);
         return (0);
  }
+#endif /* !MKSH_S_NOVI */
  
  /* Similar to x_zotc(emacs.c), but no tab weirdness */
  static void
  x_vi_zotc(int c)
  {
-       if (ISCTRL(c)) {
+       if (ksh_isctrl(c)) {
                 x_putc('^');
-               c = UNCTRL(c);
+               c = ksh_unctrl(c);
         }
         x_putc(c);
  }
  
+#if !MKSH_S_NOVI
  static void
  vi_error(void)
  {
         /* Beem out of any macros as soon as an error occurs */
         vi_macro_reset();
-       x_putc(7);
+       x_putc(KSH_BEL);
         x_flush();
  }
  
@@ -5602,7 +5623,7 @@ x_eval_region(int c MKSH_A_UNUSED)
         if (cp == NULL) {
                 /* command cannot be parsed */
   x_eval_region_err:
-               x_e_putc2(7);
+               x_e_putc2(KSH_BEL);
                 x_redraw('\r');
                 return (KSTD);
         }
diff --git a/src/eval.c b/src/eval.c

index 23894d6..7a892c0 100644 (file)
--- a/src/eval.c
+++ b/src/eval.c
@@ -23,7 +23,7 @@
  
  #include "sh.h"
  
-__RCSID("$MirOS: src/bin/mksh/eval.c,v 1.201 2017/04/06 01:59:54 tg Exp $");
+__RCSID("$MirOS: src/bin/mksh/eval.c,v 1.215 2017/08/28 23:27:51 tg Exp $");
  
  /*
   * string expansion
@@ -65,6 +65,12 @@ typedef struct {
  #define IFS_IWS                3       /* beginning of word, ignore IFS WS */
  #define IFS_QUOTE      4       /* beg.w/quote, become IFS_WORD unless "$@" */
  
+#define STYPE_CHAR     0xFF
+#define STYPE_DBL      0x100
+#define STYPE_AT       0x200
+#define STYPE_SINGLE   0x2FF
+#define STYPE_MASK     0x300
+
  static int varsub(Expand *, const char *, const char *, int *, int *);
  static int comsub(Expand *, const char *, int);
  static char *valsub(struct op *, Area *);
@@ -277,18 +283,18 @@ expand(
                 switch (type) {
                 case XBASE:
                         /* original prefixed string */
-                       c = *sp++;
+                       c = ord(*sp++);
                         switch (c) {
                         case EOS:
                                 c = 0;
                                 break;
                         case CHAR:
-                               c = *sp++;
+                               c = ord(*sp++);
                                 break;
                         case QCHAR:
                                 /* temporary quote */
                                 quote |= 2;
-                               c = *sp++;
+                               c = ord(*sp++);
                                 break;
                         case OQUOTE:
                                 if (word != IFS_WORD)
@@ -314,21 +320,21 @@ expand(
                                         case COMASUB:
                                         case COMSUB:
                                                 *dp++ = '(';
-                                               c = ')';
+                                               c = ord(')');
                                                 break;
                                         case FUNASUB:
                                         case FUNSUB:
                                         case VALSUB:
                                                 *dp++ = '{';
                                                 *dp++ = c == VALSUB ? '|' : ' ';
-                                               c = '}';
+                                               c = ord('}');
                                                 break;
                                         }
                                         while (*sp != '\0') {
                                                 Xcheck(ds, dp);
                                                 *dp++ = *sp++;
                                         }
-                                       if (c == '}')
+                                       if (c == ord('}'))
                                                 *dp++ = ';';
                                         *dp++ = c;
                                 } else {
@@ -429,12 +435,12 @@ expand(
                                         /* skip qualifier(s) */
                                         if (stype)
                                                 sp += slen;
-                                       switch (stype & 0x17F) {
-                                       case 0x100 | '#':
+                                       switch (stype & STYPE_SINGLE) {
+                                       case ord('#') | STYPE_AT:
                                                 x.str = shf_smprintf("%08X",
                                                     (unsigned int)hash(str_val(st->var)));
                                                 break;
-                                       case 0x100 | 'Q': {
+                                       case ord('Q') | STYPE_AT: {
                                                 struct shf shf;
  
                                                 shf_sopen(NULL, 0, SHF_WR|SHF_DYNAMIC, &shf);
@@ -442,7 +448,7 @@ expand(
                                                 x.str = shf_sclose(&shf);
                                                 break;
                                             }
-                                       case '0': {
+                                       case ord('0'): {
                                                 char *beg, *mid, *end, *stg;
                                                 mksh_ari_t from = 0, num = -1, flen, finc = 0;
  
@@ -450,13 +456,13 @@ expand(
                                                 mid = beg + (wdscan(sp, ADELIM) - sp);
                                                 stg = beg + (wdscan(sp, CSUBST) - sp);
                                                 mid[-2] = EOS;
-                                               if (mid[-1] == /*{*/'}') {
+                                               if (ord(mid[-1]) == ord(/*{*/ '}')) {
                                                         sp += mid - beg - 1;
                                                         end = NULL;
                                                 } else {
                                                         end = mid +
                                                             (wdscan(mid, ADELIM) - mid);
-                                                       if (end[-1] != /*{*/ '}')
+                                                       if (ord(end[-1]) != ord(/*{*/ '}'))
                                                                 /* more than max delimiters */
                                                                 goto unwind_substsyn;
                                                         end[-2] = EOS;
@@ -489,8 +495,8 @@ expand(
                                                 strndupx(x.str, beg, num, ATEMP);
                                                 goto do_CSUBST;
                                             }
-                                       case 0x100 | '/':
-                                       case '/': {
+                                       case ord('/') | STYPE_AT:
+                                       case ord('/'): {
                                                 char *s, *p, *d, *sbeg, *end;
                                                 char *pat = NULL, *rrep = null;
                                                 char fpat = 0, *tpat1, *tpat2;
@@ -500,18 +506,18 @@ expand(
                                                 p = s + (wdscan(sp, ADELIM) - sp);
                                                 d = s + (wdscan(sp, CSUBST) - sp);
                                                 p[-2] = EOS;
-                                               if (p[-1] == /*{*/'}')
+                                               if (ord(p[-1]) == ord(/*{*/ '}'))
                                                         d = NULL;
                                                 else
                                                         d[-2] = EOS;
                                                 sp += (d ? d : p) - s - 1;
-                                               if (!(stype & 0x180) &&
+                                               if (!(stype & STYPE_MASK) &&
                                                     s[0] == CHAR &&
-                                                   (s[1] == '#' || s[1] == '%'))
+                                                   ctype(s[1], C_SUB2))
                                                         fpat = s[1];
                                                 wpat = s + (fpat ? 2 : 0);
                                                 wrep = d ? p : NULL;
-                                               if (!(stype & 0x100)) {
+                                               if (!(stype & STYPE_AT)) {
                                                         rrep = wrep ? evalstr(wrep,
                                                             DOTILDE | DOSCALAR) :
                                                             null;
@@ -531,21 +537,21 @@ expand(
                                                          */
                                                         goto no_repl;
                                                 }
-                                               if ((stype & 0x180) &&
+                                               if ((stype & STYPE_MASK) &&
                                                     gmatchx(null, pat, false)) {
                                                         /*
                                                          * pattern matches empty
                                                          * string => don't loop
                                                          */
-                                                       stype &= ~0x180;
+                                                       stype &= ~STYPE_MASK;
                                                 }
  
                                                 /* first see if we have any match at all */
-                                               if (fpat == '#') {
+                                               if (ord(fpat) == ord('#')) {
                                                         /* anchor at the beginning */
                                                         tpat1 = shf_smprintf("%s%c*", pat, MAGIC);
                                                         tpat2 = tpat1;
-                                               } else if (fpat == '%') {
+                                               } else if (ord(fpat) == ord('%')) {
                                                         /* anchor at the end */
                                                         tpat1 = shf_smprintf("%c*%s", MAGIC, pat);
                                                         tpat2 = pat;
@@ -563,7 +569,7 @@ expand(
                                                         goto end_repl;
                                                 end = strnul(s);
                                                 /* now anchor the beginning of the match */
-                                               if (fpat != '#')
+                                               if (ord(fpat) != ord('#'))
                                                         while (sbeg <= end) {
                                                                 if (gmatchx(sbeg, tpat2, false))
                                                                         break;
@@ -572,7 +578,7 @@ expand(
                                                         }
                                                 /* now anchor the end of the match */
                                                 p = end;
-                                               if (fpat != '%')
+                                               if (ord(fpat) != ord('%'))
                                                         while (p >= sbeg) {
                                                                 bool gotmatch;
  
@@ -587,7 +593,7 @@ expand(
                                                 strndupx(end, sbeg, p - sbeg, ATEMP);
                                                 record_match(end);
                                                 afree(end, ATEMP);
-                                               if (stype & 0x100) {
+                                               if (stype & STYPE_AT) {
                                                         if (rrep != null)
                                                                 afree(rrep, ATEMP);
                                                         rrep = wrep ? evalstr(wrep,
@@ -600,11 +606,11 @@ expand(
                                                 sbeg = d + (sbeg - s) + strlen(rrep);
                                                 afree(s, ATEMP);
                                                 s = d;
-                                               if (stype & 0x100) {
+                                               if (stype & STYPE_AT) {
                                                         afree(tpat1, ATEMP);
                                                         afree(pat, ATEMP);
                                                         goto again_search;
-                                               } else if (stype & 0x80)
+                                               } else if (stype & STYPE_DBL)
                                                         goto again_repl;
   end_repl:
                                                 afree(tpat1, ATEMP);
@@ -616,8 +622,8 @@ expand(
                                                 afree(ws, ATEMP);
                                                 goto do_CSUBST;
                                             }
-                                       case '#':
-                                       case '%':
+                                       case ord('#'):
+                                       case ord('%'):
                                                 /* ! DOBLANK,DOBRACE */
                                                 f = (f & DONTRUNCOMMAND) |
                                                     DOPAT | DOTILDE |
@@ -631,10 +637,10 @@ expand(
                                                  */
                                                 if (!Flag(FSH)) {
                                                         *dp++ = MAGIC;
-                                                       *dp++ = 0x80 | '@';
+                                                       *dp++ = ord(0x80 | '@');
                                                 }
                                                 break;
-                                       case '=':
+                                       case ord('='):
                                                 /*
                                                  * Tilde expansion for string
                                                  * variables in POSIX mode is
@@ -658,7 +664,7 @@ expand(
                                                 f &= ~(DOBLANK|DOGLOB|DOBRACE);
                                                 tilde_ok = 1;
                                                 break;
-                                       case '?':
+                                       case ord('?'):
                                                 if (*sp == CSUBST)
                                                         errorf("%s: parameter null or not set",
                                                             st->var->name);
@@ -692,9 +698,9 @@ expand(
                                 f = st->f;
                                 if (f & DOBLANK)
                                         doblank--;
-                               switch (st->stype & 0x17F) {
-                               case '#':
-                               case '%':
+                               switch (st->stype & STYPE_SINGLE) {
+                               case ord('#'):
+                               case ord('%'):
                                         if (!Flag(FSH)) {
                                                 /* Append end-pattern */
                                                 *dp++ = MAGIC;
@@ -724,7 +730,7 @@ expand(
                                                 doblank++;
                                         st = st->prev;
                                         continue;
-                               case '=':
+                               case ord('='):
                                         /*
                                          * Restore our position and substitute
                                          * the value of st->var (may not be
@@ -757,17 +763,17 @@ expand(
                                         st = st->prev;
                                         word = quote || (!*x.str && (f & DOSCALAR)) ? IFS_WORD : IFS_IWS;
                                         continue;
-                               case '?':
+                               case ord('?'):
                                         dp = Xrestpos(ds, dp, st->base);
  
                                         errorf(Tf_sD_s, st->var->name,
                                             debunk(dp, dp, strlen(dp) + 1));
                                         break;
-                               case '0':
-                               case 0x100 | '/':
-                               case '/':
-                               case 0x100 | '#':
-                               case 0x100 | 'Q':
+                               case ord('0'):
+                               case ord('/') | STYPE_AT:
+                               case ord('/'):
+                               case ord('#') | STYPE_AT:
+                               case ord('Q') | STYPE_AT:
                                         dp = Xrestpos(ds, dp, st->base);
                                         type = XSUB;
                                         word = quote || (!*x.str && (f & DOSCALAR)) ? IFS_WORD : IFS_IWS;
@@ -845,7 +851,7 @@ expand(
                                                 doblank--;
                                         continue;
                                 }
-                               c = ifs0;
+                               c = ord(ifs0);
                                 if ((f & DOHEREDOC)) {
                                         /* pseudo-field-split reliably */
                                         if (c == 0)
@@ -891,10 +897,7 @@ expand(
                                 --newlines;
                         } else {
                                 while ((c = shf_getc(x.u.shf)) == 0 ||
-#ifdef MKSH_WITH_TEXTMODE
-                                      c == '\r' ||
-#endif
-                                      c == '\n') {
+                                   ctype(c, C_NL)) {
  #ifdef MKSH_WITH_TEXTMODE
                                         if (c == '\r') {
                                                 c = shf_getc(x.u.shf);
@@ -999,11 +1002,11 @@ expand(
                         tilde_ok <<= 1;
                         /* mark any special second pass chars */
                         if (!quote)
-                               switch (c) {
-                               case '[':
-                               case '!':
-                               case '-':
-                               case ']':
+                               switch (ord(c)) {
+                               case ord('['):
+                               case ord('!'):
+                               case ord('-'):
+                               case ord(']'):
                                         /*
                                          * For character classes - doesn't hurt
                                          * to have magic !,-,]s outside of
@@ -1011,28 +1014,29 @@ expand(
                                          */
                                         if (f & (DOPAT | DOGLOB)) {
                                                 fdo |= DOMAGIC;
-                                               if (c == '[')
+                                               if (c == ord('['))
                                                         fdo |= f & DOGLOB;
                                                 *dp++ = MAGIC;
                                         }
                                         break;
-                               case '*':
-                               case '?':
+                               case ord('*'):
+                               case ord('?'):
                                         if (f & (DOPAT | DOGLOB)) {
                                                 fdo |= DOMAGIC | (f & DOGLOB);
                                                 *dp++ = MAGIC;
                                         }
                                         break;
-                               case '{':
-                               case '}':
-                               case ',':
-                                       if ((f & DOBRACE) && (c == '{' /*}*/ ||
+                               case ord('{'):
+                               case ord('}'):
+                               case ord(','):
+                                       if ((f & DOBRACE) &&
+                                           (ord(c) == ord('{' /*}*/) ||
                                             (fdo & DOBRACE))) {
                                                 fdo |= DOBRACE|DOMAGIC;
                                                 *dp++ = MAGIC;
                                         }
                                         break;
-                               case '=':
+                               case ord('='):
                                         /* Note first unquoted = for ~ */
                                         if (!(f & DOTEMP) && (!Flag(FPOSIX) ||
                                             (f & DOASNTILDE)) && !saw_eq) {
@@ -1040,13 +1044,13 @@ expand(
                                                 tilde_ok = 1;
                                         }
                                         break;
-                               case ':':
+                               case ord(':'):
                                         /* : */
                                         /* Note unquoted : for ~ */
                                         if (!(f & DOTEMP) && (f & DOASNTILDE))
                                                 tilde_ok = 1;
                                         break;
-                               case '~':
+                               case ord('~'):
                                         /*
                                          * tilde_ok is reset whenever
                                          * any of ' " $( $(( ${ } are seen.
@@ -1118,7 +1122,7 @@ varsub(Expand *xp, const char *sp, const char *word,
         struct tbl *vp;
         bool zero_ok = false;
  
-       if ((stype = sp[0]) == '\0')
+       if ((stype = ord(sp[0])) == '\0')
                 /* Bad variable name */
                 return (-1);
  
@@ -1128,20 +1132,20 @@ varsub(Expand *xp, const char *sp, const char *word,
          * ${#var}, string length (-U: characters, +U: octets) or array size
          * ${%var}, string width (-U: screen columns, +U: octets)
          */
-       c = sp[1];
-       if (stype == '%' && c == '\0')
+       c = ord(sp[1]);
+       if (stype == ord('%') && c == '\0')
                 return (-1);
-       if ((stype == '#' || stype == '%') && c != '\0') {
+       if (ctype(stype, C_SUB2) && c != '\0') {
                 /* Can't have any modifiers for ${#...} or ${%...} */
                 if (*word != CSUBST)
                         return (-1);
                 sp++;
                 /* Check for size of array */
-               if ((p = cstrchr(sp, '[')) && (p[1] == '*' || p[1] == '@') &&
-                   p[2] == ']') {
+               if ((p = cstrchr(sp, '[')) && (ord(p[1]) == ord('*') ||
+                   ord(p[1]) == ord('@')) && ord(p[2]) == ord(']')) {
                         int n = 0;
  
-                       if (stype != '#')
+                       if (stype != ord('#'))
                                 return (-1);
                         vp = global(arrayname(sp));
                         if (vp->flag & (ISSET|ARRAY))
@@ -1150,14 +1154,14 @@ varsub(Expand *xp, const char *sp, const char *word,
                                 if (vp->flag & ISSET)
                                         n++;
                         c = n;
-               } else if (c == '*' || c == '@') {
-                       if (stype != '#')
+               } else if (c == ord('*') || c == ord('@')) {
+                       if (stype != ord('#'))
                                 return (-1);
                         c = e->loc->argc;
                 } else {
                         p = str_val(global(sp));
                         zero_ok = p != null;
-                       if (stype == '#')
+                       if (stype == ord('#'))
                                 c = utflen(p);
                         else {
                                 /* partial utf_mbswidth reimplementation */
@@ -1171,7 +1175,7 @@ varsub(Expand *xp, const char *sp, const char *word,
                                         if (!UTFMODE || (len = utf_mbtowc(&wc,
                                             s)) == (size_t)-1)
                                                 /* not UTFMODE or not UTF-8 */
-                                               wc = (unsigned char)(*s++);
+                                               wc = rtt2asc(*s++);
                                         else
                                                 /* UTFMODE and UTF-8 */
                                                 s += len;
@@ -1192,11 +1196,11 @@ varsub(Expand *xp, const char *sp, const char *word,
                 xp->str = shf_smprintf(Tf_d, c);
                 return (XSUB);
         }
-       if (stype == '!' && c != '\0' && *word == CSUBST) {
+       if (stype == ord('!') && c != '\0' && *word == CSUBST) {
                 sp++;
-               if ((p = cstrchr(sp, '[')) && (p[1] == '*' || p[1] == '@') &&
-                   p[2] == ']') {
-                       c = '!';
+               if ((p = cstrchr(sp, '[')) && (ord(p[1]) == ord('*') ||
+                   ord(p[1]) == ord('@')) && ord(p[2]) == ord(']')) {
+                       c = ord('!');
                         stype = 0;
                         goto arraynames;
                 }
@@ -1209,43 +1213,46 @@ varsub(Expand *xp, const char *sp, const char *word,
  
         /* Check for qualifiers in word part */
         stype = 0;
-       c = word[slen + 0] == CHAR ? word[slen + 1] : 0;
-       if (c == ':') {
+       c = word[slen + 0] == CHAR ? ord(word[slen + 1]) : 0;
+       if (c == ord(':')) {
                 slen += 2;
-               stype = 0x80;
-               c = word[slen + 0] == CHAR ? word[slen + 1] : 0;
+               stype = STYPE_DBL;
+               c = word[slen + 0] == CHAR ? ord(word[slen + 1]) : 0;
         }
-       if (!stype && c == '/') {
+       if (!stype && c == ord('/')) {
                 slen += 2;
                 stype = c;
-               if (word[slen] == ADELIM && word[slen + 1] == c) {
+               if (word[slen] == ADELIM &&
+                   ord(word[slen + 1]) == (unsigned int)c) {
                         slen += 2;
-                       stype |= 0x80;
+                       stype |= STYPE_DBL;
                 }
-       } else if (stype == 0x80 && (c == ' ' || c == '0')) {
-               stype |= '0';
-       } else if (ctype(c, C_SUBOP1)) {
+       } else if (stype == STYPE_DBL && (c == ord(' ') || c == ord('0'))) {
+               stype |= ord('0');
+       } else if (ctype(c, C_SUB1)) {
                 slen += 2;
                 stype |= c;
-       } else if (ksh_issubop2(c)) {
+       } else if (ctype(c, C_SUB2)) {
                 /* Note: ksh88 allows :%, :%%, etc */
                 slen += 2;
                 stype = c;
-               if (word[slen + 0] == CHAR && c == word[slen + 1]) {
-                       stype |= 0x80;
+               if (word[slen + 0] == CHAR &&
+                   ord(word[slen + 1]) == (unsigned int)c) {
+                       stype |= STYPE_DBL;
                         slen += 2;
                 }
-       } else if (c == '@') {
+       } else if (c == ord('@')) {
                 /* @x where x is command char */
-               switch (c = word[slen + 2] == CHAR ? word[slen + 3] : 0) {
-               case '#':
-               case '/':
-               case 'Q':
+               switch (c = ord(word[slen + 2]) == CHAR ?
+                   ord(word[slen + 3]) : 0) {
+               case ord('#'):
+               case ord('/'):
+               case ord('Q'):
                         break;
                 default:
                         return (-1);
                 }
-               stype |= 0x100 | c;
+               stype |= STYPE_AT | c;
                 slen += 4;
         } else if (stype)
                 /* : is not ok */
@@ -1253,51 +1260,51 @@ varsub(Expand *xp, const char *sp, const char *word,
         if (!stype && *word != CSUBST)
                 return (-1);
  
-       c = sp[0];
-       if (c == '*' || c == '@') {
-               switch (stype & 0x17F) {
+       c = ord(sp[0]);
+       if (c == ord('*') || c == ord('@')) {
+               switch (stype & STYPE_SINGLE) {
                 /* can't assign to a vector */
-               case '=':
+               case ord('='):
                 /* can't trim a vector (yet) */
-               case '%':
-               case '#':
-               case '?':
-               case '0':
-               case 0x100 | '/':
-               case '/':
-               case 0x100 | '#':
-               case 0x100 | 'Q':
+               case ord('%'):
+               case ord('#'):
+               case ord('?'):
+               case ord('0'):
+               case ord('/') | STYPE_AT:
+               case ord('/'):
+               case ord('#') | STYPE_AT:
+               case ord('Q') | STYPE_AT:
                         return (-1);
                 }
                 if (e->loc->argc == 0) {
                         xp->str = null;
                         xp->var = global(sp);
-                       state = c == '@' ? XNULLSUB : XSUB;
+                       state = c == ord('@') ? XNULLSUB : XSUB;
                 } else {
                         xp->u.strv = (const char **)e->loc->argv + 1;
                         xp->str = *xp->u.strv++;
                         /* $@ */
-                       xp->split = tobool(c == '@');
+                       xp->split = tobool(c == ord('@'));
                         state = XARG;
                 }
                 /* POSIX 2009? */
                 zero_ok = true;
-       } else if ((p = cstrchr(sp, '[')) && (p[1] == '*' || p[1] == '@') &&
-           p[2] == ']') {
+       } else if ((p = cstrchr(sp, '[')) && (ord(p[1]) == ord('*') ||
+           ord(p[1]) == ord('@')) && ord(p[2]) == ord(']')) {
                 XPtrV wv;
  
-               switch (stype & 0x17F) {
+               switch (stype & STYPE_SINGLE) {
                 /* can't assign to a vector */
-               case '=':
+               case ord('='):
                 /* can't trim a vector (yet) */
-               case '%':
-               case '#':
-               case '?':
-               case '0':
-               case 0x100 | '/':
-               case '/':
-               case 0x100 | '#':
-               case 0x100 | 'Q':
+               case ord('%'):
+               case ord('#'):
+               case ord('?'):
+               case ord('0'):
+               case ord('/') | STYPE_AT:
+               case ord('/'):
+               case ord('#') | STYPE_AT:
+               case ord('Q') | STYPE_AT:
                         return (-1);
                 }
                 c = 0;
@@ -1307,45 +1314,45 @@ varsub(Expand *xp, const char *sp, const char *word,
                 for (; vp; vp = vp->u.array) {
                         if (!(vp->flag&ISSET))
                                 continue;
-                       XPput(wv, c == '!' ? shf_smprintf(Tf_lu,
+                       XPput(wv, c == ord('!') ? shf_smprintf(Tf_lu,
                             arrayindex(vp)) :
                             str_val(vp));
                 }
                 if (XPsize(wv) == 0) {
                         xp->str = null;
-                       state = p[1] == '@' ? XNULLSUB : XSUB;
+                       state = ord(p[1]) == ord('@') ? XNULLSUB : XSUB;
                         XPfree(wv);
                 } else {
                         XPput(wv, 0);
                         xp->u.strv = (const char **)XPptrv(wv);
                         xp->str = *xp->u.strv++;
                         /* ${foo[@]} */
-                       xp->split = tobool(p[1] == '@');
+                       xp->split = tobool(ord(p[1]) == ord('@'));
                         state = XARG;
                 }
         } else {
                 xp->var = global(sp);
                 xp->str = str_val(xp->var);
                 /* can't assign things like $! or $1 */
-               if ((stype & 0x17F) == '=' && !*xp->str &&
+               if ((stype & STYPE_SINGLE) == ord('=') && !*xp->str &&
                     ctype(*sp, C_VAR1 | C_DIGIT))
                         return (-1);
                 state = XSUB;
         }
  
-       c = stype & 0x7F;
+       c = stype & STYPE_CHAR;
         /* test the compiler's code generator */
-       if (((stype < 0x100) && (ksh_issubop2(c) ||
-           (((stype & 0x80) ? *xp->str == '\0' : xp->str == null) &&
+       if ((!(stype & STYPE_AT) && (ctype(c, C_SUB2) ||
+           (((stype & STYPE_DBL) ? *xp->str == '\0' : xp->str == null) &&
             (state != XARG || (ifs0 || xp->split ?
             (xp->u.strv[0] == NULL) : !hasnonempty(xp->u.strv))) ?
-           c == '=' || c == '-' || c == '?' : c == '+'))) ||
-           stype == (0x80 | '0') || stype == (0x100 | '#') ||
-           stype == (0x100 | 'Q') || (stype & 0x7F) == '/')
+           ctype(c, C_EQUAL | C_MINUS | C_QUEST) : c == ord('+')))) ||
+           stype == (ord('0') | STYPE_DBL) || stype == (ord('#') | STYPE_AT) ||
+           stype == (ord('Q') | STYPE_AT) || (stype & STYPE_CHAR) == ord('/'))
                 /* expand word instead of variable value */
                 state = XBASE;
         if (Flag(FNOUNSET) && xp->str == null && !zero_ok &&
-           (ksh_issubop2(c) || (state != XBASE && c != '+')))
+           (ctype(c, C_SUB2) || (state != XBASE && c != ord('+'))))
                 errorf(Tf_parm, sp);
         *stypep = stype;
         *slenp = slen;
@@ -1408,7 +1415,7 @@ comsub(Expand *xp, const char *cp, int fn)
                         if (!herein(io, &name)) {
                                 xp->str = name;
                                 /* as $(…) requires, trim trailing newlines */
-                               name += strlen(name);
+                               name = strnul(name);
                                 while (name > xp->str && name[-1] == '\n')
                                         --name;
                                 *name = '\0';
@@ -1483,8 +1490,8 @@ trimsub(char *str, char *pat, int how)
         char *end = strnul(str);
         char *p, c;
  
-       switch (how & 0xFF) {
-       case '#':
+       switch (how & (STYPE_CHAR | STYPE_DBL)) {
+       case ord('#'):
                 /* shortest match at beginning */
                 for (p = str; p <= end; p += utf_ptradj(p)) {
                         c = *p; *p = '\0';
@@ -1496,7 +1503,7 @@ trimsub(char *str, char *pat, int how)
                         *p = c;
                 }
                 break;
-       case '#'|0x80:
+       case ord('#') | STYPE_DBL:
                 /* longest match at beginning */
                 for (p = end; p >= str; p--) {
                         c = *p; *p = '\0';
@@ -1508,7 +1515,7 @@ trimsub(char *str, char *pat, int how)
                         *p = c;
                 }
                 break;
-       case '%':
+       case ord('%'):
                 /* shortest match at end */
                 p = end;
                 while (p >= str) {
@@ -1516,7 +1523,7 @@ trimsub(char *str, char *pat, int how)
                                 goto trimsub_match;
                         if (UTFMODE) {
                                 char *op = p;
-                               while ((p-- > str) && ((*p & 0xC0) == 0x80))
+                               while ((p-- > str) && ((rtt2asc(*p) & 0xC0) == 0x80))
                                         ;
                                 if ((p < str) || (p + utf_ptradj(p) != op))
                                         p = op - 1;
@@ -1524,7 +1531,7 @@ trimsub(char *str, char *pat, int how)
                                 --p;
                 }
                 break;
-       case '%'|0x80:
+       case ord('%') | STYPE_DBL:
                 /* longest match at end */
                 for (p = str; p <= end; p++)
                         if (gmatchx(p, pat, false)) {
@@ -1555,7 +1562,7 @@ glob(char *cp, XPtrV *wp, bool markdirs)
                 XPput(*wp, debunk(cp, cp, strlen(cp) + 1));
         else
                 qsort(XPptrv(*wp) + oldsize, XPsize(*wp) - oldsize,
-                   sizeof(void *), xstrcmp);
+                   sizeof(void *), ascpstrcmp);
  }
  
  #define GF_NONE                0
@@ -1658,7 +1665,7 @@ globit(XString *xs,       /* dest string */
                 *np++ = '\0';
         } else {
                 odirsep = '\0'; /* keep gcc quiet */
-               se = sp + strlen(sp);
+               se = strnul(sp);
         }
  
  
@@ -1669,10 +1676,10 @@ globit(XString *xs,     /* dest string */
          * directory isn't readable - if no globbing is needed, only execute
          * permission should be required (as per POSIX)).
          */
-       if (!has_globbing(sp, se)) {
+       if (!has_globbing(sp)) {
                 XcheckN(*xs, xp, se - sp + 1);
                 debunk(xp, sp, Xnleft(*xs, xp));
-               xp += strlen(xp);
+               xp = strnul(xp);
                 *xpp = xp;
                 globit(xs, xpp, np, wp, check);
         } else {
@@ -1701,9 +1708,8 @@ globit(XString *xs,       /* dest string */
                         XcheckN(*xs, xp, len);
                         memcpy(xp, name, len);
                         *xpp = xp + len - 1;
-                       globit(xs, xpp, np, wp,
-                               (check & GF_MARKDIR) | GF_GLOBBED
-                               | (np ? GF_EXCHECK : GF_NONE));
+                       globit(xs, xpp, np, wp, (check & GF_MARKDIR) |
+                           GF_GLOBBED | (np ? GF_EXCHECK : GF_NONE));
                         xp = Xstring(*xs, xp) + prefix_len;
                 }
                 closedir(dirp);
@@ -1728,7 +1734,7 @@ debunk(char *dp, const char *sp, size_t dlen)
                 memmove(dp, sp, s - sp);
                 for (d = dp + (s - sp); *s && (d - dp < (ssize_t)dlen); s++)
                         if (!ISMAGIC(*s) || !(*++s & 0x80) ||
-                           !vstrchr("*+?@! ", *s & 0x7f))
+                           !ctype(*s & 0x7F, C_PATMO | C_SPC))
                                 *d++ = *s;
                         else {
                                 /* extended pattern operators: *+?@! */
@@ -1857,7 +1863,7 @@ alt_expand(XPtrV *wp, char *start, char *exp_start, char *end, int fdo)
         char *p = exp_start;
  
         /* search for open brace */
-       while ((p = strchr(p, MAGIC)) && p[1] != '{' /*}*/)
+       while ((p = strchr(p, MAGIC)) && ord(p[1]) != ord('{' /*}*/))
                 p += 2;
         brace_start = p;
  
@@ -1868,9 +1874,9 @@ alt_expand(XPtrV *wp, char *start, char *exp_start, char *end, int fdo)
                 p += 2;
                 while (*p && count) {
                         if (ISMAGIC(*p++)) {
-                               if (*p == '{' /*}*/)
+                               if (ord(*p) == ord('{' /*}*/))
                                         ++count;
-                               else if (*p == /*{*/ '}')
+                               else if (ord(*p) == ord(/*{*/ '}'))
                                         --count;
                                 else if (*p == ',' && count == 1)
                                         comma = p;
@@ -1902,9 +1908,9 @@ alt_expand(XPtrV *wp, char *start, char *exp_start, char *end, int fdo)
         count = 1;
         for (p = brace_start + 2; p != brace_end; p++) {
                 if (ISMAGIC(*p)) {
-                       if (*++p == '{' /*}*/)
+                       if (ord(*++p) == ord('{' /*}*/))
                                 ++count;
-                       else if ((*p == /*{*/ '}' && --count == 0) ||
+                       else if ((ord(*p) == ord(/*{*/ '}') && --count == 0) ||
                             (*p == ',' && count == 1)) {
                                 char *news;
                                 int l1, l2, l3;
diff --git a/src/exec.c b/src/exec.c

index 6307bce..56a42f6 100644 (file)
--- a/src/exec.c
+++ b/src/exec.c
@@ -23,7 +23,7 @@
  
  #include "sh.h"
  
-__RCSID("$MirOS: src/bin/mksh/exec.c,v 1.196 2017/04/12 16:46:21 tg Exp $");
+__RCSID("$MirOS: src/bin/mksh/exec.c,v 1.199 2017/08/07 21:16:31 tg Exp $");
  
  #ifndef MKSH_DEFAULT_EXECSHELL
  #define MKSH_DEFAULT_EXECSHELL MKSH_UNIXROOT "/bin/sh"
@@ -554,6 +554,9 @@ comexec(struct op *t, struct tbl * volatile tp, const char **ap,
                                 }
                         ap += builtin_opt.optind;
                         flags |= XEXEC;
+                       /* POSuX demands ksh88-like behaviour here */
+                       if (Flag(FPOSIX))
+                               fcflags = FC_PATH;
                 } else if (tp->val.f == c_command) {
                         bool saw_p = false;
  
@@ -885,7 +888,9 @@ scriptexec(struct op *tp, const char **ap)
  #ifndef MKSH_SMALL
         if ((fd = binopen2(tp->str, O_RDONLY)) >= 0) {
                 unsigned char *cp;
+#ifndef MKSH_EBCDIC
                 unsigned short m;
+#endif
                 ssize_t n;
  
  #if defined(__OS2__) && defined(MKSH_WITH_TEXTMODE)
@@ -905,7 +910,7 @@ scriptexec(struct op *tp, const char **ap)
                     (buf[2] == 0xBF)) ? 3 : 0);
  
                 /* scan for newline or NUL (end of buffer) */
-               while (*cp && *cp != '\n')
+               while (!ctype(*cp, C_NL | C_NUL))
                         ++cp;
                 /* if the shebang line is longer than MAXINTERP, bail out */
                 if (!*cp)
@@ -920,13 +925,13 @@ scriptexec(struct op *tp, const char **ap)
                         cp += 2;
  #ifdef __OS2__
                 else if (!strncmp(cp, Textproc, 7) &&
-                   (cp[7] == ' ' || cp[7] == '\t'))
+                   ctype(cp[7], C_BLANK))
                         cp += 8;
  #endif
                 else
                         goto noshebang;
                 /* skip whitespace before shell name */
-               while (*cp == ' ' || *cp == '\t')
+               while (ctype(*cp, C_BLANK))
                         ++cp;
                 /* just whitespace on the line? */
                 if (*cp == '\0')
@@ -934,13 +939,13 @@ scriptexec(struct op *tp, const char **ap)
                 /* no, we actually found an interpreter name */
                 sh = (char *)cp;
                 /* look for end of shell/interpreter name */
-               while (*cp != ' ' && *cp != '\t' && *cp != '\0')
+               while (!ctype(*cp, C_BLANK | C_NUL))
                         ++cp;
                 /* any arguments? */
                 if (*cp) {
                         *cp++ = '\0';
                         /* skip spaces before arguments */
-                       while (*cp == ' ' || *cp == '\t')
+                       while (ctype(*cp, C_BLANK))
                                 ++cp;
                         /* pass it all in ONE argument (historic reasons) */
                         if (*cp)
@@ -959,6 +964,7 @@ scriptexec(struct op *tp, const char **ap)
  #endif
                 goto nomagic;
   noshebang:
+#ifndef MKSH_EBCDIC
                 m = buf[0] << 8 | buf[1];
                 if (m == 0x7F45 && buf[2] == 'L' && buf[3] == 'F')
                         errorf("%s: not executable: %d-bit ELF file", tp->str,
@@ -977,6 +983,7 @@ scriptexec(struct op *tp, const char **ap)
                     buf[4] == 'Z') || (m == /* 7zip */ 0x377A) ||
                     (m == /* gzip */ 0x1F8B) || (m == /* .Z */ 0x1F9D))
                         errorf("%s: not executable: magic %04X", tp->str, m);
+#endif
  #ifdef __OS2__
                 cp = _getext(tp->str);
                 if (cp && (!stricmp(cp, ".cmd") || !stricmp(cp, ".bat"))) {
@@ -1337,7 +1344,7 @@ search_path(const char *name, const char *lpath,
         while (sp != NULL) {
                 xp = Xstring(xs, xp);
                 if (!(p = cstrchr(sp, MKSH_PATHSEPC)))
-                       p = sp + strlen(sp);
+                       p = strnul(sp);
                 if (p != sp) {
                         XcheckN(xs, xp, p - sp);
                         memcpy(xp, sp, p - sp);
diff --git a/src/expr.c b/src/expr.c

index 124dc17..12989d4 100644 (file)
--- a/src/expr.c
+++ b/src/expr.c
@@ -23,7 +23,7 @@
  
  #include "sh.h"
  
-__RCSID("$MirOS: src/bin/mksh/expr.c,v 1.93 2017/04/02 16:47:41 tg Exp $");
+__RCSID("$MirOS: src/bin/mksh/expr.c,v 1.100 2017/08/07 21:38:55 tg Exp $");
  
  #define EXPRTOK_DEFNS
  #include "exprtok.h"
@@ -558,9 +558,9 @@ exprtoken(Expr_state *es)
  
         /* skip whitespace */
   skip_spaces:
-       while ((c = *cp), ksh_isspace(c))
+       while (ctype(ord((c = *cp)), C_SPACE))
                 ++cp;
-       if (es->tokp == es->expression && c == '#') {
+       if (es->tokp == es->expression && c == ord('#')) {
                 /* expression begins with # */
                 /* switch to unsigned */
                 es->natural = true;
@@ -571,11 +571,11 @@ exprtoken(Expr_state *es)
  
         if (c == '\0')
                 es->tok = END;
-       else if (ksh_isalphx(c)) {
+       else if (ctype(c, C_ALPHX)) {
                 do {
-                       c = *++cp;
-               } while (ksh_isalnux(c));
-               if (c == '[') {
+                       c = ord(*++cp);
+               } while (ctype(c, C_ALNUX));
+               if (c == ord('[')) {
                         size_t len;
  
                         len = array_ref_len(cp);
@@ -617,9 +617,9 @@ exprtoken(Expr_state *es)
                 tvar[c] = '\0';
                 goto process_tvar;
  #endif
-       } else if (ksh_isdigit(c)) {
-               while (c != '_' && (ksh_isalnux(c) || c == '#'))
-                       c = *cp++;
+       } else if (ctype(c, C_DIGIT)) {
+               while (ctype(c, C_ALNUM | C_HASH))
+                       c = ord(*cp++);
                 strndupx(tvar, es->tokp, --cp - es->tokp, ATEMP);
   process_tvar:
                 es->val = tempvar("");
@@ -633,7 +633,7 @@ exprtoken(Expr_state *es)
         } else {
                 int i, n0;
  
-               for (i = 0; (n0 = opname[i][0]); i++)
+               for (i = 0; (n0 = ord(opname[i][0])); i++)
                         if (c == n0 && strncmp(cp, opname[i],
                             (size_t)oplen[i]) == 0) {
                                 es->tok = (enum token)i;
@@ -772,8 +772,7 @@ utf_ptradj(const char *src)
  {
         register size_t n;
  
-       if (!UTFMODE ||
-           *(const unsigned char *)(src) < 0xC2 ||
+       if (!UTFMODE || rtt2asc(*src) < 0xC2 ||
             (n = utf_mbtowc(NULL, src)) == (size_t)-1)
                 n = 1;
         return (n);
@@ -791,7 +790,7 @@ utf_mbtowc(unsigned int *dst, const char *src)
         const unsigned char *s = (const unsigned char *)src;
         unsigned int c, wc;
  
-       if ((wc = *s++) < 0x80) {
+       if ((wc = ord(rtt2asc(*s++))) < 0x80) {
   out:
                 if (dst != NULL)
                         *dst = wc;
@@ -805,7 +804,7 @@ utf_mbtowc(unsigned int *dst, const char *src)
  
         if (wc < 0xE0) {
                 wc = (wc & 0x1F) << 6;
-               if (((c = *s++) & 0xC0) != 0x80)
+               if (((c = ord(rtt2asc(*s++))) & 0xC0) != 0x80)
                         goto ilseq;
                 wc |= c & 0x3F;
                 goto out;
@@ -813,11 +812,11 @@ utf_mbtowc(unsigned int *dst, const char *src)
  
         wc = (wc & 0x0F) << 12;
  
-       if (((c = *s++) & 0xC0) != 0x80)
+       if (((c = ord(rtt2asc(*s++))) & 0xC0) != 0x80)
                 goto ilseq;
         wc |= (c & 0x3F) << 6;
  
-       if (((c = *s++) & 0xC0) != 0x80)
+       if (((c = ord(rtt2asc(*s++))) & 0xC0) != 0x80)
                 goto ilseq;
         wc |= c & 0x3F;
  
@@ -834,18 +833,18 @@ utf_wctomb(char *dst, unsigned int wc)
         unsigned char *d;
  
         if (wc < 0x80) {
-               *dst = wc;
+               *dst = asc2rtt(wc);
                 return (1);
         }
  
         d = (unsigned char *)dst;
         if (wc < 0x0800)
-               *d++ = (wc >> 6) | 0xC0;
+               *d++ = asc2rtt((wc >> 6) | 0xC0);
         else {
-               *d++ = ((wc = wc > 0xFFFD ? 0xFFFD : wc) >> 12) | 0xE0;
-               *d++ = ((wc >> 6) & 0x3F) | 0x80;
+               *d++ = asc2rtt(((wc = wc > 0xFFFD ? 0xFFFD : wc) >> 12) | 0xE0);
+               *d++ = asc2rtt(((wc >> 6) & 0x3F) | 0x80);
         }
-       *d++ = (wc & 0x3F) | 0x80;
+       *d++ = asc2rtt((wc & 0x3F) | 0x80);
         return ((char *)d - dst);
  }
  
@@ -873,7 +872,7 @@ ksh_access(const char *fn, int mode)
  }
  
  #ifndef MIRBSD_BOOTFLOPPY
-/* From: X11/xc/programs/xterm/wcwidth.c,v 1.9 */
+/* From: X11/xc/programs/xterm/wcwidth.c,v 1.10 */
  
  struct mb_ucsrange {
         unsigned short beg;
@@ -884,8 +883,8 @@ static int mb_ucsbsearch(const struct mb_ucsrange arr[], size_t elems,
      unsigned int val) MKSH_A_PURE;
  
  /*
- * Generated from the Unicode Character Database, Version 9.0.0, by
- * MirOS: contrib/code/Snippets/eawparse,v 1.3 2014/11/16 12:16:24 tg Exp $
+ * Generated from the Unicode Character Database, Version 10.0.0, by
+ * MirOS: contrib/code/Snippets/eawparse,v 1.10 2017/07/12 22:47:26 tg Exp $
   */
  
  static const struct mb_ucsrange mb_ucs_combining[] = {
@@ -941,6 +940,7 @@ static const struct mb_ucsrange mb_ucs_combining[] = {
         { 0x0AC7, 0x0AC8 },
         { 0x0ACD, 0x0ACD },
         { 0x0AE2, 0x0AE3 },
+       { 0x0AFA, 0x0AFF },
         { 0x0B01, 0x0B01 },
         { 0x0B3C, 0x0B3C },
         { 0x0B3F, 0x0B3F },
@@ -963,7 +963,8 @@ static const struct mb_ucsrange mb_ucs_combining[] = {
         { 0x0CC6, 0x0CC6 },
         { 0x0CCC, 0x0CCD },
         { 0x0CE2, 0x0CE3 },
-       { 0x0D01, 0x0D01 },
+       { 0x0D00, 0x0D01 },
+       { 0x0D3B, 0x0D3C },
         { 0x0D41, 0x0D44 },
         { 0x0D4D, 0x0D4D },
         { 0x0D62, 0x0D63 },
@@ -1048,7 +1049,7 @@ static const struct mb_ucsrange mb_ucs_combining[] = {
         { 0x1CED, 0x1CED },
         { 0x1CF4, 0x1CF4 },
         { 0x1CF8, 0x1CF9 },
-       { 0x1DC0, 0x1DF5 },
+       { 0x1DC0, 0x1DF9 },
         { 0x1DFB, 0x1DFF },
         { 0x200B, 0x200F },
         { 0x202A, 0x202E },
@@ -1136,14 +1137,16 @@ static const struct mb_ucsrange mb_ucs_fullwidth[] = {
         { 0x2B1B, 0x2B1C },
         { 0x2B50, 0x2B50 },
         { 0x2B55, 0x2B55 },
-       { 0x2E80, 0x303E },
-       { 0x3040, 0xA4CF },
+       { 0x2E80, 0x3029 },
+       { 0x302E, 0x303E },
+       { 0x3040, 0x3098 },
+       { 0x309B, 0xA4CF },
         { 0xA960, 0xA97F },
         { 0xAC00, 0xD7A3 },
         { 0xF900, 0xFAFF },
         { 0xFE10, 0xFE19 },
         { 0xFE30, 0xFE6F },
-       { 0xFF00, 0xFF60 },
+       { 0xFF01, 0xFF60 },
         { 0xFFE0, 0xFFE6 }
  };
  
diff --git a/src/funcs.c b/src/funcs.c

index 930462d..38e66f8 100644 (file)
--- a/src/funcs.c
+++ b/src/funcs.c
@@ -38,7 +38,7 @@
  #endif
  #endif
  
-__RCSID("$MirOS: src/bin/mksh/funcs.c,v 1.340 2017/04/12 17:46:29 tg Exp $");
+__RCSID("$MirOS: src/bin/mksh/funcs.c,v 1.350 2017/05/05 22:53:28 tg Exp $");
  
  #if HAVE_KILLPG
  /*
@@ -751,11 +751,15 @@ do_whence(const char **wp, int fcflags, bool vflag, bool iscommand)
  bool
  valid_alias_name(const char *cp)
  {
+       /*
+        * Returns true if cp may be used as an alias name: it must not
+        * begin with '-', must not be exactly "[[", and may consist only
+        * of characters in the C_ALIAS class (an empty name passes, as
+        * the loop below is then never entered).
+        */
+       if (ord(*cp) == ord('-'))
+               return (false);
+       /* reject the two-character name "[[" (cp[2] is the terminator) */
+       if (ord(cp[0]) == ord('[') && ord(cp[1]) == ord('[') && !cp[2])
+               return (false);
+       /* every character up to the terminating NUL must be in C_ALIAS */
         while (*cp)
-               if (!ksh_isalias(*cp))
-                       return (false);
-               else
+               if (ctype(*cp, C_ALIAS))
                         ++cp;
+               else
+                       return (false);
         return (true);
  }
  
@@ -764,7 +768,7 @@ c_alias(const char **wp)
  {
         struct table *t = &aliases;
         int rv = 0, prefix = 0;
-       bool rflag = false, tflag, Uflag = false, pflag = false;
+       bool rflag = false, tflag, Uflag = false, pflag = false, chkalias;
         uint32_t xflag = 0;
         int optc;
  
@@ -809,12 +813,13 @@ c_alias(const char **wp)
         wp += builtin_opt.optind;
  
         if (!(builtin_opt.info & GI_MINUSMINUS) && *wp &&
-           (wp[0][0] == '-' || wp[0][0] == '+') && wp[0][1] == '\0') {
+           ctype(wp[0][0], C_MINUS | C_PLUS) && wp[0][1] == '\0') {
                 prefix = wp[0][0];
                 wp++;
         }
  
         tflag = t == &taliases;
+       chkalias = t == &aliases;
  
         /* "hash -r" means reset all the tracked aliases.. */
         if (rflag) {
@@ -857,7 +862,7 @@ c_alias(const char **wp)
                         strndupx(xalias, alias, val++ - alias, ATEMP);
                         alias = xalias;
                 }
-               if (!valid_alias_name(alias) || *alias == '-') {
+               if (chkalias && !valid_alias_name(alias)) {
                         bi_errorf(Tinvname, alias, Talias);
                         afree(xalias, ATEMP);
                         return (1);
@@ -1072,8 +1077,7 @@ c_kill(const char **wp)
         int i, n, rv, sig;
  
         /* assume old style options if -digits or -UPPERCASE */
-       if ((p = wp[1]) && *p == '-' && (ksh_isdigit(p[1]) ||
-           ksh_isupper(p[1]))) {
+       if ((p = wp[1]) && *p == '-' && ctype(p[1], C_DIGIT | C_UPPER)) {
                 if (!(t = gettrap(p + 1, false, false))) {
                         bi_errorf(Tbad_sig_s, p + 1);
                         return (1);
@@ -1422,9 +1426,9 @@ c_umask(const char **wp)
         } else {
                 mode_t new_umask;
  
-               if (ksh_isdigit(*cp)) {
+               if (ctype(*cp, C_DIGIT)) {
                         new_umask = 0;
-                       while (*cp >= ord('0') && *cp <= ord('7')) {
+                       while (ctype(*cp, C_OCTAL)) {
                                 new_umask = new_umask * 8 + ksh_numdig(*cp);
                                 ++cp;
                         }
@@ -1462,7 +1466,7 @@ c_umask(const char **wp)
                                 if (!positions)
                                         /* default is a */
                                         positions = 0111;
-                               if (!vstrchr("=+-", op = *cp))
+                               if (!ctype((op = *cp), C_EQUAL | C_MINUS | C_PLUS))
                                         break;
                                 cp++;
                                 new_val = 0;
@@ -1503,7 +1507,7 @@ c_umask(const char **wp)
                                 if (*cp == ',') {
                                         positions = 0;
                                         cp++;
-                               } else if (!vstrchr("=+-", *cp))
+                               } else if (!ctype(*cp, C_EQUAL | C_MINUS | C_PLUS))
                                         break;
                         }
                         if (*cp) {
@@ -1585,7 +1589,7 @@ c_wait(const char **wp)
         return (rv);
  }
  
-static char REPLY[] = "REPLY";
+static const char REPLY[] = "REPLY";
  int
  c_read(const char **wp)
  {
@@ -2300,8 +2304,9 @@ c_unset(const char **wp)
                         size_t n;
  
                         n = strlen(id);
-                       if (n > 3 && id[n-3] == '[' && id[n-2] == '*' &&
-                           id[n-1] == ']') {
+                       if (n > 3 && ord(id[n - 3]) == ord('[') &&
+                           ord(id[n - 2]) == ord('*') &&
+                           ord(id[n - 1]) == ord(']')) {
                                 strndupx(cp, id, n - 3, ATEMP);
                                 id = cp;
                                 optc = 3;
@@ -3350,7 +3355,7 @@ set_ulimit(const struct limits *l, const char *v, int how)
                  * If this causes problems, will have to add parameter to
                  * evaluate() to control if unset params are 0 or an error.
                  */
-               if (!rval && !ksh_isdigit(v[0])) {
+               if (!rval && !ctype(v[0], C_DIGIT)) {
                         bi_errorf("invalid %s limit: %s", l->name, v);
                         return (1);
                 }
diff --git a/src/histrap.c b/src/histrap.c

index 26dd521..6b9396e 100644 (file)
--- a/src/histrap.c
+++ b/src/histrap.c
@@ -3,7 +3,7 @@
  
  /*-
   * Copyright (c) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
- *              2011, 2012, 2014, 2015, 2016
+ *              2011, 2012, 2014, 2015, 2016, 2017
   *     mirabilos <m@mirbsd.org>
   *
   * Provided that these terms and disclaimer and all copyright notices
@@ -27,7 +27,7 @@
  #include <sys/file.h>
  #endif
  
-__RCSID("$MirOS: src/bin/mksh/histrap.c,v 1.160 2017/04/08 01:07:16 tg Exp $");
+__RCSID("$MirOS: src/bin/mksh/histrap.c,v 1.166 2017/08/07 23:25:09 tg Exp $");
  
  Trap sigtraps[ksh_NSIG + 1];
  static struct sigaction Sigact_ign;
@@ -629,7 +629,7 @@ histsave(int *lnp, const char *cmd, int svmode, bool ignoredups)
         if (svmode == HIST_FLUSH)
                 return;
  
-       ccp = cmd + strlen(cmd);
+       ccp = strnul(cmd);
         while (ccp > cmd && ccp[-1] == '\n')
                 --ccp;
         strndupx(c, cmd, ccp - cmd, APERM);
@@ -714,26 +714,66 @@ histsave(int *lnp, const char *cmd, int svmode, bool ignoredups)
  
  #if HAVE_PERSISTENT_HISTORY
  static const unsigned char sprinkle[2] = { HMAGIC1, HMAGIC2 };
-#endif
  
-void
-hist_init(Source *s)
+/*
+ * Write the whole content of srcfd back into the history file: rewind
+ * both descriptors, copy srcfd to histfd in MKSH_HS_BUFSIZ-sized chunks,
+ * then ftruncate() histfd to the size of srcfd.
+ * Returns 0 on success, 1 if seeking, allocating, reading, writing or
+ * truncating failed.
+ * NOTE(review): hist_persist_init opens the history file with O_APPEND;
+ * if histfd carries that flag, POSIX write() appends regardless of the
+ * offset set by the lseek() below -- confirm the rewind is effective.
+ */
+static int
+hist_persist_back(int srcfd)
  {
-#if HAVE_PERSISTENT_HISTORY
-       unsigned char *base;
-       int lines, fd;
-       enum { hist_init_first, hist_init_retry, hist_init_restore } hs;
-#endif
+       /* tot: size of srcfd; mis: bytes of it not yet read */
+       off_t tot, mis;
+       /* n: bytes of the current chunk left to write; w: one write() result */
+       ssize_t n, w;
+       /* buf: copy buffer; cp: position inside buf of the unwritten rest */
+       char *buf, *cp;
+       int rv = 0;
+#define MKSH_HS_BUFSIZ 4096
+
+       /* measure the source, then rewind source and destination */
+       if ((tot = lseek(srcfd, (off_t)0, SEEK_END)) < 0 ||
+           lseek(srcfd, (off_t)0, SEEK_SET) < 0 ||
+           lseek(histfd, (off_t)0, SEEK_SET) < 0)
+               return (1);
  
-       histsave(NULL, NULL, HIST_DISCARD, true);
+       if ((buf = malloc_osfunc(MKSH_HS_BUFSIZ)) == NULL)
+               return (1);
  
-       if (Flag(FTALKING) == 0)
-               return;
+       mis = tot;
+       while (mis > 0) {
+               /* fetch the next chunk; cp is reset to the buffer start */
+               if ((n = blocking_read(srcfd, (cp = buf),
+                   MKSH_HS_BUFSIZ)) == -1) {
+                       /* interrupted: run intrcheck(), then retry the read */
+                       if (errno == EINTR) {
+                               intrcheck();
+                               continue;
+                       }
+                       goto copy_error;
+               }
+               /*
+                * NOTE(review): a read of 0 bytes leaves mis unchanged, so
+                * this loop would not advance if srcfd ended early; presumably
+                * impossible for the caller's temporary file -- confirm
+                */
+               mis -= n;
+               /* write out the chunk, coping with short writes */
+               while (n) {
+                       /* a pending interrupt is handled before writing */
+                       if (intrsig)
+                               goto has_intrsig;
+                       if ((w = write(histfd, cp, n)) != -1) {
+                               n -= w;
+                               cp += w;
+                               continue;
+                       }
+                       if (errno == EINTR) {
+ has_intrsig:
+                               intrcheck();
+                               continue;
+                       }
+                       goto copy_error;
+               }
+       }
+       /* set the history file length to the size of the copied source */
+       if (ftruncate(histfd, tot)) {
+               /* read/write failures jump here to share this failure path */
+ copy_error:
+               rv = 1;
+       }
+       free_osfunc(buf);
+       return (rv);
+}
  
-       hstarted = true;
-       hist_source = s;
+static void
+hist_persist_init(void)
+{
+       unsigned char *base;
+       int lines, fd;
+       enum { hist_init_first, hist_init_retry, hist_use_it } hs;
  
-#if HAVE_PERSISTENT_HISTORY
         if (((hname = str_val(global("HISTFILE"))) == NULL) || !*hname) {
                 hname = NULL;
                 return;
@@ -745,17 +785,16 @@ hist_init(Source *s)
         /* we have a file and are interactive */
         if ((fd = binopen3(hname, O_RDWR | O_CREAT | O_APPEND, 0600)) < 0)
                 return;
-
-       histfd = savefd(fd);
+       if ((histfd = savefd(fd)) < 0)
+               return;
         if (histfd != fd)
                 close(fd);
  
         mksh_lockfd(histfd);
  
         histfsize = lseek(histfd, (off_t)0, SEEK_END);
-       if (histfsize > MKSH_MAXHISTFSIZE || hs == hist_init_restore) {
+       if (histfsize > MKSH_MAXHISTFSIZE) {
                 /* we ignore too large files but still append to them */
-               /* we also don't need to re-read after truncation */
                 goto hist_init_tail;
         } else if (histfsize > 2) {
                 /* we have some data, check its validity */
@@ -781,6 +820,7 @@ hist_init(Source *s)
                         if ((fd = binopen3(nhname, O_RDWR | O_CREAT | O_TRUNC |
                             O_EXCL, 0600)) < 0) {
                                 /* just don't truncate then, meh. */
+                               hs = hist_use_it;
                                 goto hist_trunc_dont;
                         }
                         if (fstat(histfd, &sb) >= 0 &&
@@ -795,28 +835,26 @@ hist_init(Source *s)
                         hp = history;
                         while (hp < histptr) {
                                 if (!writehistline(fd,
-                                   s->line - (histptr - hp), *hp))
+                                   hist_source->line - (histptr - hp), *hp))
                                         goto hist_trunc_abort;
                                 ++hp;
                         }
-                       /* now unlock, close both, rename, rinse, repeat */
+                       /* now transfer back */
+                       if (!hist_persist_back(fd)) {
+                               /* success! */
+                               hs = hist_use_it;
+                       }
+ hist_trunc_abort:
+                       /* remove temporary file */
                         close(fd);
                         fd = -1;
-                       hist_finish();
-                       if (rename(nhname, hname) < 0) {
- hist_trunc_abort:
-                               if (fd != -1)
-                                       close(fd);
-                               unlink(nhname);
-                               if (fd != -1)
-                                       goto hist_trunc_dont;
-                               /* darn! restore histfd and pray */
-                       }
-                       hs = hist_init_restore;
+                       unlink(nhname);
+                       /* use whatever is in the file now */
   hist_trunc_dont:
                         afree(nhname, ATEMP);
-                       if (hs == hist_init_restore)
-                               goto retry;
+                       if (hs == hist_use_it)
+                               goto hist_trunc_done;
+                       goto hist_init_fail;
                 }
         } else if (histfsize != 0) {
                 /* negative or too small... */
@@ -840,9 +878,26 @@ hist_init(Source *s)
                         return;
                 }
         }
+ hist_trunc_done:
         histfsize = lseek(histfd, (off_t)0, SEEK_END);
   hist_init_tail:
         mksh_unlkfd(histfd);
+}
+#endif
+
+/*
+ * Set up the history for source s: calls histsave() in HIST_DISCARD
+ * mode, and, unless FTALKING is off, marks history as started, records
+ * s as hist_source and (with HAVE_PERSISTENT_HISTORY) hands over to
+ * hist_persist_init() for the history file.
+ */
+void
+hist_init(Source *s)
+{
+       histsave(NULL, NULL, HIST_DISCARD, true);
+
+       /* nothing further when FTALKING is unset (presumably: not interactive) */
+       if (Flag(FTALKING) == 0)
+               return;
+
+       hstarted = true;
+       /* hist_persist_init() reads hist_source->line, so set it first */
+       hist_source = s;
+
+#if HAVE_PERSISTENT_HISTORY
+       hist_persist_init();
  #endif
  }
  
@@ -909,10 +964,11 @@ writehistfile(int lno, const char *cmd)
         mksh_lockfd(histfd);
         sizenow = lseek(histfd, (off_t)0, SEEK_END);
         if (sizenow < histfsize) {
-               /* the file has shrunk; give up */
-               goto bad;
-       }
-       if (
+               /* the file has shrunk; trust it just appending the new data */
+               /* well, for now, anyway… since mksh strdups all into memory */
+               /* we can use a nicer approach some time later… */
+               ;
+       } else if (
                 /* ignore changes when the file is too large */
                 sizenow <= MKSH_MAXHISTFSIZE
             &&
@@ -1114,7 +1170,7 @@ gettrap(const char *cs, bool igncase, bool allsigs)
  
         /* signal number (1..ksh_NSIG) or 0? */
  
-       if (ksh_isdigit(*cs))
+       if (ctype(*cs, C_DIGIT))
                 return ((getn(cs, &i) && 0 <= i && i < ksh_NSIG) ?
                     (&sigtraps[i]) : NULL);
  
diff --git a/src/jobs.c b/src/jobs.c

index 0366004..4df98b7 100644 (file)
--- a/src/jobs.c
+++ b/src/jobs.c
@@ -23,7 +23,7 @@
  
  #include "sh.h"
  
-__RCSID("$MirOS: src/bin/mksh/jobs.c,v 1.121 2016/07/25 00:04:44 tg Exp $");
+__RCSID("$MirOS: src/bin/mksh/jobs.c,v 1.124 2017/08/08 14:30:10 tg Exp $");
  
  #if HAVE_KILLPG
  #define mksh_killpg            killpg
@@ -39,14 +39,27 @@ __RCSID("$MirOS: src/bin/mksh/jobs.c,v 1.121 2016/07/25 00:04:44 tg Exp $");
  #define PSTOPPED       3
  
  typedef struct proc Proc;
+/*
+ * to take alignment into consideration: this struct is used only as the
+ * operand of offsetof() below, which yields the offset of the command
+ * member including any padding between the members before it (the old
+ * code summed their sizeof values instead); the array size 128 here is
+ * a placeholder and does not enter that computation
+ */
+struct proc_dummy {
+       Proc *next;
+       pid_t pid;
+       int state;
+       int status;
+       char command[128];
+};
+/* real structure; the leading members must mirror struct proc_dummy */
  struct proc {
-       Proc *next;             /* next process in pipeline (if any) */
-       pid_t pid;              /* process id */
+       /* next process in pipeline (if any) */
+       Proc *next;
+       /* process id of this Unix process in the job */
+       pid_t pid;
+       /* one of the four P… above */
         int state;
-       int status;             /* wait status */
+       /* wait status */
+       int status;
         /* process command string from vistree */
-       char command[256 - (ALLOC_OVERHEAD + sizeof(Proc *) +
-           sizeof(pid_t) + 2 * sizeof(int))];
+       /* sized so that ALLOC_OVERHEAD + offset of command + array is 256 */
+       char command[256 - (ALLOC_OVERHEAD +
+           offsetof(struct proc_dummy, command[0]))];
  };
  
  /* Notify/print flag - j_print() argument */
@@ -1009,8 +1022,14 @@ j_notify(void)
         }
         for (j = job_list; j; j = tmp) {
                 tmp = j->next;
-               if (j->flags & JF_REMOVE)
-                       remove_job(j, "notify");
+               if (j->flags & JF_REMOVE) {
+                       if (j == async_job || (j->flags & JF_KNOWN)) {
+                               j->flags = (j->flags & ~JF_REMOVE) | JF_ZOMBIE;
+                               j->job = -1;
+                               nzombie++;
+                       } else
+                               remove_job(j, "notify");
+               }
         }
         shf_flush(shl_out);
  #ifndef MKSH_NOPROSPECTOFWORK
@@ -1651,7 +1670,7 @@ j_lookup(const char *cp, int *ecodep)
         size_t len;
         int job = 0;
  
-       if (ksh_isdigit(*cp) && getn(cp, &job)) {
+       if (ctype(*cp, C_DIGIT) && getn(cp, &job)) {
                 /* Look for last_proc->pid (what $! returns) first... */
                 for (j = job_list; j != NULL; j = j->next)
                         if (j->last_proc && j->last_proc->pid == job)
diff --git a/src/lex.c b/src/lex.c

index 78c2ee7..f450221 100644 (file)
--- a/src/lex.c
+++ b/src/lex.c
@@ -23,7 +23,7 @@
  
  #include "sh.h"
  
-__RCSID("$MirOS: src/bin/mksh/lex.c,v 1.234 2017/04/06 01:59:55 tg Exp $");
+__RCSID("$MirOS: src/bin/mksh/lex.c,v 1.239 2017/05/05 22:53:29 tg Exp $");
  
  /*
   * states while lexing word
@@ -131,7 +131,7 @@ getsc_i(void)
  }
  
  #if defined(MKSH_SMALL) && !defined(MKSH_SMALL_BUT_FAST)
-#define getsc getsc_i
+#define getsc()                ord(getsc_i())
  #else
  static int getsc_r(int);
  
@@ -141,7 +141,7 @@ getsc_r(int c)
         o_getsc_r(c);
  }
  
-#define getsc()                getsc_r(o_getsc())
+#define getsc()                ord(getsc_r(o_getsc()))
  #endif
  
  #define STATE_BSIZE    8
@@ -220,11 +220,11 @@ yylex(int cf)
         } else {
                 /* normal lexing */
                 state = (cf & HEREDELIM) ? SHEREDELIM : SBASE;
-               while ((c = getsc()) == ' ' || c == '\t')
+               while (ctype((c = getsc()), C_BLANK))
                         ;
                 if (c == '#') {
                         ignore_backslash_newline++;
-                       while ((c = getsc()) != '\0' && c != '\n')
+                       while (!ctype((c = getsc()), C_NUL | C_LF))
                                 ;
                         ignore_backslash_newline--;
                 }
@@ -245,30 +245,30 @@ yylex(int cf)
         while (!((c = getsc()) == 0 ||
             ((state == SBASE || state == SHEREDELIM) && ctype(c, C_LEX1)))) {
                 if (state == SBASE &&
-                   subshell_nesting_type == /*{*/ '}' &&
-                   c == /*{*/ '}')
+                   subshell_nesting_type == ord(/*{*/ '}') &&
+                   c == ord(/*{*/ '}'))
                         /* possibly end ${ :;} */
                         break;
                 Xcheck(ws, wp);
                 switch (state) {
                 case SADELIM:
-                       if (c == '(')
+                       if (c == ord('('))
                                 statep->nparen++;
-                       else if (c == ')')
+                       else if (c == ord(')'))
                                 statep->nparen--;
-                       else if (statep->nparen == 0 && (c == /*{*/ '}' ||
+                       else if (statep->nparen == 0 && (c == ord(/*{*/ '}') ||
                             c == (int)statep->ls_adelim.delimiter)) {
                                 *wp++ = ADELIM;
                                 *wp++ = c;
-                               if (c == /*{*/ '}' || --statep->ls_adelim.num == 0)
+                               if (c == ord(/*{*/ '}') || --statep->ls_adelim.num == 0)
                                         POP_STATE();
-                               if (c == /*{*/ '}')
+                               if (c == ord(/*{*/ '}'))
                                         POP_STATE();
                                 break;
                         }
                         /* FALLTHROUGH */
                 case SBASE:
-                       if (c == '[' && (cf & CMDASN)) {
+                       if (c == ord('[') && (cf & CMDASN)) {
                                 /* temporary */
                                 *wp = EOS;
                                 if (is_wdvarname(Xstring(ws, wp), false)) {
@@ -301,10 +301,9 @@ yylex(int cf)
                         }
                         /* FALLTHROUGH */
   Sbase1:               /* includes *(...|...) pattern (*+?@!) */
-                       if (c == '*' || c == '@' || c == '+' || c == '?' ||
-                           c == '!') {
+                       if (ctype(c, C_PATMO)) {
                                 c2 = getsc();
-                               if (c2 == '(' /*)*/ ) {
+                               if (c2 == ord('(' /*)*/)) {
                                         *wp++ = OPAT;
                                         *wp++ = c;
                                         PUSH_STATE(SPATTERN);
@@ -315,7 +314,7 @@ yylex(int cf)
                         /* FALLTHROUGH */
   Sbase2:               /* doesn't include *(...|...) pattern (*+?@!) */
                         switch (c) {
-                       case '\\':
+                       case ord('\\'):
   getsc_qchar:
                                 if ((c = getsc())) {
                                         /* trailing \ is lost */
@@ -323,7 +322,7 @@ yylex(int cf)
                                         *wp++ = c;
                                 }
                                 break;
-                       case '\'':
+                       case ord('\''):
   open_ssquote_unless_heredoc:
                                 if ((cf & HEREDOC))
                                         goto store_char;
@@ -331,12 +330,12 @@ yylex(int cf)
                                 ignore_backslash_newline++;
                                 PUSH_STATE(SSQUOTE);
                                 break;
-                       case '"':
+                       case ord('"'):
   open_sdquote:
                                 *wp++ = OQUOTE;
                                 PUSH_STATE(SDQUOTE);
                                 break;
-                       case '$':
+                       case ord('$'):
                                 /*
                                  * processing of dollar sign belongs into
                                  * Subst, except for those which can open
@@ -345,9 +344,9 @@ yylex(int cf)
   subst_dollar_ex:
                                 c = getsc();
                                 switch (c) {
-                               case '"':
+                               case ord('"'):
                                         goto open_sdquote;
-                               case '\'':
+                               case ord('\''):
                                         goto open_sequote;
                                 default:
                                         goto SubstS;
@@ -359,15 +358,16 @@ yylex(int cf)
  
   Subst:
                         switch (c) {
-                       case '\\':
+                       case ord('\\'):
                                 c = getsc();
                                 switch (c) {
-                               case '"':
+                               case ord('"'):
                                         if ((cf & HEREDOC))
                                                 goto heredocquote;
                                         /* FALLTHROUGH */
-                               case '\\':
-                               case '$': case '`':
+                               case ord('\\'):
+                               case ord('$'):
+                               case ord('`'):
   store_qchar:
                                         *wp++ = QCHAR;
                                         *wp++ = c;
@@ -385,12 +385,12 @@ yylex(int cf)
                                         break;
                                 }
                                 break;
-                       case '$':
+                       case ord('$'):
                                 c = getsc();
   SubstS:
-                               if (c == '(') /*)*/ {
+                               if (c == ord('(' /*)*/)) {
                                         c = getsc();
-                                       if (c == '(') /*)*/ {
+                                       if (c == ord('(' /*)*/)) {
                                                 *wp++ = EXPRSUB;
                                                 PUSH_SRETRACE(SASPAREN);
                                                 statep->nparen = 2;
@@ -407,8 +407,8 @@ yylex(int cf)
                                                 memcpy(wp, sp, cz);
                                                 wp += cz;
                                         }
-                               } else if (c == '{') /*}*/ {
-                                       if ((c = getsc()) == '|') {
+                               } else if (c == ord('{' /*}*/)) {
+                                       if ((c = getsc()) == ord('|')) {
                                                 /*
                                                  * non-subenvironment
                                                  * value substitution
@@ -425,15 +425,15 @@ yylex(int cf)
                                         }
                                         ungetsc(c);
                                         *wp++ = OSUBST;
-                                       *wp++ = '{'; /*}*/
+                                       *wp++ = '{' /*}*/;
                                         wp = get_brace_var(&ws, wp);
                                         c = getsc();
                                         /* allow :# and :% (ksh88 compat) */
-                                       if (c == ':') {
+                                       if (c == ord(':')) {
                                                 *wp++ = CHAR;
                                                 *wp++ = c;
                                                 c = getsc();
-                                               if (c == ':') {
+                                               if (c == ord(':')) {
                                                         *wp++ = CHAR;
                                                         *wp++ = '0';
                                                         *wp++ = ADELIM;
@@ -444,10 +444,9 @@ yylex(int cf)
                                                         statep->ls_adelim.num = 1;
                                                         statep->nparen = 0;
                                                         break;
-                                               } else if (ksh_isdigit(c) ||
-                                                   c == '('/*)*/ || c == ' ' ||
+                                               } else if (ctype(c, C_DIGIT | C_DOLAR | C_SPC) ||
                                                     /*XXX what else? */
-                                                   c == '$') {
+                                                   c == '(' /*)*/) {
                                                         /* substring subst. */
                                                         if (c != ' ') {
                                                                 *wp++ = CHAR;
@@ -466,7 +465,7 @@ yylex(int cf)
   parse_adelim_slash:
                                                 *wp++ = CHAR;
                                                 *wp++ = c;
-                                               if ((c = getsc()) == '/') {
+                                               if ((c = getsc()) == ord('/')) {
                                                         *wp++ = c2;
                                                         *wp++ = c;
                                                 } else
@@ -480,7 +479,7 @@ yylex(int cf)
                                         } else if (c == '@') {
                                                 c2 = getsc();
                                                 ungetsc(c2);
-                                               if (c2 == '/') {
+                                               if (c2 == ord('/')) {
                                                         c2 = CHAR;
                                                         goto parse_adelim_slash;
                                                 }
@@ -489,7 +488,7 @@ yylex(int cf)
                                          * If this is a trim operation,
                                          * treat (,|,) specially in STBRACE.
                                          */
-                                       if (ksh_issubop2(c)) {
+                                       if (ctype(c, C_SUB2)) {
                                                 ungetsc(c);
                                                 if (Flag(FSH))
                                                         PUSH_STATE(STBRACEBOURNE);
@@ -503,14 +502,14 @@ yylex(int cf)
                                                 else
                                                         PUSH_STATE(SBRACE);
                                         }
-                               } else if (ksh_isalphx(c)) {
+                               } else if (ctype(c, C_ALPHX)) {
                                         *wp++ = OSUBST;
                                         *wp++ = 'X';
                                         do {
                                                 Xcheck(ws, wp);
                                                 *wp++ = c;
                                                 c = getsc();
-                                       } while (ksh_isalnux(c));
+                                       } while (ctype(c, C_ALNUX));
                                         *wp++ = '\0';
                                         *wp++ = CSUBST;
                                         *wp++ = 'X';
@@ -529,7 +528,7 @@ yylex(int cf)
                                         ungetsc(c);
                                 }
                                 break;
-                       case '`':
+                       case ord('`'):
   subst_gravis:
                                 PUSH_STATE(SBQUOTE);
                                 *wp++ = COMASUB;
@@ -573,11 +572,11 @@ yylex(int cf)
                         break;
  
                 case SEQUOTE:
-                       if (c == '\'') {
+                       if (c == ord('\'')) {
                                 POP_STATE();
                                 *wp++ = CQUOTE;
                                 ignore_backslash_newline--;
-                       } else if (c == '\\') {
+                       } else if (c == ord('\\')) {
                                 if ((c2 = unbksl(true, getsc_i, ungetsc)) == -1)
                                         c2 = getsc();
                                 if (c2 == 0)
@@ -605,7 +604,7 @@ yylex(int cf)
                         break;
  
                 case SSQUOTE:
-                       if (c == '\'') {
+                       if (c == ord('\'')) {
                                 POP_STATE();
                                 if ((cf & HEREDOC) || state == SQBRACE)
                                         goto store_char;
@@ -618,7 +617,7 @@ yylex(int cf)
                         break;
  
                 case SDQUOTE:
-                       if (c == '"') {
+                       if (c == ord('"')) {
                                 POP_STATE();
                                 *wp++ = CQUOTE;
                         } else
@@ -627,15 +626,15 @@ yylex(int cf)
  
                 /* $(( ... )) */
                 case SASPAREN:
-                       if (c == '(')
+                       if (c == ord('('))
                                 statep->nparen++;
-                       else if (c == ')') {
+                       else if (c == ord(')')) {
                                 statep->nparen--;
                                 if (statep->nparen == 1) {
                                         /* end of EXPRSUB */
                                         POP_SRETRACE();
  
-                                       if ((c2 = getsc()) == /*(*/ ')') {
+                                       if ((c2 = getsc()) == ord(/*(*/ ')')) {
                                                 cz = strlen(sp) - 2;
                                                 XcheckN(ws, wp, cz);
                                                 memcpy(wp, sp + 1, cz);
@@ -667,7 +666,7 @@ yylex(int cf)
                         goto Sbase2;
  
                 case SQBRACE:
-                       if (c == '\\') {
+                       if (c == ord('\\')) {
                                 /*
                                  * perform POSIX "quote removal" if the back-
                                  * slash is "special", i.e. same cases as the
@@ -676,26 +675,26 @@ yylex(int cf)
                                  * write QCHAR+c, otherwise CHAR+\+CHAR+c are
                                  * emitted (in heredocquote:)
                                  */
-                               if ((c = getsc()) == '"' || c == '\\' ||
-                                   c == '$' || c == '`' || c == /*{*/'}')
+                               if ((c = getsc()) == ord('"') || c == ord('\\') ||
+                                   ctype(c, C_DOLAR | C_GRAVE) || c == ord(/*{*/ '}'))
                                         goto store_qchar;
                                 goto heredocquote;
                         }
                         goto common_SQBRACE;
  
                 case SBRACE:
-                       if (c == '\'')
+                       if (c == ord('\''))
                                 goto open_ssquote_unless_heredoc;
-                       else if (c == '\\')
+                       else if (c == ord('\\'))
                                 goto getsc_qchar;
   common_SQBRACE:
-                       if (c == '"')
+                       if (c == ord('"'))
                                 goto open_sdquote;
-                       else if (c == '$')
+                       else if (c == ord('$'))
                                 goto subst_dollar_ex;
-                       else if (c == '`')
+                       else if (c == ord('`'))
                                 goto subst_gravis;
-                       else if (c != /*{*/ '}')
+                       else if (c != ord(/*{*/ '}'))
                                 goto store_char;
                         POP_STATE();
                         *wp++ = CSUBST;
@@ -704,16 +703,16 @@ yylex(int cf)
  
                 /* Same as SBASE, except (,|,) treated specially */
                 case STBRACEKORN:
-                       if (c == '|')
+                       if (c == ord('|'))
                                 *wp++ = SPAT;
-                       else if (c == '(') {
+                       else if (c == ord('(')) {
                                 *wp++ = OPAT;
                                 /* simile for @ */
                                 *wp++ = ' ';
                                 PUSH_STATE(SPATTERN);
                         } else /* FALLTHROUGH */
                 case STBRACEBOURNE:
-                         if (c == /*{*/ '}') {
+                         if (c == ord(/*{*/ '}')) {
                                 POP_STATE();
                                 *wp++ = CSUBST;
                                 *wp++ = /*{*/ '}';
@@ -722,20 +721,20 @@ yylex(int cf)
                         break;
  
                 case SBQUOTE:
-                       if (c == '`') {
+                       if (c == ord('`')) {
                                 *wp++ = 0;
                                 POP_STATE();
-                       } else if (c == '\\') {
+                       } else if (c == ord('\\')) {
                                 switch (c = getsc()) {
                                 case 0:
                                         /* trailing \ is lost */
                                         break;
-                               case '$':
-                               case '`':
-                               case '\\':
+                               case ord('$'):
+                               case ord('`'):
+                               case ord('\\'):
                                         *wp++ = c;
                                         break;
-                               case '"':
+                               case ord('"'):
                                         if (statep->ls_bool) {
                                                 *wp++ = c;
                                                 break;
@@ -756,10 +755,10 @@ yylex(int cf)
  
                 /* LETEXPR: (( ... )) */
                 case SLETPAREN:
-                       if (c == /*(*/ ')') {
+                       if (c == ord(/*(*/ ')')) {
                                 if (statep->nparen > 0)
                                         --statep->nparen;
-                               else if ((c2 = getsc()) == /*(*/ ')') {
+                               else if ((c2 = getsc()) == ord(/*(*/ ')')) {
                                         c = 0;
                                         *wp++ = CQUOTE;
                                         goto Done;
@@ -780,10 +779,10 @@ yylex(int cf)
                                         s->start = s->str = s->u.freeme = dp;
                                         s->next = source;
                                         source = s;
-                                       ungetsc('('/*)*/);
-                                       return ('('/*)*/);
+                                       ungetsc('(' /*)*/);
+                                       return (ord('(' /*)*/));
                                 }
-                       } else if (c == '(')
+                       } else if (c == ord('('))
                                 /*
                                  * parentheses inside quotes and
                                  * backslashes are lost, but AT&T ksh
@@ -799,26 +798,26 @@ yylex(int cf)
                          * $ and `...` are not to be treated specially
                          */
                         switch (c) {
-                       case '\\':
+                       case ord('\\'):
                                 if ((c = getsc())) {
                                         /* trailing \ is lost */
                                         *wp++ = QCHAR;
                                         *wp++ = c;
                                 }
                                 break;
-                       case '\'':
+                       case ord('\''):
                                 goto open_ssquote_unless_heredoc;
-                       case '$':
-                               if ((c2 = getsc()) == '\'') {
+                       case ord('$'):
+                               if ((c2 = getsc()) == ord('\'')) {
   open_sequote:
                                         *wp++ = OQUOTE;
                                         ignore_backslash_newline++;
                                         PUSH_STATE(SEQUOTE);
                                         statep->ls_bool = false;
                                         break;
-                               } else if (c2 == '"') {
+                               } else if (c2 == ord('"')) {
                                         /* FALLTHROUGH */
-                       case '"':
+                       case ord('"'):
                                         PUSH_SRETRACE(SHEREDQUOTE);
                                         break;
                                 }
@@ -832,7 +831,7 @@ yylex(int cf)
  
                 /* " in << or <<- delimiter */
                 case SHEREDQUOTE:
-                       if (c != '"')
+                       if (c != ord('"'))
                                 goto Subst;
                         POP_SRETRACE();
                         dp = strnul(sp) - 1;
@@ -845,10 +844,10 @@ yylex(int cf)
                         while ((c = *dp++)) {
                                 if (c == '\\') {
                                         switch ((c = *dp++)) {
-                                       case '\\':
-                                       case '"':
-                                       case '$':
-                                       case '`':
+                                       case ord('\\'):
+                                       case ord('"'):
+                                       case ord('$'):
+                                       case ord('`'):
                                                 break;
                                         default:
                                                 *wp++ = CHAR;
@@ -866,12 +865,12 @@ yylex(int cf)
  
                 /* in *(...|...) pattern (*+?@!) */
                 case SPATTERN:
-                       if (c == /*(*/ ')') {
+                       if (c == ord(/*(*/ ')')) {
                                 *wp++ = CPAT;
                                 POP_STATE();
-                       } else if (c == '|') {
+                       } else if (c == ord('|')) {
                                 *wp++ = SPAT;
-                       } else if (c == '(') {
+                       } else if (c == ord('(')) {
                                 *wp++ = OPAT;
                                 /* simile for @ */
                                 *wp++ = ' ';
@@ -894,14 +893,14 @@ yylex(int cf)
         dp = Xstring(ws, wp);
         if (state == SBASE && (
             (c == '&' && !Flag(FSH) && !Flag(FPOSIX)) ||
-           c == '<' || c == '>') && ((c2 = Xlength(ws, wp)) == 0 ||
-           (c2 == 2 && dp[0] == CHAR && ksh_isdigit(dp[1])))) {
+           ctype(c, C_ANGLE)) && ((c2 = Xlength(ws, wp)) == 0 ||
+           (c2 == 2 && dp[0] == CHAR && ctype(dp[1], C_DIGIT)))) {
                 struct ioword *iop = alloc(sizeof(struct ioword), ATEMP);
  
                 iop->unit = c2 == 2 ? ksh_numdig(dp[1]) : c == '<' ? 0 : 1;
  
                 if (c == '&') {
-                       if ((c2 = getsc()) != '>') {
+                       if ((c2 = getsc()) != ord('>')) {
                                 ungetsc(c2);
                                 goto no_iop;
                         }
@@ -912,22 +911,22 @@ yylex(int cf)
  
                 c2 = getsc();
                 /* <<, >>, <> are ok, >< is not */
-               if (c == c2 || (c == '<' && c2 == '>')) {
+               if (c == c2 || (c == ord('<') && c2 == ord('>'))) {
                         iop->ioflag |= c == c2 ?
-                           (c == '>' ? IOCAT : IOHERE) : IORDWR;
+                           (c == ord('>') ? IOCAT : IOHERE) : IORDWR;
                         if (iop->ioflag == IOHERE) {
-                               if ((c2 = getsc()) == '-')
+                               if ((c2 = getsc()) == ord('-'))
                                         iop->ioflag |= IOSKIP;
-                               else if (c2 == '<')
+                               else if (c2 == ord('<'))
                                         iop->ioflag |= IOHERESTR;
                                 else
                                         ungetsc(c2);
                         }
-               } else if (c2 == '&')
-                       iop->ioflag |= IODUP | (c == '<' ? IORDUP : 0);
+               } else if (c2 == ord('&'))
+                       iop->ioflag |= IODUP | (c == ord('<') ? IORDUP : 0);
                 else {
-                       iop->ioflag |= c == '>' ? IOWRITE : IOREAD;
-                       if (c == '>' && c2 == '|')
+                       iop->ioflag |= c == ord('>') ? IOWRITE : IOREAD;
+                       if (c == ord('>') && c2 == ord('|'))
                                 iop->ioflag |= IOCLOB;
                         else
                                 ungetsc(c2);
@@ -948,29 +947,30 @@ yylex(int cf)
                 /* free word */
                 Xfree(ws, wp);
                 /* no word, process LEX1 character */
-               if ((c == '|') || (c == '&') || (c == ';') || (c == '('/*)*/)) {
+               if ((c == ord('|')) || (c == ord('&')) || (c == ord(';')) ||
+                   (c == ord('(' /*)*/))) {
                         if ((c2 = getsc()) == c)
-                               c = (c == ';') ? BREAK :
-                                   (c == '|') ? LOGOR :
-                                   (c == '&') ? LOGAND :
-                                   /* c == '(' ) */ MDPAREN;
-                       else if (c == '|' && c2 == '&')
+                               c = (c == ord(';')) ? BREAK :
+                                   (c == ord('|')) ? LOGOR :
+                                   (c == ord('&')) ? LOGAND :
+                                   /* c == ord('(' )) */ MDPAREN;
+                       else if (c == ord('|') && c2 == ord('&'))
                                 c = COPROC;
-                       else if (c == ';' && c2 == '|')
+                       else if (c == ord(';') && c2 == ord('|'))
                                 c = BRKEV;
-                       else if (c == ';' && c2 == '&')
+                       else if (c == ord(';') && c2 == ord('&'))
                                 c = BRKFT;
                         else
                                 ungetsc(c2);
  #ifndef MKSH_SMALL
                         if (c == BREAK) {
-                               if ((c2 = getsc()) == '&')
+                               if ((c2 = getsc()) == ord('&'))
                                         c = BRKEV;
                                 else
                                         ungetsc(c2);
                         }
  #endif
-               } else if (c == '\n') {
+               } else if (c == ord('\n')) {
                         if (cf & HEREDELIM)
                                 ungetsc(c);
                         else {
@@ -1025,7 +1025,7 @@ yylex(int cf)
  
                 if ((cf & KEYWORD) && (p = ktsearch(&keywords, ident, h)) &&
                     (!(cf & ESACONLY) || p->val.i == ESAC ||
-                   p->val.i == /*{*/ '}')) {
+                   p->val.i == ord(/*{*/ '}'))) {
                         afree(yylval.cp, ATEMP);
                         return (p->val.i);
                 }
@@ -1038,7 +1038,7 @@ yylex(int cf)
                         const char *cp = source->str;
  
                         /* prefer POSIX but not Korn functions over aliases */
-                       while (*cp == ' ' || *cp == '\t')
+                       while (ctype(*cp, C_BLANK))
                                 /*
                                  * this is like getsc() without skipping
                                  * over Source boundaries (including not
@@ -1136,7 +1136,7 @@ readhere(struct ioword *iop)
         if (!*eofp) {
                 /* end of here document marker, what to do? */
                 switch (c) {
-               case /*(*/ ')':
+               case ord(/*(*/ ')'):
                         if (!subshell_nesting_type)
                                 /*-
                                  * not allowed outside $(...) or (...)
@@ -1151,7 +1151,7 @@ readhere(struct ioword *iop)
                          * Allow EOF here to commands without trailing
                          * newlines (mksh -c '...') will work as well.
                          */
-               case '\n':
+               case ord('\n'):
                         /* Newline terminates here document marker */
                         goto heredoc_found_terminator;
                 }
@@ -1233,7 +1233,7 @@ getsc_uu(void)
         Source *s = source;
         int c;
  
-       while ((c = *s->str++) == 0) {
+       while ((c = ord(*s->str++)) == 0) {
                 /* return 0 for EOF by default */
                 s->str = NULL;
                 switch (s->type) {
@@ -1275,7 +1275,7 @@ getsc_uu(void)
                                 source->flags |= s->flags & SF_ALIAS;
                                 s = source;
                         } else if (*s->u.tblp->val.s &&
-                           (c = strnul(s->u.tblp->val.s)[-1], ksh_isspace(c))) {
+                           ctype((c = strnul(s->u.tblp->val.s)[-1]), C_SPACE)) {
                                 /* pop source stack */
                                 source = s = s->next;
                                 /*
@@ -1435,7 +1435,7 @@ getsc_line(Source *s)
         } else if (interactive && cur_prompt == PS1) {
   check_for_sole_return:
                 cp = Xstring(s->xs, xp);
-               while (*cp && ctype(*cp, C_IFSWS))
+               while (ctype(*cp, C_IFSWS))
                         ++cp;
                 if (!*cp) {
                         histsave(&s->line, NULL, HIST_FLUSH, true);
@@ -1528,7 +1528,7 @@ pprompt(const char *cp, int ntruncate)
         for (; *cp; cp++) {
                 if (indelimit && *cp != delimiter)
                         ;
-               else if (*cp == '\n' || *cp == '\r') {
+               else if (ctype(*cp, C_CR | C_LF)) {
                         lines += columns / x_cols + ((*cp == '\n') ? 1 : 0);
                         columns = 0;
                 } else if (*cp == '\t') {
@@ -1538,7 +1538,7 @@ pprompt(const char *cp, int ntruncate)
                                 columns--;
                 } else if (*cp == delimiter)
                         indelimit = !indelimit;
-               else if (UTFMODE && ((unsigned char)*cp > 0x7F)) {
+               else if (UTFMODE && (rtt2asc(*cp) > 0x7F)) {
                         const char *cp2;
                         columns += utf_widthadj(cp, &cp2);
                         if (doprint && (indelimit ||
@@ -1580,39 +1580,39 @@ get_brace_var(XString *wsp, char *wp)
  
                                 c2 = getsc();
                                 ungetsc(c2);
-                               if (c2 != /*{*/ '}') {
+                               if (ord(c2) != ord(/*{*/ '}')) {
                                         ungetsc(c);
                                         goto out;
                                 }
                         }
                         goto ps_common;
                 case PS_SAW_BANG:
-                       switch (c) {
-                       case '@':
-                       case '#':
-                       case '-':
-                       case '?':
+                       switch (ord(c)) {
+                       case ord('@'):
+                       case ord('#'):
+                       case ord('-'):
+                       case ord('?'):
                                 goto out;
                         }
                         goto ps_common;
                 case PS_INITIAL:
-                       switch (c) {
-                       case '%':
+                       switch (ord(c)) {
+                       case ord('%'):
                                 state = PS_SAW_PERCENT;
                                 goto next;
-                       case '#':
+                       case ord('#'):
                                 state = PS_SAW_HASH;
                                 goto next;
-                       case '!':
+                       case ord('!'):
                                 state = PS_SAW_BANG;
                                 goto next;
                         }
                         /* FALLTHROUGH */
                 case PS_SAW_PERCENT:
   ps_common:
-                       if (ksh_isalphx(c))
+                       if (ctype(c, C_ALPHX))
                                 state = PS_IDENT;
-                       else if (ksh_isdigit(c))
+                       else if (ctype(c, C_DIGIT))
                                 state = PS_NUMBER;
                         else if (ctype(c, C_VAR1))
                                 state = PS_VAR1;
@@ -1620,14 +1620,15 @@ get_brace_var(XString *wsp, char *wp)
                                 goto out;
                         break;
                 case PS_IDENT:
-                       if (!ksh_isalnux(c)) {
-                               if (c == '[') {
+                       if (!ctype(c, C_ALNUX)) {
+                               if (ord(c) == ord('[')) {
                                         char *tmp, *p;
  
                                         if (!arraysub(&tmp))
                                                 yyerror("missing ]");
                                         *wp++ = c;
-                                       for (p = tmp; *p; ) {
+                                       p = tmp;
+                                       while (*p) {
                                                 Xcheck(*wsp, wp);
                                                 *wp++ = *p++;
                                         }
@@ -1640,7 +1641,7 @@ get_brace_var(XString *wsp, char *wp)
   next:
                         break;
                 case PS_NUMBER:
-                       if (!ksh_isdigit(c))
+                       if (!ctype(c, C_DIGIT))
                                 goto out;
                         break;
                 case PS_VAR1:
@@ -1675,9 +1676,9 @@ arraysub(char **strp)
                 c = getsc();
                 Xcheck(ws, wp);
                 *wp++ = c;
-               if (c == '[')
+               if (ord(c) == ord('['))
                         depth++;
-               else if (c == ']')
+               else if (ord(c) == ord(']'))
                         depth--;
         } while (depth > 0 && c && c != '\n');
  
@@ -1756,19 +1757,19 @@ yyskiputf8bom(void)
  {
         int c;
  
-       if ((unsigned char)(c = o_getsc_u()) != 0xEF) {
+       if (rtt2asc((c = o_getsc_u())) != 0xEF) {
                 ungetsc_i(c);
                 return;
         }
-       if ((unsigned char)(c = o_getsc_u()) != 0xBB) {
+       if (rtt2asc((c = o_getsc_u())) != 0xBB) {
                 ungetsc_i(c);
-               ungetsc_i(0xEF);
+               ungetsc_i(asc2rtt(0xEF));
                 return;
         }
-       if ((unsigned char)(c = o_getsc_u()) != 0xBF) {
+       if (rtt2asc((c = o_getsc_u())) != 0xBF) {
                 ungetsc_i(c);
-               ungetsc_i(0xBB);
-               ungetsc_i(0xEF);
+               ungetsc_i(asc2rtt(0xBB));
+               ungetsc_i(asc2rtt(0xEF));
                 return;
         }
         UTFMODE |= 8;
diff --git a/src/main.c b/src/main.c

index 1286b07..b4d7244 100644 (file)
--- a/src/main.c
+++ b/src/main.c
@@ -34,7 +34,7 @@
  #include <locale.h>
  #endif
  
-__RCSID("$MirOS: src/bin/mksh/main.c,v 1.332 2017/04/12 16:01:45 tg Exp $");
+__RCSID("$MirOS: src/bin/mksh/main.c,v 1.342 2017/04/28 11:13:47 tg Exp $");
  
  extern char **environ;
  
@@ -236,6 +236,11 @@ main_init(int argc, const char *argv[], Source **sp, struct block **lp)
         ssize_t k;
  #endif
  
+#if defined(MKSH_EBCDIC) || defined(MKSH_FAUX_EBCDIC)
+       ebcdic_init();
+#endif
+       set_ifs(TC_IFSWS);
+
  #ifdef __OS2__
         for (i = 0; i < 3; ++i)
                 if (!isatty(i))
@@ -333,8 +338,6 @@ main_init(int argc, const char *argv[], Source **sp, struct block **lp)
  
         initvar();
  
-       initctypes();
-
         inittraps();
  
         coproc_init();
@@ -409,12 +412,12 @@ main_init(int argc, const char *argv[], Source **sp, struct block **lp)
  
         /* override default PATH regardless of environment */
  #ifdef MKSH_DEFPATH_OVERRIDE
-        vp = global(TPATH);
-        setstr(vp, MKSH_DEFPATH_OVERRIDE, KSH_RETURN_ERROR);
+       vp = global(TPATH);
+       setstr(vp, MKSH_DEFPATH_OVERRIDE, KSH_RETURN_ERROR);
  #endif
  
         /* for security */
-       typeset("IFS= \t\n", 0, 0, 0, 0);
+       typeset(TinitIFS, 0, 0, 0, 0);
  
         /* assign default shell variable values */
         typeset("PATHSEP=" MKSH_PATHSEPS, 0, 0, 0, 0);
@@ -497,7 +500,7 @@ main_init(int argc, const char *argv[], Source **sp, struct block **lp)
                 if (!(s->start = s->str = argv[argi++]))
                         errorf(Tf_optfoo, "", "", 'c', Treq_arg);
                 while (*s->str) {
-                       if (*s->str != ' ' && ctype(*s->str, C_QUOTE))
+                       if (ctype(*s->str, C_QUOTE))
                                 break;
                         s->str++;
                 }
@@ -1554,7 +1557,7 @@ check_fd(const char *name, int mode, const char **emsgp)
                 goto illegal_fd_name;
         if (name[0] == 'p')
                 return (coproc_getfd(mode, emsgp));
-       if (!ksh_isdigit(name[0])) {
+       if (!ctype(name[0], C_DIGIT)) {
   illegal_fd_name:
                 if (emsgp)
                         *emsgp = "illegal file descriptor name";
@@ -1893,7 +1896,7 @@ tnamecmp(const void *p1, const void *p2)
         const struct tbl *a = *((const struct tbl * const *)p1);
         const struct tbl *b = *((const struct tbl * const *)p2);
  
-       return (strcmp(a->name, b->name));
+       return (ascstrcmp(a->name, b->name));
  }
  
  struct tbl **
diff --git a/src/misc.c b/src/misc.c

index 6957c22..1205072 100644 (file)
--- a/src/misc.c
+++ b/src/misc.c
@@ -5,6 +5,8 @@
   * Copyright (c) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
   *              2011, 2012, 2013, 2014, 2015, 2016, 2017
   *     mirabilos <m@mirbsd.org>
+ * Copyright (c) 2015
+ *     Daniel Richard G. <skunk@iSKUNK.ORG>
   *
   * Provided that these terms and disclaimer and all copyright notices
   * are retained or reproduced in an accompanying document, permission
@@ -30,7 +32,7 @@
  #include <grp.h>
  #endif
  
-__RCSID("$MirOS: src/bin/mksh/misc.c,v 1.255 2017/04/12 16:46:22 tg Exp $");
+__RCSID("$MirOS: src/bin/mksh/misc.c,v 1.279 2017/08/07 21:39:25 tg Exp $");
  
  #define KSH_CHVT_FLAG
  #ifdef MKSH_SMALL
@@ -47,7 +49,8 @@ unsigned char chtypes[UCHAR_MAX + 1];
  static const unsigned char *pat_scan(const unsigned char *,
      const unsigned char *, bool) MKSH_A_PURE;
  static int do_gmatch(const unsigned char *, const unsigned char *,
-    const unsigned char *, const unsigned char *) MKSH_A_PURE;
+    const unsigned char *, const unsigned char *,
+    const unsigned char *) MKSH_A_PURE;
  static const unsigned char *gmatch_cclass(const unsigned char *, unsigned char)
      MKSH_A_PURE;
  #ifdef KSH_CHVT_CODE
@@ -68,37 +71,6 @@ static int make_path(const char *, const char *, char **, XString *, int *);
  #define DO_SETUID(func, argvec) func argvec
  #endif
  
-/*
- * Fast character classes
- */
-void
-setctypes(const char *s, int t)
-{
-       if (t & C_IFS) {
-               unsigned int i = 0;
-
-               while (++i <= UCHAR_MAX)
-                       chtypes[i] &= ~C_IFS;
-               /* include '\0' in C_IFS */
-               chtypes[0] |= C_IFS;
-       }
-       while (*s != 0)
-               chtypes[(unsigned char)*s++] |= t;
-}
-
-void
-initctypes(void)
-{
-       setctypes(letters_uc, C_ALPHX);
-       setctypes(letters_lc, C_ALPHX);
-       chtypes['_'] |= C_ALPHX;
-       setctypes("0123456789", C_DIGIT);
-       setctypes(TC_LEX1, C_LEX1);
-       setctypes("*@#!$-?", C_VAR1);
-       setctypes(TC_IFSWS, C_IFSWS);
-       setctypes("=-+?", C_SUBOP1);
-       setctypes("\t\n \"#$&'()*;<=>?[\\]`|", C_QUOTE);
-}
  
  /* called from XcheckN() to grow buffer */
  char *
@@ -147,7 +119,7 @@ option(const char *n)
  {
         size_t i = 0;
  
-       if ((n[0] == '-' || n[0] == '+') && n[1] && !n[2])
+       if (ctype(n[0], C_MINUS | C_PLUS) && n[1] && !n[2])
                 while (i < NELEM(options)) {
                         if (OFC(i) == n[1])
                                 return (i);
@@ -299,6 +271,11 @@ change_flag(enum sh_flag f, int what, bool newset)
         } else if ((f == FPOSIX || f == FSH) && newval) {
                 /* Turning on -o posix or -o sh? */
                 Flag(FBRACEEXPAND) = 0;
+               /* Turning on -o posix? */
+               if (f == FPOSIX) {
+                       /* C locale required for compliance */
+                       UTFMODE = 0;
+               }
         } else if (f == FTALKING) {
                 /* Changing interactive flag? */
                 if ((what == OF_CMDLINE || what == OF_SET) && procpid == kshpid)
@@ -483,7 +460,7 @@ parse_args(const char **argv,
                 }
         }
         if (!(go.info & GI_MINUSMINUS) && argv[go.optind] &&
-           (argv[go.optind][0] == '-' || argv[go.optind][0] == '+') &&
+           ctype(argv[go.optind][0], C_MINUS | C_PLUS) &&
             argv[go.optind][1] == '\0') {
                 /* lone - clears -v and -x flags */
                 if (argv[go.optind][0] == '-') {
@@ -512,7 +489,7 @@ parse_args(const char **argv,
                 for (i = go.optind; argv[i]; i++)
                         ;
                 qsort(&argv[go.optind], i - go.optind, sizeof(void *),
-                   xstrcmp);
+                   ascpstrcmp);
         }
         if (arrayset)
                 go.optind += set_array(array, tobool(arrayset > 0),
@@ -533,7 +510,7 @@ getn(const char *s, int *ai)
  
         do {
                 c = *s++;
-       } while (ksh_isspace(c));
+       } while (ctype(c, C_SPACE));
  
         switch (c) {
         case '-':
@@ -545,7 +522,7 @@ getn(const char *s, int *ai)
         }
  
         do {
-               if (!ksh_isdigit(c))
+               if (!ctype(c, C_DIGIT))
                         /* not numeric */
                         return (0);
                 if (num.u > 214748364U)
@@ -585,7 +562,7 @@ simplify_gmatch_pattern(const unsigned char *sp)
         sp = cp;
   simplify_gmatch_pat1a:
         dp = cp;
-       se = sp + strlen((const void *)sp);
+       se = strnul(sp);
         while ((c = *sp++)) {
                 if (!ISMAGIC(c)) {
                         *dp++ = c;
@@ -657,29 +634,30 @@ gmatchx(const char *s, const char *p, bool isfile)
         if (s == NULL || p == NULL)
                 return (0);
  
-       se = s + strlen(s);
-       pe = p + strlen(p);
+       pe = strnul(p);
         /*
          * isfile is false iff no syntax check has been done on
-        * the pattern. If check fails, just to a strcmp().
+        * the pattern. If check fails, just do a strcmp().
          */
-       if (!isfile && !has_globbing(p, pe)) {
+       if (!isfile && !has_globbing(p)) {
                 size_t len = pe - p + 1;
                 char tbuf[64];
                 char *t = len <= sizeof(tbuf) ? tbuf : alloc(len, ATEMP);
                 debunk(t, p, len);
                 return (!strcmp(t, s));
         }
+       se = strnul(s);
  
         /*
          * since the do_gmatch() engine sucks so much, we must do some
          * pattern simplifications
          */
         pnew = simplify_gmatch_pattern((const unsigned char *)p);
-       pe = pnew + strlen(pnew);
+       pe = strnul(pnew);
  
         rv = do_gmatch((const unsigned char *)s, (const unsigned char *)se,
-           (const unsigned char *)pnew, (const unsigned char *)pe);
+           (const unsigned char *)pnew, (const unsigned char *)pe,
+           (const unsigned char *)s);
         afree(pnew, ATEMP);
         return (rv);
  }
@@ -690,7 +668,7 @@ gmatchx(const char *s, const char *p, bool isfile)
   * Syntax errors are:
   *     - [ with no closing ]
   *     - imbalanced $(...) expression
- *     - [...] and *(...) not nested (eg, [a$(b|]c), *(a[b|c]d))
+ *     - [...] and *(...) not nested (eg, @(a[b|)c], *(a[b|c]d))
   */
  /*XXX
   * - if no magic,
@@ -701,76 +679,101 @@ gmatchx(const char *s, const char *p, bool isfile)
   *     return ?
   * - return ?
   */
-int
-has_globbing(const char *xp, const char *xpe)
+bool
+has_globbing(const char *pat)
  {
-       const unsigned char *p = (const unsigned char *) xp;
-       const unsigned char *pe = (const unsigned char *) xpe;
-       int c;
-       int nest = 0, bnest = 0;
+       unsigned char c, subc;
         bool saw_glob = false;
-       /* inside [...] */
-       bool in_bracket = false;
+       unsigned int nest = 0;
+       const unsigned char *p = (const unsigned char *)pat;
+       const unsigned char *s;
  
-       for (; p < pe; p++) {
-               if (!ISMAGIC(*p))
+       while ((c = *p++)) {
+               /* regular character? ok. */
+               if (!ISMAGIC(c))
                         continue;
-               if ((c = *++p) == '*' || c == '?')
+               /* MAGIC + NUL? abort. */
+               if (!(c = *p++))
+                       return (false);
+               /* some specials */
+               if (ord(c) == ord('*') || ord(c) == ord('?')) {
+                       /* easy glob, accept */
                         saw_glob = true;
-               else if (c == '[') {
-                       if (!in_bracket) {
-                               saw_glob = true;
-                               in_bracket = true;
-                               if (ISMAGIC(p[1]) && p[2] == '!')
-                                       p += 2;
-                               if (ISMAGIC(p[1]) && p[2] == ']')
-                                       p += 2;
-                       }
-                       /*XXX Do we need to check ranges here? POSIX Q */
-               } else if (c == ']') {
-                       if (in_bracket) {
-                               if (bnest)
-                                       /* [a*(b]) */
-                                       return (0);
-                               in_bracket = false;
+               } else if (ord(c) == ord('[')) {
+                       /* bracket expression; eat negation and initial ] */
+                       if (ISMAGIC(p[0]) && ord(p[1]) == ord('!'))
+                               p += 2;
+                       if (ISMAGIC(p[0]) && ord(p[1]) == ord(']'))
+                               p += 2;
+                       /* check next string part */
+                       s = p;
+                       while ((c = *s++)) {
+                               /* regular chars are ok */
+                               if (!ISMAGIC(c))
+                                       continue;
+                               /* MAGIC + NUL cannot happen */
+                               if (!(c = *s++))
+                                       return (false);
+                               /* terminating bracket? */
+                               if (ord(c) == ord(']')) {
+                                       /* accept and continue */
+                                       p = s;
+                                       saw_glob = true;
+                                       break;
+                               }
+                               /* sub-bracket expressions */
+                               if (ord(c) == ord('[') && (
+                                   /* collating element? */
+                                   ord(*s) == ord('.') ||
+                                   /* equivalence class? */
+                                   ord(*s) == ord('=') ||
+                                   /* character class? */
+                                   ord(*s) == ord(':'))) {
+                                       /* must stop with exactly the same c */
+                                       subc = *s++;
+                                       /* arbitrarily many chars in betwixt */
+                                       while ((c = *s++))
+                                               /* but only this sequence... */
+                                               if (c == subc && ISMAGIC(*s) &&
+                                                   ord(s[1]) == ord(']')) {
+                                                       /* accept, terminate */
+                                                       s += 2;
+                                                       break;
+                                               }
+                                       /* EOS without: reject bracket expr */
+                                       if (!c)
+                                               break;
+                                       /* continue; */
+                               }
+                               /* anything else just goes on */
                         }
-               } else if ((c & 0x80) && vstrchr("*+?@! ", c & 0x7f)) {
+               } else if ((c & 0x80) && ctype(c & 0x7F, C_PATMO | C_SPC)) {
+                       /* opening pattern */
                         saw_glob = true;
-                       if (in_bracket)
-                               bnest++;
-                       else
-                               nest++;
-               } else if (c == '|') {
-                       if (in_bracket && !bnest)
-                               /* *(a[foo|bar]) */
-                               return (0);
-               } else if (c == /*(*/ ')') {
-                       if (in_bracket) {
-                               if (!bnest--)
-                                       /* *(a[b)c] */
-                                       return (0);
-                       } else if (nest)
-                               nest--;
+                       ++nest;
+               } else if (ord(c) == ord(/*(*/ ')')) {
+                       /* closing pattern */
+                       if (nest)
+                               --nest;
                 }
-               /*
-                * else must be a MAGIC-MAGIC, or MAGIC-!,
-                * MAGIC--, MAGIC-], MAGIC-{, MAGIC-, MAGIC-}
-                */
         }
-       return (saw_glob && !in_bracket && !nest);
+       return (saw_glob && !nest);
  }
  
  /* Function must return either 0 or 1 (assumed by code for 0x80|'!') */
  static int
  do_gmatch(const unsigned char *s, const unsigned char *se,
-    const unsigned char *p, const unsigned char *pe)
+    const unsigned char *p, const unsigned char *pe,
+    const unsigned char *smin)
  {
-       unsigned char sc, pc;
+       unsigned char sc, pc, sl = 0;
         const unsigned char *prest, *psub, *pnext;
         const unsigned char *srest;
  
         if (s == NULL || p == NULL)
                 return (0);
+       if (s > smin && s <= se)
+               sl = s[-1];
         while (p < pe) {
                 pc = *p++;
                 sc = s < se ? *s : '\0';
@@ -778,15 +781,39 @@ do_gmatch(const unsigned char *s, const unsigned char *se,
                 if (!ISMAGIC(pc)) {
                         if (sc != pc)
                                 return (0);
+                       sl = sc;
                         continue;
                 }
-               switch (*p++) {
-               case '[':
+               switch (ord(*p++)) {
+               case ord('['):
+                       /* BSD cclass extension? */
+                       if (ISMAGIC(p[0]) && ord(p[1]) == ord('[') &&
+                           ord(p[2]) == ord(':') &&
+                           ctype((pc = p[3]), C_ANGLE) &&
+                           ord(p[4]) == ord(':') &&
+                           ISMAGIC(p[5]) && ord(p[6]) == ord(']') &&
+                           ISMAGIC(p[7]) && ord(p[8]) == ord(']')) {
+                               /* zero-length match */
+                               --s;
+                               p += 9;
+                               /* word begin? */
+                               if (ord(pc) == ord('<') &&
+                                   !ctype(sl, C_ALNUX) &&
+                                   ctype(sc, C_ALNUX))
+                                       break;
+                               /* word end? */
+                               if (ord(pc) == ord('>') &&
+                                   ctype(sl, C_ALNUX) &&
+                                   !ctype(sc, C_ALNUX))
+                                       break;
+                               /* neither */
+                               return (0);
+                       }
                         if (sc == 0 || (p = gmatch_cclass(p, sc)) == NULL)
                                 return (0);
                         break;
  
-               case '?':
+               case ord('?'):
                         if (sc == 0)
                                 return (0);
                         if (UTFMODE) {
@@ -795,39 +822,39 @@ do_gmatch(const unsigned char *s, const unsigned char *se,
                         }
                         break;
  
-               case '*':
+               case ord('*'):
                         if (p == pe)
                                 return (1);
                         s--;
                         do {
-                               if (do_gmatch(s, se, p, pe))
+                               if (do_gmatch(s, se, p, pe, smin))
                                         return (1);
                         } while (s++ < se);
                         return (0);
  
                 /**
-                * [*+?@!](pattern|pattern|..)
+                * [+*?@!](pattern|pattern|..)
                  * This is also needed for ${..%..}, etc.
                  */
  
                 /* matches one or more times */
-               case 0x80|'+':
+               case 0x80|ord('+'):
                 /* matches zero or more times */
-               case 0x80|'*':
+               case 0x80|ord('*'):
                         if (!(prest = pat_scan(p, pe, false)))
                                 return (0);
                         s--;
                         /* take care of zero matches */
-                       if (p[-1] == (0x80 | '*') &&
-                           do_gmatch(s, se, prest, pe))
+                       if (ord(p[-1]) == (0x80 | ord('*')) &&
+                           do_gmatch(s, se, prest, pe, smin))
                                 return (1);
                         for (psub = p; ; psub = pnext) {
                                 pnext = pat_scan(psub, pe, true);
                                 for (srest = s; srest <= se; srest++) {
-                                       if (do_gmatch(s, srest, psub, pnext - 2) &&
-                                           (do_gmatch(srest, se, prest, pe) ||
-                                           (s != srest && do_gmatch(srest,
-                                           se, p - 2, pe))))
+                                       if (do_gmatch(s, srest, psub, pnext - 2, smin) &&
+                                           (do_gmatch(srest, se, prest, pe, smin) ||
+                                           (s != srest &&
+                                           do_gmatch(srest, se, p - 2, pe, smin))))
                                                 return (1);
                                 }
                                 if (pnext == prest)
@@ -836,24 +863,24 @@ do_gmatch(const unsigned char *s, const unsigned char *se,
                         return (0);
  
                 /* matches zero or once */
-               case 0x80|'?':
+               case 0x80|ord('?'):
                 /* matches one of the patterns */
-               case 0x80|'@':
+               case 0x80|ord('@'):
                 /* simile for @ */
-               case 0x80|' ':
+               case 0x80|ord(' '):
                         if (!(prest = pat_scan(p, pe, false)))
                                 return (0);
                         s--;
                         /* Take care of zero matches */
-                       if (p[-1] == (0x80 | '?') &&
-                           do_gmatch(s, se, prest, pe))
+                       if (ord(p[-1]) == (0x80 | ord('?')) &&
+                           do_gmatch(s, se, prest, pe, smin))
                                 return (1);
                         for (psub = p; ; psub = pnext) {
                                 pnext = pat_scan(psub, pe, true);
                                 srest = prest == pe ? se : s;
                                 for (; srest <= se; srest++) {
-                                       if (do_gmatch(s, srest, psub, pnext - 2) &&
-                                           do_gmatch(srest, se, prest, pe))
+                                       if (do_gmatch(s, srest, psub, pnext - 2, smin) &&
+                                           do_gmatch(srest, se, prest, pe, smin))
                                                 return (1);
                                 }
                                 if (pnext == prest)
@@ -862,7 +889,7 @@ do_gmatch(const unsigned char *s, const unsigned char *se,
                         return (0);
  
                 /* matches none of the patterns */
-               case 0x80|'!':
+               case 0x80|ord('!'):
                         if (!(prest = pat_scan(p, pe, false)))
                                 return (0);
                         s--;
@@ -872,7 +899,7 @@ do_gmatch(const unsigned char *s, const unsigned char *se,
                                 for (psub = p; ; psub = pnext) {
                                         pnext = pat_scan(psub, pe, true);
                                         if (do_gmatch(s, srest, psub,
-                                           pnext - 2)) {
+                                           pnext - 2, smin)) {
                                                 matched = 1;
                                                 break;
                                         }
@@ -880,7 +907,7 @@ do_gmatch(const unsigned char *s, const unsigned char *se,
                                                 break;
                                 }
                                 if (!matched &&
-                                   do_gmatch(srest, se, prest, pe))
+                                   do_gmatch(srest, se, prest, pe, smin))
                                         return (1);
                         }
                         return (0);
@@ -890,55 +917,245 @@ do_gmatch(const unsigned char *s, const unsigned char *se,
                                 return (0);
                         break;
                 }
+               sl = sc;
         }
         return (s == se);
  }
  
+/*XXX this is a prime example for bsearch or a const hashtable */
+static const struct cclass {
+       const char *name;
+       uint32_t value;
+} cclasses[] = {
+       /* POSIX */
+       { "alnum",      C_ALNUM },
+       { "alpha",      C_ALPHA },
+       { "blank",      C_BLANK },
+       { "cntrl",      C_CNTRL },
+       { "digit",      C_DIGIT },
+       { "graph",      C_GRAPH },
+       { "lower",      C_LOWER },
+       { "print",      C_PRINT },
+       { "punct",      C_PUNCT },
+       { "space",      C_SPACE },
+       { "upper",      C_UPPER },
+       { "xdigit",     C_SEDEC },
+       /* BSD */
+       /* "<" and ">" are handled inline */
+       /* GNU bash */
+       { "ascii",      C_ASCII },
+       { "word",       C_ALNUX },
+       /* mksh */
+       { "sh_alias",   C_ALIAS },
+       { "sh_edq",     C_EDQ   },
+       { "sh_ifs",     C_IFS   },
+       { "sh_ifsws",   C_IFSWS },
+       { "sh_nl",      C_NL    },
+       { "sh_quote",   C_QUOTE },
+       /* sentinel */
+       { NULL,         0       }
+};
+
  static const unsigned char *
-gmatch_cclass(const unsigned char *p, unsigned char sub)
+gmatch_cclass(const unsigned char *pat, unsigned char sc)
  {
-       unsigned char c, d;
-       bool notp, found = false;
-       const unsigned char *orig_p = p;
-
-       if ((notp = tobool(ISMAGIC(*p) && *++p == '!')))
-               p++;
-       do {
-               c = *p++;
+       unsigned char c, subc, lc;
+       const unsigned char *p = pat, *s;
+       bool found = false;
+       bool negated = false;
+       char *subp;
+
+       /* check for negation */
+       if (ISMAGIC(p[0]) && ord(p[1]) == ord('!')) {
+               p += 2;
+               negated = true;
+       }
+       /* make initial ] non-MAGIC */
+       if (ISMAGIC(p[0]) && ord(p[1]) == ord(']'))
+               ++p;
+       /* iterate over bracket expression, debunk()ing on the fly */
+       while ((c = *p++)) {
+ nextc:
+               /* non-regular character? */
                 if (ISMAGIC(c)) {
-                       c = *p++;
-                       if ((c & 0x80) && !ISMAGIC(c)) {
-                               /* extended pattern matching: *+?@! */
-                               c &= 0x7F;
-                               /* XXX the ( char isn't handled as part of [] */
-                               if (c == ' ')
-                                       /* simile for @: plain (..) */
-                                       c = '(' /*)*/;
+                       /* MAGIC + NUL cannot happen */
+                       if (!(c = *p++))
+                               break;
+                       /* terminating bracket? */
+                       if (ord(c) == ord(']')) {
+                               /* accept and return */
+                               return (found != negated ? p : NULL);
+                       }
+                       /* sub-bracket expressions */
+                       if (ord(c) == ord('[') && (
+                           /* collating element? */
+                           ord(*p) == ord('.') ||
+                           /* equivalence class? */
+                           ord(*p) == ord('=') ||
+                           /* character class? */
+                           ord(*p) == ord(':'))) {
+                               /* must stop with exactly the same c */
+                               subc = *p++;
+                               /* save away start of substring */
+                               s = p;
+                               /* arbitrarily many chars in betwixt */
+                               while ((c = *p++))
+                                       /* but only this sequence... */
+                                       if (c == subc && ISMAGIC(*p) &&
+                                           ord(p[1]) == ord(']')) {
+                                               /* accept, terminate */
+                                               p += 2;
+                                               break;
+                                       }
+                               /* EOS without: reject bracket expr */
+                               if (!c)
+                                       break;
+                               /* debunk substring */
+                               strndupx(subp, s, p - s - 3, ATEMP);
+                               debunk(subp, subp, p - s - 3 + 1);
+ cclass_common:
+                               /* whither subexpression */
+                               if (ord(subc) == ord(':')) {
+                                       const struct cclass *cls = cclasses;
+
+                                       /* search for name in cclass list */
+                                       while (cls->name)
+                                               if (!strcmp(subp, cls->name)) {
+                                                       /* found, match? */
+                                                       if (ctype(sc,
+                                                           cls->value))
+                                                               found = true;
+                                                       /* break either way */
+                                                       break;
+                                               } else
+                                                       ++cls;
+                                       /* that's all here */
+                                       afree(subp, ATEMP);
+                                       continue;
+                               }
+                               /* collating element or equivalence class */
+                               /* Note: latter are treated as former */
+                               if (ctype(subp[0], C_ASCII) && !subp[1])
+                                       /* [.a.] where a is one ASCII char */
+                                       c = subp[0];
+                               else
+                                       /* force no match */
+                                       c = 0;
+                               /* no longer needed */
+                               afree(subp, ATEMP);
+                       } else if (!ISMAGIC(c) && (c & 0x80)) {
+                               /* 0x80|' ' is plain (...) */
+                               if ((c &= 0x7F) != ' ') {
+                                       /* check single match NOW */
+                                       if (sc == c)
+                                               found = true;
+                                       /* next character is (...) */
+                               }
+                               c = '(' /*)*/;
                         }
                 }
-               if (c == '\0')
-                       /* No closing ] - act as if the opening [ was quoted */
-                       return (sub == '[' ? orig_p : NULL);
-               if (ISMAGIC(p[0]) && p[1] == '-' &&
-                   (!ISMAGIC(p[2]) || p[3] != ']')) {
-                       /* MAGIC- */
-                       p += 2;
-                       d = *p++;
-                       if (ISMAGIC(d)) {
-                               d = *p++;
-                               if ((d & 0x80) && !ISMAGIC(d))
-                                       d &= 0x7f;
+               /* range expression? */
+               if (!(ISMAGIC(p[0]) && ord(p[1]) == ord('-') &&
+                   /* not terminating bracket? */
+                   (!ISMAGIC(p[2]) || ord(p[3]) != ord(']')))) {
+                       /* no, check single match */
+                       if (sc == c)
+                               /* note: sc is never NUL */
+                               found = true;
+                       /* do the next "first" character */
+                       continue;
+               }
+               /* save lower range bound */
+               lc = c;
+               /* skip over the range operator */
+               p += 2;
+               /* parse the upper bound like the first char above... almost */
+               subc = 0;
+               if (!(c = *p++))
+                       break;
+               /* non-regular character? */
+               if (ISMAGIC(c)) {
+                       /* MAGIC + NUL cannot happen */
+                       if (!(c = *p++))
+                               break;
+                       /* sub-bracket expressions */
+                       if (ord(c) == ord('[') && (
+                           /* collating element? */
+                           ord(*p) == ord('.') ||
+                           /* equivalence class? */
+                           ord(*p) == ord('=') ||
+                           /* character class? */
+                           ord(*p) == ord(':'))) {
+                               /* must stop with exactly the same c */
+                               subc = *p++;
+                               /* save away start of substring */
+                               s = p;
+                               /* arbitrarily many chars in betwixt */
+                               while ((c = *p++))
+                                       /* but only this sequence... */
+                                       if (c == subc && ISMAGIC(*p) &&
+                                           ord(p[1]) == ord(']')) {
+                                               /* accept, terminate */
+                                               p += 2;
+                                               break;
+                                       }
+                               /* EOS without: reject bracket expr */
+                               if (!c)
+                                       break;
+                               /* debunk substring */
+                               strndupx(subp, s, p - s - 3, ATEMP);
+                               debunk(subp, subp, p - s - 3 + 1);
+                               /* whither subexpression */
+                               if (ord(subc) == ord(':')) {
+                                       /* oops, not a range */
+
+                                       /* match single previous char */
+                                       if (lc && (sc == lc))
+                                               found = true;
+                                       /* match hyphen-minus */
+                                       if (ord(sc) == ord('-'))
+                                               found = true;
+                                       /* handle cclass common part */
+                                       goto cclass_common;
+                               }
+                               /* collating element or equivalence class */
+                               /* Note: latter are treated as former */
+                               if (ctype(subp[0], C_ASCII) && !subp[1])
+                                       /* [.a.] where a is one ASCII char */
+                                       c = subp[0];
+                               else
+                                       /* force no match */
+                                       c = 0;
+                               /* no longer needed */
+                               afree(subp, ATEMP);
+                               /* other meaning below */
+                               subc = 0;
+                       } else if (c == (0x80 | ' ')) {
+                               /* 0x80|' ' is plain (...) */
+                               c = '(' /*)*/;
+                       } else if (!ISMAGIC(c) && (c & 0x80)) {
+                               c &= 0x7F;
+                               subc = '(' /*)*/;
                         }
-                       /* POSIX says this is an invalid expression */
-                       if (c > d)
-                               return (NULL);
-               } else
-                       d = c;
-               if (c == sub || (c <= sub && sub <= d))
+               }
+               /* now do the actual range match check */
+               if (lc != 0 /* && c != 0 */ &&
+                   asciibetical(lc) <= asciibetical(sc) &&
+                   asciibetical(sc) <= asciibetical(c))
                         found = true;
-       } while (!(ISMAGIC(p[0]) && p[1] == ']'));
-
-       return ((found != notp) ? p+2 : NULL);
+               /* forced next character? */
+               if (subc) {
+                       c = subc;
+                       goto nextc;
+               }
+               /* otherwise, just go on with the pattern string */
+       }
+       /* if we broke here, the bracket expression was invalid */
+       if (ord(sc) == ord('['))
+               /* initial opening bracket as literal match */
+               return (pat);
+       /* or rather no match */
+       return (NULL);
  }
  
  /* Look for next ) or | (if match_sep) in *(foo|bar) pattern */
@@ -953,16 +1170,30 @@ pat_scan(const unsigned char *p, const unsigned char *pe, bool match_sep)
                 if ((*++p == /*(*/ ')' && nest-- == 0) ||
                     (*p == '|' && match_sep && nest == 0))
                         return (p + 1);
-               if ((*p & 0x80) && vstrchr("*+?@! ", *p & 0x7f))
+               if ((*p & 0x80) && ctype(*p & 0x7F, C_PATMO | C_SPC))
                         nest++;
         }
         return (NULL);
  }
  
  int
-xstrcmp(const void *p1, const void *p2)
+ascstrcmp(const void *s1, const void *s2)
  {
-       return (strcmp(*(const char * const *)p1, *(const char * const *)p2));
+       const uint8_t *cp1 = s1, *cp2 = s2;
+
+       while (*cp1 == *cp2) {
+               if (*cp1++ == '\0')
+                       return (0);
+               ++cp2;
+       }
+       return ((int)asciibetical(*cp1) - (int)asciibetical(*cp2));
+}
+
+int
+ascpstrcmp(const void *pstr1, const void *pstr2)
+{
+       return (ascstrcmp(*(const char * const *)pstr1,
+           *(const char * const *)pstr2));
  }
  
  /* Initialise a Getopt structure */
@@ -1032,7 +1263,7 @@ ksh_getopt(const char **argv, Getopt *go, const char *optionsp)
                 go->info |= flag == '-' ? GI_MINUS : GI_PLUS;
         }
         go->p++;
-       if (c == '?' || c == ':' || c == ';' || c == ',' || c == '#' ||
+       if (ctype(c, C_QUEST | C_COLON | C_HASH) || c == ';' || c == ',' ||
             !(o = cstrchr(optionsp, c))) {
                 if (optionsp[0] == ':') {
                         go->buf[0] = c;
@@ -1086,13 +1317,14 @@ ksh_getopt(const char **argv, Getopt *go, const char *optionsp)
                  * argument is missing.
                  */
                 if (argv[go->optind - 1][go->p]) {
-                       if (ksh_isdigit(argv[go->optind - 1][go->p])) {
+                       if (ctype(argv[go->optind - 1][go->p], C_DIGIT)) {
                                 go->optarg = argv[go->optind - 1] + go->p;
                                 go->p = 0;
                         } else
                                 go->optarg = NULL;
                 } else {
-                       if (argv[go->optind] && ksh_isdigit(argv[go->optind][0])) {
+                       if (argv[go->optind] &&
+                           ctype(argv[go->optind][0], C_DIGIT)) {
                                 go->optarg = argv[go->optind++];
                                 go->p = 0;
                         } else
@@ -1115,8 +1347,8 @@ print_value_quoted(struct shf *shf, const char *s)
         bool inquote = true;
  
         /* first, check whether any quotes are needed */
-       while ((c = *p++) >= 32)
-               if (ctype(c, C_QUOTE))
+       while (rtt2asc(c = *p++) >= 32)
+               if (ctype(c, C_QUOTE | C_SPC))
                         inquote = false;
  
         p = (const unsigned char *)s;
@@ -1154,6 +1386,7 @@ print_value_quoted(struct shf *shf, const char *s)
                 shf_putc('$', shf);
                 shf_putc('\'', shf);
                 while ((c = *p) != 0) {
+#ifndef MKSH_EBCDIC
                         if (c >= 0xC2) {
                                 n = utf_mbtowc(&wc, (const char *)p);
                                 if (n != (size_t)-1) {
@@ -1162,10 +1395,11 @@ print_value_quoted(struct shf *shf, const char *s)
                                         continue;
                                 }
                         }
+#endif
                         ++p;
                         switch (c) {
                         /* see unbksl() in this file for comments */
-                       case 7:
+                       case KSH_BEL:
                                 c = 'a';
                                 if (0)
                                         /* FALLTHROUGH */
@@ -1189,11 +1423,11 @@ print_value_quoted(struct shf *shf, const char *s)
                                   c = 't';
                                 if (0)
                                         /* FALLTHROUGH */
-                       case 11:
+                       case KSH_VTAB:
                                   c = 'v';
                                 if (0)
                                         /* FALLTHROUGH */
-                       case '\033':
+                       case KSH_ESC:
                                 /* take E not e because \e is \ in *roff */
                                   c = 'E';
                                 /* FALLTHROUGH */
@@ -1203,7 +1437,12 @@ print_value_quoted(struct shf *shf, const char *s)
                                 if (0)
                                         /* FALLTHROUGH */
                         default:
-                                 if (c < 32 || c > 0x7E) {
+#if defined(MKSH_EBCDIC) || defined(MKSH_FAUX_EBCDIC)
+                                 if (ksh_isctrl(c))
+#else
+                                 if (!ctype(c, C_PRINT))
+#endif
+                                   {
                                         /* FALLTHROUGH */
                         case '\'':
                                         shf_fprintf(shf, "\\%03o", c);
@@ -2154,13 +2393,7 @@ unbksl(bool cstyle, int (*fg)(void), void (*fp)(int))
         fc = (*fg)();
         switch (fc) {
         case 'a':
-               /*
-                * according to the comments in pdksh, \007 seems
-                * to be more portable than \a (due to HP-UX cc,
-                * Ultrix cc, old pcc, etc.) so we avoid the escape
-                * sequence altogether in mksh and assume ASCII
-                */
-               wc = 7;
+               wc = KSH_BEL;
                 break;
         case 'b':
                 wc = '\b';
@@ -2169,11 +2402,11 @@ unbksl(bool cstyle, int (*fg)(void), void (*fp)(int))
                 if (!cstyle)
                         goto unknown_escape;
                 c = (*fg)();
-               wc = CTRL(c);
+               wc = ksh_toctrl(c);
                 break;
         case 'E':
         case 'e':
-               wc = 033;
+               wc = KSH_ESC;
                 break;
         case 'f':
                 wc = '\f';
@@ -2188,8 +2421,7 @@ unbksl(bool cstyle, int (*fg)(void), void (*fp)(int))
                 wc = '\t';
                 break;
         case 'v':
-               /* assume ASCII here as well */
-               wc = 11;
+               wc = KSH_VTAB;
                 break;
         case '1':
         case '2':
@@ -2212,7 +2444,7 @@ unbksl(bool cstyle, int (*fg)(void), void (*fp)(int))
                 wc = 0;
                 i = 3;
                 while (i--)
-                       if ((c = (*fg)()) >= ord('0') && c <= ord('7'))
+                       if (ctype((c = (*fg)()), C_OCTAL))
                                 wc = (wc << 3) + ksh_numdig(c);
                         else {
                                 (*fp)(c);
@@ -2240,17 +2472,17 @@ unbksl(bool cstyle, int (*fg)(void), void (*fp)(int))
                 n = 0;
                 while (n < i || i == -1) {
                         wc <<= 4;
-                       if ((c = (*fg)()) >= ord('0') && c <= ord('9'))
-                               wc += ksh_numdig(c);
-                       else if (c >= ord('A') && c <= ord('F'))
-                               wc += ksh_numuc(c) + 10;
-                       else if (c >= ord('a') && c <= ord('f'))
-                               wc += ksh_numlc(c) + 10;
-                       else {
+                       if (!ctype((c = (*fg)()), C_SEDEC)) {
                                 wc >>= 4;
                                 (*fp)(c);
                                 break;
                         }
+                       if (ctype(c, C_DIGIT))
+                               wc += ksh_numdig(c);
+                       else if (ctype(c, C_UPPER))
+                               wc += ksh_numuc(c) + 10;
+                       else
+                               wc += ksh_numlc(c) + 10;
                         ++n;
                 }
                 if (!n)
diff --git a/src/mksh.1 b/src/mksh.1

index 6a2609a..aa67ac9 100644 (file)
--- a/src/mksh.1
+++ b/src/mksh.1
@@ -1,4 +1,4 @@
-.\" $MirOS: src/bin/mksh/mksh.1,v 1.442 2017/04/12 18:30:58 tg Exp $
+.\" $MirOS: src/bin/mksh/mksh.1,v 1.451 2017/08/16 21:40:14 tg Exp $
  .\" $OpenBSD: ksh.1,v 1.160 2015/07/04 13:27:04 feinerer Exp $
  .\"-
  .\" Copyright © 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009,
@@ -76,7 +76,7 @@
  .\" with -mandoc, it might implement .Mx itself, but we want to
  .\" use our own definition. And .Dd must come *first*, always.
  .\"
-.Dd $Mdocdate: April 12 2017 $
+.Dd $Mdocdate: August 16 2017 $
  .\"
  .\" Check which macro package we use, and do other -mdoc setup.
  .\"
@@ -2091,7 +2091,7 @@ this hack; it's derived from the original
  which did print the delimiter character so you were out of luck
  if you did not have any non-printing characters.
  .Pp
-Since Backslashes and other special characters may be
+Since backslashes and other special characters may be
  interpreted by the shell, to set
  .Ev PS1
  either escape the backslash itself
@@ -2106,7 +2106,7 @@ in reverse video
  .Pq colour would work, too ,
  in the prompt string:
  .Bd -literal -offset indent
-x=$(print \e\e001)
+x=$(print \e\e001) # otherwise unused char
  PS1="$x$(print \e\er)$x$(tput so)$x\e$PWD$x$(tput se)$x\*(Gt "
  .Ed
  .Pp
@@ -3066,11 +3066,13 @@ Without arguments,
  .Ic alias
  lists all aliases.
  For any name without a value, the existing alias is listed.
-Any name with a value defines an alias (see
+Any name with a value defines an alias; see
  .Sx Aliases
-above).
-.Li \&[A\-Za\-z0\-9_!%,@\-]
-are valid in names except they may not begin with a hyphen-minus.
+above.
+.Li \&[][A\-Za\-z0\-9_!%,.@:\-]
+are valid in names, except they may not begin with a hyphen-minus, and
+.Ic \&[[
+is not a valid alias name.
  .Pp
  When listing aliases, one of two formats is used.
  Normally, aliases are listed as
@@ -3162,7 +3164,8 @@ other trailing character will be processed afterwards.
  .Pp
  Control characters may be written using caret notation
  i.e. \*(haX represents Ctrl-X.
-Note that although only two prefix characters (usually ESC and \*(haX)
+The caret itself can be escaped by a backslash, which also escapes itself.
+Note that although only three prefix characters (usually ESC, \*(haX and NUL)
  are supported, some multi-character sequences can be supported.
  .Pp
  The following default bindings show how the arrow keys, the home, end and
@@ -4305,9 +4308,11 @@ Automatically enabled if the basename of the shell invocation begins with
  .Dq sh
  and this autodetection feature is compiled in
  .Pq not in MirBSD .
-As a side effect, setting this flag turns off
+As a side effect, setting this flag turns off the
  .Ic braceexpand
-mode, which can be turned back on manually, and
+and
+.Ic utf8\-mode
+flags, which can be turned back on manually, and
  .Ic sh
  mode (unless both are enabled at the same time).
  .It Fl o Ic sh
@@ -5382,6 +5387,11 @@ only lists signal names, all in one line.
  .Ic getopts
  does not accept options with a leading
  .Ql + .
+.It
+.Ic exec
+skips builtins, functions and other commands and uses a
+.Ev PATH
+search to determine the utility to execute.
  .El
  .Ss SH mode
  Compatibility mode; intended for use with legacy scripts that
@@ -5537,7 +5547,7 @@ Emacs key bindings:
  .No INTR Pq \*(haC ,
  .No \*(haG
  .Xc
-Abort the current command, empty the line buffer and
+Abort the current command, save it to the history, empty the line buffer and
  set the exit state to interrupted.
  .It auto\-insert: Op Ar n
  Simply causes the character to appear as literal input.
@@ -5572,7 +5582,8 @@ Uppercase the first ASCII character in the next
  words, leaving the cursor past the end of the last word.
  .It clear\-screen: \*(ha[\*(haL
  Prints a compile-time configurable sequence to clear the screen and home
-the cursor, redraws the entire prompt and the currently edited input line.
+the cursor, redraws the last line of the prompt string and the currently
+edited input line.
  The default sequence works for almost all standard terminals.
  .It comment: \*(ha[#
  If the current line does not begin with a comment character, one is added at
@@ -6434,7 +6445,7 @@ Undo all changes that have been made to the current line.
  They move as expected, both in insert and command mode.
  .It Ar intr No and Ar quit
  The interrupt and quit terminal characters cause the current line to be
-deleted and a new prompt to be printed.
+saved to the history and cleared, and a new prompt to be printed.
  .El
  .Sh FILES
  .Bl -tag -width XetcXsuid_profile -compact
@@ -6584,7 +6595,7 @@ and
  .An Michael Rendell .
  The effort of several projects, such as Debian and OpenBSD, and other
  contributors including our users, to improve the shell is appreciated.
-See the documentation, web site and CVS for details.
+See the documentation, website and source code (CVS) for details.
  .Pp
  .Nm mksh\-os2
  is developed by
@@ -6594,6 +6605,10 @@ is developed by
  is developed by
  .An Michael Langguth Aq Mt lan@scalaris.com .
  .Pp
+.Nm mksh Ns / Ns Tn z/OS
+is contributed by
+.An Daniel Richard G. Aq Mt skunk@iSKUNK.ORG .
+.Pp
  The BSD daemon is Copyright \(co Marshall Kirk McKusick.
  The complete legalese is at:
  .Pa http://www.mirbsd.org/TaC\-mksh.txt
@@ -6633,12 +6648,14 @@ supports only the
  locale.
  .Nm mksh Ns 's
  .Ic utf8\-mode
+.Em must
+be disabled in POSIX mode, and it
  only supports the Unicode BMP (Basic Multilingual Plane) and maps
  raw octets into the U+EF80..U+EFFF wide character range; compare
  .Sx Arithmetic expressions .
  The following
  .Tn POSIX
-.Nm sh
+.Nm sh Ns -compatible
  code toggles the
  .Ic utf8\-mode
  option dependent on the current
@@ -6680,7 +6697,7 @@ for the in-memory portion of the history is slow, should use
  .Xr memmove 3 .
  .Pp
  This document attempts to describe
-.Nm mksh\ R55
+.Nm mksh\ R56
  and up,
  .\" with vendor patches from insert-your-name-here,
  compiled without any options impacting functionality, such as
@@ -6881,3 +6898,9 @@ commands starting with what was already entered.
  .Nm
  separates the shortcuts: Cursor Up goes up one command
  and PgUp searches the history as described above.
+.Ss "My question is not answered here!"
+Check
+.Pa http://www.mirbsd.org/mksh\-faq.htm
+which contains a collection of frequently asked questions about
+.Nm
+in general, for packagers, etc., whereas the questions above are aimed at users.
diff --git a/src/os2.c b/src/os2.c

index 5d39630..fc27d5a 100644 (file)
--- a/src/os2.c
+++ b/src/os2.c
@@ -1,6 +1,8 @@
  /*-
   * Copyright (c) 2015
   *     KO Myung-Hun <komh@chollian.net>
+ * Copyright (c) 2017
+ *     mirabilos <m@mirbsd.org>
   *
   * Provided that these terms and disclaimer and all copyright notices
   * are retained or reproduced in an accompanying document, permission
@@ -28,7 +30,7 @@
  #include <unistd.h>
  #include <process.h>
  
-__RCSID("$MirOS: src/bin/mksh/os2.c,v 1.1 2017/04/02 15:00:44 tg Exp $");
+__RCSID("$MirOS: src/bin/mksh/os2.c,v 1.2 2017/04/29 22:04:29 tg Exp $");
  
  static char *remove_trailing_dots(char *);
  static int access_stat_ex(int (*)(), const char *, void *);
@@ -247,9 +249,9 @@ setextlibpath(const char *name, const char *val)
  static char *
  remove_trailing_dots(char *name)
  {
-       char *p;
+       char *p = strnul(name);
  
-       for (p = name + strlen(name); --p > name && *p == '.'; )
+       while (--p > name && *p == '.')
                 /* nothing */;
  
         if (*p != '.' && *p != '/' && *p != '\\' && *p != ':')
diff --git a/src/sh.h b/src/sh.h

index 5b36378..88883cb 100644 (file)
--- a/src/sh.h
+++ b/src/sh.h
@@ -112,6 +112,13 @@
  #include <wchar.h>
  #endif
  
+/* monkey-patch known-bad offsetof versions to quell a warning */
+#if (defined(__KLIBC__) || defined(__dietlibc__)) && \
+    ((defined(__GNUC__) && (__GNUC__ > 3)) || defined(__NWCC__))
+#undef offsetof
+#define offsetof(s, e)         __builtin_offsetof(s, e)
+#endif
+
  #undef __attribute__
  #if HAVE_ATTRIBUTE_BOUNDED
  #define MKSH_A_BOUNDED(x,y,z)  __attribute__((__bounded__(x, y, z)))
@@ -175,9 +182,9 @@
  #endif
  
  #ifdef EXTERN
-__RCSID("$MirOS: src/bin/mksh/sh.h,v 1.808 2017/04/12 17:38:46 tg Exp $");
+__RCSID("$MirOS: src/bin/mksh/sh.h,v 1.841 2017/08/29 13:38:31 tg Exp $");
  #endif
-#define MKSH_VERSION "R55 2017/04/12"
+#define MKSH_VERSION "R56 2017/08/29"
  
  /* arithmetic types: C implementation */
  #if !HAVE_CAN_INTTYPES
@@ -257,6 +264,23 @@ typedef MKSH_TYPEDEF_SSIZE_T ssize_t;
  
  #ifndef MKSH_INCLUDES_ONLY
  
+/* EBCDIC fun */
+
+/* see the large comment in shf.c for an EBCDIC primer */
+
+#if defined(MKSH_FOR_Z_OS) && defined(__MVS__) && defined(__IBMC__) && defined(__CHARSET_LIB)
+# if !__CHARSET_LIB && !defined(MKSH_EBCDIC)
+#  error "Please compile with Build.sh -E for EBCDIC!"
+# endif
+# if __CHARSET_LIB && defined(MKSH_EBCDIC)
+#  error "Please compile without -E argument to Build.sh for ASCII!"
+# endif
+# if __CHARSET_LIB && !defined(_ENHANCED_ASCII_EXT)
+   /* go all-out on ASCII */
+#  define _ENHANCED_ASCII_EXT 0xFFFFFFFF
+# endif
+#endif
+
  /* extra types */
  
  /* getrusage does not exist on OS/2 kLIBC */
@@ -349,6 +373,8 @@ struct rusage {
  #define ksh_NSIG (_SIGMAX + 1)
  #elif defined(NSIG_MAX)
  #define ksh_NSIG (NSIG_MAX)
+#elif defined(MKSH_FOR_Z_OS)
+#define ksh_NSIG 40
  #else
  # error Please have your platform define NSIG.
  #endif
@@ -487,6 +513,23 @@ extern int __cdecl setegid(gid_t);
  #define ISTRIP         0
  #endif
  
+#ifdef MKSH_EBCDIC
+#define KSH_BEL                '\a'
+#define KSH_ESC                047
+#define KSH_ESC_STRING "\047"
+#define KSH_VTAB       '\v'
+#else
+/*
+ * According to the comments in pdksh, \007 seems to be more portable
+ * than \a (HP-UX cc, Ultrix cc, old pcc, etc.) so we avoid the escape
+ * sequence if ASCII can be assumed.
+ */
+#define KSH_BEL                7
+#define KSH_ESC                033
+#define KSH_ESC_STRING "\033"
+#define KSH_VTAB       11
+#endif
+
  
  /* some useful #defines */
  #ifdef EXTERN
@@ -498,16 +541,22 @@ extern int __cdecl setegid(gid_t);
  #endif
  
  /* define bit in flag */
-#define BIT(i)         (1 << (i))
+#define BIT(i)         (1U << (i))
  #define NELEM(a)       (sizeof(a) / sizeof((a)[0]))
  
  /*
   * Make MAGIC a char that might be printed to make bugs more obvious, but
   * not a char that is used often. Also, can't use the high bit as it causes
   * portability problems (calling strchr(x, 0x80 | 'x') is error prone).
+ *
+ * MAGIC can be followed by MAGIC (to escape the octet itself) or one of:
+ * ' !)*,-?[]{|}' 0x80|' !*+?@' (probably… hysteric raisins abound)
+ *
+ * The |0x80 is likely unsafe on EBCDIC :( though the listed chars are
+ * low-bit7 at least on cp1047 so YMMV
   */
-#define MAGIC          (7)     /* prefix for *?[!{,} during expand */
-#define ISMAGIC(c)     ((unsigned char)(c) == MAGIC)
+#define MAGIC          KSH_BEL /* prefix for *?[!{,} during expand */
+#define ISMAGIC(c)     (ord(c) == ord(MAGIC))
  
  EXTERN const char *safe_prompt; /* safe prompt if PS1 substitution fails */
  
@@ -521,17 +570,21 @@ EXTERN const char *safe_prompt; /* safe prompt if PS1 substitution fails */
  #else
  #define KSH_VERSIONNAME_TEXTMODE       ""
  #endif
+#ifdef MKSH_EBCDIC
+#define KSH_VERSIONNAME_EBCDIC         " +EBCDIC"
+#else
+#define KSH_VERSIONNAME_EBCDIC         ""
+#endif
  #ifndef KSH_VERSIONNAME_VENDOR_EXT
  #define KSH_VERSIONNAME_VENDOR_EXT     ""
  #endif
  EXTERN const char initvsn[] E_INIT("KSH_VERSION=@(#)" KSH_VERSIONNAME_ISLEGACY \
-    " KSH " MKSH_VERSION KSH_VERSIONNAME_TEXTMODE KSH_VERSIONNAME_VENDOR_EXT);
+    " KSH " MKSH_VERSION KSH_VERSIONNAME_EBCDIC KSH_VERSIONNAME_TEXTMODE \
+    KSH_VERSIONNAME_VENDOR_EXT);
  #define KSH_VERSION    (initvsn + /* "KSH_VERSION=@(#)" */ 16)
  
  EXTERN const char digits_uc[] E_INIT("0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ");
  EXTERN const char digits_lc[] E_INIT("0123456789abcdefghijklmnopqrstuvwxyz");
-#define letters_uc (digits_uc + 10)
-#define letters_lc (digits_lc + 10)
  
  /*
   * Evil hack for const correctness due to API brokenness
@@ -585,15 +638,12 @@ char *ucstrstr(char *, const char *);
  #endif
  
  #if defined(DEBUG) || defined(__COVERITY__)
-#define mkssert(e)     do { if (!(e)) exit(255); } while (/* CONSTCOND */ 0)
  #ifndef DEBUG_LEAKS
  #define DEBUG_LEAKS
  #endif
-#else
-#define mkssert(e)     do { } while (/* CONSTCOND */ 0)
  #endif
  
-#if (!defined(MKSH_BUILDMAKEFILE4BSD) && !defined(MKSH_BUILDSH)) || (MKSH_BUILD_R != 551)
+#if (!defined(MKSH_BUILDMAKEFILE4BSD) && !defined(MKSH_BUILDSH)) || (MKSH_BUILD_R != 562)
  #error Must run Build.sh to compile this.
  extern void thiswillneverbedefinedIhope(void);
  int
@@ -605,7 +655,7 @@ im_sorry_dave(void)
  #endif
  
  /* use this ipv strchr(s, 0) but no side effects in s! */
-#define strnul(s)      ((s) + strlen(s))
+#define strnul(s)      ((s) + strlen((const void *)(s)))
  
  #define utf_ptradjx(src, dst) do {                                     \
         (dst) = (src) + utf_ptradj(src);                                \
@@ -621,7 +671,7 @@ im_sorry_dave(void)
  #else
  /* be careful to evaluate arguments only once! */
  #define strdupx(d, s, ap) do {                                         \
-       const char *strdup_src = (s);                                   \
+       const char *strdup_src = (const void *)(s);                     \
         char *strdup_dst = NULL;                                        \
                                                                         \
         if (strdup_src != NULL) {                                       \
@@ -632,7 +682,7 @@ im_sorry_dave(void)
         (d) = strdup_dst;                                               \
  } while (/* CONSTCOND */ 0)
  #define strndupx(d, s, n, ap) do {                                     \
-       const char *strdup_src = (s);                                   \
+       const char *strdup_src = (const void *)(s);                     \
         char *strdup_dst = NULL;                                        \
                                                                         \
         if (strdup_src != NULL) {                                       \
@@ -753,8 +803,8 @@ enum sh_flag {
  struct sretrace_info;
  struct yyrecursive_state;
  
-EXTERN struct sretrace_info *retrace_info E_INIT(NULL);
-EXTERN int subshell_nesting_type E_INIT(0);
+EXTERN struct sretrace_info *retrace_info;
+EXTERN int subshell_nesting_type;
  
  extern struct env {
         ALLOC_ITEM alloc_INT;   /* internal, do not touch */
@@ -865,8 +915,8 @@ EXTERN char null[] E_INIT("");
  EXTERN const char T4spaces[] E_INIT("    ");
  #define T1space (Treal_sp2 + 5)
  #define Tcolsp (Tf_sD_ + 2)
-EXTERN const char TC_LEX1[] E_INIT("|&;<>() \t\n");
-#define TC_IFSWS (TC_LEX1 + 7)
+#define TC_IFSWS (TinitIFS + 4)
+EXTERN const char TinitIFS[] E_INIT("IFS= \t\n");
  EXTERN const char TFCEDIT_dollaru[] E_INIT("${FCEDIT:-/bin/ed} $_");
  #define Tspdollaru (TFCEDIT_dollaru + 18)
  EXTERN const char Tsgdot[] E_INIT("*=.");
@@ -1026,8 +1076,8 @@ EXTERN const char T_devtty[] E_INIT("/dev/tty");
  #define T4spaces "    "
  #define T1space " "
  #define Tcolsp ": "
-#define TC_LEX1 "|&;<>() \t\n"
  #define TC_IFSWS " \t\n"
+#define TinitIFS "IFS= \t\n"
  #define TFCEDIT_dollaru "${FCEDIT:-/bin/ed} $_"
  #define Tspdollaru " $_"
  #define Tsgdot "*=."
@@ -1277,7 +1327,7 @@ enum tmout_enum {
         TMOUT_LEAVING           /* have timed out */
  };
  EXTERN unsigned int ksh_tmout;
-EXTERN enum tmout_enum ksh_tmout_state E_INIT(TMOUT_EXECUTING);
+EXTERN enum tmout_enum ksh_tmout_state;
  
  /* For "You have stopped jobs" message */
  EXTERN bool really_exit;
@@ -1285,39 +1335,178 @@ EXTERN bool really_exit;
  /*
   * fast character classes
   */
-#define C_ALPHX         BIT(0)         /* A-Za-z_ */
-#define C_DIGIT         BIT(1)         /* 0-9 */
-#define C_LEX1  BIT(2)         /* \t \n\0|&;<>() */
-#define C_VAR1  BIT(3)         /* *@#!$-? */
-#define C_IFSWS         BIT(4)         /* \t \n (IFS white space) */
-#define C_SUBOP1 BIT(5)                /* "=-+?" */
-#define C_QUOTE         BIT(6)         /* \t\n "#$&'()*;<=>?[\]`| (needing quoting) */
-#define C_IFS   BIT(7)         /* $IFS */
-
-extern unsigned char chtypes[];
-
-#define ctype(c, t)    tobool(chtypes[(unsigned char)(c)] & (t))
-#define ord(c)         ((int)(unsigned char)(c))
-#define ksh_issubop2(c)        tobool((c) == ord('#') || (c) == ord('%'))
-#define ksh_isalias(c) (ctype((c), C_ALPHX | C_DIGIT) || (c) == ord('!') || \
-                           (c) == ord('%') || (c) == ord(',') || \
-                           (c) == ord('@') || (c) == ord('-'))
-#define ksh_isalpha(c) (ctype((c), C_ALPHX) && (c) != ord('_'))
-#define ksh_isalphx(c) ctype((c), C_ALPHX)
-#define ksh_isalnux(c) ctype((c), C_ALPHX | C_DIGIT)
-#define ksh_isdigit(c) ctype((c), C_DIGIT)
-#define ksh_islower(c) (((c) >= 'a') && ((c) <= 'z'))
-#define ksh_isupper(c) (((c) >= 'A') && ((c) <= 'Z'))
-#define ksh_tolower(c) (ksh_isupper(c) ? (c) - 'A' + 'a' : (c))
-#define ksh_toupper(c) (ksh_islower(c) ? (c) - 'a' + 'A' : (c))
-#define ksh_isdash(s)  (((s)[0] == '-') && ((s)[1] == '\0'))
-#define ksh_isspace(c) ((((c) >= 0x09) && ((c) <= 0x0D)) || ((c) == 0x20))
-#define ksh_eq(c,u,l)  (((c) | 0x20) == (l))
-#define ksh_numdig(c)  ((c) - ord('0'))
-#define ksh_numuc(c)   ((c) - ord('A'))
-#define ksh_numlc(c)   ((c) - ord('a'))
-
-EXTERN int ifs0 E_INIT(' ');   /* for "$*" */
+
+/* internal types, do not reference */
+
+/* initially empty — filled at runtime from $IFS */
+#define CiIFS  BIT(0)
+#define CiCNTRL        BIT(1)  /* \x01‥\x08\x0E‥\x1F\x7F   */
+#define CiUPPER        BIT(2)  /* A‥Z                                */
+#define CiLOWER        BIT(3)  /* a‥z                                */
+#define CiHEXLT        BIT(4)  /* A‥Fa‥f                   */
+#define CiOCTAL        BIT(5)  /* 0‥7                                */
+#define CiQCL  BIT(6)  /* &();|                        */
+#define CiALIAS        BIT(7)  /* !,.@                         */
+#define CiQCX  BIT(8)  /* *[\\                         */
+#define CiVAR1 BIT(9)  /* !*@                          */
+#define CiQCM  BIT(10) /* /^~                          */
+#define CiDIGIT        BIT(11) /* 89                           */
+#define CiQC   BIT(12) /* "'                           */
+#define CiSPX  BIT(13) /* \x0B\x0C                     */
+#define CiCURLY        BIT(14) /* {}                           */
+#define CiANGLE        BIT(15) /* <>                           */
+#define CiNUL  BIT(16) /* \x00                         */
+#define CiTAB  BIT(17) /* \x09                         */
+#define CiNL   BIT(18) /* \x0A                         */
+#define CiCR   BIT(19) /* \x0D                         */
+#define CiSP   BIT(20) /* \x20                         */
+#define CiHASH BIT(21) /* #                            */
+#define CiSS   BIT(22) /* $                            */
+#define CiPERCT        BIT(23) /* %                            */
+#define CiPLUS BIT(24) /* +                            */
+#define CiMINUS        BIT(25) /* -                            */
+#define CiCOLON        BIT(26) /* :                            */
+#define CiEQUAL        BIT(27) /* =                            */
+#define CiQUEST        BIT(28) /* ?                            */
+#define CiBRACK        BIT(29) /* []                           */
+#define CiUNDER        BIT(30) /* _                            */
+#define CiGRAVE        BIT(31) /* `                            */
+/* out of space, but one for *@ would make sense, possibly others */
+
+/* compile-time initialised, ASCII only */
+extern const uint32_t tpl_ctypes[128];
+/* run-time, contains C_IFS as well, full 2⁸ octet range */
+EXTERN uint32_t ksh_ctypes[256];
+/* first octet of $IFS, for concatenating "$*" */
+EXTERN char ifs0;
+
+/* external types */
+
+/* !%,-.0‥9:@A‥Z[]_a‥z   valid characters in alias names */
+#define C_ALIAS        (CiALIAS | CiBRACK | CiCOLON | CiDIGIT | CiLOWER | CiMINUS | CiOCTAL | CiPERCT | CiUNDER | CiUPPER)
+/* 0‥9A‥Za‥z             alphanumerical */
+#define C_ALNUM        (CiDIGIT | CiLOWER | CiOCTAL | CiUPPER)
+/* 0‥9A‥Z_a‥z            alphanumerical plus underscore (“word character”) */
+#define C_ALNUX        (CiDIGIT | CiLOWER | CiOCTAL | CiUNDER | CiUPPER)
+/* A‥Za‥z          alphabetical (upper plus lower) */
+#define C_ALPHA        (CiLOWER | CiUPPER)
+/* A‥Z_a‥z         alphabetical plus underscore (identifier lead) */
+#define C_ALPHX        (CiLOWER | CiUNDER | CiUPPER)
+/* \x01‥\x7F         7-bit ASCII except NUL */
+#define C_ASCII (CiALIAS | CiANGLE | CiBRACK | CiCNTRL | CiCOLON | CiCR | CiCURLY | CiDIGIT | CiEQUAL | CiGRAVE | CiHASH | CiLOWER | CiMINUS | CiNL | CiOCTAL | CiPERCT | CiPLUS | CiQC | CiQCL | CiQCM | CiQCX | CiQUEST | CiSP | CiSPX | CiSS | CiTAB | CiUNDER | CiUPPER)
+/* \x09\x20            tab and space */
+#define C_BLANK        (CiSP | CiTAB)
+/* \x09\x20"'          separator for completion */
+#define C_CFS  (CiQC | CiSP | CiTAB)
+/* \x00‥\x1F\x7F     POSIX control characters */
+#define C_CNTRL        (CiCNTRL | CiCR | CiNL | CiNUL | CiSPX | CiTAB)
+/* 0‥9                       decimal digits */
+#define C_DIGIT        (CiDIGIT | CiOCTAL)
+/* &();`|                      editor x_locate_word() command */
+#define C_EDCMD        (CiGRAVE | CiQCL)
+/* \x09\x0A\x20"&'():;<=>`|    editor non-word characters */
+#define C_EDNWC        (CiANGLE | CiCOLON | CiEQUAL | CiGRAVE | CiNL | CiQC | CiQCL | CiSP | CiTAB)
+/* "#$&'()*:;<=>?[\\`{|}       editor quotes for tab completion */
+#define C_EDQ  (CiANGLE | CiCOLON | CiCURLY | CiEQUAL | CiGRAVE | CiHASH | CiQC | CiQCL | CiQCX | CiQUEST | CiSS)
+/* !‥~                       POSIX graphical (alphanumerical plus punctuation) */
+#define C_GRAPH        (C_PUNCT | CiDIGIT | CiLOWER | CiOCTAL | CiUPPER)
+/* A‥Fa‥f          hex letter */
+#define C_HEXLT        CiHEXLT
+/* \x00 + $IFS         IFS whitespace, IFS non-whitespace, NUL */
+#define C_IFS  (CiIFS | CiNUL)
+/* \x09\x0A\x20                IFS whitespace */
+#define C_IFSWS        (CiNL | CiSP | CiTAB)
+/* \x09\x0A\x20&();<>| (for the lexer) */
+#define C_LEX1 (CiANGLE | CiNL | CiQCL | CiSP | CiTAB)
+/* a‥z                       lowercase letters */
+#define C_LOWER        CiLOWER
+/* not alnux or dollar separator for motion */
+#define C_MFS  (CiALIAS | CiANGLE | CiBRACK | CiCNTRL | CiCOLON | CiCR | CiCURLY | CiEQUAL | CiGRAVE | CiHASH | CiMINUS | CiNL | CiNUL | CiPERCT | CiPLUS | CiQC | CiQCL | CiQCM | CiQCX | CiQUEST | CiSP | CiSPX | CiTAB)
+/* 0‥7                       octal digit */
+#define C_OCTAL        CiOCTAL
+/* !*+?@               pattern magical operator, except space */
+#define C_PATMO        (CiPLUS | CiQUEST | CiVAR1)
+/* \x20‥~            POSIX printable characters (graph plus space) */
+#define C_PRINT        (C_GRAPH | CiSP)
+/* !"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~   POSIX punctuation */
+#define C_PUNCT        (CiALIAS | CiANGLE | CiBRACK | CiCOLON | CiCURLY | CiEQUAL | CiGRAVE | CiHASH | CiMINUS | CiPERCT | CiPLUS | CiQC | CiQCL | CiQCM | CiQCX | CiQUEST | CiSS | CiUNDER)
+/* \x09\x0A"#$&'()*;<=>?[\\]`| characters requiring quoting, minus space */
+#define C_QUOTE        (CiANGLE | CiBRACK | CiEQUAL | CiGRAVE | CiHASH | CiNL | CiQC | CiQCL | CiQCX | CiQUEST | CiSS | CiTAB)
+/* 0‥9A‥Fa‥f             hexadecimal digit */
+#define C_SEDEC        (CiDIGIT | CiHEXLT | CiOCTAL)
+/* \x09‥\x0D\x20     POSIX space class */
+#define C_SPACE        (CiCR | CiNL | CiSP | CiSPX | CiTAB)
+/* +-=?                        substitution operations with word */
+#define C_SUB1 (CiEQUAL | CiMINUS | CiPLUS | CiQUEST)
+/* #%                  substitution operations with pattern */
+#define C_SUB2 (CiHASH | CiPERCT)
+/* A‥Z                       uppercase letters */
+#define C_UPPER        CiUPPER
+/* !#$*-?@             substitution parameters, other than positional */
+#define C_VAR1 (CiHASH | CiMINUS | CiQUEST | CiSS | CiVAR1)
+
+/* individual chars you might like */
+#define C_ANGLE        CiANGLE         /* <>   angle brackets */
+#define C_COLON        CiCOLON         /* :    colon */
+#define C_CR   CiCR            /* \x0D ASCII carriage return */
+#define C_DOLAR        CiSS            /* $    dollar sign */
+#define C_EQUAL        CiEQUAL         /* =    equals sign */
+#define C_GRAVE        CiGRAVE         /* `    accent gravis */
+#define C_HASH CiHASH          /* #    hash sign */
+#define C_LF   CiNL            /* \x0A ASCII line feed */
+#define C_MINUS        CiMINUS         /* -    hyphen-minus */
+#ifdef MKSH_WITH_TEXTMODE
+#define C_NL   (CiNL | CiCR)   /*      CR or LF under OS/2 TEXTMODE */
+#else
+#define C_NL   CiNL            /*      LF only like under Unix */
+#endif
+#define C_NUL  CiNUL           /* \x00 ASCII NUL */
+#define C_PLUS CiPLUS          /* +    plus sign */
+#define C_QC   CiQC            /* "'   quote characters */
+#define C_QUEST        CiQUEST         /* ?    question mark */
+#define C_SPC  CiSP            /* \x20 ASCII space */
+#define C_TAB  CiTAB           /* \x09 ASCII horizontal tabulator */
+#define C_UNDER        CiUNDER         /* _    underscore */
+
+/* identity transform of octet */
+#define ord(c)         ((unsigned int)(unsigned char)(c))
+#if defined(MKSH_EBCDIC) || defined(MKSH_FAUX_EBCDIC)
+EXTERN unsigned short ebcdic_map[256];
+EXTERN unsigned char ebcdic_rtt_toascii[256];
+EXTERN unsigned char ebcdic_rtt_fromascii[256];
+extern void ebcdic_init(void);
+/* one-way to-ascii-or-high conversion, for POSIX locale ordering */
+#define asciibetical(c)        ((unsigned int)ebcdic_map[(unsigned char)(c)])
+/* two-way round-trip conversion, for general use */
+#define rtt2asc(c)     ebcdic_rtt_toascii[(unsigned char)(c)]
+#define asc2rtt(c)     ebcdic_rtt_fromascii[(unsigned char)(c)]
+/* case-independent char comparison */
+#define ksh_eq(c,u,l)  (ord(c) == ord(u) || ord(c) == ord(l))
+#else
+#define asciibetical(c)        ord(c)
+#define rtt2asc(c)     ((unsigned char)(c))
+#define asc2rtt(c)     ((unsigned char)(c))
+#define ksh_eq(c,u,l)  ((ord(c) | 0x20) == ord(l))
+#endif
+/* control character foo */
+#ifdef MKSH_EBCDIC
+#define ksh_isctrl(c)  (ord(c) < 0x40 || ord(c) == 0xFF)
+#else
+#define ksh_isctrl(c)  ((ord(c) & 0x7F) < 0x20 || (c) == 0x7F)
+#endif
+/* new fast character classes */
+#define ctype(c,t)     tobool(ksh_ctypes[ord(c)] & (t))
+/* helper functions */
+#define ksh_isdash(s)  tobool(ord((s)[0]) == '-' && ord((s)[1]) == '\0')
+/* invariant distance even in EBCDIC */
+#define ksh_tolower(c) (ctype(c, C_UPPER) ? (c) - 'A' + 'a' : (c))
+#define ksh_toupper(c) (ctype(c, C_LOWER) ? (c) - 'a' + 'A' : (c))
+/* strictly speaking rtt2asc() here, but this works even in EBCDIC */
+#define ksh_numdig(c)  (ord(c) - ord('0'))
+#define ksh_numuc(c)   (rtt2asc(c) - rtt2asc('A'))
+#define ksh_numlc(c)   (rtt2asc(c) - rtt2asc('a'))
+#define ksh_toctrl(c)  asc2rtt(ord(c) == ord('?') ? 0x7F : rtt2asc(c) & 0x9F)
+#define ksh_unctrl(c)  asc2rtt(rtt2asc(c) ^ 0x40U)
  
  /* Argument parsing for built-in commands and getopts command */
  
@@ -1990,12 +2179,77 @@ typedef union {
  
  #define HERES          10      /* max number of << in line */
  
-#undef CTRL
-#define        CTRL(x)         ((x) == '?' ? 0x7F : (x) & 0x1F)        /* ASCII */
-#define        UNCTRL(x)       ((x) ^ 0x40)                            /* ASCII */
-#define        ISCTRL(x)       (((signed char)((uint8_t)(x) + 1)) < 33)
-
-#define IDENT          64
+#ifdef MKSH_EBCDIC
+#define CTRL_AT        (0x00U)
+#define CTRL_A (0x01U)
+#define CTRL_B (0x02U)
+#define CTRL_C (0x03U)
+#define CTRL_D (0x37U)
+#define CTRL_E (0x2DU)
+#define CTRL_F (0x2EU)
+#define CTRL_G (0x2FU)
+#define CTRL_H (0x16U)
+#define CTRL_I (0x05U)
+#define CTRL_J (0x15U)
+#define CTRL_K (0x0BU)
+#define CTRL_L (0x0CU)
+#define CTRL_M (0x0DU)
+#define CTRL_N (0x0EU)
+#define CTRL_O (0x0FU)
+#define CTRL_P (0x10U)
+#define CTRL_Q (0x11U)
+#define CTRL_R (0x12U)
+#define CTRL_S (0x13U)
+#define CTRL_T (0x3CU)
+#define CTRL_U (0x3DU)
+#define CTRL_V (0x32U)
+#define CTRL_W (0x26U)
+#define CTRL_X (0x18U)
+#define CTRL_Y (0x19U)
+#define CTRL_Z (0x3FU)
+#define CTRL_BO        (0x27U)
+#define CTRL_BK        (0x1CU)
+#define CTRL_BC        (0x1DU)
+#define CTRL_CA        (0x1EU)
+#define CTRL_US        (0x1FU)
+#define CTRL_QM        (0x07U)
+#else
+#define CTRL_AT        (0x00U)
+#define CTRL_A (0x01U)
+#define CTRL_B (0x02U)
+#define CTRL_C (0x03U)
+#define CTRL_D (0x04U)
+#define CTRL_E (0x05U)
+#define CTRL_F (0x06U)
+#define CTRL_G (0x07U)
+#define CTRL_H (0x08U)
+#define CTRL_I (0x09U)
+#define CTRL_J (0x0AU)
+#define CTRL_K (0x0BU)
+#define CTRL_L (0x0CU)
+#define CTRL_M (0x0DU)
+#define CTRL_N (0x0EU)
+#define CTRL_O (0x0FU)
+#define CTRL_P (0x10U)
+#define CTRL_Q (0x11U)
+#define CTRL_R (0x12U)
+#define CTRL_S (0x13U)
+#define CTRL_T (0x14U)
+#define CTRL_U (0x15U)
+#define CTRL_V (0x16U)
+#define CTRL_W (0x17U)
+#define CTRL_X (0x18U)
+#define CTRL_Y (0x19U)
+#define CTRL_Z (0x1AU)
+#define CTRL_BO        (0x1BU)
+#define CTRL_BK        (0x1CU)
+#define CTRL_BC        (0x1DU)
+#define CTRL_CA        (0x1EU)
+#define CTRL_US        (0x1FU)
+#define CTRL_QM        (0x7FU)
+#endif
+
+#define IDENT  64
  
  EXTERN Source *source;         /* yyparse/yylex source */
  EXTERN YYSTYPE yylval;         /* result from yylex */
@@ -2273,8 +2527,6 @@ void DF(const char *, ...)
      MKSH_A_FORMAT(__printf__, 1, 2);
  #endif
  /* misc.c */
-void setctypes(const char *, int);
-void initctypes(void);
  size_t option(const char *) MKSH_A_PURE;
  char *getoptions(void);
  void change_flag(enum sh_flag, int, bool);
@@ -2282,8 +2534,9 @@ void change_xtrace(unsigned char, bool);
  int parse_args(const char **, int, bool *);
  int getn(const char *, int *);
  int gmatchx(const char *, const char *, bool);
-int has_globbing(const char *, const char *) MKSH_A_PURE;
-int xstrcmp(const void *, const void *) MKSH_A_PURE;
+bool has_globbing(const char *) MKSH_A_PURE;
+int ascstrcmp(const void *, const void *) MKSH_A_PURE;
+int ascpstrcmp(const void *, const void *) MKSH_A_PURE;
  void ksh_getopt_reset(Getopt *, int);
  int ksh_getopt(const char **, Getopt *, const char *);
  void print_value_quoted(struct shf *, const char *);
@@ -2346,6 +2599,7 @@ char *shf_smprintf(const char *, ...)
      MKSH_A_FORMAT(__printf__, 1, 2);
  ssize_t shf_vfprintf(struct shf *, const char *, va_list)
      MKSH_A_FORMAT(__printf__, 2, 0);
+void set_ifs(const char *);
  /* syn.c */
  void initkeywords(void);
  struct op *compile(Source *, bool, bool);
@@ -2483,7 +2737,7 @@ extern int tty_init_fd(void);     /* initialise tty_fd, tty_devtty */
  #define mksh_abspath(s)                        __extension__({                 \
         const char *mksh_abspath_s = (s);                               \
         (mksh_cdirsep(mksh_abspath_s[0]) ||                             \
-           (ksh_isalpha(mksh_abspath_s[0]) &&                          \
+           (ctype(mksh_abspath_s[0], C_ALPHA) &&                       \
             mksh_abspath_s[1] == ':'));                                 \
  })
  #define mksh_cdirsep(c)                        __extension__({                 \
@@ -2492,15 +2746,15 @@ extern int tty_init_fd(void);   /* initialise tty_fd, tty_devtty */
  })
  #define mksh_sdirsep(s)                        __extension__({                 \
         const char *mksh_sdirsep_s = (s);                               \
-       ((char *)((ksh_isalphx(mksh_sdirsep_s[0]) &&                    \
+       ((char *)((ctype(mksh_sdirsep_s[0], C_ALPHA) &&                 \
             mksh_sdirsep_s[1] == ':' &&                                 \
             !mksh_cdirsep(mksh_sdirsep_s[2])) ?                         \
             (mksh_sdirsep_s + 1) : strpbrk(mksh_sdirsep_s, "/\\")));    \
  })
  #define mksh_vdirsep(s)                        (mksh_sdirsep((s)) != NULL)
  #else
-#define mksh_abspath(s)                        ((s)[0] == '/')
-#define mksh_cdirsep(c)                        ((c) == '/')
+#define mksh_abspath(s)                        (ord((s)[0]) == ord('/'))
+#define mksh_cdirsep(c)                        (ord(c) == ord('/'))
  #define mksh_sdirsep(s)                        strchr((s), '/')
  #define mksh_vdirsep(s)                        vstrchr((s), '/')
  #endif
diff --git a/src/shf.c b/src/shf.c

index 09cc7c3..7e53352 100644 (file)
--- a/src/shf.c
+++ b/src/shf.c
@@ -4,6 +4,8 @@
   * Copyright (c) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2011,
   *              2012, 2013, 2015, 2016, 2017
   *     mirabilos <m@mirbsd.org>
+ * Copyright (c) 2015
+ *     Daniel Richard G. <skunk@iSKUNK.ORG>
   *
   * Provided that these terms and disclaimer and all copyright notices
   * are retained or reproduced in an accompanying document, permission
@@ -25,7 +27,7 @@
  
  #include "sh.h"
  
-__RCSID("$MirOS: src/bin/mksh/shf.c,v 1.79 2017/04/12 17:08:49 tg Exp $");
+__RCSID("$MirOS: src/bin/mksh/shf.c,v 1.95 2017/05/05 22:45:58 tg Exp $");
  
  /* flags to shf_emptybuf() */
  #define EB_READSW      0x01    /* about to switch to reading */
@@ -874,11 +876,11 @@ shf_vfprintf(struct shf *shf, const char *fmt, va_list args)
                                 flags |= FL_SIZET;
                                 continue;
                         }
-                       if (ksh_isdigit(c)) {
+                       if (ctype(c, C_DIGIT)) {
                                 bool overflowed = false;
  
                                 tmp = ksh_numdig(c);
-                               while (c = *fmt++, ksh_isdigit(c))
+                               while (ctype((c = *fmt++), C_DIGIT))
                                         if (notok2mul(2147483647, tmp, 10))
                                                 overflowed = true;
                                         else
@@ -899,7 +901,7 @@ shf_vfprintf(struct shf *shf, const char *fmt, va_list args)
                         /* nasty format */
                         break;
  
-               if (ksh_isupper(c)) {
+               if (ctype(c, C_UPPER)) {
                         flags |= FL_UPPER;
                         c = ksh_tolower(c);
                 }
@@ -1029,8 +1031,7 @@ shf_vfprintf(struct shf *shf, const char *fmt, va_list args)
                         if (!(flags & FL_RIGHT)) {
                                 /* skip past sign or 0x when padding with 0 */
                                 if ((flags & FL_ZERO) && (flags & FL_NUMBER)) {
-                                       if (*s == '+' || *s == '-' ||
-                                           *s == ' ') {
+                                       if (ctype(*s, C_SPC | C_PLUS | C_MINUS)) {
                                                 shf_putc(*s, shf);
                                                 s++;
                                                 precision--;
@@ -1158,3 +1159,163 @@ cstrerror(int errnum)
         }
  }
  #endif
+
+/* fast character classes: one Ci* bit mask per ASCII code point, indexed 0x00-0x7F */
+const uint32_t tpl_ctypes[128] = {
+       /* 0x00 */
+       CiNUL,          CiCNTRL,        CiCNTRL,        CiCNTRL,
+       CiCNTRL,        CiCNTRL,        CiCNTRL,        CiCNTRL,
+       CiCNTRL,        CiTAB,          CiNL,           CiSPX,
+       CiSPX,          CiCR,           CiCNTRL,        CiCNTRL,
+       /* 0x10 */
+       CiCNTRL,        CiCNTRL,        CiCNTRL,        CiCNTRL,
+       CiCNTRL,        CiCNTRL,        CiCNTRL,        CiCNTRL,
+       CiCNTRL,        CiCNTRL,        CiCNTRL,        CiCNTRL,
+       CiCNTRL,        CiCNTRL,        CiCNTRL,        CiCNTRL,
+       /* 0x20 */
+       CiSP,           CiALIAS | CiVAR1,       CiQC,   CiHASH,
+       CiSS,           CiPERCT,        CiQCL,          CiQC,
+       CiQCL,          CiQCL,          CiQCX | CiVAR1, CiPLUS,
+       CiALIAS,        CiMINUS,        CiALIAS,        CiQCM,
+       /* 0x30 */
+       CiOCTAL,        CiOCTAL,        CiOCTAL,        CiOCTAL,
+       CiOCTAL,        CiOCTAL,        CiOCTAL,        CiOCTAL,
+       CiDIGIT,        CiDIGIT,        CiCOLON,        CiQCL,
+       CiANGLE,        CiEQUAL,        CiANGLE,        CiQUEST,
+       /* 0x40 */
+       CiALIAS | CiVAR1,       CiUPPER | CiHEXLT,
+       CiUPPER | CiHEXLT,      CiUPPER | CiHEXLT,
+       CiUPPER | CiHEXLT,      CiUPPER | CiHEXLT,
+       CiUPPER | CiHEXLT,      CiUPPER,
+       CiUPPER,        CiUPPER,        CiUPPER,        CiUPPER,
+       CiUPPER,        CiUPPER,        CiUPPER,        CiUPPER,
+       /* 0x50 */
+       CiUPPER,        CiUPPER,        CiUPPER,        CiUPPER,
+       CiUPPER,        CiUPPER,        CiUPPER,        CiUPPER,
+       CiUPPER,        CiUPPER,        CiUPPER,        CiQCX | CiBRACK,
+       CiQCX,          CiBRACK,        CiQCM,          CiUNDER,
+       /* 0x60 */
+       CiGRAVE,                CiLOWER | CiHEXLT,
+       CiLOWER | CiHEXLT,      CiLOWER | CiHEXLT,
+       CiLOWER | CiHEXLT,      CiLOWER | CiHEXLT,
+       CiLOWER | CiHEXLT,      CiLOWER,
+       CiLOWER,        CiLOWER,        CiLOWER,        CiLOWER,
+       CiLOWER,        CiLOWER,        CiLOWER,        CiLOWER,
+       /* 0x70 */
+       CiLOWER,        CiLOWER,        CiLOWER,        CiLOWER,
+       CiLOWER,        CiLOWER,        CiLOWER,        CiLOWER,
+       CiLOWER,        CiLOWER,        CiLOWER,        CiCURLY,
+       CiQCL,          CiCURLY,        CiQCM,          CiCNTRL
+};
+
+void
+set_ifs(const char *s) /* rebuild ksh_ctypes[] from tpl_ctypes[], then flag every octet of s as CiIFS; s is dereferenced, must not be NULL */
+{
+#if defined(MKSH_EBCDIC) || defined(MKSH_FAUX_EBCDIC)
+       int i = 256;
+
+       memset(ksh_ctypes, 0, sizeof(ksh_ctypes));
+       while (i--)
+               if (ebcdic_map[i] < 0x80U)      /* octet maps into the ASCII range covered by tpl_ctypes[] */
+                       ksh_ctypes[i] = tpl_ctypes[ebcdic_map[i]];
+#else
+       memcpy(ksh_ctypes, tpl_ctypes, sizeof(tpl_ctypes));     /* lower 128 entries come from the template */
+       memset((char *)ksh_ctypes + sizeof(tpl_ctypes), '\0',   /* remaining upper entries start with no class bits */
+           sizeof(ksh_ctypes) - sizeof(tpl_ctypes));
+#endif
+       ifs0 = *s;      /* remember the first octet of s (NUL if s is empty) */
+       while (*s)
+               ksh_ctypes[ord(*s++)] |= CiIFS; /* ord() keeps the index in 0..255 even if char is signed */
+}
+
+#if defined(MKSH_EBCDIC) || defined(MKSH_FAUX_EBCDIC)
+#include <locale.h>
+
+/*
+ * Many headaches with EBCDIC:
+ * 1. There are numerous EBCDIC variants, and it is not feasible for us
+ *    to support them all. But we can support the EBCDIC code pages that
+ *    contain all (most?) of the characters in ASCII, and these
+ *    usually tend to agree on the code points assigned to the ASCII
+ *    subset. If you need a representative example, look at EBCDIC 1047,
+ *    which is first among equals in the IBM MVS development
+ *    environment: https://en.wikipedia.org/wiki/EBCDIC_1047
+ *    Unfortunately, the square brackets are not consistently mapped,
+ *    and for certain reasons, we need an unambiguous bijective
+ *    mapping between EBCDIC and "extended ASCII".
+ * 2. Character ranges that are contiguous in ASCII, like the letters
+ *    in [A-Z], are broken up into segments (i.e. [A-IJ-RS-Z]), so we
+ *    can't implement e.g. islower() as { return c >= 'a' && c <= 'z'; }
+ *    because it will also return true for a handful of extraneous
+ *    characters (like the plus-minus sign at 0x8F in EBCDIC 1047, a
+ *    little after 'i'). But at least '_' is not one of these.
+ * 3. The normal [0-9A-Za-z] characters are at codepoints beyond 0x80.
+ *    Not only do they require all 8 bits instead of 7, if chars are
+ *    signed, they will have negative integer values! Something like
+ *    (c - 'A') could actually become (c + 63)! Use the ord() macro to
+ *    ensure you're getting a value in [0, 255].
+ * 4. '\n' is actually NL (0x15, U+0085) instead of LF (0x25, U+000A).
+ *    EBCDIC has a proper newline character instead of "emulating" one
+ *    with line feeds, although this is mapped to LF for our purposes.
+ * 5. Note that it is possible to compile programs in ASCII mode on IBM
+ *    mainframe systems, using the -qascii option to the XL C compiler.
+ *    We can determine the build mode by looking at __CHARSET_LIB:
+ *    0 == EBCDIC, 1 == ASCII
+ */
+
+void
+ebcdic_init(void)      /* fill ebcdic_rtt_toascii[], ebcdic_rtt_fromascii[] and ebcdic_map[]; exits with 255 on any inconsistency */
+{
+       int i = 256;
+       unsigned char t;
+       bool mapcache[256];     /* mapcache[t] is set once some octet has mapped to t */
+
+       while (i--)
+               ebcdic_rtt_toascii[i] = i;      /* start from the identity map; only MKSH_EBCDIC converts it below */
+       memset(ebcdic_rtt_fromascii, 0xFF, sizeof(ebcdic_rtt_fromascii));
+       setlocale(LC_ALL, "");  /* NOTE(review): presumably selects the code page used by __etoa_l - confirm */
+#ifdef MKSH_EBCDIC
+       if (__etoa_l(ebcdic_rtt_toascii, 256) != 256) {
+               write(2, "mksh: could not map EBCDIC to ASCII\n", 36);
+               exit(255);
+       }
+#endif
+
+       memset(mapcache, 0, sizeof(mapcache));
+       i = 256;
+       while (i--) {
+               t = ebcdic_rtt_toascii[i];
+               /* ensure unique round-trip capable mapping */
+               if (mapcache[t]) {
+                       write(2, "mksh: duplicate EBCDIC to ASCII mapping\n", 40);
+                       exit(255);
+               }
+               /*
+                * since there are 256 input octets, this also ensures
+                * the other mapping direction is completely filled
+                */
+               mapcache[t] = true;
+               /* fill the complete round-trip map */
+               ebcdic_rtt_fromascii[t] = i;
+               /*
+                * Only use the converted value if it's in the range
+                * [0x00; 0x7F], which I checked; the "extended ASCII"
+                * characters can be any encoding, not just Latin1,
+                * and the C1 control characters other than NEL are
+                * hopeless, but we map EBCDIC NEL to ASCII LF so we
+                * cannot even use C1 NEL.
+                * If ever we map to Unicode, bump the table width to
+                * an unsigned int, and or the raw unconverted EBCDIC
+                * values with 0x01000000 instead.
+                */
+               if (t < 0x80U)
+                       ebcdic_map[i] = (unsigned short)ord(t);
+               else
+                       ebcdic_map[i] = (unsigned short)(0x100U | ord(i));      /* 0x100 flag keeps unconverted octets above every ASCII value */
+       }
+       if (ebcdic_rtt_toascii[0] || ebcdic_rtt_fromascii[0] || ebcdic_map[0]) {
+               write(2, "mksh: NUL not at position 0\n", 28);
+               exit(255);
+       }
+}
+#endif
diff --git a/src/signames.inc b/src/signames.inc

deleted file mode 100644 (file)

index 07811fd..0000000
--- a/src/signames.inc
+++ /dev/null
@@ -1,31 +0,0 @@
-               { "ABRT", 6 },
-               { "FPE", 8 },
-               { "ILL", 4 },
-               { "INT", 2 },
-               { "SEGV", 11 },
-               { "TERM", 15 },
-               { "ALRM", 14 },
-               { "BUS", 7 },
-               { "CHLD", 17 },
-               { "CONT", 18 },
-               { "HUP", 1 },
-               { "KILL", 9 },
-               { "PIPE", 13 },
-               { "QUIT", 3 },
-               { "STOP", 19 },
-               { "TSTP", 20 },
-               { "TTIN", 21 },
-               { "TTOU", 22 },
-               { "USR1", 10 },
-               { "USR2", 12 },
-               { "POLL", 29 },
-               { "PROF", 27 },
-               { "SYS", 31 },
-               { "TRAP", 5 },
-               { "URG", 23 },
-               { "VTALRM", 26 },
-               { "XCPU", 24 },
-               { "XFSZ", 25 },
-               { "WINCH", 28 },
-               { "PWR", 30 },
-               { "STKFLT", 16 },
diff --git a/src/syn.c b/src/syn.c

index 0454488..c50c2ab 100644 (file)
--- a/src/syn.c
+++ b/src/syn.c
@@ -23,7 +23,7 @@
  
  #include "sh.h"
  
-__RCSID("$MirOS: src/bin/mksh/syn.c,v 1.120 2017/04/06 01:59:57 tg Exp $");
+__RCSID("$MirOS: src/bin/mksh/syn.c,v 1.124 2017/05/05 22:53:31 tg Exp $");
  
  struct nesting_state {
         int start_token;        /* token than began nesting (eg, FOR) */
@@ -91,7 +91,7 @@ yyparse(bool doalias)
         c = tpeek(0);
         if (c == 0 && !outtree)
                 outtree = newtp(TEOF);
-       else if (c != '\n' && c != 0)
+       else if (!ctype(c, C_LF | C_NUL))
                 syntaxerr(NULL);
  }
  
@@ -330,7 +330,7 @@ get_command(int cf, int sALIAS)
                                         XPput(args, yylval.cp);
                                 break;
  
-                       case '(' /*)*/:
+                       case ord('(' /*)*/):
                                 if (XPsize(args) == 0 && XPsize(vars) == 1 &&
                                     is_wdvarassign(yylval.cp)) {
                                         char *tcp;
@@ -373,7 +373,7 @@ get_command(int cf, int sALIAS)
                                             XPsize(vars) != 0)
                                                 syntaxerr(NULL);
                                         ACCEPT;
-                                       musthave(/*(*/')', 0);
+                                       musthave(/*(*/ ')', 0);
                                         t = function_body(XPptrv(args)[0],
                                             sALIAS, false);
                                 }
@@ -386,18 +386,18 @@ get_command(int cf, int sALIAS)
   Leave:
                 break;
  
-       case '(': /*)*/ {
+       case ord('(' /*)*/): {
                 int subshell_nesting_type_saved;
   Subshell:
                 subshell_nesting_type_saved = subshell_nesting_type;
-               subshell_nesting_type = ')';
-               t = nested(TPAREN, '(', ')', sALIAS);
+               subshell_nesting_type = ord(')');
+               t = nested(TPAREN, ord('('), ord(')'), sALIAS);
                 subshell_nesting_type = subshell_nesting_type_saved;
                 break;
             }
  
-       case '{': /*}*/
-               t = nested(TBRACE, '{', '}', sALIAS);
+       case ord('{' /*}*/):
+               t = nested(TBRACE, ord('{'), ord('}'), sALIAS);
                 break;
  
         case MDPAREN:
@@ -407,8 +407,8 @@ get_command(int cf, int sALIAS)
                 switch (token(LETEXPR)) {
                 case LWORD:
                         break;
-               case '(': /*)*/
-                       c = '(';
+               case ord('(' /*)*/):
+                       c = ord('(');
                         goto Subshell;
                 default:
                         syntaxerr(NULL);
@@ -554,8 +554,8 @@ dogroup(int sALIAS)
          */
         if (c == DO)
                 c = DONE;
-       else if (c == '{')
-               c = '}';
+       else if (c == ord('{'))
+               c = ord('}');
         else
                 syntaxerr(NULL);
         list = c_list(sALIAS, true);
@@ -610,8 +610,8 @@ caselist(int sALIAS)
         /* A {...} can be used instead of in...esac for case statements */
         if (c == IN)
                 c = ESAC;
-       else if (c == '{')
-               c = '}';
+       else if (c == ord('{'))
+               c = ord('}');
         else
                 syntaxerr(NULL);
         t = tl = NULL;
@@ -636,17 +636,18 @@ casepart(int endtok, int sALIAS)
         XPinit(ptns, 16);
         t = newtp(TPAT);
         /* no ALIAS here */
-       if (token(CONTIN | KEYWORD) != '(')
+       if (token(CONTIN | KEYWORD) != ord('('))
                 REJECT;
         do {
                 switch (token(0)) {
                 case LWORD:
                         break;
-               case '}':
+               case ord('}'):
                 case ESAC:
                         if (symbol != endtok) {
                                 strdupx(yylval.cp,
-                                   symbol == '}' ? Tcbrace : Tesac, ATEMP);
+                                   symbol == ord('}') ? Tcbrace : Tesac,
+                                   ATEMP);
                                 break;
                         }
                         /* FALLTHROUGH */
@@ -658,23 +659,23 @@ casepart(int endtok, int sALIAS)
         REJECT;
         XPput(ptns, NULL);
         t->vars = (char **)XPclose(ptns);
-       musthave(')', 0);
+       musthave(ord(')'), 0);
  
         t->left = c_list(sALIAS, true);
  
         /* initialise to default for ;; or omitted */
-       t->u.charflag = ';';
+       t->u.charflag = ord(';');
         /* SUSv4 requires the ;; except in the last casepart */
         if ((tpeek(CONTIN|KEYWORD|sALIAS)) != endtok)
                 switch (symbol) {
                 default:
                         syntaxerr(NULL);
                 case BRKEV:
-                       t->u.charflag = '|';
+                       t->u.charflag = ord('|');
                         if (0)
                                 /* FALLTHROUGH */
                 case BRKFT:
-                       t->u.charflag = '&';
+                         t->u.charflag = ord('&');
                         /* FALLTHROUGH */
                 case BREAK:
                         /* initialised above, but we need to eat the token */
@@ -697,10 +698,10 @@ function_body(char *name, int sALIAS,
          * only allow [a-zA-Z_0-9] but this allows more as old pdkshs
          * have allowed more; the following were never allowed:
          *      NUL TAB NL SP " $ & ' ( ) ; < = > \ ` |
-        * C_QUOTE covers all but adds # * ? [ ]
+        * C_QUOTE|C_SPC covers all but adds # * ? [ ]
          */
         for (p = sname; *p; p++)
-               if (ctype(*p, C_QUOTE))
+               if (ctype(*p, C_QUOTE | C_SPC))
                         yyerror(Tinvname, sname, Tfunction);
  
         /*
@@ -710,14 +711,14 @@ function_body(char *name, int sALIAS,
          * only accepts an open-brace.
          */
         if (ksh_func) {
-               if (tpeek(CONTIN|KEYWORD|sALIAS) == '(' /*)*/) {
+               if (tpeek(CONTIN|KEYWORD|sALIAS) == ord('(' /*)*/)) {
                         /* function foo () { //}*/
                         ACCEPT;
-                       musthave(')', 0);
+                       musthave(ord(/*(*/ ')'), 0);
                         /* degrade to POSIX function */
                         ksh_func = false;
                 }
-               musthave('{' /*}*/, CONTIN|KEYWORD|sALIAS);
+               musthave(ord('{' /*}*/), CONTIN|KEYWORD|sALIAS);
                 REJECT;
         }
  
@@ -809,8 +810,8 @@ static const struct tokeninfo {
         { "in",         IN,     true },
         { Tfunction,    FUNCTION, true },
         { Ttime,        TIME,   true },
-       { "{",          '{',    true },
-       { Tcbrace,      '}',    true },
+       { "{",          ord('{'), true },
+       { Tcbrace,      ord('}'), true },
         { "!",          BANG,   true },
         { "[[",         DBRACKET, true },
         /* Lexical tokens (0[EOF], LWORD and REDIR handled specially) */
@@ -822,7 +823,7 @@ static const struct tokeninfo {
         { "((",         MDPAREN, false },
         { "|&",         COPROC, false },
         /* and some special cases... */
-       { "newline",    '\n',   false },
+       { "newline",    ord('\n'), false },
         { NULL,         0,      false }
  };
  
@@ -997,9 +998,9 @@ dbtestp_isa(Test_env *te, Test_meta meta)
                 ret = (uqword && !strcmp(yylval.cp,
                     dbtest_tokens[(int)TM_NOT])) ? TO_NONNULL : TO_NONOP;
         else if (meta == TM_OPAREN)
-               ret = c == '(' /*)*/ ? TO_NONNULL : TO_NONOP;
+               ret = c == ord('(') /*)*/ ? TO_NONNULL : TO_NONOP;
         else if (meta == TM_CPAREN)
-               ret = c == /*(*/ ')' ? TO_NONNULL : TO_NONOP;
+               ret = c == /*(*/ ord(')') ? TO_NONNULL : TO_NONOP;
         else if (meta == TM_UNOP || meta == TM_BINOP) {
                 if (meta == TM_BINOP && c == REDIR &&
                     (yylval.iop->ioflag == IOREAD ||
@@ -1079,7 +1080,7 @@ parse_usec(const char *s, struct timeval *tv)
  
         tv->tv_sec = 0;
         /* parse integral part */
-       while (ksh_isdigit(*s)) {
+       while (ctype(*s, C_DIGIT)) {
                 tt.tv_sec = tv->tv_sec * 10 + ksh_numdig(*s++);
                 /*XXX this overflow check maybe UB */
                 if (tt.tv_sec / 10 != tv->tv_sec) {
@@ -1101,14 +1102,14 @@ parse_usec(const char *s, struct timeval *tv)
  
         /* parse decimal fraction */
         i = 100000;
-       while (ksh_isdigit(*s)) {
+       while (ctype(*s, C_DIGIT)) {
                 tv->tv_usec += i * ksh_numdig(*s++);
                 if (i == 1)
                         break;
                 i /= 10;
         }
         /* check for junk after fractional part */
-       while (ksh_isdigit(*s))
+       while (ctype(*s, C_DIGIT))
                 ++s;
         if (*s) {
                 errno = EINVAL;
@@ -1133,11 +1134,11 @@ yyrecursive(int subtype)
         int stok, etok;
  
         if (subtype != COMSUB) {
-               stok = '{';
-               etok = '}';
+               stok = ord('{');
+               etok = ord('}');
         } else {
-               stok = '(';
-               etok = ')';
+               stok = ord('(');
+               etok = ord(')');
         }
  
         ys = alloc(sizeof(struct yyrecursive_state), ATEMP);
diff --git a/src/tree.c b/src/tree.c

index 1fd8f2a..1062feb 100644 (file)
--- a/src/tree.c
+++ b/src/tree.c
@@ -23,7 +23,7 @@
  
  #include "sh.h"
  
-__RCSID("$MirOS: src/bin/mksh/tree.c,v 1.89 2017/04/12 16:46:23 tg Exp $");
+__RCSID("$MirOS: src/bin/mksh/tree.c,v 1.93 2017/05/05 22:53:32 tg Exp $");
  
  #define INDENT 8
  
@@ -329,34 +329,34 @@ wdvarput(struct shf *shf, const char *wp, int quotelevel, int opmode)
                 case EOS:
                         return (--wp);
                 case ADELIM:
-                       if (*wp == /*{*/'}') {
+                       if (ord(*wp) == ord(/*{*/ '}')) {
                                 ++wp;
                                 goto wdvarput_csubst;
                         }
                         /* FALLTHROUGH */
                 case CHAR:
-                       c = *wp++;
+                       c = ord(*wp++);
                         shf_putc(c, shf);
                         break;
                 case QCHAR:
-                       c = *wp++;
+                       c = ord(*wp++);
                         if (opmode & WDS_TPUTS)
                                 switch (c) {
-                               case '\n':
+                               case ord('\n'):
                                         if (quotelevel == 0) {
-                                               c = '\'';
+                                               c = ord('\'');
                                                 shf_putc(c, shf);
-                                               shf_putc('\n', shf);
+                                               shf_putc(ord('\n'), shf);
                                         }
                                         break;
                                 default:
                                         if (quotelevel == 0)
                                                 /* FALLTHROUGH */
-                               case '"':
-                               case '`':
-                               case '$':
-                               case '\\':
-                                         shf_putc('\\', shf);
+                               case ord('"'):
+                               case ord('`'):
+                               case ord('$'):
+                               case ord('\\'):
+                                         shf_putc(ord('\\'), shf);
                                         break;
                                 }
                         shf_putc(c, shf);
@@ -365,7 +365,7 @@ wdvarput(struct shf *shf, const char *wp, int quotelevel, int opmode)
                 case COMSUB:
                         shf_puts("$(", shf);
                         cs = ")";
-                       if (*wp == '(' /*)*/)
+                       if (ord(*wp) == ord('(' /*)*/))
                                 shf_putc(' ', shf);
   pSUB:
                         while ((c = *wp++) != 0)
@@ -374,11 +374,11 @@ wdvarput(struct shf *shf, const char *wp, int quotelevel, int opmode)
                         break;
                 case FUNASUB:
                 case FUNSUB:
-                       c = ' ';
+                       c = ord(' ');
                         if (0)
                                 /* FALLTHROUGH */
                 case VALSUB:
-                         c = '|';
+                         c = ord('|');
                         shf_putc('$', shf);
                         shf_putc('{', shf);
                         shf_putc(c, shf);
@@ -403,14 +403,14 @@ wdvarput(struct shf *shf, const char *wp, int quotelevel, int opmode)
                         break;
                 case OSUBST:
                         shf_putc('$', shf);
-                       if (*wp++ == '{')
+                       if (ord(*wp++) == ord('{'))
                                 shf_putc('{', shf);
                         while ((c = *wp++) != 0)
                                 shf_putc(c, shf);
                         wp = wdvarput(shf, wp, 0, opmode);
                         break;
                 case CSUBST:
-                       if (*wp++ == '}') {
+                       if (ord(*wp++) == ord('}')) {
   wdvarput_csubst:
                                 shf_putc('}', shf);
                         }
@@ -420,11 +420,11 @@ wdvarput(struct shf *shf, const char *wp, int quotelevel, int opmode)
                         shf_putc('(', shf);
                         break;
                 case SPAT:
-                       c = '|';
+                       c = ord('|');
                         if (0)
                                 /* FALLTHROUGH */
                 case CPAT:
-                         c = /*(*/ ')';
+                         c = ord(/*(*/ ')');
                         shf_putc(c, shf);
                         break;
                 }
@@ -467,39 +467,39 @@ vfptreef(struct shf *shf, int indent, const char *fmt, va_list va)
  {
         int c;
  
-       while ((c = *fmt++)) {
+       while ((c = ord(*fmt++))) {
                 if (c == '%') {
-                       switch ((c = *fmt++)) {
-                       case 'c':
+                       switch ((c = ord(*fmt++))) {
+                       case ord('c'):
                                 /* character (octet, probably) */
                                 shf_putchar(va_arg(va, int), shf);
                                 break;
-                       case 's':
+                       case ord('s'):
                                 /* string */
                                 shf_puts(va_arg(va, char *), shf);
                                 break;
-                       case 'S':
+                       case ord('S'):
                                 /* word */
                                 wdvarput(shf, va_arg(va, char *), 0, WDS_TPUTS);
                                 break;
-                       case 'd':
+                       case ord('d'):
                                 /* signed decimal */
                                 shf_fprintf(shf, Tf_d, va_arg(va, int));
                                 break;
-                       case 'u':
+                       case ord('u'):
                                 /* unsigned decimal */
                                 shf_fprintf(shf, "%u", va_arg(va, unsigned int));
                                 break;
-                       case 'T':
+                       case ord('T'):
                                 /* format tree */
                                 ptree(va_arg(va, struct op *), indent, shf);
                                 goto dont_trash_prevent_semicolon;
-                       case ';':
+                       case ord(';'):
                                 /* newline or ; */
-                       case 'N':
+                       case ord('N'):
                                 /* newline or space */
                                 if (shf->flags & SHF_STRING) {
-                                       if (c == ';' && !prevent_semicolon)
+                                       if (c == ord(';') && !prevent_semicolon)
                                                 shf_putc(';', shf);
                                         shf_putc(' ', shf);
                                 } else {
@@ -515,7 +515,7 @@ vfptreef(struct shf *shf, int indent, const char *fmt, va_list va)
                                                 shf_putc(' ', shf);
                                 }
                                 break;
-                       case 'R':
+                       case ord('R'):
                                 /* I/O redirection */
                                 pioact(shf, va_arg(va, struct ioword *));
                                 break;
@@ -613,7 +613,7 @@ wdscan(const char *wp, int c)
                 case ADELIM:
                         if (c == ADELIM && nest == 0)
                                 return (wp + 1);
-                       if (*wp == /*{*/'}')
+                       if (ord(*wp) == ord(/*{*/ '}'))
                                 goto wdscan_csubst;
                         /* FALLTHROUGH */
                 case CHAR:
@@ -795,20 +795,20 @@ vistree(char *dst, size_t sz, struct op *t)
                         *dst++ = *cp++;
                 goto vist_loop;
         }
-       if (--sz == 0 || (c = (unsigned char)(*cp++)) == 0)
+       if (--sz == 0 || (c = ord(*cp++)) == 0)
                 /* NUL or not enough free space */
                 goto vist_out;
-       if (ISCTRL(c & 0x7F)) {
+       if (ksh_isctrl(c)) {
                 /* C0 or C1 control character or DEL */
                 if (--sz == 0)
                         /* not enough free space for two chars */
                         goto vist_out;
-               *dst++ = (c & 0x80) ? '$' : '^';
-               c = UNCTRL(c & 0x7F);
-       } else if (UTFMODE && c > 0x7F) {
+               *dst++ = '^';
+               c = ksh_unctrl(c);
+       } else if (UTFMODE && rtt2asc(c) > 0x7F) {
                 /* better not try to display broken multibyte chars */
                 /* also go easy on the Unicode: no U+FFFD here */
-               c = '?';
+               c = ord('?');
         }
         *dst++ = c;
         goto vist_loop;
@@ -822,10 +822,10 @@ vistree(char *dst, size_t sz, struct op *t)
  void
  dumpchar(struct shf *shf, int c)
  {
-       if (ISCTRL(c & 0x7F)) {
+       if (ksh_isctrl(c)) {
                 /* C0 or C1 control character or DEL */
-               shf_putc((c & 0x80) ? '$' : '^', shf);
-               c = UNCTRL(c & 0x7F);
+               shf_putc('^', shf);
+               c = ksh_unctrl(c);
         }
         shf_putc(c, shf);
  }
@@ -842,7 +842,7 @@ dumpwdvar_i(struct shf *shf, const char *wp, int quotelevel)
                         shf_puts("EOS", shf);
                         return (--wp);
                 case ADELIM:
-                       if (*wp == /*{*/'}') {
+                       if (ord(*wp) == ord(/*{*/ '}')) {
                                 shf_puts(/*{*/ "]ADELIM(})", shf);
                                 return (wp + 1);
                         }
@@ -855,9 +855,9 @@ dumpwdvar_i(struct shf *shf, const char *wp, int quotelevel)
                         break;
                 case QCHAR:
                         shf_puts("QCHAR<", shf);
-                       c = *wp++;
-                       if (quotelevel == 0 ||
-                           (c == '"' || c == '`' || c == '$' || c == '\\'))
+                       c = ord(*wp++);
+                       if (quotelevel == 0 || c == ord('"') ||
+                           c == ord('\\') || ctype(c, C_DOLAR | C_GRAVE))
                                 shf_putc('\\', shf);
                         dumpchar(shf, c);
                         goto closeandout;
diff --git a/src/var.c b/src/var.c

index b83977f..a53fae8 100644 (file)
--- a/src/var.c
+++ b/src/var.c
@@ -28,7 +28,7 @@
  #include <sys/sysctl.h>
  #endif
  
-__RCSID("$MirOS: src/bin/mksh/var.c,v 1.214 2017/04/02 16:47:43 tg Exp $");
+__RCSID("$MirOS: src/bin/mksh/var.c,v 1.220 2017/07/26 23:02:28 tg Exp $");
  
  /*-
   * Variables
@@ -183,7 +183,7 @@ array_index_calc(const char *n, bool *arrayp, uint32_t *valp)
         *arrayp = false;
   redo_from_ref:
         p = skip_varname(n, false);
-       if (innermost_refflag == SRF_NOP && (p != n) && ksh_isalphx(n[0])) {
+       if (innermost_refflag == SRF_NOP && (p != n) && ctype(n[0], C_ALPHX)) {
                 struct tbl *vp;
                 char *vn;
  
@@ -204,7 +204,7 @@ array_index_calc(const char *n, bool *arrayp, uint32_t *valp)
         }
         innermost_refflag = SRF_NOP;
  
-       if (p != n && *p == '[' && (len = array_ref_len(p))) {
+       if (p != n && ord(*p) == ord('[') && (len = array_ref_len(p))) {
                 char *sub, *tmp;
                 mksh_ari_t rval;
  
@@ -249,14 +249,14 @@ isglobal(const char *n, bool docreate)
         vn = array_index_calc(n, &array, &val);
         h = hash(vn);
         c = (unsigned char)vn[0];
-       if (!ksh_isalphx(c)) {
+       if (!ctype(c, C_ALPHX)) {
                 if (array)
                         errorf(Tbadsubst);
                 vp = vtemp;
                 vp->flag = DEFINED;
                 vp->type = 0;
                 vp->areap = ATEMP;
-               if (ksh_isdigit(c)) {
+               if (ctype(c, C_DIGIT)) {
                         if (getn(vn, &c)) {
                                 /* main.c:main_init() says 12 */
                                 shf_snprintf(vp->name, 12, Tf_d, c);
@@ -339,7 +339,7 @@ local(const char *n, bool copy)
          */
         vn = array_index_calc(n, &array, &val);
         h = hash(vn);
-       if (!ksh_isalphx(*vn)) {
+       if (!ctype(*vn, C_ALPHX)) {
                 vp = vtemp;
                 vp->flag = DEFINED|RDONLY;
                 vp->type = 0;
@@ -414,9 +414,11 @@ str_val(struct tbl *vp)
  
                         *(s = strbuf) = '1';
                         s[1] = '#';
-                       if (!UTFMODE || ((n & 0xFF80) == 0xEF80))
+                       if (!UTFMODE)
+                               s[2] = (unsigned char)n;
+                       else if ((n & 0xFF80) == 0xEF80)
                                 /* OPTU-16 -> raw octet */
-                               s[2] = n & 0xFF;
+                               s[2] = asc2rtt(n & 0xFF);
                         else
                                 sz = utf_wctomb(s + 2, n);
                         s[2 + sz] = '\0';
@@ -464,7 +466,7 @@ setstr(struct tbl *vq, const char *s, int error_ok)
  #ifndef MKSH_SMALL
                         /* debugging */
                         if (s >= vq->val.s &&
-                           s <= vq->val.s + strlen(vq->val.s)) {
+                           s <= strnul(vq->val.s)) {
                                 internal_errorf(
                                     "setstr: %s=%s: assigning to self",
                                     vq->name, s);
@@ -532,7 +534,7 @@ getint(struct tbl *vp, mksh_ari_u *nump, bool arith)
  
         do {
                 c = (unsigned char)*s++;
-       } while (ksh_isspace(c));
+       } while (ctype(c, C_SPACE));
  
         switch (c) {
         case '-':
@@ -549,7 +551,7 @@ getint(struct tbl *vp, mksh_ari_u *nump, bool arith)
                         base = 16;
                         ++s;
                         goto getint_c_style_base;
-               } else if (Flag(FPOSIX) && ksh_isdigit(s[0]) &&
+               } else if (Flag(FPOSIX) && ctype(s[0], C_DIGIT) &&
                     !(vp->flag & ZEROFIL)) {
                         /* interpret as octal (deprecated) */
                         base = 8;
@@ -577,7 +579,7 @@ getint(struct tbl *vp, mksh_ari_u *nump, bool arith)
                                          * the same as 1#\x80 does, thus is
                                          * not round-tripping correctly XXX)
                                          */
-                                       wc = 0xEF00 + *(const unsigned char *)s;
+                                       wc = 0xEF00 + rtt2asc(*s);
                                 nump->u = (mksh_uari_t)wc;
                                 return (1);
                         } else if (base > 36)
@@ -586,11 +588,11 @@ getint(struct tbl *vp, mksh_ari_u *nump, bool arith)
                         have_base = true;
                         continue;
                 }
-               if (ksh_isdigit(c))
+               if (ctype(c, C_DIGIT))
                         c = ksh_numdig(c);
-               else if (ksh_isupper(c))
+               else if (ctype(c, C_UPPER))
                         c = ksh_numuc(c) + 10;
-               else if (ksh_islower(c))
+               else if (ctype(c, C_LOWER))
                         c = ksh_numlc(c) + 10;
                 else
                         return (-1);
@@ -670,7 +672,7 @@ formatstr(struct tbl *vp, const char *s)
                         qq = utf_skipcols(s, slen, &slen);
  
                         /* strip trailing spaces (AT&T uses qq[-1] == ' ') */
-                       while (qq > s && ksh_isspace(qq[-1])) {
+                       while (qq > s && ctype(qq[-1], C_SPACE)) {
                                 --qq;
                                 --slen;
                         }
@@ -700,7 +702,7 @@ formatstr(struct tbl *vp, const char *s)
                             "%.*s", slen, s);
                 } else {
                         /* strip leading spaces/zeros */
-                       while (ksh_isspace(*s))
+                       while (ctype(*s, C_SPACE))
                                 s++;
                         if (vp->flag & ZEROFIL)
                                 while (*s == '0')
@@ -778,7 +780,7 @@ typeset(const char *var, uint32_t set, uint32_t clr, int field, int base)
                 /* no variable name given */
                 return (NULL);
         }
-       if (*val == '[') {
+       if (ord(*val) == ord('[')) {
                 if (new_refflag != SRF_NOP)
                         errorf(Tf_sD_s, var,
                             "reference variable can't be an array");
@@ -796,18 +798,18 @@ typeset(const char *var, uint32_t set, uint32_t clr, int field, int base)
                         size_t i;
  
                         for (i = 1; i < len - 1; i++)
-                               if (!ksh_isdigit(val[i]))
+                               if (!ctype(val[i], C_DIGIT))
                                         return (NULL);
                 }
                 val += len;
         }
-       if (val[0] == '=') {
+       if (ord(val[0]) == ord('=')) {
                 strndupx(tvar, var, val - var, ATEMP);
                 ++val;
         } else if (set & IMPORT) {
                 /* environment invalid variable name or no assignment */
                 return (NULL);
-       } else if (val[0] == '+' && val[1] == '=') {
+       } else if (ord(val[0]) == ord('+') && ord(val[1]) == ord('=')) {
                 strndupx(tvar, var, val - var, ATEMP);
                 val += 2;
                 vappend = true;
@@ -820,8 +822,9 @@ typeset(const char *var, uint32_t set, uint32_t clr, int field, int base)
                 val = NULL;
                 /* handle foo[*] => foo (whole array) mapping for R39b */
                 len = strlen(tvar);
-               if (len > 3 && tvar[len - 3] == '[' && tvar[len - 2] == '*' &&
-                   tvar[len - 1] == ']')
+               if (len > 3 && ord(tvar[len - 3]) == ord('[') &&
+                   ord(tvar[len - 2]) == ord('*') &&
+                   ord(tvar[len - 1]) == ord(']'))
                         tvar[len - 3] = '\0';
         }
  
@@ -845,7 +848,7 @@ typeset(const char *var, uint32_t set, uint32_t clr, int field, int base)
  
                         if (!(c = (unsigned char)qval[0]))
                                 goto nameref_empty;
-                       else if (ksh_isdigit(c) && getn(qval, &c))
+                       else if (ctype(c, C_DIGIT) && getn(qval, &c))
                                 goto nameref_rhs_checked;
                         else if (qval[1] == '\0') switch (c) {
                         case '$':
@@ -858,7 +861,7 @@ typeset(const char *var, uint32_t set, uint32_t clr, int field, int base)
   nameref_empty:
                         errorf(Tf_sD_s, var, "empty nameref target");
                 }
-               len = (*ccp == '[') ? array_ref_len(ccp) : 0;
+               len = (ord(*ccp) == ord('[')) ? array_ref_len(ccp) : 0;
                 if (ccp[len]) {
                         /*
                          * works for cases "no array", "valid array with
@@ -914,12 +917,12 @@ typeset(const char *var, uint32_t set, uint32_t clr, int field, int base)
         vpbase = (vp->flag & ARRAY) ? global(arrayname(tvar)) : vp;
  
         /*
-        * only allow export flag to be set; AT&T ksh allows any
-        * attribute to be changed which means it can be truncated or
-        * modified (-L/-R/-Z/-i)
+        * only allow export and readonly flag to be set; AT&T ksh
+        * allows any attribute to be changed which means it can be
+        * truncated or modified (-L/-R/-Z/-i)
          */
         if ((vpbase->flag & RDONLY) &&
-           (val || clr || (set & ~EXPORT)))
+           (val || clr || (set & ~(EXPORT | RDONLY))))
                 /* XXX check calls - is error here ok by POSIX? */
                 errorfx(2, Tf_ro, tvar);
         afree(tvar, ATEMP);
@@ -1064,11 +1067,11 @@ skip_varname(const char *s, bool aok)
  {
         size_t alen;
  
-       if (s && ksh_isalphx(*s)) {
+       if (s && ctype(*s, C_ALPHX)) {
                 do {
                         ++s;
-               } while (ksh_isalnux(*s));
-               if (aok && *s == '[' && (alen = array_ref_len(s)))
+               } while (ctype(*s, C_ALNUX));
+               if (aok && ord(*s) == ord('[') && (alen = array_ref_len(s)))
                         s += alen;
         }
         return (s);
@@ -1080,11 +1083,11 @@ skip_wdvarname(const char *s,
      /* skip array de-reference? */
      bool aok)
  {
-       if (s[0] == CHAR && ksh_isalphx(s[1])) {
+       if (s[0] == CHAR && ctype(s[1], C_ALPHX)) {
                 do {
                         s += 2;
-               } while (s[0] == CHAR && ksh_isalnux(s[1]));
-               if (aok && s[0] == CHAR && s[1] == '[') {
+               } while (s[0] == CHAR && ctype(s[1], C_ALNUX));
+               if (aok && s[0] == CHAR && ord(s[1]) == ord('[')) {
                         /* skip possible array de-reference */
                         const char *p = s;
                         char c;
@@ -1095,9 +1098,9 @@ skip_wdvarname(const char *s,
                                         break;
                                 c = p[1];
                                 p += 2;
-                               if (c == '[')
+                               if (ord(c) == ord('['))
                                         depth++;
-                               else if (c == ']' && --depth == 0) {
+                               else if (ord(c) == ord(']') && --depth == 0) {
                                         s = p;
                                         break;
                                 }
@@ -1307,8 +1310,7 @@ setspec(struct tbl *vp)
                 return;
  #endif
         case V_IFS:
-               setctypes(s = str_val(vp), C_IFS);
-               ifs0 = *s;
+               set_ifs(str_val(vp));
                 return;
         case V_PATH:
                 afree(path, APERM);
@@ -1436,8 +1438,7 @@ unsetspec(struct tbl *vp)
                 return;
  #endif
         case V_IFS:
-               setctypes(TC_IFSWS, C_IFS);
-               ifs0 = ' ';
+               set_ifs(TC_IFSWS);
                 break;
         case V_PATH:
                 afree(path, APERM);
@@ -1527,8 +1528,8 @@ array_ref_len(const char *cp)
         char c;
         int depth = 0;
  
-       while ((c = *s++) && (c != ']' || --depth))
-               if (c == '[')
+       while ((c = *s++) && (ord(c) != ord(']') || --depth))
+               if (ord(c) == ord('['))
                         depth++;
         if (!c)
                 return (0);
@@ -1600,17 +1601,18 @@ set_array(const char *var, bool reset, const char **vals)
         }
         while ((ccp = vals[i])) {
  #if 0 /* temporarily taken out due to regression */
-               if (*ccp == '[') {
+               if (ord(*ccp) == ord('[')) {
                         int level = 0;
  
                         while (*ccp) {
-                               if (*ccp == ']' && --level == 0)
+                               if (ord(*ccp) == ord(']') && --level == 0)
                                         break;
-                               if (*ccp == '[')
+                               if (ord(*ccp) == ord('['))
                                         ++level;
                                 ++ccp;
                         }
-                       if (*ccp == ']' && level == 0 && ccp[1] == '=') {
+                       if (ord(*ccp) == ord(']') && level == 0 &&
+                           ord(ccp[1]) == ord('=')) {
                                 strndupx(cp, vals[i] + 1, ccp - (vals[i] + 1),
                                     ATEMP);
                                 evaluate(substitute(cp, 0), (mksh_ari_t *)&j,
author	Elliott Hughes <enh@google.com>
	Fri, 22 Sep 2017 23:04:20 +0000 (16:04 -0700)
committer	Elliott Hughes <enh@google.com>
	Fri, 22 Sep 2017 23:21:10 +0000 (16:21 -0700)
Android.mk		patch \| blob \| history
Android.patch.txt	[new file with mode: 0644]	patch \| blob
src/Build.sh		patch \| blob \| history
src/check.pl		patch \| blob \| history
src/check.t		patch \| blob \| history
src/dot.mkshrc		patch \| blob \| history
src/edit.c		patch \| blob \| history
src/eval.c		patch \| blob \| history
src/exec.c		patch \| blob \| history
src/expr.c		patch \| blob \| history
src/funcs.c		patch \| blob \| history
src/histrap.c		patch \| blob \| history
src/jobs.c		patch \| blob \| history
src/lex.c		patch \| blob \| history
src/main.c		patch \| blob \| history
src/misc.c		patch \| blob \| history
src/mksh.1		patch \| blob \| history
src/os2.c		patch \| blob \| history
src/sh.h		patch \| blob \| history
src/shf.c		patch \| blob \| history
src/signames.inc	[deleted file]	patch \| blob \| history
src/syn.c		patch \| blob \| history
src/tree.c		patch \| blob \| history
src/var.c		patch \| blob \| history