xtensa: clean up word alignment macros in assembly code

author Max Filippov <jcmvbkbc@gmail.com>

Sun, 10 Dec 2017 05:21:35 +0000 (21:21 -0800)

committer Max Filippov <jcmvbkbc@gmail.com>

Sun, 10 Dec 2017 22:48:53 +0000 (14:48 -0800)
author Max Filippov <jcmvbkbc@gmail.com>
Sun, 10 Dec 2017 05:21:35 +0000 (21:21 -0800)
committer Max Filippov <jcmvbkbc@gmail.com>
Sun, 10 Dec 2017 22:48:53 +0000 (14:48 -0800)
diff --git a/arch/xtensa/include/asm/asmmacro.h b/arch/xtensa/include/asm/asmmacro.h

index d2a4415..7f2ae58 100644 (file)
--- a/arch/xtensa/include/asm/asmmacro.h
+++ b/arch/xtensa/include/asm/asmmacro.h
@@ -158,4 +158,37 @@
         .previous                               \
  97:
  
+
+/*
+ * Extract unaligned word that is split between two registers w0 and w1
+ * into r regardless of machine endianness. SAR must be loaded with the
+ * starting bit of the word (see __ssa8).
+ */
+
+       .macro __src_b  r, w0, w1
+#ifdef __XTENSA_EB__
+               src     \r, \w0, \w1
+#else
+               src     \r, \w1, \w0
+#endif
+       .endm
+
+/*
+ * Set SAR from the 2 lowest address bits of r so that __src_b can extract
+ * the unaligned word starting at r from two registers loaded from the
+ * consecutive aligned addresses covering r, regardless of machine endianness.
+ *
+ *      r   0   1   2   3
+ * LE SAR   0   8  16  24
+ * BE SAR  32  24  16   8
+ */
+
+       .macro __ssa8   r
+#ifdef __XTENSA_EB__
+               ssa8b   \r
+#else
+               ssa8l   \r
+#endif
+       .endm
+
  #endif /* _XTENSA_ASMMACRO_H */
diff --git a/arch/xtensa/kernel/align.S b/arch/xtensa/kernel/align.S

index 24b3189..9301452 100644 (file)
--- a/arch/xtensa/kernel/align.S
+++ b/arch/xtensa/kernel/align.S
@@ -19,6 +19,7 @@
  #include <linux/linkage.h>
  #include <asm/current.h>
  #include <asm/asm-offsets.h>
+#include <asm/asmmacro.h>
  #include <asm/processor.h>
  
  #if XCHAL_UNALIGNED_LOAD_EXCEPTION || XCHAL_UNALIGNED_STORE_EXCEPTION
@@ -66,8 +67,6 @@
  #define        INSN_T          24
  #define        INSN_OP1        16
  
-.macro __src_b r, w0, w1;      src     \r, \w0, \w1;   .endm
-.macro __ssa8  r;              ssa8b   \r;             .endm
  .macro __ssa8r r;              ssa8l   \r;             .endm
  .macro __sh    r, s;           srl     \r, \s;         .endm
  .macro __sl    r, s;           sll     \r, \s;         .endm
@@ -81,8 +80,6 @@
  #define        INSN_T          4
  #define        INSN_OP1        12
  
-.macro __src_b r, w0, w1;      src     \r, \w1, \w0;   .endm
-.macro __ssa8  r;              ssa8l   \r;             .endm
  .macro __ssa8r r;              ssa8b   \r;             .endm
  .macro __sh    r, s;           sll     \r, \s;         .endm
  .macro __sl    r, s;           srl     \r, \s;         .endm
diff --git a/arch/xtensa/lib/memcopy.S b/arch/xtensa/lib/memcopy.S

index b1c219a..9bda748 100644 (file)
--- a/arch/xtensa/lib/memcopy.S
+++ b/arch/xtensa/lib/memcopy.S
@@ -10,22 +10,7 @@
   */
  
  #include <variant/core.h>
-
-       .macro  src_b   r, w0, w1
-#ifdef __XTENSA_EB__
-       src     \r, \w0, \w1
-#else
-       src     \r, \w1, \w0
-#endif
-       .endm
-
-       .macro  ssa8    r
-#ifdef __XTENSA_EB__
-       ssa8b   \r
-#else
-       ssa8l   \r
-#endif
-       .endm
+#include <asm/asmmacro.h>
  
  /*
   * void *memcpy(void *dst, const void *src, size_t len);
@@ -209,7 +194,7 @@ memcpy:
  .Lsrcunaligned:
         _beqz   a4, .Ldone      # avoid loading anything for zero-length copies
         # copy 16 bytes per iteration for word-aligned dst and unaligned src
-       ssa8    a3              # set shift amount from byte offset
+       __ssa8  a3              # set shift amount from byte offset
  
  /* set to 1 when running on ISS (simulator) with the
     lint or ferret client, or 0 to save a few cycles */
@@ -229,16 +214,16 @@ memcpy:
  .Loop2:
         l32i    a7, a3,  4
         l32i    a8, a3,  8
-       src_b   a6, a6, a7
+       __src_b a6, a6, a7
         s32i    a6, a5,  0
         l32i    a9, a3, 12
-       src_b   a7, a7, a8
+       __src_b a7, a7, a8
         s32i    a7, a5,  4
         l32i    a6, a3, 16
-       src_b   a8, a8, a9
+       __src_b a8, a8, a9
         s32i    a8, a5,  8
         addi    a3, a3, 16
-       src_b   a9, a9, a6
+       __src_b a9, a9, a6
         s32i    a9, a5, 12
         addi    a5, a5, 16
  #if !XCHAL_HAVE_LOOPS
@@ -249,10 +234,10 @@ memcpy:
         # copy 8 bytes
         l32i    a7, a3,  4
         l32i    a8, a3,  8
-       src_b   a6, a6, a7
+       __src_b a6, a6, a7
         s32i    a6, a5,  0
         addi    a3, a3,  8
-       src_b   a7, a7, a8
+       __src_b a7, a7, a8
         s32i    a7, a5,  4
         addi    a5, a5,  8
         mov     a6, a8
@@ -261,7 +246,7 @@ memcpy:
         # copy 4 bytes
         l32i    a7, a3,  4
         addi    a3, a3,  4
-       src_b   a6, a6, a7
+       __src_b a6, a6, a7
         s32i    a6, a5,  0
         addi    a5, a5,  4
         mov     a6, a7
@@ -485,7 +470,7 @@ memmove:
  .Lbacksrcunaligned:
         _beqz   a4, .Lbackdone  # avoid loading anything for zero-length copies
         # copy 16 bytes per iteration for word-aligned dst and unaligned src
-       ssa8    a3              # set shift amount from byte offset
+       __ssa8  a3              # set shift amount from byte offset
  #define SIM_CHECKS_ALIGNMENT   1       /* set to 1 when running on ISS with
                                          * the lint or ferret client, or 0
                                          * to save a few cycles */
@@ -506,15 +491,15 @@ memmove:
         l32i    a7, a3, 12
         l32i    a8, a3,  8
         addi    a5, a5, -16
-       src_b   a6, a7, a6
+       __src_b a6, a7, a6
         s32i    a6, a5, 12
         l32i    a9, a3,  4
-       src_b   a7, a8, a7
+       __src_b a7, a8, a7
         s32i    a7, a5,  8
         l32i    a6, a3,  0
-       src_b   a8, a9, a8
+       __src_b a8, a9, a8
         s32i    a8, a5,  4
-       src_b   a9, a6, a9
+       __src_b a9, a6, a9
         s32i    a9, a5,  0
  #if !XCHAL_HAVE_LOOPS
         bne     a3, a10, .backLoop2 # continue loop if a3:src != a10:src_start
@@ -526,9 +511,9 @@ memmove:
         l32i    a7, a3,  4
         l32i    a8, a3,  0
         addi    a5, a5, -8
-       src_b   a6, a7, a6
+       __src_b a6, a7, a6
         s32i    a6, a5,  4
-       src_b   a7, a8, a7
+       __src_b a7, a8, a7
         s32i    a7, a5,  0
         mov     a6, a8
  .Lback12:
@@ -537,7 +522,7 @@ memmove:
         addi    a3, a3, -4
         l32i    a7, a3,  0
         addi    a5, a5, -4
-       src_b   a6, a7, a6
+       __src_b a6, a7, a6
         s32i    a6, a5,  0
         mov     a6, a7
  .Lback13:
diff --git a/arch/xtensa/lib/usercopy.S b/arch/xtensa/lib/usercopy.S

index 4172b73..0959b6e 100644 (file)
--- a/arch/xtensa/lib/usercopy.S
+++ b/arch/xtensa/lib/usercopy.S
@@ -56,14 +56,6 @@
  #include <variant/core.h>
  #include <asm/asmmacro.h>
  
-#ifdef __XTENSA_EB__
-#define ALIGN(R, W0, W1) src   R, W0, W1
-#define SSA8(R)        ssa8b R
-#else
-#define ALIGN(R, W0, W1) src   R, W1, W0
-#define SSA8(R)        ssa8l R
-#endif
-
         .text
         .align  4
         .global __xtensa_copy_user
@@ -81,7 +73,7 @@ __xtensa_copy_user:
                                 # per iteration
         movi    a8, 3             # if source is also aligned,
         bnone   a3, a8, .Laligned # then use word copy
-       SSA8(   a3)             # set shift amount from byte offset
+       __ssa8  a3              # set shift amount from byte offset
         bnez    a4, .Lsrcunaligned
         movi    a2, 0           # return success for len==0
         retw
@@ -220,16 +212,16 @@ EX(10f)   l32i    a6, a3, 0       # load first word
  .Loop2:
  EX(10f)        l32i    a7, a3,  4
  EX(10f)        l32i    a8, a3,  8
-       ALIGN(  a6, a6, a7)
+       __src_b a6, a6, a7
  EX(10f)        s32i    a6, a5,  0
  EX(10f)        l32i    a9, a3, 12
-       ALIGN(  a7, a7, a8)
+       __src_b a7, a7, a8
  EX(10f)        s32i    a7, a5,  4
  EX(10f)        l32i    a6, a3, 16
-       ALIGN(  a8, a8, a9)
+       __src_b a8, a8, a9
  EX(10f)        s32i    a8, a5,  8
         addi    a3, a3, 16
-       ALIGN(  a9, a9, a6)
+       __src_b a9, a9, a6
  EX(10f)        s32i    a9, a5, 12
         addi    a5, a5, 16
  #if !XCHAL_HAVE_LOOPS
@@ -240,10 +232,10 @@ EX(10f)   s32i    a9, a5, 12
         # copy 8 bytes
  EX(10f)        l32i    a7, a3,  4
  EX(10f)        l32i    a8, a3,  8
-       ALIGN(  a6, a6, a7)
+       __src_b a6, a6, a7
  EX(10f)        s32i    a6, a5,  0
         addi    a3, a3,  8
-       ALIGN(  a7, a7, a8)
+       __src_b a7, a7, a8
  EX(10f)        s32i    a7, a5,  4
         addi    a5, a5,  8
         mov     a6, a8
@@ -252,7 +244,7 @@ EX(10f)     s32i    a7, a5,  4
         # copy 4 bytes
  EX(10f)        l32i    a7, a3,  4
         addi    a3, a3,  4
-       ALIGN(  a6, a6, a7)
+       __src_b a6, a6, a7
  EX(10f)        s32i    a6, a5,  0
         addi    a5, a5,  4
         mov     a6, a7
author	Max Filippov <jcmvbkbc@gmail.com>
	Sun, 10 Dec 2017 05:21:35 +0000 (21:21 -0800)
committer	Max Filippov <jcmvbkbc@gmail.com>
	Sun, 10 Dec 2017 22:48:53 +0000 (14:48 -0800)
arch/xtensa/include/asm/asmmacro.h		patch \| blob \| history
arch/xtensa/kernel/align.S		patch \| blob \| history
arch/xtensa/lib/memcopy.S		patch \| blob \| history
arch/xtensa/lib/usercopy.S		patch \| blob \| history