
Merge remote-tracking branch 'common/android-4.4' into android-4.4.y
Author:     Dmitry Shmidt <dimitrysh@google.com>
AuthorDate: Wed, 7 Sep 2016 21:37:52 +0000 (14:37 -0700)
Commit:     Dmitry Shmidt <dimitrysh@google.com>
CommitDate: Wed, 7 Sep 2016 21:37:52 +0000 (14:37 -0700)
48 files changed:
android/configs/android-base.cfg
arch/Kconfig
arch/arm/Kconfig
arch/arm/include/asm/uaccess.h
arch/arm/kernel/setup.c
arch/arm/kernel/vmlinux.lds.S
arch/arm64/Kconfig
arch/arm64/include/asm/uaccess.h
arch/arm64/kernel/arm64ksyms.c
arch/arm64/kernel/setup.c
arch/arm64/kernel/vmlinux.lds.S
arch/arm64/lib/copy_from_user.S
arch/arm64/lib/copy_to_user.S
arch/arm64/mm/mmu.c
arch/x86/Kconfig
arch/x86/include/asm/thread_info.h
arch/x86/include/asm/uaccess.h
arch/x86/include/asm/uaccess_32.h
arch/x86/include/asm/uaccess_64.h
block/blk-core.c
drivers/media/tuners/tuner-xc2028.c
drivers/mmc/core/core.c
drivers/mmc/core/host.c
drivers/mmc/core/host.h
drivers/scsi/ufs/ufshcd.c
drivers/scsi/ufs/ufshcd.h
drivers/tty/tty_ldisc.c
include/linux/blkdev.h
include/linux/mmc/core.h
include/linux/mmc/host.h
include/linux/mmzone.h
include/linux/slab.h
include/linux/slub_def.h
include/linux/thread_info.h
include/linux/uaccess.h
include/net/tcp.h
init/Kconfig
kernel/cpuset.c
kernel/rcu/sync.c
lib/strncpy_from_user.c
lib/strnlen_user.c
mm/Makefile
mm/slab.c
mm/slub.c
mm/usercopy.c [new file with mode: 0644]
net/netfilter/nfnetlink.c
net/netfilter/xt_qtaguid.c
security/Kconfig

diff --git a/android/configs/android-base.cfg b/android/configs/android-base.cfg
index 6496bb3..34fa64a 100644
@@ -141,6 +141,7 @@ CONFIG_PROFILING=y
 CONFIG_QUOTA=y
 CONFIG_RTC_CLASS=y
 CONFIG_RT_GROUP_SCHED=y
+CONFIG_SECCOMP=y
 CONFIG_SECURITY=y
 CONFIG_SECURITY_NETWORK=y
 CONFIG_SECURITY_PERF_EVENTS_RESTRICT=y
diff --git a/arch/Kconfig b/arch/Kconfig
index 31a318a..98f64ad 100644
@@ -423,6 +423,15 @@ config CC_STACKPROTECTOR_STRONG
 
 endchoice
 
+config HAVE_ARCH_WITHIN_STACK_FRAMES
+       bool
+       help
+         An architecture should select this if it can walk the kernel stack
+         frames to determine if an object is part of either the arguments
+         or local variables (i.e. that it excludes saved return addresses,
+         and similar) by implementing an inline arch_within_stack_frames(),
+         which is used by CONFIG_HARDENED_USERCOPY.
+
 config HAVE_CONTEXT_TRACKING
        bool
        help
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index d3159ff..586134e 100644
@@ -33,6 +33,7 @@ config ARM
        select HARDIRQS_SW_RESEND
        select HAVE_ARCH_AUDITSYSCALL if (AEABI && !OABI_COMPAT)
        select HAVE_ARCH_BITREVERSE if (CPU_32v7M || CPU_32v7) && !CPU_32v6
+       select HAVE_ARCH_HARDENED_USERCOPY
        select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL && !CPU_ENDIAN_BE32
        select HAVE_ARCH_KGDB if !CPU_ENDIAN_BE32
        select HAVE_ARCH_MMAP_RND_BITS if MMU
diff --git a/arch/arm/include/asm/uaccess.h b/arch/arm/include/asm/uaccess.h
index 35c9db8..7fb5919 100644
@@ -496,7 +496,10 @@ arm_copy_from_user(void *to, const void __user *from, unsigned long n);
 static inline unsigned long __must_check
 __copy_from_user(void *to, const void __user *from, unsigned long n)
 {
-       unsigned int __ua_flags = uaccess_save_and_enable();
+       unsigned int __ua_flags;
+
+       check_object_size(to, n, false);
+       __ua_flags = uaccess_save_and_enable();
        n = arm_copy_from_user(to, from, n);
        uaccess_restore(__ua_flags);
        return n;
@@ -511,11 +514,15 @@ static inline unsigned long __must_check
 __copy_to_user(void __user *to, const void *from, unsigned long n)
 {
 #ifndef CONFIG_UACCESS_WITH_MEMCPY
-       unsigned int __ua_flags = uaccess_save_and_enable();
+       unsigned int __ua_flags;
+
+       check_object_size(from, n, true);
+       __ua_flags = uaccess_save_and_enable();
        n = arm_copy_to_user(to, from, n);
        uaccess_restore(__ua_flags);
        return n;
 #else
+       check_object_size(from, n, true);
        return arm_copy_to_user(to, from, n);
 #endif
 }
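
With these hooks in place, every copy routed through __copy_from_user()/__copy_to_user() validates the kernel-side buffer before the uaccess window opens. A minimal sketch of what the check catches — the struct, function, and sizes below are illustrative, not part of this patch:

#include <linux/types.h>
#include <linux/uaccess.h>

struct foo_state {
	u32 flags;
	u8  data[60];
};

static long foo_read_state(struct foo_state *st, void __user *arg)
{
	/* Fine: the copy is bounded by the single heap object, so
	 * check_object_size() passes silently. */
	if (copy_to_user(arg, st, sizeof(*st)))
		return -EFAULT;

	/* Would trip CONFIG_HARDENED_USERCOPY: the length overruns the
	 * object, so the hardened check reports it and kills the copy
	 * rather than leaking whatever follows the object in memory. */
	/* copy_to_user(arg, st, sizeof(*st) + 64); */
	return 0;
}
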
diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
index 20edd34..bf63b46 100644
@@ -772,7 +772,7 @@ static void __init request_standard_resources(const struct machine_desc *mdesc)
        struct resource *res;
 
        kernel_code.start   = virt_to_phys(_text);
-       kernel_code.end     = virt_to_phys(_etext - 1);
+       kernel_code.end     = virt_to_phys(__init_begin - 1);
        kernel_data.start   = virt_to_phys(_sdata);
        kernel_data.end     = virt_to_phys(_end - 1);
 
diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S
index 8b60fde..be2ab6d 100644
@@ -120,6 +120,8 @@ SECTIONS
 #ifdef CONFIG_DEBUG_RODATA
        . = ALIGN(1<<SECTION_SHIFT);
 #endif
+       _etext = .;                     /* End of text section */
+
        RO_DATA(PAGE_SIZE)
 
        . = ALIGN(4);
@@ -150,8 +152,6 @@ SECTIONS
 
        NOTES
 
-       _etext = .;                     /* End of text and rodata section */
-
 #ifndef CONFIG_XIP_KERNEL
 # ifdef CONFIG_ARM_KERNMEM_PERMS
        . = ALIGN(1<<SECTION_SHIFT);
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 579ab68..3ad9d9e 100644
@@ -48,6 +48,7 @@ config ARM64
        select HAVE_ALIGNED_STRUCT_PAGE if SLUB
        select HAVE_ARCH_AUDITSYSCALL
        select HAVE_ARCH_BITREVERSE
+       select HAVE_ARCH_HARDENED_USERCOPY
        select HAVE_ARCH_JUMP_LABEL
        select HAVE_ARCH_KASAN if SPARSEMEM_VMEMMAP && !(ARM64_16K_PAGES && ARM64_VA_BITS_48)
        select HAVE_ARCH_KGDB
diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h
index b2ede96..2a44b97 100644
@@ -247,24 +247,39 @@ do {                                                                      \
                -EFAULT;                                                \
 })
 
-extern unsigned long __must_check __copy_from_user(void *to, const void __user *from, unsigned long n);
-extern unsigned long __must_check __copy_to_user(void __user *to, const void *from, unsigned long n);
+extern unsigned long __must_check __arch_copy_from_user(void *to, const void __user *from, unsigned long n);
+extern unsigned long __must_check __arch_copy_to_user(void __user *to, const void *from, unsigned long n);
 extern unsigned long __must_check __copy_in_user(void __user *to, const void __user *from, unsigned long n);
 extern unsigned long __must_check __clear_user(void __user *addr, unsigned long n);
 
+static inline unsigned long __must_check __copy_from_user(void *to, const void __user *from, unsigned long n)
+{
+       check_object_size(to, n, false);
+       return __arch_copy_from_user(to, from, n);
+}
+
+static inline unsigned long __must_check __copy_to_user(void __user *to, const void *from, unsigned long n)
+{
+       check_object_size(from, n, true);
+       return __arch_copy_to_user(to, from, n);
+}
+
 static inline unsigned long __must_check copy_from_user(void *to, const void __user *from, unsigned long n)
 {
-       if (access_ok(VERIFY_READ, from, n))
-               n = __copy_from_user(to, from, n);
-       else /* security hole - plug it */
+       if (access_ok(VERIFY_READ, from, n)) {
+               check_object_size(to, n, false);
+               n = __arch_copy_from_user(to, from, n);
+       } else /* security hole - plug it */
                memset(to, 0, n);
        return n;
 }
 
 static inline unsigned long __must_check copy_to_user(void __user *to, const void *from, unsigned long n)
 {
-       if (access_ok(VERIFY_WRITE, to, n))
-               n = __copy_to_user(to, from, n);
+       if (access_ok(VERIFY_WRITE, to, n)) {
+               check_object_size(from, n, true);
+               n = __arch_copy_to_user(to, from, n);
+       }
        return n;
 }
 
diff --git a/arch/arm64/kernel/arm64ksyms.c b/arch/arm64/kernel/arm64ksyms.c
index 3b6d8cc..c654df0 100644
@@ -33,8 +33,8 @@ EXPORT_SYMBOL(copy_page);
 EXPORT_SYMBOL(clear_page);
 
        /* user mem (segment) */
-EXPORT_SYMBOL(__copy_from_user);
-EXPORT_SYMBOL(__copy_to_user);
+EXPORT_SYMBOL(__arch_copy_from_user);
+EXPORT_SYMBOL(__arch_copy_to_user);
 EXPORT_SYMBOL(__clear_user);
 EXPORT_SYMBOL(__copy_in_user);
 
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index 8119479..7f0e722 100644
@@ -201,7 +201,7 @@ static void __init request_standard_resources(void)
        struct resource *res;
 
        kernel_code.start   = virt_to_phys(_text);
-       kernel_code.end     = virt_to_phys(_etext - 1);
+       kernel_code.end     = virt_to_phys(__init_begin - 1);
        kernel_data.start   = virt_to_phys(_sdata);
        kernel_data.end     = virt_to_phys(_end - 1);
 
diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
index 71426a7..f472809 100644
@@ -114,11 +114,12 @@ SECTIONS
        }
 
        ALIGN_DEBUG_RO
+       _etext = .;                     /* End of text section */
+
        RO_DATA(PAGE_SIZE)
        EXCEPTION_TABLE(8)
        NOTES
        ALIGN_DEBUG_RO
-       _etext = .;                     /* End of text and rodata section */
 
        ALIGN_DEBUG_RO_MIN(PAGE_SIZE)
        __init_begin = .;
diff --git a/arch/arm64/lib/copy_from_user.S b/arch/arm64/lib/copy_from_user.S
index 4699cd7..281e75d 100644
@@ -66,7 +66,7 @@
        .endm
 
 end    .req    x5
-ENTRY(__copy_from_user)
+ENTRY(__arch_copy_from_user)
 ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(0)), ARM64_HAS_PAN, \
            CONFIG_ARM64_PAN)
        add     end, x0, x2
@@ -75,7 +75,7 @@ ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(1)), ARM64_HAS_PAN, \
            CONFIG_ARM64_PAN)
        mov     x0, #0                          // Nothing to copy
        ret
-ENDPROC(__copy_from_user)
+ENDPROC(__arch_copy_from_user)
 
        .section .fixup,"ax"
        .align  2
diff --git a/arch/arm64/lib/copy_to_user.S b/arch/arm64/lib/copy_to_user.S
index 7512bbb..db4d187 100644
@@ -65,7 +65,7 @@
        .endm
 
 end    .req    x5
-ENTRY(__copy_to_user)
+ENTRY(__arch_copy_to_user)
 ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(0)), ARM64_HAS_PAN, \
            CONFIG_ARM64_PAN)
        add     end, x0, x2
@@ -74,7 +74,7 @@ ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(1)), ARM64_HAS_PAN, \
            CONFIG_ARM64_PAN)
        mov     x0, #0
        ret
-ENDPROC(__copy_to_user)
+ENDPROC(__arch_copy_to_user)
 
        .section .fixup,"ax"
        .align  2
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 653735a..a254a53 100644
@@ -337,7 +337,6 @@ static void __init __map_memblock(phys_addr_t start, phys_addr_t end)
                                end - kernel_x_end,
                                PAGE_KERNEL);
        }
-
 }
 #else
 static void __init __map_memblock(phys_addr_t start, phys_addr_t end)
@@ -425,7 +424,7 @@ static void __init fixup_executable(void)
 void mark_rodata_ro(void)
 {
        create_mapping_late(__pa(_stext), (unsigned long)_stext,
-                               (unsigned long)_etext - (unsigned long)_stext,
+                               (unsigned long)__init_begin - (unsigned long)_stext,
                                PAGE_KERNEL_ROX);
 
 }
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index be2395b..2062067 100644
@@ -79,6 +79,7 @@ config X86
        select HAVE_ALIGNED_STRUCT_PAGE         if SLUB
        select HAVE_AOUT                        if X86_32
        select HAVE_ARCH_AUDITSYSCALL
+       select HAVE_ARCH_HARDENED_USERCOPY
        select HAVE_ARCH_HUGE_VMAP              if X86_64 || X86_PAE
        select HAVE_ARCH_JUMP_LABEL
        select HAVE_ARCH_KASAN                  if X86_64 && SPARSEMEM_VMEMMAP
@@ -88,6 +89,7 @@ config X86
        select HAVE_ARCH_SOFT_DIRTY             if X86_64
        select HAVE_ARCH_TRACEHOOK
        select HAVE_ARCH_TRANSPARENT_HUGEPAGE
+       select HAVE_ARCH_WITHIN_STACK_FRAMES
        select HAVE_BPF_JIT                     if X86_64
        select HAVE_CC_STACKPROTECTOR
        select HAVE_CMPXCHG_DOUBLE
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index c7b5510..0c977fc 100644
@@ -177,6 +177,50 @@ static inline unsigned long current_stack_pointer(void)
        return sp;
 }
 
+/*
+ * Walks up the stack frames to make sure that the specified object is
+ * entirely contained by a single stack frame.
+ *
+ * Returns:
+ *              1 if within a frame
+ *             -1 if placed across a frame boundary (or outside stack)
+ *              0 unable to determine (no frame pointers, etc)
+ */
+static inline int arch_within_stack_frames(const void * const stack,
+                                          const void * const stackend,
+                                          const void *obj, unsigned long len)
+{
+#if defined(CONFIG_FRAME_POINTER)
+       const void *frame = NULL;
+       const void *oldframe;
+
+       oldframe = __builtin_frame_address(1);
+       if (oldframe)
+               frame = __builtin_frame_address(2);
+       /*
+        * low ----------------------------------------------> high
+        * [saved bp][saved ip][args][local vars][saved bp][saved ip]
+        *                     ^----------------^
+        *               allow copies only within here
+        */
+       while (stack <= frame && frame < stackend) {
+               /*
+                * If obj + len extends past the last frame, this
+                * check won't pass and the next frame will be 0,
+                * causing us to bail out and correctly report
+                * the copy as invalid.
+                */
+               if (obj + len <= frame)
+                       return obj >= oldframe + 2 * sizeof(void *) ? 1 : -1;
+               oldframe = frame;
+               frame = *(const void * const *)frame;
+       }
+       return -1;
+#else
+       return 0;
+#endif
+}
+
 #else /* !__ASSEMBLY__ */
 
 #ifdef CONFIG_X86_64
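
The consumer of this hook is the hardened usercopy core added later in this series (mm/usercopy.c). Roughly, the stack check there looks like the sketch below — a simplification for orientation, not the verbatim contents of the new file:

static int check_stack_object(const void *obj, unsigned long len)
{
	const void * const stack = task_stack_page(current);
	const void * const stackend = stack + THREAD_SIZE;

	/* Object lies entirely outside the stack: not handled here. */
	if (obj + len <= stack || stackend <= obj)
		return 0;

	/* Object straddles a stack boundary: always reject. */
	if (obj < stack || stackend < obj + len)
		return -1;

	/* Fully on the stack: let the architecture walk the frames
	 * (1 = within one frame, -1 = spans frames, 0 = can't tell). */
	return arch_within_stack_frames(stack, stackend, obj, len);
}
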
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index 09b1b0a..be439e2 100644
@@ -134,6 +134,9 @@ extern int __get_user_4(void);
 extern int __get_user_8(void);
 extern int __get_user_bad(void);
 
+#define __uaccess_begin() stac()
+#define __uaccess_end()   clac()
+
 /*
  * This is a type: either unsigned long, if the argument fits into
  * that type, or otherwise unsigned long long.
@@ -193,10 +196,10 @@ __typeof__(__builtin_choose_expr(sizeof(x) > sizeof(0UL), 0ULL, 0UL))
 
 #ifdef CONFIG_X86_32
 #define __put_user_asm_u64(x, addr, err, errret)                       \
-       asm volatile(ASM_STAC "\n"                                      \
+       asm volatile("\n"                                               \
                     "1:        movl %%eax,0(%2)\n"                     \
                     "2:        movl %%edx,4(%2)\n"                     \
-                    "3: " ASM_CLAC "\n"                                \
+                    "3:"                                               \
                     ".section .fixup,\"ax\"\n"                         \
                     "4:        movl %3,%0\n"                           \
                     "  jmp 3b\n"                                       \
@@ -207,10 +210,10 @@ __typeof__(__builtin_choose_expr(sizeof(x) > sizeof(0UL), 0ULL, 0UL))
                     : "A" (x), "r" (addr), "i" (errret), "0" (err))
 
 #define __put_user_asm_ex_u64(x, addr)                                 \
-       asm volatile(ASM_STAC "\n"                                      \
+       asm volatile("\n"                                               \
                     "1:        movl %%eax,0(%1)\n"                     \
                     "2:        movl %%edx,4(%1)\n"                     \
-                    "3: " ASM_CLAC "\n"                                \
+                    "3:"                                               \
                     _ASM_EXTABLE_EX(1b, 2b)                            \
                     _ASM_EXTABLE_EX(2b, 3b)                            \
                     : : "A" (x), "r" (addr))
@@ -304,6 +307,10 @@ do {                                                                       \
        }                                                               \
 } while (0)
 
+/*
+ * This doesn't do __uaccess_begin/end - the exception handling
+ * around it must do that.
+ */
 #define __put_user_size_ex(x, ptr, size)                               \
 do {                                                                   \
        __chk_user_ptr(ptr);                                            \
@@ -358,9 +365,9 @@ do {                                                                        \
 } while (0)
 
 #define __get_user_asm(x, addr, err, itype, rtype, ltype, errret)      \
-       asm volatile(ASM_STAC "\n"                                      \
+       asm volatile("\n"                                               \
                     "1:        mov"itype" %2,%"rtype"1\n"              \
-                    "2: " ASM_CLAC "\n"                                \
+                    "2:\n"                                             \
                     ".section .fixup,\"ax\"\n"                         \
                     "3:        mov %3,%0\n"                            \
                     "  xor"itype" %"rtype"1,%"rtype"1\n"               \
@@ -370,6 +377,10 @@ do {                                                                       \
                     : "=r" (err), ltype(x)                             \
                     : "m" (__m(addr)), "i" (errret), "0" (err))
 
+/*
+ * This doesn't do __uaccess_begin/end - the exception handling
+ * around it must do that.
+ */
 #define __get_user_size_ex(x, ptr, size)                               \
 do {                                                                   \
        __chk_user_ptr(ptr);                                            \
@@ -400,7 +411,9 @@ do {                                                                        \
 #define __put_user_nocheck(x, ptr, size)                       \
 ({                                                             \
        int __pu_err;                                           \
+       __uaccess_begin();                                      \
        __put_user_size((x), (ptr), (size), __pu_err, -EFAULT); \
+       __uaccess_end();                                        \
        __builtin_expect(__pu_err, 0);                          \
 })
 
@@ -408,7 +421,9 @@ do {                                                                        \
 ({                                                                     \
        int __gu_err;                                                   \
        unsigned long __gu_val;                                         \
+       __uaccess_begin();                                              \
        __get_user_size(__gu_val, (ptr), (size), __gu_err, -EFAULT);    \
+       __uaccess_end();                                                \
        (x) = (__force __typeof__(*(ptr)))__gu_val;                     \
        __builtin_expect(__gu_err, 0);                                  \
 })
@@ -423,9 +438,9 @@ struct __large_struct { unsigned long buf[100]; };
  * aliasing issues.
  */
 #define __put_user_asm(x, addr, err, itype, rtype, ltype, errret)      \
-       asm volatile(ASM_STAC "\n"                                      \
+       asm volatile("\n"                                               \
                     "1:        mov"itype" %"rtype"1,%2\n"              \
-                    "2: " ASM_CLAC "\n"                                \
+                    "2:\n"                                             \
                     ".section .fixup,\"ax\"\n"                         \
                     "3:        mov %3,%0\n"                            \
                     "  jmp 2b\n"                                       \
@@ -445,11 +460,11 @@ struct __large_struct { unsigned long buf[100]; };
  */
 #define uaccess_try    do {                                            \
        current_thread_info()->uaccess_err = 0;                         \
-       stac();                                                         \
+       __uaccess_begin();                                              \
        barrier();
 
 #define uaccess_catch(err)                                             \
-       clac();                                                         \
+       __uaccess_end();                                                \
        (err) |= (current_thread_info()->uaccess_err ? -EFAULT : 0);    \
 } while (0)
 
@@ -547,12 +562,13 @@ extern void __cmpxchg_wrong_size(void)
        __typeof__(ptr) __uval = (uval);                                \
        __typeof__(*(ptr)) __old = (old);                               \
        __typeof__(*(ptr)) __new = (new);                               \
+       __uaccess_begin();                                              \
        switch (size) {                                                 \
        case 1:                                                         \
        {                                                               \
-               asm volatile("\t" ASM_STAC "\n"                         \
+               asm volatile("\n"                                       \
                        "1:\t" LOCK_PREFIX "cmpxchgb %4, %2\n"          \
-                       "2:\t" ASM_CLAC "\n"                            \
+                       "2:\n"                                          \
                        "\t.section .fixup, \"ax\"\n"                   \
                        "3:\tmov     %3, %0\n"                          \
                        "\tjmp     2b\n"                                \
@@ -566,9 +582,9 @@ extern void __cmpxchg_wrong_size(void)
        }                                                               \
        case 2:                                                         \
        {                                                               \
-               asm volatile("\t" ASM_STAC "\n"                         \
+               asm volatile("\n"                                       \
                        "1:\t" LOCK_PREFIX "cmpxchgw %4, %2\n"          \
-                       "2:\t" ASM_CLAC "\n"                            \
+                       "2:\n"                                          \
                        "\t.section .fixup, \"ax\"\n"                   \
                        "3:\tmov     %3, %0\n"                          \
                        "\tjmp     2b\n"                                \
@@ -582,9 +598,9 @@ extern void __cmpxchg_wrong_size(void)
        }                                                               \
        case 4:                                                         \
        {                                                               \
-               asm volatile("\t" ASM_STAC "\n"                         \
+               asm volatile("\n"                                       \
                        "1:\t" LOCK_PREFIX "cmpxchgl %4, %2\n"          \
-                       "2:\t" ASM_CLAC "\n"                            \
+                       "2:\n"                                          \
                        "\t.section .fixup, \"ax\"\n"                   \
                        "3:\tmov     %3, %0\n"                          \
                        "\tjmp     2b\n"                                \
@@ -601,9 +617,9 @@ extern void __cmpxchg_wrong_size(void)
                if (!IS_ENABLED(CONFIG_X86_64))                         \
                        __cmpxchg_wrong_size();                         \
                                                                        \
-               asm volatile("\t" ASM_STAC "\n"                         \
+               asm volatile("\n"                                       \
                        "1:\t" LOCK_PREFIX "cmpxchgq %4, %2\n"          \
-                       "2:\t" ASM_CLAC "\n"                            \
+                       "2:\n"                                          \
                        "\t.section .fixup, \"ax\"\n"                   \
                        "3:\tmov     %3, %0\n"                          \
                        "\tjmp     2b\n"                                \
@@ -618,6 +634,7 @@ extern void __cmpxchg_wrong_size(void)
        default:                                                        \
                __cmpxchg_wrong_size();                                 \
        }                                                               \
+       __uaccess_end();                                                \
        *__uval = __old;                                                \
        __ret;                                                          \
 })
@@ -714,9 +731,10 @@ copy_from_user(void *to, const void __user *from, unsigned long n)
         * case, and do only runtime checking for non-constant sizes.
         */
 
-       if (likely(sz < 0 || sz >= n))
+       if (likely(sz < 0 || sz >= n)) {
+               check_object_size(to, n, false);
                n = _copy_from_user(to, from, n);
-       else if(__builtin_constant_p(n))
+       } else if (__builtin_constant_p(n))
                copy_from_user_overflow();
        else
                __copy_from_user_overflow(sz, n);
@@ -732,9 +750,10 @@ copy_to_user(void __user *to, const void *from, unsigned long n)
        might_fault();
 
        /* See the comment in copy_from_user() above. */
-       if (likely(sz < 0 || sz >= n))
+       if (likely(sz < 0 || sz >= n)) {
+               check_object_size(from, n, true);
                n = _copy_to_user(to, from, n);
-       else if(__builtin_constant_p(n))
+       } else if (__builtin_constant_p(n))
                copy_to_user_overflow();
        else
                __copy_to_user_overflow(sz, n);
@@ -745,5 +764,30 @@ copy_to_user(void __user *to, const void *from, unsigned long n)
 #undef __copy_from_user_overflow
 #undef __copy_to_user_overflow
 
+/*
+ * The "unsafe" user accesses aren't really "unsafe", but the naming
+ * is a big fat warning: you have to not only do the access_ok()
+ * checking before using them, but you have to surround them with the
+ * user_access_begin/end() pair.
+ */
+#define user_access_begin()    __uaccess_begin()
+#define user_access_end()      __uaccess_end()
+
+#define unsafe_put_user(x, ptr, err_label)                                     \
+do {                                                                           \
+       int __pu_err;                                                           \
+       __put_user_size((x), (ptr), sizeof(*(ptr)), __pu_err, -EFAULT);         \
+       if (unlikely(__pu_err)) goto err_label;                                 \
+} while (0)
+
+#define unsafe_get_user(x, ptr, err_label)                                     \
+do {                                                                           \
+       int __gu_err;                                                           \
+       unsigned long __gu_val;                                                 \
+       __get_user_size(__gu_val, (ptr), sizeof(*(ptr)), __gu_err, -EFAULT);    \
+       (x) = (__force __typeof__(*(ptr)))__gu_val;                             \
+       if (unlikely(__gu_err)) goto err_label;                                 \
+} while (0)
+
 #endif /* _ASM_X86_UACCESS_H */
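
Tying the new pieces together, the intended calling convention for the unsafe accessors is: access_ok() first, then bracket the accesses with user_access_begin()/user_access_end(), making sure the error path closes the window too. A hedged usage sketch (the function and field layout are illustrative):

static int put_two_words(u32 __user *up, u32 a, u32 b)
{
	if (!access_ok(VERIFY_WRITE, up, 2 * sizeof(u32)))
		return -EFAULT;

	user_access_begin();		/* one STAC covers both stores */
	unsafe_put_user(a, &up[0], efault);
	unsafe_put_user(b, &up[1], efault);
	user_access_end();		/* matching CLAC */
	return 0;

efault:
	user_access_end();		/* the error label must also end the window */
	return -EFAULT;
}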
 
diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h
index f5dcb52..2ffc918 100644
@@ -43,6 +43,7 @@ unsigned long __must_check __copy_from_user_ll_nocache_nozero
 static __always_inline unsigned long __must_check
 __copy_to_user_inatomic(void __user *to, const void *from, unsigned long n)
 {
+       check_object_size(from, n, true);
        if (__builtin_constant_p(n)) {
                unsigned long ret;
 
@@ -143,6 +144,7 @@ static __always_inline unsigned long
 __copy_from_user(void *to, const void __user *from, unsigned long n)
 {
        might_fault();
+       check_object_size(to, n, false);
        if (__builtin_constant_p(n)) {
                unsigned long ret;
 
diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h
index f2f9b39..2957c82 100644
@@ -53,38 +53,53 @@ int __copy_from_user_nocheck(void *dst, const void __user *src, unsigned size)
 {
        int ret = 0;
 
+       check_object_size(dst, size, false);
        if (!__builtin_constant_p(size))
                return copy_user_generic(dst, (__force void *)src, size);
        switch (size) {
-       case 1:__get_user_asm(*(u8 *)dst, (u8 __user *)src,
+       case 1:
+               __uaccess_begin();
+               __get_user_asm(*(u8 *)dst, (u8 __user *)src,
                              ret, "b", "b", "=q", 1);
+               __uaccess_end();
                return ret;
-       case 2:__get_user_asm(*(u16 *)dst, (u16 __user *)src,
+       case 2:
+               __uaccess_begin();
+               __get_user_asm(*(u16 *)dst, (u16 __user *)src,
                              ret, "w", "w", "=r", 2);
+               __uaccess_end();
                return ret;
-       case 4:__get_user_asm(*(u32 *)dst, (u32 __user *)src,
+       case 4:
+               __uaccess_begin();
+               __get_user_asm(*(u32 *)dst, (u32 __user *)src,
                              ret, "l", "k", "=r", 4);
+               __uaccess_end();
                return ret;
-       case 8:__get_user_asm(*(u64 *)dst, (u64 __user *)src,
+       case 8:
+               __uaccess_begin();
+               __get_user_asm(*(u64 *)dst, (u64 __user *)src,
                              ret, "q", "", "=r", 8);
+               __uaccess_end();
                return ret;
        case 10:
+               __uaccess_begin();
                __get_user_asm(*(u64 *)dst, (u64 __user *)src,
                               ret, "q", "", "=r", 10);
-               if (unlikely(ret))
-                       return ret;
-               __get_user_asm(*(u16 *)(8 + (char *)dst),
-                              (u16 __user *)(8 + (char __user *)src),
-                              ret, "w", "w", "=r", 2);
+               if (likely(!ret))
+                       __get_user_asm(*(u16 *)(8 + (char *)dst),
+                                      (u16 __user *)(8 + (char __user *)src),
+                                      ret, "w", "w", "=r", 2);
+               __uaccess_end();
                return ret;
        case 16:
+               __uaccess_begin();
                __get_user_asm(*(u64 *)dst, (u64 __user *)src,
                               ret, "q", "", "=r", 16);
-               if (unlikely(ret))
-                       return ret;
-               __get_user_asm(*(u64 *)(8 + (char *)dst),
-                              (u64 __user *)(8 + (char __user *)src),
-                              ret, "q", "", "=r", 8);
+               if (likely(!ret))
+                       __get_user_asm(*(u64 *)(8 + (char *)dst),
+                                      (u64 __user *)(8 + (char __user *)src),
+                                      ret, "q", "", "=r", 8);
+               __uaccess_end();
                return ret;
        default:
                return copy_user_generic(dst, (__force void *)src, size);
@@ -103,38 +118,55 @@ int __copy_to_user_nocheck(void __user *dst, const void *src, unsigned size)
 {
        int ret = 0;
 
+       check_object_size(src, size, true);
        if (!__builtin_constant_p(size))
                return copy_user_generic((__force void *)dst, src, size);
        switch (size) {
-       case 1:__put_user_asm(*(u8 *)src, (u8 __user *)dst,
+       case 1:
+               __uaccess_begin();
+               __put_user_asm(*(u8 *)src, (u8 __user *)dst,
                              ret, "b", "b", "iq", 1);
+               __uaccess_end();
                return ret;
-       case 2:__put_user_asm(*(u16 *)src, (u16 __user *)dst,
+       case 2:
+               __uaccess_begin();
+               __put_user_asm(*(u16 *)src, (u16 __user *)dst,
                              ret, "w", "w", "ir", 2);
+               __uaccess_end();
                return ret;
-       case 4:__put_user_asm(*(u32 *)src, (u32 __user *)dst,
+       case 4:
+               __uaccess_begin();
+               __put_user_asm(*(u32 *)src, (u32 __user *)dst,
                              ret, "l", "k", "ir", 4);
+               __uaccess_end();
                return ret;
-       case 8:__put_user_asm(*(u64 *)src, (u64 __user *)dst,
+       case 8:
+               __uaccess_begin();
+               __put_user_asm(*(u64 *)src, (u64 __user *)dst,
                              ret, "q", "", "er", 8);
+               __uaccess_end();
                return ret;
        case 10:
+               __uaccess_begin();
                __put_user_asm(*(u64 *)src, (u64 __user *)dst,
                               ret, "q", "", "er", 10);
-               if (unlikely(ret))
-                       return ret;
-               asm("":::"memory");
-               __put_user_asm(4[(u16 *)src], 4 + (u16 __user *)dst,
-                              ret, "w", "w", "ir", 2);
+               if (likely(!ret)) {
+                       asm("":::"memory");
+                       __put_user_asm(4[(u16 *)src], 4 + (u16 __user *)dst,
+                                      ret, "w", "w", "ir", 2);
+               }
+               __uaccess_end();
                return ret;
        case 16:
+               __uaccess_begin();
                __put_user_asm(*(u64 *)src, (u64 __user *)dst,
                               ret, "q", "", "er", 16);
-               if (unlikely(ret))
-                       return ret;
-               asm("":::"memory");
-               __put_user_asm(1[(u64 *)src], 1 + (u64 __user *)dst,
-                              ret, "q", "", "er", 8);
+               if (likely(!ret)) {
+                       asm("":::"memory");
+                       __put_user_asm(1[(u64 *)src], 1 + (u64 __user *)dst,
+                                      ret, "q", "", "er", 8);
+               }
+               __uaccess_end();
                return ret;
        default:
                return copy_user_generic((__force void *)dst, src, size);
@@ -160,39 +192,47 @@ int __copy_in_user(void __user *dst, const void __user *src, unsigned size)
        switch (size) {
        case 1: {
                u8 tmp;
+               __uaccess_begin();
                __get_user_asm(tmp, (u8 __user *)src,
                               ret, "b", "b", "=q", 1);
                if (likely(!ret))
                        __put_user_asm(tmp, (u8 __user *)dst,
                                       ret, "b", "b", "iq", 1);
+               __uaccess_end();
                return ret;
        }
        case 2: {
                u16 tmp;
+               __uaccess_begin();
                __get_user_asm(tmp, (u16 __user *)src,
                               ret, "w", "w", "=r", 2);
                if (likely(!ret))
                        __put_user_asm(tmp, (u16 __user *)dst,
                                       ret, "w", "w", "ir", 2);
+               __uaccess_end();
                return ret;
        }
 
        case 4: {
                u32 tmp;
+               __uaccess_begin();
                __get_user_asm(tmp, (u32 __user *)src,
                               ret, "l", "k", "=r", 4);
                if (likely(!ret))
                        __put_user_asm(tmp, (u32 __user *)dst,
                                       ret, "l", "k", "ir", 4);
+               __uaccess_end();
                return ret;
        }
        case 8: {
                u64 tmp;
+               __uaccess_begin();
                __get_user_asm(tmp, (u64 __user *)src,
                               ret, "q", "", "=r", 8);
                if (likely(!ret))
                        __put_user_asm(tmp, (u64 __user *)dst,
                                       ret, "q", "", "er", 8);
+               __uaccess_end();
                return ret;
        }
        default:
diff --git a/block/blk-core.c b/block/blk-core.c
index f8e64ca..b20ada4 100644
@@ -40,6 +40,8 @@
 #include "blk.h"
 #include "blk-mq.h"
 
+#include <linux/math64.h>
+
 EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_remap);
 EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap);
 EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_complete);
@@ -3539,3 +3541,83 @@ int __init blk_dev_init(void)
 
        return 0;
 }
+
+/*
+ * Block IO latency histogram support. We want this to be as cheap as
+ * possible, so it is done locklessly and without atomics; being off by
+ * a few counts is harmless, and nothing here should noticeably impact
+ * performance.
+ * TODO: If necessary, make the histograms per-cpu and aggregate them
+ * when printing.
+ */
+void
+blk_zero_latency_hist(struct io_latency_state *s)
+{
+       memset(s->latency_y_axis_read, 0,
+              sizeof(s->latency_y_axis_read));
+       memset(s->latency_y_axis_write, 0,
+              sizeof(s->latency_y_axis_write));
+       s->latency_reads_elems = 0;
+       s->latency_writes_elems = 0;
+}
+
+ssize_t
+blk_latency_hist_show(struct io_latency_state *s, char *buf)
+{
+       int i;
+       int bytes_written = 0;
+       u_int64_t num_elem, elem;
+       int pct;
+
+       num_elem = s->latency_reads_elems;
+       if (num_elem > 0) {
+               bytes_written += scnprintf(buf + bytes_written,
+                          PAGE_SIZE - bytes_written,
+                          "IO svc_time Read Latency Histogram (n = %llu):\n",
+                          num_elem);
+               for (i = 0;
+                    i < ARRAY_SIZE(latency_x_axis_us);
+                    i++) {
+                       elem = s->latency_y_axis_read[i];
+                       pct = div64_u64(elem * 100, num_elem);
+                       bytes_written += scnprintf(buf + bytes_written,
+                                                  PAGE_SIZE - bytes_written,
+                                                  "\t< %5lluus%15llu%15d%%\n",
+                                                  latency_x_axis_us[i],
+                                                  elem, pct);
+               }
+               /* Last element in y-axis table is overflow */
+               elem = s->latency_y_axis_read[i];
+               pct = div64_u64(elem * 100, num_elem);
+               bytes_written += scnprintf(buf + bytes_written,
+                                          PAGE_SIZE - bytes_written,
+                                          "\t> %5dms%15llu%15d%%\n", 10,
+                                          elem, pct);
+       }
+       num_elem = s->latency_writes_elems;
+       if (num_elem > 0) {
+               bytes_written += scnprintf(buf + bytes_written,
+                          PAGE_SIZE - bytes_written,
+                          "IO svc_time Write Latency Histogram (n = %llu):\n",
+                          num_elem);
+               for (i = 0;
+                    i < ARRAY_SIZE(latency_x_axis_us);
+                    i++) {
+                       elem = s->latency_y_axis_write[i];
+                       pct = div64_u64(elem * 100, num_elem);
+                       bytes_written += scnprintf(buf + bytes_written,
+                                                  PAGE_SIZE - bytes_written,
+                                                  "\t< %5lluus%15llu%15d%%\n",
+                                                  latency_x_axis_us[i],
+                                                  elem, pct);
+               }
+               /* Last element in y-axis table is overflow */
+               elem = s->latency_y_axis_write[i];
+               pct = div64_u64(elem * 100, num_elem);
+               bytes_written += scnprintf(buf + bytes_written,
+                                          PAGE_SIZE - bytes_written,
+                                          "\t> %5dms%15llu%15d%%\n", 10,
+                                          elem, pct);
+       }
+       return bytes_written;
+}
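
For reference, the format strings above render the histogram roughly like this (numbers illustrative, alignment approximate):

IO svc_time Read Latency Histogram (n = 120):
	<   100us             37             30%
	<   200us             24             20%
	...
	< 10000us              1              0%
	>    10ms              2              1%
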
diff --git a/drivers/media/tuners/tuner-xc2028.c b/drivers/media/tuners/tuner-xc2028.c
index 4e941f0..317ef63 100644
@@ -1403,11 +1403,14 @@ static int xc2028_set_config(struct dvb_frontend *fe, void *priv_cfg)
         * in order to avoid troubles during device release.
         */
        kfree(priv->ctrl.fname);
+       priv->ctrl.fname = NULL;
        memcpy(&priv->ctrl, p, sizeof(priv->ctrl));
        if (p->fname) {
                priv->ctrl.fname = kstrdup(p->fname, GFP_KERNEL);
-               if (priv->ctrl.fname == NULL)
+               if (priv->ctrl.fname == NULL) {
                        rc = -ENOMEM;
+                       goto unlock;
+               }
        }
 
        /*
@@ -1439,6 +1442,7 @@ static int xc2028_set_config(struct dvb_frontend *fe, void *priv_cfg)
                } else
                        priv->state = XC2028_WAITING_FIRMWARE;
        }
+unlock:
        mutex_unlock(&priv->lock);
 
        return rc;
diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c
index 4df0c68..1689075 100644
@@ -183,6 +183,17 @@ void mmc_request_done(struct mmc_host *host, struct mmc_request *mrq)
                        pr_debug("%s:     %d bytes transferred: %d\n",
                                mmc_hostname(host),
                                mrq->data->bytes_xfered, mrq->data->error);
+                       if (mrq->lat_hist_enabled) {
+                               ktime_t completion;
+                               u_int64_t delta_us;
+
+                               completion = ktime_get();
+                               delta_us = ktime_us_delta(completion,
+                                                         mrq->io_start);
+                               blk_update_latency_hist(&host->io_lat_s,
+                                       (mrq->data->flags & MMC_DATA_READ),
+                                       delta_us);
+                       }
                        trace_mmc_blk_rw_end(cmd->opcode, cmd->arg, mrq->data);
                }
 
@@ -627,6 +638,11 @@ struct mmc_async_req *mmc_start_req(struct mmc_host *host,
        }
 
        if (!err && areq) {
+               if (host->latency_hist_enabled) {
+                       areq->mrq->io_start = ktime_get();
+                       areq->mrq->lat_hist_enabled = 1;
+               } else
+                       areq->mrq->lat_hist_enabled = 0;
                trace_mmc_blk_rw_start(areq->mrq->cmd->opcode,
                                       areq->mrq->cmd->arg,
                                       areq->mrq->data);
@@ -1964,7 +1980,7 @@ void mmc_init_erase(struct mmc_card *card)
 }
 
 static unsigned int mmc_mmc_erase_timeout(struct mmc_card *card,
-                                         unsigned int arg, unsigned int qty)
+                                         unsigned int arg, unsigned int qty)
 {
        unsigned int erase_timeout;
 
@@ -2907,6 +2923,54 @@ static void __exit mmc_exit(void)
        destroy_workqueue(workqueue);
 }
 
+static ssize_t
+latency_hist_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+       struct mmc_host *host = cls_dev_to_mmc_host(dev);
+
+       return blk_latency_hist_show(&host->io_lat_s, buf);
+}
+
+/*
+ * Permitted values: 0, 1, 2.
+ * 0 -> Disable IO latency histograms (default)
+ * 1 -> Enable IO latency histograms
+ * 2 -> Zero out IO latency histograms
+ */
+static ssize_t
+latency_hist_store(struct device *dev, struct device_attribute *attr,
+                  const char *buf, size_t count)
+{
+       struct mmc_host *host = cls_dev_to_mmc_host(dev);
+       long value;
+
+       if (kstrtol(buf, 0, &value))
+               return -EINVAL;
+       if (value == BLK_IO_LAT_HIST_ZERO)
+               blk_zero_latency_hist(&host->io_lat_s);
+       else if (value == BLK_IO_LAT_HIST_ENABLE ||
+                value == BLK_IO_LAT_HIST_DISABLE)
+               host->latency_hist_enabled = value;
+       return count;
+}
+
+static DEVICE_ATTR(latency_hist, S_IRUGO | S_IWUSR,
+                  latency_hist_show, latency_hist_store);
+
+void
+mmc_latency_hist_sysfs_init(struct mmc_host *host)
+{
+       if (device_create_file(&host->class_dev, &dev_attr_latency_hist))
+               dev_err(&host->class_dev,
+                       "Failed to create latency_hist sysfs entry\n");
+}
+
+void
+mmc_latency_hist_sysfs_exit(struct mmc_host *host)
+{
+       device_remove_file(&host->class_dev, &dev_attr_latency_hist);
+}
+
 subsys_initcall(mmc_init);
 module_exit(mmc_exit);
 
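Since the attribute hangs off the host's class device, it shows up in sysfs and can be driven from userspace as in this sketch (the mmc0 path is an assumption about the target system):

#include <stdio.h>

int main(void)
{
	/* Assumed path; the actual host index depends on the system. */
	const char *attr = "/sys/class/mmc_host/mmc0/latency_hist";
	char line[128];
	FILE *f;

	f = fopen(attr, "w");
	if (!f)
		return 1;
	fputs("1\n", f);	/* BLK_IO_LAT_HIST_ENABLE */
	fclose(f);

	/* ... generate some IO, then read the histogram back ... */

	f = fopen(attr, "r");
	if (!f)
		return 1;
	while (fgets(line, sizeof(line), f))
		fputs(line, stdout);
	fclose(f);
	return 0;
}
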
diff --git a/drivers/mmc/core/host.c b/drivers/mmc/core/host.c
index fcf7829..1706883 100644
@@ -32,8 +32,6 @@
 #include "slot-gpio.h"
 #include "pwrseq.h"
 
-#define cls_dev_to_mmc_host(d) container_of(d, struct mmc_host, class_dev)
-
 static DEFINE_IDR(mmc_host_idr);
 static DEFINE_SPINLOCK(mmc_host_lock);
 
@@ -394,6 +392,8 @@ int mmc_add_host(struct mmc_host *host)
        mmc_add_host_debugfs(host);
 #endif
 
+       mmc_latency_hist_sysfs_init(host);
+
        mmc_start_host(host);
        if (!(host->pm_flags & MMC_PM_IGNORE_PM_NOTIFY))
                register_pm_notifier(&host->pm_notify);
@@ -422,6 +422,8 @@ void mmc_remove_host(struct mmc_host *host)
        mmc_remove_host_debugfs(host);
 #endif
 
+       mmc_latency_hist_sysfs_exit(host);
+
        device_del(&host->class_dev);
 
        led_trigger_unregister_simple(host->led);
diff --git a/drivers/mmc/core/host.h b/drivers/mmc/core/host.h
index 992bf53..bf38533 100644
@@ -12,6 +12,8 @@
 #define _MMC_CORE_HOST_H
 #include <linux/mmc/host.h>
 
+#define cls_dev_to_mmc_host(d) container_of(d, struct mmc_host, class_dev)
+
 int mmc_register_host_class(void);
 void mmc_unregister_host_class(void);
 
@@ -21,5 +23,8 @@ void mmc_retune_hold(struct mmc_host *host);
 void mmc_retune_release(struct mmc_host *host);
 int mmc_retune(struct mmc_host *host);
 
+void mmc_latency_hist_sysfs_init(struct mmc_host *host);
+void mmc_latency_hist_sysfs_exit(struct mmc_host *host);
+
 #endif
 
diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
index 85cd256..4167bdb 100644
@@ -39,6 +39,7 @@
 
 #include <linux/async.h>
 #include <linux/devfreq.h>
+#include <linux/blkdev.h>
 
 #include "ufshcd.h"
 #include "unipro.h"
@@ -1332,6 +1333,17 @@ static int ufshcd_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd)
                clear_bit_unlock(tag, &hba->lrb_in_use);
                goto out;
        }
+
+       /* IO svc time latency histogram */
+       if (hba != NULL && cmd->request != NULL) {
+               if (hba->latency_hist_enabled &&
+                   (cmd->request->cmd_type == REQ_TYPE_FS)) {
+                       cmd->request->lat_hist_io_start = ktime_get();
+                       cmd->request->lat_hist_enabled = 1;
+               } else
+                       cmd->request->lat_hist_enabled = 0;
+       }
+
        WARN_ON(hba->clk_gating.state != CLKS_ON);
 
        lrbp = &hba->lrb[tag];
@@ -3160,6 +3172,7 @@ static void ufshcd_transfer_req_compl(struct ufs_hba *hba)
        u32 tr_doorbell;
        int result;
        int index;
+       struct request *req;
 
        /* Resetting interrupt aggregation counters first and reading the
         * DOOR_BELL afterward allows us to handle all the completed requests.
@@ -3184,6 +3197,22 @@ static void ufshcd_transfer_req_compl(struct ufs_hba *hba)
                        /* Mark completed command as NULL in LRB */
                        lrbp->cmd = NULL;
                        clear_bit_unlock(index, &hba->lrb_in_use);
+                       req = cmd->request;
+                       if (req) {
+                               /* Update IO svc time latency histogram */
+                               if (req->lat_hist_enabled) {
+                                       ktime_t completion;
+                                       u_int64_t delta_us;
+
+                                       completion = ktime_get();
+                                       delta_us = ktime_us_delta(completion,
+                                                 req->lat_hist_io_start);
+                                       /* rq_data_dir() => true if WRITE */
+                                       blk_update_latency_hist(&hba->io_lat_s,
+                                               (rq_data_dir(req) == READ),
+                                               delta_us);
+                               }
+                       }
                        /* Do not touch lrbp after scsi done */
                        cmd->scsi_done(cmd);
                        __ufshcd_release(hba);
@@ -5327,6 +5356,54 @@ out:
 }
 EXPORT_SYMBOL(ufshcd_shutdown);
 
+/*
+ * Permitted values: 0, 1, 2.
+ * 0 -> Disable IO latency histograms (default)
+ * 1 -> Enable IO latency histograms
+ * 2 -> Zero out IO latency histograms
+ */
+static ssize_t
+latency_hist_store(struct device *dev, struct device_attribute *attr,
+                  const char *buf, size_t count)
+{
+       struct ufs_hba *hba = dev_get_drvdata(dev);
+       long value;
+
+       if (kstrtol(buf, 0, &value))
+               return -EINVAL;
+       if (value == BLK_IO_LAT_HIST_ZERO)
+               blk_zero_latency_hist(&hba->io_lat_s);
+       else if (value == BLK_IO_LAT_HIST_ENABLE ||
+                value == BLK_IO_LAT_HIST_DISABLE)
+               hba->latency_hist_enabled = value;
+       return count;
+}
+
+static ssize_t
+latency_hist_show(struct device *dev, struct device_attribute *attr,
+                 char *buf)
+{
+       struct ufs_hba *hba = dev_get_drvdata(dev);
+
+       return blk_latency_hist_show(&hba->io_lat_s, buf);
+}
+
+static DEVICE_ATTR(latency_hist, S_IRUGO | S_IWUSR,
+                  latency_hist_show, latency_hist_store);
+
+static void
+ufshcd_init_latency_hist(struct ufs_hba *hba)
+{
+       if (device_create_file(hba->dev, &dev_attr_latency_hist))
+               dev_err(hba->dev, "Failed to create latency_hist sysfs entry\n");
+}
+
+static void
+ufshcd_exit_latency_hist(struct ufs_hba *hba)
+{
+       device_remove_file(hba->dev, &dev_attr_latency_hist);
+}
+
 /**
  * ufshcd_remove - de-allocate SCSI host and host memory space
  *             data structure memory
@@ -5342,6 +5419,7 @@ void ufshcd_remove(struct ufs_hba *hba)
        scsi_host_put(hba->host);
 
        ufshcd_exit_clk_gating(hba);
+       ufshcd_exit_latency_hist(hba);
        if (ufshcd_is_clkscaling_enabled(hba))
                devfreq_remove_device(hba->devfreq);
        ufshcd_hba_exit(hba);
@@ -5639,6 +5717,8 @@ int ufshcd_init(struct ufs_hba *hba, void __iomem *mmio_base, unsigned int irq)
        /* Hold auto suspend until async scan completes */
        pm_runtime_get_sync(dev);
 
+       ufshcd_init_latency_hist(hba);
+
        /*
         * The device-initialize-sequence hasn't been invoked yet.
         * Set the device to power-off state
@@ -5653,6 +5733,7 @@ out_remove_scsi_host:
        scsi_remove_host(hba->host);
 exit_gating:
        ufshcd_exit_clk_gating(hba);
+       ufshcd_exit_latency_hist(hba);
 out_disable:
        hba->is_irq_enabled = false;
        scsi_host_put(host);
diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h
index 2570d94..f3780cf 100644
@@ -532,6 +532,9 @@ struct ufs_hba {
        struct devfreq *devfreq;
        struct ufs_clk_scaling clk_scaling;
        bool is_sys_suspended;
+
+       int                     latency_hist_enabled;
+       struct io_latency_state io_lat_s;
 };
 
 /* Returns true if clocks can be gated. Otherwise false */
diff --git a/drivers/tty/tty_ldisc.c b/drivers/tty/tty_ldisc.c
index 629e3c8..9bee25c 100644
@@ -417,6 +417,10 @@ EXPORT_SYMBOL_GPL(tty_ldisc_flush);
  *     they are not on hot paths so a little discipline won't do
  *     any harm.
  *
+ *     The line discipline-related tty_struct fields are reset to
+ *     prevent the ldisc driver from re-using stale information for
+ *     the new ldisc instance.
+ *
  *     Locking: takes termios_rwsem
  */
 
@@ -425,6 +429,9 @@ static void tty_set_termios_ldisc(struct tty_struct *tty, int num)
        down_write(&tty->termios_rwsem);
        tty->termios.c_line = num;
        up_write(&tty->termios_rwsem);
+
+       tty->disc_data = NULL;
+       tty->receive_room = 0;
 }
 
 /**
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 1687557..c98bae9 100644
@@ -197,6 +197,9 @@ struct request {
 
        /* for bidi */
        struct request *next_rq;
+
+       ktime_t                 lat_hist_io_start;
+       int                     lat_hist_enabled;
 };
 
 static inline unsigned short req_get_ioprio(struct request *req)
@@ -1656,6 +1659,79 @@ extern int bdev_write_page(struct block_device *, sector_t, struct page *,
                                                struct writeback_control *);
 extern long bdev_direct_access(struct block_device *, sector_t,
                void __pmem **addr, unsigned long *pfn, long size);
+
+/*
+ * X-axis for IO latency histogram support.
+ */
+static const u_int64_t latency_x_axis_us[] = {
+       100,
+       200,
+       300,
+       400,
+       500,
+       600,
+       700,
+       800,
+       900,
+       1000,
+       1200,
+       1400,
+       1600,
+       1800,
+       2000,
+       2500,
+       3000,
+       4000,
+       5000,
+       6000,
+       7000,
+       9000,
+       10000
+};
+
+#define BLK_IO_LAT_HIST_DISABLE         0
+#define BLK_IO_LAT_HIST_ENABLE          1
+#define BLK_IO_LAT_HIST_ZERO            2
+
+struct io_latency_state {
+       u_int64_t       latency_y_axis_read[ARRAY_SIZE(latency_x_axis_us) + 1];
+       u_int64_t       latency_reads_elems;
+       u_int64_t       latency_y_axis_write[ARRAY_SIZE(latency_x_axis_us) + 1];
+       u_int64_t       latency_writes_elems;
+};
+
+static inline void
+blk_update_latency_hist(struct io_latency_state *s,
+                       int read,
+                       u_int64_t delta_us)
+{
+       int i;
+
+       for (i = 0; i < ARRAY_SIZE(latency_x_axis_us); i++) {
+               if (delta_us < (u_int64_t)latency_x_axis_us[i]) {
+                       if (read)
+                               s->latency_y_axis_read[i]++;
+                       else
+                               s->latency_y_axis_write[i]++;
+                       break;
+               }
+       }
+       if (i == ARRAY_SIZE(latency_x_axis_us)) {
+               /* Overflowed the histogram */
+               if (read)
+                       s->latency_y_axis_read[i]++;
+               else
+                       s->latency_y_axis_write[i]++;
+       }
+       if (read)
+               s->latency_reads_elems++;
+       else
+               s->latency_writes_elems++;
+}
+
+void blk_zero_latency_hist(struct io_latency_state *s);
+ssize_t blk_latency_hist_show(struct io_latency_state *s, char *buf);
+
 #else /* CONFIG_BLOCK */
 
 struct block_device;
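
A quick worked example of the bucketing above: latency_x_axis_us has 23 entries, so the y-axis arrays hold 24 counters, the last acting as overflow. A 750us read first falls below the 800us threshold (index 7); a 12000us read exceeds every threshold and lands in the overflow slot:

struct io_latency_state s = { };

blk_update_latency_hist(&s, 1, 750);	/* s.latency_y_axis_read[7]++  */
blk_update_latency_hist(&s, 1, 12000);	/* s.latency_y_axis_read[23]++ (overflow) */
/* s.latency_reads_elems == 2 afterwards */
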
diff --git a/include/linux/mmc/core.h b/include/linux/mmc/core.h
index 37967b6..3349f06 100644
@@ -136,6 +136,8 @@ struct mmc_request {
        struct completion       completion;
        void                    (*done)(struct mmc_request *);/* completion function */
        struct mmc_host         *host;
+       ktime_t                 io_start;
+       int                     lat_hist_enabled;
 };
 
 struct mmc_card;
diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index 40025b2..e4862f7 100644
@@ -16,6 +16,7 @@
 #include <linux/sched.h>
 #include <linux/device.h>
 #include <linux/fault-inject.h>
+#include <linux/blkdev.h>
 
 #include <linux/mmc/core.h>
 #include <linux/mmc/card.h>
@@ -379,6 +380,9 @@ struct mmc_host {
        } embedded_sdio_data;
 #endif
 
+       int                     latency_hist_enabled;
+       struct io_latency_state io_lat_s;
+
        unsigned long           private[0] ____cacheline_aligned;
 };
 
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index e23a9e7..bab4053 100644
@@ -65,8 +65,10 @@ enum {
 
 #ifdef CONFIG_CMA
 #  define is_migrate_cma(migratetype) unlikely((migratetype) == MIGRATE_CMA)
+#  define is_migrate_cma_page(_page) (get_pageblock_migratetype(_page) == MIGRATE_CMA)
 #else
 #  define is_migrate_cma(migratetype) false
+#  define is_migrate_cma_page(_page) false
 #endif
 
 #define for_each_migratetype_order(order, type) \
diff --git a/include/linux/slab.h b/include/linux/slab.h
index 2037a86..4ef384b 100644
@@ -144,6 +144,18 @@ void kfree(const void *);
 void kzfree(const void *);
 size_t ksize(const void *);
 
+#ifdef CONFIG_HAVE_HARDENED_USERCOPY_ALLOCATOR
+const char *__check_heap_object(const void *ptr, unsigned long n,
+                               struct page *page);
+#else
+static inline const char *__check_heap_object(const void *ptr,
+                                             unsigned long n,
+                                             struct page *page)
+{
+       return NULL;
+}
+#endif
+
 /*
  * Some archs want to perform DMA into kmalloc caches and need a guaranteed
  * alignment larger than the alignment of a 64-bit integer.
diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index 3388511..f4e857e 100644
@@ -81,6 +81,7 @@ struct kmem_cache {
        int reserved;           /* Reserved bytes at the end of slabs */
        const char *name;       /* Name (only for display!) */
        struct list_head list;  /* List of slab caches */
+       int red_left_pad;       /* Left redzone padding size */
 #ifdef CONFIG_SYSFS
        struct kobject kobj;    /* For sysfs */
 #endif
diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h
index ff307b5..0ae29ff 100644
@@ -145,6 +145,30 @@ static inline bool test_and_clear_restore_sigmask(void)
 #error "no set_restore_sigmask() provided and default one won't work"
 #endif
 
+#ifndef CONFIG_HAVE_ARCH_WITHIN_STACK_FRAMES
+static inline int arch_within_stack_frames(const void * const stack,
+                                          const void * const stackend,
+                                          const void *obj, unsigned long len)
+{
+       return 0;
+}
+#endif
+
+#ifdef CONFIG_HARDENED_USERCOPY
+extern void __check_object_size(const void *ptr, unsigned long n,
+                                       bool to_user);
+
+static inline void check_object_size(const void *ptr, unsigned long n,
+                                    bool to_user)
+{
+       __check_object_size(ptr, n, to_user);
+}
+#else
+static inline void check_object_size(const void *ptr, unsigned long n,
+                                    bool to_user)
+{ }
+#endif /* CONFIG_HARDENED_USERCOPY */
+
 #endif /* __KERNEL__ */
 
 #endif /* _LINUX_THREAD_INFO_H */
diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h
index 558129a..f30c187 100644
@@ -111,4 +111,11 @@ extern long strncpy_from_unsafe(char *dst, const void *unsafe_addr, long count);
 #define probe_kernel_address(addr, retval)             \
        probe_kernel_read(&retval, addr, sizeof(retval))
 
+#ifndef user_access_begin
+#define user_access_begin() do { } while (0)
+#define user_access_end() do { } while (0)
+#define unsafe_get_user(x, ptr, err) do { if (unlikely(__get_user(x, ptr))) goto err; } while (0)
+#define unsafe_put_user(x, ptr, err) do { if (unlikely(__put_user(x, ptr))) goto err; } while (0)
+#endif
+
 #endif         /* __LINUX_UACCESS_H__ */
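
These no-op fallbacks let lib/ code (see the strncpy_from_user() and strnlen_user() conversions below) adopt the begin/unsafe/end pattern unconditionally: bracket a run of user accesses with user_access_begin()/user_access_end() and branch to a local label on fault instead of testing each __get_user() return value. An illustrative caller, with hypothetical function and label names:

static long sum_two_user_longs(const long __user *p, long *sum)
{
	long a, b;

	user_access_begin();
	unsafe_get_user(a, &p[0], efault);	/* jumps to efault on fault */
	unsafe_get_user(b, &p[1], efault);
	user_access_end();

	*sum = a + b;
	return 0;

efault:
	user_access_end();
	return -EFAULT;
}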
index b36ceba..6c48067 100644 (file)
@@ -1513,6 +1513,8 @@ static inline void tcp_check_send_head(struct sock *sk, struct sk_buff *skb_unli
 {
        if (sk->sk_send_head == skb_unlinked)
                sk->sk_send_head = NULL;
+       if (tcp_sk(sk)->highest_sack == skb_unlinked)
+               tcp_sk(sk)->highest_sack = NULL;
 }
 
 static inline void tcp_init_send_head(struct sock *sk)
index 235c7a2..e1d1d69 100644 (file)
@@ -1719,6 +1719,7 @@ choice
 
 config SLAB
        bool "SLAB"
+       select HAVE_HARDENED_USERCOPY_ALLOCATOR
        help
          The regular slab allocator that is established and known to work
          well in all environments. It organizes cache hot objects in
@@ -1726,6 +1727,7 @@ config SLAB
 
 config SLUB
        bool "SLUB (Unqueued Allocator)"
+       select HAVE_HARDENED_USERCOPY_ALLOCATOR
        help
           SLUB is a slab allocator that minimizes cache line usage
           instead of managing queues of cached objects (SLAB approach).
index a65d634..e2e294d 100644 (file)
@@ -98,6 +98,7 @@ struct cpuset {
 
        /* user-configured CPUs and Memory Nodes allow to tasks */
        cpumask_var_t cpus_allowed;
+       cpumask_var_t cpus_requested;
        nodemask_t mems_allowed;
 
        /* effective CPUs and Memory Nodes allow to tasks */
@@ -386,7 +387,7 @@ static void cpuset_update_task_spread_flag(struct cpuset *cs,
 
 static int is_cpuset_subset(const struct cpuset *p, const struct cpuset *q)
 {
-       return  cpumask_subset(p->cpus_allowed, q->cpus_allowed) &&
+       return  cpumask_subset(p->cpus_requested, q->cpus_requested) &&
                nodes_subset(p->mems_allowed, q->mems_allowed) &&
                is_cpu_exclusive(p) <= is_cpu_exclusive(q) &&
                is_mem_exclusive(p) <= is_mem_exclusive(q);
@@ -486,7 +487,7 @@ static int validate_change(struct cpuset *cur, struct cpuset *trial)
        cpuset_for_each_child(c, css, par) {
                if ((is_cpu_exclusive(trial) || is_cpu_exclusive(c)) &&
                    c != cur &&
-                   cpumask_intersects(trial->cpus_allowed, c->cpus_allowed))
+                   cpumask_intersects(trial->cpus_requested, c->cpus_requested))
                        goto out;
                if ((is_mem_exclusive(trial) || is_mem_exclusive(c)) &&
                    c != cur &&
@@ -945,17 +946,18 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs,
        if (!*buf) {
                cpumask_clear(trialcs->cpus_allowed);
        } else {
-               retval = cpulist_parse(buf, trialcs->cpus_allowed);
+               retval = cpulist_parse(buf, trialcs->cpus_requested);
                if (retval < 0)
                        return retval;
 
-               if (!cpumask_subset(trialcs->cpus_allowed,
-                                   top_cpuset.cpus_allowed))
+               if (!cpumask_subset(trialcs->cpus_requested, cpu_present_mask))
                        return -EINVAL;
+
+               cpumask_and(trialcs->cpus_allowed, trialcs->cpus_requested, cpu_active_mask);
        }
 
        /* Nothing to do if the cpus didn't change */
-       if (cpumask_equal(cs->cpus_allowed, trialcs->cpus_allowed))
+       if (cpumask_equal(cs->cpus_requested, trialcs->cpus_requested))
                return 0;
 
        retval = validate_change(cs, trialcs);
@@ -964,6 +966,7 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs,
 
        spin_lock_irq(&callback_lock);
        cpumask_copy(cs->cpus_allowed, trialcs->cpus_allowed);
+       cpumask_copy(cs->cpus_requested, trialcs->cpus_requested);
        spin_unlock_irq(&callback_lock);
 
        /* use trialcs->cpus_allowed as a temp variable */
@@ -1754,7 +1757,7 @@ static int cpuset_common_seq_show(struct seq_file *sf, void *v)
 
        switch (type) {
        case FILE_CPULIST:
-               seq_printf(sf, "%*pbl\n", cpumask_pr_args(cs->cpus_allowed));
+               seq_printf(sf, "%*pbl\n", cpumask_pr_args(cs->cpus_requested));
                break;
        case FILE_MEMLIST:
                seq_printf(sf, "%*pbl\n", nodemask_pr_args(&cs->mems_allowed));
@@ -1943,11 +1946,14 @@ cpuset_css_alloc(struct cgroup_subsys_state *parent_css)
                return ERR_PTR(-ENOMEM);
        if (!alloc_cpumask_var(&cs->cpus_allowed, GFP_KERNEL))
                goto free_cs;
+       if (!alloc_cpumask_var(&cs->cpus_requested, GFP_KERNEL))
+               goto free_allowed;
        if (!alloc_cpumask_var(&cs->effective_cpus, GFP_KERNEL))
-               goto free_cpus;
+               goto free_requested;
 
        set_bit(CS_SCHED_LOAD_BALANCE, &cs->flags);
        cpumask_clear(cs->cpus_allowed);
+       cpumask_clear(cs->cpus_requested);
        nodes_clear(cs->mems_allowed);
        cpumask_clear(cs->effective_cpus);
        nodes_clear(cs->effective_mems);
@@ -1956,7 +1962,9 @@ cpuset_css_alloc(struct cgroup_subsys_state *parent_css)
 
        return &cs->css;
 
-free_cpus:
+free_requested:
+       free_cpumask_var(cs->cpus_requested);
+free_allowed:
        free_cpumask_var(cs->cpus_allowed);
 free_cs:
        kfree(cs);
@@ -2019,6 +2027,7 @@ static int cpuset_css_online(struct cgroup_subsys_state *css)
        cs->mems_allowed = parent->mems_allowed;
        cs->effective_mems = parent->mems_allowed;
        cpumask_copy(cs->cpus_allowed, parent->cpus_allowed);
+       cpumask_copy(cs->cpus_requested, parent->cpus_requested);
        cpumask_copy(cs->effective_cpus, parent->cpus_allowed);
        spin_unlock_irq(&callback_lock);
 out_unlock:
@@ -2053,6 +2062,7 @@ static void cpuset_css_free(struct cgroup_subsys_state *css)
 
        free_cpumask_var(cs->effective_cpus);
        free_cpumask_var(cs->cpus_allowed);
+       free_cpumask_var(cs->cpus_requested);
        kfree(cs);
 }
 
@@ -2120,8 +2130,11 @@ int __init cpuset_init(void)
                BUG();
        if (!alloc_cpumask_var(&top_cpuset.effective_cpus, GFP_KERNEL))
                BUG();
+       if (!alloc_cpumask_var(&top_cpuset.cpus_requested, GFP_KERNEL))
+               BUG();
 
        cpumask_setall(top_cpuset.cpus_allowed);
+       cpumask_setall(top_cpuset.cpus_requested);
        nodes_setall(top_cpuset.mems_allowed);
        cpumask_setall(top_cpuset.effective_cpus);
        nodes_setall(top_cpuset.effective_mems);
@@ -2255,7 +2268,7 @@ retry:
                goto retry;
        }
 
-       cpumask_and(&new_cpus, cs->cpus_allowed, parent_cs(cs)->effective_cpus);
+       cpumask_and(&new_cpus, cs->cpus_requested, parent_cs(cs)->effective_cpus);
        nodes_and(new_mems, cs->mems_allowed, parent_cs(cs)->effective_mems);
 
        cpus_updated = !cpumask_equal(&new_cpus, cs->effective_cpus);
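
The net effect of the cpus_requested split is that the mask the user wrote survives CPU hotplug: cpus_allowed is always derived from the requested mask, and the hotplug path above recomputes the effective set from cpus_requested rather than from the possibly shrunken cpus_allowed. An illustrative restatement of the invariant (the standalone helper is hypothetical; the mask names follow the diff):

static void cpuset_recompute_allowed(struct cpuset *cs)
{
	/* cpus_allowed never exceeds what the user requested. */
	cpumask_and(cs->cpus_allowed, cs->cpus_requested, cpu_active_mask);
}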
index e358313..b49cf3a 100644 (file)
@@ -68,6 +68,7 @@ void rcu_sync_lockdep_assert(struct rcu_sync *rsp)
        RCU_LOCKDEP_WARN(!gp_ops[rsp->gp_type].held(),
                         "suspicious rcu_sync_is_idle() usage");
 }
+EXPORT_SYMBOL_GPL(rcu_sync_lockdep_assert);
 #endif
 
 /**
index e0af6ff..5a003a2 100644 (file)
@@ -39,8 +39,8 @@ static inline long do_strncpy_from_user(char *dst, const char __user *src, long
                unsigned long c, data;
 
                /* Fall back to byte-at-a-time if we get a page fault */
-               if (unlikely(__get_user(c,(unsigned long __user *)(src+res))))
-                       break;
+               unsafe_get_user(c, (unsigned long __user *)(src+res), byte_at_a_time);
+
                *(unsigned long *)(dst+res) = c;
                if (has_zero(c, &data, &constants)) {
                        data = prep_zero_mask(c, data, &constants);
@@ -55,8 +55,7 @@ byte_at_a_time:
        while (max) {
                char c;
 
-               if (unlikely(__get_user(c,src+res)))
-                       return -EFAULT;
+               unsafe_get_user(c, src+res, efault);
                dst[res] = c;
                if (!c)
                        return res;
@@ -75,6 +74,7 @@ byte_at_a_time:
         * Nope: we hit the address space limit, and we still had more
         * characters the caller would have wanted. That's an EFAULT.
         */
+efault:
        return -EFAULT;
 }
 
@@ -107,7 +107,12 @@ long strncpy_from_user(char *dst, const char __user *src, long count)
        src_addr = (unsigned long)src;
        if (likely(src_addr < max_addr)) {
                unsigned long max = max_addr - src_addr;
-               return do_strncpy_from_user(dst, src, count, max);
+               long retval;
+
+               user_access_begin();
+               retval = do_strncpy_from_user(dst, src, count, max);
+               user_access_end();
+               return retval;
        }
        return -EFAULT;
 }
index 3a5f2b3..8e105ed 100644 (file)
@@ -45,8 +45,7 @@ static inline long do_strnlen_user(const char __user *src, unsigned long count,
        src -= align;
        max += align;
 
-       if (unlikely(__get_user(c,(unsigned long __user *)src)))
-               return 0;
+       unsafe_get_user(c, (unsigned long __user *)src, efault);
        c |= aligned_byte_mask(align);
 
        for (;;) {
@@ -61,8 +60,7 @@ static inline long do_strnlen_user(const char __user *src, unsigned long count,
                if (unlikely(max <= sizeof(unsigned long)))
                        break;
                max -= sizeof(unsigned long);
-               if (unlikely(__get_user(c,(unsigned long __user *)(src+res))))
-                       return 0;
+               unsafe_get_user(c, (unsigned long __user *)(src+res), efault);
        }
        res -= align;
 
@@ -77,6 +75,7 @@ static inline long do_strnlen_user(const char __user *src, unsigned long count,
         * Nope: we hit the address space limit, and we still had more
         * characters the caller would have wanted. That's 0.
         */
+efault:
        return 0;
 }
 
@@ -112,7 +111,12 @@ long strnlen_user(const char __user *str, long count)
        src_addr = (unsigned long)str;
        if (likely(src_addr < max_addr)) {
                unsigned long max = max_addr - src_addr;
-               return do_strnlen_user(str, count, max);
+               long retval;
+
+               user_access_begin();
+               retval = do_strnlen_user(str, count, max);
+               user_access_end();
+               return retval;
        }
        return 0;
 }
@@ -141,7 +145,12 @@ long strlen_user(const char __user *str)
        src_addr = (unsigned long)str;
        if (likely(src_addr < max_addr)) {
                unsigned long max = max_addr - src_addr;
-               return do_strnlen_user(str, ~0ul, max);
+               long retval;
+
+               user_access_begin();
+               retval = do_strnlen_user(str, ~0ul, max);
+               user_access_end();
+               return retval;
        }
        return 0;
 }
index 2ed4319..8b532c9 100644 (file)
@@ -5,6 +5,9 @@
 KASAN_SANITIZE_slab_common.o := n
 KASAN_SANITIZE_slub.o := n
 
+# Since __builtin_frame_address does work as used, disable the warning.
+CFLAGS_usercopy.o += $(call cc-disable-warning, frame-address)
+
 mmu-y                  := nommu.o
 mmu-$(CONFIG_MMU)      := gup.o highmem.o memory.o mincore.o \
                           mlock.o mmap.o mprotect.o mremap.o msync.o rmap.o \
@@ -81,3 +84,4 @@ obj-$(CONFIG_CMA_DEBUGFS) += cma_debug.o
 obj-$(CONFIG_USERFAULTFD) += userfaultfd.o
 obj-$(CONFIG_IDLE_PAGE_TRACKING) += page_idle.o
 obj-$(CONFIG_FRAME_VECTOR) += frame_vector.o
+obj-$(CONFIG_HARDENED_USERCOPY) += usercopy.o
index 4765c97..24a615d 100644 (file)
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -4228,6 +4228,36 @@ static int __init slab_proc_init(void)
 module_init(slab_proc_init);
 #endif
 
+#ifdef CONFIG_HARDENED_USERCOPY
+/*
+ * Rejects objects that are incorrectly sized.
+ *
+ * Returns NULL if check passes, otherwise const char * to name of cache
+ * to indicate an error.
+ */
+const char *__check_heap_object(const void *ptr, unsigned long n,
+                               struct page *page)
+{
+       struct kmem_cache *cachep;
+       unsigned int objnr;
+       unsigned long offset;
+
+       /* Find and validate object. */
+       cachep = page->slab_cache;
+       objnr = obj_to_index(cachep, page, (void *)ptr);
+       BUG_ON(objnr >= cachep->num);
+
+       /* Find offset within object. */
+       offset = ptr - index_to_obj(cachep, page, objnr) - obj_offset(cachep);
+
+       /* Allow address range falling entirely within object size. */
+       if (offset <= cachep->object_size && n <= cachep->object_size - offset)
+               return NULL;
+
+       return cachep->name;
+}
+#endif /* CONFIG_HARDENED_USERCOPY */
+
 /**
  * ksize - get the actual amount of memory allocated for a given object
  * @objp: Pointer to the object
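
The final comparison pair in __check_heap_object() is a wrap-safe bounds check: instead of testing offset + n <= object_size, which could overflow for a huge n, it bounds offset and n separately. For example, with a 128-byte object, a copy at offset 120 of length 16 is rejected because 16 > 128 - 120. An equivalent standalone predicate (illustrative only):

static bool span_within_object(unsigned long offset, unsigned long n,
			       unsigned long object_size)
{
	/* [offset, offset + n) must fit in [0, object_size] without
	 * computing offset + n, which could wrap. */
	return offset <= object_size && n <= object_size - offset;
}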
index 65d5f92..41f7cae 100644 (file)
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -124,6 +124,14 @@ static inline int kmem_cache_debug(struct kmem_cache *s)
 #endif
 }
 
+static inline void *fixup_red_left(struct kmem_cache *s, void *p)
+{
+       if (kmem_cache_debug(s) && s->flags & SLAB_RED_ZONE)
+               p += s->red_left_pad;
+
+       return p;
+}
+
 static inline bool kmem_cache_has_cpu_partial(struct kmem_cache *s)
 {
 #ifdef CONFIG_SLUB_CPU_PARTIAL
@@ -224,24 +232,6 @@ static inline void stat(const struct kmem_cache *s, enum stat_item si)
  *                     Core slab cache functions
  *******************************************************************/
 
-/* Verify that a pointer has an address that is valid within a slab page */
-static inline int check_valid_pointer(struct kmem_cache *s,
-                               struct page *page, const void *object)
-{
-       void *base;
-
-       if (!object)
-               return 1;
-
-       base = page_address(page);
-       if (object < base || object >= base + page->objects * s->size ||
-               (object - base) % s->size) {
-               return 0;
-       }
-
-       return 1;
-}
-
 static inline void *get_freepointer(struct kmem_cache *s, void *object)
 {
        return *(void **)(object + s->offset);
@@ -271,12 +261,14 @@ static inline void set_freepointer(struct kmem_cache *s, void *object, void *fp)
 
 /* Loop over all objects in a slab */
 #define for_each_object(__p, __s, __addr, __objects) \
-       for (__p = (__addr); __p < (__addr) + (__objects) * (__s)->size;\
-                       __p += (__s)->size)
+       for (__p = fixup_red_left(__s, __addr); \
+               __p < (__addr) + (__objects) * (__s)->size; \
+               __p += (__s)->size)
 
 #define for_each_object_idx(__p, __idx, __s, __addr, __objects) \
-       for (__p = (__addr), __idx = 1; __idx <= __objects;\
-                       __p += (__s)->size, __idx++)
+       for (__p = fixup_red_left(__s, __addr), __idx = 1; \
+               __idx <= __objects; \
+               __p += (__s)->size, __idx++)
 
 /* Determine object index from a given position */
 static inline int slab_index(void *p, struct kmem_cache *s, void *addr)
@@ -456,6 +448,22 @@ static void get_map(struct kmem_cache *s, struct page *page, unsigned long *map)
                set_bit(slab_index(p, s, addr), map);
 }
 
+static inline int size_from_object(struct kmem_cache *s)
+{
+       if (s->flags & SLAB_RED_ZONE)
+               return s->size - s->red_left_pad;
+
+       return s->size;
+}
+
+static inline void *restore_red_left(struct kmem_cache *s, void *p)
+{
+       if (s->flags & SLAB_RED_ZONE)
+               p -= s->red_left_pad;
+
+       return p;
+}
+
 /*
  * Debug settings:
  */
@@ -489,6 +497,26 @@ static inline void metadata_access_disable(void)
 /*
  * Object debugging
  */
+
+/* Verify that a pointer has an address that is valid within a slab page */
+static inline int check_valid_pointer(struct kmem_cache *s,
+                               struct page *page, void *object)
+{
+       void *base;
+
+       if (!object)
+               return 1;
+
+       base = page_address(page);
+       object = restore_red_left(s, object);
+       if (object < base || object >= base + page->objects * s->size ||
+               (object - base) % s->size) {
+               return 0;
+       }
+
+       return 1;
+}
+
 static void print_section(char *text, u8 *addr, unsigned int length)
 {
        metadata_access_enable();
@@ -628,7 +656,9 @@ static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p)
        pr_err("INFO: Object 0x%p @offset=%tu fp=0x%p\n\n",
               p, p - addr, get_freepointer(s, p));
 
-       if (p > addr + 16)
+       if (s->flags & SLAB_RED_ZONE)
+               print_section("Redzone ", p - s->red_left_pad, s->red_left_pad);
+       else if (p > addr + 16)
                print_section("Bytes b4 ", p - 16, 16);
 
        print_section("Object ", p, min_t(unsigned long, s->object_size,
@@ -645,9 +675,9 @@ static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p)
        if (s->flags & SLAB_STORE_USER)
                off += 2 * sizeof(struct track);
 
-       if (off != s->size)
+       if (off != size_from_object(s))
                /* Beginning of the filler is the free pointer */
-               print_section("Padding ", p + off, s->size - off);
+               print_section("Padding ", p + off, size_from_object(s) - off);
 
        dump_stack();
 }
@@ -677,6 +707,9 @@ static void init_object(struct kmem_cache *s, void *object, u8 val)
 {
        u8 *p = object;
 
+       if (s->flags & SLAB_RED_ZONE)
+               memset(p - s->red_left_pad, val, s->red_left_pad);
+
        if (s->flags & __OBJECT_POISON) {
                memset(p, POISON_FREE, s->object_size - 1);
                p[s->object_size - 1] = POISON_END;
@@ -769,11 +802,11 @@ static int check_pad_bytes(struct kmem_cache *s, struct page *page, u8 *p)
                /* We also have user information there */
                off += 2 * sizeof(struct track);
 
-       if (s->size == off)
+       if (size_from_object(s) == off)
                return 1;
 
        return check_bytes_and_report(s, page, p, "Object padding",
-                               p + off, POISON_INUSE, s->size - off);
+                       p + off, POISON_INUSE, size_from_object(s) - off);
 }
 
 /* Check the pad bytes at the end of a slab page */
@@ -818,6 +851,10 @@ static int check_object(struct kmem_cache *s, struct page *page,
 
        if (s->flags & SLAB_RED_ZONE) {
                if (!check_bytes_and_report(s, page, object, "Redzone",
+                       object - s->red_left_pad, val, s->red_left_pad))
+                       return 0;
+
+               if (!check_bytes_and_report(s, page, object, "Redzone",
                        endobject, val, s->inuse - s->object_size))
                        return 0;
        } else {
@@ -1468,7 +1505,7 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
                        set_freepointer(s, p, NULL);
        }
 
-       page->freelist = start;
+       page->freelist = fixup_red_left(s, start);
        page->inuse = page->objects;
        page->frozen = 1;
 
@@ -3283,7 +3320,7 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order)
                 */
                size += 2 * sizeof(struct track);
 
-       if (flags & SLAB_RED_ZONE)
+       if (flags & SLAB_RED_ZONE) {
                /*
                 * Add some empty padding so that we can catch
                 * overwrites from earlier objects rather than let
@@ -3292,6 +3329,11 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order)
                 * of the object.
                 */
                size += sizeof(void *);
+
+               s->red_left_pad = sizeof(void *);
+               s->red_left_pad = ALIGN(s->red_left_pad, s->align);
+               size += s->red_left_pad;
+       }
 #endif
 
        /*
@@ -3585,6 +3627,46 @@ void *__kmalloc_node(size_t size, gfp_t flags, int node)
 EXPORT_SYMBOL(__kmalloc_node);
 #endif
 
+#ifdef CONFIG_HARDENED_USERCOPY
+/*
+ * Rejects objects that are incorrectly sized.
+ *
+ * Returns NULL if check passes, otherwise const char * to name of cache
+ * to indicate an error.
+ */
+const char *__check_heap_object(const void *ptr, unsigned long n,
+                               struct page *page)
+{
+       struct kmem_cache *s;
+       unsigned long offset;
+       size_t object_size;
+
+       /* Find object and usable object size. */
+       s = page->slab_cache;
+       object_size = slab_ksize(s);
+
+       /* Reject impossible pointers. */
+       if (ptr < page_address(page))
+               return s->name;
+
+       /* Find offset within object. */
+       offset = (ptr - page_address(page)) % s->size;
+
+       /* Adjust for redzone and reject if within the redzone. */
+       if (kmem_cache_debug(s) && s->flags & SLAB_RED_ZONE) {
+               if (offset < s->red_left_pad)
+                       return s->name;
+               offset -= s->red_left_pad;
+       }
+
+       /* Allow address range falling entirely within object size. */
+       if (offset <= object_size && n <= object_size - offset)
+               return NULL;
+
+       return s->name;
+}
+#endif /* CONFIG_HARDENED_USERCOPY */
+
 static size_t __ksize(const void *object)
 {
        struct page *page;
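
The SLUB variant has one extra wrinkle over the SLAB one: with debugging and SLAB_RED_ZONE active, each object is preceded by red_left_pad bytes of redzone, so the raw offset within the object's slot must be adjusted before the size check, and a pointer landing in the pad itself is rejected. A hedged restatement of that adjustment (names follow the diff; the helper itself is illustrative):

static bool slub_offset_ok(struct kmem_cache *s, unsigned long raw_offset,
			   unsigned long n)
{
	unsigned long offset = raw_offset % s->size;

	if (kmem_cache_debug(s) && (s->flags & SLAB_RED_ZONE)) {
		if (offset < s->red_left_pad)	/* inside the left redzone */
			return false;
		offset -= s->red_left_pad;
	}
	return offset <= slab_ksize(s) && n <= slab_ksize(s) - offset;
}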
diff --git a/mm/usercopy.c b/mm/usercopy.c
new file mode 100644 (file)
index 0000000..f78015e
--- /dev/null
@@ -0,0 +1,269 @@
+/*
+ * This implements the various checks for CONFIG_HARDENED_USERCOPY*,
+ * which are designed to protect kernel memory from needless exposure
+ * and overwrite under many unintended conditions. This code is based
+ * on PAX_USERCOPY, which is:
+ *
+ * Copyright (C) 2001-2016 PaX Team, Bradley Spengler, Open Source
+ * Security Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/mm.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <asm/sections.h>
+
+enum {
+       BAD_STACK = -1,
+       NOT_STACK = 0,
+       GOOD_FRAME,
+       GOOD_STACK,
+};
+
+/*
+ * Checks if a given pointer and length is contained by the current
+ * stack frame (if possible).
+ *
+ * Returns:
+ *     NOT_STACK: not at all on the stack
+ *     GOOD_FRAME: fully within a valid stack frame
+ *     GOOD_STACK: fully on the stack (when frame-checking is not possible)
+ *     BAD_STACK: error condition (invalid stack position or bad stack frame)
+ */
+static noinline int check_stack_object(const void *obj, unsigned long len)
+{
+       const void * const stack = task_stack_page(current);
+       const void * const stackend = stack + THREAD_SIZE;
+       int ret;
+
+       /* Object is not on the stack at all. */
+       if (obj + len <= stack || stackend <= obj)
+               return NOT_STACK;
+
+       /*
+        * Reject: object partially overlaps the stack (passing the
+        * check above means at least one end is within the stack,
+        * so if this check fails, the other end is outside the stack).
+        */
+       if (obj < stack || stackend < obj + len)
+               return BAD_STACK;
+
+       /* Check if object is safely within a valid frame. */
+       ret = arch_within_stack_frames(stack, stackend, obj, len);
+       if (ret)
+               return ret;
+
+       return GOOD_STACK;
+}
+
+static void report_usercopy(const void *ptr, unsigned long len,
+                           bool to_user, const char *type)
+{
+       pr_emerg("kernel memory %s attempt detected %s %p (%s) (%lu bytes)\n",
+               to_user ? "exposure" : "overwrite",
+               to_user ? "from" : "to", ptr, type ? : "unknown", len);
+       /*
+        * For greater effect, it would be nice to do do_group_exit(),
+        * but BUG() actually hooks all the lock-breaking and per-arch
+        * Oops code, so that is used here instead.
+        */
+       BUG();
+}
+
+/* Returns true if any portion of [ptr,ptr+n) overlaps with [low,high). */
+static bool overlaps(const void *ptr, unsigned long n, unsigned long low,
+                    unsigned long high)
+{
+       unsigned long check_low = (uintptr_t)ptr;
+       unsigned long check_high = check_low + n;
+
+       /* Does not overlap if entirely above or entirely below. */
+       if (check_low >= high || check_high <= low)
+               return false;
+
+       return true;
+}
+
+/* Is this address range in the kernel text area? */
+static inline const char *check_kernel_text_object(const void *ptr,
+                                                  unsigned long n)
+{
+       unsigned long textlow = (unsigned long)_stext;
+       unsigned long texthigh = (unsigned long)_etext;
+       unsigned long textlow_linear, texthigh_linear;
+
+       if (overlaps(ptr, n, textlow, texthigh))
+               return "<kernel text>";
+
+       /*
+        * Some architectures have virtual memory mappings with a secondary
+        * mapping of the kernel text, i.e. there is more than one virtual
+        * kernel address that points to the kernel image. This usually
+        * happens when there is a separate linear physical memory mapping,
+        * in which case __pa() is not just the reverse of __va(). This
+        * can be detected and checked:
+        */
+       textlow_linear = (unsigned long)__va(__pa(textlow));
+       /* No different mapping: we're done. */
+       if (textlow_linear == textlow)
+               return NULL;
+
+       /* Check the secondary mapping... */
+       texthigh_linear = (unsigned long)__va(__pa(texthigh));
+       if (overlaps(ptr, n, textlow_linear, texthigh_linear))
+               return "<linear kernel text>";
+
+       return NULL;
+}
+
+static inline const char *check_bogus_address(const void *ptr, unsigned long n)
+{
+       /* Reject if object wraps past end of memory. */
+       if ((unsigned long)ptr + n < (unsigned long)ptr)
+               return "<wrapped address>";
+
+       /* Reject if NULL or ZERO-allocation. */
+       if (ZERO_OR_NULL_PTR(ptr))
+               return "<null>";
+
+       return NULL;
+}
+
+static inline const char *check_heap_object(const void *ptr, unsigned long n,
+                                           bool to_user)
+{
+       struct page *page, *endpage;
+       const void *end = ptr + n - 1;
+       bool is_reserved, is_cma;
+
+       /*
+        * Some architectures (arm64) return true for virt_addr_valid() on
+        * vmalloced addresses. Work around this by checking for vmalloc
+        * first.
+        */
+       if (is_vmalloc_addr(ptr))
+               return NULL;
+
+       if (!virt_addr_valid(ptr))
+               return NULL;
+
+       page = virt_to_head_page(ptr);
+
+       /* Check slab allocator for flags and size. */
+       if (PageSlab(page))
+               return __check_heap_object(ptr, n, page);
+
+       /*
+        * Sometimes the kernel data regions are not marked Reserved (see
+        * check below). And sometimes [_sdata,_edata) does not cover
+        * rodata and/or bss, so check each range explicitly.
+        */
+
+       /* Allow reads of kernel rodata region (if not marked as Reserved). */
+       if (ptr >= (const void *)__start_rodata &&
+           end <= (const void *)__end_rodata) {
+               if (!to_user)
+                       return "<rodata>";
+               return NULL;
+       }
+
+       /* Allow kernel data region (if not marked as Reserved). */
+       if (ptr >= (const void *)_sdata && end <= (const void *)_edata)
+               return NULL;
+
+       /* Allow kernel bss region (if not marked as Reserved). */
+       if (ptr >= (const void *)__bss_start &&
+           end <= (const void *)__bss_stop)
+               return NULL;
+
+       /* Is the object wholly within one base page? */
+       if (likely(((unsigned long)ptr & (unsigned long)PAGE_MASK) ==
+                  ((unsigned long)end & (unsigned long)PAGE_MASK)))
+               return NULL;
+
+       /* Allow if start and end are inside the same compound page. */
+       endpage = virt_to_head_page(end);
+       if (likely(endpage == page))
+               return NULL;
+
+       /*
+        * Reject if range is entirely either Reserved (i.e. special or
+        * device memory), or CMA. Otherwise, reject since the object spans
+        * several independently allocated pages.
+        */
+       is_reserved = PageReserved(page);
+       is_cma = is_migrate_cma_page(page);
+       if (!is_reserved && !is_cma)
+               goto reject;
+
+       for (ptr += PAGE_SIZE; ptr <= end; ptr += PAGE_SIZE) {
+               page = virt_to_head_page(ptr);
+               if (is_reserved && !PageReserved(page))
+                       goto reject;
+               if (is_cma && !is_migrate_cma_page(page))
+                       goto reject;
+       }
+
+       return NULL;
+
+reject:
+       return "<spans multiple pages>";
+}
+
+/*
+ * Validates that the given object is:
+ * - not bogus address
+ * - known-safe heap or stack object
+ * - not in kernel text
+ */
+void __check_object_size(const void *ptr, unsigned long n, bool to_user)
+{
+       const char *err;
+
+       /* Skip all tests if size is zero. */
+       if (!n)
+               return;
+
+       /* Check for invalid addresses. */
+       err = check_bogus_address(ptr, n);
+       if (err)
+               goto report;
+
+       /* Check for bad heap object. */
+       err = check_heap_object(ptr, n, to_user);
+       if (err)
+               goto report;
+
+       /* Check for bad stack object. */
+       switch (check_stack_object(ptr, n)) {
+       case NOT_STACK:
+               /* Object is not touching the current process stack. */
+               break;
+       case GOOD_FRAME:
+       case GOOD_STACK:
+               /*
+                * Object is either in the correct frame (when it
+                * is possible to check) or just generally on the
+                * process stack (when frame checking is not available).
+                */
+               return;
+       default:
+               err = "<process stack>";
+               goto report;
+       }
+
+       /* Check for object in kernel to avoid text exposure. */
+       err = check_kernel_text_object(ptr, n);
+       if (!err)
+               return;
+
+report:
+       report_usercopy(ptr, n, to_user, err);
+}
+EXPORT_SYMBOL(__check_object_size);
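
__check_object_size() is the single entry point for the whole series: per the HAVE_ARCH_HARDENED_USERCOPY help text below, each architecture calls check_object_size() on the kernel-side buffer just before its low-level user-copy loops, and the inline wrapper compiles to nothing when CONFIG_HARDENED_USERCOPY is off. A sketch of the call-site shape, with a hypothetical arch copy primitive:

static inline unsigned long
hardened_copy_from_user(void *to, const void __user *from, unsigned long n)
{
	/* Validate the kernel destination before moving user data. */
	check_object_size(to, n, false);
	return __arch_copy_from_user(to, from, n);	/* assumed arch primitive */
}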
index 77afe91..9adedba 100644 (file)
@@ -326,10 +326,12 @@ replay:
                nlh = nlmsg_hdr(skb);
                err = 0;
 
-               if (nlmsg_len(nlh) < sizeof(struct nfgenmsg) ||
-                   skb->len < nlh->nlmsg_len) {
-                       err = -EINVAL;
-                       goto ack;
+               if (nlh->nlmsg_len < NLMSG_HDRLEN ||
+                   skb->len < nlh->nlmsg_len ||
+                   nlmsg_len(nlh) < sizeof(struct nfgenmsg)) {
+                       nfnl_err_reset(&err_list);
+                       status |= NFNL_BATCH_FAILURE;
+                       goto done;
                }
 
                /* Only requests are handled by the kernel */
index e2e7d54..3bf0c59 100644 (file)
@@ -1946,7 +1946,7 @@ static int qtaguid_ctrl_proc_show(struct seq_file *m, void *v)
                        );
                f_count = atomic_long_read(
                        &sock_tag_entry->socket->file->f_count);
-               seq_printf(m, "sock=%p tag=0x%llx (uid=%u) pid=%u "
+               seq_printf(m, "sock=%pK tag=0x%llx (uid=%u) pid=%u "
                           "f_count=%lu\n",
                           sock_tag_entry->sk,
                           sock_tag_entry->tag, uid,
@@ -2548,8 +2548,7 @@ static int pp_stats_line(struct seq_file *m, struct tag_stat *ts_entry,
        uid_t stat_uid = get_uid_from_tag(tag);
        struct proc_print_info *ppi = m->private;
        /* Detailed tags are not available to everybody */
-       if (get_atag_from_tag(tag) && !can_read_other_uid_stats(
-                                               make_kuid(&init_user_ns,stat_uid))) {
+       if (!can_read_other_uid_stats(make_kuid(&init_user_ns,stat_uid))) {
                CT_DEBUG("qtaguid: stats line: "
                         "%s 0x%llx %u: insufficient priv "
                         "from pid=%u tgid=%u uid=%u stats.gid=%u\n",
index 30a2603..2b42c22 100644 (file)
@@ -127,6 +127,35 @@ config LSM_MMAP_MIN_ADDR
          this low address space will need the permission specific to the
          systems running LSM.
 
+config HAVE_HARDENED_USERCOPY_ALLOCATOR
+       bool
+       help
+         The heap allocator implements __check_heap_object() for
+         validating memory ranges against heap object sizes in
+         support of CONFIG_HARDENED_USERCOPY.
+
+config HAVE_ARCH_HARDENED_USERCOPY
+       bool
+       help
+         The architecture supports CONFIG_HARDENED_USERCOPY by
+         calling check_object_size() just before performing the
+         userspace copies in the low level implementation of
+         copy_to_user() and copy_from_user().
+
+config HARDENED_USERCOPY
+       bool "Harden memory copies between kernel and userspace"
+       depends on HAVE_ARCH_HARDENED_USERCOPY
+       depends on HAVE_HARDENED_USERCOPY_ALLOCATOR
+       select BUG
+       help
+         This option checks for obviously wrong memory regions when
+         copying memory to/from the kernel (via copy_to_user() and
+         copy_from_user() functions) by rejecting memory ranges that
+         are larger than the specified heap object, span multiple
+         separately allocated pages, are not on the process stack,
+         or are part of the kernel text. This kills entire classes
+         of heap overflow exploits and similar kernel memory exposures.
+
 source security/selinux/Kconfig
 source security/smack/Kconfig
 source security/tomoyo/Kconfig
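
With an architecture that selects HAVE_ARCH_HARDENED_USERCOPY and an allocator that selects HAVE_HARDENED_USERCOPY_ALLOCATOR, enabling the feature is then a single config fragment line, e.g.:

CONFIG_HARDENED_USERCOPY=y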