OSDN Git Service

BACKPORT: arm64: split thread_info from task stack
author: Mark Rutland <mark.rutland@arm.com>
Thu, 3 Nov 2016 20:23:13 +0000 (20:23 +0000)
committer: Zubin Mithra <zsm@google.com>
Wed, 9 Aug 2017 14:23:23 +0000 (15:23 +0100)
This patch moves arm64's struct thread_info from the task stack into
task_struct. This protects thread_info from corruption in the case of
stack overflows, and makes its address harder to determine if stack
addresses are leaked, making a number of attacks more difficult. Precise
detection and handling of overflow is left for subsequent patches.

Largely, this involves changing code to store the task_struct in sp_el0,
and acquire the thread_info from the task struct. Core code now
implements current_thread_info(), and as noted in <linux/sched.h> this
relies on offsetof(task_struct, thread_info) == 0, enforced by core
code.

This change means that the 'tsk' register used in entry.S now points to
a task_struct, rather than a thread_info as it used to. To make this
clear, the TI_* field offsets are renamed to TSK_TI_*, with asm-offsets
appropriately updated to account for the structural change.

Userspace clobbers sp_el0, and we can no longer restore this from the
stack. Instead, the current task is cached in a per-cpu variable that we
can safely access from early assembly as interrupts are disabled (and we
are thus not preemptible).

Both secondary entry and idle are updated to stash the sp and task
pointer separately.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Tested-by: Laura Abbott <labbott@redhat.com>
Cc: AKASHI Takahiro <takahiro.akashi@linaro.org>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Cc: James Morse <james.morse@arm.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Suzuki K Poulose <suzuki.poulose@arm.com>
Cc: Will Deacon <will.deacon@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
This is a modification of Mark Rutland's original patch. Guards checking
whether CONFIG_THREAD_INFO_IN_TASK is enabled have been inserted. An
implementation of get_current() for the case where
CONFIG_THREAD_INFO_IN_TASK is not enabled has been added to
arch/arm64/include/asm/current.h.

Bug: 38331309
Change-Id: Ic5eae344a7c2baea0864f6ae16be1e9c60c0a74a
(cherry picked from commit c02433dd6de32f042cf3ffe476746b1115b8c096)
Signed-off-by: Zubin Mithra <zsm@google.com>
arch/arm64/Kconfig
arch/arm64/include/asm/current.h [new file with mode: 0644]
arch/arm64/include/asm/smp.h
arch/arm64/include/asm/thread_info.h
arch/arm64/kernel/asm-offsets.c
arch/arm64/kernel/entry.S
arch/arm64/kernel/head.S
arch/arm64/kernel/process.c
arch/arm64/kernel/smp.c

index 35be856..da007c2 100644 (file)
@@ -98,6 +98,7 @@ config ARM64
        select SPARSE_IRQ
        select SYSCTL_EXCEPTION_TRACE
        select HAVE_CONTEXT_TRACKING
+       select THREAD_INFO_IN_TASK
        help
          ARM 64-bit (AArch64) Linux support.
 
diff --git a/arch/arm64/include/asm/current.h b/arch/arm64/include/asm/current.h
new file mode 100644 (file)
index 0000000..2e61d21
--- /dev/null
@@ -0,0 +1,27 @@
+#ifndef __ASM_CURRENT_H
+#define __ASM_CURRENT_H
+
+#include <linux/compiler.h>
+
+#include <asm/sysreg.h>
+
+#ifndef __ASSEMBLY__
+
+#ifdef CONFIG_THREAD_INFO_IN_TASK
+struct task_struct;
+
+static __always_inline struct task_struct *get_current(void)
+{
+       return (struct task_struct *)read_sysreg(sp_el0);
+}
+#define current get_current()
+#else
+#include <linux/thread_info.h>
+#define get_current() (current_thread_info()->task)
+#define current get_current()
+#endif
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* __ASM_CURRENT_H */
+
index d7e851c..a05033b 100644 (file)
@@ -66,6 +66,9 @@ asmlinkage void secondary_start_kernel(void);
  */
 struct secondary_data {
        void *stack;
+#ifdef CONFIG_THREAD_INFO_IN_TASK
+       struct task_struct *task;
+#endif
 };
 extern struct secondary_data secondary_data;
 extern void secondary_entry(void);
index 41e4fd7..ec4f8c0 100644 (file)
@@ -47,14 +47,25 @@ typedef unsigned long mm_segment_t;
 struct thread_info {
        unsigned long           flags;          /* low level flags */
        mm_segment_t            addr_limit;     /* address limit */
+#ifndef CONFIG_THREAD_INFO_IN_TASK
        struct task_struct      *task;          /* main task structure */
+#endif
 #ifdef CONFIG_ARM64_SW_TTBR0_PAN
        u64                     ttbr0;          /* saved TTBR0_EL1 */
 #endif
        int                     preempt_count;  /* 0 => preemptable, <0 => bug */
+#ifndef CONFIG_THREAD_INFO_IN_TASK
        int                     cpu;            /* cpu */
+#endif
 };
 
+#ifdef CONFIG_THREAD_INFO_IN_TASK
+#define INIT_THREAD_INFO(tsk)                                          \
+{                                                                      \
+       .preempt_count  = INIT_PREEMPT_COUNT,                           \
+       .addr_limit     = KERNEL_DS,                                    \
+}
+#else
 #define INIT_THREAD_INFO(tsk)                                          \
 {                                                                      \
        .task           = &tsk,                                         \
@@ -63,8 +74,6 @@ struct thread_info {
        .addr_limit     = KERNEL_DS,                                    \
 }
 
-#define init_stack             (init_thread_union.stack)
-
 /*
  * how to get the thread information struct from C
  */
@@ -81,6 +90,9 @@ static inline struct thread_info *current_thread_info(void)
 
        return (struct thread_info *)sp_el0;
 }
+#endif
+
+#define init_stack             (init_thread_union.stack)
 
 #define thread_saved_pc(tsk)   \
        ((unsigned long)(tsk->thread.cpu_context.pc))
index 66357a4..24e65f0 100644 (file)
@@ -33,12 +33,15 @@ int main(void)
 {
   DEFINE(TSK_ACTIVE_MM,                offsetof(struct task_struct, active_mm));
   BLANK();
+#ifdef CONFIG_THREAD_INFO_IN_TASK
+  DEFINE(TSK_TI_FLAGS,         offsetof(struct task_struct, thread_info.flags));
+  DEFINE(TSK_TI_PREEMPT,       offsetof(struct task_struct, thread_info.preempt_count));
+  DEFINE(TSK_TI_ADDR_LIMIT,    offsetof(struct task_struct, thread_info.addr_limit));
+  DEFINE(TSK_STACK,            offsetof(struct task_struct, stack));
+#else
   DEFINE(TI_FLAGS,             offsetof(struct thread_info, flags));
   DEFINE(TI_PREEMPT,           offsetof(struct thread_info, preempt_count));
   DEFINE(TI_ADDR_LIMIT,                offsetof(struct thread_info, addr_limit));
-#ifndef CONFIG_THREAD_INFO_IN_TASK
-  DEFINE(TI_TASK,              offsetof(struct thread_info, task));
-  DEFINE(TI_CPU,               offsetof(struct thread_info, cpu));
 #endif
 #ifdef CONFIG_ARM64_SW_TTBR0_PAN
   DEFINE(TSK_TI_TTBR0,         offsetof(struct thread_info, ttbr0));
@@ -113,6 +116,11 @@ int main(void)
   DEFINE(TZ_MINWEST,           offsetof(struct timezone, tz_minuteswest));
   DEFINE(TZ_DSTTIME,           offsetof(struct timezone, tz_dsttime));
   BLANK();
+#ifdef CONFIG_THREAD_INFO_IN_TASK
+  DEFINE(CPU_BOOT_STACK,       offsetof(struct secondary_data, stack));
+  DEFINE(CPU_BOOT_TASK,                offsetof(struct secondary_data, task));
+  BLANK();
+#endif
 #ifdef CONFIG_KVM_ARM_HOST
   DEFINE(VCPU_CONTEXT,         offsetof(struct kvm_vcpu, arch.ctxt));
   DEFINE(CPU_GP_REGS,          offsetof(struct kvm_cpu_context, gp_regs));
index ed2df05..dba3ace 100644 (file)
 
        .if     \el == 0
        mrs     x21, sp_el0
+#ifdef CONFIG_THREAD_INFO_IN_TASK
+       ldr_this_cpu    tsk, __entry_task, x20  // Ensure MDSCR_EL1.SS is clear,
+       ldr     x19, [tsk, #TSK_TI_FLAGS]       // since we can unmask debug
+#else
        mov     tsk, sp
        and     tsk, tsk, #~(THREAD_SIZE - 1)   // Ensure MDSCR_EL1.SS is clear,
        ldr     x19, [tsk, #TI_FLAGS]           // since we can unmask debug
+#endif
        disable_step_tsk x19, x20               // exceptions when scheduling.
 
        mov     x29, xzr                        // fp pointed to user-space
        add     x21, sp, #S_FRAME_SIZE
        get_thread_info tsk
        /* Save the task's original addr_limit and set USER_DS (TASK_SIZE_64) */
+#ifdef CONFIG_THREAD_INFO_IN_TASK
+       ldr     x20, [tsk, #TSK_TI_ADDR_LIMIT]
+#else
        ldr     x20, [tsk, #TI_ADDR_LIMIT]
+#endif
        str     x20, [sp, #S_ORIG_ADDR_LIMIT]
        mov     x20, #TASK_SIZE_64
+#ifdef CONFIG_THREAD_INFO_IN_TASK
+       str     x20, [tsk, #TSK_TI_ADDR_LIMIT]
+#else
        str     x20, [tsk, #TI_ADDR_LIMIT]
+#endif
        ALTERNATIVE(nop, SET_PSTATE_UAO(0), ARM64_HAS_UAO, CONFIG_ARM64_UAO)
        .endif /* \el == 0 */
        mrs     x22, elr_el1
@@ -168,7 +181,11 @@ alternative_else_nop_endif
        .if     \el != 0
        /* Restore the task's original addr_limit. */
        ldr     x20, [sp, #S_ORIG_ADDR_LIMIT]
+#ifdef CONFIG_THREAD_INFO_IN_TASK
+       str     x20, [tsk, #TSK_TI_ADDR_LIMIT]
+#else
        str     x20, [tsk, #TI_ADDR_LIMIT]
+#endif
 
        /* No need to restore UAO, it will be restored from SPSR_EL1 */
        .endif
@@ -258,13 +275,20 @@ alternative_endif
        mov     x19, sp                 // preserve the original sp
 
        /*
-        * Compare sp with the current thread_info, if the top
-        * ~(THREAD_SIZE - 1) bits match, we are on a task stack, and
-        * should switch to the irq stack.
+        * Compare sp with the base of the task stack.
+        * If the top ~(THREAD_SIZE - 1) bits match, we are on a task stack,
+        * and should switch to the irq stack.
         */
+#ifdef CONFIG_THREAD_INFO_IN_TASK
+       ldr     x25, [tsk, TSK_STACK]
+       eor     x25, x25, x19
+       and     x25, x25, #~(THREAD_SIZE - 1)
+       cbnz    x25, 9998f
+#else
        and     x25, x19, #~(THREAD_SIZE - 1)
        cmp     x25, tsk
        b.ne    9998f
+#endif
 
        adr_this_cpu x25, irq_stack, x26
        mov     x26, #IRQ_STACK_START_SP
@@ -493,9 +517,17 @@ el1_irq:
        irq_handler
 
 #ifdef CONFIG_PREEMPT
+#ifdef CONFIG_THREAD_INFO_IN_TASK
+       ldr     w24, [tsk, #TSK_TI_PREEMPT]     // get preempt count
+#else
        ldr     w24, [tsk, #TI_PREEMPT]         // get preempt count
+#endif
        cbnz    w24, 1f                         // preempt count != 0
+#ifdef CONFIG_THREAD_INFO_IN_TASK
+       ldr     x0, [tsk, #TSK_TI_FLAGS]        // get flags
+#else
        ldr     x0, [tsk, #TI_FLAGS]            // get flags
+#endif
        tbz     x0, #TIF_NEED_RESCHED, 1f       // needs rescheduling?
        bl      el1_preempt
 1:
@@ -510,7 +542,11 @@ ENDPROC(el1_irq)
 el1_preempt:
        mov     x24, lr
 1:     bl      preempt_schedule_irq            // irq en/disable is done inside
+#ifdef CONFIG_THREAD_INFO_IN_TASK
+       ldr     x0, [tsk, #TSK_TI_FLAGS]        // get new tasks TI_FLAGS
+#else
        ldr     x0, [tsk, #TI_FLAGS]            // get new tasks TI_FLAGS
+#endif
        tbnz    x0, #TIF_NEED_RESCHED, 1b       // needs rescheduling?
        ret     x24
 #endif
@@ -730,8 +766,12 @@ ENTRY(cpu_switch_to)
        ldp     x29, x9, [x8], #16
        ldr     lr, [x8]
        mov     sp, x9
+#ifdef CONFIG_THREAD_INFO_IN_TASK
+       msr     sp_el0, x1
+#else
        and     x9, x9, #~(THREAD_SIZE - 1)
        msr     sp_el0, x9
+#endif
        ret
 ENDPROC(cpu_switch_to)
 
@@ -742,7 +782,11 @@ ENDPROC(cpu_switch_to)
 ret_fast_syscall:
        disable_irq                             // disable interrupts
        str     x0, [sp, #S_X0]                 // returned x0
+#ifdef CONFIG_THREAD_INFO_IN_TASK
+       ldr     x1, [tsk, #TSK_TI_FLAGS]        // re-check for syscall tracing
+#else
        ldr     x1, [tsk, #TI_FLAGS]            // re-check for syscall tracing
+#endif
        and     x2, x1, #_TIF_SYSCALL_WORK
        cbnz    x2, ret_fast_syscall_trace
        and     x2, x1, #_TIF_WORK_MASK
@@ -774,7 +818,11 @@ work_resched:
  */
 ret_to_user:
        disable_irq                             // disable interrupts
+#ifdef CONFIG_THREAD_INFO_IN_TASK
+       ldr     x1, [tsk, #TSK_TI_FLAGS]
+#else
        ldr     x1, [tsk, #TI_FLAGS]
+#endif
        and     x2, x1, #_TIF_WORK_MASK
        cbnz    x2, work_pending
        enable_step_tsk x1, x2
@@ -806,7 +854,11 @@ el0_svc_naked:                                     // compat entry point
        enable_dbg_and_irq
        ct_user_exit 1
 
+#ifdef CONFIG_THREAD_INFO_IN_TASK
+       ldr     x16, [tsk, #TSK_TI_FLAGS]       // check for syscall hooks
+#else
        ldr     x16, [tsk, #TI_FLAGS]           // check for syscall hooks
+#endif
        tst     x16, #_TIF_SYSCALL_WORK
        b.ne    __sys_trace
        cmp     scno, sc_nr                     // check upper syscall limit
index 9971039..16d0820 100644 (file)
@@ -424,6 +424,7 @@ kernel_img_size:
        .set    initial_sp, init_thread_union + THREAD_START_SP
 __mmap_switched:
        mov     x28, lr                         // preserve LR
+
        adr_l   x8, vectors                     // load VBAR_EL1 with virtual
        msr     vbar_el1, x8                    // vector table address
        isb
@@ -474,10 +475,18 @@ __mmap_switched:
        dsb     sy                              // with MMU off
 #endif
 
+#ifdef CONFIG_THREAD_INFO_IN_TASK
+        adrp    x4, init_thread_union
+        add     sp, x4, #THREAD_SIZE
+        adr_l   x5, init_task
+        msr     sp_el0, x5                      // Save thread_info
+#else
        adr_l   sp, initial_sp, x4
        mov     x4, sp
        and     x4, x4, #~(THREAD_SIZE - 1)
        msr     sp_el0, x4                      // Save thread_info
+#endif
+
        str_l   x21, __fdt_pointer, x5          // Save FDT pointer
 
        ldr_l   x4, kimage_vaddr                // Save the offset between
@@ -689,11 +698,18 @@ ENTRY(__secondary_switched)
        adr_l   x5, vectors
        msr     vbar_el1, x5
        isb
-
+#ifdef CONFIG_THREAD_INFO_IN_TASK
+       adr_l   x0, secondary_data
+       ldr     x1, [x0, #CPU_BOOT_STACK]       // get secondary_data.stack
+       mov     sp, x1
+       ldr     x2, [x0, #CPU_BOOT_TASK]
+       msr     sp_el0, x2
+#else
        ldr_l   x0, secondary_data              // get secondary_data.stack
        mov     sp, x0
        and     x0, x0, #~(THREAD_SIZE - 1)
        msr     sp_el0, x0                      // save thread_info
+#endif
        mov     x29, #0
        b       secondary_start_kernel
 ENDPROC(__secondary_switched)
index 75dac2c..e34bcf3 100644 (file)
@@ -45,6 +45,9 @@
 #include <linux/personality.h>
 #include <linux/notifier.h>
 #include <trace/events/power.h>
+#ifdef CONFIG_THREAD_INFO_IN_TASK
+#include <linux/percpu.h>
+#endif
 
 #include <asm/alternative.h>
 #include <asm/compat.h>
@@ -390,6 +393,22 @@ void uao_thread_switch(struct task_struct *next)
        }
 }
 
+#ifdef CONFIG_THREAD_INFO_IN_TASK
+/*
+ * We store our current task in sp_el0, which is clobbered by userspace. Keep a
+ * shadow copy so that we can restore this upon entry from userspace.
+ *
+ * This is *only* for exception entry from EL0, and is not valid until we
+ * __switch_to() a user task.
+ */
+DEFINE_PER_CPU(struct task_struct *, __entry_task);
+
+static void entry_task_switch(struct task_struct *next)
+{
+       __this_cpu_write(__entry_task, next);
+}
+#endif
+
 /*
  * Thread switching.
  */
@@ -402,6 +421,9 @@ struct task_struct *__switch_to(struct task_struct *prev,
        tls_thread_switch(next);
        hw_breakpoint_thread_switch(next);
        contextidr_thread_switch(next);
+#ifdef CONFIG_THREAD_INFO_IN_TASK
+       entry_task_switch(next);
+#endif
        uao_thread_switch(next);
 
        /*
index 0b66134..ac899ac 100644 (file)
@@ -98,6 +98,9 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle)
         * We need to tell the secondary core where to find its stack and the
         * page tables.
         */
+#ifdef CONFIG_THREAD_INFO_IN_TASK
+       secondary_data.task = idle;
+#endif
        secondary_data.stack = task_stack_page(idle) + THREAD_START_SP;
        __flush_dcache_area(&secondary_data, sizeof(secondary_data));
 
@@ -121,6 +124,9 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle)
                pr_err("CPU%u: failed to boot: %d\n", cpu, ret);
        }
 
+#ifdef CONFIG_THREAD_INFO_IN_TASK
+       secondary_data.task = NULL;
+#endif
        secondary_data.stack = NULL;
 
        return ret;