Merge 4.4.179 into android-4.4-p

[sagit-ice-cold/kernel_xiaomi_msm8998.git] / arch / arm64 / mm / fault.c
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c

index 8648541..5a3117c 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -107,32 +107,38 @@ int ptep_set_access_flags(struct vm_area_struct *vma,
         /* only preserve the access flags and write permission */
         pte_val(entry) &= PTE_AF | PTE_WRITE | PTE_DIRTY;
  
-       /*
-        * PTE_RDONLY is cleared by default in the asm below, so set it in
-        * back if necessary (read-only or clean PTE).
-        */
+       /* set PTE_RDONLY if the PTE is actually read-only or clean */
         if (!pte_write(entry) || !pte_sw_dirty(entry))
                 pte_val(entry) |= PTE_RDONLY;
  
         /*
          * Setting the flags must be done atomically to avoid racing with the
-        * hardware update of the access/dirty state.
+        * hardware update of the access/dirty state. The PTE_RDONLY bit must
+        * be set to the most permissive (lowest value) of *ptep and entry
+        * (calculated as: a & b == ~(~a | ~b)).
          */
+       pte_val(entry) ^= PTE_RDONLY;
         asm volatile("//        ptep_set_access_flags\n"
         "       prfm    pstl1strm, %2\n"
         "1:     ldxr    %0, %2\n"
-       "       and     %0, %0, %3              // clear PTE_RDONLY\n"
+       "       eor     %0, %0, %3              // negate PTE_RDONLY in *ptep\n"
         "       orr     %0, %0, %4              // set flags\n"
+       "       eor     %0, %0, %3              // negate final PTE_RDONLY\n"
         "       stxr    %w1, %0, %2\n"
         "       cbnz    %w1, 1b\n"
         : "=&r" (old_pteval), "=&r" (tmp), "+Q" (pte_val(*ptep))
-       : "L" (~PTE_RDONLY), "r" (pte_val(entry)));
+       : "L" (PTE_RDONLY), "r" (pte_val(entry)));
  
         flush_tlb_fix_spurious_fault(vma, address);
         return 1;
  }
  #endif
  
+static bool is_el1_instruction_abort(unsigned int esr)
+{
+       return ESR_ELx_EC(esr) == ESR_ELx_EC_IABT_CUR;
+}
+
  /*
   * The kernel tried to access some page that wasn't present.
   */
@@ -141,8 +147,9 @@ static void __do_kernel_fault(struct mm_struct *mm, unsigned long addr,
  {
         /*
          * Are we prepared to handle this kernel fault?
+        * We are almost certainly not prepared to handle instruction faults.
          */
-       if (fixup_exception(regs))
+       if (!is_el1_instruction_abort(esr) && fixup_exception(regs))
                 return;
  
         /*
@@ -204,8 +211,6 @@ static void do_bad_area(unsigned long addr, unsigned int esr, struct pt_regs *re
  #define VM_FAULT_BADMAP                0x010000
  #define VM_FAULT_BADACCESS     0x020000
  
-#define ESR_LNX_EXEC           (1 << 24)
-
  static int __do_page_fault(struct mm_struct *mm, unsigned long addr,
                            unsigned int mm_flags, unsigned long vm_flags,
                            struct task_struct *tsk)
@@ -244,6 +249,26 @@ out:
         return fault;
  }
  
+static inline bool is_permission_fault(unsigned int esr, struct pt_regs *regs)
+{
+       unsigned int ec       = ESR_ELx_EC(esr);
+       unsigned int fsc_type = esr & ESR_ELx_FSC_TYPE;
+
+       if (ec != ESR_ELx_EC_DABT_CUR && ec != ESR_ELx_EC_IABT_CUR)
+               return false;
+
+       if (system_uses_ttbr0_pan())
+               return fsc_type == ESR_ELx_FSC_FAULT &&
+                       (regs->pstate & PSR_PAN_BIT);
+       else
+               return fsc_type == ESR_ELx_FSC_PERM;
+}
+
+static bool is_el0_instruction_abort(unsigned int esr)
+{
+       return ESR_ELx_EC(esr) == ESR_ELx_EC_IABT_LOW;
+}
+
  static int __kprobes do_page_fault(unsigned long addr, unsigned int esr,
                                    struct pt_regs *regs)
  {
@@ -270,19 +295,24 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr,
         if (user_mode(regs))
                 mm_flags |= FAULT_FLAG_USER;
  
-       if (esr & ESR_LNX_EXEC) {
+       if (is_el0_instruction_abort(esr)) {
                 vm_flags = VM_EXEC;
         } else if ((esr & ESR_ELx_WNR) && !(esr & ESR_ELx_CM)) {
                 vm_flags = VM_WRITE;
                 mm_flags |= FAULT_FLAG_WRITE;
         }
  
-       /*
-        * PAN bit set implies the fault happened in kernel space, but not
-        * in the arch's user access functions.
-        */
-       if (IS_ENABLED(CONFIG_ARM64_PAN) && (regs->pstate & PSR_PAN_BIT))
-               goto no_context;
+       if (addr < USER_DS && is_permission_fault(esr, regs)) {
+               /* regs->orig_addr_limit may be 0 if we entered from EL0 */
+               if (regs->orig_addr_limit == KERNEL_DS)
+                       die("Accessing user space memory with fs=KERNEL_DS", regs, esr);
+
+               if (is_el1_instruction_abort(esr))
+                       die("Attempting to execute userspace memory", regs, esr);
+
+               if (!search_exception_tables(regs->pc))
+                       die("Accessing user space memory outside uaccess.h routines", regs, esr);
+       }
  
         /*
          * As per x86, we may deadlock here. However, since the kernel only
@@ -434,7 +464,7 @@ static int do_bad(unsigned long addr, unsigned int esr, struct pt_regs *regs)
         return 1;
  }
  
-static struct fault_info {
+static const struct fault_info {
         int     (*fn)(unsigned long addr, unsigned int esr, struct pt_regs *regs);
         int     sig;
         int     code;
@@ -588,26 +618,40 @@ void __init hook_debug_fault_code(int nr,
         debug_fault_info[nr].name       = name;
  }
  
-asmlinkage int __exception do_debug_exception(unsigned long addr,
+asmlinkage int __exception do_debug_exception(unsigned long addr_if_watchpoint,
                                               unsigned int esr,
                                               struct pt_regs *regs)
  {
         const struct fault_info *inf = debug_fault_info + DBG_ESR_EVT(esr);
+       unsigned long pc = instruction_pointer(regs);
         struct siginfo info;
+       int rv;
  
-       if (!inf->fn(addr, esr, regs))
-               return 1;
+       /*
+        * Tell lockdep we disabled irqs in entry.S. Do nothing if they were
+        * already disabled to preserve the last enabled/disabled addresses.
+        */
+       if (interrupts_enabled(regs))
+               trace_hardirqs_off();
  
-       pr_alert("Unhandled debug exception: %s (0x%08x) at 0x%016lx\n",
-                inf->name, esr, addr);
+       if (!inf->fn(addr_if_watchpoint, esr, regs)) {
+               rv = 1;
+       } else {
+               pr_alert("Unhandled debug exception: %s (0x%08x) at 0x%016lx\n",
+                        inf->name, esr, pc);
+
+               info.si_signo = inf->sig;
+               info.si_errno = 0;
+               info.si_code  = inf->code;
+               info.si_addr  = (void __user *)pc;
+               arm64_notify_die("", regs, &info, 0);
+               rv = 0;
+       }
  
-       info.si_signo = inf->sig;
-       info.si_errno = 0;
-       info.si_code  = inf->code;
-       info.si_addr  = (void __user *)addr;
-       arm64_notify_die("", regs, &info, 0);
+       if (interrupts_enabled(regs))
+               trace_hardirqs_on();
  
-       return 0;
+       return rv;
  }
  
  #ifdef CONFIG_ARM64_PAN
@@ -624,3 +668,17 @@ int cpu_enable_pan(void *__unused)
         return 0;
  }
  #endif /* CONFIG_ARM64_PAN */
+
+#ifdef CONFIG_ARM64_UAO
+/*
+ * Kernel threads have fs=KERNEL_DS by default, and don't need to call
+ * set_fs(); devtmpfs in particular relies on this behaviour.
+ * We need to enable the feature at runtime (instead of adding it to
+ * PSR_MODE_EL1h) as the feature may not be implemented by the cpu.
+ */
+int cpu_enable_uao(void *__unused)
+{
+       asm(SET_PSTATE_UAO(1));
+       return 0;
+}
+#endif /* CONFIG_ARM64_UAO */