arch/arm/mm/fault.c

   1 // SPDX-License-Identifier: GPL-2.0-only
   2 /*
   3  *  linux/arch/arm/mm/fault.c
   4  *
   5  *  Copyright (C) 1995  Linus Torvalds
   6  *  Modifications for ARM processor (c) 1995-2004 Russell King
   7  */
   8 #include <linux/extable.h>
   9 #include <linux/signal.h>
  10 #include <linux/mm.h>
  11 #include <linux/hardirq.h>
  12 #include <linux/init.h>
  13 #include <linux/kprobes.h>
  14 #include <linux/uaccess.h>
  15 #include <linux/page-flags.h>
  16 #include <linux/sched/signal.h>
  17 #include <linux/sched/debug.h>
  18 #include <linux/highmem.h>
  19 #include <linux/perf_event.h>
  20
  21 #include <asm/pgtable.h>
  22 #include <asm/system_misc.h>
  23 #include <asm/system_info.h>
  24 #include <asm/tlbflush.h>
  25
  26 #include "fault.h"
  27
  28 #ifdef CONFIG_MMU
  29
  30 /*
  31  * This is useful to dump out the page tables associated with
  32  * 'addr' in mm 'mm'.
  33  */
  34 void show_pte(const char *lvl, struct mm_struct *mm, unsigned long addr)
  35 {
  36         pgd_t *pgd;
  37
  38         if (!mm)
  39                 mm = &init_mm;
  40
  41         printk("%spgd = %p\n", lvl, mm->pgd);
  42         pgd = pgd_offset(mm, addr);
  43         printk("%s[%08lx] *pgd=%08llx", lvl, addr, (long long)pgd_val(*pgd));
  44
  45         do {
  46                 pud_t *pud;
  47                 pmd_t *pmd;
  48                 pte_t *pte;
  49
  50                 if (pgd_none(*pgd))
  51                         break;
  52
  53                 if (pgd_bad(*pgd)) {
  54                         pr_cont("(bad)");
  55                         break;
  56                 }
  57
  58                 pud = pud_offset(pgd, addr);
  59                 if (PTRS_PER_PUD != 1)
  60                         pr_cont(", *pud=%08llx", (long long)pud_val(*pud));
  61
  62                 if (pud_none(*pud))
  63                         break;
  64
  65                 if (pud_bad(*pud)) {
  66                         pr_cont("(bad)");
  67                         break;
  68                 }
  69
  70                 pmd = pmd_offset(pud, addr);
  71                 if (PTRS_PER_PMD != 1)
  72                         pr_cont(", *pmd=%08llx", (long long)pmd_val(*pmd));
  73
  74                 if (pmd_none(*pmd))
  75                         break;
  76
  77                 if (pmd_bad(*pmd)) {
  78                         pr_cont("(bad)");
  79                         break;
  80                 }
  81
  82                 /* We must not map this if we have highmem enabled */
  83                 if (PageHighMem(pfn_to_page(pmd_val(*pmd) >> PAGE_SHIFT)))
  84                         break;
  85
  86                 pte = pte_offset_map(pmd, addr);
  87                 pr_cont(", *pte=%08llx", (long long)pte_val(*pte));
  88 #ifndef CONFIG_ARM_LPAE
  89                 pr_cont(", *ppte=%08llx",
  90                        (long long)pte_val(pte[PTE_HWTABLE_PTRS]));
  91 #endif
  92                 pte_unmap(pte);
  93         } while(0);
  94
  95         pr_cont("\n");
  96 }
  97 #else                                   /* CONFIG_MMU */
/* Variant built when CONFIG_MMU is not set: it prints nothing. */
void show_pte(const char *lvl, struct mm_struct *mm, unsigned long addr)
{ }
 100 #endif                                  /* CONFIG_MMU */
 101
 102 /*
 103  * Oops.  The kernel tried to access some page that wasn't present.
 104  */
 105 static void
 106 __do_kernel_fault(struct mm_struct *mm, unsigned long addr, unsigned int fsr,
 107                   struct pt_regs *regs)
 108 {
 109         /*
 110          * Are we prepared to handle this kernel fault?
 111          */
 112         if (fixup_exception(regs))
 113                 return;
 114
 115         /*
 116          * No handler, we'll have to terminate things with extreme prejudice.
 117          */
 118         bust_spinlocks(1);
 119         pr_alert("8<--- cut here ---\n");
 120         pr_alert("Unable to handle kernel %s at virtual address %08lx\n",
 121                  (addr < PAGE_SIZE) ? "NULL pointer dereference" :
 122                  "paging request", addr);
 123
 124         show_pte(KERN_ALERT, mm, addr);
 125         die("Oops", regs, fsr);
 126         bust_spinlocks(0);
 127         do_exit(SIGKILL);
 128 }
 129
 130 /*
 131  * Something tried to access memory that isn't in our memory map..
 132  * User mode accesses just cause a SIGSEGV
 133  */
 134 static void
 135 __do_user_fault(unsigned long addr, unsigned int fsr, unsigned int sig,
 136                 int code, struct pt_regs *regs)
 137 {
 138         struct task_struct *tsk = current;
 139
 140         if (addr > TASK_SIZE)
 141                 harden_branch_predictor();
 142
 143 #ifdef CONFIG_DEBUG_USER
 144         if (((user_debug & UDBG_SEGV) && (sig == SIGSEGV)) ||
 145             ((user_debug & UDBG_BUS)  && (sig == SIGBUS))) {
 146                 pr_err("8<--- cut here ---\n");
 147                 pr_err("%s: unhandled page fault (%d) at 0x%08lx, code 0x%03x\n",
 148                        tsk->comm, sig, addr, fsr);
 149                 show_pte(KERN_ERR, tsk->mm, addr);
 150                 show_regs(regs);
 151         }
 152 #endif
 153 #ifndef CONFIG_KUSER_HELPERS
 154         if ((sig == SIGSEGV) && ((addr & PAGE_MASK) == 0xffff0000))
 155                 printk_ratelimited(KERN_DEBUG
 156                                    "%s: CONFIG_KUSER_HELPERS disabled at 0x%08lx\n",
 157                                    tsk->comm, addr);
 158 #endif
 159
 160         tsk->thread.address = addr;
 161         tsk->thread.error_code = fsr;
 162         tsk->thread.trap_no = 14;
 163         force_sig_fault(sig, code, (void __user *)addr);
 164 }
 165
 166 void do_bad_area(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
 167 {
 168         struct task_struct *tsk = current;
 169         struct mm_struct *mm = tsk->active_mm;
 170
 171         /*
 172          * If we are in kernel mode at this point, we
 173          * have no context to handle this fault with.
 174          */
 175         if (user_mode(regs))
 176                 __do_user_fault(addr, fsr, SIGSEGV, SEGV_MAPERR, regs);
 177         else
 178                 __do_kernel_fault(mm, addr, fsr, regs);
 179 }
 180
 181 #ifdef CONFIG_MMU
/*
 * File-local fault codes returned by __do_page_fault() and tested by
 * do_page_fault(): BADMAP when no usable VMA covers the address,
 * BADACCESS when the VMA's permissions do not allow the access.
 */
#define VM_FAULT_BADMAP         0x010000
#define VM_FAULT_BADACCESS      0x020000
 184
 185 /*
 186  * Check that the permissions on the VMA allow for the fault which occurred.
 187  * If we encountered a write fault, we must have write permission, otherwise
 188  * we allow any permission.
 189  */
 190 static inline bool access_error(unsigned int fsr, struct vm_area_struct *vma)
 191 {
 192         unsigned int mask = VM_READ | VM_WRITE | VM_EXEC;
 193
 194         if ((fsr & FSR_WRITE) && !(fsr & FSR_CM))
 195                 mask = VM_WRITE;
 196         if (fsr & FSR_LNX_PF)
 197                 mask = VM_EXEC;
 198
 199         return vma->vm_flags & mask ? false : true;
 200 }
 201
 202 static vm_fault_t __kprobes
 203 __do_page_fault(struct mm_struct *mm, unsigned long addr, unsigned int fsr,
 204                 unsigned int flags, struct task_struct *tsk)
 205 {
 206         struct vm_area_struct *vma;
 207         vm_fault_t fault;
 208
 209         vma = find_vma(mm, addr);
 210         fault = VM_FAULT_BADMAP;
 211         if (unlikely(!vma))
 212                 goto out;
 213         if (unlikely(vma->vm_start > addr))
 214                 goto check_stack;
 215
 216         /*
 217          * Ok, we have a good vm_area for this
 218          * memory access, so we can handle it.
 219          */
 220 good_area:
 221         if (access_error(fsr, vma)) {
 222                 fault = VM_FAULT_BADACCESS;
 223                 goto out;
 224         }
 225
 226         return handle_mm_fault(vma, addr & PAGE_MASK, flags);
 227
 228 check_stack:
 229         /* Don't allow expansion below FIRST_USER_ADDRESS */
 230         if (vma->vm_flags & VM_GROWSDOWN &&
 231             addr >= FIRST_USER_ADDRESS && !expand_stack(vma, addr))
 232                 goto good_area;
 233 out:
 234         return fault;
 235 }
 236
/*
 * Main page fault handler.
 *
 * Takes mm->mmap_sem for reading, lets __do_page_fault() try to resolve
 * the fault at 'addr', retries when asked to, and finally either returns
 * to the faulting context, sends a signal to the user task, or hands the
 * fault to __do_kernel_fault().  Every path returns 0.
 */
static int __kprobes
do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
{
        struct task_struct *tsk;
        struct mm_struct *mm;
        int sig, code;
        vm_fault_t fault;
        unsigned int flags = FAULT_FLAG_DEFAULT;

        /* Nothing more to do if kprobe_page_fault() took care of the fault. */
        if (kprobe_page_fault(regs, fsr))
                return 0;

        tsk = current;
        mm  = tsk->mm;

        /* Enable interrupts if they were enabled in the parent context. */
        if (interrupts_enabled(regs))
                local_irq_enable();

        /*
         * If we're in an interrupt or have no user
         * context, we must not take the fault..
         */
        if (faulthandler_disabled() || !mm)
                goto no_context;

        /* Translate the faulting context and access type into fault flags. */
        if (user_mode(regs))
                flags |= FAULT_FLAG_USER;
        if ((fsr & FSR_WRITE) && !(fsr & FSR_CM))
                flags |= FAULT_FLAG_WRITE;

        /*
         * As per x86, we may deadlock here.  However, since the kernel only
         * validly references user space from well defined areas of the code,
         * we can bug out early if this is from code which shouldn't.
         */
        if (!down_read_trylock(&mm->mmap_sem)) {
                if (!user_mode(regs) && !search_exception_tables(regs->ARM_pc))
                        goto no_context;
                /*
                 * The retry path below jumps here so that mmap_sem is
                 * taken again before __do_page_fault() is called a second
                 * time.
                 */
retry:
                down_read(&mm->mmap_sem);
        } else {
                /*
                 * The above down_read_trylock() might have succeeded in
                 * which case, we'll have missed the might_sleep() from
                 * down_read()
                 */
                might_sleep();
#ifdef CONFIG_DEBUG_VM
                if (!user_mode(regs) &&
                    !search_exception_tables(regs->ARM_pc))
                        goto no_context;
#endif
        }

        fault = __do_page_fault(mm, addr, fsr, flags, tsk);

        /*
         * If we need to retry but a fatal signal is pending, handle the
         * signal first. We do not need to release the mmap_sem because
         * it would already be released in __lock_page_or_retry in
         * mm/filemap.c.
         */
        if (fault_signal_pending(fault, regs)) {
                if (!user_mode(regs))
                        goto no_context;
                return 0;
        }

        /*
         * Major/minor page fault accounting is only done on the
         * initial attempt. If we go through a retry, it is extremely
         * likely that the page will be found in page cache at that point.
         *
         * NOTE(review): this function only ever adds FAULT_FLAG_TRIED to
         * 'flags' and never clears FAULT_FLAG_ALLOW_RETRY, so the
         * accounting below looks like it also runs on a retry - confirm
         * against the definition of FAULT_FLAG_DEFAULT.
         */

        perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr);
        if (!(fault & VM_FAULT_ERROR) && flags & FAULT_FLAG_ALLOW_RETRY) {
                if (fault & VM_FAULT_MAJOR) {
                        tsk->maj_flt++;
                        perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1,
                                        regs, addr);
                } else {
                        tsk->min_flt++;
                        perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1,
                                        regs, addr);
                }
                if (fault & VM_FAULT_RETRY) {
                        flags |= FAULT_FLAG_TRIED;
                        goto retry;
                }
        }

        up_read(&mm->mmap_sem);

        /*
         * Handle the "normal" case first: no error bit and neither of the
         * local BADMAP/BADACCESS codes is set, so the fault was resolved.
         */
        if (likely(!(fault & (VM_FAULT_ERROR | VM_FAULT_BADMAP | VM_FAULT_BADACCESS))))
                return 0;

        /*
         * If we are in kernel mode at this point, we
         * have no context to handle this fault with.
         */
        if (!user_mode(regs))
                goto no_context;

        if (fault & VM_FAULT_OOM) {
                /*
                 * We ran out of memory, call the OOM killer, and return to
                 * userspace (which will retry the fault, or kill us if we
                 * got oom-killed)
                 */
                pagefault_out_of_memory();
                return 0;
        }

        if (fault & VM_FAULT_SIGBUS) {
                /*
                 * We had some memory, but were unable to
                 * successfully fix up this page fault.
                 */
                sig = SIGBUS;
                code = BUS_ADRERR;
        } else {
                /*
                 * Something tried to access memory that
                 * isn't in our memory map..
                 */
                sig = SIGSEGV;
                code = fault == VM_FAULT_BADACCESS ?
                        SEGV_ACCERR : SEGV_MAPERR;
        }

        __do_user_fault(addr, fsr, sig, code, regs);
        return 0;

no_context:
        /* Kernel-mode fault, or no usable mm: fixup or oops. */
        __do_kernel_fault(mm, addr, fsr, regs);
        return 0;
}
 376 #else                                   /* CONFIG_MMU */
/* Variant built when CONFIG_MMU is not set: does nothing and returns 0. */
static int
do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
{
        return 0;
}
 382 #endif                                  /* CONFIG_MMU */
 383
 384 /*
 385  * First Level Translation Fault Handler
 386  *
 387  * We enter here because the first level page table doesn't contain
 388  * a valid entry for the address.
 389  *
 390  * If the address is in kernel space (>= TASK_SIZE), then we are
 391  * probably faulting in the vmalloc() area.
 392  *
 * If the init_task's first level page tables contain the relevant
 * entry, we copy it to this task.  If not, we send the process
 * a signal, fixup the exception, or oops the kernel.
 396  *
 397  * NOTE! We MUST NOT take any locks for this case. We may be in an
 398  * interrupt or a critical region, and should only copy the information
 399  * from the master page table, nothing more.
 400  */
 401 #ifdef CONFIG_MMU
static int __kprobes
do_translation_fault(unsigned long addr, unsigned int fsr,
                     struct pt_regs *regs)
{
        unsigned int index;
        pgd_t *pgd, *pgd_k;
        pud_t *pud, *pud_k;
        pmd_t *pmd, *pmd_k;

        /* Addresses below TASK_SIZE take the ordinary page fault path. */
        if (addr < TASK_SIZE)
                return do_page_fault(addr, fsr, regs);

        /* User mode has no business touching addresses at or above TASK_SIZE. */
        if (user_mode(regs))
                goto bad_area;

        index = pgd_index(addr);

        /*
         * 'pgd' is the entry in the table returned by cpu_get_pgd(),
         * 'pgd_k' the matching entry in init_mm's table.
         */
        pgd = cpu_get_pgd() + index;
        pgd_k = init_mm.pgd + index;

        /* Nothing to copy if init_mm has no entry either. */
        if (pgd_none(*pgd_k))
                goto bad_area;
        if (!pgd_present(*pgd))
                set_pgd(pgd, *pgd_k);

        pud = pud_offset(pgd, addr);
        pud_k = pud_offset(pgd_k, addr);

        if (pud_none(*pud_k))
                goto bad_area;
        if (!pud_present(*pud))
                set_pud(pud, *pud_k);

        pmd = pmd_offset(pud, addr);
        pmd_k = pmd_offset(pud_k, addr);

#ifdef CONFIG_ARM_LPAE
        /*
         * Only one hardware entry per PMD with LPAE.
         */
        index = 0;
#else
        /*
         * On ARM one Linux PGD entry contains two hardware entries (see page
         * tables layout in pgtable.h). We normally guarantee that we always
         * fill both L1 entries. But create_mapping() doesn't follow the rule.
         * It can create individual L1 entries, so here we have to call
         * pmd_none() check for the entry really corresponded to address, not
         * for the first of pair.
         */
        index = (addr >> SECTION_SHIFT) & 1;
#endif
        if (pmd_none(pmd_k[index]))
                goto bad_area;

        /* init_mm has a mapping for this address: copy it across. */
        copy_pmd(pmd, pmd_k);
        return 0;

bad_area:
        do_bad_area(addr, fsr, regs);
        return 0;
}
 464 #else                                   /* CONFIG_MMU */
/* Variant built when CONFIG_MMU is not set: does nothing and returns 0. */
static int
do_translation_fault(unsigned long addr, unsigned int fsr,
                     struct pt_regs *regs)
{
        return 0;
}
 471 #endif                                  /* CONFIG_MMU */
 472
 473 /*
 474  * Some section permission faults need to be handled gracefully.
 475  * They can happen due to a __{get,put}_user during an oops.
 476  */
 477 #ifndef CONFIG_ARM_LPAE
static int
do_sect_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
{
        /*
         * do_bad_area() deals with both user and kernel mode; returning 0
         * tells the abort dispatcher that the fault has been dealt with.
         */
        do_bad_area(addr, fsr, regs);
        return 0;
}
 484 #endif /* CONFIG_ARM_LPAE */
 485
/*
 * This abort handler always returns "fault": the non-zero return value
 * makes do_DataAbort()/do_PrefetchAbort() report the abort as unhandled.
 */
static int
do_bad(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
{
        return 1;
}
 494
/*
 * One entry of the fault status dispatch tables (fsr_info[] and
 * ifsr_info[]), selected by the abort dispatchers through fsr_fs().
 */
struct fsr_info {
        /* Handler; returns 0 when the abort has been dealt with. */
        int     (*fn)(unsigned long addr, unsigned int fsr, struct pt_regs *regs);
        /* Signal passed to arm_notify_die() when fn returns non-zero. */
        int     sig;
        /* si_code passed along with 'sig'. */
        int     code;
        /* Description printed in the "Unhandled ..." message. */
        const char *name;
};
 501
 502 /* FSR definition */
 503 #ifdef CONFIG_ARM_LPAE
 504 #include "fsr-3level.c"
 505 #else
 506 #include "fsr-2level.c"
 507 #endif
 508
 509 void __init
 510 hook_fault_code(int nr, int (*fn)(unsigned long, unsigned int, struct pt_regs *),
 511                 int sig, int code, const char *name)
 512 {
 513         if (nr < 0 || nr >= ARRAY_SIZE(fsr_info))
 514                 BUG();
 515
 516         fsr_info[nr].fn   = fn;
 517         fsr_info[nr].sig  = sig;
 518         fsr_info[nr].code = code;
 519         fsr_info[nr].name = name;
 520 }
 521
 522 /*
 523  * Dispatch a data abort to the relevant handler.
 524  */
 525 asmlinkage void
 526 do_DataAbort(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
 527 {
 528         const struct fsr_info *inf = fsr_info + fsr_fs(fsr);
 529
 530         if (!inf->fn(addr, fsr & ~FSR_LNX_PF, regs))
 531                 return;
 532
 533         pr_alert("8<--- cut here ---\n");
 534         pr_alert("Unhandled fault: %s (0x%03x) at 0x%08lx\n",
 535                 inf->name, fsr, addr);
 536         show_pte(KERN_ALERT, current->mm, addr);
 537
 538         arm_notify_die("", regs, inf->sig, inf->code, (void __user *)addr,
 539                        fsr, 0);
 540 }
 541
 542 void __init
 543 hook_ifault_code(int nr, int (*fn)(unsigned long, unsigned int, struct pt_regs *),
 544                  int sig, int code, const char *name)
 545 {
 546         if (nr < 0 || nr >= ARRAY_SIZE(ifsr_info))
 547                 BUG();
 548
 549         ifsr_info[nr].fn   = fn;
 550         ifsr_info[nr].sig  = sig;
 551         ifsr_info[nr].code = code;
 552         ifsr_info[nr].name = name;
 553 }
 554
 555 asmlinkage void
 556 do_PrefetchAbort(unsigned long addr, unsigned int ifsr, struct pt_regs *regs)
 557 {
 558         const struct fsr_info *inf = ifsr_info + fsr_fs(ifsr);
 559
 560         if (!inf->fn(addr, ifsr | FSR_LNX_PF, regs))
 561                 return;
 562
 563         pr_alert("Unhandled prefetch abort: %s (0x%03x) at 0x%08lx\n",
 564                 inf->name, ifsr, addr);
 565
 566         arm_notify_die("", regs, inf->sig, inf->code, (void __user *)addr,
 567                        ifsr, 0);
 568 }
 569
 570 /*
 571  * Abort handler to be used only during first unmasking of asynchronous aborts
 572  * on the boot CPU. This makes sure that the machine will not die if the
 573  * firmware/bootloader left an imprecise abort pending for us to trip over.
 574  */
 575 static int __init early_abort_handler(unsigned long addr, unsigned int fsr,
 576                                       struct pt_regs *regs)
 577 {
 578         pr_warn("Hit pending asynchronous external abort (FSR=0x%08x) during "
 579                 "first unmask, this is most likely caused by a "
 580                 "firmware/bootloader bug.\n", fsr);
 581
 582         return 0;
 583 }
 584
/*
 * Run local_abt_enable() with early_abort_handler installed for the
 * FSR_FS_AEA fault status, then put do_bad back in that slot.
 */
void __init early_abt_enable(void)
{
        fsr_info[FSR_FS_AEA].fn = early_abort_handler;
        local_abt_enable();
        fsr_info[FSR_FS_AEA].fn = do_bad;
}
 591
 592 #ifndef CONFIG_ARM_LPAE
 593 static int __init exceptions_init(void)
 594 {
 595         if (cpu_architecture() >= CPU_ARCH_ARMv6) {
 596                 hook_fault_code(4, do_translation_fault, SIGSEGV, SEGV_MAPERR,
 597                                 "I-cache maintenance fault");
 598         }
 599
 600         if (cpu_architecture() >= CPU_ARCH_ARMv7) {
 601                 /*
 602                  * TODO: Access flag faults introduced in ARMv6K.
 603                  * Runtime check for 'K' extension is needed
 604                  */
 605                 hook_fault_code(3, do_bad, SIGSEGV, SEGV_MAPERR,
 606                                 "section access flag fault");
 607                 hook_fault_code(6, do_bad, SIGSEGV, SEGV_MAPERR,
 608                                 "section access flag fault");
 609         }
 610
 611         return 0;
 612 }
 613
 614 arch_initcall(exceptions_init);
 615 #endif