2 * linux/arch/x86-64/traps.c
4 * Copyright (C) 1991, 1992 Linus Torvalds
5 * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs
7 * Pentium III FXSR, SSE support
8 * Gareth Hughes <gareth@valinux.com>, May 2000
10 * $Id: traps.c,v 1.71 2004/02/27 22:07:36 ak Exp $
14 * 'Traps.c' handles hardware traps and faults after we have saved some
17 #include <linux/config.h>
18 #include <linux/sched.h>
19 #include <linux/kernel.h>
20 #include <linux/string.h>
21 #include <linux/errno.h>
22 #include <linux/ptrace.h>
23 #include <linux/timer.h>
25 #include <linux/init.h>
26 #include <linux/delay.h>
27 #include <linux/spinlock.h>
28 #include <linux/interrupt.h>
29 #include <linux/module.h>
31 #include <asm/system.h>
32 #include <asm/uaccess.h>
34 #include <asm/atomic.h>
35 #include <asm/debugreg.h>
38 #include <asm/kdebug.h>
41 #include <asm/pgalloc.h>
43 #include <asm/proto.h>
45 #include <linux/irq.h>
47 extern int exception_trace;
49 extern struct gate_struct idt_table[256];
51 asmlinkage void divide_error(void);
52 asmlinkage void debug(void);
53 asmlinkage void nmi(void);
54 asmlinkage void int3(void);
55 asmlinkage void overflow(void);
56 asmlinkage void bounds(void);
57 asmlinkage void invalid_op(void);
58 asmlinkage void device_not_available(void);
59 asmlinkage void double_fault(void);
60 asmlinkage void coprocessor_segment_overrun(void);
61 asmlinkage void invalid_TSS(void);
62 asmlinkage void segment_not_present(void);
63 asmlinkage void stack_segment(void);
64 asmlinkage void general_protection(void);
65 asmlinkage void page_fault(void);
66 asmlinkage void coprocessor_error(void);
67 asmlinkage void simd_coprocessor_error(void);
68 asmlinkage void reserved(void);
69 asmlinkage void alignment_check(void);
70 asmlinkage void machine_check(void);
71 asmlinkage void spurious_interrupt_bug(void);
72 asmlinkage void call_debug(void);
/*
 * conditional_sti - act on the interrupt flag saved in the trapped context.
 *
 * Tests X86_EFLAGS_IF in regs->eflags.  The statement guarded by the test is
 * not visible in this extract (presumably it re-enables interrupts, as the
 * name suggests -- TODO confirm).
 */
74 static inline void conditional_sti(struct pt_regs *regs)
76 if (regs->eflags & X86_EFLAGS_IF)
80 extern char iret_address[];
82 struct notifier_block *die_chain;
84 int kstack_depth_to_print = 12;
86 #ifdef CONFIG_KALLSYMS
87 #include <linux/kallsyms.h>
/*
 * printk_address - print a code address together with its symbol.
 *
 * Looks the address up with kallsyms_address_to_symbol().  When the lookup
 * returns 0 only the raw address is printed, as "[<%016lx>]".  Otherwise the
 * address is followed by "{delim modname delim symname +offset}", where the
 * offset is address - symstart.  For the module name "kernel" both modname
 * and delim are set to "" so no module prefix appears.
 *
 * Returns whatever printk() returns.
 */
88 int printk_address(unsigned long address)
91 const char *modname, *secname, *symname;
92 unsigned long symstart;
95 /* What a function call! */
96 if (!kallsyms_address_to_symbol(address,
97 &modname, &dummy, &dummy,
98 &secname, &dummy, &dummy,
99 &symname, &symstart, &dummy)) {
100 return printk("[<%016lx>]", address);
102 if (!strcmp(modname, "kernel"))
103 modname = delim = "";
104 return printk("[<%016lx>]{%s%s%s%s%+ld}",
105 address,delim,modname,delim,symname,address-symstart);
/*
 * printk_address - variant that prints only the raw address as "[<%016lx>]".
 *
 * Presumably the build without CONFIG_KALLSYMS; the preprocessor line that
 * selects it is not visible here.  Returns whatever printk() returns.
 */
108 int printk_address(unsigned long address)
110 return printk("[<%016lx>]", address);
115 #ifdef CONFIG_MODULES
117 extern struct module *module_list;
118 extern struct module kernel_module;
/*
 * kernel_text_address - test whether addr may be a kernel code address
 * (variant that also considers modules).
 *
 * First compares addr against the range _stext.._etext, both ends inclusive.
 * Then walks module_list until kernel_module is reached and tests every
 * module with mod_bound(addr, 0, mod).  The return statements are not
 * visible in this extract.
 */
120 static inline int kernel_text_address(unsigned long addr)
125 if (addr >= (unsigned long) &_stext &&
126 addr <= (unsigned long) &_etext)
129 for (mod = module_list; mod != &kernel_module; mod = mod->next) {
130 /* mod_bound tests for addr being inside the vmalloc'ed
131 * module area. Of course it'd be better to test only
132 * for the .text subset... */
133 if (mod_bound(addr, 0, mod)) {
/*
 * kernel_text_address - test whether addr lies in the range _stext.._etext.
 *
 * Both bounds are inclusive.  Returns non-zero when addr is inside the range.
 */
144 static inline int kernel_text_address(unsigned long addr)
146 return (addr >= (unsigned long) &_stext &&
147 addr <= (unsigned long) &_etext);
/*
 * in_exception_stack - find the exception stack that contains an address.
 *
 * For each of the N_EXCEPTION_STACKS entries of init_tss[cpu].ist, computes
 * end = ist[k] + EXCEPTION_STKSZ and, when stack lies in [ist[k], end]
 * (inclusive), returns end as a pointer.  The result for "no match" is not
 * visible in this extract.
 *
 * NOTE(review): this treats ist[k] as the lowest address of the stack --
 * confirm against the code that initialises the TSS IST entries.
 */
152 unsigned long *in_exception_stack(int cpu, unsigned long stack)
155 for (k = 0; k < N_EXCEPTION_STACKS; k++) {
156 unsigned long end = init_tss[cpu].ist[k] + EXCEPTION_STKSZ;
158 if (stack >= init_tss[cpu].ist[k] && stack <= end)
159 return (unsigned long *)end;
/*
 * show_trace - print a "Call Trace:" of likely return addresses on a stack.
 *
 * Scans stack words in up to three regions, printing every word accepted by
 * kernel_text_address() through printk_address() and adding its return value
 * to i:
 *   - the exception stack, bounded by the pointer obtained from
 *     in_exception_stack(); afterwards the scan continues at the value
 *     stored in estack_end[-2];
 *   - the per-CPU IRQ stack, bounded by cpu_pda[cpu].irqstackptr; afterwards
 *     the scan continues at the value stored in irqstack_end[-1];
 *   - the remaining stack, until the pointer is THREAD_SIZE aligned.
 * The statements that load addr and that use i are not visible here.
 */
164 void show_trace(unsigned long *stack)
167 unsigned long *irqstack, *irqstack_end, *estack_end;
168 const int cpu = safe_smp_processor_id();
171 printk("\nCall Trace: ");
174 estack_end = in_exception_stack(cpu, (unsigned long)stack);
176 while (stack < estack_end) {
178 if (kernel_text_address(addr)) {
179 i += printk_address(addr);
189 stack = (unsigned long *) estack_end[-2];
193 irqstack_end = (unsigned long *) (cpu_pda[cpu].irqstackptr);
194 irqstack = (unsigned long *) (cpu_pda[cpu].irqstackptr - IRQSTACKSIZE + 8);
196 if (stack >= irqstack && stack < irqstack_end) {
197 while (stack < irqstack_end) {
200 * If the address is either in the text segment of the
201 * kernel, or in the region which contains vmalloc'ed
202 * memory, it *may* be the address of a calling
203 * routine; if so, print it so that someone tracing
204 * down the cause of the crash will be able to figure
205 * out the call path that was taken.
207 if (kernel_text_address(addr)) {
208 i += printk_address(addr);
216 stack = (unsigned long *) (irqstack_end[-1]);
223 while (((long) stack & (THREAD_SIZE-1)) != 0) {
225 if (kernel_text_address(addr)) {
226 i += printk_address(addr);
/*
 * show_trace_task - print the call trace of a task from its saved rsp.
 *
 * Reads the stack pointer from tsk->thread.rsp.  The XOR test is non-zero
 * when rsp and the task_struct address differ in any bit selected by
 * PAGE_MASK<<1; the action taken in that case is not visible in this
 * extract.  Otherwise the trace is printed with show_trace().
 */
237 void show_trace_task(struct task_struct *tsk)
239 unsigned long rsp = tsk->thread.rsp;
241 /* User space on another CPU? */
242 if ((rsp ^ (unsigned long)tsk) & (PAGE_MASK<<1))
244 show_trace((unsigned long *)rsp);
/*
 * show_stack - hex-dump the top of a stack, then print its call trace.
 *
 * Dumps at most kstack_depth_to_print words in "%016lx " format.  While the
 * pointer is inside the per-CPU IRQ stack (bounds derived from
 * cpu_pda[cpu].irqstackptr and IRQSTACKSIZE) and reaches its end, the dump
 * continues at the value stored in irqstack_end[-1].  The loop also tests
 * for a THREAD_SIZE-aligned pointer and for every fourth word; the
 * statements guarded by those tests are not visible here.  Finishes with
 * show_trace(rsp).
 *
 * One statement points rsp at the address of the parameter itself; its
 * guarding condition is not visible (presumably the rsp == NULL case
 * described in the comment below -- TODO confirm).
 */
247 void show_stack(unsigned long * rsp)
249 unsigned long *stack;
251 const int cpu = safe_smp_processor_id();
252 unsigned long *irqstack_end = (unsigned long *) (cpu_pda[cpu].irqstackptr);
253 unsigned long *irqstack = (unsigned long *) (cpu_pda[cpu].irqstackptr - IRQSTACKSIZE);
255 // debugging aid: "show_stack(NULL);" prints the
256 // back trace for this cpu.
259 rsp=(unsigned long*)&rsp;
262 for(i=0; i < kstack_depth_to_print; i++) {
263 if (stack >= irqstack && stack <= irqstack_end) {
264 if (stack == irqstack_end) {
265 stack = (unsigned long *) (irqstack_end[-1]);
269 if (((long) stack & (THREAD_SIZE-1)) == 0)
272 if (i && ((i % 4) == 0))
274 printk("%016lx ", *stack++);
276 show_trace((unsigned long *)rsp);
/*
 * show_registers - print CPU and process identification for a trap frame
 * and, for in-kernel contexts, the stack and the code bytes at regs->rip.
 *
 * The CPU number comes from safe_smp_processor_id() and the task from
 * cpu_pda[cpu].pcurrent.  rsp is first set to the address of the rsp slot
 * inside the saved frame (&regs->rsp); the statement guarded by the
 * regs->rsp < TASK_SIZE test is not visible in this extract.  The stack is
 * printed with show_stack().  Code bytes are fetched with __get_user() so
 * that an unreadable rip yields " Bad RIP value." instead of a fault; the
 * statement guarded by the regs->rip < PAGE_OFFSET test is not visible.
 *
 * Fix: the address-of expression had been corrupted to a mis-encoded
 * character sequence and is restored to "&regs->rsp".
 */
279 void show_registers(struct pt_regs *regs)
284 const int cpu = safe_smp_processor_id();
285 struct task_struct *cur = cpu_pda[cpu].pcurrent;
287 rsp = (unsigned long) (&regs->rsp);
288 if (regs->rsp < TASK_SIZE) {
292 printk("CPU %d ", cpu);
294 printk("Process %s (pid: %d, stackpage=%08lx)\n",
295 cur->comm, cur->pid, 4096+(unsigned long)cur);
298 * When in-kernel, we also print out the stack and code at the
299 * time of the fault..
304 show_stack((unsigned long*)rsp);
307 if(regs->rip < PAGE_OFFSET)
313 if(__get_user(c, &((unsigned char*)regs->rip)[i])) {
315 printk(" Bad RIP value.");
/*
 * handle_BUG - report a "Kernel BUG at file:line" for a trap at regs->rip.
 *
 * Copies a struct bug_frame from regs->rip with __copy_from_user() and
 * checks that f.filename is not below __PAGE_OFFSET and that the ud2 bytes
 * are 0x0f 0x0b.  What happens when the copy or a check fails is not
 * visible in this extract (presumably an early return).  If the first byte
 * of the filename cannot be read, "unmapped filename" is printed instead.
 * The filename is limited to 50 characters in the message.
 */
324 void handle_BUG(struct pt_regs *regs)
331 if (__copy_from_user(&f, (struct bug_frame *) regs->rip,
332 sizeof(struct bug_frame)))
334 if ((unsigned long)f.filename < __PAGE_OFFSET ||
335 f.ud2[0] != 0x0f || f.ud2[1] != 0x0b)
337 if (__get_user(tmp, f.filename))
338 f.filename = "unmapped filename";
339 printk(KERN_EMERG "Kernel BUG at %.50s:%d\n", f.filename, f.line);
342 spinlock_t die_lock = SPIN_LOCK_UNLOCKED;
/*
 * __die - print the oops report for a fatal trap.
 *
 * Prints str with the low 16 bits of err, notifies the die chain with
 * DIE_OOPS, dumps the registers with show_registers(), and finally repeats
 * RIP (through printk_address) and RSP as a short summary.
 */
345 void __die(const char * str, struct pt_regs * regs, long err)
347 printk(KERN_EMERG "%s: %04lx\n", str, err & 0xffff);
348 notify_die(DIE_OOPS, (char *)str, regs, err, 255, SIGSEGV);
349 show_registers(regs);
350 /* Execute summary in case the oops scrolled away */
351 printk(KERN_EMERG "RIP ");
352 printk_address(regs->rip);
353 printk(" RSP <%016lx>\n", regs->rsp);
/*
 * prepare_die - save the flags and take die_lock before reporting an oops.
 *
 * Stores the current flags in *flags with __save_flags(), then tries
 * die_lock with spin_trylock().  If that fails and this CPU equals
 * die_owner, nothing is done (empty statement: a nested oops).  The
 * spin_lock() call is presumably the else branch for other CPUs; the "else"
 * line and the die_owner assignment are not visible in this extract.
 */
356 void prepare_die(unsigned long *flags)
361 cpu = safe_smp_processor_id();
362 /* racy, but better than risking deadlock. */
363 __save_flags(*flags);
365 if (!spin_trylock(&die_lock)) {
366 if (cpu == die_owner)
367 /* nested oops. should stop eventually */;
369 spin_lock(&die_lock);
/*
 * exit_die - release die_lock after an oops report.
 *
 * Unlocks die_lock while restoring the given flags, then calls __sti()
 * regardless of the restored state.
 */
374 void exit_die(unsigned long flags)
377 spin_unlock_irqrestore(&die_lock, flags);
378 __sti(); /* back scroll should work */
/*
 * die - report a fatal trap by calling __die(str, regs, err).
 *
 * The statements around the __die() call are not visible in this extract.
 */
382 void die(const char * str, struct pt_regs * regs, long err)
387 __die(str, regs, err);
/*
 * die_if_kernel - act only when the trapped context ran with __KERNEL_CS.
 *
 * The statement guarded by the test is not visible in this extract
 * (presumably a call to die() -- TODO confirm).
 */
392 static inline void die_if_kernel(const char * str, struct pt_regs * regs, long err)
394 if (regs->cs == __KERNEL_CS)
/*
 * get_cr2 - read control register CR2 into a local with "movq %cr2".
 *
 * The return statement is not visible in this extract; presumably the value
 * read is returned.
 */
398 static inline unsigned long get_cr2(void)
400 unsigned long address;
402 /* get the address */
403 __asm__("movq %%cr2,%0":"=r" (address));
/*
 * do_trap - common handling for the simple trap handlers.
 *
 * @trapnr:     trap number, stored in tsk->thread.trap_no
 * @signr:      signal to send for traps from user mode
 * @str:        description used in log and oops messages
 * @regs:       saved register frame
 * @error_code: stored in tsk->thread.error_code
 * @info:       siginfo for force_sig_info()
 *
 * Calls conditional_sti().  With CONFIG_CHECKING and CONFIG_LOCAL_APIC it
 * compares MSR_GS_BASE with this CPU's pda, rewrites the MSR on mismatch
 * and logs "wrong gs".
 *
 * When the low two bits of regs->cs are non-zero, the trap details are
 * recorded in the task.  A trace line is logged if exception_trace is set,
 * the task is not ptraced, and the handler for signr is SIG_IGN or SIG_DFL.
 * The signal goes out through force_sig_info() or force_sig(); the condition
 * choosing between them is not visible (presumably whether info is NULL).
 *
 * Otherwise search_exception_table(regs->rip) is consulted for a fixup and
 * die() is called; the conditions linking the two are not visible here.
 */
407 static void do_trap(int trapnr, int signr, char *str,
408 struct pt_regs * regs, long error_code, siginfo_t *info)
410 conditional_sti(regs);
412 #if defined(CONFIG_CHECKING) && defined(CONFIG_LOCAL_APIC)
415 struct x8664_pda *pda = cpu_pda + safe_smp_processor_id();
416 rdmsrl(MSR_GS_BASE, gs);
417 if (gs != (unsigned long)pda) {
418 wrmsrl(MSR_GS_BASE, pda);
419 printk("%s: wrong gs %lx expected %p\n", str, gs, pda);
424 if ((regs->cs & 3) != 0) {
425 struct task_struct *tsk = current;
426 tsk->thread.error_code = error_code;
427 tsk->thread.trap_no = trapnr;
428 if (exception_trace && !(tsk->ptrace & PT_PTRACED) &&
429 (tsk->sig->action[signr-1].sa.sa_handler == SIG_IGN ||
430 (tsk->sig->action[signr-1].sa.sa_handler == SIG_DFL)))
432 "%s[%d] trap %s rip:%lx rsp:%lx error:%lx\n",
433 tsk->comm, tsk->pid, str,
434 regs->rip,regs->rsp,error_code);
436 force_sig_info(signr, info, tsk);
438 force_sig(signr, tsk);
445 unsigned long fixup = search_exception_table(regs->rip);
449 die(str, regs, error_code);
/*
 * DO_ERROR - define the handler do_<name>(regs, error_code) for a trap.
 *
 * The generated function notifies the die chain with DIE_TRAP and compares
 * the result with NOTIFY_BAD (the guarded statement is not visible here),
 * then calls do_trap() with a NULL siginfo.
 */
454 #define DO_ERROR(trapnr, signr, str, name) \
455 asmlinkage void do_##name(struct pt_regs * regs, long error_code) \
457 if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) == NOTIFY_BAD) \
459 do_trap(trapnr, signr, str, regs, error_code, NULL); \
/*
 * DO_ERROR_INFO - like DO_ERROR, but the handler also fills in a siginfo.
 *
 * The generated do_<name>() sets info.si_signo to signr, info.si_code to
 * sicode and info.si_addr to siaddr (siaddr is evaluated inside the handler,
 * so it may refer to regs), then passes &info to do_trap().
 */
462 #define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \
463 asmlinkage void do_##name(struct pt_regs * regs, long error_code) \
466 info.si_signo = signr; \
468 info.si_code = sicode; \
469 info.si_addr = (void *)siaddr; \
470 if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr)==NOTIFY_BAD) \
472 do_trap(trapnr, signr, str, regs, error_code, &info); \
/*
 * Handler instantiations.  Each line expands to one do_<name>() function;
 * the arguments are the trap number, the signal sent for user-mode traps,
 * the message string and the handler name, plus si_code and si_addr for the
 * DO_ERROR_INFO form.
 */
475 DO_ERROR_INFO( 0, SIGFPE, "divide error", divide_error, FPE_INTDIV, regs->rip)
476 DO_ERROR( 3, SIGTRAP, "int3", int3);
477 DO_ERROR( 4, SIGSEGV, "overflow", overflow)
478 DO_ERROR( 5, SIGSEGV, "bounds", bounds)
479 DO_ERROR_INFO( 6, SIGILL, "invalid operand", invalid_op, ILL_ILLOPN, regs->rip)
480 DO_ERROR( 7, SIGSEGV, "device not available", device_not_available)
481 DO_ERROR( 8, SIGSEGV, "double fault", double_fault)
482 DO_ERROR( 9, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun)
483 DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS)
484 DO_ERROR(11, SIGBUS, "segment not present", segment_not_present)
485 DO_ERROR(12, SIGBUS, "stack segment", stack_segment)
486 DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, get_cr2())
487 DO_ERROR(18, SIGSEGV, "reserved", reserved)
489 extern void dump_pagetable(unsigned long);
/*
 * do_general_protection - handler for the general protection fault (the
 * code records trap number 13).
 *
 * Calls conditional_sti().  With CONFIG_CHECKING it compares MSR_GS_BASE
 * with this CPU's pda, rewrites the MSR on mismatch and logs a "wrong gs"
 * message.
 *
 * In the branch that uses the current task (its condition is not visible in
 * this extract) the error code and trap number are recorded.  A trace line
 * is logged if exception_trace is set, the task is not ptraced, and the
 * SIGSEGV handler is SIG_IGN or SIG_DFL.  SIGSEGV is then forced on the
 * task.
 *
 * Otherwise search_exception_table(regs->rip) is consulted for a fixup, the
 * die chain is notified with DIE_GPF and die() is called; the conditions
 * linking these steps are not visible here.
 */
491 asmlinkage void do_general_protection(struct pt_regs * regs, long error_code)
493 conditional_sti(regs);
495 #ifdef CONFIG_CHECKING
498 struct x8664_pda *pda = cpu_pda + safe_smp_processor_id();
499 rdmsrl(MSR_GS_BASE, gs);
500 if (gs != (unsigned long)pda) {
501 wrmsrl(MSR_GS_BASE, pda);
502 /* Avoid wakeup in printk in case this was triggered
503 by the segment reloads in __switch_to. Otherwise
504 the wake_up could deadlock on scheduler locks. */
507 "general protection handler: wrong gs %lx expected %p\n", gs, pda);
514 struct task_struct *tsk = current;
515 tsk->thread.error_code = error_code;
516 tsk->thread.trap_no = 13;
517 if (exception_trace && !(tsk->ptrace & PT_PTRACED) &&
518 (tsk->sig->action[SIGSEGV-1].sa.sa_handler == SIG_IGN ||
519 (tsk->sig->action[SIGSEGV-1].sa.sa_handler == SIG_DFL)))
521 "%s[%d] general protection rip:%lx rsp:%lx error:%lx\n",
523 regs->rip,regs->rsp,error_code);
524 force_sig(SIGSEGV, tsk);
531 fixup = search_exception_table(regs->rip);
536 notify_die(DIE_GPF, "general protection fault", regs, error_code,
538 die("general protection fault", regs, error_code);
/*
 * mem_parity_error - report an NMI attributed to a memory parity error.
 *
 * Prints two warning lines, then builds a new value from the low four bits
 * of reason with bit 2 (value 4) set.  The statement that uses the new
 * value is not visible in this extract.
 */
542 static void mem_parity_error(unsigned char reason, struct pt_regs * regs)
544 printk("Uhhuh. NMI received. Dazed and confused, but trying to continue\n");
545 printk("You probably have a hardware problem with your RAM chips\n");
547 /* Clear and disable the memory parity error line. */
548 reason = (reason & 0xf) | 4;
/*
 * io_check_error - report an NMI attributed to an I/O check (IOCK) error.
 *
 * Prints a message and the registers, then builds a new value from the low
 * four bits of reason with bit 3 (value 8) set.  The statements that use
 * the new value are not visible in this extract.
 */
552 static void io_check_error(unsigned char reason, struct pt_regs * regs)
554 printk("NMI: IOCK error (debug interrupt?)\n");
555 show_registers(regs);
557 /* Re-enable the IOCK line, wait for a few seconds */
558 reason = (reason & 0xf) | 8;
/*
 * unknown_nmi_error - log an NMI whose reason byte matched no known source.
 *
 * Prints the reason byte in hex followed by two advisory lines; regs is not
 * used by the visible statements.
 */
565 static void unknown_nmi_error(unsigned char reason, struct pt_regs * regs)
567 printk("Uhhuh. NMI received for unknown reason %02x.\n", reason);
568 printk("Dazed and confused, but trying to continue\n");
569 printk("Do you have a strange power saving mode enabled?\n");
/*
 * do_nmi - NMI entry point.
 *
 * Reads the reason byte from I/O port 0x61 and increments the NMI count of
 * the current CPU.
 *
 * If neither of the two top bits (mask 0xc0) is set, the NMI is passed to
 * nmi_watchdog_tick() when CONFIG_X86_LOCAL_APIC is enabled;
 * unknown_nmi_error() is also called on this path, under conditions that
 * are not visible in this extract.
 *
 * Otherwise the die chain is notified with DIE_NMI, and mem_parity_error()
 * and io_check_error() are called; the tests selecting each are not visible
 * here.  The two dummy reads from port 0x71 belong to the "reassert NMI"
 * sequence; the accompanying writes are not visible.
 */
572 asmlinkage void do_nmi(struct pt_regs * regs)
574 unsigned char reason = inb(0x61);
576 ++nmi_count(safe_smp_processor_id());
578 if (!(reason & 0xc0)) {
579 #if CONFIG_X86_LOCAL_APIC
581 * Ok, so this is none of the documented NMI sources,
582 * so it must be the NMI watchdog.
585 nmi_watchdog_tick(regs, reason);
589 unknown_nmi_error(reason, regs);
592 if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) == NOTIFY_BAD)
595 mem_parity_error(reason, regs);
597 io_check_error(reason, regs);
600 * Reassert NMI in case it became active meanwhile
601 * as it's edge-triggered.
604 inb(0x71); /* dummy */
606 inb(0x71); /* dummy */
/*
 * do_debug - handler for the debug exception (the code records trap
 * number 1).
 *
 * Reads debug register DB6 into condition, then calls conditional_sti().
 * With CONFIG_CHECKING it compares MSR_GS_BASE with this CPU's pda,
 * rewrites the MSR on mismatch and logs a "wrong gs" message.
 *
 * Breakpoint conditions (DR_TRAP0..3) are examined against the task's
 * debugreg[7]; the statement guarded by that test is not visible in this
 * extract.  condition is saved in tsk->thread.debugreg[6].  For DR_STEP the
 * code tests for a ring-0 code segment and for a task with PT_DTRACE set
 * but PT_PTRACED clear; the guarded statements are not visible.
 *
 * On the normal path it records the trap number and error code and forces
 * SIGTRAP with si_code TRAP_BRKPT and si_addr regs->rip, then writes 0 to
 * DB7 and notifies the die chain with DIE_DEBUG.  A separate path notifies
 * with "debug2" and clears TF_MASK in regs->eflags unless the notifier
 * returned NOTIFY_BAD.
 */
609 asmlinkage void do_debug(struct pt_regs * regs, long error_code)
611 unsigned long condition;
612 struct task_struct *tsk = current;
615 asm("movq %%db6,%0" : "=r" (condition));
617 conditional_sti(regs);
619 #ifdef CONFIG_CHECKING
621 /* XXX: interaction with debugger - could destroy gs */
623 struct x8664_pda *pda = cpu_pda + safe_smp_processor_id();
624 rdmsrl(MSR_GS_BASE, gs);
625 if (gs != (unsigned long)pda) {
626 wrmsrl(MSR_GS_BASE, pda);
627 printk(KERN_EMERG "debug handler: wrong gs %lx expected %p\n", gs, pda);
632 /* Mask out spurious debug traps due to lazy DR7 setting */
633 if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) {
634 if (!tsk->thread.debugreg[7]) {
639 tsk->thread.debugreg[6] = condition;
641 /* Mask out spurious TF errors due to lazy TF clearing */
642 if (condition & DR_STEP) {
644 * The TF error should be masked out only if the current
645 * process is not traced and if the TRAP flag has been set
646 * previously by a tracing process (condition detected by
647 * the PT_DTRACE flag); remember that the i386 TRAP flag
648 * can be modified by the process itself in user mode,
649 * allowing programs to debug themselves without the ptrace()
652 if ((regs->cs & 3) == 0)
654 if ((tsk->ptrace & (PT_DTRACE|PT_PTRACED)) == PT_DTRACE)
658 /* Ok, finally something we can handle */
659 tsk->thread.trap_no = 1;
660 tsk->thread.error_code = error_code;
661 info.si_signo = SIGTRAP;
663 info.si_code = TRAP_BRKPT;
664 if ((regs->cs & 3) == 0)
667 info.si_addr = (void *)regs->rip;
668 force_sig_info(SIGTRAP, &info, tsk);
670 asm volatile("movq %0,%%db7"::"r"(0UL));
671 notify_die(DIE_DEBUG, "debug", regs, error_code, 1, SIGTRAP);
675 /* XXX: could cause spurious errors */
676 if (notify_die(DIE_DEBUG, "debug2", regs, error_code, 1, SIGTRAP) != NOTIFY_BAD)
677 regs->eflags &= ~TF_MASK;
682 * Note that we play around with the 'TS' bit in an attempt to get
683 * the correct behaviour even in the presence of the asynchronous
/*
 * math_error - turn an x87 FPU exception into a SIGFPE for a task.
 *
 * Records trap number 16 with error code 0 in the task and prepares a
 * siginfo with si_signo SIGFPE and a default si_code of __SI_FAULT.  The
 * control word (cwd) and status word (swd) come from get_fpu_cwd() and
 * get_fpu_swd().  The switch value combines the exception bits that are set
 * in swd and not set in cwd (mask 0x3f) with bits 0x240 of swd, and maps
 * single exceptions to FPE_FLTINV, FPE_FLTUND, FPE_FLTDIV, FPE_FLTOVF or
 * FPE_FLTRES.  Finally the signal is forced on the task.
 *
 * The assignment of task and the use of the rip argument are not visible in
 * this extract.
 */
686 void math_error(void *rip)
688 struct task_struct * task;
690 unsigned short cwd, swd;
693 * Save the info for the exception handler and clear the error.
697 task->thread.trap_no = 16;
698 task->thread.error_code = 0;
699 info.si_signo = SIGFPE;
701 info.si_code = __SI_FAULT;
704 * (~cwd & swd) will mask out exceptions that are not set to unmasked
705 * status. 0x3f is the exception bits in these regs, 0x200 is the
706 * C1 reg you need in case of a stack fault, 0x040 is the stack
707 * fault bit. We should only be taking one exception at a time,
708 * so if this combination doesn't produce any single exception,
709 * then we have a bad program that isn't syncronizing its FPU usage
710 * and it will suffer the consequences since we won't be able to
711 * fully reproduce the context of the exception
713 cwd = get_fpu_cwd(task);
714 swd = get_fpu_swd(task);
715 switch (((~cwd) & swd & 0x3f) | (swd & 0x240)) {
719 case 0x001: /* Invalid Op */
720 case 0x041: /* Stack Fault */
721 case 0x241: /* Stack Fault | Direction */
722 info.si_code = FPE_FLTINV;
724 case 0x002: /* Denormalize */
725 case 0x010: /* Underflow */
726 info.si_code = FPE_FLTUND;
728 case 0x004: /* Zero Divide */
729 info.si_code = FPE_FLTDIV;
731 case 0x008: /* Overflow */
732 info.si_code = FPE_FLTOVF;
734 case 0x020: /* Precision */
735 info.si_code = FPE_FLTRES;
738 force_sig_info(SIGFPE, &info, task);
/*
 * do_coprocessor_error - handler that forwards to math_error().
 *
 * Calls conditional_sti() and then math_error() with regs->rip; error_code
 * is not used by the visible statements.
 */
741 asmlinkage void do_coprocessor_error(struct pt_regs * regs, long error_code)
743 conditional_sti(regs);
744 math_error((void *)regs->rip);
/*
 * bad_intr - log the message "bad interrupt" (no log level, no newline).
 */
747 asmlinkage void bad_intr(void)
749 printk("bad interrupt");
/*
 * simd_math_error - turn a SIMD floating-point exception into a SIGFPE.
 *
 * Records trap number 19 with error code 0 in the task and prepares a
 * siginfo with si_signo SIGFPE and a default si_code of __SI_FAULT.  MXCSR
 * is read with get_fpu_mxcsr(); its mask bits (0x1f80) are shifted right by
 * 7 and inverted, then ANDed with the exception bits (0x3f), leaving only
 * the exceptions whose mask bit is clear.  The result selects FPE_FLTINV,
 * FPE_FLTUND, FPE_FLTDIV, FPE_FLTOVF or FPE_FLTRES, and the signal is
 * forced on the task.
 *
 * The assignment of task and the use of the rip argument are not visible in
 * this extract.
 */
752 static inline void simd_math_error(void *rip)
754 struct task_struct * task;
756 unsigned short mxcsr;
759 * Save the info for the exception handler and clear the error.
763 task->thread.trap_no = 19;
764 task->thread.error_code = 0;
765 info.si_signo = SIGFPE;
767 info.si_code = __SI_FAULT;
770 * The SIMD FPU exceptions are handled a little differently, as there
771 * is only a single status/control register. Thus, to determine which
772 * unmasked exception was caught we must mask the exception mask bits
773 * at 0x1f80, and then use these to mask the exception bits at 0x3f.
775 mxcsr = get_fpu_mxcsr(task);
776 switch (~((mxcsr & 0x1f80) >> 7) & (mxcsr & 0x3f)) {
780 case 0x001: /* Invalid Op */
781 info.si_code = FPE_FLTINV;
783 case 0x002: /* Denormalize */
784 case 0x010: /* Underflow */
785 info.si_code = FPE_FLTUND;
787 case 0x004: /* Zero Divide */
788 info.si_code = FPE_FLTDIV;
790 case 0x008: /* Overflow */
791 info.si_code = FPE_FLTOVF;
793 case 0x020: /* Precision */
794 info.si_code = FPE_FLTRES;
797 force_sig_info(SIGFPE, &info, task);
/*
 * do_simd_coprocessor_error - handler that forwards to simd_math_error().
 *
 * Calls conditional_sti() and then simd_math_error() with regs->rip.  The
 * remainder of the parameter list is not visible in this extract.
 */
800 asmlinkage void do_simd_coprocessor_error(struct pt_regs * regs,
803 conditional_sti(regs);
804 simd_math_error((void *)regs->rip);
807 asmlinkage void do_spurious_interrupt_bug(struct pt_regs * regs)
812 * 'math_state_restore()' saves the current math information in the
813 * old math state array, and gets the new ones from the current task
815 * Careful.. There are problems with IBM-designed IRQ13 behaviour.
816 * Don't touch unless you *really* know how it works.
/*
 * math_state_restore - reload the current task's saved FPU state.
 *
 * Calls clts() first, then restores the task's fxsave area with
 * restore_fpu_checking() and sets PF_USEDFPU in the task flags.  Statements
 * between clts() and the restore are not visible in this extract.
 */
818 asmlinkage void math_state_restore(void)
820 struct task_struct *me = current;
821 clts(); /* Allow maths ops (or we recurse) */
825 restore_fpu_checking(&me->thread.i387.fxsave);
826 me->flags |= PF_USEDFPU; /* So we fxsave on switch_to() */
829 asmlinkage void math_emulate(void)
/*
 * do_call_debug - notify the die chain of a "debug call" event (DIE_CALL),
 * passing error code 0, trap number 255 and SIGINT.
 */
834 void do_call_debug(struct pt_regs *regs)
836 notify_die(DIE_CALL, "debug call", regs, 0, 255, SIGINT);
/*
 * do_machine_check - log "Machine check ignored" at KERN_INFO level; regs
 * is not used by the visible statement.
 */
840 void do_machine_check(struct pt_regs *regs)
842 printk(KERN_INFO "Machine check ignored\n");
/*
 * trap_init - install the gates for exception vectors 0-19.
 *
 * Most vectors are installed with set_intr_gate().  Vectors 3-5 (int3,
 * overflow, bounds) use set_system_gate() so that, per the comment below,
 * they can be called from all privilege levels.  NMI (2) and double fault
 * (8) use set_intr_gate_ist() with NMI_STACK and DOUBLEFAULT_STACK.  With
 * CONFIG_IA32_EMULATION the ia32 syscall entry is installed at
 * IA32_SYSCALL_VECTOR.  The end of the function is not visible in this
 * extract.
 *
 * Fix: the vector 0 argument had been corrupted to a mis-encoded character
 * sequence and is restored to "&divide_error", matching the declared
 * divide_error() stub and the form of every other entry.
 */
846 void __init trap_init(void)
848 set_intr_gate(0,&divide_error);
849 set_intr_gate(1,&debug);
850 set_intr_gate_ist(2,&nmi,NMI_STACK);
851 set_system_gate(3,&int3); /* int3-5 can be called from all */
852 set_system_gate(4,&overflow);
853 set_system_gate(5,&bounds);
854 set_intr_gate(6,&invalid_op);
855 set_intr_gate(7,&device_not_available);
856 set_intr_gate_ist(8,&double_fault, DOUBLEFAULT_STACK);
857 set_intr_gate(9,&coprocessor_segment_overrun);
858 set_intr_gate(10,&invalid_TSS);
859 set_intr_gate(11,&segment_not_present);
860 set_intr_gate(12,&stack_segment);
861 set_intr_gate(13,&general_protection);
862 set_intr_gate(14,&page_fault);
863 set_intr_gate(15,&spurious_interrupt_bug);
864 set_intr_gate(16,&coprocessor_error);
865 set_intr_gate(17,&alignment_check);
866 set_intr_gate(18,&machine_check);
867 set_intr_gate(19,&simd_coprocessor_error);
869 #ifdef CONFIG_IA32_EMULATION
870 set_system_gate(IA32_SYSCALL_VECTOR, ia32_syscall);
874 * Should be a barrier for any external CPU state.