OSDN Git Service

Merge remote-tracking branch 'drm/drm-next' into drm-misc-next
[uclinux-h8/linux.git] / arch / powerpc / platforms / pseries / ras.c
index 851ce32..d97d527 100644 (file)
@@ -27,6 +27,7 @@
 #include <asm/machdep.h>
 #include <asm/rtas.h>
 #include <asm/firmware.h>
+#include <asm/mce.h>
 
 #include "pseries.h"
 
@@ -50,6 +51,101 @@ static irqreturn_t ras_hotplug_interrupt(int irq, void *dev_id);
 static irqreturn_t ras_epow_interrupt(int irq, void *dev_id);
 static irqreturn_t ras_error_interrupt(int irq, void *dev_id);
 
+/*
+ * RTAS pseries MCE errorlog section.
+ *
+ * Multi-byte fields are declared big-endian (__be32/__be64) and have to be
+ * converted before they are used as CPU-order values.
+ */
+struct pseries_mc_errorlog {
+       __be32  fru_id;
+       __be32  proc_id;
+       /* One of the MC_ERROR_TYPE_* values. */
+       u8      error_type;
+       /*
+        * sub_err_type (1 byte). The meaning of the bit fields depends on
+        * error_type.
+        *
+        *   MSB0
+        *   |
+        *   V
+        *   01234567
+        *   XXXXXXXX
+        *
+        * For error_type == MC_ERROR_TYPE_UE
+        *   XXXXXXXX
+        *   X          1: Permanent or Transient UE.
+        *    X         1: Effective address provided.
+        *     X        1: Logical address provided.
+        *      XX      2: Reserved.
+        *        XXX   3: Type of UE error.
+        *
+        * For error_type != MC_ERROR_TYPE_UE
+        *   XXXXXXXX
+        *   X          1: Effective address provided.
+        *    XXXXX     5: Reserved.
+        *         XX   2: Type of SLB/ERAT/TLB error.
+        */
+       u8      sub_err_type;
+       u8      reserved_1[6];
+       /*
+        * Per the sub_err_type layout above, an address field is meaningful
+        * only when its "address provided" bit is set.
+        */
+       __be64  effective_address;
+       __be64  logical_address;
+} __packed;
+
+/*
+ * RTAS pseries MCE error types (pseries_mc_errorlog.error_type).
+ * The numbering is sparse: 0x03 and 0x06 have no name here.
+ */
+#define MC_ERROR_TYPE_UE               0x00
+#define MC_ERROR_TYPE_SLB              0x01
+#define MC_ERROR_TYPE_ERAT             0x02
+#define MC_ERROR_TYPE_TLB              0x04
+#define MC_ERROR_TYPE_D_CACHE          0x05
+#define MC_ERROR_TYPE_I_CACHE          0x07
+
+/*
+ * RTAS pseries MCE error sub types (the type bits of
+ * pseries_mc_errorlog.sub_err_type).
+ */
+
+/* Sub types for MC_ERROR_TYPE_UE. */
+#define MC_ERROR_UE_INDETERMINATE              0
+#define MC_ERROR_UE_IFETCH                     1
+#define MC_ERROR_UE_PAGE_TABLE_WALK_IFETCH     2
+#define MC_ERROR_UE_LOAD_STORE                 3
+#define MC_ERROR_UE_PAGE_TABLE_WALK_LOAD_STORE 4
+
+/* Sub types for MC_ERROR_TYPE_SLB; these start at 0. */
+#define MC_ERROR_SLB_PARITY            0
+#define MC_ERROR_SLB_MULTIHIT          1
+#define MC_ERROR_SLB_INDETERMINATE     2
+
+/* Sub types for MC_ERROR_TYPE_ERAT; these start at 1. */
+#define MC_ERROR_ERAT_PARITY           1
+#define MC_ERROR_ERAT_MULTIHIT         2
+#define MC_ERROR_ERAT_INDETERMINATE    3
+
+/* Sub types for MC_ERROR_TYPE_TLB; these start at 1. */
+#define MC_ERROR_TLB_PARITY            1
+#define MC_ERROR_TLB_MULTIHIT          2
+#define MC_ERROR_TLB_INDETERMINATE     3
+
+/*
+ * Extract the error sub type from @mlog->sub_err_type. UE logs keep it in
+ * the low three bits, SLB/ERAT/TLB logs in the low two bits; every other
+ * error type yields 0.
+ */
+static inline u8 rtas_mc_error_sub_type(const struct pseries_mc_errorlog *mlog)
+{
+       const u8 type = mlog->error_type;
+
+       if (type == MC_ERROR_TYPE_UE)
+               return mlog->sub_err_type & 0x07;
+
+       if (type == MC_ERROR_TYPE_SLB || type == MC_ERROR_TYPE_ERAT ||
+           type == MC_ERROR_TYPE_TLB)
+               return mlog->sub_err_type & 0x03;
+
+       return 0;
+}
+
+/*
+ * Return the effective address stored in @mlog, converted to CPU byte
+ * order, or 0 when the log does not flag an effective address as provided
+ * (or the error type carries none).
+ */
+static
+inline u64 rtas_mc_get_effective_addr(const struct pseries_mc_errorlog *mlog)
+{
+       __be64 addr = 0;
+
+       switch (mlog->error_type) {
+       case    MC_ERROR_TYPE_UE:
+               /* UE: "effective address provided" is MSB0 bit 1. */
+               if (mlog->sub_err_type & 0x40)
+                       addr = mlog->effective_address;
+               break;
+       case    MC_ERROR_TYPE_SLB:
+       case    MC_ERROR_TYPE_ERAT:
+       case    MC_ERROR_TYPE_TLB:
+               /* SLB/ERAT/TLB: "effective address provided" is MSB0 bit 0. */
+               if (mlog->sub_err_type & 0x80)
+                       addr = mlog->effective_address;
+               /* Explicit break: do not rely on falling into default. */
+               break;
+       default:
+               break;
+       }
+       return be64_to_cpu(addr);
+}
 
 /*
  * Enable the hotplug interrupt late because processing them may touch other
@@ -237,8 +333,9 @@ static irqreturn_t ras_hotplug_interrupt(int irq, void *dev_id)
         * hotplug events on the ras_log_buf to be handled by rtas_errd.
         */
        if (hp_elog->resource == PSERIES_HP_ELOG_RESOURCE_MEM ||
-           hp_elog->resource == PSERIES_HP_ELOG_RESOURCE_CPU)
-               queue_hotplug_event(hp_elog, NULL, NULL);
+           hp_elog->resource == PSERIES_HP_ELOG_RESOURCE_CPU ||
+           hp_elog->resource == PSERIES_HP_ELOG_RESOURCE_PMEM)
+               queue_hotplug_event(hp_elog);
        else
                log_error(ras_log_buf, ERR_TYPE_RTAS_LOG, 0);
 
@@ -427,6 +524,188 @@ int pSeries_system_reset_exception(struct pt_regs *regs)
        return 0; /* need to perform reset */
 }
 
+/*
+ * Look up @val in the string table @ar, yielding "Unknown" when @val is
+ * past the end of the table. ARRAY_SIZE() is applied to @ar, so it must be
+ * an array rather than a pointer. @val is evaluated twice; pass an
+ * expression without side effects.
+ */
+#define VAL_TO_STRING(ar, val) \
+       (((val) < ARRAY_SIZE(ar)) ? (ar)[(val)] : "Unknown")
+
+/*
+ * Print a human readable summary of the machine check described by @errp:
+ * severity, recovery state, NIP, initiator, error type/sub type and, when
+ * one is provided, the effective address. @regs is the interrupted context.
+ *
+ * Only a one-line notice is printed when @errp has no extended log, and
+ * nothing at all when the extended log has no MCE section.
+ */
+static void pseries_print_mce_info(struct pt_regs *regs,
+                                  struct rtas_error_log *errp)
+{
+       const char *level, *sevstr;
+       struct pseries_errorlog *pseries_log;
+       struct pseries_mc_errorlog *mce_log;
+       u8 error_type, err_sub_type;
+       u64 addr;
+       u8 initiator = rtas_error_initiator(errp);
+       int disposition = rtas_error_disposition(errp);
+
+       static const char * const initiators[] = {
+               "Unknown",
+               "CPU",
+               "PCI",
+               "ISA",
+               "Memory",
+               "Power Mgmt",
+       };
+       /*
+        * Indexed by MC_ERROR_TYPE_*. The type numbering is sparse, so the
+        * table is keyed explicitly and the unnamed values 0x03 and 0x06 get
+        * "Unknown" rather than being left NULL.
+        */
+       static const char * const mc_err_types[] = {
+               [MC_ERROR_TYPE_UE]      = "UE",
+               [MC_ERROR_TYPE_SLB]     = "SLB",
+               [MC_ERROR_TYPE_ERAT]    = "ERAT",
+               [0x03]                  = "Unknown",
+               [MC_ERROR_TYPE_TLB]     = "TLB",
+               [MC_ERROR_TYPE_D_CACHE] = "D-Cache",
+               [0x06]                  = "Unknown",
+               [MC_ERROR_TYPE_I_CACHE] = "I-Cache",
+       };
+       static const char * const mc_ue_types[] = {
+               "Indeterminate",
+               "Instruction fetch",
+               "Page table walk ifetch",
+               "Load/Store",
+               "Page table walk Load/Store",
+       };
+
+       /* SLB sub errors valid values are 0x0, 0x1, 0x2 */
+       static const char * const mc_slb_types[] = {
+               "Parity",
+               "Multihit",
+               "Indeterminate",
+       };
+
+       /* TLB and ERAT sub errors valid values are 0x1, 0x2, 0x3 */
+       static const char * const mc_soft_types[] = {
+               "Unknown",
+               "Parity",
+               "Multihit",
+               "Indeterminate",
+       };
+
+       if (!rtas_error_extended(errp)) {
+               pr_err("Machine check interrupt: Missing extended error log\n");
+               return;
+       }
+
+       pseries_log = get_pseries_errorlog(errp, PSERIES_ELOG_SECT_ID_MCE);
+       if (pseries_log == NULL)
+               return;
+
+       mce_log = (struct pseries_mc_errorlog *)pseries_log->data;
+
+       error_type = mce_log->error_type;
+       err_sub_type = rtas_mc_error_sub_type(mce_log);
+
+       /* Pick the printk level and severity prefix used for every line. */
+       switch (rtas_error_severity(errp)) {
+       case RTAS_SEVERITY_NO_ERROR:
+               level = KERN_INFO;
+               sevstr = "Harmless";
+               break;
+       case RTAS_SEVERITY_WARNING:
+               level = KERN_WARNING;
+               sevstr = "";
+               break;
+       case RTAS_SEVERITY_ERROR:
+       case RTAS_SEVERITY_ERROR_SYNC:
+               level = KERN_ERR;
+               sevstr = "Severe";
+               break;
+       case RTAS_SEVERITY_FATAL:
+       default:
+               level = KERN_ERR;
+               sevstr = "Fatal";
+               break;
+       }
+
+#ifdef CONFIG_PPC_BOOK3S_64
+       /* Display faulty slb contents for SLB errors. */
+       if (error_type == MC_ERROR_TYPE_SLB)
+               slb_dump_contents(local_paca->mce_faulty_slbs);
+#endif
+
+       printk("%s%s Machine check interrupt [%s]\n", level, sevstr,
+              disposition == RTAS_DISP_FULLY_RECOVERED ?
+              "Recovered" : "Not recovered");
+       if (user_mode(regs)) {
+               printk("%s  NIP: [%016lx] PID: %d Comm: %s\n", level,
+                      regs->nip, current->pid, current->comm);
+       } else {
+               printk("%s  NIP [%016lx]: %pS\n", level, regs->nip,
+                      (void *)regs->nip);
+       }
+       printk("%s  Initiator: %s\n", level,
+              VAL_TO_STRING(initiators, initiator));
+
+       /* Each error type decodes its sub type with its own string table. */
+       switch (error_type) {
+       case MC_ERROR_TYPE_UE:
+               printk("%s  Error type: %s [%s]\n", level,
+                      VAL_TO_STRING(mc_err_types, error_type),
+                      VAL_TO_STRING(mc_ue_types, err_sub_type));
+               break;
+       case MC_ERROR_TYPE_SLB:
+               printk("%s  Error type: %s [%s]\n", level,
+                      VAL_TO_STRING(mc_err_types, error_type),
+                      VAL_TO_STRING(mc_slb_types, err_sub_type));
+               break;
+       case MC_ERROR_TYPE_ERAT:
+       case MC_ERROR_TYPE_TLB:
+               printk("%s  Error type: %s [%s]\n", level,
+                      VAL_TO_STRING(mc_err_types, error_type),
+                      VAL_TO_STRING(mc_soft_types, err_sub_type));
+               break;
+       default:
+               printk("%s  Error type: %s\n", level,
+                      VAL_TO_STRING(mc_err_types, error_type));
+               break;
+       }
+
+       /* A zero address means none was provided, so print nothing. */
+       addr = rtas_mc_get_effective_addr(mce_log);
+       if (addr)
+               printk("%s    Effective address: %016llx\n", level, addr);
+}
+
+/*
+ * Try to recover from the machine check described by @errp and return the
+ * resulting disposition. The disposition reported in @errp is returned as-is
+ * unless the log has an MCE section describing a not-yet-recovered SLB or
+ * ERAT error (CONFIG_PPC_BOOK3S_64 only); in that case the SLB is flushed
+ * and reloaded and @errp is marked as recovered.
+ */
+static int mce_handle_error(struct rtas_error_log *errp)
+{
+       int disposition = rtas_error_disposition(errp);
+       struct pseries_errorlog *pseries_log;
+       struct pseries_mc_errorlog *mce_log;
+       u8 error_type;
+
+       if (!rtas_error_extended(errp))
+               return disposition;
+
+       pseries_log = get_pseries_errorlog(errp, PSERIES_ELOG_SECT_ID_MCE);
+       if (!pseries_log)
+               return disposition;
+
+       mce_log = (struct pseries_mc_errorlog *)pseries_log->data;
+       error_type = mce_log->error_type;
+
+#ifdef CONFIG_PPC_BOOK3S_64
+       if (disposition == RTAS_DISP_NOT_RECOVERED &&
+           (error_type == MC_ERROR_TYPE_SLB ||
+            error_type == MC_ERROR_TYPE_ERAT)) {
+               /*
+                * Keep a copy of the old SLB contents in the paca before
+                * flushing; it gets printed once we are in virtual mode.
+                * Reading an SLB entry that has a parity error can raise
+                * another MCE, so only save on the first level of
+                * recursion.
+                */
+               if (local_paca->in_mce == 1)
+                       slb_save_contents(local_paca->mce_faulty_slbs);
+               flush_and_reload_slb();
+               disposition = RTAS_DISP_FULLY_RECOVERED;
+               rtas_set_disposition_recovered(errp);
+       }
+#endif
+
+       return disposition;
+}
+
 /*
  * Process MCE rtas errlog event.
  */
@@ -452,8 +731,11 @@ static int recover_mce(struct pt_regs *regs, struct rtas_error_log *err)
        int recovered = 0;
        int disposition = rtas_error_disposition(err);
 
+       pseries_print_mce_info(regs, err);
+
        if (!(regs->msr & MSR_RI)) {
                /* If MSR_RI isn't set, we cannot recover */
+               pr_err("Machine check interrupt unrecoverable: MSR(RI=0)\n");
                recovered = 0;
 
        } else if (disposition == RTAS_DISP_FULLY_RECOVERED) {
@@ -503,11 +785,31 @@ int pSeries_machine_check_exception(struct pt_regs *regs)
        struct rtas_error_log *errp;
 
        if (fwnmi_active) {
-               errp = fwnmi_get_errinfo(regs);
                fwnmi_release_errinfo();
+               errp = fwnmi_get_errlog();
                if (errp && recover_mce(regs, errp))
                        return 1;
        }
 
        return 0;
 }
+
+/*
+ * Real mode part of machine check handling. Fetches the FWNMI error log for
+ * @regs and lets mce_handle_error() attempt recovery.
+ *
+ * Returns 1 when the error ends up fully recovered, 0 otherwise (including
+ * when FWNMI is not active or no error log could be obtained).
+ */
+long pseries_machine_check_realmode(struct pt_regs *regs)
+{
+       struct rtas_error_log *errp;
+       int disposition;
+
+       if (!fwnmi_active)
+               return 0;
+
+       errp = fwnmi_get_errinfo(regs);
+       /*
+        * mce_handle_error() reads the log unconditionally, so treat a
+        * missing log as "not recovered", the same way
+        * pSeries_machine_check_exception() checks errp before using it.
+        */
+       if (!errp)
+               return 0;
+
+       /*
+        * Call to fwnmi_release_errinfo() in real mode causes kernel
+        * to panic. Hence we will call it as soon as we go into
+        * virtual mode.
+        */
+       disposition = mce_handle_error(errp);
+       if (disposition == RTAS_DISP_FULLY_RECOVERED)
+               return 1;
+
+       return 0;
+}