arch/powerpc/platforms/pseries/ras.c

   1 // SPDX-License-Identifier: GPL-2.0-or-later
   2 /*
   3  * Copyright (C) 2001 Dave Engebretsen IBM Corporation
   4  */
   5
   6 #include <linux/sched.h>
   7 #include <linux/interrupt.h>
   8 #include <linux/irq.h>
   9 #include <linux/of.h>
  10 #include <linux/fs.h>
  11 #include <linux/reboot.h>
  12 #include <linux/irq_work.h>
  13
  14 #include <asm/machdep.h>
  15 #include <asm/rtas.h>
  16 #include <asm/firmware.h>
  17 #include <asm/mce.h>
  18
  19 #include "pseries.h"
  20
/*
 * Buffer whose physical address is handed to the RTAS "check-exception"
 * call by the interrupt handlers in this file; firmware places the error
 * log there.
 */
static unsigned char ras_log_buf[RTAS_ERROR_LOG_MAX];
/* Held by the RAS interrupt handlers while they fill and read ras_log_buf. */
static DEFINE_SPINLOCK(ras_log_buf_lock);

/* RTAS token for "check-exception"; looked up in init_ras_IRQ(). */
static int ras_check_exception_token;

/*
 * irq_work used by the machine check path to log the RTAS error log
 * outside of the exception handler itself.
 */
static void mce_process_errlog_event(struct irq_work *work);
static struct irq_work mce_errlog_process_work = {
        .func = mce_process_errlog_event,
};

/* Sensor token/index passed to rtas_get_sensor_fast() for the EPOW sensor. */
#define EPOW_SENSOR_TOKEN       9
#define EPOW_SENSOR_INDEX       0

/*
 * EPOW events counter variable: incremented for every non-reset EPOW event
 * and decremented (when non-zero) on an EPOW_RESET event.
 */
static int num_epow_events;

/* Interrupt handlers defined later in this file. */
static irqreturn_t ras_hotplug_interrupt(int irq, void *dev_id);
static irqreturn_t ras_epow_interrupt(int irq, void *dev_id);
static irqreturn_t ras_error_interrupt(int irq, void *dev_id);
  40
/*
 * RTAS pseries MCE errorlog section.
 *
 * Multi-byte fields are big-endian as delivered by firmware (hence the
 * __be32/__be64 types); readers convert them with be64_to_cpu() and friends.
 */
struct pseries_mc_errorlog {
        __be32  fru_id;
        __be32  proc_id;
        /* One of the MC_ERROR_TYPE_* values. */
        u8      error_type;
        /*
         * sub_err_type (1 byte). The bit fields depend on error_type.
         *
         *   MSB0
         *   |
         *   V
         *   01234567
         *   XXXXXXXX
         *
         * For error_type == MC_ERROR_TYPE_UE
         *   XXXXXXXX
         *   X          1: Permanent or Transient UE.
         *    X         1: Effective address provided.
         *     X        1: Logical address provided.
         *      XX      2: Reserved.
         *        XXX   3: Type of UE error.
         *
         * For error_type != MC_ERROR_TYPE_UE
         *   XXXXXXXX
         *   X          1: Effective address provided.
         *    XXXXX     5: Reserved.
         *         XX   2: Type of SLB/ERAT/TLB error.
         */
        u8      sub_err_type;
        u8      reserved_1[6];
        /* Only meaningful when the "effective address provided" bit is set. */
        __be64  effective_address;
        /* Only meaningful when the "logical address provided" bit is set. */
        __be64  logical_address;
} __packed;
  74
/* RTAS pseries MCE error types (values of pseries_mc_errorlog.error_type) */
#define MC_ERROR_TYPE_UE                0x00
#define MC_ERROR_TYPE_SLB               0x01
#define MC_ERROR_TYPE_ERAT              0x02
#define MC_ERROR_TYPE_UNKNOWN           0x03
#define MC_ERROR_TYPE_TLB               0x04
#define MC_ERROR_TYPE_D_CACHE           0x05
#define MC_ERROR_TYPE_I_CACHE           0x07

/*
 * RTAS pseries MCE error sub types, as returned by rtas_mc_error_sub_type().
 * The UE values occupy the low three bits of sub_err_type.
 */
#define MC_ERROR_UE_INDETERMINATE               0
#define MC_ERROR_UE_IFETCH                      1
#define MC_ERROR_UE_PAGE_TABLE_WALK_IFETCH      2
#define MC_ERROR_UE_LOAD_STORE                  3
#define MC_ERROR_UE_PAGE_TABLE_WALK_LOAD_STORE  4

/* Flag bits tested directly against sub_err_type for UE errors. */
#define UE_EFFECTIVE_ADDR_PROVIDED              0x40
#define UE_LOGICAL_ADDR_PROVIDED                0x20

/* SLB sub types (low two bits of sub_err_type); note these start at 0. */
#define MC_ERROR_SLB_PARITY             0
#define MC_ERROR_SLB_MULTIHIT           1
#define MC_ERROR_SLB_INDETERMINATE      2

/* ERAT sub types (low two bits of sub_err_type); these start at 1. */
#define MC_ERROR_ERAT_PARITY            1
#define MC_ERROR_ERAT_MULTIHIT          2
#define MC_ERROR_ERAT_INDETERMINATE     3

/* TLB sub types (low two bits of sub_err_type); these start at 1. */
#define MC_ERROR_TLB_PARITY             1
#define MC_ERROR_TLB_MULTIHIT           2
#define MC_ERROR_TLB_INDETERMINATE      3
 105
 106 static inline u8 rtas_mc_error_sub_type(const struct pseries_mc_errorlog *mlog)
 107 {
 108         switch (mlog->error_type) {
 109         case    MC_ERROR_TYPE_UE:
 110                 return (mlog->sub_err_type & 0x07);
 111         case    MC_ERROR_TYPE_SLB:
 112         case    MC_ERROR_TYPE_ERAT:
 113         case    MC_ERROR_TYPE_TLB:
 114                 return (mlog->sub_err_type & 0x03);
 115         default:
 116                 return 0;
 117         }
 118 }
 119
 120 /*
 121  * Enable the hotplug interrupt late because processing them may touch other
 122  * devices or systems (e.g. hugepages) that have not been initialized at the
 123  * subsys stage.
 124  */
 125 int __init init_ras_hotplug_IRQ(void)
 126 {
 127         struct device_node *np;
 128
 129         /* Hotplug Events */
 130         np = of_find_node_by_path("/event-sources/hot-plug-events");
 131         if (np != NULL) {
 132                 if (dlpar_workqueue_init() == 0)
 133                         request_event_sources_irqs(np, ras_hotplug_interrupt,
 134                                                    "RAS_HOTPLUG");
 135                 of_node_put(np);
 136         }
 137
 138         return 0;
 139 }
 140 machine_late_initcall(pseries, init_ras_hotplug_IRQ);
 141
 142 /*
 143  * Initialize handlers for the set of interrupts caused by hardware errors
 144  * and power system events.
 145  */
 146 static int __init init_ras_IRQ(void)
 147 {
 148         struct device_node *np;
 149
 150         ras_check_exception_token = rtas_token("check-exception");
 151
 152         /* Internal Errors */
 153         np = of_find_node_by_path("/event-sources/internal-errors");
 154         if (np != NULL) {
 155                 request_event_sources_irqs(np, ras_error_interrupt,
 156                                            "RAS_ERROR");
 157                 of_node_put(np);
 158         }
 159
 160         /* EPOW Events */
 161         np = of_find_node_by_path("/event-sources/epow-events");
 162         if (np != NULL) {
 163                 request_event_sources_irqs(np, ras_epow_interrupt, "RAS_EPOW");
 164                 of_node_put(np);
 165         }
 166
 167         return 0;
 168 }
 169 machine_subsys_initcall(pseries, init_ras_IRQ);
 170
 171 #define EPOW_SHUTDOWN_NORMAL                            1
 172 #define EPOW_SHUTDOWN_ON_UPS                            2
 173 #define EPOW_SHUTDOWN_LOSS_OF_CRITICAL_FUNCTIONS        3
 174 #define EPOW_SHUTDOWN_AMBIENT_TEMPERATURE_TOO_HIGH      4
 175
 176 static void handle_system_shutdown(char event_modifier)
 177 {
 178         switch (event_modifier) {
 179         case EPOW_SHUTDOWN_NORMAL:
 180                 pr_emerg("Power off requested\n");
 181                 orderly_poweroff(true);
 182                 break;
 183
 184         case EPOW_SHUTDOWN_ON_UPS:
 185                 pr_emerg("Loss of system power detected. System is running on"
 186                          " UPS/battery. Check RTAS error log for details\n");
 187                 orderly_poweroff(true);
 188                 break;
 189
 190         case EPOW_SHUTDOWN_LOSS_OF_CRITICAL_FUNCTIONS:
 191                 pr_emerg("Loss of system critical functions detected. Check"
 192                          " RTAS error log for details\n");
 193                 orderly_poweroff(true);
 194                 break;
 195
 196         case EPOW_SHUTDOWN_AMBIENT_TEMPERATURE_TOO_HIGH:
 197                 pr_emerg("High ambient temperature detected. Check RTAS"
 198                          " error log for details\n");
 199                 orderly_poweroff(true);
 200                 break;
 201
 202         default:
 203                 pr_err("Unknown power/cooling shutdown event (modifier = %d)\n",
 204                         event_modifier);
 205         }
 206 }
 207
 208 struct epow_errorlog {
 209         unsigned char sensor_value;
 210         unsigned char event_modifier;
 211         unsigned char extended_modifier;
 212         unsigned char reserved;
 213         unsigned char platform_reason;
 214 };
 215
 216 #define EPOW_RESET                      0
 217 #define EPOW_WARN_COOLING               1
 218 #define EPOW_WARN_POWER                 2
 219 #define EPOW_SYSTEM_SHUTDOWN            3
 220 #define EPOW_SYSTEM_HALT                4
 221 #define EPOW_MAIN_ENCLOSURE             5
 222 #define EPOW_POWER_OFF                  7
 223
 224 static void rtas_parse_epow_errlog(struct rtas_error_log *log)
 225 {
 226         struct pseries_errorlog *pseries_log;
 227         struct epow_errorlog *epow_log;
 228         char action_code;
 229         char modifier;
 230
 231         pseries_log = get_pseries_errorlog(log, PSERIES_ELOG_SECT_ID_EPOW);
 232         if (pseries_log == NULL)
 233                 return;
 234
 235         epow_log = (struct epow_errorlog *)pseries_log->data;
 236         action_code = epow_log->sensor_value & 0xF;     /* bottom 4 bits */
 237         modifier = epow_log->event_modifier & 0xF;      /* bottom 4 bits */
 238
 239         switch (action_code) {
 240         case EPOW_RESET:
 241                 if (num_epow_events) {
 242                         pr_info("Non critical power/cooling issue cleared\n");
 243                         num_epow_events--;
 244                 }
 245                 break;
 246
 247         case EPOW_WARN_COOLING:
 248                 pr_info("Non-critical cooling issue detected. Check RTAS error"
 249                         " log for details\n");
 250                 break;
 251
 252         case EPOW_WARN_POWER:
 253                 pr_info("Non-critical power issue detected. Check RTAS error"
 254                         " log for details\n");
 255                 break;
 256
 257         case EPOW_SYSTEM_SHUTDOWN:
 258                 handle_system_shutdown(modifier);
 259                 break;
 260
 261         case EPOW_SYSTEM_HALT:
 262                 pr_emerg("Critical power/cooling issue detected. Check RTAS"
 263                          " error log for details. Powering off.\n");
 264                 orderly_poweroff(true);
 265                 break;
 266
 267         case EPOW_MAIN_ENCLOSURE:
 268         case EPOW_POWER_OFF:
 269                 pr_emerg("System about to lose power. Check RTAS error log "
 270                          " for details. Powering off immediately.\n");
 271                 emergency_sync();
 272                 kernel_power_off();
 273                 break;
 274
 275         default:
 276                 pr_err("Unknown power/cooling event (action code  = %d)\n",
 277                         action_code);
 278         }
 279
 280         /* Increment epow events counter variable */
 281         if (action_code != EPOW_RESET)
 282                 num_epow_events++;
 283 }
 284
 285 static irqreturn_t ras_hotplug_interrupt(int irq, void *dev_id)
 286 {
 287         struct pseries_errorlog *pseries_log;
 288         struct pseries_hp_errorlog *hp_elog;
 289
 290         spin_lock(&ras_log_buf_lock);
 291
 292         rtas_call(ras_check_exception_token, 6, 1, NULL,
 293                   RTAS_VECTOR_EXTERNAL_INTERRUPT, virq_to_hw(irq),
 294                   RTAS_HOTPLUG_EVENTS, 0, __pa(&ras_log_buf),
 295                   rtas_get_error_log_max());
 296
 297         pseries_log = get_pseries_errorlog((struct rtas_error_log *)ras_log_buf,
 298                                            PSERIES_ELOG_SECT_ID_HOTPLUG);
 299         hp_elog = (struct pseries_hp_errorlog *)pseries_log->data;
 300
 301         /*
 302          * Since PCI hotplug is not currently supported on pseries, put PCI
 303          * hotplug events on the ras_log_buf to be handled by rtas_errd.
 304          */
 305         if (hp_elog->resource == PSERIES_HP_ELOG_RESOURCE_MEM ||
 306             hp_elog->resource == PSERIES_HP_ELOG_RESOURCE_CPU ||
 307             hp_elog->resource == PSERIES_HP_ELOG_RESOURCE_PMEM)
 308                 queue_hotplug_event(hp_elog);
 309         else
 310                 log_error(ras_log_buf, ERR_TYPE_RTAS_LOG, 0);
 311
 312         spin_unlock(&ras_log_buf_lock);
 313         return IRQ_HANDLED;
 314 }
 315
 316 /* Handle environmental and power warning (EPOW) interrupts. */
 317 static irqreturn_t ras_epow_interrupt(int irq, void *dev_id)
 318 {
 319         int status;
 320         int state;
 321         int critical;
 322
 323         status = rtas_get_sensor_fast(EPOW_SENSOR_TOKEN, EPOW_SENSOR_INDEX,
 324                                       &state);
 325
 326         if (state > 3)
 327                 critical = 1;           /* Time Critical */
 328         else
 329                 critical = 0;
 330
 331         spin_lock(&ras_log_buf_lock);
 332
 333         status = rtas_call(ras_check_exception_token, 6, 1, NULL,
 334                            RTAS_VECTOR_EXTERNAL_INTERRUPT,
 335                            virq_to_hw(irq),
 336                            RTAS_EPOW_WARNING,
 337                            critical, __pa(&ras_log_buf),
 338                                 rtas_get_error_log_max());
 339
 340         log_error(ras_log_buf, ERR_TYPE_RTAS_LOG, 0);
 341
 342         rtas_parse_epow_errlog((struct rtas_error_log *)ras_log_buf);
 343
 344         spin_unlock(&ras_log_buf_lock);
 345         return IRQ_HANDLED;
 346 }
 347
 348 /*
 349  * Handle hardware error interrupts.
 350  *
 351  * RTAS check-exception is called to collect data on the exception.  If
 352  * the error is deemed recoverable, we log a warning and return.
 353  * For nonrecoverable errors, an error is logged and we stop all processing
 354  * as quickly as possible in order to prevent propagation of the failure.
 355  */
 356 static irqreturn_t ras_error_interrupt(int irq, void *dev_id)
 357 {
 358         struct rtas_error_log *rtas_elog;
 359         int status;
 360         int fatal;
 361
 362         spin_lock(&ras_log_buf_lock);
 363
 364         status = rtas_call(ras_check_exception_token, 6, 1, NULL,
 365                            RTAS_VECTOR_EXTERNAL_INTERRUPT,
 366                            virq_to_hw(irq),
 367                            RTAS_INTERNAL_ERROR, 1 /* Time Critical */,
 368                            __pa(&ras_log_buf),
 369                                 rtas_get_error_log_max());
 370
 371         rtas_elog = (struct rtas_error_log *)ras_log_buf;
 372
 373         if (status == 0 &&
 374             rtas_error_severity(rtas_elog) >= RTAS_SEVERITY_ERROR_SYNC)
 375                 fatal = 1;
 376         else
 377                 fatal = 0;
 378
 379         /* format and print the extended information */
 380         log_error(ras_log_buf, ERR_TYPE_RTAS_LOG, fatal);
 381
 382         if (fatal) {
 383                 pr_emerg("Fatal hardware error detected. Check RTAS error"
 384                          " log for details. Powering off immediately\n");
 385                 emergency_sync();
 386                 kernel_power_off();
 387         } else {
 388                 pr_err("Recoverable hardware error detected\n");
 389         }
 390
 391         spin_unlock(&ras_log_buf_lock);
 392         return IRQ_HANDLED;
 393 }
 394
 395 /*
 396  * Some versions of FWNMI place the buffer inside the 4kB page starting at
 397  * 0x7000. Other versions place it inside the rtas buffer. We check both.
 398  */
 399 #define VALID_FWNMI_BUFFER(A) \
 400         ((((A) >= 0x7000) && ((A) < 0x7ff0)) || \
 401         (((A) >= rtas.base) && ((A) < (rtas.base + rtas.size - 16))))
 402
 403 static inline struct rtas_error_log *fwnmi_get_errlog(void)
 404 {
 405         return (struct rtas_error_log *)local_paca->mce_data_buf;
 406 }
 407
 408 /*
 409  * Get the error information for errors coming through the
 410  * FWNMI vectors.  The pt_regs' r3 will be updated to reflect
 411  * the actual r3 if possible, and a ptr to the error log entry
 412  * will be returned if found.
 413  *
 414  * Use one buffer mce_data_buf per cpu to store RTAS error.
 415  *
 416  * The mce_data_buf does not have any locks or protection around it,
 417  * if a second machine check comes in, or a system reset is done
 418  * before we have logged the error, then we will get corruption in the
 419  * error log.  This is preferable over holding off on calling
 420  * ibm,nmi-interlock which would result in us checkstopping if a
 421  * second machine check did come in.
 422  */
 423 static struct rtas_error_log *fwnmi_get_errinfo(struct pt_regs *regs)
 424 {
 425         unsigned long *savep;
 426         struct rtas_error_log *h;
 427
 428         /* Mask top two bits */
 429         regs->gpr[3] &= ~(0x3UL << 62);
 430
 431         if (!VALID_FWNMI_BUFFER(regs->gpr[3])) {
 432                 printk(KERN_ERR "FWNMI: corrupt r3 0x%016lx\n", regs->gpr[3]);
 433                 return NULL;
 434         }
 435
 436         savep = __va(regs->gpr[3]);
 437         regs->gpr[3] = be64_to_cpu(savep[0]);   /* restore original r3 */
 438
 439         h = (struct rtas_error_log *)&savep[1];
 440         /* Use the per cpu buffer from paca to store rtas error log */
 441         memset(local_paca->mce_data_buf, 0, RTAS_ERROR_LOG_MAX);
 442         if (!rtas_error_extended(h)) {
 443                 memcpy(local_paca->mce_data_buf, h, sizeof(__u64));
 444         } else {
 445                 int len, error_log_length;
 446
 447                 error_log_length = 8 + rtas_error_extended_log_length(h);
 448                 len = min_t(int, error_log_length, RTAS_ERROR_LOG_MAX);
 449                 memcpy(local_paca->mce_data_buf, h, len);
 450         }
 451
 452         return (struct rtas_error_log *)local_paca->mce_data_buf;
 453 }
 454
 455 /* Call this when done with the data returned by FWNMI_get_errinfo.
 456  * It will release the saved data area for other CPUs in the
 457  * partition to receive FWNMI errors.
 458  */
 459 static void fwnmi_release_errinfo(void)
 460 {
 461         int ret = rtas_call(rtas_token("ibm,nmi-interlock"), 0, 1, NULL);
 462         if (ret != 0)
 463                 printk(KERN_ERR "FWNMI: nmi-interlock failed: %d\n", ret);
 464 }
 465
 466 int pSeries_system_reset_exception(struct pt_regs *regs)
 467 {
 468 #ifdef __LITTLE_ENDIAN__
 469         /*
 470          * Some firmware byteswaps SRR registers and gives incorrect SRR1. Try
 471          * to detect the bad SRR1 pattern here. Flip the NIP back to correct
 472          * endian for reporting purposes. Unfortunately the MSR can't be fixed,
 473          * so clear it. It will be missing MSR_RI so we won't try to recover.
 474          */
 475         if ((be64_to_cpu(regs->msr) &
 476                         (MSR_LE|MSR_RI|MSR_DR|MSR_IR|MSR_ME|MSR_PR|
 477                          MSR_ILE|MSR_HV|MSR_SF)) == (MSR_DR|MSR_SF)) {
 478                 regs->nip = be64_to_cpu((__be64)regs->nip);
 479                 regs->msr = 0;
 480         }
 481 #endif
 482
 483         if (fwnmi_active) {
 484                 struct rtas_error_log *errhdr = fwnmi_get_errinfo(regs);
 485                 if (errhdr) {
 486                         /* XXX Should look at FWNMI information */
 487                 }
 488                 fwnmi_release_errinfo();
 489         }
 490
 491         if (smp_handle_nmi_ipi(regs))
 492                 return 1;
 493
 494         return 0; /* need to perform reset */
 495 }
 496
 497
 498 static int mce_handle_error(struct pt_regs *regs, struct rtas_error_log *errp)
 499 {
 500         struct mce_error_info mce_err = { 0 };
 501         unsigned long eaddr = 0, paddr = 0;
 502         struct pseries_errorlog *pseries_log;
 503         struct pseries_mc_errorlog *mce_log;
 504         int disposition = rtas_error_disposition(errp);
 505         int initiator = rtas_error_initiator(errp);
 506         int severity = rtas_error_severity(errp);
 507         u8 error_type, err_sub_type;
 508
 509         if (initiator == RTAS_INITIATOR_UNKNOWN)
 510                 mce_err.initiator = MCE_INITIATOR_UNKNOWN;
 511         else if (initiator == RTAS_INITIATOR_CPU)
 512                 mce_err.initiator = MCE_INITIATOR_CPU;
 513         else if (initiator == RTAS_INITIATOR_PCI)
 514                 mce_err.initiator = MCE_INITIATOR_PCI;
 515         else if (initiator == RTAS_INITIATOR_ISA)
 516                 mce_err.initiator = MCE_INITIATOR_ISA;
 517         else if (initiator == RTAS_INITIATOR_MEMORY)
 518                 mce_err.initiator = MCE_INITIATOR_MEMORY;
 519         else if (initiator == RTAS_INITIATOR_POWERMGM)
 520                 mce_err.initiator = MCE_INITIATOR_POWERMGM;
 521         else
 522                 mce_err.initiator = MCE_INITIATOR_UNKNOWN;
 523
 524         if (severity == RTAS_SEVERITY_NO_ERROR)
 525                 mce_err.severity = MCE_SEV_NO_ERROR;
 526         else if (severity == RTAS_SEVERITY_EVENT)
 527                 mce_err.severity = MCE_SEV_WARNING;
 528         else if (severity == RTAS_SEVERITY_WARNING)
 529                 mce_err.severity = MCE_SEV_WARNING;
 530         else if (severity == RTAS_SEVERITY_ERROR_SYNC)
 531                 mce_err.severity = MCE_SEV_SEVERE;
 532         else if (severity == RTAS_SEVERITY_ERROR)
 533                 mce_err.severity = MCE_SEV_SEVERE;
 534         else if (severity == RTAS_SEVERITY_FATAL)
 535                 mce_err.severity = MCE_SEV_FATAL;
 536         else
 537                 mce_err.severity = MCE_SEV_FATAL;
 538
 539         if (severity <= RTAS_SEVERITY_ERROR_SYNC)
 540                 mce_err.sync_error = true;
 541         else
 542                 mce_err.sync_error = false;
 543
 544         mce_err.error_type = MCE_ERROR_TYPE_UNKNOWN;
 545         mce_err.error_class = MCE_ECLASS_UNKNOWN;
 546
 547         if (!rtas_error_extended(errp))
 548                 goto out;
 549
 550         pseries_log = get_pseries_errorlog(errp, PSERIES_ELOG_SECT_ID_MCE);
 551         if (pseries_log == NULL)
 552                 goto out;
 553
 554         mce_log = (struct pseries_mc_errorlog *)pseries_log->data;
 555         error_type = mce_log->error_type;
 556         err_sub_type = rtas_mc_error_sub_type(mce_log);
 557
 558         switch (mce_log->error_type) {
 559         case MC_ERROR_TYPE_UE:
 560                 mce_err.error_type = MCE_ERROR_TYPE_UE;
 561                 mce_common_process_ue(regs, &mce_err);
 562                 if (mce_err.ignore_event)
 563                         disposition = RTAS_DISP_FULLY_RECOVERED;
 564                 switch (err_sub_type) {
 565                 case MC_ERROR_UE_IFETCH:
 566                         mce_err.u.ue_error_type = MCE_UE_ERROR_IFETCH;
 567                         break;
 568                 case MC_ERROR_UE_PAGE_TABLE_WALK_IFETCH:
 569                         mce_err.u.ue_error_type = MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH;
 570                         break;
 571                 case MC_ERROR_UE_LOAD_STORE:
 572                         mce_err.u.ue_error_type = MCE_UE_ERROR_LOAD_STORE;
 573                         break;
 574                 case MC_ERROR_UE_PAGE_TABLE_WALK_LOAD_STORE:
 575                         mce_err.u.ue_error_type = MCE_UE_ERROR_PAGE_TABLE_WALK_LOAD_STORE;
 576                         break;
 577                 case MC_ERROR_UE_INDETERMINATE:
 578                 default:
 579                         mce_err.u.ue_error_type = MCE_UE_ERROR_INDETERMINATE;
 580                         break;
 581                 }
 582                 if (mce_log->sub_err_type & UE_EFFECTIVE_ADDR_PROVIDED)
 583                         eaddr = be64_to_cpu(mce_log->effective_address);
 584
 585                 if (mce_log->sub_err_type & UE_LOGICAL_ADDR_PROVIDED) {
 586                         paddr = be64_to_cpu(mce_log->logical_address);
 587                 } else if (mce_log->sub_err_type & UE_EFFECTIVE_ADDR_PROVIDED) {
 588                         unsigned long pfn;
 589
 590                         pfn = addr_to_pfn(regs, eaddr);
 591                         if (pfn != ULONG_MAX)
 592                                 paddr = pfn << PAGE_SHIFT;
 593                 }
 594
 595                 break;
 596         case MC_ERROR_TYPE_SLB:
 597                 mce_err.error_type = MCE_ERROR_TYPE_SLB;
 598                 switch (err_sub_type) {
 599                 case MC_ERROR_SLB_PARITY:
 600                         mce_err.u.slb_error_type = MCE_SLB_ERROR_PARITY;
 601                         break;
 602                 case MC_ERROR_SLB_MULTIHIT:
 603                         mce_err.u.slb_error_type = MCE_SLB_ERROR_MULTIHIT;
 604                         break;
 605                 case MC_ERROR_SLB_INDETERMINATE:
 606                 default:
 607                         mce_err.u.slb_error_type = MCE_SLB_ERROR_INDETERMINATE;
 608                         break;
 609                 }
 610                 if (mce_log->sub_err_type & 0x80)
 611                         eaddr = be64_to_cpu(mce_log->effective_address);
 612                 break;
 613         case MC_ERROR_TYPE_ERAT:
 614                 mce_err.error_type = MCE_ERROR_TYPE_ERAT;
 615                 switch (err_sub_type) {
 616                 case MC_ERROR_ERAT_PARITY:
 617                         mce_err.u.erat_error_type = MCE_ERAT_ERROR_PARITY;
 618                         break;
 619                 case MC_ERROR_ERAT_MULTIHIT:
 620                         mce_err.u.erat_error_type = MCE_ERAT_ERROR_MULTIHIT;
 621                         break;
 622                 case MC_ERROR_ERAT_INDETERMINATE:
 623                 default:
 624                         mce_err.u.erat_error_type = MCE_ERAT_ERROR_INDETERMINATE;
 625                         break;
 626                 }
 627                 if (mce_log->sub_err_type & 0x80)
 628                         eaddr = be64_to_cpu(mce_log->effective_address);
 629                 break;
 630         case MC_ERROR_TYPE_TLB:
 631                 mce_err.error_type = MCE_ERROR_TYPE_TLB;
 632                 switch (err_sub_type) {
 633                 case MC_ERROR_TLB_PARITY:
 634                         mce_err.u.tlb_error_type = MCE_TLB_ERROR_PARITY;
 635                         break;
 636                 case MC_ERROR_TLB_MULTIHIT:
 637                         mce_err.u.tlb_error_type = MCE_TLB_ERROR_MULTIHIT;
 638                         break;
 639                 case MC_ERROR_TLB_INDETERMINATE:
 640                 default:
 641                         mce_err.u.tlb_error_type = MCE_TLB_ERROR_INDETERMINATE;
 642                         break;
 643                 }
 644                 if (mce_log->sub_err_type & 0x80)
 645                         eaddr = be64_to_cpu(mce_log->effective_address);
 646                 break;
 647         case MC_ERROR_TYPE_D_CACHE:
 648                 mce_err.error_type = MCE_ERROR_TYPE_DCACHE;
 649                 break;
 650         case MC_ERROR_TYPE_I_CACHE:
 651                 mce_err.error_type = MCE_ERROR_TYPE_DCACHE;
 652                 break;
 653         case MC_ERROR_TYPE_UNKNOWN:
 654         default:
 655                 mce_err.error_type = MCE_ERROR_TYPE_UNKNOWN;
 656                 break;
 657         }
 658
 659 #ifdef CONFIG_PPC_BOOK3S_64
 660         if (disposition == RTAS_DISP_NOT_RECOVERED) {
 661                 switch (error_type) {
 662                 case    MC_ERROR_TYPE_SLB:
 663                 case    MC_ERROR_TYPE_ERAT:
 664                         /*
 665                          * Store the old slb content in paca before flushing.
 666                          * Print this when we go to virtual mode.
 667                          * There are chances that we may hit MCE again if there
 668                          * is a parity error on the SLB entry we trying to read
 669                          * for saving. Hence limit the slb saving to single
 670                          * level of recursion.
 671                          */
 672                         if (local_paca->in_mce == 1)
 673                                 slb_save_contents(local_paca->mce_faulty_slbs);
 674                         flush_and_reload_slb();
 675                         disposition = RTAS_DISP_FULLY_RECOVERED;
 676                         break;
 677                 default:
 678                         break;
 679                 }
 680         } else if (disposition == RTAS_DISP_LIMITED_RECOVERY) {
 681                 /* Platform corrected itself but could be degraded */
 682                 printk(KERN_ERR "MCE: limited recovery, system may "
 683                        "be degraded\n");
 684                 disposition = RTAS_DISP_FULLY_RECOVERED;
 685         }
 686 #endif
 687
 688 out:
 689         save_mce_event(regs, disposition == RTAS_DISP_FULLY_RECOVERED,
 690                         &mce_err, regs->nip, eaddr, paddr);
 691
 692         return disposition;
 693 }
 694
 695 /*
 696  * Process MCE rtas errlog event.
 697  */
 698 static void mce_process_errlog_event(struct irq_work *work)
 699 {
 700         struct rtas_error_log *err;
 701
 702         err = fwnmi_get_errlog();
 703         log_error((char *)err, ERR_TYPE_RTAS_LOG, 0);
 704 }
 705
 706 /*
 707  * See if we can recover from a machine check exception.
 708  * This is only called on power4 (or above) and only via
 709  * the Firmware Non-Maskable Interrupts (fwnmi) handler
 710  * which provides the error analysis for us.
 711  *
 712  * Return 1 if corrected (or delivered a signal).
 713  * Return 0 if there is nothing we can do.
 714  */
 715 static int recover_mce(struct pt_regs *regs, struct machine_check_event *evt)
 716 {
 717         int recovered = 0;
 718
 719         if (!(regs->msr & MSR_RI)) {
 720                 /* If MSR_RI isn't set, we cannot recover */
 721                 pr_err("Machine check interrupt unrecoverable: MSR(RI=0)\n");
 722                 recovered = 0;
 723         } else if (evt->disposition == MCE_DISPOSITION_RECOVERED) {
 724                 /* Platform corrected itself */
 725                 recovered = 1;
 726         } else if (evt->severity == MCE_SEV_FATAL) {
 727                 /* Fatal machine check */
 728                 pr_err("Machine check interrupt is fatal\n");
 729                 recovered = 0;
 730         }
 731
 732         if (!recovered && evt->sync_error) {
 733                 /*
 734                  * Try to kill processes if we get a synchronous machine check
 735                  * (e.g., one caused by execution of this instruction). This
 736                  * will devolve into a panic if we try to kill init or are in
 737                  * an interrupt etc.
 738                  *
 739                  * TODO: Queue up this address for hwpoisioning later.
 740                  * TODO: This is not quite right for d-side machine
 741                  *       checks ->nip is not necessarily the important
 742                  *       address.
 743                  */
 744                 if ((user_mode(regs))) {
 745                         _exception(SIGBUS, regs, BUS_MCEERR_AR, regs->nip);
 746                         recovered = 1;
 747                 } else if (die_will_crash()) {
 748                         /*
 749                          * die() would kill the kernel, so better to go via
 750                          * the platform reboot code that will log the
 751                          * machine check.
 752                          */
 753                         recovered = 0;
 754                 } else {
 755                         die("Machine check", regs, SIGBUS);
 756                         recovered = 1;
 757                 }
 758         }
 759
 760         return recovered;
 761 }
 762
 763 /*
 764  * Handle a machine check.
 765  *
 766  * Note that on Power 4 and beyond Firmware Non-Maskable Interrupts (fwnmi)
 767  * should be present.  If so the handler which called us tells us if the
 768  * error was recovered (never true if RI=0).
 769  *
 770  * On hardware prior to Power 4 these exceptions were asynchronous which
 771  * means we can't tell exactly where it occurred and so we can't recover.
 772  */
 773 int pSeries_machine_check_exception(struct pt_regs *regs)
 774 {
 775         struct machine_check_event evt;
 776
 777         if (!get_mce_event(&evt, MCE_EVENT_RELEASE))
 778                 return 0;
 779
 780         /* Print things out */
 781         if (evt.version != MCE_V1) {
 782                 pr_err("Machine Check Exception, Unknown event version %d !\n",
 783                        evt.version);
 784                 return 0;
 785         }
 786         machine_check_print_event_info(&evt, user_mode(regs), false);
 787
 788         if (recover_mce(regs, &evt))
 789                 return 1;
 790
 791         return 0;
 792 }
 793
 794 long pseries_machine_check_realmode(struct pt_regs *regs)
 795 {
 796         struct rtas_error_log *errp;
 797         int disposition;
 798
 799         if (fwnmi_active) {
 800                 errp = fwnmi_get_errinfo(regs);
 801                 /*
 802                  * Call to fwnmi_release_errinfo() in real mode causes kernel
 803                  * to panic. Hence we will call it as soon as we go into
 804                  * virtual mode.
 805                  */
 806                 disposition = mce_handle_error(regs, errp);
 807                 fwnmi_release_errinfo();
 808
 809                 /* Queue irq work to log this rtas event later. */
 810                 irq_work_queue(&mce_errlog_process_work);
 811
 812                 if (disposition == RTAS_DISP_FULLY_RECOVERED)
 813                         return 1;
 814         }
 815
 816         return 0;
 817 }