forked from Freescale/linux-fslc
-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
nfit: do an ARS scrub on hitting a latent media error
When a latent (unknown to 'badblocks') error is encountered, it will trigger a machine check exception. On a system with machine check recovery, this will only SIGBUS the process(es) which had the bad page mapped (as opposed to a kernel panic on platforms without machine check recovery features). In the former case, we want to trigger a full rescan of that nvdimm bus. This will allow any additional, new errors to be captured in the block devices' badblocks lists, and offending operations on them can be trapped early, avoiding machine checks. This is done by registering a callback function with the x86_mce_decoder_chain and calling the new ars_rescan functionality with the address in the mce notification. Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com> Cc: Tony Luck <tony.luck@intel.com> Signed-off-by: Vishal Verma <vishal.l.verma@intel.com> Signed-off-by: Dan Williams <dan.j.williams@intel.com>
- Loading branch information
1 parent
bdf9701
commit 6839a6d
Showing
5 changed files
with
133 additions
and
4 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,3 @@ | ||
# Build the composite nfit object according to CONFIG_ACPI_NFIT.
obj-$(CONFIG_ACPI_NFIT) := nfit.o
# core.o is always part of nfit.
nfit-y := core.o
# mce.o (the machine check handler) is added only when CONFIG_X86_MCE is set.
nfit-$(CONFIG_X86_MCE) += mce.o
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,89 @@ | ||
/* | ||
* NFIT - Machine Check Handler | ||
* | ||
* Copyright(c) 2013-2016 Intel Corporation. All rights reserved. | ||
* | ||
* This program is free software; you can redistribute it and/or modify | ||
* it under the terms of version 2 of the GNU General Public License as | ||
* published by the Free Software Foundation. | ||
* | ||
* This program is distributed in the hope that it will be useful, but | ||
* WITHOUT ANY WARRANTY; without even the implied warranty of | ||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
* General Public License for more details. | ||
*/ | ||
#include <linux/notifier.h> | ||
#include <linux/acpi.h> | ||
#include <asm/mce.h> | ||
#include "nfit.h" | ||
|
||
static int nfit_handle_mce(struct notifier_block *nb, unsigned long val, | ||
void *data) | ||
{ | ||
struct mce *mce = (struct mce *)data; | ||
struct acpi_nfit_desc *acpi_desc; | ||
struct nfit_spa *nfit_spa; | ||
|
||
/* We only care about memory errors */ | ||
if (!(mce->status & MCACOD)) | ||
return NOTIFY_DONE; | ||
|
||
/* | ||
* mce->addr contains the physical addr accessed that caused the | ||
* machine check. We need to walk through the list of NFITs, and see | ||
* if any of them matches that address, and only then start a scrub. | ||
*/ | ||
mutex_lock(&acpi_desc_lock); | ||
list_for_each_entry(acpi_desc, &acpi_descs, list) { | ||
struct device *dev = acpi_desc->dev; | ||
int found_match = 0; | ||
|
||
mutex_lock(&acpi_desc->init_mutex); | ||
list_for_each_entry(nfit_spa, &acpi_desc->spas, list) { | ||
struct acpi_nfit_system_address *spa = nfit_spa->spa; | ||
|
||
if (nfit_spa_type(spa) == NFIT_SPA_PM) | ||
continue; | ||
/* find the spa that covers the mce addr */ | ||
if (spa->address > mce->addr) | ||
continue; | ||
if ((spa->address + spa->length - 1) < mce->addr) | ||
continue; | ||
found_match = 1; | ||
dev_dbg(dev, "%s: addr in SPA %d (0x%llx, 0x%llx)\n", | ||
__func__, spa->range_index, spa->address, | ||
spa->length); | ||
/* | ||
* We can break at the first match because we're going | ||
* to rescan all the SPA ranges. There shouldn't be any | ||
* aliasing anyway. | ||
*/ | ||
break; | ||
} | ||
mutex_unlock(&acpi_desc->init_mutex); | ||
|
||
/* | ||
* We can ignore an -EBUSY here because if an ARS is already | ||
* in progress, just let that be the last authoritative one | ||
*/ | ||
if (found_match) | ||
acpi_nfit_ars_rescan(acpi_desc); | ||
} | ||
|
||
mutex_unlock(&acpi_desc_lock); | ||
return NOTIFY_DONE; | ||
} | ||
|
||
static struct notifier_block nfit_mce_dec = { | ||
.notifier_call = nfit_handle_mce, | ||
}; | ||
|
||
void nfit_mce_register(void) | ||
{ | ||
mce_register_decode_chain(&nfit_mce_dec); | ||
} | ||
|
||
void nfit_mce_unregister(void) | ||
{ | ||
mce_unregister_decode_chain(&nfit_mce_dec); | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters