From 82360a3730e650ae082310a754e108b882f28070 Mon Sep 17 00:00:00 2001 From: Naoya Horiguchi Date: Fri, 8 May 2015 16:33:13 +0900 Subject: [PATCH] mm/memory-failure: introduce get_hwpoison_page() for consistent refcount handling memory_failure() can run in 2 different modes (specified by MF_COUNT_INCREASED) from a page refcount perspective. If MF_COUNT_INCREASED is set, memory_failure() assumes that the caller has taken a refcount on the target page. If it is cleared, memory_failure() takes the refcount on its own. In the current code, however, refcounting is done differently in each caller. For example, madvise_hwpoison() uses get_user_pages_fast() and hwpoison_inject() uses get_page_unless_zero(). Calling get_page_unless_zero() on thp tail pages is unexpected and leads to unexpected behavior, because get_page_unless_zero() doesn't work for thp tail pages. As a result, memory_failure() can't handle refcounting properly. So this patch introduces get_hwpoison_page() to handle thp tail pages. There's a non-trivial change in unpoison_memory(), which currently returns on thp with a "MCE: Memory failure is now running on %#lx\n" message. This is not right when split_huge_page() fails. So this patch also allows unpoison_memory() to run on thp. 
Signed-off-by: Naoya Horiguchi --- include/linux/mm.h | 1 + mm/hwpoison-inject.c | 4 ++-- mm/memory-failure.c | 49 ++++++++++++++++++++++++++++---------------- 3 files changed, 34 insertions(+), 20 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index 47a93928b90fff..84b08e40adfab0 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2226,6 +2226,7 @@ enum mf_flags { extern int memory_failure(unsigned long pfn, int trapno, int flags); extern void memory_failure_queue(unsigned long pfn, int trapno, int flags); extern int unpoison_memory(unsigned long pfn); +extern int get_hwpoison_page(struct page *page); extern int sysctl_memory_failure_early_kill; extern int sysctl_memory_failure_recovery; extern void shake_page(struct page *p, int access); diff --git a/mm/hwpoison-inject.c b/mm/hwpoison-inject.c index 4ca5fe0042e17c..bf73ac17dad424 100644 --- a/mm/hwpoison-inject.c +++ b/mm/hwpoison-inject.c @@ -28,7 +28,7 @@ static int hwpoison_inject(void *data, u64 val) /* * This implies unable to support free buddy pages. */ - if (!get_page_unless_zero(hpage)) + if (!get_hwpoison_page(p)) return 0; if (!hwpoison_filter_enable) @@ -58,7 +58,7 @@ static int hwpoison_inject(void *data, u64 val) pr_info("Injecting memory failure at pfn %#lx\n", pfn); return memory_failure(pfn, 18, MF_COUNT_INCREASED); put_out: - put_page(hpage); + put_page(p); return 0; } diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 3f411002cdaf4f..91a5193f9f9fc4 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -872,6 +872,27 @@ static int page_action(struct page_state *ps, struct page *p, return (result == RECOVERED || result == DELAYED) ? 
0 : -EBUSY; } +/* + * Get refcount for memory error handling (nonzero return: refcount taken): + * - @page: raw page; hugetlb/thp take the ref on head, thp tail on itself too + */ +inline int get_hwpoison_page(struct page *page) { + struct page *head = compound_head(page); + + if (PageHuge(head)) + return get_page_unless_zero(head); + else if (PageTransHuge(head)) + if (get_page_unless_zero(head)) { + if (PageTail(page)) + get_page(page); + return 1; + } else { + return 0; + } + else + return get_page_unless_zero(page); +} + /* * Do all that is necessary to remove user space mappings. Unmap * the pages and send SIGBUS to the processes if the data was dirty. @@ -1054,8 +1075,7 @@ int memory_failure(unsigned long pfn, int trapno, int flags) * In fact it's dangerous to directly bump up page count from 0, * that may make page_freeze_refs()/page_unfreeze_refs() mismatch. */ - if (!(flags & MF_COUNT_INCREASED) && - !get_page_unless_zero(hpage)) { + if (!(flags & MF_COUNT_INCREASED) && !get_hwpoison_page(p)) { if (is_free_buddy_page(p)) { action_result(pfn, "free buddy", DELAYED); return 0; @@ -1363,19 +1383,12 @@ int unpoison_memory(unsigned long pfn) return 0; } - /* - * unpoison_memory() can encounter thp only when the thp is being - * worked by memory_failure() and the page lock is not held yet. - * In such case, we yield to memory_failure() and make unpoison fail. - */ - if (!PageHuge(page) && PageTransHuge(page)) { - pr_info("MCE: Memory failure is now running on %#lx\n", pfn); - return 0; - } - - nr_pages = 1 << compound_order(page); + if (PageHuge(page)) + nr_pages = 1 << compound_order(page); + else + nr_pages = 1; - if (!get_page_unless_zero(page)) { + if (!get_hwpoison_page(p)) { /* * Since HWPoisoned hugepage should have non-zero refcount, * race between memory failure and unpoison seems to happen. @@ -1399,7 +1412,7 @@ int unpoison_memory(unsigned long pfn) * the PG_hwpoison page will be caught and isolated on the entrance to * the free buddy page pool. 
*/ - if (TestClearPageHWPoison(page)) { + if (TestClearPageHWPoison(p)) { pr_info("MCE: Software-unpoisoned page %#lx\n", pfn); atomic_long_sub(nr_pages, &num_poisoned_pages); freeit = 1; @@ -1408,9 +1421,9 @@ int unpoison_memory(unsigned long pfn) } unlock_page(page); - put_page(page); + put_page(p); if (freeit && !(pfn == my_zero_pfn(0) && page_count(p) == 1)) - put_page(page); + put_page(p); return 0; } @@ -1443,7 +1456,7 @@ static int __get_any_page(struct page *p, unsigned long pfn, int flags) * When the target page is a free hugepage, just remove it * from free hugepage list. */ - if (!get_page_unless_zero(compound_head(p))) { + if (!get_hwpoison_page(p)) { if (PageHuge(p)) { pr_info("%s: %#lx free huge page\n", __func__, pfn); ret = 0;