Skip to content

Commit

Permalink
riscv: mm: Add memory hotplugging support
Browse files Browse the repository at this point in the history
For an architecture to support memory hotplugging, a couple of
callbacks need to be implemented:

 arch_add_memory()
  This callback is responsible for adding the physical memory into the
  direct map, and calling into the memory hotplugging generic code via
  __add_pages(), which adds the corresponding struct page entries and
  updates the vmemmap mapping.

 arch_remove_memory()
  This is the inverse of the callback above.

 vmemmap_free()
  This function tears down the vmemmap mappings (if
  CONFIG_SPARSEMEM_VMEMMAP is enabled), and also deallocates the
  backing vmemmap pages. Note that for persistent memory, an
  alternative allocator for the backing pages can be used: the
  vmem_altmap. This means that when the backing pages are cleared,
  extra care is needed so that the correct deallocation method is
  used.

 arch_get_mappable_range()
  This function returns the PA range that the direct map can map.
  Used by the MHP internals for sanity checks.

The page table unmap/teardown functions are heavily based on code from
the x86 tree. The same remove_pgd_mapping() function is used in both
vmemmap_free() and arch_remove_memory(), but in the latter function
the backing pages are not removed.

Signed-off-by: Björn Töpel <bjorn@rivosinc.com>
Reviewed-by: Alexandre Ghiti <alexghiti@rivosinc.com>
Link: https://lore.kernel.org/r/20240605114100.315918-7-bjorn@kernel.org
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
  • Loading branch information
bjorn-rivos authored and palmer-dabbelt committed Jun 26, 2024
1 parent 6e6c5e2 commit c75a74f
Showing 1 changed file with 267 additions and 0 deletions.
267 changes: 267 additions & 0 deletions arch/riscv/mm/init.c
Original file line number Diff line number Diff line change
Expand Up @@ -1533,3 +1533,270 @@ struct execmem_info __init *execmem_arch_setup(void)
}
#endif /* CONFIG_MMU */
#endif /* CONFIG_EXECMEM */

#ifdef CONFIG_MEMORY_HOTPLUG
/*
 * Release the PTE table referenced by @pmd once it holds no entries.
 *
 * @pte_start: first entry of the PTE table to inspect
 * @pmd:       PMD entry that points at that table
 *
 * All PTRS_PER_PTE slots are scanned; if any of them is not pte_none() the
 * function returns without touching anything. Otherwise the page-table
 * destructor is run, the table page is released (free_reserved_page() for
 * reserved pages, pagetable_free() for everything else) and finally the PMD
 * entry is cleared.
 */
static void __meminit free_pte_table(pte_t *pte_start, pmd_t *pmd)
{
	struct page *table_page = pmd_page(*pmd);
	struct ptdesc *table_desc = page_ptdesc(table_page);
	int idx;

	/* A single live entry is enough to keep the table. */
	for (idx = 0; idx < PTRS_PER_PTE; idx++) {
		if (!pte_none(pte_start[idx]))
			return;
	}

	pagetable_pte_dtor(table_desc);

	if (!PageReserved(table_page))
		pagetable_free(table_desc);
	else
		free_reserved_page(table_page);

	pmd_clear(pmd);
}

/*
 * Release the PMD table referenced by @pud once it holds no entries.
 *
 * @pmd_start: first entry of the PMD table to inspect
 * @pud:       PUD entry that points at that table
 *
 * All PTRS_PER_PMD slots are scanned; if any of them is not pmd_none() the
 * function returns without touching anything. Otherwise the page-table
 * destructor is run, the table page is released (free_reserved_page() for
 * reserved pages, pagetable_free() for everything else) and finally the PUD
 * entry is cleared.
 *
 * NOTE(review): the destructor is invoked unconditionally; confirm that every
 * PMD table that can reach this function was set up with the matching
 * constructor.
 */
static void __meminit free_pmd_table(pmd_t *pmd_start, pud_t *pud)
{
	struct page *table_page = pud_page(*pud);
	struct ptdesc *table_desc = page_ptdesc(table_page);
	int idx;

	/* A single live entry is enough to keep the table. */
	for (idx = 0; idx < PTRS_PER_PMD; idx++) {
		if (!pmd_none(pmd_start[idx]))
			return;
	}

	pagetable_pmd_dtor(table_desc);

	if (!PageReserved(table_page))
		pagetable_free(table_desc);
	else
		free_reserved_page(table_page);

	pud_clear(pud);
}

/*
 * Release the PUD table referenced by @p4d once it holds no entries.
 *
 * @pud_start: first entry of the PUD table to inspect
 * @p4d:       P4D entry that points at that table
 *
 * All PTRS_PER_PUD slots are scanned; if any of them is not pud_none() the
 * function returns without touching anything. Otherwise the table page is
 * released (free_reserved_page() for reserved pages, an order-0 free_pages()
 * for everything else) and finally the P4D entry is cleared.
 */
static void __meminit free_pud_table(pud_t *pud_start, p4d_t *p4d)
{
	struct page *table_page = p4d_page(*p4d);
	int idx;

	/* A single live entry is enough to keep the table. */
	for (idx = 0; idx < PTRS_PER_PUD; idx++) {
		if (!pud_none(pud_start[idx]))
			return;
	}

	if (!PageReserved(table_page))
		free_pages((unsigned long)page_address(table_page), 0);
	else
		free_reserved_page(table_page);

	p4d_clear(p4d);
}

/*
 * Give back the memory that backed a vmemmap mapping.
 *
 * @page:   first page of the backing storage
 * @size:   size of the storage in bytes
 * @altmap: alternative allocator the storage came from, or NULL
 *
 * Three release paths, tried in this order:
 *  - @altmap is set: size >> PAGE_SHIFT pages are returned through
 *    vmem_altmap_free() and @page is not touched;
 *  - @page is reserved: each of the 1 << get_order(size) pages is released
 *    individually with free_reserved_page();
 *  - otherwise: a single free_pages() of order get_order(size).
 */
static void __meminit free_vmemmap_storage(struct page *page, size_t size,
					   struct vmem_altmap *altmap)
{
	unsigned int remaining;
	int order;

	if (altmap) {
		vmem_altmap_free(altmap, size >> PAGE_SHIFT);
		return;
	}

	order = get_order(size);

	if (!PageReserved(page)) {
		free_pages((unsigned long)page_address(page), order);
		return;
	}

	/* Reserved pages are released one page at a time. */
	for (remaining = 1 << order; remaining; remaining--)
		free_reserved_page(page++);
}

/*
 * Clear the present PTEs that map [addr, end) inside one PTE table.
 *
 * @pte_base:   first entry of the PTE table covering the range
 * @addr:       start of the virtual range
 * @end:        end of the virtual range (exclusive)
 * @is_vmemmap: when true, the page behind each cleared PTE is also released
 *              through free_vmemmap_storage()
 * @altmap:     forwarded to free_vmemmap_storage(); may be NULL
 *
 * Each entry is read once with ptep_get() and that snapshot is used both for
 * the presence test and for locating the backing page, so the two decisions
 * are always made on the same value and the raw table slot is never
 * dereferenced directly.
 */
static void __meminit remove_pte_mapping(pte_t *pte_base, unsigned long addr, unsigned long end,
					 bool is_vmemmap, struct vmem_altmap *altmap)
{
	unsigned long next;
	pte_t *ptep, pte;

	for (; addr < end; addr = next) {
		/* Advance to the next page boundary, clamped to the range end. */
		next = (addr + PAGE_SIZE) & PAGE_MASK;
		if (next > end)
			next = end;

		ptep = pte_base + pte_index(addr);
		pte = ptep_get(ptep);
		if (!pte_present(pte))
			continue;

		/* Unmap first; the backing page is only released afterwards. */
		pte_clear(&init_mm, addr, ptep);
		if (is_vmemmap)
			free_vmemmap_storage(pte_page(pte), PAGE_SIZE, altmap);
	}
}

/*
 * Remove the mappings for [addr, end) below one PMD table.
 *
 * @pmd_base:   first entry of the PMD table covering the range
 * @addr:       start of the virtual range
 * @end:        end of the virtual range (exclusive)
 * @is_vmemmap: when true, backing pages of removed leaf mappings are released
 *              through free_vmemmap_storage()
 * @altmap:     forwarded to free_vmemmap_storage(); may be NULL
 *
 * Non-present entries are skipped. A leaf PMD is cleared directly (and, for
 * vmemmap, its PMD_SIZE backing storage released). For a non-leaf PMD the
 * walk descends into the PTE table and then lets free_pte_table() release
 * that table if it ended up empty.
 *
 * Each entry is read once with pmdp_get(); that snapshot is used for every
 * subsequent decision, including locating the PTE table, so the raw table
 * slot is never dereferenced directly.
 */
static void __meminit remove_pmd_mapping(pmd_t *pmd_base, unsigned long addr, unsigned long end,
					 bool is_vmemmap, struct vmem_altmap *altmap)
{
	unsigned long next;
	pte_t *pte_base;
	pmd_t *pmdp, pmd;

	for (; addr < end; addr = next) {
		next = pmd_addr_end(addr, end);
		pmdp = pmd_base + pmd_index(addr);
		pmd = pmdp_get(pmdp);
		if (!pmd_present(pmd))
			continue;

		if (pmd_leaf(pmd)) {
			/* Unmap first; the backing storage is released afterwards. */
			pmd_clear(pmdp);
			if (is_vmemmap)
				free_vmemmap_storage(pmd_page(pmd), PMD_SIZE, altmap);
			continue;
		}

		pte_base = (pte_t *)pmd_page_vaddr(pmd);
		remove_pte_mapping(pte_base, addr, next, is_vmemmap, altmap);
		free_pte_table(pte_base, pmdp);
	}
}

/*
 * Remove the mappings for [addr, end) below one PUD table.
 *
 * @pud_base:   first entry of the PUD table covering the range
 * @addr:       start of the virtual range
 * @end:        end of the virtual range (exclusive)
 * @is_vmemmap: when true, backing pages of removed leaf mappings are released
 *              through free_vmemmap_storage()
 * @altmap:     forwarded to free_vmemmap_storage(); may be NULL
 *
 * Non-present entries are skipped. For a non-leaf PUD the walk descends into
 * the PMD table; afterwards free_pmd_table() is called only when
 * pgtable_l4_enabled is set. A leaf PUD is likewise only cleared (and, for
 * vmemmap, its PUD_SIZE backing storage released) when pgtable_l4_enabled is
 * set; otherwise it is left untouched.
 */
static void __meminit remove_pud_mapping(pud_t *pud_base, unsigned long addr, unsigned long end,
					 bool is_vmemmap, struct vmem_altmap *altmap)
{
	unsigned long cur, stop;
	pmd_t *pmd_table;
	pud_t *slot, val;

	for (cur = addr; cur < end; cur = stop) {
		stop = pud_addr_end(cur, end);
		slot = pud_base + pud_index(cur);
		val = pudp_get(slot);

		if (!pud_present(val))
			continue;

		if (!pud_leaf(val)) {
			pmd_table = pmd_offset(slot, 0);
			remove_pmd_mapping(pmd_table, cur, stop, is_vmemmap, altmap);

			if (pgtable_l4_enabled)
				free_pmd_table(pmd_table, slot);
			continue;
		}

		/* Leaf entry: only handled at this level with 4-level tables. */
		if (!pgtable_l4_enabled)
			continue;

		pud_clear(slot);
		if (is_vmemmap)
			free_vmemmap_storage(pud_page(val), PUD_SIZE, altmap);
	}
}

/*
 * Remove the mappings for [addr, end) below one P4D table.
 *
 * @p4d_base:   first entry of the P4D table covering the range
 * @addr:       start of the virtual range
 * @end:        end of the virtual range (exclusive)
 * @is_vmemmap: when true, backing pages of removed leaf mappings are released
 *              through free_vmemmap_storage()
 * @altmap:     forwarded to free_vmemmap_storage(); may be NULL
 *
 * Non-present entries are skipped. For a non-leaf P4D the walk descends into
 * the PUD table; afterwards free_pud_table() is called only when
 * pgtable_l5_enabled is set. A leaf P4D is likewise only cleared (and, for
 * vmemmap, its P4D_SIZE backing storage released) when pgtable_l5_enabled is
 * set; otherwise it is left untouched.
 */
static void __meminit remove_p4d_mapping(p4d_t *p4d_base, unsigned long addr, unsigned long end,
					 bool is_vmemmap, struct vmem_altmap *altmap)
{
	unsigned long cur, stop;
	pud_t *pud_table;
	p4d_t *slot, val;

	for (cur = addr; cur < end; cur = stop) {
		stop = p4d_addr_end(cur, end);
		slot = p4d_base + p4d_index(cur);
		val = p4dp_get(slot);

		if (!p4d_present(val))
			continue;

		if (!p4d_leaf(val)) {
			pud_table = pud_offset(slot, 0);
			remove_pud_mapping(pud_table, cur, stop, is_vmemmap, altmap);

			if (pgtable_l5_enabled)
				free_pud_table(pud_table, slot);
			continue;
		}

		/* Leaf entry: only handled at this level with 5-level tables. */
		if (!pgtable_l5_enabled)
			continue;

		p4d_clear(slot);
		if (is_vmemmap)
			free_vmemmap_storage(p4d_page(val), P4D_SIZE, altmap);
	}
}

/*
 * Tear down the kernel mappings for the virtual range [va, end).
 *
 * @va:         start of the virtual range
 * @end:        end of the virtual range (exclusive)
 * @is_vmemmap: passed down the walk; when true the lower levels also release
 *              the pages backing the removed mappings
 * @altmap:     passed down the walk; may be NULL
 *
 * The kernel page table is walked one PGD entry at a time. Entries that are
 * not present, or that are leaves, are skipped; every other entry is handed
 * to remove_p4d_mapping(). A single flush_tlb_all() is issued once the whole
 * range has been processed.
 */
static void __meminit remove_pgd_mapping(unsigned long va, unsigned long end, bool is_vmemmap,
					 struct vmem_altmap *altmap)
{
	unsigned long cur = va;
	unsigned long stop;
	pgd_t *pgdp;

	while (cur < end) {
		stop = pgd_addr_end(cur, end);
		pgdp = pgd_offset_k(cur);

		if (pgd_present(*pgdp) && !pgd_leaf(*pgdp))
			remove_p4d_mapping(p4d_offset(pgdp, 0), cur, stop, is_vmemmap, altmap);

		cur = stop;
	}

	flush_tlb_all();
}

/*
 * Remove the linear-map mappings for a physical range.
 *
 * @start: physical start address
 * @size:  length of the range in bytes
 *
 * Both range bounds are translated with __va() and the resulting virtual
 * range is torn down with remove_pgd_mapping(). is_vmemmap is false and no
 * altmap is passed, so only the mappings are removed; the mapped pages
 * themselves are not freed.
 */
static void __meminit remove_linear_mapping(phys_addr_t start, u64 size)
{
	remove_pgd_mapping((unsigned long)__va(start),
			   (unsigned long)__va(start + size),
			   false, NULL);
}

struct range arch_get_mappable_range(void)
{
struct range mhp_range;

mhp_range.start = __pa(PAGE_OFFSET);
mhp_range.end = __pa(PAGE_END - 1);
return mhp_range;
}

/*
 * Hot-add the physical range [start, start + size).
 *
 * @nid:    node id, forwarded to __add_pages()
 * @start:  physical start address of the range
 * @size:   length of the range in bytes
 * @params: hotplug parameters; ->pgprot supplies the protection used for the
 *          linear mapping and the whole structure is forwarded to
 *          __add_pages()
 *
 * The range is first mapped with create_linear_mapping_range() and then
 * registered through __add_pages(). If registration fails, the linear mapping
 * created above is removed again and the error is returned.
 *
 * On success max_pfn and max_low_pfn are raised to cover the new range. They
 * are only ever moved upwards: adding a range that ends below the current
 * max_pfn must not shrink it.
 *
 * The TLB is flushed on both the success and the failure path.
 *
 * Returns 0 on success or the error code from __add_pages().
 */
int __ref arch_add_memory(int nid, u64 start, u64 size, struct mhp_params *params)
{
	unsigned long end_pfn = PFN_UP(start + size);
	int ret;

	create_linear_mapping_range(start, start + size, 0, &params->pgprot);
	ret = __add_pages(nid, start >> PAGE_SHIFT, size >> PAGE_SHIFT, params);
	if (ret) {
		/* Undo the linear mapping so no stale mapping is left behind. */
		remove_linear_mapping(start, size);
		goto out;
	}

	if (end_pfn > max_pfn) {
		max_pfn = end_pfn;
		max_low_pfn = max_pfn;
	}

out:
	flush_tlb_all();
	return ret;
}

/*
 * Hot-remove the physical range [start, start + size).
 *
 * @start:  physical start address of the range
 * @size:   length of the range in bytes
 * @altmap: forwarded to __remove_pages(); may be NULL
 *
 * The pages are first unregistered with __remove_pages(), then the linear
 * mapping of the range is removed, and finally the TLB is flushed.
 */
void __ref arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap)
{
	unsigned long first_pfn = start >> PAGE_SHIFT;
	unsigned long nr_pages = size >> PAGE_SHIFT;

	__remove_pages(first_pfn, nr_pages, altmap);
	remove_linear_mapping(start, size);
	flush_tlb_all();
}

/*
 * Tear down the vmemmap mappings for the virtual range [start, end).
 *
 * @start:  start of the virtual range
 * @end:    end of the virtual range (exclusive)
 * @altmap: alternative allocator the backing pages came from, or NULL
 *
 * Forwards to remove_pgd_mapping() with is_vmemmap set to true, so in
 * addition to removing the mappings the walk also releases the pages that
 * backed them (through @altmap when one is supplied).
 */
void __ref vmemmap_free(unsigned long start, unsigned long end, struct vmem_altmap *altmap)
{
remove_pgd_mapping(start, end, true, altmap);
}
#endif /* CONFIG_MEMORY_HOTPLUG */

0 comments on commit c75a74f

Please sign in to comment.