From c5077953b7d939f54c0ca39ff9f00b258885ec83 Mon Sep 17 00:00:00 2001 From: Juan Sebastian Hoyos Ayala Date: Tue, 27 Dec 2022 15:30:20 -0800 Subject: [PATCH 1/2] CreateDump: Only add pages with committed memory Creating a dump against some kernel implementations is committing empty pages with the page probing technique we currently use during memory enumeration. This change uses the `pagemap` API's in the `/proc` system if available - both regarding existence and permissions - along with the `maps` api to decide what pages contain relevant information. This results in smaller dumps and no increase in resident memory usage when collecting a dump. Co-authored-by: Eugene Zhirov --- src/coreclr/debug/createdump/crashinfo.cpp | 130 +++++++++++------- src/coreclr/debug/createdump/crashinfo.h | 6 +- .../debug/createdump/crashinfounix.cpp | 28 +++- 3 files changed, 106 insertions(+), 58 deletions(-) diff --git a/src/coreclr/debug/createdump/crashinfo.cpp b/src/coreclr/debug/createdump/crashinfo.cpp index d939592efe816..fe87a9a0ae19f 100644 --- a/src/coreclr/debug/createdump/crashinfo.cpp +++ b/src/coreclr/debug/createdump/crashinfo.cpp @@ -26,7 +26,7 @@ CrashInfo::CrashInfo(pid_t pid, bool gatherFrames, pid_t crashThread, uint32_t s m_task = 0; #else m_auxvValues.fill(0); - m_fd = -1; + m_fdMem = -1; #endif } @@ -627,73 +627,105 @@ CrashInfo::InsertMemoryBackedRegion(const MemoryRegion& region) void CrashInfo::InsertMemoryRegion(const MemoryRegion& region) { - // First check if the full memory region can be added without conflicts and is fully valid. - const auto& found = m_memoryRegions.find(region); - if (found == m_memoryRegions.end()) + // Check if the new region overlaps with the previously added ones + const auto& conflictingRegion = m_memoryRegions.find(region); + const bool hasConflict = conflictingRegion != m_memoryRegions.end(); + if (hasConflict && conflictingRegion->Contains(region)) { - // If the region is valid, add the full memory region - if (ValidRegion(region)) { - m_memoryRegions.insert(region); - return; - } - } - else - { - // If the memory region is wholly contained in region found and both have the - // same backed by memory state, we're done. - if (found->Contains(region) && (found->IsBackedByMemory() == region.IsBackedByMemory())) { - return; - } + // The region is contained in the one we added before + // Nothing to do + return; } - // Either part of the region was invalid, part of it hasn't been added or the backed - // by memory state is different. - uint64_t start = region.StartAddress(); - // The region overlaps/conflicts with one already in the set so add one page at a - // time to avoid the overlapping pages. + // Go page by page and split the region into valid sub-regions + uint64_t pageStart = region.StartAddress(); uint64_t numberPages = region.Size() / PAGE_SIZE; - - for (size_t p = 0; p < numberPages; p++, start += PAGE_SIZE) + uint64_t subRegionStart, subRegionEnd; + int pagesAdded = 0; + subRegionStart = subRegionEnd = pageStart; + for (size_t p = 0; p < numberPages; p++, pageStart += PAGE_SIZE) { - MemoryRegion memoryRegionPage(region.Flags(), start, start + PAGE_SIZE); + MemoryRegion page(region.Flags(), pageStart, pageStart + PAGE_SIZE); - const auto& found = m_memoryRegions.find(memoryRegionPage); - if (found == m_memoryRegions.end()) + // avoid searching for conflicts if we know we don't have one + const bool pageHasConflicts = hasConflict && m_memoryRegions.find(page) != m_memoryRegions.end(); + // avoid validating the page if it conflicts: we won't add it in any case + const bool pageIsValid = !pageHasConflicts && PageMappedToPhysicalMemory(pageStart) && PageCanBeRead(pageStart); + + if (pageIsValid) { - // All the single pages added here will be combined in CombineMemoryRegions() - if (ValidRegion(memoryRegionPage)) { - m_memoryRegions.insert(memoryRegionPage); - } + subRegionEnd = page.EndAddress(); + pagesAdded++; } - else { - assert(found->IsBackedByMemory() || !region.IsBackedByMemory()); + else + { + // the next page is not valid thus sub-region is complete + if (subRegionStart != subRegionEnd) + { + m_memoryRegions.insert(MemoryRegion(region.Flags(), subRegionStart, subRegionEnd)); + } + subRegionStart = subRegionEnd = page.EndAddress(); } } + // add the last sub-region if it's not empty + if (subRegionStart != subRegionEnd) + { + m_memoryRegions.insert(MemoryRegion(region.Flags(), subRegionStart, subRegionEnd)); + } } // -// Validates a memory region +// Check the page is really used by the application before adding it to the dump +// On some kernels reading a region from createdump results in committing this region in the parent application +// That leads to OOM in container environment and unnecesserally increses the size of the dump file +// However this is an optimization: if it fails we still try to add the page to the dump // bool -CrashInfo::ValidRegion(const MemoryRegion& region) +CrashInfo::PageMappedToPhysicalMemory(uint64_t start) { - if (region.IsBackedByMemory()) - { - uint64_t start = region.StartAddress(); - - uint64_t numberPages = region.Size() / PAGE_SIZE; - for (size_t p = 0; p < numberPages; p++, start += PAGE_SIZE) + #if !defined(__linux__) + // this check has not been implemented yet for other unix systems + return true; + #else + // https://www.kernel.org/doc/Documentation/vm/pagemap.txt + if (m_fdPagemap == -1) { - BYTE buffer[1]; - size_t read; + // Weren't able to open pagemap file, so don't run this check + // Expected on kernels 4.0 and 4.1 as we need CAP_SYS_ADMIN to open /proc/pid/pagemap + // On kernels after 4.2 we only need PTRACE_MODE_READ_FSCREDS as we are ok with zeroed PFNs + return true; + } - if (!ReadProcessMemory((void*)start, buffer, 1, &read)) - { - return false; - } + uint64_t pagemapOffset = (start / PAGE_SIZE) * sizeof(uint64_t); + uint64_t seekResult = lseek64(m_fdPagemap, (off64_t) pagemapOffset, SEEK_SET); + if (seekResult != pagemapOffset) + { + int seekErrno = errno; + TRACE("Seeking in pagemap file FAILED, addr: %" PRIA PRIx ", pagemap offset: %" PRIA PRIx ", ERRNO %d: %s\n", start, pagemapOffset, seekErrno, strerror(seekErrno)); + return true; } - } - return true; + uint64_t value; + size_t readResult = read(m_fdPagemap, (void*)&value, sizeof(value)); + if (readResult == (size_t) -1) + { + int readErrno = errno; + TRACE("Reading of pagemap file FAILED, addr: %" PRIA PRIx ", pagemap offset: %" PRIA PRIx ", size: %zu, ERRNO %d: %s\n", start, pagemapOffset, sizeof(value), readErrno, strerror(readErrno)); + return true; + } + + bool is_page_present = (value & ((uint64_t)1 << 63)) != 0; + bool is_page_swapped = (value & ((uint64_t)1 << 62)) != 0; + TRACE_VERBOSE("Pagemap value for %" PRIA PRIx ", pagemap offset %" PRIA PRIx " is %" PRIA PRIx " -> %s\n", start, pagemapOffset, value, is_page_present ? "in memory" : (is_page_swapped ? "in swap" : "NOT in memory")); + return is_page_present || is_page_swapped; + #endif +} + +bool +CrashInfo::PageCanBeRead(uint64_t start) +{ + BYTE buffer[1]; + size_t read; + return ReadProcessMemory((void*)start, buffer, 1, &read); } // diff --git a/src/coreclr/debug/createdump/crashinfo.h b/src/coreclr/debug/createdump/crashinfo.h index e100b7f216102..e2407e896e45c 100644 --- a/src/coreclr/debug/createdump/crashinfo.h +++ b/src/coreclr/debug/createdump/crashinfo.h @@ -57,7 +57,8 @@ class CrashInfo : public ICLRDataEnumMemoryRegionsCallback, vm_map_t m_task; // the mach task for the process #else bool m_canUseProcVmReadSyscall; - int m_fd; // /proc//mem handle + int m_fdMem; // /proc//mem handle + int m_fdPagemap; // /proc//pagemap handle #endif std::string m_coreclrPath; // the path of the coreclr module or empty if none #ifdef __APPLE__ @@ -147,8 +148,9 @@ class CrashInfo : public ICLRDataEnumMemoryRegionsCallback, void InsertMemoryBackedRegion(const MemoryRegion& region); void InsertMemoryRegion(const MemoryRegion& region); uint32_t GetMemoryRegionFlags(uint64_t start); - bool ValidRegion(const MemoryRegion& region); void CombineMemoryRegions(); + bool PageCanBeRead(uint64_t start); + bool PageMappedToPhysicalMemory(uint64_t start); void Trace(const char* format, ...); void TraceVerbose(const char* format, ...); }; diff --git a/src/coreclr/debug/createdump/crashinfounix.cpp b/src/coreclr/debug/createdump/crashinfounix.cpp index 03712c3b6847d..05e4496a91389 100644 --- a/src/coreclr/debug/createdump/crashinfounix.cpp +++ b/src/coreclr/debug/createdump/crashinfounix.cpp @@ -17,12 +17,21 @@ CrashInfo::Initialize() char memPath[128]; _snprintf_s(memPath, sizeof(memPath), sizeof(memPath), "/proc/%lu/mem", m_pid); - m_fd = open(memPath, O_RDONLY); - if (m_fd == -1) + m_fdMem = open(memPath, O_RDONLY); + if (m_fdMem == -1) { printf_error("open(%s) FAILED %d (%s)\n", memPath, errno, strerror(errno)); return false; } + + char pagemapPath[128]; + _snprintf_s(pagemapPath, sizeof(pagemapPath), sizeof(pagemapPath), "/proc/%lu/pagemap", m_pid); + m_fdPagemap = open(pagemapPath, O_RDONLY); + if (m_fdPagemap == -1) + { + TRACE("open(%s) FAILED %d (%s), will fallback to dumping all memory regions without checking if they are committed\n", pagemapPath, errno, strerror(errno)); + } + // Get the process info if (!GetStatus(m_pid, &m_ppid, &m_tgid, &m_name)) { @@ -45,10 +54,15 @@ CrashInfo::CleanupAndResumeProcess() waitpid(thread->Tid(), &waitStatus, __WALL); } } - if (m_fd != -1) + if (m_fdMem != -1) + { + close(m_fdMem); + m_fdMem = -1; + } + if (m_fdPagemap != -1) { - close(m_fd); - m_fd = -1; + close(m_fdPagemap); + m_fdPagemap = -1; } } @@ -394,8 +408,8 @@ CrashInfo::ReadProcessMemory(void* address, void* buffer, size_t size, size_t* r // After all, the use of process_vm_readv is largely as a // performance optimization. m_canUseProcVmReadSyscall = false; - assert(m_fd != -1); - *read = pread64(m_fd, buffer, size, (off64_t)address); + assert(m_fdMem != -1); + *read = pread64(m_fdMem, buffer, size, (off64_t)address); } if (*read == (size_t)-1) From 27ff531370a2127cb8b777f95694a50a58703cd8 Mon Sep 17 00:00:00 2001 From: Juan Sebastian Hoyos Ayala Date: Tue, 27 Dec 2022 16:41:11 -0800 Subject: [PATCH 2/2] Add environment variable DbgDisablePagemapUse to disable pagemap use --- src/coreclr/debug/createdump/crashinfo.cpp | 2 +- .../debug/createdump/crashinfounix.cpp | 19 ++++++++++++++----- 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/src/coreclr/debug/createdump/crashinfo.cpp b/src/coreclr/debug/createdump/crashinfo.cpp index fe87a9a0ae19f..7c60334e981fc 100644 --- a/src/coreclr/debug/createdump/crashinfo.cpp +++ b/src/coreclr/debug/createdump/crashinfo.cpp @@ -690,7 +690,7 @@ CrashInfo::PageMappedToPhysicalMemory(uint64_t start) // https://www.kernel.org/doc/Documentation/vm/pagemap.txt if (m_fdPagemap == -1) { - // Weren't able to open pagemap file, so don't run this check + // Weren't able to open pagemap file or the user explicitly disabled it, so don't run this check. // Expected on kernels 4.0 and 4.1 as we need CAP_SYS_ADMIN to open /proc/pid/pagemap // On kernels after 4.2 we only need PTRACE_MODE_READ_FSCREDS as we are ok with zeroed PFNs return true; diff --git a/src/coreclr/debug/createdump/crashinfounix.cpp b/src/coreclr/debug/createdump/crashinfounix.cpp index 05e4496a91389..4723b7c253d11 100644 --- a/src/coreclr/debug/createdump/crashinfounix.cpp +++ b/src/coreclr/debug/createdump/crashinfounix.cpp @@ -24,12 +24,21 @@ CrashInfo::Initialize() return false; } - char pagemapPath[128]; - _snprintf_s(pagemapPath, sizeof(pagemapPath), sizeof(pagemapPath), "/proc/%lu/pagemap", m_pid); - m_fdPagemap = open(pagemapPath, O_RDONLY); - if (m_fdPagemap == -1) + char* disablePagemapUse = getenv("COMPlus_DbgDisablePagemapUse"); + if (disablePagemapUse != nullptr && strcmp(disablePagemapUse, "1") == 0) { - TRACE("open(%s) FAILED %d (%s), will fallback to dumping all memory regions without checking if they are committed\n", pagemapPath, errno, strerror(errno)); + TRACE("DbgDisablePagemapUse detected, will skip checking page for committed memory in memory enumeration.\n"); + m_fdPagemap = -1; + } + else + { + char pagemapPath[128]; + _snprintf_s(pagemapPath, sizeof(pagemapPath), sizeof(pagemapPath), "/proc/%lu/pagemap", m_pid); + m_fdPagemap = open(pagemapPath, O_RDONLY); + if (m_fdPagemap == -1) + { + TRACE("open(%s) FAILED %d (%s), will fallback to dumping all memory regions without checking if they are committed\n", pagemapPath, errno, strerror(errno)); + } } // Get the process info