Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

custom memmove implementation proposal. #593

Merged
merged 1 commit into from
Nov 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 48 additions & 0 deletions src/snmalloc/global/memcpy.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,16 @@ namespace snmalloc
}
}

template<size_t Size, size_t PrefetchOffset = 0>
SNMALLOC_FAST_PATH_INLINE void
block_reverse_copy(void* dst, const void* src, size_t len)
{
for (size_t i = (len - 1); int64_t(i + Size) >= 0; i -= Size)
{
copy_one<Size>(pointer_offset(dst, i), pointer_offset(src, i));
}
}

/**
* Perform an overlapping copy of the end. This will copy one (potentially
* unaligned) `T` from the end of the source to the end of the destination.
Expand Down Expand Up @@ -459,4 +469,42 @@ namespace snmalloc
Arch::copy(dst, src, len);
return orig_dst;
}

template<
bool Checked,
bool ReadsChecked = CheckReads,
typename Arch = DefaultArch>
SNMALLOC_FAST_PATH_INLINE void*
memmove(void* dst, const void* src, size_t len)
{
auto orig_dst = dst;
// we don't need to do external
// pointer checks if we hit it. It's also the fastest case, to encourage
// the compiler to favour the other cases.
if (SNMALLOC_UNLIKELY(len == 0 || dst == src))
{
return dst;
}

// Check the bounds of the arguments.
if (SNMALLOC_UNLIKELY(!check_bounds<(Checked && ReadsChecked)>(src, len)))
return report_fatal_bounds_error(
src, len, "memmove with source out of bounds of heap allocation");
if (SNMALLOC_UNLIKELY(!check_bounds<Checked>(dst, len)))
return report_fatal_bounds_error(
dst, len, "memmove with destination out of bounds of heap allocation");

if ((address_cast(dst) - address_cast(src)) < len)
{
// slow 'safe' reverse copy, we avoid optimised rollouts
// to cope with typical memmove use cases, moving
// one element to another address within the same
// contiguous space.
block_reverse_copy<1>(dst, src, len);
return dst;
}

Arch::copy(dst, src, len);
return orig_dst;
}
} // namespace snmalloc
9 changes: 9 additions & 0 deletions src/snmalloc/override/memcpy.cc
Original file line number Diff line number Diff line change
Expand Up @@ -10,4 +10,13 @@ extern "C"
{
return snmalloc::memcpy<true>(dst, src, len);
}

/**
* Snmalloc checked memmove.
*/
SNMALLOC_EXPORT void*
SNMALLOC_NAME_MANGLE(memmove)(void* dst, const void* src, size_t len)
{
return snmalloc::memmove<true>(dst, src, len);
}
}
79 changes: 76 additions & 3 deletions src/test/func/memcpy/func-memcpy.cc
Original file line number Diff line number Diff line change
Expand Up @@ -68,15 +68,24 @@ extern "C" void abort()
* fills one with a well-known pattern, and then copies subsets of this at
* one-byte increments to a target. This gives us unaligned starts.
*/
template<bool overlap>
void check_size(size_t size)
{
START_TEST("checking {}-byte memcpy", size);
if constexpr (!overlap)
{
START_TEST("checking {}-byte memcpy", size);
}
else
{
START_TEST("checking {}-byte memmove", size);
}
auto* s = static_cast<unsigned char*>(my_malloc(size + 1));
auto* d = static_cast<unsigned char*>(my_malloc(size + 1));
d[size] = 0;
s[size] = 255;
for (size_t start = 0; start < size; start++)
{
void* ret;
unsigned char* src = s + start;
unsigned char* dst = d + start;
size_t sz = (size - start);
Expand All @@ -88,7 +97,14 @@ void check_size(size_t size)
{
dst[i] = 0;
}
void* ret = my_memcpy(dst, src, sz);
if constexpr (!overlap)
{
ret = my_memcpy(dst, src, sz);
}
else
{
ret = my_memmove(dst, src, sz);
}
EXPECT(ret == dst, "Return value should be {}, was {}", dst, ret);
for (size_t i = 0; i < sz; ++i)
{
Expand Down Expand Up @@ -147,6 +163,50 @@ void check_bounds(size_t size, size_t out_of_bounds)
my_free(d);
}

void check_overlaps1()
{
size_t size = 16;
START_TEST("memmove overlaps1");
auto* s = static_cast<unsigned int*>(my_malloc(size * sizeof(unsigned int)));
for (size_t i = 0; i < size; ++i)
{
s[i] = static_cast<unsigned int>(i);
}
my_memmove(&s[2], &s[4], sizeof(s[0]));
EXPECT(s[2] == s[4], "overlap error: {} {}", s[2], s[4]);
my_memmove(&s[15], &s[5], sizeof(s[0]));
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Isn't s[15] out of bounds when size == 8?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ah right I forgot to change in the beginning the sizes set were fixed :-)

EXPECT(s[15] == s[5], "overlap error: {} {}", s[15], s[5]);
auto ptr = s;
my_memmove(ptr, s, size * sizeof(s[0]));
EXPECT(ptr == s, "overlap error: {} {}", ptr, s);
my_free(s);
}

template<bool after>
void check_overlaps2(size_t size)
{
START_TEST("memmove overlaps2, size {}", size);
auto sz = size / 2;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wonder if it would be good to separate the offset from the size being copied. The current test only covers very small overlaps.

Suggested change
auto sz = size / 2;
auto sz = size / 2;
auto offset = size / 3;

Then use offset in the calculations using after?

auto offset = size / 2;
auto* s = static_cast<unsigned int*>(my_malloc(size * sizeof(unsigned int)));
for (size_t i = 0; i < size; ++i)
{
s[i] = static_cast<unsigned int>(i);
}
auto dst = after ? s + offset : s;
auto src = after ? s : s + offset;
size_t i = after ? 0 : offset;
size_t u = 0;
my_memmove(dst, src, sz * sizeof(unsigned int));
while (u < sz)
{
EXPECT(dst[u] == i, "overlap error: {} {}", dst[u], i);
u++;
i++;
}
my_free(s);
}

int main()
{
// Skip the checks that expect bounds checks to fail when we are not the
Expand Down Expand Up @@ -175,7 +235,20 @@ int main()
# endif
for (size_t x = 0; x < 2048; x++)
{
check_size(x);
check_size<false>(x);
}

for (size_t x = 0; x < 2048; x++)
{
check_size<true>(x);
}

check_overlaps1();

for (size_t x = 8; x < 256; x += 64)
{
check_overlaps2<false>(x);
check_overlaps2<true>(x);
}
}
#endif
Loading