[libc] Fix internal alignment in allocator (#146738)

Summary:
The allocator interface is supposed to have 16-byte alignment (to keep
it consistent with the CPU allocator; we could probably drop this to 8
if desired). But this was not enforced because the number of bytes used
for the bitfield sometimes resulted in alignment of 8 instead of 16.
Explicitly align the number of bytes to be a multiple of 16 even if
unused.
This commit is contained in:
Joseph Huber
2025-07-02 12:29:01 -05:00
committed by GitHub
parent e9be5286e1
commit 50f40a5327
3 changed files with 8 additions and 5 deletions

View File

@@ -189,7 +189,9 @@ struct Slab {
// Get the number of bytes needed to contain the bitfield bits.
constexpr static uint32_t bitfield_bytes(uint32_t chunk_size) {
return ((num_chunks(chunk_size) + BITS_IN_WORD - 1) / BITS_IN_WORD) * 8;
return __builtin_align_up(
((num_chunks(chunk_size) + BITS_IN_WORD - 1) / BITS_IN_WORD) * 8,
MIN_ALIGNMENT + 1);
}
// The actual amount of memory available excluding the bitfield and metadata.
@@ -584,7 +586,7 @@ void *aligned_allocate(uint32_t alignment, uint64_t size) {
// If the requested alignment is less than what we already provide this is
// just a normal allocation.
if (alignment < MIN_ALIGNMENT + 1)
if (alignment <= MIN_ALIGNMENT + 1)
return gpu::allocate(size);
// We can't handle alignments greater than 2MiB so we simply fail.
@@ -594,7 +596,7 @@ void *aligned_allocate(uint32_t alignment, uint64_t size) {
// Trying to handle allocation internally would break the assumption that each
// chunk is identical to each other. Allocate enough memory with worst-case
// alignment and then round up. The index logic will round down properly.
uint64_t rounded = size + alignment - 1;
uint64_t rounded = size + alignment - MIN_ALIGNMENT;
void *ptr = gpu::allocate(rounded);
return __builtin_align_up(ptr, alignment);
}

View File

@@ -10,7 +10,7 @@ TEST_MAIN(int, char **, char **) {
// aligned_alloc with valid alignment and size
void *ptr = LIBC_NAMESPACE::aligned_alloc(32, 16);
EXPECT_NE(ptr, nullptr);
EXPECT_EQ(__builtin_is_aligned(ptr, 32), 0U);
EXPECT_TRUE(__builtin_is_aligned(ptr, 32));
LIBC_NAMESPACE::free(ptr);
@@ -23,7 +23,7 @@ TEST_MAIN(int, char **, char **) {
void *div =
LIBC_NAMESPACE::aligned_alloc(alignment, (gpu::get_thread_id() + 1) * 4);
EXPECT_NE(div, nullptr);
EXPECT_EQ(__builtin_is_aligned(div, alignment), 0U);
EXPECT_TRUE(__builtin_is_aligned(div, alignment));
return 0;
}

View File

@@ -24,6 +24,7 @@ TEST_MAIN(int, char **, char **) {
int *divergent = reinterpret_cast<int *>(
LIBC_NAMESPACE::malloc((gpu::get_thread_id() + 1) * 16));
EXPECT_NE(divergent, nullptr);
EXPECT_TRUE(__builtin_is_aligned(divergent, 16));
*divergent = 1;
EXPECT_EQ(*divergent, 1);
LIBC_NAMESPACE::free(divergent);