diff --git a/libc/docs/dev/undefined_behavior.rst b/libc/docs/dev/undefined_behavior.rst index 7a6b5d71b35d..0cb25c7f2a23 100644 --- a/libc/docs/dev/undefined_behavior.rst +++ b/libc/docs/dev/undefined_behavior.rst @@ -62,3 +62,10 @@ Often the standard will imply an intended behavior through what it states is und Ignoring Bug-For-Bug Compatibility ---------------------------------- Any long running implementations will have bugs and deviations from the standard. Hyrum's Law states that “all observable behaviors of your system will be depended on by somebody” which includes these bugs. An example of a long-standing bug is glibc's scanf float parsing behavior. The behavior is specifically defined in the standard, but it isn't adhered to by all libc implementations. There is a longstanding bug in glibc where it incorrectly parses the string 100er and this caused the C standard to add that specific example to the definition for scanf. The intended behavior is for scanf, when parsing a float, to parse the longest possibly valid prefix and then accept it if and only if that complete parsed value is a float. In the case of 100er the longest possibly valid prefix is 100e but the float parsed from that string is only 100. Since there is no number after the e it shouldn't be included in the float, so scanf should return a parsing error. For LLVM's libc it was decided to follow the standard, even though glibc's version is slightly simpler to implement and this edge case is rare. Following the standard must be the first priority, since that's the goal of the library. + +Design Decisions +================ + +Resizable Tables for hsearch +---------------------------- +The POSIX.1 standard does not delineate the behavior consequent to invoking hsearch or hdestroy without prior initialization of the hash table via hcreate. Furthermore, the standard does not specify the outcomes of successive invocations of hsearch absent intervening hdestroy calls. 
Libraries such as MUSL and Glibc do not apply checks to these scenarios, potentially leading to memory corruption or leakage. Conversely, FreeBSD's libc and Bionic automatically initialize the hash table to a minimal size if it is found uninitialized, and proceed to destroy the table only if initialization has occurred. This approach also avoids redundant table allocation if an initialized hash table is already present. Given that the hash table starts with a minimal size, resizing becomes necessary to accommodate additional user insertions. LLVM's libc mirrors the approach of FreeBSD's libc and Bionic, owing to its enhanced robustness and user-friendliness. Notably, such resizing behavior itself aligns with POSIX.1 standards, which explicitly permit implementations to modify the capacity of the hash table. diff --git a/libc/src/__support/HashTable/CMakeLists.txt b/libc/src/__support/HashTable/CMakeLists.txt index 920ba194badd..dce37fdb5143 100644 --- a/libc/src/__support/HashTable/CMakeLists.txt +++ b/libc/src/__support/HashTable/CMakeLists.txt @@ -28,7 +28,6 @@ add_header_library( libc.include.llvm-libc-types.ENTRY libc.src.__support.CPP.bit libc.src.__support.CPP.new - libc.src.__support.CPP.type_traits libc.src.__support.hash libc.src.__support.macros.attributes libc.src.__support.macros.optimization diff --git a/libc/src/__support/HashTable/bitmask.h b/libc/src/__support/HashTable/bitmask.h index c775b09f2236..f97a7bccde32 100644 --- a/libc/src/__support/HashTable/bitmask.h +++ b/libc/src/__support/HashTable/bitmask.h @@ -31,9 +31,7 @@ namespace internal { // | Available | 0b1xxx'xxxx | // | Occupied | 0b0xxx'xxxx | // ============================= -template struct BitMaskAdaptor { - // A masked constant whose bits are all set. - LIBC_INLINE_VAR constexpr static T MASK = WORD_MASK; +template struct BitMaskAdaptor { // A stride in the bitmask may use multiple bits. 
LIBC_INLINE_VAR constexpr static size_t STRIDE = WORD_STRIDE; diff --git a/libc/src/__support/HashTable/generic/bitmask_impl.inc b/libc/src/__support/HashTable/generic/bitmask_impl.inc index fd451a73488e..b825cb5fbc44 100644 --- a/libc/src/__support/HashTable/generic/bitmask_impl.inc +++ b/libc/src/__support/HashTable/generic/bitmask_impl.inc @@ -34,14 +34,14 @@ LIBC_INLINE constexpr bitmask_t repeat_byte(bitmask_t byte) { return byte; } -using BitMask = BitMaskAdaptor; +using BitMask = BitMaskAdaptor; using IteratableBitMask = IteratableBitMaskAdaptor; struct Group { bitmask_t data; // Load a group of control words from an arbitary address. - LIBC_INLINE static Group load(const void *__restrict addr) { + LIBC_INLINE static Group load(const void *addr) { union { bitmask_t value; char bytes[sizeof(bitmask_t)]; @@ -51,6 +51,11 @@ struct Group { return {data.value}; } + // Load a group of control words from an aligned address. + LIBC_INLINE static Group load_aligned(const void *addr) { + return *static_cast(addr); + } + // Find out the lanes equal to the given byte and return the bitmask // with corresponding bits set. LIBC_INLINE IteratableBitMask match_byte(uint8_t byte) const { @@ -106,6 +111,10 @@ struct Group { LIBC_INLINE BitMask mask_available() const { return {LIBC_NAMESPACE::Endian::to_little_endian(data) & repeat_byte(0x80)}; } + + LIBC_INLINE IteratableBitMask occupied() const { + return {static_cast(mask_available().word ^ repeat_byte(0x80))}; + } }; } // namespace internal } // namespace LIBC_NAMESPACE diff --git a/libc/src/__support/HashTable/sse2/bitmask_impl.inc b/libc/src/__support/HashTable/sse2/bitmask_impl.inc index 6308f2fed666..d65240901ed4 100644 --- a/libc/src/__support/HashTable/sse2/bitmask_impl.inc +++ b/libc/src/__support/HashTable/sse2/bitmask_impl.inc @@ -12,17 +12,22 @@ namespace internal { // With SSE2, every bitmask is iteratable as // we use single bit to encode the data. 
-using BitMask = BitMaskAdaptor; +using BitMask = BitMaskAdaptor; using IteratableBitMask = IteratableBitMaskAdaptor; struct Group { __m128i data; // Load a group of control words from an arbitary address. - LIBC_INLINE static Group load(const void *__restrict addr) { + LIBC_INLINE static Group load(const void *addr) { return {_mm_loadu_si128(static_cast(addr))}; } + // Load a group of control words from an aligned address. + LIBC_INLINE static Group load_aligned(const void *addr) { + return {_mm_load_si128(static_cast(addr))}; + } + // Find out the lanes equal to the given byte and return the bitmask // with corresponding bits set. LIBC_INLINE IteratableBitMask match_byte(uint8_t byte) const { @@ -35,6 +40,10 @@ struct Group { auto bitmask = static_cast(_mm_movemask_epi8(data)); return {bitmask}; } + + LIBC_INLINE IteratableBitMask occupied() const { + return {static_cast(~mask_available().word)}; + } }; } // namespace internal } // namespace LIBC_NAMESPACE diff --git a/libc/src/__support/HashTable/table.h b/libc/src/__support/HashTable/table.h index 305fe59792d5..d70ca4d23380 100644 --- a/libc/src/__support/HashTable/table.h +++ b/libc/src/__support/HashTable/table.h @@ -12,7 +12,6 @@ #include "include/llvm-libc-types/ENTRY.h" #include "src/__support/CPP/bit.h" // bit_ceil #include "src/__support/CPP/new.h" -#include "src/__support/CPP/type_traits.h" #include "src/__support/HashTable/bitmask.h" #include "src/__support/hash.h" #include "src/__support/macros/attributes.h" @@ -79,7 +78,7 @@ LIBC_INLINE size_t capacity_to_entries(size_t cap) { // | N * Entry | // ======================= <- align boundary // | Header | -// ======================= +// ======================= <- align boundary (for fast resize) // | (N + 1) * Byte | // ======================= // @@ -94,6 +93,20 @@ private: // How many entries are there in the table. LIBC_INLINE size_t num_of_entries() const { return entries_mask + 1; } + // How many entries can we store in the table before resizing. 
+ LIBC_INLINE size_t full_capacity() const { return num_of_entries() / 8 * 7; } + + // The alignment of the whole memory area is the maximum of the alignment + // among the following types: + // - HashTable + // - ENTRY + // - Group + LIBC_INLINE constexpr static size_t table_alignment() { + size_t left_align = alignof(HashTable) > alignof(ENTRY) ? alignof(HashTable) + : alignof(ENTRY); + return left_align > alignof(Group) ? left_align : alignof(Group); + } + LIBC_INLINE bool is_full() const { return available_slots == 0; } LIBC_INLINE size_t offset_from_entries() const { @@ -102,24 +115,30 @@ private: SafeMemSize::offset_to(entries_size, table_alignment()); } - LIBC_INLINE constexpr static size_t table_alignment() { - return alignof(HashTable) > alignof(ENTRY) ? alignof(HashTable) - : alignof(ENTRY); - } - LIBC_INLINE constexpr static size_t offset_to_groups() { - return sizeof(HashTable); + size_t header_size = sizeof(HashTable); + return header_size + SafeMemSize::offset_to(header_size, table_alignment()); } LIBC_INLINE ENTRY &entry(size_t i) { return reinterpret_cast(this)[-i - 1]; } + LIBC_INLINE const ENTRY &entry(size_t i) const { + return reinterpret_cast(this)[-i - 1]; + } + LIBC_INLINE uint8_t &control(size_t i) { uint8_t *ptr = reinterpret_cast(this) + offset_to_groups(); return ptr[i]; } + LIBC_INLINE const uint8_t &control(size_t i) const { + const uint8_t *ptr = + reinterpret_cast(this) + offset_to_groups(); + return ptr[i]; + } + // We duplicate a group of control bytes to the end. Thus, it is possible that // we need to set two control bytes at the same time. 
LIBC_INLINE void set_ctrl(size_t index, uint8_t value) { @@ -128,6 +147,107 @@ private: control(index2) = value; } + LIBC_INLINE size_t find(const char *key, uint64_t primary) { + uint8_t secondary = secondary_hash(primary); + ProbeSequence sequence{static_cast(primary), 0, entries_mask}; + while (true) { + size_t pos = sequence.next(); + Group ctrls = Group::load(&control(pos)); + IteratableBitMask masks = ctrls.match_byte(secondary); + for (size_t i : masks) { + size_t index = (pos + i) & entries_mask; + ENTRY &entry = this->entry(index); + if (LIBC_LIKELY(entry.key != nullptr && strcmp(entry.key, key) == 0)) + return index; + } + BitMask available = ctrls.mask_available(); + // Since there is no deletion, the first time we find an available slot + // it is also ready to be used as an insertion point. Therefore, we also + // return the first available slot we find. If such entry is empty, the + // key will be nullptr. + if (LIBC_LIKELY(available.any_bit_set())) { + size_t index = + (pos + available.lowest_set_bit_nonzero()) & entries_mask; + return index; + } + } + } + + LIBC_INLINE uint64_t oneshot_hash(const char *key) const { + LIBC_NAMESPACE::internal::HashState hasher = state; + hasher.update(key, strlen(key)); + return hasher.finish(); + } + + // A fast insertion routine without checking if a key already exists. + // Nor does the routine check if the table is full. + // This is only to be used in grow() where we insert all existing entries + // into a new table. Hence, the requirements are naturally satisfied. 
+ LIBC_INLINE ENTRY *unsafe_insert(ENTRY item) { + uint64_t primary = oneshot_hash(item.key); + uint8_t secondary = secondary_hash(primary); + ProbeSequence sequence{static_cast(primary), 0, entries_mask}; + while (true) { + size_t pos = sequence.next(); + Group ctrls = Group::load(&control(pos)); + BitMask available = ctrls.mask_available(); + if (available.any_bit_set()) { + size_t index = + (pos + available.lowest_set_bit_nonzero()) & entries_mask; + set_ctrl(index, secondary); + entry(index).key = item.key; + entry(index).data = item.data; + available_slots--; + return &entry(index); + } + } + } + + LIBC_INLINE HashTable *grow() const { + size_t hint = full_capacity() + 1; + HashState state = this->state; + // migrate to a new random state + state.update(&hint, sizeof(hint)); + HashTable *new_table = allocate(hint, state.finish()); + // It is safe to call unsafe_insert() because we know that: + // - the new table has enough capacity to hold all the entries + // - there is no duplicate key in the old table + if (new_table != nullptr) + for (ENTRY e : *this) + new_table->unsafe_insert(e); + return new_table; + } + + LIBC_INLINE static ENTRY *insert(HashTable *&table, ENTRY item, + uint64_t primary) { + auto index = table->find(item.key, primary); + auto slot = &table->entry(index); + // SVr4 and POSIX.1-2001 specify that action is significant only for + // unsuccessful searches, so that an ENTER should not do anything + // for a successful search. + if (slot->key != nullptr) + return slot; + + // if the table is full, we try to grow the table + if (table->is_full()) { + HashTable *new_table = table->grow(); + // allocation failed, return nullptr to indicate failure + if (new_table == nullptr) + return nullptr; + // resized successfully: clean up the old table and use the new one + deallocate(table); + table = new_table; + // it is still valid to use the fastpath insertion. 
+ return table->unsafe_insert(item); + } + + table->set_ctrl(index, secondary_hash(primary)); + slot->key = item.key; + slot->data = item.data; + table->available_slots--; + return slot; + } + public: LIBC_INLINE static void deallocate(HashTable *table) { if (table) { @@ -136,6 +256,7 @@ public: operator delete(ptr, std::align_val_t{table_alignment()}); } } + LIBC_INLINE static HashTable *allocate(size_t capacity, uint64_t randomness) { // check if capacity_to_entries overflows MAX_MEM_SIZE if (capacity > size_t{1} << (8 * sizeof(size_t) - 1 - 3)) @@ -166,68 +287,65 @@ public: return table; } -private: - LIBC_INLINE size_t find(const char *key, uint64_t primary) { - uint8_t secondary = secondary_hash(primary); - ProbeSequence sequence{static_cast(primary), 0, entries_mask}; - while (true) { - size_t pos = sequence.next(); - Group ctrls = Group::load(&control(pos)); - IteratableBitMask masks = ctrls.match_byte(secondary); - for (size_t i : masks) { - size_t index = (pos + i) & entries_mask; - ENTRY &entry = this->entry(index); - if (LIBC_LIKELY(entry.key != nullptr && strcmp(entry.key, key) == 0)) - return index; - } - BitMask available = ctrls.mask_available(); - // Since there is no deletion, the first time we find an available slot - // it is also ready to be used as an insertion point. Therefore, we also - // return the first available slot we find. If such entry is empty, the - // key will be nullptr. 
- if (LIBC_LIKELY(available.any_bit_set())) { - size_t index = - (pos + available.lowest_set_bit_nonzero()) & entries_mask; - return index; + struct FullTableIterator { + size_t current_offset; + size_t remaining; + IteratableBitMask current_mask; + const HashTable &table; + + // It is fine to use remaining to represent the iterator: + // - this comparison only happens with the same table + // - hashtable will not be mutated during the iteration + LIBC_INLINE bool operator==(const FullTableIterator &other) const { + return remaining == other.remaining; + } + LIBC_INLINE bool operator!=(const FullTableIterator &other) const { + return remaining != other.remaining; + } + + LIBC_INLINE FullTableIterator &operator++() { + this->ensure_valid_group(); + current_mask.remove_lowest_bit(); + remaining--; + return *this; + } + LIBC_INLINE const ENTRY &operator*() { + this->ensure_valid_group(); + return table.entry( + (current_offset + current_mask.lowest_set_bit_nonzero()) & + table.entries_mask); + } + + private: + LIBC_INLINE void ensure_valid_group() { + while (!current_mask.any_bit_set()) { + current_offset += sizeof(Group); + // It is ensured that the load will only happen at aligned boundaries. + current_mask = + Group::load_aligned(&table.control(current_offset)).occupied(); } } + }; + + using value_type = ENTRY; + using iterator = FullTableIterator; + iterator begin() const { + return {0, full_capacity() - available_slots, + Group::load_aligned(&control(0)).occupied(), *this}; } + iterator end() const { return {0, 0, {0}, *this}; } -private: - LIBC_INLINE ENTRY *insert(ENTRY item, uint64_t primary) { - auto index = find(item.key, primary); - auto slot = &this->entry(index); - // SVr4 and POSIX.1-2001 specify that action is significant only for - // unsuccessful searches, so that an ENTER should not do anything - // for a successful search. 
- if (slot->key != nullptr) - return slot; - - if (!is_full()) { - set_ctrl(index, secondary_hash(primary)); - slot->key = item.key; - slot->data = item.data; - available_slots--; - return slot; - } - return nullptr; - } - -public: LIBC_INLINE ENTRY *find(const char *key) { - LIBC_NAMESPACE::internal::HashState hasher = state; - hasher.update(key, strlen(key)); - uint64_t primary = hasher.finish(); + uint64_t primary = oneshot_hash(key); ENTRY &entry = this->entry(find(key, primary)); if (entry.key == nullptr) return nullptr; return &entry; } - LIBC_INLINE ENTRY *insert(ENTRY item) { - LIBC_NAMESPACE::internal::HashState hasher = state; - hasher.update(item.key, strlen(item.key)); - uint64_t primary = hasher.finish(); - return insert(item, primary); + + LIBC_INLINE static ENTRY *insert(HashTable *&table, ENTRY item) { + uint64_t primary = table->oneshot_hash(item.key); + return insert(table, item, primary); } }; } // namespace internal diff --git a/libc/src/search/CMakeLists.txt b/libc/src/search/CMakeLists.txt index 4ae5274a3ba9..24a4ba67decf 100644 --- a/libc/src/search/CMakeLists.txt +++ b/libc/src/search/CMakeLists.txt @@ -36,7 +36,7 @@ add_entrypoint_object( DEPENDS libc.src.search.hsearch.global libc.src.__support.HashTable.table - libc.src.__support.libc_assert + libc.src.__support.HashTable.randomness libc.src.errno.errno libc.include.search ) @@ -62,7 +62,6 @@ add_entrypoint_object( DEPENDS libc.src.search.hsearch.global libc.src.__support.HashTable.table - libc.src.__support.libc_assert libc.include.search ) diff --git a/libc/src/search/hcreate.cpp b/libc/src/search/hcreate.cpp index 9c05e317a2d0..4bf638b5920e 100644 --- a/libc/src/search/hcreate.cpp +++ b/libc/src/search/hcreate.cpp @@ -14,6 +14,12 @@ namespace LIBC_NAMESPACE { LLVM_LIBC_FUNCTION(int, hcreate, (size_t capacity)) { + // We follow FreeBSD's implementation here. If the global_hash_table is + // already initialized, this function will do nothing and return 1. 
+ // https://cgit.freebsd.org/src/tree/lib/libc/stdlib/hcreate.c + if (internal::global_hash_table != nullptr) + return 1; + uint64_t randomness = internal::randomness::next_random_seed(); internal::HashTable *table = internal::HashTable::allocate(capacity, randomness); diff --git a/libc/src/search/hdestroy.cpp b/libc/src/search/hdestroy.cpp index 1af64f195e32..3c5ea7b7af03 100644 --- a/libc/src/search/hdestroy.cpp +++ b/libc/src/search/hdestroy.cpp @@ -8,12 +8,12 @@ #include "src/search/hdestroy.h" #include "src/__support/HashTable/table.h" -#include "src/__support/libc_assert.h" #include "src/search/hsearch/global.h" namespace LIBC_NAMESPACE { LLVM_LIBC_FUNCTION(void, hdestroy, (void)) { - LIBC_ASSERT(internal::global_hash_table != nullptr); + // HashTable::deallocate will check for nullptr. It will be a no-op if + // global_hash_table is null. internal::HashTable::deallocate(internal::global_hash_table); internal::global_hash_table = nullptr; } diff --git a/libc/src/search/hsearch.cpp b/libc/src/search/hsearch.cpp index 3a0d09aae835..5aeb5c29449e 100644 --- a/libc/src/search/hsearch.cpp +++ b/libc/src/search/hsearch.cpp @@ -7,24 +7,37 @@ //===----------------------------------------------------------------------===// #include "src/search/hsearch.h" +#include "src/__support/HashTable/randomness.h" #include "src/__support/HashTable/table.h" -#include "src/__support/libc_assert.h" #include "src/errno/libc_errno.h" #include "src/search/hsearch/global.h" namespace LIBC_NAMESPACE { LLVM_LIBC_FUNCTION(ENTRY *, hsearch, (ENTRY item, ACTION action)) { ENTRY *result; - LIBC_ASSERT(internal::global_hash_table != nullptr); + if (internal::global_hash_table == nullptr) { + // If global_hash_table is null, we create a new hash table with a minimal + // capacity. Such hashtable will be expanded as needed. 
+ uint64_t randomness = internal::randomness::next_random_seed(); + internal::global_hash_table = internal::HashTable::allocate(0, randomness); + } + + // In rare cases, the global hashtable may still fail to allocate. We treat it + // as ESRCH or ENOMEM depending on the action. switch (action) { case FIND: - result = internal::global_hash_table->find(item.key); + result = internal::global_hash_table + ? internal::global_hash_table->find(item.key) + : nullptr; if (result == nullptr) { libc_errno = ESRCH; } break; case ENTER: - result = internal::global_hash_table->insert(item); + result = + internal::global_hash_table + ? internal::HashTable::insert(internal::global_hash_table, item) + : nullptr; if (result == nullptr) { libc_errno = ENOMEM; } diff --git a/libc/src/search/hsearch_r.cpp b/libc/src/search/hsearch_r.cpp index 958fba7c00d0..a2c3a86eded6 100644 --- a/libc/src/search/hsearch_r.cpp +++ b/libc/src/search/hsearch_r.cpp @@ -29,7 +29,8 @@ LLVM_LIBC_FUNCTION(int, hsearch_r, } break; case ENTER: - *retval = table->insert(item); + *retval = internal::HashTable::insert(table, item); + htab->__opaque = table; if (*retval == nullptr) { libc_errno = ENOMEM; return 0; diff --git a/libc/test/src/__support/HashTable/bitmask_test.cpp b/libc/test/src/__support/HashTable/bitmask_test.cpp index c816c5d10638..5203220e9b5c 100644 --- a/libc/test/src/__support/HashTable/bitmask_test.cpp +++ b/libc/test/src/__support/HashTable/bitmask_test.cpp @@ -11,8 +11,8 @@ namespace LIBC_NAMESPACE { namespace internal { -using ShortBitMask = BitMaskAdaptor; -using LargeBitMask = BitMaskAdaptor; +using ShortBitMask = BitMaskAdaptor; +using LargeBitMask = BitMaskAdaptor; TEST(LlvmLibcHashTableBitMaskTest, SingleBitStrideLowestSetBit) { uint16_t data = 0xffff; @@ -53,7 +53,7 @@ TEST(LlvmLibcHashTableBitMaskTest, SingleBitStrideIteration) { TEST(LlvmLibcHashTableBitMaskTest, MultiBitStrideIteration) { using Iter = IteratableBitMaskAdaptor; - uint64_t data = Iter::MASK; + uint64_t data = 
0x8080808080808080ul; for (size_t i = 0; i < 8; ++i) { Iter iter = {data}; size_t j = i; diff --git a/libc/test/src/__support/HashTable/table_test.cpp b/libc/test/src/__support/HashTable/table_test.cpp index 715bd6588d23..dcae6f4e8fca 100644 --- a/libc/test/src/__support/HashTable/table_test.cpp +++ b/libc/test/src/__support/HashTable/table_test.cpp @@ -30,13 +30,63 @@ TEST(LlvmLibcTableTest, AllocationAndDeallocation) { HashTable::deallocate(nullptr); } +TEST(LlvmLibcTableTest, Iteration) { + constexpr size_t TEST_SIZE = 512; + size_t counter[TEST_SIZE]; + struct key { + uint8_t bytes[3]; + } keys[TEST_SIZE]; + HashTable *table = HashTable::allocate(0, 0x7f7f7f7f7f7f7f7f); + ASSERT_NE(table, static_cast(nullptr)); + for (size_t i = 0; i < TEST_SIZE; ++i) { + counter[i] = 0; + if (i >= 256) { + keys[i].bytes[0] = 2; + keys[i].bytes[1] = i % 256; + keys[i].bytes[2] = 0; + } else { + keys[i].bytes[0] = 1; + keys[i].bytes[1] = i; + keys[i].bytes[2] = 0; + } + HashTable::insert(table, {reinterpret_cast(keys[i].bytes), + reinterpret_cast((size_t)i)}); + } + + size_t count = 0; + for (const ENTRY &e : *table) { + size_t data = reinterpret_cast(e.data); + ++counter[data]; + ++count; + } + ASSERT_EQ(count, TEST_SIZE); + for (size_t i = 0; i < TEST_SIZE; ++i) { + ASSERT_EQ(counter[i], static_cast(1)); + } + HashTable::deallocate(table); +} + +// Check if resize works correctly. This test actually covers two things: +// - The sizes are indeed growing. +// - The sizes are growing rapidly enough to reach the upper bound. +TEST(LlvmLibcTableTest, GrowthSequence) { + size_t cap = capacity_to_entries(0); + // right shift 4 to avoid overflow ssize_t. 
+ while (cap < static_cast(-1) >> 4u) { + size_t hint = cap / 8 * 7 + 1; + size_t new_cap = capacity_to_entries(hint); + ASSERT_GT(new_cap, cap); + cap = new_cap; + } +} + TEST(LlvmLibcTableTest, Insertion) { union key { - uint64_t value; - char bytes[8]; + char bytes[2]; } keys[256]; for (size_t k = 0; k < 256; ++k) { - keys[k].value = LIBC_NAMESPACE::Endian::to_little_endian(k); + keys[k].bytes[0] = static_cast(k); + keys[k].bytes[1] = 0; } constexpr size_t CAP = cpp::bit_ceil((sizeof(Group) + 1) * 8 / 7) / 8 * 7; static_assert(CAP + 1 < 256, "CAP is too large for this test."); @@ -46,27 +96,30 @@ TEST(LlvmLibcTableTest, Insertion) { // insert to full capacity. for (size_t i = 0; i < CAP; ++i) { - ASSERT_NE(table->insert({keys[i].bytes, keys[i].bytes}), + ASSERT_NE(HashTable::insert(table, {keys[i].bytes, keys[i].bytes}), static_cast(nullptr)); } - // one more insert should fail. - ASSERT_EQ(table->insert({keys[CAP + 1].bytes, keys[CAP + 1].bytes}), - static_cast(nullptr)); + // One more insert should grow the table successfully. We test the value + // here because the grow finishes with a fastpath insertion that is different + // from the normal insertion. 
+ ASSERT_EQ(HashTable::insert(table, {keys[CAP].bytes, keys[CAP].bytes})->data, + static_cast(keys[CAP].bytes)); - for (size_t i = 0; i < CAP; ++i) { + for (size_t i = 0; i <= CAP; ++i) { ASSERT_EQ(strcmp(table->find(keys[i].bytes)->key, keys[i].bytes), 0); } - for (size_t i = CAP; i < 256; ++i) { + for (size_t i = CAP + 1; i < 256; ++i) { ASSERT_EQ(table->find(keys[i].bytes), static_cast(nullptr)); } // do not replace old value - for (size_t i = 0; i < CAP; ++i) { - ASSERT_NE(table->insert({keys[i].bytes, reinterpret_cast(i)}), - static_cast(nullptr)); + for (size_t i = 0; i <= CAP; ++i) { + ASSERT_NE( + HashTable::insert(table, {keys[i].bytes, reinterpret_cast(i)}), + static_cast(nullptr)); } - for (size_t i = 0; i < CAP; ++i) { + for (size_t i = 0; i <= CAP; ++i) { ASSERT_EQ(table->find(keys[i].bytes)->data, reinterpret_cast(keys[i].bytes)); } diff --git a/libc/test/src/search/hsearch_test.cpp b/libc/test/src/search/hsearch_test.cpp index d6fdeec5714a..f7d94791f2bc 100644 --- a/libc/test/src/search/hsearch_test.cpp +++ b/libc/test/src/search/hsearch_test.cpp @@ -51,17 +51,21 @@ constexpr size_t CAP = LIBC_NAMESPACE::cpp::bit_ceil((GROUP_SIZE + 1) * 8 / 7) / 8 * 7; static_assert(CAP < sizeof(search_data), "CAP too large"); -TEST(LlvmLibcHSearchTest, InsertTooMany) { +TEST(LlvmLibcHSearchTest, GrowFromZero) { using LIBC_NAMESPACE::testing::ErrnoSetterMatcher::Fails; - ASSERT_GT(LIBC_NAMESPACE::hcreate(GROUP_SIZE + 1), 0); - - for (size_t i = 0; i < CAP; ++i) { - ASSERT_EQ(LIBC_NAMESPACE::hsearch({&search_data[i], nullptr}, ENTER)->key, - &search_data[i]); + ASSERT_GT(LIBC_NAMESPACE::hcreate(0), 0); + for (size_t i = 0; i < sizeof(search_data) - 1; ++i) { + ENTRY *inserted = LIBC_NAMESPACE::hsearch( + {&search_data[i], reinterpret_cast(i)}, ENTER); + ASSERT_NE(inserted, static_cast(nullptr)); + ASSERT_EQ(inserted->key, &search_data[i]); } - ASSERT_THAT(static_cast( - LIBC_NAMESPACE::hsearch({search_data2, nullptr}, ENTER)), - Fails(ENOMEM, static_cast(nullptr))); + 
for (size_t i = sizeof(search_data) - 1; i != 0; --i) { + ASSERT_EQ( + LIBC_NAMESPACE::hsearch({&search_data[i - 1], nullptr}, FIND)->data, + reinterpret_cast(i - 1)); + } + LIBC_NAMESPACE::hdestroy(); } @@ -85,10 +89,10 @@ TEST(LlvmLibcHSearchTest, Found) { using LIBC_NAMESPACE::testing::ErrnoSetterMatcher::Fails; ASSERT_GT(LIBC_NAMESPACE::hcreate(GROUP_SIZE + 1), 0); for (size_t i = 0; i < CAP; ++i) { - ASSERT_EQ(LIBC_NAMESPACE::hsearch( - {&search_data[i], reinterpret_cast(i)}, ENTER) - ->key, - &search_data[i]); + ENTRY *inserted = LIBC_NAMESPACE::hsearch( + {&search_data[i], reinterpret_cast(i)}, ENTER); + ASSERT_NE(inserted, static_cast(nullptr)); + ASSERT_EQ(inserted->key, &search_data[i]); } for (size_t i = 0; i < CAP; ++i) { ASSERT_EQ(LIBC_NAMESPACE::hsearch({&search_data[i], nullptr}, FIND)->data,