[Offload] Guard olMemAlloc/Free with a mutex (#153786)

Both of these functions update the `AllocInfoMap` structure in the context,
but they did not take any locks, which caused random failures in threaded
code. They now guard every access to the map with a mutex.
This commit is contained in:
Ross Brunton
2025-08-20 13:23:57 +01:00
committed by GitHub
parent 4c295216e4
commit c8986d1ecb

View File

@@ -125,6 +125,7 @@ struct OffloadContext {
bool TracingEnabled = false;
bool ValidationEnabled = true;
DenseMap<void *, AllocInfo> AllocInfoMap{};
std::mutex AllocInfoMapMutex{};
SmallVector<ol_platform_impl_t, 4> Platforms{};
size_t RefCount;
@@ -534,26 +535,33 @@ Error olMemAlloc_impl(ol_device_handle_t Device, ol_alloc_type_t Type,
return Alloc.takeError();
*AllocationOut = *Alloc;
OffloadContext::get().AllocInfoMap.insert_or_assign(*Alloc,
AllocInfo{Device, Type});
{
std::lock_guard<std::mutex> Lock(OffloadContext::get().AllocInfoMapMutex);
OffloadContext::get().AllocInfoMap.insert_or_assign(
*Alloc, AllocInfo{Device, Type});
}
return Error::success();
}
/// Free the allocation at \p Address that was previously recorded in the
/// context's AllocInfoMap.
///
/// \param Address pointer used as the key into AllocInfoMap.
/// \returns an INVALID_ARGUMENT error if \p Address has no entry in the map,
///          the error produced by the device's dataDelete() if that call
///          fails, and Error::success() otherwise.
Error olMemFree_impl(void *Address) {
  ol_device_handle_t Device;
  ol_alloc_type_t Type;
  {
    // The lookup and the removal are done under a single lock so that no
    // other thread can touch AllocInfoMap between them. Only this bookkeeping
    // is guarded; the lock is released at the end of this scope.
    auto &Ctx = OffloadContext::get();
    std::lock_guard<std::mutex> Lock(Ctx.AllocInfoMapMutex);

    auto It = Ctx.AllocInfoMap.find(Address);
    if (It == Ctx.AllocInfoMap.end())
      return createOffloadError(ErrorCode::INVALID_ARGUMENT,
                                "address is not a known allocation");

    // Copy out what the device call needs before the entry is dropped.
    Device = It->second.Device;
    Type = It->second.Type;
    Ctx.AllocInfoMap.erase(It);
  }

  // Runs without AllocInfoMapMutex held. Note that the map entry has already
  // been removed at this point, even if dataDelete() reports an error.
  if (auto Res =
          Device->Device->dataDelete(Address, convertOlToPluginAllocTy(Type)))
    return Res;

  return Error::success();
}