mirror of
https://github.com/intel/llvm.git
synced 2026-01-16 05:32:28 +08:00
[Offload] Use the kernel argument size directly in AMDGPU offloading (#94667)
Summary: The old code object version 3 (COV3) implementation of HSA used to omit the implicit arguments from the reported kernel argument size. For COV4 and COV5 this is no longer the case, so we can simply use the size reported by the symbol information. See https://github.com/ROCm/ROCR-Runtime/issues/117#issuecomment-812758161
This commit is contained in:
@@ -3272,19 +3272,13 @@ Error AMDGPUKernelTy::launchImpl(GenericDeviceTy &GenericDevice,
|
||||
if (ArgsSize < KernelArgsSize)
|
||||
return Plugin::error("Mismatch of kernel arguments size");
|
||||
|
||||
// The args size reported by HSA may or may not contain the implicit args.
|
||||
// For now, assume that HSA does not consider the implicit arguments when
|
||||
// reporting the arguments of a kernel. In the worst case, we can waste
|
||||
// 56 bytes per allocation.
|
||||
uint32_t AllArgsSize = KernelArgsSize + ImplicitArgsSize;
|
||||
|
||||
AMDGPUPluginTy &AMDGPUPlugin =
|
||||
static_cast<AMDGPUPluginTy &>(GenericDevice.Plugin);
|
||||
AMDHostDeviceTy &HostDevice = AMDGPUPlugin.getHostDevice();
|
||||
AMDGPUMemoryManagerTy &ArgsMemoryManager = HostDevice.getArgsMemoryManager();
|
||||
|
||||
void *AllArgs = nullptr;
|
||||
if (auto Err = ArgsMemoryManager.allocate(AllArgsSize, &AllArgs))
|
||||
if (auto Err = ArgsMemoryManager.allocate(ArgsSize, &AllArgs))
|
||||
return Err;
|
||||
|
||||
// Account for user requested dynamic shared memory.
|
||||
|
||||
Reference in New Issue
Block a user