Allocate RTDispatchGlobals as unboxed array

Previously we used an array-of-pointers approach, but using an
array-of-structures is in some ways simpler.

We also split out the RTStack as a separate allocation.

Related-To: LOCI-2966

Signed-off-by: Jim Snow <jim.m.snow@intel.com>
This commit is contained in:
Jim Snow
2022-09-09 03:26:48 +00:00
committed by Compute-Runtime-Automation
parent 519d62d9a1
commit eaa4965ae8
8 changed files with 94 additions and 68 deletions

View File

@@ -883,7 +883,7 @@ ze_result_t KernelImp::initialize(const ze_kernel_desc_t *desc) {
uint32_t bvhLevels = NEO::RayTracingHelper::maxBvhLevels;
auto arg = this->getImmutableData()->getDescriptor().payloadMappings.implicitArgs.rtDispatchGlobals;
if (arg.pointerSize == 0) {
// kernel is allocating its own RTDispatchGlobals manually
// application is allocating its own RTDispatchGlobals manually
neoDevice->initializeRayTracing(0);
} else {
neoDevice->initializeRayTracing(bvhLevels);
@@ -892,17 +892,18 @@ ze_result_t KernelImp::initialize(const ze_kernel_desc_t *desc) {
return ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY;
}
for (auto rtDispatchGlobals : rtDispatchGlobalsInfo->rtDispatchGlobals) {
this->residencyContainer.push_back(rtDispatchGlobals);
for (auto rtStack : rtDispatchGlobalsInfo->rtStacks) {
this->residencyContainer.push_back(rtStack);
}
auto address = rtDispatchGlobalsInfo->rtDispatchGlobals[0]->getGpuAddressToPatch();
auto address = rtDispatchGlobalsInfo->rtDispatchGlobalsArray->getGpuAddressToPatch();
NEO::patchPointer(ArrayRef<uint8_t>(crossThreadData.get(), crossThreadDataSize),
arg,
static_cast<uintptr_t>(address));
this->residencyContainer.push_back(neoDevice->getRTMemoryBackedBuffer());
this->residencyContainer.push_back(rtDispatchGlobalsInfo->rtDispatchGlobalsArray);
}
this->residencyContainer.push_back(neoDevice->getRTMemoryBackedBuffer());
}
return ZE_RESULT_SUCCESS;

View File

@@ -894,7 +894,7 @@ HWTEST2_F(KernelImmutableDataTests, whenHasRTCallsIsTrueAndNoRTDispatchGlobalsIs
neoDevice->executionEnvironment->memoryManager.swap(otherMemoryManager);
}
HWTEST2_F(KernelImmutableDataTests, whenHasRTCallsIsTrueAndRTDispatchGlobalsArrayAllocationFailsThenRayTracingIsNotInitialized, IsAtLeastXeHpgCore) {
HWTEST2_F(KernelImmutableDataTests, whenHasRTCallsIsTrueAndRTStackAllocationFailsThenRayTracingIsNotInitialized, IsAtLeastXeHpgCore) {
KernelDescriptor mockDescriptor = {};
mockDescriptor.kernelAttributes.flags.hasRTCalls = true;
mockDescriptor.kernelMetadata.kernelName = "rt_test";
@@ -928,7 +928,7 @@ HWTEST2_F(KernelImmutableDataTests, whenHasRTCallsIsTrueAndRTDispatchGlobalsArra
neoDevice->rtDispatchGlobalsForceAllocation = false;
std::unique_ptr<NEO::MemoryManager> otherMemoryManager;
// Ensure that allocating RTDispatchGlobals succeeds, but the array allocation fails.
// Ensure that allocating RTDispatchGlobals succeeds, but the first RTStack allocation fails.
otherMemoryManager = std::make_unique<NEO::FailMemoryManager>(1, *neoDevice->executionEnvironment);
neoDevice->executionEnvironment->memoryManager.swap(otherMemoryManager);
@@ -1056,7 +1056,7 @@ TEST_F(KernelImmutableDataTests, whenHasRTCallsIsTrueThenCrossThreadDataIsPatche
EXPECT_NE(nullptr, rtDispatchGlobals);
auto dispatchGlobalsAddressPatched = *reinterpret_cast<uint64_t *>(ptrOffset(crossThreadData.get(), rtGlobalPointerPatchOffset));
auto dispatchGlobalsGpuAddressOffset = static_cast<uint64_t>(rtDispatchGlobals->rtDispatchGlobals[0]->getGpuAddressToPatch());
auto dispatchGlobalsGpuAddressOffset = static_cast<uint64_t>(rtDispatchGlobals->rtDispatchGlobalsArray->getGpuAddressToPatch());
EXPECT_EQ(dispatchGlobalsGpuAddressOffset, dispatchGlobalsAddressPatched);
kernel->crossThreadData.release();