Re-enable use case where application allocates own RTDispatchGlobals.

Implementation was assuming that if HasRTCalls is true then the
RTDispatchGlobals patch token is also valid, but that isn't the case
when the application is using its own RTDispatchGlobals instead of the
one provided by the L0 UMD.

Related-To: LOCI-3323

Signed-off-by: Jim Snow <jim.m.snow@intel.com>
This commit is contained in:
Jim Snow 2022-07-30 01:25:29 +00:00 committed by Compute-Runtime-Automation
parent 61510e9a92
commit 6b4375efcd
2 changed files with 64 additions and 15 deletions

View File

@ -903,22 +903,28 @@ ze_result_t KernelImp::initialize(const ze_kernel_desc_t *desc) {
if (this->usesRayTracing()) {
uint32_t bvhLevels = NEO::RayTracingHelper::maxBvhLevels;
neoDevice->initializeRayTracing(bvhLevels);
auto rtDispatchGlobalsInfo = neoDevice->getRTDispatchGlobals(bvhLevels);
if (rtDispatchGlobalsInfo == nullptr) {
return ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY;
auto arg = this->getImmutableData()->getDescriptor().payloadMappings.implicitArgs.rtDispatchGlobals;
if (arg.pointerSize == 0) {
// kernel is allocating its own RTDispatchGlobals manually
neoDevice->initializeRayTracing(0);
} else {
neoDevice->initializeRayTracing(bvhLevels);
auto rtDispatchGlobalsInfo = neoDevice->getRTDispatchGlobals(bvhLevels);
if (rtDispatchGlobalsInfo == nullptr) {
return ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY;
}
for (auto rtDispatchGlobals : rtDispatchGlobalsInfo->rtDispatchGlobals) {
this->residencyContainer.push_back(rtDispatchGlobals);
}
auto address = rtDispatchGlobalsInfo->rtDispatchGlobals[0]->getGpuAddressToPatch();
NEO::patchPointer(ArrayRef<uint8_t>(crossThreadData.get(), crossThreadDataSize),
arg,
static_cast<uintptr_t>(address));
this->residencyContainer.push_back(neoDevice->getRTMemoryBackedBuffer());
}
for (auto rtDispatchGlobals : rtDispatchGlobalsInfo->rtDispatchGlobals) {
this->residencyContainer.push_back(rtDispatchGlobals);
}
auto address = rtDispatchGlobalsInfo->rtDispatchGlobals[0]->getGpuAddressToPatch();
NEO::patchPointer(ArrayRef<uint8_t>(crossThreadData.get(), crossThreadDataSize),
this->getImmutableData()->getDescriptor().payloadMappings.implicitArgs.rtDispatchGlobals,
static_cast<uintptr_t>(address));
this->residencyContainer.push_back(neoDevice->getRTMemoryBackedBuffer());
}
return ZE_RESULT_SUCCESS;

View File

@ -734,6 +734,49 @@ TEST_F(KernelImmutableDataTests, whenHasRTCallsIsTrueThenRayTracingIsInitialized
EXPECT_NE(nullptr, rtDispatchGlobals);
}
// Checks kernel initialization for a kernel that is flagged with hasRTCalls but
// whose rtDispatchGlobals implicit argument has a pointer size of zero:
// initialize() must succeed, the device must expose an RT memory backed buffer,
// and the device must report no RTDispatchGlobals for maxBvhLevels.
TEST_F(KernelImmutableDataTests, whenHasRTCallsIsTrueAndPatchTokenPointerSizeIsZeroThenRayTracingIsInitialized) {
    auto memoryManager = static_cast<OsAgnosticMemoryManager *>(device->getNEODevice()->getMemoryManager());
    memoryManager->turnOnFakingBigAllocations();

    // Describe a ray tracing kernel with no RTDispatchGlobals patch location.
    KernelDescriptor rtDescriptor = {};
    rtDescriptor.kernelAttributes.flags.hasRTCalls = true;
    rtDescriptor.kernelMetadata.kernelName = "rt_test";
    for (uint32_t dim = 0u; dim < 3u; ++dim) {
        rtDescriptor.kernelAttributes.requiredWorkgroupSize[dim] = 0;
    }
    rtDescriptor.payloadMappings.implicitArgs.rtDispatchGlobals.pointerSize = 0;

    auto immutableData = std::make_unique<MockImmutableData>(32u);
    immutableData->kernelDescriptor = &rtDescriptor;

    ModuleBuildLog *buildLog = nullptr;
    module = std::make_unique<MockModule>(device,
                                          buildLog,
                                          ModuleType::User,
                                          32u,
                                          immutableData.get());
    module->maxGroupSize = 10;

    auto kernel = std::make_unique<ModuleImmutableDataFixture::MockKernel>(module.get());

    ze_kernel_desc_t desc = {};
    desc.pKernelName = "rt_test";

    // Hand ownership of the mock immutable data over to the module's vector before initializing.
    auto &immutableDataVector =
        const_cast<std::vector<std::unique_ptr<KernelImmutableData>> &>(module->getKernelImmutableDataVector());
    immutableDataVector.push_back(std::move(immutableData));

    EXPECT_EQ(ZE_RESULT_SUCCESS, kernel->initialize(&desc));
    EXPECT_NE(nullptr, module->getDevice()->getNEODevice()->getRTMemoryBackedBuffer());

    // Application is expected to allocate its own RTDispatchGlobals manually in this case.
    auto dispatchGlobals = neoDevice->getRTDispatchGlobals(NEO::RayTracingHelper::maxBvhLevels);
    EXPECT_EQ(nullptr, dispatchGlobals);
}
HWTEST2_F(KernelImmutableDataTests, whenHasRTCallsIsTrueAndNoRTDispatchGlobalsIsAllocatedThenRayTracingIsNotInitialized, IsAtLeastXeHpgCore) {
KernelDescriptor mockDescriptor = {};
mockDescriptor.kernelAttributes.flags.hasRTCalls = true;