fix: add missing ray tracing allocation in heapless mode

Related-to: NEO-12737

Signed-off-by: Damian Tomczak <damian.tomczak@intel.com>
This commit is contained in:
Damian Tomczak 2025-02-17 05:49:14 +00:00 committed by Compute-Runtime-Automation
parent 2b370f6a6f
commit 81b0cac65f
13 changed files with 43 additions and 15 deletions

View File

@ -1444,7 +1444,7 @@ TEST_F(KernelImmutableDataTests, whenHasRTCallsIsFalseThenRayTracingIsNotInitial
immDataVector->push_back(std::move(mockKernelImmutableData)); immDataVector->push_back(std::move(mockKernelImmutableData));
EXPECT_EQ(ZE_RESULT_SUCCESS, kernel->initialize(&kernelDesc)); EXPECT_EQ(ZE_RESULT_SUCCESS, kernel->initialize(&kernelDesc));
EXPECT_EQ(nullptr, module->getDevice()->getNEODevice()->getRTMemoryBackedBuffer()); EXPECT_FALSE(module->getDevice()->getNEODevice()->rayTracingIsInitialized());
} }
TEST_F(KernelImmutableDataTests, whenHasRTCallsIsTrueThenCrossThreadDataIsPatched) { TEST_F(KernelImmutableDataTests, whenHasRTCallsIsTrueThenCrossThreadDataIsPatched) {

View File

@ -829,9 +829,10 @@ HWTEST_F(EnqueueKernelTest, givenCommandStreamReceiverInBatchingModeWhenEnqueueK
size_t fenceSurfaceCount = mockCsr->globalFenceAllocation ? 1 : 0; size_t fenceSurfaceCount = mockCsr->globalFenceAllocation ? 1 : 0;
size_t clearColorSize = mockCsr->clearColorAllocation ? 1 : 0; size_t clearColorSize = mockCsr->clearColorAllocation ? 1 : 0;
size_t commandBufferCount = pDevice->getProductHelper().getCommandBuffersPreallocatedPerCommandQueue() > 0 ? 0 : 1; size_t commandBufferCount = pDevice->getProductHelper().getCommandBuffersPreallocatedPerCommandQueue() > 0 ? 0 : 1;
size_t rtSurface = pDevice->getRTMemoryBackedBuffer() ? 1u : 0u;
EXPECT_EQ(mockCsr->heaplessStateInitialized ? 1u : 0u, mockCsr->flushCalledCount); EXPECT_EQ(mockCsr->heaplessStateInitialized ? 1u : 0u, mockCsr->flushCalledCount);
EXPECT_EQ(4u + csrSurfaceCount + timestampPacketSurfacesCount + fenceSurfaceCount + clearColorSize + commandBufferCount, cmdBuffer->surfaces.size()); EXPECT_EQ(4u + csrSurfaceCount + timestampPacketSurfacesCount + fenceSurfaceCount + clearColorSize + commandBufferCount + rtSurface, cmdBuffer->surfaces.size());
} }
HWTEST_F(EnqueueKernelTest, givenReducedAddressSpaceGraphicsAllocationForHostPtrWithL3FlushRequiredWhenEnqueueKernelIsCalledThenFlushIsCalledForReducedAddressSpacePlatforms) { HWTEST_F(EnqueueKernelTest, givenReducedAddressSpaceGraphicsAllocationForHostPtrWithL3FlushRequiredWhenEnqueueKernelIsCalledThenFlushIsCalledForReducedAddressSpacePlatforms) {

View File

@ -78,6 +78,7 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInBatchingModeWhenFlushTas
csrSurfaceCount += mockCsr->globalFenceAllocation ? 1 : 0; csrSurfaceCount += mockCsr->globalFenceAllocation ? 1 : 0;
csrSurfaceCount += mockCsr->clearColorAllocation ? 1 : 0; csrSurfaceCount += mockCsr->clearColorAllocation ? 1 : 0;
csrSurfaceCount += pDevice->getProductHelper().getCommandBuffersPreallocatedPerCommandQueue() > 0 ? 0 : 1; csrSurfaceCount += pDevice->getProductHelper().getCommandBuffersPreallocatedPerCommandQueue() > 0 ? 0 : 1;
csrSurfaceCount += pDevice->getRTMemoryBackedBuffer() ? 1 : 0;
// we should have 3 heaps, tag allocation and csr command stream + cq // we should have 3 heaps, tag allocation and csr command stream + cq
EXPECT_EQ(4u + csrSurfaceCount, cmdBuffer->surfaces.size()); EXPECT_EQ(4u + csrSurfaceCount, cmdBuffer->surfaces.size());

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (C) 2021-2024 Intel Corporation * Copyright (C) 2021-2025 Intel Corporation
* *
* SPDX-License-Identifier: MIT * SPDX-License-Identifier: MIT
* *
@ -623,6 +623,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverFlushTaskXeHPAndLaterTests, gi
csrSurfaceCount -= pDevice->getHardwareInfo().capabilityTable.supportsImages ? 0 : 1; csrSurfaceCount -= pDevice->getHardwareInfo().capabilityTable.supportsImages ? 0 : 1;
csrSurfaceCount += mockCsr->globalFenceAllocation ? 1 : 0; csrSurfaceCount += mockCsr->globalFenceAllocation ? 1 : 0;
csrSurfaceCount += mockCsr->clearColorAllocation ? 1 : 0; csrSurfaceCount += mockCsr->clearColorAllocation ? 1 : 0;
csrSurfaceCount += pDevice->getRTMemoryBackedBuffer() ? 1 : 0;
EXPECT_EQ(4u + csrSurfaceCount, cmdBuffer->surfaces.size()); EXPECT_EQ(4u + csrSurfaceCount, cmdBuffer->surfaces.size());

View File

@ -7,6 +7,7 @@
#include "shared/source/device/device.h" #include "shared/source/device/device.h"
#include "shared/source/helpers/blit_commands_helper.h" #include "shared/source/helpers/blit_commands_helper.h"
#include "shared/source/helpers/compiler_product_helper.h"
#include "shared/source/helpers/gfx_core_helper.h" #include "shared/source/helpers/gfx_core_helper.h"
#include "shared/source/helpers/local_memory_access_modes.h" #include "shared/source/helpers/local_memory_access_modes.h"
#include "shared/source/memory_manager/unified_memory_manager.h" #include "shared/source/memory_manager/unified_memory_manager.h"
@ -619,7 +620,8 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ContextCreateTests, givenGpuHangOnFlushBcsTaskAndLo
executionEnv->rootDeviceEnvironments[0]->getMutableHardwareInfo()->capabilityTable.blitterOperationsSupported = true; executionEnv->rootDeviceEnvironments[0]->getMutableHardwareInfo()->capabilityTable.blitterOperationsSupported = true;
const auto rootDevice = testedDevice->getDevice().getRootDevice(); const auto rootDevice = testedDevice->getDevice().getRootDevice();
const auto blitDevice = rootDevice->getNearestGenericSubDevice(0); const auto leastOccupiedBankDevice = rootDevice->getRTMemoryBackedBuffer() ? 1u : 0u;
const auto blitDevice = rootDevice->getNearestGenericSubDevice(leastOccupiedBankDevice);
auto &selectorCopyEngine = blitDevice->getSelectorCopyEngine(); auto &selectorCopyEngine = blitDevice->getSelectorCopyEngine();
auto deviceBitfield = blitDevice->getDeviceBitfield(); auto deviceBitfield = blitDevice->getDeviceBitfield();

View File

@ -1169,7 +1169,7 @@ void CommandStreamReceiver::createGlobalStatelessHeap() {
} }
bool CommandStreamReceiver::isRayTracingStateProgramingNeeded(Device &device) const { bool CommandStreamReceiver::isRayTracingStateProgramingNeeded(Device &device) const {
return device.getRTMemoryBackedBuffer() && getBtdCommandDirty(); return device.rayTracingIsInitialized() && getBtdCommandDirty();
} }
void CommandStreamReceiver::registerClient(void *client) { void CommandStreamReceiver::registerClient(void *client) {

View File

@ -50,8 +50,6 @@ Device::Device(ExecutionEnvironment *executionEnvironment, const uint32_t rootDe
} }
Device::~Device() { Device::~Device() {
finalizeRayTracing();
DEBUG_BREAK_IF(nullptr == executionEnvironment->memoryManager.get()); DEBUG_BREAK_IF(nullptr == executionEnvironment->memoryManager.get());
if (performanceCounters) { if (performanceCounters) {
@ -62,6 +60,7 @@ Device::~Device() {
engine.commandStreamReceiver->flushBatchedSubmissions(); engine.commandStreamReceiver->flushBatchedSubmissions();
} }
allEngines.clear(); allEngines.clear();
finalizeRayTracing();
for (auto subdevice : subdevices) { for (auto subdevice : subdevices) {
if (subdevice) { if (subdevice) {
@ -889,6 +888,14 @@ RTDispatchGlobalsInfo *Device::getRTDispatchGlobals(uint32_t maxBvhLevels) {
} }
void Device::initializeRayTracing(uint32_t maxBvhLevels) { void Device::initializeRayTracing(uint32_t maxBvhLevels) {
initializeRTMemoryBackedBuffer();
while (rtDispatchGlobalsInfos.size() <= maxBvhLevels) {
rtDispatchGlobalsInfos.push_back(nullptr);
}
}
void Device::initializeRTMemoryBackedBuffer() {
if (rtMemoryBackedBuffer == nullptr) { if (rtMemoryBackedBuffer == nullptr) {
auto size = RayTracingHelper::getTotalMemoryBackedFifoSize(*this); auto size = RayTracingHelper::getTotalMemoryBackedFifoSize(*this);
@ -899,10 +906,6 @@ void Device::initializeRayTracing(uint32_t maxBvhLevels) {
rtMemoryBackedBuffer = getMemoryManager()->allocateGraphicsMemoryWithProperties(allocProps); rtMemoryBackedBuffer = getMemoryManager()->allocateGraphicsMemoryWithProperties(allocProps);
} }
while (rtDispatchGlobalsInfos.size() <= maxBvhLevels) {
rtDispatchGlobalsInfos.push_back(nullptr);
}
} }
void Device::finalizeRayTracing() { void Device::finalizeRayTracing() {

View File

@ -178,7 +178,8 @@ class Device : public ReferenceTrackedObject<Device>, NEO::NonCopyableAndNonMova
std::unique_ptr<SyncBufferHandler> syncBufferHandler; std::unique_ptr<SyncBufferHandler> syncBufferHandler;
GraphicsAllocation *getRTMemoryBackedBuffer() { return rtMemoryBackedBuffer; } GraphicsAllocation *getRTMemoryBackedBuffer() { return rtMemoryBackedBuffer; }
RTDispatchGlobalsInfo *getRTDispatchGlobals(uint32_t maxBvhLevels); RTDispatchGlobalsInfo *getRTDispatchGlobals(uint32_t maxBvhLevels);
bool rayTracingIsInitialized() const { return rtMemoryBackedBuffer != nullptr; } bool rayTracingIsInitialized() const { return rtMemoryBackedBuffer != nullptr && rtDispatchGlobalsInfos.size() != 0; }
void initializeRTMemoryBackedBuffer();
void initializeRayTracing(uint32_t maxBvhLevels); void initializeRayTracing(uint32_t maxBvhLevels);
void allocateRTDispatchGlobals(uint32_t maxBvhLevels); void allocateRTDispatchGlobals(uint32_t maxBvhLevels);

View File

@ -3568,6 +3568,9 @@ HWTEST2_F(CommandStreamReceiverHwTest,
auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>(); auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
commandStreamReceiver.storeMakeResidentAllocations = true; commandStreamReceiver.storeMakeResidentAllocations = true;
if (commandStreamReceiver.heaplessModeEnabled) {
GTEST_SKIP();
}
EXPECT_FALSE(commandStreamReceiver.isRayTracingStateProgramingNeeded(*pDevice)); EXPECT_FALSE(commandStreamReceiver.isRayTracingStateProgramingNeeded(*pDevice));
@ -4500,6 +4503,9 @@ HWTEST2_F(CommandStreamReceiverHwTest,
auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>(); auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
commandStreamReceiver.storeMakeResidentAllocations = true; commandStreamReceiver.storeMakeResidentAllocations = true;
if (commandStreamReceiver.heaplessModeEnabled) {
GTEST_SKIP();
}
commandStreamReceiver.flushImmediateTask(commandStream, commandStream.getUsed(), immediateFlushTaskFlags, *pDevice); commandStreamReceiver.flushImmediateTask(commandStream, commandStream.getUsed(), immediateFlushTaskFlags, *pDevice);

View File

@ -9,6 +9,7 @@
#include "shared/source/gmm_helper/gmm.h" #include "shared/source/gmm_helper/gmm.h"
#include "shared/source/helpers/api_specific_config.h" #include "shared/source/helpers/api_specific_config.h"
#include "shared/source/helpers/array_count.h" #include "shared/source/helpers/array_count.h"
#include "shared/source/helpers/compiler_product_helper.h"
#include "shared/source/helpers/gfx_core_helper.h" #include "shared/source/helpers/gfx_core_helper.h"
#include "shared/source/helpers/ray_tracing_helper.h" #include "shared/source/helpers/ray_tracing_helper.h"
#include "shared/source/memory_manager/allocations_list.h" #include "shared/source/memory_manager/allocations_list.h"
@ -185,6 +186,10 @@ TEST(Device, WhenCreatingDeviceThenCapsInitilizedBeforeEnginesAreCreated) {
using DeviceTest = Test<DeviceFixture>; using DeviceTest = Test<DeviceFixture>;
TEST_F(DeviceTest, whenInitializeRayTracingIsCalledAndRtBackedBufferIsNullptrThenMemoryBackedBufferIsCreated) { TEST_F(DeviceTest, whenInitializeRayTracingIsCalledAndRtBackedBufferIsNullptrThenMemoryBackedBufferIsCreated) {
if (pDevice->getCompilerProductHelper().isHeaplessModeEnabled()) {
GTEST_SKIP();
}
EXPECT_EQ(nullptr, pDevice->getRTMemoryBackedBuffer()); EXPECT_EQ(nullptr, pDevice->getRTMemoryBackedBuffer());
EXPECT_EQ(false, pDevice->rayTracingIsInitialized()); EXPECT_EQ(false, pDevice->rayTracingIsInitialized());
pDevice->initializeRayTracing(0); pDevice->initializeRayTracing(0);

View File

@ -8,6 +8,7 @@
#include "shared/source/command_container/command_encoder.h" #include "shared/source/command_container/command_encoder.h"
#include "shared/source/command_stream/tag_allocation_layout.h" #include "shared/source/command_stream/tag_allocation_layout.h"
#include "shared/source/helpers/api_specific_config.h" #include "shared/source/helpers/api_specific_config.h"
#include "shared/source/helpers/compiler_product_helper.h"
#include "shared/source/helpers/flush_stamp.h" #include "shared/source/helpers/flush_stamp.h"
#include "shared/source/memory_manager/internal_allocation_storage.h" #include "shared/source/memory_manager/internal_allocation_storage.h"
#include "shared/test/common/helpers/batch_buffer_helper.h" #include "shared/test/common/helpers/batch_buffer_helper.h"
@ -1134,8 +1135,9 @@ HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTest,
drmCtxIds[i] = 5u + i; drmCtxIds[i] = 5u + i;
} }
const auto hasFirstSubmission = device->getCompilerProductHelper().isHeaplessModeEnabled() ? 1 : 0;
FlushStamp handleToWait = 123; FlushStamp handleToWait = 123;
*testedCsr->getTagAddress() = 0; *testedCsr->getTagAddress() = hasFirstSubmission;
testedCsr->waitForFlushStamp(handleToWait); testedCsr->waitForFlushStamp(handleToWait);
EXPECT_EQ(0, mock->ioctlCnt.gemWait); EXPECT_EQ(0, mock->ioctlCnt.gemWait);
@ -1284,8 +1286,9 @@ HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTest,
mock->ioctlCnt.gemWait = 0; mock->ioctlCnt.gemWait = 0;
mock->isVmBindAvailableCall.called = 0u; mock->isVmBindAvailableCall.called = 0u;
const auto hasFirstSubmission = device->getCompilerProductHelper().isHeaplessModeEnabled() ? 1 : 0;
FlushStamp handleToWait = 123; FlushStamp handleToWait = 123;
*testedCsr->getTagAddress() = 0; *testedCsr->getTagAddress() = hasFirstSubmission;
testedCsr->waitForFlushStamp(handleToWait); testedCsr->waitForFlushStamp(handleToWait);
EXPECT_EQ(0, mock->ioctlCnt.gemWait); EXPECT_EQ(0, mock->ioctlCnt.gemWait);

View File

@ -11,6 +11,7 @@
#include "shared/source/gmm_helper/gmm_helper.h" #include "shared/source/gmm_helper/gmm_helper.h"
#include "shared/source/gmm_helper/page_table_mngr.h" #include "shared/source/gmm_helper/page_table_mngr.h"
#include "shared/source/gmm_helper/resource_info.h" #include "shared/source/gmm_helper/resource_info.h"
#include "shared/source/helpers/compiler_product_helper.h"
#include "shared/source/helpers/flush_stamp.h" #include "shared/source/helpers/flush_stamp.h"
#include "shared/source/indirect_heap/indirect_heap.h" #include "shared/source/indirect_heap/indirect_heap.h"
#include "shared/source/memory_manager/graphics_allocation.h" #include "shared/source/memory_manager/graphics_allocation.h"
@ -567,6 +568,7 @@ HWTEST_TEMPLATED_F(DrmCommandStreamBatchingTests, givenCsrWhenDispatchPolicyIsSe
size_t csrSurfaceCount = (device->getPreemptionMode() == PreemptionMode::MidThread) ? 2 : 0; size_t csrSurfaceCount = (device->getPreemptionMode() == PreemptionMode::MidThread) ? 2 : 0;
csrSurfaceCount += testedCsr->globalFenceAllocation ? 1 : 0; csrSurfaceCount += testedCsr->globalFenceAllocation ? 1 : 0;
csrSurfaceCount += testedCsr->clearColorAllocation ? 1 : 0; csrSurfaceCount += testedCsr->clearColorAllocation ? 1 : 0;
csrSurfaceCount += device->getRTMemoryBackedBuffer() ? 1u : 0u;
auto recordedCmdBuffer = cmdBuffers.peekHead(); auto recordedCmdBuffer = cmdBuffers.peekHead();
EXPECT_EQ(3u + csrSurfaceCount, recordedCmdBuffer->surfaces.size()); EXPECT_EQ(3u + csrSurfaceCount, recordedCmdBuffer->surfaces.size());
@ -641,6 +643,7 @@ HWTEST_TEMPLATED_F(DrmCommandStreamBatchingTests, givenRecordedCommandBufferWhen
size_t csrSurfaceCount = (device->getPreemptionMode() == PreemptionMode::MidThread) ? 2 : 0; size_t csrSurfaceCount = (device->getPreemptionMode() == PreemptionMode::MidThread) ? 2 : 0;
csrSurfaceCount += testedCsr->globalFenceAllocation ? 1 : 0; csrSurfaceCount += testedCsr->globalFenceAllocation ? 1 : 0;
csrSurfaceCount += testedCsr->clearColorAllocation ? 1 : 0; csrSurfaceCount += testedCsr->clearColorAllocation ? 1 : 0;
csrSurfaceCount += device->getRTMemoryBackedBuffer() ? 1u : 0u;
// validate that submited command buffer has what we want // validate that submited command buffer has what we want
EXPECT_EQ(3u + csrSurfaceCount, this->mock->execBuffer.getBufferCount()); EXPECT_EQ(3u + csrSurfaceCount, this->mock->execBuffer.getBufferCount());

View File

@ -7,6 +7,7 @@
#include "shared/source/command_container/command_encoder.h" #include "shared/source/command_container/command_encoder.h"
#include "shared/source/command_stream/csr_definitions.h" #include "shared/source/command_stream/csr_definitions.h"
#include "shared/source/helpers/compiler_product_helper.h"
#include "shared/source/memory_manager/memory_banks.h" #include "shared/source/memory_manager/memory_banks.h"
#include "shared/source/os_interface/sys_calls_common.h" #include "shared/source/os_interface/sys_calls_common.h"
#include "shared/test/common/helpers/batch_buffer_helper.h" #include "shared/test/common/helpers/batch_buffer_helper.h"
@ -141,8 +142,9 @@ HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTestDrmPrelim, givenWaitUserFenceEnab
testDrmCsr->useUserFenceWait = true; testDrmCsr->useUserFenceWait = true;
testDrmCsr->activePartitions = static_cast<uint32_t>(drmCtxSize); testDrmCsr->activePartitions = static_cast<uint32_t>(drmCtxSize);
const auto hasFirstSubmission = device->getCompilerProductHelper().isHeaplessModeEnabled() ? 1 : 0;
auto tagPtr = const_cast<TagAddressType *>(testDrmCsr->getTagAddress()); auto tagPtr = const_cast<TagAddressType *>(testDrmCsr->getTagAddress());
*tagPtr = 0; *tagPtr = hasFirstSubmission;
uint64_t tagAddress = castToUint64(tagPtr); uint64_t tagAddress = castToUint64(tagPtr);
FlushStamp handleToWait = 123; FlushStamp handleToWait = 123;
testDrmCsr->waitForFlushStamp(handleToWait); testDrmCsr->waitForFlushStamp(handleToWait);