fix: missing ray tracing allocation in heapless mode
Related-to: NEO-12737
Signed-off-by: Damian Tomczak <damian.tomczak@intel.com>
This commit is contained in:
parent
2b370f6a6f
commit
81b0cac65f
|
@ -1444,7 +1444,7 @@ TEST_F(KernelImmutableDataTests, whenHasRTCallsIsFalseThenRayTracingIsNotInitial
|
|||
immDataVector->push_back(std::move(mockKernelImmutableData));
|
||||
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, kernel->initialize(&kernelDesc));
|
||||
EXPECT_EQ(nullptr, module->getDevice()->getNEODevice()->getRTMemoryBackedBuffer());
|
||||
EXPECT_FALSE(module->getDevice()->getNEODevice()->rayTracingIsInitialized());
|
||||
}
|
||||
|
||||
TEST_F(KernelImmutableDataTests, whenHasRTCallsIsTrueThenCrossThreadDataIsPatched) {
|
||||
|
|
|
@ -829,9 +829,10 @@ HWTEST_F(EnqueueKernelTest, givenCommandStreamReceiverInBatchingModeWhenEnqueueK
|
|||
size_t fenceSurfaceCount = mockCsr->globalFenceAllocation ? 1 : 0;
|
||||
size_t clearColorSize = mockCsr->clearColorAllocation ? 1 : 0;
|
||||
size_t commandBufferCount = pDevice->getProductHelper().getCommandBuffersPreallocatedPerCommandQueue() > 0 ? 0 : 1;
|
||||
size_t rtSurface = pDevice->getRTMemoryBackedBuffer() ? 1u : 0u;
|
||||
|
||||
EXPECT_EQ(mockCsr->heaplessStateInitialized ? 1u : 0u, mockCsr->flushCalledCount);
|
||||
EXPECT_EQ(4u + csrSurfaceCount + timestampPacketSurfacesCount + fenceSurfaceCount + clearColorSize + commandBufferCount, cmdBuffer->surfaces.size());
|
||||
EXPECT_EQ(4u + csrSurfaceCount + timestampPacketSurfacesCount + fenceSurfaceCount + clearColorSize + commandBufferCount + rtSurface, cmdBuffer->surfaces.size());
|
||||
}
|
||||
|
||||
HWTEST_F(EnqueueKernelTest, givenReducedAddressSpaceGraphicsAllocationForHostPtrWithL3FlushRequiredWhenEnqueueKernelIsCalledThenFlushIsCalledForReducedAddressSpacePlatforms) {
|
||||
|
|
|
@ -78,6 +78,7 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInBatchingModeWhenFlushTas
|
|||
csrSurfaceCount += mockCsr->globalFenceAllocation ? 1 : 0;
|
||||
csrSurfaceCount += mockCsr->clearColorAllocation ? 1 : 0;
|
||||
csrSurfaceCount += pDevice->getProductHelper().getCommandBuffersPreallocatedPerCommandQueue() > 0 ? 0 : 1;
|
||||
csrSurfaceCount += pDevice->getRTMemoryBackedBuffer() ? 1 : 0;
|
||||
|
||||
// we should have 3 heaps, tag allocation and csr command stream + cq
|
||||
EXPECT_EQ(4u + csrSurfaceCount, cmdBuffer->surfaces.size());
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (C) 2021-2024 Intel Corporation
|
||||
* Copyright (C) 2021-2025 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
|
@ -623,6 +623,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverFlushTaskXeHPAndLaterTests, gi
|
|||
csrSurfaceCount -= pDevice->getHardwareInfo().capabilityTable.supportsImages ? 0 : 1;
|
||||
csrSurfaceCount += mockCsr->globalFenceAllocation ? 1 : 0;
|
||||
csrSurfaceCount += mockCsr->clearColorAllocation ? 1 : 0;
|
||||
csrSurfaceCount += pDevice->getRTMemoryBackedBuffer() ? 1 : 0;
|
||||
|
||||
EXPECT_EQ(4u + csrSurfaceCount, cmdBuffer->surfaces.size());
|
||||
|
||||
|
|
|
@ -7,6 +7,7 @@
|
|||
|
||||
#include "shared/source/device/device.h"
|
||||
#include "shared/source/helpers/blit_commands_helper.h"
|
||||
#include "shared/source/helpers/compiler_product_helper.h"
|
||||
#include "shared/source/helpers/gfx_core_helper.h"
|
||||
#include "shared/source/helpers/local_memory_access_modes.h"
|
||||
#include "shared/source/memory_manager/unified_memory_manager.h"
|
||||
|
@ -619,7 +620,8 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ContextCreateTests, givenGpuHangOnFlushBcsTaskAndLo
|
|||
executionEnv->rootDeviceEnvironments[0]->getMutableHardwareInfo()->capabilityTable.blitterOperationsSupported = true;
|
||||
|
||||
const auto rootDevice = testedDevice->getDevice().getRootDevice();
|
||||
const auto blitDevice = rootDevice->getNearestGenericSubDevice(0);
|
||||
const auto leastOccupiedBankDevice = rootDevice->getRTMemoryBackedBuffer() ? 1u : 0u;
|
||||
const auto blitDevice = rootDevice->getNearestGenericSubDevice(leastOccupiedBankDevice);
|
||||
auto &selectorCopyEngine = blitDevice->getSelectorCopyEngine();
|
||||
auto deviceBitfield = blitDevice->getDeviceBitfield();
|
||||
|
||||
|
|
|
@ -1169,7 +1169,7 @@ void CommandStreamReceiver::createGlobalStatelessHeap() {
|
|||
}
|
||||
|
||||
// Reports whether ray-tracing state has to be programmed for `device`.
// Both conditions shown below additionally require getBtdCommandDirty() to be true.
// NOTE(review): this hunk lost its -/+ diff markers, so two return statements appear:
// one gated on device.getRTMemoryBackedBuffer() being non-null and one gated on
// device.rayTracingIsInitialized(). Read as plain code, only the first return is reachable;
// presumably the second one is the post-change line that replaces the first — confirm
// against the actual commit.
bool CommandStreamReceiver::isRayTracingStateProgramingNeeded(Device &device) const {
|
||||
return device.getRTMemoryBackedBuffer() && getBtdCommandDirty();
|
||||
return device.rayTracingIsInitialized() && getBtdCommandDirty();
|
||||
}
|
||||
|
||||
void CommandStreamReceiver::registerClient(void *client) {
|
||||
|
|
|
@ -50,8 +50,6 @@ Device::Device(ExecutionEnvironment *executionEnvironment, const uint32_t rootDe
|
|||
}
|
||||
|
||||
Device::~Device() {
|
||||
finalizeRayTracing();
|
||||
|
||||
DEBUG_BREAK_IF(nullptr == executionEnvironment->memoryManager.get());
|
||||
|
||||
if (performanceCounters) {
|
||||
|
@ -62,6 +60,7 @@ Device::~Device() {
|
|||
engine.commandStreamReceiver->flushBatchedSubmissions();
|
||||
}
|
||||
allEngines.clear();
|
||||
finalizeRayTracing();
|
||||
|
||||
for (auto subdevice : subdevices) {
|
||||
if (subdevice) {
|
||||
|
@ -889,6 +888,14 @@ RTDispatchGlobalsInfo *Device::getRTDispatchGlobals(uint32_t maxBvhLevels) {
|
|||
}
|
||||
|
||||
// Prepares this device for ray tracing with up to `maxBvhLevels` BVH levels.
// First delegates to initializeRTMemoryBackedBuffer(), then appends nullptr placeholders to
// rtDispatchGlobalsInfos until its size exceeds maxBvhLevels, so that index `maxBvhLevels`
// is a valid slot in the vector.
void Device::initializeRayTracing(uint32_t maxBvhLevels) {
|
||||
initializeRTMemoryBackedBuffer();
|
||||
|
||||
// Loop condition is `<=`, so the vector ends up with at least maxBvhLevels + 1 entries.
while (rtDispatchGlobalsInfos.size() <= maxBvhLevels) {
|
||||
rtDispatchGlobalsInfos.push_back(nullptr);
|
||||
}
|
||||
}
|
||||
|
||||
void Device::initializeRTMemoryBackedBuffer() {
|
||||
if (rtMemoryBackedBuffer == nullptr) {
|
||||
auto size = RayTracingHelper::getTotalMemoryBackedFifoSize(*this);
|
||||
|
||||
|
@ -899,10 +906,6 @@ void Device::initializeRayTracing(uint32_t maxBvhLevels) {
|
|||
|
||||
rtMemoryBackedBuffer = getMemoryManager()->allocateGraphicsMemoryWithProperties(allocProps);
|
||||
}
|
||||
|
||||
while (rtDispatchGlobalsInfos.size() <= maxBvhLevels) {
|
||||
rtDispatchGlobalsInfos.push_back(nullptr);
|
||||
}
|
||||
}
|
||||
|
||||
void Device::finalizeRayTracing() {
|
||||
|
|
|
@ -178,7 +178,8 @@ class Device : public ReferenceTrackedObject<Device>, NEO::NonCopyableAndNonMova
|
|||
std::unique_ptr<SyncBufferHandler> syncBufferHandler;
|
||||
GraphicsAllocation *getRTMemoryBackedBuffer() { return rtMemoryBackedBuffer; }
|
||||
RTDispatchGlobalsInfo *getRTDispatchGlobals(uint32_t maxBvhLevels);
|
||||
bool rayTracingIsInitialized() const { return rtMemoryBackedBuffer != nullptr; }
|
||||
bool rayTracingIsInitialized() const { return rtMemoryBackedBuffer != nullptr && rtDispatchGlobalsInfos.size() != 0; }
|
||||
void initializeRTMemoryBackedBuffer();
|
||||
void initializeRayTracing(uint32_t maxBvhLevels);
|
||||
void allocateRTDispatchGlobals(uint32_t maxBvhLevels);
|
||||
|
||||
|
|
|
@ -3568,6 +3568,9 @@ HWTEST2_F(CommandStreamReceiverHwTest,
|
|||
|
||||
auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
|
||||
commandStreamReceiver.storeMakeResidentAllocations = true;
|
||||
if (commandStreamReceiver.heaplessModeEnabled) {
|
||||
GTEST_SKIP();
|
||||
}
|
||||
|
||||
EXPECT_FALSE(commandStreamReceiver.isRayTracingStateProgramingNeeded(*pDevice));
|
||||
|
||||
|
@ -4500,6 +4503,9 @@ HWTEST2_F(CommandStreamReceiverHwTest,
|
|||
|
||||
auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
|
||||
commandStreamReceiver.storeMakeResidentAllocations = true;
|
||||
if (commandStreamReceiver.heaplessModeEnabled) {
|
||||
GTEST_SKIP();
|
||||
}
|
||||
|
||||
commandStreamReceiver.flushImmediateTask(commandStream, commandStream.getUsed(), immediateFlushTaskFlags, *pDevice);
|
||||
|
||||
|
|
|
@ -9,6 +9,7 @@
|
|||
#include "shared/source/gmm_helper/gmm.h"
|
||||
#include "shared/source/helpers/api_specific_config.h"
|
||||
#include "shared/source/helpers/array_count.h"
|
||||
#include "shared/source/helpers/compiler_product_helper.h"
|
||||
#include "shared/source/helpers/gfx_core_helper.h"
|
||||
#include "shared/source/helpers/ray_tracing_helper.h"
|
||||
#include "shared/source/memory_manager/allocations_list.h"
|
||||
|
@ -185,6 +186,10 @@ TEST(Device, WhenCreatingDeviceThenCapsInitilizedBeforeEnginesAreCreated) {
|
|||
using DeviceTest = Test<DeviceFixture>;
|
||||
|
||||
TEST_F(DeviceTest, whenInitializeRayTracingIsCalledAndRtBackedBufferIsNullptrThenMemoryBackedBufferIsCreated) {
|
||||
if (pDevice->getCompilerProductHelper().isHeaplessModeEnabled()) {
|
||||
GTEST_SKIP();
|
||||
}
|
||||
|
||||
EXPECT_EQ(nullptr, pDevice->getRTMemoryBackedBuffer());
|
||||
EXPECT_EQ(false, pDevice->rayTracingIsInitialized());
|
||||
pDevice->initializeRayTracing(0);
|
||||
|
|
|
@ -8,6 +8,7 @@
|
|||
#include "shared/source/command_container/command_encoder.h"
|
||||
#include "shared/source/command_stream/tag_allocation_layout.h"
|
||||
#include "shared/source/helpers/api_specific_config.h"
|
||||
#include "shared/source/helpers/compiler_product_helper.h"
|
||||
#include "shared/source/helpers/flush_stamp.h"
|
||||
#include "shared/source/memory_manager/internal_allocation_storage.h"
|
||||
#include "shared/test/common/helpers/batch_buffer_helper.h"
|
||||
|
@ -1134,8 +1135,9 @@ HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTest,
|
|||
drmCtxIds[i] = 5u + i;
|
||||
}
|
||||
|
||||
const auto hasFirstSubmission = device->getCompilerProductHelper().isHeaplessModeEnabled() ? 1 : 0;
|
||||
FlushStamp handleToWait = 123;
|
||||
*testedCsr->getTagAddress() = 0;
|
||||
*testedCsr->getTagAddress() = hasFirstSubmission;
|
||||
testedCsr->waitForFlushStamp(handleToWait);
|
||||
|
||||
EXPECT_EQ(0, mock->ioctlCnt.gemWait);
|
||||
|
@ -1284,8 +1286,9 @@ HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTest,
|
|||
mock->ioctlCnt.gemWait = 0;
|
||||
mock->isVmBindAvailableCall.called = 0u;
|
||||
|
||||
const auto hasFirstSubmission = device->getCompilerProductHelper().isHeaplessModeEnabled() ? 1 : 0;
|
||||
FlushStamp handleToWait = 123;
|
||||
*testedCsr->getTagAddress() = 0;
|
||||
*testedCsr->getTagAddress() = hasFirstSubmission;
|
||||
testedCsr->waitForFlushStamp(handleToWait);
|
||||
|
||||
EXPECT_EQ(0, mock->ioctlCnt.gemWait);
|
||||
|
|
|
@ -11,6 +11,7 @@
|
|||
#include "shared/source/gmm_helper/gmm_helper.h"
|
||||
#include "shared/source/gmm_helper/page_table_mngr.h"
|
||||
#include "shared/source/gmm_helper/resource_info.h"
|
||||
#include "shared/source/helpers/compiler_product_helper.h"
|
||||
#include "shared/source/helpers/flush_stamp.h"
|
||||
#include "shared/source/indirect_heap/indirect_heap.h"
|
||||
#include "shared/source/memory_manager/graphics_allocation.h"
|
||||
|
@ -567,6 +568,7 @@ HWTEST_TEMPLATED_F(DrmCommandStreamBatchingTests, givenCsrWhenDispatchPolicyIsSe
|
|||
size_t csrSurfaceCount = (device->getPreemptionMode() == PreemptionMode::MidThread) ? 2 : 0;
|
||||
csrSurfaceCount += testedCsr->globalFenceAllocation ? 1 : 0;
|
||||
csrSurfaceCount += testedCsr->clearColorAllocation ? 1 : 0;
|
||||
csrSurfaceCount += device->getRTMemoryBackedBuffer() ? 1u : 0u;
|
||||
|
||||
auto recordedCmdBuffer = cmdBuffers.peekHead();
|
||||
EXPECT_EQ(3u + csrSurfaceCount, recordedCmdBuffer->surfaces.size());
|
||||
|
@ -641,6 +643,7 @@ HWTEST_TEMPLATED_F(DrmCommandStreamBatchingTests, givenRecordedCommandBufferWhen
|
|||
size_t csrSurfaceCount = (device->getPreemptionMode() == PreemptionMode::MidThread) ? 2 : 0;
|
||||
csrSurfaceCount += testedCsr->globalFenceAllocation ? 1 : 0;
|
||||
csrSurfaceCount += testedCsr->clearColorAllocation ? 1 : 0;
|
||||
csrSurfaceCount += device->getRTMemoryBackedBuffer() ? 1u : 0u;
|
||||
|
||||
// validate that the submitted command buffer has what we want
|
||||
EXPECT_EQ(3u + csrSurfaceCount, this->mock->execBuffer.getBufferCount());
|
||||
|
|
|
@ -7,6 +7,7 @@
|
|||
|
||||
#include "shared/source/command_container/command_encoder.h"
|
||||
#include "shared/source/command_stream/csr_definitions.h"
|
||||
#include "shared/source/helpers/compiler_product_helper.h"
|
||||
#include "shared/source/memory_manager/memory_banks.h"
|
||||
#include "shared/source/os_interface/sys_calls_common.h"
|
||||
#include "shared/test/common/helpers/batch_buffer_helper.h"
|
||||
|
@ -141,8 +142,9 @@ HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTestDrmPrelim, givenWaitUserFenceEnab
|
|||
testDrmCsr->useUserFenceWait = true;
|
||||
testDrmCsr->activePartitions = static_cast<uint32_t>(drmCtxSize);
|
||||
|
||||
const auto hasFirstSubmission = device->getCompilerProductHelper().isHeaplessModeEnabled() ? 1 : 0;
|
||||
auto tagPtr = const_cast<TagAddressType *>(testDrmCsr->getTagAddress());
|
||||
*tagPtr = 0;
|
||||
*tagPtr = hasFirstSubmission;
|
||||
uint64_t tagAddress = castToUint64(tagPtr);
|
||||
FlushStamp handleToWait = 123;
|
||||
testDrmCsr->waitForFlushStamp(handleToWait);
|
||||
|
|
Loading…
Reference in New Issue