diff --git a/level_zero/core/test/unit_tests/sources/kernel/test_kernel.cpp b/level_zero/core/test/unit_tests/sources/kernel/test_kernel.cpp index 9c0d205cd0..873f83d5ab 100644 --- a/level_zero/core/test/unit_tests/sources/kernel/test_kernel.cpp +++ b/level_zero/core/test/unit_tests/sources/kernel/test_kernel.cpp @@ -1444,7 +1444,7 @@ TEST_F(KernelImmutableDataTests, whenHasRTCallsIsFalseThenRayTracingIsNotInitial immDataVector->push_back(std::move(mockKernelImmutableData)); EXPECT_EQ(ZE_RESULT_SUCCESS, kernel->initialize(&kernelDesc)); - EXPECT_EQ(nullptr, module->getDevice()->getNEODevice()->getRTMemoryBackedBuffer()); + EXPECT_FALSE(module->getDevice()->getNEODevice()->rayTracingIsInitialized()); } TEST_F(KernelImmutableDataTests, whenHasRTCallsIsTrueThenCrossThreadDataIsPatched) { diff --git a/opencl/test/unit_test/command_queue/enqueue_kernel_1_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_kernel_1_tests.cpp index ea59fb9148..90757ada25 100644 --- a/opencl/test/unit_test/command_queue/enqueue_kernel_1_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_kernel_1_tests.cpp @@ -829,9 +829,10 @@ HWTEST_F(EnqueueKernelTest, givenCommandStreamReceiverInBatchingModeWhenEnqueueK size_t fenceSurfaceCount = mockCsr->globalFenceAllocation ? 1 : 0; size_t clearColorSize = mockCsr->clearColorAllocation ? 1 : 0; size_t commandBufferCount = pDevice->getProductHelper().getCommandBuffersPreallocatedPerCommandQueue() > 0 ? 0 : 1; + size_t rtSurface = pDevice->getRTMemoryBackedBuffer() ? 1u : 0u; EXPECT_EQ(mockCsr->heaplessStateInitialized ? 1u : 0u, mockCsr->flushCalledCount); - EXPECT_EQ(4u + csrSurfaceCount + timestampPacketSurfacesCount + fenceSurfaceCount + clearColorSize + commandBufferCount, cmdBuffer->surfaces.size()); + EXPECT_EQ(4u + csrSurfaceCount + timestampPacketSurfacesCount + fenceSurfaceCount + clearColorSize + commandBufferCount + rtSurface, cmdBuffer->surfaces.size()); } HWTEST_F(EnqueueKernelTest, givenReducedAddressSpaceGraphicsAllocationForHostPtrWithL3FlushRequiredWhenEnqueueKernelIsCalledThenFlushIsCalledForReducedAddressSpacePlatforms) { diff --git a/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_3_tests.cpp b/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_3_tests.cpp index 39aa33e08d..84e6d49d2a 100644 --- a/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_3_tests.cpp +++ b/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_3_tests.cpp @@ -78,6 +78,7 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInBatchingModeWhenFlushTas csrSurfaceCount += mockCsr->globalFenceAllocation ? 1 : 0; csrSurfaceCount += mockCsr->clearColorAllocation ? 1 : 0; csrSurfaceCount += pDevice->getProductHelper().getCommandBuffersPreallocatedPerCommandQueue() > 0 ? 0 : 1; + csrSurfaceCount += pDevice->getRTMemoryBackedBuffer() ? 1 : 0; // we should have 3 heaps, tag allocation and csr command stream + cq EXPECT_EQ(4u + csrSurfaceCount, cmdBuffer->surfaces.size()); diff --git a/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_tests_xehp_and_later.cpp b/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_tests_xehp_and_later.cpp index 744dde373f..365e707b40 100644 --- a/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_tests_xehp_and_later.cpp +++ b/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_tests_xehp_and_later.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2021-2024 Intel Corporation + * Copyright (C) 2021-2025 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -623,6 +623,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverFlushTaskXeHPAndLaterTests, gi csrSurfaceCount -= pDevice->getHardwareInfo().capabilityTable.supportsImages ? 0 : 1; csrSurfaceCount += mockCsr->globalFenceAllocation ? 1 : 0; csrSurfaceCount += mockCsr->clearColorAllocation ? 1 : 0; + csrSurfaceCount += pDevice->getRTMemoryBackedBuffer() ? 1 : 0; EXPECT_EQ(4u + csrSurfaceCount, cmdBuffer->surfaces.size()); diff --git a/opencl/test/unit_test/context/context_tests.cpp b/opencl/test/unit_test/context/context_tests.cpp index 869fdac42e..c69d372bcc 100644 --- a/opencl/test/unit_test/context/context_tests.cpp +++ b/opencl/test/unit_test/context/context_tests.cpp @@ -7,6 +7,7 @@ #include "shared/source/device/device.h" #include "shared/source/helpers/blit_commands_helper.h" +#include "shared/source/helpers/compiler_product_helper.h" #include "shared/source/helpers/gfx_core_helper.h" #include "shared/source/helpers/local_memory_access_modes.h" #include "shared/source/memory_manager/unified_memory_manager.h" @@ -619,7 +620,8 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ContextCreateTests, givenGpuHangOnFlushBcsTaskAndLo executionEnv->rootDeviceEnvironments[0]->getMutableHardwareInfo()->capabilityTable.blitterOperationsSupported = true; const auto rootDevice = testedDevice->getDevice().getRootDevice(); - const auto blitDevice = rootDevice->getNearestGenericSubDevice(0); + const auto leastOccupiedBankDevice = rootDevice->getRTMemoryBackedBuffer() ? 1u : 0u; + const auto blitDevice = rootDevice->getNearestGenericSubDevice(leastOccupiedBankDevice); auto &selectorCopyEngine = blitDevice->getSelectorCopyEngine(); auto deviceBitfield = blitDevice->getDeviceBitfield(); diff --git a/shared/source/command_stream/command_stream_receiver.cpp b/shared/source/command_stream/command_stream_receiver.cpp index 3a8f31b0c1..018877d7aa 100644 --- a/shared/source/command_stream/command_stream_receiver.cpp +++ b/shared/source/command_stream/command_stream_receiver.cpp @@ -1169,7 +1169,7 @@ void CommandStreamReceiver::createGlobalStatelessHeap() { } bool CommandStreamReceiver::isRayTracingStateProgramingNeeded(Device &device) const { - return device.getRTMemoryBackedBuffer() && getBtdCommandDirty(); + return device.rayTracingIsInitialized() && getBtdCommandDirty(); } void CommandStreamReceiver::registerClient(void *client) { diff --git a/shared/source/device/device.cpp b/shared/source/device/device.cpp index 99e8034087..bde8228e6d 100644 --- a/shared/source/device/device.cpp +++ b/shared/source/device/device.cpp @@ -50,8 +50,6 @@ Device::Device(ExecutionEnvironment *executionEnvironment, const uint32_t rootDe } Device::~Device() { - finalizeRayTracing(); - DEBUG_BREAK_IF(nullptr == executionEnvironment->memoryManager.get()); if (performanceCounters) { @@ -62,6 +60,7 @@ Device::~Device() { engine.commandStreamReceiver->flushBatchedSubmissions(); } allEngines.clear(); + finalizeRayTracing(); for (auto subdevice : subdevices) { if (subdevice) { @@ -889,6 +888,14 @@ RTDispatchGlobalsInfo *Device::getRTDispatchGlobals(uint32_t maxBvhLevels) { } void Device::initializeRayTracing(uint32_t maxBvhLevels) { + initializeRTMemoryBackedBuffer(); + + while (rtDispatchGlobalsInfos.size() <= maxBvhLevels) { + rtDispatchGlobalsInfos.push_back(nullptr); + } +} + +void Device::initializeRTMemoryBackedBuffer() { if (rtMemoryBackedBuffer == nullptr) { auto size = RayTracingHelper::getTotalMemoryBackedFifoSize(*this); @@ -899,10 +906,6 @@ void Device::initializeRayTracing(uint32_t maxBvhLevels) { rtMemoryBackedBuffer = getMemoryManager()->allocateGraphicsMemoryWithProperties(allocProps); } - - while (rtDispatchGlobalsInfos.size() <= maxBvhLevels) { - rtDispatchGlobalsInfos.push_back(nullptr); - } } void Device::finalizeRayTracing() { diff --git a/shared/source/device/device.h b/shared/source/device/device.h index 05dc689ba6..b9c9822973 100644 --- a/shared/source/device/device.h +++ b/shared/source/device/device.h @@ -178,7 +178,8 @@ class Device : public ReferenceTrackedObject, NEO::NonCopyableAndNonMova std::unique_ptr syncBufferHandler; GraphicsAllocation *getRTMemoryBackedBuffer() { return rtMemoryBackedBuffer; } RTDispatchGlobalsInfo *getRTDispatchGlobals(uint32_t maxBvhLevels); - bool rayTracingIsInitialized() const { return rtMemoryBackedBuffer != nullptr; } + bool rayTracingIsInitialized() const { return rtMemoryBackedBuffer != nullptr && rtDispatchGlobalsInfos.size() != 0; } + void initializeRTMemoryBackedBuffer(); void initializeRayTracing(uint32_t maxBvhLevels); void allocateRTDispatchGlobals(uint32_t maxBvhLevels); diff --git a/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp b/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp index 39a6239c55..dc5d29c98e 100644 --- a/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp +++ b/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp @@ -3568,6 +3568,9 @@ HWTEST2_F(CommandStreamReceiverHwTest, auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); commandStreamReceiver.storeMakeResidentAllocations = true; + if (commandStreamReceiver.heaplessModeEnabled) { + GTEST_SKIP(); + } EXPECT_FALSE(commandStreamReceiver.isRayTracingStateProgramingNeeded(*pDevice)); @@ -4500,6 +4503,9 @@ HWTEST2_F(CommandStreamReceiverHwTest, auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); commandStreamReceiver.storeMakeResidentAllocations = true; + if (commandStreamReceiver.heaplessModeEnabled) { + GTEST_SKIP(); + } commandStreamReceiver.flushImmediateTask(commandStream, commandStream.getUsed(), immediateFlushTaskFlags, *pDevice); diff --git a/shared/test/unit_test/device/neo_device_tests.cpp b/shared/test/unit_test/device/neo_device_tests.cpp index 4eac1cdedf..8d40994bdf 100644 --- a/shared/test/unit_test/device/neo_device_tests.cpp +++ b/shared/test/unit_test/device/neo_device_tests.cpp @@ -9,6 +9,7 @@ #include "shared/source/gmm_helper/gmm.h" #include "shared/source/helpers/api_specific_config.h" #include "shared/source/helpers/array_count.h" +#include "shared/source/helpers/compiler_product_helper.h" #include "shared/source/helpers/gfx_core_helper.h" #include "shared/source/helpers/ray_tracing_helper.h" #include "shared/source/memory_manager/allocations_list.h" @@ -185,6 +186,10 @@ TEST(Device, WhenCreatingDeviceThenCapsInitilizedBeforeEnginesAreCreated) { using DeviceTest = Test; TEST_F(DeviceTest, whenInitializeRayTracingIsCalledAndRtBackedBufferIsNullptrThenMemoryBackedBufferIsCreated) { + if (pDevice->getCompilerProductHelper().isHeaplessModeEnabled()) { + GTEST_SKIP(); + } + EXPECT_EQ(nullptr, pDevice->getRTMemoryBackedBuffer()); EXPECT_EQ(false, pDevice->rayTracingIsInitialized()); pDevice->initializeRayTracing(0); diff --git a/shared/test/unit_test/os_interface/linux/drm_command_stream_tests.cpp b/shared/test/unit_test/os_interface/linux/drm_command_stream_tests.cpp index 64e96321f5..73bf35e69c 100644 --- a/shared/test/unit_test/os_interface/linux/drm_command_stream_tests.cpp +++ b/shared/test/unit_test/os_interface/linux/drm_command_stream_tests.cpp @@ -8,6 +8,7 @@ #include "shared/source/command_container/command_encoder.h" #include "shared/source/command_stream/tag_allocation_layout.h" #include "shared/source/helpers/api_specific_config.h" +#include "shared/source/helpers/compiler_product_helper.h" #include "shared/source/helpers/flush_stamp.h" #include "shared/source/memory_manager/internal_allocation_storage.h" #include "shared/test/common/helpers/batch_buffer_helper.h" @@ -1134,8 +1135,9 @@ HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTest, drmCtxIds[i] = 5u + i; } + const auto hasFirstSubmission = device->getCompilerProductHelper().isHeaplessModeEnabled() ? 1 : 0; FlushStamp handleToWait = 123; - *testedCsr->getTagAddress() = 0; + *testedCsr->getTagAddress() = hasFirstSubmission; testedCsr->waitForFlushStamp(handleToWait); EXPECT_EQ(0, mock->ioctlCnt.gemWait); @@ -1284,8 +1286,9 @@ HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTest, mock->ioctlCnt.gemWait = 0; mock->isVmBindAvailableCall.called = 0u; + const auto hasFirstSubmission = device->getCompilerProductHelper().isHeaplessModeEnabled() ? 1 : 0; FlushStamp handleToWait = 123; - *testedCsr->getTagAddress() = 0; + *testedCsr->getTagAddress() = hasFirstSubmission; testedCsr->waitForFlushStamp(handleToWait); EXPECT_EQ(0, mock->ioctlCnt.gemWait); diff --git a/shared/test/unit_test/os_interface/linux/drm_command_stream_tests_1.cpp b/shared/test/unit_test/os_interface/linux/drm_command_stream_tests_1.cpp index 9604cc5c9e..46b1a75269 100644 --- a/shared/test/unit_test/os_interface/linux/drm_command_stream_tests_1.cpp +++ b/shared/test/unit_test/os_interface/linux/drm_command_stream_tests_1.cpp @@ -11,6 +11,7 @@ #include "shared/source/gmm_helper/gmm_helper.h" #include "shared/source/gmm_helper/page_table_mngr.h" #include "shared/source/gmm_helper/resource_info.h" +#include "shared/source/helpers/compiler_product_helper.h" #include "shared/source/helpers/flush_stamp.h" #include "shared/source/indirect_heap/indirect_heap.h" #include "shared/source/memory_manager/graphics_allocation.h" @@ -567,6 +568,7 @@ HWTEST_TEMPLATED_F(DrmCommandStreamBatchingTests, givenCsrWhenDispatchPolicyIsSe size_t csrSurfaceCount = (device->getPreemptionMode() == PreemptionMode::MidThread) ? 2 : 0; csrSurfaceCount += testedCsr->globalFenceAllocation ? 1 : 0; csrSurfaceCount += testedCsr->clearColorAllocation ? 1 : 0; + csrSurfaceCount += device->getRTMemoryBackedBuffer() ? 1u : 0u; auto recordedCmdBuffer = cmdBuffers.peekHead(); EXPECT_EQ(3u + csrSurfaceCount, recordedCmdBuffer->surfaces.size()); @@ -641,6 +643,7 @@ HWTEST_TEMPLATED_F(DrmCommandStreamBatchingTests, givenRecordedCommandBufferWhen size_t csrSurfaceCount = (device->getPreemptionMode() == PreemptionMode::MidThread) ? 2 : 0; csrSurfaceCount += testedCsr->globalFenceAllocation ? 1 : 0; csrSurfaceCount += testedCsr->clearColorAllocation ? 1 : 0; + csrSurfaceCount += device->getRTMemoryBackedBuffer() ? 1u : 0u; // validate that submited command buffer has what we want EXPECT_EQ(3u + csrSurfaceCount, this->mock->execBuffer.getBufferCount()); diff --git a/shared/test/unit_test/os_interface/linux/drm_command_stream_xehp_and_later_prelim_tests.cpp b/shared/test/unit_test/os_interface/linux/drm_command_stream_xehp_and_later_prelim_tests.cpp index 794f99e48a..e13b670f58 100644 --- a/shared/test/unit_test/os_interface/linux/drm_command_stream_xehp_and_later_prelim_tests.cpp +++ b/shared/test/unit_test/os_interface/linux/drm_command_stream_xehp_and_later_prelim_tests.cpp @@ -7,6 +7,7 @@ #include "shared/source/command_container/command_encoder.h" #include "shared/source/command_stream/csr_definitions.h" +#include "shared/source/helpers/compiler_product_helper.h" #include "shared/source/memory_manager/memory_banks.h" #include "shared/source/os_interface/sys_calls_common.h" #include "shared/test/common/helpers/batch_buffer_helper.h" @@ -141,8 +142,9 @@ HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTestDrmPrelim, givenWaitUserFenceEnab testDrmCsr->useUserFenceWait = true; testDrmCsr->activePartitions = static_cast(drmCtxSize); + const auto hasFirstSubmission = device->getCompilerProductHelper().isHeaplessModeEnabled() ? 1 : 0; auto tagPtr = const_cast(testDrmCsr->getTagAddress()); - *tagPtr = 0; + *tagPtr = hasFirstSubmission; uint64_t tagAddress = castToUint64(tagPtr); FlushStamp handleToWait = 123; testDrmCsr->waitForFlushStamp(handleToWait);