From 7a6fc209ddf413525e9cde04b9b6571b05aae957 Mon Sep 17 00:00:00 2001 From: Dominik Dabek Date: Wed, 8 Nov 2023 12:44:01 +0000 Subject: [PATCH] performance: prealloc cmdbuffer on mtl Preallocate 2 command buffer allocations per command queue initialized on MTL. Related-To: NEO-8152 Signed-off-by: Dominik Dabek --- .../command_queue_hw_1_tests.cpp | 2 + .../command_queue/command_queue_tests.cpp | 18 ++++++-- .../command_queue/dispatch_walker_tests.cpp | 2 + .../enqueue_fill_buffer_fixture.h | 2 +- .../enqueue_fill_buffer_tests.cpp | 13 ++++++ .../command_queue/enqueue_handler_tests.cpp | 16 +++---- .../command_queue/enqueue_kernel_1_tests.cpp | 5 ++- .../command_queue/enqueue_svm_tests.cpp | 5 +++ ...and_stream_receiver_flush_task_3_tests.cpp | 3 +- .../context/context_negative_tests.cpp | 3 +- .../helpers/task_information_tests.cpp | 2 + .../performance_counters_tests.cpp | 1 + .../command_stream_receiver.cpp | 6 ++- .../os_agnostic_product_helper_xe_lpg.inl | 5 +++ .../command_stream_receiver_tests.cpp | 45 ++++++++++++++++--- .../mtl/excludes_xe_hpg_core_mtl.cpp | 2 +- 16 files changed, 107 insertions(+), 23 deletions(-) diff --git a/opencl/test/unit_test/command_queue/command_queue_hw_1_tests.cpp b/opencl/test/unit_test/command_queue/command_queue_hw_1_tests.cpp index 750df000fc..8deafa0aaf 100644 --- a/opencl/test/unit_test/command_queue/command_queue_hw_1_tests.cpp +++ b/opencl/test/unit_test/command_queue/command_queue_hw_1_tests.cpp @@ -1142,6 +1142,8 @@ HWTEST_F(CommandQueueHwTest, givenCsrClientWhenCallingSyncPointsThenUnregister) } HWTEST_F(CommandQueueHwTest, givenKernelSplitEnqueueReadBufferWhenBlockedThenEnqueueSurfacesMakeResidentIsCalledOnce) { + DebugManagerStateRestore restorer; + DebugManager.flags.SetAmountOfReusableAllocationsPerCmdQueue.set(0); UserEvent userEvent(context); auto &csr = pDevice->getUltCommandStreamReceiver(); csr.storeMakeResidentAllocations = true; diff --git a/opencl/test/unit_test/command_queue/command_queue_tests.cpp 
b/opencl/test/unit_test/command_queue/command_queue_tests.cpp index 228587745d..f9b50fb325 100644 --- a/opencl/test/unit_test/command_queue/command_queue_tests.cpp +++ b/opencl/test/unit_test/command_queue/command_queue_tests.cpp @@ -499,6 +499,8 @@ TEST_F(CommandQueueCommandStreamTest, WhenGettingCommandStreamWithNewSizeThenMax } TEST_F(CommandQueueCommandStreamTest, givenCommandStreamReceiverWithReusableAllocationsWhenAskedForCommandStreamThenReturnsAllocationFromReusablePool) { + DebugManagerStateRestore restorer; + DebugManager.flags.SetAmountOfReusableAllocationsPerCmdQueue.set(0); const cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, 0, 0}; MockCommandQueue cmdQ(context.get(), pClDevice, props, false); @@ -518,7 +520,9 @@ TEST_F(CommandQueueCommandStreamTest, givenCommandStreamReceiverWithReusableAllo EXPECT_TRUE(commandStreamReceiver.getAllocationsForReuse().peekIsEmpty()); } -TEST_F(CommandQueueCommandStreamTest, givenCommandQueueWhenItIsDestroyedThenCommandStreamIsPutOnTheReusabeList) { +TEST_F(CommandQueueCommandStreamTest, givenCommandQueueWhenItIsDestroyedThenCommandStreamIsPutOnTheReusableList) { + DebugManagerStateRestore restorer; + DebugManager.flags.SetAmountOfReusableAllocationsPerCmdQueue.set(0); auto cmdQ = new MockCommandQueue(context.get(), pClDevice, 0, false); const auto &commandStream = cmdQ->getCS(100); auto graphicsAllocation = commandStream.getGraphicsAllocation(); @@ -531,6 +535,8 @@ TEST_F(CommandQueueCommandStreamTest, givenCommandQueueWhenItIsDestroyedThenComm } TEST_F(CommandQueueCommandStreamTest, WhenAskedForNewCommandStreamThenOldHeapIsStoredForReuse) { + DebugManagerStateRestore restorer; + DebugManager.flags.SetAmountOfReusableAllocationsPerCmdQueue.set(0); const cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, 0, 0}; MockCommandQueue cmdQ(context.get(), pClDevice, props, false); @@ -654,6 +660,8 @@ TEST_P(CommandQueueIndirectHeapTest, WhenGettingIndirectHeapThenSizeIsAlignedToC } HWTEST_P(CommandQueueIndirectHeapTest, 
givenCommandStreamReceiverWithReusableAllocationsWhenAskedForHeapAllocationThenAllocationFromReusablePoolIsReturned) { + DebugManagerStateRestore restorer; + DebugManager.flags.SetAmountOfReusableAllocationsPerCmdQueue.set(0); const cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, 0, 0}; MockCommandQueue cmdQ(context.get(), pClDevice, props, false); @@ -739,12 +747,14 @@ TEST_P(CommandQueueIndirectHeapTest, GivenCommandQueueWithoutHeapAllocationWhenA memoryManager->freeGraphicsMemory(graphicsAllocation); } -TEST_P(CommandQueueIndirectHeapTest, givenCommandQueueWithResourceCachingActiveWhenQueueISDestroyedThenIndirectHeapIsNotOnReuseList) { +TEST_P(CommandQueueIndirectHeapTest, givenCommandQueueWithResourceCachingActiveWhenQueueIsDestroyedThenIndirectHeapIsNotOnReuseList) { + DebugManagerStateRestore restorer; + DebugManager.flags.SetAmountOfReusableAllocationsPerCmdQueue.set(0); auto cmdQ = new MockCommandQueue(context.get(), pClDevice, 0, false); cmdQ->getIndirectHeap(this->GetParam(), 100); EXPECT_TRUE(pDevice->getDefaultEngine().commandStreamReceiver->getAllocationsForReuse().peekIsEmpty()); - // now destroy command queue, heap should go to reusable list + // now destroy command queue, heap should NOT go to reusable list delete cmdQ; EXPECT_TRUE(pDevice->getDefaultEngine().commandStreamReceiver->getAllocationsForReuse().peekIsEmpty()); } @@ -783,6 +793,8 @@ TEST_P(CommandQueueIndirectHeapTest, GivenCommandQueueWithoutHeapAllocatedWhenIn } TEST_P(CommandQueueIndirectHeapTest, GivenCommandQueueWithHeapWhenGraphicAllocationIsNullThenNothingOnReuseList) { + DebugManagerStateRestore restorer; + DebugManager.flags.SetAmountOfReusableAllocationsPerCmdQueue.set(0); const cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, 0, 0}; MockCommandQueue cmdQ(context.get(), pClDevice, props, false); diff --git a/opencl/test/unit_test/command_queue/dispatch_walker_tests.cpp b/opencl/test/unit_test/command_queue/dispatch_walker_tests.cpp index dc6f7efe9d..72681e9534 100644 
--- a/opencl/test/unit_test/command_queue/dispatch_walker_tests.cpp +++ b/opencl/test/unit_test/command_queue/dispatch_walker_tests.cpp @@ -816,6 +816,8 @@ HWTEST_F(DispatchWalkerTest, givenBlockedQueueWhenDispatchWalkerIsCalledThenComm } HWTEST_F(DispatchWalkerTest, givenThereAreAllocationsForReuseWhenDispatchWalkerIsCalledThenCommandStreamObtainsReusableAllocation) { + DebugManagerStateRestore restorer; + DebugManager.flags.SetAmountOfReusableAllocationsPerCmdQueue.set(0); MockKernel kernel(program.get(), kernelInfo, *pClDevice); ASSERT_EQ(CL_SUCCESS, kernel.initialize()); MockMultiDispatchInfo multiDispatchInfo(pClDevice, &kernel); diff --git a/opencl/test/unit_test/command_queue/enqueue_fill_buffer_fixture.h b/opencl/test/unit_test/command_queue/enqueue_fill_buffer_fixture.h index fcbefa5cbf..3d2e8139a0 100644 --- a/opencl/test/unit_test/command_queue/enqueue_fill_buffer_fixture.h +++ b/opencl/test/unit_test/command_queue/enqueue_fill_buffer_fixture.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2022 Intel Corporation + * Copyright (C) 2018-2023 Intel Corporation * * SPDX-License-Identifier: MIT * diff --git a/opencl/test/unit_test/command_queue/enqueue_fill_buffer_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_fill_buffer_tests.cpp index a14c4d134b..cf9fe78474 100644 --- a/opencl/test/unit_test/command_queue/enqueue_fill_buffer_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_fill_buffer_tests.cpp @@ -10,6 +10,7 @@ #include "shared/source/helpers/aligned_memory.h" #include "shared/source/helpers/ptr_math.h" #include "shared/source/memory_manager/allocations_list.h" +#include "shared/source/memory_manager/internal_allocation_storage.h" #include "shared/source/memory_manager/memory_manager.h" #include "shared/source/os_interface/os_context.h" #include "shared/test/common/helpers/unit_test_helper.h" @@ -416,6 +417,10 @@ HWTEST_F(EnqueueFillBufferCmdTests, WhenFillingBufferThenPatternShouldBeAligned) HWTEST_F(EnqueueFillBufferCmdTests, 
WhenFillBufferIsCalledTwiceThenPatternAllocationIsReused) { auto &csr = pCmdQ->getGpgpuCommandStreamReceiver(); + if (pDevice->getProductHelper().getCommandBuffersPreallocatedPerCommandQueue() > 0) { + csr.flushTagUpdate(); + csr.getInternalAllocationStorage()->cleanAllocationList(-1, AllocationUsage::REUSABLE_ALLOCATION); + } ASSERT_TRUE(csr.getAllocationsForReuse().peekIsEmpty()); EnqueueFillBufferHelper<>::enqueueFillBuffer(pCmdQ, buffer); ASSERT_FALSE(csr.getAllocationsForReuse().peekIsEmpty()); @@ -429,6 +434,10 @@ HWTEST_F(EnqueueFillBufferCmdTests, WhenFillBufferIsCalledTwiceThenPatternAlloca HWTEST_F(EnqueueFillBufferCmdTests, WhenFillingBufferThenPatternOfSizeOneByteShouldGetPreparedForMiddleKernel) { auto &csr = pCmdQ->getGpgpuCommandStreamReceiver(); + if (pDevice->getProductHelper().getCommandBuffersPreallocatedPerCommandQueue() > 0) { + csr.flushTagUpdate(); + csr.getInternalAllocationStorage()->cleanAllocationList(-1, AllocationUsage::REUSABLE_ALLOCATION); + } ASSERT_TRUE(csr.getAllocationsForReuse().peekIsEmpty()); ASSERT_TRUE(csr.getTemporaryAllocations().peekIsEmpty()); @@ -462,6 +471,10 @@ HWTEST_F(EnqueueFillBufferCmdTests, WhenFillingBufferThenPatternOfSizeOneByteSho HWTEST_F(EnqueueFillBufferCmdTests, WhenFillingBufferThenPatternOfSizeTwoBytesShouldGetPreparedForMiddleKernel) { auto &csr = pCmdQ->getGpgpuCommandStreamReceiver(); + if (pDevice->getProductHelper().getCommandBuffersPreallocatedPerCommandQueue() > 0) { + csr.flushTagUpdate(); + csr.getInternalAllocationStorage()->cleanAllocationList(-1, AllocationUsage::REUSABLE_ALLOCATION); + } ASSERT_TRUE(csr.getAllocationsForReuse().peekIsEmpty()); ASSERT_TRUE(csr.getTemporaryAllocations().peekIsEmpty()); diff --git a/opencl/test/unit_test/command_queue/enqueue_handler_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_handler_tests.cpp index 8e15f115a4..330b0f56b2 100644 --- a/opencl/test/unit_test/command_queue/enqueue_handler_tests.cpp +++ 
b/opencl/test/unit_test/command_queue/enqueue_handler_tests.cpp @@ -333,7 +333,8 @@ HWTEST_F(EnqueueHandlerTest, WhenEnqueuingHandlerCallOnEnqueueMarkerThenCallProc nullptr); EXPECT_FALSE(csr->processEvictionCalled); - EXPECT_EQ(0u, csr->madeResidentGfxAllocations.size()); + const auto expectedMadeResidentGfxAllocations = pDevice->getProductHelper().getCommandBuffersPreallocatedPerCommandQueue(); + EXPECT_EQ(expectedMadeResidentGfxAllocations, csr->madeResidentGfxAllocations.size()); EXPECT_EQ(0u, csr->madeNonResidentGfxAllocations.size()); } @@ -732,24 +733,23 @@ struct EnqueueHandlerTestBasic : public ::testing::Test { device = std::make_unique(MockDevice::createWithExecutionEnvironment(nullptr, executionEnvironment, 0u)); context = std::make_unique(device.get()); - auto mockCmdQ = std::make_unique(context.get(), device.get(), nullptr); - - auto &ultCsr = static_cast &>(mockCmdQ->getGpgpuCommandStreamReceiver()); - ultCsr.taskCount = initialTaskCount; - + auto &ultCsr = static_cast &>(device->getGpgpuCommandStreamReceiver()); mockInternalAllocationStorage = new MockInternalAllocationStorage(ultCsr); ultCsr.internalAllocationStorage.reset(mockInternalAllocationStorage); + auto mockCmdQ = std::make_unique(context.get(), device.get(), nullptr); + + ultCsr.taskCount = initialTaskCount; + return mockCmdQ; } - MockInternalAllocationStorage *mockInternalAllocationStorage = nullptr; const uint32_t initialTaskCount = 100; std::unique_ptr device; std::unique_ptr context; }; -HWTEST_F(EnqueueHandlerTestBasic, givenEnqueueHandlerWhenCommandIsBlokingThenCompletionStampTaskCountIsPassedToWaitForTaskCountAndCleanAllocationListAsRequiredTaskCount) { +HWTEST_F(EnqueueHandlerTestBasic, givenEnqueueHandlerWhenCommandIsBlockingThenCompletionStampTaskCountIsPassedToWaitForTaskCountAndCleanAllocationListAsRequiredTaskCount) { auto mockCmdQ = setupFixtureAndCreateMockCommandQueue, FamilyType>(); MockKernelWithInternals kernelInternals(*device, context.get()); Kernel *kernel = 
kernelInternals.mockKernel; diff --git a/opencl/test/unit_test/command_queue/enqueue_kernel_1_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_kernel_1_tests.cpp index 25d225d7a3..c14256e572 100644 --- a/opencl/test/unit_test/command_queue/enqueue_kernel_1_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_kernel_1_tests.cpp @@ -793,9 +793,10 @@ HWTEST_F(EnqueueKernelTest, givenCommandStreamReceiverInBatchingModeWhenEnqueueK size_t timestampPacketSurfacesCount = mockCsr->peekTimestampPacketWriteEnabled() ? 1 : 0; size_t fenceSurfaceCount = mockCsr->globalFenceAllocation ? 1 : 0; size_t clearColorSize = mockCsr->clearColorAllocation ? 1 : 0; + size_t commandBufferCount = pDevice->getProductHelper().getCommandBuffersPreallocatedPerCommandQueue() > 0 ? 0 : 1; EXPECT_EQ(0, mockCsr->flushCalledCount); - EXPECT_EQ(5u + csrSurfaceCount + timestampPacketSurfacesCount + fenceSurfaceCount + clearColorSize, cmdBuffer->surfaces.size()); + EXPECT_EQ(4u + csrSurfaceCount + timestampPacketSurfacesCount + fenceSurfaceCount + clearColorSize + commandBufferCount, cmdBuffer->surfaces.size()); } HWTEST_F(EnqueueKernelTest, givenReducedAddressSpaceGraphicsAllocationForHostPtrWithL3FlushRequiredWhenEnqueueKernelIsCalledThenFlushIsCalledForReducedAddressSpacePlatforms) { @@ -940,6 +941,8 @@ HWTEST_F(EnqueueKernelTest, givenCommandStreamReceiverInBatchingModeWhenKernelIs auto mockedSubmissionsAggregator = new MockSubmissionsAggregator(); mockCsrmockCsr.submissionAggregator.reset(mockedSubmissionsAggregator); + pDevice->getGpgpuCommandStreamReceiver().flushTagUpdate(); // to clear residency allocations after preallocations + MockKernelWithInternals mockKernel(*pClDevice, context); size_t gws[3] = {1, 0, 0}; // make sure csr emits something diff --git a/opencl/test/unit_test/command_queue/enqueue_svm_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_svm_tests.cpp index 1af3cf30d2..94b95f283d 100644 --- a/opencl/test/unit_test/command_queue/enqueue_svm_tests.cpp +++ 
b/opencl/test/unit_test/command_queue/enqueue_svm_tests.cpp @@ -9,6 +9,7 @@ #include "shared/source/debug_settings/debug_settings_manager.h" #include "shared/source/helpers/aligned_memory.h" #include "shared/source/memory_manager/allocations_list.h" +#include "shared/source/memory_manager/internal_allocation_storage.h" #include "shared/source/memory_manager/surface.h" #include "shared/source/memory_manager/unified_memory_manager.h" #include "shared/source/os_interface/device_factory.h" @@ -862,6 +863,10 @@ TEST_F(EnqueueSvmTest, GivenRepeatCallsWhenFillingMemoryThenSuccessIsReturnedFor TEST_F(EnqueueSvmTest, givenEnqueueSVMMemFillWhenPatternAllocationIsObtainedThenItsTypeShouldBeSetToFillPattern) { auto &csr = pCmdQ->getGpgpuCommandStreamReceiver(); + if (pDevice->getProductHelper().getCommandBuffersPreallocatedPerCommandQueue() > 0) { + csr.flushTagUpdate(); + csr.getInternalAllocationStorage()->cleanAllocationList(-1, AllocationUsage::REUSABLE_ALLOCATION); + } ASSERT_TRUE(csr.getAllocationsForReuse().peekIsEmpty()); const float pattern[1] = {1.2345f}; diff --git a/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_3_tests.cpp b/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_3_tests.cpp index d5cdf1caf0..6c7f334c10 100644 --- a/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_3_tests.cpp +++ b/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_3_tests.cpp @@ -77,9 +77,10 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInBatchingModeWhenFlushTas csrSurfaceCount -= pDevice->getHardwareInfo().capabilityTable.supportsImages ? 0 : 1; csrSurfaceCount += mockCsr->globalFenceAllocation ? 1 : 0; csrSurfaceCount += mockCsr->clearColorAllocation ? 1 : 0; + csrSurfaceCount += pDevice->getProductHelper().getCommandBuffersPreallocatedPerCommandQueue() > 0 ? 
0 : 1; // we should have 3 heaps, tag allocation and csr command stream + cq - EXPECT_EQ(5u + csrSurfaceCount, cmdBuffer->surfaces.size()); + EXPECT_EQ(4u + csrSurfaceCount, cmdBuffer->surfaces.size()); EXPECT_EQ(0, mockCsr->flushCalledCount); diff --git a/opencl/test/unit_test/context/context_negative_tests.cpp b/opencl/test/unit_test/context/context_negative_tests.cpp index 1f1d4bdee9..9def7ec1d9 100644 --- a/opencl/test/unit_test/context/context_negative_tests.cpp +++ b/opencl/test/unit_test/context/context_negative_tests.cpp @@ -29,7 +29,8 @@ typedef Test ContextFailureInjection; TEST_F(ContextFailureInjection, GivenFailedAllocationInjectionWhenCreatingContextThenOutOfHostMemoryErrorIsReturned) { DebugManagerStateRestore restorer; - DebugManager.flags.ExperimentalSmallBufferPoolAllocator.set(0); // failing to allocate pool buffer is non-critical + DebugManager.flags.ExperimentalSmallBufferPoolAllocator.set(0); // failing to allocate pool buffer is non-critical + DebugManager.flags.SetAmountOfReusableAllocationsPerCmdQueue.set(0); // same for preallocations auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); cl_device_id deviceID = device.get(); diff --git a/opencl/test/unit_test/helpers/task_information_tests.cpp b/opencl/test/unit_test/helpers/task_information_tests.cpp index 85be053834..bace7a4798 100644 --- a/opencl/test/unit_test/helpers/task_information_tests.cpp +++ b/opencl/test/unit_test/helpers/task_information_tests.cpp @@ -190,6 +190,8 @@ TEST(CommandTest, givenWaitlistRequestWhenCommandComputeKernelIsCreatedThenMakeL } TEST(KernelOperationDestruction, givenKernelOperationWhenItIsDestructedThenAllAllocationsAreStoredInInternalStorageForReuse) { + DebugManagerStateRestore restorer; + DebugManager.flags.SetAmountOfReusableAllocationsPerCmdQueue.set(0); auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); MockCommandQueue cmdQ(nullptr, device.get(), nullptr, false); 
InternalAllocationStorage &allocationStorage = *device->getDefaultEngine().commandStreamReceiver->getInternalAllocationStorage(); diff --git a/opencl/test/unit_test/os_interface/performance_counters_tests.cpp b/opencl/test/unit_test/os_interface/performance_counters_tests.cpp index 2ac119ab99..096dd536f5 100644 --- a/opencl/test/unit_test/os_interface/performance_counters_tests.cpp +++ b/opencl/test/unit_test/os_interface/performance_counters_tests.cpp @@ -225,6 +225,7 @@ struct PerformanceCountersMetricsLibraryTest : public PerformanceCountersMetrics void TearDown() override { PerformanceCountersMetricsLibraryFixture::tearDown(); + queue->getGpgpuCommandStreamReceiver().setupContext(*device->getDefaultEngine().osContext); } std::unique_ptr osContext; }; diff --git a/shared/source/command_stream/command_stream_receiver.cpp b/shared/source/command_stream/command_stream_receiver.cpp index 21d03a9611..ff07802eb1 100644 --- a/shared/source/command_stream/command_stream_receiver.cpp +++ b/shared/source/command_stream/command_stream_receiver.cpp @@ -266,8 +266,10 @@ void CommandStreamReceiver::preallocateCommandBuffer() { const AllocationProperties commandStreamAllocationProperties{rootDeviceIndex, true, MemoryConstants::pageSize64k, AllocationType::COMMAND_BUFFER, isMultiOsContextCapable(), false, deviceBitfield}; auto allocation = this->getMemoryManager()->allocateGraphicsMemoryWithProperties(commandStreamAllocationProperties); - getInternalAllocationStorage()->storeAllocation(std::unique_ptr(allocation), REUSABLE_ALLOCATION); - this->makeResident(*allocation); + if (allocation) { + getInternalAllocationStorage()->storeAllocation(std::unique_ptr(allocation), REUSABLE_ALLOCATION); + this->makeResident(*allocation); + } } void CommandStreamReceiver::fillReusableAllocationsList() { diff --git a/shared/source/xe_hpg_core/xe_lpg/os_agnostic_product_helper_xe_lpg.inl b/shared/source/xe_hpg_core/xe_lpg/os_agnostic_product_helper_xe_lpg.inl index b9e2fdfe13..922d083210 100644 
--- a/shared/source/xe_hpg_core/xe_lpg/os_agnostic_product_helper_xe_lpg.inl +++ b/shared/source/xe_hpg_core/xe_lpg/os_agnostic_product_helper_xe_lpg.inl @@ -91,4 +91,9 @@ bool ProductHelperHw::isPlatformDp4aSupported() const { return true; } +template <> +uint32_t ProductHelperHw::getCommandBuffersPreallocatedPerCommandQueue() const { + return 2u; +} + } // namespace NEO diff --git a/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp b/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp index ac804a8d72..cc8385b849 100644 --- a/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp +++ b/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp @@ -173,17 +173,30 @@ HWTEST_F(CommandStreamReceiverTest, givenFlagDisabledWhenCallFillReusableAllocat EXPECT_EQ(0u, commandStreamReceiver->getResidencyAllocations().size()); } -HWTEST_F(CommandStreamReceiverTest, givenUnsetPreallocationsPerQueueWhenRequestPreallocationCalledThenDoNotAllocateCommandBuffer) { +HWTEST_F(CommandStreamReceiverTest, givenUnsetPreallocationsPerQueueWhenRequestPreallocationCalledThenPreallocateCommandBufferCorrectly) { EXPECT_TRUE(commandStreamReceiver->getAllocationsForReuse().peekIsEmpty()); EXPECT_EQ(0u, commandStreamReceiver->getResidencyAllocations().size()); + auto &productHelper = getHelper(); + const auto expectedPreallocations = productHelper.getCommandBuffersPreallocatedPerCommandQueue(); + commandStreamReceiver->requestPreallocation(); - EXPECT_TRUE(commandStreamReceiver->getAllocationsForReuse().peekIsEmpty()); - EXPECT_EQ(0u, commandStreamReceiver->getResidencyAllocations().size()); + if (expectedPreallocations > 0) { + EXPECT_FALSE(commandStreamReceiver->getAllocationsForReuse().peekIsEmpty()); + EXPECT_EQ(expectedPreallocations, commandStreamReceiver->getResidencyAllocations().size()); + } else { + EXPECT_TRUE(commandStreamReceiver->getAllocationsForReuse().peekIsEmpty()); + EXPECT_EQ(0u, 
commandStreamReceiver->getResidencyAllocations().size()); + } commandStreamReceiver->releasePreallocationRequest(); - EXPECT_TRUE(commandStreamReceiver->getAllocationsForReuse().peekIsEmpty()); - EXPECT_EQ(0u, commandStreamReceiver->getResidencyAllocations().size()); + if (expectedPreallocations > 0) { + EXPECT_FALSE(commandStreamReceiver->getAllocationsForReuse().peekIsEmpty()); + EXPECT_EQ(expectedPreallocations, commandStreamReceiver->getResidencyAllocations().size()); + } else { + EXPECT_TRUE(commandStreamReceiver->getAllocationsForReuse().peekIsEmpty()); + EXPECT_EQ(0u, commandStreamReceiver->getResidencyAllocations().size()); + } } HWTEST_F(CommandStreamReceiverTest, givenPreallocationsPerQueueEqualZeroWhenRequestPreallocationCalledThenDoNotAllocateCommandBuffer) { @@ -220,6 +233,28 @@ HWTEST_F(CommandStreamReceiverTest, givenPreallocationsPerQueueWhenRequestPreall EXPECT_EQ(2u, commandStreamReceiver->getResidencyAllocations().size()); } +HWTEST_F(CommandStreamReceiverTest, givenPreallocationsPerQueueWhenRequestPreallocationCalledButAllocationFailedThenRequestIsIgnored) { + DebugManagerStateRestore restorer; + DebugManager.flags.SetAmountOfReusableAllocationsPerCmdQueue.set(1); + EXPECT_TRUE(commandStreamReceiver->getAllocationsForReuse().peekIsEmpty()); + EXPECT_EQ(0u, commandStreamReceiver->getResidencyAllocations().size()); + + // make allocation fail + ExecutionEnvironment &executionEnvironment = *pDevice->getExecutionEnvironment(); + auto memoryManagerBackup = executionEnvironment.memoryManager.release(); + executionEnvironment.memoryManager.reset(new FailMemoryManager(executionEnvironment)); + + commandStreamReceiver->requestPreallocation(); + EXPECT_TRUE(commandStreamReceiver->getAllocationsForReuse().peekIsEmpty()); + EXPECT_EQ(0u, commandStreamReceiver->getResidencyAllocations().size()); + + // make allocation succeed + executionEnvironment.memoryManager.reset(memoryManagerBackup); + commandStreamReceiver->requestPreallocation(); + 
EXPECT_FALSE(commandStreamReceiver->getAllocationsForReuse().peekIsEmpty()); + EXPECT_EQ(1u, commandStreamReceiver->getResidencyAllocations().size()); +} + HWTEST_F(CommandStreamReceiverTest, whenRegisterClientThenIncrementClientNum) { auto &csr = pDevice->getUltCommandStreamReceiver(); auto numClients = csr.getNumClients(); diff --git a/shared/test/unit_test/xe_hpg_core/mtl/excludes_xe_hpg_core_mtl.cpp b/shared/test/unit_test/xe_hpg_core/mtl/excludes_xe_hpg_core_mtl.cpp index c83e582198..baf3ea0043 100644 --- a/shared/test/unit_test/xe_hpg_core/mtl/excludes_xe_hpg_core_mtl.cpp +++ b/shared/test/unit_test/xe_hpg_core/mtl/excludes_xe_hpg_core_mtl.cpp @@ -20,4 +20,4 @@ HWTEST_EXCLUDE_PRODUCT(ComputeModeRequirements, givenComputeModeProgrammingWhenR HWTEST_EXCLUDE_PRODUCT(ProductHelperTest, givenProductHelperWhenAskedIfPatIndexProgrammingSupportedThenReturnFalse, IGFX_METEORLAKE); HWTEST_EXCLUDE_PRODUCT(ProductHelperTest, givenProductHelperWhenIsAdjustWalkOrderAvailableCallThenFalseReturn, IGFX_METEORLAKE); HWTEST_EXCLUDE_PRODUCT(ProductHelperTest, givenBooleanUncachedWhenCallOverridePatIndexThenProperPatIndexIsReturned, IGFX_METEORLAKE); -HWTEST_EXCLUDE_PRODUCT(ProductHelperTest, givenProductHelperWhenCheckBlitEnqueueAllowedThenReturnTrue, IGFX_METEORLAKE); \ No newline at end of file +HWTEST_EXCLUDE_PRODUCT(ProductHelperTest, givenProductHelperWhenCheckBlitEnqueueAllowedThenReturnTrue, IGFX_METEORLAKE);