diff --git a/opencl/source/command_queue/enqueue_svm.h b/opencl/source/command_queue/enqueue_svm.h index 3b20f3c3c3..87df93c91c 100644 --- a/opencl/source/command_queue/enqueue_svm.h +++ b/opencl/source/command_queue/enqueue_svm.h @@ -128,9 +128,9 @@ cl_int CommandQueueHw::enqueueSVMMap(cl_bool blockingMap, dc.direction = csrSelectionArgs.direction; MultiDispatchInfo dispatchInfo(dc); - const bool useStateless = forceStateless(svmData->size); + const bool isStateless = isForceStateless || forceStateless(svmData->size); const bool useHeapless = this->getHeaplessModeEnabled(); - auto eBuiltInOps = EBuiltInOps::adjustBuiltinType(useStateless, useHeapless); + auto eBuiltInOps = EBuiltInOps::adjustBuiltinType(isStateless, useHeapless); const auto dispatchResult = dispatchBcsOrGpgpuEnqueue(dispatchInfo, surfaces, eBuiltInOps, numEventsInWaitList, eventWaitList, event, blocking, csr); if (dispatchResult != CL_SUCCESS) { return dispatchResult; @@ -219,9 +219,9 @@ cl_int CommandQueueHw::enqueueSVMUnmap(void *svmPtr, dc.direction = csrSelectionArgs.direction; MultiDispatchInfo dispatchInfo(dc); - const bool useStateless = forceStateless(svmData->size); + const bool isStateless = isForceStateless || forceStateless(svmData->size); const bool useHeapless = this->getHeaplessModeEnabled(); - auto eBuiltInOps = EBuiltInOps::adjustBuiltinType(useStateless, useHeapless); + auto eBuiltInOps = EBuiltInOps::adjustBuiltinType(isStateless, useHeapless); const auto dispatchResult = dispatchBcsOrGpgpuEnqueue(dispatchInfo, surfaces, eBuiltInOps, numEventsInWaitList, eventWaitList, event, false, csr); if (dispatchResult != CL_SUCCESS) { return dispatchResult; @@ -521,9 +521,9 @@ cl_int CommandQueueHw::enqueueSVMMemFill(void *svmPtr, memcpy_s(patternAllocation->getUnderlyingBuffer(), patternSize, pattern, patternSize); } - const bool useStateless = forceStateless(svmData->size); + const bool isStateless = isForceStateless || forceStateless(svmData->size); const bool useHeapless = this->getHeaplessModeEnabled(); - auto builtInType = EBuiltInOps::adjustBuiltinType(useStateless, useHeapless); + auto builtInType = EBuiltInOps::adjustBuiltinType(isStateless, useHeapless); auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(builtInType, this->getClDevice()); diff --git a/opencl/test/unit_test/command_queue/enqueue_svm_mem_copy_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_svm_mem_copy_tests.cpp index e3a0070d0c..3ac8ea73f9 100644 --- a/opencl/test/unit_test/command_queue/enqueue_svm_mem_copy_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_svm_mem_copy_tests.cpp @@ -485,7 +485,7 @@ HWTEST_F(EnqueueSvmMemCopyHwTest, givenEnqueueSVMMemCopyWhenUsingCopyBufferToBuf EXPECT_EQ(CL_SUCCESS, retVal); } -HWTEST_F(EnqueueSvmMemCopyHwTest, givenEnqueueSVMMemCopyWhenUsingCopyBufferToBufferStatefulBuilderThenSuccessIsReturned) { +HWTEST2_F(EnqueueSvmMemCopyHwTest, givenEnqueueSVMMemCopyWhenUsingCopyBufferToBufferStatefulBuilderThenSuccessIsReturned, IsStatefulBufferPreferredForProduct) { auto cmdQ = std::make_unique>(context.get(), device.get()); if (cmdQ->getHeaplessModeEnabled()) { GTEST_SKIP(); diff --git a/opencl/test/unit_test/command_queue/enqueue_svm_mem_fill_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_svm_mem_fill_tests.cpp index b90cbd706a..0f7ac0ffb8 100644 --- a/opencl/test/unit_test/command_queue/enqueue_svm_mem_fill_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_svm_mem_fill_tests.cpp @@ -40,6 +40,7 @@ struct BaseEnqueueSvmMemFillFixture : public ClDeviceFixture, auto &compilerProductHelper = pDevice->getCompilerProductHelper(); this->useHeapless = compilerProductHelper.isHeaplessModeEnabled(*defaultHwInfo); + this->useStateless = compilerProductHelper.isForceToStatelessRequired(); } void tearDown() { @@ -58,11 +59,15 @@ struct BaseEnqueueSvmMemFillFixture : public ClDeviceFixture, EBuiltInOps::Type getAdjustedFillBufferBuiltIn() { if (useHeapless) { return EBuiltInOps::fillBufferStatelessHeapless; + } else if (useStateless) { + return EBuiltInOps::fillBufferStateless; } return EBuiltInOps::fillBuffer; } + bool useHeapless = false; + bool useStateless = false; }; using BaseEnqueueSvmMemFillTest = Test; @@ -227,7 +232,7 @@ HWTEST_P(EnqueueSvmMemFillTest, givenEnqueueSVMMemFillWhenUsingFillBufferBuilder EXPECT_EQ(Vec3(256 / middleElSize, 1, 1), di->getGWS()); auto kernel = di->getKernel(); - EXPECT_STREQ(EBuiltInOps::isHeapless(builtIn) ? "FillBufferMiddleStateless" : "FillBufferMiddle", kernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName.c_str()); + EXPECT_STREQ(EBuiltInOps::isStateless(builtIn) ? "FillBufferMiddleStateless" : "FillBufferMiddle", kernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName.c_str()); } INSTANTIATE_TEST_SUITE_P(size_t, @@ -290,7 +295,7 @@ HWTEST_F(EnqueueSvmMemFillHwTest, givenEnqueueSVMMemFillWhenUsingCopyBufferToSys EXPECT_EQ(CL_SUCCESS, retVal); } -HWTEST_F(EnqueueSvmMemFillHwTest, givenEnqueueSVMMemFillWhenUsingCopyBufferToLocalBufferStatefulBuilderThenSuccessIsReturned) { +HWTEST2_F(EnqueueSvmMemFillHwTest, givenEnqueueSVMMemFillWhenUsingCopyBufferToLocalBufferStatefulBuilderThenSuccessIsReturned, IsStatefulBufferPreferredForProduct) { auto cmdQ = std::make_unique>(context.get(), device.get()); if (cmdQ->getHeaplessModeEnabled()) { GTEST_SKIP(); diff --git a/opencl/test/unit_test/command_queue/enqueue_svm_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_svm_tests.cpp index a2e2507b60..9a8251578e 100644 --- a/opencl/test/unit_test/command_queue/enqueue_svm_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_svm_tests.cpp @@ -19,6 +19,8 @@ #include "shared/test/common/cmd_parse/hw_parse.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/libult/ult_command_stream_receiver.h" +#include "shared/test/common/mocks/mock_align_malloc_memory_manager.h" +#include "shared/test/common/mocks/mock_builtins.h" #include "shared/test/common/mocks/mock_cpu_page_fault_manager.h" #include "shared/test/common/mocks/mock_graphics_allocation.h" #include "shared/test/common/mocks/mock_svm_manager.h" @@ -31,6 +33,7 @@ #include "opencl/test/unit_test/command_queue/enqueue_map_buffer_fixture.h" #include "opencl/test/unit_test/fixtures/buffer_fixture.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" +#include "opencl/test/unit_test/mocks/mock_builder.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" @@ -1664,6 +1667,154 @@ HWTEST_F(EnqueueSvmTestLocalMemory, givenEnabledLocalMemoryWhenMappedSvmRegionAn EXPECT_EQ(2u, walkerCount); } +struct StatelessMockAlignedMallocMemoryManagerEnqueueSvmTestLocalMemory : public EnqueueSvmTestLocalMemory { + void SetUp() override { + if (is32bit) { + GTEST_SKIP(); + } + EnqueueSvmTestLocalMemory::SetUp(); + + alignedMemoryManager = std::make_unique(*pClExecutionEnvironment, true); + + memoryManagerBackup = mockSvmManager->memoryManager; + mockSvmManager->memoryManager = alignedMemoryManager.get(); + + size = static_cast(4ull * MemoryConstants::gigaByte); + } + + void TearDown() override { + if (is32bit) { + GTEST_SKIP(); + } + + mockSvmManager->memoryManager = memoryManagerBackup; + EnqueueSvmTestLocalMemory::TearDown(); + } + + private: + std::unique_ptr alignedMemoryManager; + MemoryManager *memoryManagerBackup = nullptr; +}; + +HWTEST_F(StatelessMockAlignedMallocMemoryManagerEnqueueSvmTestLocalMemory, given4gbBufferAndIsForceStatelessIsFalseWhenEnqueueSvmMapCallThenStatelessIsUsed) { + MockCommandQueueHw queue(context.get(), pClDevice, nullptr); + + EBuiltInOps::Type copyBuiltIn = EBuiltInOps::adjustBuiltinType(true, queue.getHeaplessModeEnabled()); + + auto builtIns = new MockBuiltins(); + MockRootDeviceEnvironment::resetBuiltins(queue.getDevice().getExecutionEnvironment()->rootDeviceEnvironments[queue.getDevice().getRootDeviceIndex()].get(), builtIns); + + // substitute original builder with mock builder + auto oldBuilder = pClExecutionEnvironment->setBuiltinDispatchInfoBuilder( + rootDeviceIndex, + copyBuiltIn, + std::unique_ptr(new MockBuilder(*builtIns, queue.getClDevice()))); + + auto svmPtr4gb = mockSvmManager->createSVMAlloc(static_cast(4ull * MemoryConstants::gigaByte), {}, context->getRootDeviceIndices(), context->getDeviceBitfields()); + EXPECT_NE(svmPtr4gb, nullptr); + + auto mockBuilder = static_cast(&BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder( + copyBuiltIn, + *pClDevice)); + + EXPECT_FALSE(mockBuilder->wasBuildDispatchInfosWithBuiltinOpParamsCalled); + retVal = queue.enqueueSVMMap(CL_TRUE, CL_MAP_READ, svmPtr4gb, size, 0, nullptr, nullptr, false); + EXPECT_EQ(CL_SUCCESS, retVal); + EXPECT_TRUE(mockBuilder->wasBuildDispatchInfosWithBuiltinOpParamsCalled); + + retVal = queue.enqueueSVMUnmap(svmPtr4gb, 0, nullptr, nullptr, false); + EXPECT_EQ(CL_SUCCESS, retVal); + + // restore original builder and retrieve mock builder + auto newBuilder = pClExecutionEnvironment->setBuiltinDispatchInfoBuilder( + rootDeviceIndex, + copyBuiltIn, + std::move(oldBuilder)); + EXPECT_EQ(mockBuilder, newBuilder.get()); + + mockSvmManager->freeSVMAlloc(svmPtr4gb); +} + +HWTEST_F(StatelessMockAlignedMallocMemoryManagerEnqueueSvmTestLocalMemory, given4gbBufferAndIsForceStatelessIsFalseWhenEnqueueSvmUnMapCallThenStatelessIsUsed) { + MockCommandQueueHw queue(context.get(), pClDevice, nullptr); + + EBuiltInOps::Type copyBuiltIn = EBuiltInOps::adjustBuiltinType(true, queue.getHeaplessModeEnabled()); + + auto builtIns = new MockBuiltins(); + MockRootDeviceEnvironment::resetBuiltins(queue.getDevice().getExecutionEnvironment()->rootDeviceEnvironments[queue.getDevice().getRootDeviceIndex()].get(), builtIns); + + auto svmPtr4gb = mockSvmManager->createSVMAlloc(static_cast(4ull * MemoryConstants::gigaByte), {}, context->getRootDeviceIndices(), context->getDeviceBitfields()); + EXPECT_NE(svmPtr4gb, nullptr); + + retVal = queue.enqueueSVMMap(CL_TRUE, CL_MAP_WRITE, svmPtr4gb, size, 0, nullptr, nullptr, false); + EXPECT_EQ(CL_SUCCESS, retVal); + + // substitute original builder with mock builder + auto oldBuilder = pClExecutionEnvironment->setBuiltinDispatchInfoBuilder( + rootDeviceIndex, + copyBuiltIn, + std::unique_ptr(new MockBuilder(*builtIns, queue.getClDevice()))); + + auto mockBuilder = static_cast(&BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder( + copyBuiltIn, + *pClDevice)); + + EXPECT_FALSE(mockBuilder->wasBuildDispatchInfosWithBuiltinOpParamsCalled); + retVal = queue.enqueueSVMUnmap(svmPtr4gb, 0, nullptr, nullptr, false); + EXPECT_EQ(CL_SUCCESS, retVal); + EXPECT_TRUE(mockBuilder->wasBuildDispatchInfosWithBuiltinOpParamsCalled); + + // restore original builder and retrieve mock builder + auto newBuilder = pClExecutionEnvironment->setBuiltinDispatchInfoBuilder( + rootDeviceIndex, + copyBuiltIn, + std::move(oldBuilder)); + EXPECT_EQ(mockBuilder, newBuilder.get()); + + mockSvmManager->freeSVMAlloc(svmPtr4gb); +} + +HWTEST_F(StatelessMockAlignedMallocMemoryManagerEnqueueSvmTestLocalMemory, given4gbBufferAndIsForceStatelessIsFalseWhenEnqueueSvmMemFillCallThenStatelessIsUsed) { + MockCommandQueueHw queue(context.get(), pClDevice, nullptr); + + EBuiltInOps::Type copyBuiltIn = EBuiltInOps::adjustBuiltinType(true, queue.getHeaplessModeEnabled()); + + auto builtIns = new MockBuiltins(); + MockRootDeviceEnvironment::resetBuiltins(queue.getDevice().getExecutionEnvironment()->rootDeviceEnvironments[queue.getDevice().getRootDeviceIndex()].get(), builtIns); + + auto svmPtr4gb = mockSvmManager->createSVMAlloc(static_cast(4ull * MemoryConstants::gigaByte), {}, context->getRootDeviceIndices(), context->getDeviceBitfields()); + EXPECT_NE(svmPtr4gb, nullptr); + + retVal = queue.enqueueSVMMap(CL_TRUE, CL_MAP_WRITE, svmPtr4gb, size, 0, nullptr, nullptr, false); + EXPECT_EQ(CL_SUCCESS, retVal); + + // substitute original builder with mock builder + auto oldBuilder = pClExecutionEnvironment->setBuiltinDispatchInfoBuilder( + rootDeviceIndex, + copyBuiltIn, + std::unique_ptr(new MockBuilder(*builtIns, queue.getClDevice()))); + + auto mockBuilder = static_cast(&BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder( + copyBuiltIn, + *pClDevice)); + + EXPECT_FALSE(mockBuilder->wasBuildDispatchInfosWithBuiltinOpParamsCalled); + constexpr float pattern[1] = {1.2345f}; + constexpr size_t patternSize = sizeof(pattern); + retVal = queue.enqueueSVMMemFill(svmPtr4gb, pattern, patternSize, static_cast(4ull * MemoryConstants::gigaByte), 0, nullptr, nullptr); + EXPECT_EQ(CL_SUCCESS, retVal); + EXPECT_TRUE(mockBuilder->wasBuildDispatchInfosWithBuiltinOpParamsCalled); + + // restore original builder and retrieve mock builder + auto newBuilder = pClExecutionEnvironment->setBuiltinDispatchInfoBuilder( + rootDeviceIndex, + copyBuiltIn, + std::move(oldBuilder)); + EXPECT_EQ(mockBuilder, newBuilder.get()); + + mockSvmManager->freeSVMAlloc(svmPtr4gb); +} + template struct FailCsr : public CommandStreamReceiverHw { using CommandStreamReceiverHw::CommandStreamReceiverHw; diff --git a/opencl/test/unit_test/xe_hpc_core/excludes_ocl_xe_hpc_core.cpp b/opencl/test/unit_test/xe_hpc_core/excludes_ocl_xe_hpc_core.cpp index fa6afb1d42..c17b417ee1 100644 --- a/opencl/test/unit_test/xe_hpc_core/excludes_ocl_xe_hpc_core.cpp +++ b/opencl/test/unit_test/xe_hpc_core/excludes_ocl_xe_hpc_core.cpp @@ -18,6 +18,4 @@ HWTEST_EXCLUDE_PRODUCT(CommandStreamReceiverHwTestDg2AndLater, givenGen12AndLate HWTEST_EXCLUDE_PRODUCT(CommandStreamReceiverHwTestDg2AndLater, givenGen12AndLaterWhenRayTracingEnabledButAlreadySentThenCommandIsNotAddedToBatchBuffer_MatcherIsRTCapable, IGFX_XE_HPC_CORE); HWTEST_EXCLUDE_PRODUCT(DeviceGetCapsTest, givenDeviceWhenAskingForSubGroupSizesThenReturnCorrectValues, IGFX_XE_HPC_CORE); HWTEST_EXCLUDE_PRODUCT(ClGfxCoreHelperTest, givenKernelInfoWhenCheckingRequiresAuxResolvesThenCorrectValuesAreReturned, IGFX_XE_HPC_CORE); -HWTEST_EXCLUDE_PRODUCT(EnqueueSvmMemCopyHwTest, givenEnqueueSVMMemCopyWhenUsingCopyBufferToBufferStatefulBuilderThenSuccessIsReturned, IGFX_XE_HPC_CORE); -HWTEST_EXCLUDE_PRODUCT(EnqueueSvmMemFillHwTest, givenEnqueueSVMMemFillWhenUsingCopyBufferToLocalBufferStatefulBuilderThenSuccessIsReturned, IGFX_XE_HPC_CORE); HWTEST_EXCLUDE_PRODUCT(CommandStreamReceiverFlushTaskTests, givenOverrideThreadArbitrationPolicyDebugVariableSetWhenFlushingThenRequestRequiredMode, IGFX_XE_HPC_CORE); diff --git a/shared/test/common/mocks/CMakeLists.txt b/shared/test/common/mocks/CMakeLists.txt index 6712649794..756c89db91 100644 --- a/shared/test/common/mocks/CMakeLists.txt +++ b/shared/test/common/mocks/CMakeLists.txt @@ -101,6 +101,7 @@ set(NEO_CORE_tests_mocks ${CMAKE_CURRENT_SOURCE_DIR}/mock_zebin_wrapper.h ${CMAKE_CURRENT_SOURCE_DIR}/ult_device_factory.cpp ${CMAKE_CURRENT_SOURCE_DIR}/ult_device_factory.h + ${CMAKE_CURRENT_SOURCE_DIR}/mock_align_malloc_memory_manager.h ) if(WIN32) diff --git a/shared/test/common/mocks/mock_align_malloc_memory_manager.h b/shared/test/common/mocks/mock_align_malloc_memory_manager.h new file mode 100644 index 0000000000..fd87f4cfd7 --- /dev/null +++ b/shared/test/common/mocks/mock_align_malloc_memory_manager.h @@ -0,0 +1,58 @@ +/* + * Copyright (C) 2025 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#pragma once +#include "shared/test/common/mocks/mock_memory_manager.h" + +using namespace NEO; + +class MockAlignMallocMemoryManager : public MockMemoryManager { + public: + MockAlignMallocMemoryManager(ExecutionEnvironment &executionEnvironment, bool enableLocalMemory = false) : MockMemoryManager(enableLocalMemory, executionEnvironment) { + testMallocRestrictions.minAddress = 0; + alignMallocRestrictions = nullptr; + alignMallocCount = 0; + alignMallocMaxIter = 3; + returnNullBad = false; + returnNullGood = false; + } + + AlignedMallocRestrictions testMallocRestrictions; + AlignedMallocRestrictions *alignMallocRestrictions; + + static const uintptr_t alignMallocMinAddress = 0x100000; + static const uintptr_t alignMallocStep = 10; + int alignMallocMaxIter; + int alignMallocCount; + bool returnNullBad; + bool returnNullGood; + + void *alignedMallocWrapper(size_t size, size_t align) override { + if (alignMallocCount < alignMallocMaxIter) { + alignMallocCount++; + if (!returnNullBad) { + return reinterpret_cast(alignMallocMinAddress - alignMallocStep); + } else { + return nullptr; + } + } + alignMallocCount = 0; + if (!returnNullGood) { + return reinterpret_cast(alignMallocMinAddress + alignMallocStep); + } else { + return nullptr; + } + }; + + void alignedFreeWrapper(void *) override { + alignMallocCount = 0; + } + + AlignedMallocRestrictions *getAlignedMallocRestrictions() override { + return alignMallocRestrictions; + } +}; \ No newline at end of file diff --git a/shared/test/unit_test/memory_manager/memory_manager_tests.cpp b/shared/test/unit_test/memory_manager/memory_manager_tests.cpp index 7d232e345f..942f9690a1 100644 --- a/shared/test/unit_test/memory_manager/memory_manager_tests.cpp +++ b/shared/test/unit_test/memory_manager/memory_manager_tests.cpp @@ -21,6 +21,7 @@ #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/helpers/engine_descriptor_helper.h" #include "shared/test/common/helpers/raii_gfx_core_helper.h" +#include "shared/test/common/mocks/mock_align_malloc_memory_manager.h" #include "shared/test/common/mocks/mock_allocation_properties.h" #include "shared/test/common/mocks/mock_aub_center.h" #include "shared/test/common/mocks/mock_aub_manager.h" @@ -2176,53 +2177,6 @@ TEST_F(MemoryManagerWithCsrTest, givenAllocationThatWasUsedAndIsNotCompletedWhen usedAllocationAndNotGpuCompleted->updateTaskCount(csr->peekLatestFlushedTaskCount(), csr->getOsContext().getContextId()); } -class MockAlignMallocMemoryManager : public MockMemoryManager { - public: - MockAlignMallocMemoryManager(ExecutionEnvironment &executionEnvironment) : MockMemoryManager(executionEnvironment) { - testMallocRestrictions.minAddress = 0; - alignMallocRestrictions = nullptr; - alignMallocCount = 0; - alignMallocMaxIter = 3; - returnNullBad = false; - returnNullGood = false; - } - - AlignedMallocRestrictions testMallocRestrictions; - AlignedMallocRestrictions *alignMallocRestrictions; - - static const uintptr_t alignMallocMinAddress = 0x100000; - static const uintptr_t alignMallocStep = 10; - int alignMallocMaxIter; - int alignMallocCount; - bool returnNullBad; - bool returnNullGood; - - void *alignedMallocWrapper(size_t size, size_t align) override { - if (alignMallocCount < alignMallocMaxIter) { - alignMallocCount++; - if (!returnNullBad) { - return reinterpret_cast(alignMallocMinAddress - alignMallocStep); - } else { - return nullptr; - } - } - alignMallocCount = 0; - if (!returnNullGood) { - return reinterpret_cast(alignMallocMinAddress + alignMallocStep); - } else { - return nullptr; - } - }; - - void alignedFreeWrapper(void *) override { - alignMallocCount = 0; - } - - AlignedMallocRestrictions *getAlignedMallocRestrictions() override { - return alignMallocRestrictions; - } -}; - struct MockAlignMallocMemoryManagerTest : public MemoryAllocatorTest { void SetUp() override { MemoryAllocatorTest::SetUp();