From bb55d2259e0e5d8850964a9b5914567dd39bd41d Mon Sep 17 00:00:00 2001 From: Szymon Morek Date: Mon, 28 Nov 2022 15:24:46 +0000 Subject: [PATCH] Enable CPU memcpy on DG2 Resolves: NEO-7553 Signed-off-by: Szymon Morek --- .../source/cmdlist/cmdlist_hw_immediate.inl | 2 +- .../sources/cmdlist/test_cmdlist_7.cpp | 442 ++++++++++++++++++ .../xe_hpc_core/test_cmdlist_xe_hpc_core.cpp | 437 ----------------- shared/source/helpers/hw_helper.h | 4 +- shared/source/helpers/hw_helper_base.inl | 2 +- .../xe_hpc_core/hw_helper_xe_hpc_core.cpp | 2 +- .../xe_hpg_core/hw_helper_xe_hpg_core.cpp | 8 + .../unit_test/helpers/test_hw_info_config.cpp | 8 +- .../hw_helper_xe_hpc_core_tests.cpp | 2 +- .../hw_helper_tests_xe_hpg_core.cpp | 9 + 10 files changed, 469 insertions(+), 447 deletions(-) diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl b/level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl index af37a648d3..c4d3f09c55 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl @@ -602,7 +602,7 @@ bool CommandListCoreFamilyImmediate::preferCopyThroughLockedPtr(N if (NEO::DebugManager.flags.ExperimentalD2HCpuCopyThreshold.get() != -1) { d2HThreshold = NEO::DebugManager.flags.ExperimentalD2HCpuCopyThreshold.get(); } - if (NEO::HwHelper::get(this->device->getHwInfo().platform.eRenderCoreFamily).copyThroughLockedPtrEnabled()) { + if (NEO::HwHelper::get(this->device->getHwInfo().platform.eRenderCoreFamily).copyThroughLockedPtrEnabled(this->device->getHwInfo())) { return (!srcFound && isSuitableUSMDeviceAlloc(dstAlloc, dstFound) && size <= h2DThreshold) || (!dstFound && isSuitableUSMDeviceAlloc(srcAlloc, srcFound) && size <= d2HThreshold); } diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_7.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_7.cpp index 6a1409ff8f..0d9cbdb99b 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_7.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_7.cpp @@ -10,6 +10,7 @@ #include "shared/test/common/cmd_parse/hw_parse.h" #include "shared/test/common/helpers/unit_test_helper.h" #include "shared/test/common/libult/ult_command_stream_receiver.h" +#include "shared/test/common/mocks/mock_ostime.h" #include "shared/test/common/mocks/ult_device_factory.h" #include "shared/test/common/test_macros/hw_test.h" @@ -1880,5 +1881,446 @@ HWTEST_F(CommandListCreate, givenCommandListWhenRemoveDeallocationContainerDataT cmdContainer.getDeallocationContainer().clear(); } +struct AppendMemoryLockedCopyFixture : public DeviceFixture { + void setUp() { + DebugManager.flags.ExperimentalCopyThroughLock.set(1); + DeviceFixture::setUp(); + + nonUsmHostPtr = new char[sz]; + ze_device_mem_alloc_desc_t deviceDesc = {}; + context->allocDeviceMem(device->toHandle(), &deviceDesc, sz, 1u, &devicePtr); + } + void tearDown() { + delete[] nonUsmHostPtr; + context->freeMem(devicePtr); + DeviceFixture::tearDown(); + } + + DebugManagerStateRestore restore; + char *nonUsmHostPtr; + void *devicePtr; + size_t sz = 4 * MemoryConstants::megaByte; +}; + +using AppendMemoryLockedCopyTest = Test; + +HWTEST2_F(AppendMemoryLockedCopyTest, givenImmediateCommandListAndNonUsmHostPtrWhenPreferCopyThroughLockedPtrCalledThenReturnTrue, IsAtLeastSkl) { + MockCommandListImmediateHw cmdList; + cmdList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u); + NEO::SvmAllocationData *srcAllocData; + NEO::SvmAllocationData *dstAllocData; + auto srcFound = device->getDriverHandle()->findAllocationDataForRange(nonUsmHostPtr, 1024, &srcAllocData); + auto dstFound = device->getDriverHandle()->findAllocationDataForRange(devicePtr, 1024, &dstAllocData); + EXPECT_TRUE(cmdList.preferCopyThroughLockedPtr(dstAllocData, dstFound, srcAllocData, srcFound, 1024)); +} + +HWTEST2_F(AppendMemoryLockedCopyTest, givenImmediateCommandListWhenIsSuitableUSMDeviceAllocThenReturnCorrectValue, IsAtLeastSkl) { + MockCommandListImmediateHw cmdList; + cmdList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u); + NEO::SvmAllocationData *srcAllocData; + NEO::SvmAllocationData *dstAllocData; + auto srcFound = device->getDriverHandle()->findAllocationDataForRange(nonUsmHostPtr, 1024, &srcAllocData); + auto dstFound = device->getDriverHandle()->findAllocationDataForRange(devicePtr, 1024, &dstAllocData); + EXPECT_FALSE(cmdList.isSuitableUSMDeviceAlloc(srcAllocData, srcFound)); + EXPECT_TRUE(cmdList.isSuitableUSMDeviceAlloc(dstAllocData, dstFound)); +} + +struct LocalMemoryMultiSubDeviceFixture : public SingleRootMultiSubDeviceFixture { + void setUp() { + DebugManager.flags.EnableLocalMemory.set(1); + DebugManager.flags.EnableImplicitScaling.set(1); + SingleRootMultiSubDeviceFixture::setUp(); + } + DebugManagerStateRestore restore; +}; + +using LocalMemoryMultiSubDeviceTest = Test; + +HWTEST2_F(LocalMemoryMultiSubDeviceTest, givenImmediateCommandListWhenIsSuitableUSMDeviceAllocWithColouredBufferThenReturnFalse, IsAtLeastSkl) { + MockCommandListImmediateHw cmdList; + cmdList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u); + + void *devicePtr; + ze_device_mem_alloc_desc_t deviceDesc = {}; + context->allocDeviceMem(device->toHandle(), &deviceDesc, 2 * MemoryConstants::megaByte, 1u, &devicePtr); + + NEO::SvmAllocationData *allocData; + auto allocFound = device->getDriverHandle()->findAllocationDataForRange(devicePtr, 2 * MemoryConstants::megaByte, &allocData); + EXPECT_FALSE(cmdList.isSuitableUSMDeviceAlloc(allocData, allocFound)); + context->freeMem(devicePtr); +} + +HWTEST2_F(AppendMemoryLockedCopyTest, givenImmediateCommandListAndNonUsmHostPtrAndFlagDisabledWhenPreferCopyThroughLockedPtrCalledThenReturnFalse, IsAtLeastSkl) { + DebugManager.flags.ExperimentalCopyThroughLock.set(0); + MockCommandListImmediateHw cmdList; + cmdList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u); + NEO::SvmAllocationData *srcAllocData; + NEO::SvmAllocationData *dstAllocData; + auto srcFound = device->getDriverHandle()->findAllocationDataForRange(nonUsmHostPtr, 1024, &srcAllocData); + auto dstFound = device->getDriverHandle()->findAllocationDataForRange(devicePtr, 1024, &dstAllocData); + EXPECT_FALSE(cmdList.preferCopyThroughLockedPtr(dstAllocData, dstFound, srcAllocData, srcFound, 1024)); +} + +HWTEST2_F(AppendMemoryLockedCopyTest, givenImmediateCommandListAndNonUsmHostPtrWhenCopyH2DThenLockPtr, IsAtLeastSkl) { + MockCommandListImmediateHw cmdList; + cmdList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u); + cmdList.csr = device->getNEODevice()->getInternalEngine().commandStreamReceiver; + + NEO::SvmAllocationData *allocData; + device->getDriverHandle()->findAllocationDataForRange(devicePtr, 1024, &allocData); + auto dstAlloc = allocData->gpuAllocations.getGraphicsAllocation(device->getRootDeviceIndex()); + + EXPECT_EQ(nullptr, dstAlloc->getLockedPtr()); + cmdList.appendMemoryCopy(devicePtr, nonUsmHostPtr, 1024, nullptr, 0, nullptr); + EXPECT_EQ(1u, reinterpret_cast(device->getDriverHandle()->getMemoryManager())->lockResourceCalled); + EXPECT_NE(nullptr, dstAlloc->getLockedPtr()); +} + +HWTEST2_F(AppendMemoryLockedCopyTest, givenImmediateCommandListAndNonUsmHostPtrWhenCopyD2HThenLockPtr, IsAtLeastSkl) { + MockCommandListImmediateHw cmdList; + cmdList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u); + cmdList.csr = device->getNEODevice()->getInternalEngine().commandStreamReceiver; + + NEO::SvmAllocationData *allocData; + device->getDriverHandle()->findAllocationDataForRange(devicePtr, 1024, &allocData); + auto dstAlloc = allocData->gpuAllocations.getGraphicsAllocation(device->getRootDeviceIndex()); + + EXPECT_EQ(nullptr, dstAlloc->getLockedPtr()); + cmdList.appendMemoryCopy(nonUsmHostPtr, devicePtr, 1024, nullptr, 0, nullptr); + EXPECT_EQ(1u, reinterpret_cast(device->getDriverHandle()->getMemoryManager())->lockResourceCalled); + EXPECT_NE(nullptr, dstAlloc->getLockedPtr()); +} + +HWTEST2_F(AppendMemoryLockedCopyTest, givenImmediateCommandListAndNonUsmHostPtrWhenCopyH2DAndDstPtrLockedThenDontLockAgain, IsAtLeastSkl) { + MockCommandListImmediateHw cmdList; + cmdList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u); + cmdList.csr = device->getNEODevice()->getInternalEngine().commandStreamReceiver; + + NEO::SvmAllocationData *allocData; + device->getDriverHandle()->findAllocationDataForRange(devicePtr, 1024, &allocData); + auto dstAlloc = allocData->gpuAllocations.getGraphicsAllocation(device->getRootDeviceIndex()); + + device->getDriverHandle()->getMemoryManager()->lockResource(dstAlloc); + + EXPECT_EQ(1u, reinterpret_cast(device->getDriverHandle()->getMemoryManager())->lockResourceCalled); + cmdList.appendMemoryCopy(devicePtr, nonUsmHostPtr, 1024, nullptr, 0, nullptr); + EXPECT_EQ(1u, reinterpret_cast(device->getDriverHandle()->getMemoryManager())->lockResourceCalled); +} + +HWTEST2_F(AppendMemoryLockedCopyTest, givenImmediateCommandListAndNonUsmHostPtrWhenCopyH2DThenUseMemcpyAndReturnSuccess, IsAtLeastSkl) { + MockCommandListImmediateHw cmdList; + cmdList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u); + cmdList.csr = device->getNEODevice()->getInternalEngine().commandStreamReceiver; + + memset(nonUsmHostPtr, 1, 1024); + + auto res = cmdList.appendMemoryCopy(devicePtr, nonUsmHostPtr, 1024, nullptr, 0, nullptr); + EXPECT_EQ(res, ZE_RESULT_SUCCESS); + + NEO::SvmAllocationData *allocData; + device->getDriverHandle()->findAllocationDataForRange(devicePtr, 1024, &allocData); + auto dstAlloc = allocData->gpuAllocations.getGraphicsAllocation(device->getRootDeviceIndex()); + auto lockedPtr = reinterpret_cast(dstAlloc->getLockedPtr()); + EXPECT_EQ(0, memcmp(lockedPtr, nonUsmHostPtr, 1024)); +} + +HWTEST2_F(AppendMemoryLockedCopyTest, givenImmediateCommandListAndSignalEventAndNonUsmHostPtrWhenCopyH2DThenSignalEvent, IsAtLeastSkl) { + MockCommandListImmediateHw cmdList; + cmdList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u); + cmdList.csr = device->getNEODevice()->getInternalEngine().commandStreamReceiver; + + ze_event_pool_desc_t eventPoolDesc = {}; + eventPoolDesc.count = 1; + eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP; + + ze_event_desc_t eventDesc = {}; + eventDesc.index = 0; + ze_result_t returnValue = ZE_RESULT_SUCCESS; + auto eventPool = std::unique_ptr(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, returnValue)); + EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue); + auto event = std::unique_ptr(Event::create(eventPool.get(), &eventDesc, device)); + + EXPECT_EQ(event->queryStatus(), ZE_RESULT_NOT_READY); + auto res = cmdList.appendMemoryCopy(devicePtr, nonUsmHostPtr, 1024, event->toHandle(), 0, nullptr); + EXPECT_EQ(res, ZE_RESULT_SUCCESS); + + EXPECT_EQ(event->queryStatus(), ZE_RESULT_SUCCESS); +} + +HWTEST2_F(AppendMemoryLockedCopyTest, givenImmediateCommandListAndSignalEventAndCpuMemcpyWhenGpuHangThenDontSynchronizeEvent, IsAtLeastSkl) { + MockCommandListImmediateHw cmdList; + cmdList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u); + cmdList.csr = device->getNEODevice()->getInternalEngine().commandStreamReceiver; + reinterpret_cast *>(cmdList.csr)->callBaseWaitForCompletionWithTimeout = false; + reinterpret_cast *>(cmdList.csr)->returnWaitForCompletionWithTimeout = WaitStatus::GpuHang; + + ze_event_pool_desc_t eventPoolDesc = {}; + eventPoolDesc.count = 1; + eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP; + + ze_event_desc_t eventDesc = {}; + eventDesc.index = 0; + ze_result_t returnValue = ZE_RESULT_SUCCESS; + auto eventPool = std::unique_ptr(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, returnValue)); + EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue); + auto event = std::unique_ptr(Event::create(eventPool.get(), &eventDesc, device)); + + EXPECT_EQ(event->queryStatus(), ZE_RESULT_NOT_READY); + cmdList.appendBarrier(nullptr, 0, nullptr); + auto res = cmdList.appendMemoryCopy(devicePtr, nonUsmHostPtr, 1024, event->toHandle(), 0, nullptr); + EXPECT_EQ(res, ZE_RESULT_ERROR_DEVICE_LOST); + + EXPECT_EQ(event->queryStatus(), ZE_RESULT_NOT_READY); +} + +HWTEST2_F(AppendMemoryLockedCopyTest, givenImmediateCommandListWhenCpuMemcpyWithoutBarrierThenDontWaitForTagUpdate, IsAtLeastSkl) { + MockCommandListImmediateHw cmdList; + cmdList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u); + cmdList.csr = device->getNEODevice()->getInternalEngine().commandStreamReceiver; + + auto res = cmdList.appendMemoryCopy(devicePtr, nonUsmHostPtr, 1024, nullptr, 0, nullptr); + EXPECT_EQ(res, ZE_RESULT_SUCCESS); + + uint32_t waitForFlushTagUpdateCalled = reinterpret_cast *>(cmdList.csr)->waitForCompletionWithTimeoutTaskCountCalled; + EXPECT_EQ(waitForFlushTagUpdateCalled, 0u); +} + +HWTEST2_F(AppendMemoryLockedCopyTest, givenImmediateCommandListWhenCpuMemcpyWithBarrierThenWaitForTagUpdate, IsAtLeastSkl) { + MockCommandListImmediateHw cmdList; + cmdList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u); + cmdList.csr = device->getNEODevice()->getInternalEngine().commandStreamReceiver; + + cmdList.appendBarrier(nullptr, 0, nullptr); + auto res = cmdList.appendMemoryCopy(devicePtr, nonUsmHostPtr, 1024, nullptr, 0, nullptr); + EXPECT_EQ(res, ZE_RESULT_SUCCESS); + + uint32_t waitForFlushTagUpdateCalled = reinterpret_cast *>(cmdList.csr)->waitForCompletionWithTimeoutTaskCountCalled; + EXPECT_EQ(waitForFlushTagUpdateCalled, 1u); +} + +HWTEST2_F(AppendMemoryLockedCopyTest, givenImmediateCommandListWhenAppendBarrierThenSetDependenciesPresent, IsAtLeastSkl) { + MockCommandListImmediateHw cmdList; + cmdList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u); + cmdList.csr = device->getNEODevice()->getInternalEngine().commandStreamReceiver; + + EXPECT_FALSE(cmdList.dependenciesPresent); + + cmdList.appendBarrier(nullptr, 0, nullptr); + + EXPECT_TRUE(cmdList.dependenciesPresent); + + auto res = cmdList.appendMemoryCopy(devicePtr, nonUsmHostPtr, 1024, nullptr, 0, nullptr); + EXPECT_EQ(res, ZE_RESULT_SUCCESS); + + EXPECT_FALSE(cmdList.dependenciesPresent); +} + +HWTEST2_F(AppendMemoryLockedCopyTest, givenImmediateCommandListWhenAppendWaitOnEventsThenSetDependenciesPresent, IsAtLeastSkl) { + MockCommandListImmediateHw cmdList; + cmdList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u); + cmdList.csr = device->getNEODevice()->getInternalEngine().commandStreamReceiver; + + EXPECT_FALSE(cmdList.dependenciesPresent); + ze_event_pool_desc_t eventPoolDesc = {}; + eventPoolDesc.count = 1; + eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP; + ze_event_desc_t eventDesc = {}; + eventDesc.index = 0; + ze_result_t returnValue = ZE_RESULT_SUCCESS; + auto eventPool = std::unique_ptr(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, returnValue)); + EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue); + auto event = std::unique_ptr(Event::create(eventPool.get(), &eventDesc, device)); + auto eventHandle = event->toHandle(); + cmdList.appendWaitOnEvents(1, &eventHandle); + + EXPECT_TRUE(cmdList.dependenciesPresent); + + auto res = cmdList.appendMemoryCopy(devicePtr, nonUsmHostPtr, 1024, nullptr, 0, nullptr); + EXPECT_EQ(res, ZE_RESULT_SUCCESS); + + EXPECT_FALSE(cmdList.dependenciesPresent); +} + +template +class MockAppendMemoryLockedCopyTestImmediateCmdList : public MockCommandListImmediateHw { + public: + MockAppendMemoryLockedCopyTestImmediateCmdList() : MockCommandListImmediateHw() {} + ze_result_t appendMemoryCopyKernelWithGA(void *dstPtr, NEO::GraphicsAllocation *dstPtrAlloc, + uint64_t dstOffset, void *srcPtr, + NEO::GraphicsAllocation *srcPtrAlloc, + uint64_t srcOffset, uint64_t size, + uint64_t elementSize, Builtin builtin, + Event *signalEvent, + bool isStateless, + CmdListKernelLaunchParams &launchParams) override { + appendMemoryCopyKernelWithGACalled++; + return ZE_RESULT_SUCCESS; + } + ze_result_t appendBarrier(ze_event_handle_t hSignalEvent, + uint32_t numWaitEvents, + ze_event_handle_t *phWaitEvents) override { + appendBarrierCalled++; + return MockCommandListImmediateHw::appendBarrier(hSignalEvent, numWaitEvents, phWaitEvents); + } + + uint32_t appendBarrierCalled = 0; + uint32_t appendMemoryCopyKernelWithGACalled = 0; +}; + +HWTEST2_F(AppendMemoryLockedCopyTest, givenImmediateCommandListAndUsmSrcHostPtrWhenCopyH2DThenUseGpuMemcpy, IsAtLeastSkl) { + MockAppendMemoryLockedCopyTestImmediateCmdList cmdList; + cmdList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u); + void *usmSrcPtr; + ze_host_mem_alloc_desc_t hostDesc = {}; + context->allocHostMem(&hostDesc, 1024, 1u, &usmSrcPtr); + + cmdList.appendMemoryCopy(devicePtr, usmSrcPtr, 1024, nullptr, 0, nullptr); + EXPECT_GE(cmdList.appendMemoryCopyKernelWithGACalled, 1u); + context->freeMem(usmSrcPtr); +} + +HWTEST2_F(AppendMemoryLockedCopyTest, givenImmediateCommandListAndUsmDstHostPtrWhenCopyThenUseGpuMemcpy, IsAtLeastSkl) { + MockAppendMemoryLockedCopyTestImmediateCmdList cmdList; + cmdList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u); + void *usmHostDstPtr; + ze_host_mem_alloc_desc_t hostDesc = {}; + context->allocHostMem(&hostDesc, 1024, 1u, &usmHostDstPtr); + + cmdList.appendMemoryCopy(usmHostDstPtr, nonUsmHostPtr, 1024, nullptr, 0, nullptr); + EXPECT_GE(cmdList.appendMemoryCopyKernelWithGACalled, 1u); + context->freeMem(usmHostDstPtr); +} + +HWTEST2_F(AppendMemoryLockedCopyTest, givenImmediateCommandListAndUsmSrcHostPtrWhenCopyThenUseGpuMemcpy, IsAtLeastSkl) { + MockAppendMemoryLockedCopyTestImmediateCmdList cmdList; + cmdList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u); + void *usmHostSrcPtr; + ze_host_mem_alloc_desc_t hostDesc = {}; + context->allocHostMem(&hostDesc, 1024, 1u, &usmHostSrcPtr); + + cmdList.appendMemoryCopy(nonUsmHostPtr, usmHostSrcPtr, 1024, nullptr, 0, nullptr); + EXPECT_GE(cmdList.appendMemoryCopyKernelWithGACalled, 1u); + context->freeMem(usmHostSrcPtr); +} + +HWTEST2_F(AppendMemoryLockedCopyTest, givenImmediateCommandListAndNonUsmSrcHostPtrWhenSizeTooLargeThenUseGpuMemcpy, IsAtLeastSkl) { + MockAppendMemoryLockedCopyTestImmediateCmdList cmdList; + cmdList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u); + cmdList.appendMemoryCopy(devicePtr, nonUsmHostPtr, 3 * MemoryConstants::megaByte, nullptr, 0, nullptr); + EXPECT_GE(cmdList.appendMemoryCopyKernelWithGACalled, 1u); +} + +HWTEST2_F(AppendMemoryLockedCopyTest, givenImmediateCommandListAndNonUsmDstHostPtrWhenSizeTooLargeThenUseGpuMemcpy, IsAtLeastSkl) { + MockAppendMemoryLockedCopyTestImmediateCmdList cmdList; + cmdList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u); + cmdList.appendMemoryCopy(nonUsmHostPtr, devicePtr, 2 * MemoryConstants::kiloByte, nullptr, 0, nullptr); + EXPECT_GE(cmdList.appendMemoryCopyKernelWithGACalled, 1u); +} + +HWTEST2_F(AppendMemoryLockedCopyTest, givenImmediateCommandListAndD2HCopyWhenSizeTooLargeButFlagSetThenUseCpuMemcpy, IsAtLeastSkl) { + DebugManager.flags.ExperimentalD2HCpuCopyThreshold.set(2048); + MockAppendMemoryLockedCopyTestImmediateCmdList cmdList; + cmdList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u); + cmdList.csr = device->getNEODevice()->getInternalEngine().commandStreamReceiver; + + cmdList.appendMemoryCopy(nonUsmHostPtr, devicePtr, 2 * MemoryConstants::kiloByte, nullptr, 0, nullptr); + EXPECT_EQ(cmdList.appendMemoryCopyKernelWithGACalled, 0u); +} + +HWTEST2_F(AppendMemoryLockedCopyTest, givenImmediateCommandListAndH2DCopyWhenSizeTooLargeButFlagSetThenUseCpuMemcpy, IsAtLeastSkl) { + DebugManager.flags.ExperimentalH2DCpuCopyThreshold.set(3 * MemoryConstants::megaByte); + MockAppendMemoryLockedCopyTestImmediateCmdList cmdList; + cmdList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u); + cmdList.csr = device->getNEODevice()->getInternalEngine().commandStreamReceiver; + cmdList.appendMemoryCopy(devicePtr, nonUsmHostPtr, 3 * MemoryConstants::megaByte, nullptr, 0, nullptr); + EXPECT_EQ(cmdList.appendMemoryCopyKernelWithGACalled, 0u); +} + +HWTEST2_F(AppendMemoryLockedCopyTest, givenImmediateCommandListAndCpuMemcpyWithDependencyThenAppendBarrierCalled, IsAtLeastSkl) { + MockAppendMemoryLockedCopyTestImmediateCmdList cmdList; + cmdList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u); + cmdList.csr = device->getNEODevice()->getInternalEngine().commandStreamReceiver; + + ze_event_pool_desc_t eventPoolDesc = {}; + eventPoolDesc.count = 1; + + ze_event_desc_t eventDesc = {}; + eventDesc.index = 0; + ze_result_t returnValue = ZE_RESULT_SUCCESS; + auto eventPool = std::unique_ptr(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, returnValue)); + EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue); + auto event = std::unique_ptr(Event::create(eventPool.get(), &eventDesc, device)); + auto phEvent = event->toHandle(); + cmdList.appendMemoryCopy(devicePtr, nonUsmHostPtr, 2 * MemoryConstants::kiloByte, nullptr, 1, &phEvent); + EXPECT_EQ(cmdList.appendBarrierCalled, 1u); +} + +HWTEST2_F(AppendMemoryLockedCopyTest, givenImmediateCommandListAndCpuMemcpyWithoutDependencyThenAppendBarrierNotCalled, IsAtLeastSkl) { + MockAppendMemoryLockedCopyTestImmediateCmdList cmdList; + cmdList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u); + cmdList.csr = device->getNEODevice()->getInternalEngine().commandStreamReceiver; + cmdList.appendMemoryCopy(devicePtr, nonUsmHostPtr, 2 * MemoryConstants::kiloByte, nullptr, 0, nullptr); + EXPECT_EQ(cmdList.appendBarrierCalled, 0u); +} + +HWTEST2_F(AppendMemoryLockedCopyTest, givenImmediateCommandListAndTimestampFlagSetWhenCpuMemcpyThenSetCorrectGpuTimestamps, IsAtLeastSkl) { + MockAppendMemoryLockedCopyTestImmediateCmdList cmdList; + cmdList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u); + cmdList.csr = device->getNEODevice()->getInternalEngine().commandStreamReceiver; + neoDevice->setOSTime(new NEO::MockOSTimeWithConstTimestamp()); + + ze_event_pool_desc_t eventPoolDesc = {}; + eventPoolDesc.count = 1; + eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP; + + ze_event_desc_t eventDesc = {}; + eventDesc.index = 0; + + ze_result_t returnValue = ZE_RESULT_SUCCESS; + auto eventPool = std::unique_ptr(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, returnValue)); + EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue); + + auto event = std::unique_ptr(Event::create(eventPool.get(), &eventDesc, device)); + auto phEvent = event->toHandle(); + cmdList.appendMemoryCopy(devicePtr, nonUsmHostPtr, 2 * MemoryConstants::kiloByte, phEvent, 0, nullptr); + ze_kernel_timestamp_result_t resultTimestamp = {}; + auto result = event->queryKernelTimestamp(&resultTimestamp); + EXPECT_EQ(result, ZE_RESULT_SUCCESS); + + EXPECT_EQ(resultTimestamp.context.kernelStart, NEO::MockDeviceTimeWithConstTimestamp::gpuTimestamp); + EXPECT_EQ(resultTimestamp.global.kernelStart, NEO::MockDeviceTimeWithConstTimestamp::gpuTimestamp); + EXPECT_EQ(resultTimestamp.context.kernelEnd, NEO::MockDeviceTimeWithConstTimestamp::gpuTimestamp); + EXPECT_EQ(resultTimestamp.global.kernelEnd, NEO::MockDeviceTimeWithConstTimestamp::gpuTimestamp); +} + +HWTEST2_F(AppendMemoryLockedCopyTest, givenImmediateCommandListAndTimestampFlagNotSetWhenCpuMemcpyThenDontSetGpuTimestamps, IsAtLeastSkl) { + struct MockGpuTimestampEvent : public EventImp { + using EventImp::gpuStartTimestamp; + using EventImp::gpuEndTimestamp; + }; + MockAppendMemoryLockedCopyTestImmediateCmdList cmdList; + cmdList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u); + cmdList.csr = device->getNEODevice()->getInternalEngine().commandStreamReceiver; + neoDevice->setOSTime(new NEO::MockOSTimeWithConstTimestamp()); + + ze_event_pool_desc_t eventPoolDesc = {}; + eventPoolDesc.count = 1; + + ze_event_desc_t eventDesc = {}; + eventDesc.index = 0; + ze_result_t returnValue = ZE_RESULT_SUCCESS; + auto eventPool = std::unique_ptr(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, returnValue)); + EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue); + + auto event = std::unique_ptr(Event::create(eventPool.get(), &eventDesc, device)); + auto phEvent = event->toHandle(); + cmdList.appendMemoryCopy(devicePtr, nonUsmHostPtr, 2 * MemoryConstants::kiloByte, phEvent, 0, nullptr); + ze_kernel_timestamp_result_t resultTimestamp = {}; + auto result = event->queryKernelTimestamp(&resultTimestamp); + EXPECT_EQ(result, ZE_RESULT_SUCCESS); + EXPECT_EQ(0u, reinterpret_cast(event.get())->gpuStartTimestamp); + EXPECT_EQ(0u, reinterpret_cast(event.get())->gpuEndTimestamp); +} + } // namespace ult } // namespace L0 diff --git a/level_zero/core/test/unit_tests/xe_hpc_core/test_cmdlist_xe_hpc_core.cpp b/level_zero/core/test/unit_tests/xe_hpc_core/test_cmdlist_xe_hpc_core.cpp index 3c7e72ae98..4723b7d150 100644 --- a/level_zero/core/test/unit_tests/xe_hpc_core/test_cmdlist_xe_hpc_core.cpp +++ b/level_zero/core/test/unit_tests/xe_hpc_core/test_cmdlist_xe_hpc_core.cpp @@ -11,7 +11,6 @@ #include "shared/test/common/libult/ult_command_stream_receiver.h" #include "shared/test/common/memory_manager/mock_prefetch_manager.h" #include "shared/test/common/mocks/mock_graphics_allocation.h" -#include "shared/test/common/mocks/mock_ostime.h" #include "shared/test/common/test_macros/hw_test.h" #include "level_zero/core/source/event/event.h" @@ -886,442 +885,6 @@ HWTEST2_F(CommandListAppendLaunchKernelXeHpcCore, ASSERT_EQ(result, ZE_RESULT_SUCCESS); } -struct AppendMemoryLockedCopyFixture : public DeviceFixture { - void setUp() { - DeviceFixture::setUp(); - - nonUsmHostPtr = new char[sz]; - ze_device_mem_alloc_desc_t deviceDesc = {}; - context->allocDeviceMem(device->toHandle(), &deviceDesc, sz, 1u, &devicePtr); - } - void tearDown() { - delete[] nonUsmHostPtr; - context->freeMem(devicePtr); - DeviceFixture::tearDown(); - } - - DebugManagerStateRestore restore; - char *nonUsmHostPtr; - void *devicePtr; - size_t sz = 4 * MemoryConstants::megaByte; -}; - -using AppendMemoryLockedCopyTest = Test; - -HWTEST2_F(AppendMemoryLockedCopyTest, givenImmediateCommandListAndNonUsmHostPtrWhenPreferCopyThroughLockedPtrCalledThenReturnTrue, IsXeHpcCore) { - MockCommandListImmediateHw cmdList; - cmdList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u); - NEO::SvmAllocationData *srcAllocData; - NEO::SvmAllocationData *dstAllocData; - auto srcFound = device->getDriverHandle()->findAllocationDataForRange(nonUsmHostPtr, 1024, &srcAllocData); - auto dstFound = device->getDriverHandle()->findAllocationDataForRange(devicePtr, 1024, &dstAllocData); - EXPECT_TRUE(cmdList.preferCopyThroughLockedPtr(dstAllocData, dstFound, srcAllocData, srcFound, 1024)); -} - -HWTEST2_F(AppendMemoryLockedCopyTest, givenImmediateCommandListWhenIsSuitableUSMDeviceAllocThenReturnCorrectValue, IsXeHpcCore) { - MockCommandListImmediateHw cmdList; - cmdList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u); - NEO::SvmAllocationData *srcAllocData; - NEO::SvmAllocationData *dstAllocData; - auto srcFound = device->getDriverHandle()->findAllocationDataForRange(nonUsmHostPtr, 1024, &srcAllocData); - auto dstFound = device->getDriverHandle()->findAllocationDataForRange(devicePtr, 1024, &dstAllocData); - EXPECT_FALSE(cmdList.isSuitableUSMDeviceAlloc(srcAllocData, srcFound)); - EXPECT_TRUE(cmdList.isSuitableUSMDeviceAlloc(dstAllocData, dstFound)); -} - -struct LocalMemoryMultiSubDeviceFixture : public SingleRootMultiSubDeviceFixture { - void setUp() { - DebugManager.flags.EnableLocalMemory.set(1); - DebugManager.flags.EnableImplicitScaling.set(1); - SingleRootMultiSubDeviceFixture::setUp(); - } - DebugManagerStateRestore restore; -}; - -using LocalMemoryMultiSubDeviceTest = Test; - -HWTEST2_F(LocalMemoryMultiSubDeviceTest, givenImmediateCommandListWhenIsSuitableUSMDeviceAllocWithColouredBufferThenReturnFalse, IsXeHpcCore) { - MockCommandListImmediateHw cmdList; - cmdList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u); - - void *devicePtr; - ze_device_mem_alloc_desc_t deviceDesc = {}; - context->allocDeviceMem(device->toHandle(), &deviceDesc, 2 * MemoryConstants::megaByte, 1u, &devicePtr); - - NEO::SvmAllocationData *allocData; - auto allocFound = device->getDriverHandle()->findAllocationDataForRange(devicePtr, 2 * MemoryConstants::megaByte, &allocData); - EXPECT_FALSE(cmdList.isSuitableUSMDeviceAlloc(allocData, allocFound)); - context->freeMem(devicePtr); -} - -HWTEST2_F(AppendMemoryLockedCopyTest, givenImmediateCommandListAndNonUsmHostPtrAndFlagDisabledWhenPreferCopyThroughLockedPtrCalledThenReturnFalse, IsXeHpcCore) { - DebugManager.flags.ExperimentalCopyThroughLock.set(0); - MockCommandListImmediateHw cmdList; - cmdList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u); - NEO::SvmAllocationData *srcAllocData; - NEO::SvmAllocationData *dstAllocData; - auto srcFound = device->getDriverHandle()->findAllocationDataForRange(nonUsmHostPtr, 1024, &srcAllocData); - auto dstFound = device->getDriverHandle()->findAllocationDataForRange(devicePtr, 1024, &dstAllocData); - EXPECT_FALSE(cmdList.preferCopyThroughLockedPtr(dstAllocData, dstFound, srcAllocData, srcFound, 1024)); -} - -HWTEST2_F(AppendMemoryLockedCopyTest, givenImmediateCommandListAndNonUsmHostPtrWhenCopyH2DThenLockPtr, IsXeHpcCore) { - MockCommandListImmediateHw cmdList; - cmdList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u); - cmdList.csr = device->getNEODevice()->getInternalEngine().commandStreamReceiver; - - NEO::SvmAllocationData *allocData; - device->getDriverHandle()->findAllocationDataForRange(devicePtr, 1024, &allocData); - auto dstAlloc = allocData->gpuAllocations.getGraphicsAllocation(device->getRootDeviceIndex()); - - EXPECT_EQ(nullptr, dstAlloc->getLockedPtr()); - cmdList.appendMemoryCopy(devicePtr, nonUsmHostPtr, 1024, nullptr, 0, nullptr); - EXPECT_EQ(1u, reinterpret_cast(device->getDriverHandle()->getMemoryManager())->lockResourceCalled); - EXPECT_NE(nullptr, dstAlloc->getLockedPtr()); -} - -HWTEST2_F(AppendMemoryLockedCopyTest, givenImmediateCommandListAndNonUsmHostPtrWhenCopyD2HThenLockPtr, IsXeHpcCore) { - MockCommandListImmediateHw cmdList; - cmdList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u); - cmdList.csr = device->getNEODevice()->getInternalEngine().commandStreamReceiver; - - NEO::SvmAllocationData *allocData; - device->getDriverHandle()->findAllocationDataForRange(devicePtr, 1024, &allocData); - auto dstAlloc = allocData->gpuAllocations.getGraphicsAllocation(device->getRootDeviceIndex()); - - EXPECT_EQ(nullptr, dstAlloc->getLockedPtr()); - cmdList.appendMemoryCopy(nonUsmHostPtr, devicePtr, 1024, nullptr, 0, nullptr); - EXPECT_EQ(1u, reinterpret_cast(device->getDriverHandle()->getMemoryManager())->lockResourceCalled); - EXPECT_NE(nullptr, dstAlloc->getLockedPtr()); -} - -HWTEST2_F(AppendMemoryLockedCopyTest, givenImmediateCommandListAndNonUsmHostPtrWhenCopyH2DAndDstPtrLockedThenDontLockAgain, IsXeHpcCore) { - MockCommandListImmediateHw cmdList; - cmdList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u); - cmdList.csr = device->getNEODevice()->getInternalEngine().commandStreamReceiver; - - NEO::SvmAllocationData *allocData; - device->getDriverHandle()->findAllocationDataForRange(devicePtr, 1024, &allocData); - auto dstAlloc = allocData->gpuAllocations.getGraphicsAllocation(device->getRootDeviceIndex()); - - device->getDriverHandle()->getMemoryManager()->lockResource(dstAlloc); - - EXPECT_EQ(1u, reinterpret_cast(device->getDriverHandle()->getMemoryManager())->lockResourceCalled); - cmdList.appendMemoryCopy(devicePtr, nonUsmHostPtr, 1024, nullptr, 0, nullptr); - EXPECT_EQ(1u, reinterpret_cast(device->getDriverHandle()->getMemoryManager())->lockResourceCalled); -} - -HWTEST2_F(AppendMemoryLockedCopyTest, givenImmediateCommandListAndNonUsmHostPtrWhenCopyH2DThenUseMemcpyAndReturnSuccess, IsXeHpcCore) { - MockCommandListImmediateHw cmdList; - cmdList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u); - cmdList.csr = device->getNEODevice()->getInternalEngine().commandStreamReceiver; - - memset(nonUsmHostPtr, 1, 1024); - - auto res = cmdList.appendMemoryCopy(devicePtr, nonUsmHostPtr, 1024, nullptr, 0, nullptr); - EXPECT_EQ(res, ZE_RESULT_SUCCESS); - - NEO::SvmAllocationData *allocData; - device->getDriverHandle()->findAllocationDataForRange(devicePtr, 1024, &allocData); - auto dstAlloc = allocData->gpuAllocations.getGraphicsAllocation(device->getRootDeviceIndex()); - auto lockedPtr = reinterpret_cast(dstAlloc->getLockedPtr()); - EXPECT_EQ(0, memcmp(lockedPtr, nonUsmHostPtr, 1024)); -} - -HWTEST2_F(AppendMemoryLockedCopyTest, givenImmediateCommandListAndSignalEventAndNonUsmHostPtrWhenCopyH2DThenSignalEvent, IsXeHpcCore) { - MockCommandListImmediateHw cmdList; - cmdList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u); - cmdList.csr = device->getNEODevice()->getInternalEngine().commandStreamReceiver; - - ze_event_pool_desc_t eventPoolDesc = {}; - eventPoolDesc.count = 1; - eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP; - - ze_event_desc_t eventDesc = {}; - eventDesc.index = 0; - ze_result_t returnValue = ZE_RESULT_SUCCESS; - auto eventPool = std::unique_ptr(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, returnValue)); - EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue); - auto event = std::unique_ptr(Event::create(eventPool.get(), &eventDesc, device)); - - EXPECT_EQ(event->queryStatus(), ZE_RESULT_NOT_READY); - auto res = cmdList.appendMemoryCopy(devicePtr, nonUsmHostPtr, 1024, event->toHandle(), 0, nullptr); - EXPECT_EQ(res, ZE_RESULT_SUCCESS); - - EXPECT_EQ(event->queryStatus(), ZE_RESULT_SUCCESS); -} - -HWTEST2_F(AppendMemoryLockedCopyTest, givenImmediateCommandListAndSignalEventAndCpuMemcpyWhenGpuHangThenDontSynchronizeEvent, IsXeHpcCore) { - MockCommandListImmediateHw cmdList; - cmdList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u); - cmdList.csr = device->getNEODevice()->getInternalEngine().commandStreamReceiver; - reinterpret_cast *>(cmdList.csr)->callBaseWaitForCompletionWithTimeout = false; - reinterpret_cast *>(cmdList.csr)->returnWaitForCompletionWithTimeout = WaitStatus::GpuHang; - - ze_event_pool_desc_t eventPoolDesc = {}; - eventPoolDesc.count = 1; - eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP; - - ze_event_desc_t eventDesc = {}; - eventDesc.index = 0; - ze_result_t returnValue = ZE_RESULT_SUCCESS; - auto eventPool = std::unique_ptr(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, returnValue)); - EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue); - auto event = std::unique_ptr(Event::create(eventPool.get(), &eventDesc, device)); - - EXPECT_EQ(event->queryStatus(), ZE_RESULT_NOT_READY); - cmdList.appendBarrier(nullptr, 0, nullptr); - auto res = cmdList.appendMemoryCopy(devicePtr, nonUsmHostPtr, 1024, event->toHandle(), 0, nullptr); - EXPECT_EQ(res, ZE_RESULT_ERROR_DEVICE_LOST); - - EXPECT_EQ(event->queryStatus(), ZE_RESULT_NOT_READY); -} - -HWTEST2_F(AppendMemoryLockedCopyTest, givenImmediateCommandListWhenCpuMemcpyWithoutBarrierThenDontWaitForTagUpdate, IsXeHpcCore) { - MockCommandListImmediateHw cmdList; - cmdList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u); - cmdList.csr = device->getNEODevice()->getInternalEngine().commandStreamReceiver; - - auto res = cmdList.appendMemoryCopy(devicePtr, nonUsmHostPtr, 1024, nullptr, 0, nullptr); - EXPECT_EQ(res, ZE_RESULT_SUCCESS); - - uint32_t waitForFlushTagUpdateCalled = reinterpret_cast *>(cmdList.csr)->waitForCompletionWithTimeoutTaskCountCalled; - EXPECT_EQ(waitForFlushTagUpdateCalled, 0u); -} - -HWTEST2_F(AppendMemoryLockedCopyTest, givenImmediateCommandListWhenCpuMemcpyWithBarrierThenWaitForTagUpdate, IsXeHpcCore) { - MockCommandListImmediateHw cmdList; - cmdList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u); - cmdList.csr = device->getNEODevice()->getInternalEngine().commandStreamReceiver; - - cmdList.appendBarrier(nullptr, 0, nullptr); - auto res = cmdList.appendMemoryCopy(devicePtr, nonUsmHostPtr, 1024, nullptr, 0, nullptr); - EXPECT_EQ(res, ZE_RESULT_SUCCESS); - - uint32_t waitForFlushTagUpdateCalled = reinterpret_cast *>(cmdList.csr)->waitForCompletionWithTimeoutTaskCountCalled; - EXPECT_EQ(waitForFlushTagUpdateCalled, 1u); -} - -HWTEST2_F(AppendMemoryLockedCopyTest, givenImmediateCommandListWhenAppendBarrierThenSetDependenciesPresent, IsXeHpcCore) { - MockCommandListImmediateHw cmdList; - cmdList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u); - cmdList.csr = device->getNEODevice()->getInternalEngine().commandStreamReceiver; - - EXPECT_FALSE(cmdList.dependenciesPresent); - - cmdList.appendBarrier(nullptr, 0, nullptr); - - EXPECT_TRUE(cmdList.dependenciesPresent); - - auto res = cmdList.appendMemoryCopy(devicePtr, nonUsmHostPtr, 1024, nullptr, 0, nullptr); - EXPECT_EQ(res, ZE_RESULT_SUCCESS); - - EXPECT_FALSE(cmdList.dependenciesPresent); -} - -HWTEST2_F(AppendMemoryLockedCopyTest, givenImmediateCommandListWhenAppendWaitOnEventsThenSetDependenciesPresent, IsXeHpcCore) { - MockCommandListImmediateHw cmdList; - cmdList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u); - cmdList.csr = device->getNEODevice()->getInternalEngine().commandStreamReceiver; - - EXPECT_FALSE(cmdList.dependenciesPresent); - ze_event_pool_desc_t eventPoolDesc = {}; - eventPoolDesc.count = 1; - eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP; - ze_event_desc_t eventDesc = {}; - eventDesc.index = 0; - ze_result_t returnValue = ZE_RESULT_SUCCESS; - auto eventPool = std::unique_ptr(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, returnValue)); - EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue); - auto event = std::unique_ptr(Event::create(eventPool.get(), &eventDesc, device)); - auto eventHandle = event->toHandle(); - cmdList.appendWaitOnEvents(1, &eventHandle); - - EXPECT_TRUE(cmdList.dependenciesPresent); - - auto res = cmdList.appendMemoryCopy(devicePtr, nonUsmHostPtr, 1024, nullptr, 0, nullptr); - EXPECT_EQ(res, ZE_RESULT_SUCCESS); - - EXPECT_FALSE(cmdList.dependenciesPresent); -} - -template -class MockAppendMemoryLockedCopyTestImmediateCmdList : public MockCommandListImmediateHw { - public: - MockAppendMemoryLockedCopyTestImmediateCmdList() : MockCommandListImmediateHw() {} - ze_result_t appendMemoryCopyKernelWithGA(void *dstPtr, NEO::GraphicsAllocation *dstPtrAlloc, - uint64_t dstOffset, void *srcPtr, - NEO::GraphicsAllocation *srcPtrAlloc, - uint64_t srcOffset, uint64_t size, - uint64_t elementSize, Builtin builtin, - Event *signalEvent, - bool isStateless, - CmdListKernelLaunchParams &launchParams) override { - appendMemoryCopyKernelWithGACalled++; - return ZE_RESULT_SUCCESS; - } - ze_result_t appendBarrier(ze_event_handle_t hSignalEvent, - uint32_t numWaitEvents, - ze_event_handle_t *phWaitEvents) override { - appendBarrierCalled++; - return MockCommandListImmediateHw::appendBarrier(hSignalEvent, numWaitEvents, phWaitEvents); - } - - uint32_t appendBarrierCalled = 0; - uint32_t appendMemoryCopyKernelWithGACalled = 0; -}; - -HWTEST2_F(AppendMemoryLockedCopyTest, givenImmediateCommandListAndUsmSrcHostPtrWhenCopyH2DThenUseGpuMemcpy, IsXeHpcCore) { - MockAppendMemoryLockedCopyTestImmediateCmdList cmdList; - cmdList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u); - void *usmSrcPtr; - ze_host_mem_alloc_desc_t hostDesc = {}; - context->allocHostMem(&hostDesc, 1024, 1u, &usmSrcPtr); - - cmdList.appendMemoryCopy(devicePtr, usmSrcPtr, 1024, nullptr, 0, nullptr); - EXPECT_GE(cmdList.appendMemoryCopyKernelWithGACalled, 1u); - context->freeMem(usmSrcPtr); -} - -HWTEST2_F(AppendMemoryLockedCopyTest, givenImmediateCommandListAndUsmDstHostPtrWhenCopyThenUseGpuMemcpy, IsXeHpcCore) { - MockAppendMemoryLockedCopyTestImmediateCmdList cmdList; - cmdList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u); - void *usmHostDstPtr; - ze_host_mem_alloc_desc_t hostDesc = {}; - context->allocHostMem(&hostDesc, 1024, 1u, &usmHostDstPtr); - - cmdList.appendMemoryCopy(usmHostDstPtr, nonUsmHostPtr, 1024, nullptr, 0, nullptr); - EXPECT_GE(cmdList.appendMemoryCopyKernelWithGACalled, 1u); - context->freeMem(usmHostDstPtr); -} - -HWTEST2_F(AppendMemoryLockedCopyTest, givenImmediateCommandListAndUsmSrcHostPtrWhenCopyThenUseGpuMemcpy, IsXeHpcCore) { - MockAppendMemoryLockedCopyTestImmediateCmdList cmdList; - cmdList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u); - void *usmHostSrcPtr; - ze_host_mem_alloc_desc_t hostDesc = {}; - context->allocHostMem(&hostDesc, 1024, 1u, &usmHostSrcPtr); - - cmdList.appendMemoryCopy(nonUsmHostPtr, usmHostSrcPtr, 1024, nullptr, 0, nullptr); - EXPECT_GE(cmdList.appendMemoryCopyKernelWithGACalled, 1u); - context->freeMem(usmHostSrcPtr); -} - -HWTEST2_F(AppendMemoryLockedCopyTest, givenImmediateCommandListAndNonUsmSrcHostPtrWhenSizeTooLargeThenUseGpuMemcpy, IsXeHpcCore) { - MockAppendMemoryLockedCopyTestImmediateCmdList cmdList; - cmdList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u); - cmdList.appendMemoryCopy(devicePtr, nonUsmHostPtr, 3 * MemoryConstants::megaByte, nullptr, 0, nullptr); - EXPECT_GE(cmdList.appendMemoryCopyKernelWithGACalled, 1u); -} - -HWTEST2_F(AppendMemoryLockedCopyTest, givenImmediateCommandListAndNonUsmDstHostPtrWhenSizeTooLargeThenUseGpuMemcpy, IsXeHpcCore) { - MockAppendMemoryLockedCopyTestImmediateCmdList cmdList; - cmdList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u); - cmdList.appendMemoryCopy(nonUsmHostPtr, devicePtr, 2 * MemoryConstants::kiloByte, nullptr, 0, nullptr); - EXPECT_GE(cmdList.appendMemoryCopyKernelWithGACalled, 1u); -} - -HWTEST2_F(AppendMemoryLockedCopyTest, givenImmediateCommandListAndD2HCopyWhenSizeTooLargeButFlagSetThenUseCpuMemcpy, IsXeHpcCore) { - DebugManager.flags.ExperimentalD2HCpuCopyThreshold.set(2048); - MockAppendMemoryLockedCopyTestImmediateCmdList cmdList; - cmdList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u); - cmdList.csr = device->getNEODevice()->getInternalEngine().commandStreamReceiver; - - cmdList.appendMemoryCopy(nonUsmHostPtr, devicePtr, 2 * MemoryConstants::kiloByte, nullptr, 0, nullptr); - EXPECT_EQ(cmdList.appendMemoryCopyKernelWithGACalled, 0u); -} - -HWTEST2_F(AppendMemoryLockedCopyTest, givenImmediateCommandListAndH2DCopyWhenSizeTooLargeButFlagSetThenUseCpuMemcpy, IsXeHpcCore) { - DebugManager.flags.ExperimentalH2DCpuCopyThreshold.set(3 * MemoryConstants::megaByte); - MockAppendMemoryLockedCopyTestImmediateCmdList cmdList; - cmdList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u); - cmdList.csr = device->getNEODevice()->getInternalEngine().commandStreamReceiver; - cmdList.appendMemoryCopy(devicePtr, nonUsmHostPtr, 3 * MemoryConstants::megaByte, nullptr, 0, nullptr); - EXPECT_EQ(cmdList.appendMemoryCopyKernelWithGACalled, 0u); -} - -HWTEST2_F(AppendMemoryLockedCopyTest, givenImmediateCommandListAndCpuMemcpyWithDependencyThenAppendBarrierCalled, IsXeHpcCore) { - MockAppendMemoryLockedCopyTestImmediateCmdList cmdList; - cmdList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u); - cmdList.csr = device->getNEODevice()->getInternalEngine().commandStreamReceiver; - - ze_event_pool_desc_t eventPoolDesc = {}; - eventPoolDesc.count = 1; - - ze_event_desc_t eventDesc = {}; - eventDesc.index = 0; - ze_result_t returnValue = ZE_RESULT_SUCCESS; - auto eventPool = std::unique_ptr(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, returnValue)); - EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue); - auto event = std::unique_ptr(Event::create(eventPool.get(), &eventDesc, device)); - auto phEvent = event->toHandle(); - cmdList.appendMemoryCopy(devicePtr, nonUsmHostPtr, 2 * MemoryConstants::kiloByte, nullptr, 1, &phEvent); - EXPECT_EQ(cmdList.appendBarrierCalled, 1u); -} - -HWTEST2_F(AppendMemoryLockedCopyTest, givenImmediateCommandListAndCpuMemcpyWithoutDependencyThenAppendBarrierNotCalled, IsXeHpcCore) { - MockAppendMemoryLockedCopyTestImmediateCmdList cmdList; - cmdList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u); - cmdList.csr = device->getNEODevice()->getInternalEngine().commandStreamReceiver; - cmdList.appendMemoryCopy(devicePtr, nonUsmHostPtr, 2 * MemoryConstants::kiloByte, nullptr, 0, nullptr); - EXPECT_EQ(cmdList.appendBarrierCalled, 0u); -} - -HWTEST2_F(AppendMemoryLockedCopyTest, givenImmediateCommandListAndTimestampFlagSetWhenCpuMemcpyThenSetCorrectGpuTimestamps, IsXeHpcCore) { - MockAppendMemoryLockedCopyTestImmediateCmdList cmdList; - cmdList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u); - cmdList.csr = device->getNEODevice()->getInternalEngine().commandStreamReceiver; - neoDevice->setOSTime(new NEO::MockOSTimeWithConstTimestamp()); - - ze_event_pool_desc_t eventPoolDesc = {}; - eventPoolDesc.count = 1; - eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP; - - ze_event_desc_t eventDesc = {}; - eventDesc.index = 0; - - ze_result_t returnValue = ZE_RESULT_SUCCESS; - auto eventPool = std::unique_ptr(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, returnValue)); - EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue); - - auto event = std::unique_ptr(Event::create(eventPool.get(), &eventDesc, device)); - auto phEvent = event->toHandle(); - cmdList.appendMemoryCopy(devicePtr, nonUsmHostPtr, 2 * MemoryConstants::kiloByte, phEvent, 0, nullptr); - ze_kernel_timestamp_result_t resultTimestamp = {}; - auto result = event->queryKernelTimestamp(&resultTimestamp); - EXPECT_EQ(result, ZE_RESULT_SUCCESS); - - EXPECT_EQ(resultTimestamp.context.kernelStart, NEO::MockDeviceTimeWithConstTimestamp::gpuTimestamp); - EXPECT_EQ(resultTimestamp.global.kernelStart, NEO::MockDeviceTimeWithConstTimestamp::gpuTimestamp); - EXPECT_EQ(resultTimestamp.context.kernelEnd, NEO::MockDeviceTimeWithConstTimestamp::gpuTimestamp); - EXPECT_EQ(resultTimestamp.global.kernelEnd, NEO::MockDeviceTimeWithConstTimestamp::gpuTimestamp); -} - -HWTEST2_F(AppendMemoryLockedCopyTest, givenImmediateCommandListAndTimestampFlagNotSetWhenCpuMemcpyThenDontSetGpuTimestamps, IsXeHpcCore) { - MockAppendMemoryLockedCopyTestImmediateCmdList cmdList; - cmdList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u); - cmdList.csr = device->getNEODevice()->getInternalEngine().commandStreamReceiver; - neoDevice->setOSTime(new NEO::MockOSTimeWithConstTimestamp()); - - ze_event_pool_desc_t eventPoolDesc = {}; - eventPoolDesc.count = 1; - - ze_event_desc_t eventDesc = {}; - eventDesc.index = 0; - ze_result_t returnValue = ZE_RESULT_SUCCESS; - auto eventPool = std::unique_ptr(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, returnValue)); - EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue); - - auto event = std::unique_ptr(Event::create(eventPool.get(), &eventDesc, device)); - auto phEvent = event->toHandle(); - cmdList.appendMemoryCopy(devicePtr, nonUsmHostPtr, 2 * MemoryConstants::kiloByte, phEvent, 0, nullptr); - ze_kernel_timestamp_result_t resultTimestamp = {}; - auto result = event->queryKernelTimestamp(&resultTimestamp); - EXPECT_EQ(result, ZE_RESULT_SUCCESS); - - EXPECT_NE(resultTimestamp.context.kernelEnd, NEO::MockDeviceTimeWithConstTimestamp::gpuTimestamp); -} - using CreateCommandListXeHpcTest = Test; HWTEST2_F(CreateCommandListXeHpcTest, givenXeHpcPlatformsWhenImmediateCommandListCreatedThenHeapSharingEnabledWithFlushTask, IsXeHpcCore) { diff --git a/shared/source/helpers/hw_helper.h b/shared/source/helpers/hw_helper.h index 7034483f87..c21be403ee 100644 --- a/shared/source/helpers/hw_helper.h +++ b/shared/source/helpers/hw_helper.h @@ -151,7 +151,7 @@ class HwHelper { virtual const void *getBatchBufferEndReference() const = 0; virtual bool isPlatformFlushTaskEnabled(const NEO::HardwareInfo &hwInfo) const = 0; virtual uint32_t getMinimalScratchSpaceSize() const = 0; - virtual bool copyThroughLockedPtrEnabled() const = 0; + virtual bool copyThroughLockedPtrEnabled(const HardwareInfo &hwInfo) const = 0; virtual uint32_t getAmountOfAllocationsToFill() const = 0; virtual bool isChipsetUniqueUUIDSupported() const = 0; virtual bool isTimestampShiftRequired() const = 0; @@ -367,7 +367,7 @@ class HwHelperHw : public HwHelper { const void *getBatchBufferEndReference() const override; bool isPlatformFlushTaskEnabled(const NEO::HardwareInfo &hwInfo) const override; uint32_t getMinimalScratchSpaceSize() const override; - bool copyThroughLockedPtrEnabled() const override; + bool copyThroughLockedPtrEnabled(const HardwareInfo &hwInfo) const override; uint32_t getAmountOfAllocationsToFill() const override; bool isChipsetUniqueUUIDSupported() const override; bool isTimestampShiftRequired() const override; diff --git a/shared/source/helpers/hw_helper_base.inl b/shared/source/helpers/hw_helper_base.inl index cd9d70f81d..63b2d2081a 100644 --- a/shared/source/helpers/hw_helper_base.inl +++ b/shared/source/helpers/hw_helper_base.inl @@ -704,7 +704,7 @@ uint64_t HwHelperHw::getPatIndex(CacheRegion cacheRegion, CachePolicy } template -bool HwHelperHw::copyThroughLockedPtrEnabled() const { +bool HwHelperHw::copyThroughLockedPtrEnabled(const HardwareInfo &hwInfo) const { if (DebugManager.flags.ExperimentalCopyThroughLock.get() != -1) { return DebugManager.flags.ExperimentalCopyThroughLock.get() == 1; } diff --git a/shared/source/xe_hpc_core/hw_helper_xe_hpc_core.cpp b/shared/source/xe_hpc_core/hw_helper_xe_hpc_core.cpp index 1511c829c4..854c3c25b4 100644 --- a/shared/source/xe_hpc_core/hw_helper_xe_hpc_core.cpp +++ b/shared/source/xe_hpc_core/hw_helper_xe_hpc_core.cpp @@ -428,7 +428,7 @@ uint64_t HwHelperHw::getPatIndex(CacheRegion cacheRegion, CachePolicy ca } template <> -bool HwHelperHw::copyThroughLockedPtrEnabled() const { +bool HwHelperHw::copyThroughLockedPtrEnabled(const HardwareInfo &hwInfo) const { if (DebugManager.flags.ExperimentalCopyThroughLock.get() != -1) { return DebugManager.flags.ExperimentalCopyThroughLock.get() == 1; } diff --git a/shared/source/xe_hpg_core/hw_helper_xe_hpg_core.cpp b/shared/source/xe_hpg_core/hw_helper_xe_hpg_core.cpp index 736afd4ef4..668f4977d4 100644 --- a/shared/source/xe_hpg_core/hw_helper_xe_hpg_core.cpp +++ b/shared/source/xe_hpg_core/hw_helper_xe_hpg_core.cpp @@ -142,6 +142,14 @@ bool HwHelperHw::disableL3CacheForDebug(const HardwareInfo &hwInfo) cons return isWorkaroundRequired(REVISION_A0, REVISION_B, hwInfo); } +template <> +bool HwHelperHw::copyThroughLockedPtrEnabled(const HardwareInfo &hwInfo) const { + if (DebugManager.flags.ExperimentalCopyThroughLock.get() != -1) { + return DebugManager.flags.ExperimentalCopyThroughLock.get() == 1; + } + return this->isLocalMemoryEnabled(hwInfo); +} + template class HwHelperHw; template class FlatBatchBufferHelperHw; template struct MemorySynchronizationCommands; diff --git a/shared/test/unit_test/helpers/test_hw_info_config.cpp b/shared/test/unit_test/helpers/test_hw_info_config.cpp index 1a9e5982ab..2b85a18ec0 100644 --- a/shared/test/unit_test/helpers/test_hw_info_config.cpp +++ b/shared/test/unit_test/helpers/test_hw_info_config.cpp @@ -198,19 +198,19 @@ HWTEST2_F(HwInfoConfigTest, givenHwInfoConfigWhenIsPlatformQueryNotSupportedThen EXPECT_FALSE(hwInfoConfig.isPlatformQuerySupported()); } -HWTEST2_F(HwInfoConfigTest, givenHwHelperWhenCallCopyThroughLockedPtrEnabledThenReturnFalse, IsNotXeHpcCore) { +HWTEST2_F(HwInfoConfigTest, givenHwHelperWhenCallCopyThroughLockedPtrEnabledThenReturnFalse, IsNotXeHpgOrXeHpcCore) { HwHelper &hwHelper = HwHelper::get(defaultHwInfo->platform.eRenderCoreFamily); - EXPECT_FALSE(hwHelper.copyThroughLockedPtrEnabled()); + EXPECT_FALSE(hwHelper.copyThroughLockedPtrEnabled(*defaultHwInfo)); } HWTEST_F(HwInfoConfigTest, givenHwHelperWhenFlagSetAndCallCopyThroughLockedPtrEnabledThenReturnCorrectValue) { DebugManagerStateRestore restorer; HwHelper &hwHelper = HwHelper::get(defaultHwInfo->platform.eRenderCoreFamily); DebugManager.flags.ExperimentalCopyThroughLock.set(0); - EXPECT_FALSE(hwHelper.copyThroughLockedPtrEnabled()); + EXPECT_FALSE(hwHelper.copyThroughLockedPtrEnabled(*defaultHwInfo)); DebugManager.flags.ExperimentalCopyThroughLock.set(1); - EXPECT_TRUE(hwHelper.copyThroughLockedPtrEnabled()); + EXPECT_TRUE(hwHelper.copyThroughLockedPtrEnabled(*defaultHwInfo)); } HWTEST2_F(HwInfoConfigTest, givenHwHelperWhenCallGetAmountOfAllocationsToFillThenReturnFalse, IsNotXeHpcCore) { diff --git a/shared/test/unit_test/xe_hpc_core/hw_helper_xe_hpc_core_tests.cpp b/shared/test/unit_test/xe_hpc_core/hw_helper_xe_hpc_core_tests.cpp index f431e7a0f0..da0529feaa 100644 --- a/shared/test/unit_test/xe_hpc_core/hw_helper_xe_hpc_core_tests.cpp +++ b/shared/test/unit_test/xe_hpc_core/hw_helper_xe_hpc_core_tests.cpp @@ -82,7 +82,7 @@ XE_HPC_CORETEST_F(HwHelperXeHpcCoreTest, givenXeHPCPlatformWhenCheckAssignEngine XE_HPC_CORETEST_F(HwHelperTest, givenHwHelperWhenCallCopyThroughLockedPtrEnabledThenReturnTrue) { auto &hwHelper = HwHelperHw::get(); - EXPECT_TRUE(hwHelper.copyThroughLockedPtrEnabled()); + EXPECT_TRUE(hwHelper.copyThroughLockedPtrEnabled(*defaultHwInfo)); } XE_HPC_CORETEST_F(HwHelperTest, givenHwHelperWhenCallGetAmountOfAllocationsToFillThenReturnTrue) { diff --git a/shared/test/unit_test/xe_hpg_core/hw_helper_tests_xe_hpg_core.cpp b/shared/test/unit_test/xe_hpg_core/hw_helper_tests_xe_hpg_core.cpp index b24207f2d0..b06b76f7a6 100644 --- a/shared/test/unit_test/xe_hpg_core/hw_helper_tests_xe_hpg_core.cpp +++ b/shared/test/unit_test/xe_hpg_core/hw_helper_tests_xe_hpg_core.cpp @@ -286,3 +286,12 @@ XE_HPG_CORETEST_F(HwHelperTestXeHpgCore, EXPECT_EQ(gpuAddress, UnitTestHelper::getPipeControlPostSyncAddress(*pipeControl)); EXPECT_EQ(immediateValue, pipeControl->getImmediateData()); } + +XE_HPG_CORETEST_F(HwHelperTestXeHpgCore, givenHwHelperWhenCallCopyThroughLockedPtrEnabledThenReturnTrue) { + const auto &hwHelper = HwHelper::get(hardwareInfo.platform.eRenderCoreFamily); + if (hwHelper.isLocalMemoryEnabled(*defaultHwInfo)) { + EXPECT_TRUE(hwHelper.copyThroughLockedPtrEnabled(*defaultHwInfo)); + } else { + EXPECT_FALSE(hwHelper.copyThroughLockedPtrEnabled(*defaultHwInfo)); + } +} \ No newline at end of file