diff --git a/level_zero/core/source/cmdlist/cmdlist_hw.h b/level_zero/core/source/cmdlist/cmdlist_hw.h index 2e4474c4fa..d6cb90733c 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw.h +++ b/level_zero/core/source/cmdlist/cmdlist_hw.h @@ -441,7 +441,7 @@ struct CommandListCoreFamily : public CommandListImp { bool singleEventPacketRequired(bool inputSinglePacketEventRequest) const; void programEventL3Flush(Event *event); virtual ze_result_t flushInOrderCounterSignal(bool waitOnInOrderCounterRequired) { return ZE_RESULT_SUCCESS; }; - bool isCopyOffloadAllowed(const NEO::GraphicsAllocation *srcAllocation, const NEO::GraphicsAllocation *dstAllocation) const; + bool isCopyOffloadAllowed(const NEO::GraphicsAllocation *srcAllocation, const NEO::GraphicsAllocation *dstAllocation, bool imageToBuffer) const; bool isSharedSystemEnabled() const; void emitMemAdviseForSystemCopy(const AlignedAllocationData &allocationStruct, size_t size); void setAdditionalKernelLaunchParams(CmdListKernelLaunchParams &launchParams, Kernel &kernel) const; diff --git a/level_zero/core/source/cmdlist/cmdlist_hw.inl b/level_zero/core/source/cmdlist/cmdlist_hw.inl index 3c51ef4de0..d124066ef0 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw.inl @@ -824,7 +824,7 @@ ze_result_t CommandListCoreFamily::appendImageCopyFromMemoryExt(z image = peerImage; } - memoryCopyParams.copyOffloadAllowed = isCopyOffloadAllowed(allocationStruct.alloc, image->getAllocation()); + memoryCopyParams.copyOffloadAllowed = isCopyOffloadAllowed(allocationStruct.alloc, image->getAllocation(), false); if (isCopyOnly(memoryCopyParams.copyOffloadAllowed)) { if ((bytesPerPixel == 3) || (bytesPerPixel == 6) || image->isMimickedImage()) { @@ -1029,7 +1029,7 @@ ze_result_t CommandListCoreFamily::appendImageCopyToMemoryExt(voi image = peerImage; } - memoryCopyParams.copyOffloadAllowed = isCopyOffloadAllowed(image->getAllocation(), allocationStruct.alloc); + 
memoryCopyParams.copyOffloadAllowed = isCopyOffloadAllowed(image->getAllocation(), allocationStruct.alloc, true); if (isCopyOnly(memoryCopyParams.copyOffloadAllowed)) { if ((bytesPerPixel == 3) || (bytesPerPixel == 6) || image->isMimickedImage()) { @@ -1241,7 +1241,7 @@ ze_result_t CommandListCoreFamily::appendImageCopyRegion(ze_image srcImage = peerImage; } - memoryCopyParams.copyOffloadAllowed = isCopyOffloadAllowed(srcImage->getAllocation(), dstImage->getAllocation()); + memoryCopyParams.copyOffloadAllowed = isCopyOffloadAllowed(srcImage->getAllocation(), dstImage->getAllocation(), false); if (isCopyOnly(memoryCopyParams.copyOffloadAllowed)) { auto bytesPerPixel = static_cast(srcImage->getImageInfo().surfaceFormat->imageElementSizeInBytes); @@ -1764,7 +1764,12 @@ ze_result_t CommandListCoreFamily::appendPageFaultCopy(NEO::Graph } template -bool CommandListCoreFamily::isCopyOffloadAllowed(const NEO::GraphicsAllocation *srcAllocation, const NEO::GraphicsAllocation *dstAllocation) const { +bool CommandListCoreFamily::isCopyOffloadAllowed(const NEO::GraphicsAllocation *srcAllocation, const NEO::GraphicsAllocation *dstAllocation, bool imageToBuffer) const { + bool preferred = device->getProductHelper().blitEnqueuePreferred(imageToBuffer); + if (!NEO::debugManager.flags.EnableBlitterForEnqueueOperations.getIfNotDefault(preferred)) { + return false; + } + if (srcAllocation == nullptr || dstAllocation == nullptr) { return isCopyOffloadEnabled(); } @@ -1911,7 +1916,7 @@ ze_result_t CommandListCoreFamily::appendMemoryCopy(void *dstptr, emitMemAdviseForSystemCopy(dstAllocationStruct, size); emitMemAdviseForSystemCopy(srcAllocationStruct, size); - memoryCopyParams.copyOffloadAllowed = isCopyOffloadAllowed(srcAllocationStruct.alloc, dstAllocationStruct.alloc); + memoryCopyParams.copyOffloadAllowed = isCopyOffloadAllowed(srcAllocationStruct.alloc, dstAllocationStruct.alloc, false); const bool isCopyOnlyEnabled = isCopyOnly(memoryCopyParams.copyOffloadAllowed); const bool 
inOrderCopyOnlySignalingAllowed = this->isInOrderExecutionEnabled() && !memoryCopyParams.forceDisableCopyOnlyInOrderSignaling && isCopyOnlyEnabled; @@ -2163,7 +2168,7 @@ ze_result_t CommandListCoreFamily::appendMemoryCopyRegion(void *d emitMemAdviseForSystemCopy(dstAllocationStruct, dstSize); emitMemAdviseForSystemCopy(srcAllocationStruct, srcSize); - memoryCopyParams.copyOffloadAllowed = isCopyOffloadAllowed(srcAllocationStruct.alloc, dstAllocationStruct.alloc); + memoryCopyParams.copyOffloadAllowed = isCopyOffloadAllowed(srcAllocationStruct.alloc, dstAllocationStruct.alloc, false); const bool isCopyOnlyEnabled = isCopyOnly(memoryCopyParams.copyOffloadAllowed); const bool inOrderCopyOnlySignalingAllowed = this->isInOrderExecutionEnabled() && !memoryCopyParams.forceDisableCopyOnlyInOrderSignaling && isCopyOnlyEnabled; diff --git a/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h b/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h index c0eb0e0961..5aac823910 100644 --- a/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h +++ b/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h @@ -235,6 +235,7 @@ struct WhiteBox> using BaseClass::internalUsage; using BaseClass::interruptEvents; using BaseClass::isBcsSplitNeeded; + using BaseClass::isCopyOffloadAllowed; using BaseClass::isInOrderNonWalkerSignalingRequired; using BaseClass::isQwordInOrderCounter; using BaseClass::isSyncModeQueue; diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_in_order_cmdlist_2.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_in_order_cmdlist_2.cpp index b491037cb5..96541dee49 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_in_order_cmdlist_2.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_in_order_cmdlist_2.cpp @@ -25,6 +25,7 @@ namespace ult { struct CopyOffloadInOrderTests : public InOrderCmdListFixture { void SetUp() override { debugManager.flags.EnableLocalMemory.set(1); + 
debugManager.flags.EnableBlitterForEnqueueOperations.set(1); backupHwInfo = std::make_unique>(defaultHwInfo.get()); defaultHwInfo->capabilityTable.blitterOperationsSupported = true; defaultHwInfo->featureTable.ftrBcsInfo = 0b111; @@ -49,6 +50,49 @@ struct CopyOffloadInOrderTests : public InOrderCmdListFixture { std::unique_ptr> backupHwInfo; }; +HWTEST_F(CopyOffloadInOrderTests, givenDebugFlagSetWhenAskingForCopyOffloadThenReturnCorrectValue) { + auto immCmdList = createImmCmdListWithOffload(); + + MockGraphicsAllocation hostAlloc; + hostAlloc.overrideMemoryPool(NEO::MemoryPool::system64KBPages); + + MockGraphicsAllocation deviceAlloc; + deviceAlloc.overrideMemoryPool(NEO::MemoryPool::localMemory); + + auto &productHelper = device->getProductHelper(); + + std::array allocations = {&hostAlloc, &deviceAlloc, nullptr}; + + for (int32_t flag : {-1, 0, 1}) { + debugManager.flags.EnableBlitterForEnqueueOperations.set(flag); + for (auto srcAlloc : allocations) { + for (auto dstAlloc : allocations) { + for (bool imgToBuffer : {true, false}) { + bool expected = false; + + if (flag != 0) { + bool preferred = productHelper.blitEnqueuePreferred(imgToBuffer); + + if (!debugManager.flags.EnableBlitterForEnqueueOperations.getIfNotDefault(preferred)) { + expected = false; + } else { + if (srcAlloc == nullptr || dstAlloc == nullptr) { + expected = true; + } else { + expected = !(srcAlloc->isAllocatedInLocalMemoryPool() && dstAlloc->isAllocatedInLocalMemoryPool()); + } + } + } else { + expected = false; + } + + EXPECT_EQ(expected, immCmdList->isCopyOffloadAllowed(srcAlloc, dstAlloc, imgToBuffer)); + } + } + } + } +} + HWCMDTEST_F(IGFX_XE_HP_CORE, CopyOffloadInOrderTests, givenCmdsChainingWhenDispatchingCopyOffloadThenDontSkipImplictDependency) { using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; @@ -88,6 +132,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CopyOffloadInOrderTests, givenCmdsChainingWhenDispa uint32_t copyData = 0; immCmdList->appendMemoryCopy(&copyData, &copyData, 1, 
nullptr, 0, nullptr, copyParams); + findSemaphores(1); // implicit dependency context->freeMem(alloc);