From 080609e174b77960e62792182e0d149ae49cba4c Mon Sep 17 00:00:00 2001 From: kamdiedrich Date: Mon, 11 May 2020 11:49:08 +0200 Subject: [PATCH] Add flush after write to CPU memory Change-Id: I199c8b203f1afdf648f6520f13b0498efb760c84 --- level_zero/core/source/cmdlist/cmdlist_hw.inl | 17 ++- .../sources/cmdlist/test_cmdlist.cpp | 117 ++++++++++++++++-- 2 files changed, 115 insertions(+), 19 deletions(-) diff --git a/level_zero/core/source/cmdlist/cmdlist_hw.inl b/level_zero/core/source/cmdlist/cmdlist_hw.inl index e8fffdaae6..0a34b50d20 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw.inl @@ -531,7 +531,7 @@ ze_result_t CommandListCoreFamily::appendMemAdvise(ze_device_hand auto allocData = device->getDriverHandle()->getSvmAllocsManager()->getSVMAllocs()->get(ptr); if (allocData) { - return ZE_RESULT_SUCCESS; + return ZE_RESULT_SUCCESS; } return ZE_RESULT_ERROR_UNKNOWN; } @@ -728,7 +728,7 @@ ze_result_t CommandListCoreFamily::appendMemoryCopy(void *dstptr, this->appendSignalEventPostWalker(hSignalEvent); } - if (dstAllocationStruct.needsFlush) { + if (dstAllocationStruct.needsFlush && !isCopyOnlyCmdList) { NEO::PipeControlArgs args(true); NEO::MemorySynchronizationCommands::addPipeControl(*commandContainer.getCommandStream(), args); } @@ -786,13 +786,9 @@ ze_result_t CommandListCoreFamily::appendMemoryCopyRegion(void *d return result; } - if (dstAllocationStruct.needsFlush) { - if (isCopyOnlyCmdList) { - NEO::EncodeMiFlushDW::programMiFlushDw(*commandContainer.getCommandStream(), 0, 0, false, false); - } else { - NEO::PipeControlArgs args(true); - NEO::MemorySynchronizationCommands::addPipeControl(*commandContainer.getCommandStream(), args); - } + if (dstAllocationStruct.needsFlush && !isCopyOnlyCmdList) { + NEO::PipeControlArgs args(true); + NEO::MemorySynchronizationCommands::addPipeControl(*commandContainer.getCommandStream(), args); } return ZE_RESULT_SUCCESS; @@ -920,7 +916,7 @@ ze_result_t CommandListCoreFamily::appendMemoryPrefetch(const voi size_t count) { auto allocData = device->getDriverHandle()->getSvmAllocsManager()->getSVMAllocs()->get(ptr); if (allocData) { - return ZE_RESULT_SUCCESS; + return ZE_RESULT_SUCCESS; } return ZE_RESULT_ERROR_UNKNOWN; } @@ -1133,6 +1129,7 @@ inline AlignedAllocationData CommandListCoreFamily::getAlignedAll hostPtrMap.insert(std::make_pair(buffer, alloc)); alignedPtr = static_cast(alloc->getGpuAddress() - offset); + hostPointerNeedsFlush = true; } else { alloc = allocData->gpuAllocation; diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist.cpp index aaefe4c734..558adb1719 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist.cpp @@ -383,16 +383,19 @@ HWTEST2_F(CommandListCreate, givenCommandListAnd3DWhbufferenMemoryCopyRegionCall EXPECT_GT(cmdList.appendMemoryCopyKernel3dCalledTimes, 0u); } -HWTEST2_F(CommandListCreate, givenCommandListAndHostPointersWhenMemoryCopyRegionCalledThenTwoNewAllocationAreAddedToHostMapPtr, Platforms) { - class MockAppendMemoryCopyRegion : public MockCommandList { - public: - using CommandList::hostPtrMap; - AlignedAllocationData getAlignedAllocation(L0::Device *device, const void *buffer, uint64_t bufferSize) override { - return L0::CommandListCoreFamily::getAlignedAllocation(device, buffer, bufferSize); - } - }; +using AppendMemoryCopy = CommandListCreate; - MockAppendMemoryCopyRegion cmdList; +template +class MockAppendMemoryCopy : public MockCommandList { + public: + using CommandList::hostPtrMap; + AlignedAllocationData getAlignedAllocation(L0::Device *device, const void *buffer, uint64_t bufferSize) override { + return L0::CommandListCoreFamily::getAlignedAllocation(device, buffer, bufferSize); + } +}; + +HWTEST2_F(AppendMemoryCopy, givenCommandListAndHostPointersWhenMemoryCopyRegionCalledThenTwoNewAllocationAreAddedToHostMapPtr, Platforms) { + MockAppendMemoryCopy cmdList; cmdList.initialize(device, false); void *srcPtr = reinterpret_cast(0x1234); void *dstPtr = reinterpret_cast(0x2345); @@ -402,6 +405,54 @@ HWTEST2_F(CommandListCreate, givenCommandListAndHostPointersWhenMemoryCopyRegion EXPECT_EQ(cmdList.hostPtrMap.size(), 2u); } +HWTEST2_F(AppendMemoryCopy, givenCommandListAndHostPointersWhenMemoryCopyRegionCalledThenPipeControlWithDcFlushAdded, Platforms) { + using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; + + MockAppendMemoryCopy cmdList; + cmdList.initialize(device, false); + void *srcPtr = reinterpret_cast(0x1234); + void *dstPtr = reinterpret_cast(0x2345); + ze_copy_region_t dstRegion = {4, 4, 4, 2, 2, 2}; + ze_copy_region_t srcRegion = {4, 4, 4, 2, 2, 2}; + cmdList.appendMemoryCopyRegion(dstPtr, &dstRegion, 0, 0, srcPtr, &srcRegion, 0, 0, nullptr); + + auto &commandContainer = cmdList.commandContainer; + GenCmdList genCmdList; + ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( + genCmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed())); + auto itor = find(genCmdList.begin(), genCmdList.end()); + ASSERT_NE(genCmdList.end(), itor); + PIPE_CONTROL *cmd = nullptr; + while (itor != genCmdList.end()) { + cmd = genCmdCast(*itor); + itor = find(++itor, genCmdList.end()); + } + EXPECT_TRUE(cmd->getDcFlushEnable()); +} + +HWTEST2_F(AppendMemoryCopy, givenCommandListAndHostPointersWhenMemoryCopyCalledThenPipeControlWithDcFlushAdded, Platforms) { + using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; + + MockAppendMemoryCopy cmdList; + cmdList.initialize(device, false); + void *srcPtr = reinterpret_cast(0x1234); + void *dstPtr = reinterpret_cast(0x2345); + cmdList.appendMemoryCopy(dstPtr, srcPtr, 8, nullptr, 0, nullptr); + + auto &commandContainer = cmdList.commandContainer; + GenCmdList genCmdList; + ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( + genCmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed())); + auto itor = find(genCmdList.begin(), genCmdList.end()); + ASSERT_NE(genCmdList.end(), itor); + PIPE_CONTROL *cmd = nullptr; + while (itor != genCmdList.end()) { + cmd = genCmdCast(*itor); + itor = find(++itor, genCmdList.end()); + } + EXPECT_TRUE(cmd->getDcFlushEnable()); +} + HWTEST2_F(CommandListCreate, givenCommandListAnd2DWhbufferenMemoryCopyRegionCalledThenCopyKernel2DCalled, Platforms) { MockCommandList cmdList; cmdList.initialize(device, false); @@ -416,6 +467,54 @@ HWTEST2_F(CommandListCreate, givenCommandListAnd2DWhbufferenMemoryCopyRegionCall EXPECT_GT(cmdList.appendMemoryCopyKernel2dCalledTimes, 0u); } +HWTEST2_F(AppendMemoryCopy, givenCopyOnlyCommandListAndHostPointersWhenMemoryCopyCalledThenPipeControlWithDcFlushAddedIsNotAddedAfterBlitCopy, Platforms) { + using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; + using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; + using XY_COPY_BLT = typename GfxFamily::XY_COPY_BLT; + + WhiteBox> cmdList(1); + cmdList.initialize(device, true); + void *srcPtr = reinterpret_cast(0x1234); + void *dstPtr = reinterpret_cast(0x2345); + cmdList.appendMemoryCopy(dstPtr, srcPtr, 8, nullptr, 0, nullptr); + + auto &commandContainer = cmdList.commandContainer; + GenCmdList genCmdList; + ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( + genCmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed())); + auto itor = find(genCmdList.begin(), genCmdList.end()); + ASSERT_NE(genCmdList.end(), itor); + + itor = find(++itor, genCmdList.end()); + + EXPECT_EQ(genCmdList.end(), itor); +} + +HWTEST2_F(AppendMemoryCopy, givenCopyOnlyCommandListAndHostPointersWhenMemoryCopyRegionCalledThenPipeControlWithDcFlushAddedIsNotAddedAfterBlitCopy, Platforms) { + using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; + using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; + using XY_COPY_BLT = typename GfxFamily::XY_COPY_BLT; + + WhiteBox> cmdList(1); + cmdList.initialize(device, true); + void *srcPtr = reinterpret_cast(0x1234); + void *dstPtr = reinterpret_cast(0x2345); + ze_copy_region_t dstRegion = {4, 4, 0, 2, 2, 1}; + ze_copy_region_t srcRegion = {4, 4, 0, 2, 2, 1}; + cmdList.appendMemoryCopyRegion(dstPtr, &dstRegion, 0, 0, srcPtr, &srcRegion, 0, 0, nullptr); + + auto &commandContainer = cmdList.commandContainer; + GenCmdList genCmdList; + ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( + genCmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed())); + auto itor = find(genCmdList.begin(), genCmdList.end()); + ASSERT_NE(genCmdList.end(), itor); + + itor = find(++itor, genCmdList.end()); + + EXPECT_EQ(genCmdList.end(), itor); +} + HWTEST2_F(CommandListCreate, givenCopyOnlyCommandListWhenAppendMemoryFillCalledThenAppendBlitFillCalled, Platforms) { MockCommandList cmdList; cmdList.initialize(device, true);