Add flush after write to CPU memory

Change-Id: I199c8b203f1afdf648f6520f13b0498efb760c84
This commit is contained in:
kamdiedrich
2020-05-11 11:49:08 +02:00
committed by sys_ocldev
parent fda985aa0e
commit 080609e174
2 changed files with 115 additions and 19 deletions

View File

@@ -531,7 +531,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemAdvise(ze_device_hand
auto allocData = device->getDriverHandle()->getSvmAllocsManager()->getSVMAllocs()->get(ptr); auto allocData = device->getDriverHandle()->getSvmAllocsManager()->getSVMAllocs()->get(ptr);
if (allocData) { if (allocData) {
return ZE_RESULT_SUCCESS; return ZE_RESULT_SUCCESS;
} }
return ZE_RESULT_ERROR_UNKNOWN; return ZE_RESULT_ERROR_UNKNOWN;
} }
@@ -728,7 +728,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopy(void *dstptr,
this->appendSignalEventPostWalker(hSignalEvent); this->appendSignalEventPostWalker(hSignalEvent);
} }
if (dstAllocationStruct.needsFlush) { if (dstAllocationStruct.needsFlush && !isCopyOnlyCmdList) {
NEO::PipeControlArgs args(true); NEO::PipeControlArgs args(true);
NEO::MemorySynchronizationCommands<GfxFamily>::addPipeControl(*commandContainer.getCommandStream(), args); NEO::MemorySynchronizationCommands<GfxFamily>::addPipeControl(*commandContainer.getCommandStream(), args);
} }
@@ -786,13 +786,9 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyRegion(void *d
return result; return result;
} }
if (dstAllocationStruct.needsFlush) { if (dstAllocationStruct.needsFlush && !isCopyOnlyCmdList) {
if (isCopyOnlyCmdList) { NEO::PipeControlArgs args(true);
NEO::EncodeMiFlushDW<GfxFamily>::programMiFlushDw(*commandContainer.getCommandStream(), 0, 0, false, false); NEO::MemorySynchronizationCommands<GfxFamily>::addPipeControl(*commandContainer.getCommandStream(), args);
} else {
NEO::PipeControlArgs args(true);
NEO::MemorySynchronizationCommands<GfxFamily>::addPipeControl(*commandContainer.getCommandStream(), args);
}
} }
return ZE_RESULT_SUCCESS; return ZE_RESULT_SUCCESS;
@@ -920,7 +916,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryPrefetch(const voi
size_t count) { size_t count) {
auto allocData = device->getDriverHandle()->getSvmAllocsManager()->getSVMAllocs()->get(ptr); auto allocData = device->getDriverHandle()->getSvmAllocsManager()->getSVMAllocs()->get(ptr);
if (allocData) { if (allocData) {
return ZE_RESULT_SUCCESS; return ZE_RESULT_SUCCESS;
} }
return ZE_RESULT_ERROR_UNKNOWN; return ZE_RESULT_ERROR_UNKNOWN;
} }
@@ -1133,6 +1129,7 @@ inline AlignedAllocationData CommandListCoreFamily<gfxCoreFamily>::getAlignedAll
hostPtrMap.insert(std::make_pair(buffer, alloc)); hostPtrMap.insert(std::make_pair(buffer, alloc));
alignedPtr = static_cast<uintptr_t>(alloc->getGpuAddress() - offset); alignedPtr = static_cast<uintptr_t>(alloc->getGpuAddress() - offset);
hostPointerNeedsFlush = true;
} else { } else {
alloc = allocData->gpuAllocation; alloc = allocData->gpuAllocation;

View File

@@ -383,16 +383,19 @@ HWTEST2_F(CommandListCreate, givenCommandListAnd3DWhbufferenMemoryCopyRegionCall
EXPECT_GT(cmdList.appendMemoryCopyKernel3dCalledTimes, 0u); EXPECT_GT(cmdList.appendMemoryCopyKernel3dCalledTimes, 0u);
} }
HWTEST2_F(CommandListCreate, givenCommandListAndHostPointersWhenMemoryCopyRegionCalledThenTwoNewAllocationAreAddedToHostMapPtr, Platforms) { using AppendMemoryCopy = CommandListCreate;
class MockAppendMemoryCopyRegion : public MockCommandList<gfxCoreFamily> {
public:
using CommandList::hostPtrMap;
AlignedAllocationData getAlignedAllocation(L0::Device *device, const void *buffer, uint64_t bufferSize) override {
return L0::CommandListCoreFamily<gfxCoreFamily>::getAlignedAllocation(device, buffer, bufferSize);
}
};
MockAppendMemoryCopyRegion cmdList; template <GFXCORE_FAMILY gfxCoreFamily>
class MockAppendMemoryCopy : public MockCommandList<gfxCoreFamily> {
public:
using CommandList::hostPtrMap;
AlignedAllocationData getAlignedAllocation(L0::Device *device, const void *buffer, uint64_t bufferSize) override {
return L0::CommandListCoreFamily<gfxCoreFamily>::getAlignedAllocation(device, buffer, bufferSize);
}
};
HWTEST2_F(AppendMemoryCopy, givenCommandListAndHostPointersWhenMemoryCopyRegionCalledThenTwoNewAllocationAreAddedToHostMapPtr, Platforms) {
MockAppendMemoryCopy<gfxCoreFamily> cmdList;
cmdList.initialize(device, false); cmdList.initialize(device, false);
void *srcPtr = reinterpret_cast<void *>(0x1234); void *srcPtr = reinterpret_cast<void *>(0x1234);
void *dstPtr = reinterpret_cast<void *>(0x2345); void *dstPtr = reinterpret_cast<void *>(0x2345);
@@ -402,6 +405,54 @@ HWTEST2_F(CommandListCreate, givenCommandListAndHostPointersWhenMemoryCopyRegion
EXPECT_EQ(cmdList.hostPtrMap.size(), 2u); EXPECT_EQ(cmdList.hostPtrMap.size(), 2u);
} }
// Appends a region copy between two non-SVM pointers on a command list
// initialized with `false` as the second argument, then checks that the last
// PIPE_CONTROL found in the command stream has DC flush enabled.
HWTEST2_F(AppendMemoryCopy, givenCommandListAndHostPointersWhenMemoryCopyRegionCalledThenPipeControlWithDcFlushAdded, Platforms) {
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;

    MockAppendMemoryCopy<gfxCoreFamily> cmdList;
    cmdList.initialize(device, false);

    void *srcPtr = reinterpret_cast<void *>(0x1234);
    void *dstPtr = reinterpret_cast<void *>(0x2345);
    ze_copy_region_t dstRegion = {4, 4, 4, 2, 2, 2};
    ze_copy_region_t srcRegion = {4, 4, 4, 2, 2, 2};
    cmdList.appendMemoryCopyRegion(dstPtr, &dstRegion, 0, 0, srcPtr, &srcRegion, 0, 0, nullptr);

    // Decode everything that was written to the command stream so far.
    auto &container = cmdList.commandContainer;
    GenCmdList parsedCmds;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        parsedCmds,
        ptrOffset(container.getCommandStream()->getCpuBase(), 0),
        container.getCommandStream()->getUsed()));

    // At least one PIPE_CONTROL has to be present; the assert also guarantees
    // that the loop below assigns lastPipeControl before it is dereferenced.
    auto pipeControlIt = find<PIPE_CONTROL *>(parsedCmds.begin(), parsedCmds.end());
    ASSERT_NE(parsedCmds.end(), pipeControlIt);

    // Walk over every PIPE_CONTROL and keep the final one.
    PIPE_CONTROL *lastPipeControl = nullptr;
    do {
        lastPipeControl = genCmdCast<PIPE_CONTROL *>(*pipeControlIt);
        pipeControlIt = find<PIPE_CONTROL *>(++pipeControlIt, parsedCmds.end());
    } while (pipeControlIt != parsedCmds.end());

    EXPECT_TRUE(lastPipeControl->getDcFlushEnable());
}
// Appends an 8-byte linear copy between two non-SVM pointers on a command list
// initialized with `false` as the second argument, then checks that the last
// PIPE_CONTROL found in the command stream has DC flush enabled.
HWTEST2_F(AppendMemoryCopy, givenCommandListAndHostPointersWhenMemoryCopyCalledThenPipeControlWithDcFlushAdded, Platforms) {
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;

    MockAppendMemoryCopy<gfxCoreFamily> cmdList;
    cmdList.initialize(device, false);

    void *srcPtr = reinterpret_cast<void *>(0x1234);
    void *dstPtr = reinterpret_cast<void *>(0x2345);
    cmdList.appendMemoryCopy(dstPtr, srcPtr, 8, nullptr, 0, nullptr);

    // Decode everything that was written to the command stream so far.
    auto &container = cmdList.commandContainer;
    GenCmdList parsedCmds;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        parsedCmds,
        ptrOffset(container.getCommandStream()->getCpuBase(), 0),
        container.getCommandStream()->getUsed()));

    // At least one PIPE_CONTROL has to be present; the assert also guarantees
    // that the loop below assigns lastPipeControl before it is dereferenced.
    auto pipeControlIt = find<PIPE_CONTROL *>(parsedCmds.begin(), parsedCmds.end());
    ASSERT_NE(parsedCmds.end(), pipeControlIt);

    // Walk over every PIPE_CONTROL and keep the final one.
    PIPE_CONTROL *lastPipeControl = nullptr;
    do {
        lastPipeControl = genCmdCast<PIPE_CONTROL *>(*pipeControlIt);
        pipeControlIt = find<PIPE_CONTROL *>(++pipeControlIt, parsedCmds.end());
    } while (pipeControlIt != parsedCmds.end());

    EXPECT_TRUE(lastPipeControl->getDcFlushEnable());
}
HWTEST2_F(CommandListCreate, givenCommandListAnd2DWhbufferenMemoryCopyRegionCalledThenCopyKernel2DCalled, Platforms) { HWTEST2_F(CommandListCreate, givenCommandListAnd2DWhbufferenMemoryCopyRegionCalledThenCopyKernel2DCalled, Platforms) {
MockCommandList<gfxCoreFamily> cmdList; MockCommandList<gfxCoreFamily> cmdList;
cmdList.initialize(device, false); cmdList.initialize(device, false);
@@ -416,6 +467,54 @@ HWTEST2_F(CommandListCreate, givenCommandListAnd2DWhbufferenMemoryCopyRegionCall
EXPECT_GT(cmdList.appendMemoryCopyKernel2dCalledTimes, 0u); EXPECT_GT(cmdList.appendMemoryCopyKernel2dCalledTimes, 0u);
} }
// Appends an 8-byte linear copy on a command list initialized with `true` as
// the second argument (copy-only, per the test name) and checks that an
// XY_COPY_BLT is present in the stream with no PIPE_CONTROL after it.
HWTEST2_F(AppendMemoryCopy, givenCopyOnlyCommandListAndHostPointersWhenMemoryCopyCalledThenPipeControlWithDcFlushAddedIsNotAddedAfterBlitCopy, Platforms) {
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
    using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
    using XY_COPY_BLT = typename GfxFamily::XY_COPY_BLT;

    WhiteBox<CommandListCoreFamily<gfxCoreFamily>> cmdList(1);
    cmdList.initialize(device, true);

    void *srcPtr = reinterpret_cast<void *>(0x1234);
    void *dstPtr = reinterpret_cast<void *>(0x2345);
    cmdList.appendMemoryCopy(dstPtr, srcPtr, 8, nullptr, 0, nullptr);

    // Decode everything that was written to the command stream so far.
    auto &container = cmdList.commandContainer;
    GenCmdList parsedCmds;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        parsedCmds,
        ptrOffset(container.getCommandStream()->getCpuBase(), 0),
        container.getCommandStream()->getUsed()));

    // The blit command itself must have been emitted.
    auto blitIt = find<XY_COPY_BLT *>(parsedCmds.begin(), parsedCmds.end());
    ASSERT_NE(parsedCmds.end(), blitIt);

    // Nothing following the blit may be a PIPE_CONTROL.
    auto pipeControlAfterBlit = find<PIPE_CONTROL *>(++blitIt, parsedCmds.end());
    EXPECT_EQ(parsedCmds.end(), pipeControlAfterBlit);
}
// Appends a region copy on a command list initialized with `true` as the
// second argument (copy-only, per the test name) and checks that an
// XY_COPY_BLT is present in the stream with no PIPE_CONTROL after it.
HWTEST2_F(AppendMemoryCopy, givenCopyOnlyCommandListAndHostPointersWhenMemoryCopyRegionCalledThenPipeControlWithDcFlushAddedIsNotAddedAfterBlitCopy, Platforms) {
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
    using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
    using XY_COPY_BLT = typename GfxFamily::XY_COPY_BLT;

    WhiteBox<CommandListCoreFamily<gfxCoreFamily>> cmdList(1);
    cmdList.initialize(device, true);

    void *srcPtr = reinterpret_cast<void *>(0x1234);
    void *dstPtr = reinterpret_cast<void *>(0x2345);
    ze_copy_region_t dstRegion = {4, 4, 0, 2, 2, 1};
    ze_copy_region_t srcRegion = {4, 4, 0, 2, 2, 1};
    cmdList.appendMemoryCopyRegion(dstPtr, &dstRegion, 0, 0, srcPtr, &srcRegion, 0, 0, nullptr);

    // Decode everything that was written to the command stream so far.
    auto &container = cmdList.commandContainer;
    GenCmdList parsedCmds;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        parsedCmds,
        ptrOffset(container.getCommandStream()->getCpuBase(), 0),
        container.getCommandStream()->getUsed()));

    // The blit command itself must have been emitted.
    auto blitIt = find<XY_COPY_BLT *>(parsedCmds.begin(), parsedCmds.end());
    ASSERT_NE(parsedCmds.end(), blitIt);

    // Nothing following the blit may be a PIPE_CONTROL.
    auto pipeControlAfterBlit = find<PIPE_CONTROL *>(++blitIt, parsedCmds.end());
    EXPECT_EQ(parsedCmds.end(), pipeControlAfterBlit);
}
HWTEST2_F(CommandListCreate, givenCopyOnlyCommandListWhenAppendMemoryFillCalledThenAppendBlitFillCalled, Platforms) { HWTEST2_F(CommandListCreate, givenCopyOnlyCommandListWhenAppendMemoryFillCalledThenAppendBlitFillCalled, Platforms) {
MockCommandList<gfxCoreFamily> cmdList; MockCommandList<gfxCoreFamily> cmdList;
cmdList.initialize(device, true); cmdList.initialize(device, true);