Fix Read timestamp register once when memory copy

Change-Id: Ibd02275cefbaebd702e3d40eb23e153840741aec
Signed-off-by: Maciej Plewka <maciej.plewka@intel.com>
This commit is contained in:
Maciej Plewka
2020-06-19 10:51:13 +02:00
committed by sys_ocldev
parent 87b3acc18a
commit 2beccf9dc0
2 changed files with 60 additions and 23 deletions

View File

@ -625,9 +625,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyBlit(NEO::Grap
auto blitProperties = NEO::BlitProperties::constructPropertiesForCopyBuffer(dstPtrAlloc, srcPtrAlloc, {dstOffset, 0, 0}, {srcOffset, 0, 0}, {size, 0, 0}, 0, 0, 0, 0);
commandContainer.addToResidencyContainer(dstPtrAlloc);
commandContainer.addToResidencyContainer(srcPtrAlloc);
appendEventForProfiling(hSignalEvent, true);
NEO::BlitCommandsHelper<GfxFamily>::dispatchBlitCommandsForBuffer(blitProperties, *commandContainer.getCommandStream(), *device->getNEODevice()->getExecutionEnvironment()->rootDeviceEnvironments[device->getRootDeviceIndex()]);
this->appendSignalEventPostWalker(hSignalEvent);
return ZE_RESULT_SUCCESS;
}
@ -652,8 +651,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyBlitRegion(NEO
commandContainer.addToResidencyContainer(srcAlloc);
appendEventForProfiling(hSignalEvent, true);
NEO::BlitCommandsHelper<GfxFamily>::dispatchBlitCommandsForBuffer(blitProperties, *commandContainer.getCommandStream(), *device->getNEODevice()->getExecutionEnvironment()->rootDeviceEnvironments[device->getRootDeviceIndex()]);
this->appendSignalEventPostWalker(hSignalEvent);
appendSignalEventPostWalker(hSignalEvent);
return ZE_RESULT_SUCCESS;
}
@ -677,8 +675,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendCopyImageBlit(NEO::Graph
commandContainer.addToResidencyContainer(src);
appendEventForProfiling(hSignalEvent, true);
NEO::BlitCommandsHelper<GfxFamily>::dispatchBlitCommandsForImages(blitProperties, *commandContainer.getCommandStream(), *device->getNEODevice()->getExecutionEnvironment()->rootDeviceEnvironments[device->getRootDeviceIndex()]);
this->appendSignalEventPostWalker(hSignalEvent);
appendSignalEventPostWalker(hSignalEvent);
return ZE_RESULT_SUCCESS;
}
@ -836,8 +833,6 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyRegion(void *d
dstSize += dstAllocationStruct.offset;
srcSize += srcAllocationStruct.offset;
appendEventForProfiling(hSignalEvent, true);
ze_result_t result = ZE_RESULT_SUCCESS;
if (srcRegion->depth > 1) {
result = isCopyOnlyCmdList ? appendMemoryCopyBlitRegion(srcAllocationStruct.alloc, dstAllocationStruct.alloc, *srcRegion, *dstRegion, {srcRegion->width, srcRegion->height, srcRegion->depth},
@ -1171,7 +1166,9 @@ void CommandListCoreFamily<gfxCoreFamily>::appendEventForProfilingCopyCommand(ze
auto baseAddr = event->getGpuAddress();
auto contextOffset = beforeWalker ? offsetof(KernelTimestampEvent, contextStart) : offsetof(KernelTimestampEvent, contextEnd);
auto globalOffset = beforeWalker ? offsetof(KernelTimestampEvent, globalStart) : offsetof(KernelTimestampEvent, globalEnd);
if (!beforeWalker) {
NEO::EncodeMiFlushDW<GfxFamily>::programMiFlushDw(*commandContainer.getCommandStream(), 0, 0, false, false);
}
NEO::EncodeStoreMMIO<GfxFamily>::encode(*commandContainer.getCommandStream(), REG_GLOBAL_TIMESTAMP_LDW, ptrOffset(baseAddr, globalOffset));
NEO::EncodeStoreMMIO<GfxFamily>::encode(*commandContainer.getCommandStream(), GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, ptrOffset(baseAddr, contextOffset));
}

View File

@ -735,11 +735,15 @@ HWTEST2_F(CommandListCreate, givenCopyCommandListWhenCopyFromImagBlitThenCommand
EXPECT_NE(cmdList.end(), itor);
}
HWTEST2_F(CommandListCreate, givenCopyCommandListWhenTimestampPassedToMemoryCopyBlitThenTimeStampRegistersAreAdded, Platforms) {
HWTEST2_F(CommandListCreate, givenCopyCommandListWhenTimestampPassedToMemoryCopyThenAppendProfilingCalledOnceBeforeAndAfterCommand, Platforms) {
using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
using MI_STORE_REGISTER_MEM = typename GfxFamily::MI_STORE_REGISTER_MEM;
auto commandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
commandList->initialize(device, true);
using MI_FLUSH_DW = typename GfxFamily::MI_FLUSH_DW;
MockAppendMemoryCopy<gfxCoreFamily> commandList;
commandList.initialize(device, true);
void *srcPtr = reinterpret_cast<void *>(0x1234);
void *dstPtr = reinterpret_cast<void *>(0x2345);
ze_event_pool_desc_t eventPoolDesc = {
ZE_EVENT_POOL_DESC_VERSION_CURRENT,
ZE_EVENT_POOL_FLAG_TIMESTAMP,
@ -752,22 +756,33 @@ HWTEST2_F(CommandListCreate, givenCopyCommandListWhenTimestampPassedToMemoryCopy
auto eventPool = std::unique_ptr<L0::EventPool>(L0::EventPool::create(driverHandle.get(), 0, nullptr, &eventPoolDesc));
auto event = std::unique_ptr<L0::Event>(L0::Event::create(eventPool.get(), &eventDesc, device));
NEO::MockGraphicsAllocation mockAllocationSrc(0, NEO::GraphicsAllocation::AllocationType::INTERNAL_HOST_MEMORY,
reinterpret_cast<void *>(0x1234), 0x1000, 0, sizeof(uint32_t),
MemoryPool::System4KBPages);
NEO::MockGraphicsAllocation mockAllocationDst(0, NEO::GraphicsAllocation::AllocationType::INTERNAL_HOST_MEMORY,
reinterpret_cast<void *>(0x1234), 0x1000, 0, sizeof(uint32_t),
MemoryPool::System4KBPages);
uint32_t size = 0x1000;
commandList->appendMemoryCopyBlit(&mockAllocationDst, 0, &mockAllocationSrc, 0, size, event->toHandle());
commandList.appendMemoryCopy(dstPtr, srcPtr, 0x100, event->toHandle(), 0, nullptr);
EXPECT_GT(commandList.appendMemoryCopyBlitCalledTimes, 1u);
GenCmdList cmdList;
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
cmdList, ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), 0), commandList->commandContainer.getCommandStream()->getUsed()));
cmdList, ptrOffset(commandList.commandContainer.getCommandStream()->getCpuBase(), 0), commandList.commandContainer.getCommandStream()->getUsed()));
auto itor = find<MI_STORE_REGISTER_MEM *>(cmdList.begin(), cmdList.end());
EXPECT_NE(cmdList.end(), itor);
auto cmd = genCmdCast<MI_STORE_REGISTER_MEM *>(*itor);
EXPECT_EQ(cmd->getRegisterAddress(), REG_GLOBAL_TIMESTAMP_LDW);
itor++;
EXPECT_NE(cmdList.end(), itor);
cmd = genCmdCast<MI_STORE_REGISTER_MEM *>(*itor);
EXPECT_EQ(cmd->getRegisterAddress(), GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW);
itor = find<MI_FLUSH_DW *>(itor, cmdList.end());
EXPECT_NE(cmdList.end(), itor);
itor = find<MI_STORE_REGISTER_MEM *>(itor, cmdList.end());
EXPECT_NE(cmdList.end(), itor);
cmd = genCmdCast<MI_STORE_REGISTER_MEM *>(*itor);
EXPECT_EQ(cmd->getRegisterAddress(), REG_GLOBAL_TIMESTAMP_LDW);
itor++;
EXPECT_NE(cmdList.end(), itor);
cmd = genCmdCast<MI_STORE_REGISTER_MEM *>(*itor);
EXPECT_EQ(cmd->getRegisterAddress(), GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW);
itor++;
EXPECT_EQ(cmdList.end(), itor);
}
HWTEST2_F(CommandListCreate, givenCopyCommandListWhenTimestampPassedToMemoryCopyRegionBlitThenTimeStampRegistersAreAdded, Platforms) {
@ -798,12 +813,38 @@ HWTEST2_F(CommandListCreate, givenCopyCommandListWhenTimestampPassedToMemoryCopy
commandList->appendMemoryCopyBlitRegion(&mockAllocationDst, &mockAllocationSrc, srcRegion, dstRegion, {0, 0, 0}, 0, 0, 0, 0, 0, 0, event->toHandle());
GenCmdList cmdList;
auto baseAddr = event->getGpuAddress();
auto contextStartOffset = offsetof(KernelTimestampEvent, contextStart);
auto globalStartOffset = offsetof(KernelTimestampEvent, globalStart);
auto contextEndOffset = offsetof(KernelTimestampEvent, contextEnd);
auto globalEndOffset = offsetof(KernelTimestampEvent, globalEnd);
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
cmdList, ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), 0), commandList->commandContainer.getCommandStream()->getUsed()));
auto itor = find<MI_STORE_REGISTER_MEM *>(cmdList.begin(), cmdList.end());
EXPECT_NE(cmdList.end(), itor);
auto cmd = genCmdCast<MI_STORE_REGISTER_MEM *>(*itor);
EXPECT_EQ(cmd->getRegisterAddress(), REG_GLOBAL_TIMESTAMP_LDW);
EXPECT_EQ(cmd->getMemoryAddress(), ptrOffset(baseAddr, globalStartOffset));
itor++;
EXPECT_NE(cmdList.end(), itor);
cmd = genCmdCast<MI_STORE_REGISTER_MEM *>(*itor);
EXPECT_EQ(cmd->getRegisterAddress(), GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW);
EXPECT_EQ(cmd->getMemoryAddress(), ptrOffset(baseAddr, contextStartOffset));
itor++;
itor = find<MI_STORE_REGISTER_MEM *>(itor, cmdList.end());
EXPECT_NE(cmdList.end(), itor);
cmd = genCmdCast<MI_STORE_REGISTER_MEM *>(*itor);
EXPECT_EQ(cmd->getRegisterAddress(), REG_GLOBAL_TIMESTAMP_LDW);
EXPECT_EQ(cmd->getMemoryAddress(), ptrOffset(baseAddr, globalEndOffset));
itor++;
EXPECT_NE(cmdList.end(), itor);
cmd = genCmdCast<MI_STORE_REGISTER_MEM *>(*itor);
EXPECT_EQ(cmd->getRegisterAddress(), GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW);
EXPECT_EQ(cmd->getMemoryAddress(), ptrOffset(baseAddr, contextEndOffset));
itor++;
EXPECT_EQ(cmdList.end(), itor);
}
HWTEST2_F(CommandListCreate, givenCopyCommandListWhenTimestampPassedToImageCopyBlitThenTimeStampRegistersAreAdded, Platforms) {
@ -938,6 +979,5 @@ HWTEST2_F(CommandListCreate, givenCopyOnlyCommandListWhenAppendBlitFillCalledWit
EXPECT_EQ(allocValue, pattern[i % 4]);
}
}
} // namespace ult
} // namespace L0