mirror of
https://github.com/intel/compute-runtime.git
synced 2025-09-15 13:01:45 +08:00
AppendEventReset: Use SDI to reset packets
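This patch uses SDI (MI_STORE_DATA_IMM) commands to reset all event packets except the last one. The last packet is still reset by a PIPE_CONTROL post-sync write when the event has a signal scope or its timestamp flag is set; otherwise every packet is reset with SDI. It also fixes the GPU address used for the event reset on copy command lists.
Related-To: LOCI-3052
Signed-off-by: Joshua Santosh Ranjan <joshua.santosh.ranjan@intel.com>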
This commit is contained in:

committed by
Compute-Runtime-Automation

parent
3d57d11c8a
commit
95ff289bde
@ -297,6 +297,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendEventReset(ze_event_hand
|
|||||||
|
|
||||||
uint64_t baseAddr = event->getGpuAddress(this->device);
|
uint64_t baseAddr = event->getGpuAddress(this->device);
|
||||||
uint32_t packetsToReset = event->getPacketsInUse();
|
uint32_t packetsToReset = event->getPacketsInUse();
|
||||||
|
bool appendPipeControlWithPostSync = false;
|
||||||
|
|
||||||
NEO::Device *neoDevice = device->getNEODevice();
|
NEO::Device *neoDevice = device->getNEODevice();
|
||||||
uint32_t callId = 0;
|
uint32_t callId = 0;
|
||||||
@ -323,17 +324,31 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendEventReset(ze_event_hand
|
|||||||
NEO::MiFlushArgs args;
|
NEO::MiFlushArgs args;
|
||||||
args.commandWithPostSync = true;
|
args.commandWithPostSync = true;
|
||||||
NEO::EncodeMiFlushDW<GfxFamily>::programMiFlushDw(*commandContainer.getCommandStream(),
|
NEO::EncodeMiFlushDW<GfxFamily>::programMiFlushDw(*commandContainer.getCommandStream(),
|
||||||
event->getGpuAddress(this->device),
|
baseAddr,
|
||||||
Event::STATE_CLEARED, args, hwInfo);
|
Event::STATE_CLEARED, args, hwInfo);
|
||||||
} else {
|
} else {
|
||||||
NEO::PipeControlArgs args;
|
bool applyScope = event->signalScope;
|
||||||
args.dcFlushEnable = NEO::MemorySynchronizationCommands<GfxFamily>::getDcFlushEnable(event->signalScope, hwInfo);
|
uint32_t packetsToResetUsingSdi = packetsToReset;
|
||||||
size_t estimateSize = NEO::MemorySynchronizationCommands<GfxFamily>::getSizeForPipeControlWithPostSyncOperation(hwInfo) * packetsToReset;
|
if (applyScope || event->isEventTimestampFlagSet()) {
|
||||||
if (this->partitionCount > 1) {
|
UNRECOVERABLE_IF(packetsToReset == 0);
|
||||||
estimateSize += estimateBufferSizeMultiTileBarrier(hwInfo);
|
packetsToResetUsingSdi = packetsToReset - 1;
|
||||||
|
appendPipeControlWithPostSync = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (uint32_t i = 0u; i < packetsToReset; i++) {
|
for (uint32_t i = 0u; i < packetsToResetUsingSdi; i++) {
|
||||||
|
NEO::EncodeStoreMemory<GfxFamily>::programStoreDataImm(
|
||||||
|
*commandContainer.getCommandStream(),
|
||||||
|
baseAddr,
|
||||||
|
Event::STATE_CLEARED,
|
||||||
|
0u,
|
||||||
|
false,
|
||||||
|
false);
|
||||||
|
baseAddr += event->getSinglePacketSize();
|
||||||
|
}
|
||||||
|
|
||||||
|
if (appendPipeControlWithPostSync) {
|
||||||
|
NEO::PipeControlArgs args;
|
||||||
|
args.dcFlushEnable = NEO::MemorySynchronizationCommands<GfxFamily>::getDcFlushEnable(event->signalScope, hwInfo);
|
||||||
NEO::MemorySynchronizationCommands<GfxFamily>::addPipeControlAndProgramPostSyncOperation(
|
NEO::MemorySynchronizationCommands<GfxFamily>::addPipeControlAndProgramPostSyncOperation(
|
||||||
*commandContainer.getCommandStream(),
|
*commandContainer.getCommandStream(),
|
||||||
POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA,
|
POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA,
|
||||||
@ -341,8 +356,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendEventReset(ze_event_hand
|
|||||||
Event::STATE_CLEARED,
|
Event::STATE_CLEARED,
|
||||||
hwInfo,
|
hwInfo,
|
||||||
args);
|
args);
|
||||||
baseAddr += event->getSinglePacketSize();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (this->partitionCount > 1) {
|
if (this->partitionCount > 1) {
|
||||||
appendMultiTileBarrier(*neoDevice);
|
appendMultiTileBarrier(*neoDevice);
|
||||||
}
|
}
|
||||||
|
@ -21,9 +21,10 @@ namespace ult {
|
|||||||
|
|
||||||
using CommandListAppendEventReset = Test<CommandListFixture>;
|
using CommandListAppendEventReset = Test<CommandListFixture>;
|
||||||
|
|
||||||
HWTEST_F(CommandListAppendEventReset, givenCmdlistWhenResetEventAppendedThenPostSyncWriteIsGenerated) {
|
HWTEST_F(CommandListAppendEventReset, givenCmdlistWhenResetEventAppendedThenStoreDataImmIsGenerated) {
|
||||||
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
|
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
|
||||||
using POST_SYNC_OPERATION = typename PIPE_CONTROL::POST_SYNC_OPERATION;
|
using POST_SYNC_OPERATION = typename PIPE_CONTROL::POST_SYNC_OPERATION;
|
||||||
|
using MI_STORE_DATA_IMM = typename FamilyType::MI_STORE_DATA_IMM;
|
||||||
|
|
||||||
auto usedSpaceBefore = commandList->commandContainer.getCommandStream()->getUsed();
|
auto usedSpaceBefore = commandList->commandContainer.getCommandStream()->getUsed();
|
||||||
|
|
||||||
@ -33,6 +34,50 @@ HWTEST_F(CommandListAppendEventReset, givenCmdlistWhenResetEventAppendedThenPost
|
|||||||
auto usedSpaceAfter = commandList->commandContainer.getCommandStream()->getUsed();
|
auto usedSpaceAfter = commandList->commandContainer.getCommandStream()->getUsed();
|
||||||
ASSERT_GT(usedSpaceAfter, usedSpaceBefore);
|
ASSERT_GT(usedSpaceAfter, usedSpaceBefore);
|
||||||
|
|
||||||
|
GenCmdList cmdList;
|
||||||
|
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList,
|
||||||
|
ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), 0),
|
||||||
|
usedSpaceAfter));
|
||||||
|
|
||||||
|
auto gpuAddress = event->getGpuAddress(device);
|
||||||
|
if (event->isUsingContextEndOffset()) {
|
||||||
|
gpuAddress += event->getContextEndOffset();
|
||||||
|
}
|
||||||
|
auto itorSdi = findAll<MI_STORE_DATA_IMM *>(cmdList.begin(), cmdList.end());
|
||||||
|
uint32_t sdiFound = 0;
|
||||||
|
ASSERT_NE(0u, itorSdi.size());
|
||||||
|
for (auto it : itorSdi) {
|
||||||
|
auto cmd = genCmdCast<MI_STORE_DATA_IMM *>(*it);
|
||||||
|
EXPECT_EQ(gpuAddress, cmd->getAddress());
|
||||||
|
gpuAddress += event->getSinglePacketSize();
|
||||||
|
sdiFound++;
|
||||||
|
}
|
||||||
|
EXPECT_NE(0u, sdiFound);
|
||||||
|
}
|
||||||
|
|
||||||
|
HWTEST_F(CommandListAppendEventReset, givenCmdlistWhenResetEventWithTimeStampIsAppendedThenStoreDataImmAndPostSyncWriteIsGenerated) {
|
||||||
|
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
|
||||||
|
using POST_SYNC_OPERATION = typename PIPE_CONTROL::POST_SYNC_OPERATION;
|
||||||
|
using MI_STORE_DATA_IMM = typename FamilyType::MI_STORE_DATA_IMM;
|
||||||
|
|
||||||
|
ze_event_pool_desc_t eventPoolDesc = {};
|
||||||
|
eventPoolDesc.count = 1;
|
||||||
|
eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP;
|
||||||
|
|
||||||
|
ze_event_desc_t eventDesc = {};
|
||||||
|
eventDesc.index = 0;
|
||||||
|
ze_result_t result = ZE_RESULT_SUCCESS;
|
||||||
|
auto eventPool = std::unique_ptr<L0::EventPool>(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result));
|
||||||
|
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||||
|
auto event = std::unique_ptr<L0::Event>(L0::Event::create<uint32_t>(eventPool.get(), &eventDesc, device));
|
||||||
|
auto usedSpaceBefore = commandList->commandContainer.getCommandStream()->getUsed();
|
||||||
|
|
||||||
|
result = commandList->appendEventReset(event->toHandle());
|
||||||
|
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
|
||||||
|
|
||||||
|
auto usedSpaceAfter = commandList->commandContainer.getCommandStream()->getUsed();
|
||||||
|
ASSERT_GT(usedSpaceAfter, usedSpaceBefore);
|
||||||
|
|
||||||
GenCmdList cmdList;
|
GenCmdList cmdList;
|
||||||
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList,
|
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList,
|
||||||
ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), 0),
|
ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), 0),
|
||||||
@ -45,6 +90,17 @@ HWTEST_F(CommandListAppendEventReset, givenCmdlistWhenResetEventAppendedThenPost
|
|||||||
gpuAddress += event->getContextEndOffset();
|
gpuAddress += event->getContextEndOffset();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
auto itorSdi = findAll<MI_STORE_DATA_IMM *>(cmdList.begin(), cmdList.end());
|
||||||
|
uint32_t sdiFound = 0;
|
||||||
|
ASSERT_NE(0u, itorSdi.size());
|
||||||
|
for (auto it : itorSdi) {
|
||||||
|
auto cmd = genCmdCast<MI_STORE_DATA_IMM *>(*it);
|
||||||
|
EXPECT_EQ(gpuAddress, cmd->getAddress());
|
||||||
|
gpuAddress += event->getSinglePacketSize();
|
||||||
|
sdiFound++;
|
||||||
|
}
|
||||||
|
EXPECT_EQ(EventPacketsCount::eventPackets - 1, sdiFound);
|
||||||
|
|
||||||
uint32_t postSyncFound = 0;
|
uint32_t postSyncFound = 0;
|
||||||
for (auto it : itorPC) {
|
for (auto it : itorPC) {
|
||||||
auto cmd = genCmdCast<PIPE_CONTROL *>(*it);
|
auto cmd = genCmdCast<PIPE_CONTROL *>(*it);
|
||||||
@ -98,11 +154,16 @@ HWTEST_F(CommandListAppendEventReset, givenCopyOnlyCmdlistWhenResetEventAppended
|
|||||||
auto itorPC = findAll<MI_FLUSH_DW *>(cmdList.begin(), cmdList.end());
|
auto itorPC = findAll<MI_FLUSH_DW *>(cmdList.begin(), cmdList.end());
|
||||||
ASSERT_NE(0u, itorPC.size());
|
ASSERT_NE(0u, itorPC.size());
|
||||||
bool postSyncFound = false;
|
bool postSyncFound = false;
|
||||||
|
|
||||||
|
auto gpuAddress = event->getGpuAddress(device);
|
||||||
|
if (event->isUsingContextEndOffset()) {
|
||||||
|
gpuAddress += event->getContextEndOffset();
|
||||||
|
}
|
||||||
|
|
||||||
for (auto it : itorPC) {
|
for (auto it : itorPC) {
|
||||||
auto cmd = genCmdCast<MI_FLUSH_DW *>(*it);
|
auto cmd = genCmdCast<MI_FLUSH_DW *>(*it);
|
||||||
if (cmd->getPostSyncOperation() == MI_FLUSH_DW::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA_QWORD) {
|
if (cmd->getPostSyncOperation() == MI_FLUSH_DW::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA_QWORD) {
|
||||||
EXPECT_EQ(cmd->getImmediateData(), Event::STATE_INITIAL);
|
EXPECT_EQ(cmd->getImmediateData(), Event::STATE_INITIAL);
|
||||||
auto gpuAddress = event->getGpuAddress(device);
|
|
||||||
EXPECT_EQ(cmd->getDestinationAddress(), gpuAddress);
|
EXPECT_EQ(cmd->getDestinationAddress(), gpuAddress);
|
||||||
postSyncFound = true;
|
postSyncFound = true;
|
||||||
}
|
}
|
||||||
@ -164,6 +225,7 @@ HWTEST2_F(CommandListAppendEventReset, givenTimestampEventUsedInResetThenPipeCon
|
|||||||
auto contextOffset = event->getContextEndOffset();
|
auto contextOffset = event->getContextEndOffset();
|
||||||
auto baseAddr = event->getGpuAddress(device);
|
auto baseAddr = event->getGpuAddress(device);
|
||||||
auto gpuAddress = ptrOffset(baseAddr, contextOffset);
|
auto gpuAddress = ptrOffset(baseAddr, contextOffset);
|
||||||
|
gpuAddress += ((EventPacketsCount::eventPackets - 1) * event->getSinglePacketSize());
|
||||||
|
|
||||||
GenCmdList cmdList;
|
GenCmdList cmdList;
|
||||||
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
|
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
|
||||||
@ -179,11 +241,11 @@ HWTEST2_F(CommandListAppendEventReset, givenTimestampEventUsedInResetThenPipeCon
|
|||||||
EXPECT_TRUE(cmd->getCommandStreamerStallEnable());
|
EXPECT_TRUE(cmd->getCommandStreamerStallEnable());
|
||||||
EXPECT_EQ(gpuAddress, NEO::UnitTestHelper<FamilyType>::getPipeControlPostSyncAddress(*cmd));
|
EXPECT_EQ(gpuAddress, NEO::UnitTestHelper<FamilyType>::getPipeControlPostSyncAddress(*cmd));
|
||||||
EXPECT_FALSE(cmd->getDcFlushEnable());
|
EXPECT_FALSE(cmd->getDcFlushEnable());
|
||||||
postSyncFound++;
|
|
||||||
gpuAddress += event->getSinglePacketSize();
|
gpuAddress += event->getSinglePacketSize();
|
||||||
|
postSyncFound++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
ASSERT_EQ(EventPacketsCount::eventPackets, postSyncFound);
|
ASSERT_EQ(1u, postSyncFound);
|
||||||
}
|
}
|
||||||
|
|
||||||
HWTEST2_F(CommandListAppendEventReset, givenEventWithHostScopeUsedInResetThenPipeControlWithDcFlushAppended, IsAtLeastSkl) {
|
HWTEST2_F(CommandListAppendEventReset, givenEventWithHostScopeUsedInResetThenPipeControlWithDcFlushAppended, IsAtLeastSkl) {
|
||||||
@ -231,13 +293,14 @@ HWTEST2_F(CommandListAppendEventReset, givenEventWithHostScopeUsedInResetThenPip
|
|||||||
}
|
}
|
||||||
|
|
||||||
HWTEST2_F(CommandListAppendEventReset,
|
HWTEST2_F(CommandListAppendEventReset,
|
||||||
givenMultiTileCommandListWhenAppendingMultiPacketEventThenExpectSameNumberOfResetPostSyncAndMultiBarrierCommands, IsAtLeastXeHpCore) {
|
givenMultiTileCommandListWhenAppendingMultiPacketEventThenExpectCorrectNumberOfStoreDataImmAndResetPostSyncAndMultiBarrierCommands, IsAtLeastXeHpCore) {
|
||||||
using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
|
using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
|
||||||
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
|
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
|
||||||
using POST_SYNC_OPERATION = typename PIPE_CONTROL::POST_SYNC_OPERATION;
|
using POST_SYNC_OPERATION = typename PIPE_CONTROL::POST_SYNC_OPERATION;
|
||||||
using MI_BATCH_BUFFER_END = typename FamilyType::MI_BATCH_BUFFER_END;
|
using MI_BATCH_BUFFER_END = typename FamilyType::MI_BATCH_BUFFER_END;
|
||||||
using MI_ATOMIC = typename FamilyType::MI_ATOMIC;
|
using MI_ATOMIC = typename FamilyType::MI_ATOMIC;
|
||||||
using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
|
using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
|
||||||
|
using MI_STORE_DATA_IMM = typename FamilyType::MI_STORE_DATA_IMM;
|
||||||
|
|
||||||
auto commandList = std::make_unique<::L0::ult::CommandListCoreFamily<gfxCoreFamily>>();
|
auto commandList = std::make_unique<::L0::ult::CommandListCoreFamily<gfxCoreFamily>>();
|
||||||
ASSERT_NE(nullptr, commandList);
|
ASSERT_NE(nullptr, commandList);
|
||||||
@ -264,7 +327,8 @@ HWTEST2_F(CommandListAppendEventReset,
|
|||||||
auto gpuAddress = event->getGpuAddress(device) + event->getContextEndOffset();
|
auto gpuAddress = event->getGpuAddress(device) + event->getContextEndOffset();
|
||||||
auto &hwInfo = device->getNEODevice()->getHardwareInfo();
|
auto &hwInfo = device->getNEODevice()->getHardwareInfo();
|
||||||
|
|
||||||
size_t expectedSize = NEO::MemorySynchronizationCommands<GfxFamily>::getSizeForPipeControlWithPostSyncOperation(hwInfo) * packets +
|
size_t expectedSize = NEO::MemorySynchronizationCommands<GfxFamily>::getSizeForPipeControlWithPostSyncOperation(hwInfo) +
|
||||||
|
((packets - 1) * sizeof(MI_STORE_DATA_IMM)) +
|
||||||
commandList->estimateBufferSizeMultiTileBarrier(hwInfo);
|
commandList->estimateBufferSizeMultiTileBarrier(hwInfo);
|
||||||
size_t usedSize = cmdStream->getUsed();
|
size_t usedSize = cmdStream->getUsed();
|
||||||
EXPECT_EQ(expectedSize, usedSize);
|
EXPECT_EQ(expectedSize, usedSize);
|
||||||
@ -275,6 +339,11 @@ HWTEST2_F(CommandListAppendEventReset,
|
|||||||
cmdStream->getCpuBase(),
|
cmdStream->getCpuBase(),
|
||||||
usedSize));
|
usedSize));
|
||||||
|
|
||||||
|
auto itorSdi = find<MI_STORE_DATA_IMM *>(cmdList.begin(), cmdList.end());
|
||||||
|
auto cmd = genCmdCast<MI_STORE_DATA_IMM *>(*itorSdi);
|
||||||
|
EXPECT_EQ(gpuAddress, cmd->getAddress());
|
||||||
|
gpuAddress += event->getSinglePacketSize();
|
||||||
|
|
||||||
auto pipeControlList = findAll<PIPE_CONTROL *>(cmdList.begin(), cmdList.end());
|
auto pipeControlList = findAll<PIPE_CONTROL *>(cmdList.begin(), cmdList.end());
|
||||||
ASSERT_NE(0u, pipeControlList.size());
|
ASSERT_NE(0u, pipeControlList.size());
|
||||||
uint32_t postSyncFound = 0;
|
uint32_t postSyncFound = 0;
|
||||||
@ -291,7 +360,7 @@ HWTEST2_F(CommandListAppendEventReset,
|
|||||||
postSyncPipeControlItor = it;
|
postSyncPipeControlItor = it;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
EXPECT_EQ(packets, postSyncFound);
|
EXPECT_EQ(1u, postSyncFound);
|
||||||
postSyncPipeControlItor++;
|
postSyncPipeControlItor++;
|
||||||
ASSERT_NE(cmdList.end(), postSyncPipeControlItor);
|
ASSERT_NE(cmdList.end(), postSyncPipeControlItor);
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user