refactor: improve PostSync handling

Signed-off-by: Dunajski, Bartosz <bartosz.dunajski@intel.com>
This commit is contained in:
Dunajski, Bartosz
2023-12-27 12:38:25 +00:00
committed by Compute-Runtime-Automation
parent ea78831e28
commit 8360d90fa3
5 changed files with 31 additions and 12 deletions

View File

@@ -413,7 +413,7 @@ ze_result_t EventImp<TagSizeT>::hostEventSetValue(TagSizeT eventVal) {
uint32_t packets = 0;
std::array<uint64_t, 16 * 3> tempCopyData = {}; // 16 packets, 3 kernels
std::array<uint64_t, 16 * 4 * 3> tempCopyData = {}; // 16 packets, 4 timestamps, 3 kernels
UNRECOVERABLE_IF(tempCopyData.size() * sizeof(uint64_t) < totalEventSize);
const auto numElements = getMaxPacketsCount() * kernelCount;

View File

@@ -56,7 +56,7 @@ uint32_t L0GfxCoreHelperHw<Family>::getCmdListWaitOnMemoryDataSize() const {
// Reports whether the post-sync allocation layout is unified: the comparison below is true
// when getImmediateWritePostSyncOffset() equals
// NEO::ImplicitScalingDispatch<Family>::getTimeStampPostSyncOffset().
// NOTE(review): this span looks like a rendered diff hunk with its +/- markers stripped, so
// `return false;` is presumably the removed line and the comparison the added one — confirm
// against the repository. Read literally, the first return makes the second unreachable.
template <typename Family>
bool L0GfxCoreHelperHw<Family>::hasUnifiedPostSyncAllocationLayout() const {
return false;
return (getImmediateWritePostSyncOffset() == NEO::ImplicitScalingDispatch<Family>::getTimeStampPostSyncOffset());
}
template <typename Family>

View File

@@ -282,12 +282,22 @@ HWTEST2_F(CommandListAppendUsedPacketSignalEvent, givenMultiTileAndDynamicPostSy
auto lriItor = cmdList.begin();
auto lriCmd = genCmdCast<MI_LOAD_REGISTER_IMM *>(*lriItor);
ASSERT_NE(nullptr, lriCmd);
EXPECT_EQ(NEO::PartitionRegisters<FamilyType>::addressOffsetCCSOffset, lriCmd->getRegisterOffset());
EXPECT_EQ(NEO::ImplicitScalingDispatch<FamilyType>::getTimeStampPostSyncOffset(), lriCmd->getDataDword());
std::vector<GenCmdList::iterator> pipeControlList;
if (unifiedPostSyncLayout) {
EXPECT_EQ(nullptr, lriCmd);
pipeControlList = findAll<PIPE_CONTROL *>(cmdList.begin(), cmdList.end());
} else {
ASSERT_NE(nullptr, lriCmd);
EXPECT_EQ(NEO::PartitionRegisters<FamilyType>::addressOffsetCCSOffset, lriCmd->getRegisterOffset());
EXPECT_EQ(NEO::ImplicitScalingDispatch<FamilyType>::getTimeStampPostSyncOffset(), lriCmd->getDataDword());
pipeControlList = findAll<PIPE_CONTROL *>(++lriItor, cmdList.end());
}
auto pipeControlList = findAll<PIPE_CONTROL *>(++lriItor, cmdList.end());
ASSERT_NE(0u, pipeControlList.size());
auto endLriItor = cmdList.rbegin();

View File

@@ -2826,6 +2826,11 @@ TEST_F(EventTests, givenRegularEventUseMultiplePacketsWhenHostSignalThenExpectAl
eventDesc.index = 0;
eventDesc.signal = 0;
eventDesc.wait = 0;
constexpr uint32_t packetsUsed = 4u;
eventPool->setEventSize(static_cast<uint32_t>(alignUp(packetsUsed * device->getGfxCoreHelper().getSingleTimestampPacketSize(), 64)));
auto event = std::unique_ptr<L0::EventImp<uint32_t>>(static_cast<L0::EventImp<uint32_t> *>(L0::Event::create<uint32_t>(eventPool.get(),
&eventDesc,
device)));
@@ -2835,7 +2840,6 @@ TEST_F(EventTests, givenRegularEventUseMultiplePacketsWhenHostSignalThenExpectAl
EXPECT_EQ(*hostAddr, Event::STATE_INITIAL);
EXPECT_EQ(1u, event->getPacketsInUse());
constexpr uint32_t packetsUsed = 4u;
event->setPacketsInUse(packetsUsed);
event->setEventTimestampFlag(false);
event->hostSignal();
@@ -2849,6 +2853,11 @@ TEST_F(EventUsedPacketSignalTests, givenEventUseMultiplePacketsWhenHostSignalThe
eventDesc.index = 0;
eventDesc.signal = 0;
eventDesc.wait = 0;
constexpr uint32_t packetsUsed = 4u;
eventPool->setEventSize(static_cast<uint32_t>(alignUp(packetsUsed * device->getGfxCoreHelper().getSingleTimestampPacketSize(), 64)));
auto event = std::unique_ptr<L0::EventImp<uint32_t>>(static_cast<L0::EventImp<uint32_t> *>(L0::Event::create<uint32_t>(eventPool.get(),
&eventDesc,
device)));
@@ -2861,7 +2870,6 @@ TEST_F(EventUsedPacketSignalTests, givenEventUseMultiplePacketsWhenHostSignalThe
EXPECT_EQ(Event::STATE_INITIAL, *hostAddr);
EXPECT_EQ(1u, event->getPacketsInUse());
constexpr uint32_t packetsUsed = 4u;
event->setPacketsInUse(packetsUsed);
event->setEventTimestampFlag(false);
@@ -3409,7 +3417,7 @@ HWTEST_F(EventTests, GivenCsrTbxModeWhenEventCreatedAndSignaledThenEventAllocati
EXPECT_EQ(2u, ultCsr.writeMemoryParams.callCount);
EXPECT_EQ(eventAllocation, ultCsr.writeMemoryParams.latestGfxAllocation);
EXPECT_TRUE(ultCsr.writeMemoryParams.latestChunkedMode);
EXPECT_EQ(sizeof(uint64_t), ultCsr.writeMemoryParams.latestChunkSize);
EXPECT_EQ(event->getSinglePacketSize(), ultCsr.writeMemoryParams.latestChunkSize);
EXPECT_EQ(0u, ultCsr.writeMemoryParams.latestGpuVaChunkOffset);
EXPECT_TRUE(eventAllocation->isTbxWritable(expectedBanks));
@@ -3428,7 +3436,7 @@ HWTEST_F(EventTests, GivenCsrTbxModeWhenEventCreatedAndSignaledThenEventAllocati
EXPECT_EQ(3u, ultCsr.writeMemoryParams.callCount);
EXPECT_EQ(eventAllocation, ultCsr.writeMemoryParams.latestGfxAllocation);
EXPECT_TRUE(ultCsr.writeMemoryParams.latestChunkedMode);
EXPECT_EQ(sizeof(uint64_t), ultCsr.writeMemoryParams.latestChunkSize);
EXPECT_EQ(event->getSinglePacketSize(), ultCsr.writeMemoryParams.latestChunkSize);
EXPECT_EQ(0u, ultCsr.writeMemoryParams.latestGpuVaChunkOffset);
EXPECT_TRUE(eventAllocation->isTbxWritable(expectedBanks));

View File

@@ -888,11 +888,12 @@ HWTEST2_F(L0GfxCoreHelperTest, givenL0GfxCoreHelperOnGenPlatformsWhenGettingPlat
EXPECT_FALSE(l0GfxCoreHelper.platformSupportsImmediateComputeFlushTask());
}
// Unit test for L0GfxCoreHelper::hasUnifiedPostSyncAllocationLayout().
// NOTE(review): this span looks like a rendered diff hunk with its +/- markers stripped, so
// the first HWTEST_F header and the EXPECT_FALSE line are presumably the removed (pre-change)
// lines, and the second header plus EXPECT_EQ the added ones — confirm against the repository.
HWTEST_F(L0GfxCoreHelperTest, whenAskingForUnifiedPostSyncAllocLayoutThenReturnFalse) {
HWTEST_F(L0GfxCoreHelperTest, whenAskingForUnifiedPostSyncAllocLayoutThenCheckImmWriteOffset) {
MockExecutionEnvironment executionEnvironment;
// Fetch the L0 helper from the first root device environment of the mock execution environment.
auto &l0GfxCoreHelper = executionEnvironment.rootDeviceEnvironments[0]->getHelper<L0GfxCoreHelper>();
EXPECT_FALSE(l0GfxCoreHelper.hasUnifiedPostSyncAllocationLayout());
// The helper's answer must match whether the immediate-write and timestamp post-sync offsets
// reported by ImplicitScalingDispatch are equal.
EXPECT_EQ((NEO::ImplicitScalingDispatch<FamilyType>::getImmediateWritePostSyncOffset() == NEO::ImplicitScalingDispatch<FamilyType>::getTimeStampPostSyncOffset()),
l0GfxCoreHelper.hasUnifiedPostSyncAllocationLayout());
}
HWTEST_F(L0GfxCoreHelperTest, whenAskingForImmediateWritePostSyncOffsetThenReturnValueFromImplicitScalingHelper) {