mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-30 01:35:20 +08:00
Fix issues in signal all event packets 7/n
This fix is a refactor that improves a few parts of the code:
- code is easier to analyze, read and maintain
- dispatching process and common code are unified and reused
- signaling of all event packets is incorporated in shared code
- number of post-sync hw commands is optimized thanks to multi-tile post-sync capabilities

Related-To: NEO-7490
Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
614928ed45
commit
8f2af28b11
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2020-2022 Intel Corporation
|
||||
* Copyright (C) 2020-2023 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -53,6 +53,12 @@ struct CmdListFillKernelArguments {
|
||||
uint32_t patternSizeInEls = 0;
|
||||
};
|
||||
|
||||
// Describes a series of post-sync operations to be dispatched for an event.
struct CmdListEventOperation {
    // number of bytes the GPU address is advanced by after each dispatched operation
    size_t operationOffset = 0;
    // number of post-sync operations to dispatch
    uint32_t operationCount = 0;
    // forwarded to the dispatch helpers as their workload-partition flag
    bool workPartitionOperation = false;
};
|
||||
|
||||
struct EventPool;
|
||||
struct Event;
|
||||
|
||||
@@ -294,8 +300,12 @@ struct CommandListCoreFamily : CommandListImp {
|
||||
compactL3FlushEvent(dcFlush);
|
||||
}
|
||||
void allocateKernelPrivateMemoryIfNeeded(Kernel *kernel, uint32_t sizePerHwThread);
|
||||
void setRemainingEventPackets(Event *event, uint32_t value);
|
||||
void waitOnRemainingEventPackets(Event *event);
|
||||
CmdListEventOperation estimateEventPostSync(Event *event, uint32_t operations);
|
||||
void dispatchPostSyncCopy(uint64_t gpuAddress, uint32_t value, bool workloadPartition);
|
||||
void dispatchPostSyncCompute(uint64_t gpuAddress, uint32_t value, bool workloadPartition);
|
||||
void dispatchPostSyncCommands(const CmdListEventOperation &eventOperations, uint64_t gpuAddress, uint32_t value);
|
||||
void dispatchEventPostSyncOperation(Event *event, uint32_t value, bool omitFirstOperation, bool useMax, bool useLastPipeControl);
|
||||
|
||||
size_t cmdListCurrentStartOffset = 0;
|
||||
bool containsAnyKernel = false;
|
||||
|
||||
@@ -380,10 +380,6 @@ template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendEventReset(ze_event_handle_t hEvent) {
|
||||
auto event = Event::fromHandle(hEvent);
|
||||
|
||||
uint64_t baseAddr = event->getGpuAddress(this->device);
|
||||
uint32_t packetsToReset = event->getPacketsInUse();
|
||||
bool appendPipeControlWithPostSync = false;
|
||||
|
||||
NEO::Device *neoDevice = device->getNEODevice();
|
||||
uint32_t callId = 0;
|
||||
if (NEO::DebugManager.flags.EnableSWTags.get()) {
|
||||
@@ -395,65 +391,17 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendEventReset(ze_event_hand
|
||||
callId = neoDevice->getRootDeviceEnvironment().tagsManager->currentCallCount;
|
||||
}
|
||||
|
||||
if (event->isUsingContextEndOffset()) {
|
||||
baseAddr += event->getContextEndOffset();
|
||||
}
|
||||
|
||||
if (event->isEventTimestampFlagSet()) {
|
||||
packetsToReset = event->getMaxPacketsCount();
|
||||
}
|
||||
event->resetPackets(false);
|
||||
event->disableHostCaching(this->cmdListType == CommandList::CommandListType::TYPE_REGULAR);
|
||||
commandContainer.addToResidencyContainer(&event->getAllocation(this->device));
|
||||
const auto &hwInfo = this->device->getHwInfo();
|
||||
if (isCopyOnly()) {
|
||||
NEO::MiFlushArgs args;
|
||||
args.commandWithPostSync = true;
|
||||
for (uint32_t i = 0u; i < packetsToReset; i++) {
|
||||
NEO::EncodeMiFlushDW<GfxFamily>::programMiFlushDw(*commandContainer.getCommandStream(),
|
||||
baseAddr,
|
||||
Event::STATE_CLEARED, args, hwInfo);
|
||||
baseAddr += event->getSinglePacketSize();
|
||||
}
|
||||
if ((this->signalAllEventPackets) && (packetsToReset < event->getMaxPacketsCount())) {
|
||||
setRemainingEventPackets(event, Event::STATE_CLEARED);
|
||||
}
|
||||
} else {
|
||||
bool applyScope = event->signalScope;
|
||||
uint32_t packetsToResetUsingSdi = packetsToReset;
|
||||
if (applyScope || event->isEventTimestampFlagSet()) {
|
||||
UNRECOVERABLE_IF(packetsToReset == 0);
|
||||
packetsToResetUsingSdi = packetsToReset - 1;
|
||||
appendPipeControlWithPostSync = true;
|
||||
}
|
||||
|
||||
for (uint32_t i = 0u; i < packetsToResetUsingSdi; i++) {
|
||||
NEO::EncodeStoreMemory<GfxFamily>::programStoreDataImm(
|
||||
*commandContainer.getCommandStream(),
|
||||
baseAddr,
|
||||
Event::STATE_CLEARED,
|
||||
0u,
|
||||
false,
|
||||
false);
|
||||
baseAddr += event->getSinglePacketSize();
|
||||
}
|
||||
// default state of event is single packet, handle case when reset is used 1st, launchkernel 2nd - just reset all packets then, use max
|
||||
bool useMaxPackets = event->isEventTimestampFlagSet() || (event->getPacketsInUse() < this->partitionCount);
|
||||
|
||||
if ((this->signalAllEventPackets) && (packetsToReset < event->getMaxPacketsCount())) {
|
||||
setRemainingEventPackets(event, Event::STATE_CLEARED);
|
||||
}
|
||||
|
||||
if (appendPipeControlWithPostSync) {
|
||||
NEO::PipeControlArgs args;
|
||||
args.dcFlushEnable = getDcFlushRequired(!!event->signalScope);
|
||||
NEO::MemorySynchronizationCommands<GfxFamily>::addBarrierWithPostSyncOperation(
|
||||
*commandContainer.getCommandStream(),
|
||||
NEO::PostSyncMode::ImmediateData,
|
||||
baseAddr,
|
||||
Event::STATE_CLEARED,
|
||||
hwInfo,
|
||||
args);
|
||||
}
|
||||
bool appendPipeControlWithPostSync = (!isCopyOnly()) && (!!event->signalScope || event->isEventTimestampFlagSet());
|
||||
dispatchEventPostSyncOperation(event, Event::STATE_CLEARED, false, useMaxPackets, appendPipeControlWithPostSync);
|
||||
|
||||
if (!isCopyOnly()) {
|
||||
if (this->partitionCount > 1) {
|
||||
appendMultiTileBarrier(*neoDevice);
|
||||
}
|
||||
@@ -1847,38 +1795,9 @@ void CommandListCoreFamily<gfxCoreFamily>::appendSignalEventPostWalker(Event *ev
|
||||
} else {
|
||||
event->resetKernelCountAndPacketUsedCount();
|
||||
commandContainer.addToResidencyContainer(&event->getAllocation(this->device));
|
||||
uint64_t baseAddr = event->getGpuAddress(this->device);
|
||||
if (event->isUsingContextEndOffset()) {
|
||||
baseAddr += event->getContextEndOffset();
|
||||
}
|
||||
|
||||
const auto &hwInfo = this->device->getHwInfo();
|
||||
if (isCopyOnly()) {
|
||||
NEO::MiFlushArgs args;
|
||||
args.commandWithPostSync = true;
|
||||
NEO::EncodeMiFlushDW<GfxFamily>::programMiFlushDw(*commandContainer.getCommandStream(), baseAddr, Event::STATE_SIGNALED,
|
||||
args, hwInfo);
|
||||
if (this->signalAllEventPackets && (event->getPacketsInUse() < event->getMaxPacketsCount())) {
|
||||
setRemainingEventPackets(event, Event::STATE_SIGNALED);
|
||||
}
|
||||
} else {
|
||||
NEO::PipeControlArgs args;
|
||||
args.dcFlushEnable = getDcFlushRequired(!!event->signalScope);
|
||||
if (this->partitionCount > 1) {
|
||||
args.workloadPartitionOffset = true;
|
||||
event->setPacketsInUse(this->partitionCount);
|
||||
}
|
||||
if (this->signalAllEventPackets && (event->getPacketsInUse() < event->getMaxPacketsCount())) {
|
||||
setRemainingEventPackets(event, Event::STATE_SIGNALED);
|
||||
}
|
||||
NEO::MemorySynchronizationCommands<GfxFamily>::addBarrierWithPostSyncOperation(
|
||||
*commandContainer.getCommandStream(),
|
||||
NEO::PostSyncMode::ImmediateData,
|
||||
baseAddr,
|
||||
Event::STATE_SIGNALED,
|
||||
hwInfo,
|
||||
args);
|
||||
}
|
||||
event->setPacketsInUse(this->partitionCount);
|
||||
dispatchEventPostSyncOperation(event, Event::STATE_SIGNALED, false, false, !isCopyOnly());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1895,9 +1814,7 @@ void CommandListCoreFamily<gfxCoreFamily>::appendEventForProfilingCopyCommand(Ev
|
||||
NEO::MiFlushArgs args;
|
||||
const auto &hwInfo = this->device->getHwInfo();
|
||||
NEO::EncodeMiFlushDW<GfxFamily>::programMiFlushDw(*commandContainer.getCommandStream(), 0, 0, args, hwInfo);
|
||||
if (this->signalAllEventPackets && (event->getPacketsInUse() < event->getMaxPacketsCount())) {
|
||||
setRemainingEventPackets(event, Event::STATE_SIGNALED);
|
||||
}
|
||||
dispatchEventPostSyncOperation(event, Event::STATE_SIGNALED, true, false, false);
|
||||
}
|
||||
appendWriteKernelTimestamp(event, beforeWalker, false, false);
|
||||
}
|
||||
@@ -2017,7 +1934,6 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendSignalEvent(ze_event_han
|
||||
event->resetKernelCountAndPacketUsedCount();
|
||||
|
||||
commandContainer.addToResidencyContainer(&event->getAllocation(this->device));
|
||||
uint64_t baseAddr = event->getGpuAddress(this->device);
|
||||
NEO::Device *neoDevice = device->getNEODevice();
|
||||
uint32_t callId = 0;
|
||||
if (NEO::DebugManager.flags.EnableSWTags.get()) {
|
||||
@@ -2028,53 +1944,10 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendSignalEvent(ze_event_han
|
||||
++neoDevice->getRootDeviceEnvironment().tagsManager->currentCallCount);
|
||||
callId = neoDevice->getRootDeviceEnvironment().tagsManager->currentCallCount;
|
||||
}
|
||||
size_t eventSignalOffset = 0;
|
||||
|
||||
if (event->isUsingContextEndOffset()) {
|
||||
eventSignalOffset = event->getContextEndOffset();
|
||||
}
|
||||
|
||||
const auto &hwInfo = this->device->getHwInfo();
|
||||
if (isCopyOnly()) {
|
||||
NEO::MiFlushArgs args;
|
||||
args.commandWithPostSync = true;
|
||||
NEO::EncodeMiFlushDW<GfxFamily>::programMiFlushDw(*commandContainer.getCommandStream(), ptrOffset(baseAddr, eventSignalOffset),
|
||||
Event::STATE_SIGNALED, args, hwInfo);
|
||||
|
||||
if (this->signalAllEventPackets && (event->getPacketsInUse() < event->getMaxPacketsCount())) {
|
||||
setRemainingEventPackets(event, Event::STATE_SIGNALED);
|
||||
}
|
||||
} else {
|
||||
NEO::PipeControlArgs args;
|
||||
bool applyScope = !!event->signalScope;
|
||||
args.dcFlushEnable = getDcFlushRequired(applyScope);
|
||||
if (this->partitionCount > 1) {
|
||||
event->setPacketsInUse(this->partitionCount);
|
||||
args.workloadPartitionOffset = true;
|
||||
}
|
||||
|
||||
if (this->signalAllEventPackets && (event->getPacketsInUse() < event->getMaxPacketsCount())) {
|
||||
setRemainingEventPackets(event, Event::STATE_SIGNALED);
|
||||
}
|
||||
|
||||
if (applyScope || event->isEventTimestampFlagSet()) {
|
||||
NEO::MemorySynchronizationCommands<GfxFamily>::addBarrierWithPostSyncOperation(
|
||||
*commandContainer.getCommandStream(),
|
||||
NEO::PostSyncMode::ImmediateData,
|
||||
ptrOffset(baseAddr, eventSignalOffset),
|
||||
Event::STATE_SIGNALED,
|
||||
hwInfo,
|
||||
args);
|
||||
} else {
|
||||
NEO::EncodeStoreMemory<GfxFamily>::programStoreDataImm(
|
||||
*commandContainer.getCommandStream(),
|
||||
ptrOffset(baseAddr, eventSignalOffset),
|
||||
Event::STATE_SIGNALED,
|
||||
0u,
|
||||
false,
|
||||
args.workloadPartitionOffset);
|
||||
}
|
||||
}
|
||||
event->setPacketsInUse(this->partitionCount);
|
||||
bool appendPipeControlWithPostSync = (!isCopyOnly()) && (!!event->signalScope || event->isEventTimestampFlagSet());
|
||||
dispatchEventPostSyncOperation(event, Event::STATE_SIGNALED, false, false, appendPipeControlWithPostSync);
|
||||
|
||||
if (NEO::DebugManager.flags.EnableSWTags.get()) {
|
||||
neoDevice->getRootDeviceEnvironment().tagsManager->insertTag<GfxFamily, NEO::SWTags::CallNameEndTag>(
|
||||
@@ -2232,9 +2105,7 @@ void CommandListCoreFamily<gfxCoreFamily>::appendEventForProfiling(Event *event,
|
||||
bool workloadPartition = setupTimestampEventForMultiTile(event);
|
||||
appendWriteKernelTimestamp(event, beforeWalker, true, workloadPartition);
|
||||
} else {
|
||||
if (this->signalAllEventPackets && (event->getPacketsInUse() < event->getMaxPacketsCount())) {
|
||||
setRemainingEventPackets(event, Event::STATE_SIGNALED);
|
||||
}
|
||||
dispatchEventPostSyncOperation(event, Event::STATE_SIGNALED, true, false, false);
|
||||
|
||||
const auto &hwInfo = this->device->getHwInfo();
|
||||
NEO::PipeControlArgs args;
|
||||
@@ -2843,56 +2714,6 @@ void CommandListCoreFamily<gfxCoreFamily>::allocateKernelPrivateMemoryIfNeeded(K
|
||||
}
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
void CommandListCoreFamily<gfxCoreFamily>::setRemainingEventPackets(Event *event, uint32_t value) {
|
||||
uint32_t packetUsed = event->getPacketsInUse();
|
||||
uint32_t packetsRemaining = event->getMaxPacketsCount() - packetUsed;
|
||||
if (packetsRemaining == 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
uint64_t gpuAddress = event->getGpuAddress(this->device);
|
||||
size_t packetSize = event->getSinglePacketSize();
|
||||
gpuAddress += packetSize * packetUsed;
|
||||
if (event->isUsingContextEndOffset()) {
|
||||
gpuAddress += event->getContextEndOffset();
|
||||
}
|
||||
|
||||
uint32_t operationsRemaining = packetsRemaining;
|
||||
size_t operationOffset = packetSize;
|
||||
bool partitionEnabled = false;
|
||||
|
||||
if ((this->partitionCount > 1) && (packetsRemaining % this->partitionCount == 0)) {
|
||||
operationsRemaining = operationsRemaining / this->partitionCount;
|
||||
operationOffset = operationOffset * this->partitionCount;
|
||||
partitionEnabled = true;
|
||||
}
|
||||
|
||||
for (uint32_t i = 0; i < operationsRemaining; i++) {
|
||||
if (isCopyOnly()) {
|
||||
const auto &hwInfo = this->device->getHwInfo();
|
||||
NEO::MiFlushArgs args;
|
||||
args.commandWithPostSync = true;
|
||||
NEO::EncodeMiFlushDW<GfxFamily>::programMiFlushDw(
|
||||
*commandContainer.getCommandStream(),
|
||||
gpuAddress,
|
||||
value,
|
||||
args,
|
||||
hwInfo);
|
||||
} else {
|
||||
NEO::EncodeStoreMemory<GfxFamily>::programStoreDataImm(
|
||||
*commandContainer.getCommandStream(),
|
||||
gpuAddress,
|
||||
value,
|
||||
0u,
|
||||
false,
|
||||
partitionEnabled);
|
||||
}
|
||||
|
||||
gpuAddress += operationOffset;
|
||||
}
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
void CommandListCoreFamily<gfxCoreFamily>::waitOnRemainingEventPackets(Event *event) {
|
||||
using COMPARE_OPERATION = typename GfxFamily::MI_SEMAPHORE_WAIT::COMPARE_OPERATION;
|
||||
@@ -2919,4 +2740,97 @@ void CommandListCoreFamily<gfxCoreFamily>::waitOnRemainingEventPackets(Event *ev
|
||||
}
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
CmdListEventOperation CommandListCoreFamily<gfxCoreFamily>::estimateEventPostSync(Event *event, uint32_t operations) {
|
||||
CmdListEventOperation ret;
|
||||
|
||||
UNRECOVERABLE_IF(operations & (this->partitionCount - 1));
|
||||
|
||||
ret.operationCount = operations / this->partitionCount;
|
||||
ret.operationOffset = event->getSinglePacketSize() * this->partitionCount;
|
||||
ret.workPartitionOperation = this->partitionCount > 1;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
void CommandListCoreFamily<gfxCoreFamily>::dispatchPostSyncCopy(uint64_t gpuAddress, uint32_t value, bool workloadPartition) {
|
||||
const auto &hwInfo = this->device->getHwInfo();
|
||||
|
||||
NEO::MiFlushArgs miFlushArgs;
|
||||
miFlushArgs.commandWithPostSync = true;
|
||||
NEO::EncodeMiFlushDW<GfxFamily>::programMiFlushDw(
|
||||
*commandContainer.getCommandStream(),
|
||||
gpuAddress,
|
||||
value,
|
||||
miFlushArgs,
|
||||
hwInfo);
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
void CommandListCoreFamily<gfxCoreFamily>::dispatchPostSyncCompute(uint64_t gpuAddress, uint32_t value, bool workloadPartition) {
|
||||
NEO::EncodeStoreMemory<GfxFamily>::programStoreDataImm(
|
||||
*commandContainer.getCommandStream(),
|
||||
gpuAddress,
|
||||
value,
|
||||
0u,
|
||||
false,
|
||||
workloadPartition);
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
void CommandListCoreFamily<gfxCoreFamily>::dispatchPostSyncCommands(const CmdListEventOperation &eventOperations, uint64_t gpuAddress, uint32_t value) {
|
||||
decltype(&CommandListCoreFamily<gfxCoreFamily>::dispatchPostSyncCompute) dispatchFunction = &CommandListCoreFamily<gfxCoreFamily>::dispatchPostSyncCompute;
|
||||
if (isCopyOnly()) {
|
||||
dispatchFunction = &CommandListCoreFamily<gfxCoreFamily>::dispatchPostSyncCopy;
|
||||
}
|
||||
|
||||
for (uint32_t i = 0; i < eventOperations.operationCount; i++) {
|
||||
(this->*dispatchFunction)(gpuAddress, value, eventOperations.workPartitionOperation);
|
||||
|
||||
gpuAddress += eventOperations.operationOffset;
|
||||
}
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
void CommandListCoreFamily<gfxCoreFamily>::dispatchEventPostSyncOperation(Event *event, uint32_t value, bool omitFirstOperation, bool useMax, bool useLastPipeControl) {
|
||||
uint32_t packets = event->getPacketsInUse();
|
||||
if (this->signalAllEventPackets || useMax) {
|
||||
packets = event->getMaxPacketsCount();
|
||||
}
|
||||
auto eventPostSync = estimateEventPostSync(event, packets);
|
||||
|
||||
uint64_t gpuAddress = event->getGpuAddress(this->device);
|
||||
if (event->isUsingContextEndOffset()) {
|
||||
gpuAddress += event->getContextEndOffset();
|
||||
}
|
||||
if (omitFirstOperation) {
|
||||
gpuAddress += eventPostSync.operationOffset;
|
||||
eventPostSync.operationCount--;
|
||||
}
|
||||
if (useLastPipeControl) {
|
||||
eventPostSync.operationCount--;
|
||||
}
|
||||
|
||||
dispatchPostSyncCommands(eventPostSync, gpuAddress, value);
|
||||
|
||||
if (useLastPipeControl) {
|
||||
const auto &hwInfo = this->device->getHwInfo();
|
||||
|
||||
NEO::PipeControlArgs pipeControlArgs;
|
||||
pipeControlArgs.dcFlushEnable = getDcFlushRequired(!!event->signalScope);
|
||||
pipeControlArgs.workloadPartitionOffset = eventPostSync.workPartitionOperation;
|
||||
|
||||
gpuAddress += eventPostSync.operationCount * eventPostSync.operationOffset;
|
||||
|
||||
NEO::MemorySynchronizationCommands<GfxFamily>::addBarrierWithPostSyncOperation(
|
||||
*commandContainer.getCommandStream(),
|
||||
NEO::PostSyncMode::ImmediateData,
|
||||
gpuAddress,
|
||||
value,
|
||||
hwInfo,
|
||||
pipeControlArgs);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace L0
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2021-2022 Intel Corporation
|
||||
* Copyright (C) 2021-2023 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -294,8 +294,16 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
|
||||
if (l3FlushEnable) {
|
||||
programEventL3Flush<gfxCoreFamily>(event, this->device, partitionCount, commandContainer);
|
||||
}
|
||||
if (this->signalAllEventPackets) {
|
||||
setRemainingEventPackets(event, Event::STATE_SIGNALED);
|
||||
if (this->signalAllEventPackets && event->getPacketsInUse() < event->getMaxPacketsCount()) {
|
||||
uint32_t packets = event->getMaxPacketsCount() - event->getPacketsInUse();
|
||||
CmdListEventOperation remainingPacketsOperation = estimateEventPostSync(event, packets);
|
||||
|
||||
uint64_t eventAddress = event->getGpuAddress(device) + event->getSinglePacketSize() * event->getPacketsInUse();
|
||||
if (event->isUsingContextEndOffset()) {
|
||||
eventAddress += event->getContextEndOffset();
|
||||
}
|
||||
|
||||
dispatchPostSyncCommands(remainingPacketsOperation, eventAddress, Event::STATE_SIGNALED);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2020-2022 Intel Corporation
|
||||
* Copyright (C) 2020-2023 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -71,7 +71,6 @@ struct WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>
|
||||
using BaseClass::pipeControlMultiKernelEventSync;
|
||||
using BaseClass::pipelineSelectStateTracking;
|
||||
using BaseClass::requiredStreamState;
|
||||
using BaseClass::setRemainingEventPackets;
|
||||
using BaseClass::setupTimestampEventForMultiTile;
|
||||
using BaseClass::signalAllEventPackets;
|
||||
using BaseClass::stateComputeModeTracking;
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2020-2022 Intel Corporation
|
||||
* Copyright (C) 2020-2023 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -407,7 +407,6 @@ HWTEST2_F(CommandListAppendUsedPacketSignalEvent,
|
||||
auto &hwInfo = device->getNEODevice()->getHardwareInfo();
|
||||
|
||||
size_t expectedSize = NEO::MemorySynchronizationCommands<GfxFamily>::getSizeForBarrierWithPostSyncOperation(hwInfo, false) +
|
||||
((packets - 1) * sizeof(MI_STORE_DATA_IMM)) +
|
||||
commandList->estimateBufferSizeMultiTileBarrier(hwInfo);
|
||||
size_t usedSize = cmdStream->getUsed();
|
||||
EXPECT_EQ(expectedSize, usedSize);
|
||||
@@ -418,10 +417,9 @@ HWTEST2_F(CommandListAppendUsedPacketSignalEvent,
|
||||
cmdStream->getCpuBase(),
|
||||
usedSize));
|
||||
|
||||
auto itorSdi = find<MI_STORE_DATA_IMM *>(cmdList.begin(), cmdList.end());
|
||||
auto cmd = genCmdCast<MI_STORE_DATA_IMM *>(*itorSdi);
|
||||
EXPECT_EQ(gpuAddress, cmd->getAddress());
|
||||
gpuAddress += event->getSinglePacketSize();
|
||||
auto itorSdi = findAll<MI_STORE_DATA_IMM *>(cmdList.begin(), cmdList.end());
|
||||
// multi tile barrier self-cleanup commands
|
||||
ASSERT_EQ(2u, itorSdi.size());
|
||||
|
||||
auto pipeControlList = findAll<PIPE_CONTROL *>(cmdList.begin(), cmdList.end());
|
||||
ASSERT_NE(0u, pipeControlList.size());
|
||||
@@ -434,8 +432,9 @@ HWTEST2_F(CommandListAppendUsedPacketSignalEvent,
|
||||
EXPECT_TRUE(cmd->getCommandStreamerStallEnable());
|
||||
EXPECT_EQ(gpuAddress, NEO::UnitTestHelper<FamilyType>::getPipeControlPostSyncAddress(*cmd));
|
||||
EXPECT_EQ(MemorySynchronizationCommands<FamilyType>::getDcFlushEnable(true, *defaultHwInfo), cmd->getDcFlushEnable());
|
||||
EXPECT_TRUE(cmd->getWorkloadPartitionIdOffsetEnable());
|
||||
postSyncFound++;
|
||||
gpuAddress += event->getSinglePacketSize();
|
||||
gpuAddress += event->getSinglePacketSize() * commandList->partitionCount;
|
||||
postSyncPipeControlItor = it;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2021-2022 Intel Corporation
|
||||
* Copyright (C) 2021-2023 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -553,76 +553,6 @@ HWTEST2_F(CommandListAppendLaunchKernelMultiTileCompactL3FlushEnabledTest,
|
||||
testAppendLaunchKernelAndL3Flush<gfxCoreFamily>(input, arg);
|
||||
}
|
||||
|
||||
// Verifies that setRemainingEventPackets on a copy command list programs MI_FLUSH_DW
// post-sync writes of STATE_SIGNALED for every event packet past the packets in use.
HWTEST2_F(CommandListTests, GivenCopyCommandListWhenSettingRemainingEventPacketsThenExpectMiDwordFlushCommandsProgrammingPackets, IsAtLeastXeHpCore) {
    using MI_FLUSH_DW = typename FamilyType::MI_FLUSH_DW;

    DebugManagerStateRestore restorer;

    NEO::DebugManager.flags.UseDynamicEventPacketsCount.set(1);
    NEO::DebugManager.flags.SignalAllEventPackets.set(1);
    NEO::DebugManager.flags.UsePipeControlMultiKernelEventSync.set(0);
    NEO::DebugManager.flags.CompactL3FlushEventPacket.set(0);

    auto commandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
    auto result = commandList->initialize(device, NEO::EngineGroupType::Copy, 0u);
    ASSERT_EQ(ZE_RESULT_SUCCESS, result);

    auto cmdStream = commandList->commandContainer.getCommandStream();

    ze_event_pool_desc_t eventPoolDesc = {};
    eventPoolDesc.count = 1;
    eventPoolDesc.flags = 0;

    ze_event_desc_t eventDesc = {};
    eventDesc.index = 0;
    eventDesc.signal = 0;

    auto eventPool = std::unique_ptr<L0::EventPool>(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result));
    // the pool is used right below, so a failed creation must stop the test
    ASSERT_EQ(ZE_RESULT_SUCCESS, result);
    auto event = std::unique_ptr<L0::Event>(L0::Event::create<uint32_t>(eventPool.get(), &eventDesc, device));
    ASSERT_NE(nullptr, event.get());

    uint32_t packetUsed = event->getPacketsInUse();
    uint32_t remainingPackets = event->getMaxPacketsCount() - packetUsed;

    size_t sizeBefore = cmdStream->getUsed();
    commandList->setRemainingEventPackets(event.get(), Event::STATE_SIGNALED);
    size_t sizeAfter = cmdStream->getUsed();
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);

    GenCmdList cmdList;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        cmdList,
        ptrOffset(cmdStream->getCpuBase(), sizeBefore),
        (sizeAfter - sizeBefore)));

    // a non-zero MI_FLUSH_DW workaround size doubles the number of expected MI_FLUSH_DW commands
    uint32_t expectedMiFlushCount = remainingPackets;
    if (NEO::EncodeMiFlushDW<FamilyType>::getMiFlushDwWaSize() > 0) {
        expectedMiFlushCount *= 2;
    }

    auto miFlushList = findAll<MI_FLUSH_DW *>(cmdList.begin(), cmdList.end());
    // fatal assertion: the loop below indexes miFlushList up to expectedMiFlushCount
    ASSERT_EQ(expectedMiFlushCount, static_cast<uint32_t>(miFlushList.size()));

    uint64_t gpuAddress = event->getGpuAddress(device);
    gpuAddress += (packetUsed * event->getSinglePacketSize());
    if (event->isUsingContextEndOffset()) {
        gpuAddress += event->getContextEndOffset();
    }

    for (uint32_t i = 0; i < expectedMiFlushCount; i++) {
        // with the doubled count only the odd-indexed commands are checked
        if ((expectedMiFlushCount == 2 * remainingPackets) && (i % 2 == 0)) {
            continue;
        }
        auto cmd = genCmdCast<MI_FLUSH_DW *>(*miFlushList[i]);
        EXPECT_EQ(gpuAddress, cmd->getDestinationAddress());
        EXPECT_EQ(Event::STATE_SIGNALED, cmd->getImmediateData());
        EXPECT_EQ(MI_FLUSH_DW::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA_QWORD, cmd->getPostSyncOperation());

        gpuAddress += event->getSinglePacketSize();
    }
}
|
||||
|
||||
template <uint32_t multiTile, uint32_t limitEventPacketes, uint32_t copyOnly>
|
||||
struct CommandListSignalAllEventPacketFixture : public ModuleFixture {
|
||||
void setUp() {
|
||||
@@ -719,7 +649,8 @@ struct CommandListSignalAllEventPacketFixture : public ModuleFixture {
|
||||
gpuAddress += event->getContextEndOffset();
|
||||
}
|
||||
|
||||
for (uint32_t i = extraCleanupStoreDataImm; i < itorStoreDataImm.size(); i++) {
|
||||
uint32_t startIndex = extraCleanupStoreDataImm;
|
||||
for (uint32_t i = startIndex; i < remainingPackets + extraCleanupStoreDataImm; i++) {
|
||||
auto cmd = genCmdCast<MI_STORE_DATA_IMM *>(*itorStoreDataImm[i]);
|
||||
EXPECT_EQ(gpuAddress, cmd->getAddress());
|
||||
EXPECT_FALSE(cmd->getStoreQword());
|
||||
@@ -739,6 +670,8 @@ struct CommandListSignalAllEventPacketFixture : public ModuleFixture {
|
||||
using FamilyType = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
|
||||
using MI_STORE_DATA_IMM = typename FamilyType::MI_STORE_DATA_IMM;
|
||||
using MI_FLUSH_DW = typename FamilyType::MI_FLUSH_DW;
|
||||
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
|
||||
using POST_SYNC_OPERATION = typename FamilyType::PIPE_CONTROL::POST_SYNC_OPERATION;
|
||||
|
||||
auto commandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
|
||||
auto engineType = copyOnly == 1 ? NEO::EngineGroupType::Copy : NEO::EngineGroupType::Compute;
|
||||
@@ -802,6 +735,7 @@ struct CommandListSignalAllEventPacketFixture : public ModuleFixture {
|
||||
|
||||
} else {
|
||||
auto itorStoreDataImm = findAll<MI_STORE_DATA_IMM *>(cmdList.begin(), cmdList.end());
|
||||
auto itorPipeControl = findAll<PIPE_CONTROL *>(cmdList.begin(), cmdList.end());
|
||||
|
||||
uint64_t gpuAddress = event->getGpuAddress(device);
|
||||
if (event->isUsingContextEndOffset()) {
|
||||
@@ -825,29 +759,34 @@ struct CommandListSignalAllEventPacketFixture : public ModuleFixture {
|
||||
} else {
|
||||
EXPECT_FALSE(cmd->getWorkloadPartitionIdOffsetEnable());
|
||||
}
|
||||
} else {
|
||||
uint32_t postSyncPipeControls = 0;
|
||||
for (auto it : itorPipeControl) {
|
||||
auto cmd = genCmdCast<PIPE_CONTROL *>(*it);
|
||||
if (cmd->getPostSyncOperation() == POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA) {
|
||||
postSyncPipeControls++;
|
||||
EXPECT_EQ(gpuAddress, NEO::UnitTestHelper<FamilyType>::getPipeControlPostSyncAddress(*cmd));
|
||||
EXPECT_EQ(Event::STATE_SIGNALED, cmd->getImmediateData());
|
||||
if constexpr (multiTile == 1) {
|
||||
EXPECT_TRUE(cmd->getWorkloadPartitionIdOffsetEnable());
|
||||
} else {
|
||||
EXPECT_FALSE(cmd->getWorkloadPartitionIdOffsetEnable());
|
||||
}
|
||||
}
|
||||
}
|
||||
EXPECT_EQ(1u, postSyncPipeControls);
|
||||
}
|
||||
} else {
|
||||
uint32_t packetUsed = event->getPacketsInUse();
|
||||
uint32_t remainingPackets = event->getMaxPacketsCount() - packetUsed;
|
||||
EXPECT_EQ(0u, remainingPackets % commandList->partitionCount);
|
||||
remainingPackets /= commandList->partitionCount;
|
||||
ASSERT_EQ(remainingPackets + extraSignalStoreDataImm, static_cast<uint32_t>(itorStoreDataImm.size()));
|
||||
|
||||
if (extraSignalStoreDataImm == 1) {
|
||||
auto cmd = genCmdCast<MI_STORE_DATA_IMM *>(*itorStoreDataImm[itorStoreDataImm.size() - 1]);
|
||||
EXPECT_EQ(gpuAddress, cmd->getAddress());
|
||||
EXPECT_FALSE(cmd->getStoreQword());
|
||||
EXPECT_EQ(Event::STATE_SIGNALED, cmd->getDataDword0());
|
||||
if constexpr (multiTile == 1) {
|
||||
EXPECT_TRUE(cmd->getWorkloadPartitionIdOffsetEnable());
|
||||
} else {
|
||||
EXPECT_FALSE(cmd->getWorkloadPartitionIdOffsetEnable());
|
||||
}
|
||||
uint32_t packets = event->getMaxPacketsCount();
|
||||
EXPECT_EQ(0u, packets % commandList->partitionCount);
|
||||
packets /= commandList->partitionCount;
|
||||
if (extraSignalStoreDataImm == 0) {
|
||||
packets--;
|
||||
}
|
||||
|
||||
gpuAddress += (packetUsed * event->getSinglePacketSize());
|
||||
ASSERT_EQ(packets, static_cast<uint32_t>(itorStoreDataImm.size()));
|
||||
|
||||
for (uint32_t i = 0; i < itorStoreDataImm.size() - extraSignalStoreDataImm; i++) {
|
||||
for (uint32_t i = 0; i < packets; i++) {
|
||||
auto cmd = genCmdCast<MI_STORE_DATA_IMM *>(*itorStoreDataImm[i]);
|
||||
EXPECT_EQ(gpuAddress, cmd->getAddress());
|
||||
EXPECT_FALSE(cmd->getStoreQword());
|
||||
@@ -859,6 +798,24 @@ struct CommandListSignalAllEventPacketFixture : public ModuleFixture {
|
||||
}
|
||||
gpuAddress += (event->getSinglePacketSize() * commandList->partitionCount);
|
||||
}
|
||||
|
||||
if (extraSignalStoreDataImm == 0) {
|
||||
uint32_t postSyncPipeControls = 0;
|
||||
for (auto it : itorPipeControl) {
|
||||
auto cmd = genCmdCast<PIPE_CONTROL *>(*it);
|
||||
if (cmd->getPostSyncOperation() == POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA) {
|
||||
postSyncPipeControls++;
|
||||
EXPECT_EQ(gpuAddress, NEO::UnitTestHelper<FamilyType>::getPipeControlPostSyncAddress(*cmd));
|
||||
EXPECT_EQ(Event::STATE_SIGNALED, cmd->getImmediateData());
|
||||
if constexpr (multiTile == 1) {
|
||||
EXPECT_TRUE(cmd->getWorkloadPartitionIdOffsetEnable());
|
||||
} else {
|
||||
EXPECT_FALSE(cmd->getWorkloadPartitionIdOffsetEnable());
|
||||
}
|
||||
}
|
||||
}
|
||||
EXPECT_EQ(1u, postSyncPipeControls);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -938,6 +895,7 @@ struct CommandListSignalAllEventPacketFixture : public ModuleFixture {
|
||||
using MI_STORE_DATA_IMM = typename FamilyType::MI_STORE_DATA_IMM;
|
||||
using MI_FLUSH_DW = typename FamilyType::MI_FLUSH_DW;
|
||||
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
|
||||
using POST_SYNC_OPERATION = typename FamilyType::PIPE_CONTROL::POST_SYNC_OPERATION;
|
||||
|
||||
auto commandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
|
||||
auto engineType = copyOnly == 1 ? NEO::EngineGroupType::Copy : NEO::EngineGroupType::Compute;
|
||||
@@ -971,6 +929,11 @@ struct CommandListSignalAllEventPacketFixture : public ModuleFixture {
|
||||
ptrOffset(cmdStream->getCpuBase(), sizeBefore),
|
||||
(sizeAfter - sizeBefore)));
|
||||
|
||||
uint64_t gpuAddress = event->getGpuAddress(device);
|
||||
if (event->isUsingContextEndOffset()) {
|
||||
gpuAddress += event->getContextEndOffset();
|
||||
}
|
||||
|
||||
if constexpr (copyOnly == 1) {
|
||||
auto itorFlushDw = findAll<MI_FLUSH_DW *>(cmdList.begin(), cmdList.end());
|
||||
|
||||
@@ -983,11 +946,6 @@ struct CommandListSignalAllEventPacketFixture : public ModuleFixture {
|
||||
expectedFlushDw *= flushCmdWaFactor;
|
||||
ASSERT_EQ(expectedFlushDw, itorFlushDw.size());
|
||||
|
||||
uint64_t gpuAddress = event->getGpuAddress(device);
|
||||
if (event->isUsingContextEndOffset()) {
|
||||
gpuAddress += event->getContextEndOffset();
|
||||
}
|
||||
|
||||
uint32_t startingSignalCmd = 0;
|
||||
if (eventPoolFlags != 0) {
|
||||
auto cmd = genCmdCast<MI_FLUSH_DW *>(*itorFlushDw[(flushCmdWaFactor - 1)]);
|
||||
@@ -1013,23 +971,46 @@ struct CommandListSignalAllEventPacketFixture : public ModuleFixture {
|
||||
|
||||
} else {
|
||||
auto itorStoreDataImm = findAll<MI_STORE_DATA_IMM *>(cmdList.begin(), cmdList.end());
|
||||
auto itorPipeControl = findAll<PIPE_CONTROL *>(cmdList.begin(), cmdList.end());
|
||||
|
||||
uint32_t expectedPostSyncPipeControls = 0;
|
||||
if (eventPoolFlags == 0) {
|
||||
expectedPostSyncPipeControls = 1;
|
||||
}
|
||||
|
||||
if constexpr (limitEventPacketes == 1) {
|
||||
constexpr uint32_t expectedStoreDataImm = 0;
|
||||
ASSERT_EQ(expectedStoreDataImm, itorStoreDataImm.size());
|
||||
} else {
|
||||
uint32_t packetUsed = event->getPacketsInUse();
|
||||
uint32_t remainingPackets = event->getMaxPacketsCount() - packetUsed;
|
||||
remainingPackets /= commandList->partitionCount;
|
||||
ASSERT_EQ(remainingPackets, static_cast<uint32_t>(itorStoreDataImm.size()));
|
||||
|
||||
uint64_t gpuAddress = event->getGpuAddress(device);
|
||||
gpuAddress += (packetUsed * event->getSinglePacketSize());
|
||||
if (event->isUsingContextEndOffset()) {
|
||||
gpuAddress += event->getContextEndOffset();
|
||||
uint32_t postSyncPipeControls = 0;
|
||||
for (auto it : itorPipeControl) {
|
||||
auto cmd = genCmdCast<PIPE_CONTROL *>(*it);
|
||||
if (cmd->getPostSyncOperation() == POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA) {
|
||||
postSyncPipeControls++;
|
||||
EXPECT_EQ(gpuAddress, NEO::UnitTestHelper<FamilyType>::getPipeControlPostSyncAddress(*cmd));
|
||||
EXPECT_EQ(Event::STATE_SIGNALED, cmd->getImmediateData());
|
||||
if constexpr (multiTile == 1) {
|
||||
EXPECT_TRUE(cmd->getWorkloadPartitionIdOffsetEnable());
|
||||
} else {
|
||||
EXPECT_FALSE(cmd->getWorkloadPartitionIdOffsetEnable());
|
||||
}
|
||||
}
|
||||
}
|
||||
EXPECT_EQ(expectedPostSyncPipeControls, postSyncPipeControls);
|
||||
|
||||
} else {
|
||||
uint32_t packets = event->getMaxPacketsCount();
|
||||
EXPECT_EQ(0u, packets % commandList->partitionCount);
|
||||
packets /= commandList->partitionCount;
|
||||
packets--;
|
||||
|
||||
ASSERT_EQ(packets, static_cast<uint32_t>(itorStoreDataImm.size()));
|
||||
|
||||
if (eventPoolFlags != 0) {
|
||||
gpuAddress += (event->getSinglePacketSize() * commandList->partitionCount);
|
||||
}
|
||||
|
||||
for (uint32_t i = 0; i < remainingPackets; i++) {
|
||||
for (uint32_t i = 0; i < packets; i++) {
|
||||
auto cmd = genCmdCast<MI_STORE_DATA_IMM *>(*itorStoreDataImm[i]);
|
||||
EXPECT_EQ(gpuAddress, cmd->getAddress());
|
||||
EXPECT_FALSE(cmd->getStoreQword());
|
||||
@@ -1041,12 +1022,22 @@ struct CommandListSignalAllEventPacketFixture : public ModuleFixture {
|
||||
}
|
||||
gpuAddress += (event->getSinglePacketSize() * commandList->partitionCount);
|
||||
}
|
||||
if (remainingPackets > 0) {
|
||||
auto lastIterator = itorStoreDataImm[itorStoreDataImm.size() - 1];
|
||||
++lastIterator;
|
||||
auto cmd = genCmdCast<PIPE_CONTROL *>(*lastIterator);
|
||||
EXPECT_NE(nullptr, cmd);
|
||||
|
||||
uint32_t postSyncPipeControls = 0;
|
||||
for (auto it : itorPipeControl) {
|
||||
auto cmd = genCmdCast<PIPE_CONTROL *>(*it);
|
||||
if (cmd->getPostSyncOperation() == POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA) {
|
||||
postSyncPipeControls++;
|
||||
EXPECT_EQ(gpuAddress, NEO::UnitTestHelper<FamilyType>::getPipeControlPostSyncAddress(*cmd));
|
||||
EXPECT_EQ(Event::STATE_SIGNALED, cmd->getImmediateData());
|
||||
if constexpr (multiTile == 1) {
|
||||
EXPECT_TRUE(cmd->getWorkloadPartitionIdOffsetEnable());
|
||||
} else {
|
||||
EXPECT_FALSE(cmd->getWorkloadPartitionIdOffsetEnable());
|
||||
}
|
||||
}
|
||||
}
|
||||
EXPECT_EQ(expectedPostSyncPipeControls, postSyncPipeControls);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1116,6 +1107,8 @@ struct CommandListSignalAllEventPacketFixture : public ModuleFixture {
|
||||
using FamilyType = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
|
||||
using MI_STORE_DATA_IMM = typename FamilyType::MI_STORE_DATA_IMM;
|
||||
using MI_FLUSH_DW = typename FamilyType::MI_FLUSH_DW;
|
||||
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
|
||||
using POST_SYNC_OPERATION = typename FamilyType::PIPE_CONTROL::POST_SYNC_OPERATION;
|
||||
|
||||
auto commandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
|
||||
auto engineType = copyOnly == 1 ? NEO::EngineGroupType::Copy : NEO::EngineGroupType::Compute;
|
||||
@@ -1146,6 +1139,11 @@ struct CommandListSignalAllEventPacketFixture : public ModuleFixture {
|
||||
size_t sizeAfter = cmdStream->getUsed();
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
|
||||
uint64_t gpuAddress = event->getGpuAddress(device);
|
||||
if (event->isUsingContextEndOffset()) {
|
||||
gpuAddress += event->getContextEndOffset();
|
||||
}
|
||||
|
||||
GenCmdList cmdList;
|
||||
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
|
||||
cmdList,
|
||||
@@ -1163,11 +1161,6 @@ struct CommandListSignalAllEventPacketFixture : public ModuleFixture {
|
||||
uint32_t expectedFlushDw = event->getMaxPacketsCount() * flushCmdWaFactor;
|
||||
ASSERT_EQ(expectedFlushDw, itorFlushDw.size());
|
||||
|
||||
uint64_t gpuAddress = event->getGpuAddress(device);
|
||||
if (event->isUsingContextEndOffset()) {
|
||||
gpuAddress += event->getContextEndOffset();
|
||||
}
|
||||
|
||||
for (uint32_t i = 0; i < expectedFlushDw; i++) {
|
||||
auto cmd = genCmdCast<MI_FLUSH_DW *>(*itorFlushDw[i]);
|
||||
if (flushCmdWaFactor == 2) {
|
||||
@@ -1183,6 +1176,7 @@ struct CommandListSignalAllEventPacketFixture : public ModuleFixture {
|
||||
|
||||
} else {
|
||||
auto itorStoreDataImm = findAll<MI_STORE_DATA_IMM *>(cmdList.begin(), cmdList.end());
|
||||
auto itorPipeControl = findAll<PIPE_CONTROL *>(cmdList.begin(), cmdList.end());
|
||||
|
||||
uint32_t extraCleanupStoreDataImm = 0;
|
||||
if constexpr (multiTile == 1) {
|
||||
@@ -1190,70 +1184,49 @@ struct CommandListSignalAllEventPacketFixture : public ModuleFixture {
|
||||
extraCleanupStoreDataImm = 2;
|
||||
}
|
||||
|
||||
if constexpr (limitEventPacketes == 1) { // single packet for single tile, two packets for two tiles
|
||||
uint32_t expectedStoreDataImm = 0; // single packet will be reset by PC or SDI - assume here PC is used for timestamp event
|
||||
uint32_t expectedStoreDataImm = event->getMaxPacketsCount() / commandList->partitionCount;
|
||||
if constexpr (limitEventPacketes == 1) {
|
||||
// single packet will be reset by PC or SDI
|
||||
expectedStoreDataImm = 1;
|
||||
}
|
||||
|
||||
uint32_t expectedPostSyncPipeControls = 0;
|
||||
// last packet is reset by PIPE_CONTROL w/ post sync
|
||||
if (eventPoolFlags == ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP) {
|
||||
expectedStoreDataImm--;
|
||||
expectedPostSyncPipeControls = 1;
|
||||
}
|
||||
|
||||
ASSERT_EQ(expectedStoreDataImm + extraCleanupStoreDataImm, static_cast<uint32_t>(itorStoreDataImm.size()));
|
||||
|
||||
for (uint32_t i = 0; i < expectedStoreDataImm; i++) {
|
||||
auto cmd = genCmdCast<MI_STORE_DATA_IMM *>(*itorStoreDataImm[i]);
|
||||
EXPECT_EQ(gpuAddress, cmd->getAddress());
|
||||
EXPECT_FALSE(cmd->getStoreQword());
|
||||
EXPECT_EQ(Event::STATE_CLEARED, cmd->getDataDword0());
|
||||
if constexpr (multiTile == 1) {
|
||||
expectedStoreDataImm = 1; // single SDI to reset second packet
|
||||
}
|
||||
if (eventPoolFlags == 0) {
|
||||
expectedStoreDataImm++; // but for immediate events, SDI is used instead PC, then add 1 here
|
||||
}
|
||||
ASSERT_EQ(expectedStoreDataImm + extraCleanupStoreDataImm, itorStoreDataImm.size());
|
||||
} else {
|
||||
// TS events reset uses getMaxPacketsCount(), no need to reset not used packets
|
||||
if (eventPoolFlags == ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP) {
|
||||
uint64_t gpuAddress = event->getGpuAddress(device);
|
||||
gpuAddress += event->getContextEndOffset();
|
||||
// last packet is reset by PIPE_CONTROL w/ post sync
|
||||
uint32_t expectedStoreDataImm = event->getMaxPacketsCount() - 1;
|
||||
|
||||
ASSERT_EQ(expectedStoreDataImm + extraCleanupStoreDataImm, static_cast<uint32_t>(itorStoreDataImm.size()));
|
||||
for (uint32_t i = 0; i < expectedStoreDataImm; i++) {
|
||||
auto cmd = genCmdCast<MI_STORE_DATA_IMM *>(*itorStoreDataImm[i]);
|
||||
EXPECT_EQ(gpuAddress, cmd->getAddress());
|
||||
EXPECT_FALSE(cmd->getStoreQword());
|
||||
EXPECT_EQ(Event::STATE_CLEARED, cmd->getDataDword0());
|
||||
gpuAddress += event->getSinglePacketSize();
|
||||
}
|
||||
EXPECT_TRUE(cmd->getWorkloadPartitionIdOffsetEnable());
|
||||
} else {
|
||||
uint32_t packetUsed = event->getPacketsInUse();
|
||||
uint32_t remainingResetSdiCommands = event->getMaxPacketsCount() - packetUsed;
|
||||
EXPECT_FALSE(cmd->getWorkloadPartitionIdOffsetEnable());
|
||||
}
|
||||
|
||||
uint32_t packetOffsetFactor = 1;
|
||||
uint32_t usePacketSignalStoreDataImm = 1; // single SDI to reset single packet in single tile
|
||||
bool usePartitioningWrite = false;
|
||||
if (this->alignEventPacketsForReset) {
|
||||
remainingResetSdiCommands /= commandList->partitionCount;
|
||||
packetOffsetFactor = commandList->partitionCount;
|
||||
|
||||
if constexpr (multiTile == 1) {
|
||||
usePacketSignalStoreDataImm++; // and two SDI to reset two packets in multi tile
|
||||
usePartitioningWrite = true; // only when number of not used packets is aligned to partition count, multi-tile reset can be split to both tiles
|
||||
}
|
||||
}
|
||||
|
||||
ASSERT_EQ(remainingResetSdiCommands + usePacketSignalStoreDataImm + extraCleanupStoreDataImm, static_cast<uint32_t>(itorStoreDataImm.size()));
|
||||
|
||||
uint64_t gpuAddress = event->getGpuAddress(device);
|
||||
gpuAddress += (packetUsed * event->getSinglePacketSize());
|
||||
if (event->isUsingContextEndOffset()) {
|
||||
gpuAddress += event->getContextEndOffset();
|
||||
}
|
||||
|
||||
for (uint32_t i = usePacketSignalStoreDataImm; i < itorStoreDataImm.size() - extraCleanupStoreDataImm; i++) {
|
||||
auto cmd = genCmdCast<MI_STORE_DATA_IMM *>(*itorStoreDataImm[i]);
|
||||
EXPECT_EQ(gpuAddress, cmd->getAddress());
|
||||
EXPECT_FALSE(cmd->getStoreQword());
|
||||
EXPECT_EQ(Event::STATE_CLEARED, cmd->getDataDword0());
|
||||
if (usePartitioningWrite) {
|
||||
EXPECT_TRUE(cmd->getWorkloadPartitionIdOffsetEnable());
|
||||
} else {
|
||||
EXPECT_FALSE(cmd->getWorkloadPartitionIdOffsetEnable());
|
||||
}
|
||||
gpuAddress += (event->getSinglePacketSize() * packetOffsetFactor);
|
||||
gpuAddress += event->getSinglePacketSize() * commandList->partitionCount;
|
||||
}
|
||||
uint32_t postSyncPipeControls = 0;
|
||||
for (auto it : itorPipeControl) {
|
||||
auto cmd = genCmdCast<PIPE_CONTROL *>(*it);
|
||||
if (cmd->getPostSyncOperation() == POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA) {
|
||||
postSyncPipeControls++;
|
||||
EXPECT_EQ(gpuAddress, NEO::UnitTestHelper<FamilyType>::getPipeControlPostSyncAddress(*cmd));
|
||||
EXPECT_EQ(Event::STATE_CLEARED, cmd->getImmediateData());
|
||||
if constexpr (multiTile == 1) {
|
||||
EXPECT_TRUE(cmd->getWorkloadPartitionIdOffsetEnable());
|
||||
} else {
|
||||
EXPECT_FALSE(cmd->getWorkloadPartitionIdOffsetEnable());
|
||||
}
|
||||
}
|
||||
}
|
||||
EXPECT_EQ(expectedPostSyncPipeControls, postSyncPipeControls);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user