Add multi tile support for event reset commands

Related-To: NEO-6262

Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
Zbigniew Zdanowicz
2021-12-07 12:09:38 +00:00
committed by Compute-Runtime-Automation
parent b922f7ddb1
commit 882814751f
7 changed files with 148 additions and 21 deletions

View File

@@ -244,6 +244,8 @@ struct CommandListCoreFamily : CommandListImp {
void programThreadArbitrationPolicy(Device *device);
void appendComputeBarrierCommand();
NEO::PipeControlArgs createBarrierFlags();
void appendMultiTileBarrier(NEO::Device &neoDevice);
size_t estimateBufferSizeMultiTileBarrier(const NEO::HardwareInfo &hwInfo);
uint64_t getInputBufferSize(NEO::ImageType imageType, uint64_t bytesPerPixel, const ze_image_region_t *region);
MOCKABLE_VIRTUAL AlignedAllocationData getAlignedAllocation(Device *device, const void *buffer, uint64_t bufferSize, bool hostCopyAllowed);

View File

@@ -295,8 +295,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendEventReset(ze_event_hand
auto event = Event::fromHandle(hEvent);
uint64_t baseAddr = event->getGpuAddress(this->device);
uint32_t packetsToReset = 1;
uint32_t packetsToReset = event->getPacketsInUse();
NEO::Device *neoDevice = device->getNEODevice();
uint32_t callId = 0;
@@ -312,8 +311,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendEventReset(ze_event_hand
if (event->isEventTimestampFlagSet()) {
baseAddr += event->getContextEndOffset();
packetsToReset = EventPacketsCount::eventPackets;
event->resetPackets();
}
event->resetPackets();
commandContainer.addToResidencyContainer(&event->getAllocation(this->device));
if (isCopyOnly()) {
NEO::MiFlushArgs args;
@@ -324,11 +323,15 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendEventReset(ze_event_hand
} else {
NEO::PipeControlArgs args;
if (NEO::MemorySynchronizationCommands<GfxFamily>::isDcFlushAllowed()) {
args.dcFlushEnable = (!event->signalScope) ? false : true;
args.dcFlushEnable = !!event->signalScope;
}
auto &hwInfo = device->getNEODevice()->getHardwareInfo();
increaseCommandStreamSpace(NEO::MemorySynchronizationCommands<GfxFamily>::getSizeForPipeControlWithPostSyncOperation(hwInfo) * packetsToReset);
auto &hwInfo = neoDevice->getHardwareInfo();
size_t estimateSize = NEO::MemorySynchronizationCommands<GfxFamily>::getSizeForPipeControlWithPostSyncOperation(hwInfo) * packetsToReset;
if (this->partitionCount > 1) {
estimateSize += estimateBufferSizeMultiTileBarrier(hwInfo);
}
increaseCommandStreamSpace(estimateSize);
for (uint32_t i = 0u; i < packetsToReset; i++) {
NEO::MemorySynchronizationCommands<GfxFamily>::addPipeControlAndProgramPostSyncOperation(
@@ -340,6 +343,9 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendEventReset(ze_event_hand
args);
baseAddr += event->getSinglePacketSize();
}
if (this->partitionCount > 1) {
appendMultiTileBarrier(*neoDevice);
}
}
if (NEO::DebugManager.flags.EnableSWTags.get()) {

View File

@@ -191,9 +191,18 @@ void CommandListCoreFamily<gfxCoreFamily>::appendComputeBarrierCommand() {
}
template <GFXCORE_FAMILY gfxCoreFamily>
NEO::PipeControlArgs CommandListCoreFamily<gfxCoreFamily>::createBarrierFlags() {
inline NEO::PipeControlArgs CommandListCoreFamily<gfxCoreFamily>::createBarrierFlags() {
NEO::PipeControlArgs args;
return args;
}
// Multi-tile barrier hook for this variant of CommandListCoreFamily.
// The body is empty: nothing is written to the command stream and neoDevice is not used.
// NOTE(review): presumably this is the variant built for cores without multi-tile support — confirm against the including file.
template <GFXCORE_FAMILY gfxCoreFamily>
inline void CommandListCoreFamily<gfxCoreFamily>::appendMultiTileBarrier(NEO::Device &neoDevice) {
}
// Size estimate, in bytes, for the multi-tile barrier in this variant of CommandListCoreFamily.
// Always returns 0 regardless of hwInfo, matching the empty appendMultiTileBarrier defined next to it.
template <GFXCORE_FAMILY gfxCoreFamily>
inline size_t CommandListCoreFamily<gfxCoreFamily>::estimateBufferSizeMultiTileBarrier(const NEO::HardwareInfo &hwInfo) {
return 0;
}
} // namespace L0

View File

@@ -313,25 +313,14 @@ void CommandListCoreFamily<gfxCoreFamily>::appendMultiPartitionEpilogue() {
template <GFXCORE_FAMILY gfxCoreFamily>
void CommandListCoreFamily<gfxCoreFamily>::appendComputeBarrierCommand() {
NEO::PipeControlArgs args = createBarrierFlags();
if (this->partitionCount > 1) {
auto neoDevice = device->getNEODevice();
auto &hwInfo = neoDevice->getHardwareInfo();
size_t estimatedSizeRequired = NEO::ImplicitScalingDispatch<GfxFamily>::getBarrierSize(hwInfo,
true,
false);
increaseCommandStreamSpace(estimatedSizeRequired);
NEO::ImplicitScalingDispatch<GfxFamily>::dispatchBarrierCommands(*commandContainer.getCommandStream(),
neoDevice->getDeviceBitfield(),
args,
hwInfo,
0,
0,
true,
true);
increaseCommandStreamSpace(estimateBufferSizeMultiTileBarrier(hwInfo));
appendMultiTileBarrier(*neoDevice);
} else {
NEO::PipeControlArgs args = createBarrierFlags();
size_t estimatedSizeRequired = NEO::MemorySynchronizationCommands<GfxFamily>::getSizeForSinglePipeControl();
increaseCommandStreamSpace(estimatedSizeRequired);
NEO::MemorySynchronizationCommands<GfxFamily>::addPipeControl(*commandContainer.getCommandStream(), args);
@@ -345,4 +334,25 @@ NEO::PipeControlArgs CommandListCoreFamily<gfxCoreFamily>::createBarrierFlags()
return args;
}
// Emits the multi-tile barrier into this command list's stream by delegating to
// NEO::ImplicitScalingDispatch::dispatchBarrierCommands. The pipe-control flags come from
// createBarrierFlags(); the device bitfield and hardware info are read from neoDevice.
// The caller is responsible for reserving stream space beforehand
// (see estimateBufferSizeMultiTileBarrier).
template <GFXCORE_FAMILY gfxCoreFamily>
void CommandListCoreFamily<gfxCoreFamily>::appendMultiTileBarrier(NEO::Device &neoDevice) {
    NEO::PipeControlArgs barrierFlags = createBarrierFlags();
    auto &hardwareInfo = neoDevice.getHardwareInfo();
    auto &stream = *commandContainer.getCommandStream();

    // NOTE(review): the meaning of the trailing 0, 0, true, true arguments is defined by
    // dispatchBarrierCommands and is not visible here — confirm against its declaration.
    NEO::ImplicitScalingDispatch<GfxFamily>::dispatchBarrierCommands(stream, neoDevice.getDeviceBitfield(), barrierFlags, hardwareInfo, 0, 0, true, true);
}
// Returns the number of bytes to reserve in the command stream for the multi-tile barrier,
// as reported by NEO::ImplicitScalingDispatch::getBarrierSize for the given hardware info.
template <GFXCORE_FAMILY gfxCoreFamily>
inline size_t CommandListCoreFamily<gfxCoreFamily>::estimateBufferSizeMultiTileBarrier(const NEO::HardwareInfo &hwInfo) {
    const size_t barrierSize = NEO::ImplicitScalingDispatch<GfxFamily>::getBarrierSize(hwInfo, true, false);
    return barrierSize;
}
} // namespace L0

View File

@@ -35,11 +35,13 @@ struct WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>
using BaseClass::appendLaunchKernelWithParams;
using BaseClass::appendMemoryCopyBlit;
using BaseClass::appendMemoryCopyBlitRegion;
using BaseClass::appendMultiTileBarrier;
using BaseClass::appendSignalEventPostWalker;
using BaseClass::appendWriteKernelTimestamp;
using BaseClass::applyMemoryRangesBarrier;
using BaseClass::clearCommandsToPatch;
using BaseClass::cmdQImmediate;
using BaseClass::commandContainer;
using BaseClass::commandListPerThreadScratchSize;
using BaseClass::commandListPreemptionMode;
using BaseClass::commandsToPatch;
@@ -47,6 +49,7 @@ struct WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>
using BaseClass::containsCooperativeKernelsFlag;
using BaseClass::csr;
using BaseClass::engineGroupType;
using BaseClass::estimateBufferSizeMultiTileBarrier;
using BaseClass::finalStreamState;
using BaseClass::flags;
using BaseClass::getAlignedAllocation;

View File

@@ -668,5 +668,26 @@ HWTEST2_F(HostPointerManagerCommandListTest, givenDebugModeToRegisterAllHostPoin
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
}
// Core families from GEN9 up to and including GEN12LP.
using SingleTileOnlyPlatforms = IsWithinGfxCore<IGFX_GEN9_CORE, IGFX_GEN12LP_CORE>;

// On the cores selected above, the multi-tile barrier must have a zero size estimate
// and appending it must leave the command stream usage unchanged.
HWTEST2_F(CommandListCreate, givenSingleTileOnlyPlatformsWhenProgrammingMultiTileBarrierThenNoProgrammingIsExpected, SingleTileOnlyPlatforms) {
    using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;

    auto neoDevice = device->getNEODevice();
    auto &hardwareInfo = neoDevice->getHardwareInfo();

    auto list = std::make_unique<::L0::ult::CommandListCoreFamily<gfxCoreFamily>>();
    ASSERT_NE(nullptr, list);
    EXPECT_EQ(ZE_RESULT_SUCCESS, list->initialize(device, NEO::EngineGroupType::Compute, 0u));

    // The size estimate for the barrier is expected to be zero.
    EXPECT_EQ(0u, list->estimateBufferSizeMultiTileBarrier(hardwareInfo));

    // Appending the barrier must not consume any stream space.
    auto stream = list->commandContainer.getCommandStream();
    const size_t usedBeforeBarrier = stream->getUsed();
    list->appendMultiTileBarrier(*neoDevice);
    EXPECT_EQ(usedBeforeBarrier, stream->getUsed());
}
} // namespace ult
} // namespace L0

View File

@@ -224,5 +224,81 @@ HWTEST2_F(CommandListAppendEventReset, givenEventWithHostScopeUsedInResetThenPip
}
ASSERT_TRUE(postSyncFound);
}
// Resets an event that has two packets in use on a command list whose partitionCount is 2 and checks:
//  - the event is back to a single packet in use after the call,
//  - the stream usage equals (post-sync pipe control size * packets) + the multi-tile barrier estimate,
//  - one WRITE_IMMEDIATE_DATA pipe control per packet, each targeting consecutive packet addresses,
//  - after the last of them, a PIPE_CONTROL, an MI_ATOMIC and an MI_SEMAPHORE_WAIT appear in that order.
HWTEST2_F(CommandListAppendEventReset,
          givenMultiTileCommandListWhenAppendingMultiPacketEventThenExpectSameNumberOfResetPostSyncAndMultiBarrierCommands, IsAtLeastXeHpCore) {
    using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
    using POST_SYNC_OPERATION = typename PIPE_CONTROL::POST_SYNC_OPERATION;
    using MI_BATCH_BUFFER_END = typename FamilyType::MI_BATCH_BUFFER_END;
    using MI_ATOMIC = typename FamilyType::MI_ATOMIC;
    using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;

    auto commandList = std::make_unique<::L0::ult::CommandListCoreFamily<gfxCoreFamily>>();
    ASSERT_NE(nullptr, commandList);
    ze_result_t returnValue = commandList->initialize(device, NEO::EngineGroupType::Compute, 0u);
    EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);

    // Consume the stream until only sizeof(MI_BATCH_BUFFER_END) bytes remain available.
    // NOTE(review): presumably this forces the reset to obtain fresh stream space, so that
    // getUsed() below counts only the reset commands — confirm.
    auto cmdStream = commandList->commandContainer.getCommandStream();
    size_t useSize = cmdStream->getAvailableSpace();
    useSize -= sizeof(MI_BATCH_BUFFER_END);
    cmdStream->getSpace(useSize);

    constexpr uint32_t packets = 2u;
    event->setPacketsInUse(packets);
    event->setEventTimestampFlag(false);
    event->signalScope = ZE_EVENT_SCOPE_FLAG_HOST;
    commandList->partitionCount = packets;

    returnValue = commandList->appendEventReset(event->toHandle());
    EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);
    EXPECT_EQ(1u, event->getPacketsInUse());

    auto gpuAddress = event->getGpuAddress(device);
    auto &hwInfo = device->getNEODevice()->getHardwareInfo();
    size_t expectedSize = NEO::MemorySynchronizationCommands<GfxFamily>::getSizeForPipeControlWithPostSyncOperation(hwInfo) * packets +
                          commandList->estimateBufferSizeMultiTileBarrier(hwInfo);
    size_t usedSize = cmdStream->getUsed();
    EXPECT_EQ(expectedSize, usedSize);

    GenCmdList cmdList;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        cmdList,
        cmdStream->getCpuBase(),
        usedSize));

    auto pipeControlList = findAll<PIPE_CONTROL *>(cmdList.begin(), cmdList.end());
    ASSERT_NE(0u, pipeControlList.size());

    // Count the post-sync pipe controls and remember the position of the last one.
    uint32_t postSyncFound = 0;
    auto postSyncPipeControlItor = cmdList.end();
    for (auto &it : pipeControlList) {
        auto cmd = genCmdCast<PIPE_CONTROL *>(*it);
        if (cmd->getPostSyncOperation() == POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA) {
            EXPECT_EQ(cmd->getImmediateData(), Event::STATE_CLEARED);
            EXPECT_TRUE(cmd->getCommandStreamerStallEnable());
            EXPECT_EQ(gpuAddress, NEO::UnitTestHelper<FamilyType>::getPipeControlPostSyncAddress(*cmd));
            EXPECT_EQ(MemorySynchronizationCommands<FamilyType>::isDcFlushAllowed(), cmd->getDcFlushEnable());
            postSyncFound++;
            // Each packet is expected at the next single-packet offset.
            gpuAddress += event->getSinglePacketSize();
            postSyncPipeControlItor = it;
        }
    }
    EXPECT_EQ(packets, postSyncFound);

    // If no post-sync pipe control matched, the iterator is still cmdList.end();
    // stop here instead of incrementing a past-the-end iterator.
    ASSERT_NE(cmdList.end(), postSyncPipeControlItor);
    ++postSyncPipeControlItor;
    ASSERT_NE(cmdList.end(), postSyncPipeControlItor);

    //find multi tile barrier section: pipe control + atomic/semaphore
    auto itorPipeControl = find<PIPE_CONTROL *>(postSyncPipeControlItor, cmdList.end());
    ASSERT_NE(cmdList.end(), itorPipeControl);

    auto itorAtomic = find<MI_ATOMIC *>(itorPipeControl, cmdList.end());
    ASSERT_NE(cmdList.end(), itorAtomic);

    auto itorSemaphore = find<MI_SEMAPHORE_WAIT *>(itorAtomic, cmdList.end());
    ASSERT_NE(cmdList.end(), itorSemaphore);
}
} // namespace ult
} // namespace L0