mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-03 23:03:02 +08:00
Add multi tile support for event reset commands
Related-To: NEO-6262 Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
b922f7ddb1
commit
882814751f
@@ -244,6 +244,8 @@ struct CommandListCoreFamily : CommandListImp {
|
||||
void programThreadArbitrationPolicy(Device *device);
|
||||
void appendComputeBarrierCommand();
|
||||
NEO::PipeControlArgs createBarrierFlags();
|
||||
void appendMultiTileBarrier(NEO::Device &neoDevice);
|
||||
size_t estimateBufferSizeMultiTileBarrier(const NEO::HardwareInfo &hwInfo);
|
||||
|
||||
uint64_t getInputBufferSize(NEO::ImageType imageType, uint64_t bytesPerPixel, const ze_image_region_t *region);
|
||||
MOCKABLE_VIRTUAL AlignedAllocationData getAlignedAllocation(Device *device, const void *buffer, uint64_t bufferSize, bool hostCopyAllowed);
|
||||
|
||||
@@ -295,8 +295,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendEventReset(ze_event_hand
|
||||
auto event = Event::fromHandle(hEvent);
|
||||
|
||||
uint64_t baseAddr = event->getGpuAddress(this->device);
|
||||
|
||||
uint32_t packetsToReset = 1;
|
||||
uint32_t packetsToReset = event->getPacketsInUse();
|
||||
|
||||
NEO::Device *neoDevice = device->getNEODevice();
|
||||
uint32_t callId = 0;
|
||||
@@ -312,8 +311,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendEventReset(ze_event_hand
|
||||
if (event->isEventTimestampFlagSet()) {
|
||||
baseAddr += event->getContextEndOffset();
|
||||
packetsToReset = EventPacketsCount::eventPackets;
|
||||
event->resetPackets();
|
||||
}
|
||||
event->resetPackets();
|
||||
commandContainer.addToResidencyContainer(&event->getAllocation(this->device));
|
||||
if (isCopyOnly()) {
|
||||
NEO::MiFlushArgs args;
|
||||
@@ -324,11 +323,15 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendEventReset(ze_event_hand
|
||||
} else {
|
||||
NEO::PipeControlArgs args;
|
||||
if (NEO::MemorySynchronizationCommands<GfxFamily>::isDcFlushAllowed()) {
|
||||
args.dcFlushEnable = (!event->signalScope) ? false : true;
|
||||
args.dcFlushEnable = !!event->signalScope;
|
||||
}
|
||||
|
||||
auto &hwInfo = device->getNEODevice()->getHardwareInfo();
|
||||
increaseCommandStreamSpace(NEO::MemorySynchronizationCommands<GfxFamily>::getSizeForPipeControlWithPostSyncOperation(hwInfo) * packetsToReset);
|
||||
auto &hwInfo = neoDevice->getHardwareInfo();
|
||||
size_t estimateSize = NEO::MemorySynchronizationCommands<GfxFamily>::getSizeForPipeControlWithPostSyncOperation(hwInfo) * packetsToReset;
|
||||
if (this->partitionCount > 1) {
|
||||
estimateSize += estimateBufferSizeMultiTileBarrier(hwInfo);
|
||||
}
|
||||
increaseCommandStreamSpace(estimateSize);
|
||||
|
||||
for (uint32_t i = 0u; i < packetsToReset; i++) {
|
||||
NEO::MemorySynchronizationCommands<GfxFamily>::addPipeControlAndProgramPostSyncOperation(
|
||||
@@ -340,6 +343,9 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendEventReset(ze_event_hand
|
||||
args);
|
||||
baseAddr += event->getSinglePacketSize();
|
||||
}
|
||||
if (this->partitionCount > 1) {
|
||||
appendMultiTileBarrier(*neoDevice);
|
||||
}
|
||||
}
|
||||
|
||||
if (NEO::DebugManager.flags.EnableSWTags.get()) {
|
||||
|
||||
@@ -191,9 +191,18 @@ void CommandListCoreFamily<gfxCoreFamily>::appendComputeBarrierCommand() {
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
NEO::PipeControlArgs CommandListCoreFamily<gfxCoreFamily>::createBarrierFlags() {
|
||||
inline NEO::PipeControlArgs CommandListCoreFamily<gfxCoreFamily>::createBarrierFlags() {
|
||||
NEO::PipeControlArgs args;
|
||||
return args;
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
inline void CommandListCoreFamily<gfxCoreFamily>::appendMultiTileBarrier(NEO::Device &neoDevice) {
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
inline size_t CommandListCoreFamily<gfxCoreFamily>::estimateBufferSizeMultiTileBarrier(const NEO::HardwareInfo &hwInfo) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
} // namespace L0
|
||||
|
||||
@@ -313,25 +313,14 @@ void CommandListCoreFamily<gfxCoreFamily>::appendMultiPartitionEpilogue() {
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
void CommandListCoreFamily<gfxCoreFamily>::appendComputeBarrierCommand() {
|
||||
NEO::PipeControlArgs args = createBarrierFlags();
|
||||
if (this->partitionCount > 1) {
|
||||
auto neoDevice = device->getNEODevice();
|
||||
auto &hwInfo = neoDevice->getHardwareInfo();
|
||||
|
||||
size_t estimatedSizeRequired = NEO::ImplicitScalingDispatch<GfxFamily>::getBarrierSize(hwInfo,
|
||||
true,
|
||||
false);
|
||||
increaseCommandStreamSpace(estimatedSizeRequired);
|
||||
|
||||
NEO::ImplicitScalingDispatch<GfxFamily>::dispatchBarrierCommands(*commandContainer.getCommandStream(),
|
||||
neoDevice->getDeviceBitfield(),
|
||||
args,
|
||||
hwInfo,
|
||||
0,
|
||||
0,
|
||||
true,
|
||||
true);
|
||||
increaseCommandStreamSpace(estimateBufferSizeMultiTileBarrier(hwInfo));
|
||||
appendMultiTileBarrier(*neoDevice);
|
||||
} else {
|
||||
NEO::PipeControlArgs args = createBarrierFlags();
|
||||
size_t estimatedSizeRequired = NEO::MemorySynchronizationCommands<GfxFamily>::getSizeForSinglePipeControl();
|
||||
increaseCommandStreamSpace(estimatedSizeRequired);
|
||||
NEO::MemorySynchronizationCommands<GfxFamily>::addPipeControl(*commandContainer.getCommandStream(), args);
|
||||
@@ -345,4 +334,25 @@ NEO::PipeControlArgs CommandListCoreFamily<gfxCoreFamily>::createBarrierFlags()
|
||||
return args;
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
void CommandListCoreFamily<gfxCoreFamily>::appendMultiTileBarrier(NEO::Device &neoDevice) {
|
||||
NEO::PipeControlArgs args = createBarrierFlags();
|
||||
auto &hwInfo = neoDevice.getHardwareInfo();
|
||||
NEO::ImplicitScalingDispatch<GfxFamily>::dispatchBarrierCommands(*commandContainer.getCommandStream(),
|
||||
neoDevice.getDeviceBitfield(),
|
||||
args,
|
||||
hwInfo,
|
||||
0,
|
||||
0,
|
||||
true,
|
||||
true);
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
inline size_t CommandListCoreFamily<gfxCoreFamily>::estimateBufferSizeMultiTileBarrier(const NEO::HardwareInfo &hwInfo) {
|
||||
return NEO::ImplicitScalingDispatch<GfxFamily>::getBarrierSize(hwInfo,
|
||||
true,
|
||||
false);
|
||||
}
|
||||
|
||||
} // namespace L0
|
||||
|
||||
@@ -35,11 +35,13 @@ struct WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>
|
||||
using BaseClass::appendLaunchKernelWithParams;
|
||||
using BaseClass::appendMemoryCopyBlit;
|
||||
using BaseClass::appendMemoryCopyBlitRegion;
|
||||
using BaseClass::appendMultiTileBarrier;
|
||||
using BaseClass::appendSignalEventPostWalker;
|
||||
using BaseClass::appendWriteKernelTimestamp;
|
||||
using BaseClass::applyMemoryRangesBarrier;
|
||||
using BaseClass::clearCommandsToPatch;
|
||||
using BaseClass::cmdQImmediate;
|
||||
using BaseClass::commandContainer;
|
||||
using BaseClass::commandListPerThreadScratchSize;
|
||||
using BaseClass::commandListPreemptionMode;
|
||||
using BaseClass::commandsToPatch;
|
||||
@@ -47,6 +49,7 @@ struct WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>
|
||||
using BaseClass::containsCooperativeKernelsFlag;
|
||||
using BaseClass::csr;
|
||||
using BaseClass::engineGroupType;
|
||||
using BaseClass::estimateBufferSizeMultiTileBarrier;
|
||||
using BaseClass::finalStreamState;
|
||||
using BaseClass::flags;
|
||||
using BaseClass::getAlignedAllocation;
|
||||
|
||||
@@ -668,5 +668,26 @@ HWTEST2_F(HostPointerManagerCommandListTest, givenDebugModeToRegisterAllHostPoin
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
}
|
||||
|
||||
using SingleTileOnlyPlatforms = IsWithinGfxCore<IGFX_GEN9_CORE, IGFX_GEN12LP_CORE>;
|
||||
// On single-tile-only platforms the multi-tile barrier must be a no-op:
// the size estimate is zero and appending it leaves the stream untouched.
HWTEST2_F(CommandListCreate, givenSingleTileOnlyPlatformsWhenProgrammingMultiTileBarrierThenNoProgrammingIsExpected, SingleTileOnlyPlatforms) {
    auto neoDevice = device->getNEODevice();
    auto &hwInfo = neoDevice->getHardwareInfo();

    auto commandList = std::make_unique<::L0::ult::CommandListCoreFamily<gfxCoreFamily>>();
    ASSERT_NE(nullptr, commandList);
    ze_result_t returnValue = commandList->initialize(device, NEO::EngineGroupType::Compute, 0u);
    // Fatal check: everything below uses the initialized command container.
    ASSERT_EQ(ZE_RESULT_SUCCESS, returnValue);

    EXPECT_EQ(0u, commandList->estimateBufferSizeMultiTileBarrier(hwInfo));

    auto cmdListStream = commandList->commandContainer.getCommandStream();
    ASSERT_NE(nullptr, cmdListStream);

    size_t usedBefore = cmdListStream->getUsed();
    commandList->appendMultiTileBarrier(*neoDevice);
    size_t usedAfter = cmdListStream->getUsed();
    EXPECT_EQ(usedBefore, usedAfter);
}
|
||||
|
||||
} // namespace ult
|
||||
} // namespace L0
|
||||
|
||||
@@ -224,5 +224,81 @@ HWTEST2_F(CommandListAppendEventReset, givenEventWithHostScopeUsedInResetThenPip
|
||||
}
|
||||
ASSERT_TRUE(postSyncFound);
|
||||
}
|
||||
|
||||
// Resetting a multi-packet event on a multi-tile command list must emit one
// post-sync PIPE_CONTROL per packet, followed by the multi-tile barrier
// section (PIPE_CONTROL, then MI_ATOMIC, then MI_SEMAPHORE_WAIT).
HWTEST2_F(CommandListAppendEventReset,
          givenMultiTileCommandListWhenAppendingMultiPacketEventThenExpectSameNumberOfResetPostSyncAndMultiBarrierCommands, IsAtLeastXeHpCore) {
    using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
    using POST_SYNC_OPERATION = typename PIPE_CONTROL::POST_SYNC_OPERATION;
    using MI_BATCH_BUFFER_END = typename FamilyType::MI_BATCH_BUFFER_END;
    using MI_ATOMIC = typename FamilyType::MI_ATOMIC;
    using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;

    auto commandList = std::make_unique<::L0::ult::CommandListCoreFamily<gfxCoreFamily>>();
    ASSERT_NE(nullptr, commandList);
    ze_result_t returnValue = commandList->initialize(device, NEO::EngineGroupType::Compute, 0u);
    // Fatal check: everything below uses the initialized command container.
    ASSERT_EQ(ZE_RESULT_SUCCESS, returnValue);

    auto cmdStream = commandList->commandContainer.getCommandStream();
    ASSERT_NE(nullptr, cmdStream);

    // Consume the stream until only a MI_BATCH_BUFFER_END still fits.
    // NOTE(review): presumably this forces the reset commands into a fresh
    // buffer so that getUsed() below counts only them - confirm.
    size_t useSize = cmdStream->getAvailableSpace();
    useSize -= sizeof(MI_BATCH_BUFFER_END);
    cmdStream->getSpace(useSize);

    constexpr uint32_t packets = 2u;
    event->setPacketsInUse(packets);
    event->setEventTimestampFlag(false);
    event->signalScope = ZE_EVENT_SCOPE_FLAG_HOST;

    commandList->partitionCount = packets;
    returnValue = commandList->appendEventReset(event->toHandle());
    EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);
    EXPECT_EQ(1u, event->getPacketsInUse());

    auto gpuAddress = event->getGpuAddress(device);
    auto &hwInfo = device->getNEODevice()->getHardwareInfo();

    size_t expectedSize = NEO::MemorySynchronizationCommands<GfxFamily>::getSizeForPipeControlWithPostSyncOperation(hwInfo) * packets +
                          commandList->estimateBufferSizeMultiTileBarrier(hwInfo);
    size_t usedSize = cmdStream->getUsed();
    EXPECT_EQ(expectedSize, usedSize);

    GenCmdList cmdList;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        cmdList,
        cmdStream->getCpuBase(),
        usedSize));

    auto pipeControlList = findAll<PIPE_CONTROL *>(cmdList.begin(), cmdList.end());
    ASSERT_NE(0u, pipeControlList.size());

    // Count the reset post-syncs and remember where the last one sits.
    uint32_t postSyncFound = 0;
    auto postSyncPipeControlItor = cmdList.end();
    for (auto &it : pipeControlList) {
        auto cmd = genCmdCast<PIPE_CONTROL *>(*it);
        if (cmd->getPostSyncOperation() == POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA) {
            EXPECT_EQ(cmd->getImmediateData(), Event::STATE_CLEARED);
            EXPECT_TRUE(cmd->getCommandStreamerStallEnable());
            EXPECT_EQ(gpuAddress, NEO::UnitTestHelper<FamilyType>::getPipeControlPostSyncAddress(*cmd));
            EXPECT_EQ(MemorySynchronizationCommands<FamilyType>::isDcFlushAllowed(), cmd->getDcFlushEnable());
            postSyncFound++;
            gpuAddress += event->getSinglePacketSize();
            postSyncPipeControlItor = it;
        }
    }
    // Fatal check: if no post-sync was found the iterator still equals
    // cmdList.end(), and incrementing it below would be undefined behaviour.
    ASSERT_EQ(packets, postSyncFound);
    ++postSyncPipeControlItor;
    ASSERT_NE(cmdList.end(), postSyncPipeControlItor);

    // Find the multi-tile barrier section: pipe control + atomic/semaphore.
    auto itorPipeControl = find<PIPE_CONTROL *>(postSyncPipeControlItor, cmdList.end());
    ASSERT_NE(cmdList.end(), itorPipeControl);

    auto itorAtomic = find<MI_ATOMIC *>(itorPipeControl, cmdList.end());
    ASSERT_NE(cmdList.end(), itorAtomic);

    auto itorSemaphore = find<MI_SEMAPHORE_WAIT *>(itorAtomic, cmdList.end());
    ASSERT_NE(cmdList.end(), itorSemaphore);
}
|
||||
|
||||
} // namespace ult
|
||||
} // namespace L0
|
||||
|
||||
Reference in New Issue
Block a user