performance: use resource_barrier on Xe2 and PTL

Related-To: NEO-14943

Signed-off-by: Szymon Morek <szymon.morek@intel.com>
This commit is contained in:
Szymon Morek
2025-06-23 13:42:45 +00:00
committed by Compute-Runtime-Automation
parent 6b33e62d15
commit 0db5ce22a1
35 changed files with 246 additions and 79 deletions

View File

@@ -379,6 +379,7 @@ HWTEST_F(CommandListAppendSignalEvent, givenInOrderImmediateCmdListWhenAppending
using MI_STORE_DATA_IMM = typename FamilyType::MI_STORE_DATA_IMM;
using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START;
using StallingBarrierType = typename FamilyType::StallingBarrierType;
ze_event_pool_desc_t eventPoolDesc = {};
eventPoolDesc.count = 1;
@@ -446,7 +447,7 @@ HWTEST_F(CommandListAppendSignalEvent, givenInOrderImmediateCmdListWhenAppending
GenCmdList::iterator itorResolveCmd = itorBbStart;
if (NEO::MemorySynchronizationCommands<FamilyType>::getDcFlushEnable(true, neoDevice->getRootDeviceEnvironment())) {
itorResolveCmd = find<PIPE_CONTROL *>(cmdList.begin(), itorBbStart);
itorResolveCmd = find<StallingBarrierType *>(cmdList.begin(), itorBbStart);
} else {
itorResolveCmd = find<MI_SEMAPHORE_WAIT *>(cmdList.begin(), itorBbStart);
}

View File

@@ -1009,7 +1009,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, InOrderCmdListTests, givenResolveDependenciesViaPip
ptrOffset(cmdStream->getCpuBase(), offset),
cmdStream->getUsed() - offset));
auto itor = find<typename FamilyType::PIPE_CONTROL *>(cmdList.begin(), cmdList.end());
auto itor = find<typename FamilyType::StallingBarrierType *>(cmdList.begin(), cmdList.end());
ASSERT_NE(cmdList.end(), itor);
completeHostAddress<FamilyType::gfxCoreFamily, WhiteBox<L0::CommandListCoreFamilyImmediate<FamilyType::gfxCoreFamily>>>(immCmdList.get());
@@ -1040,7 +1040,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, InOrderCmdListTests, givenOptimizedCbEventWhenSubmi
cmdStream->getUsed() - offset));
if (immCmdList->dcFlushSupport || !immCmdList->isHeaplessModeEnabled()) {
auto itor = find<typename FamilyType::PIPE_CONTROL *>(cmdList.begin(), cmdList.end());
auto itor = find<typename FamilyType::StallingBarrierType *>(cmdList.begin(), cmdList.end());
ASSERT_NE(cmdList.end(), itor);
} else {
auto itor = find<typename FamilyType::MI_SEMAPHORE_WAIT *>(cmdList.begin(), cmdList.end());
@@ -1075,7 +1075,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, InOrderCmdListTests, givenInOrderCmdListWhenSubmitt
cmdStream->getUsed() - offset));
if (immCmdList->dcFlushSupport) {
auto itor = find<typename FamilyType::PIPE_CONTROL *>(cmdList.begin(), cmdList.end());
auto itor = find<typename FamilyType::StallingBarrierType *>(cmdList.begin(), cmdList.end());
ASSERT_NE(cmdList.end(), itor);
} else {
auto itor = find<typename FamilyType::MI_SEMAPHORE_WAIT *>(cmdList.begin(), cmdList.end());

View File

@@ -99,7 +99,7 @@ size_t EnqueueOperation<GfxFamily>::getTotalSizeRequiredCS(uint32_t eventType, c
expectedSizeCS += TimestampPacketHelper::getRequiredCmdStreamSize<GfxFamily>(csrDeps, relaxedOrderingEnabled);
expectedSizeCS += EnqueueOperation<GfxFamily>::getSizeRequiredForTimestampPacketWrite();
if (resolveDependenciesByPipecontrol) {
expectedSizeCS += MemorySynchronizationCommands<GfxFamily>::getSizeForSingleBarrier();
expectedSizeCS += MemorySynchronizationCommands<GfxFamily>::getSizeForStallingBarrier();
}
if (isMarkerWithProfiling) {
if (!eventsInWaitlist) {

View File

@@ -579,7 +579,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, givenTimestamp
size_t additionalSize = 0u;
if (isResolveDependenciesByPipeControlsEnabled) {
additionalSize = MemorySynchronizationCommands<FamilyType>::getSizeForSingleBarrier();
additionalSize = MemorySynchronizationCommands<FamilyType>::getSizeForStallingBarrier();
}
EXPECT_EQ(sizeWithEnabled, sizeWithDisabled + additionalSize);

View File

@@ -2181,7 +2181,7 @@ HWTEST_F(PauseOnGpuTests, givenGpuScratchWriteEnabledWhenEstimatingCommandStream
EXPECT_EQ(baseCommandStreamSize + sizeof(typename FamilyType::MI_LOAD_REGISTER_IMM), extendedCommandStreamSize);
}
HWTEST_F(PauseOnGpuTests, givenResolveDependenciesByPipecontrolWhenEstimatingCommandStreamSizeThenPipeControlSizeIsIncluded) {
HWTEST_F(PauseOnGpuTests, givenResolveDependenciesByPipecontrolWhenEstimatingCommandStreamSizeThenStallingBarrierSizeIsIncluded) {
MockKernelWithInternals mockKernel(*pClDevice);
DispatchInfo dispatchInfo;
MultiDispatchInfo multiDispatchInfo(mockKernel.mockKernel);
@@ -2194,7 +2194,7 @@ HWTEST_F(PauseOnGpuTests, givenResolveDependenciesByPipecontrolWhenEstimatingCom
auto extendedCommandStreamSize = EnqueueOperation<FamilyType>::getTotalSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, {}, false, false, false, *pCmdQ, multiDispatchInfo, false, false, true, nullptr);
EXPECT_EQ(baseCommandStreamSize + MemorySynchronizationCommands<FamilyType>::getSizeForSingleBarrier(), extendedCommandStreamSize);
EXPECT_EQ(baseCommandStreamSize + MemorySynchronizationCommands<FamilyType>::getSizeForStallingBarrier(), extendedCommandStreamSize);
}
HWTEST_F(PauseOnGpuTests, givenTimestampPacketWriteDisabledAndMarkerWithProfilingWhenEstimatingCommandStreamSizeThenStoreMMIOSizeIsIncluded) {

View File

@@ -400,38 +400,28 @@ HWCMDTEST_F(IGFX_GEN12LP_CORE, CommandStreamReceiverFlushTaskTests, WhenForcePip
GenCmdList::iterator itor = cmdList.begin();
int counterPC = 0;
while (itor != cmdList.end()) {
auto pipeControl = genCmdCast<typename FamilyType::PIPE_CONTROL *>(*itor);
if (pipeControl) {
switch (counterPC) {
case 0: // First pipe control with CS Stall
EXPECT_EQ(bool(pipeControl->getCommandStreamerStallEnable()), true);
EXPECT_EQ(bool(pipeControl->getDcFlushEnable()), false);
EXPECT_EQ(bool(pipeControl->getRenderTargetCacheFlushEnable()), false);
EXPECT_EQ(bool(pipeControl->getInstructionCacheInvalidateEnable()), false);
EXPECT_EQ(bool(pipeControl->getTextureCacheInvalidationEnable()), false);
EXPECT_EQ(bool(pipeControl->getPipeControlFlushEnable()), false);
EXPECT_EQ(bool(pipeControl->getVfCacheInvalidationEnable()), false);
EXPECT_EQ(bool(pipeControl->getConstantCacheInvalidationEnable()), false);
EXPECT_EQ(bool(pipeControl->getStateCacheInvalidationEnable()), false);
EXPECT_EQ(bool(pipeControl->getTlbInvalidate()), false);
break;
case 1: // Second pipe control with all flushes
EXPECT_EQ(bool(pipeControl->getCommandStreamerStallEnable()), true);
EXPECT_EQ(bool(pipeControl->getDcFlushEnable()), true);
EXPECT_EQ(bool(pipeControl->getRenderTargetCacheFlushEnable()), true);
EXPECT_EQ(bool(pipeControl->getInstructionCacheInvalidateEnable()), true);
EXPECT_EQ(bool(pipeControl->getTextureCacheInvalidationEnable()), true);
EXPECT_EQ(bool(pipeControl->getPipeControlFlushEnable()), true);
EXPECT_EQ(bool(pipeControl->getVfCacheInvalidationEnable()), true);
EXPECT_EQ(bool(pipeControl->getConstantCacheInvalidationEnable()), true);
EXPECT_EQ(bool(pipeControl->getStateCacheInvalidationEnable()), true);
EXPECT_EQ(bool(pipeControl->getTlbInvalidate()), true);
default:
break;
}
if (counterPC == 0 && isStallingBarrier<FamilyType>(itor)) {
counterPC++;
itor++;
continue;
}
auto pipeControl = genCmdCast<typename FamilyType::PIPE_CONTROL *>(*itor);
if (pipeControl != nullptr) {
// Second pipe control with all flushes
EXPECT_EQ(1, counterPC);
EXPECT_EQ(bool(pipeControl->getCommandStreamerStallEnable()), true);
EXPECT_EQ(bool(pipeControl->getDcFlushEnable()), true);
EXPECT_EQ(bool(pipeControl->getRenderTargetCacheFlushEnable()), true);
EXPECT_EQ(bool(pipeControl->getInstructionCacheInvalidateEnable()), true);
EXPECT_EQ(bool(pipeControl->getTextureCacheInvalidationEnable()), true);
EXPECT_EQ(bool(pipeControl->getPipeControlFlushEnable()), true);
EXPECT_EQ(bool(pipeControl->getVfCacheInvalidationEnable()), true);
EXPECT_EQ(bool(pipeControl->getConstantCacheInvalidationEnable()), true);
EXPECT_EQ(bool(pipeControl->getStateCacheInvalidationEnable()), true);
EXPECT_EQ(bool(pipeControl->getTlbInvalidate()), true);
counterPC++;
break;
}
++itor;
}

View File

@@ -376,36 +376,28 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverFlushTaskXeHPAndLaterTests, Wh
GenCmdList::iterator itor = cmdList.begin();
int counterPC = 0;
while (itor != cmdList.end()) {
auto pipeControl = genCmdCast<typename FamilyType::PIPE_CONTROL *>(*itor);
if (pipeControl) {
switch (counterPC) {
case 0: // First pipe control with CS Stall
EXPECT_EQ(bool(pipeControl->getCommandStreamerStallEnable()), true);
EXPECT_EQ(bool(pipeControl->getDcFlushEnable()), false);
EXPECT_EQ(bool(pipeControl->getRenderTargetCacheFlushEnable()), false);
EXPECT_EQ(bool(pipeControl->getInstructionCacheInvalidateEnable()), false);
EXPECT_EQ(bool(pipeControl->getTextureCacheInvalidationEnable()), false);
EXPECT_EQ(bool(pipeControl->getPipeControlFlushEnable()), false);
EXPECT_EQ(bool(pipeControl->getVfCacheInvalidationEnable()), false);
EXPECT_EQ(bool(pipeControl->getConstantCacheInvalidationEnable()), false);
EXPECT_EQ(bool(pipeControl->getStateCacheInvalidationEnable()), false);
break;
case 1: // Second pipe control with all flushes
EXPECT_EQ(bool(pipeControl->getCommandStreamerStallEnable()), true);
EXPECT_EQ(bool(pipeControl->getDcFlushEnable()), true);
EXPECT_EQ(bool(pipeControl->getRenderTargetCacheFlushEnable()), true);
EXPECT_EQ(bool(pipeControl->getInstructionCacheInvalidateEnable()), true);
EXPECT_EQ(bool(pipeControl->getTextureCacheInvalidationEnable()), true);
EXPECT_EQ(bool(pipeControl->getPipeControlFlushEnable()), true);
EXPECT_EQ(bool(pipeControl->getVfCacheInvalidationEnable()), true);
EXPECT_EQ(bool(pipeControl->getConstantCacheInvalidationEnable()), true);
EXPECT_EQ(bool(pipeControl->getStateCacheInvalidationEnable()), true);
default:
break;
}
if (counterPC == 0 && isStallingBarrier<FamilyType>(itor)) {
counterPC++;
itor++;
continue;
}
auto pipeControl = genCmdCast<typename FamilyType::PIPE_CONTROL *>(*itor);
if (pipeControl != nullptr) {
// Second pipe control with all flushes
EXPECT_EQ(1, counterPC);
EXPECT_EQ(bool(pipeControl->getCommandStreamerStallEnable()), true);
EXPECT_EQ(bool(pipeControl->getDcFlushEnable()), true);
EXPECT_EQ(bool(pipeControl->getRenderTargetCacheFlushEnable()), true);
EXPECT_EQ(bool(pipeControl->getInstructionCacheInvalidateEnable()), true);
EXPECT_EQ(bool(pipeControl->getTextureCacheInvalidationEnable()), true);
EXPECT_EQ(bool(pipeControl->getPipeControlFlushEnable()), true);
EXPECT_EQ(bool(pipeControl->getVfCacheInvalidationEnable()), true);
EXPECT_EQ(bool(pipeControl->getConstantCacheInvalidationEnable()), true);
EXPECT_EQ(bool(pipeControl->getStateCacheInvalidationEnable()), true);
EXPECT_EQ(bool(pipeControl->getTlbInvalidate()), true);
counterPC++;
break;
}
++itor;
}

View File

@@ -227,7 +227,7 @@ HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledWhenEstimatingStr
size_t sizeForPipeControl = 0;
if (isResolveDependenciesByPipeControlsEnabled) {
sizeForPipeControl = MemorySynchronizationCommands<FamilyType>::getSizeForSingleBarrier();
sizeForPipeControl = MemorySynchronizationCommands<FamilyType>::getSizeForStallingBarrier();
}
size_t extendedSize = sizeWithDisabled + EnqueueOperation<FamilyType>::getSizeRequiredForTimestampPacketWrite() + sizeForNodeDependency + sizeForPipeControl;
@@ -1653,6 +1653,7 @@ HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledAndDependenciesRe
using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
using MI_BATCH_BUFFER_END = typename FamilyType::MI_BATCH_BUFFER_END;
using StallingBarrierType = typename FamilyType::StallingBarrierType;
device->getUltCommandStreamReceiver<FamilyType>().timestampPacketWriteEnabled = true;
@@ -1684,9 +1685,11 @@ HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledAndDependenciesRe
size_t pipeControlCountSecondEnqueue = 0u;
size_t semaphoreWaitCount = 0u;
size_t currentEnqueue = 1u;
bool stallingBarrierProgrammed = false;
while (it != hwParser.cmdList.end()) {
MI_SEMAPHORE_WAIT *semaphoreWaitCmd = genCmdCast<MI_SEMAPHORE_WAIT *>(*it);
PIPE_CONTROL *pipeControlCmd = genCmdCast<PIPE_CONTROL *>(*it);
StallingBarrierType *stallingBarrierCmd = genCmdCast<StallingBarrierType *>(*it);
MI_BATCH_BUFFER_END *miBatchBufferEnd = genCmdCast<MI_BATCH_BUFFER_END *>(*it);
if (pipeControlCmd != nullptr) {
if (currentEnqueue == 1) {
@@ -1694,6 +1697,9 @@ HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledAndDependenciesRe
} else if (currentEnqueue == 2) {
++pipeControlCountSecondEnqueue;
}
} else if (stallingBarrierCmd != nullptr) {
EXPECT_EQ(2u, currentEnqueue);
stallingBarrierProgrammed = true;
} else if (semaphoreWaitCmd != nullptr) {
++semaphoreWaitCount;
} else if (miBatchBufferEnd != nullptr) {
@@ -1704,7 +1710,8 @@ HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledAndDependenciesRe
++it;
}
EXPECT_EQ(semaphoreWaitCount, 0u);
EXPECT_EQ(pipeControlCountSecondEnqueue, pipeControlCountFirstEnqueue + 1);
auto stallingBarrierAsPC = stallingBarrierProgrammed ? 0 : 1;
EXPECT_EQ(pipeControlCountSecondEnqueue, pipeControlCountFirstEnqueue + stallingBarrierAsPC);
}
HWTEST2_F(TimestampPacketTests, givenTimestampPacketWriteEnabledAndDependenciesResolvedViaPipeControlsAndSingleIOQWhenEnqueueKernelThenDoNotProgramSemaphoresButProgramPipeControlWithProperFlagsBeforeGpgpuWalker, IsXeHpgCore) {

View File

@@ -224,7 +224,6 @@ HWTEST_F(TimestampPacketTests, whenEnqueueingBarrierThenDontRequestPipeControlOn
HWTEST_F(TimestampPacketTests, givenWaitlistWhenEnqueueingBarrierThenProgramNonStallingBarrier) {
using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
using MI_STORE_DATA_IMM = typename FamilyType::MI_STORE_DATA_IMM;
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
auto &csr = device->getUltCommandStreamReceiver<FamilyType>();
csr.timestampPacketWriteEnabled = true;
@@ -251,7 +250,7 @@ HWTEST_F(TimestampPacketTests, givenWaitlistWhenEnqueueingBarrierThenProgramNonS
auto it = hwParser.cmdList.begin();
if (device->getProductHelper().isResolveDependenciesByPipeControlsSupported(device->getHardwareInfo(), false, cmdQ.taskCount, cmdQ.getGpgpuCommandStreamReceiver())) {
EXPECT_NE(nullptr, genCmdCast<PIPE_CONTROL *>(*it));
EXPECT_TRUE(hwParser.isStallingBarrier<FamilyType>(it));
} else {
EXPECT_NE(nullptr, genCmdCast<MI_SEMAPHORE_WAIT *>(*it));
EXPECT_NE(nullptr, genCmdCast<MI_SEMAPHORE_WAIT *>(*(++it)));

View File

@@ -26,7 +26,7 @@ size_t EncodeComputeMode<Family>::getCmdSizeForComputeMode(const RootDeviceEnvir
}
size += sizeof(typename Family::STATE_COMPUTE_MODE);
if (hasSharedHandles) {
size += MemorySynchronizationCommands<Family>::getSizeForSingleBarrier();
size += MemorySynchronizationCommands<Family>::getSizeForStallingBarrier();
}
if (productHelper.is3DPipelineSelectWARequired() && isRcs) {
size += (2 * PreambleHelper<Family>::getCmdSizeForPipelineSelect(rootDeviceEnvironment));

View File

@@ -831,7 +831,8 @@ size_t CommandStreamReceiverHw<GfxFamily>::getRequiredCmdStreamSize(const Dispat
}
if (debugManager.flags.ForcePipeControlPriorToWalker.get()) {
size += 2 * MemorySynchronizationCommands<GfxFamily>::getSizeForSingleBarrier();
size += MemorySynchronizationCommands<GfxFamily>::getSizeForStallingBarrier();
size += MemorySynchronizationCommands<GfxFamily>::getSizeForSingleBarrier();
}
return size;

View File

@@ -101,6 +101,7 @@ struct Gen12LpFamily : public Gen12Lp {
using XY_COLOR_BLT = typename GfxFamily::XY_FAST_COLOR_BLT;
using MI_STORE_REGISTER_MEM_CMD = typename GfxFamily::MI_STORE_REGISTER_MEM;
using TimestampPacketType = uint32_t;
using StallingBarrierType = PIPE_CONTROL;
static const GPGPU_WALKER cmdInitGpgpuWalker;
static const INTERFACE_DESCRIPTOR_DATA cmdInitInterfaceDescriptorData;
static const MEDIA_INTERFACE_DESCRIPTOR_LOAD cmdInitMediaInterfaceDescriptorLoad;

View File

@@ -491,6 +491,7 @@ struct MemorySynchronizationCommands {
static void setSingleBarrier(void *commandsBuffer, PostSyncMode postSyncMode, uint64_t gpuAddress, uint64_t immediateData, PipeControlArgs &args);
static void addSingleBarrier(LinearStream &commandStream, PipeControlArgs &args);
static void setSingleBarrier(void *commandsBuffer, PipeControlArgs &args);
static void setStallingBarrier(void *commandsBuffer, PipeControlArgs &args);
static void addBarrierWithPostSyncOperation(LinearStream &commandStream, PostSyncMode postSyncMode, uint64_t gpuAddress, uint64_t immediateData, const RootDeviceEnvironment &rootDeviceEnvironment, PipeControlArgs &args);
static void setBarrierWithPostSyncOperation(void *&commandsBuffer, PostSyncMode postSyncMode, uint64_t gpuAddress, uint64_t immediateData, const RootDeviceEnvironment &rootDeviceEnvironment, PipeControlArgs &args);
@@ -525,6 +526,7 @@ struct MemorySynchronizationCommands {
static size_t getSizeForSingleAdditionalSynchronization(NEO::FenceType fenceType, const RootDeviceEnvironment &rootDeviceEnvironment);
static size_t getSizeForAdditionalSynchronization(NEO::FenceType fenceType, const RootDeviceEnvironment &rootDeviceEnvironment);
static size_t getSizeForInstructionCacheFlush();
static size_t getSizeForStallingBarrier();
static bool isBarrierWaRequired(const RootDeviceEnvironment &rootDeviceEnvironment);
static bool isBarrierPriorToPipelineSelectWaRequired(const RootDeviceEnvironment &rootDeviceEnvironment);

View File

@@ -236,25 +236,40 @@ void MemorySynchronizationCommands<GfxFamily>::setSingleBarrier(void *commandsBu
template <typename GfxFamily>
void MemorySynchronizationCommands<GfxFamily>::addSingleBarrier(LinearStream &commandStream, PostSyncMode postSyncMode, uint64_t gpuAddress, uint64_t immediateData, PipeControlArgs &args) {
auto barrier = commandStream.getSpace(MemorySynchronizationCommands<GfxFamily>::getSizeForSingleBarrier());
auto barrierSize = MemorySynchronizationCommands<GfxFamily>::getSizeForSingleBarrier();
if (args.csStallOnly) {
barrierSize = MemorySynchronizationCommands<GfxFamily>::getSizeForStallingBarrier();
}
auto barrier = commandStream.getSpace(barrierSize);
setSingleBarrier(barrier, postSyncMode, gpuAddress, immediateData, args);
}
template <typename GfxFamily>
void MemorySynchronizationCommands<GfxFamily>::setSingleBarrier(void *commandsBuffer, PostSyncMode postSyncMode, uint64_t gpuAddress, uint64_t immediateData, PipeControlArgs &args) {
void MemorySynchronizationCommands<GfxFamily>::setStallingBarrier(void *commandsBuffer, PipeControlArgs &args) {
using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL;
PIPE_CONTROL pipeControl = GfxFamily::cmdInitPipeControl;
pipeControl.setCommandStreamerStallEnable(true);
setBarrierExtraProperties(&pipeControl, args);
*reinterpret_cast<PIPE_CONTROL *>(commandsBuffer) = pipeControl;
}
template <typename GfxFamily>
void MemorySynchronizationCommands<GfxFamily>::setSingleBarrier(void *commandsBuffer, PostSyncMode postSyncMode, uint64_t gpuAddress, uint64_t immediateData, PipeControlArgs &args) {
using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL;
if (args.csStallOnly) {
*reinterpret_cast<PIPE_CONTROL *>(commandsBuffer) = pipeControl;
setStallingBarrier(commandsBuffer, args);
return;
}
PIPE_CONTROL pipeControl = GfxFamily::cmdInitPipeControl;
pipeControl.setCommandStreamerStallEnable(true);
setBarrierExtraProperties(&pipeControl, args);
pipeControl.setConstantCacheInvalidationEnable(args.constantCacheInvalidationEnable);
pipeControl.setInstructionCacheInvalidateEnable(args.instructionCacheInvalidateEnable);
pipeControl.setPipeControlFlushEnable(args.pipeControlFlushEnable);
@@ -335,7 +350,7 @@ void MemorySynchronizationCommands<GfxFamily>::setBarrierWa(void *&commandsBuffe
additionalArgs.csStallOnly = true;
MemorySynchronizationCommands<GfxFamily>::setSingleBarrier(commandsBuffer, additionalArgs);
commandsBuffer = ptrOffset(commandsBuffer, sizeof(PIPE_CONTROL));
commandsBuffer = ptrOffset(commandsBuffer, getSizeForStallingBarrier());
}
}
@@ -384,7 +399,7 @@ size_t MemorySynchronizationCommands<GfxFamily>::getSizeForBarrierWa(const RootD
size = getSizeForSingleBarrier() +
getSizeForSingleAdditionalSynchronization(NEO::FenceType::release, rootDeviceEnvironment);
} else if (releaseHelper && postSyncMode == PostSyncMode::timestamp && releaseHelper->programmAdditionalStallPriorToBarrierWithTimestamp()) {
size = getSizeForSingleBarrier();
size = getSizeForStallingBarrier();
}
return size;
}
@@ -851,6 +866,11 @@ bool GfxCoreHelperHw<Family>::isCacheFlushPriorImageReadRequired() const {
return false;
}
template <typename GfxFamily>
size_t MemorySynchronizationCommands<GfxFamily>::getSizeForStallingBarrier() {
    // The command used as a stalling barrier is chosen per family through
    // GfxFamily::StallingBarrierType; its size in bytes is what callers must reserve.
    using StallingBarrierCmd = typename GfxFamily::StallingBarrierType;
    return sizeof(StallingBarrierCmd);
}
template <typename Family>
uint32_t GfxCoreHelperHw<Family>::getQueuePriorityLevels() const {
return 2;

View File

@@ -291,6 +291,17 @@ uint32_t GfxCoreHelperHw<Family>::getDeviceTimestampWidth() const {
return 64u;
};
template <>
void MemorySynchronizationCommands<Family>::setStallingBarrier(void *commandsBuffer, PipeControlArgs &args) {
    // Family-specific stalling barrier: a RESOURCE_BARRIER is written instead of a PIPE_CONTROL.
    // `args` is not read by this specialization.
    using RESOURCE_BARRIER = typename Family::RESOURCE_BARRIER;

    // Start from the family's initial command template and program the barrier fields.
    auto barrierCmd = Family::cmdInitResourceBarrier;
    barrierCmd.setBarrierType(RESOURCE_BARRIER::BARRIER_TYPE::BARRIER_TYPE_IMMEDIATE);
    barrierCmd.setWaitStage(RESOURCE_BARRIER::WAIT_STAGE::WAIT_STAGE_TOP);
    barrierCmd.setSignalStage(RESOURCE_BARRIER::SIGNAL_STAGE::SIGNAL_STAGE_GPGPU);

    // Copy the finished command into the caller-provided space.
    auto *destination = reinterpret_cast<RESOURCE_BARRIER *>(commandsBuffer);
    *destination = barrierCmd;
}
} // namespace NEO
namespace NEO {

View File

@@ -108,6 +108,7 @@ struct Xe2HpgCoreFamily : public Xe2HpgCore {
using XY_COLOR_BLT = typename GfxFamily::XY_FAST_COLOR_BLT;
using MI_STORE_REGISTER_MEM_CMD = typename GfxFamily::MI_STORE_REGISTER_MEM;
using TimestampPacketType = uint64_t;
using StallingBarrierType = RESOURCE_BARRIER;
static const COMPUTE_WALKER cmdInitGpgpuWalker;
static const CFE_STATE cmdInitCfeState;
static const INTERFACE_DESCRIPTOR_DATA cmdInitInterfaceDescriptorData;

View File

@@ -259,6 +259,18 @@ uint32_t GfxCoreHelperHw<Family>::adjustMaxWorkGroupSize(const uint32_t grfCount
const uint32_t threadsPerThreadGroup = calculateNumThreadsPerThreadGroup(simd, defaultMaxGroupSize, grfCount, rootDeviceEnvironment);
return (threadsPerThreadGroup * simd);
}
template <>
void MemorySynchronizationCommands<Family>::setStallingBarrier(void *commandsBuffer, PipeControlArgs &args) {
    // Family-specific stalling barrier: a RESOURCE_BARRIER is written instead of a PIPE_CONTROL.
    // `args` is not read by this specialization.
    using RESOURCE_BARRIER = typename Family::RESOURCE_BARRIER;

    // Start from the family's initial command template and program the barrier fields.
    auto barrierCmd = Family::cmdInitResourceBarrier;
    barrierCmd.setBarrierType(RESOURCE_BARRIER::BARRIER_TYPE::BARRIER_TYPE_IMMEDIATE);
    barrierCmd.setWaitStage(RESOURCE_BARRIER::WAIT_STAGE::WAIT_STAGE_TOP);
    barrierCmd.setSignalStage(RESOURCE_BARRIER::SIGNAL_STAGE::SIGNAL_STAGE_GPGPU);

    // Copy the finished command into the caller-provided space.
    auto *destination = reinterpret_cast<RESOURCE_BARRIER *>(commandsBuffer);
    *destination = barrierCmd;
}
} // namespace NEO
namespace NEO {

View File

@@ -110,6 +110,7 @@ struct Xe3CoreFamily : public Xe3Core {
using XY_COLOR_BLT = typename GfxFamily::XY_FAST_COLOR_BLT;
using MI_STORE_REGISTER_MEM_CMD = typename GfxFamily::MI_STORE_REGISTER_MEM;
using TimestampPacketType = uint64_t;
using StallingBarrierType = RESOURCE_BARRIER;
static const COMPUTE_WALKER cmdInitGpgpuWalker;
static const CFE_STATE cmdInitCfeState;
static const INTERFACE_DESCRIPTOR_DATA cmdInitInterfaceDescriptorData;

View File

@@ -90,6 +90,7 @@ struct XeHpcCoreFamily : public XeHpcCore {
using XY_COLOR_BLT = typename GfxFamily::XY_FAST_COLOR_BLT;
using MI_STORE_REGISTER_MEM_CMD = typename GfxFamily::MI_STORE_REGISTER_MEM;
using TimestampPacketType = uint32_t;
using StallingBarrierType = PIPE_CONTROL;
static const COMPUTE_WALKER cmdInitGpgpuWalker;
static const CFE_STATE cmdInitCfeState;
static const INTERFACE_DESCRIPTOR_DATA cmdInitInterfaceDescriptorData;

View File

@@ -110,6 +110,7 @@ struct XeHpgCoreFamily : public XeHpgCore {
using XY_COLOR_BLT = typename GfxFamily::XY_FAST_COLOR_BLT;
using MI_STORE_REGISTER_MEM_CMD = typename GfxFamily::MI_STORE_REGISTER_MEM;
using TimestampPacketType = uint32_t;
using StallingBarrierType = PIPE_CONTROL;
static const COMPUTE_WALKER cmdInitGpgpuWalker;
static const CFE_STATE cmdInitCfeState;
static const INTERFACE_DESCRIPTOR_DATA cmdInitInterfaceDescriptorData;

View File

@@ -40,5 +40,6 @@ if(TESTS_XE2_AND_LATER)
target_sources(neo_libult_common PRIVATE
${CMAKE_CURRENT_SOURCE_DIR}/cmd_parse_state_context_data_base_address.inl
${CMAKE_CURRENT_SOURCE_DIR}/hw_parse_xe2_hpg_and_later.inl
${CMAKE_CURRENT_SOURCE_DIR}/cmd_parse_resource_barrier.inl
)
endif()

View File

@@ -0,0 +1,22 @@
/*
* Copyright (C) 2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/test/common/cmd_parse/gen_cmd_parse.h"
using namespace NEO;
using RESOURCE_BARRIER = GenStruct::RESOURCE_BARRIER;
template <>
RESOURCE_BARRIER *genCmdCast<RESOURCE_BARRIER *>(void *buffer) {
    // Interprets `buffer` as a RESOURCE_BARRIER and accepts it only when all three
    // header fields carry the values checked below; otherwise returns nullptr.
    auto *candidate = reinterpret_cast<RESOURCE_BARRIER *>(buffer);

    if (0x3 != candidate->TheStructure.Common.DwordLength) {
        return nullptr;
    }
    if (0x3 != candidate->TheStructure.Common.Opcode) {
        return nullptr;
    }
    if (0x5 != candidate->TheStructure.Common.CommandType) {
        return nullptr;
    }
    return candidate;
}

View File

@@ -63,6 +63,9 @@ struct HardwareParse : NEO::NonCopyableAndNonMovableClass {
return true;
}
template <typename FamilyType>
bool isStallingBarrier(GenCmdList::iterator &iter);
template <typename FamilyType>
void findHardwareCommands();

View File

@@ -155,4 +155,22 @@ const typename FamilyType::RENDER_SURFACE_STATE *HardwareParse::getSurfaceState(
}
}
template <typename FamilyType>
bool HardwareParse::isStallingBarrier(GenCmdList::iterator &iter) {
    // Generic variant: the stalling barrier is a PIPE_CONTROL. Returns false when the
    // command under `iter` is not a PIPE_CONTROL; otherwise records gtest expectations
    // on its fields and returns true.
    auto *barrierCmd = genCmdCast<PIPE_CONTROL *>(*iter);
    if (nullptr == barrierCmd) {
        return false;
    }

    // CS stall is the only bit expected to be set.
    EXPECT_EQ(barrierCmd->getCommandStreamerStallEnable(), true);

    // None of the flush / invalidation bits checked here may be set.
    EXPECT_EQ(barrierCmd->getDcFlushEnable(), false);
    EXPECT_EQ(barrierCmd->getRenderTargetCacheFlushEnable(), false);
    EXPECT_EQ(barrierCmd->getInstructionCacheInvalidateEnable(), false);
    EXPECT_EQ(barrierCmd->getTextureCacheInvalidationEnable(), false);
    EXPECT_EQ(barrierCmd->getPipeControlFlushEnable(), false);
    EXPECT_EQ(barrierCmd->getVfCacheInvalidationEnable(), false);
    EXPECT_EQ(barrierCmd->getConstantCacheInvalidationEnable(), false);
    EXPECT_EQ(barrierCmd->getStateCacheInvalidationEnable(), false);
    return true;
}
} // namespace NEO

View File

@@ -23,4 +23,13 @@ bool HardwareParse::requiresPipelineSelectBeforeMediaState<GenGfxFamily>() {
return false;
}
template <>
bool HardwareParse::isStallingBarrier<GenGfxFamily>(GenCmdList::iterator &iter) {
    // RESOURCE_BARRIER variant of the stalling-barrier check. Returns false when the
    // command under `iter` is not a RESOURCE_BARRIER; otherwise records gtest
    // expectations on its fields and returns true.
    GenGfxFamily::RESOURCE_BARRIER *resourceBarrierCmd = genCmdCast<GenGfxFamily::RESOURCE_BARRIER *>(*iter);

    // Bail out before touching the command: genCmdCast yields nullptr for any other
    // command type, and callers probe arbitrary commands while walking a command list.
    if (resourceBarrierCmd == nullptr) {
        return false;
    }

    EXPECT_EQ(resourceBarrierCmd->getBarrierType(), RESOURCE_BARRIER::BARRIER_TYPE::BARRIER_TYPE_IMMEDIATE);
    EXPECT_EQ(resourceBarrierCmd->getWaitStage(), RESOURCE_BARRIER::WAIT_STAGE::WAIT_STAGE_TOP);
    EXPECT_EQ(resourceBarrierCmd->getSignalStage(), RESOURCE_BARRIER::SIGNAL_STAGE::SIGNAL_STAGE_GPGPU);
    return true;
}
} // namespace NEO

View File

@@ -132,4 +132,6 @@ template void HardwareParse::findHardwareCommands<Gen12LpFamily>();
template void HardwareParse::findHardwareCommands<Gen12LpFamily>(IndirectHeap *);
template const void *HardwareParse::getStatelessArgumentPointer<Gen12LpFamily>(const KernelInfo &kernelInfo, uint32_t indexArg, IndirectHeap &ioh, uint32_t rootDeviceIndex);
template const typename Gen12LpFamily::RENDER_SURFACE_STATE *HardwareParse::getSurfaceState<Gen12LpFamily>(IndirectHeap *ssh, uint32_t index);
template bool HardwareParse::isStallingBarrier<Gen12LpFamily>(GenCmdList::iterator &iter);
} // namespace NEO

View File

@@ -11,6 +11,7 @@ using GenGfxFamily = NEO::Xe2HpgCoreFamily;
#include "shared/test/common/cmd_parse/cmd_parse_mem_fence.inl"
#include "shared/test/common/cmd_parse/cmd_parse_mem_set.inl"
#include "shared/test/common/cmd_parse/cmd_parse_resource_barrier.inl"
#include "shared/test/common/cmd_parse/cmd_parse_state_context_data_base_address.inl"
#include "shared/test/common/cmd_parse/cmd_parse_state_prefetch.inl"
#include "shared/test/common/cmd_parse/cmd_parse_system_mem_fence_address.inl"
@@ -22,6 +23,7 @@ using STATE_SYSTEM_MEM_FENCE_ADDRESS = GenStruct::STATE_SYSTEM_MEM_FENCE_ADDRESS
using STATE_PREFETCH = GenStruct::STATE_PREFETCH;
using MEM_SET = GenStruct::MEM_SET;
using STATE_CONTEXT_DATA_BASE_ADDRESS = GenStruct::STATE_CONTEXT_DATA_BASE_ADDRESS;
using RESOURCE_BARRIER = GenStruct::RESOURCE_BARRIER;
template <>
size_t CmdParse<GenGfxFamily>::getAdditionalCommandLength(void *cmd) {
@@ -50,6 +52,11 @@ size_t CmdParse<GenGfxFamily>::getAdditionalCommandLength(void *cmd) {
return sizeof(STATE_CONTEXT_DATA_BASE_ADDRESS) / sizeof(uint32_t);
}
}
{
if (genCmdCast<RESOURCE_BARRIER *>(cmd)) {
return sizeof(RESOURCE_BARRIER) / sizeof(uint32_t);
}
}
return 0;
}

View File

@@ -12,6 +12,7 @@ using GenGfxFamily = NEO::Xe3CoreFamily;
#include "shared/test/common/cmd_parse/cmd_parse_mem_fence.inl"
#include "shared/test/common/cmd_parse/cmd_parse_mem_set.inl"
#include "shared/test/common/cmd_parse/cmd_parse_resource_barrier.inl"
#include "shared/test/common/cmd_parse/cmd_parse_state_context_data_base_address.inl"
#include "shared/test/common/cmd_parse/cmd_parse_state_prefetch.inl"
#include "shared/test/common/cmd_parse/cmd_parse_system_mem_fence_address.inl"
@@ -23,6 +24,7 @@ using STATE_SYSTEM_MEM_FENCE_ADDRESS = GenStruct::STATE_SYSTEM_MEM_FENCE_ADDRESS
using STATE_PREFETCH = GenStruct::STATE_PREFETCH;
using MEM_SET = GenStruct::MEM_SET;
using STATE_CONTEXT_DATA_BASE_ADDRESS = GenStruct::STATE_CONTEXT_DATA_BASE_ADDRESS;
using RESOURCE_BARRIER = GenStruct::RESOURCE_BARRIER;
template <>
size_t CmdParse<GenGfxFamily>::getAdditionalCommandLength(void *cmd) {
@@ -51,6 +53,11 @@ size_t CmdParse<GenGfxFamily>::getAdditionalCommandLength(void *cmd) {
return sizeof(STATE_CONTEXT_DATA_BASE_ADDRESS) / sizeof(uint32_t);
}
}
{
if (genCmdCast<RESOURCE_BARRIER *>(cmd)) {
return sizeof(RESOURCE_BARRIER) / sizeof(uint32_t);
}
}
return 0;
}

View File

@@ -71,3 +71,4 @@ const char *CmdParse<GenGfxFamily>::getAdditionalCommandName(void *cmd) {
#include "shared/test/common/cmd_parse/hw_parse_xe_hpg_and_later.inl"
template const typename GenGfxFamily::RENDER_SURFACE_STATE *NEO::HardwareParse::getSurfaceState<GenGfxFamily>(IndirectHeap *ssh, uint32_t index);
template bool NEO::HardwareParse::isStallingBarrier<GenGfxFamily>(GenCmdList::iterator &iter);

View File

@@ -57,3 +57,4 @@ const char *CmdParse<GenGfxFamily>::getAdditionalCommandName(void *cmd) {
#include "shared/test/common/cmd_parse/hw_parse_xe_hpg_and_later.inl"
template const typename GenGfxFamily::RENDER_SURFACE_STATE *NEO::HardwareParse::getSurfaceState<GenGfxFamily>(IndirectHeap *ssh, uint32_t index);
template bool NEO::HardwareParse::isStallingBarrier<GenGfxFamily>(GenCmdList::iterator &iter);

View File

@@ -70,7 +70,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ComputeModeRequirements, givenCoherencyWithSharedHa
overrideComputeModeRequest<FamilyType>(false, true, true);
EXPECT_FALSE(getCsrHw<FamilyType>()->streamProperties.stateComputeMode.isDirty());
auto cmdsSize = sizeof(STATE_COMPUTE_MODE) + sizeof(PIPE_CONTROL);
auto cmdsSize = sizeof(STATE_COMPUTE_MODE) + MemorySynchronizationCommands<FamilyType>::getSizeForStallingBarrier();
if (isBasicWARequired) {
cmdsSize += +sizeof(PIPE_CONTROL);
}

View File

@@ -1927,6 +1927,24 @@ HWTEST2_F(GfxCoreHelperTest, whenIsCacheFlushPriorImageReadRequiredCalledThenFal
EXPECT_FALSE(helper.isCacheFlushPriorImageReadRequired());
}
HWTEST2_F(GfxCoreHelperTest, givenAtLeastXe2HpgWhenSetStallOnlyBarrierThenPipeControlProgrammed, IsAtMostXeCore) {
    // Checks that a csStallOnly barrier is emitted as exactly one PIPE_CONTROL that
    // passes HardwareParse::isStallingBarrier.
    // NOTE(review): the test name says "AtLeastXe2Hpg" but the matcher is IsAtMostXeCore
    // and a PIPE_CONTROL is expected - the name looks stale; confirm before renaming.
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
    constexpr static auto bufferSize = sizeof(PIPE_CONTROL);

    char streamBuffer[bufferSize];
    LinearStream stream(streamBuffer, bufferSize);

    PipeControlArgs args;
    args.csStallOnly = true;
    MemorySynchronizationCommands<FamilyType>::addSingleBarrier(stream, PostSyncMode::noWrite, 0u, 0u, args);

    HardwareParse hwParser;
    hwParser.parseCommands<FamilyType>(stream, 0);
    GenCmdList pipeControlList = hwParser.getCommandsList<PIPE_CONTROL>();

    // Fatal assertion: with an empty list begin() == end() and must not be dereferenced below.
    ASSERT_EQ(1u, pipeControlList.size());
    GenCmdList::iterator itor = pipeControlList.begin();
    EXPECT_TRUE(hwParser.isStallingBarrier<FamilyType>(itor));
}
TEST_F(GfxCoreHelperTest, whenGetQueuePriorityLevelsQueriedThen2IsReturned) {
auto &gfxCoreHelper = getHelper<GfxCoreHelper>();
EXPECT_EQ(2u, gfxCoreHelper.getQueuePriorityLevels());

View File

@@ -111,4 +111,4 @@ HWTEST2_F(GfxCoreHelperXe2AndLaterTests, givenAtLeastXe2HpgWhenIsCacheFlushPrior
MockExecutionEnvironment mockExecutionEnvironment{};
auto &gfxCoreHelper = mockExecutionEnvironment.rootDeviceEnvironments[0]->getHelper<GfxCoreHelper>();
EXPECT_TRUE(gfxCoreHelper.isCacheFlushPriorImageReadRequired());
}
}

View File

@@ -9,6 +9,7 @@
#include "shared/source/helpers/compiler_product_helper.h"
#include "shared/source/helpers/engine_node_helper.h"
#include "shared/source/helpers/gfx_core_helper.h"
#include "shared/source/helpers/pipe_control_args.h"
#include "shared/source/memory_manager/allocation_properties.h"
#include "shared/source/os_interface/product_helper.h"
#include "shared/source/program/kernel_info.h"
@@ -870,3 +871,21 @@ XE2_HPG_CORETEST_F(GfxCoreHelperTestsXe2HpgCoreWithEnginesCheck, whenGetEnginesC
EXPECT_EQ(0u, getEngineCount(aub_stream::ENGINE_CCS, EngineUsage::regular));
EXPECT_EQ(1u, getEngineCount(aub_stream::ENGINE_CCCS, EngineUsage::regular));
}
XE2_HPG_CORETEST_F(GfxCoreHelperTestsXe2HpgCore, givenXe2HpgWhenSetStallOnlyBarrierThenResourceBarrierProgrammed) {
    // Checks that a csStallOnly barrier is emitted as exactly one RESOURCE_BARRIER that
    // passes HardwareParse::isStallingBarrier.
    using RESOURCE_BARRIER = typename FamilyType::RESOURCE_BARRIER;
    constexpr static auto bufferSize = sizeof(RESOURCE_BARRIER);

    char streamBuffer[bufferSize];
    LinearStream stream(streamBuffer, bufferSize);

    PipeControlArgs args;
    args.csStallOnly = true;
    MemorySynchronizationCommands<FamilyType>::addSingleBarrier(stream, PostSyncMode::noWrite, 0u, 0u, args);

    HardwareParse hwParser;
    hwParser.parseCommands<FamilyType>(stream, 0);
    GenCmdList resourceBarrierList = hwParser.getCommandsList<RESOURCE_BARRIER>();

    // Fatal assertion: with an empty list begin() == end() and must not be dereferenced below.
    ASSERT_EQ(1u, resourceBarrierList.size());
    GenCmdList::iterator itor = resourceBarrierList.begin();
    EXPECT_TRUE(hwParser.isStallingBarrier<FamilyType>(itor));
}

View File

@@ -9,6 +9,7 @@
#include "shared/source/helpers/compiler_product_helper.h"
#include "shared/source/helpers/engine_node_helper.h"
#include "shared/source/helpers/gfx_core_helper.h"
#include "shared/source/helpers/pipe_control_args.h"
#include "shared/source/helpers/simd_helper.h"
#include "shared/source/memory_manager/allocation_properties.h"
#include "shared/test/common/cmd_parse/hw_parse.h"
@@ -873,3 +874,21 @@ XE3_CORETEST_F(GfxCoreHelperTestsXe3CoreWithEnginesCheck, whenGetEnginesCalledTh
EXPECT_EQ(0u, getEngineCount(aub_stream::ENGINE_CCS, EngineUsage::regular));
EXPECT_EQ(1u, getEngineCount(aub_stream::ENGINE_CCCS, EngineUsage::regular));
}
XE3_CORETEST_F(GfxCoreHelperTestsXe3Core, givenXe3WhenSetStallOnlyBarrierThenResourceBarrierProgrammed) {
    // Checks that a csStallOnly barrier is emitted as exactly one RESOURCE_BARRIER that
    // passes HardwareParse::isStallingBarrier.
    using RESOURCE_BARRIER = typename FamilyType::RESOURCE_BARRIER;
    constexpr static auto bufferSize = sizeof(RESOURCE_BARRIER);

    char streamBuffer[bufferSize];
    LinearStream stream(streamBuffer, bufferSize);

    PipeControlArgs args;
    args.csStallOnly = true;
    MemorySynchronizationCommands<FamilyType>::addSingleBarrier(stream, PostSyncMode::noWrite, 0u, 0u, args);

    HardwareParse hwParser;
    hwParser.parseCommands<FamilyType>(stream, 0);
    GenCmdList resourceBarrierList = hwParser.getCommandsList<RESOURCE_BARRIER>();

    // Fatal assertion: with an empty list begin() == end() and must not be dereferenced below.
    ASSERT_EQ(1u, resourceBarrierList.size());
    GenCmdList::iterator itor = resourceBarrierList.begin();
    EXPECT_TRUE(hwParser.isStallingBarrier<FamilyType>(itor));
}