Revert "performance: use RESOURCE_BARRIER as stalling barrier"

This reverts commit 556c0b64c6.

Signed-off-by: Compute-Runtime-Validation <compute-runtime-validation@intel.com>
This commit is contained in:
Compute-Runtime-Validation
2025-05-30 13:39:42 +02:00
committed by Compute-Runtime-Automation
parent 13a5e90124
commit 1a50e8e7c0
32 changed files with 80 additions and 215 deletions

View File

@@ -379,7 +379,6 @@ HWTEST_F(CommandListAppendSignalEvent, givenInOrderImmediateCmdListWhenAppending
using MI_STORE_DATA_IMM = typename FamilyType::MI_STORE_DATA_IMM;
using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START;
using StallingBarrierType = typename FamilyType::StallingBarrierType;
ze_event_pool_desc_t eventPoolDesc = {};
eventPoolDesc.count = 1;
@@ -447,7 +446,7 @@ HWTEST_F(CommandListAppendSignalEvent, givenInOrderImmediateCmdListWhenAppending
GenCmdList::iterator itorResolveCmd = itorBbStart;
if (NEO::MemorySynchronizationCommands<FamilyType>::getDcFlushEnable(true, neoDevice->getRootDeviceEnvironment())) {
itorResolveCmd = find<StallingBarrierType *>(cmdList.begin(), itorBbStart);
itorResolveCmd = find<PIPE_CONTROL *>(cmdList.begin(), itorBbStart);
} else {
itorResolveCmd = find<MI_SEMAPHORE_WAIT *>(cmdList.begin(), itorBbStart);
}

View File

@@ -993,7 +993,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, InOrderCmdListTests, givenResolveDependenciesViaPip
ptrOffset(cmdStream->getCpuBase(), offset),
cmdStream->getUsed() - offset));
auto itor = find<typename FamilyType::StallingBarrierType *>(cmdList.begin(), cmdList.end());
auto itor = find<typename FamilyType::PIPE_CONTROL *>(cmdList.begin(), cmdList.end());
ASSERT_NE(cmdList.end(), itor);
completeHostAddress<FamilyType::gfxCoreFamily, WhiteBox<L0::CommandListCoreFamilyImmediate<FamilyType::gfxCoreFamily>>>(immCmdList.get());
@@ -1024,7 +1024,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, InOrderCmdListTests, givenOptimizedCbEventWhenSubmi
cmdStream->getUsed() - offset));
if (immCmdList->dcFlushSupport || !immCmdList->isHeaplessModeEnabled()) {
auto itor = find<typename FamilyType::StallingBarrierType *>(cmdList.begin(), cmdList.end());
auto itor = find<typename FamilyType::PIPE_CONTROL *>(cmdList.begin(), cmdList.end());
ASSERT_NE(cmdList.end(), itor);
} else {
auto itor = find<typename FamilyType::MI_SEMAPHORE_WAIT *>(cmdList.begin(), cmdList.end());
@@ -1059,7 +1059,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, InOrderCmdListTests, givenInOrderCmdListWhenSubmitt
cmdStream->getUsed() - offset));
if (immCmdList->dcFlushSupport) {
auto itor = find<typename FamilyType::StallingBarrierType *>(cmdList.begin(), cmdList.end());
auto itor = find<typename FamilyType::PIPE_CONTROL *>(cmdList.begin(), cmdList.end());
ASSERT_NE(cmdList.end(), itor);
} else {
auto itor = find<typename FamilyType::MI_SEMAPHORE_WAIT *>(cmdList.begin(), cmdList.end());

View File

@@ -99,7 +99,7 @@ size_t EnqueueOperation<GfxFamily>::getTotalSizeRequiredCS(uint32_t eventType, c
expectedSizeCS += TimestampPacketHelper::getRequiredCmdStreamSize<GfxFamily>(csrDeps, relaxedOrderingEnabled);
expectedSizeCS += EnqueueOperation<GfxFamily>::getSizeRequiredForTimestampPacketWrite();
if (resolveDependenciesByPipecontrol) {
expectedSizeCS += MemorySynchronizationCommands<GfxFamily>::getSizeForStallingBarrier();
expectedSizeCS += MemorySynchronizationCommands<GfxFamily>::getSizeForSingleBarrier();
}
if (isMarkerWithProfiling) {
if (!eventsInWaitlist) {

View File

@@ -586,7 +586,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, givenTimestamp
size_t additionalSize = 0u;
if (isResolveDependenciesByPipeControlsEnabled) {
additionalSize = MemorySynchronizationCommands<FamilyType>::getSizeForStallingBarrier();
additionalSize = MemorySynchronizationCommands<FamilyType>::getSizeForSingleBarrier();
}
EXPECT_EQ(sizeWithEnabled, sizeWithDisabled + additionalSize);

View File

@@ -2179,7 +2179,7 @@ HWTEST_F(PauseOnGpuTests, givenGpuScratchWriteEnabledWhenEstimatingCommandStream
EXPECT_EQ(baseCommandStreamSize + sizeof(typename FamilyType::MI_LOAD_REGISTER_IMM), extendedCommandStreamSize);
}
HWTEST_F(PauseOnGpuTests, givenResolveDependenciesByPipecontrolWhenEstimatingCommandStreamSizeThenStallingBarrierSizeIsIncluded) {
HWTEST_F(PauseOnGpuTests, givenResolveDependenciesByPipecontrolWhenEstimatingCommandStreamSizeThenPipeControlSizeIsIncluded) {
MockKernelWithInternals mockKernel(*pClDevice);
DispatchInfo dispatchInfo;
MultiDispatchInfo multiDispatchInfo(mockKernel.mockKernel);
@@ -2192,7 +2192,7 @@ HWTEST_F(PauseOnGpuTests, givenResolveDependenciesByPipecontrolWhenEstimatingCom
auto extendedCommandStreamSize = EnqueueOperation<FamilyType>::getTotalSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, {}, false, false, false, *pCmdQ, multiDispatchInfo, false, false, true, nullptr);
EXPECT_EQ(baseCommandStreamSize + MemorySynchronizationCommands<FamilyType>::getSizeForStallingBarrier(), extendedCommandStreamSize);
EXPECT_EQ(baseCommandStreamSize + MemorySynchronizationCommands<FamilyType>::getSizeForSingleBarrier(), extendedCommandStreamSize);
}
HWTEST_F(PauseOnGpuTests, givenTimestampPacketWriteDisabledAndMarkerWithProfilingWhenEstimatingCommandStreamSizeThenStoreMMIOSizeIsIncluded) {

View File

@@ -393,28 +393,38 @@ HWCMDTEST_F(IGFX_GEN12LP_CORE, CommandStreamReceiverFlushTaskTests, WhenForcePip
GenCmdList::iterator itor = cmdList.begin();
int counterPC = 0;
while (itor != cmdList.end()) {
if (counterPC == 0 && isStallingBarrier<FamilyType>(itor)) {
counterPC++;
itor++;
continue;
}
auto pipeControl = genCmdCast<typename FamilyType::PIPE_CONTROL *>(*itor);
if (pipeControl != nullptr) {
// Second pipe control with all flushes
EXPECT_EQ(1, counterPC);
EXPECT_EQ(bool(pipeControl->getCommandStreamerStallEnable()), true);
EXPECT_EQ(bool(pipeControl->getDcFlushEnable()), true);
EXPECT_EQ(bool(pipeControl->getRenderTargetCacheFlushEnable()), true);
EXPECT_EQ(bool(pipeControl->getInstructionCacheInvalidateEnable()), true);
EXPECT_EQ(bool(pipeControl->getTextureCacheInvalidationEnable()), true);
EXPECT_EQ(bool(pipeControl->getPipeControlFlushEnable()), true);
EXPECT_EQ(bool(pipeControl->getVfCacheInvalidationEnable()), true);
EXPECT_EQ(bool(pipeControl->getConstantCacheInvalidationEnable()), true);
EXPECT_EQ(bool(pipeControl->getStateCacheInvalidationEnable()), true);
EXPECT_EQ(bool(pipeControl->getTlbInvalidate()), true);
if (pipeControl) {
switch (counterPC) {
case 0: // First pipe control with CS Stall
EXPECT_EQ(bool(pipeControl->getCommandStreamerStallEnable()), true);
EXPECT_EQ(bool(pipeControl->getDcFlushEnable()), false);
EXPECT_EQ(bool(pipeControl->getRenderTargetCacheFlushEnable()), false);
EXPECT_EQ(bool(pipeControl->getInstructionCacheInvalidateEnable()), false);
EXPECT_EQ(bool(pipeControl->getTextureCacheInvalidationEnable()), false);
EXPECT_EQ(bool(pipeControl->getPipeControlFlushEnable()), false);
EXPECT_EQ(bool(pipeControl->getVfCacheInvalidationEnable()), false);
EXPECT_EQ(bool(pipeControl->getConstantCacheInvalidationEnable()), false);
EXPECT_EQ(bool(pipeControl->getStateCacheInvalidationEnable()), false);
EXPECT_EQ(bool(pipeControl->getTlbInvalidate()), false);
break;
case 1: // Second pipe control with all flushes
EXPECT_EQ(bool(pipeControl->getCommandStreamerStallEnable()), true);
EXPECT_EQ(bool(pipeControl->getDcFlushEnable()), true);
EXPECT_EQ(bool(pipeControl->getRenderTargetCacheFlushEnable()), true);
EXPECT_EQ(bool(pipeControl->getInstructionCacheInvalidateEnable()), true);
EXPECT_EQ(bool(pipeControl->getTextureCacheInvalidationEnable()), true);
EXPECT_EQ(bool(pipeControl->getPipeControlFlushEnable()), true);
EXPECT_EQ(bool(pipeControl->getVfCacheInvalidationEnable()), true);
EXPECT_EQ(bool(pipeControl->getConstantCacheInvalidationEnable()), true);
EXPECT_EQ(bool(pipeControl->getStateCacheInvalidationEnable()), true);
EXPECT_EQ(bool(pipeControl->getTlbInvalidate()), true);
default:
break;
}
counterPC++;
break;
}
++itor;
}

View File

@@ -372,28 +372,36 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverFlushTaskXeHPAndLaterTests, Wh
GenCmdList::iterator itor = cmdList.begin();
int counterPC = 0;
while (itor != cmdList.end()) {
if (counterPC == 0 && isStallingBarrier<FamilyType>(itor)) {
counterPC++;
itor++;
continue;
}
auto pipeControl = genCmdCast<typename FamilyType::PIPE_CONTROL *>(*itor);
if (pipeControl != nullptr) {
// Second pipe control with all flushes
EXPECT_EQ(1, counterPC);
EXPECT_EQ(bool(pipeControl->getCommandStreamerStallEnable()), true);
EXPECT_EQ(bool(pipeControl->getDcFlushEnable()), true);
EXPECT_EQ(bool(pipeControl->getRenderTargetCacheFlushEnable()), true);
EXPECT_EQ(bool(pipeControl->getInstructionCacheInvalidateEnable()), true);
EXPECT_EQ(bool(pipeControl->getTextureCacheInvalidationEnable()), true);
EXPECT_EQ(bool(pipeControl->getPipeControlFlushEnable()), true);
EXPECT_EQ(bool(pipeControl->getVfCacheInvalidationEnable()), true);
EXPECT_EQ(bool(pipeControl->getConstantCacheInvalidationEnable()), true);
EXPECT_EQ(bool(pipeControl->getStateCacheInvalidationEnable()), true);
EXPECT_EQ(bool(pipeControl->getTlbInvalidate()), true);
if (pipeControl) {
switch (counterPC) {
case 0: // First pipe control with CS Stall
EXPECT_EQ(bool(pipeControl->getCommandStreamerStallEnable()), true);
EXPECT_EQ(bool(pipeControl->getDcFlushEnable()), false);
EXPECT_EQ(bool(pipeControl->getRenderTargetCacheFlushEnable()), false);
EXPECT_EQ(bool(pipeControl->getInstructionCacheInvalidateEnable()), false);
EXPECT_EQ(bool(pipeControl->getTextureCacheInvalidationEnable()), false);
EXPECT_EQ(bool(pipeControl->getPipeControlFlushEnable()), false);
EXPECT_EQ(bool(pipeControl->getVfCacheInvalidationEnable()), false);
EXPECT_EQ(bool(pipeControl->getConstantCacheInvalidationEnable()), false);
EXPECT_EQ(bool(pipeControl->getStateCacheInvalidationEnable()), false);
break;
case 1: // Second pipe control with all flushes
EXPECT_EQ(bool(pipeControl->getCommandStreamerStallEnable()), true);
EXPECT_EQ(bool(pipeControl->getDcFlushEnable()), true);
EXPECT_EQ(bool(pipeControl->getRenderTargetCacheFlushEnable()), true);
EXPECT_EQ(bool(pipeControl->getInstructionCacheInvalidateEnable()), true);
EXPECT_EQ(bool(pipeControl->getTextureCacheInvalidationEnable()), true);
EXPECT_EQ(bool(pipeControl->getPipeControlFlushEnable()), true);
EXPECT_EQ(bool(pipeControl->getVfCacheInvalidationEnable()), true);
EXPECT_EQ(bool(pipeControl->getConstantCacheInvalidationEnable()), true);
EXPECT_EQ(bool(pipeControl->getStateCacheInvalidationEnable()), true);
default:
break;
}
counterPC++;
break;
}
++itor;
}

View File

@@ -227,7 +227,7 @@ HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledWhenEstimatingStr
size_t sizeForPipeControl = 0;
if (isResolveDependenciesByPipeControlsEnabled) {
sizeForPipeControl = MemorySynchronizationCommands<FamilyType>::getSizeForStallingBarrier();
sizeForPipeControl = MemorySynchronizationCommands<FamilyType>::getSizeForSingleBarrier();
}
size_t extendedSize = sizeWithDisabled + EnqueueOperation<FamilyType>::getSizeRequiredForTimestampPacketWrite() + sizeForNodeDependency + sizeForPipeControl;
@@ -1656,7 +1656,6 @@ HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledAndDependenciesRe
using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
using MI_BATCH_BUFFER_END = typename FamilyType::MI_BATCH_BUFFER_END;
using StallingBarrierType = typename FamilyType::StallingBarrierType;
device->getUltCommandStreamReceiver<FamilyType>().timestampPacketWriteEnabled = true;
@@ -1688,11 +1687,9 @@ HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledAndDependenciesRe
size_t pipeControlCountSecondEnqueue = 0u;
size_t semaphoreWaitCount = 0u;
size_t currentEnqueue = 1u;
bool stallingBarrierProgrammed = false;
while (it != hwParser.cmdList.end()) {
MI_SEMAPHORE_WAIT *semaphoreWaitCmd = genCmdCast<MI_SEMAPHORE_WAIT *>(*it);
PIPE_CONTROL *pipeControlCmd = genCmdCast<PIPE_CONTROL *>(*it);
StallingBarrierType *stallingBarrierCmd = genCmdCast<StallingBarrierType *>(*it);
MI_BATCH_BUFFER_END *miBatchBufferEnd = genCmdCast<MI_BATCH_BUFFER_END *>(*it);
if (pipeControlCmd != nullptr) {
if (currentEnqueue == 1) {
@@ -1700,9 +1697,6 @@ HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledAndDependenciesRe
} else if (currentEnqueue == 2) {
++pipeControlCountSecondEnqueue;
}
} else if (stallingBarrierCmd != nullptr) {
EXPECT_EQ(2u, currentEnqueue);
stallingBarrierProgrammed = true;
} else if (semaphoreWaitCmd != nullptr) {
++semaphoreWaitCount;
} else if (miBatchBufferEnd != nullptr) {
@@ -1713,8 +1707,7 @@ HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledAndDependenciesRe
++it;
}
EXPECT_EQ(semaphoreWaitCount, 0u);
auto stallingBarrierAsPC = stallingBarrierProgrammed ? 0 : 1;
EXPECT_EQ(pipeControlCountSecondEnqueue, pipeControlCountFirstEnqueue + stallingBarrierAsPC);
EXPECT_EQ(pipeControlCountSecondEnqueue, pipeControlCountFirstEnqueue + 1);
}
HWTEST2_F(TimestampPacketTests, givenTimestampPacketWriteEnabledAndDependenciesResolvedViaPipeControlsAndSingleIOQWhenEnqueueKernelThenDoNotProgramSemaphoresButProgramPipeControlWithProperFlagsBeforeGpgpuWalker, IsXeHpgCore) {

View File

@@ -246,6 +246,7 @@ HWTEST_F(TimestampPacketTests, whenEnqueueingBarrierThenDontRequestPipeControlOn
HWTEST_F(TimestampPacketTests, givenWaitlistWhenEnqueueingBarrierThenProgramNonStallingBarrier) {
using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
using MI_STORE_DATA_IMM = typename FamilyType::MI_STORE_DATA_IMM;
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
auto &csr = device->getUltCommandStreamReceiver<FamilyType>();
csr.timestampPacketWriteEnabled = true;
@@ -272,7 +273,7 @@ HWTEST_F(TimestampPacketTests, givenWaitlistWhenEnqueueingBarrierThenProgramNonS
auto it = hwParser.cmdList.begin();
if (device->getProductHelper().isResolveDependenciesByPipeControlsSupported(device->getHardwareInfo(), false, cmdQ.taskCount, cmdQ.getGpgpuCommandStreamReceiver())) {
EXPECT_TRUE(hwParser.isStallingBarrier<FamilyType>(it));
EXPECT_NE(nullptr, genCmdCast<PIPE_CONTROL *>(*it));
} else {
EXPECT_NE(nullptr, genCmdCast<MI_SEMAPHORE_WAIT *>(*it));
EXPECT_NE(nullptr, genCmdCast<MI_SEMAPHORE_WAIT *>(*(++it)));

View File

@@ -26,7 +26,7 @@ size_t EncodeComputeMode<Family>::getCmdSizeForComputeMode(const RootDeviceEnvir
}
size += sizeof(typename Family::STATE_COMPUTE_MODE);
if (hasSharedHandles) {
size += MemorySynchronizationCommands<Family>::getSizeForStallingBarrier();
size += MemorySynchronizationCommands<Family>::getSizeForSingleBarrier();
}
if (productHelper.is3DPipelineSelectWARequired() && isRcs) {
size += (2 * PreambleHelper<Family>::getCmdSizeForPipelineSelect(rootDeviceEnvironment));

View File

@@ -813,8 +813,7 @@ size_t CommandStreamReceiverHw<GfxFamily>::getRequiredCmdStreamSize(const Dispat
}
if (debugManager.flags.ForcePipeControlPriorToWalker.get()) {
size += MemorySynchronizationCommands<GfxFamily>::getSizeForStallingBarrier();
size += MemorySynchronizationCommands<GfxFamily>::getSizeForSingleBarrier();
size += 2 * MemorySynchronizationCommands<GfxFamily>::getSizeForSingleBarrier();
}
return size;

View File

@@ -102,7 +102,6 @@ struct Gen12LpFamily : public Gen12Lp {
using XY_COLOR_BLT = typename GfxFamily::XY_FAST_COLOR_BLT;
using MI_STORE_REGISTER_MEM_CMD = typename GfxFamily::MI_STORE_REGISTER_MEM;
using TimestampPacketType = uint32_t;
using StallingBarrierType = PIPE_CONTROL;
static const GPGPU_WALKER cmdInitGpgpuWalker;
static const INTERFACE_DESCRIPTOR_DATA cmdInitInterfaceDescriptorData;
static const MEDIA_INTERFACE_DESCRIPTOR_LOAD cmdInitMediaInterfaceDescriptorLoad;

View File

@@ -493,7 +493,6 @@ struct MemorySynchronizationCommands {
static void setSingleBarrier(void *commandsBuffer, PostSyncMode postSyncMode, uint64_t gpuAddress, uint64_t immediateData, PipeControlArgs &args);
static void addSingleBarrier(LinearStream &commandStream, PipeControlArgs &args);
static void setSingleBarrier(void *commandsBuffer, PipeControlArgs &args);
static void setStallingBarrier(void *commandsBuffer, PipeControlArgs &args);
static void addBarrierWithPostSyncOperation(LinearStream &commandStream, PostSyncMode postSyncMode, uint64_t gpuAddress, uint64_t immediateData, const RootDeviceEnvironment &rootDeviceEnvironment, PipeControlArgs &args);
static void setBarrierWithPostSyncOperation(void *&commandsBuffer, PostSyncMode postSyncMode, uint64_t gpuAddress, uint64_t immediateData, const RootDeviceEnvironment &rootDeviceEnvironment, PipeControlArgs &args);
@@ -528,7 +527,6 @@ struct MemorySynchronizationCommands {
static size_t getSizeForSingleAdditionalSynchronization(NEO::FenceType fenceType, const RootDeviceEnvironment &rootDeviceEnvironment);
static size_t getSizeForAdditionalSynchronization(NEO::FenceType fenceType, const RootDeviceEnvironment &rootDeviceEnvironment);
static size_t getSizeForInstructionCacheFlush();
static size_t getSizeForStallingBarrier();
static bool isBarrierWaRequired(const RootDeviceEnvironment &rootDeviceEnvironment);
static bool isBarrierPriorToPipelineSelectWaRequired(const RootDeviceEnvironment &rootDeviceEnvironment);

View File

@@ -236,40 +236,25 @@ void MemorySynchronizationCommands<GfxFamily>::setSingleBarrier(void *commandsBu
template <typename GfxFamily>
void MemorySynchronizationCommands<GfxFamily>::addSingleBarrier(LinearStream &commandStream, PostSyncMode postSyncMode, uint64_t gpuAddress, uint64_t immediateData, PipeControlArgs &args) {
auto barrierSize = MemorySynchronizationCommands<GfxFamily>::getSizeForSingleBarrier();
if (args.csStallOnly) {
barrierSize = MemorySynchronizationCommands<GfxFamily>::getSizeForStallingBarrier();
}
auto barrier = commandStream.getSpace(barrierSize);
auto barrier = commandStream.getSpace(MemorySynchronizationCommands<GfxFamily>::getSizeForSingleBarrier());
setSingleBarrier(barrier, postSyncMode, gpuAddress, immediateData, args);
}
// Writes a stall-only barrier, encoded as a PIPE_CONTROL, at commandsBuffer.
// The command starts from the family's default PIPE_CONTROL, enables the
// command streamer stall, and lets setBarrierExtraProperties() apply whatever
// it derives from args. No other PIPE_CONTROL fields are set here.
// NOTE(review): assumes commandsBuffer has room for one PIPE_CONTROL — confirm at call sites.
template <typename GfxFamily>
void MemorySynchronizationCommands<GfxFamily>::setStallingBarrier(void *commandsBuffer, PipeControlArgs &args) {
    using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL;

    PIPE_CONTROL stallCmd = GfxFamily::cmdInitPipeControl;
    stallCmd.setCommandStreamerStallEnable(true);
    setBarrierExtraProperties(&stallCmd, args);

    auto *destination = reinterpret_cast<PIPE_CONTROL *>(commandsBuffer);
    *destination = stallCmd;
}
template <typename GfxFamily>
void MemorySynchronizationCommands<GfxFamily>::setSingleBarrier(void *commandsBuffer, PostSyncMode postSyncMode, uint64_t gpuAddress, uint64_t immediateData, PipeControlArgs &args) {
using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL;
if (args.csStallOnly) {
setStallingBarrier(commandsBuffer, args);
return;
}
PIPE_CONTROL pipeControl = GfxFamily::cmdInitPipeControl;
pipeControl.setCommandStreamerStallEnable(true);
setBarrierExtraProperties(&pipeControl, args);
if (args.csStallOnly) {
*reinterpret_cast<PIPE_CONTROL *>(commandsBuffer) = pipeControl;
return;
}
pipeControl.setConstantCacheInvalidationEnable(args.constantCacheInvalidationEnable);
pipeControl.setInstructionCacheInvalidateEnable(args.instructionCacheInvalidateEnable);
pipeControl.setPipeControlFlushEnable(args.pipeControlFlushEnable);
@@ -350,7 +335,7 @@ void MemorySynchronizationCommands<GfxFamily>::setBarrierWa(void *&commandsBuffe
additionalArgs.csStallOnly = true;
MemorySynchronizationCommands<GfxFamily>::setSingleBarrier(commandsBuffer, additionalArgs);
commandsBuffer = ptrOffset(commandsBuffer, getSizeForStallingBarrier());
commandsBuffer = ptrOffset(commandsBuffer, sizeof(PIPE_CONTROL));
}
}
@@ -399,7 +384,7 @@ size_t MemorySynchronizationCommands<GfxFamily>::getSizeForBarrierWa(const RootD
size = getSizeForSingleBarrier() +
getSizeForSingleAdditionalSynchronization(NEO::FenceType::release, rootDeviceEnvironment);
} else if (releaseHelper && postSyncMode == PostSyncMode::timestamp && releaseHelper->programmAdditionalStallPriorToBarrierWithTimestamp()) {
size = getSizeForStallingBarrier();
size = getSizeForSingleBarrier();
}
return size;
}
@@ -871,11 +856,6 @@ bool GfxCoreHelperHw<Family>::isCacheFlushPriorImageReadRequired() const {
return false;
}
// Returns the size, in bytes, of the command type the family declares as its
// StallingBarrierType.
template <typename GfxFamily>
size_t MemorySynchronizationCommands<GfxFamily>::getSizeForStallingBarrier() {
    using StallingBarrierCmd = typename GfxFamily::StallingBarrierType;
    return sizeof(StallingBarrierCmd);
}
template <typename Family>
uint32_t GfxCoreHelperHw<Family>::getQueuePriorityLevels() const {
return 2;

View File

@@ -97,15 +97,4 @@ bool GfxCoreHelperHw<Family>::isCacheFlushPriorImageReadRequired() const {
return true;
}
// Family-specific stalling barrier: programs a RESOURCE_BARRIER at
// commandsBuffer instead of a PIPE_CONTROL. The command starts from the
// family's default RESOURCE_BARRIER and is configured as an immediate barrier
// with wait stage TOP and signal stage GPGPU.
// args is not read by this specialization.
// NOTE(review): assumes commandsBuffer has room for one RESOURCE_BARRIER — confirm at call sites.
template <>
void MemorySynchronizationCommands<Family>::setStallingBarrier(void *commandsBuffer, PipeControlArgs &args) {
    using RESOURCE_BARRIER = typename Family::RESOURCE_BARRIER;

    auto barrierCmd = Family::cmdInitResourceBarrier;
    barrierCmd.setBarrierType(RESOURCE_BARRIER::BARRIER_TYPE::BARRIER_TYPE_IMMEDIATE);
    barrierCmd.setWaitStage(RESOURCE_BARRIER::WAIT_STAGE::WAIT_STAGE_TOP);
    barrierCmd.setSignalStage(RESOURCE_BARRIER::SIGNAL_STAGE::SIGNAL_STAGE_GPGPU);

    auto *destination = reinterpret_cast<RESOURCE_BARRIER *>(commandsBuffer);
    *destination = barrierCmd;
}
} // namespace NEO

View File

@@ -109,7 +109,6 @@ struct Xe2HpgCoreFamily : public Xe2HpgCore {
using XY_COLOR_BLT = typename GfxFamily::XY_FAST_COLOR_BLT;
using MI_STORE_REGISTER_MEM_CMD = typename GfxFamily::MI_STORE_REGISTER_MEM;
using TimestampPacketType = uint64_t;
using StallingBarrierType = RESOURCE_BARRIER;
static const COMPUTE_WALKER cmdInitGpgpuWalker;
static const CFE_STATE cmdInitCfeState;
static const INTERFACE_DESCRIPTOR_DATA cmdInitInterfaceDescriptorData;

View File

@@ -112,7 +112,6 @@ struct Xe3CoreFamily : public Xe3Core {
using XY_COLOR_BLT = typename GfxFamily::XY_FAST_COLOR_BLT;
using MI_STORE_REGISTER_MEM_CMD = typename GfxFamily::MI_STORE_REGISTER_MEM;
using TimestampPacketType = uint64_t;
using StallingBarrierType = RESOURCE_BARRIER;
static const COMPUTE_WALKER cmdInitGpgpuWalker;
static const CFE_STATE cmdInitCfeState;
static const INTERFACE_DESCRIPTOR_DATA cmdInitInterfaceDescriptorData;

View File

@@ -91,7 +91,6 @@ struct XeHpcCoreFamily : public XeHpcCore {
using XY_COLOR_BLT = typename GfxFamily::XY_FAST_COLOR_BLT;
using MI_STORE_REGISTER_MEM_CMD = typename GfxFamily::MI_STORE_REGISTER_MEM;
using TimestampPacketType = uint32_t;
using StallingBarrierType = PIPE_CONTROL;
static const COMPUTE_WALKER cmdInitGpgpuWalker;
static const CFE_STATE cmdInitCfeState;
static const INTERFACE_DESCRIPTOR_DATA cmdInitInterfaceDescriptorData;

View File

@@ -111,7 +111,6 @@ struct XeHpgCoreFamily : public XeHpgCore {
using XY_COLOR_BLT = typename GfxFamily::XY_FAST_COLOR_BLT;
using MI_STORE_REGISTER_MEM_CMD = typename GfxFamily::MI_STORE_REGISTER_MEM;
using TimestampPacketType = uint32_t;
using StallingBarrierType = PIPE_CONTROL;
static const COMPUTE_WALKER cmdInitGpgpuWalker;
static const CFE_STATE cmdInitCfeState;
static const INTERFACE_DESCRIPTOR_DATA cmdInitInterfaceDescriptorData;

View File

@@ -40,6 +40,5 @@ if(TESTS_XE2_AND_LATER)
target_sources(neo_libult_common PRIVATE
${CMAKE_CURRENT_SOURCE_DIR}/cmd_parse_state_context_data_base_address.inl
${CMAKE_CURRENT_SOURCE_DIR}/hw_parse_xe2_hpg_and_later.inl
${CMAKE_CURRENT_SOURCE_DIR}/cmd_parse_resource_barrier.inl
)
endif()

View File

@@ -1,22 +0,0 @@
/*
* Copyright (C) 2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/test/common/cmd_parse/gen_cmd_parse.h"
using namespace NEO;
using RESOURCE_BARRIER = GenStruct::RESOURCE_BARRIER;
// Interprets buffer as a RESOURCE_BARRIER and returns it only when the common
// header fields match the values this parser expects for that command
// (DwordLength 0x3, Opcode 0x3, CommandType 0x5); otherwise returns nullptr.
template <>
RESOURCE_BARRIER *genCmdCast<RESOURCE_BARRIER *>(void *buffer) {
    auto *candidate = reinterpret_cast<RESOURCE_BARRIER *>(buffer);
    const auto &header = candidate->TheStructure.Common;

    const bool headerMatches = header.DwordLength == 0x3 &&
                               header.Opcode == 0x3 &&
                               header.CommandType == 0x5;
    if (!headerMatches) {
        return nullptr;
    }
    return candidate;
}

View File

@@ -63,9 +63,6 @@ struct HardwareParse : NEO::NonCopyableAndNonMovableClass {
return true;
}
template <typename FamilyType>
bool isStallingBarrier(GenCmdList::iterator &iter);
template <typename FamilyType>
void findHardwareCommands();

View File

@@ -155,22 +155,4 @@ const typename FamilyType::RENDER_SURFACE_STATE *HardwareParse::getSurfaceState(
}
}
// Checks whether the command at iter is a stall-only PIPE_CONTROL.
// Returns false when the command is not a PIPE_CONTROL at all. Otherwise it
// returns true and records non-fatal gtest expectations that the command
// streamer stall is enabled and that every flush/invalidate bit listed below
// is cleared.
template <typename FamilyType>
bool HardwareParse::isStallingBarrier(GenCmdList::iterator &iter) {
PIPE_CONTROL *pipeControlCmd = genCmdCast<PIPE_CONTROL *>(*iter);
// Not a PIPE_CONTROL: report "no" without raising any expectation failures.
if (pipeControlCmd == nullptr) {
return false;
}
// The only bit expected to be set on a stalling barrier.
EXPECT_EQ(pipeControlCmd->getCommandStreamerStallEnable(), true);
// Everything else must stay off; a mismatch fails the calling test but
// this function still returns true.
EXPECT_EQ(pipeControlCmd->getDcFlushEnable(), false);
EXPECT_EQ(pipeControlCmd->getRenderTargetCacheFlushEnable(), false);
EXPECT_EQ(pipeControlCmd->getInstructionCacheInvalidateEnable(), false);
EXPECT_EQ(pipeControlCmd->getTextureCacheInvalidationEnable(), false);
EXPECT_EQ(pipeControlCmd->getPipeControlFlushEnable(), false);
EXPECT_EQ(pipeControlCmd->getVfCacheInvalidationEnable(), false);
EXPECT_EQ(pipeControlCmd->getConstantCacheInvalidationEnable(), false);
EXPECT_EQ(pipeControlCmd->getStateCacheInvalidationEnable(), false);
return true;
}
} // namespace NEO

View File

@@ -23,13 +23,4 @@ bool HardwareParse::requiresPipelineSelectBeforeMediaState<GenGfxFamily>() {
return false;
}
// Family-specific check: a stalling barrier here is a RESOURCE_BARRIER.
// Returns false when the command at iter is not a RESOURCE_BARRIER. Otherwise
// it returns true and records non-fatal gtest expectations that the barrier is
// of immediate type with wait stage TOP and signal stage GPGPU.
template <>
bool HardwareParse::isStallingBarrier<GenGfxFamily>(GenCmdList::iterator &iter) {
    GenGfxFamily::RESOURCE_BARRIER *resourceBarrierCmd = genCmdCast<GenGfxFamily::RESOURCE_BARRIER *>(*iter);
    // genCmdCast yields nullptr for any other command; bail out before the
    // field checks so a non-matching command is never dereferenced.
    if (resourceBarrierCmd == nullptr) {
        return false;
    }
    EXPECT_EQ(resourceBarrierCmd->getBarrierType(), RESOURCE_BARRIER::BARRIER_TYPE::BARRIER_TYPE_IMMEDIATE);
    EXPECT_EQ(resourceBarrierCmd->getWaitStage(), RESOURCE_BARRIER::WAIT_STAGE::WAIT_STAGE_TOP);
    EXPECT_EQ(resourceBarrierCmd->getSignalStage(), RESOURCE_BARRIER::SIGNAL_STAGE::SIGNAL_STAGE_GPGPU);
    return true;
}
} // namespace NEO

View File

@@ -132,6 +132,4 @@ template void HardwareParse::findHardwareCommands<Gen12LpFamily>();
template void HardwareParse::findHardwareCommands<Gen12LpFamily>(IndirectHeap *);
template const void *HardwareParse::getStatelessArgumentPointer<Gen12LpFamily>(const KernelInfo &kernelInfo, uint32_t indexArg, IndirectHeap &ioh, uint32_t rootDeviceIndex);
template const typename Gen12LpFamily::RENDER_SURFACE_STATE *HardwareParse::getSurfaceState<Gen12LpFamily>(IndirectHeap *ssh, uint32_t index);
template bool HardwareParse::isStallingBarrier<Gen12LpFamily>(GenCmdList::iterator &iter);
} // namespace NEO

View File

@@ -11,7 +11,6 @@ using GenGfxFamily = NEO::Xe2HpgCoreFamily;
#include "shared/test/common/cmd_parse/cmd_parse_mem_fence.inl"
#include "shared/test/common/cmd_parse/cmd_parse_mem_set.inl"
#include "shared/test/common/cmd_parse/cmd_parse_resource_barrier.inl"
#include "shared/test/common/cmd_parse/cmd_parse_state_context_data_base_address.inl"
#include "shared/test/common/cmd_parse/cmd_parse_state_prefetch.inl"
#include "shared/test/common/cmd_parse/cmd_parse_system_mem_fence_address.inl"
@@ -23,7 +22,6 @@ using STATE_SYSTEM_MEM_FENCE_ADDRESS = GenStruct::STATE_SYSTEM_MEM_FENCE_ADDRESS
using STATE_PREFETCH = GenStruct::STATE_PREFETCH;
using MEM_SET = GenStruct::MEM_SET;
using STATE_CONTEXT_DATA_BASE_ADDRESS = GenStruct::STATE_CONTEXT_DATA_BASE_ADDRESS;
using RESOURCE_BARRIER = GenStruct::RESOURCE_BARRIER;
template <>
size_t CmdParse<GenGfxFamily>::getAdditionalCommandLength(void *cmd) {
@@ -52,11 +50,6 @@ size_t CmdParse<GenGfxFamily>::getAdditionalCommandLength(void *cmd) {
return sizeof(STATE_CONTEXT_DATA_BASE_ADDRESS) / sizeof(uint32_t);
}
}
{
if (genCmdCast<RESOURCE_BARRIER *>(cmd)) {
return sizeof(RESOURCE_BARRIER) / sizeof(uint32_t);
}
}
return 0;
}

View File

@@ -12,7 +12,6 @@ using GenGfxFamily = NEO::Xe3CoreFamily;
#include "shared/test/common/cmd_parse/cmd_parse_mem_fence.inl"
#include "shared/test/common/cmd_parse/cmd_parse_mem_set.inl"
#include "shared/test/common/cmd_parse/cmd_parse_resource_barrier.inl"
#include "shared/test/common/cmd_parse/cmd_parse_state_context_data_base_address.inl"
#include "shared/test/common/cmd_parse/cmd_parse_state_prefetch.inl"
#include "shared/test/common/cmd_parse/cmd_parse_system_mem_fence_address.inl"
@@ -24,7 +23,6 @@ using STATE_SYSTEM_MEM_FENCE_ADDRESS = GenStruct::STATE_SYSTEM_MEM_FENCE_ADDRESS
using STATE_PREFETCH = GenStruct::STATE_PREFETCH;
using MEM_SET = GenStruct::MEM_SET;
using STATE_CONTEXT_DATA_BASE_ADDRESS = GenStruct::STATE_CONTEXT_DATA_BASE_ADDRESS;
using RESOURCE_BARRIER = GenStruct::RESOURCE_BARRIER;
template <>
size_t CmdParse<GenGfxFamily>::getAdditionalCommandLength(void *cmd) {
@@ -53,11 +51,6 @@ size_t CmdParse<GenGfxFamily>::getAdditionalCommandLength(void *cmd) {
return sizeof(STATE_CONTEXT_DATA_BASE_ADDRESS) / sizeof(uint32_t);
}
}
{
if (genCmdCast<RESOURCE_BARRIER *>(cmd)) {
return sizeof(RESOURCE_BARRIER) / sizeof(uint32_t);
}
}
return 0;
}

View File

@@ -71,4 +71,3 @@ const char *CmdParse<GenGfxFamily>::getAdditionalCommandName(void *cmd) {
#include "shared/test/common/cmd_parse/hw_parse_xe_hpg_and_later.inl"
template const typename GenGfxFamily::RENDER_SURFACE_STATE *NEO::HardwareParse::getSurfaceState<GenGfxFamily>(IndirectHeap *ssh, uint32_t index);
template bool NEO::HardwareParse::isStallingBarrier<GenGfxFamily>(GenCmdList::iterator &iter);

View File

@@ -57,4 +57,3 @@ const char *CmdParse<GenGfxFamily>::getAdditionalCommandName(void *cmd) {
#include "shared/test/common/cmd_parse/hw_parse_xe_hpg_and_later.inl"
template const typename GenGfxFamily::RENDER_SURFACE_STATE *NEO::HardwareParse::getSurfaceState<GenGfxFamily>(IndirectHeap *ssh, uint32_t index);
template bool NEO::HardwareParse::isStallingBarrier<GenGfxFamily>(GenCmdList::iterator &iter);

View File

@@ -70,7 +70,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ComputeModeRequirements, givenCoherencyWithSharedHa
overrideComputeModeRequest<FamilyType>(false, true, true);
EXPECT_FALSE(getCsrHw<FamilyType>()->streamProperties.stateComputeMode.isDirty());
auto cmdsSize = sizeof(STATE_COMPUTE_MODE) + MemorySynchronizationCommands<FamilyType>::getSizeForStallingBarrier();
auto cmdsSize = sizeof(STATE_COMPUTE_MODE) + sizeof(PIPE_CONTROL);
if (isBasicWARequired) {
cmdsSize += +sizeof(PIPE_CONTROL);
}

View File

@@ -1933,24 +1933,6 @@ HWTEST2_F(GfxCoreHelperTest, whenIsCacheFlushPriorImageReadRequiredCalledThenFal
EXPECT_FALSE(helper.isCacheFlushPriorImageReadRequired());
}
// Verifies that addSingleBarrier() with csStallOnly set emits exactly one
// PIPE_CONTROL and that it passes the parser's stalling-barrier check.
// NOTE(review): the test name says "AtLeastXe2Hpg" while the matcher is
// IsBeforeXe2HpgCore — confirm which is intended. The name is left unchanged
// here so existing test filters keep working.
HWTEST2_F(GfxCoreHelperTest, givenAtLeastXe2HpgWhenSetStallOnlyBarrierThenPipeControlProgrammed, IsBeforeXe2HpgCore) {
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
    // The stream is sized for exactly one PIPE_CONTROL.
    constexpr static auto bufferSize = sizeof(PIPE_CONTROL);
    char streamBuffer[bufferSize];
    LinearStream stream(streamBuffer, bufferSize);

    PipeControlArgs args;
    args.csStallOnly = true;
    MemorySynchronizationCommands<FamilyType>::addSingleBarrier(stream, PostSyncMode::noWrite, 0u, 0u, args);

    HardwareParse hwParser;
    hwParser.parseCommands<FamilyType>(stream, 0);
    GenCmdList pipeControlList = hwParser.getCommandsList<PIPE_CONTROL>();
    // Fatal assertion: begin() is dereferenced below, which is only valid
    // when the list is non-empty.
    ASSERT_EQ(1u, pipeControlList.size());

    GenCmdList::iterator itor = pipeControlList.begin();
    EXPECT_TRUE(hwParser.isStallingBarrier<FamilyType>(itor));
}
TEST_F(GfxCoreHelperTest, whenGetQueuePriorityLevelsQueriedThen2IsReturned) {
auto &gfxCoreHelper = getHelper<GfxCoreHelper>();
EXPECT_EQ(2u, gfxCoreHelper.getQueuePriorityLevels());

View File

@@ -111,22 +111,4 @@ HWTEST2_F(GfxCoreHelperXe2AndLaterTests, givenAtLeastXe2HpgWhenIsCacheFlushPrior
MockExecutionEnvironment mockExecutionEnvironment{};
auto &gfxCoreHelper = mockExecutionEnvironment.rootDeviceEnvironments[0]->getHelper<GfxCoreHelper>();
EXPECT_TRUE(gfxCoreHelper.isCacheFlushPriorImageReadRequired());
}
// Verifies that addSingleBarrier() with csStallOnly set emits exactly one
// RESOURCE_BARRIER and that it passes the parser's stalling-barrier check.
HWTEST2_F(GfxCoreHelperXe2AndLaterTests, givenAtLeastXe2HpgWhenSetStallOnlyBarrierThenResourceBarrierProgrammed, IsAtLeastXe2HpgCore) {
    using RESOURCE_BARRIER = typename FamilyType::RESOURCE_BARRIER;
    // The stream is sized for exactly one RESOURCE_BARRIER.
    constexpr static auto bufferSize = sizeof(RESOURCE_BARRIER);
    char streamBuffer[bufferSize];
    LinearStream stream(streamBuffer, bufferSize);

    PipeControlArgs args;
    args.csStallOnly = true;
    MemorySynchronizationCommands<FamilyType>::addSingleBarrier(stream, PostSyncMode::noWrite, 0u, 0u, args);

    HardwareParse hwParser;
    hwParser.parseCommands<FamilyType>(stream, 0);
    GenCmdList resourceBarrierList = hwParser.getCommandsList<RESOURCE_BARRIER>();
    // Fatal assertion: begin() is dereferenced below, which is only valid
    // when the list is non-empty.
    ASSERT_EQ(1u, resourceBarrierList.size());

    GenCmdList::iterator itor = resourceBarrierList.begin();
    EXPECT_TRUE(hwParser.isStallingBarrier<FamilyType>(itor));
}
}