Add implicit barriers capability to OpenCL

Related-To: NEO-6262

Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
Zbigniew Zdanowicz
2021-11-06 01:42:54 +00:00
committed by Compute-Runtime-Automation
parent 0897cb11ca
commit 61641bb70a
12 changed files with 182 additions and 43 deletions

View File

@@ -265,7 +265,7 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
processDispatchForCacheFlush(surfacesForResidency, numSurfaceForResidency, &commandStream, csrDeps);
} else if (getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled()) {
if (CL_COMMAND_BARRIER == commandType) {
getGpgpuCommandStreamReceiver().requestStallingPipeControlOnNextFlush();
getGpgpuCommandStreamReceiver().requestStallingCommandsOnNextFlush();
}
for (size_t i = 0; i < eventsRequest.numEventsInWaitList; i++) {
@@ -1207,7 +1207,7 @@ void CommandQueueHw<GfxFamily>::enqueueBlit(const MultiDispatchInfo &multiDispat
timestampPacketDependencies.cacheFlushNodes.add(allocator->getTag());
}
if (!blockQueue && getGpgpuCommandStreamReceiver().isStallingPipeControlOnNextFlushRequired()) {
if (!blockQueue && getGpgpuCommandStreamReceiver().isStallingCommandsOnNextFlushRequired()) {
timestampPacketDependencies.barrierNodes.add(allocator->getTag());
}

View File

@@ -354,7 +354,7 @@ CompletionStamp &CommandWithoutKernel::submit(uint32_t taskLevel, bool terminate
enqueueOperationType = EnqueueProperties::Operation::Blit;
UNRECOVERABLE_IF(!barrierNodes);
if (commandStreamReceiver.isStallingPipeControlOnNextFlushRequired()) {
if (commandStreamReceiver.isStallingCommandsOnNextFlushRequired()) {
barrierNodes->add(commandStreamReceiver.getTimestampPacketAllocator()->getTag());
}
}

View File

@@ -839,7 +839,6 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverHwTestXeHPAndLater, whenCreati
}
HWTEST2_F(CommandStreamReceiverHwTestXeHPAndLater, givenXeHpWhenRayTracingEnabledThenDoNotAddCommandBatchBuffer, IsXEHP) {
MockCsrHw<FamilyType> commandStreamReceiver(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
auto cmdSize = commandStreamReceiver.getCmdSizeForPerDssBackedBuffer(pDevice->getHardwareInfo());
EXPECT_EQ(0u, cmdSize);
@@ -852,3 +851,102 @@ HWTEST2_F(CommandStreamReceiverHwTestXeHPAndLater, givenXeHpWhenRayTracingEnable
commandStreamReceiver.programPerDssBackedBuffer(cs, *pDevice, dispatchFlags);
EXPECT_EQ(0u, cs.getUsed());
}
// With static work partitioning enabled but only one active partition, the
// no-post-sync barrier must be estimated and programmed as a single PIPE_CONTROL.
HWTEST2_F(CommandStreamReceiverHwTestXeHPAndLater, givenStaticPartitionEnabledWhenOnlySinglePartitionUsedThenExpectSinglePipeControlAsBarrier, IsAtLeastXeHpCore) {
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;

    MockCsrHw<FamilyType> commandStreamReceiver(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());

    constexpr size_t cmdSize = 256;
    // Array form of unique_ptr: the storage is an array, so it has to be released
    // with delete[]. unique_ptr<char> would call plain delete on it, which is
    // undefined behaviour. make_unique also zero-initialises the buffer.
    auto buffer = std::make_unique<char[]>(cmdSize);
    LinearStream cs(buffer.get(), cmdSize);

    commandStreamReceiver.staticWorkPartitioningEnabled = true;
    commandStreamReceiver.activePartitions = 1;

    // The estimate must be exactly one PIPE_CONTROL ...
    size_t estimatedCmdSize = commandStreamReceiver.getCmdSizeForStallingNoPostSyncCommands();
    EXPECT_EQ(sizeof(PIPE_CONTROL), estimatedCmdSize);

    // ... and programming must consume exactly the estimated amount.
    commandStreamReceiver.programStallingNoPostSyncCommandsForBarrier(cs);
    EXPECT_EQ(estimatedCmdSize, cs.getUsed());

    PIPE_CONTROL *pipeControl = genCmdCast<PIPE_CONTROL *>(buffer.get());
    ASSERT_NE(nullptr, pipeControl);
}
// With static work partitioning disabled, the no-post-sync barrier must still be a
// single PIPE_CONTROL even though more than one partition is active.
HWTEST2_F(CommandStreamReceiverHwTestXeHPAndLater, givenStaticPartitionDisabledWhenMultiplePartitionsUsedThenExpectSinglePipeControlAsBarrier, IsAtLeastXeHpCore) {
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;

    MockCsrHw<FamilyType> commandStreamReceiver(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());

    constexpr size_t cmdSize = 256;
    // Array form of unique_ptr: the storage is an array, so it has to be released
    // with delete[]. unique_ptr<char> would call plain delete on it, which is
    // undefined behaviour. make_unique also zero-initialises the buffer.
    auto buffer = std::make_unique<char[]>(cmdSize);
    LinearStream cs(buffer.get(), cmdSize);

    commandStreamReceiver.staticWorkPartitioningEnabled = false;
    commandStreamReceiver.activePartitions = 2;

    // The estimate must be exactly one PIPE_CONTROL ...
    size_t estimatedCmdSize = commandStreamReceiver.getCmdSizeForStallingNoPostSyncCommands();
    EXPECT_EQ(sizeof(PIPE_CONTROL), estimatedCmdSize);

    // ... and programming must consume exactly the estimated amount.
    commandStreamReceiver.programStallingNoPostSyncCommandsForBarrier(cs);
    EXPECT_EQ(estimatedCmdSize, cs.getUsed());

    PIPE_CONTROL *pipeControl = genCmdCast<PIPE_CONTROL *>(buffer.get());
    ASSERT_NE(nullptr, pipeControl);
}
// With static work partitioning enabled and two active partitions, the no-post-sync
// barrier must be programmed as the sequence PIPE_CONTROL, MI_ATOMIC,
// MI_SEMAPHORE_WAIT, MI_BATCH_BUFFER_START followed by two zeroed dwords, and the
// size estimate must match what is actually written.
HWTEST2_F(CommandStreamReceiverHwTestXeHPAndLater, givenStaticPartitionEnabledWhenMultiplePartitionsUsedThenExpectImplicitScalingWithoutSelfCleanupBarrier, IsAtLeastXeHpCore) {
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
    using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START;
    using MI_ATOMIC = typename FamilyType::MI_ATOMIC;
    using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;

    MockCsrHw<FamilyType> commandStreamReceiver(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());

    constexpr size_t cmdSize = 256;
    // Array form of unique_ptr: the storage is an array, so it has to be released
    // with delete[]. unique_ptr<char> would call plain delete on it, which is
    // undefined behaviour. make_unique also zero-initialises the buffer.
    auto buffer = std::make_unique<char[]>(cmdSize);

    // Back the stream with a mock allocation that has a fixed GPU address.
    MockGraphicsAllocation allocation(buffer.get(), cmdSize);
    allocation.gpuAddress = 0xFF000;
    LinearStream cs(buffer.get(), cmdSize);
    cs.replaceGraphicsAllocation(&allocation);

    commandStreamReceiver.staticWorkPartitioningEnabled = true;
    commandStreamReceiver.activePartitions = 2;

    // Four commands plus two trailing dwords of data.
    // NOTE(review): the two dwords are presumably the barrier's control data - confirm
    // against ImplicitScalingDispatch.
    size_t expectedSize = sizeof(PIPE_CONTROL) +
                          sizeof(MI_ATOMIC) + sizeof(MI_SEMAPHORE_WAIT) +
                          sizeof(MI_BATCH_BUFFER_START) +
                          2 * sizeof(uint32_t);
    size_t estimatedCmdSize = commandStreamReceiver.getCmdSizeForStallingNoPostSyncCommands();
    EXPECT_EQ(expectedSize, estimatedCmdSize);

    commandStreamReceiver.programStallingNoPostSyncCommandsForBarrier(cs);
    EXPECT_EQ(estimatedCmdSize, cs.getUsed());

    // Walk the programmed stream command by command, tracking the running offset.
    void *cmdBuffer = buffer.get();
    size_t offset = 0;

    PIPE_CONTROL *pipeControl = genCmdCast<PIPE_CONTROL *>(cmdBuffer);
    ASSERT_NE(nullptr, pipeControl);
    offset += sizeof(PIPE_CONTROL);

    MI_ATOMIC *miAtomic = genCmdCast<MI_ATOMIC *>(ptrOffset(cmdBuffer, offset));
    ASSERT_NE(nullptr, miAtomic);
    offset += sizeof(MI_ATOMIC);

    MI_SEMAPHORE_WAIT *miSemaphore = genCmdCast<MI_SEMAPHORE_WAIT *>(ptrOffset(cmdBuffer, offset));
    ASSERT_NE(nullptr, miSemaphore);
    offset += sizeof(MI_SEMAPHORE_WAIT);

    MI_BATCH_BUFFER_START *bbStart = genCmdCast<MI_BATCH_BUFFER_START *>(ptrOffset(cmdBuffer, offset));
    ASSERT_NE(nullptr, bbStart);
    offset += sizeof(MI_BATCH_BUFFER_START);

    // Both trailing dwords are expected to be zero.
    uint32_t *data = reinterpret_cast<uint32_t *>(ptrOffset(cmdBuffer, offset));
    EXPECT_EQ(0u, *data);
    offset += sizeof(uint32_t);

    data = reinterpret_cast<uint32_t *>(ptrOffset(cmdBuffer, offset));
    EXPECT_EQ(0u, *data);
    offset += sizeof(uint32_t);

    // The walk must end exactly at the estimated size.
    EXPECT_EQ(estimatedCmdSize, offset);
}

View File

@@ -26,7 +26,7 @@ HWTEST_F(TimestampPacketTests, givenEmptyWaitlistAndNoOutputEventWhenEnqueueingM
cmdQ->enqueueMarkerWithWaitList(0, nullptr, nullptr);
EXPECT_EQ(0u, cmdQ->timestampPacketContainer->peekNodes().size());
EXPECT_FALSE(csr.stallingPipeControlOnNextFlushRequired);
EXPECT_FALSE(csr.stallingCommandsOnNextFlushRequired);
}
HWTEST_F(TimestampPacketTests, givenEmptyWaitlistAndEventWhenEnqueueingMarkerWithProfilingEnabledThenObtainNewNode) {
@@ -128,7 +128,7 @@ HWTEST_F(TimestampPacketTests, whenEnqueueingBarrierThenRequestPipeControlOnCsrF
auto &csr = device->getUltCommandStreamReceiver<FamilyType>();
csr.timestampPacketWriteEnabled = true;
EXPECT_FALSE(csr.stallingPipeControlOnNextFlushRequired);
EXPECT_FALSE(csr.stallingCommandsOnNextFlushRequired);
MockCommandQueueHw<FamilyType> cmdQ(context, device.get(), nullptr);
@@ -143,33 +143,33 @@ HWTEST_F(TimestampPacketTests, whenEnqueueingBarrierThenRequestPipeControlOnCsrF
EXPECT_EQ(cmdQ.timestampPacketContainer->peekNodes().at(0), cmdQNodes.peekNodes().at(0)); // dont obtain new node
EXPECT_EQ(1u, cmdQ.timestampPacketContainer->peekNodes().size());
EXPECT_TRUE(csr.stallingPipeControlOnNextFlushRequired);
EXPECT_TRUE(csr.stallingCommandsOnNextFlushRequired);
}
// Checks that, with timestamp packet writes disabled on the CSR, enqueueing a barrier
// leaves the CSR's stalling-on-next-flush request flag unset.
// NOTE(review): every flag check appears twice, once as stallingPipeControlOnNextFlushRequired
// and once as stallingCommandsOnNextFlushRequired. This looks like the pre- and post-rename
// lines of a diff shown together - confirm which spelling should remain.
HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteDisabledWhenEnqueueingBarrierThenDontRequestPipeControlOnCsrFlush) {
auto &csr = device->getUltCommandStreamReceiver<FamilyType>();
csr.timestampPacketWriteEnabled = false;
// Precondition: no stalling request is pending before the enqueue.
EXPECT_FALSE(csr.stallingPipeControlOnNextFlushRequired);
EXPECT_FALSE(csr.stallingCommandsOnNextFlushRequired);
MockCommandQueueHw<FamilyType> cmdQ(context, device.get(), nullptr);
cmdQ.enqueueBarrierWithWaitList(0, nullptr, nullptr);
// The barrier must not have raised the request flag.
EXPECT_FALSE(csr.stallingPipeControlOnNextFlushRequired);
EXPECT_FALSE(csr.stallingCommandsOnNextFlushRequired);
}
// Checks that a barrier enqueued with a not-yet-completed user event in its wait list
// already raises the CSR's stalling-on-next-flush request flag at enqueue time, before
// the user event is set to CL_COMPLETE.
// NOTE(review): every flag check appears twice, once as stallingPipeControlOnNextFlushRequired
// and once as stallingCommandsOnNextFlushRequired. This looks like the pre- and post-rename
// lines of a diff shown together - confirm which spelling should remain.
HWTEST_F(TimestampPacketTests, givenBlockedQueueWhenEnqueueingBarrierThenRequestPipeControlOnCsrFlush) {
auto &csr = device->getUltCommandStreamReceiver<FamilyType>();
csr.timestampPacketWriteEnabled = true;
// Precondition: no stalling request is pending before the enqueue.
EXPECT_FALSE(csr.stallingPipeControlOnNextFlushRequired);
EXPECT_FALSE(csr.stallingCommandsOnNextFlushRequired);
MockCommandQueueHw<FamilyType> cmdQ(context, device.get(), nullptr);
auto userEvent = make_releaseable<UserEvent>();
cl_event waitlist[] = {userEvent.get()};
cmdQ.enqueueBarrierWithWaitList(1, waitlist, nullptr);
// The flag is expected to be set while the user event is still pending.
EXPECT_TRUE(csr.stallingPipeControlOnNextFlushRequired);
EXPECT_TRUE(csr.stallingCommandsOnNextFlushRequired);
// Complete the user event so the barrier's dependency is resolved before the test ends.
userEvent->setStatus(CL_COMPLETE);
}
@@ -177,10 +177,10 @@ HWTEST_F(TimestampPacketTests, givenPipeControlRequestWhenEstimatingCsrStreamSiz
auto &csr = device->getUltCommandStreamReceiver<FamilyType>();
DispatchFlags flags = DispatchFlagsHelper::createDefaultDispatchFlags();
csr.stallingPipeControlOnNextFlushRequired = false;
csr.stallingCommandsOnNextFlushRequired = false;
auto sizeWithoutPcRequest = device->getUltCommandStreamReceiver<FamilyType>().getRequiredCmdStreamSize(flags, device->getDevice());
csr.stallingPipeControlOnNextFlushRequired = true;
csr.stallingCommandsOnNextFlushRequired = true;
auto sizeWithPcRequest = device->getUltCommandStreamReceiver<FamilyType>().getRequiredCmdStreamSize(flags, device->getDevice());
size_t extendedSize = sizeWithoutPcRequest + sizeof(typename FamilyType::PIPE_CONTROL);
@@ -197,10 +197,10 @@ HWTEST_F(TimestampPacketTests, givenPipeControlRequestWithBarrierWriteWhenEstima
flags.barrierTimestampPacketNodes = &barrierTimestampPacketNode;
csr.stallingPipeControlOnNextFlushRequired = false;
csr.stallingCommandsOnNextFlushRequired = false;
auto sizeWithoutPcRequest = device->getUltCommandStreamReceiver<FamilyType>().getRequiredCmdStreamSize(flags, device->getDevice());
csr.stallingPipeControlOnNextFlushRequired = true;
csr.stallingCommandsOnNextFlushRequired = true;
auto sizeWithPcRequest = device->getUltCommandStreamReceiver<FamilyType>().getRequiredCmdStreamSize(flags, device->getDevice());
size_t extendedSize = sizeWithoutPcRequest + MemorySynchronizationCommands<FamilyType>::getSizeForPipeControlWithPostSyncOperation(device->getHardwareInfo());
@@ -226,7 +226,7 @@ HWTEST_F(TimestampPacketTests, givenInstructionCacheRequesWhenSizeIsEstimatedThe
HWTEST_F(TimestampPacketTests, givenPipeControlRequestWhenFlushingThenProgramPipeControlAndResetRequestFlag) {
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
auto &csr = device->getUltCommandStreamReceiver<FamilyType>();
csr.stallingPipeControlOnNextFlushRequired = true;
csr.stallingCommandsOnNextFlushRequired = true;
csr.timestampPacketWriteEnabled = true;
MockCommandQueueHw<FamilyType> cmdQ(context, device.get(), nullptr);
@@ -234,7 +234,7 @@ HWTEST_F(TimestampPacketTests, givenPipeControlRequestWhenFlushingThenProgramPip
MockKernelWithInternals mockKernel(*device, context);
cmdQ.enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);
EXPECT_FALSE(csr.stallingPipeControlOnNextFlushRequired);
EXPECT_FALSE(csr.stallingCommandsOnNextFlushRequired);
HardwareParse hwParser;
hwParser.parseCommands<FamilyType>(csr.commandStream, 0);

View File

@@ -630,7 +630,7 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenPipeControlRequestWhenDispatchingBlitEnq
auto bcsCsr = static_cast<UltCommandStreamReceiver<FamilyType> *>(this->bcsCsr);
auto queueCsr = static_cast<UltCommandStreamReceiver<FamilyType> *>(cmdQ->gpgpuEngine->commandStreamReceiver);
queueCsr->stallingPipeControlOnNextFlushRequired = true;
queueCsr->stallingCommandsOnNextFlushRequired = true;
cl_int retVal = CL_SUCCESS;
auto buffer = clUniquePtr<Buffer>(Buffer::create(bcsMockContext.get(), CL_MEM_READ_WRITE, 1, nullptr, retVal));
@@ -712,9 +712,9 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenBarrierWhenReleasingMultipleBlockedEnque
};
auto &csrStream = cmdQ->getGpgpuCommandStreamReceiver().getCS(0);
EXPECT_TRUE(cmdQ->getGpgpuCommandStreamReceiver().isStallingPipeControlOnNextFlushRequired());
EXPECT_TRUE(cmdQ->getGpgpuCommandStreamReceiver().isStallingCommandsOnNextFlushRequired());
userEvent0.setStatus(CL_COMPLETE);
EXPECT_FALSE(cmdQ->getGpgpuCommandStreamReceiver().isStallingPipeControlOnNextFlushRequired());
EXPECT_FALSE(cmdQ->getGpgpuCommandStreamReceiver().isStallingCommandsOnNextFlushRequired());
EXPECT_TRUE(pipeControlLookup(csrStream, 0));
auto csrOffset = csrStream.getUsed();
@@ -731,7 +731,7 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenPipeControlRequestWhenDispatchingBlocked
auto bcsCsr = static_cast<UltCommandStreamReceiver<FamilyType> *>(this->bcsCsr);
auto queueCsr = static_cast<UltCommandStreamReceiver<FamilyType> *>(cmdQ->gpgpuEngine->commandStreamReceiver);
queueCsr->stallingPipeControlOnNextFlushRequired = true;
queueCsr->stallingCommandsOnNextFlushRequired = true;
cl_int retVal = CL_SUCCESS;
auto buffer = clUniquePtr<Buffer>(Buffer::create(bcsMockContext.get(), CL_MEM_READ_WRITE, 1, nullptr, retVal));

View File

@@ -155,8 +155,8 @@ class CommandStreamReceiver {
GraphicsAllocation *getGlobalFenceAllocation() const { return globalFenceAllocation; }
GraphicsAllocation *getWorkPartitionAllocation() const { return workPartitionAllocation; }
void requestStallingPipeControlOnNextFlush() { stallingPipeControlOnNextFlushRequired = true; }
bool isStallingPipeControlOnNextFlushRequired() const { return stallingPipeControlOnNextFlushRequired; }
void requestStallingCommandsOnNextFlush() { stallingCommandsOnNextFlushRequired = true; }
bool isStallingCommandsOnNextFlushRequired() const { return stallingCommandsOnNextFlushRequired; }
virtual void waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool forcePowerSavingMode) = 0;
virtual bool waitForCompletionWithTimeout(bool enableTimeout, int64_t timeoutMicroseconds, uint32_t taskCountToWait);
@@ -375,7 +375,7 @@ class CommandStreamReceiver {
bool bindingTableBaseAddressRequired = false;
bool mediaVfeStateDirty = true;
bool lastVmeSubslicesConfig = false;
bool stallingPipeControlOnNextFlushRequired = false;
bool stallingCommandsOnNextFlushRequired = false;
bool timestampPacketWriteEnabled = false;
bool staticWorkPartitioningEnabled = false;
bool nTo1SubmissionModelEnabled = false;

View File

@@ -71,6 +71,8 @@ class CommandStreamReceiverHw : public CommandStreamReceiver {
size_t getCmdSizeForEngineMode(const DispatchFlags &dispatchFlags) const;
size_t getCmdSizeForPerDssBackedBuffer(const HardwareInfo &hwInfo);
size_t getCmdSizeForActivePartitionConfig() const;
size_t getCmdSizeForStallingCommands(const DispatchFlags &dispatchFlags) const;
size_t getCmdSizeForStallingNoPostSyncCommands() const;
bool isComputeModeNeeded() const;
bool isPipelineSelectAlreadyProgrammed() const;
@@ -147,7 +149,8 @@ class CommandStreamReceiverHw : public CommandStreamReceiver {
void programPerDssBackedBuffer(LinearStream &scr, Device &device, DispatchFlags &dispatchFlags);
void programStateSip(LinearStream &cmdStream, Device &device);
void programVFEState(LinearStream &csr, DispatchFlags &dispatchFlags, uint32_t maxFrontEndThreads);
void programStallingPipeControlForBarrier(LinearStream &cmdStream, DispatchFlags &dispatchFlags);
void programStallingCommandsForBarrier(LinearStream &cmdStream, DispatchFlags &dispatchFlags);
void programStallingNoPostSyncCommandsForBarrier(LinearStream &cmdStream);
void programEngineModeCommands(LinearStream &csr, const DispatchFlags &dispatchFlags);
void programEngineModeEpliogue(LinearStream &csr, const DispatchFlags &dispatchFlags);
void programActivePartitionConfig();

View File

@@ -331,8 +331,10 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
TimestampPacketHelper::programCsrDependenciesForTimestampPacketContainer<GfxFamily>(commandStreamCSR, dispatchFlags.csrDependencies);
TimestampPacketHelper::programCsrDependenciesForForTaskCountContainer<GfxFamily>(commandStreamCSR, dispatchFlags.csrDependencies);
if (stallingPipeControlOnNextFlushRequired) {
programStallingPipeControlForBarrier(commandStreamCSR, dispatchFlags);
programActivePartitionConfig();
if (stallingCommandsOnNextFlushRequired) {
programStallingCommandsForBarrier(commandStreamCSR, dispatchFlags);
}
programEngineModeCommands(commandStreamCSR, dispatchFlags);
@@ -359,7 +361,6 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
programVFEState(commandStreamCSR, dispatchFlags, device.getDeviceInfo().maxFrontEndThreads);
programPreemption(commandStreamCSR, dispatchFlags);
programActivePartitionConfig();
bool dshDirty = dshState.updateAndCheck(&dsh);
bool iohDirty = iohState.updateAndCheck(&ioh);
@@ -656,8 +657,8 @@ void CommandStreamReceiverHw<GfxFamily>::forcePipeControl(NEO::LinearStream &com
}
template <typename GfxFamily>
inline void CommandStreamReceiverHw<GfxFamily>::programStallingPipeControlForBarrier(LinearStream &cmdStream, DispatchFlags &dispatchFlags) {
stallingPipeControlOnNextFlushRequired = false;
inline void CommandStreamReceiverHw<GfxFamily>::programStallingCommandsForBarrier(LinearStream &cmdStream, DispatchFlags &dispatchFlags) {
stallingCommandsOnNextFlushRequired = false;
auto barrierTimestampPacketNodes = dispatchFlags.barrierTimestampPacketNodes;
@@ -672,11 +673,9 @@ inline void CommandStreamReceiverHw<GfxFamily>::programStallingPipeControlForBar
0,
peekHwInfo(),
args);
dispatchFlags.barrierTimestampPacketNodes->makeResident(*this);
} else {
PipeControlArgs args;
MemorySynchronizationCommands<GfxFamily>::addPipeControl(cmdStream, args);
programStallingNoPostSyncCommandsForBarrier(cmdStream);
}
}
@@ -835,13 +834,8 @@ size_t CommandStreamReceiverHw<GfxFamily>::getRequiredCmdStreamSize(const Dispat
size += TimestampPacketHelper::getRequiredCmdStreamSize<GfxFamily>(dispatchFlags.csrDependencies);
size += TimestampPacketHelper::getRequiredCmdStreamSizeForTaskCountContainer<GfxFamily>(dispatchFlags.csrDependencies);
if (stallingPipeControlOnNextFlushRequired) {
auto barrierTimestampPacketNodes = dispatchFlags.barrierTimestampPacketNodes;
if (barrierTimestampPacketNodes && barrierTimestampPacketNodes->peekNodes().size() > 0) {
size += MemorySynchronizationCommands<GfxFamily>::getSizeForPipeControlWithPostSyncOperation(peekHwInfo());
} else {
size += sizeof(typename GfxFamily::PIPE_CONTROL);
}
if (stallingCommandsOnNextFlushRequired) {
size += getCmdSizeForStallingCommands(dispatchFlags);
}
if (requiresInstructionCacheFlush) {
@@ -857,7 +851,6 @@ size_t CommandStreamReceiverHw<GfxFamily>::getRequiredCmdStreamSize(const Dispat
template <typename GfxFamily>
inline size_t CommandStreamReceiverHw<GfxFamily>::getCmdSizeForPipelineSelect() const {
size_t size = 0;
if ((csrSizeRequestFlags.mediaSamplerConfigChanged ||
csrSizeRequestFlags.specialPipelineSelectModeChanged ||
@@ -1475,4 +1468,14 @@ void CommandStreamReceiverHw<GfxFamily>::postInitFlagsSetup() {
}
}
// Returns the command-stream size needed for the stalling barrier commands.
// When the dispatch flags carry at least one barrier timestamp packet node, this is
// the size of a PIPE_CONTROL with a post-sync operation; otherwise it is the size of
// the no-post-sync barrier commands.
template <typename GfxFamily>
size_t CommandStreamReceiverHw<GfxFamily>::getCmdSizeForStallingCommands(const DispatchFlags &dispatchFlags) const {
    auto barrierNodes = dispatchFlags.barrierTimestampPacketNodes;
    const bool postSyncWriteNeeded = barrierNodes && barrierNodes->peekNodes().size() > 0;

    if (!postSyncWriteNeeded) {
        return getCmdSizeForStallingNoPostSyncCommands();
    }
    return MemorySynchronizationCommands<GfxFamily>::getSizeForPipeControlWithPostSyncOperation(peekHwInfo());
}
} // namespace NEO

View File

@@ -154,4 +154,15 @@ template <typename GfxFamily>
void CommandStreamReceiverHw<GfxFamily>::programActivePartitionConfig() {
    // Intentionally empty: this variant programs nothing for the active partition config.
}
// Size of the barrier stalling commands that carry no post-sync write.
// In this variant the barrier is always a single PIPE_CONTROL.
template <typename GfxFamily>
inline size_t CommandStreamReceiverHw<GfxFamily>::getCmdSizeForStallingNoPostSyncCommands() const {
    using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL;
    return sizeof(PIPE_CONTROL);
}
// Programs the barrier stalling commands that carry no post-sync write into cmdStream.
// In this variant that is one PIPE_CONTROL built from default-constructed arguments.
template <typename GfxFamily>
inline void CommandStreamReceiverHw<GfxFamily>::programStallingNoPostSyncCommandsForBarrier(LinearStream &cmdStream) {
    using MemorySync = MemorySynchronizationCommands<GfxFamily>;

    PipeControlArgs barrierArgs;
    MemorySync::addPipeControl(cmdStream, barrierArgs);
}
} // namespace NEO

View File

@@ -201,4 +201,27 @@ inline void CommandStreamReceiverHw<GfxFamily>::addPipeControlBeforeStateSip(Lin
}
}
// Size of the barrier stalling commands that carry no post-sync write.
// With static work partitioning enabled and more than one active partition, this is
// the barrier size reported by ImplicitScalingDispatch; otherwise it is a single
// PIPE_CONTROL.
template <typename GfxFamily>
inline size_t CommandStreamReceiverHw<GfxFamily>::getCmdSizeForStallingNoPostSyncCommands() const {
    using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL;

    const bool multiPartitionBarrier = this->staticWorkPartitioningEnabled && this->activePartitions > 1;
    if (!multiPartitionBarrier) {
        return sizeof(PIPE_CONTROL);
    }
    return ImplicitScalingDispatch<GfxFamily>::getBarrierSize(false);
}
// Programs the barrier stalling commands that carry no post-sync write into cmdStream.
// With static work partitioning enabled and more than one active partition, the
// barrier is dispatched through ImplicitScalingDispatch; otherwise a single
// PIPE_CONTROL is added.
template <typename GfxFamily>
inline void CommandStreamReceiverHw<GfxFamily>::programStallingNoPostSyncCommandsForBarrier(LinearStream &cmdStream) {
    PipeControlArgs barrierArgs;

    const bool multiPartitionBarrier = this->staticWorkPartitioningEnabled && this->activePartitions > 1;
    if (!multiPartitionBarrier) {
        MemorySynchronizationCommands<GfxFamily>::addPipeControl(cmdStream, barrierArgs);
        return;
    }

    // NOTE(review): both trailing flags are passed as false; presumably they switch off
    // optional barrier features (the related test name mentions "without self cleanup") -
    // confirm against ImplicitScalingDispatch::dispatchBarrierCommands.
    ImplicitScalingDispatch<GfxFamily>::dispatchBarrierCommands(cmdStream,
                                                                this->deviceBitfield,
                                                                barrierArgs,
                                                                false,
                                                                false);
}
} // namespace NEO

View File

@@ -149,7 +149,7 @@ void BlitProperties::setupDependenciesForAuxTranslation(BlitPropertiesContainer
blitPropertiesContainer[i + numObjects].outputTimestampPacket = timestampPacketDependencies.nonAuxToAuxNodes.peekNodes()[i];
}
gpguCsr.requestStallingPipeControlOnNextFlush();
gpguCsr.requestStallingCommandsOnNextFlush();
auto nodesAllocator = gpguCsr.getTimestampPacketAllocator();
timestampPacketDependencies.barrierNodes.add(nodesAllocator->getTag());

View File

@@ -48,6 +48,7 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw<GfxFamily>, publ
using BaseClass::programEnginePrologue;
using BaseClass::programPerDssBackedBuffer;
using BaseClass::programPreamble;
using BaseClass::programStallingNoPostSyncCommandsForBarrier;
using BaseClass::programStateSip;
using BaseClass::programVFEState;
using BaseClass::requiresInstructionCacheFlush;
@@ -104,7 +105,7 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw<GfxFamily>, publ
using BaseClass::CommandStreamReceiver::requiredThreadArbitrationPolicy;
using BaseClass::CommandStreamReceiver::samplerCacheFlushRequired;
using BaseClass::CommandStreamReceiver::scratchSpaceController;
using BaseClass::CommandStreamReceiver::stallingPipeControlOnNextFlushRequired;
using BaseClass::CommandStreamReceiver::stallingCommandsOnNextFlushRequired;
using BaseClass::CommandStreamReceiver::submissionAggregator;
using BaseClass::CommandStreamReceiver::tagAddress;
using BaseClass::CommandStreamReceiver::taskCount;