mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-08 22:12:59 +08:00
Add implicit barriers capability to opencl
Related-To: NEO-6262 Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
0897cb11ca
commit
61641bb70a
@@ -265,7 +265,7 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
|
||||
processDispatchForCacheFlush(surfacesForResidency, numSurfaceForResidency, &commandStream, csrDeps);
|
||||
} else if (getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled()) {
|
||||
if (CL_COMMAND_BARRIER == commandType) {
|
||||
getGpgpuCommandStreamReceiver().requestStallingPipeControlOnNextFlush();
|
||||
getGpgpuCommandStreamReceiver().requestStallingCommandsOnNextFlush();
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < eventsRequest.numEventsInWaitList; i++) {
|
||||
@@ -1207,7 +1207,7 @@ void CommandQueueHw<GfxFamily>::enqueueBlit(const MultiDispatchInfo &multiDispat
|
||||
timestampPacketDependencies.cacheFlushNodes.add(allocator->getTag());
|
||||
}
|
||||
|
||||
if (!blockQueue && getGpgpuCommandStreamReceiver().isStallingPipeControlOnNextFlushRequired()) {
|
||||
if (!blockQueue && getGpgpuCommandStreamReceiver().isStallingCommandsOnNextFlushRequired()) {
|
||||
timestampPacketDependencies.barrierNodes.add(allocator->getTag());
|
||||
}
|
||||
|
||||
|
||||
@@ -354,7 +354,7 @@ CompletionStamp &CommandWithoutKernel::submit(uint32_t taskLevel, bool terminate
|
||||
enqueueOperationType = EnqueueProperties::Operation::Blit;
|
||||
|
||||
UNRECOVERABLE_IF(!barrierNodes);
|
||||
if (commandStreamReceiver.isStallingPipeControlOnNextFlushRequired()) {
|
||||
if (commandStreamReceiver.isStallingCommandsOnNextFlushRequired()) {
|
||||
barrierNodes->add(commandStreamReceiver.getTimestampPacketAllocator()->getTag());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -839,7 +839,6 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverHwTestXeHPAndLater, whenCreati
|
||||
}
|
||||
|
||||
HWTEST2_F(CommandStreamReceiverHwTestXeHPAndLater, givenXeHpWhenRayTracingEnabledThenDoNotAddCommandBatchBuffer, IsXEHP) {
|
||||
|
||||
MockCsrHw<FamilyType> commandStreamReceiver(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
|
||||
auto cmdSize = commandStreamReceiver.getCmdSizeForPerDssBackedBuffer(pDevice->getHardwareInfo());
|
||||
EXPECT_EQ(0u, cmdSize);
|
||||
@@ -852,3 +851,102 @@ HWTEST2_F(CommandStreamReceiverHwTestXeHPAndLater, givenXeHpWhenRayTracingEnable
|
||||
commandStreamReceiver.programPerDssBackedBuffer(cs, *pDevice, dispatchFlags);
|
||||
EXPECT_EQ(0u, cs.getUsed());
|
||||
}
|
||||
|
||||
// With static work partitioning enabled but only one active partition, the
// no-post-sync barrier is expected to be estimated and programmed as a single
// PIPE_CONTROL.
HWTEST2_F(CommandStreamReceiverHwTestXeHPAndLater, givenStaticPartitionEnabledWhenOnlySinglePartitionUsedThenExpectSinglePipeControlAsBarrier, IsAtLeastXeHpCore) {
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;

    MockCsrHw<FamilyType> commandStreamReceiver(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
    constexpr size_t cmdSize = 256;
    // Array form of unique_ptr: storage comes from new[], so it must be released with delete[].
    std::unique_ptr<char[]> buffer(new char[cmdSize]);
    LinearStream cs(buffer.get(), cmdSize);

    commandStreamReceiver.staticWorkPartitioningEnabled = true;
    commandStreamReceiver.activePartitions = 1;

    size_t estimatedCmdSize = commandStreamReceiver.getCmdSizeForStallingNoPostSyncCommands();
    EXPECT_EQ(sizeof(PIPE_CONTROL), estimatedCmdSize);

    commandStreamReceiver.programStallingNoPostSyncCommandsForBarrier(cs);
    // The size estimate must match what was actually written to the stream.
    EXPECT_EQ(estimatedCmdSize, cs.getUsed());

    PIPE_CONTROL *pipeControl = genCmdCast<PIPE_CONTROL *>(buffer.get());
    ASSERT_NE(nullptr, pipeControl);
}
|
||||
|
||||
// With static work partitioning disabled, even two active partitions are
// expected to produce a barrier that is estimated and programmed as a single
// PIPE_CONTROL.
HWTEST2_F(CommandStreamReceiverHwTestXeHPAndLater, givenStaticPartitionDisabledWhenMultiplePartitionsUsedThenExpectSinglePipeControlAsBarrier, IsAtLeastXeHpCore) {
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;

    MockCsrHw<FamilyType> commandStreamReceiver(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
    constexpr size_t cmdSize = 256;
    // Array form of unique_ptr: storage comes from new[], so it must be released with delete[].
    std::unique_ptr<char[]> buffer(new char[cmdSize]);
    LinearStream cs(buffer.get(), cmdSize);

    commandStreamReceiver.staticWorkPartitioningEnabled = false;
    commandStreamReceiver.activePartitions = 2;

    size_t estimatedCmdSize = commandStreamReceiver.getCmdSizeForStallingNoPostSyncCommands();
    EXPECT_EQ(sizeof(PIPE_CONTROL), estimatedCmdSize);

    commandStreamReceiver.programStallingNoPostSyncCommandsForBarrier(cs);
    // The size estimate must match what was actually written to the stream.
    EXPECT_EQ(estimatedCmdSize, cs.getUsed());

    PIPE_CONTROL *pipeControl = genCmdCast<PIPE_CONTROL *>(buffer.get());
    ASSERT_NE(nullptr, pipeControl);
}
|
||||
|
||||
// With static work partitioning enabled and two active partitions, the barrier
// is expected to be the multi-command sequence checked below: PIPE_CONTROL,
// MI_ATOMIC, MI_SEMAPHORE_WAIT, MI_BATCH_BUFFER_START, then two zeroed
// uint32_t fields.
HWTEST2_F(CommandStreamReceiverHwTestXeHPAndLater, givenStaticPartitionEnabledWhenMultiplePartitionsUsedThenExpectImplicitScalingWithoutSelfCleanupBarrier, IsAtLeastXeHpCore) {
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
    using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START;
    using MI_ATOMIC = typename FamilyType::MI_ATOMIC;
    using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;

    MockCsrHw<FamilyType> commandStreamReceiver(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
    constexpr size_t cmdSize = 256;
    // Array form of unique_ptr: storage comes from new[], so it must be released with delete[].
    // The buffer is intentionally left uninitialized so the zero checks below
    // only pass when the programmed commands actually write those fields.
    std::unique_ptr<char[]> buffer(new char[cmdSize]);
    MockGraphicsAllocation allocation(buffer.get(), cmdSize);
    allocation.gpuAddress = 0xFF000;
    LinearStream cs(buffer.get(), cmdSize);
    cs.replaceGraphicsAllocation(&allocation);

    commandStreamReceiver.staticWorkPartitioningEnabled = true;
    commandStreamReceiver.activePartitions = 2;

    size_t expectedSize = sizeof(PIPE_CONTROL) +
                          sizeof(MI_ATOMIC) + sizeof(MI_SEMAPHORE_WAIT) +
                          sizeof(MI_BATCH_BUFFER_START) +
                          2 * sizeof(uint32_t);
    size_t estimatedCmdSize = commandStreamReceiver.getCmdSizeForStallingNoPostSyncCommands();
    EXPECT_EQ(expectedSize, estimatedCmdSize);

    commandStreamReceiver.programStallingNoPostSyncCommandsForBarrier(cs);
    // The size estimate must match what was actually written to the stream.
    EXPECT_EQ(estimatedCmdSize, cs.getUsed());

    // Walk the programmed stream command by command, tracking the byte offset.
    void *cmdBuffer = buffer.get();
    size_t offset = 0;

    PIPE_CONTROL *pipeControl = genCmdCast<PIPE_CONTROL *>(cmdBuffer);
    ASSERT_NE(nullptr, pipeControl);
    offset += sizeof(PIPE_CONTROL);

    MI_ATOMIC *miAtomic = genCmdCast<MI_ATOMIC *>(ptrOffset(cmdBuffer, offset));
    ASSERT_NE(nullptr, miAtomic);
    offset += sizeof(MI_ATOMIC);

    MI_SEMAPHORE_WAIT *miSemaphore = genCmdCast<MI_SEMAPHORE_WAIT *>(ptrOffset(cmdBuffer, offset));
    ASSERT_NE(nullptr, miSemaphore);
    offset += sizeof(MI_SEMAPHORE_WAIT);

    MI_BATCH_BUFFER_START *bbStart = genCmdCast<MI_BATCH_BUFFER_START *>(ptrOffset(cmdBuffer, offset));
    ASSERT_NE(nullptr, bbStart);
    offset += sizeof(MI_BATCH_BUFFER_START);

    // Two trailing 32-bit fields follow the commands; both are expected to be zero.
    uint32_t *data = reinterpret_cast<uint32_t *>(ptrOffset(cmdBuffer, offset));
    EXPECT_EQ(0u, *data);
    offset += sizeof(uint32_t);

    data = reinterpret_cast<uint32_t *>(ptrOffset(cmdBuffer, offset));
    EXPECT_EQ(0u, *data);
    offset += sizeof(uint32_t);

    // Every byte of the estimate must be accounted for by the walk above.
    EXPECT_EQ(estimatedCmdSize, offset);
}
|
||||
|
||||
@@ -26,7 +26,7 @@ HWTEST_F(TimestampPacketTests, givenEmptyWaitlistAndNoOutputEventWhenEnqueueingM
|
||||
cmdQ->enqueueMarkerWithWaitList(0, nullptr, nullptr);
|
||||
|
||||
EXPECT_EQ(0u, cmdQ->timestampPacketContainer->peekNodes().size());
|
||||
EXPECT_FALSE(csr.stallingPipeControlOnNextFlushRequired);
|
||||
EXPECT_FALSE(csr.stallingCommandsOnNextFlushRequired);
|
||||
}
|
||||
|
||||
HWTEST_F(TimestampPacketTests, givenEmptyWaitlistAndEventWhenEnqueueingMarkerWithProfilingEnabledThenObtainNewNode) {
|
||||
@@ -128,7 +128,7 @@ HWTEST_F(TimestampPacketTests, whenEnqueueingBarrierThenRequestPipeControlOnCsrF
|
||||
auto &csr = device->getUltCommandStreamReceiver<FamilyType>();
|
||||
csr.timestampPacketWriteEnabled = true;
|
||||
|
||||
EXPECT_FALSE(csr.stallingPipeControlOnNextFlushRequired);
|
||||
EXPECT_FALSE(csr.stallingCommandsOnNextFlushRequired);
|
||||
|
||||
MockCommandQueueHw<FamilyType> cmdQ(context, device.get(), nullptr);
|
||||
|
||||
@@ -143,33 +143,33 @@ HWTEST_F(TimestampPacketTests, whenEnqueueingBarrierThenRequestPipeControlOnCsrF
|
||||
EXPECT_EQ(cmdQ.timestampPacketContainer->peekNodes().at(0), cmdQNodes.peekNodes().at(0)); // dont obtain new node
|
||||
EXPECT_EQ(1u, cmdQ.timestampPacketContainer->peekNodes().size());
|
||||
|
||||
EXPECT_TRUE(csr.stallingPipeControlOnNextFlushRequired);
|
||||
EXPECT_TRUE(csr.stallingCommandsOnNextFlushRequired);
|
||||
}
|
||||
|
||||
HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteDisabledWhenEnqueueingBarrierThenDontRequestPipeControlOnCsrFlush) {
|
||||
auto &csr = device->getUltCommandStreamReceiver<FamilyType>();
|
||||
csr.timestampPacketWriteEnabled = false;
|
||||
|
||||
EXPECT_FALSE(csr.stallingPipeControlOnNextFlushRequired);
|
||||
EXPECT_FALSE(csr.stallingCommandsOnNextFlushRequired);
|
||||
|
||||
MockCommandQueueHw<FamilyType> cmdQ(context, device.get(), nullptr);
|
||||
|
||||
cmdQ.enqueueBarrierWithWaitList(0, nullptr, nullptr);
|
||||
|
||||
EXPECT_FALSE(csr.stallingPipeControlOnNextFlushRequired);
|
||||
EXPECT_FALSE(csr.stallingCommandsOnNextFlushRequired);
|
||||
}
|
||||
|
||||
HWTEST_F(TimestampPacketTests, givenBlockedQueueWhenEnqueueingBarrierThenRequestPipeControlOnCsrFlush) {
|
||||
auto &csr = device->getUltCommandStreamReceiver<FamilyType>();
|
||||
csr.timestampPacketWriteEnabled = true;
|
||||
EXPECT_FALSE(csr.stallingPipeControlOnNextFlushRequired);
|
||||
EXPECT_FALSE(csr.stallingCommandsOnNextFlushRequired);
|
||||
|
||||
MockCommandQueueHw<FamilyType> cmdQ(context, device.get(), nullptr);
|
||||
|
||||
auto userEvent = make_releaseable<UserEvent>();
|
||||
cl_event waitlist[] = {userEvent.get()};
|
||||
cmdQ.enqueueBarrierWithWaitList(1, waitlist, nullptr);
|
||||
EXPECT_TRUE(csr.stallingPipeControlOnNextFlushRequired);
|
||||
EXPECT_TRUE(csr.stallingCommandsOnNextFlushRequired);
|
||||
userEvent->setStatus(CL_COMPLETE);
|
||||
}
|
||||
|
||||
@@ -177,10 +177,10 @@ HWTEST_F(TimestampPacketTests, givenPipeControlRequestWhenEstimatingCsrStreamSiz
|
||||
auto &csr = device->getUltCommandStreamReceiver<FamilyType>();
|
||||
DispatchFlags flags = DispatchFlagsHelper::createDefaultDispatchFlags();
|
||||
|
||||
csr.stallingPipeControlOnNextFlushRequired = false;
|
||||
csr.stallingCommandsOnNextFlushRequired = false;
|
||||
auto sizeWithoutPcRequest = device->getUltCommandStreamReceiver<FamilyType>().getRequiredCmdStreamSize(flags, device->getDevice());
|
||||
|
||||
csr.stallingPipeControlOnNextFlushRequired = true;
|
||||
csr.stallingCommandsOnNextFlushRequired = true;
|
||||
auto sizeWithPcRequest = device->getUltCommandStreamReceiver<FamilyType>().getRequiredCmdStreamSize(flags, device->getDevice());
|
||||
|
||||
size_t extendedSize = sizeWithoutPcRequest + sizeof(typename FamilyType::PIPE_CONTROL);
|
||||
@@ -197,10 +197,10 @@ HWTEST_F(TimestampPacketTests, givenPipeControlRequestWithBarrierWriteWhenEstima
|
||||
|
||||
flags.barrierTimestampPacketNodes = &barrierTimestampPacketNode;
|
||||
|
||||
csr.stallingPipeControlOnNextFlushRequired = false;
|
||||
csr.stallingCommandsOnNextFlushRequired = false;
|
||||
auto sizeWithoutPcRequest = device->getUltCommandStreamReceiver<FamilyType>().getRequiredCmdStreamSize(flags, device->getDevice());
|
||||
|
||||
csr.stallingPipeControlOnNextFlushRequired = true;
|
||||
csr.stallingCommandsOnNextFlushRequired = true;
|
||||
auto sizeWithPcRequest = device->getUltCommandStreamReceiver<FamilyType>().getRequiredCmdStreamSize(flags, device->getDevice());
|
||||
|
||||
size_t extendedSize = sizeWithoutPcRequest + MemorySynchronizationCommands<FamilyType>::getSizeForPipeControlWithPostSyncOperation(device->getHardwareInfo());
|
||||
@@ -226,7 +226,7 @@ HWTEST_F(TimestampPacketTests, givenInstructionCacheRequesWhenSizeIsEstimatedThe
|
||||
HWTEST_F(TimestampPacketTests, givenPipeControlRequestWhenFlushingThenProgramPipeControlAndResetRequestFlag) {
|
||||
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
|
||||
auto &csr = device->getUltCommandStreamReceiver<FamilyType>();
|
||||
csr.stallingPipeControlOnNextFlushRequired = true;
|
||||
csr.stallingCommandsOnNextFlushRequired = true;
|
||||
csr.timestampPacketWriteEnabled = true;
|
||||
|
||||
MockCommandQueueHw<FamilyType> cmdQ(context, device.get(), nullptr);
|
||||
@@ -234,7 +234,7 @@ HWTEST_F(TimestampPacketTests, givenPipeControlRequestWhenFlushingThenProgramPip
|
||||
MockKernelWithInternals mockKernel(*device, context);
|
||||
cmdQ.enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);
|
||||
|
||||
EXPECT_FALSE(csr.stallingPipeControlOnNextFlushRequired);
|
||||
EXPECT_FALSE(csr.stallingCommandsOnNextFlushRequired);
|
||||
|
||||
HardwareParse hwParser;
|
||||
hwParser.parseCommands<FamilyType>(csr.commandStream, 0);
|
||||
|
||||
@@ -630,7 +630,7 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenPipeControlRequestWhenDispatchingBlitEnq
|
||||
auto bcsCsr = static_cast<UltCommandStreamReceiver<FamilyType> *>(this->bcsCsr);
|
||||
|
||||
auto queueCsr = static_cast<UltCommandStreamReceiver<FamilyType> *>(cmdQ->gpgpuEngine->commandStreamReceiver);
|
||||
queueCsr->stallingPipeControlOnNextFlushRequired = true;
|
||||
queueCsr->stallingCommandsOnNextFlushRequired = true;
|
||||
|
||||
cl_int retVal = CL_SUCCESS;
|
||||
auto buffer = clUniquePtr<Buffer>(Buffer::create(bcsMockContext.get(), CL_MEM_READ_WRITE, 1, nullptr, retVal));
|
||||
@@ -712,9 +712,9 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenBarrierWhenReleasingMultipleBlockedEnque
|
||||
};
|
||||
|
||||
auto &csrStream = cmdQ->getGpgpuCommandStreamReceiver().getCS(0);
|
||||
EXPECT_TRUE(cmdQ->getGpgpuCommandStreamReceiver().isStallingPipeControlOnNextFlushRequired());
|
||||
EXPECT_TRUE(cmdQ->getGpgpuCommandStreamReceiver().isStallingCommandsOnNextFlushRequired());
|
||||
userEvent0.setStatus(CL_COMPLETE);
|
||||
EXPECT_FALSE(cmdQ->getGpgpuCommandStreamReceiver().isStallingPipeControlOnNextFlushRequired());
|
||||
EXPECT_FALSE(cmdQ->getGpgpuCommandStreamReceiver().isStallingCommandsOnNextFlushRequired());
|
||||
EXPECT_TRUE(pipeControlLookup(csrStream, 0));
|
||||
|
||||
auto csrOffset = csrStream.getUsed();
|
||||
@@ -731,7 +731,7 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenPipeControlRequestWhenDispatchingBlocked
|
||||
auto bcsCsr = static_cast<UltCommandStreamReceiver<FamilyType> *>(this->bcsCsr);
|
||||
|
||||
auto queueCsr = static_cast<UltCommandStreamReceiver<FamilyType> *>(cmdQ->gpgpuEngine->commandStreamReceiver);
|
||||
queueCsr->stallingPipeControlOnNextFlushRequired = true;
|
||||
queueCsr->stallingCommandsOnNextFlushRequired = true;
|
||||
|
||||
cl_int retVal = CL_SUCCESS;
|
||||
auto buffer = clUniquePtr<Buffer>(Buffer::create(bcsMockContext.get(), CL_MEM_READ_WRITE, 1, nullptr, retVal));
|
||||
|
||||
@@ -155,8 +155,8 @@ class CommandStreamReceiver {
|
||||
GraphicsAllocation *getGlobalFenceAllocation() const { return globalFenceAllocation; }
|
||||
GraphicsAllocation *getWorkPartitionAllocation() const { return workPartitionAllocation; }
|
||||
|
||||
void requestStallingPipeControlOnNextFlush() { stallingPipeControlOnNextFlushRequired = true; }
|
||||
bool isStallingPipeControlOnNextFlushRequired() const { return stallingPipeControlOnNextFlushRequired; }
|
||||
void requestStallingCommandsOnNextFlush() { stallingCommandsOnNextFlushRequired = true; }
|
||||
bool isStallingCommandsOnNextFlushRequired() const { return stallingCommandsOnNextFlushRequired; }
|
||||
|
||||
virtual void waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool forcePowerSavingMode) = 0;
|
||||
virtual bool waitForCompletionWithTimeout(bool enableTimeout, int64_t timeoutMicroseconds, uint32_t taskCountToWait);
|
||||
@@ -375,7 +375,7 @@ class CommandStreamReceiver {
|
||||
bool bindingTableBaseAddressRequired = false;
|
||||
bool mediaVfeStateDirty = true;
|
||||
bool lastVmeSubslicesConfig = false;
|
||||
bool stallingPipeControlOnNextFlushRequired = false;
|
||||
bool stallingCommandsOnNextFlushRequired = false;
|
||||
bool timestampPacketWriteEnabled = false;
|
||||
bool staticWorkPartitioningEnabled = false;
|
||||
bool nTo1SubmissionModelEnabled = false;
|
||||
|
||||
@@ -71,6 +71,8 @@ class CommandStreamReceiverHw : public CommandStreamReceiver {
|
||||
size_t getCmdSizeForEngineMode(const DispatchFlags &dispatchFlags) const;
|
||||
size_t getCmdSizeForPerDssBackedBuffer(const HardwareInfo &hwInfo);
|
||||
size_t getCmdSizeForActivePartitionConfig() const;
|
||||
size_t getCmdSizeForStallingCommands(const DispatchFlags &dispatchFlags) const;
|
||||
size_t getCmdSizeForStallingNoPostSyncCommands() const;
|
||||
|
||||
bool isComputeModeNeeded() const;
|
||||
bool isPipelineSelectAlreadyProgrammed() const;
|
||||
@@ -147,7 +149,8 @@ class CommandStreamReceiverHw : public CommandStreamReceiver {
|
||||
void programPerDssBackedBuffer(LinearStream &scr, Device &device, DispatchFlags &dispatchFlags);
|
||||
void programStateSip(LinearStream &cmdStream, Device &device);
|
||||
void programVFEState(LinearStream &csr, DispatchFlags &dispatchFlags, uint32_t maxFrontEndThreads);
|
||||
void programStallingPipeControlForBarrier(LinearStream &cmdStream, DispatchFlags &dispatchFlags);
|
||||
void programStallingCommandsForBarrier(LinearStream &cmdStream, DispatchFlags &dispatchFlags);
|
||||
void programStallingNoPostSyncCommandsForBarrier(LinearStream &cmdStream);
|
||||
void programEngineModeCommands(LinearStream &csr, const DispatchFlags &dispatchFlags);
|
||||
void programEngineModeEpliogue(LinearStream &csr, const DispatchFlags &dispatchFlags);
|
||||
void programActivePartitionConfig();
|
||||
|
||||
@@ -331,8 +331,10 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
|
||||
TimestampPacketHelper::programCsrDependenciesForTimestampPacketContainer<GfxFamily>(commandStreamCSR, dispatchFlags.csrDependencies);
|
||||
TimestampPacketHelper::programCsrDependenciesForForTaskCountContainer<GfxFamily>(commandStreamCSR, dispatchFlags.csrDependencies);
|
||||
|
||||
if (stallingPipeControlOnNextFlushRequired) {
|
||||
programStallingPipeControlForBarrier(commandStreamCSR, dispatchFlags);
|
||||
programActivePartitionConfig();
|
||||
|
||||
if (stallingCommandsOnNextFlushRequired) {
|
||||
programStallingCommandsForBarrier(commandStreamCSR, dispatchFlags);
|
||||
}
|
||||
|
||||
programEngineModeCommands(commandStreamCSR, dispatchFlags);
|
||||
@@ -359,7 +361,6 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
|
||||
programVFEState(commandStreamCSR, dispatchFlags, device.getDeviceInfo().maxFrontEndThreads);
|
||||
|
||||
programPreemption(commandStreamCSR, dispatchFlags);
|
||||
programActivePartitionConfig();
|
||||
|
||||
bool dshDirty = dshState.updateAndCheck(&dsh);
|
||||
bool iohDirty = iohState.updateAndCheck(&ioh);
|
||||
@@ -656,8 +657,8 @@ void CommandStreamReceiverHw<GfxFamily>::forcePipeControl(NEO::LinearStream &com
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
inline void CommandStreamReceiverHw<GfxFamily>::programStallingPipeControlForBarrier(LinearStream &cmdStream, DispatchFlags &dispatchFlags) {
|
||||
stallingPipeControlOnNextFlushRequired = false;
|
||||
inline void CommandStreamReceiverHw<GfxFamily>::programStallingCommandsForBarrier(LinearStream &cmdStream, DispatchFlags &dispatchFlags) {
|
||||
stallingCommandsOnNextFlushRequired = false;
|
||||
|
||||
auto barrierTimestampPacketNodes = dispatchFlags.barrierTimestampPacketNodes;
|
||||
|
||||
@@ -672,11 +673,9 @@ inline void CommandStreamReceiverHw<GfxFamily>::programStallingPipeControlForBar
|
||||
0,
|
||||
peekHwInfo(),
|
||||
args);
|
||||
|
||||
dispatchFlags.barrierTimestampPacketNodes->makeResident(*this);
|
||||
} else {
|
||||
PipeControlArgs args;
|
||||
MemorySynchronizationCommands<GfxFamily>::addPipeControl(cmdStream, args);
|
||||
programStallingNoPostSyncCommandsForBarrier(cmdStream);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -835,13 +834,8 @@ size_t CommandStreamReceiverHw<GfxFamily>::getRequiredCmdStreamSize(const Dispat
|
||||
size += TimestampPacketHelper::getRequiredCmdStreamSize<GfxFamily>(dispatchFlags.csrDependencies);
|
||||
size += TimestampPacketHelper::getRequiredCmdStreamSizeForTaskCountContainer<GfxFamily>(dispatchFlags.csrDependencies);
|
||||
|
||||
if (stallingPipeControlOnNextFlushRequired) {
|
||||
auto barrierTimestampPacketNodes = dispatchFlags.barrierTimestampPacketNodes;
|
||||
if (barrierTimestampPacketNodes && barrierTimestampPacketNodes->peekNodes().size() > 0) {
|
||||
size += MemorySynchronizationCommands<GfxFamily>::getSizeForPipeControlWithPostSyncOperation(peekHwInfo());
|
||||
} else {
|
||||
size += sizeof(typename GfxFamily::PIPE_CONTROL);
|
||||
}
|
||||
if (stallingCommandsOnNextFlushRequired) {
|
||||
size += getCmdSizeForStallingCommands(dispatchFlags);
|
||||
}
|
||||
|
||||
if (requiresInstructionCacheFlush) {
|
||||
@@ -857,7 +851,6 @@ size_t CommandStreamReceiverHw<GfxFamily>::getRequiredCmdStreamSize(const Dispat
|
||||
|
||||
template <typename GfxFamily>
|
||||
inline size_t CommandStreamReceiverHw<GfxFamily>::getCmdSizeForPipelineSelect() const {
|
||||
|
||||
size_t size = 0;
|
||||
if ((csrSizeRequestFlags.mediaSamplerConfigChanged ||
|
||||
csrSizeRequestFlags.specialPipelineSelectModeChanged ||
|
||||
@@ -1475,4 +1468,14 @@ void CommandStreamReceiverHw<GfxFamily>::postInitFlagsSetup() {
|
||||
}
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
size_t CommandStreamReceiverHw<GfxFamily>::getCmdSizeForStallingCommands(const DispatchFlags &dispatchFlags) const {
|
||||
auto barrierTimestampPacketNodes = dispatchFlags.barrierTimestampPacketNodes;
|
||||
if (barrierTimestampPacketNodes && barrierTimestampPacketNodes->peekNodes().size() > 0) {
|
||||
return MemorySynchronizationCommands<GfxFamily>::getSizeForPipeControlWithPostSyncOperation(peekHwInfo());
|
||||
} else {
|
||||
return getCmdSizeForStallingNoPostSyncCommands();
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace NEO
|
||||
|
||||
@@ -154,4 +154,15 @@ template <typename GfxFamily>
|
||||
void CommandStreamReceiverHw<GfxFamily>::programActivePartitionConfig() {
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
inline size_t CommandStreamReceiverHw<GfxFamily>::getCmdSizeForStallingNoPostSyncCommands() const {
|
||||
return sizeof(typename GfxFamily::PIPE_CONTROL);
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
inline void CommandStreamReceiverHw<GfxFamily>::programStallingNoPostSyncCommandsForBarrier(LinearStream &cmdStream) {
|
||||
PipeControlArgs args;
|
||||
MemorySynchronizationCommands<GfxFamily>::addPipeControl(cmdStream, args);
|
||||
}
|
||||
|
||||
} // namespace NEO
|
||||
|
||||
@@ -201,4 +201,27 @@ inline void CommandStreamReceiverHw<GfxFamily>::addPipeControlBeforeStateSip(Lin
|
||||
}
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
inline size_t CommandStreamReceiverHw<GfxFamily>::getCmdSizeForStallingNoPostSyncCommands() const {
|
||||
if (this->activePartitions > 1 && this->staticWorkPartitioningEnabled) {
|
||||
return ImplicitScalingDispatch<GfxFamily>::getBarrierSize(false);
|
||||
} else {
|
||||
return sizeof(typename GfxFamily::PIPE_CONTROL);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
inline void CommandStreamReceiverHw<GfxFamily>::programStallingNoPostSyncCommandsForBarrier(LinearStream &cmdStream) {
|
||||
PipeControlArgs args;
|
||||
if (this->activePartitions > 1 && this->staticWorkPartitioningEnabled) {
|
||||
ImplicitScalingDispatch<GfxFamily>::dispatchBarrierCommands(cmdStream,
|
||||
this->deviceBitfield,
|
||||
args,
|
||||
false,
|
||||
false);
|
||||
} else {
|
||||
MemorySynchronizationCommands<GfxFamily>::addPipeControl(cmdStream, args);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace NEO
|
||||
|
||||
@@ -149,7 +149,7 @@ void BlitProperties::setupDependenciesForAuxTranslation(BlitPropertiesContainer
|
||||
blitPropertiesContainer[i + numObjects].outputTimestampPacket = timestampPacketDependencies.nonAuxToAuxNodes.peekNodes()[i];
|
||||
}
|
||||
|
||||
gpguCsr.requestStallingPipeControlOnNextFlush();
|
||||
gpguCsr.requestStallingCommandsOnNextFlush();
|
||||
auto nodesAllocator = gpguCsr.getTimestampPacketAllocator();
|
||||
timestampPacketDependencies.barrierNodes.add(nodesAllocator->getTag());
|
||||
|
||||
|
||||
@@ -48,6 +48,7 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw<GfxFamily>, publ
|
||||
using BaseClass::programEnginePrologue;
|
||||
using BaseClass::programPerDssBackedBuffer;
|
||||
using BaseClass::programPreamble;
|
||||
using BaseClass::programStallingNoPostSyncCommandsForBarrier;
|
||||
using BaseClass::programStateSip;
|
||||
using BaseClass::programVFEState;
|
||||
using BaseClass::requiresInstructionCacheFlush;
|
||||
@@ -104,7 +105,7 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw<GfxFamily>, publ
|
||||
using BaseClass::CommandStreamReceiver::requiredThreadArbitrationPolicy;
|
||||
using BaseClass::CommandStreamReceiver::samplerCacheFlushRequired;
|
||||
using BaseClass::CommandStreamReceiver::scratchSpaceController;
|
||||
using BaseClass::CommandStreamReceiver::stallingPipeControlOnNextFlushRequired;
|
||||
using BaseClass::CommandStreamReceiver::stallingCommandsOnNextFlushRequired;
|
||||
using BaseClass::CommandStreamReceiver::submissionAggregator;
|
||||
using BaseClass::CommandStreamReceiver::tagAddress;
|
||||
using BaseClass::CommandStreamReceiver::taskCount;
|
||||
|
||||
Reference in New Issue
Block a user