performance(ocl): program barrier pc in taskStream

Program the barrier into the task stream immediately.
This reduces the number of batch buffer starts.

Related-To: NEO-8147

Signed-off-by: Dominik Dabek <dominik.dabek@intel.com>
This commit is contained in:
Dominik Dabek
2023-08-30 15:06:48 +00:00
committed by Compute-Runtime-Automation
parent a38ac3557b
commit 839c2d6737
16 changed files with 163 additions and 40 deletions

View File

@@ -162,6 +162,7 @@ class CommandStreamReceiverHw : public CommandStreamReceiver {
// Reports the command size needed for a compute barrier by delegating to
// getCmdSizeForStallingNoPostSyncCommands().
size_t getCmdsSizeForComputeBarrierCommand() const override {
    const size_t stallingCmdsSize = getCmdSizeForStallingNoPostSyncCommands();
    return stallingCmdsSize;
}
// Override of the base-class hook (declaration only; the definition is not in this view).
// Takes the target LinearStream, a pointer to a TimestampPacketContainer of barrier nodes,
// and a flag saying whether a DC flush is required.
// NOTE(review): per the commit message this is what lets the barrier be programmed
// into the task stream; presumably a null barrierTimestampPacketNodes selects the
// no-post-sync path — confirm against the definition.
void programStallingCommandsForBarrier(LinearStream &cmdStream, TimestampPacketContainer *barrierTimestampPacketNodes, const bool isDcFlushRequired) override;
// Override of the base-class declaration; takes no arguments and returns a SubmissionStatus.
// The definition is outside this view.
SubmissionStatus initializeDeviceWithFirstSubmission() override;
HeapDirtyState &getDshState() {
@@ -187,7 +188,6 @@ class CommandStreamReceiverHw : public CommandStreamReceiver {
// Command-programming helpers. Each one receives the LinearStream it operates on as its
// first parameter; the definitions are outside this view.
// NOTE(review): the LinearStream parameter is named inconsistently across these
// declarations (scr / csr / cmdStream); "scr" looks like a typo of "csr" — confirm.
void programPerDssBackedBuffer(LinearStream &scr, Device &device, DispatchFlags &dispatchFlags);
void programStateSip(LinearStream &cmdStream, Device &device);
void programVFEState(LinearStream &csr, DispatchFlags &dispatchFlags, uint32_t maxFrontEndThreads);
// NOTE(review): the hunk header shrinks this hunk from 7 lines to 6, and the commit adds an
// override with the same name but a different signature in the previous hunk; this
// DispatchFlags-based declaration is presumably the line being removed — confirm against the raw diff.
void programStallingCommandsForBarrier(LinearStream &cmdStream, DispatchFlags &dispatchFlags);
// Barrier variant that takes only the command stream.
void programStallingNoPostSyncCommandsForBarrier(LinearStream &cmdStream);
// Barrier variant that additionally takes a tag node and a DC-flush flag.
void programStallingPostSyncCommandsForBarrier(LinearStream &cmdStream, TagNodeBase &tagNode, bool dcFlushRequired);
void programEngineModeCommands(LinearStream &csr, const DispatchFlags &dispatchFlags);