mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-09 22:43:00 +08:00
performance(ocl): program barrier pc in taskStream
Program the barrier directly into the task stream. This reduces the number of batch buffer starts.
Related-To: NEO-8147
Signed-off-by: Dominik Dabek <dominik.dabek@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
a38ac3557b
commit
839c2d6737
@@ -359,6 +359,8 @@ class CommandStreamReceiver {
|
||||
virtual void programComputeBarrierCommand(LinearStream &cmdStream) = 0;
|
||||
virtual size_t getCmdsSizeForComputeBarrierCommand() const = 0;
|
||||
|
||||
// Programs the stalling commands for a barrier into cmdStream.
// NOTE(review): in the CommandStreamReceiverHw implementation shown in this change, a non-null,
// non-empty barrierTimestampPacketNodes selects the post-sync variant (first node of the container,
// with isDcFlushRequired forwarded) and makes the nodes resident; otherwise the no-post-sync variant
// is programmed. Other implementations (e.g. mocks) may behave differently.
virtual void programStallingCommandsForBarrier(LinearStream &cmdStream, TimestampPacketContainer *barrierTimestampPacketNodes, const bool isDcFlushRequired) = 0;
|
||||
|
||||
const HardwareInfo &peekHwInfo() const;
|
||||
const RootDeviceEnvironment &peekRootDeviceEnvironment() const;
|
||||
|
||||
|
||||
@@ -162,6 +162,7 @@ class CommandStreamReceiverHw : public CommandStreamReceiver {
|
||||
size_t getCmdsSizeForComputeBarrierCommand() const override {
|
||||
return getCmdSizeForStallingNoPostSyncCommands();
|
||||
}
|
||||
void programStallingCommandsForBarrier(LinearStream &cmdStream, TimestampPacketContainer *barrierTimestampPacketNodes, const bool isDcFlushRequired) override;
|
||||
SubmissionStatus initializeDeviceWithFirstSubmission() override;
|
||||
|
||||
HeapDirtyState &getDshState() {
|
||||
@@ -187,7 +188,6 @@ class CommandStreamReceiverHw : public CommandStreamReceiver {
|
||||
void programPerDssBackedBuffer(LinearStream &scr, Device &device, DispatchFlags &dispatchFlags);
|
||||
void programStateSip(LinearStream &cmdStream, Device &device);
|
||||
void programVFEState(LinearStream &csr, DispatchFlags &dispatchFlags, uint32_t maxFrontEndThreads);
|
||||
void programStallingCommandsForBarrier(LinearStream &cmdStream, DispatchFlags &dispatchFlags);
|
||||
void programStallingNoPostSyncCommandsForBarrier(LinearStream &cmdStream);
|
||||
void programStallingPostSyncCommandsForBarrier(LinearStream &cmdStream, TagNodeBase &tagNode, bool dcFlushRequired);
|
||||
void programEngineModeCommands(LinearStream &csr, const DispatchFlags &dispatchFlags);
|
||||
|
||||
@@ -508,9 +508,9 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
|
||||
|
||||
if (dispatchFlags.isStallingCommandsOnNextFlushRequired) {
|
||||
if (DebugManager.flags.ProgramBarrierInCommandStreamTask.get() == 1) {
|
||||
programStallingCommandsForBarrier(commandStreamTask, dispatchFlags);
|
||||
programStallingCommandsForBarrier(commandStreamTask, dispatchFlags.barrierTimestampPacketNodes, dispatchFlags.isDcFlushRequiredOnStallingCommandsOnNextFlush);
|
||||
} else {
|
||||
programStallingCommandsForBarrier(commandStreamCSR, dispatchFlags);
|
||||
programStallingCommandsForBarrier(commandStreamCSR, dispatchFlags.barrierTimestampPacketNodes, dispatchFlags.isDcFlushRequiredOnStallingCommandsOnNextFlush);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -744,12 +744,9 @@ void CommandStreamReceiverHw<GfxFamily>::programComputeMode(LinearStream &stream
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
inline void CommandStreamReceiverHw<GfxFamily>::programStallingCommandsForBarrier(LinearStream &cmdStream, DispatchFlags &dispatchFlags) {
|
||||
|
||||
auto barrierTimestampPacketNodes = dispatchFlags.barrierTimestampPacketNodes;
|
||||
|
||||
inline void CommandStreamReceiverHw<GfxFamily>::programStallingCommandsForBarrier(LinearStream &cmdStream, TimestampPacketContainer *barrierTimestampPacketNodes, const bool isDcFlushRequired) {
|
||||
if (barrierTimestampPacketNodes && barrierTimestampPacketNodes->peekNodes().size() != 0) {
|
||||
programStallingPostSyncCommandsForBarrier(cmdStream, *barrierTimestampPacketNodes->peekNodes()[0], dispatchFlags.isDcFlushRequiredOnStallingCommandsOnNextFlush);
|
||||
programStallingPostSyncCommandsForBarrier(cmdStream, *barrierTimestampPacketNodes->peekNodes()[0], isDcFlushRequired);
|
||||
barrierTimestampPacketNodes->makeResident(*this);
|
||||
} else {
|
||||
programStallingNoPostSyncCommandsForBarrier(cmdStream);
|
||||
|
||||
@@ -166,6 +166,9 @@ class MockCommandStreamReceiver : public CommandStreamReceiver {
|
||||
// Mock override: always reports that a compute barrier needs no command space.
size_t getCmdsSizeForComputeBarrierCommand() const override {
    constexpr size_t noCommandsSize = 0u;
    return noCommandsSize;
}
|
||||
// Mock override: emits nothing into cmdStream and ignores its arguments; it only records
// that the call happened so tests can check programStallingCommandsForBarrierCalled.
void programStallingCommandsForBarrier(LinearStream &cmdStream, TimestampPacketContainer *barrierTimestampPacketNodes, const bool isDcFlushRequired) override {
    this->programStallingCommandsForBarrierCalled = true;
}
|
||||
|
||||
bool createPreemptionAllocation() override {
|
||||
if (createPreemptionAllocationParentCall) {
|
||||
@@ -214,6 +217,7 @@ class MockCommandStreamReceiver : public CommandStreamReceiver {
|
||||
bool createPreemptionAllocationReturn = true;
|
||||
bool createPreemptionAllocationParentCall = false;
|
||||
bool programComputeBarrierCommandCalled = false;
|
||||
bool programStallingCommandsForBarrierCalled = false;
|
||||
std::optional<bool> isGpuHangDetectedReturnValue{};
|
||||
std::optional<bool> testTaskCountReadyReturnValue{};
|
||||
WaitStatus waitForCompletionWithTimeoutReturnValue{WaitStatus::Ready};
|
||||
|
||||
@@ -9,11 +9,14 @@
|
||||
#include "shared/source/command_stream/command_stream_receiver_simulated_hw.h"
|
||||
#include "shared/source/helpers/array_count.h"
|
||||
#include "shared/source/helpers/hardware_context_controller.h"
|
||||
#include "shared/source/helpers/timestamp_packet.h"
|
||||
#include "shared/source/memory_manager/memory_pool.h"
|
||||
#include "shared/source/os_interface/os_context.h"
|
||||
#include "shared/test/common/cmd_parse/hw_parse.h"
|
||||
#include "shared/test/common/helpers/debug_manager_state_restore.h"
|
||||
#include "shared/test/common/helpers/engine_descriptor_helper.h"
|
||||
#include "shared/test/common/helpers/gfx_core_helper_tests.h"
|
||||
#include "shared/test/common/helpers/unit_test_helper.h"
|
||||
#include "shared/test/common/mocks/mock_aub_manager.h"
|
||||
#include "shared/test/common/mocks/mock_gmm.h"
|
||||
#include "shared/test/common/mocks/mock_graphics_allocation.h"
|
||||
@@ -595,3 +598,98 @@ HWTEST_F(CommandStreamSimulatedTests, givenSpecificMemoryPoolAllocationWhenWrite
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Verifies that programStallingCommandsForBarrier(), when given a non-empty barrier node container,
// programs a PIPE_CONTROL with a write-immediate-data post-sync operation that targets the barrier
// node's context-end address. The scenario is exercised twice: without and with a DC flush request.
HWTEST_F(CommandStreamSimulatedTests, givenBarrierNodesWhenProgramStallingCommandsForBarrierCalledThenPostSyncWritePipeControlIsProgrammed) {
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
    auto csr = std::make_unique<MockSimulatedCsrHw<FamilyType>>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
    MockOsContext osContext(0, EngineDescriptorHelper::getDefaultDescriptor());
    csr->setupContext(osContext);

    // The barrier node comes from the device's default CSR allocator; its context-end address is
    // the address the post-sync write is expected to target.
    TagAllocatorBase *allocator = pDevice->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator();
    auto barrierNode = allocator->getTag();
    const auto barrierNodeAddress = TimestampPacketHelper::getContextEndGpuAddress(*barrierNode);
    TimestampPacketContainer barrierNodes{};
    barrierNodes.add(barrierNode);

    // Case 1: DC flush not requested.
    {
        MockGraphicsAllocation streamAllocation{};
        uint32_t streamBuffer[100] = {};
        LinearStream linearStream(&streamAllocation, streamBuffer, sizeof(streamBuffer));

        csr->programStallingCommandsForBarrier(linearStream, &barrierNodes, false);

        HardwareParse hwParser;
        hwParser.parseCommands<FamilyType>(linearStream);
        auto pipeControlItor = find<PIPE_CONTROL *>(hwParser.cmdList.begin(), hwParser.cmdList.end());
        // Fail cleanly instead of dereferencing a past-the-end iterator if no PIPE_CONTROL was parsed.
        ASSERT_NE(hwParser.cmdList.end(), pipeControlItor);
        auto pipeControl = genCmdCast<PIPE_CONTROL *>(*pipeControlItor);
        if (UnitTestHelper<FamilyType>::isPipeControlWArequired(hardwareInfo)) {
            // With the workaround enabled the PIPE_CONTROL under test is the second one in the stream.
            auto nextPipeControlItor = find<PIPE_CONTROL *>(++pipeControlItor, hwParser.cmdList.end());
            ASSERT_NE(hwParser.cmdList.end(), nextPipeControlItor);
            pipeControl = genCmdCast<PIPE_CONTROL *>(*nextPipeControlItor);
        }
        ASSERT_NE(nullptr, pipeControl);
        EXPECT_EQ(PIPE_CONTROL::POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, pipeControl->getPostSyncOperation());
        EXPECT_FALSE(pipeControl->getDcFlushEnable());
        EXPECT_EQ(barrierNodeAddress, UnitTestHelper<FamilyType>::getPipeControlPostSyncAddress(*pipeControl));
    }

    // Case 2: DC flush requested; the programmed bit must match what the CSR reports as supported.
    {
        MockGraphicsAllocation streamAllocation{};
        uint32_t streamBuffer[100] = {};
        LinearStream linearStream(&streamAllocation, streamBuffer, sizeof(streamBuffer));

        csr->programStallingCommandsForBarrier(linearStream, &barrierNodes, true);

        HardwareParse hwParser;
        hwParser.parseCommands<FamilyType>(linearStream);
        auto pipeControlItor = find<PIPE_CONTROL *>(hwParser.cmdList.begin(), hwParser.cmdList.end());
        // Fail cleanly instead of dereferencing a past-the-end iterator if no PIPE_CONTROL was parsed.
        ASSERT_NE(hwParser.cmdList.end(), pipeControlItor);
        auto pipeControl = genCmdCast<PIPE_CONTROL *>(*pipeControlItor);
        if (UnitTestHelper<FamilyType>::isPipeControlWArequired(hardwareInfo)) {
            // With the workaround enabled the PIPE_CONTROL under test is the second one in the stream.
            auto nextPipeControlItor = find<PIPE_CONTROL *>(++pipeControlItor, hwParser.cmdList.end());
            ASSERT_NE(hwParser.cmdList.end(), nextPipeControlItor);
            pipeControl = genCmdCast<PIPE_CONTROL *>(*nextPipeControlItor);
        }
        ASSERT_NE(nullptr, pipeControl);
        EXPECT_EQ(PIPE_CONTROL::POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, pipeControl->getPostSyncOperation());
        EXPECT_EQ(csr->getDcFlushSupport(), pipeControl->getDcFlushEnable());
        EXPECT_EQ(barrierNodeAddress, UnitTestHelper<FamilyType>::getPipeControlPostSyncAddress(*pipeControl));
    }
}
|
||||
|
||||
// Verifies that programStallingCommandsForBarrier() programs a PIPE_CONTROL without any post-sync
// write (no-write operation, zero post-sync address) when there are no barrier nodes to signal,
// i.e. when the container is empty or when no container is passed at all.
HWTEST_F(CommandStreamSimulatedTests, givenEmptyBarrierNodesWhenProgramStallingCommandsForBarrierCalledThenNoWritePipeControlIsProgrammed) {
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
    auto csr = std::make_unique<MockSimulatedCsrHw<FamilyType>>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
    MockOsContext osContext(0, EngineDescriptorHelper::getDefaultDescriptor());
    csr->setupContext(osContext);

    // Case 1: a container is passed but holds no nodes.
    {
        TimestampPacketContainer barrierNodes{};

        MockGraphicsAllocation streamAllocation{};
        uint32_t streamBuffer[100] = {};
        LinearStream linearStream(&streamAllocation, streamBuffer, sizeof(streamBuffer));

        csr->programStallingCommandsForBarrier(linearStream, &barrierNodes, false);

        HardwareParse hwParser;
        hwParser.parseCommands<FamilyType>(linearStream);
        const auto pipeControlItor = find<PIPE_CONTROL *>(hwParser.cmdList.begin(), hwParser.cmdList.end());
        // Fail cleanly instead of dereferencing a past-the-end iterator if no PIPE_CONTROL was parsed.
        ASSERT_NE(hwParser.cmdList.end(), pipeControlItor);
        const auto pipeControl = genCmdCast<PIPE_CONTROL *>(*pipeControlItor);
        ASSERT_NE(nullptr, pipeControl);
        EXPECT_EQ(PIPE_CONTROL::POST_SYNC_OPERATION::POST_SYNC_OPERATION_NO_WRITE, pipeControl->getPostSyncOperation());
        EXPECT_EQ(0u, UnitTestHelper<FamilyType>::getPipeControlPostSyncAddress(*pipeControl));
    }

    // Case 2: no container at all (nullptr).
    {
        MockGraphicsAllocation streamAllocation{};
        uint32_t streamBuffer[100] = {};
        LinearStream linearStream(&streamAllocation, streamBuffer, sizeof(streamBuffer));

        csr->programStallingCommandsForBarrier(linearStream, nullptr, false);

        HardwareParse hwParser;
        hwParser.parseCommands<FamilyType>(linearStream);
        const auto pipeControlItor = find<PIPE_CONTROL *>(hwParser.cmdList.begin(), hwParser.cmdList.end());
        // Fail cleanly instead of dereferencing a past-the-end iterator if no PIPE_CONTROL was parsed.
        ASSERT_NE(hwParser.cmdList.end(), pipeControlItor);
        const auto pipeControl = genCmdCast<PIPE_CONTROL *>(*pipeControlItor);
        ASSERT_NE(nullptr, pipeControl);
        EXPECT_EQ(PIPE_CONTROL::POST_SYNC_OPERATION::POST_SYNC_OPERATION_NO_WRITE, pipeControl->getPostSyncOperation());
        EXPECT_EQ(0u, UnitTestHelper<FamilyType>::getPipeControlPostSyncAddress(*pipeControl));
    }
}
|
||||
Reference in New Issue
Block a user