mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-16 04:08:35 +08:00
performance(ocl): program barrier pc in taskStream
Program the barrier into the task stream, before the next enqueued kernel. This reduces the number of batch buffer starts for sequences such as enqueue, barrier, enqueue, and so on. Related-To: NEO-8147 Signed-off-by: Dominik Dabek <dominik.dabek@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
e08d46085b
commit
1b7e178b25
@@ -9,11 +9,14 @@
|
||||
#include "shared/source/command_stream/command_stream_receiver_simulated_hw.h"
|
||||
#include "shared/source/helpers/array_count.h"
|
||||
#include "shared/source/helpers/hardware_context_controller.h"
|
||||
#include "shared/source/helpers/timestamp_packet.h"
|
||||
#include "shared/source/memory_manager/memory_pool.h"
|
||||
#include "shared/source/os_interface/os_context.h"
|
||||
#include "shared/test/common/cmd_parse/hw_parse.h"
|
||||
#include "shared/test/common/helpers/debug_manager_state_restore.h"
|
||||
#include "shared/test/common/helpers/engine_descriptor_helper.h"
|
||||
#include "shared/test/common/helpers/gfx_core_helper_tests.h"
|
||||
#include "shared/test/common/helpers/unit_test_helper.h"
|
||||
#include "shared/test/common/mocks/mock_aub_manager.h"
|
||||
#include "shared/test/common/mocks/mock_gmm.h"
|
||||
#include "shared/test/common/mocks/mock_graphics_allocation.h"
|
||||
@@ -595,3 +598,98 @@ HWTEST_F(CommandStreamSimulatedTests, givenSpecificMemoryPoolAllocationWhenWrite
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Verifies that programStallingCommandsForBarrier(), when handed a container
// holding one barrier node, emits a PIPE_CONTROL whose post-sync operation is
// "write immediate data" and whose post-sync address equals the node's
// context-end GPU address. The call is exercised with the last argument set to
// false and to true (presumably the "DC flush required" flag - confirm against
// the CSR interface).
HWTEST_F(CommandStreamSimulatedTests, givenBarrierNodesWhenProgramStallingCommandsForBarrierCalledThenPostSyncWritePipeControlIsProgrammed) {
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
    auto csr = std::make_unique<MockSimulatedCsrHw<FamilyType>>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
    MockOsContext osContext(0, EngineDescriptorHelper::getDefaultDescriptor());
    csr->setupContext(osContext);

    TagAllocatorBase *allocator = pDevice->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator();
    auto barrierNode = allocator->getTag();
    const auto barrierNodeAddress = TimestampPacketHelper::getContextEndGpuAddress(*barrierNode);
    TimestampPacketContainer barrierNodes{};
    barrierNodes.add(barrierNode);

    const bool dcFlushVariants[] = {false, true};
    for (const bool dcFlushRequested : dcFlushVariants) {
        SCOPED_TRACE(dcFlushRequested ? "last argument: true" : "last argument: false");

        // Fresh, zero-initialised stream for every variant so commands do not accumulate.
        MockGraphicsAllocation streamAllocation{};
        uint32_t streamBuffer[100] = {};
        LinearStream linearStream(&streamAllocation, streamBuffer, sizeof(streamBuffer));

        csr->programStallingCommandsForBarrier(linearStream, &barrierNodes, dcFlushRequested);

        HardwareParse hwParser;
        hwParser.parseCommands<FamilyType>(linearStream);

        // Guard every iterator before dereferencing it: a missing PIPE_CONTROL must
        // surface as a test failure, not as a dereference of cmdList.end().
        auto pipeControlItor = find<PIPE_CONTROL *>(hwParser.cmdList.begin(), hwParser.cmdList.end());
        ASSERT_NE(hwParser.cmdList.end(), pipeControlItor);

        if (UnitTestHelper<FamilyType>::isPipeControlWArequired(hardwareInfo)) {
            // With the workaround reported as required, the command under test is the
            // second PIPE_CONTROL in the stream.
            pipeControlItor = find<PIPE_CONTROL *>(++pipeControlItor, hwParser.cmdList.end());
            ASSERT_NE(hwParser.cmdList.end(), pipeControlItor);
        }

        auto pipeControl = genCmdCast<PIPE_CONTROL *>(*pipeControlItor);
        ASSERT_NE(nullptr, pipeControl);
        EXPECT_EQ(PIPE_CONTROL::POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, pipeControl->getPostSyncOperation());
        if (dcFlushRequested) {
            // When requested, the DC flush bit follows what the CSR reports as supported.
            EXPECT_EQ(csr->getDcFlushSupport(), pipeControl->getDcFlushEnable());
        } else {
            EXPECT_FALSE(pipeControl->getDcFlushEnable());
        }
        EXPECT_EQ(barrierNodeAddress, UnitTestHelper<FamilyType>::getPipeControlPostSyncAddress(*pipeControl));
    }
}
|
||||
|
||||
// Verifies that programStallingCommandsForBarrier() emits a PIPE_CONTROL with
// no post-sync write (operation NO_WRITE, post-sync address 0) when it is given
// either an empty barrier-node container or a null container pointer.
HWTEST_F(CommandStreamSimulatedTests, givenEmptyBarrierNodesWhenProgramStallingCommandsForBarrierCalledThenNoWritePipeControlIsProgrammed) {
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
    auto csr = std::make_unique<MockSimulatedCsrHw<FamilyType>>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
    MockOsContext osContext(0, EngineDescriptorHelper::getDefaultDescriptor());
    csr->setupContext(osContext);

    TimestampPacketContainer emptyBarrierNodes{};
    // Both "no barrier node" flavours: an empty container and no container at all.
    TimestampPacketContainer *const barrierNodeVariants[] = {&emptyBarrierNodes, nullptr};

    for (TimestampPacketContainer *barrierNodes : barrierNodeVariants) {
        SCOPED_TRACE(barrierNodes ? "empty barrier node container" : "null barrier node container");

        MockGraphicsAllocation streamAllocation{};
        uint32_t streamBuffer[100] = {};
        LinearStream linearStream(&streamAllocation, streamBuffer, sizeof(streamBuffer));

        csr->programStallingCommandsForBarrier(linearStream, barrierNodes, false);

        HardwareParse hwParser;
        hwParser.parseCommands<FamilyType>(linearStream);

        // Check the iterator before dereferencing it so that a missing PIPE_CONTROL
        // is reported as a failure instead of dereferencing cmdList.end().
        const auto pipeControlItor = find<PIPE_CONTROL *>(hwParser.cmdList.begin(), hwParser.cmdList.end());
        ASSERT_NE(hwParser.cmdList.end(), pipeControlItor);

        const auto pipeControl = genCmdCast<PIPE_CONTROL *>(*pipeControlItor);
        ASSERT_NE(nullptr, pipeControl);
        EXPECT_EQ(PIPE_CONTROL::POST_SYNC_OPERATION::POST_SYNC_OPERATION_NO_WRITE, pipeControl->getPostSyncOperation());
        EXPECT_EQ(0u, UnitTestHelper<FamilyType>::getPipeControlPostSyncAddress(*pipeControl));
    }
}
|
||||
@@ -2665,6 +2665,33 @@ HWTEST_F(CommandStreamReceiverHwTest, givenDcFlushFlagSetWhenGettingCsrFlagValue
|
||||
EXPECT_EQ(helperValue, csrValue);
|
||||
}
|
||||
|
||||
// Checks which size getCmdSizeForStallingCommands() reports depending on the
// barrier timestamp-packet nodes attached to the dispatch flags:
//  - null pointer or empty container -> getCmdSizeForStallingNoPostSyncCommands()
//  - container holding a node        -> getCmdSizeForStallingPostSyncCommands()
HWTEST_F(CommandStreamReceiverHwTest, givenBarrierTimestampPacketNodesWhenGetCmdSizeForStallingCommandsCalledThenReturnCorrectSize) {
    auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();

    const auto noPostSyncSize = csr.getCmdSizeForStallingNoPostSyncCommands();

    // Case 1: no container attached at all.
    {
        auto flags = DispatchFlagsHelper::createDefaultDispatchFlags();
        flags.barrierTimestampPacketNodes = nullptr;
        const auto reportedSize = csr.getCmdSizeForStallingCommands(flags);
        EXPECT_EQ(noPostSyncSize, reportedSize);
    }

    // Case 2: a container is attached but holds no nodes.
    {
        auto flags = DispatchFlagsHelper::createDefaultDispatchFlags();
        TimestampPacketContainer noNodes;
        flags.barrierTimestampPacketNodes = &noNodes;
        const auto reportedSize = csr.getCmdSizeForStallingCommands(flags);
        EXPECT_EQ(noPostSyncSize, reportedSize);
    }

    const auto postSyncSize = csr.getCmdSizeForStallingPostSyncCommands();

    // Case 3: the container holds one tag taken from the CSR's timestamp packet allocator.
    {
        auto flags = DispatchFlagsHelper::createDefaultDispatchFlags();
        TimestampPacketContainer nodesWithTag;
        auto *tag = csr.getTimestampPacketAllocator()->getTag();
        nodesWithTag.add(tag);
        flags.barrierTimestampPacketNodes = &nodesWithTag;
        const auto reportedSize = csr.getCmdSizeForStallingCommands(flags);
        EXPECT_EQ(postSyncSize, reportedSize);
    }
}
|
||||
|
||||
struct MockRequiredScratchSpaceController : public ScratchSpaceControllerBase {
|
||||
MockRequiredScratchSpaceController(uint32_t rootDeviceIndex,
|
||||
ExecutionEnvironment &environment,
|
||||
@@ -4426,39 +4453,6 @@ HWTEST_F(CommandStreamReceiverHwTest, givenDcFlushRequiredFalseWhenProgramStalli
|
||||
EXPECT_FALSE(pipeControl->getDcFlushEnable());
|
||||
}
|
||||
|
||||
// With the ProgramBarrierInCommandStreamTask debug flag set to 1 and
// isStallingCommandsOnNextFlushRequired set in the dispatch flags, flushTask()
// is expected to leave exactly one PIPE_CONTROL in the task command stream
// passed to it.
HWTEST_F(CommandStreamReceiverHwTest, givenFlagProgramBarrierInCommandStreamTaskWhenFlushTaskThenPipeControlProgrammedInTaskCommandStream) {
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
    DebugManagerStateRestore restorer;
    DebugManager.flags.ProgramBarrierInCommandStreamTask.set(1);
    auto &ultCsr = pDevice->getUltCommandStreamReceiver<FamilyType>();

    GraphicsAllocation *allocation = pDevice->getMemoryManager()->allocateGraphicsMemoryWithProperties({ultCsr.getRootDeviceIndex(), MemoryConstants::pageSize, AllocationType::COMMAND_BUFFER, pDevice->getDeviceBitfield()});
    LinearStream commandStream{allocation};
    // If this fails there is no allocation to release, so an early return is safe here.
    ASSERT_NE(nullptr, commandStream.getGraphicsAllocation());
    auto dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags();
    dispatchFlags.isStallingCommandsOnNextFlushRequired = true;
    ultCsr.flushTask(commandStream,
                     MemoryConstants::pageSize,
                     &dsh,
                     &ioh,
                     &ssh,
                     0,
                     dispatchFlags,
                     *pDevice);

    // Fatal assertions only return from the enclosing void function. Running them
    // inside a lambda keeps a failure from skipping the freeGraphicsMemory() call
    // below and leaking the command buffer allocation.
    const auto verifyTaskStream = [&]() {
        GenCmdList cmdList;
        ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
            cmdList,
            commandStream.getCpuBase(),
            commandStream.getUsed()));
        auto pipeControlIteratorVector = findAll<PIPE_CONTROL *>(cmdList.begin(), cmdList.end());
        ASSERT_EQ(pipeControlIteratorVector.size(), 1u);
        auto pipeControlIterator = pipeControlIteratorVector[0];
        auto pipeControl = genCmdCast<PIPE_CONTROL *>(*pipeControlIterator);
        ASSERT_NE(nullptr, pipeControl);
    };
    verifyTaskStream();

    pDevice->getMemoryManager()->freeGraphicsMemory(allocation);
}
|
||||
|
||||
HWTEST2_F(CommandStreamReceiverHwTest,
|
||||
givenImmediateFlushTaskWhenNextDispatchRequiresScratchSpaceAndSshPointerIsNullThenFrontEndCommandIsNotDispatched,
|
||||
IsAtLeastXeHpCore) {
|
||||
|
||||
Reference in New Issue
Block a user