RelaxedOrdering: Disable feature for BCS

Related-To: NEO-7458

Signed-off-by: Dunajski, Bartosz <bartosz.dunajski@intel.com>
This commit is contained in:
Dunajski, Bartosz
2022-11-29 14:57:51 +00:00
committed by Compute-Runtime-Automation
parent e268b46df1
commit 20b6c76298
8 changed files with 132 additions and 7 deletions

View File

@ -15,6 +15,7 @@
#include "shared/test/common/mocks/mock_allocation_properties.h"
#include "shared/test/common/mocks/mock_csr.h"
#include "shared/test/common/mocks/mock_device.h"
#include "shared/test/common/mocks/mock_direct_submission_hw.h"
#include "shared/test/common/mocks/mock_os_context.h"
#include "shared/test/common/mocks/mock_submissions_aggregator.h"
#include "shared/test/common/mocks/mock_svm_manager.h"
@ -1207,6 +1208,7 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, givenEnabledDirectSubmissionUpdate
struct MockCsrHwDirectSubmission : public MockCsrHw2<FamilyType> {
using MockCsrHw2<FamilyType>::MockCsrHw2;
using MockCsrHw2<FamilyType>::directSubmission;
bool isDirectSubmissionEnabled() const override {
return true;
}
@ -1222,6 +1224,7 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, givenEnabledDirectSubmissionUpdate
mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch);
mockCsr->taskCount.store(10);
mockCsr->latestFlushedTaskCount.store(5);
mockCsr->directSubmission = std::make_unique<MockDirectSubmissionHw<FamilyType, RenderDispatcher<FamilyType>>>(*mockCsr);
const auto waitStatus = commandQueue.waitForAllEngines(false, nullptr);
EXPECT_EQ(WaitStatus::Ready, waitStatus);

View File

@ -13,6 +13,7 @@
#include "shared/test/common/libult/ult_command_stream_receiver.h"
#include "shared/test/common/mocks/mock_csr.h"
#include "shared/test/common/mocks/mock_debugger.h"
#include "shared/test/common/mocks/mock_direct_submission_hw.h"
#include "shared/test/common/mocks/mock_os_context.h"
#include "shared/test/common/mocks/mock_submissions_aggregator.h"
#include "shared/test/common/test_macros/hw_test.h"
@ -1066,6 +1067,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverFlushTaskXeHPAndLaterMultiTile
commandStreamReceiver.createPreemptionAllocation();
}
commandStreamReceiver.directSubmission = std::make_unique<MockDirectSubmissionHw<FamilyType, RenderDispatcher<FamilyType>>>(commandStreamReceiver);
commandStreamReceiver.directSubmissionAvailable = true;
EXPECT_EQ(1u, commandStreamReceiver.activePartitionsConfig);

View File

@ -109,8 +109,15 @@ inline void CommandStreamReceiverHw<GfxFamily>::programEndingCmd(LinearStream &c
startAddress = 0;
}
bool relaxedOrderingEnabled = false;
if (isBlitterDirectSubmissionEnabled() && EngineHelpers::isBcs(this->osContext->getEngineType())) {
relaxedOrderingEnabled = this->blitterDirectSubmission->isRelaxedOrderingEnabled();
} else if (isDirectSubmissionEnabled()) {
relaxedOrderingEnabled = this->directSubmission->isRelaxedOrderingEnabled();
}
bool indirect = false;
if (DebugManager.flags.DirectSubmissionRelaxedOrdering.get() == 1 && hasRelaxedOrderingDependencies) {
if (relaxedOrderingEnabled && hasRelaxedOrderingDependencies) {
NEO::EncodeSetMMIO<GfxFamily>::encodeREG(commandStream, CS_GPR_R0, CS_GPR_R3);
NEO::EncodeSetMMIO<GfxFamily>::encodeREG(commandStream, CS_GPR_R0 + 4, CS_GPR_R3 + 4);

View File

@ -333,6 +333,7 @@ DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionInsertSfenceInstructionPriorToSu
DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionMaxRingBuffers, -1, "-1: default, >0: max ring buffer count, During switch ring buffer, if there is no available ring, wait for completion instead of allocating new one if DirectSubmissionMaxRingBuffers is reached")
DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionDisablePrefetcher, -1, "-1: default, 0 - disable, 1 - enable. If enabled, disable prefetcher is being dispatched")
DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionRelaxedOrdering, -1, "-1: default, 0 - disable, 1 - enable. If enabled, tasks sent to direct submission ring may be dispatched out of order")
DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionRelaxedOrderingForBcs, -1, "-1: default, 0 - disable, 1 - enable. If set, enable RelaxedOrdering feature for BCS engine")
DECLARE_DEBUG_VARIABLE(bool, DirectSubmissionPrintBuffers, false, "Print address of submitted command buffers")
/*FEATURE FLAGS*/

View File

@ -78,6 +78,10 @@ DirectSubmissionHw<GfxFamily, Dispatcher>::DirectSubmissionHw(const DirectSubmis
dcFlushRequired = MemorySynchronizationCommands<GfxFamily>::getDcFlushEnable(true, *hwInfo);
relaxedOrderingEnabled = (DebugManager.flags.DirectSubmissionRelaxedOrdering.get() == 1);
if (EngineHelpers::isBcs(this->osContext.getEngineType()) && relaxedOrderingEnabled) {
relaxedOrderingEnabled = (DebugManager.flags.DirectSubmissionRelaxedOrderingForBcs.get() == 1);
}
}
template <typename GfxFamily, typename Dispatcher>

View File

@ -488,4 +488,5 @@ ForceNonblockingExecbufferCalls = -1
UseHighAlignmentForHeapExtended = -1
ForceAutoGrfCompilationMode = -1
ForceComputeWalkerPostSyncFlush = -1
DirectSubmissionRelaxedOrdering = -1
DirectSubmissionRelaxedOrdering = -1
DirectSubmissionRelaxedOrderingForBcs = -1

View File

@ -665,8 +665,14 @@ HWTEST_F(DirectSubmissionTest, givenBaseCsrWhenCheckingDirectSubmissionAvailable
HWTEST_F(DirectSubmissionTest, givenDirectSubmissionAvailableWhenProgrammingEndingCommandThenUseBatchBufferStart) {
using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START;
int32_t executionStamp = 0;
std::unique_ptr<MockCsr<FamilyType>> mockCsr =
std::make_unique<MockCsr<FamilyType>>(executionStamp, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
mockCsr->setupContext(*osContext);
mockCsr->directSubmission = std::make_unique<MockDirectSubmissionHw<FamilyType, RenderDispatcher<FamilyType>>>(*mockCsr);
mockCsr->directSubmissionAvailable = true;
bool ret = mockCsr->isDirectSubmissionEnabled();
EXPECT_TRUE(ret);
@ -701,6 +707,9 @@ HWTEST_F(DirectSubmissionTest, givenDebugFlagSetWhenProgrammingEndingCommandThen
pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
mockCsr->setupContext(*osContext);
mockCsr->directSubmission = std::make_unique<MockDirectSubmissionHw<FamilyType, RenderDispatcher<FamilyType>>>(*mockCsr);
mockCsr->directSubmissionAvailable = true;
bool ret = mockCsr->isDirectSubmissionEnabled();
EXPECT_TRUE(ret);
@ -883,7 +892,7 @@ HWTEST_F(DirectSubmissionTest,
EXPECT_TRUE(directSubmission.ringStart);
EXPECT_EQ(0u, directSubmission.disabledDiagnosticCalled);
EXPECT_EQ(1u, NEO::IoFunctions::mockFopenCalled);
//1 - preamble, 1 - init time, 5 - exec logs
// 1 - preamble, 1 - init time, 5 - exec logs
EXPECT_EQ(7u, NEO::IoFunctions::mockVfptrinfCalled);
EXPECT_EQ(1u, NEO::IoFunctions::mockFcloseCalled);
EXPECT_EQ(expectedSize, directSubmission.ringCommandStream.getUsed());
@ -981,7 +990,7 @@ HWTEST_F(DirectSubmissionTest,
EXPECT_TRUE(directSubmission.ringStart);
EXPECT_EQ(0u, directSubmission.disabledDiagnosticCalled);
EXPECT_EQ(1u, NEO::IoFunctions::mockFopenCalled);
//1 - preamble, 1 - init time, 0 exec logs in mode 2
// 1 - preamble, 1 - init time, 0 exec logs in mode 2
EXPECT_EQ(2u, NEO::IoFunctions::mockVfptrinfCalled);
EXPECT_EQ(1u, NEO::IoFunctions::mockFcloseCalled);
EXPECT_EQ(expectedSize, directSubmission.ringCommandStream.getUsed());
@ -1021,7 +1030,7 @@ HWTEST_F(DirectSubmissionTest,
EXPECT_NE(nullptr, directSubmission.diagnostic.get());
EXPECT_EQ(1u, NEO::IoFunctions::mockFopenCalled);
//ctor: preamble 1 call
// ctor: preamble 1 call
EXPECT_EQ(1u, NEO::IoFunctions::mockVfptrinfCalled);
EXPECT_EQ(0u, NEO::IoFunctions::mockFcloseCalled);
@ -1034,7 +1043,7 @@ HWTEST_F(DirectSubmissionTest,
DebugManager.flags.DirectSubmissionDisableCacheFlush.get(),
DebugManager.flags.DirectSubmissionDisableMonitorFence.get());
EXPECT_EQ(2u, NEO::IoFunctions::mockFopenCalled);
//dtor: 1 call general delta, 2 calls storing execution, ctor: preamble 1 call
// dtor: 1 call general delta, 2 calls storing execution, ctor: preamble 1 call
EXPECT_EQ(5u, NEO::IoFunctions::mockVfptrinfCalled);
EXPECT_EQ(1u, NEO::IoFunctions::mockFcloseCalled);
@ -1055,7 +1064,7 @@ HWTEST_F(DirectSubmissionTest,
EXPECT_NE(0ll, mockDiagnostic->executionList[1].submitWaitTimeDiff);
EXPECT_EQ(0ll, mockDiagnostic->executionList[1].dispatchSubmitTimeDiff);
//1 call general delta, 2 calls storing execution
// 1 call general delta, 2 calls storing execution
uint32_t expectedVfprintfCall = NEO::IoFunctions::mockVfptrinfCalled + 1u + 2u;
directSubmission.diagnostic.reset(nullptr);
EXPECT_EQ(2u, NEO::IoFunctions::mockFopenCalled);

View File

@ -2080,11 +2080,109 @@ HWTEST2_F(DirectSubmissionRelaxedOrderingTests, WhenStoppingRingWithoutSubmissio
EXPECT_FALSE(success);
}
// Checks that a direct submission created on a BCS context reports relaxed
// ordering as enabled only when both DirectSubmissionRelaxedOrdering and
// DirectSubmissionRelaxedOrderingForBcs are set to 1.
HWTEST2_F(DirectSubmissionRelaxedOrderingTests, givenDebugFlagSetWhenCreatingBcsDispatcherThenEnableRelaxedOrdering, IsAtLeastXeHpcCore) {
    // Bind the default CSR to a freshly created BCS context so the direct submission sees a blitter engine type.
    std::unique_ptr<OsContext> osContext(OsContext::create(pDevice->getExecutionEnvironment()->rootDeviceEnvironments[0]->osInterface.get(), pDevice->getRootDeviceIndex(), 0,
                                                           EngineDescriptorHelper::getDefaultDescriptor({aub_stream::ENGINE_BCS, EngineUsage::Regular},
                                                                                                        PreemptionMode::ThreadGroup, pDevice->getDeviceBitfield())));

    auto ultCsr = static_cast<UltCommandStreamReceiver<FamilyType> *>(pDevice->getDefaultEngine().commandStreamReceiver);
    ultCsr->setupContext(*osContext);

    // Applies both debug flags, builds a blitter direct submission and reports its relaxed ordering state.
    // The flags are read in the constructor, so they must be set before the object is created.
    auto createAndQuery = [&](int32_t relaxedOrderingFlag, int32_t relaxedOrderingForBcsFlag) {
        DebugManager.flags.DirectSubmissionRelaxedOrdering.set(relaxedOrderingFlag);
        DebugManager.flags.DirectSubmissionRelaxedOrderingForBcs.set(relaxedOrderingForBcsFlag);

        MockDirectSubmissionHw<FamilyType, BlitterDispatcher<FamilyType>> directSubmission(*ultCsr);
        return directSubmission.isRelaxedOrderingEnabled();
    };

    // General flag off, BCS flag on.
    EXPECT_FALSE(createAndQuery(0, 1));

    // General flag on, BCS flag off.
    EXPECT_FALSE(createAndQuery(1, 0));

    // Both flags on.
    EXPECT_TRUE(createAndQuery(1, 1));
}
// With relaxed ordering enabled for BCS and relaxed ordering dependencies present,
// the ending sequence is expected to be two MI_LOAD_REGISTER_REG commands
// (CS_GPR_R3 -> CS_GPR_R0, low and high dword) followed by an indirect MI_BATCH_BUFFER_START.
HWTEST2_F(DirectSubmissionRelaxedOrderingTests, givenBcsRelaxedOrderingEnabledWhenProgrammingEndingCommandsThenSetReturnPtrs, IsAtLeastXeHpcCore) {
    using MI_LOAD_REGISTER_REG = typename FamilyType::MI_LOAD_REGISTER_REG;
    using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START;

    DebugManager.flags.DirectSubmissionRelaxedOrderingForBcs.set(1);

    std::unique_ptr<OsContext> osContext(OsContext::create(pDevice->getExecutionEnvironment()->rootDeviceEnvironments[0]->osInterface.get(), pDevice->getRootDeviceIndex(), 0,
                                                           EngineDescriptorHelper::getDefaultDescriptor({aub_stream::ENGINE_BCS, EngineUsage::Regular},
                                                                                                        PreemptionMode::ThreadGroup, pDevice->getDeviceBitfield())));

    auto ultCsr = static_cast<UltCommandStreamReceiver<FamilyType> *>(pDevice->getDefaultEngine().commandStreamReceiver);
    ultCsr->setupContext(*osContext);
    ultCsr->blitterDirectSubmissionAvailable = true;

    // The CSR owns the blitter direct submission; it is created after the debug flag is set.
    ultCsr->blitterDirectSubmission = std::make_unique<MockDirectSubmissionHw<FamilyType, BlitterDispatcher<FamilyType>>>(*ultCsr);

    auto &cmdStream = ultCsr->getCS(0x100);
    auto patchLocation = cmdStream.getSpace(0);

    ultCsr->programEndingCmd(cmdStream, &patchLocation, true, true, false);

    // Walk the stream from its base: LRR (low dword), LRR (high dword), BB_START.
    auto loadRegCmds = reinterpret_cast<MI_LOAD_REGISTER_REG *>(cmdStream.getCpuBase());

    EXPECT_EQ(loadRegCmds[0].getSourceRegisterAddress(), CS_GPR_R3);
    EXPECT_EQ(loadRegCmds[0].getDestinationRegisterAddress(), CS_GPR_R0);

    EXPECT_EQ(loadRegCmds[1].getSourceRegisterAddress(), CS_GPR_R3 + 4);
    EXPECT_EQ(loadRegCmds[1].getDestinationRegisterAddress(), CS_GPR_R0 + 4);

    auto bbStartCmd = reinterpret_cast<MI_BATCH_BUFFER_START *>(loadRegCmds + 2);
    EXPECT_EQ(1u, bbStartCmd->getIndirectAddressEnable());
}
// With DirectSubmissionRelaxedOrderingForBcs set to 0, the ending command programmed on a
// BCS context must be a plain (non-indirect) MI_BATCH_BUFFER_START with no register loads
// in front of it, even when the submission has relaxed ordering dependencies.
HWTEST2_F(DirectSubmissionRelaxedOrderingTests, givenBcsRelaxedOrderingDisabledWhenProgrammingEndingCommandsThenDontSetReturnPtrs, IsAtLeastXeHpcCore) {
    using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START;

    // NOTE(review): presumably the fixture enables the general DirectSubmissionRelaxedOrdering flag,
    // as the sibling "Enabled" test relies on the same setup - confirm.
    DebugManager.flags.DirectSubmissionRelaxedOrderingForBcs.set(0);

    std::unique_ptr<OsContext> osContext(OsContext::create(pDevice->getExecutionEnvironment()->rootDeviceEnvironments[0]->osInterface.get(), pDevice->getRootDeviceIndex(), 0,
                                                           EngineDescriptorHelper::getDefaultDescriptor({aub_stream::ENGINE_BCS, EngineUsage::Regular},
                                                                                                        PreemptionMode::ThreadGroup, pDevice->getDeviceBitfield())));

    auto ultCsr = static_cast<UltCommandStreamReceiver<FamilyType> *>(pDevice->getDefaultEngine().commandStreamReceiver);
    ultCsr->setupContext(*osContext);
    // Mirror the sibling "Enabled" test so programEndingCmd consults the blitter direct submission created below.
    ultCsr->blitterDirectSubmissionAvailable = true;

    ultCsr->blitterDirectSubmission = std::make_unique<MockDirectSubmissionHw<FamilyType, BlitterDispatcher<FamilyType>>>(*ultCsr);

    auto &commandStream = ultCsr->getCS(0x100);

    auto endingPtr = commandStream.getSpace(0);

    // Pass the same arguments as the "Enabled" test (relaxed ordering dependencies present), so the
    // only difference between the two scenarios is the BCS debug flag. Previously the dependencies
    // argument was false, which kept the indirect path from ever being taken and made the check vacuous.
    ultCsr->programEndingCmd(commandStream, &endingPtr, true, true, false);

    // The very first command must already be the batch buffer start (no MI_LOAD_REGISTER_REG prefix).
    auto bbStartCmd = genCmdCast<MI_BATCH_BUFFER_START *>(commandStream.getCpuBase());
    ASSERT_NE(nullptr, bbStartCmd);
    EXPECT_EQ(0u, bbStartCmd->getIndirectAddressEnable());
}
HWTEST2_F(DirectSubmissionRelaxedOrderingTests, whenProgrammingEndingCmdsThenSetReturnRegisters, IsAtLeastXeHpcCore) {
using MI_LOAD_REGISTER_REG = typename FamilyType::MI_LOAD_REGISTER_REG;
using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START;
auto ultCsr = static_cast<UltCommandStreamReceiver<FamilyType> *>(pDevice->getDefaultEngine().commandStreamReceiver);
ultCsr->directSubmissionAvailable = true;
auto directSubmission = new MockDirectSubmissionHw<FamilyType, RenderDispatcher<FamilyType>>(*ultCsr);
directSubmission->initialize(true, false);