mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-13 10:14:36 +08:00
RelaxedOrdering dispatch for OCL 1/n
Related-To: NEO-7458 Signed-off-by: Dunajski, Bartosz <bartosz.dunajski@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
7e46668fc7
commit
97a35debde
@@ -11,6 +11,7 @@
|
||||
#include "shared/test/common/mocks/mock_allocation_properties.h"
|
||||
#include "shared/test/common/mocks/mock_builtins.h"
|
||||
#include "shared/test/common/mocks/mock_csr.h"
|
||||
#include "shared/test/common/mocks/mock_direct_submission_hw.h"
|
||||
#include "shared/test/common/mocks/mock_os_library.h"
|
||||
#include "shared/test/common/mocks/mock_source_level_debugger.h"
|
||||
#include "shared/test/common/mocks/mock_timestamp_container.h"
|
||||
@@ -1188,3 +1189,31 @@ HWTEST_F(CommandQueueHwTest, givenNoGpuHangWhenFinishingCommandQueueHwThenWaitFo
|
||||
EXPECT_EQ(1, mockCmdQueueHw.waitForAllEnginesCalledCount);
|
||||
EXPECT_EQ(CL_SUCCESS, finishResult);
|
||||
}
|
||||
|
||||
HWTEST_F(CommandQueueHwTest, givenRelaxedOrderingEnabledWhenCheckingIfAllowedByCommandQueueThenReturnFalse) {
|
||||
DebugManagerStateRestore restore;
|
||||
|
||||
MockCommandQueueHw<FamilyType> mockCmdQueueHw{context, pClDevice, nullptr};
|
||||
|
||||
auto &ultCsr = mockCmdQueueHw.getUltCommandStreamReceiver();
|
||||
|
||||
auto directSubmission = new MockDirectSubmissionHw<FamilyType, RenderDispatcher<FamilyType>>(ultCsr);
|
||||
directSubmission->relaxedOrderingEnabled = true;
|
||||
ultCsr.directSubmission.reset(directSubmission);
|
||||
|
||||
EXPECT_FALSE(mockCmdQueueHw.relaxedOrderingForGpgpuAllowed(0));
|
||||
EXPECT_FALSE(mockCmdQueueHw.relaxedOrderingForGpgpuAllowed(1));
|
||||
|
||||
DebugManager.flags.DirectSubmissionRelaxedOrdering.set(1);
|
||||
EXPECT_FALSE(mockCmdQueueHw.relaxedOrderingForGpgpuAllowed(0));
|
||||
EXPECT_FALSE(mockCmdQueueHw.relaxedOrderingForGpgpuAllowed(1));
|
||||
|
||||
ultCsr.registerClient();
|
||||
EXPECT_FALSE(mockCmdQueueHw.relaxedOrderingForGpgpuAllowed(0));
|
||||
EXPECT_FALSE(mockCmdQueueHw.relaxedOrderingForGpgpuAllowed(1));
|
||||
|
||||
ultCsr.registerClient();
|
||||
|
||||
EXPECT_FALSE(mockCmdQueueHw.relaxedOrderingForGpgpuAllowed(0));
|
||||
EXPECT_TRUE(mockCmdQueueHw.relaxedOrderingForGpgpuAllowed(1));
|
||||
}
|
||||
|
||||
@@ -14,7 +14,10 @@
|
||||
#include "shared/source/memory_manager/allocations_list.h"
|
||||
#include "shared/test/common/cmd_parse/gen_cmd_parse.h"
|
||||
#include "shared/test/common/helpers/debug_manager_state_restore.h"
|
||||
#include "shared/test/common/helpers/relaxed_ordering_commands_helper.h"
|
||||
#include "shared/test/common/mocks/mock_csr.h"
|
||||
#include "shared/test/common/mocks/mock_direct_submission_hw.h"
|
||||
#include "shared/test/common/mocks/mock_timestamp_container.h"
|
||||
#include "shared/test/common/mocks/ult_device_factory.h"
|
||||
#include "shared/test/common/utilities/base_object_utils.h"
|
||||
|
||||
@@ -1021,6 +1024,145 @@ HWTEST_F(EnqueueKernelTest, givenTimestampWriteEnableWhenMarkerProfilingWithoutW
|
||||
EXPECT_EQ(baseCommandStreamSize + 4 * EncodeStoreMMIO<FamilyType>::size + MemorySynchronizationCommands<FamilyType>::getSizeForSingleBarrier(false), extendedCommandStreamSize);
|
||||
}
|
||||
|
||||
HWTEST_F(EnqueueKernelTest, givenRelaxedOrderingEnabledWhenCheckingSizeForCsThenReturnCorrectValue) {
|
||||
auto &ultCsr = pDevice->getUltCommandStreamReceiver<FamilyType>();
|
||||
ultCsr.timestampPacketWriteEnabled = true;
|
||||
|
||||
auto directSubmission = new MockDirectSubmissionHw<FamilyType, RenderDispatcher<FamilyType>>(ultCsr);
|
||||
ultCsr.directSubmission.reset(directSubmission);
|
||||
|
||||
MockKernelWithInternals mockKernel(*pClDevice);
|
||||
DispatchInfo dispatchInfo;
|
||||
MultiDispatchInfo multiDispatchInfo(mockKernel.mockKernel);
|
||||
dispatchInfo.setKernel(mockKernel.mockKernel);
|
||||
multiDispatchInfo.push(dispatchInfo);
|
||||
|
||||
uint32_t numberOfDependencyContainers = 2;
|
||||
size_t numberNodesPerContainer = 5;
|
||||
|
||||
MockTimestampPacketContainer timestamp0(*ultCsr.getTimestampPacketAllocator(), numberNodesPerContainer);
|
||||
MockTimestampPacketContainer timestamp1(*ultCsr.getTimestampPacketAllocator(), numberNodesPerContainer);
|
||||
|
||||
CsrDependencies csrDeps;
|
||||
|
||||
csrDeps.timestampPacketContainer.push_back(×tamp0);
|
||||
csrDeps.timestampPacketContainer.push_back(×tamp1);
|
||||
|
||||
directSubmission->relaxedOrderingEnabled = false;
|
||||
auto baseCommandStreamSize = EnqueueOperation<FamilyType>::getTotalSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, csrDeps, false, false, false, *pCmdQ, multiDispatchInfo, false, false, nullptr);
|
||||
|
||||
directSubmission->relaxedOrderingEnabled = true;
|
||||
auto newCommandStreamSize = EnqueueOperation<FamilyType>::getTotalSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, csrDeps, false, false, false, *pCmdQ, multiDispatchInfo, false, false, nullptr);
|
||||
|
||||
auto semaphoresSize = numberOfDependencyContainers * numberNodesPerContainer * sizeof(typename FamilyType::MI_SEMAPHORE_WAIT);
|
||||
auto conditionalBbsSize = numberOfDependencyContainers * numberNodesPerContainer * EncodeBatchBufferStartOrEnd<FamilyType>::getCmdSizeConditionalDataMemBatchBufferStart();
|
||||
auto registersSize = 2 * EncodeSetMMIO<FamilyType>::sizeREG;
|
||||
|
||||
auto expectedSize = baseCommandStreamSize - semaphoresSize + conditionalBbsSize + registersSize;
|
||||
|
||||
EXPECT_EQ(expectedSize, newCommandStreamSize);
|
||||
}
|
||||
|
||||
struct RelaxedOrderingEnqueueKernelTests : public EnqueueKernelTest {
|
||||
void SetUp() override {
|
||||
ultHwConfigBackup = std::make_unique<VariableBackup<UltHwConfig>>(&ultHwConfig);
|
||||
|
||||
DebugManager.flags.DirectSubmissionRelaxedOrdering.set(1);
|
||||
DebugManager.flags.UpdateTaskCountFromWait.set(1);
|
||||
|
||||
ultHwConfig.csrBaseCallDirectSubmissionAvailable = true;
|
||||
|
||||
EnqueueKernelTest::SetUp();
|
||||
}
|
||||
|
||||
std::unique_ptr<VariableBackup<UltHwConfig>> ultHwConfigBackup;
|
||||
|
||||
DebugManagerStateRestore restore;
|
||||
size_t gws[3] = {1, 1, 1};
|
||||
};
|
||||
|
||||
HWTEST2_F(RelaxedOrderingEnqueueKernelTests, givenEnqueueKernelWhenProgrammingDependenciesThenUseConditionalBbStarts, IsAtLeastXeHpcCore) {
|
||||
using MI_LOAD_REGISTER_REG = typename FamilyType::MI_LOAD_REGISTER_REG;
|
||||
using MI_LOAD_REGISTER_MEM = typename FamilyType::MI_LOAD_REGISTER_MEM;
|
||||
using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM;
|
||||
|
||||
auto &ultCsr = pDevice->getUltCommandStreamReceiver<FamilyType>();
|
||||
auto directSubmission = new MockDirectSubmissionHw<FamilyType, RenderDispatcher<FamilyType>>(ultCsr);
|
||||
ultCsr.directSubmission.reset(directSubmission);
|
||||
ultCsr.registerClient();
|
||||
ultCsr.registerClient();
|
||||
|
||||
MockCommandQueueHw<FamilyType> mockCmdQueueHw{context, pClDevice, nullptr};
|
||||
|
||||
cl_event outEvent;
|
||||
|
||||
MockKernelWithInternals mockKernel(*pClDevice);
|
||||
|
||||
mockCmdQueueHw.enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, &outEvent);
|
||||
mockCmdQueueHw.flush();
|
||||
|
||||
EXPECT_FALSE(ultCsr.recordedDispatchFlags.hasStallingCmds);
|
||||
EXPECT_FALSE(ultCsr.recordedDispatchFlags.hasRelaxedOrderingDependencies);
|
||||
|
||||
auto cmdsOffset = mockCmdQueueHw.getCS(0).getUsed();
|
||||
|
||||
mockCmdQueueHw.enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 1, &outEvent, nullptr);
|
||||
mockCmdQueueHw.flush();
|
||||
|
||||
EXPECT_FALSE(ultCsr.recordedDispatchFlags.hasStallingCmds);
|
||||
EXPECT_TRUE(ultCsr.recordedDispatchFlags.hasRelaxedOrderingDependencies);
|
||||
|
||||
auto lrrCmd = reinterpret_cast<MI_LOAD_REGISTER_REG *>(ptrOffset(mockCmdQueueHw.getCS(0).getCpuBase(), cmdsOffset));
|
||||
EXPECT_EQ(lrrCmd->getSourceRegisterAddress(), CS_GPR_R4);
|
||||
EXPECT_EQ(lrrCmd->getDestinationRegisterAddress(), CS_GPR_R0);
|
||||
|
||||
lrrCmd++;
|
||||
EXPECT_EQ(lrrCmd->getSourceRegisterAddress(), CS_GPR_R4 + 4);
|
||||
EXPECT_EQ(lrrCmd->getDestinationRegisterAddress(), CS_GPR_R0 + 4);
|
||||
|
||||
auto eventNode = castToObject<Event>(outEvent)->getTimestampPacketNodes()->peekNodes()[0];
|
||||
auto compareAddress = eventNode->getGpuAddress() + eventNode->getContextEndOffset();
|
||||
|
||||
EXPECT_TRUE(RelaxedOrderingCommandsHelper::verifyConditionalDataMemBbStart<FamilyType>(++lrrCmd, 0, compareAddress, 1, CompareOperation::Equal, true));
|
||||
|
||||
mockCmdQueueHw.enqueueBarrierWithWaitList(1, &outEvent, nullptr);
|
||||
mockCmdQueueHw.flush();
|
||||
|
||||
EXPECT_TRUE(ultCsr.recordedDispatchFlags.hasStallingCmds);
|
||||
EXPECT_FALSE(ultCsr.recordedDispatchFlags.hasRelaxedOrderingDependencies);
|
||||
|
||||
clReleaseEvent(outEvent);
|
||||
}
|
||||
|
||||
HWTEST2_F(RelaxedOrderingEnqueueKernelTests, givenEnqueueWithPipeControlWhenSendingBbThenMarkAsStallingDispatch, IsAtLeastXeHpcCore) {
|
||||
|
||||
auto &ultCsr = pDevice->getUltCommandStreamReceiver<FamilyType>();
|
||||
auto directSubmission = new MockDirectSubmissionHw<FamilyType, RenderDispatcher<FamilyType>>(ultCsr);
|
||||
ultCsr.directSubmission.reset(directSubmission);
|
||||
ultCsr.registerClient();
|
||||
ultCsr.registerClient();
|
||||
|
||||
ultCsr.recordFlusheBatchBuffer = true;
|
||||
|
||||
MockCommandQueueHw<FamilyType> mockCmdQueueHw{context, pClDevice, nullptr};
|
||||
MockKernelWithInternals mockKernel(*pClDevice);
|
||||
|
||||
// warmup
|
||||
mockCmdQueueHw.enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);
|
||||
mockCmdQueueHw.flush();
|
||||
|
||||
mockCmdQueueHw.enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);
|
||||
mockCmdQueueHw.flush();
|
||||
|
||||
EXPECT_FALSE(ultCsr.latestFlushedBatchBuffer.hasStallingCmds);
|
||||
|
||||
ultCsr.heapStorageRequiresRecyclingTag = true;
|
||||
mockCmdQueueHw.enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);
|
||||
mockCmdQueueHw.flush();
|
||||
|
||||
EXPECT_TRUE(ultCsr.latestFlushedBatchBuffer.hasStallingCmds);
|
||||
}
|
||||
|
||||
HWCMDTEST_F(IGFX_XE_HP_CORE, EnqueueKernelTest, givenTimestampWriteEnableOnMultiTileQueueWhenMarkerProfilingWithoutWaitListThenSizeHasFourMMIOStoresAndCrossTileBarrier) {
|
||||
auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
|
||||
csr.timestampPacketWriteEnabled = true;
|
||||
|
||||
Reference in New Issue
Block a user