From f0d32ed5f797605532a75b86877425c26052159e Mon Sep 17 00:00:00 2001 From: Zbigniew Zdanowicz Date: Tue, 12 Oct 2021 21:28:34 +0000 Subject: [PATCH] Fix multi tile synchronization in direct submission Related-To: NEO-6244 Signed-off-by: Zbigniew Zdanowicz --- .../direct_submission/direct_submission_hw.h | 1 + .../direct_submission_hw.inl | 18 +++++ .../dispatchers/blitter_dispatcher.h | 3 + .../dispatchers/render_dispatcher.h | 3 + .../linux/drm_direct_submission.inl | 14 +++- shared/test/common/mocks/mock_device.h | 1 + .../common/mocks/mock_direct_submission_hw.h | 3 + .../direct_submission/CMakeLists.txt | 3 +- ...ests.cpp => direct_submission_tests_1.cpp} | 0 .../direct_submission_tests_2.cpp | 76 +++++++++++++++++++ .../dispatchers/blitter_dispatcher_tests.cpp | 6 +- .../dispatchers/render_dispatcher_tests.cpp | 4 + .../linux/drm_direct_submission_tests.cpp | 27 ++++++- 13 files changed, 152 insertions(+), 7 deletions(-) rename shared/test/unit_test/direct_submission/{direct_submission_tests.cpp => direct_submission_tests_1.cpp} (100%) create mode 100644 shared/test/unit_test/direct_submission/direct_submission_tests_2.cpp diff --git a/shared/source/direct_submission/direct_submission_hw.h b/shared/source/direct_submission/direct_submission_hw.h index 67bc85286b..1a8170cfa1 100644 --- a/shared/source/direct_submission/direct_submission_hw.h +++ b/shared/source/direct_submission/direct_submission_hw.h @@ -138,6 +138,7 @@ class DirectSubmissionHw { GraphicsAllocation *ringBuffer = nullptr; GraphicsAllocation *ringBuffer2 = nullptr; GraphicsAllocation *semaphores = nullptr; + GraphicsAllocation *workPartitionAllocation = nullptr; void *semaphorePtr = nullptr; volatile RingSemaphoreData *semaphoreData = nullptr; volatile void *workloadModeOneStoreAddress = nullptr; diff --git a/shared/source/direct_submission/direct_submission_hw.inl b/shared/source/direct_submission/direct_submission_hw.inl index 49445a3484..ae63cecdf9 100644 --- 
a/shared/source/direct_submission/direct_submission_hw.inl +++ b/shared/source/direct_submission/direct_submission_hw.inl @@ -6,6 +6,7 @@ */ #include "shared/source/command_container/command_encoder.h" +#include "shared/source/command_container/implicit_scaling.h" #include "shared/source/command_stream/command_stream_receiver_hw.h" #include "shared/source/command_stream/submissions_aggregator.h" #include "shared/source/debug_settings/debug_settings_manager.h" @@ -81,6 +82,10 @@ bool DirectSubmissionHw::allocateResources() { UNRECOVERABLE_IF(semaphores == nullptr); allocations.push_back(semaphores); + if (this->workPartitionAllocation != nullptr) { + allocations.push_back(workPartitionAllocation); + } + handleResidency(); ringCommandStream.replaceBuffer(ringBuffer->getUnderlyingBuffer(), minimumRequiredSize); ringCommandStream.replaceGraphicsAllocation(ringBuffer); @@ -139,7 +144,20 @@ bool DirectSubmissionHw::initialize(bool submitOnInit) { if (ret && submitOnInit) { size_t startBufferSize = Dispatcher::getSizePreemption() + getSizeSemaphoreSection(); + if (this->partitionedMode) { + startBufferSize += EncodeSetMMIO::sizeMEM; + startBufferSize += EncodeSetMMIO::sizeIMM; + } Dispatcher::dispatchPreemption(ringCommandStream); + if (this->partitionedMode) { + EncodeSetMMIO::encodeMEM(ringCommandStream, + PartitionRegisters::wparidCCSOffset, + this->workPartitionAllocation->getGpuAddress()); + EncodeSetMMIO::encodeIMM(ringCommandStream, + PartitionRegisters::addressOffsetCCSOffset, + CommonConstants::partitionAddressOffset, + true); + } if (workloadMode == 1) { dispatchDiagnosticModeSection(); startBufferSize += getDiagnosticModeSection(); diff --git a/shared/source/direct_submission/dispatchers/blitter_dispatcher.h b/shared/source/direct_submission/dispatchers/blitter_dispatcher.h index a4cee09516..66bde2488b 100644 --- a/shared/source/direct_submission/dispatchers/blitter_dispatcher.h +++ b/shared/source/direct_submission/dispatchers/blitter_dispatcher.h @@ -28,5 
+28,8 @@ class BlitterDispatcher : public Dispatcher { static void dispatchTlbFlush(LinearStream &cmdBuffer, uint64_t address); static size_t getSizeCacheFlush(const HardwareInfo &hwInfo); static size_t getSizeTlbFlush(); + static bool isMultiTileSynchronizationSupported() { + return false; + } }; } // namespace NEO diff --git a/shared/source/direct_submission/dispatchers/render_dispatcher.h b/shared/source/direct_submission/dispatchers/render_dispatcher.h index 983b544ad8..115f974049 100644 --- a/shared/source/direct_submission/dispatchers/render_dispatcher.h +++ b/shared/source/direct_submission/dispatchers/render_dispatcher.h @@ -28,5 +28,8 @@ class RenderDispatcher : public Dispatcher { static void dispatchTlbFlush(LinearStream &cmdBuffer, uint64_t address); static size_t getSizeCacheFlush(const HardwareInfo &hwInfo); static size_t getSizeTlbFlush(); + static bool isMultiTileSynchronizationSupported() { + return true; + } }; } // namespace NEO diff --git a/shared/source/direct_submission/linux/drm_direct_submission.inl b/shared/source/direct_submission/linux/drm_direct_submission.inl index 1ac542001d..fa9d391316 100644 --- a/shared/source/direct_submission/linux/drm_direct_submission.inl +++ b/shared/source/direct_submission/linux/drm_direct_submission.inl @@ -30,13 +30,19 @@ DrmDirectSubmission::DrmDirectSubmission(Device &device, this->disableMonitorFence = DebugManager.flags.DirectSubmissionDisableMonitorFence.get(); } auto subDevices = device.getDeviceBitfield(); - this->activeTiles = ImplicitScalingHelper::isImplicitScalingEnabled(subDevices, true) - ? 
static_cast(subDevices.count()) - : 1u; + bool dispatcherSupport = Dispatcher::isMultiTileSynchronizationSupported(); + if (ImplicitScalingHelper::isImplicitScalingEnabled(subDevices, true) && dispatcherSupport) { + this->activeTiles = static_cast(subDevices.count()); + } this->partitionedMode = this->activeTiles > 1u; auto osContextLinux = static_cast(&this->osContext); osContextLinux->getDrm().setDirectSubmissionActive(true); -}; + + if (this->partitionedMode) { + this->workPartitionAllocation = device.getDefaultEngine().commandStreamReceiver->getWorkPartitionAllocation(); + UNRECOVERABLE_IF(this->workPartitionAllocation == nullptr); + } +} template inline DrmDirectSubmission::~DrmDirectSubmission() { diff --git a/shared/test/common/mocks/mock_device.h b/shared/test/common/mocks/mock_device.h index d3b3844815..f357dc2d47 100644 --- a/shared/test/common/mocks/mock_device.h +++ b/shared/test/common/mocks/mock_device.h @@ -61,6 +61,7 @@ class MockDevice : public RootDevice { using Device::getGlobalMemorySize; using Device::initializeCaps; using Device::isDebuggerActive; + using Device::rootCsrCreated; using Device::rtMemoryBackedBuffer; using RootDevice::createEngines; using RootDevice::defaultEngineIndex; diff --git a/shared/test/common/mocks/mock_direct_submission_hw.h b/shared/test/common/mocks/mock_direct_submission_hw.h index ef3ae89eef..3f826593ed 100644 --- a/shared/test/common/mocks/mock_direct_submission_hw.h +++ b/shared/test/common/mocks/mock_direct_submission_hw.h @@ -61,6 +61,7 @@ struct MockDirectSubmissionHw : public DirectSubmissionHw using BaseClass::workloadMode; using BaseClass::workloadModeOneExpectedValue; using BaseClass::workloadModeOneStoreAddress; + using BaseClass::workPartitionAllocation; using typename BaseClass::RingBufferUse; ~MockDirectSubmissionHw() override { @@ -75,6 +76,7 @@ struct MockDirectSubmissionHw : public DirectSubmissionHw } bool makeResourcesResident(DirectSubmissionAllocations &allocations) override { + 
makeResourcesResidentVectorSize = static_cast(allocations.size()); if (callBaseResident) { return BaseClass::makeResourcesResident(allocations); } @@ -124,6 +126,7 @@ struct MockDirectSubmissionHw : public DirectSubmissionHw uint32_t submitCount = 0u; uint32_t handleResidencyCount = 0u; uint32_t disabledDiagnosticCalled = 0u; + uint32_t makeResourcesResidentVectorSize = 0u; bool allocateOsResourcesReturn = true; bool submitReturn = true; bool handleResidencyReturn = true; diff --git a/shared/test/unit_test/direct_submission/CMakeLists.txt b/shared/test/unit_test/direct_submission/CMakeLists.txt index 4417c74679..31d0fec327 100644 --- a/shared/test/unit_test/direct_submission/CMakeLists.txt +++ b/shared/test/unit_test/direct_submission/CMakeLists.txt @@ -8,7 +8,8 @@ target_sources(${TARGET_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/direct_submission_controller_mock.h ${CMAKE_CURRENT_SOURCE_DIR}/direct_submission_controller_tests.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/direct_submission_tests.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/direct_submission_tests_1.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/direct_submission_tests_2.cpp ) if(TESTS_XE_HP_CORE) diff --git a/shared/test/unit_test/direct_submission/direct_submission_tests.cpp b/shared/test/unit_test/direct_submission/direct_submission_tests_1.cpp similarity index 100% rename from shared/test/unit_test/direct_submission/direct_submission_tests.cpp rename to shared/test/unit_test/direct_submission/direct_submission_tests_1.cpp diff --git a/shared/test/unit_test/direct_submission/direct_submission_tests_2.cpp b/shared/test/unit_test/direct_submission/direct_submission_tests_2.cpp new file mode 100644 index 0000000000..82ecaa92e2 --- /dev/null +++ b/shared/test/unit_test/direct_submission/direct_submission_tests_2.cpp @@ -0,0 +1,76 @@ +/* + * Copyright (C) 2020-2021 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "shared/source/command_stream/submissions_aggregator.h" 
+#include "shared/source/debug_settings/debug_settings_manager.h" +#include "shared/source/direct_submission/dispatchers/render_dispatcher.h" +#include "shared/source/helpers/flush_stamp.h" +#include "shared/test/common/cmd_parse/hw_parse.h" +#include "shared/test/common/fixtures/direct_submission_fixture.h" +#include "shared/test/common/helpers/debug_manager_state_restore.h" +#include "shared/test/common/helpers/dispatch_flags_helper.h" +#include "shared/test/common/helpers/ult_hw_config.h" +#include "shared/test/common/helpers/variable_backup.h" +#include "shared/test/common/mocks/mock_csr.h" +#include "shared/test/common/mocks/mock_direct_submission_diagnostic_collector.h" +#include "shared/test/common/mocks/mock_direct_submission_hw.h" +#include "shared/test/common/mocks/mock_io_functions.h" + +#include "test.h" + +using DirectSubmissionTest = Test; + +using DirectSubmissionDispatchBufferTest = Test; + +HWCMDTEST_F(IGFX_GEN12_CORE, DirectSubmissionDispatchBufferTest, + givenDirectSubmissionRingStartWhenMultiTileSupportedThenExpectMultiTileConfigSetAndWorkPartitionResident) { + using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM; + using MI_LOAD_REGISTER_MEM = typename FamilyType::MI_LOAD_REGISTER_MEM; + + pDevice->rootCsrCreated = true; + pDevice->numSubDevices = 2; + + auto ultCsr = reinterpret_cast *>(pDevice->getDefaultEngine().commandStreamReceiver); + ultCsr->staticWorkPartitioningEnabled = true; + ultCsr->createWorkPartitionAllocation(*pDevice); + + FlushStampTracker flushStamp(true); + + MockDirectSubmissionHw> directSubmission(*pDevice, + *osContext.get()); + directSubmission.activeTiles = 2; + directSubmission.partitionedMode = true; + directSubmission.workPartitionAllocation = ultCsr->getWorkPartitionAllocation(); + + bool ret = directSubmission.initialize(true); + EXPECT_TRUE(ret); + EXPECT_NE(0x0u, directSubmission.ringCommandStream.getUsed()); + + size_t submitSize = RenderDispatcher::getSizePreemption() + 
directSubmission.getSizeSemaphoreSection() + + sizeof(MI_LOAD_REGISTER_IMM) + + sizeof(MI_LOAD_REGISTER_MEM); + EXPECT_EQ(submitSize, directSubmission.submitSize); + EXPECT_EQ(1u, directSubmission.handleResidencyCount); + EXPECT_EQ(4u, directSubmission.makeResourcesResidentVectorSize); + + HardwareParse hwParse; + hwParse.parseCommands(directSubmission.ringCommandStream, 0); + hwParse.findHardwareCommands(); + + ASSERT_NE(hwParse.lriList.end(), hwParse.lriList.begin()); + auto loadRegisterImm = reinterpret_cast(*hwParse.lriList.begin()); + EXPECT_EQ(0x23B4u, loadRegisterImm->getRegisterOffset()); + EXPECT_EQ(8u, loadRegisterImm->getDataDword()); + + auto loadRegisterMemItor = find(hwParse.cmdList.begin(), hwParse.cmdList.end()); + ASSERT_NE(hwParse.cmdList.end(), loadRegisterMemItor); + auto loadRegisterMem = reinterpret_cast(*loadRegisterMemItor); + EXPECT_EQ(0x23B4u, loadRegisterMem->getRegisterOffset()); + uint64_t gpuAddress = ultCsr->getWorkPartitionAllocation()->getGpuAddress(); + EXPECT_EQ(gpuAddress, loadRegisterMem->getMemoryAddress()); +} diff --git a/shared/test/unit_test/direct_submission/dispatchers/blitter_dispatcher_tests.cpp b/shared/test/unit_test/direct_submission/dispatchers/blitter_dispatcher_tests.cpp index e0e9caf18c..263b167e23 100644 --- a/shared/test/unit_test/direct_submission/dispatchers/blitter_dispatcher_tests.cpp +++ b/shared/test/unit_test/direct_submission/dispatchers/blitter_dispatcher_tests.cpp @@ -90,4 +90,8 @@ HWTEST_F(BlitterDispatcheTest, givenBlitterWhenDispatchingTlbFlushThenDispatchMi EXPECT_EQ(miFlushDw->getPostSyncOperation(), MI_FLUSH_DW::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA_QWORD); EXPECT_EQ(BlitterDispatcher::getSizeTlbFlush(), EncodeMiFlushDW::getMiFlushDwCmdSizeForDataWrite()); -} \ No newline at end of file +} + +HWTEST_F(BlitterDispatcheTest, givenBlitterWhenCheckingForMultiTileSynchronizationSupportThenExpectFalse) { + EXPECT_FALSE(BlitterDispatcher::isMultiTileSynchronizationSupported()); +} diff --git 
a/shared/test/unit_test/direct_submission/dispatchers/render_dispatcher_tests.cpp b/shared/test/unit_test/direct_submission/dispatchers/render_dispatcher_tests.cpp index 01af2f5571..b56cbfe5ed 100644 --- a/shared/test/unit_test/direct_submission/dispatchers/render_dispatcher_tests.cpp +++ b/shared/test/unit_test/direct_submission/dispatchers/render_dispatcher_tests.cpp @@ -150,3 +150,7 @@ HWCMDTEST_F(IGFX_GEN12_CORE, RenderDispatcherTest, } EXPECT_TRUE(foundMonitorFence); } + +HWTEST_F(RenderDispatcherTest, givenRenderWhenCheckingForMultiTileSynchronizationSupportThenExpectTrue) { + EXPECT_TRUE(RenderDispatcher::isMultiTileSynchronizationSupported()); +} diff --git a/shared/test/unit_test/direct_submission/linux/drm_direct_submission_tests.cpp b/shared/test/unit_test/direct_submission/linux/drm_direct_submission_tests.cpp index 815126a735..eb4ee2cb95 100644 --- a/shared/test/unit_test/direct_submission/linux/drm_direct_submission_tests.cpp +++ b/shared/test/unit_test/direct_submission/linux/drm_direct_submission_tests.cpp @@ -6,6 +6,7 @@ */ #include "shared/source/command_container/implicit_scaling.h" +#include "shared/source/direct_submission/dispatchers/blitter_dispatcher.h" #include "shared/source/direct_submission/dispatchers/render_dispatcher.h" #include "shared/source/direct_submission/linux/drm_direct_submission.h" #include "shared/source/os_interface/linux/os_context_linux.h" @@ -15,6 +16,7 @@ #include "shared/test/common/helpers/ult_hw_config.h" #include "shared/test/common/helpers/variable_backup.h" #include "shared/test/common/libult/linux/drm_mock.h" +#include "shared/test/common/libult/ult_command_stream_receiver.h" #include "shared/test/common/mocks/mock_device.h" #include "opencl/test/unit_test/os_interface/linux/drm_memory_manager_tests.h" @@ -72,6 +74,7 @@ struct MockDrmDirectSubmission : public DrmDirectSubmission(device, osContext) { this->disableMonitorFence = false; @@ -311,11 +314,17 @@ HWTEST_F(DrmDirectSubmissionTest, 
givenMultipleActiveTilesWhenWaitingForTagUpdat EXPECT_EQ(2u, CpuIntrinsicsTests::pauseCounter); } -HWTEST_F(DrmDirectSubmissionTest, givenMultiTileWhenCreatingDirectSubmissionThenExpectActiveTilesMatchSubDeviceCount) { +HWTEST_F(DrmDirectSubmissionTest, givenRenderDispatcherAndMultiTileDeviceWhenCreatingDirectSubmissionThenExpectActiveTilesMatchSubDeviceCount) { using Dispatcher = RenderDispatcher; VariableBackup backup(&ImplicitScaling::apiSupport, true); device->deviceBitfield.set(0b11); + device->rootCsrCreated = true; + device->numSubDevices = 2; + + auto ultCsr = reinterpret_cast *>(device->getDefaultEngine().commandStreamReceiver); + ultCsr->staticWorkPartitioningEnabled = true; + ultCsr->createWorkPartitionAllocation(*device); MockDrmDirectSubmission directSubmission(*device.get(), *osContext.get()); @@ -326,3 +335,19 @@ HWTEST_F(DrmDirectSubmissionTest, givenMultiTileWhenCreatingDirectSubmissionThen bool ret = directSubmission.allocateResources(); EXPECT_TRUE(ret); } + +HWTEST_F(DrmDirectSubmissionTest, givenBlitterDispatcherAndMultiTileDeviceWhenCreatingDirectSubmissionThenExpectActiveTilesEqualsOne) { + using Dispatcher = BlitterDispatcher; + + VariableBackup backup(&ImplicitScaling::apiSupport, true); + device->deviceBitfield.set(0b11); + + MockDrmDirectSubmission directSubmission(*device.get(), + *osContext.get()); + + EXPECT_EQ(1u, directSubmission.activeTiles); + EXPECT_FALSE(directSubmission.partitionedMode); + + bool ret = directSubmission.allocateResources(); + EXPECT_TRUE(ret); +}