Fix multi tile synchronization in direct submission

Related-To: NEO-6244

Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
Zbigniew Zdanowicz
2021-10-12 21:28:34 +00:00
committed by Compute-Runtime-Automation
parent 4768be244b
commit f0d32ed5f7
13 changed files with 152 additions and 7 deletions

View File

@@ -138,6 +138,7 @@ class DirectSubmissionHw {
// Graphics allocations used by the direct submission ring.
GraphicsAllocation *ringBuffer = nullptr;
GraphicsAllocation *ringBuffer2 = nullptr;
GraphicsAllocation *semaphores = nullptr;
// Optional work partition allocation. When non-null it is added to the list of
// allocations made resident in allocateResources(), and in partitioned mode its
// GPU address is the source of the register load emitted at ring start in initialize().
// The Linux backend obtains it from the default engine's command stream receiver,
// so it is presumably not owned by this class - TODO confirm.
GraphicsAllocation *workPartitionAllocation = nullptr;
void *semaphorePtr = nullptr;
volatile RingSemaphoreData *semaphoreData = nullptr;
volatile void *workloadModeOneStoreAddress = nullptr;

View File

@@ -6,6 +6,7 @@
*/
#include "shared/source/command_container/command_encoder.h"
#include "shared/source/command_container/implicit_scaling.h"
#include "shared/source/command_stream/command_stream_receiver_hw.h"
#include "shared/source/command_stream/submissions_aggregator.h"
#include "shared/source/debug_settings/debug_settings_manager.h"
@@ -81,6 +82,10 @@ bool DirectSubmissionHw<GfxFamily, Dispatcher>::allocateResources() {
UNRECOVERABLE_IF(semaphores == nullptr);
allocations.push_back(semaphores);
if (this->workPartitionAllocation != nullptr) {
allocations.push_back(workPartitionAllocation);
}
handleResidency();
ringCommandStream.replaceBuffer(ringBuffer->getUnderlyingBuffer(), minimumRequiredSize);
ringCommandStream.replaceGraphicsAllocation(ringBuffer);
@@ -139,7 +144,20 @@ bool DirectSubmissionHw<GfxFamily, Dispatcher>::initialize(bool submitOnInit) {
if (ret && submitOnInit) {
size_t startBufferSize = Dispatcher::getSizePreemption() +
getSizeSemaphoreSection();
if (this->partitionedMode) {
startBufferSize += EncodeSetMMIO<GfxFamily>::sizeMEM;
startBufferSize += EncodeSetMMIO<GfxFamily>::sizeIMM;
}
Dispatcher::dispatchPreemption(ringCommandStream);
if (this->partitionedMode) {
EncodeSetMMIO<GfxFamily>::encodeMEM(ringCommandStream,
PartitionRegisters<GfxFamily>::wparidCCSOffset,
this->workPartitionAllocation->getGpuAddress());
EncodeSetMMIO<GfxFamily>::encodeIMM(ringCommandStream,
PartitionRegisters<GfxFamily>::addressOffsetCCSOffset,
CommonConstants::partitionAddressOffset,
true);
}
if (workloadMode == 1) {
dispatchDiagnosticModeSection();
startBufferSize += getDiagnosticModeSection();

View File

@@ -28,5 +28,8 @@ class BlitterDispatcher : public Dispatcher<GfxFamily> {
static void dispatchTlbFlush(LinearStream &cmdBuffer, uint64_t address);
static size_t getSizeCacheFlush(const HardwareInfo &hwInfo);
static size_t getSizeTlbFlush();
// Reports whether this dispatcher supports multi-tile synchronization.
// The blitter dispatcher does not; the Linux direct submission only raises its
// active tile count above one when the dispatcher reports support.
// constexpr so the answer can also be used in constant expressions.
static constexpr bool isMultiTileSynchronizationSupported() {
    return false;
}
};
} // namespace NEO

View File

@@ -28,5 +28,8 @@ class RenderDispatcher : public Dispatcher<GfxFamily> {
static void dispatchTlbFlush(LinearStream &cmdBuffer, uint64_t address);
static size_t getSizeCacheFlush(const HardwareInfo &hwInfo);
static size_t getSizeTlbFlush();
// Reports whether this dispatcher supports multi-tile synchronization.
// The render dispatcher does, which allows the Linux direct submission to use
// one active tile per sub-device when implicit scaling is enabled.
// constexpr so the answer can also be used in constant expressions.
static constexpr bool isMultiTileSynchronizationSupported() {
    return true;
}
};
} // namespace NEO

View File

@@ -30,13 +30,19 @@ DrmDirectSubmission<GfxFamily, Dispatcher>::DrmDirectSubmission(Device &device,
this->disableMonitorFence = DebugManager.flags.DirectSubmissionDisableMonitorFence.get();
}
auto subDevices = device.getDeviceBitfield();
this->activeTiles = ImplicitScalingHelper::isImplicitScalingEnabled(subDevices, true)
? static_cast<uint32_t>(subDevices.count())
: 1u;
bool dispatcherSupport = Dispatcher::isMultiTileSynchronizationSupported();
if (ImplicitScalingHelper::isImplicitScalingEnabled(subDevices, true) && dispatcherSupport) {
this->activeTiles = static_cast<uint32_t>(subDevices.count());
}
this->partitionedMode = this->activeTiles > 1u;
auto osContextLinux = static_cast<OsContextLinux *>(&this->osContext);
osContextLinux->getDrm().setDirectSubmissionActive(true);
};
if (this->partitionedMode) {
this->workPartitionAllocation = device.getDefaultEngine().commandStreamReceiver->getWorkPartitionAllocation();
UNRECOVERABLE_IF(this->workPartitionAllocation == nullptr);
}
}
template <typename GfxFamily, typename Dispatcher>
inline DrmDirectSubmission<GfxFamily, Dispatcher>::~DrmDirectSubmission() {

View File

@@ -61,6 +61,7 @@ class MockDevice : public RootDevice {
using Device::getGlobalMemorySize;
using Device::initializeCaps;
using Device::isDebuggerActive;
using Device::rootCsrCreated;
using Device::rtMemoryBackedBuffer;
using RootDevice::createEngines;
using RootDevice::defaultEngineIndex;

View File

@@ -61,6 +61,7 @@ struct MockDirectSubmissionHw : public DirectSubmissionHw<GfxFamily, Dispatcher>
using BaseClass::workloadMode;
using BaseClass::workloadModeOneExpectedValue;
using BaseClass::workloadModeOneStoreAddress;
using BaseClass::workPartitionAllocation;
using typename BaseClass::RingBufferUse;
~MockDirectSubmissionHw() override {
@@ -75,6 +76,7 @@ struct MockDirectSubmissionHw : public DirectSubmissionHw<GfxFamily, Dispatcher>
}
bool makeResourcesResident(DirectSubmissionAllocations &allocations) override {
makeResourcesResidentVectorSize = static_cast<uint32_t>(allocations.size());
if (callBaseResident) {
return BaseClass::makeResourcesResident(allocations);
}
@@ -124,6 +126,7 @@ struct MockDirectSubmissionHw : public DirectSubmissionHw<GfxFamily, Dispatcher>
uint32_t submitCount = 0u;
uint32_t handleResidencyCount = 0u;
uint32_t disabledDiagnosticCalled = 0u;
// Size of the allocations vector passed to the most recent makeResourcesResident() call;
// lets tests verify how many allocations were requested to be resident.
uint32_t makeResourcesResidentVectorSize = 0u;
bool allocateOsResourcesReturn = true;
bool submitReturn = true;
bool handleResidencyReturn = true;

View File

@@ -8,7 +8,8 @@ target_sources(${TARGET_NAME} PRIVATE
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
${CMAKE_CURRENT_SOURCE_DIR}/direct_submission_controller_mock.h
${CMAKE_CURRENT_SOURCE_DIR}/direct_submission_controller_tests.cpp
${CMAKE_CURRENT_SOURCE_DIR}/direct_submission_tests.cpp
${CMAKE_CURRENT_SOURCE_DIR}/direct_submission_tests_1.cpp
${CMAKE_CURRENT_SOURCE_DIR}/direct_submission_tests_2.cpp
)
if(TESTS_XE_HP_CORE)

View File

@@ -0,0 +1,76 @@
/*
* Copyright (C) 2020-2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/command_stream/submissions_aggregator.h"
#include "shared/source/debug_settings/debug_settings_manager.h"
#include "shared/source/direct_submission/dispatchers/render_dispatcher.h"
#include "shared/source/helpers/flush_stamp.h"
#include "shared/test/common/cmd_parse/hw_parse.h"
#include "shared/test/common/fixtures/direct_submission_fixture.h"
#include "shared/test/common/helpers/debug_manager_state_restore.h"
#include "shared/test/common/helpers/dispatch_flags_helper.h"
#include "shared/test/common/helpers/ult_hw_config.h"
#include "shared/test/common/helpers/variable_backup.h"
#include "shared/test/common/mocks/mock_csr.h"
#include "shared/test/common/mocks/mock_direct_submission_diagnostic_collector.h"
#include "shared/test/common/mocks/mock_direct_submission_hw.h"
#include "shared/test/common/mocks/mock_io_functions.h"
#include "test.h"
using DirectSubmissionTest = Test<DirectSubmissionFixture>;
using DirectSubmissionDispatchBufferTest = Test<DirectSubmissionDispatchBufferFixture>;
// Verifies the ring-start sequence in partitioned (multi-tile) mode:
//  - the work partition allocation is part of the residency request,
//  - the start section size accounts for one MI_LOAD_REGISTER_MEM and one MI_LOAD_REGISTER_IMM,
//  - both register loads are present in the ring with the expected targets.
HWCMDTEST_F(IGFX_GEN12_CORE, DirectSubmissionDispatchBufferTest,
            givenDirectSubmissionRingStartWhenMultiTileSupportedThenExpectMultiTileConfigSetAndWorkPartitionResident) {
    using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM;
    using MI_LOAD_REGISTER_MEM = typename FamilyType::MI_LOAD_REGISTER_MEM;

    // Make the device look like a two-tile root device so a work partition allocation can be created.
    pDevice->rootCsrCreated = true;
    pDevice->numSubDevices = 2;

    auto ultCsr = reinterpret_cast<UltCommandStreamReceiver<FamilyType> *>(pDevice->getDefaultEngine().commandStreamReceiver);
    ultCsr->staticWorkPartitioningEnabled = true;
    ultCsr->createWorkPartitionAllocation(*pDevice);

    FlushStampTracker flushStamp(true);

    MockDirectSubmissionHw<FamilyType, RenderDispatcher<FamilyType>> directSubmission(*pDevice,
                                                                                      *osContext.get());
    // Force partitioned mode on the mock; the base class itself does not derive it from the device.
    directSubmission.activeTiles = 2;
    directSubmission.partitionedMode = true;
    directSubmission.workPartitionAllocation = ultCsr->getWorkPartitionAllocation();

    bool ret = directSubmission.initialize(true);
    EXPECT_TRUE(ret);
    EXPECT_NE(0x0u, directSubmission.ringCommandStream.getUsed());

    size_t submitSize = RenderDispatcher<FamilyType>::getSizePreemption() +
                        directSubmission.getSizeSemaphoreSection() +
                        sizeof(MI_LOAD_REGISTER_IMM) +
                        sizeof(MI_LOAD_REGISTER_MEM);
    EXPECT_EQ(submitSize, directSubmission.submitSize);
    EXPECT_EQ(1u, directSubmission.handleResidencyCount);
    // One more allocation than in non-partitioned mode: the work partition allocation.
    EXPECT_EQ(4u, directSubmission.makeResourcesResidentVectorSize);

    HardwareParse hwParse;
    hwParse.parseCommands<FamilyType>(directSubmission.ringCommandStream, 0);
    hwParse.findHardwareCommands<FamilyType>();

    // MI_LOAD_REGISTER_IMM programs the partition address offset register.
    ASSERT_NE(hwParse.lriList.end(), hwParse.lriList.begin());
    auto loadRegisterImm = reinterpret_cast<MI_LOAD_REGISTER_IMM *>(*hwParse.lriList.begin());
    EXPECT_EQ(0x23B4u, loadRegisterImm->getRegisterOffset());
    EXPECT_EQ(8u, loadRegisterImm->getDataDword());

    // The iterator comes from cmdList, so it must be checked against cmdList.end();
    // comparing with the end of a different list never detects "not found".
    auto loadRegisterMemItor = find<MI_LOAD_REGISTER_MEM *>(hwParse.cmdList.begin(), hwParse.cmdList.end());
    ASSERT_NE(hwParse.cmdList.end(), loadRegisterMemItor);

    // MI_LOAD_REGISTER_MEM loads the work partition id register from the work partition
    // allocation; that is a different register than the one written by the LRI above.
    // NOTE(review): 0x221C is expected to equal PartitionRegisters<FamilyType>::wparidCCSOffset - confirm.
    auto loadRegisterMem = reinterpret_cast<MI_LOAD_REGISTER_MEM *>(*loadRegisterMemItor);
    EXPECT_EQ(0x221Cu, loadRegisterMem->getRegisterOffset());
    uint64_t gpuAddress = ultCsr->getWorkPartitionAllocation()->getGpuAddress();
    EXPECT_EQ(gpuAddress, loadRegisterMem->getMemoryAddress());
}

View File

@@ -90,4 +90,8 @@ HWTEST_F(BlitterDispatcheTest, givenBlitterWhenDispatchingTlbFlushThenDispatchMi
EXPECT_EQ(miFlushDw->getPostSyncOperation(), MI_FLUSH_DW::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA_QWORD);
EXPECT_EQ(BlitterDispatcher<FamilyType>::getSizeTlbFlush(), EncodeMiFlushDW<FamilyType>::getMiFlushDwCmdSizeForDataWrite());
}
}
// The blitter dispatcher must report that multi-tile synchronization is not supported.
HWTEST_F(BlitterDispatcheTest, givenBlitterWhenCheckingForMultiTileSynchronizationSupportThenExpectFalse) {
    const bool multiTileSyncSupported = BlitterDispatcher<FamilyType>::isMultiTileSynchronizationSupported();
    EXPECT_FALSE(multiTileSyncSupported);
}

View File

@@ -150,3 +150,7 @@ HWCMDTEST_F(IGFX_GEN12_CORE, RenderDispatcherTest,
}
EXPECT_TRUE(foundMonitorFence);
}
// The render dispatcher must report that multi-tile synchronization is supported.
HWTEST_F(RenderDispatcherTest, givenRenderWhenCheckingForMultiTileSynchronizationSupportThenExpectTrue) {
    const bool multiTileSyncSupported = RenderDispatcher<FamilyType>::isMultiTileSynchronizationSupported();
    EXPECT_TRUE(multiTileSyncSupported);
}

View File

@@ -6,6 +6,7 @@
*/
#include "shared/source/command_container/implicit_scaling.h"
#include "shared/source/direct_submission/dispatchers/blitter_dispatcher.h"
#include "shared/source/direct_submission/dispatchers/render_dispatcher.h"
#include "shared/source/direct_submission/linux/drm_direct_submission.h"
#include "shared/source/os_interface/linux/os_context_linux.h"
@@ -15,6 +16,7 @@
#include "shared/test/common/helpers/ult_hw_config.h"
#include "shared/test/common/helpers/variable_backup.h"
#include "shared/test/common/libult/linux/drm_mock.h"
#include "shared/test/common/libult/ult_command_stream_receiver.h"
#include "shared/test/common/mocks/mock_device.h"
#include "opencl/test/unit_test/os_interface/linux/drm_memory_manager_tests.h"
@@ -72,6 +74,7 @@ struct MockDrmDirectSubmission : public DrmDirectSubmission<GfxFamily, Dispatche
using BaseClass::tagAddress;
using BaseClass::updateTagValue;
using BaseClass::wait;
using BaseClass::workPartitionAllocation;
MockDrmDirectSubmission(Device &device, OsContext &osContext) : DrmDirectSubmission<GfxFamily, Dispatcher>(device, osContext) {
this->disableMonitorFence = false;
@@ -311,11 +314,17 @@ HWTEST_F(DrmDirectSubmissionTest, givenMultipleActiveTilesWhenWaitingForTagUpdat
EXPECT_EQ(2u, CpuIntrinsicsTests::pauseCounter);
}
HWTEST_F(DrmDirectSubmissionTest, givenMultiTileWhenCreatingDirectSubmissionThenExpectActiveTilesMatchSubDeviceCount) {
HWTEST_F(DrmDirectSubmissionTest, givenRenderDispatcherAndMultiTileDeviceWhenCreatingDirectSubmissionThenExpectActiveTilesMatchSubDeviceCount) {
using Dispatcher = RenderDispatcher<FamilyType>;
VariableBackup<bool> backup(&ImplicitScaling::apiSupport, true);
device->deviceBitfield.set(0b11);
device->rootCsrCreated = true;
device->numSubDevices = 2;
auto ultCsr = reinterpret_cast<UltCommandStreamReceiver<FamilyType> *>(device->getDefaultEngine().commandStreamReceiver);
ultCsr->staticWorkPartitioningEnabled = true;
ultCsr->createWorkPartitionAllocation(*device);
MockDrmDirectSubmission<FamilyType, Dispatcher> directSubmission(*device.get(),
*osContext.get());
@@ -326,3 +335,19 @@ HWTEST_F(DrmDirectSubmissionTest, givenMultiTileWhenCreatingDirectSubmissionThen
bool ret = directSubmission.allocateResources();
EXPECT_TRUE(ret);
}
// With a dispatcher that reports no multi-tile synchronization support, direct submission
// must stay in single-tile, non-partitioned mode even when implicit scaling is enabled
// on a multi-tile device, and resource allocation must still succeed.
HWTEST_F(DrmDirectSubmissionTest, givenBlitterDispatcherAndMultiTileDeviceWhenCreatingDirectSubmissionThenExpectActiveTilesEqualsOne) {
    VariableBackup<bool> backup(&ImplicitScaling::apiSupport, true);
    // NOTE(review): if deviceBitfield is a std::bitset, set(0b11) sets the single bit at
    // index 3 rather than applying the mask 0b11 - confirm this is the intended setup.
    device->deviceBitfield.set(0b11);

    MockDrmDirectSubmission<FamilyType, BlitterDispatcher<FamilyType>> directSubmission(*device.get(),
                                                                                        *osContext.get());

    EXPECT_FALSE(directSubmission.partitionedMode);
    EXPECT_EQ(1u, directSubmission.activeTiles);

    const bool resourcesAllocated = directSubmission.allocateResources();
    EXPECT_TRUE(resourcesAllocated);
}