mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-21 01:04:57 +08:00
Fix multi tile synchronization in direct submission
Related-To: NEO-6244 Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
4768be244b
commit
f0d32ed5f7
@@ -138,6 +138,7 @@ class DirectSubmissionHw {
|
||||
GraphicsAllocation *ringBuffer = nullptr;
|
||||
GraphicsAllocation *ringBuffer2 = nullptr;
|
||||
GraphicsAllocation *semaphores = nullptr;
|
||||
GraphicsAllocation *workPartitionAllocation = nullptr;
|
||||
void *semaphorePtr = nullptr;
|
||||
volatile RingSemaphoreData *semaphoreData = nullptr;
|
||||
volatile void *workloadModeOneStoreAddress = nullptr;
|
||||
|
||||
@@ -6,6 +6,7 @@
|
||||
*/
|
||||
|
||||
#include "shared/source/command_container/command_encoder.h"
|
||||
#include "shared/source/command_container/implicit_scaling.h"
|
||||
#include "shared/source/command_stream/command_stream_receiver_hw.h"
|
||||
#include "shared/source/command_stream/submissions_aggregator.h"
|
||||
#include "shared/source/debug_settings/debug_settings_manager.h"
|
||||
@@ -81,6 +82,10 @@ bool DirectSubmissionHw<GfxFamily, Dispatcher>::allocateResources() {
|
||||
UNRECOVERABLE_IF(semaphores == nullptr);
|
||||
allocations.push_back(semaphores);
|
||||
|
||||
if (this->workPartitionAllocation != nullptr) {
|
||||
allocations.push_back(workPartitionAllocation);
|
||||
}
|
||||
|
||||
handleResidency();
|
||||
ringCommandStream.replaceBuffer(ringBuffer->getUnderlyingBuffer(), minimumRequiredSize);
|
||||
ringCommandStream.replaceGraphicsAllocation(ringBuffer);
|
||||
@@ -139,7 +144,20 @@ bool DirectSubmissionHw<GfxFamily, Dispatcher>::initialize(bool submitOnInit) {
|
||||
if (ret && submitOnInit) {
|
||||
size_t startBufferSize = Dispatcher::getSizePreemption() +
|
||||
getSizeSemaphoreSection();
|
||||
if (this->partitionedMode) {
|
||||
startBufferSize += EncodeSetMMIO<GfxFamily>::sizeMEM;
|
||||
startBufferSize += EncodeSetMMIO<GfxFamily>::sizeIMM;
|
||||
}
|
||||
Dispatcher::dispatchPreemption(ringCommandStream);
|
||||
if (this->partitionedMode) {
|
||||
EncodeSetMMIO<GfxFamily>::encodeMEM(ringCommandStream,
|
||||
PartitionRegisters<GfxFamily>::wparidCCSOffset,
|
||||
this->workPartitionAllocation->getGpuAddress());
|
||||
EncodeSetMMIO<GfxFamily>::encodeIMM(ringCommandStream,
|
||||
PartitionRegisters<GfxFamily>::addressOffsetCCSOffset,
|
||||
CommonConstants::partitionAddressOffset,
|
||||
true);
|
||||
}
|
||||
if (workloadMode == 1) {
|
||||
dispatchDiagnosticModeSection();
|
||||
startBufferSize += getDiagnosticModeSection();
|
||||
|
||||
@@ -28,5 +28,8 @@ class BlitterDispatcher : public Dispatcher<GfxFamily> {
|
||||
static void dispatchTlbFlush(LinearStream &cmdBuffer, uint64_t address);
|
||||
static size_t getSizeCacheFlush(const HardwareInfo &hwInfo);
|
||||
static size_t getSizeTlbFlush();
|
||||
static bool isMultiTileSynchronizationSupported() {
|
||||
return false;
|
||||
}
|
||||
};
|
||||
} // namespace NEO
|
||||
|
||||
@@ -28,5 +28,8 @@ class RenderDispatcher : public Dispatcher<GfxFamily> {
|
||||
static void dispatchTlbFlush(LinearStream &cmdBuffer, uint64_t address);
|
||||
static size_t getSizeCacheFlush(const HardwareInfo &hwInfo);
|
||||
static size_t getSizeTlbFlush();
|
||||
static bool isMultiTileSynchronizationSupported() {
|
||||
return true;
|
||||
}
|
||||
};
|
||||
} // namespace NEO
|
||||
|
||||
@@ -30,13 +30,19 @@ DrmDirectSubmission<GfxFamily, Dispatcher>::DrmDirectSubmission(Device &device,
|
||||
this->disableMonitorFence = DebugManager.flags.DirectSubmissionDisableMonitorFence.get();
|
||||
}
|
||||
auto subDevices = device.getDeviceBitfield();
|
||||
this->activeTiles = ImplicitScalingHelper::isImplicitScalingEnabled(subDevices, true)
|
||||
? static_cast<uint32_t>(subDevices.count())
|
||||
: 1u;
|
||||
bool dispatcherSupport = Dispatcher::isMultiTileSynchronizationSupported();
|
||||
if (ImplicitScalingHelper::isImplicitScalingEnabled(subDevices, true) && dispatcherSupport) {
|
||||
this->activeTiles = static_cast<uint32_t>(subDevices.count());
|
||||
}
|
||||
this->partitionedMode = this->activeTiles > 1u;
|
||||
auto osContextLinux = static_cast<OsContextLinux *>(&this->osContext);
|
||||
osContextLinux->getDrm().setDirectSubmissionActive(true);
|
||||
};
|
||||
|
||||
if (this->partitionedMode) {
|
||||
this->workPartitionAllocation = device.getDefaultEngine().commandStreamReceiver->getWorkPartitionAllocation();
|
||||
UNRECOVERABLE_IF(this->workPartitionAllocation == nullptr);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename GfxFamily, typename Dispatcher>
|
||||
inline DrmDirectSubmission<GfxFamily, Dispatcher>::~DrmDirectSubmission() {
|
||||
|
||||
@@ -61,6 +61,7 @@ class MockDevice : public RootDevice {
|
||||
using Device::getGlobalMemorySize;
|
||||
using Device::initializeCaps;
|
||||
using Device::isDebuggerActive;
|
||||
using Device::rootCsrCreated;
|
||||
using Device::rtMemoryBackedBuffer;
|
||||
using RootDevice::createEngines;
|
||||
using RootDevice::defaultEngineIndex;
|
||||
|
||||
@@ -61,6 +61,7 @@ struct MockDirectSubmissionHw : public DirectSubmissionHw<GfxFamily, Dispatcher>
|
||||
using BaseClass::workloadMode;
|
||||
using BaseClass::workloadModeOneExpectedValue;
|
||||
using BaseClass::workloadModeOneStoreAddress;
|
||||
using BaseClass::workPartitionAllocation;
|
||||
using typename BaseClass::RingBufferUse;
|
||||
|
||||
~MockDirectSubmissionHw() override {
|
||||
@@ -75,6 +76,7 @@ struct MockDirectSubmissionHw : public DirectSubmissionHw<GfxFamily, Dispatcher>
|
||||
}
|
||||
|
||||
bool makeResourcesResident(DirectSubmissionAllocations &allocations) override {
|
||||
makeResourcesResidentVectorSize = static_cast<uint32_t>(allocations.size());
|
||||
if (callBaseResident) {
|
||||
return BaseClass::makeResourcesResident(allocations);
|
||||
}
|
||||
@@ -124,6 +126,7 @@ struct MockDirectSubmissionHw : public DirectSubmissionHw<GfxFamily, Dispatcher>
|
||||
uint32_t submitCount = 0u;
|
||||
uint32_t handleResidencyCount = 0u;
|
||||
uint32_t disabledDiagnosticCalled = 0u;
|
||||
uint32_t makeResourcesResidentVectorSize = 0u;
|
||||
bool allocateOsResourcesReturn = true;
|
||||
bool submitReturn = true;
|
||||
bool handleResidencyReturn = true;
|
||||
|
||||
@@ -8,7 +8,8 @@ target_sources(${TARGET_NAME} PRIVATE
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/direct_submission_controller_mock.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/direct_submission_controller_tests.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/direct_submission_tests.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/direct_submission_tests_1.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/direct_submission_tests_2.cpp
|
||||
)
|
||||
|
||||
if(TESTS_XE_HP_CORE)
|
||||
|
||||
@@ -0,0 +1,76 @@
|
||||
/*
|
||||
* Copyright (C) 2020-2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/command_stream/submissions_aggregator.h"
|
||||
#include "shared/source/debug_settings/debug_settings_manager.h"
|
||||
#include "shared/source/direct_submission/dispatchers/render_dispatcher.h"
|
||||
#include "shared/source/helpers/flush_stamp.h"
|
||||
#include "shared/test/common/cmd_parse/hw_parse.h"
|
||||
#include "shared/test/common/fixtures/direct_submission_fixture.h"
|
||||
#include "shared/test/common/helpers/debug_manager_state_restore.h"
|
||||
#include "shared/test/common/helpers/dispatch_flags_helper.h"
|
||||
#include "shared/test/common/helpers/ult_hw_config.h"
|
||||
#include "shared/test/common/helpers/variable_backup.h"
|
||||
#include "shared/test/common/mocks/mock_csr.h"
|
||||
#include "shared/test/common/mocks/mock_direct_submission_diagnostic_collector.h"
|
||||
#include "shared/test/common/mocks/mock_direct_submission_hw.h"
|
||||
#include "shared/test/common/mocks/mock_io_functions.h"
|
||||
|
||||
#include "test.h"
|
||||
|
||||
using DirectSubmissionTest = Test<DirectSubmissionFixture>;
|
||||
|
||||
using DirectSubmissionDispatchBufferTest = Test<DirectSubmissionDispatchBufferFixture>;
|
||||
|
||||
HWCMDTEST_F(IGFX_GEN12_CORE, DirectSubmissionDispatchBufferTest,
|
||||
givenDirectSubmissionRingStartWhenMultiTileSupportedThenExpectMultiTileConfigSetAndWorkPartitionResident) {
|
||||
using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM;
|
||||
using MI_LOAD_REGISTER_MEM = typename FamilyType::MI_LOAD_REGISTER_MEM;
|
||||
|
||||
pDevice->rootCsrCreated = true;
|
||||
pDevice->numSubDevices = 2;
|
||||
|
||||
auto ultCsr = reinterpret_cast<UltCommandStreamReceiver<FamilyType> *>(pDevice->getDefaultEngine().commandStreamReceiver);
|
||||
ultCsr->staticWorkPartitioningEnabled = true;
|
||||
ultCsr->createWorkPartitionAllocation(*pDevice);
|
||||
|
||||
FlushStampTracker flushStamp(true);
|
||||
|
||||
MockDirectSubmissionHw<FamilyType, RenderDispatcher<FamilyType>> directSubmission(*pDevice,
|
||||
*osContext.get());
|
||||
directSubmission.activeTiles = 2;
|
||||
directSubmission.partitionedMode = true;
|
||||
directSubmission.workPartitionAllocation = ultCsr->getWorkPartitionAllocation();
|
||||
|
||||
bool ret = directSubmission.initialize(true);
|
||||
EXPECT_TRUE(ret);
|
||||
EXPECT_NE(0x0u, directSubmission.ringCommandStream.getUsed());
|
||||
|
||||
size_t submitSize = RenderDispatcher<FamilyType>::getSizePreemption() +
|
||||
directSubmission.getSizeSemaphoreSection() +
|
||||
sizeof(MI_LOAD_REGISTER_IMM) +
|
||||
sizeof(MI_LOAD_REGISTER_MEM);
|
||||
EXPECT_EQ(submitSize, directSubmission.submitSize);
|
||||
EXPECT_EQ(1u, directSubmission.handleResidencyCount);
|
||||
EXPECT_EQ(4u, directSubmission.makeResourcesResidentVectorSize);
|
||||
|
||||
HardwareParse hwParse;
|
||||
hwParse.parseCommands<FamilyType>(directSubmission.ringCommandStream, 0);
|
||||
hwParse.findHardwareCommands<FamilyType>();
|
||||
|
||||
ASSERT_NE(hwParse.lriList.end(), hwParse.lriList.begin());
|
||||
auto loadRegisterImm = reinterpret_cast<MI_LOAD_REGISTER_IMM *>(*hwParse.lriList.begin());
|
||||
EXPECT_EQ(0x23B4u, loadRegisterImm->getRegisterOffset());
|
||||
EXPECT_EQ(8u, loadRegisterImm->getDataDword());
|
||||
|
||||
auto loadRegisterMemItor = find<MI_LOAD_REGISTER_MEM *>(hwParse.cmdList.begin(), hwParse.cmdList.end());
|
||||
ASSERT_NE(hwParse.lriList.end(), loadRegisterMemItor);
|
||||
auto loadRegisterMem = reinterpret_cast<MI_LOAD_REGISTER_MEM *>(*loadRegisterMemItor);
|
||||
EXPECT_EQ(0x23B4u, loadRegisterMem->getRegisterOffset());
|
||||
uint64_t gpuAddress = ultCsr->getWorkPartitionAllocation()->getGpuAddress();
|
||||
EXPECT_EQ(gpuAddress, loadRegisterMem->getMemoryAddress());
|
||||
}
|
||||
@@ -90,4 +90,8 @@ HWTEST_F(BlitterDispatcheTest, givenBlitterWhenDispatchingTlbFlushThenDispatchMi
|
||||
EXPECT_EQ(miFlushDw->getPostSyncOperation(), MI_FLUSH_DW::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA_QWORD);
|
||||
|
||||
EXPECT_EQ(BlitterDispatcher<FamilyType>::getSizeTlbFlush(), EncodeMiFlushDW<FamilyType>::getMiFlushDwCmdSizeForDataWrite());
|
||||
}
|
||||
}
|
||||
|
||||
HWTEST_F(BlitterDispatcheTest, givenBlitterWhenCheckingForMultiTileSynchronizationSupportThenExpectFalse) {
|
||||
EXPECT_FALSE(BlitterDispatcher<FamilyType>::isMultiTileSynchronizationSupported());
|
||||
}
|
||||
|
||||
@@ -150,3 +150,7 @@ HWCMDTEST_F(IGFX_GEN12_CORE, RenderDispatcherTest,
|
||||
}
|
||||
EXPECT_TRUE(foundMonitorFence);
|
||||
}
|
||||
|
||||
HWTEST_F(RenderDispatcherTest, givenRenderWhenCheckingForMultiTileSynchronizationSupportThenExpectTrue) {
|
||||
EXPECT_TRUE(RenderDispatcher<FamilyType>::isMultiTileSynchronizationSupported());
|
||||
}
|
||||
|
||||
@@ -6,6 +6,7 @@
|
||||
*/
|
||||
|
||||
#include "shared/source/command_container/implicit_scaling.h"
|
||||
#include "shared/source/direct_submission/dispatchers/blitter_dispatcher.h"
|
||||
#include "shared/source/direct_submission/dispatchers/render_dispatcher.h"
|
||||
#include "shared/source/direct_submission/linux/drm_direct_submission.h"
|
||||
#include "shared/source/os_interface/linux/os_context_linux.h"
|
||||
@@ -15,6 +16,7 @@
|
||||
#include "shared/test/common/helpers/ult_hw_config.h"
|
||||
#include "shared/test/common/helpers/variable_backup.h"
|
||||
#include "shared/test/common/libult/linux/drm_mock.h"
|
||||
#include "shared/test/common/libult/ult_command_stream_receiver.h"
|
||||
#include "shared/test/common/mocks/mock_device.h"
|
||||
|
||||
#include "opencl/test/unit_test/os_interface/linux/drm_memory_manager_tests.h"
|
||||
@@ -72,6 +74,7 @@ struct MockDrmDirectSubmission : public DrmDirectSubmission<GfxFamily, Dispatche
|
||||
using BaseClass::tagAddress;
|
||||
using BaseClass::updateTagValue;
|
||||
using BaseClass::wait;
|
||||
using BaseClass::workPartitionAllocation;
|
||||
|
||||
MockDrmDirectSubmission(Device &device, OsContext &osContext) : DrmDirectSubmission<GfxFamily, Dispatcher>(device, osContext) {
|
||||
this->disableMonitorFence = false;
|
||||
@@ -311,11 +314,17 @@ HWTEST_F(DrmDirectSubmissionTest, givenMultipleActiveTilesWhenWaitingForTagUpdat
|
||||
EXPECT_EQ(2u, CpuIntrinsicsTests::pauseCounter);
|
||||
}
|
||||
|
||||
HWTEST_F(DrmDirectSubmissionTest, givenMultiTileWhenCreatingDirectSubmissionThenExpectActiveTilesMatchSubDeviceCount) {
|
||||
HWTEST_F(DrmDirectSubmissionTest, givenRenderDispatcherAndMultiTileDeviceWhenCreatingDirectSubmissionThenExpectActiveTilesMatchSubDeviceCount) {
|
||||
using Dispatcher = RenderDispatcher<FamilyType>;
|
||||
|
||||
VariableBackup<bool> backup(&ImplicitScaling::apiSupport, true);
|
||||
device->deviceBitfield.set(0b11);
|
||||
device->rootCsrCreated = true;
|
||||
device->numSubDevices = 2;
|
||||
|
||||
auto ultCsr = reinterpret_cast<UltCommandStreamReceiver<FamilyType> *>(device->getDefaultEngine().commandStreamReceiver);
|
||||
ultCsr->staticWorkPartitioningEnabled = true;
|
||||
ultCsr->createWorkPartitionAllocation(*device);
|
||||
|
||||
MockDrmDirectSubmission<FamilyType, Dispatcher> directSubmission(*device.get(),
|
||||
*osContext.get());
|
||||
@@ -326,3 +335,19 @@ HWTEST_F(DrmDirectSubmissionTest, givenMultiTileWhenCreatingDirectSubmissionThen
|
||||
bool ret = directSubmission.allocateResources();
|
||||
EXPECT_TRUE(ret);
|
||||
}
|
||||
|
||||
HWTEST_F(DrmDirectSubmissionTest, givenBlitterDispatcherAndMultiTileDeviceWhenCreatingDirectSubmissionThenExpectActiveTilesEqualsOne) {
|
||||
using Dispatcher = BlitterDispatcher<FamilyType>;
|
||||
|
||||
VariableBackup<bool> backup(&ImplicitScaling::apiSupport, true);
|
||||
device->deviceBitfield.set(0b11);
|
||||
|
||||
MockDrmDirectSubmission<FamilyType, Dispatcher> directSubmission(*device.get(),
|
||||
*osContext.get());
|
||||
|
||||
EXPECT_EQ(1u, directSubmission.activeTiles);
|
||||
EXPECT_FALSE(directSubmission.partitionedMode);
|
||||
|
||||
bool ret = directSubmission.allocateResources();
|
||||
EXPECT_TRUE(ret);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user