mirror of
https://github.com/intel/compute-runtime.git
synced 2025-09-20 13:11:34 +08:00
Fix direct submission wait on multi tile device using single tile context
Related-To: NEO-6244
Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:

committed by
Compute-Runtime-Automation

parent
fe432abbb3
commit
60805cdbcf
@ -154,5 +154,6 @@ class DirectSubmissionHw {
|
||||
bool disableCacheFlush = false;
|
||||
bool disableMonitorFence = false;
|
||||
bool partitionedMode = false;
|
||||
bool partitionConfigSet = true;
|
||||
};
|
||||
} // namespace NEO
|
||||
|
@ -144,12 +144,12 @@ bool DirectSubmissionHw<GfxFamily, Dispatcher>::initialize(bool submitOnInit) {
|
||||
if (ret && submitOnInit) {
|
||||
size_t startBufferSize = Dispatcher::getSizePreemption() +
|
||||
getSizeSemaphoreSection();
|
||||
if (this->partitionedMode) {
|
||||
startBufferSize += EncodeSetMMIO<GfxFamily>::sizeMEM;
|
||||
startBufferSize += EncodeSetMMIO<GfxFamily>::sizeIMM;
|
||||
}
|
||||
|
||||
Dispatcher::dispatchPreemption(ringCommandStream);
|
||||
if (this->partitionedMode) {
|
||||
startBufferSize += (EncodeSetMMIO<GfxFamily>::sizeMEM +
|
||||
EncodeSetMMIO<GfxFamily>::sizeIMM);
|
||||
|
||||
EncodeSetMMIO<GfxFamily>::encodeMEM(ringCommandStream,
|
||||
PartitionRegisters<GfxFamily>::wparidCCSOffset,
|
||||
this->workPartitionAllocation->getGpuAddress());
|
||||
@ -157,6 +157,7 @@ bool DirectSubmissionHw<GfxFamily, Dispatcher>::initialize(bool submitOnInit) {
|
||||
PartitionRegisters<GfxFamily>::addressOffsetCCSOffset,
|
||||
CommonConstants::partitionAddressOffset,
|
||||
true);
|
||||
this->partitionConfigSet = true;
|
||||
}
|
||||
if (workloadMode == 1) {
|
||||
dispatchDiagnosticModeSection();
|
||||
@ -178,12 +179,27 @@ bool DirectSubmissionHw<GfxFamily, Dispatcher>::startRingBuffer() {
|
||||
}
|
||||
|
||||
size_t startSize = getSizeSemaphoreSection();
|
||||
if (!this->partitionConfigSet) {
|
||||
startSize += (EncodeSetMMIO<GfxFamily>::sizeMEM +
|
||||
EncodeSetMMIO<GfxFamily>::sizeIMM);
|
||||
}
|
||||
size_t requiredSize = startSize + getSizeDispatch() + getSizeEnd();
|
||||
if (ringCommandStream.getAvailableSpace() < requiredSize) {
|
||||
switchRingBuffers();
|
||||
}
|
||||
uint64_t gpuStartVa = getCommandBufferPositionGpuAddress(ringCommandStream.getSpace(0));
|
||||
|
||||
if (!this->partitionConfigSet) {
|
||||
EncodeSetMMIO<GfxFamily>::encodeMEM(ringCommandStream,
|
||||
PartitionRegisters<GfxFamily>::wparidCCSOffset,
|
||||
this->workPartitionAllocation->getGpuAddress());
|
||||
EncodeSetMMIO<GfxFamily>::encodeIMM(ringCommandStream,
|
||||
PartitionRegisters<GfxFamily>::addressOffsetCCSOffset,
|
||||
CommonConstants::partitionAddressOffset,
|
||||
true);
|
||||
this->partitionConfigSet = true;
|
||||
}
|
||||
|
||||
currentQueueWorkCount++;
|
||||
dispatchSemaphoreSection(currentQueueWorkCount);
|
||||
|
||||
|
@ -29,13 +29,17 @@ DrmDirectSubmission<GfxFamily, Dispatcher>::DrmDirectSubmission(Device &device,
|
||||
if (DebugManager.flags.DirectSubmissionDisableMonitorFence.get() != -1) {
|
||||
this->disableMonitorFence = DebugManager.flags.DirectSubmissionDisableMonitorFence.get();
|
||||
}
|
||||
auto subDevices = device.getDeviceBitfield();
|
||||
|
||||
auto osContextLinux = static_cast<OsContextLinux *>(&this->osContext);
|
||||
|
||||
auto subDevices = osContextLinux->getDeviceBitfield();
|
||||
bool dispatcherSupport = Dispatcher::isMultiTileSynchronizationSupported();
|
||||
if (ImplicitScalingHelper::isImplicitScalingEnabled(subDevices, true) && dispatcherSupport) {
|
||||
this->activeTiles = static_cast<uint32_t>(subDevices.count());
|
||||
}
|
||||
this->partitionedMode = this->activeTiles > 1u;
|
||||
auto osContextLinux = static_cast<OsContextLinux *>(&this->osContext);
|
||||
this->partitionConfigSet = !this->partitionedMode;
|
||||
|
||||
osContextLinux->getDrm().setDirectSubmissionActive(true);
|
||||
|
||||
if (this->partitionedMode) {
|
||||
|
@ -45,6 +45,7 @@ struct MockDirectSubmissionHw : public DirectSubmissionHw<GfxFamily, Dispatcher>
|
||||
using BaseClass::getSizeSwitchRingBufferSection;
|
||||
using BaseClass::hwInfo;
|
||||
using BaseClass::osContext;
|
||||
using BaseClass::partitionConfigSet;
|
||||
using BaseClass::partitionedMode;
|
||||
using BaseClass::performDiagnosticMode;
|
||||
using BaseClass::ringBuffer;
|
||||
@ -56,6 +57,7 @@ struct MockDirectSubmissionHw : public DirectSubmissionHw<GfxFamily, Dispatcher>
|
||||
using BaseClass::semaphorePtr;
|
||||
using BaseClass::semaphores;
|
||||
using BaseClass::setReturnAddress;
|
||||
using BaseClass::startRingBuffer;
|
||||
using BaseClass::stopRingBuffer;
|
||||
using BaseClass::switchRingBuffersAllocations;
|
||||
using BaseClass::workloadMode;
|
||||
|
@ -44,12 +44,15 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, DirectSubmissionDispatchBufferTest,
|
||||
|
||||
MockDirectSubmissionHw<FamilyType, RenderDispatcher<FamilyType>> directSubmission(*pDevice,
|
||||
*osContext.get());
|
||||
EXPECT_TRUE(directSubmission.partitionConfigSet);
|
||||
directSubmission.partitionConfigSet = false;
|
||||
directSubmission.disableMonitorFence = false;
|
||||
directSubmission.partitionedMode = true;
|
||||
directSubmission.workPartitionAllocation = ultCsr->getWorkPartitionAllocation();
|
||||
|
||||
bool ret = directSubmission.initialize(true);
|
||||
EXPECT_TRUE(ret);
|
||||
EXPECT_TRUE(directSubmission.partitionConfigSet);
|
||||
EXPECT_NE(0x0u, directSubmission.ringCommandStream.getUsed());
|
||||
GraphicsAllocation *oldRingAllocation = directSubmission.ringCommandStream.getGraphicsAllocation();
|
||||
|
||||
@ -119,12 +122,15 @@ HWTEST_F(DirectSubmissionDispatchBufferTest,
|
||||
|
||||
MockDirectSubmissionHw<FamilyType, RenderDispatcher<FamilyType>> directSubmission(*pDevice,
|
||||
*osContext.get());
|
||||
EXPECT_TRUE(directSubmission.partitionConfigSet);
|
||||
directSubmission.activeTiles = 2;
|
||||
directSubmission.partitionedMode = true;
|
||||
directSubmission.partitionConfigSet = false;
|
||||
directSubmission.workPartitionAllocation = ultCsr->getWorkPartitionAllocation();
|
||||
|
||||
bool ret = directSubmission.initialize(true);
|
||||
EXPECT_TRUE(ret);
|
||||
EXPECT_TRUE(directSubmission.partitionConfigSet);
|
||||
EXPECT_NE(0x0u, directSubmission.ringCommandStream.getUsed());
|
||||
|
||||
size_t submitSize = RenderDispatcher<FamilyType>::getSizePreemption() +
|
||||
@ -158,3 +164,60 @@ HWTEST_F(DirectSubmissionDispatchBufferTest,
|
||||
uint64_t gpuAddress = ultCsr->getWorkPartitionAllocation()->getGpuAddress();
|
||||
EXPECT_EQ(gpuAddress, loadRegisterMem->getMemoryAddress());
|
||||
}
|
||||
|
||||
// Checks the deferred-start path: when initialize(false) does not submit the
// ring, nothing is written to the ring command stream and partitionConfigSet
// stays false; the later explicit startRingBuffer() call is then expected to
// emit the partition register programming and flip partitionConfigSet to true.
HWTEST_F(DirectSubmissionDispatchBufferTest,
|
||||
givenDirectSubmissionRingNotStartOnInitWhenMultiTileSupportedThenExpectMultiTileConfigSetDuringExplicitRingStart) {
|
||||
using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM;
|
||||
using MI_LOAD_REGISTER_MEM = typename FamilyType::MI_LOAD_REGISTER_MEM;
|
||||
|
||||
// Make the mock device report two sub-devices before the work partition
// allocation is created below.
pDevice->rootCsrCreated = true;
|
||||
pDevice->numSubDevices = 2;
|
||||
|
||||
auto ultCsr = reinterpret_cast<UltCommandStreamReceiver<FamilyType> *>(pDevice->getDefaultEngine().commandStreamReceiver);
|
||||
ultCsr->staticWorkPartitioningEnabled = true;
|
||||
ultCsr->createWorkPartitionAllocation(*pDevice);
|
||||
|
||||
FlushStampTracker flushStamp(true);
|
||||
|
||||
MockDirectSubmissionHw<FamilyType, RenderDispatcher<FamilyType>> directSubmission(*pDevice,
|
||||
*osContext.get());
|
||||
// A freshly constructed object is expected to report the partition config
// as already set; the test then forces the two-tile, not-yet-configured state by hand.
EXPECT_TRUE(directSubmission.partitionConfigSet);
|
||||
directSubmission.activeTiles = 2;
|
||||
directSubmission.partitionedMode = true;
|
||||
directSubmission.partitionConfigSet = false;
|
||||
directSubmission.workPartitionAllocation = ultCsr->getWorkPartitionAllocation();
|
||||
|
||||
// Phase 1: initialize without submitting. The ring must stay empty and
// unstarted, and the partition config must remain pending.
bool ret = directSubmission.initialize(false);
|
||||
EXPECT_TRUE(ret);
|
||||
EXPECT_FALSE(directSubmission.partitionConfigSet);
|
||||
EXPECT_FALSE(directSubmission.ringStart);
|
||||
EXPECT_EQ(0x0u, directSubmission.ringCommandStream.getUsed());
|
||||
|
||||
// Phase 2: explicit ring start. This call is expected to program the
// partition config and mark the ring as started.
ret = directSubmission.startRingBuffer();
|
||||
EXPECT_TRUE(ret);
|
||||
EXPECT_TRUE(directSubmission.partitionConfigSet);
|
||||
EXPECT_TRUE(directSubmission.ringStart);
|
||||
|
||||
// Parse everything written to the ring from offset 0 and inspect the commands.
HardwareParse hwParse;
|
||||
hwParse.parseCommands<FamilyType>(directSubmission.ringCommandStream, 0);
|
||||
hwParse.findHardwareCommands<FamilyType>();
|
||||
|
||||
// At least one MI_LOAD_REGISTER_IMM must be present; one of them has to
// target register offset 0x23B4 with data 8.
// NOTE(review): 0x23B4 / 8 presumably correspond to
// PartitionRegisters::addressOffsetCCSOffset and
// CommonConstants::partitionAddressOffset - confirm against their definitions.
ASSERT_NE(hwParse.lriList.end(), hwParse.lriList.begin());
|
||||
bool partitionRegisterFound = false;
|
||||
for (auto &it : hwParse.lriList) {
|
||||
auto loadRegisterImm = reinterpret_cast<MI_LOAD_REGISTER_IMM *>(it);
|
||||
if (loadRegisterImm->getRegisterOffset() == 0x23B4u) {
|
||||
|
||||
EXPECT_EQ(8u, loadRegisterImm->getDataDword());
|
||||
partitionRegisterFound = true;
|
||||
}
|
||||
}
|
||||
EXPECT_TRUE(partitionRegisterFound);
|
||||
|
||||
// The first MI_LOAD_REGISTER_MEM in the stream must load register 0x221C
// from the GPU address of the work partition allocation.
// NOTE(review): 0x221C presumably equals PartitionRegisters::wparidCCSOffset - confirm.
auto loadRegisterMemItor = find<MI_LOAD_REGISTER_MEM *>(hwParse.cmdList.begin(), hwParse.cmdList.end());
|
||||
ASSERT_NE(hwParse.cmdList.end(), loadRegisterMemItor);
|
||||
auto loadRegisterMem = reinterpret_cast<MI_LOAD_REGISTER_MEM *>(*loadRegisterMemItor);
|
||||
EXPECT_EQ(0x221Cu, loadRegisterMem->getRegisterAddress());
|
||||
uint64_t gpuAddress = ultCsr->getWorkPartitionAllocation()->getGpuAddress();
|
||||
EXPECT_EQ(gpuAddress, loadRegisterMem->getMemoryAddress());
|
||||
}
|
||||
|
@ -67,6 +67,7 @@ struct MockDrmDirectSubmission : public DrmDirectSubmission<GfxFamily, Dispatche
|
||||
using BaseClass::handleNewResourcesSubmission;
|
||||
using BaseClass::handleResidency;
|
||||
using BaseClass::isNewResourceHandleNeeded;
|
||||
using BaseClass::partitionConfigSet;
|
||||
using BaseClass::partitionedMode;
|
||||
using BaseClass::ringStart;
|
||||
using BaseClass::submit;
|
||||
@ -314,7 +315,8 @@ HWTEST_F(DrmDirectSubmissionTest, givenMultipleActiveTilesWhenWaitingForTagUpdat
|
||||
EXPECT_EQ(2u, CpuIntrinsicsTests::pauseCounter);
|
||||
}
|
||||
|
||||
HWTEST_F(DrmDirectSubmissionTest, givenRenderDispatcherAndMultiTileDeviceWhenCreatingDirectSubmissionThenExpectActiveTilesMatchSubDeviceCount) {
|
||||
HWTEST_F(DrmDirectSubmissionTest,
|
||||
givenRenderDispatcherAndMultiTileDeviceWhenCreatingDirectSubmissionUsingMultiTileContextThenExpectActiveTilesMatchSubDeviceCount) {
|
||||
using Dispatcher = RenderDispatcher<FamilyType>;
|
||||
|
||||
VariableBackup<bool> backup(&ImplicitScaling::apiSupport, true);
|
||||
@ -322,6 +324,12 @@ HWTEST_F(DrmDirectSubmissionTest, givenRenderDispatcherAndMultiTileDeviceWhenCre
|
||||
device->rootCsrCreated = true;
|
||||
device->numSubDevices = 2;
|
||||
|
||||
osContext = std::make_unique<OsContextLinux>(*executionEnvironment.rootDeviceEnvironments[0]->osInterface->getDriverModel()->as<Drm>(), 0u,
|
||||
EngineDescriptorHelper::getDefaultDescriptor({aub_stream::ENGINE_RCS, EngineUsage::Regular},
|
||||
PreemptionMode::ThreadGroup, device->getDeviceBitfield()));
|
||||
osContext->ensureContextInitialized();
|
||||
EXPECT_EQ(2u, osContext->getDeviceBitfield().count());
|
||||
|
||||
auto ultCsr = reinterpret_cast<UltCommandStreamReceiver<FamilyType> *>(device->getDefaultEngine().commandStreamReceiver);
|
||||
ultCsr->staticWorkPartitioningEnabled = true;
|
||||
ultCsr->createWorkPartitionAllocation(*device);
|
||||
@ -331,6 +339,32 @@ HWTEST_F(DrmDirectSubmissionTest, givenRenderDispatcherAndMultiTileDeviceWhenCre
|
||||
|
||||
EXPECT_EQ(2u, directSubmission.activeTiles);
|
||||
EXPECT_TRUE(directSubmission.partitionedMode);
|
||||
EXPECT_FALSE(directSubmission.partitionConfigSet);
|
||||
|
||||
bool ret = directSubmission.allocateResources();
|
||||
EXPECT_TRUE(ret);
|
||||
}
|
||||
|
||||
HWTEST_F(DrmDirectSubmissionTest, givenRenderDispatcherAndMultiTileDeviceWhenCreatingDirectSubmissionSingleTileContextThenExpectActiveTilesEqualsSingleTile) {
|
||||
using Dispatcher = RenderDispatcher<FamilyType>;
|
||||
|
||||
VariableBackup<bool> backup(&ImplicitScaling::apiSupport, true);
|
||||
device->deviceBitfield.set(0b11);
|
||||
device->rootCsrCreated = true;
|
||||
device->numSubDevices = 2;
|
||||
|
||||
EXPECT_EQ(1u, osContext->getDeviceBitfield().count());
|
||||
|
||||
auto ultCsr = reinterpret_cast<UltCommandStreamReceiver<FamilyType> *>(device->getDefaultEngine().commandStreamReceiver);
|
||||
ultCsr->staticWorkPartitioningEnabled = true;
|
||||
ultCsr->createWorkPartitionAllocation(*device);
|
||||
|
||||
MockDrmDirectSubmission<FamilyType, Dispatcher> directSubmission(*device.get(),
|
||||
*osContext.get());
|
||||
|
||||
EXPECT_EQ(1u, directSubmission.activeTiles);
|
||||
EXPECT_FALSE(directSubmission.partitionedMode);
|
||||
EXPECT_TRUE(directSubmission.partitionConfigSet);
|
||||
|
||||
bool ret = directSubmission.allocateResources();
|
||||
EXPECT_TRUE(ret);
|
||||
@ -342,11 +376,18 @@ HWTEST_F(DrmDirectSubmissionTest, givenBlitterDispatcherAndMultiTileDeviceWhenCr
|
||||
VariableBackup<bool> backup(&ImplicitScaling::apiSupport, true);
|
||||
device->deviceBitfield.set(0b11);
|
||||
|
||||
osContext = std::make_unique<OsContextLinux>(*executionEnvironment.rootDeviceEnvironments[0]->osInterface->getDriverModel()->as<Drm>(), 0u,
|
||||
EngineDescriptorHelper::getDefaultDescriptor({aub_stream::ENGINE_RCS, EngineUsage::Regular},
|
||||
PreemptionMode::ThreadGroup, device->getDeviceBitfield()));
|
||||
osContext->ensureContextInitialized();
|
||||
EXPECT_EQ(2u, osContext->getDeviceBitfield().count());
|
||||
|
||||
MockDrmDirectSubmission<FamilyType, Dispatcher> directSubmission(*device.get(),
|
||||
*osContext.get());
|
||||
|
||||
EXPECT_EQ(1u, directSubmission.activeTiles);
|
||||
EXPECT_FALSE(directSubmission.partitionedMode);
|
||||
EXPECT_TRUE(directSubmission.partitionConfigSet);
|
||||
|
||||
bool ret = directSubmission.allocateResources();
|
||||
EXPECT_TRUE(ret);
|
||||
|
Reference in New Issue
Block a user