feature direct submission: use tag allocation as a completion fence

use tag allocation address as a completion address in exec call
wait for completion value before destroying drm direct submission

Related-To: NEO-6643
Signed-off-by: Mateusz Jablonski <mateusz.jablonski@intel.com>
This commit is contained in:
Mateusz Jablonski
2022-04-20 17:32:39 +00:00
committed by Compute-Runtime-Automation
parent 38190c5d17
commit 03185f7111
12 changed files with 408 additions and 5 deletions

View File

@@ -17,6 +17,7 @@
#include "shared/test/common/helpers/variable_backup.h"
#include "shared/test/common/libult/linux/drm_mock.h"
#include "shared/test/common/libult/ult_command_stream_receiver.h"
#include "shared/test/common/mocks/linux/mock_drm_allocation.h"
#include "shared/test/common/mocks/mock_device.h"
#include "shared/test/common/os_interface/linux/drm_memory_manager_tests.h"
#include "shared/test/common/test_macros/test.h"
@@ -57,6 +58,8 @@ struct MockDrmDirectSubmission : public DrmDirectSubmission<GfxFamily, Dispatche
using BaseClass = DrmDirectSubmission<GfxFamily, Dispatcher>;
using BaseClass::activeTiles;
using BaseClass::allocateResources;
using BaseClass::completionFenceAllocation;
using BaseClass::completionFenceValue;
using BaseClass::currentTagData;
using BaseClass::disableMonitorFence;
using BaseClass::dispatchSwitchRingBufferSection;
@@ -70,6 +73,7 @@ struct MockDrmDirectSubmission : public DrmDirectSubmission<GfxFamily, Dispatche
using BaseClass::partitionConfigSet;
using BaseClass::partitionedMode;
using BaseClass::postSyncOffset;
using BaseClass::ringBuffer;
using BaseClass::ringStart;
using BaseClass::submit;
using BaseClass::switchRingBuffers;
@@ -116,6 +120,294 @@ HWTEST_F(DrmDirectSubmissionTest, whenCreateDirectSubmissionThenValidObjectIsRet
EXPECT_TRUE(ret);
}
HWTEST_F(DrmDirectSubmissionTest, givenCompletionFenceSupportWhenCreateDrmDirectSubmissionThenTagAllocationIsSetAsCompletionFenceAllocation) {
DebugManagerStateRestore restorer;
DebugManager.flags.EnableDrmCompletionFence.set(1);
auto &commandStreamReceiver = *device->getDefaultEngine().commandStreamReceiver;
auto drm = executionEnvironment.rootDeviceEnvironments[0]->osInterface->getDriverModel()->as<Drm>();
ASSERT_TRUE(drm->completionFenceSupport());
auto expectedCompletionFenceAllocation = commandStreamReceiver.getTagAllocation();
EXPECT_NE(nullptr, expectedCompletionFenceAllocation);
{
MockDrmDirectSubmission<FamilyType, RenderDispatcher<FamilyType>> directSubmission(commandStreamReceiver);
EXPECT_EQ(expectedCompletionFenceAllocation, directSubmission.completionFenceAllocation);
}
{
MockDrmDirectSubmission<FamilyType, BlitterDispatcher<FamilyType>> directSubmission(commandStreamReceiver);
EXPECT_EQ(expectedCompletionFenceAllocation, directSubmission.completionFenceAllocation);
}
}
HWTEST_F(DrmDirectSubmissionTest, givenNoCompletionFenceSupportWhenCreateDrmDirectSubmissionThenCompletionFenceAllocationIsNotSet) {
DebugManagerStateRestore restorer;
DebugManager.flags.EnableDrmCompletionFence.set(0);
auto &commandStreamReceiver = *device->getDefaultEngine().commandStreamReceiver;
auto drm = executionEnvironment.rootDeviceEnvironments[0]->osInterface->getDriverModel()->as<Drm>();
ASSERT_FALSE(drm->completionFenceSupport());
{
MockDrmDirectSubmission<FamilyType, RenderDispatcher<FamilyType>> directSubmission(commandStreamReceiver);
EXPECT_EQ(nullptr, directSubmission.completionFenceAllocation);
}
{
MockDrmDirectSubmission<FamilyType, BlitterDispatcher<FamilyType>> directSubmission(commandStreamReceiver);
EXPECT_EQ(nullptr, directSubmission.completionFenceAllocation);
}
}
HWTEST_F(DrmDirectSubmissionTest, givenDirectSubmissionWithoutCompletionFenceAllocationWhenDestroyingThenNoWaitForUserFenceIsCalled) {
DebugManagerStateRestore restorer;
DebugManager.flags.EnableDrmCompletionFence.set(0);
auto &commandStreamReceiver = *device->getDefaultEngine().commandStreamReceiver;
auto drm = static_cast<DrmMock *>(executionEnvironment.rootDeviceEnvironments[0]->osInterface->getDriverModel()->as<Drm>());
ASSERT_FALSE(drm->completionFenceSupport());
drm->waitUserFenceParams.clear();
{
MockDrmDirectSubmission<FamilyType, RenderDispatcher<FamilyType>> directSubmission(commandStreamReceiver);
directSubmission.completionFenceValue = 10;
}
EXPECT_EQ(0u, drm->waitUserFenceParams.size());
}
HWTEST_F(DrmDirectSubmissionTest, givenCompletionFenceSupportAndFenceIsNotCompletedWhenDestroyingThenWaitForUserFenceIsCalled) {
DebugManagerStateRestore restorer;
DebugManager.flags.EnableDrmCompletionFence.set(1);
auto &commandStreamReceiver = *device->getDefaultEngine().commandStreamReceiver;
auto drm = static_cast<DrmMock *>(executionEnvironment.rootDeviceEnvironments[0]->osInterface->getDriverModel()->as<Drm>());
ASSERT_TRUE(drm->completionFenceSupport());
drm->waitUserFenceParams.clear();
{
MockDrmDirectSubmission<FamilyType, RenderDispatcher<FamilyType>> directSubmission(commandStreamReceiver);
directSubmission.completionFenceValue = 10;
}
EXPECT_EQ(osContext->getDrmContextIds().size(), drm->waitUserFenceParams.size());
}
HWTEST_F(DrmDirectSubmissionTest, givenCompletionFenceSupportAndFenceIsNotCompletedWhenWaitOnSpecificAddressesPerOsContext) {
DebugManagerStateRestore restorer;
DebugManager.flags.EnableDrmCompletionFence.set(1);
auto &commandStreamReceiver = device->getUltCommandStreamReceiver<FamilyType>();
memset(commandStreamReceiver.getTagAllocation()->getUnderlyingBuffer(), 0, commandStreamReceiver.getTagAllocation()->getUnderlyingBufferSize());
auto drm = static_cast<DrmMock *>(executionEnvironment.rootDeviceEnvironments[0]->osInterface->getDriverModel()->as<Drm>());
ASSERT_TRUE(drm->completionFenceSupport());
auto completionFenceBaseCpuAddress = reinterpret_cast<uint64_t>(commandStreamReceiver.getTagAddress()) + Drm::completionFenceOffset;
uint32_t expectedCompletionValueToWait = 10u;
{
DeviceBitfield firstTileBitfield{0b01};
OsContextLinux osContext(*drm, 0u,
EngineDescriptorHelper::getDefaultDescriptor({aub_stream::ENGINE_RCS, EngineUsage::Regular},
PreemptionMode::ThreadGroup, firstTileBitfield));
osContext.ensureContextInitialized();
commandStreamReceiver.setupContext(osContext);
drm->waitUserFenceParams.clear();
{
MockDrmDirectSubmission<FamilyType, RenderDispatcher<FamilyType>> directSubmission(commandStreamReceiver);
directSubmission.completionFenceValue = expectedCompletionValueToWait;
}
EXPECT_EQ(1u, drm->waitUserFenceParams.size());
EXPECT_EQ(expectedCompletionValueToWait, drm->waitUserFenceParams[0].value);
EXPECT_EQ(completionFenceBaseCpuAddress, drm->waitUserFenceParams[0].address);
}
{
DeviceBitfield secondTileBitfield{0b10};
OsContextLinux osContext(*drm, 0u,
EngineDescriptorHelper::getDefaultDescriptor({aub_stream::ENGINE_RCS, EngineUsage::Regular},
PreemptionMode::ThreadGroup, secondTileBitfield));
osContext.ensureContextInitialized();
commandStreamReceiver.setupContext(osContext);
drm->waitUserFenceParams.clear();
{
MockDrmDirectSubmission<FamilyType, RenderDispatcher<FamilyType>> directSubmission(commandStreamReceiver);
directSubmission.completionFenceValue = expectedCompletionValueToWait;
}
EXPECT_EQ(1u, drm->waitUserFenceParams.size());
EXPECT_EQ(expectedCompletionValueToWait, drm->waitUserFenceParams[0].value);
EXPECT_EQ(completionFenceBaseCpuAddress, drm->waitUserFenceParams[0].address);
}
{
DeviceBitfield twoTilesBitfield{0b11};
OsContextLinux osContext(*drm, 0u,
EngineDescriptorHelper::getDefaultDescriptor({aub_stream::ENGINE_RCS, EngineUsage::Regular},
PreemptionMode::ThreadGroup, twoTilesBitfield));
osContext.ensureContextInitialized();
commandStreamReceiver.setupContext(osContext);
drm->waitUserFenceParams.clear();
MockGraphicsAllocation workPartitionAllocation{};
commandStreamReceiver.workPartitionAllocation = &workPartitionAllocation;
{
MockDrmDirectSubmission<FamilyType, RenderDispatcher<FamilyType>> directSubmission(commandStreamReceiver);
directSubmission.completionFenceValue = expectedCompletionValueToWait;
}
commandStreamReceiver.workPartitionAllocation = nullptr;
EXPECT_EQ(2u, drm->waitUserFenceParams.size());
EXPECT_EQ(expectedCompletionValueToWait, drm->waitUserFenceParams[0].value);
EXPECT_EQ(completionFenceBaseCpuAddress, drm->waitUserFenceParams[0].address);
EXPECT_EQ(expectedCompletionValueToWait, drm->waitUserFenceParams[1].value);
EXPECT_EQ(completionFenceBaseCpuAddress + commandStreamReceiver.getPostSyncWriteOffset(), drm->waitUserFenceParams[1].address);
}
commandStreamReceiver.setupContext(*osContext);
}
HWTEST_F(DrmDirectSubmissionTest, givenNoCompletionFenceSupportWhenSubmittingThenNoCompletionAddressIsPassedToExec) {
uint64_t gpuAddress = 0x1000;
size_t size = 0x1000;
MockDrmDirectSubmission<FamilyType, RenderDispatcher<FamilyType>> drmDirectSubmission(*device->getDefaultEngine().commandStreamReceiver);
drmDirectSubmission.completionFenceAllocation = nullptr;
EXPECT_TRUE(drmDirectSubmission.allocateResources());
auto ringBuffer = static_cast<DrmAllocation *>(drmDirectSubmission.ringBuffer);
auto initialBO = ringBuffer->getBufferObjectToModify(0);
auto drm = executionEnvironment.rootDeviceEnvironments[0]->osInterface->getDriverModel()->as<Drm>();
MockBufferObject mockBO(drm);
ringBuffer->getBufferObjectToModify(0) = &mockBO;
for (auto i = 0; i < 2; i++) {
mockBO.passedExecParams.clear();
EXPECT_TRUE(drmDirectSubmission.submit(gpuAddress, size));
ASSERT_EQ(1u, mockBO.passedExecParams.size());
EXPECT_EQ(0u, mockBO.passedExecParams[0].completionGpuAddress);
EXPECT_EQ(0u, mockBO.passedExecParams[0].completionValue);
}
ringBuffer->getBufferObjectToModify(0) = initialBO;
}
HWTEST_F(DrmDirectSubmissionTest, givenTile0AndCompletionFenceSupportWhenSubmittingThenCompletionAddressAndValueArePassedToExec) {
uint64_t gpuAddress = 0x1000;
size_t size = 0x1000;
auto &commandStreamReceiver = *device->getDefaultEngine().commandStreamReceiver;
auto drm = executionEnvironment.rootDeviceEnvironments[0]->osInterface->getDriverModel()->as<Drm>();
auto completionFenceBaseGpuAddress = commandStreamReceiver.getTagAllocation()->getGpuAddress() + Drm::completionFenceOffset;
DeviceBitfield firstTileBitfield{0b01};
OsContextLinux osContextTile0(*drm, 0u,
EngineDescriptorHelper::getDefaultDescriptor({aub_stream::ENGINE_RCS, EngineUsage::Regular},
PreemptionMode::ThreadGroup, firstTileBitfield));
osContextTile0.ensureContextInitialized();
commandStreamReceiver.setupContext(osContextTile0);
MockDrmDirectSubmission<FamilyType, RenderDispatcher<FamilyType>> drmDirectSubmission(commandStreamReceiver);
drmDirectSubmission.completionFenceAllocation = commandStreamReceiver.getTagAllocation();
EXPECT_TRUE(drmDirectSubmission.allocateResources());
auto ringBuffer = static_cast<DrmAllocation *>(drmDirectSubmission.ringBuffer);
auto initialBO = ringBuffer->getBufferObjectToModify(0);
MockBufferObject mockBO(drm);
ringBuffer->getBufferObjectToModify(0) = &mockBO;
for (auto i = 0u; i < 2; i++) {
mockBO.passedExecParams.clear();
EXPECT_TRUE(drmDirectSubmission.submit(gpuAddress, size));
ASSERT_EQ(1u, mockBO.passedExecParams.size());
EXPECT_EQ(completionFenceBaseGpuAddress, mockBO.passedExecParams[0].completionGpuAddress);
EXPECT_EQ(i + 1, mockBO.passedExecParams[0].completionValue);
}
ringBuffer->getBufferObjectToModify(0) = initialBO;
commandStreamReceiver.setupContext(*osContext);
}
HWTEST_F(DrmDirectSubmissionTest, givenTile1AndCompletionFenceSupportWhenSubmittingThenCompletionAddressAndValueArePassedToExec) {
uint64_t gpuAddress = 0x1000;
size_t size = 0x1000;
auto &commandStreamReceiver = *device->getDefaultEngine().commandStreamReceiver;
auto drm = executionEnvironment.rootDeviceEnvironments[0]->osInterface->getDriverModel()->as<Drm>();
auto completionFenceBaseGpuAddress = commandStreamReceiver.getTagAllocation()->getGpuAddress() + Drm::completionFenceOffset;
DeviceBitfield secondTileBitfield{0b10};
OsContextLinux osContextTile1(*drm, 0u,
EngineDescriptorHelper::getDefaultDescriptor({aub_stream::ENGINE_RCS, EngineUsage::Regular},
PreemptionMode::ThreadGroup, secondTileBitfield));
osContextTile1.ensureContextInitialized();
commandStreamReceiver.setupContext(osContextTile1);
MockDrmDirectSubmission<FamilyType, RenderDispatcher<FamilyType>> drmDirectSubmission(commandStreamReceiver);
drmDirectSubmission.completionFenceAllocation = commandStreamReceiver.getTagAllocation();
EXPECT_TRUE(drmDirectSubmission.allocateResources());
auto ringBuffer = static_cast<DrmAllocation *>(drmDirectSubmission.ringBuffer);
auto initialBO = ringBuffer->getBufferObjectToModify(0);
MockBufferObject mockBO(drm);
ringBuffer->getBufferObjectToModify(0) = &mockBO;
for (auto i = 0u; i < 2; i++) {
mockBO.passedExecParams.clear();
EXPECT_TRUE(drmDirectSubmission.submit(gpuAddress, size));
ASSERT_EQ(1u, mockBO.passedExecParams.size());
EXPECT_EQ(completionFenceBaseGpuAddress, mockBO.passedExecParams[0].completionGpuAddress);
EXPECT_EQ(i + 1, mockBO.passedExecParams[0].completionValue);
}
ringBuffer->getBufferObjectToModify(0) = initialBO;
commandStreamReceiver.setupContext(*osContext);
}
HWTEST_F(DrmDirectSubmissionTest, givenTwoTilesAndCompletionFenceSupportWhenSubmittingThenCompletionAddressAndValueArePassedToExec) {
uint64_t gpuAddress = 0x1000;
size_t size = 0x1000;
auto &commandStreamReceiver = device->getUltCommandStreamReceiver<FamilyType>();
auto drm = executionEnvironment.rootDeviceEnvironments[0]->osInterface->getDriverModel()->as<Drm>();
auto completionFenceBaseGpuAddress = commandStreamReceiver.getTagAllocation()->getGpuAddress() + Drm::completionFenceOffset;
DeviceBitfield twoTilesBitfield{0b11};
OsContextLinux osContextBothTiles(*drm, 0u,
EngineDescriptorHelper::getDefaultDescriptor({aub_stream::ENGINE_RCS, EngineUsage::Regular},
PreemptionMode::ThreadGroup, twoTilesBitfield));
osContextBothTiles.ensureContextInitialized();
commandStreamReceiver.setupContext(osContextBothTiles);
MockGraphicsAllocation workPartitionAllocation{};
commandStreamReceiver.workPartitionAllocation = &workPartitionAllocation;
MockDrmDirectSubmission<FamilyType, RenderDispatcher<FamilyType>> drmDirectSubmission(commandStreamReceiver);
commandStreamReceiver.workPartitionAllocation = nullptr;
drmDirectSubmission.completionFenceAllocation = commandStreamReceiver.getTagAllocation();
EXPECT_TRUE(drmDirectSubmission.allocateResources());
auto ringBuffer = static_cast<DrmAllocation *>(drmDirectSubmission.ringBuffer);
auto initialBO = ringBuffer->getBufferObjectToModify(0);
MockBufferObject mockBO(drm);
ringBuffer->getBufferObjectToModify(0) = &mockBO;
for (auto i = 0u; i < 2; i++) {
mockBO.passedExecParams.clear();
EXPECT_TRUE(drmDirectSubmission.submit(gpuAddress, size));
ASSERT_EQ(2u, mockBO.passedExecParams.size());
EXPECT_EQ(completionFenceBaseGpuAddress, mockBO.passedExecParams[0].completionGpuAddress);
EXPECT_EQ(i + 1, mockBO.passedExecParams[0].completionValue);
EXPECT_EQ(completionFenceBaseGpuAddress + commandStreamReceiver.getPostSyncWriteOffset(), mockBO.passedExecParams[1].completionGpuAddress);
EXPECT_EQ(i + 1, mockBO.passedExecParams[1].completionValue);
}
ringBuffer->getBufferObjectToModify(0) = initialBO;
commandStreamReceiver.setupContext(*osContext);
}
HWTEST_F(DrmDirectSubmissionTest, givenDisabledMonitorFenceWhenDispatchSwitchRingBufferThenDispatchPipeControl) {
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
using Dispatcher = RenderDispatcher<FamilyType>;