feature direct submission: use tag allocation as a completion fence

Use the tag allocation address as the completion address in the exec call.
Wait for the completion value before destroying the DRM direct submission.

Related-To: NEO-6643
Signed-off-by: Mateusz Jablonski <mateusz.jablonski@intel.com>
This commit is contained in:
Mateusz Jablonski
2022-04-20 17:32:39 +00:00
committed by Compute-Runtime-Automation
parent 38190c5d17
commit 03185f7111
12 changed files with 408 additions and 5 deletions

View File

@@ -321,7 +321,7 @@ class CommandStreamReceiver {
MOCKABLE_VIRTUAL bool isGpuHangDetected() const; MOCKABLE_VIRTUAL bool isGpuHangDetected() const;
uint64_t getCompletionAddress() { uint64_t getCompletionAddress() const {
uint64_t completionFenceAddress = castToUint64(const_cast<uint32_t *>(getTagAddress())); uint64_t completionFenceAddress = castToUint64(const_cast<uint32_t *>(getTagAddress()));
if (completionFenceAddress == 0) { if (completionFenceAddress == 0) {
return 0; return 0;

View File

@@ -14,6 +14,7 @@ DirectSubmissionInputParams::DirectSubmissionInputParams(const CommandStreamRece
memoryManager = commandStreamReceiver.getMemoryManager(); memoryManager = commandStreamReceiver.getMemoryManager();
globalFenceAllocation = commandStreamReceiver.getGlobalFenceAllocation(); globalFenceAllocation = commandStreamReceiver.getGlobalFenceAllocation();
workPartitionAllocation = commandStreamReceiver.getWorkPartitionAllocation(); workPartitionAllocation = commandStreamReceiver.getWorkPartitionAllocation();
completionFenceAllocation = commandStreamReceiver.getTagAllocation();
} }
} // namespace NEO } // namespace NEO

View File

@@ -64,6 +64,7 @@ struct DirectSubmissionInputParams : NonCopyableClass {
MemoryManager *memoryManager = nullptr; MemoryManager *memoryManager = nullptr;
const GraphicsAllocation *globalFenceAllocation = nullptr; const GraphicsAllocation *globalFenceAllocation = nullptr;
GraphicsAllocation *workPartitionAllocation = nullptr; GraphicsAllocation *workPartitionAllocation = nullptr;
GraphicsAllocation *completionFenceAllocation = nullptr;
const uint32_t rootDeviceIndex; const uint32_t rootDeviceIndex;
}; };
@@ -160,6 +161,7 @@ class DirectSubmissionHw {
MemoryOperationsHandler *memoryOperationHandler = nullptr; MemoryOperationsHandler *memoryOperationHandler = nullptr;
const HardwareInfo *hwInfo = nullptr; const HardwareInfo *hwInfo = nullptr;
const GraphicsAllocation *globalFenceAllocation = nullptr; const GraphicsAllocation *globalFenceAllocation = nullptr;
GraphicsAllocation *completionFenceAllocation = nullptr;
GraphicsAllocation *ringBuffer = nullptr; GraphicsAllocation *ringBuffer = nullptr;
GraphicsAllocation *ringBuffer2 = nullptr; GraphicsAllocation *ringBuffer2 = nullptr;
GraphicsAllocation *semaphores = nullptr; GraphicsAllocation *semaphores = nullptr;

View File

@@ -100,6 +100,10 @@ bool DirectSubmissionHw<GfxFamily, Dispatcher>::allocateResources() {
allocations.push_back(workPartitionAllocation); allocations.push_back(workPartitionAllocation);
} }
if (completionFenceAllocation != nullptr) {
allocations.push_back(completionFenceAllocation);
}
if (DebugManager.flags.DirectSubmissionPrintBuffers.get()) { if (DebugManager.flags.DirectSubmissionPrintBuffers.get()) {
printf("Ring buffer 1 - gpu address: %" PRIx64 " - %" PRIx64 ", cpu address: %p - %p, size: %zu \n", printf("Ring buffer 1 - gpu address: %" PRIx64 " - %" PRIx64 ", cpu address: %p - %p, size: %zu \n",
ringBuffer->getGpuAddress(), ringBuffer->getGpuAddress(),

View File

@@ -35,7 +35,8 @@ class DrmDirectSubmission : public DirectSubmissionHw<GfxFamily, Dispatcher> {
MOCKABLE_VIRTUAL void wait(uint32_t taskCountToWait); MOCKABLE_VIRTUAL void wait(uint32_t taskCountToWait);
TagData currentTagData; TagData currentTagData{};
volatile uint32_t *tagAddress; volatile uint32_t *tagAddress;
uint32_t completionFenceValue{};
}; };
} // namespace NEO } // namespace NEO

View File

@@ -39,12 +39,17 @@ DrmDirectSubmission<GfxFamily, Dispatcher>::DrmDirectSubmission(const DirectSubm
this->partitionedMode = this->activeTiles > 1u; this->partitionedMode = this->activeTiles > 1u;
this->partitionConfigSet = !this->partitionedMode; this->partitionConfigSet = !this->partitionedMode;
osContextLinux->getDrm().setDirectSubmissionActive(true); auto &drm = osContextLinux->getDrm();
drm.setDirectSubmissionActive(true);
if (this->partitionedMode) { if (this->partitionedMode) {
this->workPartitionAllocation = inputParams.workPartitionAllocation; this->workPartitionAllocation = inputParams.workPartitionAllocation;
UNRECOVERABLE_IF(this->workPartitionAllocation == nullptr); UNRECOVERABLE_IF(this->workPartitionAllocation == nullptr);
} }
if (drm.completionFenceSupport()) {
this->completionFenceAllocation = inputParams.completionFenceAllocation;
}
} }
template <typename GfxFamily, typename Dispatcher> template <typename GfxFamily, typename Dispatcher>
@@ -53,6 +58,24 @@ inline DrmDirectSubmission<GfxFamily, Dispatcher>::~DrmDirectSubmission() {
this->stopRingBuffer(); this->stopRingBuffer();
this->wait(static_cast<uint32_t>(this->currentTagData.tagValue)); this->wait(static_cast<uint32_t>(this->currentTagData.tagValue));
} }
if (this->completionFenceAllocation) {
auto osContextLinux = static_cast<OsContextLinux *>(&this->osContext);
auto &drm = osContextLinux->getDrm();
auto &drmContextIds = osContextLinux->getDrmContextIds();
uint32_t drmContextId = 0u;
auto completionFenceCpuAddress = reinterpret_cast<uint64_t>(this->completionFenceAllocation->getUnderlyingBuffer()) + Drm::completionFenceOffset;
for (auto drmIterator = 0u; drmIterator < osContextLinux->getDeviceBitfield().size(); drmIterator++) {
if (osContextLinux->getDeviceBitfield().test(drmIterator)) {
if (*reinterpret_cast<uint32_t *>(completionFenceCpuAddress) < completionFenceValue) {
constexpr int64_t timeout = -1;
constexpr uint16_t flags = 0;
drm.waitUserFence(drmContextIds[drmContextId], completionFenceCpuAddress, completionFenceValue, Drm::ValueWidth::U32, timeout, flags);
}
drmContextId++;
completionFenceCpuAddress = ptrOffset(completionFenceCpuAddress, this->postSyncOffset);
}
}
}
this->deallocateResources(); this->deallocateResources();
} }
@@ -81,6 +104,14 @@ bool DrmDirectSubmission<GfxFamily, Dispatcher>::submit(uint64_t gpuAddress, siz
bool ret = false; bool ret = false;
uint32_t drmContextId = 0u; uint32_t drmContextId = 0u;
uint32_t completionValue = 0u;
uint64_t completionFenceGpuAddress = 0u;
if (this->completionFenceAllocation) {
completionValue = ++completionFenceValue;
completionFenceGpuAddress = this->completionFenceAllocation->getGpuAddress() + Drm::completionFenceOffset;
}
for (auto drmIterator = 0u; drmIterator < osContextLinux->getDeviceBitfield().size(); drmIterator++) { for (auto drmIterator = 0u; drmIterator < osContextLinux->getDeviceBitfield().size(); drmIterator++) {
if (osContextLinux->getDeviceBitfield().test(drmIterator)) { if (osContextLinux->getDeviceBitfield().test(drmIterator)) {
ret |= !!bb->exec(static_cast<uint32_t>(size), ret |= !!bb->exec(static_cast<uint32_t>(size),
@@ -93,9 +124,12 @@ bool DrmDirectSubmission<GfxFamily, Dispatcher>::submit(uint64_t gpuAddress, siz
nullptr, nullptr,
0, 0,
&execObject, &execObject,
0, completionFenceGpuAddress,
0); completionValue);
drmContextId++; drmContextId++;
if (completionFenceGpuAddress) {
completionFenceGpuAddress += this->postSyncOffset;
}
} }
} }

View File

@@ -122,6 +122,7 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw<GfxFamily>, publ
using BaseClass::CommandStreamReceiver::useNotifyEnableForPostSync; using BaseClass::CommandStreamReceiver::useNotifyEnableForPostSync;
using BaseClass::CommandStreamReceiver::userPauseConfirmation; using BaseClass::CommandStreamReceiver::userPauseConfirmation;
using BaseClass::CommandStreamReceiver::waitForTaskCountAndCleanAllocationList; using BaseClass::CommandStreamReceiver::waitForTaskCountAndCleanAllocationList;
using BaseClass::CommandStreamReceiver::workPartitionAllocation;
UltCommandStreamReceiver(ExecutionEnvironment &executionEnvironment, UltCommandStreamReceiver(ExecutionEnvironment &executionEnvironment,
uint32_t rootDeviceIndex, uint32_t rootDeviceIndex,

View File

@@ -18,8 +18,20 @@ class MockBufferObject : public BufferObject {
using BufferObject::BufferObject; using BufferObject::BufferObject;
using BufferObject::handle; using BufferObject::handle;
// Completion-fence arguments observed in a single exec() call.
struct ExecParams {
    uint64_t completionGpuAddress{0u};
    uint32_t completionValue{0u};
};
// Captured exec() arguments, appended in call order.
std::vector<ExecParams> passedExecParams;
MockBufferObject(Drm *drm) : BufferObject(drm, CommonConstants::unsupportedPatIndex, 0, 0, 1) { MockBufferObject(Drm *drm) : BufferObject(drm, CommonConstants::unsupportedPatIndex, 0, 0, 1) {
} }
int exec(uint32_t used, size_t startOffset, unsigned int flags, bool requiresCoherency, OsContext *osContext, uint32_t vmHandleId, uint32_t drmContextId,
BufferObject *const residency[], size_t residencyCount, drm_i915_gem_exec_object2 *execObjectsStorage, uint64_t completionGpuAddress, uint32_t completionValue) override {
passedExecParams.push_back({completionGpuAddress, completionValue});
return BufferObject::exec(used, startOffset, flags, requiresCoherency, osContext, vmHandleId, drmContextId,
residency, residencyCount, execObjectsStorage, completionGpuAddress, completionValue);
}
}; };
class MockDrmAllocation : public DrmAllocation { class MockDrmAllocation : public DrmAllocation {

View File

@@ -17,6 +17,7 @@ struct MockDirectSubmissionHw : public DirectSubmissionHw<GfxFamily, Dispatcher>
using BaseClass = DirectSubmissionHw<GfxFamily, Dispatcher>; using BaseClass = DirectSubmissionHw<GfxFamily, Dispatcher>;
using BaseClass::activeTiles; using BaseClass::activeTiles;
using BaseClass::allocateResources; using BaseClass::allocateResources;
using BaseClass::completionFenceAllocation;
using BaseClass::completionRingBuffers; using BaseClass::completionRingBuffers;
using BaseClass::cpuCachelineFlush; using BaseClass::cpuCachelineFlush;
using BaseClass::currentQueueWorkCount; using BaseClass::currentQueueWorkCount;

View File

@@ -57,6 +57,11 @@ class MockMemoryOperations : public MemoryOperationsHandler {
if (osContext) { if (osContext) {
makeResidentContextId = osContext->getContextId(); makeResidentContextId = osContext->getContextId();
} }
if (captureGfxAllocationsForMakeResident) {
for (auto &gfxAllocation : gfxAllocations) {
gfxAllocationsForMakeResident.push_back(gfxAllocation);
}
}
return MemoryOperationsStatus::SUCCESS; return MemoryOperationsStatus::SUCCESS;
} }
MemoryOperationsStatus evictWithinOsContext(OsContext *osContext, GraphicsAllocation &gfxAllocation) override { MemoryOperationsStatus evictWithinOsContext(OsContext *osContext, GraphicsAllocation &gfxAllocation) override {
@@ -64,9 +69,11 @@ class MockMemoryOperations : public MemoryOperationsHandler {
return MemoryOperationsStatus::SUCCESS; return MemoryOperationsStatus::SUCCESS;
} }
std::vector<GraphicsAllocation *> gfxAllocationsForMakeResident{};
int makeResidentCalledCount = 0; int makeResidentCalledCount = 0;
int evictCalledCount = 0; int evictCalledCount = 0;
uint32_t makeResidentContextId = std::numeric_limits<uint32_t>::max(); uint32_t makeResidentContextId = std::numeric_limits<uint32_t>::max();
bool captureGfxAllocationsForMakeResident = false;
}; };
} // namespace NEO } // namespace NEO

View File

@@ -118,6 +118,54 @@ HWTEST_F(DirectSubmissionTest, givenDirectSubmissionWhenMakingResourcesResidentT
pDevice->getRootDeviceEnvironmentRef().memoryOperationsInterface.release(); pDevice->getRootDeviceEnvironmentRef().memoryOperationsInterface.release();
} }
// Without a completion fence allocation, initialize() must make exactly three allocations
// resident in a single call: ring buffer 1, ring buffer 2 and the semaphores buffer.
HWTEST_F(DirectSubmissionTest, givenDirectSubmissionWithoutCompletionFenceAllocationWhenAllocatingResourcesThenMakeResidentIsCalledForRingAndSemaphoreBuffers) {
    // The root device environment only borrows this mock: the local unique_ptr stays the
    // owner, so the borrowed pointer has to be released again before the test body ends.
    auto mockMemoryOperations = std::make_unique<MockMemoryOperations>();
    mockMemoryOperations->captureGfxAllocationsForMakeResident = true;
    pDevice->getRootDeviceEnvironmentRef().memoryOperationsInterface.reset(mockMemoryOperations.get());

    MockDirectSubmissionHw<FamilyType, RenderDispatcher<FamilyType>> directSubmission(*pDevice->getDefaultEngine().commandStreamReceiver);
    directSubmission.callBaseResident = true;
    bool ret = directSubmission.initialize(true, false);

    // Detach the mock BEFORE any fatal assertion. ASSERT_* returns from the test body on
    // failure; if that skipped this release, both owning pointers would delete the mock.
    pDevice->getRootDeviceEnvironmentRef().memoryOperationsInterface.release();

    EXPECT_TRUE(ret);
    EXPECT_EQ(nullptr, directSubmission.completionFenceAllocation);

    EXPECT_EQ(1, mockMemoryOperations->makeResidentCalledCount);
    ASSERT_EQ(3u, mockMemoryOperations->gfxAllocationsForMakeResident.size());
    EXPECT_EQ(directSubmission.ringBuffer, mockMemoryOperations->gfxAllocationsForMakeResident[0]);
    EXPECT_EQ(directSubmission.ringBuffer2, mockMemoryOperations->gfxAllocationsForMakeResident[1]);
    EXPECT_EQ(directSubmission.semaphores, mockMemoryOperations->gfxAllocationsForMakeResident[2]);
}
// With a completion fence allocation set, initialize() must make four allocations resident
// in a single call: ring buffer 1, ring buffer 2, semaphores, then the completion fence.
HWTEST_F(DirectSubmissionTest, givenDirectSubmissionWithCompletionFenceAllocationWhenAllocatingResourcesThenMakeResidentIsCalledForRingAndSemaphoreBuffersAndCompletionFenceAllocation) {
    // The root device environment only borrows this mock: the local unique_ptr stays the
    // owner, so the borrowed pointer has to be released again before the test body ends.
    auto mockMemoryOperations = std::make_unique<MockMemoryOperations>();
    mockMemoryOperations->captureGfxAllocationsForMakeResident = true;
    pDevice->getRootDeviceEnvironmentRef().memoryOperationsInterface.reset(mockMemoryOperations.get());

    MockDirectSubmissionHw<FamilyType, RenderDispatcher<FamilyType>> directSubmission(*pDevice->getDefaultEngine().commandStreamReceiver);
    MockGraphicsAllocation completionFenceAllocation{};
    directSubmission.completionFenceAllocation = &completionFenceAllocation;
    directSubmission.callBaseResident = true;
    bool ret = directSubmission.initialize(true, false);

    // Detach the mock BEFORE any fatal assertion. ASSERT_* returns from the test body on
    // failure; if that skipped this release, both owning pointers would delete the mock.
    pDevice->getRootDeviceEnvironmentRef().memoryOperationsInterface.release();

    EXPECT_TRUE(ret);
    EXPECT_EQ(&completionFenceAllocation, directSubmission.completionFenceAllocation);

    EXPECT_EQ(1, mockMemoryOperations->makeResidentCalledCount);
    ASSERT_EQ(4u, mockMemoryOperations->gfxAllocationsForMakeResident.size());
    EXPECT_EQ(directSubmission.ringBuffer, mockMemoryOperations->gfxAllocationsForMakeResident[0]);
    EXPECT_EQ(directSubmission.ringBuffer2, mockMemoryOperations->gfxAllocationsForMakeResident[1]);
    EXPECT_EQ(directSubmission.semaphores, mockMemoryOperations->gfxAllocationsForMakeResident[2]);
    EXPECT_EQ(directSubmission.completionFenceAllocation, mockMemoryOperations->gfxAllocationsForMakeResident[3]);
}
HWTEST_F(DirectSubmissionTest, givenDirectSubmissionInitializedWhenRingIsStartedThenExpectAllocationsCreatedAndCommandsDispatched) { HWTEST_F(DirectSubmissionTest, givenDirectSubmissionInitializedWhenRingIsStartedThenExpectAllocationsCreatedAndCommandsDispatched) {
MockDirectSubmissionHw<FamilyType, RenderDispatcher<FamilyType>> directSubmission(*pDevice->getDefaultEngine().commandStreamReceiver); MockDirectSubmissionHw<FamilyType, RenderDispatcher<FamilyType>> directSubmission(*pDevice->getDefaultEngine().commandStreamReceiver);
EXPECT_TRUE(directSubmission.disableCpuCacheFlush); EXPECT_TRUE(directSubmission.disableCpuCacheFlush);

View File

@@ -17,6 +17,7 @@
#include "shared/test/common/helpers/variable_backup.h" #include "shared/test/common/helpers/variable_backup.h"
#include "shared/test/common/libult/linux/drm_mock.h" #include "shared/test/common/libult/linux/drm_mock.h"
#include "shared/test/common/libult/ult_command_stream_receiver.h" #include "shared/test/common/libult/ult_command_stream_receiver.h"
#include "shared/test/common/mocks/linux/mock_drm_allocation.h"
#include "shared/test/common/mocks/mock_device.h" #include "shared/test/common/mocks/mock_device.h"
#include "shared/test/common/os_interface/linux/drm_memory_manager_tests.h" #include "shared/test/common/os_interface/linux/drm_memory_manager_tests.h"
#include "shared/test/common/test_macros/test.h" #include "shared/test/common/test_macros/test.h"
@@ -57,6 +58,8 @@ struct MockDrmDirectSubmission : public DrmDirectSubmission<GfxFamily, Dispatche
using BaseClass = DrmDirectSubmission<GfxFamily, Dispatcher>; using BaseClass = DrmDirectSubmission<GfxFamily, Dispatcher>;
using BaseClass::activeTiles; using BaseClass::activeTiles;
using BaseClass::allocateResources; using BaseClass::allocateResources;
using BaseClass::completionFenceAllocation;
using BaseClass::completionFenceValue;
using BaseClass::currentTagData; using BaseClass::currentTagData;
using BaseClass::disableMonitorFence; using BaseClass::disableMonitorFence;
using BaseClass::dispatchSwitchRingBufferSection; using BaseClass::dispatchSwitchRingBufferSection;
@@ -70,6 +73,7 @@ struct MockDrmDirectSubmission : public DrmDirectSubmission<GfxFamily, Dispatche
using BaseClass::partitionConfigSet; using BaseClass::partitionConfigSet;
using BaseClass::partitionedMode; using BaseClass::partitionedMode;
using BaseClass::postSyncOffset; using BaseClass::postSyncOffset;
using BaseClass::ringBuffer;
using BaseClass::ringStart; using BaseClass::ringStart;
using BaseClass::submit; using BaseClass::submit;
using BaseClass::switchRingBuffers; using BaseClass::switchRingBuffers;
@@ -116,6 +120,294 @@ HWTEST_F(DrmDirectSubmissionTest, whenCreateDirectSubmissionThenValidObjectIsRet
EXPECT_TRUE(ret); EXPECT_TRUE(ret);
} }
// When the DRM reports completion fence support, a newly constructed DrmDirectSubmission
// (render and blitter dispatchers alike) must use the CSR's tag allocation as its fence.
HWTEST_F(DrmDirectSubmissionTest, givenCompletionFenceSupportWhenCreateDrmDirectSubmissionThenTagAllocationIsSetAsCompletionFenceAllocation) {
    DebugManagerStateRestore stateRestore;
    DebugManager.flags.EnableDrmCompletionFence.set(1);

    auto drm = executionEnvironment.rootDeviceEnvironments[0]->osInterface->getDriverModel()->as<Drm>();
    ASSERT_TRUE(drm->completionFenceSupport());

    auto &csr = *device->getDefaultEngine().commandStreamReceiver;
    auto tagAllocation = csr.getTagAllocation();
    EXPECT_NE(nullptr, tagAllocation);

    {
        MockDrmDirectSubmission<FamilyType, RenderDispatcher<FamilyType>> renderSubmission(csr);
        EXPECT_EQ(tagAllocation, renderSubmission.completionFenceAllocation);
    }
    {
        MockDrmDirectSubmission<FamilyType, BlitterDispatcher<FamilyType>> blitterSubmission(csr);
        EXPECT_EQ(tagAllocation, blitterSubmission.completionFenceAllocation);
    }
}
// When the DRM reports no completion fence support, a newly constructed DrmDirectSubmission
// (render and blitter dispatchers alike) must leave its completion fence allocation unset.
HWTEST_F(DrmDirectSubmissionTest, givenNoCompletionFenceSupportWhenCreateDrmDirectSubmissionThenCompletionFenceAllocationIsNotSet) {
    DebugManagerStateRestore stateRestore;
    DebugManager.flags.EnableDrmCompletionFence.set(0);

    auto drm = executionEnvironment.rootDeviceEnvironments[0]->osInterface->getDriverModel()->as<Drm>();
    ASSERT_FALSE(drm->completionFenceSupport());

    auto &csr = *device->getDefaultEngine().commandStreamReceiver;

    {
        MockDrmDirectSubmission<FamilyType, RenderDispatcher<FamilyType>> renderSubmission(csr);
        EXPECT_EQ(nullptr, renderSubmission.completionFenceAllocation);
    }
    {
        MockDrmDirectSubmission<FamilyType, BlitterDispatcher<FamilyType>> blitterSubmission(csr);
        EXPECT_EQ(nullptr, blitterSubmission.completionFenceAllocation);
    }
}
// Without completion fence support, destroying a direct submission must not record any
// waitUserFence call, even though a non-zero fence value was assigned to it.
HWTEST_F(DrmDirectSubmissionTest, givenDirectSubmissionWithoutCompletionFenceAllocationWhenDestroyingThenNoWaitForUserFenceIsCalled) {
    DebugManagerStateRestore stateRestore;
    DebugManager.flags.EnableDrmCompletionFence.set(0);

    auto drmMock = static_cast<DrmMock *>(executionEnvironment.rootDeviceEnvironments[0]->osInterface->getDriverModel()->as<Drm>());
    ASSERT_FALSE(drmMock->completionFenceSupport());

    auto &csr = *device->getDefaultEngine().commandStreamReceiver;

    // Start from an empty record so only calls made while the scope below unwinds count.
    drmMock->waitUserFenceParams.clear();
    {
        MockDrmDirectSubmission<FamilyType, RenderDispatcher<FamilyType>> submission(csr);
        submission.completionFenceValue = 10;
    } // submission is destroyed here

    EXPECT_EQ(0u, drmMock->waitUserFenceParams.size());
}
// With completion fence support and a pending fence value, destroying the direct submission
// is expected to record one waitUserFence call per DRM context id of the fixture's osContext.
HWTEST_F(DrmDirectSubmissionTest, givenCompletionFenceSupportAndFenceIsNotCompletedWhenDestroyingThenWaitForUserFenceIsCalled) {
    DebugManagerStateRestore stateRestore;
    DebugManager.flags.EnableDrmCompletionFence.set(1);

    auto drmMock = static_cast<DrmMock *>(executionEnvironment.rootDeviceEnvironments[0]->osInterface->getDriverModel()->as<Drm>());
    ASSERT_TRUE(drmMock->completionFenceSupport());

    auto &csr = *device->getDefaultEngine().commandStreamReceiver;

    // Start from an empty record so only calls made while the scope below unwinds count.
    drmMock->waitUserFenceParams.clear();
    {
        MockDrmDirectSubmission<FamilyType, RenderDispatcher<FamilyType>> submission(csr);
        submission.completionFenceValue = 10;
    } // submission is destroyed here

    EXPECT_EQ(osContext->getDrmContextIds().size(), drmMock->waitUserFenceParams.size());
}
// Destroys a direct submission with a pending fence value under three device bitfields
// (0b01, 0b10, 0b11) and checks the value and CPU address recorded in
// drm->waitUserFenceParams for each destruction.
HWTEST_F(DrmDirectSubmissionTest, givenCompletionFenceSupportAndFenceIsNotCompletedWhenWaitOnSpecificAddressesPerOsContext) {
DebugManagerStateRestore restorer;
DebugManager.flags.EnableDrmCompletionFence.set(1);
auto &commandStreamReceiver = device->getUltCommandStreamReceiver<FamilyType>();
// Zero the whole tag allocation so the stored fence value is below the value assigned to
// completionFenceValue in the scopes below.
memset(commandStreamReceiver.getTagAllocation()->getUnderlyingBuffer(), 0, commandStreamReceiver.getTagAllocation()->getUnderlyingBufferSize());
auto drm = static_cast<DrmMock *>(executionEnvironment.rootDeviceEnvironments[0]->osInterface->getDriverModel()->as<Drm>());
ASSERT_TRUE(drm->completionFenceSupport());
// Expected wait address for the first DRM context: tag CPU address plus the fence offset.
auto completionFenceBaseCpuAddress = reinterpret_cast<uint64_t>(commandStreamReceiver.getTagAddress()) + Drm::completionFenceOffset;
uint32_t expectedCompletionValueToWait = 10u;
// Case 1: only tile 0 in the bitfield -> one wait, at the base address.
{
DeviceBitfield firstTileBitfield{0b01};
// NOTE: this local osContext shadows the fixture member of the same name inside the scope.
OsContextLinux osContext(*drm, 0u,
EngineDescriptorHelper::getDefaultDescriptor({aub_stream::ENGINE_RCS, EngineUsage::Regular},
PreemptionMode::ThreadGroup, firstTileBitfield));
osContext.ensureContextInitialized();
commandStreamReceiver.setupContext(osContext);
drm->waitUserFenceParams.clear();
{
MockDrmDirectSubmission<FamilyType, RenderDispatcher<FamilyType>> directSubmission(commandStreamReceiver);
directSubmission.completionFenceValue = expectedCompletionValueToWait;
}
EXPECT_EQ(1u, drm->waitUserFenceParams.size());
EXPECT_EQ(expectedCompletionValueToWait, drm->waitUserFenceParams[0].value);
EXPECT_EQ(completionFenceBaseCpuAddress, drm->waitUserFenceParams[0].address);
}
// Case 2: only tile 1 in the bitfield -> still one wait, and still at the base address.
{
DeviceBitfield secondTileBitfield{0b10};
OsContextLinux osContext(*drm, 0u,
EngineDescriptorHelper::getDefaultDescriptor({aub_stream::ENGINE_RCS, EngineUsage::Regular},
PreemptionMode::ThreadGroup, secondTileBitfield));
osContext.ensureContextInitialized();
commandStreamReceiver.setupContext(osContext);
drm->waitUserFenceParams.clear();
{
MockDrmDirectSubmission<FamilyType, RenderDispatcher<FamilyType>> directSubmission(commandStreamReceiver);
directSubmission.completionFenceValue = expectedCompletionValueToWait;
}
EXPECT_EQ(1u, drm->waitUserFenceParams.size());
EXPECT_EQ(expectedCompletionValueToWait, drm->waitUserFenceParams[0].value);
EXPECT_EQ(completionFenceBaseCpuAddress, drm->waitUserFenceParams[0].address);
}
// Case 3: both tiles -> two waits; the second address is advanced by the post-sync write offset.
{
DeviceBitfield twoTilesBitfield{0b11};
OsContextLinux osContext(*drm, 0u,
EngineDescriptorHelper::getDefaultDescriptor({aub_stream::ENGINE_RCS, EngineUsage::Regular},
PreemptionMode::ThreadGroup, twoTilesBitfield));
osContext.ensureContextInitialized();
commandStreamReceiver.setupContext(osContext);
drm->waitUserFenceParams.clear();
// A work partition allocation is supplied on the CSR only while the submission exists;
// it is reset to nullptr right after the inner scope.
MockGraphicsAllocation workPartitionAllocation{};
commandStreamReceiver.workPartitionAllocation = &workPartitionAllocation;
{
MockDrmDirectSubmission<FamilyType, RenderDispatcher<FamilyType>> directSubmission(commandStreamReceiver);
directSubmission.completionFenceValue = expectedCompletionValueToWait;
}
commandStreamReceiver.workPartitionAllocation = nullptr;
EXPECT_EQ(2u, drm->waitUserFenceParams.size());
EXPECT_EQ(expectedCompletionValueToWait, drm->waitUserFenceParams[0].value);
EXPECT_EQ(completionFenceBaseCpuAddress, drm->waitUserFenceParams[0].address);
EXPECT_EQ(expectedCompletionValueToWait, drm->waitUserFenceParams[1].value);
EXPECT_EQ(completionFenceBaseCpuAddress + commandStreamReceiver.getPostSyncWriteOffset(), drm->waitUserFenceParams[1].address);
}
// Re-attach the fixture's osContext; the local OsContextLinux objects above are out of scope.
commandStreamReceiver.setupContext(*osContext);
}
// With no completion fence allocation, every submit() must call exec() exactly once with a
// zero completion address and a zero completion value.
HWTEST_F(DrmDirectSubmissionTest, givenNoCompletionFenceSupportWhenSubmittingThenNoCompletionAddressIsPassedToExec) {
    uint64_t gpuAddress = 0x1000;
    size_t size = 0x1000;

    MockDrmDirectSubmission<FamilyType, RenderDispatcher<FamilyType>> drmDirectSubmission(*device->getDefaultEngine().commandStreamReceiver);
    drmDirectSubmission.completionFenceAllocation = nullptr;
    EXPECT_TRUE(drmDirectSubmission.allocateResources());

    // Temporarily swap the ring buffer's first buffer object for a mock that records the
    // completion arguments passed to exec().
    auto ringBuffer = static_cast<DrmAllocation *>(drmDirectSubmission.ringBuffer);
    auto initialBO = ringBuffer->getBufferObjectToModify(0);
    auto drm = executionEnvironment.rootDeviceEnvironments[0]->osInterface->getDriverModel()->as<Drm>();
    MockBufferObject mockBO(drm);
    ringBuffer->getBufferObjectToModify(0) = &mockBO;

    for (auto i = 0; i < 2; i++) {
        mockBO.passedExecParams.clear();
        EXPECT_TRUE(drmDirectSubmission.submit(gpuAddress, size));

        // Non-fatal check plus break: a fatal ASSERT_* would return from the test body before
        // the original buffer object is restored below, leaving the ring buffer pointing at
        // the stack-allocated mock while the submission is torn down.
        EXPECT_EQ(1u, mockBO.passedExecParams.size());
        if (mockBO.passedExecParams.size() != 1u) {
            break;
        }
        EXPECT_EQ(0u, mockBO.passedExecParams[0].completionGpuAddress);
        EXPECT_EQ(0u, mockBO.passedExecParams[0].completionValue);
    }

    // Restore the real buffer object before mockBO goes out of scope.
    ringBuffer->getBufferObjectToModify(0) = initialBO;
}
// With an OS context covering only tile 0 and a completion fence allocation set, every
// submit() must call exec() once with the fence base GPU address and an incrementing value.
HWTEST_F(DrmDirectSubmissionTest, givenTile0AndCompletionFenceSupportWhenSubmittingThenCompletionAddressAndValueArePassedToExec) {
    uint64_t gpuAddress = 0x1000;
    size_t size = 0x1000;

    auto &commandStreamReceiver = *device->getDefaultEngine().commandStreamReceiver;
    auto drm = executionEnvironment.rootDeviceEnvironments[0]->osInterface->getDriverModel()->as<Drm>();

    auto completionFenceBaseGpuAddress = commandStreamReceiver.getTagAllocation()->getGpuAddress() + Drm::completionFenceOffset;

    // Point the CSR at a local OS context restricted to tile 0; the fixture's osContext is
    // re-attached at the end of the test.
    DeviceBitfield firstTileBitfield{0b01};
    OsContextLinux osContextTile0(*drm, 0u,
                                  EngineDescriptorHelper::getDefaultDescriptor({aub_stream::ENGINE_RCS, EngineUsage::Regular},
                                                                               PreemptionMode::ThreadGroup, firstTileBitfield));
    osContextTile0.ensureContextInitialized();
    commandStreamReceiver.setupContext(osContextTile0);

    MockDrmDirectSubmission<FamilyType, RenderDispatcher<FamilyType>> drmDirectSubmission(commandStreamReceiver);
    drmDirectSubmission.completionFenceAllocation = commandStreamReceiver.getTagAllocation();
    EXPECT_TRUE(drmDirectSubmission.allocateResources());

    // Temporarily swap the ring buffer's first buffer object for a mock that records the
    // completion arguments passed to exec().
    auto ringBuffer = static_cast<DrmAllocation *>(drmDirectSubmission.ringBuffer);
    auto initialBO = ringBuffer->getBufferObjectToModify(0);
    MockBufferObject mockBO(drm);
    ringBuffer->getBufferObjectToModify(0) = &mockBO;

    for (auto i = 0u; i < 2; i++) {
        mockBO.passedExecParams.clear();
        EXPECT_TRUE(drmDirectSubmission.submit(gpuAddress, size));

        // Non-fatal check plus break: a fatal ASSERT_* would return from the test body before
        // the buffer object and the OS context are restored below, leaving pointers to
        // stack objects behind.
        EXPECT_EQ(1u, mockBO.passedExecParams.size());
        if (mockBO.passedExecParams.size() != 1u) {
            break;
        }
        EXPECT_EQ(completionFenceBaseGpuAddress, mockBO.passedExecParams[0].completionGpuAddress);
        EXPECT_EQ(i + 1, mockBO.passedExecParams[0].completionValue);
    }

    // Restore the real buffer object and the fixture's OS context before the locals die.
    ringBuffer->getBufferObjectToModify(0) = initialBO;
    commandStreamReceiver.setupContext(*osContext);
}
// With an OS context covering only tile 1 and a completion fence allocation set, every
// submit() must call exec() once with the fence base GPU address (no per-tile offset) and
// an incrementing value.
HWTEST_F(DrmDirectSubmissionTest, givenTile1AndCompletionFenceSupportWhenSubmittingThenCompletionAddressAndValueArePassedToExec) {
    uint64_t gpuAddress = 0x1000;
    size_t size = 0x1000;

    auto &commandStreamReceiver = *device->getDefaultEngine().commandStreamReceiver;
    auto drm = executionEnvironment.rootDeviceEnvironments[0]->osInterface->getDriverModel()->as<Drm>();

    auto completionFenceBaseGpuAddress = commandStreamReceiver.getTagAllocation()->getGpuAddress() + Drm::completionFenceOffset;

    // Point the CSR at a local OS context restricted to tile 1; the fixture's osContext is
    // re-attached at the end of the test.
    DeviceBitfield secondTileBitfield{0b10};
    OsContextLinux osContextTile1(*drm, 0u,
                                  EngineDescriptorHelper::getDefaultDescriptor({aub_stream::ENGINE_RCS, EngineUsage::Regular},
                                                                               PreemptionMode::ThreadGroup, secondTileBitfield));
    osContextTile1.ensureContextInitialized();
    commandStreamReceiver.setupContext(osContextTile1);

    MockDrmDirectSubmission<FamilyType, RenderDispatcher<FamilyType>> drmDirectSubmission(commandStreamReceiver);
    drmDirectSubmission.completionFenceAllocation = commandStreamReceiver.getTagAllocation();
    EXPECT_TRUE(drmDirectSubmission.allocateResources());

    // Temporarily swap the ring buffer's first buffer object for a mock that records the
    // completion arguments passed to exec().
    auto ringBuffer = static_cast<DrmAllocation *>(drmDirectSubmission.ringBuffer);
    auto initialBO = ringBuffer->getBufferObjectToModify(0);
    MockBufferObject mockBO(drm);
    ringBuffer->getBufferObjectToModify(0) = &mockBO;

    for (auto i = 0u; i < 2; i++) {
        mockBO.passedExecParams.clear();
        EXPECT_TRUE(drmDirectSubmission.submit(gpuAddress, size));

        // Non-fatal check plus break: a fatal ASSERT_* would return from the test body before
        // the buffer object and the OS context are restored below, leaving pointers to
        // stack objects behind.
        EXPECT_EQ(1u, mockBO.passedExecParams.size());
        if (mockBO.passedExecParams.size() != 1u) {
            break;
        }
        EXPECT_EQ(completionFenceBaseGpuAddress, mockBO.passedExecParams[0].completionGpuAddress);
        EXPECT_EQ(i + 1, mockBO.passedExecParams[0].completionValue);
    }

    // Restore the real buffer object and the fixture's OS context before the locals die.
    ringBuffer->getBufferObjectToModify(0) = initialBO;
    commandStreamReceiver.setupContext(*osContext);
}
// With an OS context covering both tiles and a completion fence allocation set, every
// submit() must call exec() twice: first with the fence base GPU address, then with the
// base advanced by the post-sync write offset, both carrying the same incrementing value.
HWTEST_F(DrmDirectSubmissionTest, givenTwoTilesAndCompletionFenceSupportWhenSubmittingThenCompletionAddressAndValueArePassedToExec) {
    uint64_t gpuAddress = 0x1000;
    size_t size = 0x1000;

    auto &commandStreamReceiver = device->getUltCommandStreamReceiver<FamilyType>();
    auto drm = executionEnvironment.rootDeviceEnvironments[0]->osInterface->getDriverModel()->as<Drm>();

    auto completionFenceBaseGpuAddress = commandStreamReceiver.getTagAllocation()->getGpuAddress() + Drm::completionFenceOffset;

    // Point the CSR at a local OS context spanning both tiles; the fixture's osContext is
    // re-attached at the end of the test.
    DeviceBitfield twoTilesBitfield{0b11};
    OsContextLinux osContextBothTiles(*drm, 0u,
                                      EngineDescriptorHelper::getDefaultDescriptor({aub_stream::ENGINE_RCS, EngineUsage::Regular},
                                                                                   PreemptionMode::ThreadGroup, twoTilesBitfield));
    osContextBothTiles.ensureContextInitialized();
    commandStreamReceiver.setupContext(osContextBothTiles);

    // The work partition allocation is only supplied on the CSR while the submission is
    // being constructed, and is cleared again immediately afterwards.
    MockGraphicsAllocation workPartitionAllocation{};
    commandStreamReceiver.workPartitionAllocation = &workPartitionAllocation;
    MockDrmDirectSubmission<FamilyType, RenderDispatcher<FamilyType>> drmDirectSubmission(commandStreamReceiver);
    commandStreamReceiver.workPartitionAllocation = nullptr;

    drmDirectSubmission.completionFenceAllocation = commandStreamReceiver.getTagAllocation();
    EXPECT_TRUE(drmDirectSubmission.allocateResources());

    // Temporarily swap the ring buffer's first buffer object for a mock that records the
    // completion arguments passed to exec().
    auto ringBuffer = static_cast<DrmAllocation *>(drmDirectSubmission.ringBuffer);
    auto initialBO = ringBuffer->getBufferObjectToModify(0);
    MockBufferObject mockBO(drm);
    ringBuffer->getBufferObjectToModify(0) = &mockBO;

    for (auto i = 0u; i < 2; i++) {
        mockBO.passedExecParams.clear();
        EXPECT_TRUE(drmDirectSubmission.submit(gpuAddress, size));

        // Non-fatal check plus break: a fatal ASSERT_* would return from the test body before
        // the buffer object and the OS context are restored below, leaving pointers to
        // stack objects behind.
        EXPECT_EQ(2u, mockBO.passedExecParams.size());
        if (mockBO.passedExecParams.size() != 2u) {
            break;
        }
        EXPECT_EQ(completionFenceBaseGpuAddress, mockBO.passedExecParams[0].completionGpuAddress);
        EXPECT_EQ(i + 1, mockBO.passedExecParams[0].completionValue);
        EXPECT_EQ(completionFenceBaseGpuAddress + commandStreamReceiver.getPostSyncWriteOffset(), mockBO.passedExecParams[1].completionGpuAddress);
        EXPECT_EQ(i + 1, mockBO.passedExecParams[1].completionValue);
    }

    // Restore the real buffer object and the fixture's OS context before the locals die.
    ringBuffer->getBufferObjectToModify(0) = initialBO;
    commandStreamReceiver.setupContext(*osContext);
}
HWTEST_F(DrmDirectSubmissionTest, givenDisabledMonitorFenceWhenDispatchSwitchRingBufferThenDispatchPipeControl) { HWTEST_F(DrmDirectSubmissionTest, givenDisabledMonitorFenceWhenDispatchSwitchRingBufferThenDispatchPipeControl) {
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
using Dispatcher = RenderDispatcher<FamilyType>; using Dispatcher = RenderDispatcher<FamilyType>;