mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-21 09:14:47 +08:00
feature direct submission: use tag allocation as a completion fence
- Use the tag allocation address as the completion address in the exec call.
- Wait for the completion value before destroying the DRM direct submission.
Related-To: NEO-6643
Signed-off-by: Mateusz Jablonski <mateusz.jablonski@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
38190c5d17
commit
03185f7111
@@ -321,7 +321,7 @@ class CommandStreamReceiver {
|
|||||||
|
|
||||||
MOCKABLE_VIRTUAL bool isGpuHangDetected() const;
|
MOCKABLE_VIRTUAL bool isGpuHangDetected() const;
|
||||||
|
|
||||||
uint64_t getCompletionAddress() {
|
uint64_t getCompletionAddress() const {
|
||||||
uint64_t completionFenceAddress = castToUint64(const_cast<uint32_t *>(getTagAddress()));
|
uint64_t completionFenceAddress = castToUint64(const_cast<uint32_t *>(getTagAddress()));
|
||||||
if (completionFenceAddress == 0) {
|
if (completionFenceAddress == 0) {
|
||||||
return 0;
|
return 0;
|
||||||
|
|||||||
@@ -14,6 +14,7 @@ DirectSubmissionInputParams::DirectSubmissionInputParams(const CommandStreamRece
|
|||||||
memoryManager = commandStreamReceiver.getMemoryManager();
|
memoryManager = commandStreamReceiver.getMemoryManager();
|
||||||
globalFenceAllocation = commandStreamReceiver.getGlobalFenceAllocation();
|
globalFenceAllocation = commandStreamReceiver.getGlobalFenceAllocation();
|
||||||
workPartitionAllocation = commandStreamReceiver.getWorkPartitionAllocation();
|
workPartitionAllocation = commandStreamReceiver.getWorkPartitionAllocation();
|
||||||
|
completionFenceAllocation = commandStreamReceiver.getTagAllocation();
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace NEO
|
} // namespace NEO
|
||||||
|
|||||||
@@ -64,6 +64,7 @@ struct DirectSubmissionInputParams : NonCopyableClass {
|
|||||||
MemoryManager *memoryManager = nullptr;
|
MemoryManager *memoryManager = nullptr;
|
||||||
const GraphicsAllocation *globalFenceAllocation = nullptr;
|
const GraphicsAllocation *globalFenceAllocation = nullptr;
|
||||||
GraphicsAllocation *workPartitionAllocation = nullptr;
|
GraphicsAllocation *workPartitionAllocation = nullptr;
|
||||||
|
GraphicsAllocation *completionFenceAllocation = nullptr;
|
||||||
const uint32_t rootDeviceIndex;
|
const uint32_t rootDeviceIndex;
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -160,6 +161,7 @@ class DirectSubmissionHw {
|
|||||||
MemoryOperationsHandler *memoryOperationHandler = nullptr;
|
MemoryOperationsHandler *memoryOperationHandler = nullptr;
|
||||||
const HardwareInfo *hwInfo = nullptr;
|
const HardwareInfo *hwInfo = nullptr;
|
||||||
const GraphicsAllocation *globalFenceAllocation = nullptr;
|
const GraphicsAllocation *globalFenceAllocation = nullptr;
|
||||||
|
GraphicsAllocation *completionFenceAllocation = nullptr;
|
||||||
GraphicsAllocation *ringBuffer = nullptr;
|
GraphicsAllocation *ringBuffer = nullptr;
|
||||||
GraphicsAllocation *ringBuffer2 = nullptr;
|
GraphicsAllocation *ringBuffer2 = nullptr;
|
||||||
GraphicsAllocation *semaphores = nullptr;
|
GraphicsAllocation *semaphores = nullptr;
|
||||||
|
|||||||
@@ -100,6 +100,10 @@ bool DirectSubmissionHw<GfxFamily, Dispatcher>::allocateResources() {
|
|||||||
allocations.push_back(workPartitionAllocation);
|
allocations.push_back(workPartitionAllocation);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (completionFenceAllocation != nullptr) {
|
||||||
|
allocations.push_back(completionFenceAllocation);
|
||||||
|
}
|
||||||
|
|
||||||
if (DebugManager.flags.DirectSubmissionPrintBuffers.get()) {
|
if (DebugManager.flags.DirectSubmissionPrintBuffers.get()) {
|
||||||
printf("Ring buffer 1 - gpu address: %" PRIx64 " - %" PRIx64 ", cpu address: %p - %p, size: %zu \n",
|
printf("Ring buffer 1 - gpu address: %" PRIx64 " - %" PRIx64 ", cpu address: %p - %p, size: %zu \n",
|
||||||
ringBuffer->getGpuAddress(),
|
ringBuffer->getGpuAddress(),
|
||||||
|
|||||||
@@ -35,7 +35,8 @@ class DrmDirectSubmission : public DirectSubmissionHw<GfxFamily, Dispatcher> {
|
|||||||
|
|
||||||
MOCKABLE_VIRTUAL void wait(uint32_t taskCountToWait);
|
MOCKABLE_VIRTUAL void wait(uint32_t taskCountToWait);
|
||||||
|
|
||||||
TagData currentTagData;
|
TagData currentTagData{};
|
||||||
volatile uint32_t *tagAddress;
|
volatile uint32_t *tagAddress;
|
||||||
|
uint32_t completionFenceValue{};
|
||||||
};
|
};
|
||||||
} // namespace NEO
|
} // namespace NEO
|
||||||
|
|||||||
@@ -39,12 +39,17 @@ DrmDirectSubmission<GfxFamily, Dispatcher>::DrmDirectSubmission(const DirectSubm
|
|||||||
this->partitionedMode = this->activeTiles > 1u;
|
this->partitionedMode = this->activeTiles > 1u;
|
||||||
this->partitionConfigSet = !this->partitionedMode;
|
this->partitionConfigSet = !this->partitionedMode;
|
||||||
|
|
||||||
osContextLinux->getDrm().setDirectSubmissionActive(true);
|
auto &drm = osContextLinux->getDrm();
|
||||||
|
drm.setDirectSubmissionActive(true);
|
||||||
|
|
||||||
if (this->partitionedMode) {
|
if (this->partitionedMode) {
|
||||||
this->workPartitionAllocation = inputParams.workPartitionAllocation;
|
this->workPartitionAllocation = inputParams.workPartitionAllocation;
|
||||||
UNRECOVERABLE_IF(this->workPartitionAllocation == nullptr);
|
UNRECOVERABLE_IF(this->workPartitionAllocation == nullptr);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (drm.completionFenceSupport()) {
|
||||||
|
this->completionFenceAllocation = inputParams.completionFenceAllocation;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename GfxFamily, typename Dispatcher>
|
template <typename GfxFamily, typename Dispatcher>
|
||||||
@@ -53,6 +58,24 @@ inline DrmDirectSubmission<GfxFamily, Dispatcher>::~DrmDirectSubmission() {
|
|||||||
this->stopRingBuffer();
|
this->stopRingBuffer();
|
||||||
this->wait(static_cast<uint32_t>(this->currentTagData.tagValue));
|
this->wait(static_cast<uint32_t>(this->currentTagData.tagValue));
|
||||||
}
|
}
|
||||||
|
if (this->completionFenceAllocation) {
|
||||||
|
auto osContextLinux = static_cast<OsContextLinux *>(&this->osContext);
|
||||||
|
auto &drm = osContextLinux->getDrm();
|
||||||
|
auto &drmContextIds = osContextLinux->getDrmContextIds();
|
||||||
|
uint32_t drmContextId = 0u;
|
||||||
|
auto completionFenceCpuAddress = reinterpret_cast<uint64_t>(this->completionFenceAllocation->getUnderlyingBuffer()) + Drm::completionFenceOffset;
|
||||||
|
for (auto drmIterator = 0u; drmIterator < osContextLinux->getDeviceBitfield().size(); drmIterator++) {
|
||||||
|
if (osContextLinux->getDeviceBitfield().test(drmIterator)) {
|
||||||
|
if (*reinterpret_cast<uint32_t *>(completionFenceCpuAddress) < completionFenceValue) {
|
||||||
|
constexpr int64_t timeout = -1;
|
||||||
|
constexpr uint16_t flags = 0;
|
||||||
|
drm.waitUserFence(drmContextIds[drmContextId], completionFenceCpuAddress, completionFenceValue, Drm::ValueWidth::U32, timeout, flags);
|
||||||
|
}
|
||||||
|
drmContextId++;
|
||||||
|
completionFenceCpuAddress = ptrOffset(completionFenceCpuAddress, this->postSyncOffset);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
this->deallocateResources();
|
this->deallocateResources();
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -81,6 +104,14 @@ bool DrmDirectSubmission<GfxFamily, Dispatcher>::submit(uint64_t gpuAddress, siz
|
|||||||
|
|
||||||
bool ret = false;
|
bool ret = false;
|
||||||
uint32_t drmContextId = 0u;
|
uint32_t drmContextId = 0u;
|
||||||
|
|
||||||
|
uint32_t completionValue = 0u;
|
||||||
|
uint64_t completionFenceGpuAddress = 0u;
|
||||||
|
if (this->completionFenceAllocation) {
|
||||||
|
completionValue = ++completionFenceValue;
|
||||||
|
completionFenceGpuAddress = this->completionFenceAllocation->getGpuAddress() + Drm::completionFenceOffset;
|
||||||
|
}
|
||||||
|
|
||||||
for (auto drmIterator = 0u; drmIterator < osContextLinux->getDeviceBitfield().size(); drmIterator++) {
|
for (auto drmIterator = 0u; drmIterator < osContextLinux->getDeviceBitfield().size(); drmIterator++) {
|
||||||
if (osContextLinux->getDeviceBitfield().test(drmIterator)) {
|
if (osContextLinux->getDeviceBitfield().test(drmIterator)) {
|
||||||
ret |= !!bb->exec(static_cast<uint32_t>(size),
|
ret |= !!bb->exec(static_cast<uint32_t>(size),
|
||||||
@@ -93,9 +124,12 @@ bool DrmDirectSubmission<GfxFamily, Dispatcher>::submit(uint64_t gpuAddress, siz
|
|||||||
nullptr,
|
nullptr,
|
||||||
0,
|
0,
|
||||||
&execObject,
|
&execObject,
|
||||||
0,
|
completionFenceGpuAddress,
|
||||||
0);
|
completionValue);
|
||||||
drmContextId++;
|
drmContextId++;
|
||||||
|
if (completionFenceGpuAddress) {
|
||||||
|
completionFenceGpuAddress += this->postSyncOffset;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -122,6 +122,7 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw<GfxFamily>, publ
|
|||||||
using BaseClass::CommandStreamReceiver::useNotifyEnableForPostSync;
|
using BaseClass::CommandStreamReceiver::useNotifyEnableForPostSync;
|
||||||
using BaseClass::CommandStreamReceiver::userPauseConfirmation;
|
using BaseClass::CommandStreamReceiver::userPauseConfirmation;
|
||||||
using BaseClass::CommandStreamReceiver::waitForTaskCountAndCleanAllocationList;
|
using BaseClass::CommandStreamReceiver::waitForTaskCountAndCleanAllocationList;
|
||||||
|
using BaseClass::CommandStreamReceiver::workPartitionAllocation;
|
||||||
|
|
||||||
UltCommandStreamReceiver(ExecutionEnvironment &executionEnvironment,
|
UltCommandStreamReceiver(ExecutionEnvironment &executionEnvironment,
|
||||||
uint32_t rootDeviceIndex,
|
uint32_t rootDeviceIndex,
|
||||||
|
|||||||
@@ -18,8 +18,20 @@ class MockBufferObject : public BufferObject {
|
|||||||
using BufferObject::BufferObject;
|
using BufferObject::BufferObject;
|
||||||
using BufferObject::handle;
|
using BufferObject::handle;
|
||||||
|
|
||||||
|
struct ExecParams {
|
||||||
|
uint64_t completionGpuAddress = 0;
|
||||||
|
uint32_t completionValue = 0;
|
||||||
|
};
|
||||||
|
|
||||||
|
std::vector<ExecParams> passedExecParams{};
|
||||||
MockBufferObject(Drm *drm) : BufferObject(drm, CommonConstants::unsupportedPatIndex, 0, 0, 1) {
|
MockBufferObject(Drm *drm) : BufferObject(drm, CommonConstants::unsupportedPatIndex, 0, 0, 1) {
|
||||||
}
|
}
|
||||||
|
int exec(uint32_t used, size_t startOffset, unsigned int flags, bool requiresCoherency, OsContext *osContext, uint32_t vmHandleId, uint32_t drmContextId,
|
||||||
|
BufferObject *const residency[], size_t residencyCount, drm_i915_gem_exec_object2 *execObjectsStorage, uint64_t completionGpuAddress, uint32_t completionValue) override {
|
||||||
|
passedExecParams.push_back({completionGpuAddress, completionValue});
|
||||||
|
return BufferObject::exec(used, startOffset, flags, requiresCoherency, osContext, vmHandleId, drmContextId,
|
||||||
|
residency, residencyCount, execObjectsStorage, completionGpuAddress, completionValue);
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
class MockDrmAllocation : public DrmAllocation {
|
class MockDrmAllocation : public DrmAllocation {
|
||||||
|
|||||||
@@ -17,6 +17,7 @@ struct MockDirectSubmissionHw : public DirectSubmissionHw<GfxFamily, Dispatcher>
|
|||||||
using BaseClass = DirectSubmissionHw<GfxFamily, Dispatcher>;
|
using BaseClass = DirectSubmissionHw<GfxFamily, Dispatcher>;
|
||||||
using BaseClass::activeTiles;
|
using BaseClass::activeTiles;
|
||||||
using BaseClass::allocateResources;
|
using BaseClass::allocateResources;
|
||||||
|
using BaseClass::completionFenceAllocation;
|
||||||
using BaseClass::completionRingBuffers;
|
using BaseClass::completionRingBuffers;
|
||||||
using BaseClass::cpuCachelineFlush;
|
using BaseClass::cpuCachelineFlush;
|
||||||
using BaseClass::currentQueueWorkCount;
|
using BaseClass::currentQueueWorkCount;
|
||||||
|
|||||||
@@ -57,6 +57,11 @@ class MockMemoryOperations : public MemoryOperationsHandler {
|
|||||||
if (osContext) {
|
if (osContext) {
|
||||||
makeResidentContextId = osContext->getContextId();
|
makeResidentContextId = osContext->getContextId();
|
||||||
}
|
}
|
||||||
|
if (captureGfxAllocationsForMakeResident) {
|
||||||
|
for (auto &gfxAllocation : gfxAllocations) {
|
||||||
|
gfxAllocationsForMakeResident.push_back(gfxAllocation);
|
||||||
|
}
|
||||||
|
}
|
||||||
return MemoryOperationsStatus::SUCCESS;
|
return MemoryOperationsStatus::SUCCESS;
|
||||||
}
|
}
|
||||||
MemoryOperationsStatus evictWithinOsContext(OsContext *osContext, GraphicsAllocation &gfxAllocation) override {
|
MemoryOperationsStatus evictWithinOsContext(OsContext *osContext, GraphicsAllocation &gfxAllocation) override {
|
||||||
@@ -64,9 +69,11 @@ class MockMemoryOperations : public MemoryOperationsHandler {
|
|||||||
return MemoryOperationsStatus::SUCCESS;
|
return MemoryOperationsStatus::SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::vector<GraphicsAllocation *> gfxAllocationsForMakeResident{};
|
||||||
int makeResidentCalledCount = 0;
|
int makeResidentCalledCount = 0;
|
||||||
int evictCalledCount = 0;
|
int evictCalledCount = 0;
|
||||||
uint32_t makeResidentContextId = std::numeric_limits<uint32_t>::max();
|
uint32_t makeResidentContextId = std::numeric_limits<uint32_t>::max();
|
||||||
|
bool captureGfxAllocationsForMakeResident = false;
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace NEO
|
} // namespace NEO
|
||||||
|
|||||||
@@ -118,6 +118,54 @@ HWTEST_F(DirectSubmissionTest, givenDirectSubmissionWhenMakingResourcesResidentT
|
|||||||
pDevice->getRootDeviceEnvironmentRef().memoryOperationsInterface.release();
|
pDevice->getRootDeviceEnvironmentRef().memoryOperationsInterface.release();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
HWTEST_F(DirectSubmissionTest, givenDirectSubmissionWithoutCompletionFenceAllocationWhenAllocatingResourcesThenMakeResidentIsCalledForRingAndSemaphoreBuffers) {
|
||||||
|
auto mockMemoryOperations = std::make_unique<MockMemoryOperations>();
|
||||||
|
mockMemoryOperations->captureGfxAllocationsForMakeResident = true;
|
||||||
|
pDevice->getRootDeviceEnvironmentRef().memoryOperationsInterface.reset(mockMemoryOperations.get());
|
||||||
|
|
||||||
|
MockDirectSubmissionHw<FamilyType, RenderDispatcher<FamilyType>> directSubmission(*pDevice->getDefaultEngine().commandStreamReceiver);
|
||||||
|
|
||||||
|
directSubmission.callBaseResident = true;
|
||||||
|
bool ret = directSubmission.initialize(true, false);
|
||||||
|
EXPECT_TRUE(ret);
|
||||||
|
EXPECT_EQ(nullptr, directSubmission.completionFenceAllocation);
|
||||||
|
|
||||||
|
EXPECT_EQ(1, mockMemoryOperations->makeResidentCalledCount);
|
||||||
|
ASSERT_EQ(3u, mockMemoryOperations->gfxAllocationsForMakeResident.size());
|
||||||
|
EXPECT_EQ(directSubmission.ringBuffer, mockMemoryOperations->gfxAllocationsForMakeResident[0]);
|
||||||
|
EXPECT_EQ(directSubmission.ringBuffer2, mockMemoryOperations->gfxAllocationsForMakeResident[1]);
|
||||||
|
EXPECT_EQ(directSubmission.semaphores, mockMemoryOperations->gfxAllocationsForMakeResident[2]);
|
||||||
|
|
||||||
|
pDevice->getRootDeviceEnvironmentRef().memoryOperationsInterface.release();
|
||||||
|
}
|
||||||
|
|
||||||
|
HWTEST_F(DirectSubmissionTest, givenDirectSubmissionWithCompletionFenceAllocationWhenAllocatingResourcesThenMakeResidentIsCalledForRingAndSemaphoreBuffersAndCompletionFenceAllocation) {
|
||||||
|
auto mockMemoryOperations = std::make_unique<MockMemoryOperations>();
|
||||||
|
mockMemoryOperations->captureGfxAllocationsForMakeResident = true;
|
||||||
|
|
||||||
|
pDevice->getRootDeviceEnvironmentRef().memoryOperationsInterface.reset(mockMemoryOperations.get());
|
||||||
|
|
||||||
|
MockDirectSubmissionHw<FamilyType, RenderDispatcher<FamilyType>> directSubmission(*pDevice->getDefaultEngine().commandStreamReceiver);
|
||||||
|
|
||||||
|
MockGraphicsAllocation completionFenceAllocation{};
|
||||||
|
|
||||||
|
directSubmission.completionFenceAllocation = &completionFenceAllocation;
|
||||||
|
|
||||||
|
directSubmission.callBaseResident = true;
|
||||||
|
bool ret = directSubmission.initialize(true, false);
|
||||||
|
EXPECT_TRUE(ret);
|
||||||
|
EXPECT_EQ(&completionFenceAllocation, directSubmission.completionFenceAllocation);
|
||||||
|
|
||||||
|
EXPECT_EQ(1, mockMemoryOperations->makeResidentCalledCount);
|
||||||
|
ASSERT_EQ(4u, mockMemoryOperations->gfxAllocationsForMakeResident.size());
|
||||||
|
EXPECT_EQ(directSubmission.ringBuffer, mockMemoryOperations->gfxAllocationsForMakeResident[0]);
|
||||||
|
EXPECT_EQ(directSubmission.ringBuffer2, mockMemoryOperations->gfxAllocationsForMakeResident[1]);
|
||||||
|
EXPECT_EQ(directSubmission.semaphores, mockMemoryOperations->gfxAllocationsForMakeResident[2]);
|
||||||
|
EXPECT_EQ(directSubmission.completionFenceAllocation, mockMemoryOperations->gfxAllocationsForMakeResident[3]);
|
||||||
|
|
||||||
|
pDevice->getRootDeviceEnvironmentRef().memoryOperationsInterface.release();
|
||||||
|
}
|
||||||
|
|
||||||
HWTEST_F(DirectSubmissionTest, givenDirectSubmissionInitializedWhenRingIsStartedThenExpectAllocationsCreatedAndCommandsDispatched) {
|
HWTEST_F(DirectSubmissionTest, givenDirectSubmissionInitializedWhenRingIsStartedThenExpectAllocationsCreatedAndCommandsDispatched) {
|
||||||
MockDirectSubmissionHw<FamilyType, RenderDispatcher<FamilyType>> directSubmission(*pDevice->getDefaultEngine().commandStreamReceiver);
|
MockDirectSubmissionHw<FamilyType, RenderDispatcher<FamilyType>> directSubmission(*pDevice->getDefaultEngine().commandStreamReceiver);
|
||||||
EXPECT_TRUE(directSubmission.disableCpuCacheFlush);
|
EXPECT_TRUE(directSubmission.disableCpuCacheFlush);
|
||||||
|
|||||||
@@ -17,6 +17,7 @@
|
|||||||
#include "shared/test/common/helpers/variable_backup.h"
|
#include "shared/test/common/helpers/variable_backup.h"
|
||||||
#include "shared/test/common/libult/linux/drm_mock.h"
|
#include "shared/test/common/libult/linux/drm_mock.h"
|
||||||
#include "shared/test/common/libult/ult_command_stream_receiver.h"
|
#include "shared/test/common/libult/ult_command_stream_receiver.h"
|
||||||
|
#include "shared/test/common/mocks/linux/mock_drm_allocation.h"
|
||||||
#include "shared/test/common/mocks/mock_device.h"
|
#include "shared/test/common/mocks/mock_device.h"
|
||||||
#include "shared/test/common/os_interface/linux/drm_memory_manager_tests.h"
|
#include "shared/test/common/os_interface/linux/drm_memory_manager_tests.h"
|
||||||
#include "shared/test/common/test_macros/test.h"
|
#include "shared/test/common/test_macros/test.h"
|
||||||
@@ -57,6 +58,8 @@ struct MockDrmDirectSubmission : public DrmDirectSubmission<GfxFamily, Dispatche
|
|||||||
using BaseClass = DrmDirectSubmission<GfxFamily, Dispatcher>;
|
using BaseClass = DrmDirectSubmission<GfxFamily, Dispatcher>;
|
||||||
using BaseClass::activeTiles;
|
using BaseClass::activeTiles;
|
||||||
using BaseClass::allocateResources;
|
using BaseClass::allocateResources;
|
||||||
|
using BaseClass::completionFenceAllocation;
|
||||||
|
using BaseClass::completionFenceValue;
|
||||||
using BaseClass::currentTagData;
|
using BaseClass::currentTagData;
|
||||||
using BaseClass::disableMonitorFence;
|
using BaseClass::disableMonitorFence;
|
||||||
using BaseClass::dispatchSwitchRingBufferSection;
|
using BaseClass::dispatchSwitchRingBufferSection;
|
||||||
@@ -70,6 +73,7 @@ struct MockDrmDirectSubmission : public DrmDirectSubmission<GfxFamily, Dispatche
|
|||||||
using BaseClass::partitionConfigSet;
|
using BaseClass::partitionConfigSet;
|
||||||
using BaseClass::partitionedMode;
|
using BaseClass::partitionedMode;
|
||||||
using BaseClass::postSyncOffset;
|
using BaseClass::postSyncOffset;
|
||||||
|
using BaseClass::ringBuffer;
|
||||||
using BaseClass::ringStart;
|
using BaseClass::ringStart;
|
||||||
using BaseClass::submit;
|
using BaseClass::submit;
|
||||||
using BaseClass::switchRingBuffers;
|
using BaseClass::switchRingBuffers;
|
||||||
@@ -116,6 +120,294 @@ HWTEST_F(DrmDirectSubmissionTest, whenCreateDirectSubmissionThenValidObjectIsRet
|
|||||||
EXPECT_TRUE(ret);
|
EXPECT_TRUE(ret);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
HWTEST_F(DrmDirectSubmissionTest, givenCompletionFenceSupportWhenCreateDrmDirectSubmissionThenTagAllocationIsSetAsCompletionFenceAllocation) {
|
||||||
|
DebugManagerStateRestore restorer;
|
||||||
|
DebugManager.flags.EnableDrmCompletionFence.set(1);
|
||||||
|
auto &commandStreamReceiver = *device->getDefaultEngine().commandStreamReceiver;
|
||||||
|
auto drm = executionEnvironment.rootDeviceEnvironments[0]->osInterface->getDriverModel()->as<Drm>();
|
||||||
|
|
||||||
|
ASSERT_TRUE(drm->completionFenceSupport());
|
||||||
|
|
||||||
|
auto expectedCompletionFenceAllocation = commandStreamReceiver.getTagAllocation();
|
||||||
|
EXPECT_NE(nullptr, expectedCompletionFenceAllocation);
|
||||||
|
{
|
||||||
|
MockDrmDirectSubmission<FamilyType, RenderDispatcher<FamilyType>> directSubmission(commandStreamReceiver);
|
||||||
|
EXPECT_EQ(expectedCompletionFenceAllocation, directSubmission.completionFenceAllocation);
|
||||||
|
}
|
||||||
|
{
|
||||||
|
MockDrmDirectSubmission<FamilyType, BlitterDispatcher<FamilyType>> directSubmission(commandStreamReceiver);
|
||||||
|
EXPECT_EQ(expectedCompletionFenceAllocation, directSubmission.completionFenceAllocation);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
HWTEST_F(DrmDirectSubmissionTest, givenNoCompletionFenceSupportWhenCreateDrmDirectSubmissionThenCompletionFenceAllocationIsNotSet) {
|
||||||
|
DebugManagerStateRestore restorer;
|
||||||
|
DebugManager.flags.EnableDrmCompletionFence.set(0);
|
||||||
|
auto &commandStreamReceiver = *device->getDefaultEngine().commandStreamReceiver;
|
||||||
|
auto drm = executionEnvironment.rootDeviceEnvironments[0]->osInterface->getDriverModel()->as<Drm>();
|
||||||
|
|
||||||
|
ASSERT_FALSE(drm->completionFenceSupport());
|
||||||
|
{
|
||||||
|
MockDrmDirectSubmission<FamilyType, RenderDispatcher<FamilyType>> directSubmission(commandStreamReceiver);
|
||||||
|
EXPECT_EQ(nullptr, directSubmission.completionFenceAllocation);
|
||||||
|
}
|
||||||
|
{
|
||||||
|
MockDrmDirectSubmission<FamilyType, BlitterDispatcher<FamilyType>> directSubmission(commandStreamReceiver);
|
||||||
|
EXPECT_EQ(nullptr, directSubmission.completionFenceAllocation);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
HWTEST_F(DrmDirectSubmissionTest, givenDirectSubmissionWithoutCompletionFenceAllocationWhenDestroyingThenNoWaitForUserFenceIsCalled) {
|
||||||
|
DebugManagerStateRestore restorer;
|
||||||
|
DebugManager.flags.EnableDrmCompletionFence.set(0);
|
||||||
|
auto &commandStreamReceiver = *device->getDefaultEngine().commandStreamReceiver;
|
||||||
|
auto drm = static_cast<DrmMock *>(executionEnvironment.rootDeviceEnvironments[0]->osInterface->getDriverModel()->as<Drm>());
|
||||||
|
|
||||||
|
ASSERT_FALSE(drm->completionFenceSupport());
|
||||||
|
|
||||||
|
drm->waitUserFenceParams.clear();
|
||||||
|
{
|
||||||
|
MockDrmDirectSubmission<FamilyType, RenderDispatcher<FamilyType>> directSubmission(commandStreamReceiver);
|
||||||
|
directSubmission.completionFenceValue = 10;
|
||||||
|
}
|
||||||
|
|
||||||
|
EXPECT_EQ(0u, drm->waitUserFenceParams.size());
|
||||||
|
}
|
||||||
|
|
||||||
|
HWTEST_F(DrmDirectSubmissionTest, givenCompletionFenceSupportAndFenceIsNotCompletedWhenDestroyingThenWaitForUserFenceIsCalled) {
|
||||||
|
DebugManagerStateRestore restorer;
|
||||||
|
DebugManager.flags.EnableDrmCompletionFence.set(1);
|
||||||
|
|
||||||
|
auto &commandStreamReceiver = *device->getDefaultEngine().commandStreamReceiver;
|
||||||
|
auto drm = static_cast<DrmMock *>(executionEnvironment.rootDeviceEnvironments[0]->osInterface->getDriverModel()->as<Drm>());
|
||||||
|
|
||||||
|
ASSERT_TRUE(drm->completionFenceSupport());
|
||||||
|
|
||||||
|
drm->waitUserFenceParams.clear();
|
||||||
|
{
|
||||||
|
MockDrmDirectSubmission<FamilyType, RenderDispatcher<FamilyType>> directSubmission(commandStreamReceiver);
|
||||||
|
directSubmission.completionFenceValue = 10;
|
||||||
|
}
|
||||||
|
|
||||||
|
EXPECT_EQ(osContext->getDrmContextIds().size(), drm->waitUserFenceParams.size());
|
||||||
|
}
|
||||||
|
|
||||||
|
HWTEST_F(DrmDirectSubmissionTest, givenCompletionFenceSupportAndFenceIsNotCompletedWhenWaitOnSpecificAddressesPerOsContext) {
|
||||||
|
DebugManagerStateRestore restorer;
|
||||||
|
DebugManager.flags.EnableDrmCompletionFence.set(1);
|
||||||
|
|
||||||
|
auto &commandStreamReceiver = device->getUltCommandStreamReceiver<FamilyType>();
|
||||||
|
memset(commandStreamReceiver.getTagAllocation()->getUnderlyingBuffer(), 0, commandStreamReceiver.getTagAllocation()->getUnderlyingBufferSize());
|
||||||
|
auto drm = static_cast<DrmMock *>(executionEnvironment.rootDeviceEnvironments[0]->osInterface->getDriverModel()->as<Drm>());
|
||||||
|
|
||||||
|
ASSERT_TRUE(drm->completionFenceSupport());
|
||||||
|
auto completionFenceBaseCpuAddress = reinterpret_cast<uint64_t>(commandStreamReceiver.getTagAddress()) + Drm::completionFenceOffset;
|
||||||
|
uint32_t expectedCompletionValueToWait = 10u;
|
||||||
|
|
||||||
|
{
|
||||||
|
DeviceBitfield firstTileBitfield{0b01};
|
||||||
|
OsContextLinux osContext(*drm, 0u,
|
||||||
|
EngineDescriptorHelper::getDefaultDescriptor({aub_stream::ENGINE_RCS, EngineUsage::Regular},
|
||||||
|
PreemptionMode::ThreadGroup, firstTileBitfield));
|
||||||
|
osContext.ensureContextInitialized();
|
||||||
|
commandStreamReceiver.setupContext(osContext);
|
||||||
|
drm->waitUserFenceParams.clear();
|
||||||
|
{
|
||||||
|
MockDrmDirectSubmission<FamilyType, RenderDispatcher<FamilyType>> directSubmission(commandStreamReceiver);
|
||||||
|
directSubmission.completionFenceValue = expectedCompletionValueToWait;
|
||||||
|
}
|
||||||
|
EXPECT_EQ(1u, drm->waitUserFenceParams.size());
|
||||||
|
EXPECT_EQ(expectedCompletionValueToWait, drm->waitUserFenceParams[0].value);
|
||||||
|
EXPECT_EQ(completionFenceBaseCpuAddress, drm->waitUserFenceParams[0].address);
|
||||||
|
}
|
||||||
|
{
|
||||||
|
DeviceBitfield secondTileBitfield{0b10};
|
||||||
|
OsContextLinux osContext(*drm, 0u,
|
||||||
|
EngineDescriptorHelper::getDefaultDescriptor({aub_stream::ENGINE_RCS, EngineUsage::Regular},
|
||||||
|
PreemptionMode::ThreadGroup, secondTileBitfield));
|
||||||
|
osContext.ensureContextInitialized();
|
||||||
|
commandStreamReceiver.setupContext(osContext);
|
||||||
|
drm->waitUserFenceParams.clear();
|
||||||
|
{
|
||||||
|
MockDrmDirectSubmission<FamilyType, RenderDispatcher<FamilyType>> directSubmission(commandStreamReceiver);
|
||||||
|
directSubmission.completionFenceValue = expectedCompletionValueToWait;
|
||||||
|
}
|
||||||
|
EXPECT_EQ(1u, drm->waitUserFenceParams.size());
|
||||||
|
EXPECT_EQ(expectedCompletionValueToWait, drm->waitUserFenceParams[0].value);
|
||||||
|
EXPECT_EQ(completionFenceBaseCpuAddress, drm->waitUserFenceParams[0].address);
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
DeviceBitfield twoTilesBitfield{0b11};
|
||||||
|
OsContextLinux osContext(*drm, 0u,
|
||||||
|
EngineDescriptorHelper::getDefaultDescriptor({aub_stream::ENGINE_RCS, EngineUsage::Regular},
|
||||||
|
PreemptionMode::ThreadGroup, twoTilesBitfield));
|
||||||
|
osContext.ensureContextInitialized();
|
||||||
|
commandStreamReceiver.setupContext(osContext);
|
||||||
|
drm->waitUserFenceParams.clear();
|
||||||
|
MockGraphicsAllocation workPartitionAllocation{};
|
||||||
|
commandStreamReceiver.workPartitionAllocation = &workPartitionAllocation;
|
||||||
|
{
|
||||||
|
MockDrmDirectSubmission<FamilyType, RenderDispatcher<FamilyType>> directSubmission(commandStreamReceiver);
|
||||||
|
directSubmission.completionFenceValue = expectedCompletionValueToWait;
|
||||||
|
}
|
||||||
|
commandStreamReceiver.workPartitionAllocation = nullptr;
|
||||||
|
|
||||||
|
EXPECT_EQ(2u, drm->waitUserFenceParams.size());
|
||||||
|
EXPECT_EQ(expectedCompletionValueToWait, drm->waitUserFenceParams[0].value);
|
||||||
|
EXPECT_EQ(completionFenceBaseCpuAddress, drm->waitUserFenceParams[0].address);
|
||||||
|
|
||||||
|
EXPECT_EQ(expectedCompletionValueToWait, drm->waitUserFenceParams[1].value);
|
||||||
|
EXPECT_EQ(completionFenceBaseCpuAddress + commandStreamReceiver.getPostSyncWriteOffset(), drm->waitUserFenceParams[1].address);
|
||||||
|
}
|
||||||
|
commandStreamReceiver.setupContext(*osContext);
|
||||||
|
}
|
||||||
|
|
||||||
|
HWTEST_F(DrmDirectSubmissionTest, givenNoCompletionFenceSupportWhenSubmittingThenNoCompletionAddressIsPassedToExec) {
|
||||||
|
uint64_t gpuAddress = 0x1000;
|
||||||
|
size_t size = 0x1000;
|
||||||
|
|
||||||
|
MockDrmDirectSubmission<FamilyType, RenderDispatcher<FamilyType>> drmDirectSubmission(*device->getDefaultEngine().commandStreamReceiver);
|
||||||
|
drmDirectSubmission.completionFenceAllocation = nullptr;
|
||||||
|
EXPECT_TRUE(drmDirectSubmission.allocateResources());
|
||||||
|
auto ringBuffer = static_cast<DrmAllocation *>(drmDirectSubmission.ringBuffer);
|
||||||
|
auto initialBO = ringBuffer->getBufferObjectToModify(0);
|
||||||
|
|
||||||
|
auto drm = executionEnvironment.rootDeviceEnvironments[0]->osInterface->getDriverModel()->as<Drm>();
|
||||||
|
MockBufferObject mockBO(drm);
|
||||||
|
ringBuffer->getBufferObjectToModify(0) = &mockBO;
|
||||||
|
|
||||||
|
for (auto i = 0; i < 2; i++) {
|
||||||
|
mockBO.passedExecParams.clear();
|
||||||
|
EXPECT_TRUE(drmDirectSubmission.submit(gpuAddress, size));
|
||||||
|
|
||||||
|
ASSERT_EQ(1u, mockBO.passedExecParams.size());
|
||||||
|
EXPECT_EQ(0u, mockBO.passedExecParams[0].completionGpuAddress);
|
||||||
|
EXPECT_EQ(0u, mockBO.passedExecParams[0].completionValue);
|
||||||
|
}
|
||||||
|
ringBuffer->getBufferObjectToModify(0) = initialBO;
|
||||||
|
}
|
||||||
|
|
||||||
|
HWTEST_F(DrmDirectSubmissionTest, givenTile0AndCompletionFenceSupportWhenSubmittingThenCompletionAddressAndValueArePassedToExec) {
|
||||||
|
uint64_t gpuAddress = 0x1000;
|
||||||
|
size_t size = 0x1000;
|
||||||
|
|
||||||
|
auto &commandStreamReceiver = *device->getDefaultEngine().commandStreamReceiver;
|
||||||
|
auto drm = executionEnvironment.rootDeviceEnvironments[0]->osInterface->getDriverModel()->as<Drm>();
|
||||||
|
auto completionFenceBaseGpuAddress = commandStreamReceiver.getTagAllocation()->getGpuAddress() + Drm::completionFenceOffset;
|
||||||
|
|
||||||
|
DeviceBitfield firstTileBitfield{0b01};
|
||||||
|
OsContextLinux osContextTile0(*drm, 0u,
|
||||||
|
EngineDescriptorHelper::getDefaultDescriptor({aub_stream::ENGINE_RCS, EngineUsage::Regular},
|
||||||
|
PreemptionMode::ThreadGroup, firstTileBitfield));
|
||||||
|
osContextTile0.ensureContextInitialized();
|
||||||
|
commandStreamReceiver.setupContext(osContextTile0);
|
||||||
|
|
||||||
|
MockDrmDirectSubmission<FamilyType, RenderDispatcher<FamilyType>> drmDirectSubmission(commandStreamReceiver);
|
||||||
|
drmDirectSubmission.completionFenceAllocation = commandStreamReceiver.getTagAllocation();
|
||||||
|
EXPECT_TRUE(drmDirectSubmission.allocateResources());
|
||||||
|
auto ringBuffer = static_cast<DrmAllocation *>(drmDirectSubmission.ringBuffer);
|
||||||
|
auto initialBO = ringBuffer->getBufferObjectToModify(0);
|
||||||
|
|
||||||
|
MockBufferObject mockBO(drm);
|
||||||
|
ringBuffer->getBufferObjectToModify(0) = &mockBO;
|
||||||
|
|
||||||
|
for (auto i = 0u; i < 2; i++) {
|
||||||
|
mockBO.passedExecParams.clear();
|
||||||
|
EXPECT_TRUE(drmDirectSubmission.submit(gpuAddress, size));
|
||||||
|
|
||||||
|
ASSERT_EQ(1u, mockBO.passedExecParams.size());
|
||||||
|
EXPECT_EQ(completionFenceBaseGpuAddress, mockBO.passedExecParams[0].completionGpuAddress);
|
||||||
|
EXPECT_EQ(i + 1, mockBO.passedExecParams[0].completionValue);
|
||||||
|
}
|
||||||
|
ringBuffer->getBufferObjectToModify(0) = initialBO;
|
||||||
|
|
||||||
|
commandStreamReceiver.setupContext(*osContext);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Checks that, with an OS context created for the second tile only (bitfield 0b10), each
// submit() records exactly one entry in mockBO.passedExecParams whose completion address is
// the tag allocation GPU address plus Drm::completionFenceOffset and whose completion value
// grows by one per submission (1, then 2).
HWTEST_F(DrmDirectSubmissionTest, givenTile1AndCompletionFenceSupportWhenSubmittingThenCompletionAddressAndValueArePassedToExec) {
    uint64_t gpuAddress = 0x1000;
    size_t size = 0x1000;

    auto &commandStreamReceiver = *device->getDefaultEngine().commandStreamReceiver;
    auto drm = executionEnvironment.rootDeviceEnvironments[0]->osInterface->getDriverModel()->as<Drm>();
    auto completionFenceBaseGpuAddress = commandStreamReceiver.getTagAllocation()->getGpuAddress() + Drm::completionFenceOffset;

    // Temporarily bind the command stream receiver to a stack-local context for tile 1.
    DeviceBitfield secondTileBitfield{0b10};
    OsContextLinux osContextTile1(*drm, 0u,
                                  EngineDescriptorHelper::getDefaultDescriptor({aub_stream::ENGINE_RCS, EngineUsage::Regular},
                                                                               PreemptionMode::ThreadGroup, secondTileBitfield));
    osContextTile1.ensureContextInitialized();
    commandStreamReceiver.setupContext(osContextTile1);

    MockDrmDirectSubmission<FamilyType, RenderDispatcher<FamilyType>> drmDirectSubmission(commandStreamReceiver);
    drmDirectSubmission.completionFenceAllocation = commandStreamReceiver.getTagAllocation();
    EXPECT_TRUE(drmDirectSubmission.allocateResources());

    // Swap the ring buffer's buffer object for a mock that records the exec parameters.
    auto ringBuffer = static_cast<DrmAllocation *>(drmDirectSubmission.ringBuffer);
    auto initialBO = ringBuffer->getBufferObjectToModify(0);

    MockBufferObject mockBO(drm);
    ringBuffer->getBufferObjectToModify(0) = &mockBO;

    for (auto i = 0u; i < 2; i++) {
        mockBO.passedExecParams.clear();
        EXPECT_TRUE(drmDirectSubmission.submit(gpuAddress, size));

        // Non-fatal check plus an explicit break: a fatal ASSERT_* would return from the test
        // body right here and skip the restore steps below, leaving ringBuffer pointing at the
        // stack-local mockBO and the command stream receiver bound to the stack-local context.
        EXPECT_EQ(1u, mockBO.passedExecParams.size());
        if (mockBO.passedExecParams.size() != 1u) {
            break;
        }

        EXPECT_EQ(completionFenceBaseGpuAddress, mockBO.passedExecParams[0].completionGpuAddress);
        EXPECT_EQ(i + 1, mockBO.passedExecParams[0].completionValue);
    }

    // Always undo the temporary substitutions before the locals go out of scope.
    ringBuffer->getBufferObjectToModify(0) = initialBO;

    commandStreamReceiver.setupContext(*osContext);
}
|
||||||
|
|
||||||
|
// Checks that, with an OS context created for both tiles (bitfield 0b11), each submit()
// records two entries in mockBO.passedExecParams: the first uses the tag allocation GPU
// address plus Drm::completionFenceOffset, the second that address advanced by
// getPostSyncWriteOffset(); both carry the same completion value, growing by one per
// submission (1, then 2).
HWTEST_F(DrmDirectSubmissionTest, givenTwoTilesAndCompletionFenceSupportWhenSubmittingThenCompletionAddressAndValueArePassedToExec) {
    uint64_t gpuAddress = 0x1000;
    size_t size = 0x1000;

    auto &commandStreamReceiver = device->getUltCommandStreamReceiver<FamilyType>();
    auto drm = executionEnvironment.rootDeviceEnvironments[0]->osInterface->getDriverModel()->as<Drm>();
    auto completionFenceBaseGpuAddress = commandStreamReceiver.getTagAllocation()->getGpuAddress() + Drm::completionFenceOffset;

    // Temporarily bind the command stream receiver to a stack-local context spanning both tiles.
    DeviceBitfield twoTilesBitfield{0b11};
    OsContextLinux osContextBothTiles(*drm, 0u,
                                      EngineDescriptorHelper::getDefaultDescriptor({aub_stream::ENGINE_RCS, EngineUsage::Regular},
                                                                                   PreemptionMode::ThreadGroup, twoTilesBitfield));
    osContextBothTiles.ensureContextInitialized();
    commandStreamReceiver.setupContext(osContextBothTiles);

    // The work partition allocation only needs to be set while the direct submission object
    // is constructed; it is cleared again immediately afterwards.
    MockGraphicsAllocation workPartitionAllocation{};
    commandStreamReceiver.workPartitionAllocation = &workPartitionAllocation;

    MockDrmDirectSubmission<FamilyType, RenderDispatcher<FamilyType>> drmDirectSubmission(commandStreamReceiver);

    commandStreamReceiver.workPartitionAllocation = nullptr;

    drmDirectSubmission.completionFenceAllocation = commandStreamReceiver.getTagAllocation();
    EXPECT_TRUE(drmDirectSubmission.allocateResources());

    // Swap the ring buffer's buffer object for a mock that records the exec parameters.
    auto ringBuffer = static_cast<DrmAllocation *>(drmDirectSubmission.ringBuffer);
    auto initialBO = ringBuffer->getBufferObjectToModify(0);

    MockBufferObject mockBO(drm);
    ringBuffer->getBufferObjectToModify(0) = &mockBO;

    for (auto i = 0u; i < 2; i++) {
        mockBO.passedExecParams.clear();
        EXPECT_TRUE(drmDirectSubmission.submit(gpuAddress, size));

        // Non-fatal check plus an explicit break: a fatal ASSERT_* would return from the test
        // body right here and skip the restore steps below, leaving ringBuffer pointing at the
        // stack-local mockBO and the command stream receiver bound to the stack-local context.
        EXPECT_EQ(2u, mockBO.passedExecParams.size());
        if (mockBO.passedExecParams.size() != 2u) {
            break;
        }

        EXPECT_EQ(completionFenceBaseGpuAddress, mockBO.passedExecParams[0].completionGpuAddress);
        EXPECT_EQ(i + 1, mockBO.passedExecParams[0].completionValue);

        EXPECT_EQ(completionFenceBaseGpuAddress + commandStreamReceiver.getPostSyncWriteOffset(), mockBO.passedExecParams[1].completionGpuAddress);
        EXPECT_EQ(i + 1, mockBO.passedExecParams[1].completionValue);
    }

    // Always undo the temporary substitutions before the locals go out of scope.
    ringBuffer->getBufferObjectToModify(0) = initialBO;

    commandStreamReceiver.setupContext(*osContext);
}
|
||||||
|
|
||||||
HWTEST_F(DrmDirectSubmissionTest, givenDisabledMonitorFenceWhenDispatchSwitchRingBufferThenDispatchPipeControl) {
|
HWTEST_F(DrmDirectSubmissionTest, givenDisabledMonitorFenceWhenDispatchSwitchRingBufferThenDispatchPipeControl) {
|
||||||
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
|
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
|
||||||
using Dispatcher = RenderDispatcher<FamilyType>;
|
using Dispatcher = RenderDispatcher<FamilyType>;
|
||||||
|
|||||||
Reference in New Issue
Block a user