Correct usage of user fence wait call

Related-To: NEO-5845

Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
Zbigniew Zdanowicz 2021-06-18 01:11:36 +00:00 committed by Compute-Runtime-Automation
parent 43b95656ff
commit f98b46e2cc
7 changed files with 192 additions and 30 deletions

View File

@ -66,12 +66,15 @@ class DrmCommandStreamReceiver : public DeviceCommandStreamReceiver<GfxFamily> {
MOCKABLE_VIRTUAL void flushInternal(const BatchBuffer &batchBuffer, const ResidencyContainer &allocationsForResidency);
MOCKABLE_VIRTUAL void exec(const BatchBuffer &batchBuffer, uint32_t vmHandleId, uint32_t drmContextId);
MOCKABLE_VIRTUAL int waitUserFence(uint32_t waitValue);
bool isUserFenceWaitActive();
std::vector<BufferObject *> residency;
std::vector<drm_i915_gem_exec_object2> execObjectsStorage;
Drm *drm;
gemCloseWorkerMode gemCloseWorkerOperationMode;
int32_t kmdWaitTimeout = -1;
bool useUserFenceWait = false;
bool useContextForUserFenceWait = true;
};

View File

@ -61,6 +61,7 @@ DrmCommandStreamReceiver<GfxFamily>::DrmCommandStreamReceiver(ExecutionEnvironme
useContextForUserFenceWait = !!(overrideUserFenceUseCtxId);
}
useNotifyEnableForPostSync = useUserFenceWait;
kmdWaitTimeout = DebugManager.flags.SetKmdWaitTimeout.get();
}
template <typename GfxFamily>
@ -104,8 +105,8 @@ bool DrmCommandStreamReceiver<GfxFamily>::flush(BatchBuffer &batchBuffer, Reside
return this->blitterDirectSubmission->dispatchCommandBuffer(batchBuffer, *this->flushStamp.get());
}
if (useUserFenceWait) {
this->flushStamp->setStamp(taskCount + 1);
if (isUserFenceWaitActive()) {
this->flushStamp->setStamp(taskCount);
} else {
this->flushStamp->setStamp(bb->peekHandle());
}
@ -209,10 +210,10 @@ GmmPageTableMngr *DrmCommandStreamReceiver<GfxFamily>::createPageTableManager()
template <typename GfxFamily>
bool DrmCommandStreamReceiver<GfxFamily>::waitForFlushStamp(FlushStamp &flushStamp) {
auto waitValue = static_cast<uint32_t>(flushStamp);
if (useUserFenceWait) {
if (isUserFenceWaitActive()) {
waitUserFence(waitValue);
} else {
this->drm->waitHandle(waitValue, -1);
this->drm->waitHandle(waitValue, kmdWaitTimeout);
}
return true;
@ -221,9 +222,14 @@ bool DrmCommandStreamReceiver<GfxFamily>::waitForFlushStamp(FlushStamp &flushSta
// Reports whether the KMD wait mode is active for this command stream receiver.
// When the DRM object reports VM bind as available, the result follows useUserFenceWait;
// otherwise the function always returns true.
template <typename GfxFamily>
bool DrmCommandStreamReceiver<GfxFamily>::isKmdWaitModeActive() {
    if (this->drm->isVmBindAvailable()) {
        // Only the user-fence switch decides here; useContextForUserFenceWait is not consulted.
        return useUserFenceWait;
    }
    return true;
}
// Tells whether completion waits should go through the user-fence path:
// true only when the DRM object reports VM bind as available and useUserFenceWait is set.
// isVmBindAvailable() is queried on every call, before the flag is looked at.
template <typename GfxFamily>
inline bool DrmCommandStreamReceiver<GfxFamily>::isUserFenceWaitActive() {
    if (!this->drm->isVmBindAvailable()) {
        return false;
    }
    return useUserFenceWait;
}
} // namespace NEO

View File

@ -20,10 +20,11 @@ void DrmCommandStreamReceiver<GfxFamily>::flushInternal(const BatchBuffer &batch
// Issues a user-fence wait through the DRM object for this receiver's tag.
//   waitValue - value forwarded to Drm::waitUserFence, compared as a 32-bit quantity (Drm::ValueWidth::U32).
// Returns whatever Drm::waitUserFence returns.
template <typename GfxFamily>
int DrmCommandStreamReceiver<GfxFamily>::waitUserFence(uint32_t waitValue) {
    // Context id stays 0 unless useContextForUserFenceWait asks for the first DRM context id.
    uint32_t ctxId = 0u;
    // The address handed to the wait call is the one returned by getTagAddress().
    uint64_t tagAddress = castToUint64(const_cast<uint32_t *>(getTagAddress()));
    if (useContextForUserFenceWait) {
        ctxId = static_cast<const OsContextLinux *>(osContext)->getDrmContextIds()[0];
    }
    // kmdWaitTimeout carries the configurable timeout instead of a hard-coded -1.
    return this->drm->waitUserFence(ctxId, tagAddress, waitValue, Drm::ValueWidth::U32, kmdWaitTimeout);
}
} // namespace NEO

View File

@ -198,6 +198,8 @@ class DrmMockCustom : public Drm {
//DRM_IOCTL_I915_GEM_CONTEXT_GETPARAM
drm_i915_gem_context_param recordedGetContextParam = {0};
__u64 getContextParamRetValue = 0;
//DRM_IOCTL_I915_GEM_WAIT
int64_t gemWaitTimeout = 0;
int errnoValue = 0;
@ -275,9 +277,11 @@ class DrmMockCustom : public Drm {
ioctl_cnt.gemSetDomain++;
} break;
case DRM_IOCTL_I915_GEM_WAIT:
case DRM_IOCTL_I915_GEM_WAIT: {
auto gemWaitParams = (drm_i915_gem_wait *)arg;
gemWaitTimeout = gemWaitParams->timeout_ns;
ioctl_cnt.gemWait++;
break;
} break;
case DRM_IOCTL_GEM_CLOSE:
ioctl_cnt.gemClose++;

View File

@ -1664,10 +1664,14 @@ HWTEST_TEMPLATED_F(DrmCommandStreamTest, givenPageTableManagerAndMapFalseWhenUpd
EXPECT_TRUE(result);
}
HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTest, givenWaitUserFenceFlagSetWhenDrmCsrFlushedThenExpectTaskCountPlusOneStoredAsFlushStamp) {
HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTest,
givenWaitUserFenceFlagAndVmBindAvailableSetWhenDrmCsrFlushedThenExpectTaskCountStoredAsFlushStamp) {
DebugManagerStateRestore restorer;
DebugManager.flags.EnableUserFenceForCompletionWait.set(1);
mock->isVmBindAvailableCall.callParent = false;
mock->isVmBindAvailableCall.returnValue = true;
TestedDrmCommandStreamReceiver<FamilyType> *testedCsr =
new TestedDrmCommandStreamReceiver<FamilyType>(gemCloseWorkerMode::gemCloseWorkerInactive,
*this->executionEnvironment,
@ -1683,10 +1687,75 @@ HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTest, givenWaitUserFenceFlagSetWhenDr
CommandStreamReceiverHw<FamilyType>::alignToCacheLine(cs);
BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr, false};
testedCsr->taskCount = 10u;
testedCsr->taskCount = 160u;
testedCsr->flush(batchBuffer, testedCsr->getResidencyAllocations());
EXPECT_EQ(11u, testedCsr->flushStamp->peekStamp());
EXPECT_EQ(160u, testedCsr->flushStamp->peekStamp());
mm->freeGraphicsMemory(commandBuffer);
}
// VM bind is reported as available but the user-fence debug flag is left untouched:
// after flush() the flush stamp must equal the command buffer's BO handle.
HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTest,
                   givenNoWaitUserFenceFlagAndVmBindAvailableSetWhenDrmCsrFlushedThenExpectCommandBufferBoHandleAsFlushStamp) {
    // Force the mocked DRM to answer "VM bind available" without calling the parent implementation.
    mock->isVmBindAvailableCall.callParent = false;
    mock->isVmBindAvailableCall.returnValue = true;

    auto *csrUnderTest = new TestedDrmCommandStreamReceiver<FamilyType>(gemCloseWorkerMode::gemCloseWorkerInactive,
                                                                        *this->executionEnvironment,
                                                                        1);
    EXPECT_FALSE(csrUnderTest->useUserFenceWait);
    device->resetCommandStreamReceiver(csrUnderTest);

    auto cmdBufferAllocation = mm->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csrUnderTest->getRootDeviceIndex(), MemoryConstants::pageSize});
    ASSERT_NE(nullptr, cmdBufferAllocation);

    LinearStream stream(cmdBufferAllocation);
    CommandStreamReceiverHw<FamilyType>::addBatchBufferEnd(stream, nullptr);
    CommandStreamReceiverHw<FamilyType>::alignToCacheLine(stream);
    BatchBuffer batchBuffer{stream.getGraphicsAllocation(), 0, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, stream.getUsed(), &stream, nullptr, false};

    // Capture the expected stamp (the BO handle) before flushing.
    auto *drmAllocation = static_cast<DrmAllocation *>(stream.getGraphicsAllocation());
    auto expectedStamp = static_cast<FlushStamp>(drmAllocation->getBO()->peekHandle());

    csrUnderTest->taskCount = 160u;
    csrUnderTest->flush(batchBuffer, csrUnderTest->getResidencyAllocations());

    EXPECT_EQ(expectedStamp, csrUnderTest->flushStamp->peekStamp());

    mm->freeGraphicsMemory(cmdBufferAllocation);
}
// The user-fence debug flag is enabled but VM bind is reported as unavailable:
// after flush() the flush stamp must still equal the command buffer's BO handle.
HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTest,
                   givenWaitUserFenceFlagAndNoVmBindAvailableSetWhenDrmCsrFlushedThenExpectCommandBufferBoHandleAsFlushStamp) {
    DebugManagerStateRestore debugFlagsGuard;
    DebugManager.flags.EnableUserFenceForCompletionWait.set(1);

    // Force the mocked DRM to answer "VM bind not available" without calling the parent implementation.
    mock->isVmBindAvailableCall.callParent = false;
    mock->isVmBindAvailableCall.returnValue = false;

    auto *csrUnderTest = new TestedDrmCommandStreamReceiver<FamilyType>(gemCloseWorkerMode::gemCloseWorkerInactive,
                                                                        *this->executionEnvironment,
                                                                        1);
    EXPECT_TRUE(csrUnderTest->useUserFenceWait);
    device->resetCommandStreamReceiver(csrUnderTest);

    auto cmdBufferAllocation = mm->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csrUnderTest->getRootDeviceIndex(), MemoryConstants::pageSize});
    ASSERT_NE(nullptr, cmdBufferAllocation);

    LinearStream stream(cmdBufferAllocation);
    CommandStreamReceiverHw<FamilyType>::addBatchBufferEnd(stream, nullptr);
    CommandStreamReceiverHw<FamilyType>::alignToCacheLine(stream);
    BatchBuffer batchBuffer{stream.getGraphicsAllocation(), 0, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, stream.getUsed(), &stream, nullptr, false};

    // Capture the expected stamp (the BO handle) before flushing.
    auto *drmAllocation = static_cast<DrmAllocation *>(stream.getGraphicsAllocation());
    auto expectedStamp = static_cast<FlushStamp>(drmAllocation->getBO()->peekHandle());

    csrUnderTest->taskCount = 160u;
    csrUnderTest->flush(batchBuffer, csrUnderTest->getResidencyAllocations());

    EXPECT_EQ(expectedStamp, csrUnderTest->flushStamp->peekStamp());

    mm->freeGraphicsMemory(cmdBufferAllocation);
}
@ -1706,13 +1775,40 @@ HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTest, givenWaitUserFenceFlagNotSetWhe
testedCsr->waitForFlushStamp(handleToWait);
EXPECT_EQ(1, mock->ioctl_cnt.gemWait);
EXPECT_EQ(-1, mock->gemWaitTimeout);
EXPECT_EQ(0u, testedCsr->waitUserFenceResult.called);
}
HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTest, givenWaitUserFenceFlagSetWhenDrmCsrWaitsForFlushStampThenExpectUseDrmWaitUserFenceCallWithNonZeroContext) {
// With SetKmdWaitTimeout overridden to 1000 and user-fence wait left disabled,
// waitForFlushStamp() must issue exactly one GEM wait ioctl carrying that timeout.
HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTest, givenGemWaitUsedWhenKmdTimeoutUsedWhenDrmCsrWaitsForFlushStampThenExpectUseDrmGemWaitCallAndOverrideTimeout) {
    DebugManagerStateRestore debugFlagsGuard;
    DebugManager.flags.SetKmdWaitTimeout.set(1000);

    auto *csrUnderTest = new TestedDrmCommandStreamReceiver<FamilyType>(gemCloseWorkerMode::gemCloseWorkerInactive,
                                                                        *this->executionEnvironment,
                                                                        1);
    // State of a freshly constructed receiver when only the timeout flag is overridden.
    EXPECT_FALSE(csrUnderTest->useUserFenceWait);
    EXPECT_FALSE(csrUnderTest->isUsedNotifyEnableForPostSync());
    EXPECT_TRUE(csrUnderTest->useContextForUserFenceWait);
    device->resetCommandStreamReceiver(csrUnderTest);

    // Start counting GEM wait ioctls from zero.
    mock->ioctl_cnt.gemWait = 0;

    FlushStamp stampToWaitFor = 123;
    csrUnderTest->waitForFlushStamp(stampToWaitFor);

    EXPECT_EQ(1, mock->ioctl_cnt.gemWait);
    EXPECT_EQ(1000, mock->gemWaitTimeout);
    EXPECT_EQ(0u, csrUnderTest->waitUserFenceResult.called);
}
HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTest,
givenWaitUserFenceFlagSetAndVmBindAvailableWhenDrmCsrWaitsForFlushStampThenExpectUseDrmWaitUserFenceCallWithNonZeroContext) {
DebugManagerStateRestore restorer;
DebugManager.flags.EnableUserFenceForCompletionWait.set(1);
mock->isVmBindAvailableCall.callParent = false;
mock->isVmBindAvailableCall.returnValue = true;
TestedDrmCommandStreamReceiver<FamilyType> *testedCsr =
new TestedDrmCommandStreamReceiver<FamilyType>(gemCloseWorkerMode::gemCloseWorkerInactive,
*this->executionEnvironment,
@ -1722,6 +1818,7 @@ HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTest, givenWaitUserFenceFlagSetWhenDr
EXPECT_TRUE(testedCsr->useContextForUserFenceWait);
device->resetCommandStreamReceiver(testedCsr);
mock->ioctl_cnt.gemWait = 0;
mock->isVmBindAvailableCall.called = 0u;
auto osContextLinux = static_cast<const OsContextLinux *>(device->getDefaultEngine().osContext);
std::vector<uint32_t> &drmCtxIds = const_cast<std::vector<uint32_t> &>(osContextLinux->getDrmContextIds());
@ -1737,6 +1834,7 @@ HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTest, givenWaitUserFenceFlagSetWhenDr
EXPECT_EQ(1u, testedCsr->waitUserFenceResult.called);
EXPECT_EQ(123u, testedCsr->waitUserFenceResult.waitValue);
EXPECT_EQ(1u, mock->isVmBindAvailableCall.called);
EXPECT_EQ(1u, mock->waitUserFenceCall.called);
EXPECT_NE(0u, mock->waitUserFenceCall.ctxId);
@ -1744,10 +1842,70 @@ HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTest, givenWaitUserFenceFlagSetWhenDr
EXPECT_EQ(Drm::ValueWidth::U32, mock->waitUserFenceCall.dataWidth);
}
HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTest, givenWaitUserFenceSetAndUseCtxFlagsNotSetWhenDrmCsrWaitsForFlushStampThenExpectUseDrmWaitUserFenceCallWithZeroContext) {
// User-fence wait is requested through the debug flag, but VM bind is reported as unavailable:
// waitForFlushStamp() must use the GEM wait ioctl and never reach the user-fence wait.
HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTest,
                   givenWaitUserFenceFlagSetAndVmBindNotAvailableWhenDrmCsrWaitsForFlushStampThenExpectUseDrmGemWaitCall) {
    DebugManagerStateRestore debugFlagsGuard;
    DebugManager.flags.EnableUserFenceForCompletionWait.set(1);

    // Force the mocked DRM to answer "VM bind not available" without calling the parent implementation.
    mock->isVmBindAvailableCall.callParent = false;
    mock->isVmBindAvailableCall.returnValue = false;

    auto *csrUnderTest = new TestedDrmCommandStreamReceiver<FamilyType>(gemCloseWorkerMode::gemCloseWorkerInactive,
                                                                        *this->executionEnvironment,
                                                                        1);
    EXPECT_TRUE(csrUnderTest->useUserFenceWait);
    EXPECT_TRUE(csrUnderTest->isUsedNotifyEnableForPostSync());
    EXPECT_TRUE(csrUnderTest->useContextForUserFenceWait);
    device->resetCommandStreamReceiver(csrUnderTest);

    // Reset the counters so only the wait below is measured.
    mock->ioctl_cnt.gemWait = 0;
    mock->isVmBindAvailableCall.called = 0u;

    FlushStamp stampToWaitFor = 123;
    csrUnderTest->waitForFlushStamp(stampToWaitFor);

    EXPECT_EQ(1, mock->ioctl_cnt.gemWait);
    EXPECT_EQ(0u, csrUnderTest->waitUserFenceResult.called);
    EXPECT_EQ(1u, mock->isVmBindAvailableCall.called);
    EXPECT_EQ(0u, mock->waitUserFenceCall.called);
}
// VM bind is reported as available but the user-fence debug flag is left untouched:
// waitForFlushStamp() must use the GEM wait ioctl and never reach the user-fence wait.
HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTest,
                   givenNoWaitUserFenceFlagSetAndVmBindAvailableWhenDrmCsrWaitsForFlushStampThenExpectUseDrmGemWaitCall) {
    // Force the mocked DRM to answer "VM bind available" without calling the parent implementation.
    mock->isVmBindAvailableCall.callParent = false;
    mock->isVmBindAvailableCall.returnValue = true;

    auto *csrUnderTest = new TestedDrmCommandStreamReceiver<FamilyType>(gemCloseWorkerMode::gemCloseWorkerInactive,
                                                                        *this->executionEnvironment,
                                                                        1);
    EXPECT_FALSE(csrUnderTest->useUserFenceWait);
    EXPECT_FALSE(csrUnderTest->isUsedNotifyEnableForPostSync());
    EXPECT_TRUE(csrUnderTest->useContextForUserFenceWait);
    device->resetCommandStreamReceiver(csrUnderTest);

    // Reset the counters so only the wait below is measured.
    mock->ioctl_cnt.gemWait = 0;
    mock->isVmBindAvailableCall.called = 0u;

    FlushStamp stampToWaitFor = 123;
    csrUnderTest->waitForFlushStamp(stampToWaitFor);

    EXPECT_EQ(1, mock->ioctl_cnt.gemWait);
    EXPECT_EQ(0u, csrUnderTest->waitUserFenceResult.called);
    EXPECT_EQ(1u, mock->isVmBindAvailableCall.called);
    EXPECT_EQ(0u, mock->waitUserFenceCall.called);
}
HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTest,
givenWaitUserFenceSetAndUseCtxFlagsNotSetAndVmBindAvailableWhenDrmCsrWaitsForFlushStampThenExpectUseDrmWaitUserFenceCallWithZeroContext) {
DebugManagerStateRestore restorer;
DebugManager.flags.EnableUserFenceForCompletionWait.set(1);
DebugManager.flags.EnableUserFenceUseCtxId.set(0);
DebugManager.flags.SetKmdWaitTimeout.set(1000);
mock->isVmBindAvailableCall.callParent = false;
mock->isVmBindAvailableCall.returnValue = true;
TestedDrmCommandStreamReceiver<FamilyType> *testedCsr =
new TestedDrmCommandStreamReceiver<FamilyType>(gemCloseWorkerMode::gemCloseWorkerInactive,
@ -1758,6 +1916,7 @@ HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTest, givenWaitUserFenceSetAndUseCtxF
EXPECT_FALSE(testedCsr->useContextForUserFenceWait);
device->resetCommandStreamReceiver(testedCsr);
mock->ioctl_cnt.gemWait = 0;
mock->isVmBindAvailableCall.called = 0u;
FlushStamp handleToWait = 123;
testedCsr->waitForFlushStamp(handleToWait);
@ -1767,9 +1926,10 @@ HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTest, givenWaitUserFenceSetAndUseCtxF
EXPECT_EQ(123u, testedCsr->waitUserFenceResult.waitValue);
EXPECT_EQ(1u, mock->waitUserFenceCall.called);
EXPECT_EQ(1u, mock->isVmBindAvailableCall.called);
EXPECT_EQ(0u, mock->waitUserFenceCall.ctxId);
EXPECT_EQ(-1, mock->waitUserFenceCall.timeout);
EXPECT_EQ(1000, mock->waitUserFenceCall.timeout);
EXPECT_EQ(Drm::ValueWidth::U32, mock->waitUserFenceCall.dataWidth);
}
@ -1783,37 +1943,23 @@ HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTest, givenVmBindNotAvailableWhenChec
EXPECT_EQ(1u, mock->isVmBindAvailableCall.called);
}
HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTest, givenVmBindAvailableUseWaitCallAndUseContextIdTrueWhenCheckingForKmdWaitModeActiveThenReturnTrue) {
// VM bind available and useUserFenceWait set: isKmdWaitModeActive() must answer true
// and query VM bind availability exactly once.
HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTest, givenVmBindAvailableUseWaitCallTrueWhenCheckingForKmdWaitModeActiveThenReturnTrue) {
    auto *csrUnderTest = static_cast<TestedDrmCommandStreamReceiver<FamilyType> *>(csr);

    // Mocked DRM: start the call counter at zero and report VM bind as available.
    mock->isVmBindAvailableCall.called = 0u;
    mock->isVmBindAvailableCall.callParent = false;
    mock->isVmBindAvailableCall.returnValue = true;

    csrUnderTest->useUserFenceWait = true;
    csrUnderTest->useContextForUserFenceWait = true;

    EXPECT_TRUE(csrUnderTest->isKmdWaitModeActive());
    EXPECT_EQ(1u, mock->isVmBindAvailableCall.called);
}
HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTest, givenVmBindAvailableUseWaitCallFalseAndUseContextIdTrueWhenCheckingForKmdWaitModeActiveThenReturnFalse) {
// VM bind available and useUserFenceWait cleared: isKmdWaitModeActive() must answer false
// and query VM bind availability exactly once.
HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTest, givenVmBindAvailableUseWaitCallFalseWhenCheckingForKmdWaitModeActiveThenReturnFalse) {
    auto *csrUnderTest = static_cast<TestedDrmCommandStreamReceiver<FamilyType> *>(csr);

    // Mocked DRM: start the call counter at zero and report VM bind as available.
    mock->isVmBindAvailableCall.called = 0u;
    mock->isVmBindAvailableCall.callParent = false;
    mock->isVmBindAvailableCall.returnValue = true;

    csrUnderTest->useUserFenceWait = false;
    csrUnderTest->useContextForUserFenceWait = true;

    EXPECT_FALSE(csrUnderTest->isKmdWaitModeActive());
    EXPECT_EQ(1u, mock->isVmBindAvailableCall.called);
}
HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTest, givenVmBindAvailableUseWaitCallTrueAndUseContextIdFalseWhenCheckingForKmdWaitModeActiveThenReturnFalse) {
auto testDrmCsr = static_cast<TestedDrmCommandStreamReceiver<FamilyType> *>(csr);
mock->isVmBindAvailableCall.called = 0u;
mock->isVmBindAvailableCall.callParent = false;
mock->isVmBindAvailableCall.returnValue = true;
testDrmCsr->useUserFenceWait = true;
testDrmCsr->useContextForUserFenceWait = false;
EXPECT_FALSE(testDrmCsr->isKmdWaitModeActive());
EXPECT_EQ(1u, mock->isVmBindAvailableCall.called);

View File

@ -249,3 +249,4 @@ OverrideMaxWorkGroupCount = -1
UseUmKmDataTranslator = 0
EnableUserFenceForCompletionWait = -1
EnableUserFenceUseCtxId = -1
SetKmdWaitTimeout = -1

View File

@ -233,6 +233,7 @@ DECLARE_DEBUG_VARIABLE(int32_t, GTPinAllocateBufferInSharedMemory, -1, "Force GT
DECLARE_DEBUG_VARIABLE(int32_t, AlignLocalMemoryVaTo2MB, -1, "Allow 2MB pages for allocations with size>=2MB. On Linux it means aligned VA, on Windows it means aligned size. -1: default, 0: disabled, 1: enabled")
DECLARE_DEBUG_VARIABLE(int32_t, EnableUserFenceForCompletionWait, -1, "-1: default (disabled), 0: disable, 1: enable : Use Wait User Fence instead Gem Wait")
DECLARE_DEBUG_VARIABLE(int32_t, EnableUserFenceUseCtxId, -1, "-1: default (enabled), 0: disable, 1: enable : Use Context Id in Wait User Fence when waiting for completion tag")
DECLARE_DEBUG_VARIABLE(int32_t, SetKmdWaitTimeout, -1, "-1: default (infinity), >0: amount of time units for wait function timeout")
/*EXPERIMENTAL TOGGLES*/
DECLARE_DEBUG_VARIABLE(int32_t, ExperimentalEnableCustomLocalMemoryAlignment, 0, "Align local memory allocations to a given value. Works only with allocations at least as big as the value. 0: no effect, 2097152: 2 megabytes, 1073741824: 1 gigabyte")