Synchronize switching command buffers for all partitions

Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
Zbigniew Zdanowicz
2021-09-06 22:29:47 +00:00
committed by Compute-Runtime-Automation
parent 6b062a62b8
commit cd4f3c221a
35 changed files with 271 additions and 112 deletions

View File

@@ -198,8 +198,12 @@ void CommandQueue::waitUntilComplete(uint32_t gpgpuTaskCountToWait, uint32_t bcs
bool forcePowerSavingMode = this->throttle == QueueThrottle::LOW;
getGpgpuCommandStreamReceiver().waitForTaskCountWithKmdNotifyFallback(gpgpuTaskCountToWait, flushStampToWait,
useQuickKmdSleep, forcePowerSavingMode);
getGpgpuCommandStreamReceiver().waitForTaskCountWithKmdNotifyFallback(gpgpuTaskCountToWait,
flushStampToWait,
useQuickKmdSleep,
forcePowerSavingMode,
1u,
0u);
DEBUG_BREAK_IF(getHwTag() < gpgpuTaskCountToWait);
if (gtpinIsGTPinInitialized()) {
@@ -207,7 +211,7 @@ void CommandQueue::waitUntilComplete(uint32_t gpgpuTaskCountToWait, uint32_t bcs
}
if (auto bcsCsr = getBcsCommandStreamReceiver()) {
bcsCsr->waitForTaskCountWithKmdNotifyFallback(bcsTaskCountToWait, 0, false, false);
bcsCsr->waitForTaskCountWithKmdNotifyFallback(bcsTaskCountToWait, 0, false, false, 1u, 0u);
bcsCsr->waitForTaskCountAndCleanTemporaryAllocationList(bcsTaskCountToWait);
}

View File

@@ -63,7 +63,7 @@ class AUBCommandStreamReceiverHw : public CommandStreamReceiverSimulatedHw<GfxFa
MOCKABLE_VIRTUAL void submitBatchBufferAub(uint64_t batchBufferGpuAddress, const void *batchBuffer, size_t batchBufferSize, uint32_t memoryBank, uint64_t entryBits);
void pollForCompletion() override;
void pollForCompletionImpl() override;
void waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool forcePowerSavingMode) override;
void waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool forcePowerSavingMode, uint32_t partitionCount, uint32_t offsetSize) override;
uint32_t getDumpHandle();
MOCKABLE_VIRTUAL void addContextToken(uint32_t dumpHandle);

View File

@@ -599,8 +599,8 @@ void AUBCommandStreamReceiverHw<GfxFamily>::pollForCompletionImpl() {
}
template <typename GfxFamily>
inline void AUBCommandStreamReceiverHw<GfxFamily>::waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool forcePowerSavingMode) {
CommandStreamReceiverSimulatedHw<GfxFamily>::waitForTaskCountWithKmdNotifyFallback(taskCountToWait, flushStampToWait, useQuickKmdSleep, forcePowerSavingMode);
inline void AUBCommandStreamReceiverHw<GfxFamily>::waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool forcePowerSavingMode, uint32_t partitionCount, uint32_t offsetSize) {
CommandStreamReceiverSimulatedHw<GfxFamily>::waitForTaskCountWithKmdNotifyFallback(taskCountToWait, flushStampToWait, useQuickKmdSleep, forcePowerSavingMode, partitionCount, offsetSize);
pollForCompletion();
}

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2018-2020 Intel Corporation
* Copyright (C) 2018-2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -40,7 +40,8 @@ class CommandStreamReceiverWithAUBDump : public BaseCSR {
}
void waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait,
bool useQuickKmdSleep, bool forcePowerSavingMode) override;
bool useQuickKmdSleep, bool forcePowerSavingMode,
uint32_t partitionCount, uint32_t offsetSize) override;
size_t getPreferredTagPoolSize() const override { return 1; }

View File

@@ -71,12 +71,13 @@ void CommandStreamReceiverWithAUBDump<BaseCSR>::setupContext(OsContext &osContex
template <typename BaseCSR>
void CommandStreamReceiverWithAUBDump<BaseCSR>::waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait,
bool useQuickKmdSleep, bool forcePowerSavingMode) {
bool useQuickKmdSleep, bool forcePowerSavingMode,
uint32_t partitionCount, uint32_t offsetSize) {
if (aubCSR) {
aubCSR->waitForTaskCountWithKmdNotifyFallback(taskCountToWait, flushStampToWait, useQuickKmdSleep, forcePowerSavingMode);
aubCSR->waitForTaskCountWithKmdNotifyFallback(taskCountToWait, flushStampToWait, useQuickKmdSleep, forcePowerSavingMode, partitionCount, offsetSize);
}
BaseCSR::waitForTaskCountWithKmdNotifyFallback(taskCountToWait, flushStampToWait, useQuickKmdSleep, forcePowerSavingMode);
BaseCSR::waitForTaskCountWithKmdNotifyFallback(taskCountToWait, flushStampToWait, useQuickKmdSleep, forcePowerSavingMode, partitionCount, offsetSize);
}
template <typename BaseCSR>

View File

@@ -47,7 +47,7 @@ class DrmCommandStreamReceiver : public DeviceCommandStreamReceiver<GfxFamily> {
bool flush(BatchBuffer &batchBuffer, ResidencyContainer &allocationsForResidency) override;
MOCKABLE_VIRTUAL void processResidency(const ResidencyContainer &allocationsForResidency, uint32_t handleId) override;
void makeNonResident(GraphicsAllocation &gfxAllocation) override;
bool waitForFlushStamp(FlushStamp &flushStampToWait) override;
bool waitForFlushStamp(FlushStamp &flushStampToWait, uint32_t partitionCount, uint32_t offsetSize) override;
bool isKmdWaitModeActive() override;
DrmMemoryManager *getMemoryManager() const;
@@ -66,7 +66,7 @@ class DrmCommandStreamReceiver : public DeviceCommandStreamReceiver<GfxFamily> {
protected:
MOCKABLE_VIRTUAL void flushInternal(const BatchBuffer &batchBuffer, const ResidencyContainer &allocationsForResidency);
MOCKABLE_VIRTUAL void exec(const BatchBuffer &batchBuffer, uint32_t vmHandleId, uint32_t drmContextId);
MOCKABLE_VIRTUAL int waitUserFence(uint32_t waitValue);
MOCKABLE_VIRTUAL int waitUserFence(uint32_t waitValue, uint32_t partitionCount, uint32_t offsetSize);
bool isUserFenceWaitActive();
std::vector<BufferObject *> residency;

View File

@@ -218,10 +218,10 @@ GmmPageTableMngr *DrmCommandStreamReceiver<GfxFamily>::createPageTableManager()
}
template <typename GfxFamily>
bool DrmCommandStreamReceiver<GfxFamily>::waitForFlushStamp(FlushStamp &flushStamp) {
bool DrmCommandStreamReceiver<GfxFamily>::waitForFlushStamp(FlushStamp &flushStamp, uint32_t partitionCount, uint32_t offsetSize) {
auto waitValue = static_cast<uint32_t>(flushStamp);
if (isUserFenceWaitActive()) {
waitUserFence(waitValue);
waitUserFence(waitValue, partitionCount, offsetSize);
} else {
this->drm->waitHandle(waitValue, kmdWaitTimeout);
}

View File

@@ -18,7 +18,7 @@ void DrmCommandStreamReceiver<GfxFamily>::flushInternal(const BatchBuffer &batch
}
template <typename GfxFamily>
int DrmCommandStreamReceiver<GfxFamily>::waitUserFence(uint32_t waitValue) {
int DrmCommandStreamReceiver<GfxFamily>::waitUserFence(uint32_t waitValue, uint32_t partitionCount, uint32_t offsetSize) {
uint32_t ctxId = 0u;
uint64_t tagAddress = castToUint64(const_cast<uint32_t *>(getTagAddress()));
if (useContextForUserFenceWait) {

View File

@@ -51,19 +51,28 @@ void DrmCommandStreamReceiver<GfxFamily>::flushInternal(const BatchBuffer &batch
}
template <typename GfxFamily>
int DrmCommandStreamReceiver<GfxFamily>::waitUserFence(uint32_t waitValue) {
int DrmCommandStreamReceiver<GfxFamily>::waitUserFence(uint32_t waitValue, uint32_t partitionCount, uint32_t offsetSize) {
int ret = 0;
StackVec<uint32_t, 32> ctxIds;
uint64_t tagAddress = castToUint64(const_cast<uint32_t *>(getTagAddress()));
if (useContextForUserFenceWait) {
for (auto tileIterator = 0u; tileIterator < this->osContext->getDeviceBitfield().size(); tileIterator++) {
uint32_t ctxId = 0u;
if (this->osContext->getDeviceBitfield().test(tileIterator)) {
ctxId = static_cast<const OsContextLinux *>(osContext)->getDrmContextIds()[tileIterator];
ret |= this->drm->waitUserFence(ctxId, tagAddress, waitValue, Drm::ValueWidth::U32, kmdWaitTimeout, 0u);
ctxIds.push_back(ctxId);
}
}
UNRECOVERABLE_IF(ctxIds.size() != partitionCount);
for (uint32_t i = 0; i < partitionCount; i++) {
ret |= this->drm->waitUserFence(ctxIds[i], tagAddress, waitValue, Drm::ValueWidth::U32, kmdWaitTimeout, 0u);
tagAddress += offsetSize;
}
} else {
ret = this->drm->waitUserFence(0u, tagAddress, waitValue, Drm::ValueWidth::U32, kmdWaitTimeout, 0u);
for (uint32_t i = 0; i < partitionCount; i++) {
ret |= this->drm->waitUserFence(0u, tagAddress, waitValue, Drm::ValueWidth::U32, kmdWaitTimeout, 0u);
tagAddress += offsetSize;
}
}
return ret;

View File

@@ -27,7 +27,7 @@ class WddmCommandStreamReceiver : public DeviceCommandStreamReceiver<GfxFamily>
bool flush(BatchBuffer &batchBuffer, ResidencyContainer &allocationsForResidency) override;
void processResidency(const ResidencyContainer &allocationsForResidency, uint32_t handleId) override;
void processEviction() override;
bool waitForFlushStamp(FlushStamp &flushStampToWait) override;
bool waitForFlushStamp(FlushStamp &flushStampToWait, uint32_t partitionCount, uint32_t offsetSize) override;
WddmMemoryManager *getMemoryManager() const;
Wddm *peekWddm() const {

View File

@@ -132,7 +132,7 @@ WddmMemoryManager *WddmCommandStreamReceiver<GfxFamily>::getMemoryManager() cons
}
template <typename GfxFamily>
bool WddmCommandStreamReceiver<GfxFamily>::waitForFlushStamp(FlushStamp &flushStampToWait) {
bool WddmCommandStreamReceiver<GfxFamily>::waitForFlushStamp(FlushStamp &flushStampToWait, uint32_t partitionCount, uint32_t offsetSize) {
return wddm->waitFromCpu(flushStampToWait, static_cast<OsContextWin *>(this->osContext)->getResidencyController().getMonitoredFence());
}

View File

@@ -319,7 +319,7 @@ HWTEST_F(AubFileStreamTests, givenAubCommandStreamReceiverWhenCallingInsertAubWa
auto aubExecutionEnvironment = getEnvironment<MockAubCsr<FamilyType>>(true, true, true);
auto aubCsr = aubExecutionEnvironment->template getCsr<MockAubCsr<FamilyType>>();
ASSERT_FALSE(aubCsr->pollForCompletionCalled);
aubCsr->waitForTaskCountWithKmdNotifyFallback(0, 0, false, false);
aubCsr->waitForTaskCountWithKmdNotifyFallback(0, 0, false, false, 1, 0);
EXPECT_TRUE(aubCsr->pollForCompletionCalled);
}

View File

@@ -434,6 +434,24 @@ HWTEST_F(UltCommandStreamReceiverTest, givenComputeOverrideDisableWhenComputeSup
EXPECT_FALSE(startInContext);
}
// Single-partition case: calls waitForTaskCountWithKmdNotifyFallback with partitionCount = 1 and
// offsetSize = 0 and checks the call counter of the three-argument ("implicit")
// waitForCompletionWithTimeout overload of the ULT command stream receiver.
HWTEST_F(UltCommandStreamReceiverTest, givenSinglePartitionWhenCallingWaitKmdNotifyThenExpectImplicitBusyLoopWaitCalled) {
auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
// Do not forward to the base-class wait; the mocked wait returns false instead.
commandStreamReceiver.callBaseWaitForCompletionWithTimeout = false;
commandStreamReceiver.returnWaitForCompletionWithTimeout = false;
// Arguments: taskCountToWait, flushStampToWait, useQuickKmdSleep, forcePowerSavingMode, partitionCount, offsetSize.
commandStreamReceiver.waitForTaskCountWithKmdNotifyFallback(0, 0, false, false, 1, 0);
// Two calls of the implicit overload are expected.
// NOTE(review): presumably an initial wait plus a fallback wait after the first one reports
// failure - the body of waitForTaskCountWithKmdNotifyFallback is not visible here; confirm.
EXPECT_EQ(2u, commandStreamReceiver.waitForCompletionWithTimeoutTaskCountCalled);
}
// Multi-partition case: calls waitForTaskCountWithKmdNotifyFallback with partitionCount = 2 and
// offsetSize = 8 and checks the call counter of the poll-address ("explicit")
// waitForCompletionWithTimeout overload of the ULT command stream receiver.
HWTEST_F(UltCommandStreamReceiverTest, givenMultiplePartitionsWhenCallingWaitKmdNotifyThenExpectExplicitBusyLoopWaitCalled) {
auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
// Do not forward to the base-class wait; the mocked wait returns false instead.
commandStreamReceiver.callBaseWaitForCompletionWithTimeout = false;
commandStreamReceiver.returnWaitForCompletionWithTimeout = false;
// Arguments: taskCountToWait, flushStampToWait, useQuickKmdSleep, forcePowerSavingMode, partitionCount, offsetSize.
commandStreamReceiver.waitForTaskCountWithKmdNotifyFallback(0, 0, false, false, 2, 8);
// Two calls of the explicit overload are expected.
// NOTE(review): presumably an initial wait plus a fallback wait after the first one reports
// failure - the body of waitForTaskCountWithKmdNotifyFallback is not visible here; confirm.
EXPECT_EQ(2u, commandStreamReceiver.waitForCompletionWithTimeoutTaskCountExplicitCalled);
}
typedef UltCommandStreamReceiverTest CommandStreamReceiverFlushTests;
HWTEST_F(CommandStreamReceiverFlushTests, WhenAddingBatchBufferEndThenBatchBufferEndIsAppendedCorrectly) {

View File

@@ -579,19 +579,24 @@ HWTEST_F(BcsTests, whenBlitFromHostPtrCalledThenCallWaitWithKmdFallback) {
using UltCommandStreamReceiver<FamilyType>::UltCommandStreamReceiver;
void waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait,
bool useQuickKmdSleep, bool forcePowerSavingMode) override {
bool useQuickKmdSleep, bool forcePowerSavingMode,
uint32_t partitionCount, uint32_t offsetSize) override {
waitForTaskCountWithKmdNotifyFallbackCalled++;
taskCountToWaitPassed = taskCountToWait;
flushStampToWaitPassed = flushStampToWait;
useQuickKmdSleepPassed = useQuickKmdSleep;
forcePowerSavingModePassed = forcePowerSavingMode;
partitionCountPassed = partitionCount;
offsetSizePassed = offsetSize;
}
uint32_t taskCountToWaitPassed = 0;
FlushStamp flushStampToWaitPassed = 0;
uint32_t taskCountToWaitPassed = 0;
uint32_t waitForTaskCountWithKmdNotifyFallbackCalled = 0;
uint32_t partitionCountPassed = 0;
uint32_t offsetSizePassed = 0;
bool useQuickKmdSleepPassed = false;
bool forcePowerSavingModePassed = false;
uint32_t waitForTaskCountWithKmdNotifyFallbackCalled = 0;
};
auto myMockCsr = std::make_unique<::testing::NiceMock<MyMockCsr>>(*pDevice->getExecutionEnvironment(), pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
@@ -625,6 +630,8 @@ HWTEST_F(BcsTests, whenBlitFromHostPtrCalledThenCallWaitWithKmdFallback) {
EXPECT_EQ(myMockCsr->flushStamp->peekStamp(), myMockCsr->flushStampToWaitPassed);
EXPECT_FALSE(myMockCsr->useQuickKmdSleepPassed);
EXPECT_FALSE(myMockCsr->forcePowerSavingModePassed);
EXPECT_EQ(1u, myMockCsr->partitionCountPassed);
EXPECT_EQ(0u, myMockCsr->offsetSizePassed);
}
HWTEST_F(BcsTests, whenBlitFromHostPtrCalledThenCleanTemporaryAllocations) {

View File

@@ -229,11 +229,11 @@ HWTEST_F(CommandStreamReceiverWithAubDumpSimpleTest, givenCsrWithAubDumpWhenWait
csrWithAubDump.aubCSR.reset(mockAubCsr);
EXPECT_FALSE(mockAubCsr->pollForCompletionCalled);
csrWithAubDump.waitForTaskCountWithKmdNotifyFallback(1, 0, false, false);
csrWithAubDump.waitForTaskCountWithKmdNotifyFallback(1, 0, false, false, 1, 0);
EXPECT_TRUE(mockAubCsr->pollForCompletionCalled);
csrWithAubDump.aubCSR.reset(nullptr);
csrWithAubDump.waitForTaskCountWithKmdNotifyFallback(1, 0, false, false);
csrWithAubDump.waitForTaskCountWithKmdNotifyFallback(1, 0, false, false, 1, 0);
}
HWTEST_F(CommandStreamReceiverWithAubDumpSimpleTest, givenCsrWithAubDumpWhenCreatingAubCsrThenInitializeTagAllocation) {

View File

@@ -361,7 +361,7 @@ HWTEST_F(TbxCommandSteamSimpleTest, givenTbxCsrWhenCallingWaitForTaskCountWithKm
tbxCsr.allocationsForDownload = {&allocation1, &allocation2, &allocation3};
tbxCsr.waitForTaskCountWithKmdNotifyFallback(0u, 0u, false, false);
tbxCsr.waitForTaskCountWithKmdNotifyFallback(0u, 0u, false, false, 1, 0);
std::set<GraphicsAllocation *> expectedDownloadedAllocations = {tbxCsr.getTagAllocation(), &allocation1, &allocation2, &allocation3};
EXPECT_EQ(expectedDownloadedAllocations, tbxCsr.downloadedAllocations);

View File

@@ -1479,12 +1479,14 @@ TEST_F(EventTest, GivenCompletedEventWhenAddingChildThenNumEventsBlockingThisIsZ
}
}
// Mock command stream receiver shared by the EventTest cases; it takes the place of the
// per-test local MyCsr structs. Derives from UltCommandStreamReceiver and mocks the
// three-argument waitForCompletionWithTimeout via gmock.
template <typename GfxFamily>
struct TestEventCsr : public UltCommandStreamReceiver<GfxFamily> {
// Forwards to the base constructor with a literal 0 as the second argument.
// NOTE(review): the second argument looks like the root device index and the const_cast
// presumably exists because the base constructor takes a non-const reference - confirm.
TestEventCsr(const ExecutionEnvironment &executionEnvironment, const DeviceBitfield deviceBitfield)
: UltCommandStreamReceiver<GfxFamily>(const_cast<ExecutionEnvironment &>(executionEnvironment), 0, deviceBitfield) {}
// gmock replacement for the wait call so tests can set expectations on its arguments.
MOCK_METHOD3(waitForCompletionWithTimeout, bool(bool enableTimeout, int64_t timeoutMs, uint32_t taskCountToWait));
};
HWTEST_F(EventTest, givenQuickKmdSleepRequestWhenWaitIsCalledThenPassRequestToWaitingFunction) {
struct MyCsr : public UltCommandStreamReceiver<FamilyType> {
MyCsr(const ExecutionEnvironment &executionEnvironment, const DeviceBitfield deviceBitfield)
: UltCommandStreamReceiver<FamilyType>(const_cast<ExecutionEnvironment &>(executionEnvironment), 0, deviceBitfield) {}
MOCK_METHOD3(waitForCompletionWithTimeout, bool(bool enableTimeout, int64_t timeoutMs, uint32_t taskCountToWait));
};
HardwareInfo localHwInfo = pDevice->getHardwareInfo();
localHwInfo.capabilityTable.kmdNotifyProperties.enableKmdNotify = true;
localHwInfo.capabilityTable.kmdNotifyProperties.enableQuickKmdSleep = true;
@@ -1493,7 +1495,7 @@ HWTEST_F(EventTest, givenQuickKmdSleepRequestWhenWaitIsCalledThenPassRequestToWa
pDevice->executionEnvironment->rootDeviceEnvironments[pDevice->getRootDeviceIndex()]->setHwInfo(&localHwInfo);
auto csr = new ::testing::NiceMock<MyCsr>(*pDevice->executionEnvironment, pDevice->getDeviceBitfield());
auto csr = new ::testing::NiceMock<TestEventCsr<FamilyType>>(*pDevice->executionEnvironment, pDevice->getDeviceBitfield());
pDevice->resetCommandStreamReceiver(csr);
Event event(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, 0, 0);
@@ -1508,11 +1510,6 @@ HWTEST_F(EventTest, givenQuickKmdSleepRequestWhenWaitIsCalledThenPassRequestToWa
}
HWTEST_F(EventTest, givenNonQuickKmdSleepRequestWhenWaitIsCalledThenPassRequestToWaitingFunction) {
struct MyCsr : public UltCommandStreamReceiver<FamilyType> {
MyCsr(const ExecutionEnvironment &executionEnvironment, const DeviceBitfield deviceBitfield)
: UltCommandStreamReceiver<FamilyType>(const_cast<ExecutionEnvironment &>(executionEnvironment), 0, deviceBitfield) {}
MOCK_METHOD3(waitForCompletionWithTimeout, bool(bool enableTimeout, int64_t timeoutMs, uint32_t taskCountToWait));
};
HardwareInfo localHwInfo = pDevice->getHardwareInfo();
localHwInfo.capabilityTable.kmdNotifyProperties.enableKmdNotify = true;
localHwInfo.capabilityTable.kmdNotifyProperties.enableQuickKmdSleep = true;
@@ -1522,7 +1519,7 @@ HWTEST_F(EventTest, givenNonQuickKmdSleepRequestWhenWaitIsCalledThenPassRequestT
pDevice->executionEnvironment->rootDeviceEnvironments[pDevice->getRootDeviceIndex()]->setHwInfo(&localHwInfo);
auto csr = new ::testing::NiceMock<MyCsr>(*pDevice->executionEnvironment, pDevice->getDeviceBitfield());
auto csr = new ::testing::NiceMock<TestEventCsr<FamilyType>>(*pDevice->executionEnvironment, pDevice->getDeviceBitfield());
pDevice->resetCommandStreamReceiver(csr);
Event event(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, 0, 0);

View File

@@ -25,7 +25,7 @@ struct KmdNotifyTests : public ::testing::Test {
hwInfo = device->getRootDeviceEnvironment().getMutableHardwareInfo();
cmdQ.reset(new MockCommandQueue(&context, device.get(), nullptr, false));
*device->getDefaultEngine().commandStreamReceiver->getTagAddress() = taskCountToWait;
cmdQ->getGpgpuCommandStreamReceiver().waitForFlushStamp(flushStampToWait);
cmdQ->getGpgpuCommandStreamReceiver().waitForFlushStamp(flushStampToWait, 1, 0);
overrideKmdNotifyParams(true, 2, true, 1, false, 0);
}
@@ -70,7 +70,7 @@ struct KmdNotifyTests : public ::testing::Test {
public:
MockKmdNotifyCsr(const ExecutionEnvironment &executionEnvironment, const DeviceBitfield deviceBitfield)
: UltCommandStreamReceiver<Family>(const_cast<ExecutionEnvironment &>(executionEnvironment), 0, deviceBitfield) {}
MOCK_METHOD1(waitForFlushStamp, bool(FlushStamp &flushStampToWait));
MOCK_METHOD3(waitForFlushStamp, bool(FlushStamp &flushStampToWait, uint32_t partitionCount, uint32_t offsetSize));
MOCK_METHOD3(waitForCompletionWithTimeout, bool(bool enableTimeout, int64_t timeoutMs, uint32_t taskCountToWait));
};
@@ -107,7 +107,7 @@ HWTEST_F(KmdNotifyTests, givenTaskCountAndKmdNotifyDisabledWhenWaitUntilCompleti
auto csr = createMockCsr<FamilyType>();
EXPECT_CALL(*csr, waitForCompletionWithTimeout(false, 0, taskCountToWait)).Times(1).WillOnce(::testing::Return(true));
EXPECT_CALL(*csr, waitForFlushStamp(::testing::_)).Times(0);
EXPECT_CALL(*csr, waitForFlushStamp(::testing::_, ::testing::_, ::testing::_)).Times(0);
cmdQ->waitUntilComplete(taskCountToWait, 0, flushStampToWait, false);
}
@@ -118,7 +118,7 @@ HWTEST_F(KmdNotifyTests, givenNotReadyTaskCountWhenWaitUntilCompletionCalledThen
::testing::InSequence is;
EXPECT_CALL(*csr, waitForCompletionWithTimeout(true, 2, taskCountToWait)).Times(1).WillOnce(::testing::Return(false));
EXPECT_CALL(*csr, waitForFlushStamp(flushStampToWait)).Times(1).WillOnce(::testing::Return(true));
EXPECT_CALL(*csr, waitForFlushStamp(flushStampToWait, 1, 0)).Times(1).WillOnce(::testing::Return(true));
EXPECT_CALL(*csr, waitForCompletionWithTimeout(false, 0, taskCountToWait)).Times(1).WillOnce(::testing::Return(false));
//we have unrecoverable for this case, this will throw.
@@ -130,7 +130,7 @@ HWTEST_F(KmdNotifyTests, givenReadyTaskCountWhenWaitUntilCompletionCalledThenTry
::testing::InSequence is;
EXPECT_CALL(*csr, waitForCompletionWithTimeout(true, 2, taskCountToWait)).Times(1).WillOnce(::testing::Return(true));
EXPECT_CALL(*csr, waitForFlushStamp(::testing::_)).Times(0);
EXPECT_CALL(*csr, waitForFlushStamp(::testing::_, ::testing::_, ::testing::_)).Times(0);
cmdQ->waitUntilComplete(taskCountToWait, 0, flushStampToWait, false);
}
@@ -174,9 +174,9 @@ HWTEST_F(KmdNotifyTests, givenZeroFlushStampWhenWaitIsCalledThenDisableTimeout)
EXPECT_TRUE(device->getHardwareInfo().capabilityTable.kmdNotifyProperties.enableKmdNotify);
EXPECT_CALL(*csr, waitForCompletionWithTimeout(false, ::testing::_, taskCountToWait)).Times(1).WillOnce(::testing::Return(true));
EXPECT_CALL(*csr, waitForFlushStamp(::testing::_)).Times(0);
EXPECT_CALL(*csr, waitForFlushStamp(::testing::_, ::testing::_, ::testing::_)).Times(0);
csr->waitForTaskCountWithKmdNotifyFallback(taskCountToWait, 0, false, false);
csr->waitForTaskCountWithKmdNotifyFallback(taskCountToWait, 0, false, false, 1, 0);
}
HWTEST_F(KmdNotifyTests, givenNonQuickSleepRequestWhenItsSporadicWaitThenOverrideQuickSleepRequest) {
@@ -189,7 +189,7 @@ HWTEST_F(KmdNotifyTests, givenNonQuickSleepRequestWhenItsSporadicWaitThenOverrid
int64_t timeSinceLastWait = mockKmdNotifyHelper->properties->delayQuickKmdSleepForSporadicWaitsMicroseconds + 1;
mockKmdNotifyHelper->lastWaitForCompletionTimestampUs = mockKmdNotifyHelper->getMicrosecondsSinceEpoch() - timeSinceLastWait;
csr->waitForTaskCountWithKmdNotifyFallback(taskCountToWait, 1, false, false);
csr->waitForTaskCountWithKmdNotifyFallback(taskCountToWait, 1, false, false, 1, 0);
}
HWTEST_F(KmdNotifyTests, givenNonQuickSleepRequestWhenItsNotSporadicWaitThenOverrideQuickSleepRequest) {
@@ -199,14 +199,14 @@ HWTEST_F(KmdNotifyTests, givenNonQuickSleepRequestWhenItsNotSporadicWaitThenOver
auto expectedDelay = device->getHardwareInfo().capabilityTable.kmdNotifyProperties.delayKmdNotifyMicroseconds;
EXPECT_CALL(*csr, waitForCompletionWithTimeout(::testing::_, expectedDelay, ::testing::_)).Times(1).WillOnce(::testing::Return(true));
csr->waitForTaskCountWithKmdNotifyFallback(taskCountToWait, 1, false, false);
csr->waitForTaskCountWithKmdNotifyFallback(taskCountToWait, 1, false, false, 1, 0);
}
HWTEST_F(KmdNotifyTests, givenKmdNotifyDisabledWhenPowerSavingModeIsRequestedThenTimeoutIsEnabled) {
overrideKmdNotifyParams(false, 3, false, 2, false, 9999999);
auto csr = createMockCsr<FamilyType>();
EXPECT_CALL(*csr, waitForCompletionWithTimeout(true, 1, ::testing::_)).Times(1).WillOnce(::testing::Return(true));
csr->waitForTaskCountWithKmdNotifyFallback(taskCountToWait, 1, false, true);
csr->waitForTaskCountWithKmdNotifyFallback(taskCountToWait, 1, false, true, 1, 0);
}
HWTEST_F(KmdNotifyTests, givenKmdNotifyDisabledWhenQueueHasPowerSavingModeAndCallWaitThenTimeoutIsEnabled) {
@@ -233,7 +233,7 @@ HWTEST_F(KmdNotifyTests, givenQuickSleepRequestWhenItsSporadicWaitOptimizationIs
auto expectedDelay = device->getHardwareInfo().capabilityTable.kmdNotifyProperties.delayQuickKmdSleepMicroseconds;
EXPECT_CALL(*csr, waitForCompletionWithTimeout(::testing::_, expectedDelay, ::testing::_)).Times(1).WillOnce(::testing::Return(true));
csr->waitForTaskCountWithKmdNotifyFallback(taskCountToWait, 1, true, false);
csr->waitForTaskCountWithKmdNotifyFallback(taskCountToWait, 1, true, false, 1, 0);
}
HWTEST_F(KmdNotifyTests, givenTaskCountEqualToHwTagWhenWaitCalledThenDontMultiplyTimeout) {
@@ -244,7 +244,7 @@ HWTEST_F(KmdNotifyTests, givenTaskCountEqualToHwTagWhenWaitCalledThenDontMultipl
EXPECT_CALL(*csr, waitForCompletionWithTimeout(true, expectedTimeout, ::testing::_)).Times(1).WillOnce(::testing::Return(true));
csr->waitForTaskCountWithKmdNotifyFallback(taskCountToWait, 1, false, false);
csr->waitForTaskCountWithKmdNotifyFallback(taskCountToWait, 1, false, false, 1, 0);
}
HWTEST_F(KmdNotifyTests, givenTaskCountLowerThanHwTagWhenWaitCalledThenDontMultiplyTimeout) {
@@ -255,7 +255,7 @@ HWTEST_F(KmdNotifyTests, givenTaskCountLowerThanHwTagWhenWaitCalledThenDontMulti
EXPECT_CALL(*csr, waitForCompletionWithTimeout(true, expectedTimeout, ::testing::_)).Times(1).WillOnce(::testing::Return(true));
csr->waitForTaskCountWithKmdNotifyFallback(taskCountToWait, 1, false, false);
csr->waitForTaskCountWithKmdNotifyFallback(taskCountToWait, 1, false, false, 1, 0);
}
HWTEST_F(KmdNotifyTests, givenDefaultCommandStreamReceiverWhenWaitCalledThenUpdateWaitTimestamp) {
@@ -265,7 +265,7 @@ HWTEST_F(KmdNotifyTests, givenDefaultCommandStreamReceiverWhenWaitCalledThenUpda
EXPECT_NE(0, mockKmdNotifyHelper->lastWaitForCompletionTimestampUs.load());
EXPECT_EQ(1u, mockKmdNotifyHelper->updateLastWaitForCompletionTimestampCalled);
csr->waitForTaskCountWithKmdNotifyFallback(0, 0, false, false);
csr->waitForTaskCountWithKmdNotifyFallback(0, 0, false, false, 1, 0);
EXPECT_EQ(2u, mockKmdNotifyHelper->updateLastWaitForCompletionTimestampCalled);
}
@@ -275,7 +275,7 @@ HWTEST_F(KmdNotifyTests, givenDefaultCommandStreamReceiverWithDisabledSporadicWa
auto csr = createMockCsr<FamilyType>();
EXPECT_EQ(0, mockKmdNotifyHelper->lastWaitForCompletionTimestampUs.load());
csr->waitForTaskCountWithKmdNotifyFallback(0, 0, false, false);
csr->waitForTaskCountWithKmdNotifyFallback(0, 0, false, false, 1, 0);
EXPECT_EQ(0u, mockKmdNotifyHelper->updateLastWaitForCompletionTimestampCalled);
}

View File

@@ -504,7 +504,7 @@ class CommandStreamReceiverMock : public CommandStreamReceiver {
return true;
}
void waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool quickKmdSleep, bool forcePowerSavingMode) override {
void waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool quickKmdSleep, bool forcePowerSavingMode, uint32_t partitionCount, uint32_t offsetSize) override {
}
uint32_t blitBuffer(const BlitPropertiesContainer &blitPropertiesContainer, bool blocking, bool profilingEnabled, Device &device) override { return taskCount; };

View File

@@ -101,6 +101,7 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw<GfxFamily>, publ
using BaseClass::CommandStreamReceiver::scratchSpaceController;
using BaseClass::CommandStreamReceiver::stallingPipeControlOnNextFlushRequired;
using BaseClass::CommandStreamReceiver::submissionAggregator;
using BaseClass::CommandStreamReceiver::tagAddress;
using BaseClass::CommandStreamReceiver::taskCount;
using BaseClass::CommandStreamReceiver::taskLevel;
using BaseClass::CommandStreamReceiver::timestampPacketAllocator;
@@ -159,7 +160,20 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw<GfxFamily>, publ
bool waitForCompletionWithTimeout(bool enableTimeout, int64_t timeoutMicroseconds, uint32_t taskCountToWait) override {
latestWaitForCompletionWithTimeoutTaskCount.store(taskCountToWait);
return BaseClass::waitForCompletionWithTimeout(enableTimeout, timeoutMicroseconds, taskCountToWait);
waitForCompletionWithTimeoutTaskCountCalled++;
if (callBaseWaitForCompletionWithTimeout) {
return BaseClass::waitForCompletionWithTimeout(enableTimeout, timeoutMicroseconds, taskCountToWait);
}
return returnWaitForCompletionWithTimeout;
}
// Test override of the poll-address ("explicit") wait overload.
// Stores taskCountToWait in latestWaitForCompletionWithTimeoutTaskCountExplicit and increments
// waitForCompletionWithTimeoutTaskCountExplicitCalled on every call. When
// callBaseWaitForCompletionWithTimeout is true the call is forwarded unchanged to BaseClass and
// its result is returned; otherwise the canned returnWaitForCompletionWithTimeout value is returned.
bool waitForCompletionWithTimeout(volatile uint32_t *pollAddress, bool enableTimeout, int64_t timeoutMicroseconds, uint32_t taskCountToWait, uint32_t partitionCount, uint32_t offsetSize) override {
latestWaitForCompletionWithTimeoutTaskCountExplicit.store(taskCountToWait);
waitForCompletionWithTimeoutTaskCountExplicitCalled++;
if (callBaseWaitForCompletionWithTimeout) {
return BaseClass::waitForCompletionWithTimeout(pollAddress, enableTimeout, timeoutMicroseconds, taskCountToWait, partitionCount, offsetSize);
}
// Base call disabled by the test: report the preconfigured result.
return returnWaitForCompletionWithTimeout;
}
void overrideCsrSizeReqFlags(CsrSizeRequestFlags &flags) { this->csrSizeRequestFlags = flags; }
@@ -278,6 +292,10 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw<GfxFamily>, publ
std::atomic<uint32_t> recursiveLockCounter;
std::atomic<uint32_t> latestWaitForCompletionWithTimeoutTaskCount{0};
std::atomic<uint32_t> latestWaitForCompletionWithTimeoutTaskCountExplicit{0};
std::atomic<uint32_t> waitForCompletionWithTimeoutTaskCountCalled{0};
std::atomic<uint32_t> waitForCompletionWithTimeoutTaskCountExplicitCalled{0};
LinearStream *lastFlushedCommandStream = nullptr;
@@ -301,5 +319,7 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw<GfxFamily>, publ
bool directSubmissionAvailable = false;
bool blitterDirectSubmissionAvailable = false;
bool callBaseIsMultiOsContextCapable = false;
bool callBaseWaitForCompletionWithTimeout = true;
bool returnWaitForCompletionWithTimeout = true;
};
} // namespace NEO

View File

@@ -69,11 +69,14 @@ struct BcsBufferTests : public ::testing::Test {
using UltCommandStreamReceiver<FamilyType>::UltCommandStreamReceiver;
void waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait,
bool useQuickKmdSleep, bool forcePowerSavingMode) override {
bool useQuickKmdSleep, bool forcePowerSavingMode,
uint32_t partitionCount, uint32_t offsetSize) override {
EXPECT_EQ(this->latestFlushedTaskCount, taskCountToWait);
EXPECT_EQ(0u, flushStampToWait);
EXPECT_FALSE(useQuickKmdSleep);
EXPECT_FALSE(forcePowerSavingMode);
EXPECT_EQ(1u, partitionCount);
EXPECT_EQ(0u, offsetSize);
waitForTaskCountWithKmdNotifyFallbackCalled++;
}

View File

@@ -85,18 +85,22 @@ class TestedDrmCommandStreamReceiver : public DrmCommandStreamReceiver<GfxFamily
// Bookkeeping for the mocked waitUserFence override: records how it was called and controls
// what it returns.
struct WaitUserFenceResult {
// Number of waitUserFence invocations.
uint32_t called = 0u;
// Arguments of the most recent waitUserFence invocation.
uint32_t waitValue = 0u;
uint32_t partitionCount = 0;
uint32_t offsetSize = 0;
// Value returned by the override when callParent is false.
int returnValue = 0;
// When true the override forwards to DrmCommandStreamReceiver<GfxFamily>::waitUserFence.
bool callParent = true;
};
WaitUserFenceResult waitUserFenceResult;
int waitUserFence(uint32_t waitValue) override {
int waitUserFence(uint32_t waitValue, uint32_t partitionCount, uint32_t offsetSize) override {
waitUserFenceResult.called++;
waitUserFenceResult.waitValue = waitValue;
waitUserFenceResult.partitionCount = partitionCount;
waitUserFenceResult.offsetSize = offsetSize;
if (waitUserFenceResult.callParent) {
return DrmCommandStreamReceiver<GfxFamily>::waitUserFence(waitValue);
return DrmCommandStreamReceiver<GfxFamily>::waitUserFence(waitValue, partitionCount, offsetSize);
} else {
return waitUserFenceResult.returnValue;
}

View File

@@ -61,7 +61,7 @@ HWTEST_TEMPLATED_F(DrmCommandStreamTest, givenFlushStampWhenWaitCalledThenWaitFo
.Times(1)
.WillRepeatedly(copyIoctlParam(&calledWait));
csr->waitForFlushStamp(handleToWait);
csr->waitForFlushStamp(handleToWait, 1, 0);
EXPECT_TRUE(memcmp(&expectedWait, &calledWait, sizeof(drm_i915_gem_wait)) == 0);
}

View File

@@ -902,7 +902,7 @@ HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTest, givenWaitUserFenceFlagNotSetWhe
mock->ioctl_cnt.gemWait = 0;
FlushStamp handleToWait = 123;
testedCsr->waitForFlushStamp(handleToWait);
testedCsr->waitForFlushStamp(handleToWait, 1, 0);
EXPECT_EQ(1, mock->ioctl_cnt.gemWait);
EXPECT_EQ(-1, mock->gemWaitTimeout);
@@ -925,7 +925,7 @@ HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTest, givenGemWaitUsedWhenKmdTimeoutU
mock->ioctl_cnt.gemWait = 0;
FlushStamp handleToWait = 123;
testedCsr->waitForFlushStamp(handleToWait);
testedCsr->waitForFlushStamp(handleToWait, 1, 0);
EXPECT_EQ(1, mock->ioctl_cnt.gemWait);
EXPECT_EQ(1000, mock->gemWaitTimeout);
@@ -959,7 +959,7 @@ HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTest,
}
FlushStamp handleToWait = 123;
testedCsr->waitForFlushStamp(handleToWait);
testedCsr->waitForFlushStamp(handleToWait, 1, 0);
EXPECT_EQ(0, mock->ioctl_cnt.gemWait);
EXPECT_EQ(1u, testedCsr->waitUserFenceResult.called);
@@ -993,7 +993,7 @@ HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTest,
mock->isVmBindAvailableCall.called = 0u;
FlushStamp handleToWait = 123;
testedCsr->waitForFlushStamp(handleToWait);
testedCsr->waitForFlushStamp(handleToWait, 1, 0);
EXPECT_EQ(1, mock->ioctl_cnt.gemWait);
EXPECT_EQ(0u, testedCsr->waitUserFenceResult.called);
@@ -1022,7 +1022,7 @@ HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTest,
mock->isVmBindAvailableCall.called = 0u;
FlushStamp handleToWait = 123;
EXPECT_ANY_THROW(testedCsr->waitForFlushStamp(handleToWait));
EXPECT_ANY_THROW(testedCsr->waitForFlushStamp(handleToWait, 1, 0));
EXPECT_EQ(0, mock->ioctl_cnt.gemWait);
EXPECT_EQ(0u, testedCsr->waitUserFenceResult.called);
@@ -1053,7 +1053,7 @@ HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTest,
mock->isVmBindAvailableCall.called = 0u;
FlushStamp handleToWait = 123;
testedCsr->waitForFlushStamp(handleToWait);
testedCsr->waitForFlushStamp(handleToWait, 1, 0);
EXPECT_EQ(0, mock->ioctl_cnt.gemWait);
EXPECT_EQ(1u, testedCsr->waitUserFenceResult.called);

View File

@@ -219,7 +219,7 @@ TEST_F(DeviceCommandStreamTest, WhenCreatingWddmCsrWithAubDumpThenAubCsrIsCreate
TEST_F(WddmCommandStreamTest, givenFlushStampWhenWaitCalledThenWaitForSpecifiedMonitoredFence) {
uint64_t stampToWait = 123;
wddm->waitFromCpuResult.called = 0u;
csr->waitForFlushStamp(stampToWait);
csr->waitForFlushStamp(stampToWait, 1, 0);
EXPECT_EQ(1u, wddm->waitFromCpuResult.called);
EXPECT_TRUE(wddm->waitFromCpuResult.success);
EXPECT_EQ(stampToWait, wddm->waitFromCpuResult.uint64ParamPassed);