Allocate new ring buffer if all are in use

Signed-off-by: Lukasz Jobczyk <lukasz.jobczyk@intel.com>
This commit is contained in:
Lukasz Jobczyk
2022-06-01 10:05:07 +00:00
committed by Compute-Runtime-Automation
parent 9bc0250e52
commit 24ff26c396
14 changed files with 296 additions and 118 deletions

View File

@@ -298,11 +298,12 @@ DECLARE_DEBUG_VARIABLE(int32_t, EnableDirectSubmissionController, -1, "Enable di
DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionControllerTimeout, -1, "Set direct submission controller timeout, -1: default 5000 us, >=0: timeout in us")
DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionControllerDivisor, -1, "Set direct submission controller timeout divider, -1: default 2, >0: divider value")
DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionForceLocalMemoryStorageMode, -1, "Force local memory storage for command/ring/semaphore buffer, -1: default - for all engines, 0: disabled, 1: for multiOsContextCapable engine, 2: for all engines")
DECLARE_DEBUG_VARIABLE(int32_t, EnableRingSwitchTagUpdateWa, -1, "-1: default, 0 - disable, 1 - enable. If enabled, completionRingBuffers wont be updated if ring is not running.")
DECLARE_DEBUG_VARIABLE(int32_t, EnableRingSwitchTagUpdateWa, -1, "-1: default, 0 - disable, 1 - enable. If enabled, completionFences won't be updated if ring is not running.")
DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionReadBackCommandBuffer, -1, "-1: default - disabled, 0 - disable, 1 - enable. If enabled, read first dword of cmd buffer after handling residency.")
DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionReadBackRingBuffer, -1, "-1: default - disabled, 0 - disable, 1 - enable. If enabled, read first dword of ring buffer after handling residency.")
DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionInsertExtraMiMemFenceCommands, -1, "-1: default, 0 - disable, 1 - enable. If enabled, add extra MI_MEM_FENCE instructions with acquire bit set")
DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionInsertSfenceInstructionPriorToSubmission, -1, "-1: default, 0 - disable, 1 - Insert _mm_sfence before unlocking semaphore only, 2 - insert before and after semaphore")
DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionInsertSfenceInstructionPriorToSubmission, -1, "-1: default, 0 - disable, 1 - Insert _mm_sfence before unlocking semaphore only, 2 - insert before and after semaphore")
DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionMaxRingBuffers, -1, "-1: default, >0: max ring buffer count, During switch ring buffer, if there is no available ring, wait for completion instead of allocating new one if DirectSubmissionMaxRingBuffers is reached")
/* IMPLICIT SCALING */
DECLARE_DEBUG_VARIABLE(int32_t, EnableWalkerPartition, -1, "-1: default, 0: disable, 1: enable, Enables Walker Partitioning via WPARID.")

View File

@@ -145,14 +145,23 @@ class DirectSubmissionHw {
size_t getDiagnosticModeSection();
void setPostSyncOffset();
enum RingBufferUse : uint32_t {
FirstBuffer,
SecondBuffer,
MaxBuffers
virtual bool isCompleted(uint32_t ringBufferIndex) = 0;
// Bookkeeping entry for one ring buffer: the backing allocation together with the
// fence value recorded for the work submitted through it.
struct RingBufferUse {
RingBufferUse() = default;
RingBufferUse(FlushStamp completionFence, GraphicsAllocation *ringBuffer) : completionFence(completionFence), ringBuffer(ringBuffer){};
// Number of entries the ringBuffers vector is constructed with and that
// allocateResources() fills in; further entries are appended on demand.
constexpr static uint32_t initialRingBufferCount = 2u;
// Fence/tag value recorded for this buffer. 0 is the initial value; the OS-specific
// handleSwitchRingBuffers() implementations skip waiting while it is still 0.
FlushStamp completionFence = 0ull;
// Allocation backing this ring buffer; nullptr until it has been allocated.
GraphicsAllocation *ringBuffer = nullptr;
};
std::vector<RingBufferUse> ringBuffers;
uint32_t currentRingBuffer = 0u;
uint32_t previousRingBuffer = 0u;
uint32_t maxRingBufferCount = std::numeric_limits<uint32_t>::max();
LinearStream ringCommandStream;
FlushStamp completionRingBuffers[RingBufferUse::MaxBuffers] = {0ull, 0ull};
std::unique_ptr<DirectSubmissionDiagnosticsCollector> diagnostic;
uint64_t semaphoreGpuVa = 0u;
@@ -165,8 +174,6 @@ class DirectSubmissionHw {
const HardwareInfo *hwInfo = nullptr;
const GraphicsAllocation *globalFenceAllocation = nullptr;
GraphicsAllocation *completionFenceAllocation = nullptr;
GraphicsAllocation *ringBuffer = nullptr;
GraphicsAllocation *ringBuffer2 = nullptr;
GraphicsAllocation *semaphores = nullptr;
GraphicsAllocation *workPartitionAllocation = nullptr;
void *semaphorePtr = nullptr;
@@ -174,7 +181,6 @@ class DirectSubmissionHw {
volatile void *workloadModeOneStoreAddress = nullptr;
uint32_t currentQueueWorkCount = 1u;
RingBufferUse currentRingBuffer = RingBufferUse::FirstBuffer;
uint32_t workloadMode = 0;
uint32_t workloadModeOneExpectedValue = 0u;
uint32_t activeTiles = 1u;

View File

@@ -31,7 +31,7 @@ namespace NEO {
template <typename GfxFamily, typename Dispatcher>
DirectSubmissionHw<GfxFamily, Dispatcher>::DirectSubmissionHw(const DirectSubmissionInputParams &inputParams)
: osContext(inputParams.osContext), rootDeviceIndex(inputParams.rootDeviceIndex) {
: ringBuffers(RingBufferUse::initialRingBufferCount), osContext(inputParams.osContext), rootDeviceIndex(inputParams.rootDeviceIndex) {
memoryManager = inputParams.memoryManager;
globalFenceAllocation = inputParams.globalFenceAllocation;
hwInfo = inputParams.rootDeviceEnvironment.getHardwareInfo();
@@ -42,6 +42,10 @@ DirectSubmissionHw<GfxFamily, Dispatcher>::DirectSubmissionHw(const DirectSubmis
disableCacheFlush = UllsDefaults::defaultDisableCacheFlush;
disableMonitorFence = UllsDefaults::defaultDisableMonitorFence;
if (DebugManager.flags.DirectSubmissionMaxRingBuffers.get() != -1) {
this->maxRingBufferCount = DebugManager.flags.DirectSubmissionMaxRingBuffers.get();
}
if (DebugManager.flags.DirectSubmissionDisableCacheFlush.get() != -1) {
disableCacheFlush = !!DebugManager.flags.DirectSubmissionDisableCacheFlush.get();
}
@@ -80,13 +84,14 @@ bool DirectSubmissionHw<GfxFamily, Dispatcher>::allocateResources() {
true, allocationSize,
AllocationType::RING_BUFFER,
isMultiOsContextCapable, false, osContext.getDeviceBitfield()};
ringBuffer = memoryManager->allocateGraphicsMemoryWithProperties(commandStreamAllocationProperties);
UNRECOVERABLE_IF(ringBuffer == nullptr);
allocations.push_back(ringBuffer);
ringBuffer2 = memoryManager->allocateGraphicsMemoryWithProperties(commandStreamAllocationProperties);
UNRECOVERABLE_IF(ringBuffer2 == nullptr);
allocations.push_back(ringBuffer2);
for (uint32_t ringBufferIndex = 0; ringBufferIndex < RingBufferUse::initialRingBufferCount; ringBufferIndex++) {
auto ringBuffer = memoryManager->allocateGraphicsMemoryWithProperties(commandStreamAllocationProperties);
this->ringBuffers[ringBufferIndex].ringBuffer = ringBuffer;
UNRECOVERABLE_IF(ringBuffer == nullptr);
allocations.push_back(ringBuffer);
memset(ringBuffer->getUnderlyingBuffer(), 0, allocationSize);
}
const AllocationProperties semaphoreAllocationProperties{rootDeviceIndex,
true, MemoryConstants::pageSize,
@@ -105,27 +110,23 @@ bool DirectSubmissionHw<GfxFamily, Dispatcher>::allocateResources() {
}
if (DebugManager.flags.DirectSubmissionPrintBuffers.get()) {
printf("Ring buffer 1 - gpu address: %" PRIx64 " - %" PRIx64 ", cpu address: %p - %p, size: %zu \n",
ringBuffer->getGpuAddress(),
ptrOffset(ringBuffer->getGpuAddress(), ringBuffer->getUnderlyingBufferSize()),
ringBuffer->getUnderlyingBuffer(),
ptrOffset(ringBuffer->getUnderlyingBuffer(), ringBuffer->getUnderlyingBufferSize()),
ringBuffer->getUnderlyingBufferSize());
for (uint32_t ringBufferIndex = 0; ringBufferIndex < RingBufferUse::initialRingBufferCount; ringBufferIndex++) {
const auto ringBuffer = this->ringBuffers[ringBufferIndex].ringBuffer;
printf("Ring buffer 2 - gpu address: %" PRIx64 " - %" PRIx64 ", cpu address: %p - %p, size: %zu \n",
ringBuffer2->getGpuAddress(),
ptrOffset(ringBuffer2->getGpuAddress(), ringBuffer2->getUnderlyingBufferSize()),
ringBuffer2->getUnderlyingBuffer(),
ptrOffset(ringBuffer2->getUnderlyingBuffer(), ringBuffer2->getUnderlyingBufferSize()),
ringBuffer2->getUnderlyingBufferSize());
printf("Ring buffer %u - gpu address: %" PRIx64 " - %" PRIx64 ", cpu address: %p - %p, size: %zu \n",
ringBufferIndex,
ringBuffer->getGpuAddress(),
ptrOffset(ringBuffer->getGpuAddress(), ringBuffer->getUnderlyingBufferSize()),
ringBuffer->getUnderlyingBuffer(),
ptrOffset(ringBuffer->getUnderlyingBuffer(), ringBuffer->getUnderlyingBufferSize()),
ringBuffer->getUnderlyingBufferSize());
}
}
handleResidency();
ringCommandStream.replaceBuffer(ringBuffer->getUnderlyingBuffer(), minimumRequiredSize);
ringCommandStream.replaceGraphicsAllocation(ringBuffer);
ringCommandStream.replaceBuffer(this->ringBuffers[0u].ringBuffer->getUnderlyingBuffer(), minimumRequiredSize);
ringCommandStream.replaceGraphicsAllocation(this->ringBuffers[0].ringBuffer);
memset(ringBuffer->getUnderlyingBuffer(), 0, allocationSize);
memset(ringBuffer2->getUnderlyingBuffer(), 0, allocationSize);
semaphorePtr = semaphores->getUnderlyingBuffer();
semaphoreGpuVa = semaphores->getGpuAddress();
semaphoreData = static_cast<volatile RingSemaphoreData *>(semaphorePtr);
@@ -525,27 +526,46 @@ inline uint64_t DirectSubmissionHw<GfxFamily, Dispatcher>::switchRingBuffers() {
template <typename GfxFamily, typename Dispatcher>
inline GraphicsAllocation *DirectSubmissionHw<GfxFamily, Dispatcher>::switchRingBuffersAllocations() {
this->previousRingBuffer = this->currentRingBuffer;
GraphicsAllocation *nextAllocation = nullptr;
if (currentRingBuffer == RingBufferUse::FirstBuffer) {
nextAllocation = ringBuffer2;
currentRingBuffer = RingBufferUse::SecondBuffer;
} else {
nextAllocation = ringBuffer;
currentRingBuffer = RingBufferUse::FirstBuffer;
for (uint32_t ringBufferIndex = 0; ringBufferIndex < this->ringBuffers.size(); ringBufferIndex++) {
if (ringBufferIndex != this->currentRingBuffer && this->isCompleted(ringBufferIndex)) {
this->currentRingBuffer = ringBufferIndex;
nextAllocation = this->ringBuffers[ringBufferIndex].ringBuffer;
break;
}
}
if (nextAllocation == nullptr) {
    // No other ring buffer reported completion: either reuse the next buffer in
    // round-robin order (limit reached) or grow the pool by one allocation.
    // The limit check uses >= rather than == so that a DirectSubmissionMaxRingBuffers
    // value below the initial buffer count (e.g. 1) still caps the pool instead of
    // never matching and letting it grow on every switch.
    if (this->ringBuffers.size() >= this->maxRingBufferCount) {
        // NOTE(review): the selected buffer may still be in flight; presumably the
        // OS-specific handleSwitchRingBuffers() waits on its completionFence - confirm.
        this->currentRingBuffer = (this->currentRingBuffer + 1) % this->ringBuffers.size();
        nextAllocation = this->ringBuffers[this->currentRingBuffer].ringBuffer;
    } else {
        // Same allocation parameters as used for the initial ring buffers.
        bool isMultiOsContextCapable = osContext.getNumSupportedDevices() > 1u;
        constexpr size_t minimumRequiredSize = 256 * MemoryConstants::kiloByte;
        constexpr size_t additionalAllocationSize = MemoryConstants::pageSize;
        const auto allocationSize = alignUp(minimumRequiredSize + additionalAllocationSize, MemoryConstants::pageSize64k);
        const AllocationProperties commandStreamAllocationProperties{rootDeviceIndex,
                                                                     true, allocationSize,
                                                                     AllocationType::RING_BUFFER,
                                                                     isMultiOsContextCapable, false, osContext.getDeviceBitfield()};
        nextAllocation = memoryManager->allocateGraphicsMemoryWithProperties(commandStreamAllocationProperties);
        // Fail here, as allocateResources() does, instead of storing a null
        // allocation and passing it on to makeResidentWithinOsContext().
        UNRECOVERABLE_IF(nextAllocation == nullptr);
        this->currentRingBuffer = static_cast<uint32_t>(this->ringBuffers.size());
        // A fresh buffer starts with completionFence 0, i.e. nothing to wait for yet.
        this->ringBuffers.emplace_back(0ull, nextAllocation);
        auto ret = memoryOperationHandler->makeResidentWithinOsContext(&this->osContext, ArrayRef<GraphicsAllocation *>(&nextAllocation, 1u), false) == MemoryOperationsStatus::SUCCESS;
        UNRECOVERABLE_IF(!ret);
    }
}
UNRECOVERABLE_IF(this->currentRingBuffer == this->previousRingBuffer);
return nextAllocation;
}
template <typename GfxFamily, typename Dispatcher>
void DirectSubmissionHw<GfxFamily, Dispatcher>::deallocateResources() {
if (ringBuffer) {
memoryManager->freeGraphicsMemory(ringBuffer);
ringBuffer = nullptr;
}
if (ringBuffer2) {
memoryManager->freeGraphicsMemory(ringBuffer2);
ringBuffer2 = nullptr;
for (uint32_t ringBufferIndex = 0; ringBufferIndex < this->ringBuffers.size(); ringBufferIndex++) {
memoryManager->freeGraphicsMemory(this->ringBuffers[ringBufferIndex].ringBuffer);
}
this->ringBuffers.clear();
if (semaphores) {
memoryManager->freeGraphicsMemory(semaphores);
semaphores = nullptr;

View File

@@ -39,6 +39,7 @@ class DrmDirectSubmission : public DirectSubmissionHw<GfxFamily, Dispatcher> {
void handleSwitchRingBuffers() override;
uint64_t updateTagValue() override;
void getTagAddressValue(TagData &tagData) override;
bool isCompleted(uint32_t ringBufferIndex) override;
MOCKABLE_VIRTUAL void wait(uint32_t taskCountToWait);

View File

@@ -175,22 +175,21 @@ void DrmDirectSubmission<GfxFamily, Dispatcher>::handleStopRingBuffer() {
template <typename GfxFamily, typename Dispatcher>
void DrmDirectSubmission<GfxFamily, Dispatcher>::handleSwitchRingBuffers() {
if (this->disableMonitorFence) {
auto previousRingBuffer = this->currentRingBuffer == DirectSubmissionHw<GfxFamily, Dispatcher>::RingBufferUse::FirstBuffer ? DirectSubmissionHw<GfxFamily, Dispatcher>::RingBufferUse::SecondBuffer : DirectSubmissionHw<GfxFamily, Dispatcher>::RingBufferUse::FirstBuffer;
this->currentTagData.tagValue++;
bool updateCompletionRingBuffers = this->ringStart;
bool updateCompletionFences = this->ringStart;
if (DebugManager.flags.EnableRingSwitchTagUpdateWa.get() == 0) {
updateCompletionRingBuffers = true;
updateCompletionFences = true;
}
if (updateCompletionRingBuffers) {
this->completionRingBuffers[previousRingBuffer] = this->currentTagData.tagValue;
if (updateCompletionFences) {
this->ringBuffers[this->previousRingBuffer].completionFence = this->currentTagData.tagValue;
}
}
if (this->ringStart) {
if (this->completionRingBuffers[this->currentRingBuffer] != 0) {
this->wait(static_cast<uint32_t>(this->completionRingBuffers[this->currentRingBuffer]));
if (this->ringBuffers[this->currentRingBuffer].completionFence != 0) {
this->wait(static_cast<uint32_t>(this->ringBuffers[this->currentRingBuffer].completionFence));
}
}
}
@@ -199,7 +198,7 @@ template <typename GfxFamily, typename Dispatcher>
uint64_t DrmDirectSubmission<GfxFamily, Dispatcher>::updateTagValue() {
if (!this->disableMonitorFence) {
this->currentTagData.tagValue++;
this->completionRingBuffers[this->currentRingBuffer] = this->currentTagData.tagValue;
this->ringBuffers[this->currentRingBuffer].completionFence = this->currentTagData.tagValue;
}
return 0ull;
}
@@ -210,6 +209,19 @@ void DrmDirectSubmission<GfxFamily, Dispatcher>::getTagAddressValue(TagData &tag
tagData.tagValue = this->currentTagData.tagValue + 1;
}
// Reports whether the completion fence recorded for the given ring buffer has been
// reached on every active tile. Each tile's tag value is read at tagAddress advanced by
// postSyncOffset per tile; the first tile whose value is still below the fence makes
// the result false.
template <typename GfxFamily, typename Dispatcher>
inline bool DrmDirectSubmission<GfxFamily, Dispatcher>::isCompleted(uint32_t ringBufferIndex) {
    const auto requiredFence = this->ringBuffers[ringBufferIndex].completionFence;
    auto tileTagAddress = this->tagAddress;

    for (uint32_t tile = 0; tile < this->activeTiles; ++tile) {
        const bool fenceReached = !(*tileTagAddress < requiredFence);
        if (!fenceReached) {
            return false;
        }
        tileTagAddress = ptrOffset(tileTagAddress, this->postSyncOffset);
    }

    return true;
}
template <typename GfxFamily, typename Dispatcher>
void DrmDirectSubmission<GfxFamily, Dispatcher>::wait(uint32_t taskCountToWait) {
auto pollAddress = this->tagAddress;

View File

@@ -28,10 +28,11 @@ class WddmDirectSubmission : public DirectSubmissionHw<GfxFamily, Dispatcher> {
bool submit(uint64_t gpuAddress, size_t size) override;
bool handleResidency() override;
void handleCompletionRingBuffer(uint64_t completionValue, MonitoredFence &fence);
void handleCompletionFence(uint64_t completionValue, MonitoredFence &fence);
void handleSwitchRingBuffers() override;
uint64_t updateTagValue() override;
void getTagAddressValue(TagData &tagData) override;
bool isCompleted(uint32_t ringBufferIndex) override;
OsContextWin *osContextWin;
Wddm *wddm;

View File

@@ -42,7 +42,7 @@ WddmDirectSubmission<GfxFamily, Dispatcher>::~WddmDirectSubmission() {
perfLogResidencyVariadicLog(wddm->getResidencyLogger(), "Stopping Wddm ULLS\n");
if (this->ringStart) {
this->stopRingBuffer();
WddmDirectSubmission<GfxFamily, Dispatcher>::handleCompletionRingBuffer(ringFence.lastSubmittedFence, ringFence);
WddmDirectSubmission<GfxFamily, Dispatcher>::handleCompletionFence(ringFence.lastSubmittedFence, ringFence);
}
this->deallocateResources();
wddm->getWddmInterface()->destroyMonitorFence(ringFence);
@@ -89,9 +89,9 @@ bool WddmDirectSubmission<GfxFamily, Dispatcher>::handleResidency() {
template <typename GfxFamily, typename Dispatcher>
void WddmDirectSubmission<GfxFamily, Dispatcher>::handleSwitchRingBuffers() {
if (this->ringStart) {
if (this->completionRingBuffers[this->currentRingBuffer] != 0) {
if (this->ringBuffers[this->currentRingBuffer].completionFence != 0) {
MonitoredFence &currentFence = osContextWin->getResidencyController().getMonitoredFence();
handleCompletionRingBuffer(this->completionRingBuffers[this->currentRingBuffer], currentFence);
handleCompletionFence(this->ringBuffers[this->currentRingBuffer].completionFence, currentFence);
}
}
}
@@ -102,13 +102,13 @@ uint64_t WddmDirectSubmission<GfxFamily, Dispatcher>::updateTagValue() {
currentFence.lastSubmittedFence = currentFence.currentFenceValue;
currentFence.currentFenceValue++;
this->completionRingBuffers[this->currentRingBuffer] = currentFence.lastSubmittedFence;
this->ringBuffers[this->currentRingBuffer].completionFence = currentFence.lastSubmittedFence;
return currentFence.lastSubmittedFence;
}
template <typename GfxFamily, typename Dispatcher>
void WddmDirectSubmission<GfxFamily, Dispatcher>::handleCompletionRingBuffer(uint64_t completionValue, MonitoredFence &fence) {
void WddmDirectSubmission<GfxFamily, Dispatcher>::handleCompletionFence(uint64_t completionValue, MonitoredFence &fence) {
wddm->waitFromCpu(completionValue, fence);
}
@@ -121,4 +121,14 @@ void WddmDirectSubmission<GfxFamily, Dispatcher>::getTagAddressValue(TagData &ta
tagData.tagValue = currentFence.currentFenceValue;
}
// Reports whether the fence value recorded for the given ring buffer has been reached,
// i.e. it is not greater than the value read through the monitored fence's cpuAddress.
template <typename GfxFamily, typename Dispatcher>
inline bool WddmDirectSubmission<GfxFamily, Dispatcher>::isCompleted(uint32_t ringBufferIndex) {
    MonitoredFence &monitoredFence = osContextWin->getResidencyController().getMonitoredFence();
    const auto ringFenceValue = this->ringBuffers[ringBufferIndex].completionFence;
    return !(ringFenceValue > *monitoredFence.cpuAddress);
}
} // namespace NEO

View File

@@ -18,7 +18,6 @@ struct MockDirectSubmissionHw : public DirectSubmissionHw<GfxFamily, Dispatcher>
using BaseClass::activeTiles;
using BaseClass::allocateResources;
using BaseClass::completionFenceAllocation;
using BaseClass::completionRingBuffers;
using BaseClass::cpuCachelineFlush;
using BaseClass::currentQueueWorkCount;
using BaseClass::currentRingBuffer;
@@ -54,8 +53,7 @@ struct MockDirectSubmissionHw : public DirectSubmissionHw<GfxFamily, Dispatcher>
using BaseClass::performDiagnosticMode;
using BaseClass::postSyncOffset;
using BaseClass::reserved;
using BaseClass::ringBuffer;
using BaseClass::ringBuffer2;
using BaseClass::ringBuffers;
using BaseClass::ringCommandStream;
using BaseClass::ringStart;
using BaseClass::semaphoreData;
@@ -128,6 +126,10 @@ struct MockDirectSubmissionHw : public DirectSubmissionHw<GfxFamily, Dispatcher>
BaseClass::performDiagnosticMode();
}
// Test stub: ignores the ring buffer index and returns the isCompletedReturn member,
// so a test can choose whether every ring buffer is reported as completed.
bool isCompleted(uint32_t ringBufferIndex) override {
return this->isCompletedReturn;
}
uint64_t updateTagValueReturn = 1ull;
uint64_t tagAddressSetValue = MemoryConstants::pageSize;
uint64_t tagValueSetValue = 1ull;
@@ -141,5 +143,6 @@ struct MockDirectSubmissionHw : public DirectSubmissionHw<GfxFamily, Dispatcher>
bool submitReturn = true;
bool handleResidencyReturn = true;
bool callBaseResident = false;
bool isCompletedReturn = true;
};
} // namespace NEO

View File

@@ -17,19 +17,18 @@ struct MockWddmDirectSubmission : public WddmDirectSubmission<GfxFamily, Dispatc
using BaseClass::allocateOsResources;
using BaseClass::allocateResources;
using BaseClass::commandBufferHeader;
using BaseClass::completionRingBuffers;
using BaseClass::currentRingBuffer;
using BaseClass::getSizeDispatch;
using BaseClass::getSizeSemaphoreSection;
using BaseClass::getSizeSwitchRingBufferSection;
using BaseClass::getSizeSystemMemoryFenceAddress;
using BaseClass::getTagAddressValue;
using BaseClass::handleCompletionRingBuffer;
using BaseClass::handleCompletionFence;
using BaseClass::handleResidency;
using BaseClass::isCompleted;
using BaseClass::miMemFenceRequired;
using BaseClass::osContextWin;
using BaseClass::ringBuffer;
using BaseClass::ringBuffer2;
using BaseClass::ringBuffers;
using BaseClass::ringCommandStream;
using BaseClass::ringFence;
using BaseClass::ringStart;

View File

@@ -98,6 +98,7 @@ DirectSubmissionNewResourceTlbFlush = -1
DirectSubmissionDisableCacheFlush = -1
DirectSubmissionDisableMonitorFence = -1
DirectSubmissionPrintBuffers = 0
DirectSubmissionMaxRingBuffers = -1
USMEvictAfterMigration = 0
EnableDirectSubmissionController = -1
DirectSubmissionControllerTimeout = -1

View File

@@ -132,8 +132,8 @@ HWTEST_F(DirectSubmissionTest, givenDirectSubmissionWithoutCompletionFenceAlloca
EXPECT_EQ(1, mockMemoryOperations->makeResidentCalledCount);
ASSERT_EQ(3u, mockMemoryOperations->gfxAllocationsForMakeResident.size());
EXPECT_EQ(directSubmission.ringBuffer, mockMemoryOperations->gfxAllocationsForMakeResident[0]);
EXPECT_EQ(directSubmission.ringBuffer2, mockMemoryOperations->gfxAllocationsForMakeResident[1]);
EXPECT_EQ(directSubmission.ringBuffers[0].ringBuffer, mockMemoryOperations->gfxAllocationsForMakeResident[0]);
EXPECT_EQ(directSubmission.ringBuffers[1].ringBuffer, mockMemoryOperations->gfxAllocationsForMakeResident[1]);
EXPECT_EQ(directSubmission.semaphores, mockMemoryOperations->gfxAllocationsForMakeResident[2]);
pDevice->getRootDeviceEnvironmentRef().memoryOperationsInterface.release();
@@ -158,8 +158,8 @@ HWTEST_F(DirectSubmissionTest, givenDirectSubmissionWithCompletionFenceAllocatio
EXPECT_EQ(1, mockMemoryOperations->makeResidentCalledCount);
ASSERT_EQ(4u, mockMemoryOperations->gfxAllocationsForMakeResident.size());
EXPECT_EQ(directSubmission.ringBuffer, mockMemoryOperations->gfxAllocationsForMakeResident[0]);
EXPECT_EQ(directSubmission.ringBuffer2, mockMemoryOperations->gfxAllocationsForMakeResident[1]);
EXPECT_EQ(directSubmission.ringBuffers[0].ringBuffer, mockMemoryOperations->gfxAllocationsForMakeResident[0]);
EXPECT_EQ(directSubmission.ringBuffers[1].ringBuffer, mockMemoryOperations->gfxAllocationsForMakeResident[1]);
EXPECT_EQ(directSubmission.semaphores, mockMemoryOperations->gfxAllocationsForMakeResident[2]);
EXPECT_EQ(directSubmission.completionFenceAllocation, mockMemoryOperations->gfxAllocationsForMakeResident[3]);
@@ -174,8 +174,8 @@ HWTEST_F(DirectSubmissionTest, givenDirectSubmissionInitializedWhenRingIsStarted
EXPECT_TRUE(ret);
EXPECT_TRUE(directSubmission.ringStart);
EXPECT_NE(nullptr, directSubmission.ringBuffer);
EXPECT_NE(nullptr, directSubmission.ringBuffer2);
EXPECT_NE(nullptr, directSubmission.ringBuffers[0].ringBuffer);
EXPECT_NE(nullptr, directSubmission.ringBuffers[1].ringBuffer);
EXPECT_NE(nullptr, directSubmission.semaphores);
EXPECT_NE(0u, directSubmission.ringCommandStream.getUsed());
@@ -188,42 +188,99 @@ HWTEST_F(DirectSubmissionTest, givenDirectSubmissionInitializedWhenRingIsNotStar
EXPECT_TRUE(ret);
EXPECT_FALSE(directSubmission.ringStart);
EXPECT_NE(nullptr, directSubmission.ringBuffer);
EXPECT_NE(nullptr, directSubmission.ringBuffer2);
EXPECT_NE(nullptr, directSubmission.ringBuffers[0].ringBuffer);
EXPECT_NE(nullptr, directSubmission.ringBuffers[1].ringBuffer);
EXPECT_NE(nullptr, directSubmission.semaphores);
EXPECT_EQ(0u, directSubmission.ringCommandStream.getUsed());
}
HWTEST_F(DirectSubmissionTest, givenDirectSubmissionSwitchBuffersWhenCurrentIsPrimaryThenExpectNextSecondary) {
using RingBufferUse = typename MockDirectSubmissionHw<FamilyType, RenderDispatcher<FamilyType>>::RingBufferUse;
MockDirectSubmissionHw<FamilyType, RenderDispatcher<FamilyType>> directSubmission(*pDevice->getDefaultEngine().commandStreamReceiver);
bool ret = directSubmission.initialize(false, false);
EXPECT_TRUE(ret);
EXPECT_EQ(RingBufferUse::FirstBuffer, directSubmission.currentRingBuffer);
EXPECT_EQ(0u, directSubmission.currentRingBuffer);
GraphicsAllocation *nextRing = directSubmission.switchRingBuffersAllocations();
EXPECT_EQ(directSubmission.ringBuffer2, nextRing);
EXPECT_EQ(RingBufferUse::SecondBuffer, directSubmission.currentRingBuffer);
EXPECT_EQ(directSubmission.ringBuffers[1].ringBuffer, nextRing);
EXPECT_EQ(1u, directSubmission.currentRingBuffer);
}
HWTEST_F(DirectSubmissionTest, givenDirectSubmissionSwitchBuffersWhenCurrentIsSecondaryThenExpectNextPrimary) {
using RingBufferUse = typename MockDirectSubmissionHw<FamilyType, RenderDispatcher<FamilyType>>::RingBufferUse;
MockDirectSubmissionHw<FamilyType, RenderDispatcher<FamilyType>> directSubmission(*pDevice->getDefaultEngine().commandStreamReceiver);
bool ret = directSubmission.initialize(false, false);
EXPECT_TRUE(ret);
EXPECT_EQ(RingBufferUse::FirstBuffer, directSubmission.currentRingBuffer);
EXPECT_EQ(0u, directSubmission.currentRingBuffer);
GraphicsAllocation *nextRing = directSubmission.switchRingBuffersAllocations();
EXPECT_EQ(directSubmission.ringBuffer2, nextRing);
EXPECT_EQ(RingBufferUse::SecondBuffer, directSubmission.currentRingBuffer);
EXPECT_EQ(directSubmission.ringBuffers[1].ringBuffer, nextRing);
EXPECT_EQ(1u, directSubmission.currentRingBuffer);
nextRing = directSubmission.switchRingBuffersAllocations();
EXPECT_EQ(directSubmission.ringBuffer, nextRing);
EXPECT_EQ(RingBufferUse::FirstBuffer, directSubmission.currentRingBuffer);
EXPECT_EQ(directSubmission.ringBuffers[0].ringBuffer, nextRing);
EXPECT_EQ(0u, directSubmission.currentRingBuffer);
}
// Verifies ring buffer selection on switch: while the mock reports no buffer as completed
// every switch appends a new ring buffer, and once buffers are reported completed the
// lowest-index buffer other than the current one is reused without growing the pool.
HWTEST_F(DirectSubmissionTest, givenDirectSubmissionCurrentRingBuffersInUseWhenSwitchRingBufferThenAllocateNewInsteadOfWaiting) {
    auto mockMemoryOperations = std::make_unique<MockMemoryOperations>();
    pDevice->getRootDeviceEnvironmentRef().memoryOperationsInterface.reset(mockMemoryOperations.get());

    MockDirectSubmissionHw<FamilyType, RenderDispatcher<FamilyType>> directSubmission(*pDevice->getDefaultEngine().commandStreamReceiver);
    directSubmission.isCompletedReturn = false;
    bool initialized = directSubmission.initialize(false, false);
    EXPECT_TRUE(initialized);
    EXPECT_EQ(0u, directSubmission.currentRingBuffer);
    EXPECT_EQ(2u, directSubmission.ringBuffers.size());

    // One entry per switchRingBuffersAllocations() call, in execution order.
    struct SwitchStep {
        bool buffersCompleted;    // value the mock's isCompleted() returns during the call
        size_t expectedRingCount; // ringBuffers.size() after the call
        uint32_t expectedCurrent; // currentRingBuffer after the call
    };
    const SwitchStep steps[] = {
        {false, 4u - 1u, 2u}, // nothing completed: third buffer is allocated
        {false, 4u, 3u},      // still nothing completed: fourth buffer is allocated
        {true, 4u, 0u},       // completed buffers available: alternate between 0 and 1
        {true, 4u, 1u},
        {true, 4u, 0u},
        {true, 4u, 1u},
        {true, 4u, 0u},
        {false, 5u, 4u}, // nothing completed again: fifth buffer is allocated
    };

    for (const auto &step : steps) {
        directSubmission.isCompletedReturn = step.buffersCompleted;
        auto nextRing = directSubmission.switchRingBuffersAllocations();
        EXPECT_EQ(step.expectedRingCount, directSubmission.ringBuffers.size());
        EXPECT_EQ(directSubmission.ringBuffers[step.expectedCurrent].ringBuffer, nextRing);
        EXPECT_EQ(step.expectedCurrent, directSubmission.currentRingBuffer);
    }

    // Hand ownership back: the mock is owned by the unique_ptr declared above.
    pDevice->getRootDeviceEnvironmentRef().memoryOperationsInterface.release();
}
HWTEST_F(DirectSubmissionTest, givenDirectSubmissionAllocateFailWhenRingIsStartedThenExpectRingNotStarted) {
MockDirectSubmissionHw<FamilyType, RenderDispatcher<FamilyType>> directSubmission(*pDevice->getDefaultEngine().commandStreamReceiver);
EXPECT_TRUE(directSubmission.disableCpuCacheFlush);
@@ -549,8 +606,8 @@ HWTEST_F(DirectSubmissionTest, whenDirectSubmissionInitializedThenExpectCreatedA
bool ret = directSubmission->initialize(false, false);
EXPECT_TRUE(ret);
GraphicsAllocation *nulledAllocation = directSubmission->ringBuffer;
directSubmission->ringBuffer = nullptr;
GraphicsAllocation *nulledAllocation = directSubmission->ringBuffers[0u].ringBuffer;
directSubmission->ringBuffers[0u].ringBuffer = nullptr;
directSubmission.reset(nullptr);
memoryManager->freeGraphicsMemory(nulledAllocation);
@@ -559,8 +616,8 @@ HWTEST_F(DirectSubmissionTest, whenDirectSubmissionInitializedThenExpectCreatedA
ret = directSubmission->initialize(false, false);
EXPECT_TRUE(ret);
nulledAllocation = directSubmission->ringBuffer2;
directSubmission->ringBuffer2 = nullptr;
nulledAllocation = directSubmission->ringBuffers[1u].ringBuffer;
directSubmission->ringBuffers[1u].ringBuffer = nullptr;
directSubmission.reset(nullptr);
memoryManager->freeGraphicsMemory(nulledAllocation);

View File

@@ -585,9 +585,9 @@ HWTEST_F(DirectSubmissionDispatchBufferTest, givenDirectSubmissionPrintBuffersWh
std::string output = testing::internal::GetCapturedStdout();
auto pos = output.find("Ring buffer 1");
auto pos = output.find("Ring buffer 0");
EXPECT_TRUE(pos != std::string::npos);
pos = output.find("Ring buffer 2");
pos = output.find("Ring buffer 1");
EXPECT_TRUE(pos != std::string::npos);
pos = output.find("Client buffer");
EXPECT_TRUE(pos != std::string::npos);

View File

@@ -60,6 +60,7 @@ struct MockDrmDirectSubmission : public DrmDirectSubmission<GfxFamily, Dispatche
using BaseClass::allocateResources;
using BaseClass::completionFenceAllocation;
using BaseClass::completionFenceValue;
using BaseClass::currentRingBuffer;
using BaseClass::currentTagData;
using BaseClass::disableMonitorFence;
using BaseClass::dispatchSwitchRingBufferSection;
@@ -69,11 +70,12 @@ struct MockDrmDirectSubmission : public DrmDirectSubmission<GfxFamily, Dispatche
using BaseClass::getTagAddressValue;
using BaseClass::handleNewResourcesSubmission;
using BaseClass::handleResidency;
using BaseClass::isCompleted;
using BaseClass::isNewResourceHandleNeeded;
using BaseClass::partitionConfigSet;
using BaseClass::partitionedMode;
using BaseClass::postSyncOffset;
using BaseClass::ringBuffer;
using BaseClass::ringBuffers;
using BaseClass::ringStart;
using BaseClass::submit;
using BaseClass::switchRingBuffers;
@@ -112,6 +114,22 @@ HWTEST_F(DrmDirectSubmissionTest, givenDrmDirectSubmissionWhenCallingLinuxImplem
*drmDirectSubmission.tagAddress = 1u;
}
// Verifies DrmDirectSubmission::isCompleted(): a ring buffer whose completion fence is
// above the value at tagAddress is reported as not completed, and as completed once the
// tag value reaches the fence.
HWTEST_F(DrmDirectSubmissionTest, givenDrmDirectSubmissionWhenCallingIsCompletedThenProperValueReturned) {
MockDrmDirectSubmission<FamilyType, RenderDispatcher<FamilyType>> drmDirectSubmission(*device->getDefaultEngine().commandStreamReceiver);
auto drm = static_cast<DrmMock *>(executionEnvironment.rootDeviceEnvironments[0]->osInterface->getDriverModel()->as<Drm>());
EXPECT_TRUE(drm->isDirectSubmissionActive());
EXPECT_TRUE(drmDirectSubmission.allocateResources());
// Fence of 1 is expected to be above the current tag value, so the buffer is still in use.
drmDirectSubmission.ringBuffers[0].completionFence = 1u;
EXPECT_FALSE(drmDirectSubmission.isCompleted(0u));
// Raise the tag value to the fence: the buffer must now be reported as completed.
*drmDirectSubmission.tagAddress = 1u;
EXPECT_TRUE(drmDirectSubmission.isCompleted(0u));
// Reset the fence to its initial value (presumably so teardown has nothing to wait for - TODO confirm).
drmDirectSubmission.ringBuffers[0].completionFence = 0u;
}
HWTEST_F(DrmDirectSubmissionTest, whenCreateDirectSubmissionThenValidObjectIsReturned) {
auto directSubmission = DirectSubmissionHw<FamilyType, RenderDispatcher<FamilyType>>::create(*device->getDefaultEngine().commandStreamReceiver);
EXPECT_NE(directSubmission.get(), nullptr);
@@ -295,7 +313,7 @@ HWTEST_F(DrmDirectSubmissionTest, givenNoCompletionFenceSupportWhenSubmittingThe
MockDrmDirectSubmission<FamilyType, RenderDispatcher<FamilyType>> drmDirectSubmission(*device->getDefaultEngine().commandStreamReceiver);
drmDirectSubmission.completionFenceAllocation = nullptr;
EXPECT_TRUE(drmDirectSubmission.allocateResources());
auto ringBuffer = static_cast<DrmAllocation *>(drmDirectSubmission.ringBuffer);
auto ringBuffer = static_cast<DrmAllocation *>(drmDirectSubmission.ringBuffers[drmDirectSubmission.currentRingBuffer].ringBuffer);
auto initialBO = ringBuffer->getBufferObjectToModify(0);
auto drm = executionEnvironment.rootDeviceEnvironments[0]->osInterface->getDriverModel()->as<Drm>();
@@ -331,7 +349,7 @@ HWTEST_F(DrmDirectSubmissionTest, givenTile0AndCompletionFenceSupportWhenSubmitt
MockDrmDirectSubmission<FamilyType, RenderDispatcher<FamilyType>> drmDirectSubmission(commandStreamReceiver);
drmDirectSubmission.completionFenceAllocation = commandStreamReceiver.getTagAllocation();
EXPECT_TRUE(drmDirectSubmission.allocateResources());
auto ringBuffer = static_cast<DrmAllocation *>(drmDirectSubmission.ringBuffer);
auto ringBuffer = static_cast<DrmAllocation *>(drmDirectSubmission.ringBuffers[drmDirectSubmission.currentRingBuffer].ringBuffer);
auto initialBO = ringBuffer->getBufferObjectToModify(0);
MockBufferObject mockBO(drm);
@@ -368,7 +386,7 @@ HWTEST_F(DrmDirectSubmissionTest, givenTile1AndCompletionFenceSupportWhenSubmitt
MockDrmDirectSubmission<FamilyType, RenderDispatcher<FamilyType>> drmDirectSubmission(commandStreamReceiver);
drmDirectSubmission.completionFenceAllocation = commandStreamReceiver.getTagAllocation();
EXPECT_TRUE(drmDirectSubmission.allocateResources());
auto ringBuffer = static_cast<DrmAllocation *>(drmDirectSubmission.ringBuffer);
auto ringBuffer = static_cast<DrmAllocation *>(drmDirectSubmission.ringBuffers[drmDirectSubmission.currentRingBuffer].ringBuffer);
auto initialBO = ringBuffer->getBufferObjectToModify(0);
MockBufferObject mockBO(drm);
@@ -411,7 +429,7 @@ HWTEST_F(DrmDirectSubmissionTest, givenTwoTilesAndCompletionFenceSupportWhenSubm
drmDirectSubmission.completionFenceAllocation = commandStreamReceiver.getTagAllocation();
EXPECT_TRUE(drmDirectSubmission.allocateResources());
auto ringBuffer = static_cast<DrmAllocation *>(drmDirectSubmission.ringBuffer);
auto ringBuffer = static_cast<DrmAllocation *>(drmDirectSubmission.ringBuffers[drmDirectSubmission.currentRingBuffer].ringBuffer);
auto initialBO = ringBuffer->getBufferObjectToModify(0);
MockBufferObject mockBO(drm);

View File

@@ -59,8 +59,8 @@ HWTEST_F(WddmDirectSubmissionTest, givenWddmWhenDirectIsInitializedAndStartedThe
bool ret = wddmDirectSubmission->initialize(true, false);
EXPECT_TRUE(ret);
EXPECT_TRUE(wddmDirectSubmission->ringStart);
EXPECT_NE(nullptr, wddmDirectSubmission->ringBuffer);
EXPECT_NE(nullptr, wddmDirectSubmission->ringBuffer2);
EXPECT_NE(nullptr, wddmDirectSubmission->ringBuffers[0].ringBuffer);
EXPECT_NE(nullptr, wddmDirectSubmission->ringBuffers[1].ringBuffer);
EXPECT_NE(nullptr, wddmDirectSubmission->semaphores);
EXPECT_EQ(1u, wddm->makeResidentResult.called);
@@ -73,7 +73,7 @@ HWTEST_F(WddmDirectSubmissionTest, givenWddmWhenDirectIsInitializedAndStartedThe
EXPECT_NE(0u, wddmDirectSubmission->ringCommandStream.getUsed());
*wddmDirectSubmission->ringFence.cpuAddress = 1ull;
wddmDirectSubmission->completionRingBuffers[wddmDirectSubmission->currentRingBuffer] = 2ull;
wddmDirectSubmission->ringBuffers[wddmDirectSubmission->currentRingBuffer].completionFence = 2ull;
wddmDirectSubmission.reset(nullptr);
EXPECT_EQ(1u, wddm->waitFromCpuResult.called);
@@ -90,8 +90,8 @@ HWTEST_F(WddmDirectSubmissionNoPreemptionTest, givenWddmWhenDirectIsInitializedA
bool ret = wddmDirectSubmission->initialize(false, false);
EXPECT_TRUE(ret);
EXPECT_FALSE(wddmDirectSubmission->ringStart);
EXPECT_NE(nullptr, wddmDirectSubmission->ringBuffer);
EXPECT_NE(nullptr, wddmDirectSubmission->ringBuffer2);
EXPECT_NE(nullptr, wddmDirectSubmission->ringBuffers[0].ringBuffer);
EXPECT_NE(nullptr, wddmDirectSubmission->ringBuffers[1].ringBuffer);
EXPECT_NE(nullptr, wddmDirectSubmission->semaphores);
EXPECT_EQ(1u, wddm->makeResidentResult.called);
@@ -211,7 +211,7 @@ HWTEST_F(WddmDirectSubmissionTest, givenWddmWhenHandlingRingBufferCompletionThen
MockWddmDirectSubmission<FamilyType, RenderDispatcher<FamilyType>> wddmDirectSubmission(*device->getDefaultEngine().commandStreamReceiver);
uint64_t completionValue = 0x12345679ull;
wddmDirectSubmission.handleCompletionRingBuffer(completionValue, contextFence);
wddmDirectSubmission.handleCompletionFence(completionValue, contextFence);
EXPECT_EQ(1u, wddm->waitFromCpuResult.called);
EXPECT_EQ(completionValue, wddm->waitFromCpuResult.uint64ParamPassed);
@@ -219,6 +219,21 @@ HWTEST_F(WddmDirectSubmissionTest, givenWddmWhenHandlingRingBufferCompletionThen
EXPECT_EQ(value, wddm->waitFromCpuResult.monitoredFence->currentFenceValue);
}
// Exercises isCompleted() for ring buffer 0 on the WDDM path: with that buffer's
// completionFence set to 1, the test expects "not completed" while the monitored
// fence CPU value is 0 and "completed" once it is 1.
HWTEST_F(WddmDirectSubmissionTest, givenWddmWhenCallIsCompleteThenProperValueIsReturned) {
    using MockSubmission = MockWddmDirectSubmission<FamilyType, RenderDispatcher<FamilyType>>;
    constexpr auto ringIndex = 0u;

    MonitoredFence &monitoredFence = osContext->getResidencyController().getMonitoredFence();
    MockSubmission wddmDirectSubmission(*device->getDefaultEngine().commandStreamReceiver);

    // CPU-visible fence value is below the ring buffer's completion fence.
    *monitoredFence.cpuAddress = 0u;
    wddmDirectSubmission.ringBuffers[ringIndex].completionFence = 1u;
    EXPECT_FALSE(wddmDirectSubmission.isCompleted(ringIndex));

    // CPU-visible fence value now equals the completion fence.
    *monitoredFence.cpuAddress = 1u;
    EXPECT_TRUE(wddmDirectSubmission.isCompleted(ringIndex));

    // NOTE(review): presumably cleared so teardown does not act on this fence - confirm.
    wddmDirectSubmission.ringBuffers[ringIndex].completionFence = 0u;
}
HWTEST_F(WddmDirectSubmissionTest, givenWddmWhenSwitchingRingBufferStartedThenExpectDispatchSwitchCommandsLinearStreamUpdated) {
using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START;
MockWddmDirectSubmission<FamilyType, RenderDispatcher<FamilyType>> wddmDirectSubmission(*device->getDefaultEngine().commandStreamReceiver);
@@ -226,14 +241,14 @@ HWTEST_F(WddmDirectSubmissionTest, givenWddmWhenSwitchingRingBufferStartedThenEx
bool ret = wddmDirectSubmission.initialize(true, false);
EXPECT_TRUE(ret);
size_t usedSpace = wddmDirectSubmission.ringCommandStream.getUsed();
uint64_t expectedGpuVa = wddmDirectSubmission.ringBuffer->getGpuAddress() + usedSpace;
uint64_t expectedGpuVa = wddmDirectSubmission.ringBuffers[0].ringBuffer->getGpuAddress() + usedSpace;
uint64_t gpuVa = wddmDirectSubmission.switchRingBuffers();
EXPECT_EQ(expectedGpuVa, gpuVa);
EXPECT_EQ(wddmDirectSubmission.ringBuffer2, wddmDirectSubmission.ringCommandStream.getGraphicsAllocation());
EXPECT_EQ(wddmDirectSubmission.ringBuffers[1].ringBuffer, wddmDirectSubmission.ringCommandStream.getGraphicsAllocation());
LinearStream tmpCmdBuffer;
tmpCmdBuffer.replaceBuffer(wddmDirectSubmission.ringBuffer->getUnderlyingBuffer(),
tmpCmdBuffer.replaceBuffer(wddmDirectSubmission.ringBuffers[0].ringBuffer->getUnderlyingBuffer(),
wddmDirectSubmission.ringCommandStream.getMaxAvailableSpace());
tmpCmdBuffer.getSpace(usedSpace + wddmDirectSubmission.getSizeSwitchRingBufferSection());
HardwareParse hwParse;
@@ -242,7 +257,7 @@ HWTEST_F(WddmDirectSubmissionTest, givenWddmWhenSwitchingRingBufferStartedThenEx
ASSERT_NE(nullptr, bbStart);
auto gmmHelper = device->getGmmHelper();
uint64_t actualGpuVa = gmmHelper->canonize(bbStart->getBatchBufferStartAddress());
EXPECT_EQ(wddmDirectSubmission.ringBuffer2->getGpuAddress(), actualGpuVa);
EXPECT_EQ(wddmDirectSubmission.ringBuffers[1].ringBuffer->getGpuAddress(), actualGpuVa);
}
HWTEST_F(WddmDirectSubmissionTest, givenWddmWhenSwitchingRingBufferNotStartedThenExpectNoSwitchCommandsLinearStreamUpdated) {
@@ -255,14 +270,14 @@ HWTEST_F(WddmDirectSubmissionTest, givenWddmWhenSwitchingRingBufferNotStartedThe
size_t usedSpace = wddmDirectSubmission.ringCommandStream.getUsed();
EXPECT_EQ(0u, usedSpace);
uint64_t expectedGpuVa = wddmDirectSubmission.ringBuffer->getGpuAddress();
uint64_t expectedGpuVa = wddmDirectSubmission.ringBuffers[0].ringBuffer->getGpuAddress();
uint64_t gpuVa = wddmDirectSubmission.switchRingBuffers();
EXPECT_EQ(expectedGpuVa, gpuVa);
EXPECT_EQ(wddmDirectSubmission.ringBuffer2, wddmDirectSubmission.ringCommandStream.getGraphicsAllocation());
EXPECT_EQ(wddmDirectSubmission.ringBuffers[1].ringBuffer, wddmDirectSubmission.ringCommandStream.getGraphicsAllocation());
LinearStream tmpCmdBuffer;
tmpCmdBuffer.replaceBuffer(wddmDirectSubmission.ringBuffer->getUnderlyingBuffer(),
tmpCmdBuffer.replaceBuffer(wddmDirectSubmission.ringBuffers[0].ringBuffer->getUnderlyingBuffer(),
wddmDirectSubmission.ringCommandStream.getMaxAvailableSpace());
HardwareParse hwParse;
hwParse.parseCommands<FamilyType>(tmpCmdBuffer, 0u);
@@ -270,24 +285,23 @@ HWTEST_F(WddmDirectSubmissionTest, givenWddmWhenSwitchingRingBufferNotStartedThe
EXPECT_EQ(nullptr, bbStart);
}
HWTEST_F(WddmDirectSubmissionTest, givenWddmWhenSwitchingRingBufferStartedAndWaitFenceUpdateThenExpectWaitCalled) {
using RingBufferUse = typename MockWddmDirectSubmission<FamilyType, RenderDispatcher<FamilyType>>::RingBufferUse;
HWTEST_F(WddmDirectSubmissionTest, givenWddmWhenSwitchingRingBufferStartedAndWaitFenceUpdateThenExpectNewRingBufferAllocated) {
using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START;
MockWddmDirectSubmission<FamilyType, RenderDispatcher<FamilyType>> wddmDirectSubmission(*device->getDefaultEngine().commandStreamReceiver);
bool ret = wddmDirectSubmission.initialize(true, false);
EXPECT_TRUE(ret);
uint64_t expectedWaitFence = 0x10ull;
wddmDirectSubmission.completionRingBuffers[RingBufferUse::SecondBuffer] = expectedWaitFence;
wddmDirectSubmission.ringBuffers[1u].completionFence = expectedWaitFence;
size_t usedSpace = wddmDirectSubmission.ringCommandStream.getUsed();
uint64_t expectedGpuVa = wddmDirectSubmission.ringBuffer->getGpuAddress() + usedSpace;
uint64_t expectedGpuVa = wddmDirectSubmission.ringBuffers[0].ringBuffer->getGpuAddress() + usedSpace;
uint64_t gpuVa = wddmDirectSubmission.switchRingBuffers();
EXPECT_EQ(expectedGpuVa, gpuVa);
EXPECT_EQ(wddmDirectSubmission.ringBuffer2, wddmDirectSubmission.ringCommandStream.getGraphicsAllocation());
EXPECT_EQ(wddmDirectSubmission.ringBuffers[2u].ringBuffer, wddmDirectSubmission.ringCommandStream.getGraphicsAllocation());
LinearStream tmpCmdBuffer;
tmpCmdBuffer.replaceBuffer(wddmDirectSubmission.ringBuffer->getUnderlyingBuffer(),
tmpCmdBuffer.replaceBuffer(wddmDirectSubmission.ringBuffers[0].ringBuffer->getUnderlyingBuffer(),
wddmDirectSubmission.ringCommandStream.getMaxAvailableSpace());
tmpCmdBuffer.getSpace(usedSpace + wddmDirectSubmission.getSizeSwitchRingBufferSection());
HardwareParse hwParse;
@@ -296,13 +310,48 @@ HWTEST_F(WddmDirectSubmissionTest, givenWddmWhenSwitchingRingBufferStartedAndWai
ASSERT_NE(nullptr, bbStart);
auto gmmHelper = device->getGmmHelper();
uint64_t actualGpuVa = gmmHelper->canonize(bbStart->getBatchBufferStartAddress());
EXPECT_EQ(wddmDirectSubmission.ringBuffer2->getGpuAddress(), actualGpuVa);
EXPECT_EQ(wddmDirectSubmission.ringBuffers[2u].ringBuffer->getGpuAddress(), actualGpuVa);
EXPECT_EQ(0u, wddm->waitFromCpuResult.called);
}
// With DirectSubmissionMaxRingBuffers forced to 2 and ring buffer 1 holding a pending
// completionFence, the test expects switchRingBuffers() to keep the ring buffer count
// at 2, make ring buffer 1 the active stream, emit a MI_BATCH_BUFFER_START pointing at
// it, and perform exactly one CPU wait on that fence value.
HWTEST_F(WddmDirectSubmissionTest, givenWddmWhenSwitchingRingBufferStartedAndWaitFenceUpdateThenExpectWaitCalled) {
    using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START;
    using MockSubmission = MockWddmDirectSubmission<FamilyType, RenderDispatcher<FamilyType>>;

    // The debug flag is set before the submission object is created.
    DebugManagerStateRestore restorer;
    DebugManager.flags.DirectSubmissionMaxRingBuffers.set(2u);

    MockSubmission wddmDirectSubmission(*device->getDefaultEngine().commandStreamReceiver);
    EXPECT_TRUE(wddmDirectSubmission.initialize(true, false));

    auto &ringStream = wddmDirectSubmission.ringCommandStream;

    // Mark the second ring buffer with a pending fence.
    const uint64_t expectedWaitFence = 0x10ull;
    wddmDirectSubmission.ringBuffers[1u].completionFence = expectedWaitFence;

    // Capture the pre-switch stream position; the returned GPU VA is expected to be
    // the first ring buffer's address advanced by that amount.
    const size_t usedBeforeSwitch = ringStream.getUsed();
    const uint64_t expectedGpuVa = wddmDirectSubmission.ringBuffers[0].ringBuffer->getGpuAddress() + usedBeforeSwitch;

    const uint64_t gpuVa = wddmDirectSubmission.switchRingBuffers();
    EXPECT_EQ(expectedGpuVa, gpuVa);
    EXPECT_EQ(wddmDirectSubmission.ringBuffers.size(), 2u);
    EXPECT_EQ(wddmDirectSubmission.ringBuffers[1u].ringBuffer, ringStream.getGraphicsAllocation());

    // Re-read the first ring buffer from the pre-switch offset and look for the
    // batch buffer start command written by the switch.
    LinearStream tmpCmdBuffer;
    tmpCmdBuffer.replaceBuffer(wddmDirectSubmission.ringBuffers[0].ringBuffer->getUnderlyingBuffer(),
                               ringStream.getMaxAvailableSpace());
    tmpCmdBuffer.getSpace(usedBeforeSwitch + wddmDirectSubmission.getSizeSwitchRingBufferSection());

    HardwareParse hwParse;
    hwParse.parseCommands<FamilyType>(tmpCmdBuffer, usedBeforeSwitch);
    auto *bbStart = hwParse.getCommand<MI_BATCH_BUFFER_START>();
    ASSERT_NE(nullptr, bbStart);

    uint64_t actualGpuVa = device->getGmmHelper()->canonize(bbStart->getBatchBufferStartAddress());
    EXPECT_EQ(wddmDirectSubmission.ringBuffers[1u].ringBuffer->getGpuAddress(), actualGpuVa);

    // One wait from the CPU is expected, with the pending fence value passed to it.
    EXPECT_EQ(1u, wddm->waitFromCpuResult.called);
    EXPECT_EQ(expectedWaitFence, wddm->waitFromCpuResult.uint64ParamPassed);
}
HWTEST_F(WddmDirectSubmissionTest, givenWddmWhenUpdatingTagValueThenExpectCompletionRingBufferUpdated) {
HWTEST_F(WddmDirectSubmissionTest, givenWddmWhenUpdatingTagValueThenExpectcompletionFenceUpdated) {
uint64_t address = 0xFF00FF0000ull;
uint64_t value = 0x12345678ull;
MonitoredFence &contextFence = osContext->getResidencyController().getMonitoredFence();
@@ -314,7 +363,7 @@ HWTEST_F(WddmDirectSubmissionTest, givenWddmWhenUpdatingTagValueThenExpectComple
uint64_t actualTagValue = wddmDirectSubmission.updateTagValue();
EXPECT_EQ(value, actualTagValue);
EXPECT_EQ(value + 1, contextFence.currentFenceValue);
EXPECT_EQ(value, wddmDirectSubmission.completionRingBuffers[wddmDirectSubmission.currentRingBuffer]);
EXPECT_EQ(value, wddmDirectSubmission.ringBuffers[wddmDirectSubmission.currentRingBuffer].completionFence);
}
HWTEST_F(WddmDirectSubmissionTest, givenWddmResidencyEnabledWhenCreatingDestroyingThenSubmitterNotifiesResidencyLogger) {