performance: don't wait for paging fence on user thread

Related-To: NEO-12197

Currently, for new resources, the user thread must wait for the paging fence
before submitting the actual workload. With this commit, instead of waiting on
the user thread, a request is sent to the background ULLS controller thread and
an additional semaphore is programmed. The ULLS controller performs the actual
wait and signals the semaphore when the paging fence reaches the required value.

Signed-off-by: Szymon Morek <szymon.morek@intel.com>
This commit is contained in:
Szymon Morek
2024-08-06 16:13:06 +00:00
committed by Compute-Runtime-Automation
parent d071fd7e9a
commit d4c1631ac7
31 changed files with 615 additions and 47 deletions

View File

@@ -7,6 +7,7 @@
#include "shared/source/command_stream/command_stream_receiver_with_aub_dump.h"
#include "shared/source/command_stream/preemption.h"
#include "shared/source/direct_submission/direct_submission_controller.h"
#include "shared/source/direct_submission/relaxed_ordering_helper.h"
#include "shared/source/helpers/gfx_core_helper.h"
#include "shared/source/helpers/windows/gmm_callbacks.h"
@@ -1346,3 +1347,144 @@ TEST_F(WddmCommandStreamTest, givenResidencyLoggingAvailableWhenFlushingCommandB
memoryManager->freeGraphicsMemory(commandBuffer);
}
// Test double for WddmCommandStreamReceiver: the answer of isDirectSubmissionEnabled()
// is taken from a public flag so that individual tests can toggle it.
struct MockWddmDirectSubmissionCsr : public WddmCommandStreamReceiver<DEFAULT_TEST_FAMILY_NAME> {
    using BaseClass = WddmCommandStreamReceiver<DEFAULT_TEST_FAMILY_NAME>;

    // Value reported by isDirectSubmissionEnabled(); defaults to "not available".
    bool directSubmissionAvailable = false;

    // Forwards all arguments unchanged to the real command stream receiver.
    MockWddmDirectSubmissionCsr(ExecutionEnvironment &executionEnvironment, uint32_t rootDeviceIndex, const DeviceBitfield deviceBitfield)
        : BaseClass(executionEnvironment, rootDeviceIndex, deviceBitfield) {
    }

    bool isDirectSubmissionEnabled() const override {
        return directSubmissionAvailable;
    }
};
struct SemaphorWaitForResidencyTest : public WddmCommandStreamTest {
void SetUp() override {
WddmCommandStreamTest::setUp();
debugManager.flags.WaitForPagingFenceInController.set(1);
debugManager.flags.EnableDirectSubmissionController.set(1);
auto executionEnvironment = device->getExecutionEnvironment();
mockCsr = new MockWddmDirectSubmissionCsr(*executionEnvironment, 0, 1);
device->resetCommandStreamReceiver(mockCsr);
mockCsr->getOsContext().ensureContextInitialized(false);
buffer = memoryManager->allocateGraphicsMemoryWithProperties({mockCsr->getRootDeviceIndex(), MemoryConstants::pageSize, AllocationType::buffer, device->getDeviceBitfield()});
commandBuffer = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{mockCsr->getRootDeviceIndex(), MemoryConstants::pageSize});
bufferHostMemory = memoryManager->allocateGraphicsMemoryWithProperties({mockCsr->getRootDeviceIndex(), MemoryConstants::pageSize, AllocationType::bufferHostMemory, device->getDeviceBitfield()});
}
void TearDown() override {
memoryManager->freeGraphicsMemory(buffer);
memoryManager->freeGraphicsMemory(bufferHostMemory);
memoryManager->freeGraphicsMemory(commandBuffer);
WddmCommandStreamTest::tearDown();
}
DebugManagerStateRestore restorer{};
GraphicsAllocation *buffer;
GraphicsAllocation *commandBuffer;
GraphicsAllocation *bufferHostMemory;
MockWddmDirectSubmissionCsr *mockCsr;
};
// A flush where this test has queued nothing for residency must keep the blocking
// residency handling flag set.
TEST_F(SemaphorWaitForResidencyTest, givenNoAllocationsToMakeResidentThenSignalFlag) {
    LinearStream stream(commandBuffer);
    BatchBuffer submission = BatchBufferHelper::createDefaultBatchBuffer(stream.getGraphicsAllocation(), &stream, stream.getUsed());
    auto &residency = mockCsr->getResidencyAllocations();

    // no allocations to make resident, no need to wait
    mockCsr->flush(submission, residency);

    EXPECT_TRUE(submission.pagingFenceSemInfo.requiresBlockingResidencyHandling);
}
// The buffer is queued for residency, but the wddm paging fence values are left
// untouched by the test; the blocking flag is expected to stay set.
TEST_F(SemaphorWaitForResidencyTest, givenPagingFenceNotUpdatedThenSignalFlag) {
    LinearStream stream(commandBuffer);
    BatchBuffer submission = BatchBufferHelper::createDefaultBatchBuffer(stream.getGraphicsAllocation(), &stream, stream.getUsed());
    auto &residency = mockCsr->getResidencyAllocations();

    // First flush happens before anything is added to the residency container.
    mockCsr->flush(submission, residency);

    residency.push_back(buffer);
    mockCsr->flush(submission, residency);

    EXPECT_TRUE(submission.pagingFenceSemInfo.requiresBlockingResidencyHandling);
}
// The paging fence is made to look pending (address value 0, current value 100), but the
// test neither initializes the direct submission controller nor marks direct submission
// as available; the blocking flag is expected to stay set.
TEST_F(SemaphorWaitForResidencyTest, givenUllsControllerNotEnabledThenSignalFlag) {
    LinearStream stream(commandBuffer);
    BatchBuffer submission = BatchBufferHelper::createDefaultBatchBuffer(stream.getGraphicsAllocation(), &stream, stream.getUsed());
    auto &residency = mockCsr->getResidencyAllocations();

    mockCsr->flush(submission, residency);

    residency.push_back(buffer);
    *wddm->pagingFenceAddress = 0u;
    wddm->currentPagingFenceValue = 100u;
    mockCsr->flush(submission, residency);

    EXPECT_TRUE(submission.pagingFenceSemInfo.requiresBlockingResidencyHandling);
}
// With a pending paging fence, an initialized controller and direct submission reported as
// available, flushing a buffer allocation is expected to clear the blocking flag and record
// the current paging fence value (100) in the batch buffer.
TEST_F(SemaphorWaitForResidencyTest, givenBufferAllocationThenSignalFlagForPagingFenceSemWait) {
    LinearStream stream(commandBuffer);
    BatchBuffer submission = BatchBufferHelper::createDefaultBatchBuffer(stream.getGraphicsAllocation(), &stream, stream.getUsed());
    auto &residency = mockCsr->getResidencyAllocations();

    mockCsr->flush(submission, residency);

    residency.push_back(buffer);
    *wddm->pagingFenceAddress = 0u;
    wddm->currentPagingFenceValue = 100u;

    // Create the controller, then stop its thread before the flush under test.
    auto ullsController = mockCsr->peekExecutionEnvironment().initializeDirectSubmissionController();
    ullsController->stopThread();
    mockCsr->directSubmissionAvailable = true;

    mockCsr->flush(submission, residency);

    const auto &semInfo = submission.pagingFenceSemInfo;
    EXPECT_FALSE(semInfo.requiresBlockingResidencyHandling);
    EXPECT_EQ(100u, semInfo.pagingFenceValue);
}
// Same scenario as the buffer case, but the queued allocation has type bufferHostMemory:
// the blocking flag is expected to be cleared and the paging fence value (100) recorded.
TEST_F(SemaphorWaitForResidencyTest, givenBufferHostMemoryAllocationThenSignalFlagForPagingFenceSemWait) {
    LinearStream stream(commandBuffer);
    BatchBuffer submission = BatchBufferHelper::createDefaultBatchBuffer(stream.getGraphicsAllocation(), &stream, stream.getUsed());
    auto &residency = mockCsr->getResidencyAllocations();

    mockCsr->flush(submission, residency);

    residency.push_back(bufferHostMemory);
    *wddm->pagingFenceAddress = 0u;
    wddm->currentPagingFenceValue = 100u;

    // Create the controller, then stop its thread before the flush under test.
    auto ullsController = mockCsr->peekExecutionEnvironment().initializeDirectSubmissionController();
    ullsController->stopThread();
    mockCsr->directSubmissionAvailable = true;

    mockCsr->flush(submission, residency);

    const auto &semInfo = submission.pagingFenceSemInfo;
    EXPECT_FALSE(semInfo.requiresBlockingResidencyHandling);
    EXPECT_EQ(100u, semInfo.pagingFenceValue);
}
// With WaitForPagingFenceInController switched off, the blocking flag is expected to stay set
// even though the paging fence looks pending and the controller has been initialized.
// NOTE(review): mockCsr->directSubmissionAvailable is left at its default (false) in this
// test, so it may pass regardless of the debug flag - confirm whether it should be set to true.
TEST_F(SemaphorWaitForResidencyTest, givenDebugFlagDisabledThenDontSignalFlag) {
    debugManager.flags.WaitForPagingFenceInController.set(0);

    LinearStream stream(commandBuffer);
    BatchBuffer submission = BatchBufferHelper::createDefaultBatchBuffer(stream.getGraphicsAllocation(), &stream, stream.getUsed());
    auto &residency = mockCsr->getResidencyAllocations();

    mockCsr->flush(submission, residency);

    residency.push_back(buffer);
    *wddm->pagingFenceAddress = 0u;
    wddm->currentPagingFenceValue = 100u;

    auto ullsController = mockCsr->peekExecutionEnvironment().initializeDirectSubmissionController();
    ullsController->stopThread();

    mockCsr->flush(submission, residency);

    EXPECT_TRUE(submission.pagingFenceSemInfo.requiresBlockingResidencyHandling);
}
// Queues a commandBuffer-type allocation together with a buffer and expects the blocking
// flag to stay set after the flush.
// NOTE(review): this test also disables WaitForPagingFenceInController and never sets
// mockCsr->directSubmissionAvailable, so the allocation-type check may not be what keeps the
// flag set - confirm whether the flag override is intentional.
TEST_F(SemaphorWaitForResidencyTest, givenIllegalAllocationTypeThenDontSignalFlag) {
    debugManager.flags.WaitForPagingFenceInController.set(0);

    LinearStream stream(commandBuffer);
    BatchBuffer submission = BatchBufferHelper::createDefaultBatchBuffer(stream.getGraphicsAllocation(), &stream, stream.getUsed());
    auto &residency = mockCsr->getResidencyAllocations();

    mockCsr->flush(submission, residency);

    // Extra allocation owned by this test; released at the end of the test body.
    auto extraCmdBuffer = memoryManager->allocateGraphicsMemoryWithProperties({mockCsr->getRootDeviceIndex(), MemoryConstants::pageSize, AllocationType::commandBuffer, device->getDeviceBitfield()});
    residency.push_back(extraCmdBuffer);
    residency.push_back(buffer);
    *wddm->pagingFenceAddress = 0u;
    wddm->currentPagingFenceValue = 100u;

    auto ullsController = mockCsr->peekExecutionEnvironment().initializeDirectSubmissionController();
    ullsController->stopThread();

    mockCsr->flush(submission, residency);

    EXPECT_TRUE(submission.pagingFenceSemInfo.requiresBlockingResidencyHandling);

    memoryManager->freeGraphicsMemory(extraCmdBuffer);
}

View File

@@ -953,8 +953,8 @@ TEST_F(WddmResidencyControllerWithGdiAndMemoryManagerTest, WhenMakingResidentRes
MockWddmAllocation allocation3(gmmHelper);
MockWddmAllocation allocation4(gmmHelper);
ResidencyContainer residencyPack{&allocation1, &allocation2, &allocation3, &allocation4};
residencyController->makeResidentResidencyAllocations(residencyPack);
bool requiresBlockingResidencyHandling = true;
residencyController->makeResidentResidencyAllocations(residencyPack, requiresBlockingResidencyHandling);
EXPECT_TRUE(allocation1.getResidencyData().resident[osContextId]);
EXPECT_TRUE(allocation2.getResidencyData().resident[osContextId]);
@@ -970,8 +970,8 @@ TEST_F(WddmResidencyControllerWithGdiAndMemoryManagerTest, WhenMakingResidentRes
ResidencyContainer residencyPack{&allocation1, &allocation2, &allocation3, &allocation4};
residencyController->getMonitoredFence().currentFenceValue = 20;
residencyController->makeResidentResidencyAllocations(residencyPack);
bool requiresBlockingResidencyHandling = true;
residencyController->makeResidentResidencyAllocations(residencyPack, requiresBlockingResidencyHandling);
EXPECT_EQ(20u, allocation1.getResidencyData().getFenceValueForContextId(osContext->getContextId()));
EXPECT_EQ(20u, allocation2.getResidencyData().getFenceValueForContextId(osContext->getContextId()));
@@ -990,8 +990,8 @@ TEST_F(WddmResidencyControllerWithGdiAndMemoryManagerTest, GivenTripleAllocation
WddmAllocation *allocationTriple = (WddmAllocation *)memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), false, 2 * MemoryConstants::pageSize}, ptr);
ResidencyContainer residencyPack{&allocation1, allocationTriple, &allocation2};
residencyController->makeResidentResidencyAllocations(residencyPack);
bool requiresBlockingResidencyHandling = true;
residencyController->makeResidentResidencyAllocations(residencyPack, requiresBlockingResidencyHandling);
for (uint32_t i = 0; i < allocationTriple->fragmentsStorage.fragmentCount; i++) {
EXPECT_TRUE(allocationTriple->fragmentsStorage.fragmentStorageData[i].residency->resident[osContextId]);
@@ -1009,9 +1009,9 @@ TEST_F(WddmResidencyControllerWithGdiAndMemoryManagerTest, GivenTripleAllocation
WddmAllocation *allocationTriple = static_cast<WddmAllocation *>(memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), false, 2 * MemoryConstants::pageSize}, reinterpret_cast<void *>(0x1500)));
residencyController->getMonitoredFence().currentFenceValue = 20;
bool requiresBlockingResidencyHandling = true;
ResidencyContainer residencyPack{&allocation1, allocationTriple, &allocation2};
residencyController->makeResidentResidencyAllocations(residencyPack);
residencyController->makeResidentResidencyAllocations(residencyPack, requiresBlockingResidencyHandling);
for (uint32_t i = 0; i < allocationTriple->fragmentsStorage.fragmentCount; i++) {
EXPECT_EQ(20u, allocationTriple->fragmentsStorage.fragmentStorageData[i].residency->getFenceValueForContextId(0));
@@ -1030,7 +1030,8 @@ TEST_F(WddmResidencyControllerWithMockWddmTest, givenMakeResidentFailsWhenCallin
wddm->makeResidentStatus = false;
ResidencyContainer residencyPack{&allocation1, &allocation2, &allocation3, &allocation4};
bool result = residencyController->makeResidentResidencyAllocations(residencyPack);
bool requiresBlockingResidencyHandling = true;
bool result = residencyController->makeResidentResidencyAllocations(residencyPack, requiresBlockingResidencyHandling);
EXPECT_FALSE(result);
@@ -1053,7 +1054,8 @@ TEST_F(WddmResidencyControllerWithMockWddmTest, givenMakeResidentFailsWhenCallin
wddm->makeResidentStatus = false;
ResidencyContainer residencyPack{&allocation1, allocationTriple, &allocation2};
bool result = residencyController->makeResidentResidencyAllocations(residencyPack);
bool requiresBlockingResidencyHandling = true;
bool result = residencyController->makeResidentResidencyAllocations(residencyPack, requiresBlockingResidencyHandling);
EXPECT_FALSE(result);
@@ -1072,7 +1074,8 @@ TEST_F(WddmResidencyControllerWithMockWddmTest, givenMakeResidentFailsWhenCallin
wddm->makeResidentStatus = false;
ResidencyContainer residencyPack{&allocation1};
bool result = residencyController->makeResidentResidencyAllocations(residencyPack);
bool requiresBlockingResidencyHandling = true;
bool result = residencyController->makeResidentResidencyAllocations(residencyPack, requiresBlockingResidencyHandling);
EXPECT_FALSE(result);
EXPECT_NE(wddm->makeResidentParamsPassed[0].cantTrimFurther, wddm->makeResidentParamsPassed[1].cantTrimFurther);
@@ -1085,8 +1088,8 @@ TEST_F(WddmResidencyControllerWithMockWddmTest, givenAllocationPackPassedWhenCal
allocation1.handle = 1;
allocation2.handle = 2;
ResidencyContainer residencyPack{&allocation1, &allocation2};
bool result = residencyController->makeResidentResidencyAllocations(residencyPack);
bool requiresBlockingResidencyHandling = true;
bool result = residencyController->makeResidentResidencyAllocations(residencyPack, requiresBlockingResidencyHandling);
EXPECT_TRUE(result);
EXPECT_EQ(2 * EngineLimits::maxHandleCount, wddm->makeResidentResult.handleCount);
EXPECT_EQ(false, wddm->makeResidentResult.cantTrimFurther);
@@ -1110,7 +1113,8 @@ TEST_F(WddmResidencyControllerWithMockWddmTest, givenMakeResidentFailsAndTrimToB
residencyController->addToTrimCandidateList(&allocationToTrim);
ResidencyContainer residencyPack{&allocation1};
bool result = residencyController->makeResidentResidencyAllocations(residencyPack);
bool requiresBlockingResidencyHandling = true;
bool result = residencyController->makeResidentResidencyAllocations(residencyPack, requiresBlockingResidencyHandling);
EXPECT_TRUE(result);
@@ -1123,8 +1127,8 @@ TEST_F(WddmResidencyControllerWithMockWddmTest, givenMakeResidentFailsWhenCallin
ResidencyContainer residencyPack{&allocation1};
wddm->makeResidentResults = {false, true};
residencyController->makeResidentResidencyAllocations(residencyPack);
bool requiresBlockingResidencyHandling = true;
residencyController->makeResidentResidencyAllocations(residencyPack, requiresBlockingResidencyHandling);
EXPECT_TRUE(residencyController->isMemoryBudgetExhausted());
EXPECT_EQ(2u, wddm->makeResidentResult.called);
}
@@ -1173,8 +1177,8 @@ TEST_F(WddmResidencyControllerWithMockWddmMakeResidentTest, givenMakeResidentFai
MockWddmAllocation allocation1(gmmHelper);
ResidencyContainer residencyPack{&allocation1};
bool result = residencyController->makeResidentResidencyAllocations(residencyPack);
bool requiresBlockingResidencyHandling = true;
bool result = residencyController->makeResidentResidencyAllocations(residencyPack, requiresBlockingResidencyHandling);
EXPECT_TRUE(result);
EXPECT_EQ(3u, wddm->makeResidentResult.called);