KM DAF AubCapture to recapture command streams and heap allocations

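This commit makes KM DAF recapture command stream (CS) and indirect heap allocations on every submit: these allocations are tagged as linear-stream allocations, and each one in the residency list is reported to the KM DAF listener as locked before the command buffer is submitted.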

Change-Id: I2a5a763e8988de804da1a6c2c8042154b0786b2e
This commit is contained in:
Milczarek, Slawomir
2018-03-22 21:13:45 +01:00
committed by sys_ocldev
parent e3b1ba2112
commit 32825e203e
12 changed files with 197 additions and 3 deletions

View File

@@ -260,6 +260,8 @@ IndirectHeap &CommandQueue::getIndirectHeap(IndirectHeap::Type heapType,
finalHeapSize = std::max(heapMemory->getUnderlyingBufferSize(), finalHeapSize); finalHeapSize = std::max(heapMemory->getUnderlyingBufferSize(), finalHeapSize);
} }
heapMemory->setAllocationType(GraphicsAllocation::ALLOCATION_TYPE_LINEAR_STREAM);
if (IndirectHeap::SURFACE_STATE == heapType) { if (IndirectHeap::SURFACE_STATE == heapType) {
DEBUG_BREAK_IF(minRequiredSize > maxSshSize); DEBUG_BREAK_IF(minRequiredSize > maxSshSize);
finalHeapSize = maxSshSize; finalHeapSize = maxSshSize;
@@ -324,6 +326,8 @@ LinearStream &CommandQueue::getCS(size_t minRequiredSize) {
allocation = memoryManager->allocateGraphicsMemory(requiredSize, MemoryConstants::pageSize); allocation = memoryManager->allocateGraphicsMemory(requiredSize, MemoryConstants::pageSize);
} }
allocation->setAllocationType(GraphicsAllocation::ALLOCATION_TYPE_LINEAR_STREAM);
// Deallocate the old block, if not null // Deallocate the old block, if not null
auto oldAllocation = commandStream->getGraphicsAllocation(); auto oldAllocation = commandStream->getGraphicsAllocation();

View File

@@ -144,6 +144,8 @@ LinearStream &CommandStreamReceiver::getCS(size_t minRequiredSize) {
allocation = memoryManager->allocateGraphicsMemory(requiredSize, MemoryConstants::pageSize); allocation = memoryManager->allocateGraphicsMemory(requiredSize, MemoryConstants::pageSize);
} }
allocation->setAllocationType(GraphicsAllocation::ALLOCATION_TYPE_LINEAR_STREAM);
//pass current allocation to reusable list //pass current allocation to reusable list
if (commandStream.getCpuBase()) { if (commandStream.getCpuBase()) {
memoryManager->storeAllocation(std::unique_ptr<GraphicsAllocation>(commandStream.getGraphicsAllocation()), REUSABLE_ALLOCATION); memoryManager->storeAllocation(std::unique_ptr<GraphicsAllocation>(commandStream.getGraphicsAllocation()), REUSABLE_ALLOCATION);

View File

@@ -57,6 +57,7 @@ class GraphicsAllocation : public IDNode<GraphicsAllocation> {
ALLOCATION_TYPE_BUFFER, ALLOCATION_TYPE_BUFFER,
ALLOCATION_TYPE_IMAGE, ALLOCATION_TYPE_IMAGE,
ALLOCATION_TYPE_TAG_BUFFER, ALLOCATION_TYPE_TAG_BUFFER,
ALLOCATION_TYPE_LINEAR_STREAM,
ALLOCATION_TYPE_NON_AUB_WRITABLE = 0x40000000, ALLOCATION_TYPE_NON_AUB_WRITABLE = 0x40000000,
ALLOCATION_TYPE_WRITABLE = 0x80000000 ALLOCATION_TYPE_WRITABLE = 0x80000000
}; };
@@ -107,8 +108,8 @@ class GraphicsAllocation : public IDNode<GraphicsAllocation> {
void setSize(size_t size) { this->size = size; } void setSize(size_t size) { this->size = size; }
osHandle peekSharedHandle() { return sharedHandle; } osHandle peekSharedHandle() { return sharedHandle; }
void setAllocationType(int allocationType) { this->allocationType = allocationType; } void setAllocationType(uint32_t allocationType) { this->allocationType = allocationType; }
int getAllocationType() const { return allocationType; } uint32_t getAllocationType() const { return allocationType; }
uint32_t taskCount = ObjectNotUsed; uint32_t taskCount = ObjectNotUsed;
OsHandleStorage fragmentsStorage; OsHandleStorage fragmentsStorage;
@@ -130,7 +131,7 @@ class GraphicsAllocation : public IDNode<GraphicsAllocation> {
bool cpuPtrAllocated = false; // flag indicating if cpuPtr is driver-allocated bool cpuPtrAllocated = false; // flag indicating if cpuPtr is driver-allocated
private: private:
int allocationType; uint32_t allocationType;
//this variable can only be modified from SubmissionAggregator //this variable can only be modified from SubmissionAggregator
friend class SubmissionAggregator; friend class SubmissionAggregator;

View File

@@ -708,6 +708,10 @@ void Wddm::unlockResource(WddmAllocation *wddmAllocation) {
kmDafListener->notifyUnlock(featureTable->ftrKmdDaf, adapter, device, &wddmAllocation->handle, 1, gdi->escape); kmDafListener->notifyUnlock(featureTable->ftrKmdDaf, adapter, device, &wddmAllocation->handle, 1, gdi->escape);
} }
// Forwards a lock notification for the given allocation to the KM DAF listener.
// The literal 0 is passed in the argument slot that follows the allocation handle.
void Wddm::kmDafLock(WddmAllocation *wddmAllocation) {
    auto &allocationHandle = wddmAllocation->handle;
    kmDafListener->notifyLock(featureTable->ftrKmdDaf,
                              adapter,
                              device,
                              allocationHandle,
                              0,
                              gdi->escape);
}
D3DKMT_HANDLE Wddm::createContext() { D3DKMT_HANDLE Wddm::createContext() {
NTSTATUS status = STATUS_UNSUCCESSFUL; NTSTATUS status = STATUS_UNSUCCESSFUL;
D3DKMT_CREATECONTEXTVIRTUAL CreateContext = {0}; D3DKMT_CREATECONTEXTVIRTUAL CreateContext = {0};

View File

@@ -87,6 +87,8 @@ class Wddm {
bool openNTHandle(HANDLE handle, WddmAllocation *alloc); bool openNTHandle(HANDLE handle, WddmAllocation *alloc);
MOCKABLE_VIRTUAL void *lockResource(WddmAllocation *wddmAllocation); MOCKABLE_VIRTUAL void *lockResource(WddmAllocation *wddmAllocation);
MOCKABLE_VIRTUAL void unlockResource(WddmAllocation *wddmAllocation); MOCKABLE_VIRTUAL void unlockResource(WddmAllocation *wddmAllocation);
MOCKABLE_VIRTUAL void kmDafLock(WddmAllocation *wddmAllocation);
// Reports the ftrKmdDaf flag read from the feature table.
MOCKABLE_VIRTUAL bool isKmDafEnabled() {
    return featureTable->ftrKmdDaf;
}
MOCKABLE_VIRTUAL bool destroyContext(D3DKMT_HANDLE context); MOCKABLE_VIRTUAL bool destroyContext(D3DKMT_HANDLE context);
MOCKABLE_VIRTUAL bool queryAdapterInfo(); MOCKABLE_VIRTUAL bool queryAdapterInfo();

View File

@@ -54,6 +54,7 @@ class WddmCommandStreamReceiver : public DeviceCommandStreamReceiver<GfxFamily>
protected: protected:
void initPageTableManagerRegisters(LinearStream &csr) override; void initPageTableManagerRegisters(LinearStream &csr) override;
void kmDafLockAllocations(ResidencyContainer *allocationsForResidency);
GmmPageTableMngr *createPageTableManager(); GmmPageTableMngr *createPageTableManager();
Wddm *wddm; Wddm *wddm;

View File

@@ -123,7 +123,12 @@ FlushStamp WddmCommandStreamReceiver<GfxFamily>::flush(BatchBuffer &batchBuffer,
break; break;
} }
if (wddm->isKmDafEnabled()) {
this->kmDafLockAllocations(allocationsForResidency);
}
wddm->submit(commandStreamAddress, batchBuffer.usedSize - batchBuffer.startOffset, commandBufferHeader); wddm->submit(commandStreamAddress, batchBuffer.usedSize - batchBuffer.startOffset, commandBufferHeader);
return wddm->getMonitoredFence().lastSubmittedFence; return wddm->getMonitoredFence().lastSubmittedFence;
} }
@@ -208,4 +213,15 @@ void WddmCommandStreamReceiver<GfxFamily>::initPageTableManagerRegisters(LinearS
pageTableManagerInitialized = true; pageTableManagerInitialized = true;
} }
} }
// Issues a KM DAF lock notification (via Wddm::kmDafLock) for every allocation
// of type ALLOCATION_TYPE_LINEAR_STREAM that takes part in the submission.
//
// allocationsForResidency - allocations passed along with the flush; when it is
//                           null, the memory manager's residency allocations
//                           are walked instead.
template <typename GfxFamily>
void WddmCommandStreamReceiver<GfxFamily>::kmDafLockAllocations(ResidencyContainer *allocationsForResidency) {
    auto &residencyAllocations = allocationsForResidency ? *allocationsForResidency : getMemoryManager()->getResidencyAllocations();

    // Range-for avoids comparing a 32-bit index against the container's size().
    for (auto *graphicsAllocation : residencyAllocations) {
        if (GraphicsAllocation::ALLOCATION_TYPE_LINEAR_STREAM == graphicsAllocation->getAllocationType()) {
            wddm->kmDafLock(static_cast<WddmAllocation *>(graphicsAllocation));
        }
    }
}
} // namespace OCLRT } // namespace OCLRT

View File

@@ -383,6 +383,17 @@ TEST_F(CommandQueueCommandStreamTest, CommandQueueWhenAskedForNewCommandStreamSt
EXPECT_TRUE(memoryManager->allocationsForReuse.peekContains(*graphicsAllocation)); EXPECT_TRUE(memoryManager->allocationsForReuse.peekContains(*graphicsAllocation));
} }
// The allocation backing the command stream returned by CommandQueue::getCS
// must be tagged as ALLOCATION_TYPE_LINEAR_STREAM.
TEST_F(CommandQueueCommandStreamTest, givenCommandQueueWhenGetCSIsCalledThenCommandStreamAllocationTypeShouldBeSetToLinearStream) {
    const cl_queue_properties queueProperties[3] = {CL_QUEUE_PROPERTIES, 0, 0};
    CommandQueue queue(&context, pDevice, queueProperties);

    auto streamAllocation = queue.getCS(100).getGraphicsAllocation();

    // Stop here on a null allocation; the next check dereferences it.
    ASSERT_NE(nullptr, streamAllocation);
    EXPECT_EQ(GraphicsAllocation::ALLOCATION_TYPE_LINEAR_STREAM, streamAllocation->getAllocationType());
}
struct CommandQueueIndirectHeapTest : public CommandQueueMemoryDevice, struct CommandQueueIndirectHeapTest : public CommandQueueMemoryDevice,
public ::testing::TestWithParam<IndirectHeap::Type> { public ::testing::TestWithParam<IndirectHeap::Type> {
void SetUp() override { void SetUp() override {
@@ -598,6 +609,17 @@ TEST_P(CommandQueueIndirectHeapTest, GivenCommandQueueWithHeapWhenGraphicAllocat
memoryManager->freeGraphicsMemory(allocation); memoryManager->freeGraphicsMemory(allocation);
} }
// For the parameterized heap type, the allocation backing the heap returned by
// CommandQueue::getIndirectHeap must be tagged as ALLOCATION_TYPE_LINEAR_STREAM.
TEST_P(CommandQueueIndirectHeapTest, givenCommandQueueWhenGetIndirectHeapIsCalledThenIndirectHeapAllocationTypeShouldBeSetToLinearStream) {
    const cl_queue_properties queueProperties[3] = {CL_QUEUE_PROPERTIES, 0, 0};
    CommandQueue queue(&context, pDevice, queueProperties);

    auto heapAllocation = queue.getIndirectHeap(this->GetParam(), 100).getGraphicsAllocation();

    // Stop here on a null allocation; the next check dereferences it.
    ASSERT_NE(nullptr, heapAllocation);
    EXPECT_EQ(GraphicsAllocation::ALLOCATION_TYPE_LINEAR_STREAM, heapAllocation->getAllocationType());
}
INSTANTIATE_TEST_CASE_P( INSTANTIATE_TEST_CASE_P(
Device, Device,
CommandQueueIndirectHeapTest, CommandQueueIndirectHeapTest,

View File

@@ -127,6 +127,14 @@ TEST_F(CommandStreamReceiverTest, getCommandStreamCanRecycle) {
EXPECT_GE(commandStream.getMaxAvailableSpace(), requiredSize); EXPECT_GE(commandStream.getMaxAvailableSpace(), requiredSize);
} }
// The allocation backing the stream returned by CommandStreamReceiver::getCS
// must be tagged as ALLOCATION_TYPE_LINEAR_STREAM.
TEST_F(CommandStreamReceiverTest, givenCommandStreamReceiverWhenGetCSIsCalledThenCommandStreamAllocationTypeShouldBeSetToLinearStream) {
    auto streamAllocation = commandStreamReceiver->getCS().getGraphicsAllocation();

    // Stop here on a null allocation; the next check dereferences it.
    ASSERT_NE(nullptr, streamAllocation);
    EXPECT_EQ(GraphicsAllocation::ALLOCATION_TYPE_LINEAR_STREAM, streamAllocation->getAllocationType());
}
TEST_F(CommandStreamReceiverTest, createAllocationAndHandleResidency) { TEST_F(CommandStreamReceiverTest, createAllocationAndHandleResidency) {
void *host_ptr = (void *)0x1212341; void *host_ptr = (void *)0x1212341;
auto size = 17262u; auto size = 17262u;

View File

@@ -306,6 +306,109 @@ TEST_F(WddmCommandStreamTest, givenWdmmWhenSubmitIsCalledAndThrottleIsToHighThen
memManager->freeGraphicsMemory(commandBuffer); memManager->freeGraphicsMemory(commandBuffer);
} }
// With KM DAF reported as disabled, flush must not issue any KM DAF lock, even
// when a LINEAR_STREAM allocation is passed in the residency list.
TEST_F(WddmCommandStreamTest, givenWddmWithKmDafDisabledWhenFlushIsCalledWithAllocationsForResidencyThenNoneAllocationShouldBeKmDafLocked) {
    GraphicsAllocation *cmdBufferAllocation = memManager->allocateGraphicsMemory(4096, 4096);
    ASSERT_NE(nullptr, cmdBufferAllocation);
    LinearStream stream(cmdBufferAllocation);
    BatchBuffer batch{stream.getGraphicsAllocation(), 0, 0, nullptr, false, false, QueueThrottle::MEDIUM, stream.getUsed(), &stream};

    // Allocation tagged the same way command streams and heaps are tagged.
    auto taggedAllocation = memManager->allocateGraphicsMemory(sizeof(uint32_t), sizeof(uint32_t), false, false);
    ASSERT_NE(nullptr, taggedAllocation);
    taggedAllocation->setAllocationType(GraphicsAllocation::ALLOCATION_TYPE_LINEAR_STREAM);
    ResidencyContainer residencyList = {taggedAllocation};

    EXPECT_FALSE(wddm->isKmDafEnabled());
    auto stamp = csr->flush(batch, EngineType::ENGINE_RCS, &residencyList);

    const auto &lockRecord = wddm->kmDafLockResult;
    EXPECT_EQ(0u, lockRecord.called);
    EXPECT_EQ(0u, lockRecord.lockedAllocations.size());

    memManager->freeGraphicsMemory(cmdBufferAllocation);
    memManager->freeGraphicsMemory(taggedAllocation);
}
// With KM DAF enabled but no residency list passed to flush (nullptr) and
// nothing made resident in this test, no KM DAF lock is expected.
TEST_F(WddmCommandStreamTest, givenWddmWithKmDafEnabledWhenFlushIsCalledWithoutAllocationsForResidencyThenNoneAllocationShouldBeKmDafLocked) {
    GraphicsAllocation *cmdBufferAllocation = memManager->allocateGraphicsMemory(4096, 4096);
    ASSERT_NE(nullptr, cmdBufferAllocation);
    LinearStream stream(cmdBufferAllocation);
    BatchBuffer batch{stream.getGraphicsAllocation(), 0, 0, nullptr, false, false, QueueThrottle::MEDIUM, stream.getUsed(), &stream};

    wddm->setKmDafEnabled(true);
    auto stamp = csr->flush(batch, EngineType::ENGINE_RCS, nullptr);

    const auto &lockRecord = wddm->kmDafLockResult;
    EXPECT_EQ(0u, lockRecord.called);
    EXPECT_EQ(0u, lockRecord.lockedAllocations.size());

    memManager->freeGraphicsMemory(cmdBufferAllocation);
}
// With KM DAF enabled and no residency list passed to flush (nullptr), the
// LINEAR_STREAM allocation made resident through the CSR (and therefore held in
// the memory manager's residency allocations) must be KM DAF locked once.
TEST_F(WddmCommandStreamTest, givenWddmWithKmDafEnabledWhenFlushIsCalledWithResidencyAllocationsInMemoryManagerThenLinearStreamAllocationsShouldBeKmDafLocked) {
    GraphicsAllocation *commandBuffer = memManager->allocateGraphicsMemory(4096, 4096);
    ASSERT_NE(nullptr, commandBuffer);
    LinearStream cs(commandBuffer);
    BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, 0, nullptr, false, false, QueueThrottle::MEDIUM, cs.getUsed(), &cs};

    auto linearStreamAllocation = memManager->allocateGraphicsMemory(sizeof(uint32_t), sizeof(uint32_t), false, false);
    ASSERT_NE(nullptr, linearStreamAllocation);
    linearStreamAllocation->setAllocationType(GraphicsAllocation::ALLOCATION_TYPE_LINEAR_STREAM);

    csr->makeResident(*linearStreamAllocation);
    // ASSERT (not EXPECT): element [0] is read right after, so a wrong size
    // must abort the test instead of indexing out of bounds.
    ASSERT_EQ(1u, memManager->getResidencyAllocations().size());
    EXPECT_EQ(linearStreamAllocation, memManager->getResidencyAllocations()[0]);

    wddm->setKmDafEnabled(true);
    auto flushStamp = csr->flush(batchBuffer, EngineType::ENGINE_RCS, nullptr);

    EXPECT_EQ(1u, wddm->kmDafLockResult.called);
    // Same reasoning: guard the [0] access below.
    ASSERT_EQ(1u, wddm->kmDafLockResult.lockedAllocations.size());
    EXPECT_EQ(linearStreamAllocation, wddm->kmDafLockResult.lockedAllocations[0]);

    memManager->freeGraphicsMemory(commandBuffer);
    memManager->freeGraphicsMemory(linearStreamAllocation);
}
// With KM DAF enabled, a LINEAR_STREAM allocation passed to flush in the
// explicit residency list must be KM DAF locked exactly once.
TEST_F(WddmCommandStreamTest, givenWddmWithKmDafEnabledWhenFlushIsCalledWithAllocationsForResidencyThenLinearStreamAllocationsShouldBeKmDafLocked) {
    GraphicsAllocation *commandBuffer = memManager->allocateGraphicsMemory(4096, 4096);
    ASSERT_NE(nullptr, commandBuffer);
    LinearStream cs(commandBuffer);
    BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, 0, nullptr, false, false, QueueThrottle::MEDIUM, cs.getUsed(), &cs};

    auto linearStreamAllocation = memManager->allocateGraphicsMemory(sizeof(uint32_t), sizeof(uint32_t), false, false);
    ASSERT_NE(nullptr, linearStreamAllocation);
    linearStreamAllocation->setAllocationType(GraphicsAllocation::ALLOCATION_TYPE_LINEAR_STREAM);
    ResidencyContainer allocationsForResidency = {linearStreamAllocation};

    wddm->setKmDafEnabled(true);
    auto flushStamp = csr->flush(batchBuffer, EngineType::ENGINE_RCS, &allocationsForResidency);

    EXPECT_EQ(1u, wddm->kmDafLockResult.called);
    // ASSERT (not EXPECT): element [0] is read right after, so a wrong size
    // must abort the test instead of indexing out of bounds.
    ASSERT_EQ(1u, wddm->kmDafLockResult.lockedAllocations.size());
    EXPECT_EQ(linearStreamAllocation, wddm->kmDafLockResult.lockedAllocations[0]);

    memManager->freeGraphicsMemory(commandBuffer);
    memManager->freeGraphicsMemory(linearStreamAllocation);
}
// With KM DAF enabled, an allocation whose type was never set to LINEAR_STREAM
// must not be KM DAF locked by flush, even when it is in the residency list.
TEST_F(WddmCommandStreamTest, givenWddmWithKmDafEnabledWhenFlushIsCalledWithAllocationsForResidencyThenNonLinearStreamAllocationShouldNotBeKmDafLocked) {
    GraphicsAllocation *cmdBufferAllocation = memManager->allocateGraphicsMemory(4096, 4096);
    ASSERT_NE(nullptr, cmdBufferAllocation);
    LinearStream stream(cmdBufferAllocation);
    BatchBuffer batch{stream.getGraphicsAllocation(), 0, 0, nullptr, false, false, QueueThrottle::MEDIUM, stream.getUsed(), &stream};

    // Deliberately left untagged: setAllocationType is not called on it.
    auto untaggedAllocation = memManager->allocateGraphicsMemory(sizeof(uint32_t), sizeof(uint32_t), false, false);
    ASSERT_NE(nullptr, untaggedAllocation);
    ResidencyContainer residencyList = {untaggedAllocation};

    wddm->setKmDafEnabled(true);
    auto stamp = csr->flush(batch, EngineType::ENGINE_RCS, &residencyList);

    const auto &lockRecord = wddm->kmDafLockResult;
    EXPECT_EQ(0u, lockRecord.called);
    EXPECT_EQ(0u, lockRecord.lockedAllocations.size());

    memManager->freeGraphicsMemory(cmdBufferAllocation);
    memManager->freeGraphicsMemory(untaggedAllocation);
}
TEST_F(WddmCommandStreamTest, makeResident) { TEST_F(WddmCommandStreamTest, makeResident) {
WddmMemoryManager *wddmMM = reinterpret_cast<WddmMemoryManager *>(memManager); WddmMemoryManager *wddmMM = reinterpret_cast<WddmMemoryManager *>(memManager);

View File

@@ -58,6 +58,9 @@ class WddmMock : public Wddm {
std::vector<D3DKMT_HANDLE> handlePack; std::vector<D3DKMT_HANDLE> handlePack;
uint32_t handleCount = 0; uint32_t handleCount = 0;
}; };
// Call record for kmDafLock: the CallResult fields plus the allocations that
// were passed to the mock, in call order.
struct KmDafLockCall : CallResult {
    std::vector<GraphicsAllocation *> lockedAllocations;
};
public: public:
using Wddm::adapter; using Wddm::adapter;
@@ -238,6 +241,18 @@ class WddmMock : public Wddm {
unlockResult.success = true; unlockResult.success = true;
Wddm::unlockResource(allocation); Wddm::unlockResource(allocation);
} }
// Records the call in kmDafLockResult (counter, success flag, allocation) and
// then delegates to the base Wddm::kmDafLock.
void kmDafLock(WddmAllocation *allocation) override {
    auto &record = kmDafLockResult;
    record.lockedAllocations.push_back(allocation);
    record.success = true;
    ++record.called;

    Wddm::kmDafLock(allocation);
}
bool isKmDafEnabled() override {
return kmDafEnabled;
}
void setKmDafEnabled(bool state) {
kmDafEnabled = state;
}
void setHwContextId(unsigned long hwContextId) { void setHwContextId(unsigned long hwContextId) {
this->hwContextId = hwContextId; this->hwContextId = hwContextId;
} }
@@ -315,6 +330,7 @@ class WddmMock : public Wddm {
CallResult createContextResult; CallResult createContextResult;
CallResult lockResult; CallResult lockResult;
CallResult unlockResult; CallResult unlockResult;
KmDafLockCall kmDafLockResult;
CallResult waitFromCpuResult; CallResult waitFromCpuResult;
CallResult releaseReservedAddressResult; CallResult releaseReservedAddressResult;
CallResult reserveValidAddressRangeResult; CallResult reserveValidAddressRangeResult;
@@ -325,6 +341,7 @@ class WddmMock : public Wddm {
bool callBaseMapGpuVa = true; bool callBaseMapGpuVa = true;
std::set<void *> reservedAddresses; std::set<void *> reservedAddresses;
uintptr_t virtualAllocAddress; uintptr_t virtualAllocAddress;
bool kmDafEnabled = false;
}; };
class WddmMockReserveAddress : public WddmMock { class WddmMockReserveAddress : public WddmMock {

View File

@@ -196,3 +196,17 @@ HWTEST_F(WddmKmDafListenerTest, givenWddmWhenCreateAllocationsAndMapGpuVaIsCalle
EXPECT_EQ(Gmm::decanonize(osHandle.gpuPtr), wddmWithKmDafMock->getKmDafListenerMock().notifyMapGpuVAParametrization.GpuVirtualAddress); EXPECT_EQ(Gmm::decanonize(osHandle.gpuPtr), wddmWithKmDafMock->getKmDafListenerMock().notifyMapGpuVAParametrization.GpuVirtualAddress);
EXPECT_EQ(wddmWithKmDafMock->getGdi()->escape, wddmWithKmDafMock->getKmDafListenerMock().notifyMapGpuVAParametrization.pfnEscape); EXPECT_EQ(wddmWithKmDafMock->getGdi()->escape, wddmWithKmDafMock->getKmDafListenerMock().notifyMapGpuVAParametrization.pfnEscape);
} }
// Wddm::kmDafLock must hand the listener's notifyLock the feature flag, adapter,
// device, the allocation's handle, 0 for the lock flags, and the GDI escape.
HWTEST_F(WddmKmDafListenerTest, givenWddmWhenKmDafLockIsCalledThenKmDafListenerNotifyLockIsFedWithCorrectParams) {
    WddmAllocation lockedAllocation;
    lockedAllocation.handle = ALLOCATION_HANDLE;

    wddmWithKmDafMock->kmDafLock(&lockedAllocation);

    // Arguments captured by the listener mock for the notifyLock call.
    const auto &lockParams = wddmWithKmDafMock->getKmDafListenerMock().notifyLockParametrization;
    EXPECT_EQ(wddmWithKmDafMock->getFeatureTable()->ftrKmdDaf, lockParams.ftrKmdDaf);
    EXPECT_EQ(wddmWithKmDafMock->getAdapter(), lockParams.hAdapter);
    EXPECT_EQ(wddmWithKmDafMock->getDevice(), lockParams.hDevice);
    EXPECT_EQ(lockedAllocation.handle, lockParams.hAllocation);
    EXPECT_EQ(0, lockParams.pLockFlags);
    EXPECT_EQ(wddmWithKmDafMock->getGdi()->escape, lockParams.pfnEscape);
}