performance: Add CCS Optimization
Signed-off-by: Lukasz Jobczyk <lukasz.jobczyk@intel.com>
This commit is contained in:
parent
171f1e27a3
commit
e687e11ab1
|
@ -364,6 +364,7 @@ DECLARE_DEBUG_VARIABLE(bool, DisableConcurrentBlockExecution, false, "disables c
|
|||
DECLARE_DEBUG_VARIABLE(bool, UseNoRingFlushesKmdMode, true, "Windows only, passes flag to KMD that informs KMD to not emit any ring buffer flushes.")
|
||||
DECLARE_DEBUG_VARIABLE(bool, DisableZeroCopyForUseHostPtr, false, "When active all buffer allocations created with CL_MEM_USE_HOST_PTR flag will not share memory with CPU.")
|
||||
DECLARE_DEBUG_VARIABLE(bool, ForceNonCoherentModeForTimestamps, false, "When active timestamp buffers are allocated in non coherent memory.")
|
||||
DECLARE_DEBUG_VARIABLE(bool, SetAssumeNotInUse, true, "Set AssumeNotInUse flag in d3d destroy allocation.")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, EnableReusingGpuTimestamps, -1, "Reuse GPU timestamp for next device time requests. -1: os-specific, 0: disable, 1: enable")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, AllowZeroCopyWithoutCoherency, -1, "Use cacheline flush instead of memory copy for map/unmap mem object")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, EnableHostPtrTracking, -1, "Enable host ptr tracking: -1 - default platform setting, 0 - disabled, 1 - enabled")
|
||||
|
@ -595,6 +596,7 @@ DECLARE_DEBUG_VARIABLE(bool, PrintBOChunkingLogs, false, "Print some logs on BO
|
|||
DECLARE_DEBUG_VARIABLE(bool, EnableBOChunkingPrefetch, false, "Enables prefetching of Shared Memory chunks")
|
||||
DECLARE_DEBUG_VARIABLE(bool, EnableBOChunkingDevMemPrefetch, false, "Enables prefetching of Device Memory chunks")
|
||||
DECLARE_DEBUG_VARIABLE(bool, EnableBOChunkingPreferredLocationHint, false, "Enables preferred location advise on chunks")
|
||||
DECLARE_DEBUG_VARIABLE(bool, DestroyAllocationsViaGmm, false, "Use DeAllocate2 wrapper instead of raw GDI destroy allocations")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, EnableBOChunking, -1, "Enables use of chunking of BOs in the KMD, mask: -1 = default, 0 = no chunking, 1 = shared allocations only, 2 = multi-tile device allocations only, 3 = shared and multi-tile device allocations .")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, NumberOfBOChunks, 2, "Number of chunks to use")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, SetBOChunkingSize, -1, "Size of chunk in bytes: -1 = default, otherwise power of two chunk size in bytes")
|
||||
|
|
|
@ -16,6 +16,7 @@ struct RootDeviceEnvironment;
|
|||
class GmmHandleAllocator;
|
||||
class MapGpuVirtualAddressGmm;
|
||||
class FreeGpuVirtualAddressGmm;
|
||||
class DeallocateGmm;
|
||||
|
||||
class GmmClientContext {
|
||||
public:
|
||||
|
@ -28,6 +29,7 @@ class GmmClientContext {
|
|||
MOCKABLE_VIRTUAL GMM_RESOURCE_INFO *createResInfoObject(GMM_RESCREATE_PARAMS *pCreateParams);
|
||||
MOCKABLE_VIRTUAL GMM_RESOURCE_INFO *copyResInfoObject(GMM_RESOURCE_INFO *pSrcRes);
|
||||
MOCKABLE_VIRTUAL void destroyResInfoObject(GMM_RESOURCE_INFO *pResInfo);
|
||||
MOCKABLE_VIRTUAL long deallocate2(DeallocateGmm *deallocateGmm);
|
||||
MOCKABLE_VIRTUAL uint64_t mapGpuVirtualAddress(MapGpuVirtualAddressGmm *pMapGpuVa);
|
||||
MOCKABLE_VIRTUAL uint64_t freeGpuVirtualAddress(FreeGpuVirtualAddressGmm *pFreeGpuVa);
|
||||
GMM_CLIENT_CONTEXT *getHandle() const;
|
||||
|
|
|
@ -14,5 +14,8 @@ uint64_t GmmClientContext::mapGpuVirtualAddress(MapGpuVirtualAddressGmm *pMapGpu
|
|||
uint64_t GmmClientContext::freeGpuVirtualAddress(FreeGpuVirtualAddressGmm *pFreeGpuVa) {
|
||||
return 0;
|
||||
}
|
||||
// Stub variant of deallocate2: the request object is not inspected and the
// call always reports 0.
long GmmClientContext::deallocate2(DeallocateGmm *deallocateGmm) {
    constexpr long stubStatus = 0;
    return stubStatus;
}
|
||||
|
||||
} // namespace NEO
|
||||
|
|
|
@ -16,5 +16,8 @@ uint64_t GmmClientContext::mapGpuVirtualAddress(MapGpuVirtualAddressGmm *pMapGpu
|
|||
uint64_t GmmClientContext::freeGpuVirtualAddress(FreeGpuVirtualAddressGmm *pFreeGpuVa) {
|
||||
return 0;
|
||||
}
|
||||
// Forwards the destroy request to the Gdi carried by the request object:
// invokes gdi->destroyAllocation2 with the stored D3DKMT_DESTROYALLOCATION2
// pointer and returns that call's result unchanged.
long GmmClientContext::deallocate2(DeallocateGmm *deallocateGmm) {
    auto *gdiInterface = deallocateGmm->gdi;
    auto *destroyParams = deallocateGmm->destroyAllocation2;
    return gdiInterface->destroyAllocation2(destroyParams);
}
|
||||
|
||||
} // namespace NEO
|
||||
|
|
|
@ -31,5 +31,11 @@ uint64_t GmmClientContext::freeGpuVirtualAddress(FreeGpuVirtualAddressGmm *pFree
|
|||
return 0;
|
||||
}
|
||||
}
|
||||
// Routes a destroy-allocation request through the GMM client context.
// The caller's D3DKMT_DESTROYALLOCATION2 is copied into the KmtObj member of a
// zero-initialized GMM_DESTROYALLOCATION2, which is then handed to
// clientContext->DeAllocate2. The value returned by DeAllocate2 is returned
// unchanged.
long GmmClientContext::deallocate2(DeallocateGmm *deallocateGmm) {
    GMM_DESTROYALLOCATION2 gmmDestroyAllocation2{};

    // Destination capacity is taken from the destination member itself, so the
    // bounds check in memcpy_s guards the real target instead of restating the
    // size of the source type.
    // NOTE(review): the memcpy_s result is not checked here, same as before.
    memcpy_s(&gmmDestroyAllocation2.KmtObj, sizeof(gmmDestroyAllocation2.KmtObj), deallocateGmm->destroyAllocation2, sizeof(D3DKMT_DESTROYALLOCATION2));

    return clientContext->DeAllocate2(&gmmDestroyAllocation2);
}
|
||||
|
||||
} // namespace NEO
|
||||
|
|
|
@ -31,4 +31,11 @@ class FreeGpuVirtualAddressGmm {
|
|||
Gdi *gdi;
|
||||
};
|
||||
|
||||
class DeallocateGmm {
|
||||
public:
|
||||
DeallocateGmm(D3DKMT_DESTROYALLOCATION2 *destroyAllocation2, Gdi *gdi) : destroyAllocation2(destroyAllocation2), gdi(gdi) {}
|
||||
D3DKMT_DESTROYALLOCATION2 *destroyAllocation2;
|
||||
Gdi *gdi;
|
||||
};
|
||||
|
||||
} // namespace NEO
|
|
@ -69,6 +69,7 @@ Gmm::Gmm(GmmHelper *gmmHelper, const void *alignedPtr, size_t alignedSize, size_
|
|||
applyAuxFlagsForBuffer(gmmRequirements.preferCompressed && !storageInfo.isLockable);
|
||||
applyMemoryFlags(storageInfo);
|
||||
applyAppResource(storageInfo);
|
||||
applyExtraInitFlag();
|
||||
applyDebugOverrides();
|
||||
|
||||
gmmResourceInfo.reset(GmmResourceInfo::create(gmmHelper->getClientContext(), &resourceParams));
|
||||
|
|
|
@ -80,6 +80,7 @@ class Gmm {
|
|||
void setupImageResourceParams(ImageInfo &imgInfo, bool preferCompressed);
|
||||
bool extraMemoryFlagsRequired();
|
||||
void applyExtraMemoryFlags(const StorageInfo &storageInfo);
|
||||
void applyExtraInitFlag();
|
||||
void applyDebugOverrides();
|
||||
GmmHelper *gmmHelper = nullptr;
|
||||
|
||||
|
|
|
@ -13,3 +13,4 @@ using namespace NEO;
|
|||
// No-op in this file: the body is empty, so storageInfo is not read and nothing is modified.
void Gmm::applyExtraMemoryFlags(const StorageInfo &storageInfo) {}
|
||||
// Always returns false in this file.
bool Gmm::extraMemoryFlagsRequired() { return false; }
|
||||
// No-op in this file: the body is empty, so storageInfo is not read and nothing is modified.
void Gmm::applyAppResource(const StorageInfo &storageInfo) {}
|
||||
// No-op in this file: the body is empty, so calling it changes nothing.
void Gmm::applyExtraInitFlag() {}
|
||||
|
|
|
@ -819,7 +819,9 @@ bool Wddm::destroyAllocations(const D3DKMT_HANDLE *handles, uint32_t allocationC
|
|||
if ((0U == allocationCount) && (0U == resourceHandle)) {
|
||||
return true;
|
||||
}
|
||||
|
||||
NTSTATUS status = STATUS_SUCCESS;
|
||||
|
||||
D3DKMT_DESTROYALLOCATION2 destroyAllocation = {};
|
||||
DEBUG_BREAK_IF(!(allocationCount <= 1 || resourceHandle == 0));
|
||||
|
||||
|
@ -827,10 +829,15 @@ bool Wddm::destroyAllocations(const D3DKMT_HANDLE *handles, uint32_t allocationC
|
|||
destroyAllocation.hResource = resourceHandle;
|
||||
destroyAllocation.phAllocationList = handles;
|
||||
destroyAllocation.AllocationCount = allocationCount;
|
||||
destroyAllocation.Flags.AssumeNotInUse = debugManager.flags.SetAssumeNotInUse.get();
|
||||
|
||||
destroyAllocation.Flags.AssumeNotInUse = 1;
|
||||
DeallocateGmm deallocateGmm{&destroyAllocation, getGdi()};
|
||||
|
||||
status = getGdi()->destroyAllocation2(&destroyAllocation);
|
||||
if (debugManager.flags.DestroyAllocationsViaGmm.get()) {
|
||||
status = static_cast<NTSTATUS>(this->rootDeviceEnvironment.getGmmClientContext()->deallocate2(&deallocateGmm));
|
||||
} else {
|
||||
status = getGdi()->destroyAllocation2(&destroyAllocation);
|
||||
}
|
||||
|
||||
return status == STATUS_SUCCESS;
|
||||
}
|
||||
|
|
|
@ -22,6 +22,7 @@ class MockGmmClientContextBase : public GmmClientContext {
|
|||
GMM_RESOURCE_INFO *createResInfoObject(GMM_RESCREATE_PARAMS *pCreateParams) override;
|
||||
GMM_RESOURCE_INFO *copyResInfoObject(GMM_RESOURCE_INFO *pSrcRes) override;
|
||||
void destroyResInfoObject(GMM_RESOURCE_INFO *pResInfo) override;
|
||||
long deallocate2(DeallocateGmm *deallocateGmm) override;
|
||||
uint8_t getSurfaceStateCompressionFormat(GMM_RESOURCE_FORMAT format) override;
|
||||
uint8_t getMediaSurfaceStateCompressionFormat(GMM_RESOURCE_FORMAT format) override;
|
||||
void setGmmDeviceInfo(GMM_DEVICE_INFO *deviceInfo) override;
|
||||
|
|
|
@ -12,4 +12,7 @@ uint64_t MockGmmClientContextBase::mapGpuVirtualAddress(MapGpuVirtualAddressGmm
|
|||
mapGpuVirtualAddressCalled++;
|
||||
return 0;
|
||||
}
|
||||
// Mock stub: ignores the request object and always reports 0.
long MockGmmClientContextBase::deallocate2(DeallocateGmm *deallocateGmm) {
    constexpr long stubStatus = 0;
    return stubStatus;
}
|
||||
} // namespace NEO
|
|
@ -14,4 +14,7 @@ uint64_t MockGmmClientContextBase::mapGpuVirtualAddress(MapGpuVirtualAddressGmm
|
|||
mapGpuVirtualAddressCalled++;
|
||||
return pMapGpuVa->gdi->mapGpuVirtualAddress(pMapGpuVa->mapGpuVirtualAddressParams);
|
||||
}
|
||||
// Mock implementation that forwards to the Gdi carried by the request:
// calls gdi->destroyAllocation2 with the stored descriptor pointer and returns
// the result unchanged.
long MockGmmClientContextBase::deallocate2(DeallocateGmm *deallocateGmm) {
    auto *gdiInterface = deallocateGmm->gdi;
    auto *destroyParams = deallocateGmm->destroyAllocation2;
    return gdiInterface->destroyAllocation2(destroyParams);
}
|
||||
} // namespace NEO
|
||||
|
|
|
@ -14,4 +14,7 @@ uint64_t MockGmmClientContextBase::mapGpuVirtualAddress(MapGpuVirtualAddressGmm
|
|||
mapGpuVirtualAddressCalled++;
|
||||
return pMapGpuVa->gdi->mapGpuVirtualAddress(pMapGpuVa->mapGpuVirtualAddressParams);
|
||||
}
|
||||
// Mock implementation that forwards to the Gdi carried by the request:
// calls gdi->destroyAllocation2 with the stored descriptor pointer and returns
// the result unchanged.
long MockGmmClientContextBase::deallocate2(DeallocateGmm *deallocateGmm) {
    auto *gdiInterface = deallocateGmm->gdi;
    auto *destroyParams = deallocateGmm->destroyAllocation2;
    return gdiInterface->destroyAllocation2(destroyParams);
}
|
||||
} // namespace NEO
|
||||
|
|
|
@ -541,6 +541,7 @@ PrintBOChunkingLogs = 0
|
|||
EnableBOChunkingPrefetch = 0
|
||||
EnableBOChunkingDevMemPrefetch = 0
|
||||
EnableBOChunkingPreferredLocationHint = 0
|
||||
DestroyAllocationsViaGmm = 0
|
||||
NumberOfBOChunks = 2
|
||||
SetBOChunkingSize = -1
|
||||
EnableBOChunking = -1
|
||||
|
@ -623,6 +624,7 @@ DeferStateInitSubmissionToFirstRegularUsage = -1
|
|||
WaitForPagingFenceInController = -1
|
||||
DirectSubmissionPrintSemaphoreUsage = -1
|
||||
ForceNonCoherentModeForTimestamps = 0
|
||||
SetAssumeNotInUse = 1
|
||||
ExperimentalUSMAllocationReuseVersion = -1
|
||||
ForceNonWalkerSplitMemoryCopy = -1
|
||||
DirectSubmissionSwitchSemaphoreMode = -1
|
||||
|
|
|
@ -394,6 +394,30 @@ TEST_F(Wddm20WithMockGdiDllTests, GivenThreeOsHandlesWhenAskedForDestroyAllocati
|
|||
EXPECT_EQ(1u, ptrToDestroyAlloc2->Flags.AssumeNotInUse);
|
||||
}
|
||||
|
||||
// With the SetAssumeNotInUse debug flag forced to false, destroying a single
// allocation handle must reach the mock GDI with Flags.AssumeNotInUse == 0.
TEST_F(Wddm20WithMockGdiDllTests, GivenSetAssumeNotInUseSetToFalseWhenDestroyAllocationsThenAssumeNotInUseNotSet) {
    // NOTE(review): restorer presumably reverts the flag override when it goes
    // out of scope - confirm against DebugManagerStateRestore.
    DebugManagerStateRestore restorer;
    debugManager.flags.SetAssumeNotInUse.set(false);

    // Only the raw handle array is passed to destroyAllocations; no
    // OsHandleStorage setup is needed for this scenario.
    D3DKMT_HANDLE handles[1] = {ALLOCATION_HANDLE};
    bool retVal = wddm->destroyAllocations(handles, 1, 0);
    EXPECT_TRUE(retVal);

    auto destroyWithResourceHandleCalled = 0u;
    D3DKMT_DESTROYALLOCATION2 *ptrToDestroyAlloc2 = nullptr;

    getSizesFcn(destroyWithResourceHandleCalled, ptrToDestroyAlloc2);

    // Fail the test cleanly instead of dereferencing a null pointer if the
    // mock did not record a destroy call.
    ASSERT_NE(nullptr, ptrToDestroyAlloc2);
    EXPECT_EQ(0u, ptrToDestroyAlloc2->Flags.AssumeNotInUse);
}
|
||||
|
||||
TEST_F(Wddm20Tests, WhenMappingAndFreeingGpuVaThenReturnIsCorrect) {
|
||||
OsAgnosticMemoryManager mm(*executionEnvironment);
|
||||
auto gmmHelper = getGmmHelper();
|
||||
|
|
Loading…
Reference in New Issue