Move OsContext to Device

Change-Id: I030b65372fbdc075423d22720e9da34ac65b8e68
Signed-off-by: Mateusz Jablonski <mateusz.jablonski@intel.com>
This commit is contained in:
Mateusz Jablonski
2018-08-27 15:48:29 +02:00
committed by sys_ocldev
parent c014c49102
commit 92bfd2e3d2
58 changed files with 615 additions and 429 deletions

View File

@@ -30,10 +30,10 @@ DeferrableDeletion *DeferrableDeletion::create(Args... args) {
return new DeferrableDeletionImpl(std::forward<Args>(args)...);
}
// Explicit instantiation of DeferrableDeletion::create for the argument list taken by the
// DeferrableDeletionImpl constructor.
// NOTE(review): the two trailing lines below look like the removed/added pair of this diff hunk
// (the +/- markers are missing in this rendering); the second one adds the OsContextWin *osContext argument.
template DeferrableDeletion *DeferrableDeletion::create(Wddm *wddm, D3DKMT_HANDLE *handles, uint32_t allocationCount, uint64_t lastFenceValue,
D3DKMT_HANDLE resourceHandle);
D3DKMT_HANDLE resourceHandle, OsContextWin *osContext);
DeferrableDeletionImpl::DeferrableDeletionImpl(Wddm *wddm, D3DKMT_HANDLE *handles, uint32_t allocationCount, uint64_t lastFenceValue,
D3DKMT_HANDLE resourceHandle) {
D3DKMT_HANDLE resourceHandle, OsContextWin *osContext) : osContext(osContext) {
this->wddm = wddm;
if (handles) {
this->handles = new D3DKMT_HANDLE[allocationCount];
@@ -46,7 +46,7 @@ DeferrableDeletionImpl::DeferrableDeletionImpl(Wddm *wddm, D3DKMT_HANDLE *handle
this->resourceHandle = resourceHandle;
}
// Performs the deferred deletion: forwards the stored handles, allocation count, last fence value
// and resource handle to Wddm::destroyAllocations and passes the negated result to DEBUG_BREAK_IF.
void DeferrableDeletionImpl::apply() {
// NOTE(review): the next two lines look like the removed/added pair of this diff hunk (the +/- markers
// are missing in this rendering); the second one additionally forwards the stored osContext pointer.
bool destroyStatus = wddm->destroyAllocations(handles, allocationCount, lastFenceValue, resourceHandle);
bool destroyStatus = wddm->destroyAllocations(handles, allocationCount, lastFenceValue, resourceHandle, osContext);
DEBUG_BREAK_IF(!destroyStatus);
}
DeferrableDeletionImpl::~DeferrableDeletionImpl() {

View File

@@ -22,6 +22,7 @@
#pragma once
#include "runtime/memory_manager/deferrable_deletion.h"
#include "runtime/os_interface/os_context.h"
#include "runtime/os_interface/windows/windows_wrapper.h"
#include <d3dkmthk.h>
@@ -29,10 +30,12 @@ namespace OCLRT {
class Wddm;
using OsContextWin = OsContext::OsContextImpl;
class DeferrableDeletionImpl : public DeferrableDeletion {
public:
DeferrableDeletionImpl(Wddm *wddm, D3DKMT_HANDLE *handles, uint32_t allocationCount, uint64_t lastFenceValue,
D3DKMT_HANDLE resourceHandle);
D3DKMT_HANDLE resourceHandle, OsContextWin *osContext);
void apply() override;
~DeferrableDeletionImpl();
@@ -45,5 +48,6 @@ class DeferrableDeletionImpl : public DeferrableDeletion {
uint32_t allocationCount;
uint64_t lastFenceValue;
D3DKMT_HANDLE resourceHandle;
OsContextWin *osContext = nullptr;
};
} // namespace OCLRT

View File

@@ -28,10 +28,8 @@
namespace OCLRT {
OsContextWin::OsContextImpl(Wddm &wddm) : wddm(wddm) {
UNRECOVERABLE_IF(!wddm.isInitialized());
auto wddmInterface = wddm.getWddmInterface();
if (!wddmInterface) {
return;
}
if (!wddm.createContext(context)) {
return;
}
@@ -43,9 +41,7 @@ OsContextWin::OsContextImpl(Wddm &wddm) : wddm(wddm) {
initialized = wddmInterface->createMonitoredFence(*this);
};
// Tears down the per-context OS objects: destroys the HW queue through the Wddm interface,
// then destroys the context handle through Wddm.
OsContextWin::~OsContextImpl() {
// NOTE(review): the guarded call (next three lines) and the unguarded call after it look like the
// removed/added variants of this diff hunk (the +/- markers are missing in this rendering).
if (wddm.getWddmInterface()) {
wddm.getWddmInterface()->destroyHwQueue(hwQueueHandle);
}
// NOTE(review): this variant dereferences getWddmInterface() without a null check - confirm the
// interface is always set by the time a context is destroyed.
wddm.getWddmInterface()->destroyHwQueue(hwQueueHandle);
wddm.destroyContext(context);
}
@@ -57,8 +53,10 @@ void OsContextWin::resetMonitoredFenceParams(D3DKMT_HANDLE &handle, uint64_t *cp
monitoredFence.gpuAddress = gpuAddress;
}
// Constructs the OS context by creating the Windows implementation (OsContextWin) on top of the
// Wddm object obtained from the given OSInterface.
// NOTE(review): the first two lines below look like the removed, reference-taking version of this
// constructor; the pointer-taking version that follows appears to be the added one (the +/- markers
// are missing in this rendering).
OsContext::OsContext(OSInterface &osInterface) {
osContextImpl = std::make_unique<OsContextWin>(*osInterface.get()->getWddm());
OsContext::OsContext(OSInterface *osInterface) {
// With a null osInterface no implementation object is created and osContextImpl is not assigned here.
if (osInterface) {
osContextImpl = std::make_unique<OsContextWin>(*osInterface->get()->getWddm());
}
}
OsContext::~OsContext() = default;

View File

@@ -49,6 +49,7 @@ class OsContext::OsContextImpl {
}
// Returns a mutable reference to this context's monitoredFence member.
MonitoredFence &getMonitoredFence() { return monitoredFence; }
void resetMonitoredFenceParams(D3DKMT_HANDLE &handle, uint64_t *cpuAddress, D3DGPU_VIRTUAL_ADDRESS &gpuAddress);
// Returns a pointer to the Wddm object this context holds as the member `wddm`.
Wddm *getWddm() const { return &wddm; }
protected:
bool initialized = false;

View File

@@ -539,11 +539,13 @@ NTSTATUS Wddm::createAllocationsAndMapGpuVa(OsHandleStorage &osHandles) {
return status;
}
bool Wddm::destroyAllocations(D3DKMT_HANDLE *handles, uint32_t allocationCount, uint64_t lastFenceValue, D3DKMT_HANDLE resourceHandle) {
bool Wddm::destroyAllocations(D3DKMT_HANDLE *handles, uint32_t allocationCount, uint64_t lastFenceValue, D3DKMT_HANDLE resourceHandle, OsContextWin *osContext) {
NTSTATUS status = STATUS_SUCCESS;
D3DKMT_DESTROYALLOCATION2 DestroyAllocation = {0};
DEBUG_BREAK_IF(!(allocationCount <= 1 || resourceHandle == 0));
waitFromCpu(lastFenceValue);
if (lastFenceValue > 0) {
waitFromCpu(lastFenceValue, *osContext);
}
DestroyAllocation.hDevice = device;
DestroyAllocation.hResource = resourceHandle;
@@ -711,17 +713,17 @@ bool Wddm::destroyContext(D3DKMT_HANDLE context) {
return status == STATUS_SUCCESS;
}
bool Wddm::submit(uint64_t commandBuffer, size_t size, void *commandHeader) {
bool Wddm::submit(uint64_t commandBuffer, size_t size, void *commandHeader, OsContextWin &osContext) {
bool status = false;
if (currentPagingFenceValue > *pagingFenceAddress && !waitOnGPU(osContext->getContext())) {
if (currentPagingFenceValue > *pagingFenceAddress && !waitOnGPU(osContext.getContext())) {
return false;
}
DBG_LOG(ResidencyDebugEnable, "Residency:", __FUNCTION__, "currentFenceValue =", osContext->getMonitoredFence().currentFenceValue);
DBG_LOG(ResidencyDebugEnable, "Residency:", __FUNCTION__, "currentFenceValue =", osContext.getMonitoredFence().currentFenceValue);
status = wddmInterface->submit(commandBuffer, size, commandHeader, *osContext);
status = wddmInterface->submit(commandBuffer, size, commandHeader, osContext);
if (status) {
osContext->getMonitoredFence().lastSubmittedFence = osContext->getMonitoredFence().currentFenceValue;
osContext->getMonitoredFence().currentFenceValue++;
osContext.getMonitoredFence().lastSubmittedFence = osContext.getMonitoredFence().currentFenceValue;
osContext.getMonitoredFence().currentFenceValue++;
}
getDeviceState();
UNRECOVERABLE_IF(!status);
@@ -746,10 +748,10 @@ void Wddm::getDeviceState() {
#endif
}
// Waits for the GPU to catch up with submitted work: when the monitored fence has a CPU-visible
// address, spins until the value read through it reaches currentFenceValue - 1.
// NOTE(review): the first four lines below (using the Wddm-owned osContext pointer) look like the
// removed version; the following four, taking OsContextWin &osContext as a parameter, appear to be
// the added version (the +/- markers are missing in this rendering).
void Wddm::handleCompletion() {
if (osContext->getMonitoredFence().cpuAddress) {
auto *currentTag = osContext->getMonitoredFence().cpuAddress;
while (*currentTag < osContext->getMonitoredFence().currentFenceValue - 1)
void Wddm::handleCompletion(OsContextWin &osContext) {
if (osContext.getMonitoredFence().cpuAddress) {
auto *currentTag = osContext.getMonitoredFence().cpuAddress;
while (*currentTag < osContext.getMonitoredFence().currentFenceValue - 1)
// Empty loop body: this is a busy-wait with no sleep or yield.
;
}
}
@@ -772,13 +774,13 @@ bool Wddm::waitOnGPU(D3DKMT_HANDLE context) {
return status == STATUS_SUCCESS;
}
bool Wddm::waitFromCpu(uint64_t lastFenceValue) {
bool Wddm::waitFromCpu(uint64_t lastFenceValue, OsContextWin &osContext) {
NTSTATUS status = STATUS_SUCCESS;
if (lastFenceValue > *osContext->getMonitoredFence().cpuAddress) {
if (lastFenceValue > *osContext.getMonitoredFence().cpuAddress) {
D3DKMT_WAITFORSYNCHRONIZATIONOBJECTFROMCPU waitFromCpu = {0};
waitFromCpu.ObjectCount = 1;
waitFromCpu.ObjectHandleArray = &osContext->getMonitoredFence().fenceHandle;
waitFromCpu.ObjectHandleArray = &osContext.getMonitoredFence().fenceHandle;
waitFromCpu.FenceValueArray = &lastFenceValue;
waitFromCpu.hDevice = device;
waitFromCpu.hAsyncEvent = NULL;
@@ -884,11 +886,6 @@ void *Wddm::virtualAlloc(void *inPtr, size_t size, unsigned long flags, unsigned
// Forwards ptr, size and flags to virtualFreeFnc and returns its result unchanged.
int Wddm::virtualFree(void *ptr, size_t size, unsigned long flags) {
return virtualFreeFnc(ptr, size, flags);
}
// Returns the monitored fence of the Wddm-owned osContext.
// NOTE(review): this accessor appears to be removed by this commit (its declaration is also dropped
// from the class in this diff); callers are switched to OsContextWin::getMonitoredFence().
MonitoredFence &Wddm::getMonitoredFence() { return osContext->getMonitoredFence(); }
// Returns the context handle of the Wddm-owned osContext.
// NOTE(review): this accessor appears to be removed by this commit (its declaration is also dropped
// from the class in this diff).
D3DKMT_HANDLE Wddm::getOsDeviceContext() const {
return osContext->getContext();
}
bool Wddm::configureDeviceAddressSpace() {
SYSTEM_INFO sysInfo;
@@ -927,11 +924,7 @@ bool Wddm::init() {
if (!gmmMemory) {
gmmMemory.reset(GmmMemory::create());
}
if (!configureDeviceAddressSpace()) {
return false;
}
osContext = std::make_unique<OsContextWin>(*this);
initialized = osContext->isInitialized();
initialized = configureDeviceAddressSpace();
}
return initialized;
}

View File

@@ -78,7 +78,7 @@ class Wddm {
MOCKABLE_VIRTUAL NTSTATUS createAllocation(WddmAllocation *alloc);
MOCKABLE_VIRTUAL bool createAllocation64k(WddmAllocation *alloc);
MOCKABLE_VIRTUAL NTSTATUS createAllocationsAndMapGpuVa(OsHandleStorage &osHandles);
MOCKABLE_VIRTUAL bool destroyAllocations(D3DKMT_HANDLE *handles, uint32_t allocationCount, uint64_t lastFenceValue, D3DKMT_HANDLE resourceHandle);
MOCKABLE_VIRTUAL bool destroyAllocations(D3DKMT_HANDLE *handles, uint32_t allocationCount, uint64_t lastFenceValue, D3DKMT_HANDLE resourceHandle, OsContextWin *osContext);
MOCKABLE_VIRTUAL bool openSharedHandle(D3DKMT_HANDLE handle, WddmAllocation *alloc);
bool openNTHandle(HANDLE handle, WddmAllocation *alloc);
MOCKABLE_VIRTUAL void *lockResource(WddmAllocation *wddmAllocation);
@@ -89,8 +89,8 @@ class Wddm {
MOCKABLE_VIRTUAL bool destroyContext(D3DKMT_HANDLE context);
MOCKABLE_VIRTUAL bool queryAdapterInfo();
MOCKABLE_VIRTUAL bool submit(uint64_t commandBuffer, size_t size, void *commandHeader);
MOCKABLE_VIRTUAL bool waitFromCpu(uint64_t lastFenceValue);
MOCKABLE_VIRTUAL bool submit(uint64_t commandBuffer, size_t size, void *commandHeader, OsContextWin &osContext);
MOCKABLE_VIRTUAL bool waitFromCpu(uint64_t lastFenceValue, OsContextWin &osContext);
NTSTATUS escape(D3DKMT_ESCAPE &escapeCommand);
void registerTrimCallback(PFND3DKMT_TRIMNOTIFICATIONCALLBACK callback, WddmMemoryManager *memoryManager);
@@ -121,8 +121,6 @@ class Wddm {
return deviceRegistryPath;
}
MonitoredFence &getMonitoredFence();
uint64_t getSystemSharedMemory() const;
uint64_t getMaxApplicationAddress() const;
@@ -165,7 +163,6 @@ class Wddm {
// Returns the stored preemptionMode member.
PreemptionMode getPreemptionMode() const {
return preemptionMode;
}
D3DKMT_HANDLE getOsDeviceContext() const;
unsigned int readEnablePreemptionRegKey();
@@ -210,7 +207,7 @@ class Wddm {
bool destroyDevice();
bool closeAdapter();
void getDeviceState();
void handleCompletion();
void handleCompletion(OsContextWin &osContext);
static CreateDXGIFactoryFcn createDxgiFactory;
static GetSystemInfoFcn getSystemInfo;
@@ -221,6 +218,5 @@ class Wddm {
std::unique_ptr<KmDafListener> kmDafListener;
std::unique_ptr<WddmInterface> wddmInterface;
std::unique_ptr<OsContextWin> osContext;
};
} // namespace OCLRT

View File

@@ -56,7 +56,7 @@ bool OCLRT::WddmInterface20::submit(uint64_t commandBuffer, size_t size, void *c
D3DKMT_SUBMITCOMMAND SubmitCommand = {0};
NTSTATUS status = STATUS_SUCCESS;
auto monitoredFence = wddm.getMonitoredFence();
auto monitoredFence = osContext.getMonitoredFence();
SubmitCommand.Commands = commandBuffer;
SubmitCommand.CommandLength = static_cast<UINT>(size);
SubmitCommand.BroadcastContextCount = 1;
@@ -119,7 +119,7 @@ const bool OCLRT::WddmInterface23::hwQueuesSupported() {
}
bool OCLRT::WddmInterface23::submit(uint64_t commandBuffer, size_t size, void *commandHeader, OsContextWin &osContext) {
auto monitoredFence = wddm.getMonitoredFence();
auto monitoredFence = osContext.getMonitoredFence();
D3DKMT_SUBMITCOMMANDTOHWQUEUE submitCommand = {};
submitCommand.hHwQueue = osContext.getHwQueue();

View File

@@ -40,11 +40,11 @@ class WddmCommandStreamReceiver : public DeviceCommandStreamReceiver<GfxFamily>
WddmCommandStreamReceiver(const HardwareInfo &hwInfoIn, Wddm *wddm, ExecutionEnvironment &executionEnvironment);
virtual ~WddmCommandStreamReceiver();
FlushStamp flush(BatchBuffer &batchBuffer, EngineType engineType, ResidencyContainer *allocationsForResidency) override;
FlushStamp flush(BatchBuffer &batchBuffer, EngineType engineType, ResidencyContainer *allocationsForResidency, OsContext &osContext) override;
void makeResident(GraphicsAllocation &gfxAllocation) override;
void processResidency(ResidencyContainer *allocationsForResidency) override;
void processResidency(ResidencyContainer *allocationsForResidency, OsContext &osContext) override;
void processEviction() override;
bool waitForFlushStamp(FlushStamp &flushStampToWait) override;
bool waitForFlushStamp(FlushStamp &flushStampToWait, OsContext &osContext) override;
WddmMemoryManager *getMemoryManager();
MemoryManager *createMemoryManager(bool enable64kbPages);

View File

@@ -39,6 +39,7 @@
#undef max
#include "runtime/os_interface/windows/gdi_interface.h"
#include "runtime/os_interface/windows/os_context_win.h"
#include "runtime/os_interface/windows/os_interface.h"
#include "runtime/os_interface/windows/wddm_engine_mapper.h"
#include "runtime/os_interface/windows/wddm_memory_manager.h"
@@ -59,6 +60,7 @@ WddmCommandStreamReceiver<GfxFamily>::WddmCommandStreamReceiver(const HardwareIn
this->wddm->setNode(nodeOrdinal);
PreemptionMode preemptionMode = PreemptionHelper::getDefaultPreemptionMode(hwInfoIn);
this->wddm->setPreemptionMode(preemptionMode);
executionEnvironment.osInterface.reset(new OSInterface());
this->osInterface = executionEnvironment.osInterface.get();
this->osInterface->get()->setWddm(this->wddm);
@@ -89,7 +91,7 @@ WddmCommandStreamReceiver<GfxFamily>::~WddmCommandStreamReceiver() {
template <typename GfxFamily>
FlushStamp WddmCommandStreamReceiver<GfxFamily>::flush(BatchBuffer &batchBuffer,
EngineType engineType, ResidencyContainer *allocationsForResidency) {
EngineType engineType, ResidencyContainer *allocationsForResidency, OsContext &osContext) {
auto commandStreamAddress = ptrOffset(batchBuffer.commandBufferAllocation->getGpuAddress(), batchBuffer.startOffset);
if (this->dispatchMode == DispatchMode::ImmediateDispatch) {
@@ -99,7 +101,7 @@ FlushStamp WddmCommandStreamReceiver<GfxFamily>::flush(BatchBuffer &batchBuffer,
batchBuffer.commandBufferAllocation->residencyTaskCount = this->taskCount;
}
this->processResidency(allocationsForResidency);
this->processResidency(allocationsForResidency, osContext);
COMMAND_BUFFER_HEADER *pHeader = reinterpret_cast<COMMAND_BUFFER_HEADER *>(commandBufferHeader);
pHeader->RequiresCoherency = batchBuffer.requiresCoherency;
@@ -124,9 +126,9 @@ FlushStamp WddmCommandStreamReceiver<GfxFamily>::flush(BatchBuffer &batchBuffer,
this->kmDafLockAllocations(allocationsForResidency);
}
wddm->submit(commandStreamAddress, batchBuffer.usedSize - batchBuffer.startOffset, commandBufferHeader);
wddm->submit(commandStreamAddress, batchBuffer.usedSize - batchBuffer.startOffset, commandBufferHeader, *osContext.get());
return wddm->getMonitoredFence().lastSubmittedFence;
return osContext.get()->getMonitoredFence().lastSubmittedFence;
}
template <typename GfxFamily>
@@ -145,8 +147,8 @@ void WddmCommandStreamReceiver<GfxFamily>::makeResident(GraphicsAllocation &gfxA
}
// Makes the given allocations resident by delegating to
// WddmMemoryManager::makeResidentResidencyAllocations; the negated result is passed to DEBUG_BREAK_IF.
// NOTE(review): the first signature/body pair below looks like the removed version and the second,
// which also takes and forwards OsContext &osContext, like the added version (the +/- markers are
// missing in this rendering).
template <typename GfxFamily>
void WddmCommandStreamReceiver<GfxFamily>::processResidency(ResidencyContainer *allocationsForResidency) {
bool success = getMemoryManager()->makeResidentResidencyAllocations(allocationsForResidency);
void WddmCommandStreamReceiver<GfxFamily>::processResidency(ResidencyContainer *allocationsForResidency, OsContext &osContext) {
bool success = getMemoryManager()->makeResidentResidencyAllocations(allocationsForResidency, osContext);
DEBUG_BREAK_IF(!success);
}
@@ -167,8 +169,8 @@ MemoryManager *WddmCommandStreamReceiver<GfxFamily>::createMemoryManager(bool en
}
// Waits for the given flush stamp by passing it to Wddm::waitFromCpu as the fence value and
// returns that call's result.
// NOTE(review): the first signature/body pair below looks like the removed version and the second,
// which waits on the fence of the supplied OsContext (via osContext.get()), like the added version
// (the +/- markers are missing in this rendering).
template <typename GfxFamily>
bool WddmCommandStreamReceiver<GfxFamily>::waitForFlushStamp(FlushStamp &flushStampToWait) {
return wddm->waitFromCpu(flushStampToWait);
bool WddmCommandStreamReceiver<GfxFamily>::waitForFlushStamp(FlushStamp &flushStampToWait, OsContext &osContext) {
return wddm->waitFromCpu(flushStampToWait, *osContext.get());
}
template <typename GfxFamily>

View File

@@ -33,6 +33,8 @@
#include "runtime/memory_manager/deferred_deleter.h"
#include "runtime/os_interface/windows/wddm/wddm.h"
#include "runtime/os_interface/windows/wddm_allocation.h"
#include "runtime/os_interface/windows/os_context_win.h"
#include "runtime/platform/platform.h"
#include <algorithm>
namespace OCLRT {
@@ -324,7 +326,11 @@ void WddmMemoryManager::freeGraphicsMemoryImpl(GraphicsAllocation *gfxAllocation
unlockResource(input);
input->setLocked(false);
}
auto status = tryDeferDeletions(allocationHandles, allocationCount, input->getResidencyData().lastFence, resourceHandle);
OsContextWin *osContextWin = nullptr;
if (input->getResidencyData().osContext) {
osContextWin = input->getResidencyData().osContext->get();
}
auto status = tryDeferDeletions(allocationHandles, allocationCount, input->getResidencyData().lastFence, resourceHandle, osContextWin);
DEBUG_BREAK_IF(!status);
alignedFreeWrapper(cpuPtr);
}
@@ -332,12 +338,12 @@ void WddmMemoryManager::freeGraphicsMemoryImpl(GraphicsAllocation *gfxAllocation
delete gfxAllocation;
}
// Destroys the given allocation handles, either deferred or immediately.
// When a deferredDeleter is present, a DeferrableDeletion is created and queued and the function
// returns true; otherwise Wddm::destroyAllocations is called directly and its result is returned.
// NOTE(review): each old/new line pair below (signature, deferDeletion call, destroyAllocations call)
// looks like the removed/added variants of this diff hunk (the +/- markers are missing in this
// rendering); the added variants thread an OsContextWin *osContext through. Callers visible in this
// diff may pass nullptr for osContext - confirm that destroyAllocations never dereferences it in
// that case.
bool WddmMemoryManager::tryDeferDeletions(D3DKMT_HANDLE *handles, uint32_t allocationCount, uint64_t lastFenceValue, D3DKMT_HANDLE resourceHandle) {
bool WddmMemoryManager::tryDeferDeletions(D3DKMT_HANDLE *handles, uint32_t allocationCount, uint64_t lastFenceValue, D3DKMT_HANDLE resourceHandle, OsContextWin *osContext) {
bool status = true;
if (deferredDeleter) {
deferredDeleter->deferDeletion(DeferrableDeletion::create(wddm, handles, allocationCount, lastFenceValue, resourceHandle));
deferredDeleter->deferDeletion(DeferrableDeletion::create(wddm, handles, allocationCount, lastFenceValue, resourceHandle, osContext));
} else {
status = wddm->destroyAllocations(handles, allocationCount, lastFenceValue, resourceHandle);
status = wddm->destroyAllocations(handles, allocationCount, lastFenceValue, resourceHandle, osContext);
}
return status;
}
@@ -385,6 +391,7 @@ void WddmMemoryManager::cleanOsHandles(OsHandleStorage &handleStorage) {
auto allocationCount = 0;
uint64_t lastFenceValue = 0;
OsContext *osContext = nullptr;
for (unsigned int i = 0; i < max_fragments_count; i++) {
if (handleStorage.fragmentStorageData[i].freeTheFragment) {
@@ -392,10 +399,15 @@ void WddmMemoryManager::cleanOsHandles(OsHandleStorage &handleStorage) {
handleStorage.fragmentStorageData[i].residency->resident = false;
allocationCount++;
lastFenceValue = std::max(handleStorage.fragmentStorageData[i].residency->lastFence, lastFenceValue);
osContext = handleStorage.fragmentStorageData[i].residency->osContext;
}
}
bool success = tryDeferDeletions(handles, allocationCount, lastFenceValue, 0);
OsContextWin *osContextWin = nullptr;
if (osContext) {
osContextWin = osContext->get();
}
bool success = tryDeferDeletions(handles, allocationCount, lastFenceValue, 0, osContextWin);
for (unsigned int i = 0; i < max_fragments_count; i++) {
if (handleStorage.fragmentStorageData[i].freeTheFragment) {
@@ -445,7 +457,7 @@ uint64_t WddmMemoryManager::getInternalHeapBaseAddress() {
return this->wddm->getGfxPartition().Heap32[1].Base;
}
bool WddmMemoryManager::makeResidentResidencyAllocations(ResidencyContainer *allocationsForResidency) {
bool WddmMemoryManager::makeResidentResidencyAllocations(ResidencyContainer *allocationsForResidency, OsContext &osContext) {
auto &residencyAllocations = allocationsForResidency ? *allocationsForResidency : this->residencyAllocations;
@@ -456,7 +468,7 @@ bool WddmMemoryManager::makeResidentResidencyAllocations(ResidencyContainer *all
acquireResidencyLock();
DBG_LOG(ResidencyDebugEnable, "Residency:", __FUNCTION__, "currentFenceValue =", wddm->getMonitoredFence().currentFenceValue);
DBG_LOG(ResidencyDebugEnable, "Residency:", __FUNCTION__, "currentFenceValue =", osContext.get()->getMonitoredFence().currentFenceValue);
for (uint32_t i = 0; i < residencyCount; i++) {
WddmAllocation *allocation = reinterpret_cast<WddmAllocation *>(residencyAllocations[i]);
@@ -511,13 +523,16 @@ bool WddmMemoryManager::makeResidentResidencyAllocations(ResidencyContainer *all
for (uint32_t i = 0; i < residencyCount; i++) {
WddmAllocation *allocation = reinterpret_cast<WddmAllocation *>(residencyAllocations[i]);
// Update fence value not to early destroy / evict allocation
allocation->getResidencyData().lastFence = wddm->getMonitoredFence().currentFenceValue;
allocation->getResidencyData().addOsContext(&osContext);
allocation->getResidencyData().lastFence = osContext.get()->getMonitoredFence().currentFenceValue;
allocation->getResidencyData().resident = true;
for (uint32_t allocationId = 0; allocationId < allocation->fragmentsStorage.fragmentCount; allocationId++) {
allocation->fragmentsStorage.fragmentStorageData[allocationId].residency->resident = true;
auto residencyData = allocation->fragmentsStorage.fragmentStorageData[allocationId].residency;
residencyData->addOsContext(&osContext);
residencyData->resident = allocation->getResidencyData().resident;
// Update fence value not to remove the fragment referenced by different GA in trimming callback
allocation->fragmentsStorage.fragmentStorageData[allocationId].residency->lastFence = wddm->getMonitoredFence().currentFenceValue;
residencyData->lastFence = allocation->getResidencyData().lastFence;
}
}
}
@@ -635,6 +650,7 @@ void WddmMemoryManager::compactTrimCandidateList() {
}
void WddmMemoryManager::trimResidency(D3DDDI_TRIMRESIDENCYSET_FLAGS flags, uint64_t bytes) {
OsContext *osContext = nullptr;
if (flags.PeriodicTrim) {
bool periodicTrimDone = false;
D3DKMT_HANDLE fragmentEvictHandles[3] = {0};
@@ -648,7 +664,7 @@ void WddmMemoryManager::trimResidency(D3DDDI_TRIMRESIDENCYSET_FLAGS flags, uint6
DBG_LOG(ResidencyDebugEnable, "Residency:", __FUNCTION__, "lastPeriodicTrimFenceValue = ", lastPeriodicTrimFenceValue);
// allocation was not used from last periodic trim
if ((wddmAllocation)->getResidencyData().lastFence <= lastPeriodicTrimFenceValue) {
if (wddmAllocation->getResidencyData().lastFence <= lastPeriodicTrimFenceValue) {
DBG_LOG(ResidencyDebugEnable, "Residency:", __FUNCTION__, "allocation: handle =", wddmAllocation->handle, "lastFence =", (wddmAllocation)->getResidencyData().lastFence);
@@ -674,6 +690,7 @@ void WddmMemoryManager::trimResidency(D3DDDI_TRIMRESIDENCYSET_FLAGS flags, uint6
}
wddmAllocation->getResidencyData().resident = false;
osContext = wddmAllocation->getResidencyData().osContext;
removeFromTrimCandidateList(wddmAllocation);
} else {
periodicTrimDone = true;
@@ -698,7 +715,10 @@ void WddmMemoryManager::trimResidency(D3DDDI_TRIMRESIDENCYSET_FLAGS flags, uint6
}
if (flags.PeriodicTrim || flags.RestartPeriodicTrim) {
lastPeriodicTrimFenceValue = *wddm->getMonitoredFence().cpuAddress;
if (!osContext) {
osContext = platform()->getDevice(0)->getOsContext();
}
lastPeriodicTrimFenceValue = *osContext->get()->getMonitoredFence().cpuAddress;
DBG_LOG(ResidencyDebugEnable, "Residency:", __FUNCTION__, "updated lastPeriodicTrimFenceValue =", lastPeriodicTrimFenceValue);
}
}
@@ -739,14 +759,20 @@ bool WddmMemoryManager::trimResidencyToBudget(uint64_t bytes) {
}
lastFence = wddmAllocation->getResidencyData().lastFence;
auto osContext = wddmAllocation->getResidencyData().osContext;
if (!osContext) {
removeFromTrimCandidateList(wddmAllocation);
continue;
}
auto &monitoredFence = osContext->get()->getMonitoredFence();
if (lastFence <= wddm->getMonitoredFence().lastSubmittedFence) {
if (lastFence <= monitoredFence.lastSubmittedFence) {
uint32_t fragmentsToEvict = 0;
uint64_t sizeEvicted = 0;
uint64_t sizeToTrim = 0;
if (lastFence > *wddm->getMonitoredFence().cpuAddress) {
wddm->waitFromCpu(lastFence);
if (lastFence > *monitoredFence.cpuAddress) {
wddm->waitFromCpu(lastFence, *osContext->get());
}
if (wddmAllocation->fragmentsStorage.fragmentCount == 0) {
@@ -754,9 +780,10 @@ bool WddmMemoryManager::trimResidencyToBudget(uint64_t bytes) {
sizeEvicted = wddmAllocation->getAlignedSize();
} else {
auto &fragmentStorageData = wddmAllocation->fragmentsStorage.fragmentStorageData;
for (uint32_t allocationId = 0; allocationId < wddmAllocation->fragmentsStorage.fragmentCount; allocationId++) {
if (wddmAllocation->fragmentsStorage.fragmentStorageData[allocationId].residency->lastFence <= wddm->getMonitoredFence().lastSubmittedFence) {
fragmentEvictHandles[fragmentsToEvict++] = wddmAllocation->fragmentsStorage.fragmentStorageData[allocationId].osHandleStorage->handle;
if (fragmentStorageData[allocationId].residency->lastFence <= monitoredFence.lastSubmittedFence) {
fragmentEvictHandles[fragmentsToEvict++] = fragmentStorageData[allocationId].osHandleStorage->handle;
}
}
@@ -764,9 +791,9 @@ bool WddmMemoryManager::trimResidencyToBudget(uint64_t bytes) {
wddm->evict((D3DKMT_HANDLE *)fragmentEvictHandles, fragmentsToEvict, sizeToTrim);
for (uint32_t allocationId = 0; allocationId < wddmAllocation->fragmentsStorage.fragmentCount; allocationId++) {
if (wddmAllocation->fragmentsStorage.fragmentStorageData[allocationId].residency->lastFence <= wddm->getMonitoredFence().lastSubmittedFence) {
wddmAllocation->fragmentsStorage.fragmentStorageData[allocationId].residency->resident = false;
sizeEvicted += wddmAllocation->fragmentsStorage.fragmentStorageData[allocationId].fragmentSize;
if (fragmentStorageData[allocationId].residency->lastFence <= monitoredFence.lastSubmittedFence) {
fragmentStorageData[allocationId].residency->resident = false;
sizeEvicted += fragmentStorageData[allocationId].fragmentSize;
}
}
}
@@ -815,7 +842,7 @@ bool WddmMemoryManager::createWddmAllocation(WddmAllocation *allocation, Allocat
mapSuccess = wddm->mapGpuVirtualAddress(allocation, allocation->getAlignedCpuPtr(), allocation->is32BitAllocation, false, useHeap1);
}
if (!mapSuccess) {
wddm->destroyAllocations(&allocation->handle, 1, 0, allocation->resourceHandle);
wddm->destroyAllocations(&allocation->handle, 1, 0, allocation->resourceHandle, nullptr);
wddmSuccess = STATUS_UNSUCCESSFUL;
}
allocation->setGpuAddress(allocation->gpuPtr);

View File

@@ -23,6 +23,7 @@
#pragma once
#include "runtime/helpers/aligned_memory.h"
#include "runtime/memory_manager/memory_manager.h"
#include "runtime/os_interface/os_context.h"
#include "runtime/os_interface/windows/wddm_allocation.h"
#include "runtime/os_interface/windows/windows_wrapper.h"
#include <d3dkmthk.h>
@@ -34,6 +35,8 @@ namespace OCLRT {
class Gmm;
class Wddm;
using OsContextWin = OsContext::OsContextImpl;
class WddmMemoryManager : public MemoryManager {
public:
using MemoryManager::allocateGraphicsMemory;
@@ -60,7 +63,7 @@ class WddmMemoryManager : public MemoryManager {
void *lockResource(GraphicsAllocation *graphicsAllocation) override;
void unlockResource(GraphicsAllocation *graphicsAllocation) override;
bool makeResidentResidencyAllocations(ResidencyContainer *allocationsForResidency);
bool makeResidentResidencyAllocations(ResidencyContainer *allocationsForResidency, OsContext &osContext);
void makeNonResidentEvictionAllocations();
AllocationStatus populateOsHandles(OsHandleStorage &handleStorage) override;
@@ -89,7 +92,7 @@ class WddmMemoryManager : public MemoryManager {
residencyLock = false;
}
bool tryDeferDeletions(D3DKMT_HANDLE *handles, uint32_t allocationCount, uint64_t lastFenceValue, D3DKMT_HANDLE resourceHandle);
bool tryDeferDeletions(D3DKMT_HANDLE *handles, uint32_t allocationCount, uint64_t lastFenceValue, D3DKMT_HANDLE resourceHandle, OsContextWin *osContext);
bool isMemoryBudgetExhausted() const override { return memoryBudgetExhausted; }