Wait until the resource is no longer in use in freeGraphicsMemory

Change-Id: I201d914569fc0cf6f9eb616d456a670b0b8741ab
Signed-off-by: Maciej Dziuban <maciej.dziuban@intel.com>
This commit is contained in:
Maciej Dziuban
2019-03-04 14:50:26 +01:00
parent 04c2064382
commit 79d0878e64
18 changed files with 167 additions and 39 deletions

2
Jenkinsfile vendored
View File

@@ -1,5 +1,5 @@
#!groovy
dependenciesRevision='03eca0b06275854df960e425756318fc68ac6d3c-1215'
strategy='EQUAL'
allowedCD=273
allowedCD=274
allowedF=4

View File

@@ -133,7 +133,15 @@ void MemoryManager::freeGraphicsMemory(GraphicsAllocation *gfxAllocation) {
if (!gfxAllocation) {
return;
}
if (gfxAllocation->isLocked()) {
const bool hasFragments = gfxAllocation->fragmentsStorage.fragmentCount != 0;
const bool isLocked = gfxAllocation->isLocked();
DEBUG_BREAK_IF(hasFragments && isLocked);
if (!hasFragments) {
handleFenceCompletion(gfxAllocation);
}
if (isLocked) {
freeAssociatedResourceImpl(*gfxAllocation);
}
freeGraphicsMemoryImpl(gfxAllocation);

View File

@@ -140,8 +140,8 @@ class MemoryManager {
void freeSystemMemory(void *ptr);
virtual void freeGraphicsMemoryImpl(GraphicsAllocation *gfxAllocation) = 0;
void freeGraphicsMemory(GraphicsAllocation *gfxAllocation);
// Hook called by freeGraphicsMemory() for allocations without host-pointer fragments,
// before the allocation is released. The base implementation does nothing;
// OS-specific memory managers override it to wait on the allocation.
virtual void handleFenceCompletion(GraphicsAllocation *allocation) {}
void checkGpuUsageAndDestroyGraphicsAllocations(GraphicsAllocation *gfxAllocation);

View File

@@ -12,9 +12,6 @@
using namespace OCLRT;
void ResidencyData::updateCompletionData(uint64_t newFenceValue, uint32_t contextId) {
if (contextId + 1 > lastFenceValues.size()) {
lastFenceValues.resize(contextId + 1);
}
lastFenceValues[contextId] = newFenceValue;
}

View File

@@ -8,8 +8,10 @@
#pragma once
#include "engine_node.h"
#include <array>
#include <cinttypes>
#include <vector>
namespace OCLRT {
struct ResidencyData {
@@ -24,6 +26,6 @@ struct ResidencyData {
uint64_t getFenceValueForContextId(uint32_t contextId);
protected:
std::vector<uint64_t> lastFenceValues;
std::array<uint64_t, maxOsContextCount> lastFenceValues = {};
};
} // namespace OCLRT

View File

@@ -573,10 +573,13 @@ void DrmMemoryManager::freeGraphicsMemoryImpl(GraphicsAllocation *gfxAllocation)
delete gfxAllocation;
search->wait(-1);
unreference(search);
}
// Waits on the buffer object that backs the given allocation.
// The allocation is treated as a DrmAllocation; wait() is called with -1
// (presumably "no timeout" - confirm against BufferObject::wait).
void DrmMemoryManager::handleFenceCompletion(GraphicsAllocation *allocation) {
    auto *drmAllocation = static_cast<DrmAllocation *>(allocation);
    auto *bufferObject = drmAllocation->getBO();
    bufferObject->wait(-1);
}
uint64_t DrmMemoryManager::getSystemSharedMemory() {
uint64_t hostMemorySize = MemoryConstants::pageSize * (uint64_t)(sysconf(_SC_PHYS_PAGES));

View File

@@ -30,6 +30,7 @@ class DrmMemoryManager : public MemoryManager {
void addAllocationToHostPtrManager(GraphicsAllocation *gfxAllocation) override;
void removeAllocationFromHostPtrManager(GraphicsAllocation *gfxAllocation) override;
void freeGraphicsMemoryImpl(GraphicsAllocation *gfxAllocation) override;
void handleFenceCompletion(GraphicsAllocation *allocation) override;
DrmAllocation *allocateGraphicsMemoryForNonSvmHostPtr(size_t size, void *cpuPtr) override;
GraphicsAllocation *createGraphicsAllocationFromSharedHandle(osHandle handle, bool requireSpecificBitness) override;
GraphicsAllocation *createPaddedAllocation(GraphicsAllocation *inputGraphicsAllocation, size_t sizeWithPadding) override;

View File

@@ -340,6 +340,18 @@ void WddmMemoryManager::freeGraphicsMemoryImpl(GraphicsAllocation *gfxAllocation
delete gfxAllocation;
}
// For every registered engine, looks up the last fence value recorded in the
// allocation's residency data for that engine's context and, when it is
// non-zero, waits from the CPU on that context's monitored fence.
// A fence value of zero results in no wait for that engine.
void WddmMemoryManager::handleFenceCompletion(GraphicsAllocation *allocation) {
    auto *wddmAlloc = static_cast<WddmAllocation *>(allocation);
    for (auto &engine : this->registeredEngines) {
        const auto fenceValue = wddmAlloc->getResidencyData().getFenceValueForContextId(engine.osContext->getContextId());
        if (fenceValue == 0u) {
            continue;
        }
        auto *osContextWin = static_cast<OsContextWin *>(engine.osContext);
        const auto &monitoredFence = osContextWin->getResidencyController().getMonitoredFence();
        osContextWin->getWddm()->waitFromCpu(fenceValue, monitoredFence);
    }
}
bool WddmMemoryManager::tryDeferDeletions(const D3DKMT_HANDLE *handles, uint32_t allocationCount, D3DKMT_HANDLE resourceHandle) {
bool status = true;
if (deferredDeleter) {

View File

@@ -32,6 +32,8 @@ class WddmMemoryManager : public MemoryManager {
WddmMemoryManager &operator=(const WddmMemoryManager &) = delete;
void freeGraphicsMemoryImpl(GraphicsAllocation *gfxAllocation) override;
void handleFenceCompletion(GraphicsAllocation *allocation) override;
GraphicsAllocation *allocateGraphicsMemory(const AllocationProperties &properties, const void *ptr) override;
GraphicsAllocation *allocateGraphicsMemoryForNonSvmHostPtr(size_t size, void *cpuPtr) override;
GraphicsAllocation *createGraphicsAllocationFromSharedHandle(osHandle handle, bool requireSpecificBitness) override;

View File

@@ -101,18 +101,4 @@ namespace OCLRT {
extern bool overrideDeviceWithDefaultHardwareInfo;
extern bool overrideCommandStreamReceiverCreation;
// Checks that clGetDeviceIDs reports one device when CreateMultipleDevices is
// set to 2 and LimitAmountOfReturnedDevices is set to 1.
TEST(MultiDeviceTests, givenCreateMultipleDevicesAndLimitAmountOfReturnedDevicesFlagWhenClGetDeviceIdsIsCalledThenLowerValueIsReturned) {
// Drop any existing platform object before the query below.
platformImpl.reset(nullptr);
// Scoped overrides of the two global flags
// (presumably restored when the backups go out of scope - confirm against VariableBackup).
VariableBackup<bool> backup(&overrideCommandStreamReceiverCreation, true);
VariableBackup<bool> overrideHelper(&overrideDeviceWithDefaultHardwareInfo, false);
DeviceFactoryCleaner cleaner;
DebugManagerStateRestore stateRestore;
DebugManager.flags.CreateMultipleDevices.set(2);
DebugManager.flags.LimitAmountOfReturnedDevices.set(1);
cl_uint numDevices = 0;
// Count-only query: no output device array is passed.
auto retVal = clGetDeviceIDs(nullptr, CL_DEVICE_TYPE_GPU, 0, nullptr, &numDevices);
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_EQ(1u, numDevices);
}
} // namespace OCLRT

View File

@@ -39,21 +39,28 @@ struct GetDevicesTest : ::testing::Test {
DebugManagerStateRestore stateRestorer;
};
HWTEST_F(GetDevicesTest, givenGetDevicesWhenCsrIsSetToValidTypeThenTheFunctionReturnsTheExpectedValueOfHardwareInfo) {
for (int productFamily = 0; productFamily < IGFX_MAX_PRODUCT; productFamily++) {
const char *hwPrefix = hardwarePrefix[productFamily];
HWTEST_F(GetDevicesTest, givenGetDevicesWhenCsrIsSetToVariousTypesThenTheFunctionReturnsTheExpectedValueOfHardwareInfo) {
for (int productFamilyIndex = 0; productFamilyIndex < IGFX_MAX_PRODUCT; productFamilyIndex++) {
const char *hwPrefix = hardwarePrefix[productFamilyIndex];
if (hwPrefix == nullptr) {
continue;
}
for (int csrTypes = 0; csrTypes <= CSR_TYPES_NUM; csrTypes++) {
CommandStreamReceiverType csrType = static_cast<CommandStreamReceiverType>(csrTypes);
std::string productFamily(hwPrefix);
const std::string productFamily(hwPrefix);
for (int csrTypes = -1; csrTypes <= CSR_TYPES_NUM; csrTypes++) {
CommandStreamReceiverType csrType;
if (csrTypes != -1) {
csrType = static_cast<CommandStreamReceiverType>(csrTypes);
DebugManager.flags.SetCommandStreamReceiver.set(csrType);
} else {
csrType = CSR_HW;
DebugManager.flags.SetCommandStreamReceiver.set(-1);
}
DebugManager.flags.SetCommandStreamReceiver.set(csrType);
DebugManager.flags.ProductFamilyOverride.set(productFamily);
ExecutionEnvironment exeEnv;
auto ret = getDevices(&hwInfo, numDevices, exeEnv);
const auto ret = getDevices(&hwInfo, numDevices, exeEnv);
switch (csrType) {
case CSR_HW:

View File

@@ -388,6 +388,29 @@ TEST_F(MemoryAllocatorTest, givenMemoryManagerWhenAskedFor32bitAllocationWithPtr
memoryManager->freeGraphicsMemory(allocation);
}
// Freeing an allocation that carries host-pointer fragments must not invoke
// handleFenceCompletion on the memory manager.
TEST_F(MemoryAllocatorTest, givenAllocationWithFragmentsWhenCallingFreeGraphicsMemoryThenDoNotCallHandleFenceCompletion) {
    auto allocationSize = 3u * MemoryConstants::pageSize;
    void *hostPtr = reinterpret_cast<void *>(0xbeef1);

    AllocationProperties allocationProperties{allocationSize, GraphicsAllocation::AllocationType::UNDECIDED};
    allocationProperties.flags.allocateMemory = false;

    // The test expects this host-pointer allocation to be described by three fragments.
    auto *fragmentedAllocation = memoryManager->allocateGraphicsMemory(allocationProperties, hostPtr);
    EXPECT_EQ(3u, fragmentedAllocation->fragmentsStorage.fragmentCount);

    EXPECT_EQ(0u, memoryManager->handleFenceCompletionCalled);
    memoryManager->freeGraphicsMemory(fragmentedAllocation);
    EXPECT_EQ(0u, memoryManager->handleFenceCompletionCalled);
}
// Freeing an allocation that has no fragments must invoke
// handleFenceCompletion exactly once.
TEST_F(MemoryAllocatorTest, givenAllocationWithoutFragmentsWhenCallingFreeGraphicsMemoryThenCallHandleFenceCompletion) {
    const AllocationProperties bufferProperties{MemoryConstants::pageSize, GraphicsAllocation::AllocationType::BUFFER};
    auto *bufferAllocation = memoryManager->allocateGraphicsMemoryWithProperties(bufferProperties);
    EXPECT_EQ(0u, bufferAllocation->fragmentsStorage.fragmentCount);

    EXPECT_EQ(0u, memoryManager->handleFenceCompletionCalled);
    memoryManager->freeGraphicsMemory(bufferAllocation);
    EXPECT_EQ(1u, memoryManager->handleFenceCompletionCalled);
}
class MockPrintfHandler : public PrintfHandler {
public:
static MockPrintfHandler *create(const MultiDispatchInfo &multiDispatchInfo, Device &deviceArg) {
@@ -1564,16 +1587,16 @@ TEST(ResidencyDataTest, givenResidencyDataWhenUpdateCompletionDataIsCalledThenIt
auto lastFenceValue2 = 23llu;
auto lastFenceValue3 = 373llu;
EXPECT_EQ(0u, residency.lastFenceValues.size());
EXPECT_EQ(maxOsContextCount, residency.lastFenceValues.size());
residency.updateCompletionData(lastFenceValue, osContext.getContextId());
EXPECT_EQ(1u, residency.lastFenceValues.size());
EXPECT_EQ(maxOsContextCount, residency.lastFenceValues.size());
EXPECT_EQ(lastFenceValue, residency.lastFenceValues[0]);
EXPECT_EQ(lastFenceValue, residency.getFenceValueForContextId(osContext.getContextId()));
residency.updateCompletionData(lastFenceValue2, osContext2.getContextId());
EXPECT_EQ(2u, residency.lastFenceValues.size());
EXPECT_EQ(maxOsContextCount, residency.lastFenceValues.size());
EXPECT_EQ(lastFenceValue2, residency.lastFenceValues[1]);
EXPECT_EQ(lastFenceValue2, residency.getFenceValueForContextId(osContext2.getContextId()));

View File

@@ -65,11 +65,18 @@ class MockMemoryManager : public OsAgnosticMemoryManager {
unlockResourceCalled++;
OsAgnosticMemoryManager::unlockResourceImpl(gfxAllocation);
}
// Counts the call in handleFenceCompletionCalled, then forwards to the
// OsAgnosticMemoryManager implementation.
void handleFenceCompletion(GraphicsAllocation *graphicsAllocation) override {
    ++handleFenceCompletionCalled;
    OsAgnosticMemoryManager::handleFenceCompletion(graphicsAllocation);
}
GraphicsAllocation *allocate32BitGraphicsMemory(size_t size, const void *ptr, GraphicsAllocation::AllocationType allocationType);
uint32_t freeGraphicsMemoryCalled = 0u;
uint32_t unlockResourceCalled = 0u;
uint32_t lockResourceCalled = 0u;
uint32_t handleFenceCompletionCalled = 0u;
bool allocationCreated = false;
bool allocation64kbPageCreated = false;
bool allocationInDevicePoolCreated = false;

View File

@@ -196,6 +196,7 @@ GMM_GFX_PARTITIONING *WddmMock::getGfxPartitionPtr() {
// Records the call (call count, fence value and address of the monitored fence)
// in waitFromCpuResult, delegates to Wddm::waitFromCpu, stores its result in
// waitFromCpuResult.success and returns it.
bool WddmMock::waitFromCpu(uint64_t lastFenceValue, const MonitoredFence &monitoredFence) {
    auto &record = waitFromCpuResult;
    record.called++;
    record.uint64ParamPassed = lastFenceValue;
    record.monitoredFence = &monitoredFence;
    record.success = Wddm::waitFromCpu(lastFenceValue, monitoredFence);
    return record.success;
}

View File

@@ -28,16 +28,19 @@ struct CallResult {
void *commandHeaderSubmitted = nullptr;
void *cpuPtrPassed = nullptr;
};
struct MakeResidentCall : public CallResult {
struct MakeResidentCall : CallResult {
std::vector<D3DKMT_HANDLE> handlePack;
uint32_t handleCount = 0;
};
struct EvictCallResult : public CallResult {
struct EvictCallResult : CallResult {
EvictionStatus status = EvictionStatus::UNKNOWN;
};
struct KmDafLockCall : public CallResult {
struct KmDafLockCall : CallResult {
std::vector<D3DKMT_HANDLE> lockedAllocations;
};
// Call record for WddmMock::waitFromCpu: extends CallResult with the address
// of the monitored fence that was passed to the most recent call.
struct WaitFromCpuResult : CallResult {
// Null until a call has been recorded.
const MonitoredFence *monitoredFence = nullptr;
};
} // namespace WddmMockHelpers
class WddmMock : public Wddm {
@@ -126,7 +129,7 @@ class WddmMock : public Wddm {
WddmMockHelpers::CallResult lockResult;
WddmMockHelpers::CallResult unlockResult;
WddmMockHelpers::KmDafLockCall kmDafLockResult;
WddmMockHelpers::CallResult waitFromCpuResult;
WddmMockHelpers::WaitFromCpuResult waitFromCpuResult;
WddmMockHelpers::CallResult releaseReservedAddressResult;
WddmMockHelpers::CallResult reserveValidAddressRangeResult;
WddmMockHelpers::EvictCallResult evictAllTemporaryResourcesResult;

View File

@@ -534,6 +534,21 @@ TEST_F(DrmMemoryManagerTest, Allocate_HostPtr_UserptrFail) {
mock->ioctl_res = 0;
}
// Verifies that handleFenceCompletion on a DRM allocation results in a gemWait ioctl.
TEST_F(DrmMemoryManagerTest, givenDrmAllocationWhenHandleFenceCompletionThenCallBufferObjectWait) {
// Expected ioctl counts up to the testIoctls() check below:
// one gemUserptr, one gemWait and no contextDestroy.
mock->ioctl_expected.gemUserptr = 1;
mock->ioctl_expected.gemWait = 1;
mock->ioctl_expected.contextDestroy = 0;
auto allocation = memoryManager->allocateGraphicsMemoryWithProperties({1024, GraphicsAllocation::AllocationType::UNDECIDED});
memoryManager->handleFenceCompletion(allocation);
// Check the counts now, before the cleanup below issues further ioctls.
mock->testIoctls();
// Updated expectations covering the free below
// (presumably checked again by the fixture at teardown - confirm).
mock->ioctl_expected.gemClose = 1;
mock->ioctl_expected.gemWait = 2;
mock->ioctl_expected.contextDestroy = static_cast<int>(device->getExecutionEnvironment()->commandStreamReceivers[0].size());
memoryManager->freeGraphicsMemory(allocation);
}
TEST_F(DrmMemoryManagerTest, givenDrmMemoryManagerWhengetSystemSharedMemoryIsCalledThenContextGetParamIsCalled) {
mock->getContextParamRetValue = 16 * MemoryConstants::gigaByte;
uint64_t mem = memoryManager->getSystemSharedMemory();

View File

@@ -173,6 +173,7 @@ TEST_F(DeviceCommandStreamTest, CreateWddmCSRWithAubDump) {
TEST_F(WddmCommandStreamTest, givenFlushStampWhenWaitCalledThenWaitForSpecifiedMonitoredFence) {
uint64_t stampToWait = 123;
wddm->waitFromCpuResult.called = 0u;
csr->waitForFlushStamp(stampToWait);
EXPECT_EQ(1u, wddm->waitFromCpuResult.called);
EXPECT_TRUE(wddm->waitFromCpuResult.success);

View File

@@ -232,8 +232,7 @@ TEST_F(WddmMemoryManagerSimpleTest,
memoryManager->freeGraphicsMemory(allocation);
}
TEST_F(WddmMemoryManagerTest,
givenAllocateGraphicsMemoryForNonSvmHostPtrIsCalledWhencreateWddmAllocationFailsThenGraphicsAllocationIsNotCreated) {
TEST_F(WddmMemoryManagerTest, givenAllocateGraphicsMemoryForNonSvmHostPtrIsCalledWhencreateWddmAllocationFailsThenGraphicsAllocationIsNotCreated) {
char hostPtr[64];
memoryManager->setDeferredDeleter(nullptr);
setMapGpuVaFailConfigFcn(0, 1);
@@ -243,6 +242,67 @@ TEST_F(WddmMemoryManagerTest,
memoryManager->freeGraphicsMemory(allocation);
}
// With a single registered engine and a recorded fence value of zero,
// handleFenceCompletion must not wait from the CPU.
TEST_F(WddmMemoryManagerSimpleTest, givenZeroFenceValueOnSingleEngineRegisteredWhenHandleFenceCompletionIsCalledThenDoNotWaitOnCpu) {
    ASSERT_EQ(1u, memoryManager->getRegisteredEnginesCount());

    auto *graphicsAllocation = memoryManager->allocateGraphicsMemoryWithProperties({32, GraphicsAllocation::AllocationType::BUFFER});
    auto *wddmAllocation = static_cast<WddmAllocation *>(graphicsAllocation);
    auto &residencyData = wddmAllocation->getResidencyData();
    residencyData.updateCompletionData(0u, 0u);

    memoryManager->handleFenceCompletion(wddmAllocation);
    EXPECT_EQ(0u, wddm->waitFromCpuResult.called);

    memoryManager->freeGraphicsMemory(wddmAllocation);
}
// With a single registered engine and a non-zero recorded fence value,
// handleFenceCompletion must wait from the CPU once, passing that value
// and the engine's monitored fence.
TEST_F(WddmMemoryManagerSimpleTest, givenNonZeroFenceValueOnSingleEngineRegisteredWhenHandleFenceCompletionIsCalledThenWaitOnCpuOnce) {
    ASSERT_EQ(1u, memoryManager->getRegisteredEnginesCount());

    auto *graphicsAllocation = memoryManager->allocateGraphicsMemoryWithProperties({32, GraphicsAllocation::AllocationType::BUFFER});
    auto *wddmAllocation = static_cast<WddmAllocation *>(graphicsAllocation);

    auto *engineContext = static_cast<OsContextWin *>(memoryManager->getRegisteredEngines()[0].osContext);
    const auto *expectedFence = &engineContext->getResidencyController().getMonitoredFence();

    wddmAllocation->getResidencyData().updateCompletionData(129u, 0u);
    memoryManager->handleFenceCompletion(wddmAllocation);

    const auto &waitRecord = wddm->waitFromCpuResult;
    EXPECT_EQ(1u, waitRecord.called);
    EXPECT_EQ(129u, waitRecord.uint64ParamPassed);
    EXPECT_EQ(expectedFence, waitRecord.monitoredFence);

    memoryManager->freeGraphicsMemory(wddmAllocation);
}
// With two registered engines that both have non-zero recorded fence values,
// handleFenceCompletion must wait from the CPU twice; the mock's record then
// holds the value and monitored fence of the second engine.
TEST_F(WddmMemoryManagerSimpleTest, givenNonZeroFenceValuesOnMultipleEnginesRegisteredWhenHandleFenceCompletionIsCalledThenWaitOnCpuForEachEngine) {
    auto &&hwHelper = HwHelper::get(platformDevices[0]->pPlatform->eRenderCoreFamily);
    const auto preemptionMode = PreemptionHelper::getDefaultPreemptionMode(*platformDevices[0]);
    memoryManager->createAndRegisterOsContext(nullptr, hwHelper.getGpgpuEngineInstances()[1], 2, preemptionMode);
    ASSERT_EQ(2u, memoryManager->getRegisteredEnginesCount());

    auto *graphicsAllocation = memoryManager->allocateGraphicsMemoryWithProperties({32, GraphicsAllocation::AllocationType::BUFFER});
    auto *wddmAllocation = static_cast<WddmAllocation *>(graphicsAllocation);

    auto *secondEngineContext = static_cast<OsContextWin *>(memoryManager->getRegisteredEngines()[1].osContext);
    const auto *secondEngineFence = &secondEngineContext->getResidencyController().getMonitoredFence();

    auto &residencyData = wddmAllocation->getResidencyData();
    residencyData.updateCompletionData(129u, 0u);
    residencyData.updateCompletionData(152u, 1u);

    memoryManager->handleFenceCompletion(wddmAllocation);

    const auto &waitRecord = wddm->waitFromCpuResult;
    EXPECT_EQ(2u, waitRecord.called);
    EXPECT_EQ(152u, waitRecord.uint64ParamPassed);
    EXPECT_EQ(secondEngineFence, waitRecord.monitoredFence);

    memoryManager->freeGraphicsMemory(wddmAllocation);
}
// With two registered engines where only context 0 has a non-zero recorded
// fence value, handleFenceCompletion must wait from the CPU exactly once,
// using that value and the monitored fence of engine 0.
TEST_F(WddmMemoryManagerSimpleTest, givenNonZeroFenceValueOnSomeOfMultipleEnginesRegisteredWhenHandleFenceCompletionIsCalledThenWaitOnCpuForTheseEngines) {
    memoryManager->createAndRegisterOsContext(nullptr, HwHelper::get(platformDevices[0]->pPlatform->eRenderCoreFamily).getGpgpuEngineInstances()[1], 2, PreemptionHelper::getDefaultPreemptionMode(*platformDevices[0]));
    ASSERT_EQ(2u, memoryManager->getRegisteredEnginesCount());

    auto allocation = static_cast<WddmAllocation *>(memoryManager->allocateGraphicsMemoryWithProperties({32, GraphicsAllocation::AllocationType::BUFFER}));
    // Fence of engine 0: the only engine that is expected to be waited on.
    auto firstEngineFence = &static_cast<OsContextWin *>(memoryManager->getRegisteredEngines()[0].osContext)->getResidencyController().getMonitoredFence();

    allocation->getResidencyData().updateCompletionData(129u, 0u);
    // Zero fence value for context 1: no wait expected for that engine.
    allocation->getResidencyData().updateCompletionData(0u, 1u);

    memoryManager->handleFenceCompletion(allocation);

    EXPECT_EQ(1u, wddm->waitFromCpuResult.called);
    // Unsigned literal: the recorded value is a uint64_t, matching the sibling tests.
    EXPECT_EQ(129u, wddm->waitFromCpuResult.uint64ParamPassed);
    EXPECT_EQ(firstEngineFence, wddm->waitFromCpuResult.monitoredFence);

    memoryManager->freeGraphicsMemory(allocation);
}
// The fixture's memory manager must report no virtual padding support.
TEST_F(WddmMemoryManagerTest, givenDefaultWddmMemoryManagerWhenAskedForVirtualPaddingSupportThenFalseIsReturned) {
    const auto virtualPaddingSupported = memoryManager->peekVirtualPaddingSupport();
    EXPECT_FALSE(virtualPaddingSupported);
}