Force resource locking on transfer calls

Add debug variables to force resource locking on memory transfer calls
and to call makeResident() on mapVirtualAddress() call.

Change-Id: Ifa78d951fcb81812b10a98252bd414124dec9c74
This commit is contained in:
Pawel Wilma
2018-12-12 14:36:45 +01:00
committed by sys_ocldev
parent 3581bdb804
commit 5094c630f7
17 changed files with 280 additions and 6 deletions

View File

@ -128,11 +128,11 @@ void *CommandQueue::cpuDataTransferHandler(TransferProperties &transferPropertie
}
break;
case CL_COMMAND_READ_BUFFER:
memcpy_s(transferProperties.ptr, transferProperties.size[0], ptrOffset(transferProperties.memObj->getCpuAddressForMemoryTransfer(), transferProperties.offset[0]), transferProperties.size[0]);
memcpy_s(transferProperties.ptr, transferProperties.size[0], transferProperties.getCpuPtrForReadWrite(), transferProperties.size[0]);
eventCompleted = true;
break;
case CL_COMMAND_WRITE_BUFFER:
memcpy_s(ptrOffset(transferProperties.memObj->getCpuAddressForMemoryTransfer(), transferProperties.offset[0]), transferProperties.size[0], transferProperties.ptr, transferProperties.size[0]);
memcpy_s(transferProperties.getCpuPtrForReadWrite(), transferProperties.size[0], transferProperties.ptr, transferProperties.size[0]);
eventCompleted = true;
break;
case CL_COMMAND_MARKER:

View File

@ -54,7 +54,11 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadBuffer(
TransferProperties transferProperties(buffer, CL_COMMAND_READ_BUFFER, 0, true, &offset, &size, ptr);
EventsRequest eventsRequest(numEventsInWaitList, eventWaitList, event);
cpuDataTransferHandler(transferProperties, eventsRequest, retVal);
if (DebugManager.flags.ForceResourceLockOnTransferCalls.get()) {
if (transferProperties.lockedPtr != nullptr) {
buffer->getMemoryManager()->unlockResource(buffer->getGraphicsAllocation());
}
}
return retVal;
}
MultiDispatchInfo dispatchInfo;

View File

@ -52,9 +52,15 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteBuffer(
TransferProperties transferProperties(buffer, CL_COMMAND_WRITE_BUFFER, 0, true, &offset, &size, const_cast<void *>(ptr));
EventsRequest eventsRequest(numEventsInWaitList, eventWaitList, event);
cpuDataTransferHandler(transferProperties, eventsRequest, retVal);
if (DebugManager.flags.ForceResourceLockOnTransferCalls.get()) {
if (transferProperties.lockedPtr != nullptr) {
buffer->getMemoryManager()->unlockResource(buffer->getGraphicsAllocation());
}
}
return retVal;
}
MultiDispatchInfo dispatchInfo;
if (!isMemTransferNeeded) {
NullSurface s;

View File

@ -9,6 +9,7 @@
#include "runtime/helpers/properties_helper.h"
#include "runtime/mem_obj/image.h"
#include "runtime/mem_obj/mem_obj.h"
#include "runtime/memory_manager/memory_manager.h"
namespace OCLRT {
TransferProperties::TransferProperties(MemObj *memObj, cl_command_type cmdType, cl_map_flags mapFlags, bool blocking,
@ -20,6 +21,11 @@ TransferProperties::TransferProperties(MemObj *memObj, cl_command_type cmdType,
if (memObj->peekClMemObjType() == CL_MEM_OBJECT_BUFFER) {
size[0] = *sizePtr;
offset[0] = *offsetPtr;
if (DebugManager.flags.ForceResourceLockOnTransferCalls.get()) {
if ((false == MemoryPool::isSystemMemoryPool(memObj->getGraphicsAllocation()->getMemoryPool())) && (memObj->getMemoryManager() != nullptr)) {
this->lockedPtr = memObj->getMemoryManager()->lockResource(memObj->getGraphicsAllocation());
}
}
} else {
size = {{sizePtr[0], sizePtr[1], sizePtr[2]}};
offset = {{offsetPtr[0], offsetPtr[1], offsetPtr[2]}};
@ -35,4 +41,9 @@ TransferProperties::TransferProperties(MemObj *memObj, cl_command_type cmdType,
}
}
}
// Returns the CPU address at which a buffer read/write should start.
// The locked resource pointer is used as the base when it is set; otherwise
// the mem object's CPU transfer address is used. In both cases the base is
// advanced by offset[0].
void *TransferProperties::getCpuPtrForReadWrite() {
    void *basePtr = lockedPtr;
    if (basePtr == nullptr) {
        basePtr = memObj->getCpuAddressForMemoryTransfer();
    }
    return ptrOffset(basePtr, offset[0]);
}
} // namespace OCLRT

View File

@ -57,6 +57,9 @@ struct TransferProperties {
void *ptr = nullptr;
uint32_t mipLevel = 0;
uint32_t mipPtrOffset = 0;
void *lockedPtr = nullptr;
void *getCpuPtrForReadWrite();
};
struct MapInfo {

View File

@ -105,6 +105,9 @@ class MemObj : public BaseObject<_cl_mem> {
size_t calculateMappedPtrLength(const MemObjSizeArray &size) const { return calculateOffsetForMapping(size); }
cl_mem_object_type peekClMemObjType() const { return memObjectType; }
size_t getOffset() const { return offset; }
// Accessor for the memory manager pointer held by this mem object.
MemoryManager *getMemoryManager() const { return memoryManager; }
protected:
void getOsSpecificMemObjectInfo(const cl_mem_info &paramName, size_t *srcParamSize, void **srcParam);

View File

@ -54,7 +54,7 @@ class OsAgnosticMemoryManager : public MemoryManager {
void addAllocationToHostPtrManager(GraphicsAllocation *gfxAllocation) override;
void removeAllocationFromHostPtrManager(GraphicsAllocation *gfxAllocation) override;
void freeGraphicsMemoryImpl(GraphicsAllocation *gfxAllocation) override;
void *lockResource(GraphicsAllocation *graphicsAllocation) override { return nullptr; };
void *lockResource(GraphicsAllocation *graphicsAllocation) override { return ptrOffset(graphicsAllocation->getUnderlyingBuffer(), static_cast<size_t>(graphicsAllocation->allocationOffset)); };
void unlockResource(GraphicsAllocation *graphicsAllocation) override{};
AllocationStatus populateOsHandles(OsHandleStorage &handleStorage) override;

View File

@ -73,6 +73,8 @@ DECLARE_DEBUG_VARIABLE(bool, UseNewHeapAllocator, true, "Custom 4GB heap allocat
DECLARE_DEBUG_VARIABLE(bool, UseNoRingFlushesKmdMode, true, "Windows only, passes flag to KMD that informs KMD to not emit any ring buffer flushes.")
DECLARE_DEBUG_VARIABLE(bool, DisableZeroCopyForUseHostPtr, false, "When active all buffer allocations created with CL_MEM_USE_HOST_PTR flag will not share memory with CPU.")
DECLARE_DEBUG_VARIABLE(bool, DisableZeroCopyForBuffers, false, "When active all buffer allocations will not share memory with CPU.")
DECLARE_DEBUG_VARIABLE(bool, ForceResourceLockOnTransferCalls, 0, "Forces resource locking on memory transfer calls")
DECLARE_DEBUG_VARIABLE(bool, EnableMakeResidentOnMapGpuVa, 0, "Make allocations resident on call mapGpuVirtualAddress")
/*FEATURE FLAGS*/
DECLARE_DEBUG_VARIABLE(bool, EnableNV12, true, "Enables NV12 extension")

View File

@ -370,6 +370,10 @@ bool Wddm::mapGpuVirtualAddressImpl(Gmm *gmm, D3DKMT_HANDLE handle, void *cpuPtr
kmDafListener->notifyMapGpuVA(featureTable->ftrKmdDaf, adapter, device, handle, MapGPUVA.VirtualAddress, gdi->escape);
if (DebugManager.flags.EnableMakeResidentOnMapGpuVa.get()) {
this->makeResident(&handle, 1, true, nullptr);
}
if (gmm->isRenderCompressed && pageTableManager.get()) {
return updateAuxTable(gpuPtr, gmm, true);
}

View File

@ -453,6 +453,62 @@ HWTEST_F(EnqueueReadBufferTypeTest, givenCommandQueueWhenEnqueueReadBufferIsCall
EXPECT_TRUE(mockCmdQ->notifyEnqueueReadBufferCalled);
}
// With ForceResourceLockOnTransferCalls and DoCpuCopyOnReadBuffer enabled and
// the buffer's allocation pool overridden to MemoryPool::SystemCpuInaccessible,
// a blocking enqueueReadBuffer is expected to call unlockResource exactly once.
HWTEST_F(EnqueueReadBufferTypeTest, givenEnqueueReadBufferCalledWhenLockedPtrInTransferPropertisIsAvailableThenItIsUnlocked) {
    DebugManagerStateRestore dbgRestore;
    DebugManager.flags.ForceResourceLockOnTransferCalls.set(true);
    DebugManager.flags.DoCpuCopyOnReadBuffer.set(true);

    ExecutionEnvironment executionEnvironment;
    MockMemoryManager memoryManager(false, true, executionEnvironment);
    MockContext ctx;
    cl_int retVal = CL_SUCCESS;
    ctx.setMemoryManager(&memoryManager);
    auto mockCmdQ = std::make_unique<MockCommandQueueHw<FamilyType>>(context, pDevice, nullptr);
    std::unique_ptr<Buffer> buffer(Buffer::create(&ctx, 0, 1, nullptr, retVal));
    // Fail the test cleanly instead of dereferencing a null buffer below.
    ASSERT_NE(nullptr, buffer.get());
    static_cast<MemoryAllocation *>(buffer->getGraphicsAllocation())->overrideMemoryPool(MemoryPool::SystemCpuInaccessible);
    void *ptr = nonZeroCopyBuffer->getCpuAddressForMemoryTransfer();

    retVal = mockCmdQ->enqueueReadBuffer(buffer.get(),
                                         CL_TRUE,
                                         0,
                                         MemoryConstants::cacheLineSize,
                                         ptr,
                                         0,
                                         nullptr,
                                         nullptr);

    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_EQ(1u, memoryManager.unlockResourceCalled);
}
// Same setup as the "available" variant, but the allocation pool is overridden
// to MemoryPool::System4KBPages; enqueueReadBuffer is then expected not to call
// unlockResource at all.
HWTEST_F(EnqueueReadBufferTypeTest, gicenEnqueueReadBufferCalledWhenLockedPtrInTransferPropertisIsNotAvailableThenItIsNotUnlocked) {
    DebugManagerStateRestore dbgRestore;
    DebugManager.flags.ForceResourceLockOnTransferCalls.set(true);
    DebugManager.flags.DoCpuCopyOnReadBuffer.set(true);

    ExecutionEnvironment executionEnvironment;
    MockMemoryManager memoryManager(false, true, executionEnvironment);
    MockContext ctx;
    cl_int retVal = CL_SUCCESS;
    ctx.setMemoryManager(&memoryManager);
    auto mockCmdQ = std::make_unique<MockCommandQueueHw<FamilyType>>(context, pDevice, nullptr);
    std::unique_ptr<Buffer> buffer(Buffer::create(&ctx, 0, 1, nullptr, retVal));
    // Fail the test cleanly instead of dereferencing a null buffer below.
    ASSERT_NE(nullptr, buffer.get());
    static_cast<MemoryAllocation *>(buffer->getGraphicsAllocation())->overrideMemoryPool(MemoryPool::System4KBPages);
    void *ptr = nonZeroCopyBuffer->getCpuAddressForMemoryTransfer();

    retVal = mockCmdQ->enqueueReadBuffer(buffer.get(),
                                         CL_TRUE,
                                         0,
                                         MemoryConstants::cacheLineSize,
                                         ptr,
                                         0,
                                         nullptr,
                                         nullptr);

    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_EQ(0u, memoryManager.unlockResourceCalled);
}
using NegativeFailAllocationTest = Test<NegativeFailAllocationCommandEnqueueBaseFixture>;
HWTEST_F(NegativeFailAllocationTest, givenEnqueueReadBufferWhenHostPtrAllocationCreationFailsThenReturnOutOfResource) {

View File

@ -14,6 +14,7 @@
#include "unit_tests/gen_common/gen_commands_common_validation.h"
#include "unit_tests/helpers/debug_manager_state_restore.h"
#include "unit_tests/helpers/unit_test_helper.h"
#include "unit_tests/mocks/mock_command_queue.h"
#include "test.h"
using namespace OCLRT;
@ -361,6 +362,62 @@ HWTEST_F(EnqueueWriteBufferTypeTest, givenInOrderQueueAndEnabledSupportCpuCopies
EXPECT_EQ(pCmdQ->taskLevel, 1u);
}
// With ForceResourceLockOnTransferCalls and DoCpuCopyOnWriteBuffer enabled and
// the buffer's allocation pool overridden to MemoryPool::SystemCpuInaccessible,
// enqueueWriteBuffer is expected to call unlockResource exactly once.
HWTEST_F(EnqueueWriteBufferTypeTest, givenEnqueueWriteBufferCalledWhenLockedPtrInTransferPropertisIsAvailableThenItIsUnlocked) {
    DebugManagerStateRestore dbgRestore;
    DebugManager.flags.ForceResourceLockOnTransferCalls.set(true);
    DebugManager.flags.DoCpuCopyOnWriteBuffer.set(true);

    ExecutionEnvironment executionEnvironment;
    MockMemoryManager memoryManager(false, true, executionEnvironment);
    MockContext ctx;
    cl_int retVal = CL_SUCCESS;
    ctx.setMemoryManager(&memoryManager);
    auto mockCmdQ = std::make_unique<MockCommandQueueHw<FamilyType>>(context, pDevice, nullptr);
    std::unique_ptr<Buffer> buffer(Buffer::create(&ctx, 0, 1, nullptr, retVal));
    // Fail the test cleanly instead of dereferencing a null buffer below.
    ASSERT_NE(nullptr, buffer.get());
    static_cast<MemoryAllocation *>(buffer->getGraphicsAllocation())->overrideMemoryPool(MemoryPool::SystemCpuInaccessible);
    void *ptr = srcBuffer->getCpuAddressForMemoryTransfer();

    retVal = mockCmdQ->enqueueWriteBuffer(buffer.get(),
                                          CL_FALSE,
                                          0,
                                          MemoryConstants::cacheLineSize,
                                          ptr,
                                          0,
                                          nullptr,
                                          nullptr);

    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_EQ(1u, memoryManager.unlockResourceCalled);
}
// Same setup as the "available" variant, but the allocation pool is overridden
// to MemoryPool::System4KBPages; enqueueWriteBuffer is then expected not to
// call unlockResource at all.
HWTEST_F(EnqueueWriteBufferTypeTest, givenEnqueueWriteBufferCalledWhenLockedPtrInTransferPropertisIsNotAvailableThenItIsNotUnlocked) {
    DebugManagerStateRestore dbgRestore;
    DebugManager.flags.ForceResourceLockOnTransferCalls.set(true);
    DebugManager.flags.DoCpuCopyOnWriteBuffer.set(true);

    ExecutionEnvironment executionEnvironment;
    MockMemoryManager memoryManager(false, true, executionEnvironment);
    MockContext ctx;
    cl_int retVal = CL_SUCCESS;
    ctx.setMemoryManager(&memoryManager);
    auto mockCmdQ = std::make_unique<MockCommandQueueHw<FamilyType>>(context, pDevice, nullptr);
    std::unique_ptr<Buffer> buffer(Buffer::create(&ctx, 0, 1, nullptr, retVal));
    // Fail the test cleanly instead of dereferencing a null buffer below.
    ASSERT_NE(nullptr, buffer.get());
    static_cast<MemoryAllocation *>(buffer->getGraphicsAllocation())->overrideMemoryPool(MemoryPool::System4KBPages);
    void *ptr = srcBuffer->getCpuAddressForMemoryTransfer();

    retVal = mockCmdQ->enqueueWriteBuffer(buffer.get(),
                                          CL_FALSE,
                                          0,
                                          MemoryConstants::cacheLineSize,
                                          ptr,
                                          0,
                                          nullptr,
                                          nullptr);

    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_EQ(0u, memoryManager.unlockResourceCalled);
}
using NegativeFailAllocationTest = Test<NegativeFailAllocationCommandEnqueueBaseFixture>;
HWTEST_F(NegativeFailAllocationTest, givenEnqueueWriteBufferWhenHostPtrAllocationCreationFailsThenReturnOutOfResource) {

View File

@ -43,6 +43,7 @@ set(IGDRCL_SRCS_tests_helpers
${CMAKE_CURRENT_SOURCE_DIR}/task_information_tests.cpp
${CMAKE_CURRENT_SOURCE_DIR}/test_debug_variables.inl
${CMAKE_CURRENT_SOURCE_DIR}/timestamp_packet_tests.cpp
${CMAKE_CURRENT_SOURCE_DIR}/transfer_properties_tests.cpp
${CMAKE_CURRENT_SOURCE_DIR}/uint16_sse4_tests.cpp
${CMAKE_CURRENT_SOURCE_DIR}/unit_test_helper.h
${CMAKE_CURRENT_SOURCE_DIR}/unit_test_helper.inl

View File

@ -0,0 +1,102 @@
/*
* Copyright (C) 2018 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "runtime/helpers/properties_helper.h"
#include "unit_tests/helpers/debug_manager_state_restore.h"
#include "unit_tests/mocks/mock_buffer.h"
#include "gtest/gtest.h"
using namespace OCLRT;
// The test sets no debug flags; TransferProperties built for a MockBuffer is
// expected to leave lockedPtr null.
TEST(TransferPropertiesTest, givenTransferPropertiesCreatedWhenDefaultDebugSettingThenLockPtrIsNotSet) {
    MockBuffer mockBuffer;
    size_t transferOffset = 0;
    size_t transferSize = 4096u;

    TransferProperties properties(&mockBuffer, CL_COMMAND_MAP_BUFFER, 0, false, &transferOffset, &transferSize, nullptr);

    EXPECT_EQ(nullptr, properties.lockedPtr);
}
// With ForceResourceLockOnTransferCalls enabled and the buffer's allocation
// pool overridden to MemoryPool::SystemCpuInaccessible, constructing
// TransferProperties is expected to populate lockedPtr.
TEST(TransferPropertiesTest, givenTransferPropertiesCreatedWhenForceResourceLockOnTransferCallsSetThenLockPtrIsSet) {
    DebugManagerStateRestore dbgRestore;
    DebugManager.flags.ForceResourceLockOnTransferCalls.set(true);

    ExecutionEnvironment executionEnvironment;
    OsAgnosticMemoryManager memoryManager(false, true, executionEnvironment);
    MockContext ctx;
    ctx.setMemoryManager(&memoryManager);
    cl_int retVal = CL_SUCCESS;
    std::unique_ptr<Buffer> buffer(Buffer::create(&ctx, 0, 1, nullptr, retVal));
    // Fail the test cleanly instead of dereferencing a null buffer below.
    ASSERT_NE(nullptr, buffer.get());
    static_cast<MemoryAllocation *>(buffer->getGraphicsAllocation())->overrideMemoryPool(MemoryPool::SystemCpuInaccessible);

    size_t offset = 0;
    size_t size = 4096u;
    TransferProperties transferProperties(buffer.get(), CL_COMMAND_MAP_BUFFER, 0, false, &offset, &size, nullptr);

    EXPECT_NE(nullptr, transferProperties.lockedPtr);
}
// With ForceResourceLockOnTransferCalls enabled but the buffer's allocation
// pool overridden to MemoryPool::System4KBPages, constructing
// TransferProperties is expected to leave lockedPtr null.
TEST(TransferPropertiesTest, givenTransferPropertiesCreatedWhenForceResourceLockOnTransferCallsSetAndMemoryPoolIsSystemMemoryThenLockPtrIsNotSet) {
    DebugManagerStateRestore dbgRestore;
    DebugManager.flags.ForceResourceLockOnTransferCalls.set(true);

    ExecutionEnvironment executionEnvironment;
    OsAgnosticMemoryManager memoryManager(false, true, executionEnvironment);
    MockContext ctx;
    ctx.setMemoryManager(&memoryManager);
    cl_int retVal = CL_SUCCESS;
    std::unique_ptr<Buffer> buffer(Buffer::create(&ctx, 0, 1, nullptr, retVal));
    // Fail the test cleanly instead of dereferencing a null buffer below.
    ASSERT_NE(nullptr, buffer.get());
    static_cast<MemoryAllocation *>(buffer->getGraphicsAllocation())->overrideMemoryPool(MemoryPool::System4KBPages);

    size_t offset = 0;
    size_t size = 4096u;
    TransferProperties transferProperties(buffer.get(), CL_COMMAND_MAP_BUFFER, 0, false, &offset, &size, nullptr);

    EXPECT_EQ(nullptr, transferProperties.lockedPtr);
}
// ForceResourceLockOnTransferCalls is enabled, but the mem object is a plain
// MockBuffer (per the test name, one without a memory manager attached);
// lockedPtr is expected to stay null.
TEST(TransferPropertiesTest, givenTransferPropertiesCreatedWhenForceResourceLockOnTransferCallsSetAndMemoryManagerInMemObjectIsNotSetThenLockPtrIsNotSet) {
    DebugManagerStateRestore flagsRestorer;
    DebugManager.flags.ForceResourceLockOnTransferCalls.set(true);

    MockBuffer mockBuffer;
    size_t transferOffset = 0;
    size_t transferSize = 4096u;

    TransferProperties properties(&mockBuffer, CL_COMMAND_MAP_BUFFER, 0, false, &transferOffset, &transferSize, nullptr);

    EXPECT_EQ(nullptr, properties.lockedPtr);
}
// When lockedPtr was populated during construction (zero offset here),
// getCpuPtrForReadWrite() is expected to return that same pointer.
TEST(TransferPropertiesTest, givenTransferPropertiesWhenLockedPtrIsSetThenItIsReturnedForReadWrite) {
    DebugManagerStateRestore dbgRestore;
    DebugManager.flags.ForceResourceLockOnTransferCalls.set(true);

    ExecutionEnvironment executionEnvironment;
    OsAgnosticMemoryManager memoryManager(false, true, executionEnvironment);
    MockContext ctx;
    ctx.setMemoryManager(&memoryManager);
    cl_int retVal = CL_SUCCESS;
    std::unique_ptr<Buffer> buffer(Buffer::create(&ctx, 0, 1, nullptr, retVal));
    // Fail the test cleanly instead of dereferencing a null buffer below.
    ASSERT_NE(nullptr, buffer.get());
    static_cast<MemoryAllocation *>(buffer->getGraphicsAllocation())->overrideMemoryPool(MemoryPool::SystemCpuInaccessible);

    size_t offset = 0;
    size_t size = 4096u;
    TransferProperties transferProperties(buffer.get(), CL_COMMAND_MAP_BUFFER, 0, false, &offset, &size, nullptr);

    ASSERT_NE(nullptr, transferProperties.lockedPtr);
    EXPECT_EQ(transferProperties.lockedPtr, transferProperties.getCpuPtrForReadWrite());
}
// When lockedPtr is null, getCpuPtrForReadWrite() is expected to return a
// different (i.e. non-null) pointer rather than the null lockedPtr.
TEST(TransferPropertiesTest, givenTransferPropertiesWhenLockedPtrIsNotSetThenItIsNotReturnedForReadWrite) {
    MockBuffer mockBuffer;
    size_t transferOffset = 0;
    size_t transferSize = 4096u;

    TransferProperties properties(&mockBuffer, CL_COMMAND_MAP_BUFFER, 0, false, &transferOffset, &transferSize, nullptr);

    ASSERT_EQ(nullptr, properties.lockedPtr);
    EXPECT_NE(properties.lockedPtr, properties.getCpuPtrForReadWrite());
}

View File

@ -762,14 +762,14 @@ TEST(OsAgnosticMemoryManager, givenMemoryManagerWhenCreateAllocationFromNtHandle
EXPECT_EQ(nullptr, graphicsAllocation);
}
TEST(OsAgnosticMemoryManager, givenMemoryManagerWhenLockUnlockCalledThenDoNothing) {
TEST(OsAgnosticMemoryManager, givenMemoryManagerWhenLockUnlockCalledThenReturnCpuPtr) {
ExecutionEnvironment executionEnvironment;
OsAgnosticMemoryManager memoryManager(false, false, executionEnvironment);
auto allocation = memoryManager.allocateGraphicsMemory(1);
ASSERT_NE(nullptr, allocation);
auto ptr = memoryManager.lockResource(allocation);
EXPECT_EQ(nullptr, ptr);
EXPECT_EQ(ptrOffset(allocation->getUnderlyingBuffer(), static_cast<size_t>(allocation->allocationOffset)), ptr);
memoryManager.unlockResource(allocation);
memoryManager.freeGraphicsMemory(allocation);

View File

@ -46,7 +46,13 @@ class MockMemoryManager : public OsAgnosticMemoryManager {
OsAgnosticMemoryManager::freeGraphicsMemoryImpl(gfxAllocation);
};
// Records each unlock request in unlockResourceCalled, then forwards the call
// to the OsAgnosticMemoryManager implementation.
void unlockResource(GraphicsAllocation *gfxAllocation) override {
    ++unlockResourceCalled;
    OsAgnosticMemoryManager::unlockResource(gfxAllocation);
}
uint32_t freeGraphicsMemoryCalled = 0u;
uint32_t unlockResourceCalled = 0u;
bool allocationCreated = false;
bool allocation64kbPageCreated = false;
bool allocationInDevicePoolCreated = false;

View File

@ -1460,3 +1460,20 @@ TEST(WddmMemoryManagerCleanupTest, givenUsedTagAllocationInWddmMemoryManagerWhen
executionEnvironment.commandStreamReceivers.clear();
EXPECT_NO_THROW(executionEnvironment.memoryManager.reset());
}
// With EnableMakeResidentOnMapGpuVa enabled, mapGpuVirtualAddressImpl is
// expected to succeed and to trigger exactly one makeResident call on the mock.
TEST_F(MockWddmMemoryManagerTest, givenWddmAllocationWhenEnableMakeResidentOnMapGpuVaIsSetThenMakeResidentIsCalledInMapVirtualAddress) {
    DebugManagerStateRestore flagsRestorer;
    DebugManager.flags.EnableMakeResidentOnMapGpuVa.set(true);

    auto gmm = std::make_unique<Gmm>(reinterpret_cast<void *>(123), 4096u, false);
    D3DGPU_VIRTUAL_ADDRESS mappedGpuVa = 0;

    WddmMock wddm;
    EXPECT_TRUE(wddm.init(PreemptionHelper::getDefaultPreemptionMode(*platformDevices[0])));

    // The raw pointer is handed to resetPageTableManager and not used afterwards.
    auto pageTableManagerMock = new NiceMock<MockGmmPageTableMngr>();
    wddm.resetPageTableManager(pageTableManagerMock);

    const bool mapped = wddm.mapGpuVirtualAddressImpl(gmm.get(), ALLOCATION_HANDLE, nullptr, mappedGpuVa, false, false, false);

    EXPECT_EQ(1u, wddm.makeResidentResult.called);
    ASSERT_TRUE(mapped);
}

View File

@ -96,3 +96,5 @@ AubDumpOverrideMmioRegister = 0
AubDumpOverrideMmioRegisterValue = 0
PowerSavingMode = 0
AubDumpAddMmioRegistersList = unk
ForceResourceLockOnTransferCalls = 0
EnableMakeResidentOnMapGpuVa = 0