Reuse hostPtr allocations

Change-Id: Ie7e24e6630b26809fac1215b66cd90b3cafda53f
Signed-off-by: Maciej Plewka <maciej.plewka@intel.com>
This commit is contained in:
Maciej Plewka
2020-06-26 14:36:57 +02:00
committed by sys_ocldev
parent e96981b34c
commit a661f4b878
5 changed files with 146 additions and 3 deletions

View File

@@ -38,6 +38,33 @@ void CommandList::removeHostPtrAllocations() {
hostPtrMap.clear();
}
// Looks up hostPtrMap for an already-tracked allocation whose host range contains
// [buffer, buffer + bufferSize). Returns that allocation, or nullptr when none qualifies.
//
// Only two entries can qualify: the one keyed exactly at `buffer`, and the closest
// entry keyed below `buffer`.
NEO::GraphicsAllocation *CommandList::getAllocationFromHostPtrMap(const void *buffer, uint64_t bufferSize) {
    // True when the entry's end address reaches at least the end of the requested range.
    const auto reachesRequestedEnd = [&](const auto &entry) {
        return ptrOffset(entry.first, entry.second->getUnderlyingBufferSize()) >= ptrOffset(buffer, bufferSize);
    };

    // First entry whose key is not below `buffer`.
    auto candidate = hostPtrMap.lower_bound(buffer);

    // Exact-key hit: usable only if the tracked allocation is large enough.
    if (candidate != hostPtrMap.end() && buffer == candidate->first && reachesRequestedEnd(*candidate)) {
        return candidate->second;
    }

    // No entry is keyed below `buffer`, so nothing can contain it.
    if (candidate == hostPtrMap.begin()) {
        return nullptr;
    }

    // Step back to the nearest entry that starts before `buffer`.
    --candidate;
    if (reachesRequestedEnd(*candidate)) {
        return candidate->second;
    }
    return nullptr;
}
// Returns an allocation backing the host range [buffer, buffer + bufferSize).
//
// If hostPtrMap already tracks an allocation containing the range, that allocation is
// reused and *offset is increased by the distance between `buffer` and the reused
// allocation's underlying buffer. Otherwise a new allocation is requested from the
// device and recorded in hostPtrMap under `buffer`; *offset is left untouched.
//
// `offset` must be non-null. Returns nullptr when the device did not provide an allocation.
NEO::GraphicsAllocation *CommandList::getHostPtrAlloc(const void *buffer, uint64_t bufferSize, size_t *offset) {
    if (NEO::GraphicsAllocation *tracked = getAllocationFromHostPtrMap(buffer, bufferSize)) {
        *offset += ptrDiff(buffer, tracked->getUnderlyingBuffer());
        return tracked;
    }

    NEO::GraphicsAllocation *created = device->allocateMemoryFromHostPtr(buffer, bufferSize);
    if (created == nullptr) {
        // Never store a null entry: getAllocationFromHostPtrMap dereferences every
        // mapped value, so a null would crash a later lookup.
        return nullptr;
    }

    // NOTE(review): if hostPtrMap is a unique-key map and `buffer` is already a key
    // (same pointer requested with a larger size), this insert is a no-op and `created`
    // is not tracked - confirm who releases it in that case.
    hostPtrMap.insert(std::make_pair(buffer, created));
    return created;
}
void CommandList::removeDeallocationContainerData() {
auto memoryManager = device ? device->getNEODevice()->getMemoryManager() : nullptr;

View File

@@ -181,6 +181,8 @@ struct CommandList : _ze_command_list_handle_t {
bool isCopyOnlyCmdList = false;
UnifiedMemoryControls unifiedMemoryControls;
bool indirectAllocationsAllowed = false;
NEO::GraphicsAllocation *getAllocationFromHostPtrMap(const void *buffer, uint64_t bufferSize);
NEO::GraphicsAllocation *getHostPtrAlloc(const void *buffer, uint64_t bufferSize, size_t *offset);
};
// Function-pointer type: takes a uint32_t and returns a CommandList pointer.
// NOTE(review): presumably a factory hook for creating command lists; the meaning of
// the uint32_t argument is not visible here - confirm against the registration site.
using CommandListAllocatorFn = CommandList *(*)(uint32_t);

View File

@@ -1209,10 +1209,9 @@ inline AlignedAllocationData CommandListCoreFamily<gfxCoreFamily>::getAlignedAll
bool hostPointerNeedsFlush = false;
if (srcAllocFound == false) {
alloc = device->allocateMemoryFromHostPtr(buffer, bufferSize);
hostPtrMap.insert(std::make_pair(buffer, alloc));
alloc = getHostPtrAlloc(buffer, bufferSize, &offset);
alignedPtr = static_cast<uintptr_t>(alloc->getGpuAddress() - offset);
alignedPtr = static_cast<uintptr_t>(alignDown(alloc->getGpuAddress(), NEO::EncodeSurfaceState<GfxFamily>::getSurfaceBaseAddressAlignment()));
hostPointerNeedsFlush = true;
} else {
alloc = allocData->gpuAllocation;

View File

@@ -41,6 +41,8 @@ struct WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>
using BaseClass::appendSignalEventPostWalker;
using BaseClass::commandListPreemptionMode;
using BaseClass::getAlignedAllocation;
using BaseClass::getAllocationFromHostPtrMap;
using BaseClass::getHostPtrAlloc;
using BaseClass::hostPtrMap;
WhiteBox() : ::L0::CommandListCoreFamily<gfxCoreFamily>(BaseClass::defaultNumIddsPerBlock) {}

View File

@@ -1070,5 +1070,118 @@ HWTEST2_F(CommandListCreate, givenCopyOnlyCommandListWhenAppendBlitFillCalledWit
EXPECT_EQ(allocValue, pattern[i % 4]);
}
}
// A lookup for a sub-range that lies strictly inside a tracked allocation must return
// that very allocation.
HWTEST2_F(CommandListCreate, givenHostAllocInMapWhenGettingAllocInRangeThenAllocFromMapReturned, Platforms) {
    auto commandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
    commandList->initialize(device, true);

    // Track one mock allocation built with base 0x1200 and size 0x1000.
    uint64_t gpuAddress = 0x1200;
    const void *cpuPtr = reinterpret_cast<const void *>(gpuAddress);
    size_t allocSize = 0x1000;
    NEO::MockGraphicsAllocation alloc(const_cast<void *>(cpuPtr), gpuAddress, allocSize);
    commandList->hostPtrMap.insert(std::make_pair(cpuPtr, &alloc));

    // Query starts 0x10 bytes in and ends 0x10 bytes before the allocation's end.
    auto newBufferPtr = ptrOffset(cpuPtr, 0x10);
    auto newBufferSize = allocSize - 0x20;
    auto newAlloc = commandList->getAllocationFromHostPtrMap(newBufferPtr, newBufferSize);

    // Assert identity, not just non-null: the map's allocation is what must come back.
    EXPECT_EQ(newAlloc, &alloc);

    // `alloc` is destroyed before `commandList`; drop the entry so the command list is
    // not left holding a pointer to the stack-allocated mock.
    commandList->hostPtrMap.clear();
}
// A lookup that starts inside a tracked allocation but extends past its end must miss.
HWTEST2_F(CommandListCreate, givenHostAllocInMapWhenSizeIsOutOfRangeThenNullPtrReturned, Platforms) {
    auto commandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
    commandList->initialize(device, true);

    // Track one mock allocation built with base 0x1200 and size 0x1000.
    const uint64_t mappedAddress = 0x1200;
    const size_t mappedSize = 0x1000;
    const void *mappedPtr = reinterpret_cast<const void *>(mappedAddress);
    NEO::MockGraphicsAllocation mockAllocation(const_cast<void *>(mappedPtr), mappedAddress, mappedSize);
    commandList->hostPtrMap.insert(std::make_pair(mappedPtr, &mockAllocation));

    // Request begins 0x10 bytes in and is 0x20 bytes larger than the whole allocation.
    auto lookupResult = commandList->getAllocationFromHostPtrMap(ptrOffset(mappedPtr, 0x10), mappedSize + 0x20);
    EXPECT_EQ(lookupResult, nullptr);

    // The mock is destroyed before the command list; remove the entry pointing at it.
    commandList->hostPtrMap.clear();
}
// A lookup whose start address lies below the only tracked allocation must miss, even
// though the requested range overlaps the allocation.
HWTEST2_F(CommandListCreate, givenHostAllocInMapWhenPtrIsOutOfRangeThenNullPtrReturned, Platforms) {
    auto commandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
    commandList->initialize(device, true);

    // Single tracked mock allocation: base 0x1200, size 0x1000.
    const uint64_t trackedBase = 0x1200;
    const size_t trackedSize = 0x1000;
    const void *trackedPtr = reinterpret_cast<const void *>(trackedBase);
    NEO::MockGraphicsAllocation trackedAlloc(const_cast<void *>(trackedPtr), trackedBase, trackedSize);
    commandList->hostPtrMap.insert(std::make_pair(trackedPtr, &trackedAlloc));

    // Query begins 0x100 bytes before the tracked base.
    const void *queryPtr = reinterpret_cast<const void *>(trackedBase - 0x100);
    const auto querySize = trackedSize - 0x200;
    EXPECT_EQ(commandList->getAllocationFromHostPtrMap(queryPtr, querySize), nullptr);

    // Clear before the stack-allocated mock goes out of scope.
    commandList->hostPtrMap.clear();
}
// getHostPtrAlloc on a sub-range of a tracked allocation must reuse that allocation and
// report the sub-range's distance from the allocation's start through `offset`.
HWTEST2_F(CommandListCreate, givenHostAllocInMapWhenGetHostPtrAllocCalledThenCorrectOffsetIsSet, Platforms) {
    auto commandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
    commandList->initialize(device, true);

    // Track one mock allocation built with base 0x1200 and size 0x1000.
    uint64_t gpuAddress = 0x1200;
    const void *cpuPtr = reinterpret_cast<const void *>(gpuAddress);
    size_t allocSize = 0x1000;
    NEO::MockGraphicsAllocation alloc(const_cast<void *>(cpuPtr), gpuAddress, allocSize);
    commandList->hostPtrMap.insert(std::make_pair(cpuPtr, &alloc));

    // Query a range that begins `expectedOffset` bytes into the tracked allocation.
    size_t expectedOffset = 0x10;
    auto newBufferPtr = ptrOffset(cpuPtr, expectedOffset);
    auto newBufferSize = allocSize - 0x20;
    size_t offset = 0;
    auto newAlloc = commandList->getHostPtrAlloc(newBufferPtr, newBufferSize, &offset);

    // Identity check: a non-null result alone would also pass if a fresh allocation had
    // been created instead of reusing the mapped one.
    EXPECT_EQ(newAlloc, &alloc);
    EXPECT_EQ(offset, expectedOffset);

    // `alloc` is destroyed before `commandList`; drop the entry so the command list is
    // not left holding a pointer to the stack-allocated mock.
    commandList->hostPtrMap.clear();
}
// A lookup keyed exactly at a tracked pointer, with a size that fits, returns the
// tracked allocation.
HWTEST2_F(CommandListCreate, givenHostAllocInMapWhenPtrIsInMapThenAllocationReturned, Platforms) {
    auto commandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
    commandList->initialize(device, true);

    // Single tracked mock allocation: base 0x1200, size 0x1000.
    const uint64_t hostAddress = 0x1200;
    const size_t hostSize = 0x1000;
    const void *hostPtr = reinterpret_cast<const void *>(hostAddress);
    NEO::MockGraphicsAllocation mappedAlloc(const_cast<void *>(hostPtr), hostAddress, hostSize);
    commandList->hostPtrMap.insert(std::make_pair(hostPtr, &mappedAlloc));

    // Same start pointer, 0x20 bytes shorter than the tracked allocation.
    auto found = commandList->getAllocationFromHostPtrMap(hostPtr, hostSize - 0x20);
    EXPECT_EQ(found, &mappedAlloc);

    // Remove the entry before the stack-allocated mock is destroyed.
    commandList->hostPtrMap.clear();
}
// A lookup keyed exactly at a tracked pointer but asking for more bytes than the
// tracked allocation holds must miss.
HWTEST2_F(CommandListCreate, givenHostAllocInMapWhenPtrIsInMapButWithBiggerSizeThenNullPtrReturned, Platforms) {
    auto commandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
    commandList->initialize(device, true);

    // Single tracked mock allocation: base 0x1200, size 0x1000.
    const uint64_t hostAddress = 0x1200;
    const size_t hostSize = 0x1000;
    const void *hostPtr = reinterpret_cast<const void *>(hostAddress);
    NEO::MockGraphicsAllocation mappedAlloc(const_cast<void *>(hostPtr), hostAddress, hostSize);
    commandList->hostPtrMap.insert(std::make_pair(hostPtr, &mappedAlloc));

    // Same start pointer, 0x20 bytes longer than the tracked allocation.
    const auto oversizedRequest = hostSize + 0x20;
    auto found = commandList->getAllocationFromHostPtrMap(hostPtr, oversizedRequest);
    EXPECT_EQ(found, nullptr);

    // Remove the entry before the stack-allocated mock is destroyed.
    commandList->hostPtrMap.clear();
}
// A lookup whose pointer is below every key in the map has no preceding entry to fall
// back on and must miss.
HWTEST2_F(CommandListCreate, givenHostAllocInMapWhenPtrLowerThanAnyInMapThenNullPtrReturned, Platforms) {
    auto commandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
    commandList->initialize(device, true);

    // The map holds exactly one mock allocation: base 0x1200, size 0x1000.
    const uint64_t onlyKeyAddress = 0x1200;
    const size_t onlySize = 0x1000;
    const void *onlyKey = reinterpret_cast<const void *>(onlyKeyAddress);
    NEO::MockGraphicsAllocation onlyAlloc(const_cast<void *>(onlyKey), onlyKeyAddress, onlySize);
    commandList->hostPtrMap.insert(std::make_pair(onlyKey, &onlyAlloc));

    // Query begins 0x10 bytes below the only key.
    const void *belowAllKeys = reinterpret_cast<const void *>(onlyKeyAddress - 0x10);
    auto result = commandList->getAllocationFromHostPtrMap(belowAllKeys, onlySize - 0x20);
    EXPECT_EQ(result, nullptr);

    // Remove the entry before the stack-allocated mock is destroyed.
    commandList->hostPtrMap.clear();
}
} // namespace ult
} // namespace L0