diff --git a/level_zero/core/source/cmdlist/cmdlist.cpp b/level_zero/core/source/cmdlist/cmdlist.cpp index 385019d999..a80ea40f77 100644 --- a/level_zero/core/source/cmdlist/cmdlist.cpp +++ b/level_zero/core/source/cmdlist/cmdlist.cpp @@ -38,6 +38,33 @@ void CommandList::removeHostPtrAllocations() { hostPtrMap.clear(); } +NEO::GraphicsAllocation *CommandList::getAllocationFromHostPtrMap(const void *buffer, uint64_t bufferSize) { /* Looks in hostPtrMap for an entry whose end (key + getUnderlyingBufferSize()) is not below buffer + bufferSize. The lower_bound entry is accepted only when its key equals buffer; otherwise, or when that entry ends too early, the entry just before it is tried. Returns nullptr when neither qualifies. */+ auto allocation = hostPtrMap.lower_bound(buffer); + if (allocation != hostPtrMap.end()) { + if (buffer == allocation->first && ptrOffset(allocation->first, allocation->second->getUnderlyingBufferSize()) >= ptrOffset(buffer, bufferSize)) { + return allocation->second; + } + } + if (allocation != hostPtrMap.begin()) { /* Fall back to the entry immediately preceding the lower_bound position; only its end is compared against the end of the requested range. */+ allocation--; + if (ptrOffset(allocation->first, allocation->second->getUnderlyingBufferSize()) >= ptrOffset(buffer, bufferSize)) { + return allocation->second; + } + } + return nullptr; +} + +NEO::GraphicsAllocation *CommandList::getHostPtrAlloc(const void *buffer, uint64_t bufferSize, size_t *offset) { /* Returns an allocation for the host range: a matching hostPtrMap entry when getAllocationFromHostPtrMap finds one, otherwise a new one from device->allocateMemoryFromHostPtr that is then inserted into hostPtrMap under buffer. */+ NEO::GraphicsAllocation *alloc = getAllocationFromHostPtrMap(buffer, bufferSize); + if (alloc) { /* Reuse path: the ptrDiff between buffer and the allocation's underlying buffer is added to the value offset points to; the new-allocation path below leaves it untouched. */+ *offset += ptrDiff(buffer, alloc->getUnderlyingBuffer()); + return alloc; + } + alloc = device->allocateMemoryFromHostPtr(buffer, bufferSize); /* NOTE(review): alloc is inserted without a null check and the insert result is not inspected; if hostPtrMap is a std::map and buffer is already a key (same pointer, larger size) the new allocation would not be recorded - confirm. */+ hostPtrMap.insert(std::make_pair(buffer, alloc)); + return alloc; +} + void CommandList::removeDeallocationContainerData() { auto memoryManager = device ? 
device->getNEODevice()->getMemoryManager() : nullptr; diff --git a/level_zero/core/source/cmdlist/cmdlist.h b/level_zero/core/source/cmdlist/cmdlist.h index 40e215748f..264d488e8c 100644 --- a/level_zero/core/source/cmdlist/cmdlist.h +++ b/level_zero/core/source/cmdlist/cmdlist.h @@ -181,6 +181,8 @@ struct CommandList : _ze_command_list_handle_t { bool isCopyOnlyCmdList = false; UnifiedMemoryControls unifiedMemoryControls; bool indirectAllocationsAllowed = false; + NEO::GraphicsAllocation *getAllocationFromHostPtrMap(const void *buffer, uint64_t bufferSize); + NEO::GraphicsAllocation *getHostPtrAlloc(const void *buffer, uint64_t bufferSize, size_t *offset); }; using CommandListAllocatorFn = CommandList *(*)(uint32_t); diff --git a/level_zero/core/source/cmdlist/cmdlist_hw.inl b/level_zero/core/source/cmdlist/cmdlist_hw.inl index 82c6054d28..cd583f297e 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw.inl @@ -1209,10 +1209,9 @@ inline AlignedAllocationData CommandListCoreFamily::getAlignedAll bool hostPointerNeedsFlush = false; if (srcAllocFound == false) { - alloc = device->allocateMemoryFromHostPtr(buffer, bufferSize); - hostPtrMap.insert(std::make_pair(buffer, alloc)); + alloc = getHostPtrAlloc(buffer, bufferSize, &offset); /* offset is passed by address so getHostPtrAlloc can increase it when a previously mapped allocation is reused. */- alignedPtr = static_cast(alloc->getGpuAddress() - offset); + alignedPtr = static_cast(alignDown(alloc->getGpuAddress(), NEO::EncodeSurfaceState::getSurfaceBaseAddressAlignment())); /* The base pointer now comes from aligning the GPU address down to the surface base address alignment instead of subtracting offset. */hostPointerNeedsFlush = true; } else { alloc = allocData->gpuAllocation; diff --git a/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h b/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h index f2f6f1c07d..45ca5dba2b 100644 --- a/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h +++ b/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h @@ -41,6 +41,8 @@ struct WhiteBox<::L0::CommandListCoreFamily> using BaseClass::appendSignalEventPostWalker; using BaseClass::commandListPreemptionMode; using 
BaseClass::getAlignedAllocation; + using BaseClass::getAllocationFromHostPtrMap; + using BaseClass::getHostPtrAlloc; using BaseClass::hostPtrMap; WhiteBox() : ::L0::CommandListCoreFamily(BaseClass::defaultNumIddsPerBlock) {} diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist.cpp index 5cd769a775..038d2364b1 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist.cpp @@ -1070,5 +1070,118 @@ HWTEST2_F(CommandListCreate, givenCopyOnlyCommandListWhenAppendBlitFillCalledWit EXPECT_EQ(allocValue, pattern[i % 4]); } } + +HWTEST2_F(CommandListCreate, givenHostAllocInMapWhenGettingAllocInRangeThenAllocFromMapReturned, Platforms) { /* Queries a range that starts 0x10 past cpuPtr with size allocSize - 0x20 and expects a non-null result. */+ auto commandList = std::make_unique>>(); + commandList->initialize(device, true); + uint64_t gpuAddress = 0x1200; + const void *cpuPtr = reinterpret_cast(gpuAddress); + size_t allocSize = 0x1000; + NEO::MockGraphicsAllocation alloc(const_cast(cpuPtr), gpuAddress, allocSize); + commandList->hostPtrMap.insert(std::make_pair(cpuPtr, &alloc)); + + auto newBufferPtr = ptrOffset(cpuPtr, 0x10); + auto newBufferSize = allocSize - 0x20; + auto newAlloc = commandList->getAllocationFromHostPtrMap(newBufferPtr, newBufferSize); + EXPECT_NE(newAlloc, nullptr); + commandList->hostPtrMap.clear(); +} + +HWTEST2_F(CommandListCreate, givenHostAllocInMapWhenSizeIsOutOfRangeThenNullPtrReturned, Platforms) { /* Queries a range that starts 0x10 past cpuPtr with size allocSize + 0x20 and expects nullptr. */+ auto commandList = std::make_unique>>(); + commandList->initialize(device, true); + uint64_t gpuAddress = 0x1200; + const void *cpuPtr = reinterpret_cast(gpuAddress); + size_t allocSize = 0x1000; + NEO::MockGraphicsAllocation alloc(const_cast(cpuPtr), gpuAddress, allocSize); + commandList->hostPtrMap.insert(std::make_pair(cpuPtr, &alloc)); + + auto newBufferPtr = ptrOffset(cpuPtr, 0x10); + auto newBufferSize = allocSize + 0x20; + auto newAlloc = 
commandList->getAllocationFromHostPtrMap(newBufferPtr, newBufferSize); + EXPECT_EQ(newAlloc, nullptr); + commandList->hostPtrMap.clear(); +} + +HWTEST2_F(CommandListCreate, givenHostAllocInMapWhenPtrIsOutOfRangeThenNullPtrReturned, Platforms) { /* Queries a range that starts 0x100 below the only key in hostPtrMap and expects nullptr. */+ auto commandList = std::make_unique>>(); + commandList->initialize(device, true); + uint64_t gpuAddress = 0x1200; + const void *cpuPtr = reinterpret_cast(gpuAddress); + size_t allocSize = 0x1000; + NEO::MockGraphicsAllocation alloc(const_cast(cpuPtr), gpuAddress, allocSize); + commandList->hostPtrMap.insert(std::make_pair(cpuPtr, &alloc)); + + auto newBufferPtr = reinterpret_cast(gpuAddress - 0x100); + auto newBufferSize = allocSize - 0x200; + auto newAlloc = commandList->getAllocationFromHostPtrMap(newBufferPtr, newBufferSize); + EXPECT_EQ(newAlloc, nullptr); + commandList->hostPtrMap.clear(); +} + +HWTEST2_F(CommandListCreate, givenHostAllocInMapWhenGetHostPtrAllocCalledThenCorrectOffsetIsSet, Platforms) { /* Calls getHostPtrAlloc with offset initialised to 0 for a range starting expectedOffset past cpuPtr; expects a non-null result and offset equal to expectedOffset. */+ auto commandList = std::make_unique>>(); + commandList->initialize(device, true); + uint64_t gpuAddress = 0x1200; + const void *cpuPtr = reinterpret_cast(gpuAddress); + size_t allocSize = 0x1000; + NEO::MockGraphicsAllocation alloc(const_cast(cpuPtr), gpuAddress, allocSize); + commandList->hostPtrMap.insert(std::make_pair(cpuPtr, &alloc)); + size_t expectedOffset = 0x10; + auto newBufferPtr = ptrOffset(cpuPtr, expectedOffset); + auto newBufferSize = allocSize - 0x20; + size_t offset = 0; + auto newAlloc = commandList->getHostPtrAlloc(newBufferPtr, newBufferSize, &offset); + EXPECT_NE(newAlloc, nullptr); + EXPECT_EQ(offset, expectedOffset); + commandList->hostPtrMap.clear(); +} + +HWTEST2_F(CommandListCreate, givenHostAllocInMapWhenPtrIsInMapThenAllocationReturned, Platforms) { /* Queries with the exact key cpuPtr and size allocSize - 0x20; expects the inserted allocation back. */+ auto commandList = std::make_unique>>(); + commandList->initialize(device, true); + uint64_t gpuAddress = 0x1200; + const void *cpuPtr = reinterpret_cast(gpuAddress); + size_t allocSize = 0x1000; + NEO::MockGraphicsAllocation 
alloc(const_cast(cpuPtr), gpuAddress, allocSize); + commandList->hostPtrMap.insert(std::make_pair(cpuPtr, &alloc)); + + auto newBufferPtr = cpuPtr; + auto newBufferSize = allocSize - 0x20; + auto newAlloc = commandList->getAllocationFromHostPtrMap(newBufferPtr, newBufferSize); + EXPECT_EQ(newAlloc, &alloc); + commandList->hostPtrMap.clear(); +} +HWTEST2_F(CommandListCreate, givenHostAllocInMapWhenPtrIsInMapButWithBiggerSizeThenNullPtrReturned, Platforms) { /* Queries with the exact key cpuPtr but size allocSize + 0x20; expects nullptr. */+ auto commandList = std::make_unique>>(); + commandList->initialize(device, true); + uint64_t gpuAddress = 0x1200; + const void *cpuPtr = reinterpret_cast(gpuAddress); + size_t allocSize = 0x1000; + NEO::MockGraphicsAllocation alloc(const_cast(cpuPtr), gpuAddress, allocSize); + commandList->hostPtrMap.insert(std::make_pair(cpuPtr, &alloc)); + + auto newBufferPtr = cpuPtr; + auto newBufferSize = allocSize + 0x20; + auto newAlloc = commandList->getAllocationFromHostPtrMap(newBufferPtr, newBufferSize); + EXPECT_EQ(newAlloc, nullptr); + commandList->hostPtrMap.clear(); +} +HWTEST2_F(CommandListCreate, givenHostAllocInMapWhenPtrLowerThanAnyInMapThenNullPtrReturned, Platforms) { /* Queries a range that starts 0x10 below the only key in hostPtrMap and expects nullptr. */+ auto commandList = std::make_unique>>(); + commandList->initialize(device, true); + uint64_t gpuAddress = 0x1200; + const void *cpuPtr = reinterpret_cast(gpuAddress); + size_t allocSize = 0x1000; + NEO::MockGraphicsAllocation alloc(const_cast(cpuPtr), gpuAddress, allocSize); + commandList->hostPtrMap.insert(std::make_pair(cpuPtr, &alloc)); + + auto newBufferPtr = reinterpret_cast(gpuAddress - 0x10); + auto newBufferSize = allocSize - 0x20; + auto newAlloc = commandList->getAllocationFromHostPtrMap(newBufferPtr, newBufferSize); + EXPECT_EQ(newAlloc, nullptr); + commandList->hostPtrMap.clear(); +} + } // namespace ult } // namespace L0