From 43433244f9d17e9c989116808757705754ddbfee Mon Sep 17 00:00:00 2001 From: Raiyan Latif Date: Thu, 12 Mar 2020 16:33:22 -0700 Subject: [PATCH] Enable support for indirect allocation access to Kernels Change-Id: I3ded8cce3761a38aa4a4d71b4089f79c844311c5 Signed-off-by: Raiyan Latif --- level_zero/core/source/cmdlist_hw.inl | 2 ++ level_zero/core/source/cmdlist_hw_base.inl | 10 +++++---- .../command_queue/enqueue_svm_tests.cpp | 22 +++++++++++++++++++ .../source/command_container/cmdcontainer.cpp | 10 ++++----- .../source/command_container/cmdcontainer.h | 1 + .../memory_manager/unified_memory_manager.cpp | 9 ++++++++ .../memory_manager/unified_memory_manager.h | 2 ++ .../command_container_tests.cpp | 9 ++++++-- 8 files changed, 54 insertions(+), 11 deletions(-) diff --git a/level_zero/core/source/cmdlist_hw.inl b/level_zero/core/source/cmdlist_hw.inl index 3bf7112dc0..22c9b55cf5 100644 --- a/level_zero/core/source/cmdlist_hw.inl +++ b/level_zero/core/source/cmdlist_hw.inl @@ -64,6 +64,8 @@ ze_result_t CommandListCoreFamily::executeCommandListImmediate(bo template ze_result_t CommandListCoreFamily::close() { using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; + + commandContainer.removeDuplicatesFromResidencyContainer(); NEO::EncodeBatchBufferStartOrEnd::programBatchBufferEnd(commandContainer); return ZE_RESULT_SUCCESS; diff --git a/level_zero/core/source/cmdlist_hw_base.inl b/level_zero/core/source/cmdlist_hw_base.inl index eed6e70527..3f9fe8ad78 100644 --- a/level_zero/core/source/cmdlist_hw_base.inl +++ b/level_zero/core/source/cmdlist_hw_base.inl @@ -13,6 +13,7 @@ #include "shared/source/helpers/register_offsets.h" #include "shared/source/helpers/simd_helper.h" #include "shared/source/memory_manager/memory_manager.h" +#include "shared/source/memory_manager/residency_container.h" #include "shared/source/unified_memory/unified_memory.h" #include @@ -43,11 +44,12 @@ ze_result_t CommandListCoreFamily::appendLaunchFunctionWithParams prepareIndirectParams(pThreadGroupDimensions); } - auto csr = device->getNEODevice()->getDefaultEngine().commandStreamReceiver; - - UnifiedMemoryControls unifiedMemoryControls = function->getUnifiedMemoryControls(); if (function->hasIndirectAllocationsAllowed()) { - device->getDriverHandle()->getSvmAllocsManager()->makeInternalAllocationsResident(*csr, unifiedMemoryControls.generateMask()); + UnifiedMemoryControls unifiedMemoryControls = function->getUnifiedMemoryControls(); + auto svmAllocsManager = device->getDriverHandle()->getSvmAllocsManager(); + auto &residencyContainer = commandContainer.getResidencyContainer(); + + svmAllocsManager->addInternalAllocationsToResidencyContainer(residencyContainer, unifiedMemoryControls.generateMask()); } NEO::EncodeDispatchKernel::encode(commandContainer, diff --git a/opencl/test/unit_test/command_queue/enqueue_svm_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_svm_tests.cpp index 22fd6dce90..2bf505ad71 100644 --- a/opencl/test/unit_test/command_queue/enqueue_svm_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_svm_tests.cpp @@ -1335,6 +1335,28 @@ HWTEST_F(EnqueueSvmTest, whenInternalAllocationsAreMadeResidentThenOnlyNonSvmAll svmManager->freeSVMAlloc(unifiedMemoryPtr); } +HWTEST_F(EnqueueSvmTest, whenInternalAllocationsAreAddedToResidencyContainerThenOnlyExpectedAllocationsAreAdded) { + SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties; + unifiedMemoryProperties.memoryType = InternalMemoryType::DEVICE_UNIFIED_MEMORY; + auto allocationSize = 4096u; + auto svmManager = this->context->getSVMAllocsManager(); + EXPECT_NE(0u, svmManager->getNumAllocs()); + auto unifiedMemoryPtr = svmManager->createUnifiedMemoryAllocation(pDevice->getRootDeviceIndex(), allocationSize, unifiedMemoryProperties); + EXPECT_NE(nullptr, unifiedMemoryPtr); + EXPECT_EQ(2u, svmManager->getNumAllocs()); + + ResidencyContainer residencyContainer; + EXPECT_EQ(0u, residencyContainer.size()); + + svmManager->addInternalAllocationsToResidencyContainer(residencyContainer, InternalMemoryType::DEVICE_UNIFIED_MEMORY); + + //only unified memory allocation is added to residency container + EXPECT_EQ(1u, residencyContainer.size()); + EXPECT_EQ(residencyContainer[0]->getGpuAddress(), castToUint64(unifiedMemoryPtr)); + + svmManager->freeSVMAlloc(unifiedMemoryPtr); +} + HWTEST_F(EnqueueSvmTest, GivenDstHostPtrWhenHostPtrAllocationCreationFailsThenReturnOutOfResource) { char dstHostPtr[260]; void *pDstSVM = dstHostPtr; diff --git a/shared/source/command_container/cmdcontainer.cpp b/shared/source/command_container/cmdcontainer.cpp index bc484e4c09..b8d826fbdc 100644 --- a/shared/source/command_container/cmdcontainer.cpp +++ b/shared/source/command_container/cmdcontainer.cpp @@ -91,15 +91,15 @@ void CommandContainer::addToResidencyContainer(GraphicsAllocation *alloc) { if (alloc == nullptr) { return; } - auto end = this->residencyContainer.end(); - bool isUnique = (end == std::find(this->residencyContainer.begin(), end, alloc)); - if (isUnique == false) { - return; - } this->residencyContainer.push_back(alloc); } +void CommandContainer::removeDuplicatesFromResidencyContainer() { + std::sort(this->residencyContainer.begin(), this->residencyContainer.end()); + this->residencyContainer.erase(std::unique(this->residencyContainer.begin(), this->residencyContainer.end()), this->residencyContainer.end()); +} + void CommandContainer::reset() { setDirtyStateForAllHeaps(true); slmSize = std::numeric_limits::max(); diff --git a/shared/source/command_container/cmdcontainer.h b/shared/source/command_container/cmdcontainer.h index 3b498573ca..1f7b8278b7 100644 --- a/shared/source/command_container/cmdcontainer.h +++ b/shared/source/command_container/cmdcontainer.h @@ -54,6 +54,7 @@ class CommandContainer : public NonCopyableOrMovableClass { std::vector &getDeallocationContainer() { return deallocationContainer; } void addToResidencyContainer(GraphicsAllocation *alloc); + void removeDuplicatesFromResidencyContainer(); LinearStream *getCommandStream() { return commandStream.get(); } diff --git a/shared/source/memory_manager/unified_memory_manager.cpp b/shared/source/memory_manager/unified_memory_manager.cpp index e071807672..9ab3ac0870 100644 --- a/shared/source/memory_manager/unified_memory_manager.cpp +++ b/shared/source/memory_manager/unified_memory_manager.cpp @@ -69,6 +69,15 @@ SvmMapOperation *SVMAllocsManager::MapOperationsTracker::get(const void *regionP return &iter->second; } +void SVMAllocsManager::addInternalAllocationsToResidencyContainer(ResidencyContainer &residencyContainer, uint32_t requestedTypesMask) { + std::unique_lock lock(mtx); + for (auto &allocation : this->SVMAllocs.allocations) { + if (allocation.second.memoryType & requestedTypesMask) { + residencyContainer.push_back(allocation.second.gpuAllocation); + } + } +} + void SVMAllocsManager::makeInternalAllocationsResident(CommandStreamReceiver &commandStreamReceiver, uint32_t requestedTypesMask) { std::unique_lock lock(mtx); for (auto &allocation : this->SVMAllocs.allocations) { diff --git a/shared/source/memory_manager/unified_memory_manager.h b/shared/source/memory_manager/unified_memory_manager.h index 618edafe45..d334579a67 100644 --- a/shared/source/memory_manager/unified_memory_manager.h +++ b/shared/source/memory_manager/unified_memory_manager.h @@ -7,6 +7,7 @@ #pragma once #include "shared/source/helpers/common_types.h" +#include "shared/source/memory_manager/residency_container.h" #include "shared/source/unified_memory/unified_memory.h" #include "shared/source/utilities/spinlock.h" @@ -93,6 +94,7 @@ class SVMAllocsManager { void insertSvmMapOperation(void *regionSvmPtr, size_t regionSize, void *baseSvmPtr, size_t offset, bool readOnlyMap); void removeSvmMapOperation(const void *regionSvmPtr); SvmMapOperation *getSvmMapOperation(const void *regionPtr); + void addInternalAllocationsToResidencyContainer(ResidencyContainer &residencyContainer, uint32_t requestedTypesMask); void makeInternalAllocationsResident(CommandStreamReceiver &commandStreamReceiver, uint32_t requestedTypesMask); void *createUnifiedAllocationWithDeviceStorage(uint32_t rootDeviceIndex, size_t size, const SvmAllocationProperties &svmProperties, const UnifiedMemoryProperties &unifiedMemoryProperties); void freeSvmAllocationWithDeviceStorage(SvmAllocationData *svmData); diff --git a/shared/test/unit_test/command_container/command_container_tests.cpp b/shared/test/unit_test/command_container/command_container_tests.cpp index 3ab6b26fa4..fd5638abce 100644 --- a/shared/test/unit_test/command_container/command_container_tests.cpp +++ b/shared/test/unit_test/command_container/command_container_tests.cpp @@ -164,7 +164,7 @@ TEST_F(CommandContainerTest, givenCommandContainerWhenWantToAddNullPtrToResidenc EXPECT_EQ(cmdContainer.getResidencyContainer().size(), size); } -TEST_F(CommandContainerTest, givenCommandContainerWhenWantToAddAleradyAddedAllocationThenNothingIsAdded) { +TEST_F(CommandContainerTest, givenCommandContainerWhenWantToAddAlreadyAddedAllocationAndDuplicatesRemovedThenExpectedSizeIsReturned) { CommandContainer cmdContainer; cmdContainer.initialize(pDevice); MockGraphicsAllocation mockAllocation; @@ -179,7 +179,12 @@ TEST_F(CommandContainerTest, givenCommandContainerWhenWantToAddAleradyAddedAlloc cmdContainer.addToResidencyContainer(&mockAllocation); auto sizeAfterSecondAdd = cmdContainer.getResidencyContainer().size(); - EXPECT_EQ(sizeAfterFirstAdd, sizeAfterSecondAdd); + EXPECT_NE(sizeAfterFirstAdd, sizeAfterSecondAdd); + + cmdContainer.removeDuplicatesFromResidencyContainer(); + auto sizeAfterDuplicatesRemoved = cmdContainer.getResidencyContainer().size(); + + EXPECT_EQ(sizeAfterFirstAdd, sizeAfterDuplicatesRemoved); } TEST_F(CommandContainerTest, givenAvailableSpaceWhenGetHeapWithRequiredSizeAndAlignmentCalledThenExistingAllocationIsReturned) {