From c70df154a3831878c6058201ac2ad7aaa4c7b0fc Mon Sep 17 00:00:00 2001 From: Szymon Morek Date: Mon, 3 Oct 2022 14:32:58 +0000 Subject: [PATCH] Store EXTERNAL_HOST_PTR as a temporary allocation Related-To: NEO-7359 Currently, for copy only async immediate cmdlists we don't release external host ptr when it's no longer needed. This is not spec compliant. This PR fixes this. Signed-off-by: Szymon Morek --- level_zero/core/source/cmdlist/cmdlist.cpp | 4 ++-- level_zero/core/source/cmdlist/cmdlist.h | 3 +++ level_zero/core/source/cmdlist/cmdlist_hw.h | 1 + level_zero/core/source/cmdlist/cmdlist_hw.inl | 6 ++++++ .../source/cmdlist/cmdlist_hw_immediate.inl | 3 +-- .../core/test/unit_tests/mocks/mock_cmdlist.h | 1 + .../sources/cmdlist/test_cmdlist_3.cpp | 20 +++++++++++++++++++ .../cmdlist/test_cmdlist_append_memory.cpp | 2 +- 8 files changed, 35 insertions(+), 5 deletions(-) diff --git a/level_zero/core/source/cmdlist/cmdlist.cpp b/level_zero/core/source/cmdlist/cmdlist.cpp index 1263ec8db2..0ddbe9fc0d 100644 --- a/level_zero/core/source/cmdlist/cmdlist.cpp +++ b/level_zero/core/source/cmdlist/cmdlist.cpp @@ -64,7 +64,7 @@ NEO::GraphicsAllocation *CommandList::getAllocationFromHostPtrMap(const void *bu return allocation->second; } } - if (this->cmdListType == CommandListType::TYPE_IMMEDIATE && this->isFlushTaskSubmissionEnabled) { + if (this->storeExternalPtrAsTemporary()) { auto allocation = this->csr->getInternalAllocationStorage()->obtainTemporaryAllocationWithPtr(bufferSize, buffer, NEO::AllocationType::EXTERNAL_HOST_PTR); if (allocation != nullptr) { auto alloc = allocation.get(); @@ -82,7 +82,7 @@ NEO::GraphicsAllocation *CommandList::getHostPtrAlloc(const void *buffer, uint64 } alloc = device->allocateMemoryFromHostPtr(buffer, bufferSize, hostCopyAllowed); UNRECOVERABLE_IF(alloc == nullptr); - if (this->cmdListType == CommandListType::TYPE_IMMEDIATE && this->isFlushTaskSubmissionEnabled) { + if (this->storeExternalPtrAsTemporary()) { 
this->csr->getInternalAllocationStorage()->storeAllocation(std::unique_ptr(alloc), NEO::AllocationUsage::TEMPORARY_ALLOCATION); } else if (alloc->getAllocationType() == NEO::AllocationType::EXTERNAL_HOST_PTR) { hostPtrMap.insert(std::make_pair(buffer, alloc)); diff --git a/level_zero/core/source/cmdlist/cmdlist.h b/level_zero/core/source/cmdlist/cmdlist.h index 20a42738ef..1785825bda 100644 --- a/level_zero/core/source/cmdlist/cmdlist.h +++ b/level_zero/core/source/cmdlist/cmdlist.h @@ -241,6 +241,9 @@ struct CommandList : _ze_command_list_handle_t { bool isMemoryPrefetchRequested() const { return performMemoryPrefetch; } + bool storeExternalPtrAsTemporary() const { + return this->cmdListType == CommandListType::TYPE_IMMEDIATE && (this->isFlushTaskSubmissionEnabled || isCopyOnly()); + } enum CommandListType : uint32_t { TYPE_REGULAR = 0u, diff --git a/level_zero/core/source/cmdlist/cmdlist_hw.h b/level_zero/core/source/cmdlist/cmdlist_hw.h index 7f0d2f5691..b63159a1cf 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw.h +++ b/level_zero/core/source/cmdlist/cmdlist_hw.h @@ -266,6 +266,7 @@ struct CommandListCoreFamily : CommandListImp { uint64_t getInputBufferSize(NEO::ImageType imageType, uint64_t bytesPerPixel, const ze_image_region_t *region); MOCKABLE_VIRTUAL AlignedAllocationData getAlignedAllocation(Device *device, const void *buffer, uint64_t bufferSize, bool hostCopyAllowed); void addFlushRequiredCommand(bool flushOperationRequired, Event *signalEvent); + void handlePostSubmissionState(); virtual void createLogicalStateHelper(); diff --git a/level_zero/core/source/cmdlist/cmdlist_hw.inl b/level_zero/core/source/cmdlist/cmdlist_hw.inl index 34361ad040..d80f480603 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw.inl @@ -116,6 +116,11 @@ ze_result_t CommandListCoreFamily::reset() { return ZE_RESULT_SUCCESS; } +template +void CommandListCoreFamily::handlePostSubmissionState() { + 
this->commandContainer.getResidencyContainer().clear(); +} + template ze_result_t CommandListCoreFamily::initialize(Device *device, NEO::EngineGroupType engineGroupType, ze_command_list_flags_t flags) { @@ -189,6 +194,7 @@ inline ze_result_t CommandListCoreFamily::executeCommandListImmed if (this->isCopyOnly() && !this->isSyncModeQueue && !this->isTbxMode) { this->commandContainer.currentLinearStreamStartOffset = this->commandContainer.getCommandStream()->getUsed(); + this->handlePostSubmissionState(); } else { const auto synchronizationResult = cmdQImmediate->synchronize(std::numeric_limits::max()); if (synchronizationResult == ZE_RESULT_ERROR_DEVICE_LOST) { diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl b/level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl index 28a47b8377..8aea57b4d8 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl @@ -193,8 +193,7 @@ ze_result_t CommandListCoreFamilyImmediate::executeCommandListImm this->cmdListCurrentStartOffset = commandStream->getUsed(); this->containsAnyKernel = false; - this->commandContainer.getResidencyContainer().clear(); - + this->handlePostSubmissionState(); return ZE_RESULT_SUCCESS; } diff --git a/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h b/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h index c642f5eca5..1a1c9972ce 100644 --- a/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h +++ b/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h @@ -124,6 +124,7 @@ struct WhiteBox> using BaseClass::csr; using BaseClass::finalStreamState; using BaseClass::frontEndStateTracking; + using BaseClass::getHostPtrAlloc; using BaseClass::immediateCmdListHeapSharing; using BaseClass::isFlushTaskSubmissionEnabled; using BaseClass::partitionCount; diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_3.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_3.cpp index 
2987f0e94f..a6de14d61c 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_3.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_3.cpp @@ -5,6 +5,7 @@ * */ +#include "shared/source/memory_manager/internal_allocation_storage.h" #include "shared/test/common/cmd_parse/gen_cmd_parse.h" #include "shared/test/common/mocks/mock_graphics_allocation.h" #include "shared/test/common/mocks/mock_memory_manager.h" @@ -1402,6 +1403,25 @@ HWTEST2_F(CommandListCreate, givenHostPtrAllocAllocWhenExternalMemCreatedThenNew commandList->hostPtrMap.clear(); } +HWTEST2_F(CommandListCreateWithBcs, givenHostPtrAllocAllocAndImmediateCmdListWhenExternalMemCreatedThenNewAllocAddedToInternalAllocationStorage, IsAtLeastSkl) { + auto myDevice = std::make_unique>(device->getNEODevice(), execEnv); + myDevice->neoDevice = device->getNEODevice(); + auto commandList = std::make_unique>>(); + commandList->initialize(myDevice.get(), NEO::EngineGroupType::Copy, 0u); + commandList->cmdListType = CommandList::CommandListType::TYPE_IMMEDIATE; + if (neoDevice->getInternalCopyEngine()) { + commandList->csr = neoDevice->getInternalCopyEngine()->commandStreamReceiver; + } else { + commandList->csr = neoDevice->getInternalEngine().commandStreamReceiver; + } + auto buffer = std::make_unique(0x100); + + EXPECT_TRUE(commandList->csr->getInternalAllocationStorage()->getTemporaryAllocations().peekIsEmpty()); + auto alloc = commandList->getHostPtrAlloc(buffer.get(), 0x100, true); + EXPECT_FALSE(commandList->csr->getInternalAllocationStorage()->getTemporaryAllocations().peekIsEmpty()); + EXPECT_EQ(alloc, commandList->csr->getInternalAllocationStorage()->getTemporaryAllocations().peekHead()); +} + HWTEST2_F(CommandListCreate, givenGetAlignedAllocationWhenInternalMemWithinDifferentAllocThenReturnNewAlloc, IsAtLeastSkl) { auto myDevice = std::make_unique>(device->getNEODevice(), execEnv); myDevice->neoDevice = device->getNEODevice(); diff --git 
a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_memory.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_memory.cpp index 4231e56928..a4246b3a32 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_memory.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_memory.cpp @@ -178,7 +178,7 @@ HWTEST2_F(AppendMemoryCopy, givenAsyncImmediateCommandListWhenAppendingMemoryCop EXPECT_EQ(1u, cmdQueue.executeCommandListsCalled); EXPECT_EQ(0u, cmdQueue.synchronizeCalled); - + EXPECT_EQ(0u, commandList->commandContainer.getResidencyContainer().size()); commandList->cmdQImmediate = nullptr; }