Store EXTERNAL_HOST_PTR as a temporary allocation

Related-To: NEO-7359

Currently, for copy-only async immediate cmdlists
we don't release the external host ptr when it's no
longer needed. This is not spec compliant.
This PR fixes this.

Signed-off-by: Szymon Morek <szymon.morek@intel.com>
This commit is contained in:
Szymon Morek
2022-10-03 14:32:58 +00:00
committed by Compute-Runtime-Automation
parent 625f88d64d
commit c70df154a3
8 changed files with 35 additions and 5 deletions

View File

@@ -64,7 +64,7 @@ NEO::GraphicsAllocation *CommandList::getAllocationFromHostPtrMap(const void *bu
return allocation->second;
}
}
if (this->cmdListType == CommandListType::TYPE_IMMEDIATE && this->isFlushTaskSubmissionEnabled) {
if (this->storeExternalPtrAsTemporary()) {
auto allocation = this->csr->getInternalAllocationStorage()->obtainTemporaryAllocationWithPtr(bufferSize, buffer, NEO::AllocationType::EXTERNAL_HOST_PTR);
if (allocation != nullptr) {
auto alloc = allocation.get();
@@ -82,7 +82,7 @@ NEO::GraphicsAllocation *CommandList::getHostPtrAlloc(const void *buffer, uint64
}
alloc = device->allocateMemoryFromHostPtr(buffer, bufferSize, hostCopyAllowed);
UNRECOVERABLE_IF(alloc == nullptr);
if (this->cmdListType == CommandListType::TYPE_IMMEDIATE && this->isFlushTaskSubmissionEnabled) {
if (this->storeExternalPtrAsTemporary()) {
this->csr->getInternalAllocationStorage()->storeAllocation(std::unique_ptr<NEO::GraphicsAllocation>(alloc), NEO::AllocationUsage::TEMPORARY_ALLOCATION);
} else if (alloc->getAllocationType() == NEO::AllocationType::EXTERNAL_HOST_PTR) {
hostPtrMap.insert(std::make_pair(buffer, alloc));

View File

@@ -241,6 +241,9 @@ struct CommandList : _ze_command_list_handle_t {
bool isMemoryPrefetchRequested() const {
return performMemoryPrefetch;
}
// Returns true when this is an immediate command list that either has
// flush-task submission enabled or is copy-only; false for every other
// command list type.
bool storeExternalPtrAsTemporary() const {
    if (this->cmdListType != CommandListType::TYPE_IMMEDIATE) {
        return false;
    }
    return this->isFlushTaskSubmissionEnabled || isCopyOnly();
}
enum CommandListType : uint32_t {
TYPE_REGULAR = 0u,

View File

@@ -266,6 +266,7 @@ struct CommandListCoreFamily : CommandListImp {
uint64_t getInputBufferSize(NEO::ImageType imageType, uint64_t bytesPerPixel, const ze_image_region_t *region);
MOCKABLE_VIRTUAL AlignedAllocationData getAlignedAllocation(Device *device, const void *buffer, uint64_t bufferSize, bool hostCopyAllowed);
void addFlushRequiredCommand(bool flushOperationRequired, Event *signalEvent);
void handlePostSubmissionState();
virtual void createLogicalStateHelper();

View File

@@ -116,6 +116,11 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::reset() {
return ZE_RESULT_SUCCESS;
}
// Empties the residency container held by this command list's command
// container.
template <GFXCORE_FAMILY gfxCoreFamily>
void CommandListCoreFamily<gfxCoreFamily>::handlePostSubmissionState() {
    auto &residencyContainer = this->commandContainer.getResidencyContainer();
    residencyContainer.clear();
}
template <GFXCORE_FAMILY gfxCoreFamily>
ze_result_t CommandListCoreFamily<gfxCoreFamily>::initialize(Device *device, NEO::EngineGroupType engineGroupType,
ze_command_list_flags_t flags) {
@@ -189,6 +194,7 @@ inline ze_result_t CommandListCoreFamily<gfxCoreFamily>::executeCommandListImmed
if (this->isCopyOnly() && !this->isSyncModeQueue && !this->isTbxMode) {
this->commandContainer.currentLinearStreamStartOffset = this->commandContainer.getCommandStream()->getUsed();
this->handlePostSubmissionState();
} else {
const auto synchronizationResult = cmdQImmediate->synchronize(std::numeric_limits<uint64_t>::max());
if (synchronizationResult == ZE_RESULT_ERROR_DEVICE_LOST) {

View File

@@ -193,8 +193,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::executeCommandListImm
this->cmdListCurrentStartOffset = commandStream->getUsed();
this->containsAnyKernel = false;
this->commandContainer.getResidencyContainer().clear();
this->handlePostSubmissionState();
return ZE_RESULT_SUCCESS;
}

View File

@@ -124,6 +124,7 @@ struct WhiteBox<L0::CommandListCoreFamilyImmediate<gfxCoreFamily>>
using BaseClass::csr;
using BaseClass::finalStreamState;
using BaseClass::frontEndStateTracking;
using BaseClass::getHostPtrAlloc;
using BaseClass::immediateCmdListHeapSharing;
using BaseClass::isFlushTaskSubmissionEnabled;
using BaseClass::partitionCount;

View File

@@ -5,6 +5,7 @@
*
*/
#include "shared/source/memory_manager/internal_allocation_storage.h"
#include "shared/test/common/cmd_parse/gen_cmd_parse.h"
#include "shared/test/common/mocks/mock_graphics_allocation.h"
#include "shared/test/common/mocks/mock_memory_manager.h"
@@ -1402,6 +1403,25 @@ HWTEST2_F(CommandListCreate, givenHostPtrAllocAllocWhenExternalMemCreatedThenNew
commandList->hostPtrMap.clear();
}
// Checks that, for an immediate command list initialized on the copy engine
// group, the allocation returned by getHostPtrAlloc() ends up at the head of
// the CSR's temporary allocation list in its internal allocation storage.
HWTEST2_F(CommandListCreateWithBcs, givenHostPtrAllocAllocAndImmediateCmdListWhenExternalMemCreatedThenNewAllocAddedToInternalAllocationStorage, IsAtLeastSkl) {
    // NOTE(review): MyDeviceMock<EXTERNAL_HOST_PTR> presumably yields allocations
    // of that type from allocateMemoryFromHostPtr - confirm against the mock.
    auto myDevice = std::make_unique<MyDeviceMock<NEO::AllocationType::EXTERNAL_HOST_PTR>>(device->getNEODevice(), execEnv);
    myDevice->neoDevice = device->getNEODevice();

    auto commandList = std::make_unique<WhiteBox<L0::CommandListCoreFamilyImmediate<gfxCoreFamily>>>();
    commandList->initialize(myDevice.get(), NEO::EngineGroupType::Copy, 0u);
    commandList->cmdListType = CommandList::CommandListType::TYPE_IMMEDIATE;

    // Use the internal copy engine's CSR when the device has one, otherwise
    // fall back to the default internal engine.
    if (neoDevice->getInternalCopyEngine()) {
        commandList->csr = neoDevice->getInternalCopyEngine()->commandStreamReceiver;
    } else {
        commandList->csr = neoDevice->getInternalEngine().commandStreamReceiver;
    }

    // The array form is required here: std::make_unique<uint8_t>(0x100) would
    // allocate a single byte, while the size passed to getHostPtrAlloc() below
    // describes a 0x100-byte host range.
    constexpr size_t bufferSize = 0x100u;
    auto buffer = std::make_unique<uint8_t[]>(bufferSize);

    EXPECT_TRUE(commandList->csr->getInternalAllocationStorage()->getTemporaryAllocations().peekIsEmpty());

    auto alloc = commandList->getHostPtrAlloc(buffer.get(), bufferSize, true);

    EXPECT_FALSE(commandList->csr->getInternalAllocationStorage()->getTemporaryAllocations().peekIsEmpty());
    EXPECT_EQ(alloc, commandList->csr->getInternalAllocationStorage()->getTemporaryAllocations().peekHead());
}
HWTEST2_F(CommandListCreate, givenGetAlignedAllocationWhenInternalMemWithinDifferentAllocThenReturnNewAlloc, IsAtLeastSkl) {
auto myDevice = std::make_unique<MyDeviceMock<NEO::AllocationType::INTERNAL_HOST_MEMORY>>(device->getNEODevice(), execEnv);
myDevice->neoDevice = device->getNEODevice();

View File

@@ -178,7 +178,7 @@ HWTEST2_F(AppendMemoryCopy, givenAsyncImmediateCommandListWhenAppendingMemoryCop
EXPECT_EQ(1u, cmdQueue.executeCommandListsCalled);
EXPECT_EQ(0u, cmdQueue.synchronizeCalled);
EXPECT_EQ(0u, commandList->commandContainer.getResidencyContainer().size());
commandList->cmdQImmediate = nullptr;
}