mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-03 06:49:52 +08:00
fix: l0, tag update on external host ptr operation
Require a tag update when flushing operations that use an external host pointer: write to memory and image copy from/to memory.
Related-To: NEO-15663
Signed-off-by: Dominik Dabek <dominik.dabek@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
9b0c958774
commit
a5491f5c67
@@ -15,6 +15,7 @@
|
||||
#include "shared/source/helpers/common_types.h"
|
||||
#include "shared/source/helpers/definitions/command_encoder_args.h"
|
||||
#include "shared/source/helpers/heap_base_address_model.h"
|
||||
#include "shared/source/memory_manager/graphics_allocation.h"
|
||||
#include "shared/source/memory_manager/prefetch_manager.h"
|
||||
#include "shared/source/unified_memory/unified_memory.h"
|
||||
#include "shared/source/utilities/stackvec.h"
|
||||
@@ -229,6 +230,10 @@ struct CommandList : _ze_command_list_handle_t {
|
||||
return static_cast<CommandList *>(handle);
|
||||
}
|
||||
|
||||
// Tells whether `alloc` refers to an allocation whose type is
// NEO::AllocationType::externalHostPtr. A null `alloc` is accepted and
// yields false, so callers can pass a lookup result without checking it first.
static bool isExternalHostPtrAlloc(NEO::GraphicsAllocation *alloc) {
    if (alloc == nullptr) {
        return false;
    }
    const bool isExternalHostPtr = alloc->getAllocationType() == NEO::AllocationType::externalHostPtr;
    return isExternalHostPtr;
}
|
||||
|
||||
// Returns this object as a ze_command_list_handle_t (the handle is the object's own address).
inline ze_command_list_handle_t toHandle() { return this; }
|
||||
|
||||
uint32_t getCommandListPerThreadScratchSize(uint32_t slotId) const {
|
||||
|
||||
@@ -178,6 +178,8 @@ struct CommandListCoreFamily : public CommandListImp {
|
||||
ze_result_t appendWaitOnMemory(void *desc, void *ptr, uint64_t data, ze_event_handle_t signalEventHandle, bool useQwordData) override;
|
||||
ze_result_t appendWriteToMemory(void *desc, void *ptr,
|
||||
uint64_t data) override;
|
||||
ze_result_t appendWriteToMemory(void *desc, void *ptr,
|
||||
uint64_t data, bool *requireTaskCountUpdate);
|
||||
|
||||
ze_result_t appendWaitExternalSemaphores(uint32_t numExternalSemaphores, const ze_external_semaphore_ext_handle_t *hSemaphores,
|
||||
const ze_external_semaphore_wait_params_ext_t *params, ze_event_handle_t hSignalEvent,
|
||||
|
||||
@@ -836,6 +836,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendImageCopyFromMemoryExt(z
|
||||
return ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY;
|
||||
}
|
||||
|
||||
memoryCopyParams.taskCountUpdateRequired |= CommandList::isExternalHostPtrAlloc(allocationStruct.alloc);
|
||||
|
||||
DriverHandleImp *driverHandle = static_cast<DriverHandleImp *>(device->getDriverHandle());
|
||||
if (driverHandle->isRemoteImageNeeded(image, device)) {
|
||||
L0::Image *peerImage = nullptr;
|
||||
@@ -1034,6 +1036,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendImageCopyToMemoryExt(voi
|
||||
return ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY;
|
||||
}
|
||||
|
||||
memoryCopyParams.taskCountUpdateRequired |= CommandList::isExternalHostPtrAlloc(allocationStruct.alloc);
|
||||
|
||||
DriverHandleImp *driverHandle = static_cast<DriverHandleImp *>(device->getDriverHandle());
|
||||
if (driverHandle->isRemoteImageNeeded(image, device)) {
|
||||
L0::Image *peerImage = nullptr;
|
||||
@@ -1800,8 +1804,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopy(void *dstptr,
|
||||
}
|
||||
|
||||
if (this->isImmediateType()) {
|
||||
memoryCopyParams.taskCountUpdateRequired |= (dstAllocationStruct.alloc && dstAllocationStruct.alloc->getAllocationType() == NEO::AllocationType::externalHostPtr) ||
|
||||
(srcAllocationStruct.alloc && srcAllocationStruct.alloc->getAllocationType() == NEO::AllocationType::externalHostPtr);
|
||||
memoryCopyParams.taskCountUpdateRequired |= CommandList::isExternalHostPtrAlloc(dstAllocationStruct.alloc) ||
|
||||
CommandList::isExternalHostPtrAlloc(srcAllocationStruct.alloc);
|
||||
}
|
||||
|
||||
if ((dstAllocationStruct.alloc == nullptr) && (NEO::debugManager.flags.EmitMemAdvisePriorToCopyForNonUsm.get() == 1)) {
|
||||
@@ -2058,8 +2062,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyRegion(void *d
|
||||
}
|
||||
|
||||
if (this->isImmediateType()) {
|
||||
memoryCopyParams.taskCountUpdateRequired |= dstAllocationStruct.alloc->getAllocationType() == NEO::AllocationType::externalHostPtr ||
|
||||
srcAllocationStruct.alloc->getAllocationType() == NEO::AllocationType::externalHostPtr;
|
||||
memoryCopyParams.taskCountUpdateRequired |= CommandList::isExternalHostPtrAlloc(dstAllocationStruct.alloc) ||
|
||||
CommandList::isExternalHostPtrAlloc(srcAllocationStruct.alloc);
|
||||
}
|
||||
|
||||
memoryCopyParams.copyOffloadAllowed = isCopyOffloadAllowed(*srcAllocationStruct.alloc, *dstAllocationStruct.alloc);
|
||||
@@ -4308,6 +4312,14 @@ template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
// Three-argument overload: forwards to the four-argument appendWriteToMemory,
// passing nullptr for requireTaskCountUpdate so nothing is reported back
// about whether a task count update is needed.
ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWriteToMemory(void *desc,
|
||||
void *ptr,
|
||||
uint64_t data) {
|
||||
return this->appendWriteToMemory(desc, ptr, data, nullptr);
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWriteToMemory(void *desc,
|
||||
void *ptr,
|
||||
uint64_t data,
|
||||
bool *requireTaskCountUpdate) {
|
||||
auto descriptor = reinterpret_cast<zex_write_to_mem_desc_t *>(desc);
|
||||
|
||||
size_t bufSize = sizeof(uint64_t);
|
||||
@@ -4315,6 +4327,11 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWriteToMemory(void *desc
|
||||
if (dstAllocationStruct.alloc == nullptr) {
|
||||
return ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY;
|
||||
}
|
||||
|
||||
if (requireTaskCountUpdate) {
|
||||
*requireTaskCountUpdate = CommandList::isExternalHostPtrAlloc(dstAllocationStruct.alloc);
|
||||
}
|
||||
|
||||
UNRECOVERABLE_IF(dstAllocationStruct.alloc == nullptr);
|
||||
commandContainer.addToResidencyContainer(dstAllocationStruct.alloc);
|
||||
|
||||
|
||||
@@ -936,7 +936,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendImageCopyFromMe
|
||||
numWaitEvents, phWaitEvents, memoryCopyParams);
|
||||
|
||||
return flushImmediate(ret, true, hasStallingCmdsForRelaxedOrdering(numWaitEvents, memoryCopyParams.relaxedOrderingDispatch), memoryCopyParams.relaxedOrderingDispatch, NEO::AppendOperations::kernel,
|
||||
memoryCopyParams.copyOffloadAllowed, hSignalEvent, false, nullptr, nullptr);
|
||||
memoryCopyParams.copyOffloadAllowed, hSignalEvent, memoryCopyParams.taskCountUpdateRequired, nullptr, nullptr);
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
@@ -955,7 +955,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendImageCopyToMemo
|
||||
numWaitEvents, phWaitEvents, memoryCopyParams);
|
||||
|
||||
return flushImmediate(ret, true, hasStallingCmdsForRelaxedOrdering(numWaitEvents, memoryCopyParams.relaxedOrderingDispatch), memoryCopyParams.relaxedOrderingDispatch, NEO::AppendOperations::kernel,
|
||||
memoryCopyParams.copyOffloadAllowed, hSignalEvent, false, nullptr, nullptr);
|
||||
memoryCopyParams.copyOffloadAllowed, hSignalEvent, memoryCopyParams.taskCountUpdateRequired, nullptr, nullptr);
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
@@ -976,7 +976,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendImageCopyFromMe
|
||||
hSignalEvent, numWaitEvents, phWaitEvents, memoryCopyParams);
|
||||
|
||||
return flushImmediate(ret, true, hasStallingCmdsForRelaxedOrdering(numWaitEvents, memoryCopyParams.relaxedOrderingDispatch), memoryCopyParams.relaxedOrderingDispatch, NEO::AppendOperations::kernel,
|
||||
memoryCopyParams.copyOffloadAllowed, hSignalEvent, false, nullptr, nullptr);
|
||||
memoryCopyParams.copyOffloadAllowed, hSignalEvent, memoryCopyParams.taskCountUpdateRequired, nullptr, nullptr);
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
@@ -997,7 +997,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendImageCopyToMemo
|
||||
hSignalEvent, numWaitEvents, phWaitEvents, memoryCopyParams);
|
||||
|
||||
return flushImmediate(ret, true, hasStallingCmdsForRelaxedOrdering(numWaitEvents, memoryCopyParams.relaxedOrderingDispatch), memoryCopyParams.relaxedOrderingDispatch, NEO::AppendOperations::kernel,
|
||||
memoryCopyParams.copyOffloadAllowed, hSignalEvent, false, nullptr, nullptr);
|
||||
memoryCopyParams.copyOffloadAllowed, hSignalEvent, memoryCopyParams.taskCountUpdateRequired, nullptr, nullptr);
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
@@ -1023,8 +1023,9 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendWaitOnMemory(vo
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendWriteToMemory(void *desc, void *ptr, uint64_t data) {
|
||||
checkAvailableSpace(0, false, commonImmediateCommandSize, false);
|
||||
auto ret = CommandListCoreFamily<gfxCoreFamily>::appendWriteToMemory(desc, ptr, data);
|
||||
return flushImmediate(ret, true, false, false, NEO::AppendOperations::nonKernel, false, nullptr, false, nullptr, nullptr);
|
||||
bool requireTaskCountUpdate = false;
|
||||
auto ret = CommandListCoreFamily<gfxCoreFamily>::appendWriteToMemory(desc, ptr, data, &requireTaskCountUpdate);
|
||||
return flushImmediate(ret, true, false, false, NEO::AppendOperations::nonKernel, false, nullptr, requireTaskCountUpdate, nullptr, nullptr);
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
|
||||
@@ -834,6 +834,44 @@ HWTEST2_F(CommandListAppend, givenCopyCommandListAndNullDestinationRegionWhenIma
|
||||
EXPECT_TRUE(cmdList.useEvents);
|
||||
}
|
||||
|
||||
// Checks that an immediate command list asks for a task count update on flush
// after an image copy from memory and after an image copy to memory when the
// memory side is a raw host pointer.
// NOTE(review): 0x1234 is presumably turned into an externalHostPtr allocation
// by the test memory manager - confirm against the fixture.
HWTEST2_F(CommandListAppend, givenImmediateCommandListWhenImageCopyFromOrToMemoryWithExternalHostPtrThenRequireTaskCountUpdate, ImageSupport) {
    ze_command_queue_desc_t desc = {};
    ze_result_t ret = ZE_RESULT_SUCCESS;
    std::unique_ptr<L0::ult::CommandList> cmdList(CommandList::whiteboxCast(CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::renderCompute, ret)));
    // Fatal assertions: everything below dereferences cmdList, so stop here
    // instead of crashing if creation failed.
    ASSERT_EQ(ZE_RESULT_SUCCESS, ret);
    ASSERT_NE(nullptr, cmdList.get());

    void *hostPtr = reinterpret_cast<void *>(0x1234);
    ze_image_desc_t zeDesc = {};
    zeDesc.stype = ZE_STRUCTURE_TYPE_IMAGE_DESC;
    zeDesc.width = 1;
    zeDesc.height = 1;
    zeDesc.depth = 1;
    auto imageHW = std::make_unique<WhiteBox<::L0::ImageCoreFamily<FamilyType::gfxCoreFamily>>>();
    imageHW->initialize(device, &zeDesc);

    auto kernel = device->getBuiltinFunctionsLib()->getImageFunction(ImageBuiltin::copyImageRegion);
    auto mockBuiltinKernel = static_cast<Mock<::L0::KernelImp> *>(kernel);
    mockBuiltinKernel->setArgRedescribedImageCallBase = false;

    cmdList->appendImageCopyFromMemory(imageHW->toHandle(), hostPtr, nullptr, nullptr, 0, nullptr, copyParams);
    auto ultCsr = static_cast<NEO::UltCommandStreamReceiver<FamilyType> *>(cmdList->getCsr(false));

    // Shared verification for both copy directions. Which recorded flag set is
    // inspected depends on whether the immediate compute flush task path is in
    // use; `operation` labels a failure with the call that preceded it.
    auto expectTaskCountUpdateRequested = [&](const char *operation) {
        if (L0GfxCoreHelper::useImmediateComputeFlushTask(device->getNEODevice()->getRootDeviceEnvironment())) {
            ImmediateDispatchFlags &recordedImmediateDispatchFlags = ultCsr->recordedImmediateDispatchFlags;
            EXPECT_TRUE(recordedImmediateDispatchFlags.requireTaskCountUpdate) << operation;
        } else {
            DispatchFlags &recordedDispatchFlags = ultCsr->recordedDispatchFlags;
            EXPECT_TRUE(recordedDispatchFlags.guardCommandBufferWithPipeControl) << operation;
        }
    };

    expectTaskCountUpdateRequested("appendImageCopyFromMemory");

    cmdList->appendImageCopyToMemory(hostPtr, imageHW->toHandle(), nullptr, nullptr, 0u, nullptr, copyParams);
    expectTaskCountUpdateRequested("appendImageCopyToMemory");
}
|
||||
|
||||
HWTEST2_F(CommandListAppend, givenCopyCommandListAndNullDestinationRegionWhenImageCopyToMemoryThenBlitImageCopyCalledWithCorrectImageSize, ImageSupport) {
|
||||
MockCommandListHw<FamilyType::gfxCoreFamily> cmdList;
|
||||
cmdList.initialize(device, NEO::EngineGroupType::copy, 0u);
|
||||
|
||||
@@ -13,6 +13,7 @@
|
||||
#include "shared/source/helpers/register_offsets.h"
|
||||
#include "shared/test/common/cmd_parse/gen_cmd_parse.h"
|
||||
#include "shared/test/common/helpers/unit_test_helper.h"
|
||||
#include "shared/test/common/libult/ult_command_stream_receiver.h"
|
||||
#include "shared/test/common/mocks/mock_graphics_allocation.h"
|
||||
#include "shared/test/common/test_macros/hw_test.h"
|
||||
|
||||
@@ -1387,6 +1388,7 @@ HWTEST_F(ImmediateCommandListAppendWriteToMem, givenAppendWriteToMemWithNoScopeT
|
||||
EXPECT_TRUE(cmd->getCommandStreamerStallEnable());
|
||||
EXPECT_FALSE(cmd->getDcFlushEnable());
|
||||
postSyncFound = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
ASSERT_TRUE(postSyncFound);
|
||||
@@ -1414,6 +1416,7 @@ HWTEST_F(ImmediateCommandListAppendWriteToMem, givenAppendWriteToMemOnBcsWithNoS
|
||||
if (cmd->getPostSyncOperation() == MI_FLUSH_DW::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA_QWORD) {
|
||||
EXPECT_EQ(cmd->getImmediateData(), data);
|
||||
postSyncFound = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
ASSERT_TRUE(postSyncFound);
|
||||
@@ -1432,6 +1435,17 @@ HWTEST_F(ImmediateCommandListAppendWriteToMem, givenAppendWriteToMemWithScopeThe
|
||||
result = immCommandList->appendWriteToMemory(reinterpret_cast<void *>(&desc), ptr, data);
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
|
||||
auto whiteBoxCmdList = static_cast<CommandList *>(immCommandList.get());
|
||||
auto ultCsr = static_cast<NEO::UltCommandStreamReceiver<FamilyType> *>(whiteBoxCmdList->getCsr(false));
|
||||
|
||||
if (L0GfxCoreHelper::useImmediateComputeFlushTask(device->getNEODevice()->getRootDeviceEnvironment())) {
|
||||
ImmediateDispatchFlags &recordedImmediateDispatchFlags = ultCsr->recordedImmediateDispatchFlags;
|
||||
EXPECT_TRUE(recordedImmediateDispatchFlags.requireTaskCountUpdate);
|
||||
} else {
|
||||
DispatchFlags &recordedDispatchFlags = ultCsr->recordedDispatchFlags;
|
||||
EXPECT_TRUE(recordedDispatchFlags.guardCommandBufferWithPipeControl);
|
||||
}
|
||||
|
||||
GenCmdList cmdList;
|
||||
ASSERT_TRUE(FamilyType::Parse::parseCommandBuffer(
|
||||
cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed()));
|
||||
@@ -1446,6 +1460,7 @@ HWTEST_F(ImmediateCommandListAppendWriteToMem, givenAppendWriteToMemWithScopeThe
|
||||
EXPECT_TRUE(cmd->getCommandStreamerStallEnable());
|
||||
EXPECT_EQ(NEO::MemorySynchronizationCommands<FamilyType>::getDcFlushEnable(true, device->getNEODevice()->getRootDeviceEnvironment()), cmd->getDcFlushEnable());
|
||||
postSyncFound = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
ASSERT_TRUE(postSyncFound);
|
||||
|
||||
Reference in New Issue
Block a user