mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-03 06:49:52 +08:00
fix: l0, tag update on mem copy ext host ptr
Require a tag update on memory copy with an external host pointer. Without this, the temporary allocation might not be cleaned up before the next copy operation. If a second copy operation is passed the same pointer after it has been reallocated, a page fault will occur. Related-To: NEO-15663 Signed-off-by: Dominik Dabek <dominik.dabek@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
34ddf678ad
commit
77470acf7a
@@ -1799,6 +1799,11 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopy(void *dstptr,
|
||||
return ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY;
|
||||
}
|
||||
|
||||
if (this->isImmediateType()) {
|
||||
memoryCopyParams.taskCountUpdateRequired |= (dstAllocationStruct.alloc && dstAllocationStruct.alloc->getAllocationType() == NEO::AllocationType::externalHostPtr) ||
|
||||
(srcAllocationStruct.alloc && srcAllocationStruct.alloc->getAllocationType() == NEO::AllocationType::externalHostPtr);
|
||||
}
|
||||
|
||||
if ((dstAllocationStruct.alloc == nullptr) && (NEO::debugManager.flags.EmitMemAdvisePriorToCopyForNonUsm.get() == 1)) {
|
||||
appendMemAdvise(device, reinterpret_cast<void *>(dstAllocationStruct.alignedAllocationPtr), size, static_cast<ze_memory_advice_t>(ZE_MEMORY_ADVICE_SET_SYSTEM_MEMORY_PREFERRED_LOCATION));
|
||||
}
|
||||
@@ -2052,6 +2057,11 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyRegion(void *d
|
||||
return ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY;
|
||||
}
|
||||
|
||||
if (this->isImmediateType()) {
|
||||
memoryCopyParams.taskCountUpdateRequired |= dstAllocationStruct.alloc->getAllocationType() == NEO::AllocationType::externalHostPtr ||
|
||||
srcAllocationStruct.alloc->getAllocationType() == NEO::AllocationType::externalHostPtr;
|
||||
}
|
||||
|
||||
memoryCopyParams.copyOffloadAllowed = isCopyOffloadAllowed(*srcAllocationStruct.alloc, *dstAllocationStruct.alloc);
|
||||
const bool isCopyOnlyEnabled = isCopyOnly(memoryCopyParams.copyOffloadAllowed);
|
||||
const bool inOrderCopyOnlySignalingAllowed = this->isInOrderExecutionEnabled() && !memoryCopyParams.forceDisableCopyOnlyInOrderSignaling && isCopyOnlyEnabled;
|
||||
|
||||
@@ -2201,41 +2201,47 @@ HWTEST2_F(CommandListCreateTests, givenDirectSubmissionAndImmCmdListWhenDispatch
|
||||
|
||||
auto ultCsr = static_cast<NEO::UltCommandStreamReceiver<FamilyType> *>(whiteBoxCmdList->getCsr(false));
|
||||
|
||||
auto verifyFlag = [&ultCsr](ze_result_t result, bool dispatchFlag) {
|
||||
auto verifyWalkerWithProfilingEnqueued = [&ultCsr](ze_result_t result, bool expectEnqueued) {
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
EXPECT_EQ(ultCsr->isWalkerWithProfilingEnqueued, dispatchFlag);
|
||||
const auto enqueueTimes = ultCsr->walkerWithProfilingEnqueuedTimes + ultCsr->isWalkerWithProfilingEnqueued;
|
||||
if (expectEnqueued) {
|
||||
EXPECT_GT(enqueueTimes, 0u);
|
||||
} else {
|
||||
EXPECT_EQ(0u, enqueueTimes);
|
||||
}
|
||||
ultCsr->walkerWithProfilingEnqueuedTimes = 0u;
|
||||
ultCsr->isWalkerWithProfilingEnqueued = false;
|
||||
};
|
||||
|
||||
auto expectFlagEnabled = true && this->device->getNEODevice()->getProductHelper().shouldRegisterEnqueuedWalkerWithProfiling();
|
||||
bool expectWalkerWithProfilingEnqueued = this->device->getNEODevice()->getProductHelper().shouldRegisterEnqueuedWalkerWithProfiling();
|
||||
// non-pipelined state
|
||||
verifyFlag(commandList->appendLaunchKernel(kernel.toHandle(), groupCount, event, 0, nullptr, launchParams), expectFlagEnabled);
|
||||
verifyWalkerWithProfilingEnqueued(commandList->appendLaunchKernel(kernel.toHandle(), groupCount, event, 0, nullptr, launchParams), expectWalkerWithProfilingEnqueued);
|
||||
|
||||
// non-pipelined state already programmed
|
||||
verifyFlag(commandList->appendLaunchKernel(kernel.toHandle(), groupCount, event, 0, nullptr, launchParams), expectFlagEnabled);
|
||||
verifyWalkerWithProfilingEnqueued(commandList->appendLaunchKernel(kernel.toHandle(), groupCount, event, 0, nullptr, launchParams), expectWalkerWithProfilingEnqueued);
|
||||
|
||||
verifyFlag(commandList->appendLaunchKernel(kernel.toHandle(), groupCount, nullptr, 0, nullptr, launchParams), false);
|
||||
verifyWalkerWithProfilingEnqueued(commandList->appendLaunchKernel(kernel.toHandle(), groupCount, nullptr, 0, nullptr, launchParams), false);
|
||||
|
||||
verifyFlag(commandList->appendLaunchKernelIndirect(kernel.toHandle(), groupCount, event, 0, nullptr, false), expectFlagEnabled);
|
||||
verifyWalkerWithProfilingEnqueued(commandList->appendLaunchKernelIndirect(kernel.toHandle(), groupCount, event, 0, nullptr, false), expectWalkerWithProfilingEnqueued);
|
||||
|
||||
verifyFlag(commandList->appendBarrier(event, 0, nullptr, false), false);
|
||||
verifyWalkerWithProfilingEnqueued(commandList->appendBarrier(event, 0, nullptr, false), false);
|
||||
|
||||
CmdListMemoryCopyParams copyParams = {};
|
||||
verifyFlag(commandList->appendMemoryCopy(dstPtr, srcPtr, 8, event, 0, nullptr, copyParams), expectFlagEnabled);
|
||||
verifyWalkerWithProfilingEnqueued(commandList->appendMemoryCopy(dstPtr, srcPtr, 8, event, 0, nullptr, copyParams), expectWalkerWithProfilingEnqueued);
|
||||
|
||||
verifyFlag(commandList->appendMemoryCopyRegion(dstPtr, &region, 0, 0, srcPtr, &region, 0, 0, event, 0, nullptr, copyParams), expectFlagEnabled);
|
||||
verifyWalkerWithProfilingEnqueued(commandList->appendMemoryCopyRegion(dstPtr, &region, 0, 0, srcPtr, &region, 0, 0, event, 0, nullptr, copyParams), expectWalkerWithProfilingEnqueued);
|
||||
|
||||
verifyFlag(commandList->appendMemoryFill(dstPtr, srcPtr, 8, 1, event, 0, nullptr, copyParams), expectFlagEnabled);
|
||||
verifyWalkerWithProfilingEnqueued(commandList->appendMemoryFill(dstPtr, srcPtr, 8, 1, event, 0, nullptr, copyParams), expectWalkerWithProfilingEnqueued);
|
||||
|
||||
verifyFlag(commandList->appendEventReset(event), false);
|
||||
verifyWalkerWithProfilingEnqueued(commandList->appendEventReset(event), false);
|
||||
|
||||
verifyFlag(commandList->appendSignalEvent(event, false), false);
|
||||
verifyWalkerWithProfilingEnqueued(commandList->appendSignalEvent(event, false), false);
|
||||
|
||||
verifyFlag(commandList->appendPageFaultCopy(kernel.getIsaAllocation(), kernel.getIsaAllocation(), 1, false), false);
|
||||
verifyWalkerWithProfilingEnqueued(commandList->appendPageFaultCopy(kernel.getIsaAllocation(), kernel.getIsaAllocation(), 1, false), false);
|
||||
|
||||
verifyFlag(commandList->appendWaitOnEvents(1, &event, nullptr, false, true, false, false, false, false), false);
|
||||
verifyWalkerWithProfilingEnqueued(commandList->appendWaitOnEvents(1, &event, nullptr, false, true, false, false, false, false), false);
|
||||
|
||||
verifyFlag(commandList->appendWriteGlobalTimestamp(reinterpret_cast<uint64_t *>(dstPtr), event, 0, nullptr), false);
|
||||
verifyWalkerWithProfilingEnqueued(commandList->appendWriteGlobalTimestamp(reinterpret_cast<uint64_t *>(dstPtr), event, 0, nullptr), false);
|
||||
|
||||
if constexpr (FamilyType::supportsSampler) {
|
||||
auto kernel = device->getBuiltinFunctionsLib()->getImageFunction(ImageBuiltin::copyImageRegion);
|
||||
@@ -2251,27 +2257,27 @@ HWTEST2_F(CommandListCreateTests, givenDirectSubmissionAndImmCmdListWhenDispatch
|
||||
|
||||
CmdListMemoryCopyParams copyParams = {};
|
||||
|
||||
verifyFlag(commandList->appendImageCopyRegion(image->toHandle(), image->toHandle(), &imgRegion, &imgRegion, event, 0, nullptr, copyParams), expectFlagEnabled);
|
||||
verifyWalkerWithProfilingEnqueued(commandList->appendImageCopyRegion(image->toHandle(), image->toHandle(), &imgRegion, &imgRegion, event, 0, nullptr, copyParams), expectWalkerWithProfilingEnqueued);
|
||||
|
||||
verifyFlag(commandList->appendImageCopyFromMemory(image->toHandle(), dstPtr, &imgRegion, event, 0, nullptr, copyParams), expectFlagEnabled);
|
||||
verifyWalkerWithProfilingEnqueued(commandList->appendImageCopyFromMemory(image->toHandle(), dstPtr, &imgRegion, event, 0, nullptr, copyParams), expectWalkerWithProfilingEnqueued);
|
||||
|
||||
verifyFlag(commandList->appendImageCopyToMemory(dstPtr, image->toHandle(), &imgRegion, event, 0, nullptr, copyParams), expectFlagEnabled);
|
||||
verifyWalkerWithProfilingEnqueued(commandList->appendImageCopyToMemory(dstPtr, image->toHandle(), &imgRegion, event, 0, nullptr, copyParams), expectWalkerWithProfilingEnqueued);
|
||||
|
||||
verifyFlag(commandList->appendImageCopyFromMemoryExt(image->toHandle(), dstPtr, &imgRegion, bytesPerPixel, bytesPerPixel, event, 0, nullptr, copyParams), expectFlagEnabled);
|
||||
verifyWalkerWithProfilingEnqueued(commandList->appendImageCopyFromMemoryExt(image->toHandle(), dstPtr, &imgRegion, bytesPerPixel, bytesPerPixel, event, 0, nullptr, copyParams), expectWalkerWithProfilingEnqueued);
|
||||
|
||||
verifyFlag(commandList->appendImageCopyToMemoryExt(dstPtr, image->toHandle(), &imgRegion, bytesPerPixel, bytesPerPixel, event, 0, nullptr, copyParams), expectFlagEnabled);
|
||||
verifyWalkerWithProfilingEnqueued(commandList->appendImageCopyToMemoryExt(dstPtr, image->toHandle(), &imgRegion, bytesPerPixel, bytesPerPixel, event, 0, nullptr, copyParams), expectWalkerWithProfilingEnqueued);
|
||||
}
|
||||
|
||||
size_t rangeSizes = 1;
|
||||
const void **ranges = reinterpret_cast<const void **>(&dstPtr[0]);
|
||||
verifyFlag(commandList->appendMemoryRangesBarrier(1, &rangeSizes, ranges, event, 0, nullptr), false);
|
||||
verifyWalkerWithProfilingEnqueued(commandList->appendMemoryRangesBarrier(1, &rangeSizes, ranges, event, 0, nullptr), false);
|
||||
|
||||
CmdListKernelLaunchParams cooperativeParams = {};
|
||||
cooperativeParams.isCooperative = true;
|
||||
|
||||
verifyFlag(commandList->appendLaunchKernel(kernel.toHandle(), groupCount, event, 0, nullptr, cooperativeParams), expectFlagEnabled);
|
||||
verifyWalkerWithProfilingEnqueued(commandList->appendLaunchKernel(kernel.toHandle(), groupCount, event, 0, nullptr, cooperativeParams), expectWalkerWithProfilingEnqueued);
|
||||
|
||||
verifyFlag(commandList->appendLaunchKernel(kernel.toHandle(), groupCount, event, 0, nullptr, cooperativeParams), expectFlagEnabled);
|
||||
verifyWalkerWithProfilingEnqueued(commandList->appendLaunchKernel(kernel.toHandle(), groupCount, event, 0, nullptr, cooperativeParams), expectWalkerWithProfilingEnqueued);
|
||||
|
||||
driverHandle->releaseImportedPointer(dstPtr);
|
||||
}
|
||||
@@ -2959,6 +2965,50 @@ TEST_F(CommandListCreateTests, whenInvokingAppendMemoryCopyFromContextForImmedia
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
}
|
||||
|
||||
HWTEST_F(CommandListCreateTests, givenImmediateCmdListWhenInvokingAppendMemoryCopyWithExternalHostPtrThenRequireTaskCountUpdate) {
|
||||
ze_command_queue_desc_t desc = {};
|
||||
desc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS;
|
||||
ze_result_t returnValue;
|
||||
std::unique_ptr<L0::CommandList> commandList(CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::compute, returnValue));
|
||||
ASSERT_NE(nullptr, commandList);
|
||||
auto whiteBoxCmdList = static_cast<CommandList *>(commandList.get());
|
||||
|
||||
EXPECT_EQ(device, commandList->getDevice());
|
||||
EXPECT_TRUE(commandList->isImmediateType());
|
||||
EXPECT_NE(nullptr, whiteBoxCmdList->cmdQImmediate);
|
||||
|
||||
constexpr size_t transferSize = sizeof(size_t);
|
||||
void *hostPtr;
|
||||
ze_host_mem_alloc_desc_t hostDesc = {};
|
||||
ASSERT_EQ(ZE_RESULT_SUCCESS, context->allocHostMem(&hostDesc, transferSize, 0u, &hostPtr));
|
||||
size_t externalHostAlloc = 0;
|
||||
CmdListMemoryCopyParams copyParams = {};
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, commandList->appendMemoryCopy(hostPtr, &externalHostAlloc, sizeof(size_t), nullptr, 0, nullptr, copyParams));
|
||||
|
||||
auto ultCsr = static_cast<NEO::UltCommandStreamReceiver<FamilyType> *>(whiteBoxCmdList->getCsr(false));
|
||||
|
||||
if (L0GfxCoreHelper::useImmediateComputeFlushTask(device->getNEODevice()->getRootDeviceEnvironment())) {
|
||||
ImmediateDispatchFlags &recordedImmediateDispatchFlags = ultCsr->recordedImmediateDispatchFlags;
|
||||
EXPECT_TRUE(recordedImmediateDispatchFlags.requireTaskCountUpdate);
|
||||
} else {
|
||||
DispatchFlags &recordedDispatchFlags = ultCsr->recordedDispatchFlags;
|
||||
EXPECT_TRUE(recordedDispatchFlags.guardCommandBufferWithPipeControl);
|
||||
}
|
||||
|
||||
const ze_copy_region_t region = {0U, 0U, 0U, 1, 1, 0U};
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, commandList->appendMemoryCopyRegion(hostPtr, &region, 0, 0, &externalHostAlloc, &region, 0, 0, nullptr, 0, nullptr, copyParams));
|
||||
|
||||
if (L0GfxCoreHelper::useImmediateComputeFlushTask(device->getNEODevice()->getRootDeviceEnvironment())) {
|
||||
ImmediateDispatchFlags &recordedImmediateDispatchFlags = ultCsr->recordedImmediateDispatchFlags;
|
||||
EXPECT_TRUE(recordedImmediateDispatchFlags.requireTaskCountUpdate);
|
||||
} else {
|
||||
DispatchFlags &recordedDispatchFlags = ultCsr->recordedDispatchFlags;
|
||||
EXPECT_TRUE(recordedDispatchFlags.guardCommandBufferWithPipeControl);
|
||||
}
|
||||
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, context->freeMem(hostPtr));
|
||||
}
|
||||
|
||||
TEST_F(CommandListCreateTests, whenInvokingAppendMemoryCopyFromContextForImmediateCommandListThenSuccessIsReturned) {
|
||||
const ze_command_queue_desc_t desc = {};
|
||||
ze_result_t returnValue;
|
||||
|
||||
@@ -1208,13 +1208,20 @@ HWTEST2_F(ImmediateCommandListTest, givenCopyEngineAsyncCmdListWhenAppendingCopy
|
||||
auto ultCsr = static_cast<NEO::UltCommandStreamReceiver<FamilyType> *>(whiteBoxCmdList->getCsr(false));
|
||||
ultCsr->recordFlushedBatchBuffer = true;
|
||||
|
||||
size_t src = 0;
|
||||
size_t dst = 0;
|
||||
constexpr size_t transferSize = sizeof(size_t);
|
||||
void *src, *dst;
|
||||
ze_host_mem_alloc_desc_t hostDesc = {};
|
||||
ASSERT_EQ(ZE_RESULT_SUCCESS, context->allocHostMem(&hostDesc, transferSize, 0u, &src));
|
||||
ASSERT_EQ(ZE_RESULT_SUCCESS, context->allocHostMem(&hostDesc, transferSize, 0u, &dst));
|
||||
|
||||
CmdListMemoryCopyParams copyParams = {};
|
||||
returnValue = commandList->appendMemoryCopy(&dst, &src, sizeof(size_t), nullptr, 0, nullptr, copyParams);
|
||||
returnValue = commandList->appendMemoryCopy(dst, src, transferSize, nullptr, 0, nullptr, copyParams);
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);
|
||||
|
||||
EXPECT_FALSE(ultCsr->latestFlushedBatchBuffer.dispatchMonitorFence);
|
||||
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, context->freeMem(src));
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, context->freeMem(dst));
|
||||
}
|
||||
|
||||
HWTEST2_F(ImmediateCommandListTest, givenCopyEngineSyncCmdListWhenAppendingCopyOperationThenRequireMonitorFence, IsAtLeastXeHpcCore) {
|
||||
|
||||
@@ -448,8 +448,12 @@ HWTEST_F(AppendMemoryCopyTests, givenAsyncImmediateCommandListWhenAppendingMemor
|
||||
auto cmdQueue = std::make_unique<Mock<CommandQueue>>();
|
||||
cmdQueue->csr = ultCsr;
|
||||
cmdQueue->isCopyOnlyCommandQueue = true;
|
||||
size_t src = 0;
|
||||
size_t dst = 0;
|
||||
|
||||
constexpr size_t transferSize = sizeof(size_t);
|
||||
void *src, *dst;
|
||||
ze_host_mem_alloc_desc_t hostDesc = {};
|
||||
ASSERT_EQ(ZE_RESULT_SUCCESS, context->allocHostMem(&hostDesc, transferSize, 0u, &src));
|
||||
ASSERT_EQ(ZE_RESULT_SUCCESS, context->allocHostMem(&hostDesc, transferSize, 0u, &dst));
|
||||
|
||||
auto commandList = std::make_unique<WhiteBox<L0::CommandListCoreFamilyImmediate<FamilyType::gfxCoreFamily>>>();
|
||||
ASSERT_NE(nullptr, commandList);
|
||||
@@ -477,7 +481,7 @@ HWTEST_F(AppendMemoryCopyTests, givenAsyncImmediateCommandListWhenAppendingMemor
|
||||
expectedSize = alignUp(ultCsr->getCmdsSizeForHardwareContext() + sizeof(typename FamilyType::MI_BATCH_BUFFER_START), MemoryConstants::cacheLineSize);
|
||||
}
|
||||
|
||||
ASSERT_EQ(ZE_RESULT_SUCCESS, commandList->appendMemoryCopy(&dst, &src, sizeof(size_t), nullptr, 0, nullptr, copyParams));
|
||||
ASSERT_EQ(ZE_RESULT_SUCCESS, commandList->appendMemoryCopy(dst, src, transferSize, nullptr, 0, nullptr, copyParams));
|
||||
|
||||
EXPECT_EQ(expectedSize, ultCsr->getCS(0).getUsed() - sizeUsedBefore);
|
||||
|
||||
@@ -528,13 +532,16 @@ HWTEST_F(AppendMemoryCopyTests, givenAsyncImmediateCommandListWhenAppendingMemor
|
||||
size_t csrOfffset = ultCsr->getCS(0).getUsed();
|
||||
size_t cmdListOffset = commandList->commandContainer.getCommandStream()->getUsed();
|
||||
|
||||
ASSERT_EQ(ZE_RESULT_SUCCESS, commandList->appendMemoryCopy(&dst, &src, sizeof(size_t), nullptr, 0, nullptr, copyParams));
|
||||
ASSERT_EQ(ZE_RESULT_SUCCESS, commandList->appendMemoryCopy(dst, src, transferSize, nullptr, 0, nullptr, copyParams));
|
||||
|
||||
EXPECT_EQ(csrOfffset, ultCsr->getCS(0).getUsed());
|
||||
|
||||
EXPECT_FALSE(findTagUpdate(ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), cmdListOffset),
|
||||
commandList->commandContainer.getCommandStream()->getUsed() - cmdListOffset,
|
||||
ultCsr->getTagAllocation()->getGpuAddress()));
|
||||
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, context->freeMem(src));
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, context->freeMem(dst));
|
||||
}
|
||||
|
||||
HWTEST_F(AppendMemoryCopyTests, givenSyncImmediateCommandListWhenAppendingMemoryCopyWithCopyEngineThenProgramCmdStreamWithFlushTask) {
|
||||
|
||||
Reference in New Issue
Block a user