Fix offset pointer in blitter memory copy

Change-Id: I90c818a3ead16070beb70c91a05622b1d9d14881
Signed-off-by: Maciej Plewka <maciej.plewka@intel.com>
This commit is contained in:
Maciej Plewka
2020-07-10 15:00:48 +02:00
committed by sys_ocldev
parent 2912cebbd4
commit 3afeb00d3a
3 changed files with 62 additions and 15 deletions

View File

@@ -130,10 +130,12 @@ struct CommandListCoreFamily : CommandListImp {
uint64_t srcOffset, uint32_t size,
uint32_t elementSize, Builtin builtin);
MOCKABLE_VIRTUAL ze_result_t appendMemoryCopyBlit(NEO::GraphicsAllocation *dstPtrAlloc,
uint64_t dstOffset,
MOCKABLE_VIRTUAL ze_result_t appendMemoryCopyBlit(uintptr_t dstPtr,
NEO::GraphicsAllocation *dstPtrAlloc,
uint64_t dstOffset, uintptr_t srcPtr,
NEO::GraphicsAllocation *srcPtrAlloc,
uint64_t srcOffset, uint32_t size,
uint64_t srcOffset,
uint32_t size,
ze_event_handle_t hSignalEvent);
MOCKABLE_VIRTUAL ze_result_t appendMemoryCopyBlitRegion(NEO::GraphicsAllocation *srcAlloc,

View File

@@ -631,12 +631,15 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyKernelWithGA(v
}
template <GFXCORE_FAMILY gfxCoreFamily>
ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyBlit(NEO::GraphicsAllocation *dstPtrAlloc,
uint64_t dstOffset,
ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyBlit(uintptr_t dstPtr,
NEO::GraphicsAllocation *dstPtrAlloc,
uint64_t dstOffset, uintptr_t srcPtr,
NEO::GraphicsAllocation *srcPtrAlloc,
uint64_t srcOffset,
uint32_t size,
ze_event_handle_t hSignalEvent) {
dstOffset += ptrDiff<uintptr_t>(dstPtr, dstPtrAlloc->getGpuAddress());
srcOffset += ptrDiff<uintptr_t>(srcPtr, srcPtrAlloc->getGpuAddress());
using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
auto blitProperties = NEO::BlitProperties::constructPropertiesForCopyBuffer(dstPtrAlloc, srcPtrAlloc, {dstOffset, 0, 0}, {srcOffset, 0, 0}, {size, 0, 0}, 0, 0, 0, 0);
commandContainer.addToResidencyContainer(dstPtrAlloc);
@@ -782,9 +785,10 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopy(void *dstptr,
ze_result_t ret = ZE_RESULT_SUCCESS;
appendEventForProfiling(hSignalEvent, true);
if (ret == ZE_RESULT_SUCCESS && leftSize) {
ret = isCopyOnlyCmdList ? appendMemoryCopyBlit(dstAllocationStruct.alloc, dstAllocationStruct.offset,
ret = isCopyOnlyCmdList ? appendMemoryCopyBlit(dstAllocationStruct.alignedAllocationPtr,
dstAllocationStruct.alloc, dstAllocationStruct.offset,
srcAllocationStruct.alignedAllocationPtr,
srcAllocationStruct.alloc, srcAllocationStruct.offset, static_cast<uint32_t>(leftSize), hSignalEvent)
: appendMemoryCopyKernelWithGA(reinterpret_cast<void *>(&dstAllocationStruct.alignedAllocationPtr),
dstAllocationStruct.alloc, dstAllocationStruct.offset,
@@ -795,7 +799,9 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopy(void *dstptr,
}
if (ret == ZE_RESULT_SUCCESS && middleSizeBytes) {
ret = isCopyOnlyCmdList ? appendMemoryCopyBlit(dstAllocationStruct.alloc, leftSize + dstAllocationStruct.offset,
ret = isCopyOnlyCmdList ? appendMemoryCopyBlit(dstAllocationStruct.alignedAllocationPtr,
dstAllocationStruct.alloc, leftSize + dstAllocationStruct.offset,
srcAllocationStruct.alignedAllocationPtr,
srcAllocationStruct.alloc, leftSize + srcAllocationStruct.offset, static_cast<uint32_t>(middleSizeBytes), hSignalEvent)
: appendMemoryCopyKernelWithGA(reinterpret_cast<void *>(&dstAllocationStruct.alignedAllocationPtr),
dstAllocationStruct.alloc, leftSize + dstAllocationStruct.offset,
@@ -807,7 +813,9 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopy(void *dstptr,
}
if (ret == ZE_RESULT_SUCCESS && rightSize) {
ret = isCopyOnlyCmdList ? appendMemoryCopyBlit(dstAllocationStruct.alloc, leftSize + middleSizeBytes + dstAllocationStruct.offset,
ret = isCopyOnlyCmdList ? appendMemoryCopyBlit(dstAllocationStruct.alignedAllocationPtr,
dstAllocationStruct.alloc, leftSize + middleSizeBytes + dstAllocationStruct.offset,
srcAllocationStruct.alignedAllocationPtr,
srcAllocationStruct.alloc, leftSize + middleSizeBytes + srcAllocationStruct.offset, static_cast<uint32_t>(rightSize), hSignalEvent)
: appendMemoryCopyKernelWithGA(reinterpret_cast<void *>(&dstAllocationStruct.alignedAllocationPtr),
dstAllocationStruct.alloc, leftSize + middleSizeBytes + dstAllocationStruct.offset,

View File

@@ -265,10 +265,13 @@ class MockCommandList : public WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamil
appendMemoryCopyKernelWithGACalledTimes++;
return ZE_RESULT_SUCCESS;
}
ze_result_t appendMemoryCopyBlit(NEO::GraphicsAllocation *dstPtrAlloc,
uint64_t dstOffset,
ze_result_t appendMemoryCopyBlit(uintptr_t dstPtr,
NEO::GraphicsAllocation *dstPtrAlloc,
uint64_t dstOffset, uintptr_t srcPtr,
NEO::GraphicsAllocation *srcPtrAlloc,
uint64_t srcOffset, uint32_t size, ze_event_handle_t hSignalEvent) override {
uint64_t srcOffset,
uint32_t size,
ze_event_handle_t hSignalEvent) override {
appendMemoryCopyBlitCalledTimes++;
return ZE_RESULT_SUCCESS;
}
@@ -686,10 +689,13 @@ class MockCommandListForMemFill : public WhiteBox<::L0::CommandListCoreFamily<gf
AlignedAllocationData getAlignedAllocation(L0::Device *device, const void *buffer, uint64_t bufferSize) override {
return {0, 0, nullptr, true};
}
ze_result_t appendMemoryCopyBlit(NEO::GraphicsAllocation *dstPtrAlloc,
uint64_t dstOffset,
ze_result_t appendMemoryCopyBlit(uintptr_t dstPtr,
NEO::GraphicsAllocation *dstPtrAlloc,
uint64_t dstOffset, uintptr_t srcPtr,
NEO::GraphicsAllocation *srcPtrAlloc,
uint64_t srcOffset, uint32_t size, ze_event_handle_t hSignalEvent) override {
uint64_t srcOffset,
uint32_t size,
ze_event_handle_t hSignalEvent) override {
appendMemoryCopyBlitCalledTimes++;
return ZE_RESULT_SUCCESS;
}
@@ -1554,5 +1560,36 @@ HWTEST2_F(CommandListCreate, givenPitchAndSlicePitchWhenMemoryCopyRegionCalledSi
EXPECT_EQ(cmdList.srcSize.x, pitch);
EXPECT_EQ(cmdList.srcSize.y, slicePitch / pitch);
}
// Checks that appendMemoryCopyBlit programs XY_COPY_BLT with the pointers it was
// given when those pointers are offset from the pointers the allocations were
// created with: the call receives ptrOffset(dstPtr, dstOffset) and
// ptrOffset(srcPtr, srcOffset) together with explicit offsets of 0, and the
// destination/source base addresses of the emitted command must equal those
// offset pointers.
// NOTE(review): the test name talks about DC flush, but nothing here inspects
// PIPE_CONTROL; consider renaming in a follow-up if no test filter relies on it.
HWTEST2_F(AppendMemoryCopy, givenCopyOnlyCommandListWhenWithDcFlushAddedIsNotAddedAfterBlitCopy, Platforms) {
    using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
    using XY_COPY_BLT = typename GfxFamily::XY_COPY_BLT;

    auto commandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
    // NOTE(review): the second argument is presumably the copy-only flag - confirm against initialize().
    commandList->initialize(device, true);

    // Deliberately odd values so that neither the pointers nor the offsets are aligned.
    uintptr_t srcPtr = 0x5001;
    uintptr_t dstPtr = 0x7001;
    uint64_t srcOffset = 0x101;
    uint64_t dstOffset = 0x201;
    uint32_t copySize = 0x301;

    NEO::MockGraphicsAllocation mockAllocationSrc(0, NEO::GraphicsAllocation::AllocationType::INTERNAL_HOST_MEMORY,
                                                  reinterpret_cast<void *>(srcPtr), 0x1000, 0, sizeof(uint32_t),
                                                  MemoryPool::System4KBPages);
    NEO::MockGraphicsAllocation mockAllocationDst(0, NEO::GraphicsAllocation::AllocationType::INTERNAL_HOST_MEMORY,
                                                  reinterpret_cast<void *>(dstPtr), 0x1000, 0, sizeof(uint32_t),
                                                  MemoryPool::System4KBPages);

    // The offsets are folded into the pointers; the explicit offset arguments stay 0.
    const auto result = commandList->appendMemoryCopyBlit(ptrOffset(dstPtr, dstOffset), &mockAllocationDst, 0,
                                                          ptrOffset(srcPtr, srcOffset), &mockAllocationSrc, 0,
                                                          copySize, nullptr);
    // Stop here on failure instead of reporting a confusing "command not found" below.
    ASSERT_EQ(ZE_RESULT_SUCCESS, result);

    auto &commandContainer = commandList->commandContainer;
    GenCmdList genCmdList;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        genCmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed()));

    auto itor = find<XY_COPY_BLT *>(genCmdList.begin(), genCmdList.end());
    ASSERT_NE(genCmdList.end(), itor);

    auto cmd = genCmdCast<XY_COPY_BLT *>(*itor);
    EXPECT_EQ(cmd->getDestinationBaseAddress(), ptrOffset(dstPtr, dstOffset));
    EXPECT_EQ(cmd->getSourceBaseAddress(), ptrOffset(srcPtr, srcOffset));
}
} // namespace ult
} // namespace L0