feature: use heapless builtins for copyRegion

Related-To: NEO-15323, NEO-7621

Signed-off-by: Narendra Bagria <narendra.bagria@intel.com>
This commit is contained in:
Narendra Bagria
2025-09-17 07:44:27 +00:00
committed by Compute-Runtime-Automation
parent a5025edc20
commit d9ed61bb19
10 changed files with 156 additions and 26 deletions

View File

@@ -24,8 +24,10 @@ enum class Builtin : uint32_t {
copyBufferBytesStatelessHeapless,
copyBufferRectBytes2d,
copyBufferRectBytes2dStateless,
copyBufferRectBytes2dStatelessHeapless,
copyBufferRectBytes3d,
copyBufferRectBytes3dStateless,
copyBufferRectBytes3dStatelessHeapless,
copyBufferToBufferMiddle,
copyBufferToBufferMiddleStateless,
copyBufferToBufferMiddleStatelessHeapless,
@@ -144,7 +146,9 @@ constexpr Builtin adjustBuiltinType<Builtin::copyBufferBytes>(const bool isState
template <>
constexpr Builtin adjustBuiltinType<Builtin::copyBufferRectBytes2d>(const bool isStateless, const bool isHeapless) {
if (isStateless) {
if (isHeapless) {
return Builtin::copyBufferRectBytes2dStatelessHeapless;
} else if (isStateless) {
return Builtin::copyBufferRectBytes2dStateless;
}
return Builtin::copyBufferRectBytes2d;
@@ -152,7 +156,9 @@ constexpr Builtin adjustBuiltinType<Builtin::copyBufferRectBytes2d>(const bool i
template <>
constexpr Builtin adjustBuiltinType<Builtin::copyBufferRectBytes3d>(const bool isStateless, const bool isHeapless) {
if (isStateless) {
if (isHeapless) {
return Builtin::copyBufferRectBytes3dStatelessHeapless;
} else if (isStateless) {
return Builtin::copyBufferRectBytes3dStateless;
}
return Builtin::copyBufferRectBytes3d;

View File

@@ -53,6 +53,10 @@ void BuiltinFunctionsLibImpl::initBuiltinKernel(Builtin func) {
kernelName = "CopyBufferRectBytes2dStateless";
builtin = NEO::EBuiltInOps::copyBufferRectStateless;
break;
case Builtin::copyBufferRectBytes2dStatelessHeapless:
kernelName = "CopyBufferRectBytes2dStateless";
builtin = NEO::EBuiltInOps::copyBufferRectStatelessHeapless;
break;
case Builtin::copyBufferRectBytes3d:
kernelName = "CopyBufferRectBytes3d";
builtin = NEO::EBuiltInOps::copyBufferRect;
@@ -61,6 +65,10 @@ void BuiltinFunctionsLibImpl::initBuiltinKernel(Builtin func) {
kernelName = "CopyBufferRectBytes3dStateless";
builtin = NEO::EBuiltInOps::copyBufferRectStateless;
break;
case Builtin::copyBufferRectBytes3dStatelessHeapless:
kernelName = "CopyBufferRectBytes3dStateless";
builtin = NEO::EBuiltInOps::copyBufferRectStatelessHeapless;
break;
case Builtin::copyBufferToBufferMiddle:
kernelName = "CopyBufferToBufferMiddleRegion";
builtin = NEO::EBuiltInOps::copyBufferToBuffer;

View File

@@ -287,7 +287,7 @@ struct CommandListCoreFamily : public CommandListImp {
const ze_copy_region_t *srcRegion, uint32_t srcPitch,
size_t srcOffset, Event *signalEvent,
uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents,
bool relaxedOrderingDispatch, const bool isStateless);
bool relaxedOrderingDispatch, const bool isStateless, const bool isHeapless);
MOCKABLE_VIRTUAL ze_result_t appendMemoryCopyKernel3d(AlignedAllocationData *dstAlignedAllocation, AlignedAllocationData *srcAlignedAllocation,
Builtin builtin, const ze_copy_region_t *dstRegion,
@@ -295,7 +295,8 @@ struct CommandListCoreFamily : public CommandListImp {
const ze_copy_region_t *srcRegion, uint32_t srcPitch,
uint32_t srcSlicePitch, size_t srcOffset,
Event *signalEvent, uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch, const bool isStateless);
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch,
const bool isStateless, const bool isHeapless);
MOCKABLE_VIRTUAL ze_result_t appendBlitFill(void *ptr, const void *pattern,
size_t patternSize, size_t size,

View File

@@ -2128,6 +2128,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyRegion(void *d
isStateless = true;
}
const bool isHeapless = this->isHeaplessModeEnabled();
ze_result_t result = ZE_RESULT_SUCCESS;
if (isCopyOnlyEnabled) {
result = appendMemoryCopyBlitRegion(&srcAllocationStruct, &dstAllocationStruct, *srcRegion, *dstRegion,
@@ -2135,19 +2137,19 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyRegion(void *d
srcPitch, srcSlicePitch, dstPitch, dstSlicePitch, srcSize3, dstSize3,
signalEvent, numWaitEvents, phWaitEvents, memoryCopyParams.relaxedOrderingDispatch, isDualStreamCopyOffloadOperation(memoryCopyParams.copyOffloadAllowed));
} else if ((srcRegion->depth > 1) || (srcRegion->originZ != 0) || (dstRegion->originZ != 0)) {
Builtin builtInType = BuiltinTypeHelper::adjustBuiltinType<Builtin::copyBufferRectBytes3d>(isStateless, false);
Builtin builtInType = BuiltinTypeHelper::adjustBuiltinType<Builtin::copyBufferRectBytes3d>(isStateless, isHeapless);
result = this->appendMemoryCopyKernel3d(&dstAllocationStruct, &srcAllocationStruct, builtInType,
dstRegion, dstPitch, dstSlicePitch, dstAllocationStruct.offset,
srcRegion, srcPitch, srcSlicePitch, srcAllocationStruct.offset,
signalEvent, numWaitEvents, phWaitEvents,
memoryCopyParams.relaxedOrderingDispatch, isStateless);
memoryCopyParams.relaxedOrderingDispatch, isStateless, isHeapless);
} else {
Builtin builtInType = BuiltinTypeHelper::adjustBuiltinType<Builtin::copyBufferRectBytes2d>(isStateless, false);
Builtin builtInType = BuiltinTypeHelper::adjustBuiltinType<Builtin::copyBufferRectBytes2d>(isStateless, isHeapless);
result = this->appendMemoryCopyKernel2d(&dstAllocationStruct, &srcAllocationStruct, builtInType,
dstRegion, dstPitch, dstAllocationStruct.offset,
srcRegion, srcPitch, srcAllocationStruct.offset,
signalEvent, numWaitEvents, phWaitEvents,
memoryCopyParams.relaxedOrderingDispatch, isStateless);
memoryCopyParams.relaxedOrderingDispatch, isStateless, isHeapless);
}
if (result) {
@@ -2197,7 +2199,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyKernel3d(Align
Event *signalEvent,
uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents,
bool relaxedOrderingDispatch, const bool isStateless) {
bool relaxedOrderingDispatch,
const bool isStateless, const bool isHeapless) {
auto lock = device->getBuiltinFunctionsLib()->obtainUniqueOwnership();
const auto driverHandle = static_cast<DriverHandleImp *>(device->getDriverHandle());
@@ -2236,7 +2239,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyKernel3d(Align
builtinSetArg(builtinKernel, 0u, srcAlignedAllocation->alignedAllocationPtr, srcAlignedAllocation->alloc);
builtinSetArg(builtinKernel, 1u, dstAlignedAllocation->alignedAllocationPtr, dstAlignedAllocation->alloc);
if (isStateless) {
if (isStateless || isHeapless) {
uint64_t srcOrigin64[3] = {static_cast<uint64_t>(srcRegion->originX) + static_cast<uint64_t>(srcOffset),
static_cast<uint64_t>(srcRegion->originY), static_cast<uint64_t>(srcRegion->originZ)};
uint64_t dstOrigin64[3] = {static_cast<uint64_t>(dstRegion->originX) + static_cast<uint64_t>(dstOffset),
@@ -2290,7 +2293,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyKernel2d(Align
Event *signalEvent,
uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents,
bool relaxedOrderingDispatch, const bool isStateless) {
bool relaxedOrderingDispatch,
const bool isStateless, const bool isHeapless) {
auto lock = device->getBuiltinFunctionsLib()->obtainUniqueOwnership();
const auto driverHandle = static_cast<DriverHandleImp *>(device->getDriverHandle());
@@ -2328,7 +2332,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyKernel2d(Align
builtinSetArg(builtinKernel, 0u, srcAlignedAllocation->alignedAllocationPtr, srcAlignedAllocation->alloc);
builtinSetArg(builtinKernel, 1u, dstAlignedAllocation->alignedAllocationPtr, dstAlignedAllocation->alloc);
if (isStateless) {
if (isHeapless || isStateless) {
uint64_t srcOrigin64[2] = {static_cast<uint64_t>(srcRegion->originX) + static_cast<uint64_t>(srcOffset),
static_cast<uint64_t>(srcRegion->originY)};
uint64_t dstOrigin64[2] = {static_cast<uint64_t>(dstRegion->originX) + static_cast<uint64_t>(dstOffset),

View File

@@ -719,10 +719,10 @@ class MockCommandListCoreFamily : public CommandListCoreFamily<gfxCoreFamily> {
const ze_copy_region_t *srcRegion, uint32_t srcPitch,
size_t srcOffset, L0::Event *signalEvent,
uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents,
bool relaxedOrderingDispatch, const bool isStateless) override {
bool relaxedOrderingDispatch, const bool isStateless, const bool isHeapless) override {
srcAlignedPtr = srcAlignedAllocation->alignedAllocationPtr;
dstAlignedPtr = dstAlignedAllocation->alignedAllocationPtr;
return L0::CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyKernel2d(dstAlignedAllocation, srcAlignedAllocation, builtin, dstRegion, dstPitch, dstOffset, srcRegion, srcPitch, srcOffset, signalEvent, numWaitEvents, phWaitEvents, relaxedOrderingDispatch, isStateless);
return L0::CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyKernel2d(dstAlignedAllocation, srcAlignedAllocation, builtin, dstRegion, dstPitch, dstOffset, srcRegion, srcPitch, srcOffset, signalEvent, numWaitEvents, phWaitEvents, relaxedOrderingDispatch, isStateless, isHeapless);
}
ze_result_t appendMemoryCopyKernel3d(AlignedAllocationData *dstAlignedAllocation, AlignedAllocationData *srcAlignedAllocation,
@@ -731,10 +731,11 @@ class MockCommandListCoreFamily : public CommandListCoreFamily<gfxCoreFamily> {
const ze_copy_region_t *srcRegion, uint32_t srcPitch,
uint32_t srcSlicePitch, size_t srcOffset,
L0::Event *signalEvent, uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch, const bool isStateless) override {
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch,
const bool isStateless, const bool isHeapless) override {
srcAlignedPtr = srcAlignedAllocation->alignedAllocationPtr;
dstAlignedPtr = dstAlignedAllocation->alignedAllocationPtr;
return L0::CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyKernel3d(dstAlignedAllocation, srcAlignedAllocation, builtin, dstRegion, dstPitch, dstSlicePitch, dstOffset, srcRegion, srcPitch, srcSlicePitch, srcOffset, signalEvent, numWaitEvents, phWaitEvents, relaxedOrderingDispatch, isStateless);
return L0::CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyKernel3d(dstAlignedAllocation, srcAlignedAllocation, builtin, dstRegion, dstPitch, dstSlicePitch, dstOffset, srcRegion, srcPitch, srcSlicePitch, srcOffset, signalEvent, numWaitEvents, phWaitEvents, relaxedOrderingDispatch, isStateless, isHeapless);
}
ze_result_t appendMemoryCopyBlitRegion(AlignedAllocationData *srcAllocationData,

View File

@@ -251,6 +251,19 @@ HWTEST_F(TestBuiltinFunctionsLibImpl, givenHeaplessImageBuiltinsWhenInitBuiltinK
EXPECT_STREQ("CopyImage3dToImage3d", lib.kernelNamePassed.c_str());
}
// Checks that initialising either heapless buffer-rect builtin (2D or 3D)
// forwards the copyBufferRectStatelessHeapless builtin op together with the
// corresponding "...Stateless" kernel name to the mocked library.
HWTEST_F(TestBuiltinFunctionsLibImpl, givenHeaplessBufferRectBuiltinsWhenInitBuiltinKernelThenCorrectArgumentsArePassed) {
    MockCheckPassedArgumentsBuiltinFunctionsLibImpl lib(device, device->getNEODevice()->getBuiltIns());

    // Builtin to initialise paired with the kernel name the mock should record.
    struct HeaplessRectCase {
        L0::Builtin builtin;
        const char *expectedKernelName;
    };
    const HeaplessRectCase heaplessRectCases[] = {
        {L0::Builtin::copyBufferRectBytes2dStatelessHeapless, "CopyBufferRectBytes2dStateless"},
        {L0::Builtin::copyBufferRectBytes3dStatelessHeapless, "CopyBufferRectBytes3dStateless"},
    };

    for (const auto &rectCase : heaplessRectCases) {
        lib.initBuiltinKernel(rectCase.builtin);

        // Both dimensionalities share the same NEO builtin op.
        EXPECT_EQ(NEO::EBuiltInOps::copyBufferRectStatelessHeapless, lib.builtinPassed);
        EXPECT_STREQ(rectCase.expectedKernelName, lib.kernelNamePassed.c_str());
    }
}
HWTEST_F(TestBuiltinFunctionsLibImpl, givenStatelessBufferRectBuiltinsWhenInitBuiltinKernelThenCorrectArgumentsArePassed) {
MockCheckPassedArgumentsBuiltinFunctionsLibImpl lib(device, device->getNEODevice()->getBuiltIns());

View File

@@ -3575,6 +3575,8 @@ TEST(BuiltinTypeHelperTest, givenHeaplessWhenAdjustBuiltinTypeIsCalledThenCorrec
bool isHeapless = true;
EXPECT_EQ(Builtin::copyBufferBytesStatelessHeapless, BuiltinTypeHelper::adjustBuiltinType<Builtin::copyBufferBytes>(isStateless, isHeapless));
EXPECT_EQ(Builtin::copyBufferRectBytes2dStatelessHeapless, BuiltinTypeHelper::adjustBuiltinType<Builtin::copyBufferRectBytes2d>(isStateless, isHeapless));
EXPECT_EQ(Builtin::copyBufferRectBytes3dStatelessHeapless, BuiltinTypeHelper::adjustBuiltinType<Builtin::copyBufferRectBytes3d>(isStateless, isHeapless));
EXPECT_EQ(Builtin::copyBufferToBufferMiddleStatelessHeapless, BuiltinTypeHelper::adjustBuiltinType<Builtin::copyBufferToBufferMiddle>(isStateless, isHeapless));
EXPECT_EQ(Builtin::copyBufferToBufferSideStatelessHeapless, BuiltinTypeHelper::adjustBuiltinType<Builtin::copyBufferToBufferSide>(isStateless, isHeapless));
EXPECT_EQ(Builtin::fillBufferImmediateStatelessHeapless, BuiltinTypeHelper::adjustBuiltinType<Builtin::fillBufferImmediate>(isStateless, isHeapless));
@@ -3583,6 +3585,7 @@ TEST(BuiltinTypeHelperTest, givenHeaplessWhenAdjustBuiltinTypeIsCalledThenCorrec
EXPECT_EQ(Builtin::fillBufferMiddleStatelessHeapless, BuiltinTypeHelper::adjustBuiltinType<Builtin::fillBufferMiddle>(isStateless, isHeapless));
EXPECT_EQ(Builtin::fillBufferRightLeftoverStatelessHeapless, BuiltinTypeHelper::adjustBuiltinType<Builtin::fillBufferRightLeftover>(isStateless, isHeapless));
}
HWTEST2_F(CommandListCreateTests, givenDummyBlitRequiredWhenEncodeMiFlushWithPostSyncThenDummyBlitIsProgrammedPriorToMiFlushAndDummyAllocationIsAddedToResidencyContainer, IsAtLeastXeCore) {
using MI_FLUSH_DW = typename FamilyType::MI_FLUSH_DW;
DebugManagerStateRestore restorer;

View File

@@ -115,7 +115,7 @@ class MockCommandListHw : public WhiteBox<::L0::CommandListCoreFamily<gfxCoreFam
const ze_copy_region_t *srcRegion, uint32_t srcPitch,
size_t srcOffset, L0::Event *signalEvent,
uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents,
bool relaxedOrderingDispatch, const bool isStateless) override {
bool relaxedOrderingDispatch, const bool isStateless, const bool isHeapless) override {
appendMemoryCopyKernel2dCalledTimes++;
return ZE_RESULT_SUCCESS;
}
@@ -126,7 +126,8 @@ class MockCommandListHw : public WhiteBox<::L0::CommandListCoreFamily<gfxCoreFam
const ze_copy_region_t *srcRegion, uint32_t srcPitch,
uint32_t srcSlicePitch, size_t srcOffset,
L0::Event *signalEvent, uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch, const bool isStateless) override {
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch,
const bool isStateless, const bool isHeapless) override {
appendMemoryCopyKernel3dCalledTimes++;
return ZE_RESULT_SUCCESS;
}

View File

@@ -644,7 +644,71 @@ HWTEST_F(CommandListTest, givenComputeCommandListAnd3dRegionWhenMemoryCopyRegion
context->freeMem(dstBuffer);
}
HWTEST_F(CommandListTest, givenStatelessAnd2dRegionWhenAppendMemoryCopyRegionThenPitchArgumentsAreSetCorrectly) {
// With heapless mode enabled and stateless builtins disabled, a 2D region copy
// is expected to pass origin and pitch kernel arguments with 64-bit sizes.
HWTEST_F(CommandListTest, givenHeaplessAnd2dRegionWhenAppendMemoryCopyRegionThenOriginAndPitchArgumentsAreSetAs64Bit) {
    // Make the mocked builtin kernel record the values passed as arguments.
    auto *kernelMock = static_cast<Mock<::L0::KernelImp> *>(device->getBuiltinFunctionsLib()->getFunction(Builtin::copyBufferRectBytes2d));
    kernelMock->checkPassedArgumentValues = true;
    kernelMock->passedArgumentValues.clear();
    kernelMock->passedArgumentValues.resize(6);

    auto cmdList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<FamilyType::gfxCoreFamily>>>();
    ASSERT_EQ(ZE_RESULT_SUCCESS, cmdList->initialize(device, NEO::EngineGroupType::renderCompute, 0u));
    cmdList->heaplessModeEnabled = true;
    cmdList->statelessBuiltinsEnabled = false;

    void *srcPtr = reinterpret_cast<void *>(0x2345);
    void *dstPtr = reinterpret_cast<void *>(0x1234);
    // Depth 1 and originZ 0 on both regions.
    ze_copy_region_t srcRegion = {4, 4, 0, 2, 2, 1};
    ze_copy_region_t dstRegion = {4, 4, 0, 2, 2, 1};

    EXPECT_EQ(ZE_RESULT_SUCCESS, cmdList->appendMemoryCopyRegion(dstPtr, &dstRegion, 0, 0, srcPtr, &srcRegion, 0, 0, nullptr, 0, nullptr, copyParams));
    EXPECT_TRUE(cmdList->usedKernelLaunchParams.isBuiltInKernel);

    const auto &recordedArgs = kernelMock->passedArgumentValues;
    // Argument 2: source origin, argument 3: destination origin.
    EXPECT_EQ(sizeof(uint64_t) * 2, recordedArgs[2u].size());
    EXPECT_EQ(sizeof(uint64_t) * 2, recordedArgs[3u].size());
    // Argument 4: source pitch, argument 5: destination pitch.
    EXPECT_EQ(sizeof(uint64_t), recordedArgs[4u].size());
    EXPECT_EQ(sizeof(uint64_t), recordedArgs[5u].size());
}
// With heapless mode enabled and stateless builtins disabled, a 3D region copy
// is expected to pass origin and pitch kernel arguments with 64-bit element sizes.
HWTEST_F(CommandListTest, givenHeaplessAnd3dRegionWhenAppendMemoryCopyRegionThenOriginAndPitchArgumentsAreSetAs64Bit) {
    // Make the mocked builtin kernel record the values passed as arguments.
    auto *kernelMock = static_cast<Mock<::L0::KernelImp> *>(device->getBuiltinFunctionsLib()->getFunction(Builtin::copyBufferRectBytes3d));
    kernelMock->checkPassedArgumentValues = true;
    kernelMock->passedArgumentValues.clear();
    kernelMock->passedArgumentValues.resize(6);

    auto cmdList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<FamilyType::gfxCoreFamily>>>();
    ASSERT_EQ(ZE_RESULT_SUCCESS, cmdList->initialize(device, NEO::EngineGroupType::renderCompute, 0u));
    cmdList->heaplessModeEnabled = true;
    cmdList->statelessBuiltinsEnabled = false;

    void *srcPtr = reinterpret_cast<void *>(0x2345);
    void *dstPtr = reinterpret_cast<void *>(0x1234);
    // Depth 2 and a non-zero originZ on both regions.
    ze_copy_region_t srcRegion = {4, 4, 4, 2, 2, 2};
    ze_copy_region_t dstRegion = {4, 4, 4, 2, 2, 2};

    EXPECT_EQ(ZE_RESULT_SUCCESS, cmdList->appendMemoryCopyRegion(dstPtr, &dstRegion, 0, 0, srcPtr, &srcRegion, 0, 0, nullptr, 0, nullptr, copyParams));
    EXPECT_TRUE(cmdList->usedKernelLaunchParams.isBuiltInKernel);

    const auto &recordedArgs = kernelMock->passedArgumentValues;
    // Argument 2: source origin, argument 3: destination origin (three components each).
    EXPECT_EQ(sizeof(uint64_t) * 3, recordedArgs[2u].size());
    EXPECT_EQ(sizeof(uint64_t) * 3, recordedArgs[3u].size());
    // Argument 4: source pitch, argument 5: destination pitch (two components each).
    EXPECT_EQ(sizeof(uint64_t) * 2, recordedArgs[4u].size());
    EXPECT_EQ(sizeof(uint64_t) * 2, recordedArgs[5u].size());
}
HWTEST_F(CommandListTest, givenStatelessAnd2dRegionWhenAppendMemoryCopyRegionThenOriginAndPitchArgumentsAreSetCorrectly) {
for (bool isStateless : {false, true}) {
Builtin func = isStateless ? Builtin::copyBufferRectBytes2dStateless : Builtin::copyBufferRectBytes2d;
@@ -672,19 +736,25 @@ HWTEST_F(CommandListTest, givenStatelessAnd2dRegionWhenAppendMemoryCopyRegionThe
EXPECT_TRUE(commandList->usedKernelLaunchParams.isBuiltInKernel);
auto passedArgSizeSrcOrigin = mockBuiltinKernel->passedArgumentValues[2u].size();
auto passedArgSizeDstOrigin = mockBuiltinKernel->passedArgumentValues[3u].size();
auto passedArgSizeSrcPitch = mockBuiltinKernel->passedArgumentValues[4u].size();
auto passedArgSizeDstPitch = mockBuiltinKernel->passedArgumentValues[5u].size();
if (isStateless) {
EXPECT_EQ(sizeof(uint64_t) * 2, passedArgSizeSrcOrigin);
EXPECT_EQ(sizeof(uint64_t) * 2, passedArgSizeDstOrigin);
EXPECT_EQ(sizeof(uint64_t), passedArgSizeSrcPitch);
EXPECT_EQ(sizeof(uint64_t), passedArgSizeDstPitch);
} else {
EXPECT_EQ(sizeof(uint32_t) * 2, passedArgSizeSrcOrigin);
EXPECT_EQ(sizeof(uint32_t) * 2, passedArgSizeDstOrigin);
EXPECT_EQ(sizeof(uint32_t), passedArgSizeSrcPitch);
EXPECT_EQ(sizeof(uint32_t), passedArgSizeDstPitch);
}
}
}
HWTEST_F(CommandListTest, givenStatelessAnd3dRegionWhenAppendMemoryCopyRegionThenPitchArgumentsAreSetCorrectly) {
HWTEST_F(CommandListTest, givenStatelessAnd3dRegionWhenAppendMemoryCopyRegionThenOriginAndPitchArgumentsAreSetCorrectly) {
for (bool isStateless : {false, true}) {
Builtin func = isStateless ? Builtin::copyBufferRectBytes3dStateless : Builtin::copyBufferRectBytes3d;
@@ -713,19 +783,25 @@ HWTEST_F(CommandListTest, givenStatelessAnd3dRegionWhenAppendMemoryCopyRegionThe
EXPECT_TRUE(commandList->usedKernelLaunchParams.isBuiltInKernel);
auto passedArgSizeSrcOrigin = mockBuiltinKernel->passedArgumentValues[2u].size();
auto passedArgSizeDstOrigin = mockBuiltinKernel->passedArgumentValues[3u].size();
auto passedArgSizeSrcPitch = mockBuiltinKernel->passedArgumentValues[4u].size();
auto passedArgSizeDstPitch = mockBuiltinKernel->passedArgumentValues[5u].size();
if (isStateless) {
EXPECT_EQ(sizeof(uint64_t) * 3, passedArgSizeSrcOrigin);
EXPECT_EQ(sizeof(uint64_t) * 3, passedArgSizeDstOrigin);
EXPECT_EQ(sizeof(uint64_t) * 2, passedArgSizeSrcPitch);
EXPECT_EQ(sizeof(uint64_t) * 2, passedArgSizeDstPitch);
} else {
EXPECT_EQ(sizeof(uint32_t) * 3, passedArgSizeSrcOrigin);
EXPECT_EQ(sizeof(uint32_t) * 3, passedArgSizeDstOrigin);
EXPECT_EQ(sizeof(uint32_t) * 2, passedArgSizeSrcPitch);
EXPECT_EQ(sizeof(uint32_t) * 2, passedArgSizeDstPitch);
}
}
}
HWTEST_F(CommandListTest, given4GBOrGreater2dSrcAndDstRegionsWhenAppendMemoryCopyRegionThenPitchArgumentsAreSetAs64Bit) {
HWTEST_F(CommandListTest, given4GBOrGreater2dSrcAndDstRegionsWhenAppendMemoryCopyRegionThenOriginAndPitchArgumentsAreSetAs64Bit) {
auto kernel = device->getBuiltinFunctionsLib()->getFunction(Builtin::copyBufferRectBytes2d);
auto mockBuiltinKernel = static_cast<Mock<::L0::KernelImp> *>(kernel);
mockBuiltinKernel->checkPassedArgumentValues = true;
@@ -750,14 +826,18 @@ HWTEST_F(CommandListTest, given4GBOrGreater2dSrcAndDstRegionsWhenAppendMemoryCop
EXPECT_TRUE(commandList->usedKernelLaunchParams.isBuiltInKernel);
auto passedArgSizeSrcOrigin = mockBuiltinKernel->passedArgumentValues[2u].size();
auto passedArgSizeDstOrigin = mockBuiltinKernel->passedArgumentValues[3u].size();
auto passedArgSizeSrcPitch = mockBuiltinKernel->passedArgumentValues[4u].size();
auto passedArgSizeDstPitch = mockBuiltinKernel->passedArgumentValues[5u].size();
EXPECT_EQ(sizeof(uint64_t) * 2, passedArgSizeSrcOrigin);
EXPECT_EQ(sizeof(uint64_t) * 2, passedArgSizeDstOrigin);
EXPECT_EQ(sizeof(uint64_t), passedArgSizeSrcPitch);
EXPECT_EQ(sizeof(uint64_t), passedArgSizeDstPitch);
}
HWTEST_F(CommandListTest, given4GBOrGreater3dSrcAndDstRegionsWhenAppendMemoryCopyRegionThenPitchArgumentsAreSetAs64Bit) {
HWTEST_F(CommandListTest, given4GBOrGreater3dSrcAndDstRegionsWhenAppendMemoryCopyRegionThenOriginAndPitchArgumentsAreSetAs64Bit) {
auto kernel = device->getBuiltinFunctionsLib()->getFunction(Builtin::copyBufferRectBytes3d);
auto mockBuiltinKernel = static_cast<Mock<::L0::KernelImp> *>(kernel);
mockBuiltinKernel->checkPassedArgumentValues = true;
@@ -783,14 +863,18 @@ HWTEST_F(CommandListTest, given4GBOrGreater3dSrcAndDstRegionsWhenAppendMemoryCop
EXPECT_TRUE(commandList->usedKernelLaunchParams.isBuiltInKernel);
auto passedArgSizeSrcOrigin = mockBuiltinKernel->passedArgumentValues[2u].size();
auto passedArgSizeDstOrigin = mockBuiltinKernel->passedArgumentValues[3u].size();
auto passedArgSizeSrcPitch = mockBuiltinKernel->passedArgumentValues[4u].size();
auto passedArgSizeDstPitch = mockBuiltinKernel->passedArgumentValues[5u].size();
EXPECT_EQ(sizeof(uint64_t) * 3, passedArgSizeSrcOrigin);
EXPECT_EQ(sizeof(uint64_t) * 3, passedArgSizeDstOrigin);
EXPECT_EQ(sizeof(uint64_t) * 2, passedArgSizeSrcPitch);
EXPECT_EQ(sizeof(uint64_t) * 2, passedArgSizeDstPitch);
}
HWTEST_F(CommandListTest, given4GBOrGreater2dDstRegionWhenAppendMemoryCopyRegionThenPitchArgumentsAreSetAs64Bit) {
HWTEST_F(CommandListTest, given4GBOrGreater2dDstRegionWhenAppendMemoryCopyRegionThenOriginAndPitchArgumentsAreSetAs64Bit) {
auto kernel = device->getBuiltinFunctionsLib()->getFunction(Builtin::copyBufferRectBytes2d);
auto mockBuiltinKernel = static_cast<Mock<::L0::KernelImp> *>(kernel);
mockBuiltinKernel->checkPassedArgumentValues = true;
@@ -815,14 +899,18 @@ HWTEST_F(CommandListTest, given4GBOrGreater2dDstRegionWhenAppendMemoryCopyRegion
EXPECT_TRUE(commandList->usedKernelLaunchParams.isBuiltInKernel);
auto passedArgSizeSrcOrigin = mockBuiltinKernel->passedArgumentValues[2u].size();
auto passedArgSizeDstOrigin = mockBuiltinKernel->passedArgumentValues[3u].size();
auto passedArgSizeSrcPitch = mockBuiltinKernel->passedArgumentValues[4u].size();
auto passedArgSizeDstPitch = mockBuiltinKernel->passedArgumentValues[5u].size();
EXPECT_EQ(sizeof(uint64_t) * 2, passedArgSizeSrcOrigin);
EXPECT_EQ(sizeof(uint64_t) * 2, passedArgSizeDstOrigin);
EXPECT_EQ(sizeof(uint64_t), passedArgSizeSrcPitch);
EXPECT_EQ(sizeof(uint64_t), passedArgSizeDstPitch);
}
HWTEST_F(CommandListTest, given4GBOrGreater3dDstRegionWhenAppendMemoryCopyRegionThenPitchArgumentsAreSetAs64Bit) {
HWTEST_F(CommandListTest, given4GBOrGreater3dDstRegionWhenAppendMemoryCopyRegionThenOriginAndPitchArgumentsAreSetAs64Bit) {
auto kernel = device->getBuiltinFunctionsLib()->getFunction(Builtin::copyBufferRectBytes3d);
auto mockBuiltinKernel = static_cast<Mock<::L0::KernelImp> *>(kernel);
mockBuiltinKernel->checkPassedArgumentValues = true;
@@ -848,9 +936,13 @@ HWTEST_F(CommandListTest, given4GBOrGreater3dDstRegionWhenAppendMemoryCopyRegion
EXPECT_TRUE(commandList->usedKernelLaunchParams.isBuiltInKernel);
auto passedArgSizeSrcOrigin = mockBuiltinKernel->passedArgumentValues[2u].size();
auto passedArgSizeDstOrigin = mockBuiltinKernel->passedArgumentValues[3u].size();
auto passedArgSizeSrcPitch = mockBuiltinKernel->passedArgumentValues[4u].size();
auto passedArgSizeDstPitch = mockBuiltinKernel->passedArgumentValues[5u].size();
EXPECT_EQ(sizeof(uint64_t) * 3, passedArgSizeSrcOrigin);
EXPECT_EQ(sizeof(uint64_t) * 3, passedArgSizeDstOrigin);
EXPECT_EQ(sizeof(uint64_t) * 2, passedArgSizeSrcPitch);
EXPECT_EQ(sizeof(uint64_t) * 2, passedArgSizeDstPitch);
}

View File

@@ -157,7 +157,7 @@ class MockCommandListExtensionHw : public WhiteBox<::L0::CommandListCoreFamily<g
const ze_copy_region_t *srcRegion, uint32_t srcPitch,
size_t srcOffset, Event *signalEvent,
uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents,
bool relaxedOrderingDispatch, const bool isStateless) override {
bool relaxedOrderingDispatch, const bool isStateless, const bool isHeapless) override {
appendMemoryCopyKernel2dCalledTimes++;
return ZE_RESULT_SUCCESS;
}
@@ -168,7 +168,8 @@ class MockCommandListExtensionHw : public WhiteBox<::L0::CommandListCoreFamily<g
const ze_copy_region_t *srcRegion, uint32_t srcPitch,
uint32_t srcSlicePitch, size_t srcOffset,
Event *signalEvent, uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch, const bool isStateless) override {
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch,
const bool isStateless, const bool isHeapless) override {
appendMemoryCopyKernel3dCalledTimes++;
return ZE_RESULT_SUCCESS;
}