mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-21 09:14:47 +08:00
feature: use heapless builtins for copyRegion
Related-To: NEO-15323, NEO-7621
Signed-off-by: Narendra Bagria <narendra.bagria@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
a5025edc20
commit
d9ed61bb19
@@ -24,8 +24,10 @@ enum class Builtin : uint32_t {
|
||||
copyBufferBytesStatelessHeapless,
|
||||
copyBufferRectBytes2d,
|
||||
copyBufferRectBytes2dStateless,
|
||||
copyBufferRectBytes2dStatelessHeapless,
|
||||
copyBufferRectBytes3d,
|
||||
copyBufferRectBytes3dStateless,
|
||||
copyBufferRectBytes3dStatelessHeapless,
|
||||
copyBufferToBufferMiddle,
|
||||
copyBufferToBufferMiddleStateless,
|
||||
copyBufferToBufferMiddleStatelessHeapless,
|
||||
@@ -144,7 +146,9 @@ constexpr Builtin adjustBuiltinType<Builtin::copyBufferBytes>(const bool isState
|
||||
|
||||
template <>
|
||||
constexpr Builtin adjustBuiltinType<Builtin::copyBufferRectBytes2d>(const bool isStateless, const bool isHeapless) {
|
||||
if (isStateless) {
|
||||
if (isHeapless) {
|
||||
return Builtin::copyBufferRectBytes2dStatelessHeapless;
|
||||
} else if (isStateless) {
|
||||
return Builtin::copyBufferRectBytes2dStateless;
|
||||
}
|
||||
return Builtin::copyBufferRectBytes2d;
|
||||
@@ -152,7 +156,9 @@ constexpr Builtin adjustBuiltinType<Builtin::copyBufferRectBytes2d>(const bool i
|
||||
|
||||
template <>
|
||||
constexpr Builtin adjustBuiltinType<Builtin::copyBufferRectBytes3d>(const bool isStateless, const bool isHeapless) {
|
||||
if (isStateless) {
|
||||
if (isHeapless) {
|
||||
return Builtin::copyBufferRectBytes3dStatelessHeapless;
|
||||
} else if (isStateless) {
|
||||
return Builtin::copyBufferRectBytes3dStateless;
|
||||
}
|
||||
return Builtin::copyBufferRectBytes3d;
|
||||
|
||||
@@ -53,6 +53,10 @@ void BuiltinFunctionsLibImpl::initBuiltinKernel(Builtin func) {
|
||||
kernelName = "CopyBufferRectBytes2dStateless";
|
||||
builtin = NEO::EBuiltInOps::copyBufferRectStateless;
|
||||
break;
|
||||
case Builtin::copyBufferRectBytes2dStatelessHeapless:
|
||||
kernelName = "CopyBufferRectBytes2dStateless";
|
||||
builtin = NEO::EBuiltInOps::copyBufferRectStatelessHeapless;
|
||||
break;
|
||||
case Builtin::copyBufferRectBytes3d:
|
||||
kernelName = "CopyBufferRectBytes3d";
|
||||
builtin = NEO::EBuiltInOps::copyBufferRect;
|
||||
@@ -61,6 +65,10 @@ void BuiltinFunctionsLibImpl::initBuiltinKernel(Builtin func) {
|
||||
kernelName = "CopyBufferRectBytes3dStateless";
|
||||
builtin = NEO::EBuiltInOps::copyBufferRectStateless;
|
||||
break;
|
||||
case Builtin::copyBufferRectBytes3dStatelessHeapless:
|
||||
kernelName = "CopyBufferRectBytes3dStateless";
|
||||
builtin = NEO::EBuiltInOps::copyBufferRectStatelessHeapless;
|
||||
break;
|
||||
case Builtin::copyBufferToBufferMiddle:
|
||||
kernelName = "CopyBufferToBufferMiddleRegion";
|
||||
builtin = NEO::EBuiltInOps::copyBufferToBuffer;
|
||||
|
||||
@@ -287,7 +287,7 @@ struct CommandListCoreFamily : public CommandListImp {
|
||||
const ze_copy_region_t *srcRegion, uint32_t srcPitch,
|
||||
size_t srcOffset, Event *signalEvent,
|
||||
uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents,
|
||||
bool relaxedOrderingDispatch, const bool isStateless);
|
||||
bool relaxedOrderingDispatch, const bool isStateless, const bool isHeapless);
|
||||
|
||||
MOCKABLE_VIRTUAL ze_result_t appendMemoryCopyKernel3d(AlignedAllocationData *dstAlignedAllocation, AlignedAllocationData *srcAlignedAllocation,
|
||||
Builtin builtin, const ze_copy_region_t *dstRegion,
|
||||
@@ -295,7 +295,8 @@ struct CommandListCoreFamily : public CommandListImp {
|
||||
const ze_copy_region_t *srcRegion, uint32_t srcPitch,
|
||||
uint32_t srcSlicePitch, size_t srcOffset,
|
||||
Event *signalEvent, uint32_t numWaitEvents,
|
||||
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch, const bool isStateless);
|
||||
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch,
|
||||
const bool isStateless, const bool isHeapless);
|
||||
|
||||
MOCKABLE_VIRTUAL ze_result_t appendBlitFill(void *ptr, const void *pattern,
|
||||
size_t patternSize, size_t size,
|
||||
|
||||
@@ -2128,6 +2128,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyRegion(void *d
|
||||
isStateless = true;
|
||||
}
|
||||
|
||||
const bool isHeapless = this->isHeaplessModeEnabled();
|
||||
|
||||
ze_result_t result = ZE_RESULT_SUCCESS;
|
||||
if (isCopyOnlyEnabled) {
|
||||
result = appendMemoryCopyBlitRegion(&srcAllocationStruct, &dstAllocationStruct, *srcRegion, *dstRegion,
|
||||
@@ -2135,19 +2137,19 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyRegion(void *d
|
||||
srcPitch, srcSlicePitch, dstPitch, dstSlicePitch, srcSize3, dstSize3,
|
||||
signalEvent, numWaitEvents, phWaitEvents, memoryCopyParams.relaxedOrderingDispatch, isDualStreamCopyOffloadOperation(memoryCopyParams.copyOffloadAllowed));
|
||||
} else if ((srcRegion->depth > 1) || (srcRegion->originZ != 0) || (dstRegion->originZ != 0)) {
|
||||
Builtin builtInType = BuiltinTypeHelper::adjustBuiltinType<Builtin::copyBufferRectBytes3d>(isStateless, false);
|
||||
Builtin builtInType = BuiltinTypeHelper::adjustBuiltinType<Builtin::copyBufferRectBytes3d>(isStateless, isHeapless);
|
||||
result = this->appendMemoryCopyKernel3d(&dstAllocationStruct, &srcAllocationStruct, builtInType,
|
||||
dstRegion, dstPitch, dstSlicePitch, dstAllocationStruct.offset,
|
||||
srcRegion, srcPitch, srcSlicePitch, srcAllocationStruct.offset,
|
||||
signalEvent, numWaitEvents, phWaitEvents,
|
||||
memoryCopyParams.relaxedOrderingDispatch, isStateless);
|
||||
memoryCopyParams.relaxedOrderingDispatch, isStateless, isHeapless);
|
||||
} else {
|
||||
Builtin builtInType = BuiltinTypeHelper::adjustBuiltinType<Builtin::copyBufferRectBytes2d>(isStateless, false);
|
||||
Builtin builtInType = BuiltinTypeHelper::adjustBuiltinType<Builtin::copyBufferRectBytes2d>(isStateless, isHeapless);
|
||||
result = this->appendMemoryCopyKernel2d(&dstAllocationStruct, &srcAllocationStruct, builtInType,
|
||||
dstRegion, dstPitch, dstAllocationStruct.offset,
|
||||
srcRegion, srcPitch, srcAllocationStruct.offset,
|
||||
signalEvent, numWaitEvents, phWaitEvents,
|
||||
memoryCopyParams.relaxedOrderingDispatch, isStateless);
|
||||
memoryCopyParams.relaxedOrderingDispatch, isStateless, isHeapless);
|
||||
}
|
||||
|
||||
if (result) {
|
||||
@@ -2197,7 +2199,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyKernel3d(Align
|
||||
Event *signalEvent,
|
||||
uint32_t numWaitEvents,
|
||||
ze_event_handle_t *phWaitEvents,
|
||||
bool relaxedOrderingDispatch, const bool isStateless) {
|
||||
bool relaxedOrderingDispatch,
|
||||
const bool isStateless, const bool isHeapless) {
|
||||
|
||||
auto lock = device->getBuiltinFunctionsLib()->obtainUniqueOwnership();
|
||||
const auto driverHandle = static_cast<DriverHandleImp *>(device->getDriverHandle());
|
||||
@@ -2236,7 +2239,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyKernel3d(Align
|
||||
builtinSetArg(builtinKernel, 0u, srcAlignedAllocation->alignedAllocationPtr, srcAlignedAllocation->alloc);
|
||||
builtinSetArg(builtinKernel, 1u, dstAlignedAllocation->alignedAllocationPtr, dstAlignedAllocation->alloc);
|
||||
|
||||
if (isStateless) {
|
||||
if (isStateless || isHeapless) {
|
||||
uint64_t srcOrigin64[3] = {static_cast<uint64_t>(srcRegion->originX) + static_cast<uint64_t>(srcOffset),
|
||||
static_cast<uint64_t>(srcRegion->originY), static_cast<uint64_t>(srcRegion->originZ)};
|
||||
uint64_t dstOrigin64[3] = {static_cast<uint64_t>(dstRegion->originX) + static_cast<uint64_t>(dstOffset),
|
||||
@@ -2290,7 +2293,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyKernel2d(Align
|
||||
Event *signalEvent,
|
||||
uint32_t numWaitEvents,
|
||||
ze_event_handle_t *phWaitEvents,
|
||||
bool relaxedOrderingDispatch, const bool isStateless) {
|
||||
bool relaxedOrderingDispatch,
|
||||
const bool isStateless, const bool isHeapless) {
|
||||
|
||||
auto lock = device->getBuiltinFunctionsLib()->obtainUniqueOwnership();
|
||||
const auto driverHandle = static_cast<DriverHandleImp *>(device->getDriverHandle());
|
||||
@@ -2328,7 +2332,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyKernel2d(Align
|
||||
builtinSetArg(builtinKernel, 0u, srcAlignedAllocation->alignedAllocationPtr, srcAlignedAllocation->alloc);
|
||||
builtinSetArg(builtinKernel, 1u, dstAlignedAllocation->alignedAllocationPtr, dstAlignedAllocation->alloc);
|
||||
|
||||
if (isStateless) {
|
||||
if (isHeapless || isStateless) {
|
||||
uint64_t srcOrigin64[2] = {static_cast<uint64_t>(srcRegion->originX) + static_cast<uint64_t>(srcOffset),
|
||||
static_cast<uint64_t>(srcRegion->originY)};
|
||||
uint64_t dstOrigin64[2] = {static_cast<uint64_t>(dstRegion->originX) + static_cast<uint64_t>(dstOffset),
|
||||
|
||||
@@ -719,10 +719,10 @@ class MockCommandListCoreFamily : public CommandListCoreFamily<gfxCoreFamily> {
|
||||
const ze_copy_region_t *srcRegion, uint32_t srcPitch,
|
||||
size_t srcOffset, L0::Event *signalEvent,
|
||||
uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents,
|
||||
bool relaxedOrderingDispatch, const bool isStateless) override {
|
||||
bool relaxedOrderingDispatch, const bool isStateless, const bool isHeapless) override {
|
||||
srcAlignedPtr = srcAlignedAllocation->alignedAllocationPtr;
|
||||
dstAlignedPtr = dstAlignedAllocation->alignedAllocationPtr;
|
||||
return L0::CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyKernel2d(dstAlignedAllocation, srcAlignedAllocation, builtin, dstRegion, dstPitch, dstOffset, srcRegion, srcPitch, srcOffset, signalEvent, numWaitEvents, phWaitEvents, relaxedOrderingDispatch, isStateless);
|
||||
return L0::CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyKernel2d(dstAlignedAllocation, srcAlignedAllocation, builtin, dstRegion, dstPitch, dstOffset, srcRegion, srcPitch, srcOffset, signalEvent, numWaitEvents, phWaitEvents, relaxedOrderingDispatch, isStateless, isHeapless);
|
||||
}
|
||||
|
||||
ze_result_t appendMemoryCopyKernel3d(AlignedAllocationData *dstAlignedAllocation, AlignedAllocationData *srcAlignedAllocation,
|
||||
@@ -731,10 +731,11 @@ class MockCommandListCoreFamily : public CommandListCoreFamily<gfxCoreFamily> {
|
||||
const ze_copy_region_t *srcRegion, uint32_t srcPitch,
|
||||
uint32_t srcSlicePitch, size_t srcOffset,
|
||||
L0::Event *signalEvent, uint32_t numWaitEvents,
|
||||
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch, const bool isStateless) override {
|
||||
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch,
|
||||
const bool isStateless, const bool isHeapless) override {
|
||||
srcAlignedPtr = srcAlignedAllocation->alignedAllocationPtr;
|
||||
dstAlignedPtr = dstAlignedAllocation->alignedAllocationPtr;
|
||||
return L0::CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyKernel3d(dstAlignedAllocation, srcAlignedAllocation, builtin, dstRegion, dstPitch, dstSlicePitch, dstOffset, srcRegion, srcPitch, srcSlicePitch, srcOffset, signalEvent, numWaitEvents, phWaitEvents, relaxedOrderingDispatch, isStateless);
|
||||
return L0::CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyKernel3d(dstAlignedAllocation, srcAlignedAllocation, builtin, dstRegion, dstPitch, dstSlicePitch, dstOffset, srcRegion, srcPitch, srcSlicePitch, srcOffset, signalEvent, numWaitEvents, phWaitEvents, relaxedOrderingDispatch, isStateless, isHeapless);
|
||||
}
|
||||
|
||||
ze_result_t appendMemoryCopyBlitRegion(AlignedAllocationData *srcAllocationData,
|
||||
|
||||
@@ -251,6 +251,19 @@ HWTEST_F(TestBuiltinFunctionsLibImpl, givenHeaplessImageBuiltinsWhenInitBuiltinK
|
||||
EXPECT_STREQ("CopyImage3dToImage3d", lib.kernelNamePassed.c_str());
|
||||
}
|
||||
|
||||
// Checks that initBuiltinKernel, when asked for the heapless 2D/3D buffer-rect
// builtins, reports the copyBufferRectStatelessHeapless builtin op together with
// the "...Stateless" kernel name (the expected kernel names carry no "Heapless" suffix).
HWTEST_F(TestBuiltinFunctionsLibImpl, givenHeaplessBufferRectBuiltinsWhenInitBuiltinKernelThenCorrectArgumentsArePassed) {
|
||||
|
||||
// The mock exposes the last values it was handed as builtinPassed / kernelNamePassed.
MockCheckPassedArgumentsBuiltinFunctionsLibImpl lib(device, device->getNEODevice()->getBuiltIns());
|
||||
|
||||
// 2D heapless variant.
lib.initBuiltinKernel(L0::Builtin::copyBufferRectBytes2dStatelessHeapless);
|
||||
EXPECT_EQ(NEO::EBuiltInOps::copyBufferRectStatelessHeapless, lib.builtinPassed);
|
||||
EXPECT_STREQ("CopyBufferRectBytes2dStateless", lib.kernelNamePassed.c_str());
|
||||
|
||||
// 3D heapless variant: same builtin op, different kernel name.
lib.initBuiltinKernel(L0::Builtin::copyBufferRectBytes3dStatelessHeapless);
|
||||
EXPECT_EQ(NEO::EBuiltInOps::copyBufferRectStatelessHeapless, lib.builtinPassed);
|
||||
EXPECT_STREQ("CopyBufferRectBytes3dStateless", lib.kernelNamePassed.c_str());
|
||||
}
|
||||
|
||||
HWTEST_F(TestBuiltinFunctionsLibImpl, givenStatelessBufferRectBuiltinsWhenInitBuiltinKernelThenCorrectArgumentsArePassed) {
|
||||
|
||||
MockCheckPassedArgumentsBuiltinFunctionsLibImpl lib(device, device->getNEODevice()->getBuiltIns());
|
||||
|
||||
@@ -3575,6 +3575,8 @@ TEST(BuiltinTypeHelperTest, givenHeaplessWhenAdjustBuiltinTypeIsCalledThenCorrec
|
||||
bool isHeapless = true;
|
||||
|
||||
EXPECT_EQ(Builtin::copyBufferBytesStatelessHeapless, BuiltinTypeHelper::adjustBuiltinType<Builtin::copyBufferBytes>(isStateless, isHeapless));
|
||||
EXPECT_EQ(Builtin::copyBufferRectBytes2dStatelessHeapless, BuiltinTypeHelper::adjustBuiltinType<Builtin::copyBufferRectBytes2d>(isStateless, isHeapless));
|
||||
EXPECT_EQ(Builtin::copyBufferRectBytes3dStatelessHeapless, BuiltinTypeHelper::adjustBuiltinType<Builtin::copyBufferRectBytes3d>(isStateless, isHeapless));
|
||||
EXPECT_EQ(Builtin::copyBufferToBufferMiddleStatelessHeapless, BuiltinTypeHelper::adjustBuiltinType<Builtin::copyBufferToBufferMiddle>(isStateless, isHeapless));
|
||||
EXPECT_EQ(Builtin::copyBufferToBufferSideStatelessHeapless, BuiltinTypeHelper::adjustBuiltinType<Builtin::copyBufferToBufferSide>(isStateless, isHeapless));
|
||||
EXPECT_EQ(Builtin::fillBufferImmediateStatelessHeapless, BuiltinTypeHelper::adjustBuiltinType<Builtin::fillBufferImmediate>(isStateless, isHeapless));
|
||||
@@ -3583,6 +3585,7 @@ TEST(BuiltinTypeHelperTest, givenHeaplessWhenAdjustBuiltinTypeIsCalledThenCorrec
|
||||
EXPECT_EQ(Builtin::fillBufferMiddleStatelessHeapless, BuiltinTypeHelper::adjustBuiltinType<Builtin::fillBufferMiddle>(isStateless, isHeapless));
|
||||
EXPECT_EQ(Builtin::fillBufferRightLeftoverStatelessHeapless, BuiltinTypeHelper::adjustBuiltinType<Builtin::fillBufferRightLeftover>(isStateless, isHeapless));
|
||||
}
|
||||
|
||||
HWTEST2_F(CommandListCreateTests, givenDummyBlitRequiredWhenEncodeMiFlushWithPostSyncThenDummyBlitIsProgrammedPriorToMiFlushAndDummyAllocationIsAddedToResidencyContainer, IsAtLeastXeCore) {
|
||||
using MI_FLUSH_DW = typename FamilyType::MI_FLUSH_DW;
|
||||
DebugManagerStateRestore restorer;
|
||||
|
||||
@@ -115,7 +115,7 @@ class MockCommandListHw : public WhiteBox<::L0::CommandListCoreFamily<gfxCoreFam
|
||||
const ze_copy_region_t *srcRegion, uint32_t srcPitch,
|
||||
size_t srcOffset, L0::Event *signalEvent,
|
||||
uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents,
|
||||
bool relaxedOrderingDispatch, const bool isStateless) override {
|
||||
bool relaxedOrderingDispatch, const bool isStateless, const bool isHeapless) override {
|
||||
appendMemoryCopyKernel2dCalledTimes++;
|
||||
return ZE_RESULT_SUCCESS;
|
||||
}
|
||||
@@ -126,7 +126,8 @@ class MockCommandListHw : public WhiteBox<::L0::CommandListCoreFamily<gfxCoreFam
|
||||
const ze_copy_region_t *srcRegion, uint32_t srcPitch,
|
||||
uint32_t srcSlicePitch, size_t srcOffset,
|
||||
L0::Event *signalEvent, uint32_t numWaitEvents,
|
||||
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch, const bool isStateless) override {
|
||||
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch,
|
||||
const bool isStateless, const bool isHeapless) override {
|
||||
appendMemoryCopyKernel3dCalledTimes++;
|
||||
return ZE_RESULT_SUCCESS;
|
||||
}
|
||||
|
||||
@@ -644,7 +644,71 @@ HWTEST_F(CommandListTest, givenComputeCommandListAnd3dRegionWhenMemoryCopyRegion
|
||||
context->freeMem(dstBuffer);
|
||||
}
|
||||
|
||||
HWTEST_F(CommandListTest, givenStatelessAnd2dRegionWhenAppendMemoryCopyRegionThenPitchArgumentsAreSetCorrectly) {
|
||||
// With heapless mode enabled and stateless builtins disabled, a 2D region copy
// is expected to pass its origin and pitch kernel arguments as 64-bit values.
HWTEST_F(CommandListTest, givenHeaplessAnd2dRegionWhenAppendMemoryCopyRegionThenOriginAndPitchArgumentsAreSetAs64Bit) {
|
||||
// NOTE(review): the kernel is looked up via the base copyBufferRectBytes2d id although
// a heapless command list is configured below — presumably the fixture's builtin
// library hands back the same mock kernel for every builtin id; confirm.
auto kernel = device->getBuiltinFunctionsLib()->getFunction(Builtin::copyBufferRectBytes2d);
|
||||
auto mockBuiltinKernel = static_cast<Mock<::L0::KernelImp> *>(kernel);
|
||||
// Record argument values; slots 0..5 are pre-sized so indices 2..5 can be read afterwards.
mockBuiltinKernel->checkPassedArgumentValues = true;
|
||||
mockBuiltinKernel->passedArgumentValues.clear();
|
||||
mockBuiltinKernel->passedArgumentValues.resize(6);
|
||||
|
||||
auto commandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<FamilyType::gfxCoreFamily>>>();
|
||||
ASSERT_EQ(ZE_RESULT_SUCCESS, commandList->initialize(device, NEO::EngineGroupType::renderCompute, 0u));
|
||||
// Heapless on, stateless off — presumably so that the heapless flag alone
// is what drives the 64-bit argument path.
commandList->heaplessModeEnabled = true;
|
||||
commandList->statelessBuiltinsEnabled = false;
|
||||
|
||||
void *dstPtr = reinterpret_cast<void *>(0x1234);
|
||||
void *srcPtr = reinterpret_cast<void *>(0x2345);
|
||||
// Third value 0 and last value 1: assuming ze_copy_region_t is ordered
// {originX, originY, originZ, width, height, depth}, this is a 2D region
// (originZ == 0, depth == 1) — confirm against the Level Zero header.
ze_copy_region_t dstRegion = {4, 4, 0, 2, 2, 1};
|
||||
ze_copy_region_t srcRegion = {4, 4, 0, 2, 2, 1};
|
||||
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, commandList->appendMemoryCopyRegion(dstPtr, &dstRegion, 0, 0, srcPtr, &srcRegion, 0, 0, nullptr, 0, nullptr, copyParams));
|
||||
|
||||
EXPECT_TRUE(commandList->usedKernelLaunchParams.isBuiltInKernel);
|
||||
|
||||
// Byte sizes of kernel arguments 2..5 as captured by the mock kernel.
auto passedArgSizeSrcOrigin = mockBuiltinKernel->passedArgumentValues[2u].size();
|
||||
auto passedArgSizeDstOrigin = mockBuiltinKernel->passedArgumentValues[3u].size();
|
||||
auto passedArgSizeSrcPitch = mockBuiltinKernel->passedArgumentValues[4u].size();
|
||||
auto passedArgSizeDstPitch = mockBuiltinKernel->passedArgumentValues[5u].size();
|
||||
|
||||
// Origins: two 64-bit components each; pitches: a single 64-bit value each.
EXPECT_EQ(sizeof(uint64_t) * 2, passedArgSizeSrcOrigin);
|
||||
EXPECT_EQ(sizeof(uint64_t) * 2, passedArgSizeDstOrigin);
|
||||
EXPECT_EQ(sizeof(uint64_t), passedArgSizeSrcPitch);
|
||||
EXPECT_EQ(sizeof(uint64_t), passedArgSizeDstPitch);
|
||||
}
|
||||
|
||||
// With heapless mode enabled and stateless builtins disabled, a 3D region copy
// is expected to pass its origin and pitch kernel arguments as 64-bit values.
HWTEST_F(CommandListTest, givenHeaplessAnd3dRegionWhenAppendMemoryCopyRegionThenOriginAndPitchArgumentsAreSetAs64Bit) {
|
||||
// NOTE(review): the kernel is looked up via the base copyBufferRectBytes3d id although
// a heapless command list is configured below — presumably the fixture's builtin
// library hands back the same mock kernel for every builtin id; confirm.
auto kernel = device->getBuiltinFunctionsLib()->getFunction(Builtin::copyBufferRectBytes3d);
|
||||
auto mockBuiltinKernel = static_cast<Mock<::L0::KernelImp> *>(kernel);
|
||||
// Record argument values; slots 0..5 are pre-sized so indices 2..5 can be read afterwards.
mockBuiltinKernel->checkPassedArgumentValues = true;
|
||||
mockBuiltinKernel->passedArgumentValues.clear();
|
||||
mockBuiltinKernel->passedArgumentValues.resize(6);
|
||||
|
||||
auto commandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<FamilyType::gfxCoreFamily>>>();
|
||||
ASSERT_EQ(ZE_RESULT_SUCCESS, commandList->initialize(device, NEO::EngineGroupType::renderCompute, 0u));
|
||||
// Heapless on, stateless off — presumably so that the heapless flag alone
// is what drives the 64-bit argument path.
commandList->heaplessModeEnabled = true;
|
||||
commandList->statelessBuiltinsEnabled = false;
|
||||
|
||||
void *dstPtr = reinterpret_cast<void *>(0x1234);
|
||||
void *srcPtr = reinterpret_cast<void *>(0x2345);
|
||||
// Third value 4 and last value 2: assuming ze_copy_region_t is ordered
// {originX, originY, originZ, width, height, depth}, this is a 3D region
// (originZ != 0, depth > 1) — confirm against the Level Zero header.
ze_copy_region_t dstRegion = {4, 4, 4, 2, 2, 2};
|
||||
ze_copy_region_t srcRegion = {4, 4, 4, 2, 2, 2};
|
||||
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, commandList->appendMemoryCopyRegion(dstPtr, &dstRegion, 0, 0, srcPtr, &srcRegion, 0, 0, nullptr, 0, nullptr, copyParams));
|
||||
|
||||
EXPECT_TRUE(commandList->usedKernelLaunchParams.isBuiltInKernel);
|
||||
|
||||
// Byte sizes of kernel arguments 2..5 as captured by the mock kernel.
auto passedArgSizeSrcOrigin = mockBuiltinKernel->passedArgumentValues[2u].size();
|
||||
auto passedArgSizeDstOrigin = mockBuiltinKernel->passedArgumentValues[3u].size();
|
||||
auto passedArgSizeSrcPitch = mockBuiltinKernel->passedArgumentValues[4u].size();
|
||||
auto passedArgSizeDstPitch = mockBuiltinKernel->passedArgumentValues[5u].size();
|
||||
|
||||
// Origins: three 64-bit components each; pitch arguments: two 64-bit values each
// (presumably row pitch and slice pitch — confirm against the kernel signature).
EXPECT_EQ(sizeof(uint64_t) * 3, passedArgSizeSrcOrigin);
|
||||
EXPECT_EQ(sizeof(uint64_t) * 3, passedArgSizeDstOrigin);
|
||||
EXPECT_EQ(sizeof(uint64_t) * 2, passedArgSizeSrcPitch);
|
||||
EXPECT_EQ(sizeof(uint64_t) * 2, passedArgSizeDstPitch);
|
||||
}
|
||||
|
||||
HWTEST_F(CommandListTest, givenStatelessAnd2dRegionWhenAppendMemoryCopyRegionThenOriginAndPitchArgumentsAreSetCorrectly) {
|
||||
for (bool isStateless : {false, true}) {
|
||||
Builtin func = isStateless ? Builtin::copyBufferRectBytes2dStateless : Builtin::copyBufferRectBytes2d;
|
||||
|
||||
@@ -672,19 +736,25 @@ HWTEST_F(CommandListTest, givenStatelessAnd2dRegionWhenAppendMemoryCopyRegionThe
|
||||
|
||||
EXPECT_TRUE(commandList->usedKernelLaunchParams.isBuiltInKernel);
|
||||
|
||||
auto passedArgSizeSrcOrigin = mockBuiltinKernel->passedArgumentValues[2u].size();
|
||||
auto passedArgSizeDstOrigin = mockBuiltinKernel->passedArgumentValues[3u].size();
|
||||
auto passedArgSizeSrcPitch = mockBuiltinKernel->passedArgumentValues[4u].size();
|
||||
auto passedArgSizeDstPitch = mockBuiltinKernel->passedArgumentValues[5u].size();
|
||||
if (isStateless) {
|
||||
EXPECT_EQ(sizeof(uint64_t) * 2, passedArgSizeSrcOrigin);
|
||||
EXPECT_EQ(sizeof(uint64_t) * 2, passedArgSizeDstOrigin);
|
||||
EXPECT_EQ(sizeof(uint64_t), passedArgSizeSrcPitch);
|
||||
EXPECT_EQ(sizeof(uint64_t), passedArgSizeDstPitch);
|
||||
} else {
|
||||
EXPECT_EQ(sizeof(uint32_t) * 2, passedArgSizeSrcOrigin);
|
||||
EXPECT_EQ(sizeof(uint32_t) * 2, passedArgSizeDstOrigin);
|
||||
EXPECT_EQ(sizeof(uint32_t), passedArgSizeSrcPitch);
|
||||
EXPECT_EQ(sizeof(uint32_t), passedArgSizeDstPitch);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
HWTEST_F(CommandListTest, givenStatelessAnd3dRegionWhenAppendMemoryCopyRegionThenPitchArgumentsAreSetCorrectly) {
|
||||
HWTEST_F(CommandListTest, givenStatelessAnd3dRegionWhenAppendMemoryCopyRegionThenOriginAndPitchArgumentsAreSetCorrectly) {
|
||||
for (bool isStateless : {false, true}) {
|
||||
Builtin func = isStateless ? Builtin::copyBufferRectBytes3dStateless : Builtin::copyBufferRectBytes3d;
|
||||
|
||||
@@ -713,19 +783,25 @@ HWTEST_F(CommandListTest, givenStatelessAnd3dRegionWhenAppendMemoryCopyRegionThe
|
||||
|
||||
EXPECT_TRUE(commandList->usedKernelLaunchParams.isBuiltInKernel);
|
||||
|
||||
auto passedArgSizeSrcOrigin = mockBuiltinKernel->passedArgumentValues[2u].size();
|
||||
auto passedArgSizeDstOrigin = mockBuiltinKernel->passedArgumentValues[3u].size();
|
||||
auto passedArgSizeSrcPitch = mockBuiltinKernel->passedArgumentValues[4u].size();
|
||||
auto passedArgSizeDstPitch = mockBuiltinKernel->passedArgumentValues[5u].size();
|
||||
if (isStateless) {
|
||||
EXPECT_EQ(sizeof(uint64_t) * 3, passedArgSizeSrcOrigin);
|
||||
EXPECT_EQ(sizeof(uint64_t) * 3, passedArgSizeDstOrigin);
|
||||
EXPECT_EQ(sizeof(uint64_t) * 2, passedArgSizeSrcPitch);
|
||||
EXPECT_EQ(sizeof(uint64_t) * 2, passedArgSizeDstPitch);
|
||||
} else {
|
||||
EXPECT_EQ(sizeof(uint32_t) * 3, passedArgSizeSrcOrigin);
|
||||
EXPECT_EQ(sizeof(uint32_t) * 3, passedArgSizeDstOrigin);
|
||||
EXPECT_EQ(sizeof(uint32_t) * 2, passedArgSizeSrcPitch);
|
||||
EXPECT_EQ(sizeof(uint32_t) * 2, passedArgSizeDstPitch);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
HWTEST_F(CommandListTest, given4GBOrGreater2dSrcAndDstRegionsWhenAppendMemoryCopyRegionThenPitchArgumentsAreSetAs64Bit) {
|
||||
HWTEST_F(CommandListTest, given4GBOrGreater2dSrcAndDstRegionsWhenAppendMemoryCopyRegionThenOriginAndPitchArgumentsAreSetAs64Bit) {
|
||||
auto kernel = device->getBuiltinFunctionsLib()->getFunction(Builtin::copyBufferRectBytes2d);
|
||||
auto mockBuiltinKernel = static_cast<Mock<::L0::KernelImp> *>(kernel);
|
||||
mockBuiltinKernel->checkPassedArgumentValues = true;
|
||||
@@ -750,14 +826,18 @@ HWTEST_F(CommandListTest, given4GBOrGreater2dSrcAndDstRegionsWhenAppendMemoryCop
|
||||
|
||||
EXPECT_TRUE(commandList->usedKernelLaunchParams.isBuiltInKernel);
|
||||
|
||||
auto passedArgSizeSrcOrigin = mockBuiltinKernel->passedArgumentValues[2u].size();
|
||||
auto passedArgSizeDstOrigin = mockBuiltinKernel->passedArgumentValues[3u].size();
|
||||
auto passedArgSizeSrcPitch = mockBuiltinKernel->passedArgumentValues[4u].size();
|
||||
auto passedArgSizeDstPitch = mockBuiltinKernel->passedArgumentValues[5u].size();
|
||||
|
||||
EXPECT_EQ(sizeof(uint64_t) * 2, passedArgSizeSrcOrigin);
|
||||
EXPECT_EQ(sizeof(uint64_t) * 2, passedArgSizeDstOrigin);
|
||||
EXPECT_EQ(sizeof(uint64_t), passedArgSizeSrcPitch);
|
||||
EXPECT_EQ(sizeof(uint64_t), passedArgSizeDstPitch);
|
||||
}
|
||||
|
||||
HWTEST_F(CommandListTest, given4GBOrGreater3dSrcAndDstRegionsWhenAppendMemoryCopyRegionThenPitchArgumentsAreSetAs64Bit) {
|
||||
HWTEST_F(CommandListTest, given4GBOrGreater3dSrcAndDstRegionsWhenAppendMemoryCopyRegionThenOriginAndPitchArgumentsAreSetAs64Bit) {
|
||||
auto kernel = device->getBuiltinFunctionsLib()->getFunction(Builtin::copyBufferRectBytes3d);
|
||||
auto mockBuiltinKernel = static_cast<Mock<::L0::KernelImp> *>(kernel);
|
||||
mockBuiltinKernel->checkPassedArgumentValues = true;
|
||||
@@ -783,14 +863,18 @@ HWTEST_F(CommandListTest, given4GBOrGreater3dSrcAndDstRegionsWhenAppendMemoryCop
|
||||
|
||||
EXPECT_TRUE(commandList->usedKernelLaunchParams.isBuiltInKernel);
|
||||
|
||||
auto passedArgSizeSrcOrigin = mockBuiltinKernel->passedArgumentValues[2u].size();
|
||||
auto passedArgSizeDstOrigin = mockBuiltinKernel->passedArgumentValues[3u].size();
|
||||
auto passedArgSizeSrcPitch = mockBuiltinKernel->passedArgumentValues[4u].size();
|
||||
auto passedArgSizeDstPitch = mockBuiltinKernel->passedArgumentValues[5u].size();
|
||||
|
||||
EXPECT_EQ(sizeof(uint64_t) * 3, passedArgSizeSrcOrigin);
|
||||
EXPECT_EQ(sizeof(uint64_t) * 3, passedArgSizeDstOrigin);
|
||||
EXPECT_EQ(sizeof(uint64_t) * 2, passedArgSizeSrcPitch);
|
||||
EXPECT_EQ(sizeof(uint64_t) * 2, passedArgSizeDstPitch);
|
||||
}
|
||||
|
||||
HWTEST_F(CommandListTest, given4GBOrGreater2dDstRegionWhenAppendMemoryCopyRegionThenPitchArgumentsAreSetAs64Bit) {
|
||||
HWTEST_F(CommandListTest, given4GBOrGreater2dDstRegionWhenAppendMemoryCopyRegionThenOriginAndPitchArgumentsAreSetAs64Bit) {
|
||||
auto kernel = device->getBuiltinFunctionsLib()->getFunction(Builtin::copyBufferRectBytes2d);
|
||||
auto mockBuiltinKernel = static_cast<Mock<::L0::KernelImp> *>(kernel);
|
||||
mockBuiltinKernel->checkPassedArgumentValues = true;
|
||||
@@ -815,14 +899,18 @@ HWTEST_F(CommandListTest, given4GBOrGreater2dDstRegionWhenAppendMemoryCopyRegion
|
||||
|
||||
EXPECT_TRUE(commandList->usedKernelLaunchParams.isBuiltInKernel);
|
||||
|
||||
auto passedArgSizeSrcOrigin = mockBuiltinKernel->passedArgumentValues[2u].size();
|
||||
auto passedArgSizeDstOrigin = mockBuiltinKernel->passedArgumentValues[3u].size();
|
||||
auto passedArgSizeSrcPitch = mockBuiltinKernel->passedArgumentValues[4u].size();
|
||||
auto passedArgSizeDstPitch = mockBuiltinKernel->passedArgumentValues[5u].size();
|
||||
|
||||
EXPECT_EQ(sizeof(uint64_t) * 2, passedArgSizeSrcOrigin);
|
||||
EXPECT_EQ(sizeof(uint64_t) * 2, passedArgSizeDstOrigin);
|
||||
EXPECT_EQ(sizeof(uint64_t), passedArgSizeSrcPitch);
|
||||
EXPECT_EQ(sizeof(uint64_t), passedArgSizeDstPitch);
|
||||
}
|
||||
|
||||
HWTEST_F(CommandListTest, given4GBOrGreater3dDstRegionWhenAppendMemoryCopyRegionThenPitchArgumentsAreSetAs64Bit) {
|
||||
HWTEST_F(CommandListTest, given4GBOrGreater3dDstRegionWhenAppendMemoryCopyRegionThenOriginAndPitchArgumentsAreSetAs64Bit) {
|
||||
auto kernel = device->getBuiltinFunctionsLib()->getFunction(Builtin::copyBufferRectBytes3d);
|
||||
auto mockBuiltinKernel = static_cast<Mock<::L0::KernelImp> *>(kernel);
|
||||
mockBuiltinKernel->checkPassedArgumentValues = true;
|
||||
@@ -848,9 +936,13 @@ HWTEST_F(CommandListTest, given4GBOrGreater3dDstRegionWhenAppendMemoryCopyRegion
|
||||
|
||||
EXPECT_TRUE(commandList->usedKernelLaunchParams.isBuiltInKernel);
|
||||
|
||||
auto passedArgSizeSrcOrigin = mockBuiltinKernel->passedArgumentValues[2u].size();
|
||||
auto passedArgSizeDstOrigin = mockBuiltinKernel->passedArgumentValues[3u].size();
|
||||
auto passedArgSizeSrcPitch = mockBuiltinKernel->passedArgumentValues[4u].size();
|
||||
auto passedArgSizeDstPitch = mockBuiltinKernel->passedArgumentValues[5u].size();
|
||||
|
||||
EXPECT_EQ(sizeof(uint64_t) * 3, passedArgSizeSrcOrigin);
|
||||
EXPECT_EQ(sizeof(uint64_t) * 3, passedArgSizeDstOrigin);
|
||||
EXPECT_EQ(sizeof(uint64_t) * 2, passedArgSizeSrcPitch);
|
||||
EXPECT_EQ(sizeof(uint64_t) * 2, passedArgSizeDstPitch);
|
||||
}
|
||||
|
||||
@@ -157,7 +157,7 @@ class MockCommandListExtensionHw : public WhiteBox<::L0::CommandListCoreFamily<g
|
||||
const ze_copy_region_t *srcRegion, uint32_t srcPitch,
|
||||
size_t srcOffset, Event *signalEvent,
|
||||
uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents,
|
||||
bool relaxedOrderingDispatch, const bool isStateless) override {
|
||||
bool relaxedOrderingDispatch, const bool isStateless, const bool isHeapless) override {
|
||||
appendMemoryCopyKernel2dCalledTimes++;
|
||||
return ZE_RESULT_SUCCESS;
|
||||
}
|
||||
@@ -168,7 +168,8 @@ class MockCommandListExtensionHw : public WhiteBox<::L0::CommandListCoreFamily<g
|
||||
const ze_copy_region_t *srcRegion, uint32_t srcPitch,
|
||||
uint32_t srcSlicePitch, size_t srcOffset,
|
||||
Event *signalEvent, uint32_t numWaitEvents,
|
||||
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch, const bool isStateless) override {
|
||||
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch,
|
||||
const bool isStateless, const bool isHeapless) override {
|
||||
appendMemoryCopyKernel3dCalledTimes++;
|
||||
return ZE_RESULT_SUCCESS;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user