Add option to compact event L3 flush packet

Related-To: NEO-7434

Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
Zbigniew Zdanowicz
2022-10-27 11:40:44 +00:00
committed by Compute-Runtime-Automation
parent 709e322a4a
commit 6a6ab80113
13 changed files with 1063 additions and 30 deletions

View File

@@ -287,10 +287,18 @@ struct CommandListCoreFamily : CommandListImp {
size_t dstSize,
CmdListFillKernelArguments &outArguments,
Kernel *kernel);
// Tells whether the event should take the compact L3 flush packet path.
// True only when the caller reports that a DC flush is required (dcFlush)
// and the compactL3FlushEventPacket option of this command list is set.
bool compactL3FlushEvent(bool dcFlush) const {
    return dcFlush && this->compactL3FlushEventPacket;
}
// Decides whether the event is signalled through a single pipe control packet.
// splitKernel - the operation is split into more than one kernel
// dcFlush     - a DC flush is required for the event
// Returns true when multi-kernel pipe control sync applies to a split
// operation, or when the compact L3 flush event path is selected.
bool eventSignalPipeControl(bool splitKernel, bool dcFlush) const {
    if (splitKernel && this->pipeControlMultiKernelEventSync) {
        return true;
    }
    return compactL3FlushEvent(dcFlush);
}
size_t cmdListCurrentStartOffset = 0;
bool containsAnyKernel = false;
bool pipeControlMultiKernelEventSync = false;
bool compactL3FlushEventPacket = false;
};
template <PRODUCT_FAMILY gfxProductFamily>

View File

@@ -138,6 +138,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::initialize(Device *device, NEO
this->frontEndStateTracking = L0HwHelper::enableFrontEndStateTracking(hwInfo);
this->pipelineSelectStateTracking = L0HwHelper::enablePipelineSelectStateTracking(hwInfo);
this->pipeControlMultiKernelEventSync = L0HwHelper::usePipeControlMultiKernelEventSync(hwInfo);
this->compactL3FlushEventPacket = L0HwHelper::useCompactL3FlushEventPacket(hwInfo);
if (device->isImplicitScalingCapable() && !this->internalUsage && !isCopyOnly()) {
this->partitionCount = static_cast<uint32_t>(this->device->getNEODevice()->getDeviceBitfield().count());
@@ -1187,11 +1188,12 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopy(void *dstptr,
}
CmdListKernelLaunchParams launchParams = {};
bool dcFlush = false;
Event *signalEvent = nullptr;
if (hSignalEvent) {
signalEvent = Event::fromHandle(hSignalEvent);
launchParams.isHostSignalScopeEvent = !!(signalEvent->signalScope & ZE_EVENT_SCOPE_FLAG_HOST);
dcFlush = getDcFlushRequired(!!signalEvent->signalScope);
}
uint32_t kernelCounter = leftSize > 0 ? 1 : 0;
@@ -1199,7 +1201,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopy(void *dstptr,
kernelCounter += rightSize > 0 ? 1 : 0;
launchParams.isKernelSplitOperation = kernelCounter > 1;
bool singlePipeControlPacket = this->pipeControlMultiKernelEventSync && launchParams.isKernelSplitOperation;
bool singlePipeControlPacket = eventSignalPipeControl(launchParams.isKernelSplitOperation, dcFlush);
appendEventForProfilingAllWalkers(signalEvent, true, singlePipeControlPacket);
@@ -1551,9 +1553,11 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(void *ptr,
CmdListKernelLaunchParams launchParams = {};
Event *signalEvent = nullptr;
bool dcFlush = false;
if (hSignalEvent) {
signalEvent = Event::fromHandle(hSignalEvent);
launchParams.isHostSignalScopeEvent = !!(signalEvent->signalScope & ZE_EVENT_SCOPE_FLAG_HOST);
dcFlush = getDcFlushRequired(!!signalEvent->signalScope);
}
if (isCopyOnly()) {
@@ -1610,7 +1614,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(void *ptr,
setupFillKernelArguments(dstAllocation.offset, patternSize, size, fillArguments, builtinKernel);
launchParams.isKernelSplitOperation = (fillArguments.leftRemainingBytes > 0 || fillArguments.rightRemainingBytes > 0);
bool singlePipeControlPacket = this->pipeControlMultiKernelEventSync && launchParams.isKernelSplitOperation;
bool singlePipeControlPacket = eventSignalPipeControl(launchParams.isKernelSplitOperation, dcFlush);
appendEventForProfilingAllWalkers(signalEvent, true, singlePipeControlPacket);

View File

@@ -163,20 +163,26 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
threadGroupDimensions->groupCountY,
threadGroupDimensions->groupCountZ);
}
NEO::GraphicsAllocation *eventAlloc = nullptr;
uint64_t eventAddress = 0;
bool isTimestampEvent = false;
bool l3FlushEnable = false;
bool isHostSignalScopeEvent = launchParams.isHostSignalScopeEvent;
Event *compactEvent = nullptr;
if (event) {
eventAlloc = &event->getAllocation(this->device);
commandContainer.addToResidencyContainer(eventAlloc);
bool flushRequired = !!event->signalScope &&
!launchParams.isKernelSplitOperation;
l3FlushEnable = getDcFlushRequired(flushRequired);
isTimestampEvent = event->isUsingContextEndOffset();
eventAddress = event->getPacketAddress(this->device);
isHostSignalScopeEvent = !!(event->signalScope & ZE_EVENT_SCOPE_FLAG_HOST);
if (compactL3FlushEvent(getDcFlushRequired(!!event->signalScope))) {
compactEvent = event;
event = nullptr;
} else {
NEO::GraphicsAllocation *eventAlloc = &event->getAllocation(this->device);
commandContainer.addToResidencyContainer(eventAlloc);
bool flushRequired = !!event->signalScope &&
!launchParams.isKernelSplitOperation;
l3FlushEnable = getDcFlushRequired(flushRequired);
isTimestampEvent = event->isUsingContextEndOffset();
eventAddress = event->getPacketAddress(this->device);
}
}
bool isKernelUsingSystemAllocation = false;
@@ -249,6 +255,10 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
std::list<void *> additionalCommands;
if (compactEvent) {
appendEventForProfilingAllWalkers(compactEvent, true, true);
}
NEO::EncodeDispatchKernelArgs dispatchKernelArgs{
eventAddress, // eventAddress
neoDevice, // device
@@ -273,7 +283,9 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
NEO::EncodeDispatchKernel<GfxFamily>::encode(commandContainer, dispatchKernelArgs, getLogicalStateHelper());
this->containsStatelessUncachedResource = dispatchKernelArgs.requiresUncachedMocs;
if (event) {
if (compactEvent) {
appendEventForProfilingAllWalkers(compactEvent, false, true);
} else if (event) {
if (partitionCount > 1) {
event->setPacketsInUse(partitionCount);
}
@@ -404,7 +416,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelSplit(Kernel
Event *event,
const CmdListKernelLaunchParams &launchParams) {
if (event) {
if (this->pipeControlMultiKernelEventSync && launchParams.isKernelSplitOperation) {
if (eventSignalPipeControl(launchParams.isKernelSplitOperation, getDcFlushRequired(!!event->signalScope))) {
event = nullptr;
} else {
event->increaseKernelCount();

View File

@@ -53,4 +53,11 @@ bool L0HwHelper::usePipeControlMultiKernelEventSync(const NEO::HardwareInfo &hwI
return false;
}
// Reports whether the compact L3 flush event packet is enabled.
// The CompactL3FlushEventPacket debug flag decides when it is not -1
// (non-zero enables, zero disables); otherwise the result is false.
// hwInfo is not consulted by this implementation.
bool L0HwHelper::useCompactL3FlushEventPacket(const NEO::HardwareInfo &hwInfo) {
    const auto debugOverride = NEO::DebugManager.flags.CompactL3FlushEventPacket.get();
    return (debugOverride != -1) ? !!debugOverride : false;
}
} // namespace L0

View File

@@ -35,6 +35,7 @@ class L0HwHelper {
static bool enableStateComputeModeTracking(const NEO::HardwareInfo &hwInfo);
static bool enableImmediateCmdListHeapSharing(const NEO::HardwareInfo &hwInfo, bool cmdlistSupport);
static bool usePipeControlMultiKernelEventSync(const NEO::HardwareInfo &hwInfo);
static bool useCompactL3FlushEventPacket(const NEO::HardwareInfo &hwInfo);
virtual void setAdditionalGroupProperty(ze_command_queue_group_properties_t &groupProperty, NEO::EngineGroupT &group) const = 0;
virtual L0::Event *createEvent(L0::EventPool *eventPool, const ze_event_desc_t *desc, L0::Device *device) const = 0;

View File

@@ -188,7 +188,9 @@ struct TestExpectedValues {
uint32_t expectedKernelCount = 0;
uint32_t expectedWalkerPostSyncOp = 0;
uint32_t expectedPostSyncPipeControls = 0;
uint32_t expectDcFlush = 0;
bool postSyncAddressZero = false;
bool workloadPartition = false;
};
} // namespace ult

View File

@@ -47,6 +47,7 @@ struct WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>
using BaseClass::commandListPerThreadScratchSize;
using BaseClass::commandListPreemptionMode;
using BaseClass::commandsToPatch;
using BaseClass::compactL3FlushEventPacket;
using BaseClass::containsAnyKernel;
using BaseClass::containsCooperativeKernelsFlag;
using BaseClass::csr;
@@ -123,6 +124,7 @@ struct WhiteBox<L0::CommandListCoreFamilyImmediate<gfxCoreFamily>>
using BaseClass::clearCommandsToPatch;
using BaseClass::cmdQImmediate;
using BaseClass::commandsToPatch;
using BaseClass::compactL3FlushEventPacket;
using BaseClass::csr;
using BaseClass::finalStreamState;
using BaseClass::frontEndStateTracking;
@@ -142,6 +144,7 @@ struct WhiteBox<L0::CommandListCoreFamilyImmediate<gfxCoreFamily>>
template <GFXCORE_FAMILY gfxCoreFamily>
struct MockCommandListImmediate : public CommandListCoreFamilyImmediate<gfxCoreFamily> {
using BaseClass = CommandListCoreFamilyImmediate<gfxCoreFamily>;
using BaseClass::compactL3FlushEventPacket;
using BaseClass::containsAnyKernel;
using BaseClass::immediateCmdListHeapSharing;
using BaseClass::indirectAllocationsAllowed;

View File

@@ -28,12 +28,15 @@ struct CopyTestInput {
ze_event_pool_flags_t eventPoolFlags = 0;
int32_t usePipeControlMultiPacketEventSync;
bool useFirstEventPacketAddress = false;
};
template <int32_t usePipeControlMultiPacketEventSync, uint32_t multiTile>
template <int32_t usePipeControlMultiPacketEventSync, int32_t compactL3FlushEventPacket, uint32_t multiTile>
struct AppendMemoryCopyMultiPacketEventFixture : public DeviceFixture {
void setUp() {
DebugManager.flags.UsePipeControlMultiKernelEventSync.set(usePipeControlMultiPacketEventSync);
DebugManager.flags.CompactL3FlushEventPacket.set(compactL3FlushEventPacket);
if (multiTile == 1) {
DebugManager.flags.CreateMultipleSubDevices.set(2);
DebugManager.flags.EnableImplicitScaling.set(1);
@@ -170,8 +173,8 @@ void testSingleTileAppendMemoryCopyThreeKernelsAndL3Flush(CopyTestInput &input,
EXPECT_EQ(static_cast<OPERATION>(arg.expectedWalkerPostSyncOp), walkerCmd->getPostSync().getOperation());
EXPECT_EQ(thirdKernelEventAddress, walkerCmd->getPostSync().getDestinationAddress());
uint64_t l3FlushPostSyncAddress = thirdKernelEventAddress + event->getSinglePacketSize();
if (input.usePipeControlMultiPacketEventSync == 1) {
uint64_t l3FlushPostSyncAddress = event->getGpuAddress(input.device) + 2 * event->getSinglePacketSize() + event->getSinglePacketSize();
if (input.usePipeControlMultiPacketEventSync == 1 || input.useFirstEventPacketAddress) {
l3FlushPostSyncAddress = event->getGpuAddress(input.device);
}
if (event->isUsingContextEndOffset()) {
@@ -291,7 +294,10 @@ void testSingleTileAppendMemoryCopySingleKernelAndL3Flush(CopyTestInput &input,
EXPECT_EQ(static_cast<OPERATION>(arg.expectedWalkerPostSyncOp), walkerCmd->getPostSync().getOperation());
EXPECT_EQ(firstKernelEventAddress, walkerCmd->getPostSync().getDestinationAddress());
uint64_t l3FlushPostSyncAddress = firstKernelEventAddress + event->getSinglePacketSize();
uint64_t l3FlushPostSyncAddress = event->getGpuAddress(input.device) + event->getSinglePacketSize();
if (input.useFirstEventPacketAddress) {
l3FlushPostSyncAddress = event->getGpuAddress(input.device);
}
if (event->isUsingContextEndOffset()) {
l3FlushPostSyncAddress += event->getContextEndOffset();
}
@@ -496,7 +502,7 @@ void testMultiTileAppendMemoryCopyThreeKernelsAndL3Flush(CopyTestInput &input, T
EXPECT_EQ(thirdKernelEventAddress, walkerCmd->getPostSync().getDestinationAddress());
uint64_t l3FlushPostSyncAddress = thirdKernelEventAddress + 2 * event->getSinglePacketSize();
if (input.usePipeControlMultiPacketEventSync == 1) {
if (input.usePipeControlMultiPacketEventSync == 1 || input.useFirstEventPacketAddress) {
l3FlushPostSyncAddress = event->getGpuAddress(input.device);
}
if (event->isUsingContextEndOffset()) {
@@ -627,7 +633,12 @@ void testMultiTileAppendMemoryCopySingleKernelAndL3Flush(CopyTestInput &input, T
EXPECT_EQ(static_cast<OPERATION>(arg.expectedWalkerPostSyncOp), walkerCmd->getPostSync().getOperation());
EXPECT_EQ(firstKernelEventAddress, walkerCmd->getPostSync().getDestinationAddress());
uint64_t l3FlushPostSyncAddress = firstKernelEventAddress + 2 * event->getSinglePacketSize();
uint64_t l3FlushPostSyncAddress = 0;
if (input.useFirstEventPacketAddress) {
l3FlushPostSyncAddress = event->getGpuAddress(input.device);
} else {
l3FlushPostSyncAddress = event->getGpuAddress(input.device) + 2 * event->getSinglePacketSize();
}
if (event->isUsingContextEndOffset()) {
l3FlushPostSyncAddress += event->getContextEndOffset();
}
@@ -655,7 +666,7 @@ void testMultiTileAppendMemoryCopySingleKernelAndL3Flush(CopyTestInput &input, T
EXPECT_EQ(expectedDcFlush, dcFlushFound);
}
using AppendMemoryCopyXeHpAndLaterMultiPacket = Test<AppendMemoryCopyMultiPacketEventFixture<0, 0>>;
using AppendMemoryCopyXeHpAndLaterMultiPacket = Test<AppendMemoryCopyMultiPacketEventFixture<0, 0, 0>>;
HWTEST2_F(AppendMemoryCopyXeHpAndLaterMultiPacket,
givenCommandListWhenTimestampProvidedByComputeWalkerPostSyncPassedToMemoryCopyThenAppendProfilingCalledForThreeSeparateKernels,
@@ -768,7 +779,7 @@ HWTEST2_F(AppendMemoryCopyXeHpAndLaterMultiPacket,
testSingleTileAppendMemoryCopySignalScopeEventToSubDevice<gfxCoreFamily>(input, arg);
}
using AppendMemoryCopyXeHpAndLaterSinglePacket = Test<AppendMemoryCopyMultiPacketEventFixture<1, 0>>;
using AppendMemoryCopyXeHpAndLaterSinglePacket = Test<AppendMemoryCopyMultiPacketEventFixture<1, 0, 0>>;
HWTEST2_F(AppendMemoryCopyXeHpAndLaterSinglePacket,
givenCommandListWhenTimestampProvidedByRegisterPostSyncPassedToMemoryCopyThenAppendProfilingCalledForRegisterOnly,
@@ -881,7 +892,7 @@ HWTEST2_F(AppendMemoryCopyXeHpAndLaterSinglePacket,
testSingleTileAppendMemoryCopySignalScopeEventToSubDevice<gfxCoreFamily>(input, arg);
}
using MultiTileAppendMemoryCopyXeHpAndLaterMultiPacket = Test<AppendMemoryCopyMultiPacketEventFixture<0, 1>>;
using MultiTileAppendMemoryCopyXeHpAndLaterMultiPacket = Test<AppendMemoryCopyMultiPacketEventFixture<0, 0, 1>>;
HWTEST2_F(MultiTileAppendMemoryCopyXeHpAndLaterMultiPacket,
givenMultiTileCommandListWhenTimestampProvidedByComputeWalkerPostSyncPassedToMemoryCopyThenAppendProfilingCalledForThreeSeparateMultiTileKernels,
@@ -985,7 +996,7 @@ HWTEST2_F(MultiTileAppendMemoryCopyXeHpAndLaterMultiPacket,
testMultiTileAppendMemoryCopySingleKernelAndL3Flush<gfxCoreFamily>(input, arg);
}
using MultiTileAppendMemoryCopyXeHpAndLaterSinglePacket = Test<AppendMemoryCopyMultiPacketEventFixture<1, 1>>;
using MultiTileAppendMemoryCopyXeHpAndLaterSinglePacket = Test<AppendMemoryCopyMultiPacketEventFixture<1, 0, 1>>;
HWTEST2_F(MultiTileAppendMemoryCopyXeHpAndLaterSinglePacket,
givenMultiTileCommandListWhenTimestampProvidedByRegisterPostSyncPassedToMemoryCopyThenAppendProfilingCalledForMultiTileRegisterPipeControlPacket,
@@ -1088,5 +1099,428 @@ HWTEST2_F(MultiTileAppendMemoryCopyXeHpAndLaterSinglePacket,
testMultiTileAppendMemoryCopySingleKernelAndL3Flush<gfxCoreFamily>(input, arg);
}
// Fixture arguments <0, 1, 0>: UsePipeControlMultiKernelEventSync = 0, CompactL3FlushEventPacket = 1,
// multiTile = 0 (the fixture only enables sub-devices and implicit scaling when multiTile == 1).
using AppendMemoryCopyL3CompactEventTest = Test<AppendMemoryCopyMultiPacketEventFixture<0, 1, 0>>;
HWTEST2_F(AppendMemoryCopyL3CompactEventTest,
          givenCommandListWhenTimestampProvidedByComputeWalkerPostSyncPassedToMemoryCopyThenAppendProfilingCalledForThreeSeparateKernels,
          IsAtLeastXeHpCore) {
    // Copy request passed to the shared three-kernel test body.
    input.size = 0x100002345;
    input.dstPtr = reinterpret_cast<void *>(0x200002345);
    input.srcPtr = reinterpret_cast<void *>(0x1231);

    // Expected event state: three kernels, three packets, non-zero walker post-sync address.
    arg.postSyncAddressZero = false;
    arg.expectedWalkerPostSyncOp = 3;
    arg.expectedKernelCount = 3;
    arg.expectedPacketsInUse = 3;

    testSingleTileAppendMemoryCopyThreeKernels<gfxCoreFamily>(input, arg);
}
HWTEST2_F(AppendMemoryCopyL3CompactEventTest,
          givenCommandListWhenTimestampProvidedByComputeWalkerPostSyncPassedToMemoryCopyThenAppendProfilingCalledForSingleKernel,
          IsAtLeastXeHpCore) {
    // Copy request passed to the shared single-kernel test body.
    input.size = 0x100000000;
    input.dstPtr = reinterpret_cast<void *>(0x20000000);
    input.srcPtr = reinterpret_cast<void *>(0x1000);

    // Expected event state: one kernel, one packet, non-zero walker post-sync address.
    arg.postSyncAddressZero = false;
    arg.expectedWalkerPostSyncOp = 3;
    arg.expectedKernelCount = 1;
    arg.expectedPacketsInUse = 1;

    testSingleTileAppendMemoryCopySingleKernel<gfxCoreFamily>(input, arg);
}
HWTEST2_F(AppendMemoryCopyL3CompactEventTest,
          givenCommandListAndTimestampEventWithSignalScopeWhenTimestampProvidedByComputeWalkerPostSyncPassedToMemoryCopyThenAppendProfilingCalledForThreeSeparateKernelsAndL3FlushWithPostSyncAddedOnce,
          IsXeHpOrXeHpgCore) {
    // Copy request with a kernel-timestamp event pool.
    input.eventPoolFlags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP;
    input.size = 0x100002345;
    input.dstPtr = reinterpret_cast<void *>(0x200002345);
    input.srcPtr = reinterpret_cast<void *>(0x1231);

    // Expected event state: a single packet and kernel count, walker post-sync
    // address zero, and no post-sync pipe controls.
    arg.postSyncAddressZero = true;
    arg.expectedPostSyncPipeControls = 0;
    arg.expectedWalkerPostSyncOp = 0;
    arg.expectedKernelCount = 1;
    arg.expectedPacketsInUse = 1;

    testSingleTileAppendMemoryCopyThreeKernelsAndL3Flush<gfxCoreFamily>(input, arg);
}
// NOTE(review): the doubled "ForFor" in the test name looks like a typo, but it also keeps this name
// distinct from the single-kernel variant in the same fixture — rename both deliberately if cleaned up.
HWTEST2_F(AppendMemoryCopyL3CompactEventTest,
          givenCommandListAndEventWithSignalScopeWhenImmediateProvidedByPipeControlPostSyncPassedToMemoryCopyThenAppendProfilingCalledForForL3FlushWithPostSyncAddedOnce,
          IsXeHpOrXeHpgCore) {
    // Copy request with a non-timestamp event pool; the shared test body is told
    // to expect the L3 flush post-sync at the event's first packet address.
    input.useFirstEventPacketAddress = true;
    input.eventPoolFlags = 0;
    input.size = 0x100002345;
    input.dstPtr = reinterpret_cast<void *>(0x200002345);
    input.srcPtr = reinterpret_cast<void *>(0x1231);

    // Expected event state: a single packet and kernel count, walker post-sync
    // address zero, and one post-sync pipe control.
    arg.postSyncAddressZero = true;
    arg.expectedPostSyncPipeControls = 1;
    arg.expectedWalkerPostSyncOp = 0;
    arg.expectedKernelCount = 1;
    arg.expectedPacketsInUse = 1;

    testSingleTileAppendMemoryCopyThreeKernelsAndL3Flush<gfxCoreFamily>(input, arg);
}
HWTEST2_F(AppendMemoryCopyL3CompactEventTest,
          givenCommandListAndTimestampEventWithSignalScopeWhenTimestampProvidedByRegisterPostSyncPassedToMemoryCopyThenAppendProfilingCalledForL3FlushWithPostSyncAddedOnce,
          IsXeHpOrXeHpgCore) {
    // Copy request with a kernel-timestamp event pool.
    input.eventPoolFlags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP;
    input.size = 0x100000000;
    input.dstPtr = reinterpret_cast<void *>(0x20000000);
    input.srcPtr = reinterpret_cast<void *>(0x1000);

    // Expected event state: a single packet and kernel count, walker post-sync
    // address zero, and no post-sync pipe controls.
    arg.postSyncAddressZero = true;
    arg.expectedPostSyncPipeControls = 0;
    arg.expectedWalkerPostSyncOp = 0;
    arg.expectedKernelCount = 1;
    arg.expectedPacketsInUse = 1;

    testSingleTileAppendMemoryCopySingleKernelAndL3Flush<gfxCoreFamily>(input, arg);
}
HWTEST2_F(AppendMemoryCopyL3CompactEventTest,
          givenCommandListAndEventWithSignalScopeWhenImmediateProvidedByPipeControlPostSyncPassedToMemoryCopyThenAppendProfilingCalledForL3FlushWithPostSyncAddedOnce,
          IsXeHpOrXeHpgCore) {
    // Copy request with a non-timestamp event pool; the shared test body is told
    // to expect the L3 flush post-sync at the event's first packet address.
    input.useFirstEventPacketAddress = true;
    input.eventPoolFlags = 0;
    input.size = 0x100000000;
    input.dstPtr = reinterpret_cast<void *>(0x20000000);
    input.srcPtr = reinterpret_cast<void *>(0x1000);

    // Expected event state: a single packet and kernel count, walker post-sync
    // address zero, and one post-sync pipe control.
    arg.postSyncAddressZero = true;
    arg.expectedPostSyncPipeControls = 1;
    arg.expectedWalkerPostSyncOp = 0;
    arg.expectedKernelCount = 1;
    arg.expectedPacketsInUse = 1;

    testSingleTileAppendMemoryCopySingleKernelAndL3Flush<gfxCoreFamily>(input, arg);
}
// Fixture arguments <0, 1, 1>: UsePipeControlMultiKernelEventSync = 0, CompactL3FlushEventPacket = 1,
// multiTile = 1 (the fixture then sets CreateMultipleSubDevices to 2 and EnableImplicitScaling to 1).
using MultiTileAppendMemoryCopyL3CompactEventTest = Test<AppendMemoryCopyMultiPacketEventFixture<0, 1, 1>>;
HWTEST2_F(MultiTileAppendMemoryCopyL3CompactEventTest,
          givenMultiTileCommandListWhenTimestampProvidedByComputeWalkerPostSyncPassedToMemoryCopyThenAppendProfilingCalledForThreeSeparateMultiTileKernels,
          IsAtLeastXeHpCore) {
    // Copy request passed to the shared multi-tile three-kernel test body.
    input.size = 0x100002345;
    input.dstPtr = reinterpret_cast<void *>(0x200002345);
    input.srcPtr = reinterpret_cast<void *>(0x1231);

    // Expected event state: three kernels, six packets, non-zero walker post-sync address.
    arg.postSyncAddressZero = false;
    arg.expectedWalkerPostSyncOp = 3;
    arg.expectedKernelCount = 3;
    arg.expectedPacketsInUse = 6;

    testMultiTileAppendMemoryCopyThreeKernels<gfxCoreFamily>(input, arg);
}
HWTEST2_F(MultiTileAppendMemoryCopyL3CompactEventTest,
          givenMultiTileCommandListWhenTimestampProvidedByComputeWalkerPostSyncPassedToMemoryCopyThenAppendProfilingCalledForSingleSeparateMultiTileKernel,
          IsAtLeastXeHpCore) {
    // Copy request passed to the shared multi-tile single-kernel test body.
    input.size = 0x100000000;
    input.dstPtr = reinterpret_cast<void *>(0x20000000);
    input.srcPtr = reinterpret_cast<void *>(0x1000);

    // Expected event state: one kernel, two packets, non-zero walker post-sync address.
    arg.postSyncAddressZero = false;
    arg.expectedWalkerPostSyncOp = 3;
    arg.expectedKernelCount = 1;
    arg.expectedPacketsInUse = 2;

    testMultiTileAppendMemoryCopySingleKernel<gfxCoreFamily>(input, arg);
}
HWTEST2_F(MultiTileAppendMemoryCopyL3CompactEventTest,
          givenMultiTileCommandListCopyUsingThreeKernelsAndTimestampEventWithSignalScopeWhenTimestampProvidedByRegisterPostSyncPassedToMemoryCopyThenAppendProfilingCalledForL3FlushWithPostSyncAddedForScopedEvent,
          IsXeHpOrXeHpgCore) {
    // Copy request with a kernel-timestamp event pool.
    input.eventPoolFlags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP;
    input.size = 0x100002345;
    input.dstPtr = reinterpret_cast<void *>(0x200002345);
    input.srcPtr = reinterpret_cast<void *>(0x1231);

    // Expected event state: two packets, kernel count one, walker post-sync
    // address zero, and no post-sync pipe controls.
    arg.postSyncAddressZero = true;
    arg.expectedPostSyncPipeControls = 0;
    arg.expectedWalkerPostSyncOp = 0;
    arg.expectedKernelCount = 1;
    arg.expectedPacketsInUse = 2;

    testMultiTileAppendMemoryCopyThreeKernelsAndL3Flush<gfxCoreFamily>(input, arg);
}
// NOTE(review): "Immdiate" in the test name is a misspelling of "Immediate"; kept as-is so test filters keep matching.
HWTEST2_F(MultiTileAppendMemoryCopyL3CompactEventTest,
          givenMultiTileCommandListCopyUsingThreeKernelsAndEventWithSignalScopeWhenImmdiateProvidedByPipeControlPostSyncPassedToMemoryCopyThenAppendProfilingCalledForL3FlushWithPostSyncAddedForScopedEvent,
          IsXeHpOrXeHpgCore) {
    // Copy request with a non-timestamp event pool; the shared test body is told
    // to expect the L3 flush post-sync at the event's first packet address.
    input.useFirstEventPacketAddress = true;
    input.eventPoolFlags = 0;
    input.size = 0x100002345;
    input.dstPtr = reinterpret_cast<void *>(0x200002345);
    input.srcPtr = reinterpret_cast<void *>(0x1231);

    // Expected event state: two packets, kernel count one, walker post-sync
    // address zero, and one post-sync pipe control.
    arg.postSyncAddressZero = true;
    arg.expectedPostSyncPipeControls = 1;
    arg.expectedWalkerPostSyncOp = 0;
    arg.expectedKernelCount = 1;
    arg.expectedPacketsInUse = 2;

    testMultiTileAppendMemoryCopyThreeKernelsAndL3Flush<gfxCoreFamily>(input, arg);
}
HWTEST2_F(MultiTileAppendMemoryCopyL3CompactEventTest,
          givenMultiTileCommandListCopyUsingSingleKernelAndTimestampEventWithSignalScopeWhenTimestampProvidedByRegisterPostSyncPassedToMemoryCopyThenAppendProfilingCalledForL3FlushWithPostSyncAddedForScopedEvent,
          IsXeHpOrXeHpgCore) {
    // Copy request with a kernel-timestamp event pool.
    input.eventPoolFlags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP;
    input.size = 0x100000000;
    input.dstPtr = reinterpret_cast<void *>(0x20000000);
    input.srcPtr = reinterpret_cast<void *>(0x1000);

    // Expected event state: two packets, kernel count one, walker post-sync
    // address zero, and no post-sync pipe controls.
    arg.postSyncAddressZero = true;
    arg.expectedPostSyncPipeControls = 0;
    arg.expectedWalkerPostSyncOp = 0;
    arg.expectedKernelCount = 1;
    arg.expectedPacketsInUse = 2;

    testMultiTileAppendMemoryCopySingleKernelAndL3Flush<gfxCoreFamily>(input, arg);
}
// NOTE(review): "Immdiate" in the test name is a misspelling of "Immediate"; kept as-is so test filters keep matching.
HWTEST2_F(MultiTileAppendMemoryCopyL3CompactEventTest,
          givenMultiTileCommandListCopyUsingSingleKernelAndEventWithSignalScopeWhenImmdiateProvidedByPipeControlPostSyncPassedToMemoryCopyThenAppendProfilingCalledForL3FlushWithPostSyncAddedForScopedEvent,
          IsXeHpOrXeHpgCore) {
    // Copy request with a non-timestamp event pool; the shared test body is told
    // to expect the L3 flush post-sync at the event's first packet address.
    input.useFirstEventPacketAddress = true;
    input.eventPoolFlags = 0;
    input.size = 0x100000000;
    input.dstPtr = reinterpret_cast<void *>(0x20000000);
    input.srcPtr = reinterpret_cast<void *>(0x1000);

    // Expected event state: two packets, kernel count one, walker post-sync
    // address zero, and one post-sync pipe control.
    arg.postSyncAddressZero = true;
    arg.expectedPostSyncPipeControls = 1;
    arg.expectedWalkerPostSyncOp = 0;
    arg.expectedKernelCount = 1;
    arg.expectedPacketsInUse = 2;

    testMultiTileAppendMemoryCopySingleKernelAndL3Flush<gfxCoreFamily>(input, arg);
}
// Fixture arguments <1, 1, 0>: UsePipeControlMultiKernelEventSync = 1, CompactL3FlushEventPacket = 1,
// multiTile = 0 (the fixture only enables sub-devices and implicit scaling when multiTile == 1).
using AppendMemoryCopyL3CompactAndSingleKernelPacketEventTest = Test<AppendMemoryCopyMultiPacketEventFixture<1, 1, 0>>;
HWTEST2_F(AppendMemoryCopyL3CompactAndSingleKernelPacketEventTest,
          givenCommandListWhenTimestampProvidedByRegisterPostSyncPassedToMemoryCopyThenAppendProfilingCalledForSinglePacket,
          IsAtLeastXeHpCore) {
    // Copy request passed to the shared three-kernel test body.
    input.size = 0x100002345;
    input.dstPtr = reinterpret_cast<void *>(0x200002345);
    input.srcPtr = reinterpret_cast<void *>(0x1231);

    // Expected event state: a single packet and kernel count, walker post-sync address zero.
    arg.postSyncAddressZero = true;
    arg.expectedWalkerPostSyncOp = 0;
    arg.expectedKernelCount = 1;
    arg.expectedPacketsInUse = 1;

    testSingleTileAppendMemoryCopyThreeKernels<gfxCoreFamily>(input, arg);
}
HWTEST2_F(AppendMemoryCopyL3CompactAndSingleKernelPacketEventTest,
          givenCommandListWhenTimestampProvidedByComputeWalkerPostSyncPassedToMemoryCopyThenAppendProfilingCalledForSingleKernel,
          IsAtLeastXeHpCore) {
    // Copy request passed to the shared single-kernel test body.
    input.size = 0x100000000;
    input.dstPtr = reinterpret_cast<void *>(0x20000000);
    input.srcPtr = reinterpret_cast<void *>(0x1000);

    // Expected event state: one kernel, one packet, non-zero walker post-sync address.
    arg.postSyncAddressZero = false;
    arg.expectedWalkerPostSyncOp = 3;
    arg.expectedKernelCount = 1;
    arg.expectedPacketsInUse = 1;

    testSingleTileAppendMemoryCopySingleKernel<gfxCoreFamily>(input, arg);
}
HWTEST2_F(AppendMemoryCopyL3CompactAndSingleKernelPacketEventTest,
          givenCommandListCopyUsingThreeKernelsAndTimestampEventWithSignalScopeWhenTimestampProvidedByRegisterPostSyncPassedToMemoryCopyThenAppendProfilingCalledForL3FlushWithPostSyncAddedOnce,
          IsXeHpOrXeHpgCore) {
    // Copy request with a kernel-timestamp event pool.
    input.eventPoolFlags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP;
    input.size = 0x100002345;
    input.dstPtr = reinterpret_cast<void *>(0x200002345);
    input.srcPtr = reinterpret_cast<void *>(0x1231);

    // Expected event state: a single packet and kernel count, walker post-sync
    // address zero, and no post-sync pipe controls.
    arg.postSyncAddressZero = true;
    arg.expectedPostSyncPipeControls = 0;
    arg.expectedWalkerPostSyncOp = 0;
    arg.expectedKernelCount = 1;
    arg.expectedPacketsInUse = 1;

    testSingleTileAppendMemoryCopyThreeKernelsAndL3Flush<gfxCoreFamily>(input, arg);
}
HWTEST2_F(AppendMemoryCopyL3CompactAndSingleKernelPacketEventTest,
          givenCommandListCopyUsingThreeKernelsAndEventWithSignalScopeWhenImmediateProvidedByPipeControlPostSyncPassedToMemoryCopyThenAppendProfilingCalledForL3FlushWithPostSyncAddedOnce,
          IsXeHpOrXeHpgCore) {
    // Copy request with a non-timestamp event pool. useFirstEventPacketAddress is
    // left untouched here, unlike the sibling immediate-event tests.
    // NOTE(review): presumably the shared test body still picks the first packet
    // address through input.usePipeControlMultiPacketEventSync == 1 — confirm.
    input.eventPoolFlags = 0;
    input.size = 0x100002345;
    input.dstPtr = reinterpret_cast<void *>(0x200002345);
    input.srcPtr = reinterpret_cast<void *>(0x1231);

    // Expected event state: a single packet and kernel count, walker post-sync
    // address zero, and one post-sync pipe control.
    arg.postSyncAddressZero = true;
    arg.expectedPostSyncPipeControls = 1;
    arg.expectedWalkerPostSyncOp = 0;
    arg.expectedKernelCount = 1;
    arg.expectedPacketsInUse = 1;

    testSingleTileAppendMemoryCopyThreeKernelsAndL3Flush<gfxCoreFamily>(input, arg);
}
HWTEST2_F(AppendMemoryCopyL3CompactAndSingleKernelPacketEventTest,
          givenCommandListCopyUsingSingleKernelAndTimestampEventWithSignalScopeWhenTimestampProvidedByRegisterPostSyncPassedToMemoryCopyThenAppendProfilingCalledForL3FlushWithPostSyncAddedOnce,
          IsXeHpOrXeHpgCore) {
    // Copy request with a kernel-timestamp event pool.
    input.eventPoolFlags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP;
    input.size = 0x100000000;
    input.dstPtr = reinterpret_cast<void *>(0x20000000);
    input.srcPtr = reinterpret_cast<void *>(0x1000);

    // Expected event state: a single packet and kernel count, walker post-sync
    // address zero, and no post-sync pipe controls.
    arg.postSyncAddressZero = true;
    arg.expectedPostSyncPipeControls = 0;
    arg.expectedWalkerPostSyncOp = 0;
    arg.expectedKernelCount = 1;
    arg.expectedPacketsInUse = 1;

    testSingleTileAppendMemoryCopySingleKernelAndL3Flush<gfxCoreFamily>(input, arg);
}
HWTEST2_F(AppendMemoryCopyL3CompactAndSingleKernelPacketEventTest,
          givenCommandListCopyUsingSingleKernelAndEventWithSignalScopeWhenImmediateProvidedByPipeControlPostSyncPassedToMemoryCopyThenAppendProfilingCalledForL3FlushWithPostSyncAddedOnce,
          IsXeHpOrXeHpgCore) {
    // Copy request with a non-timestamp event pool; the shared test body is told
    // to expect the L3 flush post-sync at the event's first packet address.
    input.useFirstEventPacketAddress = true;
    input.eventPoolFlags = 0;
    input.size = 0x100000000;
    input.dstPtr = reinterpret_cast<void *>(0x20000000);
    input.srcPtr = reinterpret_cast<void *>(0x1000);

    // Expected event state: a single packet and kernel count, walker post-sync
    // address zero, and one post-sync pipe control.
    arg.postSyncAddressZero = true;
    arg.expectedPostSyncPipeControls = 1;
    arg.expectedWalkerPostSyncOp = 0;
    arg.expectedKernelCount = 1;
    arg.expectedPacketsInUse = 1;

    testSingleTileAppendMemoryCopySingleKernelAndL3Flush<gfxCoreFamily>(input, arg);
}
// Fixture arguments <1, 1, 1>: UsePipeControlMultiKernelEventSync = 1, CompactL3FlushEventPacket = 1,
// multiTile = 1 (the fixture then sets CreateMultipleSubDevices to 2 and EnableImplicitScaling to 1).
using MultiTileAppendMemoryCopyL3CompactAndSingleKernelPacketEventTest = Test<AppendMemoryCopyMultiPacketEventFixture<1, 1, 1>>;
// NOTE(review): the name says "ThreeSeparateMultiTileKernels", yet the expectations below are
// kernel count one, two packets and a zero walker post-sync address — the name looks copied
// from the multi-packet fixture; consider renaming.
HWTEST2_F(MultiTileAppendMemoryCopyL3CompactAndSingleKernelPacketEventTest,
          givenMultiTileCommandListWhenTimestampProvidedByComputeWalkerPostSyncPassedToMemoryCopyThenAppendProfilingCalledForThreeSeparateMultiTileKernels,
          IsAtLeastXeHpCore) {
    // Copy request passed to the shared multi-tile three-kernel test body.
    input.size = 0x100002345;
    input.dstPtr = reinterpret_cast<void *>(0x200002345);
    input.srcPtr = reinterpret_cast<void *>(0x1231);

    // Expected event state: two packets, kernel count one, walker post-sync address zero.
    arg.postSyncAddressZero = true;
    arg.expectedWalkerPostSyncOp = 0;
    arg.expectedKernelCount = 1;
    arg.expectedPacketsInUse = 2;

    testMultiTileAppendMemoryCopyThreeKernels<gfxCoreFamily>(input, arg);
}
HWTEST2_F(MultiTileAppendMemoryCopyL3CompactAndSingleKernelPacketEventTest,
          givenMultiTileCommandListWhenTimestampProvidedByComputeWalkerPostSyncPassedToMemoryCopyThenAppendProfilingCalledForSingleMultiTileKernel,
          IsAtLeastXeHpCore) {
    // Copy request passed to the shared multi-tile single-kernel test body.
    input.size = 0x100000000;
    input.dstPtr = reinterpret_cast<void *>(0x20000000);
    input.srcPtr = reinterpret_cast<void *>(0x1000);

    // Expected event state: one kernel, two packets, non-zero walker post-sync address.
    arg.postSyncAddressZero = false;
    arg.expectedWalkerPostSyncOp = 3;
    arg.expectedKernelCount = 1;
    arg.expectedPacketsInUse = 2;

    testMultiTileAppendMemoryCopySingleKernel<gfxCoreFamily>(input, arg);
}
HWTEST2_F(MultiTileAppendMemoryCopyL3CompactAndSingleKernelPacketEventTest,
          givenMultiTileCommandListCopyUsingThreeKernelsAndTimestampEventWithSignalScopeWhenTimestampProvidedByRegisterPostSyncPassedToMemoryCopyThenAppendProfilingCalledForL3FlushWithPostSyncAddedForScopedEvent,
          IsXeHpOrXeHpgCore) {
    // Copy request with a kernel-timestamp event pool.
    input.eventPoolFlags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP;
    input.size = 0x100002345;
    input.dstPtr = reinterpret_cast<void *>(0x200002345);
    input.srcPtr = reinterpret_cast<void *>(0x1231);

    // Expected event state: two packets, kernel count one, walker post-sync
    // address zero, and no post-sync pipe controls.
    arg.postSyncAddressZero = true;
    arg.expectedPostSyncPipeControls = 0;
    arg.expectedWalkerPostSyncOp = 0;
    arg.expectedKernelCount = 1;
    arg.expectedPacketsInUse = 2;

    testMultiTileAppendMemoryCopyThreeKernelsAndL3Flush<gfxCoreFamily>(input, arg);
}
// NOTE(review): "Immdiate" in the test name is a misspelling of "Immediate"; kept as-is so test filters keep matching.
HWTEST2_F(MultiTileAppendMemoryCopyL3CompactAndSingleKernelPacketEventTest,
          givenMultiTileCommandListCopyUsingThreeKernelsAndEventWithSignalScopeWhenImmdiateProvidedByPipeControlPostSyncPassedToMemoryCopyThenAppendProfilingCalledForL3FlushWithPostSyncAddedForScopedEvent,
          IsXeHpOrXeHpgCore) {
    // Copy request with a non-timestamp event pool. useFirstEventPacketAddress is
    // left untouched here, unlike the sibling immediate-event tests.
    // NOTE(review): presumably the shared test body still picks the first packet
    // address through input.usePipeControlMultiPacketEventSync == 1 — confirm.
    input.eventPoolFlags = 0;
    input.size = 0x100002345;
    input.dstPtr = reinterpret_cast<void *>(0x200002345);
    input.srcPtr = reinterpret_cast<void *>(0x1231);

    // Expected event state: two packets, kernel count one, walker post-sync
    // address zero, and one post-sync pipe control.
    arg.postSyncAddressZero = true;
    arg.expectedPostSyncPipeControls = 1;
    arg.expectedWalkerPostSyncOp = 0;
    arg.expectedKernelCount = 1;
    arg.expectedPacketsInUse = 2;

    testMultiTileAppendMemoryCopyThreeKernelsAndL3Flush<gfxCoreFamily>(input, arg);
}
// NOTE(review): the name says "CopyUsingThreeKernel", but this test drives the single-kernel helper
// with the same inputs the other single-kernel tests use; "CopyUsingSingleKernel" looks intended.
HWTEST2_F(MultiTileAppendMemoryCopyL3CompactAndSingleKernelPacketEventTest,
          givenMultiTileCommandListCopyUsingThreeKernelAndTimestampEventWithSignalScopeWhenTimestampProvidedByRegisterPostSyncPassedToMemoryCopyThenAppendProfilingCalledForL3FlushWithPostSyncAddedForScopedEvent,
          IsXeHpOrXeHpgCore) {
    // Copy request with a kernel-timestamp event pool.
    input.eventPoolFlags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP;
    input.size = 0x100000000;
    input.dstPtr = reinterpret_cast<void *>(0x20000000);
    input.srcPtr = reinterpret_cast<void *>(0x1000);

    // Expected event state: two packets, kernel count one, walker post-sync
    // address zero, and no post-sync pipe controls.
    arg.postSyncAddressZero = true;
    arg.expectedPostSyncPipeControls = 0;
    arg.expectedWalkerPostSyncOp = 0;
    arg.expectedKernelCount = 1;
    arg.expectedPacketsInUse = 2;

    testMultiTileAppendMemoryCopySingleKernelAndL3Flush<gfxCoreFamily>(input, arg);
}
// NOTE(review): "Immdiate" in the test name is a misspelling of "Immediate"; kept as-is so test filters keep matching.
HWTEST2_F(MultiTileAppendMemoryCopyL3CompactAndSingleKernelPacketEventTest,
          givenMultiTileCommandListCopyUsingSingleKernelAndEventWithSignalScopeWhenImmdiateProvidedByPipeControlPostSyncPassedToMemoryCopyThenAppendProfilingCalledForL3FlushWithPostSyncAddedForScopedEvent,
          IsXeHpOrXeHpgCore) {
    // Copy request with a non-timestamp event pool; the shared test body is told
    // to expect the L3 flush post-sync at the event's first packet address.
    input.useFirstEventPacketAddress = true;
    input.eventPoolFlags = 0;
    input.size = 0x100000000;
    input.dstPtr = reinterpret_cast<void *>(0x20000000);
    input.srcPtr = reinterpret_cast<void *>(0x1000);

    // Expected event state: two packets, kernel count one, walker post-sync
    // address zero, and one post-sync pipe control.
    arg.postSyncAddressZero = true;
    arg.expectedPostSyncPipeControls = 1;
    arg.expectedWalkerPostSyncOp = 0;
    arg.expectedKernelCount = 1;
    arg.expectedPacketsInUse = 2;

    testMultiTileAppendMemoryCopySingleKernelAndL3Flush<gfxCoreFamily>(input, arg);
}
} // namespace ult
} // namespace L0

View File

@@ -26,12 +26,15 @@ struct FillTestInput {
void *patternPtr = nullptr;
ze_event_pool_flags_t eventPoolFlags = 0;
bool useFirstEventPacketAddress = false;
};
template <int32_t usePipeControlMultiPacketEventSync, uint32_t multiTile>
template <int32_t usePipeControlMultiPacketEventSync, int32_t compactL3FlushEventPacket, uint32_t multiTile>
struct AppendFillMultiPacketEventFixture : public AppendFillFixture {
void setUp() {
DebugManager.flags.UsePipeControlMultiKernelEventSync.set(usePipeControlMultiPacketEventSync);
DebugManager.flags.CompactL3FlushEventPacket.set(compactL3FlushEventPacket);
if (multiTile == 1) {
DebugManager.flags.CreateMultipleSubDevices.set(2);
DebugManager.flags.EnableImplicitScaling.set(1);
@@ -157,6 +160,62 @@ void testSingleTileAppendMemoryFillManyKernels(FillTestInput &input, TestExpecte
EXPECT_EQ(secondKernelEventAddress, walkerCmd->getPostSync().getDestinationAddress());
}
/**
 * Appends a memory fill that signals a host-scope event and verifies the event accounting
 * and the post-sync programming of the two COMPUTE_WALKER commands emitted by the fill.
 *
 * Only the part of the command stream written by appendMemoryFill is parsed.
 *
 * @param input driver/context/device handles, fill parameters and event pool flags.
 * @param arg   expected packet count, kernel count, walker post-sync operation, and whether
 *              the walker post-sync address is expected to be zero.
 *
 * NOTE(review): despite the "AndL3Flush" suffix this helper does not inspect any
 * PIPE_CONTROL commands - confirm whether L3 flush checks are intended here.
 */
template <GFXCORE_FAMILY gfxCoreFamily>
void testSingleTileAppendMemoryFillManyKernelsAndL3Flush(FillTestInput &input, TestExpectedValues &arg) {
    using FamilyType = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
    using COMPUTE_WALKER = typename FamilyType::COMPUTE_WALKER;
    using POSTSYNC_DATA = typename FamilyType::POSTSYNC_DATA;
    using OPERATION = typename POSTSYNC_DATA::OPERATION;

    ze_event_pool_desc_t eventPoolDesc = {};
    eventPoolDesc.count = 1;
    eventPoolDesc.flags = input.eventPoolFlags;

    ze_event_desc_t eventDesc = {};
    eventDesc.index = 0;
    eventDesc.signal = ZE_EVENT_SCOPE_FLAG_HOST;

    ze_result_t result = ZE_RESULT_SUCCESS;
    auto eventPool = std::unique_ptr<L0::EventPool>(L0::EventPool::create(input.driver, input.context, 0, nullptr, &eventPoolDesc, result));
    // Fatal checks: the pool and the event are dereferenced below, so stop on a failed setup
    // instead of crashing on a null pointer.
    ASSERT_EQ(ZE_RESULT_SUCCESS, result);
    ASSERT_NE(nullptr, eventPool.get());
    auto event = std::unique_ptr<L0::Event>(L0::Event::create<uint32_t>(eventPool.get(), &eventDesc, input.device));
    ASSERT_NE(nullptr, event.get());

    // Each walker is expected to target its own event packet unless a zero address is expected.
    uint64_t firstKernelEventAddress = arg.postSyncAddressZero ? 0 : event->getGpuAddress(input.device);
    uint64_t secondKernelEventAddress = arg.postSyncAddressZero ? 0 : event->getGpuAddress(input.device) + event->getSinglePacketSize();

    auto commandList = std::make_unique<CommandListCoreFamily<gfxCoreFamily>>();
    result = commandList->initialize(input.device, NEO::EngineGroupType::RenderCompute, 0u);
    ASSERT_EQ(ZE_RESULT_SUCCESS, result);

    auto &commandContainer = commandList->commandContainer;

    size_t usedBefore = commandContainer.getCommandStream()->getUsed();
    result = commandList->appendMemoryFill(input.dstPtr, input.patternPtr,
                                           input.patternSize, input.allocSize, event->toHandle(), 0, nullptr);
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);
    size_t usedAfter = commandContainer.getCommandStream()->getUsed();

    EXPECT_EQ(arg.expectedPacketsInUse, event->getPacketsInUse());
    EXPECT_EQ(arg.expectedKernelCount, event->getKernelCount());

    // Parse only the commands added by appendMemoryFill.
    GenCmdList cmdList;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        cmdList,
        ptrOffset(commandContainer.getCommandStream()->getCpuBase(), usedBefore),
        usedAfter - usedBefore));

    auto itorWalkers = findAll<COMPUTE_WALKER *>(cmdList.begin(), cmdList.end());
    ASSERT_EQ(2u, itorWalkers.size());
    auto firstWalker = itorWalkers[0];
    auto secondWalker = itorWalkers[1];

    auto walkerCmd = genCmdCast<COMPUTE_WALKER *>(*firstWalker);
    EXPECT_EQ(static_cast<OPERATION>(arg.expectedWalkerPostSyncOp), walkerCmd->getPostSync().getOperation());
    EXPECT_EQ(firstKernelEventAddress, walkerCmd->getPostSync().getDestinationAddress());

    walkerCmd = genCmdCast<COMPUTE_WALKER *>(*secondWalker);
    EXPECT_EQ(static_cast<OPERATION>(arg.expectedWalkerPostSyncOp), walkerCmd->getPostSync().getOperation());
    EXPECT_EQ(secondKernelEventAddress, walkerCmd->getPostSync().getDestinationAddress());
}
template <GFXCORE_FAMILY gfxCoreFamily>
void testSingleTileAppendMemoryFillSingleKernel(FillTestInput &input, TestExpectedValues &arg) {
using FamilyType = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
@@ -220,7 +279,7 @@ void testSingleTileAppendMemoryFillSingleKernelAndL3Flush(FillTestInput &input,
ze_event_pool_desc_t eventPoolDesc = {};
eventPoolDesc.count = 1;
eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP;
eventPoolDesc.flags = input.eventPoolFlags;
ze_event_desc_t eventDesc = {};
eventDesc.index = 0;
@@ -263,7 +322,10 @@ void testSingleTileAppendMemoryFillSingleKernelAndL3Flush(FillTestInput &input,
EXPECT_EQ(static_cast<OPERATION>(arg.expectedWalkerPostSyncOp), walkerCmd->getPostSync().getOperation());
EXPECT_EQ(firstKernelEventAddress, walkerCmd->getPostSync().getDestinationAddress());
uint64_t l3FlushPostSyncAddress = firstKernelEventAddress + event->getSinglePacketSize();
uint64_t l3FlushPostSyncAddress = event->getGpuAddress(input.device);
if (!input.useFirstEventPacketAddress) {
l3FlushPostSyncAddress += event->getSinglePacketSize();
}
if (event->isUsingContextEndOffset()) {
l3FlushPostSyncAddress += event->getContextEndOffset();
}
@@ -452,7 +514,7 @@ void testMultiTileAppendMemoryFillSingleKernelAndL3Flush(FillTestInput &input, T
EXPECT_EQ(expectedDcFlush, dcFlushFound);
}
using AppendFillMultiPacketEventTest = Test<AppendFillMultiPacketEventFixture<0, 0>>;
using AppendFillMultiPacketEventTest = Test<AppendFillMultiPacketEventFixture<0, 0, 0>>;
HWTEST2_F(AppendFillMultiPacketEventTest,
givenCallToAppendMemoryFillWithImmediateValueWhenTimestampEventUsesComputeWalkerPostSyncThenSeparateKernelsUsesPostSyncProfiling,
@@ -506,10 +568,12 @@ HWTEST2_F(AppendFillMultiPacketEventTest,
arg.expectedPostSyncPipeControls = 1;
arg.postSyncAddressZero = false;
input.eventPoolFlags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP;
testSingleTileAppendMemoryFillSingleKernelAndL3Flush<gfxCoreFamily>(input, arg);
}
using AppendFillSinglePacketEventTest = Test<AppendFillMultiPacketEventFixture<1, 0>>;
using AppendFillSinglePacketEventTest = Test<AppendFillMultiPacketEventFixture<1, 0, 0>>;
HWTEST2_F(AppendFillSinglePacketEventTest,
givenCallToAppendMemoryFillWithImmediateValueWhenTimestampEventUsesRegisterPostSyncThenSeparateKernelsNotUsesWalkerPostSyncProfiling,
@@ -563,10 +627,12 @@ HWTEST2_F(AppendFillSinglePacketEventTest,
arg.expectedPostSyncPipeControls = 1;
arg.postSyncAddressZero = false;
input.eventPoolFlags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP;
testSingleTileAppendMemoryFillSingleKernelAndL3Flush<gfxCoreFamily>(input, arg);
}
using MultiTileAppendFillEventMultiPacketTest = Test<AppendFillMultiPacketEventFixture<0, 1>>;
using MultiTileAppendFillEventMultiPacketTest = Test<AppendFillMultiPacketEventFixture<0, 0, 1>>;
HWTEST2_F(MultiTileAppendFillEventMultiPacketTest,
givenMultiTileCmdListCallToAppendMemoryFillWhenSignalScopeTimestampEventUsesComputeWalkerPostSyncThenSeparateKernelsUsesWalkerPostSyncProfilingAndSingleDcFlushWithImmediatePostSync, IsAtLeastXeHpCore) {
@@ -646,7 +712,7 @@ HWTEST2_F(MultiTileAppendFillEventMultiPacketTest,
testMultiTileAppendMemoryFillSingleKernelAndL3Flush<gfxCoreFamily>(input, arg);
}
using MultiTileAppendFillEventSinglePacketTest = Test<AppendFillMultiPacketEventFixture<1, 1>>;
using MultiTileAppendFillEventSinglePacketTest = Test<AppendFillMultiPacketEventFixture<1, 0, 1>>;
HWTEST2_F(MultiTileAppendFillEventSinglePacketTest,
givenMultiTileCmdListCallToAppendMemoryFillWhenSignalScopeTimestampEventUsesRegisterPostSyncThenSeparateKernelsNotUsesWalkerPostSyncProfilingAndDcFlushWithNoPostSync, IsAtLeastXeHpCore) {
@@ -685,5 +751,248 @@ HWTEST2_F(MultiTileAppendFillEventSinglePacketTest,
testMultiTileAppendMemoryFillManyKernels<gfxCoreFamily>(input, arg);
}
// Fixture arguments are <usePipeControlMultiPacketEventSync, compactL3FlushEventPacket, multiTile>.
// <0, 1, 0>: UsePipeControlMultiKernelEventSync flag set to 0, CompactL3FlushEventPacket flag set to 1,
// multi-tile debug flags not applied.
using AppendFillCompactL3EventTest = Test<AppendFillMultiPacketEventFixture<0, 1, 0>>;
// Fill with the fixture's immediate pattern; two kernels and two event packets are expected,
// with walker post-sync operation value 3 and non-zero post-sync addresses.
HWTEST2_F(AppendFillCompactL3EventTest,
          givenCallToAppendMemoryFillWithImmediateValueWhenTimestampEventUsesWalkerPostSyncThenSeparateKernelsUsesWalkerPostSyncProfiling,
          IsAtLeastXeHpCore) {
    // Fill request built from the fixture's immediate-pattern data.
    input.patternPtr = &immediatePattern;
    input.patternSize = sizeof(immediatePattern);
    input.dstPtr = immediateDstPtr;
    input.allocSize = immediateAllocSize;

    // Expected values.
    arg.expectedKernelCount = 2;
    arg.expectedPacketsInUse = 2;
    arg.postSyncAddressZero = false;
    arg.expectedWalkerPostSyncOp = 3;

    testSingleTileAppendMemoryFillManyImmediateKernels<gfxCoreFamily>(input, arg);
}
// Fill with the fixture's pattern buffer; two kernels and two event packets are expected,
// with walker post-sync operation value 3 and non-zero post-sync addresses.
HWTEST2_F(AppendFillCompactL3EventTest,
          givenCallToAppendMemoryFillWhenTimestampEventUsesWalkerPostSyncThenSeparateKernelsUsesWalkerPostSyncProfiling,
          IsAtLeastXeHpCore) {
    // Fill request built from the fixture's pattern data.
    input.patternPtr = pattern;
    input.patternSize = patternSize;
    input.dstPtr = dstPtr;
    input.allocSize = allocSize;

    // Expected values.
    arg.expectedKernelCount = 2;
    arg.expectedPacketsInUse = 2;
    arg.postSyncAddressZero = false;
    arg.expectedWalkerPostSyncOp = 3;

    testSingleTileAppendMemoryFillManyKernels<gfxCoreFamily>(input, arg);
}
// Single-kernel fill: one kernel and one event packet are expected, with walker post-sync
// operation value 3 and a non-zero post-sync address.
HWTEST2_F(AppendFillCompactL3EventTest,
          givenAppendMemoryFillUsingSinglePacketEventWhenPatternDispatchOneKernelThenUseComputeWalkerPostSync,
          IsAtLeastXeHpCore) {
    arg.expectedKernelCount = 1;
    arg.expectedPacketsInUse = 1;
    arg.postSyncAddressZero = false;
    arg.expectedWalkerPostSyncOp = 3;

    testSingleTileAppendMemoryFillSingleKernel<gfxCoreFamily>(input, arg);
}
// Single-kernel fill with a kernel-timestamp event pool: one packet is expected, walker
// post-sync operation value 0 with zero address, and no post-sync PIPE_CONTROL.
HWTEST2_F(AppendFillCompactL3EventTest,
          givenAppendMemoryFillUsingL3CompactEventWhenPatternDispatchOneKernelThenUseRegisterPostSync,
          IsXeHpOrXeHpgCore) {
    // Timestamp event pool.
    input.eventPoolFlags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP;

    // Expected event accounting.
    arg.expectedKernelCount = 1;
    arg.expectedPacketsInUse = 1;

    // Expected post-sync programming.
    arg.postSyncAddressZero = true;
    arg.expectedWalkerPostSyncOp = 0;
    arg.expectedPostSyncPipeControls = 0;

    testSingleTileAppendMemoryFillSingleKernelAndL3Flush<gfxCoreFamily>(input, arg);
}
// Single-kernel fill with an event pool created without flags: one packet is expected,
// walker post-sync operation value 0 with zero address, and one post-sync PIPE_CONTROL
// checked against the first event packet address.
HWTEST2_F(AppendFillCompactL3EventTest,
          givenCallToAppendMemoryFillWhenL3CompactImmediateEventUsesPipeControlPostSyncThenSinglePipeControlPostSyncUsed,
          IsXeHpOrXeHpgCore) {
    // Event configuration.
    input.eventPoolFlags = 0;
    input.useFirstEventPacketAddress = true;

    // Expected event accounting.
    arg.expectedKernelCount = 1;
    arg.expectedPacketsInUse = 1;

    // Expected post-sync programming.
    arg.postSyncAddressZero = true;
    arg.expectedWalkerPostSyncOp = 0;
    arg.expectedPostSyncPipeControls = 1;

    testSingleTileAppendMemoryFillSingleKernelAndL3Flush<gfxCoreFamily>(input, arg);
}
// Fixture arguments are <usePipeControlMultiPacketEventSync, compactL3FlushEventPacket, multiTile>.
// <0, 1, 1>: UsePipeControlMultiKernelEventSync flag set to 0, CompactL3FlushEventPacket flag set to 1,
// multi-tile debug flags (CreateMultipleSubDevices, EnableImplicitScaling) applied.
using MultiTileAppendFillCompactL3EventTest = Test<AppendFillMultiPacketEventFixture<0, 1, 1>>;
// Multi-tile fill with a kernel-timestamp event pool. Expected values depend on whether
// getDcFlushEnable reports that the default hardware info requires a DC flush.
HWTEST2_F(MultiTileAppendFillCompactL3EventTest,
          givenMultiTileCmdListCallToAppendMemoryFillWhenPlatformNeedsDcFlushAndL3CompactTimestampEventThenRegisterPostSyncUsedOtherwiseUsesWalkerPostSyncProfiling,
          IsAtLeastXeHpCore) {
    // Fill request built from the fixture's pattern data, timestamp event pool.
    input.patternPtr = pattern;
    input.patternSize = patternSize;
    input.dstPtr = dstPtr;
    input.allocSize = allocSize;
    input.eventPoolFlags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP;

    const bool dcFlushRequired = NEO::MemorySynchronizationCommands<FamilyType>::getDcFlushEnable(true, *defaultHwInfo);
    if (!dcFlushRequired) {
        // No DC flush: expectations for the walker post-sync path.
        arg.expectedKernelCount = 2;
        arg.expectedPacketsInUse = 4;
        arg.postSyncAddressZero = false;
        arg.expectedWalkerPostSyncOp = 3;
        arg.expectedPostSyncPipeControls = 0;
    } else {
        // DC flush required: expectations for the compacted event path.
        arg.expectedKernelCount = 1;
        arg.expectedPacketsInUse = 2;
        arg.postSyncAddressZero = true;
        arg.expectedWalkerPostSyncOp = 0;
        arg.expectedPostSyncPipeControls = 0;
    }

    testMultiTileAppendMemoryFillManyKernels<gfxCoreFamily>(input, arg);
}
// Multi-tile fill with an event pool created without flags. Expected values depend on whether
// getDcFlushEnable reports that the default hardware info requires a DC flush.
HWTEST2_F(MultiTileAppendFillCompactL3EventTest,
          givenMultiTileCmdListCallToAppendMemoryFillWhenPlatformNeedsDcFlushAndL3CompactImmediateEventThenPipeControlPostSyncUsedOtherwiseUsesWalkerPostSyncProfiling,
          IsAtLeastXeHpCore) {
    // Fill request built from the fixture's pattern data, event pool without flags.
    input.patternPtr = pattern;
    input.patternSize = patternSize;
    input.dstPtr = dstPtr;
    input.allocSize = allocSize;
    input.eventPoolFlags = 0;

    const bool dcFlushRequired = NEO::MemorySynchronizationCommands<FamilyType>::getDcFlushEnable(true, *defaultHwInfo);
    if (!dcFlushRequired) {
        // No DC flush: expectations for the walker post-sync path.
        arg.expectedKernelCount = 2;
        arg.expectedPacketsInUse = 4;
        arg.postSyncAddressZero = false;
        arg.expectedWalkerPostSyncOp = 3;
        arg.expectedPostSyncPipeControls = 0;
    } else {
        // DC flush required: a single post-sync PIPE_CONTROL is expected.
        arg.expectedKernelCount = 1;
        arg.expectedPacketsInUse = 2;
        arg.postSyncAddressZero = true;
        arg.expectedWalkerPostSyncOp = 0;
        arg.expectedPostSyncPipeControls = 1;
    }

    testMultiTileAppendMemoryFillManyKernels<gfxCoreFamily>(input, arg);
}
// Fixture arguments are <usePipeControlMultiPacketEventSync, compactL3FlushEventPacket, multiTile>.
// <1, 1, 0>: UsePipeControlMultiKernelEventSync and CompactL3FlushEventPacket flags both set to 1,
// multi-tile debug flags not applied.
using AppendFillKernelSplitAndCompactL3EventTest = Test<AppendFillMultiPacketEventFixture<1, 1, 0>>;
// Fill with the fixture's immediate pattern; a single kernel count and packet are expected,
// with walker post-sync operation value 0 and zero post-sync addresses.
HWTEST2_F(AppendFillKernelSplitAndCompactL3EventTest,
          givenCallToAppendMemoryFillWithImmediateValueWhenTimestampEventUsesRegisterPostSyncThenSeparateKernelsNotUsesWalkerPostSyncProfiling,
          IsAtLeastXeHpCore) {
    // Fill request built from the fixture's immediate-pattern data.
    input.patternPtr = &immediatePattern;
    input.patternSize = sizeof(immediatePattern);
    input.dstPtr = immediateDstPtr;
    input.allocSize = immediateAllocSize;

    // Expected values.
    arg.expectedKernelCount = 1;
    arg.expectedPacketsInUse = 1;
    arg.postSyncAddressZero = true;
    arg.expectedWalkerPostSyncOp = 0;

    testSingleTileAppendMemoryFillManyImmediateKernels<gfxCoreFamily>(input, arg);
}
// Fill with the fixture's pattern buffer; a single kernel count and packet are expected,
// with walker post-sync operation value 0 and zero post-sync addresses.
HWTEST2_F(AppendFillKernelSplitAndCompactL3EventTest,
          givenCallToAppendMemoryFillWhenTimestampEventUsesRegisterPostSyncThenSeparateKernelsNotUsesWalkerPostSyncProfiling,
          IsAtLeastXeHpCore) {
    // Fill request built from the fixture's pattern data.
    input.patternPtr = pattern;
    input.patternSize = patternSize;
    input.dstPtr = dstPtr;
    input.allocSize = allocSize;

    // Expected values.
    arg.expectedKernelCount = 1;
    arg.expectedPacketsInUse = 1;
    arg.postSyncAddressZero = true;
    arg.expectedWalkerPostSyncOp = 0;

    testSingleTileAppendMemoryFillManyKernels<gfxCoreFamily>(input, arg);
}
// Single-kernel fill: one kernel and one event packet are expected, with walker post-sync
// operation value 3 and a non-zero post-sync address.
HWTEST2_F(AppendFillKernelSplitAndCompactL3EventTest,
          givenAppendMemoryFillUsingSinglePacketEventWhenPatternDispatchOneKernelThenUseComputeWalkerPostSync,
          IsAtLeastXeHpCore) {
    arg.expectedKernelCount = 1;
    arg.expectedPacketsInUse = 1;
    arg.postSyncAddressZero = false;
    arg.expectedWalkerPostSyncOp = 3;

    testSingleTileAppendMemoryFillSingleKernel<gfxCoreFamily>(input, arg);
}
// Single-kernel fill with a kernel-timestamp event pool: one packet is expected, walker
// post-sync operation value 0 with zero address, and no post-sync PIPE_CONTROL.
HWTEST2_F(AppendFillKernelSplitAndCompactL3EventTest,
          givenAppendMemoryFillUsingL3CompactTimestampEventWhenPatternDispatchOneKernelThenUseRegisterPostSync,
          IsXeHpOrXeHpgCore) {
    // Timestamp event pool.
    input.eventPoolFlags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP;

    // Expected event accounting.
    arg.expectedKernelCount = 1;
    arg.expectedPacketsInUse = 1;

    // Expected post-sync programming.
    arg.postSyncAddressZero = true;
    arg.expectedWalkerPostSyncOp = 0;
    arg.expectedPostSyncPipeControls = 0;

    testSingleTileAppendMemoryFillSingleKernelAndL3Flush<gfxCoreFamily>(input, arg);
}
// Single-kernel fill with an event pool created without flags: one packet is expected,
// walker post-sync operation value 0 with zero address, and one post-sync PIPE_CONTROL
// checked against the first event packet address.
HWTEST2_F(AppendFillKernelSplitAndCompactL3EventTest,
          givenAppendMemoryFillUsingL3CompactImmediateEventWhenPatternDispatchOneKernelThenUsePipeControlPostSync,
          IsXeHpOrXeHpgCore) {
    // Event configuration.
    input.eventPoolFlags = 0;
    input.useFirstEventPacketAddress = true;

    // Expected event accounting.
    arg.expectedKernelCount = 1;
    arg.expectedPacketsInUse = 1;

    // Expected post-sync programming.
    arg.postSyncAddressZero = true;
    arg.expectedWalkerPostSyncOp = 0;
    arg.expectedPostSyncPipeControls = 1;

    testSingleTileAppendMemoryFillSingleKernelAndL3Flush<gfxCoreFamily>(input, arg);
}
// Fixture arguments are <usePipeControlMultiPacketEventSync, compactL3FlushEventPacket, multiTile>.
// <1, 1, 1>: UsePipeControlMultiKernelEventSync and CompactL3FlushEventPacket flags both set to 1,
// multi-tile debug flags (CreateMultipleSubDevices, EnableImplicitScaling) applied.
using MultiTileAppendFillKernelSplitAndCompactL3EventTest = Test<AppendFillMultiPacketEventFixture<1, 1, 1>>;
// Multi-tile fill with a kernel-timestamp event pool: two packets and a kernel count of one
// are expected, walker post-sync operation value 0 with zero address, and no post-sync PIPE_CONTROL.
HWTEST2_F(MultiTileAppendFillKernelSplitAndCompactL3EventTest,
          givenMultiTileCmdListCallToAppendMemoryFillWhenL3CompactTimestampEventUsesRegisterPostSyncThenSeparateKernelsNotUsesWalkerPostSyncProfilingAndDcFlushWithNoPostSync,
          IsAtLeastXeHpCore) {
    // Fill request built from the fixture's pattern data, timestamp event pool.
    input.patternPtr = pattern;
    input.patternSize = patternSize;
    input.dstPtr = dstPtr;
    input.allocSize = allocSize;
    input.eventPoolFlags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP;

    // Expected event accounting.
    arg.expectedKernelCount = 1;
    arg.expectedPacketsInUse = 2;

    // Expected post-sync programming.
    arg.postSyncAddressZero = true;
    arg.expectedWalkerPostSyncOp = 0;
    arg.expectedPostSyncPipeControls = 0;

    testMultiTileAppendMemoryFillManyKernels<gfxCoreFamily>(input, arg);
}
// Multi-tile fill with an event pool created without flags: two packets and a kernel count of
// one are expected, walker post-sync operation value 0 with zero address, and one post-sync
// PIPE_CONTROL.
HWTEST2_F(MultiTileAppendFillKernelSplitAndCompactL3EventTest,
          givenMultiTileCmdListCallToAppendMemoryFillWhenL3CompactImmediateEventUsesPipeControlPostSyncThenSeparateKernelsNotUsesWalkerPostSyncProfilingAndDcFlushWithImmediatePostSync, IsAtLeastXeHpCore) {
    // Expected event accounting (the redundant second expectedPacketsInUse assignment was dropped).
    arg.expectedPacketsInUse = 2;
    arg.expectedKernelCount = 1;

    // Expected post-sync programming.
    arg.expectedWalkerPostSyncOp = 0;
    arg.expectedPostSyncPipeControls = 1;
    arg.postSyncAddressZero = true;

    // Fill request built from the fixture's pattern data, event pool without flags.
    input.eventPoolFlags = 0;
    input.dstPtr = dstPtr;
    input.allocSize = allocSize;
    input.patternPtr = pattern;
    input.patternSize = patternSize;

    testMultiTileAppendMemoryFillManyKernels<gfxCoreFamily>(input, arg);
}
} // namespace ult
} // namespace L0

View File

@@ -296,5 +296,250 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenVariousKernelsAndPatchingDisallowe
pCommandList->reset();
}
// Inputs consumed by the appendLaunchKernel L3-flush event tests.
struct AppendKernelTestInput {
    // Handles used to create the event pool, the event and the mock module.
    DriverHandle *driver{nullptr};
    L0::Context *context{nullptr};
    L0::Device *device{nullptr};
    // Flags copied into ze_event_pool_desc_t::flags.
    ze_event_pool_flags_t eventPoolFlags{0};
    // Number of single-packet sizes added to the event address to get the expected L3 flush post-sync address.
    uint32_t packetOffsetMul{1};
    // When true, the expected L3 flush post-sync address is the event's base address instead.
    bool useFirstEventPacketAddress{false};
};
/**
 * Fixture for appendLaunchKernel tests that signal a host-scope event.
 *
 * @tparam compactL3FlushEventPacket value written to the CompactL3FlushEventPacket debug flag.
 * @tparam multiTile when 1, two sub-devices with implicit scaling are requested and the
 *         expectations are adjusted accordingly.
 */
template <int32_t compactL3FlushEventPacket, uint32_t multiTile>
struct CommandListAppendLaunchKernelCompactL3FlushEventFixture : public ModuleFixture {
    // Applies the debug flags first, then sets up the base fixture and captures its handles.
    void setUp() {
        DebugManager.flags.CompactL3FlushEventPacket.set(compactL3FlushEventPacket);
        if constexpr (multiTile == 1) {
            DebugManager.flags.CreateMultipleSubDevices.set(2);
            DebugManager.flags.EnableImplicitScaling.set(1);
            arg.workloadPartition = true;
            arg.expectDcFlush = 2; // DC Flush multi-tile platforms require DC Flush + x-tile sync after implicit scaling COMPUTE_WALKER
            input.packetOffsetMul = 2;
        } else {
            arg.expectDcFlush = 1;
        }
        ModuleFixture::setUp();

        input.driver = driverHandle.get();
        input.context = context;
        input.device = device;
    }

    /**
     * Appends a single kernel launch signaling a host-scope event and verifies the event
     * accounting, the COMPUTE_WALKER post-sync, and the PIPE_CONTROL commands that follow it.
     *
     * @param input handles, event pool flags and address-selection options.
     * @param arg   expected packet/kernel counts, walker post-sync operation, number of
     *              post-sync PIPE_CONTROLs and number of PIPE_CONTROLs with DC flush enabled.
     */
    template <GFXCORE_FAMILY gfxCoreFamily>
    void testAppendLaunchKernelAndL3Flush(AppendKernelTestInput &input, TestExpectedValues &arg) {
        using FamilyType = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
        using COMPUTE_WALKER = typename FamilyType::COMPUTE_WALKER;
        using POSTSYNC_DATA = typename FamilyType::POSTSYNC_DATA;
        using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
        using POST_SYNC_OPERATION = typename FamilyType::PIPE_CONTROL::POST_SYNC_OPERATION;
        using OPERATION = typename POSTSYNC_DATA::OPERATION;

        Mock<::L0::Kernel> kernel;
        auto module = std::unique_ptr<Module>(new Mock<Module>(input.device, nullptr));
        kernel.module = module.get();

        auto commandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
        // Use the device passed in through input, consistently with the module and the event below.
        auto result = commandList->initialize(input.device, NEO::EngineGroupType::Compute, 0u);
        ASSERT_EQ(ZE_RESULT_SUCCESS, result);

        ze_event_pool_desc_t eventPoolDesc = {};
        eventPoolDesc.count = 1;
        eventPoolDesc.flags = input.eventPoolFlags;

        ze_event_desc_t eventDesc = {};
        eventDesc.index = 0;
        eventDesc.signal = ZE_EVENT_SCOPE_FLAG_HOST;

        auto eventPool = std::unique_ptr<L0::EventPool>(L0::EventPool::create(input.driver, input.context, 0, nullptr, &eventPoolDesc, result));
        // Fatal checks: the pool and the event are dereferenced below, so stop on a failed setup
        // instead of crashing on a null pointer.
        ASSERT_EQ(ZE_RESULT_SUCCESS, result);
        ASSERT_NE(nullptr, eventPool.get());
        auto event = std::unique_ptr<L0::Event>(L0::Event::create<uint32_t>(eventPool.get(), &eventDesc, input.device));
        ASSERT_NE(nullptr, event.get());

        uint64_t firstKernelEventAddress = arg.postSyncAddressZero ? 0 : event->getGpuAddress(input.device);

        ze_group_count_t groupCount{1, 1, 1};
        CmdListKernelLaunchParams launchParams = {};
        result = commandList->appendLaunchKernel(kernel.toHandle(), &groupCount, event->toHandle(), 0, nullptr, launchParams);
        EXPECT_EQ(ZE_RESULT_SUCCESS, result);

        EXPECT_EQ(arg.expectedPacketsInUse, event->getPacketsInUse());
        EXPECT_EQ(arg.expectedKernelCount, event->getKernelCount());

        GenCmdList cmdList;
        ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
            cmdList, ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), 0),
            commandList->commandContainer.getCommandStream()->getUsed()));

        auto itorWalkers = findAll<COMPUTE_WALKER *>(cmdList.begin(), cmdList.end());
        ASSERT_EQ(1u, itorWalkers.size());
        auto firstWalker = itorWalkers[0];

        auto walkerCmd = genCmdCast<COMPUTE_WALKER *>(*firstWalker);
        EXPECT_EQ(static_cast<OPERATION>(arg.expectedWalkerPostSyncOp), walkerCmd->getPostSync().getOperation());
        EXPECT_EQ(firstKernelEventAddress, walkerCmd->getPostSync().getDestinationAddress());

        // Expected L3 flush post-sync target: the packet(s) after the walker's by default,
        // or the event's base address when the first packet is requested.
        uint64_t l3FlushPostSyncAddress = event->getGpuAddress(input.device) + input.packetOffsetMul * event->getSinglePacketSize();
        if (input.useFirstEventPacketAddress) {
            l3FlushPostSyncAddress = event->getGpuAddress(input.device);
        }
        if (event->isUsingContextEndOffset()) {
            l3FlushPostSyncAddress += event->getContextEndOffset();
        }

        // Only PIPE_CONTROLs located after the walker are inspected.
        auto itorPipeControls = findAll<PIPE_CONTROL *>(firstWalker, cmdList.end());

        uint32_t postSyncPipeControls = 0;
        uint32_t dcFlushFound = 0;
        for (auto it : itorPipeControls) {
            auto cmd = genCmdCast<PIPE_CONTROL *>(*it);
            if (cmd->getPostSyncOperation() == POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA) {
                postSyncPipeControls++;
                EXPECT_EQ(l3FlushPostSyncAddress, NEO::UnitTestHelper<FamilyType>::getPipeControlPostSyncAddress(*cmd));
                EXPECT_EQ(Event::STATE_SIGNALED, cmd->getImmediateData());
                if (arg.workloadPartition) {
                    EXPECT_TRUE(cmd->getWorkloadPartitionIdOffsetEnable());
                } else {
                    EXPECT_FALSE(cmd->getWorkloadPartitionIdOffsetEnable());
                }
            }
            if (cmd->getDcFlushEnable()) {
                dcFlushFound++;
            }
        }

        EXPECT_EQ(arg.expectedPostSyncPipeControls, postSyncPipeControls);
        EXPECT_EQ(arg.expectDcFlush, dcFlushFound);
    }

    // NOTE(review): presumably restores the debug flags changed in setUp when the fixture is destroyed - confirm.
    DebugManagerStateRestore restorer;

    AppendKernelTestInput input = {};
    TestExpectedValues arg = {};
};
// Fixture arguments are <compactL3FlushEventPacket, multiTile>.
// <0, 0>: CompactL3FlushEventPacket flag set to 0, single-tile expectations.
using CommandListAppendLaunchKernelCompactL3FlushDisabledTest = Test<CommandListAppendLaunchKernelCompactL3FlushEventFixture<0, 0>>;
// Kernel launch with a kernel-timestamp event pool: two packets are expected, walker
// post-sync operation value 3 with a non-zero address, and one post-sync PIPE_CONTROL.
HWTEST2_F(CommandListAppendLaunchKernelCompactL3FlushDisabledTest,
          givenAppendKernelWithSignalScopeTimestampEventWhenComputeWalkerTimestampPostsyncAndL3ImmediatePostsyncUsedThenExpectComputeWalkerAndPipeControlPostsync,
          IsXeHpOrXeHpgCore) {
    // Timestamp event pool.
    input.eventPoolFlags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP;

    // Expected event accounting.
    arg.expectedPacketsInUse = 2;
    arg.expectedKernelCount = 1;

    // Expected post-sync programming.
    arg.postSyncAddressZero = false;
    arg.expectedWalkerPostSyncOp = 3;
    arg.expectedPostSyncPipeControls = 1;

    testAppendLaunchKernelAndL3Flush<gfxCoreFamily>(input, arg);
}
// Kernel launch with an event pool created without flags: two packets are expected, a
// non-zero walker post-sync address, and one post-sync PIPE_CONTROL. The expected walker
// post-sync operation value depends on whether the platform is multi-tile capable.
HWTEST2_F(CommandListAppendLaunchKernelCompactL3FlushDisabledTest,
          givenAppendKernelWithSignalScopeImmediateEventWhenComputeWalkerImmediatePostsyncAndL3ImmediatePostsyncUsedThenExpectComputeWalkerAndPipeControlPostsync,
          IsXeHpOrXeHpgCore) {
    // Event pool without flags.
    input.eventPoolFlags = 0;

    // Expected event accounting.
    arg.expectedPacketsInUse = 2;
    arg.expectedKernelCount = 1;

    // Expected post-sync programming.
    arg.postSyncAddressZero = false;
    arg.expectedPostSyncPipeControls = 1;
    if (L0HwHelper::get(gfxCoreFamily).multiTileCapablePlatform()) {
        arg.expectedWalkerPostSyncOp = 3;
    } else {
        arg.expectedWalkerPostSyncOp = 1;
    }

    testAppendLaunchKernelAndL3Flush<gfxCoreFamily>(input, arg);
}
// Fixture arguments are <compactL3FlushEventPacket, multiTile>.
// <1, 0>: CompactL3FlushEventPacket flag set to 1, single-tile expectations.
using CommandListAppendLaunchKernelCompactL3FlushEnabledTest = Test<CommandListAppendLaunchKernelCompactL3FlushEventFixture<1, 0>>;
// Kernel launch with a kernel-timestamp event pool: one packet is expected, walker
// post-sync operation value 0 with zero address, and no post-sync PIPE_CONTROL.
HWTEST2_F(CommandListAppendLaunchKernelCompactL3FlushEnabledTest,
          givenAppendKernelWithSignalScopeTimestampEventWhenRegisterTimestampPostsyncUsedThenExpectNoComputeWalkerAndPipeControlPostsync,
          IsXeHpOrXeHpgCore) {
    // Event configuration.
    input.eventPoolFlags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP;
    input.useFirstEventPacketAddress = true;

    // Expected event accounting.
    arg.expectedPacketsInUse = 1;
    arg.expectedKernelCount = 1;

    // Expected post-sync programming.
    arg.postSyncAddressZero = true;
    arg.expectedWalkerPostSyncOp = 0;
    arg.expectedPostSyncPipeControls = 0;

    testAppendLaunchKernelAndL3Flush<gfxCoreFamily>(input, arg);
}
// Kernel launch with an event pool created without flags: one packet is expected, walker
// post-sync operation value 0 with zero address, and one post-sync PIPE_CONTROL checked
// against the first event packet address.
HWTEST2_F(CommandListAppendLaunchKernelCompactL3FlushEnabledTest,
          givenAppendKernelWithSignalScopeImmediateEventWhenL3ImmediatePostsyncUsedThenExpectPipeControlPostsync,
          IsXeHpOrXeHpgCore) {
    // Event configuration.
    input.eventPoolFlags = 0;
    input.useFirstEventPacketAddress = true;

    // Expected event accounting.
    arg.expectedPacketsInUse = 1;
    arg.expectedKernelCount = 1;

    // Expected post-sync programming.
    arg.postSyncAddressZero = true;
    arg.expectedWalkerPostSyncOp = 0;
    arg.expectedPostSyncPipeControls = 1;

    testAppendLaunchKernelAndL3Flush<gfxCoreFamily>(input, arg);
}
// Fixture arguments are <compactL3FlushEventPacket, multiTile>.
// <0, 1>: CompactL3FlushEventPacket flag set to 0, multi-tile debug flags and expectations applied.
using CommandListAppendLaunchKernelMultiTileCompactL3FlushDisabledTest = Test<CommandListAppendLaunchKernelCompactL3FlushEventFixture<0, 1>>;
// Multi-tile kernel launch with a kernel-timestamp event pool: four packets are expected,
// walker post-sync operation value 3 with a non-zero address, and one post-sync PIPE_CONTROL.
HWTEST2_F(CommandListAppendLaunchKernelMultiTileCompactL3FlushDisabledTest,
          givenAppendMultiTileKernelWithSignalScopeTimestampEventWhenComputeWalkerTimestampPostsyncAndL3ImmediatePostsyncUsedThenExpectComputeWalkerAndPipeControlPostsync,
          IsXeHpOrXeHpgCore) {
    // Timestamp event pool.
    input.eventPoolFlags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP;

    // Expected event accounting.
    arg.expectedPacketsInUse = 4;
    arg.expectedKernelCount = 1;

    // Expected post-sync programming.
    arg.postSyncAddressZero = false;
    arg.expectedWalkerPostSyncOp = 3;
    arg.expectedPostSyncPipeControls = 1;

    testAppendLaunchKernelAndL3Flush<gfxCoreFamily>(input, arg);
}
// Multi-tile kernel launch with an event pool created without flags: four packets are expected,
// walker post-sync operation value 3 with a non-zero address, and one post-sync PIPE_CONTROL.
HWTEST2_F(CommandListAppendLaunchKernelMultiTileCompactL3FlushDisabledTest,
          givenAppendMultiTileKernelWithSignalScopeImmediateEventWhenComputeWalkerImmediatePostsyncAndL3ImmediatePostsyncUsedThenExpectComputeWalkerAndPipeControlPostsync,
          IsXeHpOrXeHpgCore) {
    // Event pool without flags.
    input.eventPoolFlags = 0;

    // Expected event accounting.
    arg.expectedPacketsInUse = 4;
    arg.expectedKernelCount = 1;

    // Expected post-sync programming.
    arg.postSyncAddressZero = false;
    arg.expectedWalkerPostSyncOp = 3;
    arg.expectedPostSyncPipeControls = 1;

    testAppendLaunchKernelAndL3Flush<gfxCoreFamily>(input, arg);
}
// Fixture arguments are <compactL3FlushEventPacket, multiTile>.
// <1, 1>: CompactL3FlushEventPacket flag set to 1, multi-tile debug flags and expectations applied.
using CommandListAppendLaunchKernelMultiTileCompactL3FlushEnabledTest = Test<CommandListAppendLaunchKernelCompactL3FlushEventFixture<1, 1>>;
// Multi-tile kernel launch with a kernel-timestamp event pool: two packets are expected,
// walker post-sync operation value 0 with zero address, and no post-sync PIPE_CONTROL.
HWTEST2_F(CommandListAppendLaunchKernelMultiTileCompactL3FlushEnabledTest,
          givenAppendMultiTileKernelWithSignalScopeTimestampEventWhenRegisterTimestampPostsyncUsedThenExpectNoComputeWalkerAndPipeControlPostsync,
          IsXeHpOrXeHpgCore) {
    // Event configuration.
    input.eventPoolFlags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP;
    input.useFirstEventPacketAddress = true;

    // Expected event accounting.
    arg.expectedPacketsInUse = 2;
    arg.expectedKernelCount = 1;

    // Expected post-sync programming.
    arg.postSyncAddressZero = true;
    arg.expectedWalkerPostSyncOp = 0;
    arg.expectedPostSyncPipeControls = 0;

    testAppendLaunchKernelAndL3Flush<gfxCoreFamily>(input, arg);
}
// Multi-tile kernel launch with an event pool created without flags: two packets are expected,
// walker post-sync operation value 0 with zero address, and one post-sync PIPE_CONTROL checked
// against the first event packet address.
HWTEST2_F(CommandListAppendLaunchKernelMultiTileCompactL3FlushEnabledTest,
          givenAppendMultiTileKernelWithSignalScopeImmediateEventWhenL3ImmediatePostsyncUsedThenExpectPipeControlPostsync,
          IsXeHpOrXeHpgCore) {
    // Event configuration.
    input.eventPoolFlags = 0;
    input.useFirstEventPacketAddress = true;

    // Expected event accounting.
    arg.expectedPacketsInUse = 2;
    arg.expectedKernelCount = 1;

    // Expected post-sync programming.
    arg.postSyncAddressZero = true;
    arg.expectedWalkerPostSyncOp = 0;
    arg.expectedPostSyncPipeControls = 1;

    testAppendLaunchKernelAndL3Flush<gfxCoreFamily>(input, arg);
}
} // namespace ult
} // namespace L0

View File

@@ -635,5 +635,11 @@ TEST_F(L0HwHelperTest, givenL0HelperWhenGettingDefaultValueForUsePipeControlMult
EXPECT_FALSE(defaultValue);
}
// Checks that useCompactL3FlushEventPacket reports false for a copy of the default hardware info.
TEST_F(L0HwHelperTest, givenL0HelperWhenGettingDefaultValueForCompactL3FlushEventPacketThenReturnFalse) {
    auto hwInfo = *NEO::defaultHwInfo.get();

    EXPECT_FALSE(L0::L0HwHelper::useCompactL3FlushEventPacket(hwInfo));
}
} // namespace ult
} // namespace L0