mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-05 17:41:26 +08:00
Add option to compact event L3 flush packet
Related-To: NEO-7434
Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
709e322a4a
commit
6a6ab80113
@@ -287,10 +287,18 @@ struct CommandListCoreFamily : CommandListImp {
|
||||
size_t dstSize,
|
||||
CmdListFillKernelArguments &outArguments,
|
||||
Kernel *kernel);
|
||||
bool compactL3FlushEvent(bool dcFlush) const {
|
||||
return this->compactL3FlushEventPacket && dcFlush;
|
||||
}
|
||||
bool eventSignalPipeControl(bool splitKernel, bool dcFlush) const {
|
||||
return (this->pipeControlMultiKernelEventSync && splitKernel) ||
|
||||
compactL3FlushEvent(dcFlush);
|
||||
}
|
||||
|
||||
size_t cmdListCurrentStartOffset = 0;
|
||||
bool containsAnyKernel = false;
|
||||
bool pipeControlMultiKernelEventSync = false;
|
||||
bool compactL3FlushEventPacket = false;
|
||||
};
|
||||
|
||||
template <PRODUCT_FAMILY gfxProductFamily>
|
||||
|
||||
@@ -138,6 +138,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::initialize(Device *device, NEO
|
||||
this->frontEndStateTracking = L0HwHelper::enableFrontEndStateTracking(hwInfo);
|
||||
this->pipelineSelectStateTracking = L0HwHelper::enablePipelineSelectStateTracking(hwInfo);
|
||||
this->pipeControlMultiKernelEventSync = L0HwHelper::usePipeControlMultiKernelEventSync(hwInfo);
|
||||
this->compactL3FlushEventPacket = L0HwHelper::useCompactL3FlushEventPacket(hwInfo);
|
||||
|
||||
if (device->isImplicitScalingCapable() && !this->internalUsage && !isCopyOnly()) {
|
||||
this->partitionCount = static_cast<uint32_t>(this->device->getNEODevice()->getDeviceBitfield().count());
|
||||
@@ -1187,11 +1188,12 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopy(void *dstptr,
|
||||
}
|
||||
|
||||
CmdListKernelLaunchParams launchParams = {};
|
||||
|
||||
bool dcFlush = false;
|
||||
Event *signalEvent = nullptr;
|
||||
if (hSignalEvent) {
|
||||
signalEvent = Event::fromHandle(hSignalEvent);
|
||||
launchParams.isHostSignalScopeEvent = !!(signalEvent->signalScope & ZE_EVENT_SCOPE_FLAG_HOST);
|
||||
dcFlush = getDcFlushRequired(!!signalEvent->signalScope);
|
||||
}
|
||||
|
||||
uint32_t kernelCounter = leftSize > 0 ? 1 : 0;
|
||||
@@ -1199,7 +1201,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopy(void *dstptr,
|
||||
kernelCounter += rightSize > 0 ? 1 : 0;
|
||||
|
||||
launchParams.isKernelSplitOperation = kernelCounter > 1;
|
||||
bool singlePipeControlPacket = this->pipeControlMultiKernelEventSync && launchParams.isKernelSplitOperation;
|
||||
bool singlePipeControlPacket = eventSignalPipeControl(launchParams.isKernelSplitOperation, dcFlush);
|
||||
|
||||
appendEventForProfilingAllWalkers(signalEvent, true, singlePipeControlPacket);
|
||||
|
||||
@@ -1551,9 +1553,11 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(void *ptr,
|
||||
CmdListKernelLaunchParams launchParams = {};
|
||||
|
||||
Event *signalEvent = nullptr;
|
||||
bool dcFlush = false;
|
||||
if (hSignalEvent) {
|
||||
signalEvent = Event::fromHandle(hSignalEvent);
|
||||
launchParams.isHostSignalScopeEvent = !!(signalEvent->signalScope & ZE_EVENT_SCOPE_FLAG_HOST);
|
||||
dcFlush = getDcFlushRequired(!!signalEvent->signalScope);
|
||||
}
|
||||
|
||||
if (isCopyOnly()) {
|
||||
@@ -1610,7 +1614,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(void *ptr,
|
||||
setupFillKernelArguments(dstAllocation.offset, patternSize, size, fillArguments, builtinKernel);
|
||||
|
||||
launchParams.isKernelSplitOperation = (fillArguments.leftRemainingBytes > 0 || fillArguments.rightRemainingBytes > 0);
|
||||
bool singlePipeControlPacket = this->pipeControlMultiKernelEventSync && launchParams.isKernelSplitOperation;
|
||||
bool singlePipeControlPacket = eventSignalPipeControl(launchParams.isKernelSplitOperation, dcFlush);
|
||||
|
||||
appendEventForProfilingAllWalkers(signalEvent, true, singlePipeControlPacket);
|
||||
|
||||
|
||||
@@ -163,20 +163,26 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
|
||||
threadGroupDimensions->groupCountY,
|
||||
threadGroupDimensions->groupCountZ);
|
||||
}
|
||||
NEO::GraphicsAllocation *eventAlloc = nullptr;
|
||||
|
||||
uint64_t eventAddress = 0;
|
||||
bool isTimestampEvent = false;
|
||||
bool l3FlushEnable = false;
|
||||
bool isHostSignalScopeEvent = launchParams.isHostSignalScopeEvent;
|
||||
Event *compactEvent = nullptr;
|
||||
if (event) {
|
||||
eventAlloc = &event->getAllocation(this->device);
|
||||
commandContainer.addToResidencyContainer(eventAlloc);
|
||||
bool flushRequired = !!event->signalScope &&
|
||||
!launchParams.isKernelSplitOperation;
|
||||
l3FlushEnable = getDcFlushRequired(flushRequired);
|
||||
isTimestampEvent = event->isUsingContextEndOffset();
|
||||
eventAddress = event->getPacketAddress(this->device);
|
||||
isHostSignalScopeEvent = !!(event->signalScope & ZE_EVENT_SCOPE_FLAG_HOST);
|
||||
if (compactL3FlushEvent(getDcFlushRequired(!!event->signalScope))) {
|
||||
compactEvent = event;
|
||||
event = nullptr;
|
||||
} else {
|
||||
NEO::GraphicsAllocation *eventAlloc = &event->getAllocation(this->device);
|
||||
commandContainer.addToResidencyContainer(eventAlloc);
|
||||
bool flushRequired = !!event->signalScope &&
|
||||
!launchParams.isKernelSplitOperation;
|
||||
l3FlushEnable = getDcFlushRequired(flushRequired);
|
||||
isTimestampEvent = event->isUsingContextEndOffset();
|
||||
eventAddress = event->getPacketAddress(this->device);
|
||||
}
|
||||
}
|
||||
|
||||
bool isKernelUsingSystemAllocation = false;
|
||||
@@ -249,6 +255,10 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
|
||||
|
||||
std::list<void *> additionalCommands;
|
||||
|
||||
if (compactEvent) {
|
||||
appendEventForProfilingAllWalkers(compactEvent, true, true);
|
||||
}
|
||||
|
||||
NEO::EncodeDispatchKernelArgs dispatchKernelArgs{
|
||||
eventAddress, // eventAddress
|
||||
neoDevice, // device
|
||||
@@ -273,7 +283,9 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
|
||||
NEO::EncodeDispatchKernel<GfxFamily>::encode(commandContainer, dispatchKernelArgs, getLogicalStateHelper());
|
||||
this->containsStatelessUncachedResource = dispatchKernelArgs.requiresUncachedMocs;
|
||||
|
||||
if (event) {
|
||||
if (compactEvent) {
|
||||
appendEventForProfilingAllWalkers(compactEvent, false, true);
|
||||
} else if (event) {
|
||||
if (partitionCount > 1) {
|
||||
event->setPacketsInUse(partitionCount);
|
||||
}
|
||||
@@ -404,7 +416,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelSplit(Kernel
|
||||
Event *event,
|
||||
const CmdListKernelLaunchParams &launchParams) {
|
||||
if (event) {
|
||||
if (this->pipeControlMultiKernelEventSync && launchParams.isKernelSplitOperation) {
|
||||
if (eventSignalPipeControl(launchParams.isKernelSplitOperation, getDcFlushRequired(!!event->signalScope))) {
|
||||
event = nullptr;
|
||||
} else {
|
||||
event->increaseKernelCount();
|
||||
|
||||
@@ -53,4 +53,11 @@ bool L0HwHelper::usePipeControlMultiKernelEventSync(const NEO::HardwareInfo &hwI
|
||||
return false;
|
||||
}
|
||||
|
||||
// Reports whether the compact L3 flush event packet should be used.
// The CompactL3FlushEventPacket debug flag decides: -1 yields false,
// any other value is converted to bool (non-zero -> true).
// hwInfo is not consulted by this implementation.
bool L0HwHelper::useCompactL3FlushEventPacket(const NEO::HardwareInfo &hwInfo) {
    const auto debugOverride = NEO::DebugManager.flags.CompactL3FlushEventPacket.get();
    if (debugOverride == -1) {
        return false;
    }
    return debugOverride != 0;
}
|
||||
|
||||
} // namespace L0
|
||||
|
||||
@@ -35,6 +35,7 @@ class L0HwHelper {
|
||||
static bool enableStateComputeModeTracking(const NEO::HardwareInfo &hwInfo);
|
||||
static bool enableImmediateCmdListHeapSharing(const NEO::HardwareInfo &hwInfo, bool cmdlistSupport);
|
||||
static bool usePipeControlMultiKernelEventSync(const NEO::HardwareInfo &hwInfo);
|
||||
static bool useCompactL3FlushEventPacket(const NEO::HardwareInfo &hwInfo);
|
||||
virtual void setAdditionalGroupProperty(ze_command_queue_group_properties_t &groupProperty, NEO::EngineGroupT &group) const = 0;
|
||||
virtual L0::Event *createEvent(L0::EventPool *eventPool, const ze_event_desc_t *desc, L0::Device *device) const = 0;
|
||||
|
||||
|
||||
@@ -188,7 +188,9 @@ struct TestExpectedValues {
|
||||
uint32_t expectedKernelCount = 0;
|
||||
uint32_t expectedWalkerPostSyncOp = 0;
|
||||
uint32_t expectedPostSyncPipeControls = 0;
|
||||
uint32_t expectDcFlush = 0;
|
||||
bool postSyncAddressZero = false;
|
||||
bool workloadPartition = false;
|
||||
};
|
||||
|
||||
} // namespace ult
|
||||
|
||||
@@ -47,6 +47,7 @@ struct WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>
|
||||
using BaseClass::commandListPerThreadScratchSize;
|
||||
using BaseClass::commandListPreemptionMode;
|
||||
using BaseClass::commandsToPatch;
|
||||
using BaseClass::compactL3FlushEventPacket;
|
||||
using BaseClass::containsAnyKernel;
|
||||
using BaseClass::containsCooperativeKernelsFlag;
|
||||
using BaseClass::csr;
|
||||
@@ -123,6 +124,7 @@ struct WhiteBox<L0::CommandListCoreFamilyImmediate<gfxCoreFamily>>
|
||||
using BaseClass::clearCommandsToPatch;
|
||||
using BaseClass::cmdQImmediate;
|
||||
using BaseClass::commandsToPatch;
|
||||
using BaseClass::compactL3FlushEventPacket;
|
||||
using BaseClass::csr;
|
||||
using BaseClass::finalStreamState;
|
||||
using BaseClass::frontEndStateTracking;
|
||||
@@ -142,6 +144,7 @@ struct WhiteBox<L0::CommandListCoreFamilyImmediate<gfxCoreFamily>>
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
struct MockCommandListImmediate : public CommandListCoreFamilyImmediate<gfxCoreFamily> {
|
||||
using BaseClass = CommandListCoreFamilyImmediate<gfxCoreFamily>;
|
||||
using BaseClass::compactL3FlushEventPacket;
|
||||
using BaseClass::containsAnyKernel;
|
||||
using BaseClass::immediateCmdListHeapSharing;
|
||||
using BaseClass::indirectAllocationsAllowed;
|
||||
|
||||
@@ -28,12 +28,15 @@ struct CopyTestInput {
|
||||
ze_event_pool_flags_t eventPoolFlags = 0;
|
||||
|
||||
int32_t usePipeControlMultiPacketEventSync;
|
||||
|
||||
bool useFirstEventPacketAddress = false;
|
||||
};
|
||||
|
||||
template <int32_t usePipeControlMultiPacketEventSync, uint32_t multiTile>
|
||||
template <int32_t usePipeControlMultiPacketEventSync, int32_t compactL3FlushEventPacket, uint32_t multiTile>
|
||||
struct AppendMemoryCopyMultiPacketEventFixture : public DeviceFixture {
|
||||
void setUp() {
|
||||
DebugManager.flags.UsePipeControlMultiKernelEventSync.set(usePipeControlMultiPacketEventSync);
|
||||
DebugManager.flags.CompactL3FlushEventPacket.set(compactL3FlushEventPacket);
|
||||
if (multiTile == 1) {
|
||||
DebugManager.flags.CreateMultipleSubDevices.set(2);
|
||||
DebugManager.flags.EnableImplicitScaling.set(1);
|
||||
@@ -170,8 +173,8 @@ void testSingleTileAppendMemoryCopyThreeKernelsAndL3Flush(CopyTestInput &input,
|
||||
EXPECT_EQ(static_cast<OPERATION>(arg.expectedWalkerPostSyncOp), walkerCmd->getPostSync().getOperation());
|
||||
EXPECT_EQ(thirdKernelEventAddress, walkerCmd->getPostSync().getDestinationAddress());
|
||||
|
||||
uint64_t l3FlushPostSyncAddress = thirdKernelEventAddress + event->getSinglePacketSize();
|
||||
if (input.usePipeControlMultiPacketEventSync == 1) {
|
||||
uint64_t l3FlushPostSyncAddress = event->getGpuAddress(input.device) + 2 * event->getSinglePacketSize() + event->getSinglePacketSize();
|
||||
if (input.usePipeControlMultiPacketEventSync == 1 || input.useFirstEventPacketAddress) {
|
||||
l3FlushPostSyncAddress = event->getGpuAddress(input.device);
|
||||
}
|
||||
if (event->isUsingContextEndOffset()) {
|
||||
@@ -291,7 +294,10 @@ void testSingleTileAppendMemoryCopySingleKernelAndL3Flush(CopyTestInput &input,
|
||||
EXPECT_EQ(static_cast<OPERATION>(arg.expectedWalkerPostSyncOp), walkerCmd->getPostSync().getOperation());
|
||||
EXPECT_EQ(firstKernelEventAddress, walkerCmd->getPostSync().getDestinationAddress());
|
||||
|
||||
uint64_t l3FlushPostSyncAddress = firstKernelEventAddress + event->getSinglePacketSize();
|
||||
uint64_t l3FlushPostSyncAddress = event->getGpuAddress(input.device) + event->getSinglePacketSize();
|
||||
if (input.useFirstEventPacketAddress) {
|
||||
l3FlushPostSyncAddress = event->getGpuAddress(input.device);
|
||||
}
|
||||
if (event->isUsingContextEndOffset()) {
|
||||
l3FlushPostSyncAddress += event->getContextEndOffset();
|
||||
}
|
||||
@@ -496,7 +502,7 @@ void testMultiTileAppendMemoryCopyThreeKernelsAndL3Flush(CopyTestInput &input, T
|
||||
EXPECT_EQ(thirdKernelEventAddress, walkerCmd->getPostSync().getDestinationAddress());
|
||||
|
||||
uint64_t l3FlushPostSyncAddress = thirdKernelEventAddress + 2 * event->getSinglePacketSize();
|
||||
if (input.usePipeControlMultiPacketEventSync == 1) {
|
||||
if (input.usePipeControlMultiPacketEventSync == 1 || input.useFirstEventPacketAddress) {
|
||||
l3FlushPostSyncAddress = event->getGpuAddress(input.device);
|
||||
}
|
||||
if (event->isUsingContextEndOffset()) {
|
||||
@@ -627,7 +633,12 @@ void testMultiTileAppendMemoryCopySingleKernelAndL3Flush(CopyTestInput &input, T
|
||||
EXPECT_EQ(static_cast<OPERATION>(arg.expectedWalkerPostSyncOp), walkerCmd->getPostSync().getOperation());
|
||||
EXPECT_EQ(firstKernelEventAddress, walkerCmd->getPostSync().getDestinationAddress());
|
||||
|
||||
uint64_t l3FlushPostSyncAddress = firstKernelEventAddress + 2 * event->getSinglePacketSize();
|
||||
uint64_t l3FlushPostSyncAddress = 0;
|
||||
if (input.useFirstEventPacketAddress) {
|
||||
l3FlushPostSyncAddress = event->getGpuAddress(input.device);
|
||||
} else {
|
||||
l3FlushPostSyncAddress = event->getGpuAddress(input.device) + 2 * event->getSinglePacketSize();
|
||||
}
|
||||
if (event->isUsingContextEndOffset()) {
|
||||
l3FlushPostSyncAddress += event->getContextEndOffset();
|
||||
}
|
||||
@@ -655,7 +666,7 @@ void testMultiTileAppendMemoryCopySingleKernelAndL3Flush(CopyTestInput &input, T
|
||||
EXPECT_EQ(expectedDcFlush, dcFlushFound);
|
||||
}
|
||||
|
||||
using AppendMemoryCopyXeHpAndLaterMultiPacket = Test<AppendMemoryCopyMultiPacketEventFixture<0, 0>>;
|
||||
using AppendMemoryCopyXeHpAndLaterMultiPacket = Test<AppendMemoryCopyMultiPacketEventFixture<0, 0, 0>>;
|
||||
|
||||
HWTEST2_F(AppendMemoryCopyXeHpAndLaterMultiPacket,
|
||||
givenCommandListWhenTimestampProvidedByComputeWalkerPostSyncPassedToMemoryCopyThenAppendProfilingCalledForThreeSeparateKernels,
|
||||
@@ -768,7 +779,7 @@ HWTEST2_F(AppendMemoryCopyXeHpAndLaterMultiPacket,
|
||||
testSingleTileAppendMemoryCopySignalScopeEventToSubDevice<gfxCoreFamily>(input, arg);
|
||||
}
|
||||
|
||||
using AppendMemoryCopyXeHpAndLaterSinglePacket = Test<AppendMemoryCopyMultiPacketEventFixture<1, 0>>;
|
||||
using AppendMemoryCopyXeHpAndLaterSinglePacket = Test<AppendMemoryCopyMultiPacketEventFixture<1, 0, 0>>;
|
||||
|
||||
HWTEST2_F(AppendMemoryCopyXeHpAndLaterSinglePacket,
|
||||
givenCommandListWhenTimestampProvidedByRegisterPostSyncPassedToMemoryCopyThenAppendProfilingCalledForRegisterOnly,
|
||||
@@ -881,7 +892,7 @@ HWTEST2_F(AppendMemoryCopyXeHpAndLaterSinglePacket,
|
||||
testSingleTileAppendMemoryCopySignalScopeEventToSubDevice<gfxCoreFamily>(input, arg);
|
||||
}
|
||||
|
||||
using MultiTileAppendMemoryCopyXeHpAndLaterMultiPacket = Test<AppendMemoryCopyMultiPacketEventFixture<0, 1>>;
|
||||
using MultiTileAppendMemoryCopyXeHpAndLaterMultiPacket = Test<AppendMemoryCopyMultiPacketEventFixture<0, 0, 1>>;
|
||||
|
||||
HWTEST2_F(MultiTileAppendMemoryCopyXeHpAndLaterMultiPacket,
|
||||
givenMultiTileCommandListWhenTimestampProvidedByComputeWalkerPostSyncPassedToMemoryCopyThenAppendProfilingCalledForThreeSeparateMultiTileKernels,
|
||||
@@ -985,7 +996,7 @@ HWTEST2_F(MultiTileAppendMemoryCopyXeHpAndLaterMultiPacket,
|
||||
testMultiTileAppendMemoryCopySingleKernelAndL3Flush<gfxCoreFamily>(input, arg);
|
||||
}
|
||||
|
||||
using MultiTileAppendMemoryCopyXeHpAndLaterSinglePacket = Test<AppendMemoryCopyMultiPacketEventFixture<1, 1>>;
|
||||
using MultiTileAppendMemoryCopyXeHpAndLaterSinglePacket = Test<AppendMemoryCopyMultiPacketEventFixture<1, 0, 1>>;
|
||||
|
||||
HWTEST2_F(MultiTileAppendMemoryCopyXeHpAndLaterSinglePacket,
|
||||
givenMultiTileCommandListWhenTimestampProvidedByRegisterPostSyncPassedToMemoryCopyThenAppendProfilingCalledForMultiTileRegisterPipeControlPacket,
|
||||
@@ -1088,5 +1099,428 @@ HWTEST2_F(MultiTileAppendMemoryCopyXeHpAndLaterSinglePacket,
|
||||
|
||||
testMultiTileAppendMemoryCopySingleKernelAndL3Flush<gfxCoreFamily>(input, arg);
|
||||
}
|
||||
|
||||
// Fixture arguments are <usePipeControlMultiPacketEventSync = 0, compactL3FlushEventPacket = 1, multiTile = 0>;
// the fixture's setUp() writes the first two into the UsePipeControlMultiKernelEventSync and
// CompactL3FlushEventPacket debug flags.
using AppendMemoryCopyL3CompactEventTest = Test<AppendMemoryCopyMultiPacketEventFixture<0, 1, 0>>;
|
||||
|
||||
// Runs the single-tile three-kernel copy helper and expects 3 packets in use,
// 3 kernels, walker post-sync operation value 3 and a non-zero post-sync address.
HWTEST2_F(AppendMemoryCopyL3CompactEventTest,
          givenCommandListWhenTimestampProvidedByComputeWalkerPostSyncPassedToMemoryCopyThenAppendProfilingCalledForThreeSeparateKernels,
          IsAtLeastXeHpCore) {
    // Copy description.
    input.size = 0x100002345;
    input.dstPtr = reinterpret_cast<void *>(0x200002345);
    input.srcPtr = reinterpret_cast<void *>(0x1231);

    // Expected results.
    arg.postSyncAddressZero = false;
    arg.expectedWalkerPostSyncOp = 3;
    arg.expectedKernelCount = 3;
    arg.expectedPacketsInUse = 3;

    testSingleTileAppendMemoryCopyThreeKernels<gfxCoreFamily>(input, arg);
}
|
||||
|
||||
// Runs the single-tile single-kernel copy helper and expects 1 packet in use,
// 1 kernel, walker post-sync operation value 3 and a non-zero post-sync address.
HWTEST2_F(AppendMemoryCopyL3CompactEventTest,
          givenCommandListWhenTimestampProvidedByComputeWalkerPostSyncPassedToMemoryCopyThenAppendProfilingCalledForSingleKernel,
          IsAtLeastXeHpCore) {
    // Copy description.
    input.size = 0x100000000;
    input.dstPtr = reinterpret_cast<void *>(0x20000000);
    input.srcPtr = reinterpret_cast<void *>(0x1000);

    // Expected results.
    arg.postSyncAddressZero = false;
    arg.expectedWalkerPostSyncOp = 3;
    arg.expectedKernelCount = 1;
    arg.expectedPacketsInUse = 1;

    testSingleTileAppendMemoryCopySingleKernel<gfxCoreFamily>(input, arg);
}
|
||||
|
||||
// Kernel-timestamp event pool with the single-tile three-kernel + L3 flush helper:
// expects 1 packet in use, 1 kernel, walker post-sync operation value 0,
// zero post-sync address and no post-sync pipe controls.
HWTEST2_F(AppendMemoryCopyL3CompactEventTest,
          givenCommandListAndTimestampEventWithSignalScopeWhenTimestampProvidedByComputeWalkerPostSyncPassedToMemoryCopyThenAppendProfilingCalledForThreeSeparateKernelsAndL3FlushWithPostSyncAddedOnce,
          IsXeHpOrXeHpgCore) {
    // Copy description.
    input.eventPoolFlags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP;
    input.size = 0x100002345;
    input.dstPtr = reinterpret_cast<void *>(0x200002345);
    input.srcPtr = reinterpret_cast<void *>(0x1231);

    // Expected results.
    arg.postSyncAddressZero = true;
    arg.expectedPostSyncPipeControls = 0;
    arg.expectedWalkerPostSyncOp = 0;
    arg.expectedKernelCount = 1;
    arg.expectedPacketsInUse = 1;

    testSingleTileAppendMemoryCopyThreeKernelsAndL3Flush<gfxCoreFamily>(input, arg);
}
|
||||
|
||||
// Event pool without flags, with the single-tile three-kernel + L3 flush helper:
// expects 1 packet in use, 1 kernel, walker post-sync operation value 0, zero post-sync
// address and exactly one post-sync pipe control targeting the first event packet address.
// NOTE: the doubled "For" in the test name is what keeps it distinct from the
// single-kernel test of this fixture; removing it would produce a duplicate test name.
HWTEST2_F(AppendMemoryCopyL3CompactEventTest,
          givenCommandListAndEventWithSignalScopeWhenImmediateProvidedByPipeControlPostSyncPassedToMemoryCopyThenAppendProfilingCalledForForL3FlushWithPostSyncAddedOnce,
          IsXeHpOrXeHpgCore) {
    // Copy description.
    input.useFirstEventPacketAddress = true;
    input.eventPoolFlags = 0;
    input.size = 0x100002345;
    input.dstPtr = reinterpret_cast<void *>(0x200002345);
    input.srcPtr = reinterpret_cast<void *>(0x1231);

    // Expected results.
    arg.postSyncAddressZero = true;
    arg.expectedPostSyncPipeControls = 1;
    arg.expectedWalkerPostSyncOp = 0;
    arg.expectedKernelCount = 1;
    arg.expectedPacketsInUse = 1;

    testSingleTileAppendMemoryCopyThreeKernelsAndL3Flush<gfxCoreFamily>(input, arg);
}
|
||||
|
||||
// Kernel-timestamp event pool with the single-tile single-kernel + L3 flush helper:
// expects 1 packet in use, 1 kernel, walker post-sync operation value 0,
// zero post-sync address and no post-sync pipe controls.
HWTEST2_F(AppendMemoryCopyL3CompactEventTest,
          givenCommandListAndTimestampEventWithSignalScopeWhenTimestampProvidedByRegisterPostSyncPassedToMemoryCopyThenAppendProfilingCalledForL3FlushWithPostSyncAddedOnce,
          IsXeHpOrXeHpgCore) {
    // Copy description.
    input.eventPoolFlags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP;
    input.size = 0x100000000;
    input.dstPtr = reinterpret_cast<void *>(0x20000000);
    input.srcPtr = reinterpret_cast<void *>(0x1000);

    // Expected results.
    arg.postSyncAddressZero = true;
    arg.expectedPostSyncPipeControls = 0;
    arg.expectedWalkerPostSyncOp = 0;
    arg.expectedKernelCount = 1;
    arg.expectedPacketsInUse = 1;

    testSingleTileAppendMemoryCopySingleKernelAndL3Flush<gfxCoreFamily>(input, arg);
}
|
||||
|
||||
// Event pool without flags, with the single-tile single-kernel + L3 flush helper:
// expects 1 packet in use, 1 kernel, walker post-sync operation value 0, zero post-sync
// address and exactly one post-sync pipe control targeting the first event packet address.
HWTEST2_F(AppendMemoryCopyL3CompactEventTest,
          givenCommandListAndEventWithSignalScopeWhenImmediateProvidedByPipeControlPostSyncPassedToMemoryCopyThenAppendProfilingCalledForL3FlushWithPostSyncAddedOnce,
          IsXeHpOrXeHpgCore) {
    // Copy description.
    input.useFirstEventPacketAddress = true;
    input.eventPoolFlags = 0;
    input.size = 0x100000000;
    input.dstPtr = reinterpret_cast<void *>(0x20000000);
    input.srcPtr = reinterpret_cast<void *>(0x1000);

    // Expected results.
    arg.postSyncAddressZero = true;
    arg.expectedPostSyncPipeControls = 1;
    arg.expectedWalkerPostSyncOp = 0;
    arg.expectedKernelCount = 1;
    arg.expectedPacketsInUse = 1;

    testSingleTileAppendMemoryCopySingleKernelAndL3Flush<gfxCoreFamily>(input, arg);
}
|
||||
|
||||
// Fixture arguments are <usePipeControlMultiPacketEventSync = 0, compactL3FlushEventPacket = 1, multiTile = 1>;
// with multiTile == 1 the fixture's setUp() also sets CreateMultipleSubDevices to 2 and EnableImplicitScaling to 1.
using MultiTileAppendMemoryCopyL3CompactEventTest = Test<AppendMemoryCopyMultiPacketEventFixture<0, 1, 1>>;
|
||||
|
||||
// Runs the multi-tile three-kernel copy helper and expects 6 packets in use,
// 3 kernels, walker post-sync operation value 3 and a non-zero post-sync address.
HWTEST2_F(MultiTileAppendMemoryCopyL3CompactEventTest,
          givenMultiTileCommandListWhenTimestampProvidedByComputeWalkerPostSyncPassedToMemoryCopyThenAppendProfilingCalledForThreeSeparateMultiTileKernels,
          IsAtLeastXeHpCore) {
    // Copy description.
    input.size = 0x100002345;
    input.dstPtr = reinterpret_cast<void *>(0x200002345);
    input.srcPtr = reinterpret_cast<void *>(0x1231);

    // Expected results.
    arg.postSyncAddressZero = false;
    arg.expectedWalkerPostSyncOp = 3;
    arg.expectedKernelCount = 3;
    arg.expectedPacketsInUse = 6;

    testMultiTileAppendMemoryCopyThreeKernels<gfxCoreFamily>(input, arg);
}
|
||||
|
||||
// Runs the multi-tile single-kernel copy helper and expects 2 packets in use,
// 1 kernel, walker post-sync operation value 3 and a non-zero post-sync address.
HWTEST2_F(MultiTileAppendMemoryCopyL3CompactEventTest,
          givenMultiTileCommandListWhenTimestampProvidedByComputeWalkerPostSyncPassedToMemoryCopyThenAppendProfilingCalledForSingleSeparateMultiTileKernel,
          IsAtLeastXeHpCore) {
    // Copy description.
    input.size = 0x100000000;
    input.dstPtr = reinterpret_cast<void *>(0x20000000);
    input.srcPtr = reinterpret_cast<void *>(0x1000);

    // Expected results.
    arg.postSyncAddressZero = false;
    arg.expectedWalkerPostSyncOp = 3;
    arg.expectedKernelCount = 1;
    arg.expectedPacketsInUse = 2;

    testMultiTileAppendMemoryCopySingleKernel<gfxCoreFamily>(input, arg);
}
|
||||
|
||||
// Kernel-timestamp event pool with the multi-tile three-kernel + L3 flush helper:
// expects 2 packets in use, 1 kernel, walker post-sync operation value 0,
// zero post-sync address and no post-sync pipe controls.
HWTEST2_F(MultiTileAppendMemoryCopyL3CompactEventTest,
          givenMultiTileCommandListCopyUsingThreeKernelsAndTimestampEventWithSignalScopeWhenTimestampProvidedByRegisterPostSyncPassedToMemoryCopyThenAppendProfilingCalledForL3FlushWithPostSyncAddedForScopedEvent,
          IsXeHpOrXeHpgCore) {
    // Copy description.
    input.eventPoolFlags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP;
    input.size = 0x100002345;
    input.dstPtr = reinterpret_cast<void *>(0x200002345);
    input.srcPtr = reinterpret_cast<void *>(0x1231);

    // Expected results.
    arg.postSyncAddressZero = true;
    arg.expectedPostSyncPipeControls = 0;
    arg.expectedWalkerPostSyncOp = 0;
    arg.expectedKernelCount = 1;
    arg.expectedPacketsInUse = 2;

    testMultiTileAppendMemoryCopyThreeKernelsAndL3Flush<gfxCoreFamily>(input, arg);
}
|
||||
|
||||
// Event pool without flags, with the multi-tile three-kernel + L3 flush helper:
// expects 2 packets in use, 1 kernel, walker post-sync operation value 0, zero post-sync
// address and exactly one post-sync pipe control targeting the first event packet address.
HWTEST2_F(MultiTileAppendMemoryCopyL3CompactEventTest,
          givenMultiTileCommandListCopyUsingThreeKernelsAndEventWithSignalScopeWhenImmdiateProvidedByPipeControlPostSyncPassedToMemoryCopyThenAppendProfilingCalledForL3FlushWithPostSyncAddedForScopedEvent,
          IsXeHpOrXeHpgCore) {
    // Copy description.
    input.useFirstEventPacketAddress = true;
    input.eventPoolFlags = 0;
    input.size = 0x100002345;
    input.dstPtr = reinterpret_cast<void *>(0x200002345);
    input.srcPtr = reinterpret_cast<void *>(0x1231);

    // Expected results.
    arg.postSyncAddressZero = true;
    arg.expectedPostSyncPipeControls = 1;
    arg.expectedWalkerPostSyncOp = 0;
    arg.expectedKernelCount = 1;
    arg.expectedPacketsInUse = 2;

    testMultiTileAppendMemoryCopyThreeKernelsAndL3Flush<gfxCoreFamily>(input, arg);
}
|
||||
|
||||
// Kernel-timestamp event pool with the multi-tile single-kernel + L3 flush helper:
// expects 2 packets in use, 1 kernel, walker post-sync operation value 0,
// zero post-sync address and no post-sync pipe controls.
HWTEST2_F(MultiTileAppendMemoryCopyL3CompactEventTest,
          givenMultiTileCommandListCopyUsingSingleKernelAndTimestampEventWithSignalScopeWhenTimestampProvidedByRegisterPostSyncPassedToMemoryCopyThenAppendProfilingCalledForL3FlushWithPostSyncAddedForScopedEvent,
          IsXeHpOrXeHpgCore) {
    // Copy description.
    input.eventPoolFlags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP;
    input.size = 0x100000000;
    input.dstPtr = reinterpret_cast<void *>(0x20000000);
    input.srcPtr = reinterpret_cast<void *>(0x1000);

    // Expected results.
    arg.postSyncAddressZero = true;
    arg.expectedPostSyncPipeControls = 0;
    arg.expectedWalkerPostSyncOp = 0;
    arg.expectedKernelCount = 1;
    arg.expectedPacketsInUse = 2;

    testMultiTileAppendMemoryCopySingleKernelAndL3Flush<gfxCoreFamily>(input, arg);
}
|
||||
|
||||
// Event pool without flags, with the multi-tile single-kernel + L3 flush helper:
// expects 2 packets in use, 1 kernel, walker post-sync operation value 0, zero post-sync
// address and exactly one post-sync pipe control targeting the first event packet address.
HWTEST2_F(MultiTileAppendMemoryCopyL3CompactEventTest,
          givenMultiTileCommandListCopyUsingSingleKernelAndEventWithSignalScopeWhenImmdiateProvidedByPipeControlPostSyncPassedToMemoryCopyThenAppendProfilingCalledForL3FlushWithPostSyncAddedForScopedEvent,
          IsXeHpOrXeHpgCore) {
    // Copy description.
    input.useFirstEventPacketAddress = true;
    input.eventPoolFlags = 0;
    input.size = 0x100000000;
    input.dstPtr = reinterpret_cast<void *>(0x20000000);
    input.srcPtr = reinterpret_cast<void *>(0x1000);

    // Expected results.
    arg.postSyncAddressZero = true;
    arg.expectedPostSyncPipeControls = 1;
    arg.expectedWalkerPostSyncOp = 0;
    arg.expectedKernelCount = 1;
    arg.expectedPacketsInUse = 2;

    testMultiTileAppendMemoryCopySingleKernelAndL3Flush<gfxCoreFamily>(input, arg);
}
|
||||
|
||||
// Fixture arguments are <usePipeControlMultiPacketEventSync = 1, compactL3FlushEventPacket = 1, multiTile = 0>;
// the fixture's setUp() writes the first two into the UsePipeControlMultiKernelEventSync and
// CompactL3FlushEventPacket debug flags.
using AppendMemoryCopyL3CompactAndSingleKernelPacketEventTest = Test<AppendMemoryCopyMultiPacketEventFixture<1, 1, 0>>;
|
||||
|
||||
// Runs the single-tile three-kernel copy helper and expects 1 packet in use,
// 1 kernel, walker post-sync operation value 0 and a zero post-sync address.
HWTEST2_F(AppendMemoryCopyL3CompactAndSingleKernelPacketEventTest,
          givenCommandListWhenTimestampProvidedByRegisterPostSyncPassedToMemoryCopyThenAppendProfilingCalledForSinglePacket,
          IsAtLeastXeHpCore) {
    // Copy description.
    input.size = 0x100002345;
    input.dstPtr = reinterpret_cast<void *>(0x200002345);
    input.srcPtr = reinterpret_cast<void *>(0x1231);

    // Expected results.
    arg.postSyncAddressZero = true;
    arg.expectedWalkerPostSyncOp = 0;
    arg.expectedKernelCount = 1;
    arg.expectedPacketsInUse = 1;

    testSingleTileAppendMemoryCopyThreeKernels<gfxCoreFamily>(input, arg);
}
|
||||
|
||||
// Runs the single-tile single-kernel copy helper and expects 1 packet in use,
// 1 kernel, walker post-sync operation value 3 and a non-zero post-sync address.
HWTEST2_F(AppendMemoryCopyL3CompactAndSingleKernelPacketEventTest,
          givenCommandListWhenTimestampProvidedByComputeWalkerPostSyncPassedToMemoryCopyThenAppendProfilingCalledForSingleKernel,
          IsAtLeastXeHpCore) {
    // Copy description.
    input.size = 0x100000000;
    input.dstPtr = reinterpret_cast<void *>(0x20000000);
    input.srcPtr = reinterpret_cast<void *>(0x1000);

    // Expected results.
    arg.postSyncAddressZero = false;
    arg.expectedWalkerPostSyncOp = 3;
    arg.expectedKernelCount = 1;
    arg.expectedPacketsInUse = 1;

    testSingleTileAppendMemoryCopySingleKernel<gfxCoreFamily>(input, arg);
}
|
||||
|
||||
// Kernel-timestamp event pool with the single-tile three-kernel + L3 flush helper:
// expects 1 packet in use, 1 kernel, walker post-sync operation value 0,
// zero post-sync address and no post-sync pipe controls.
HWTEST2_F(AppendMemoryCopyL3CompactAndSingleKernelPacketEventTest,
          givenCommandListCopyUsingThreeKernelsAndTimestampEventWithSignalScopeWhenTimestampProvidedByRegisterPostSyncPassedToMemoryCopyThenAppendProfilingCalledForL3FlushWithPostSyncAddedOnce,
          IsXeHpOrXeHpgCore) {
    // Copy description.
    input.eventPoolFlags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP;
    input.size = 0x100002345;
    input.dstPtr = reinterpret_cast<void *>(0x200002345);
    input.srcPtr = reinterpret_cast<void *>(0x1231);

    // Expected results.
    arg.postSyncAddressZero = true;
    arg.expectedPostSyncPipeControls = 0;
    arg.expectedWalkerPostSyncOp = 0;
    arg.expectedKernelCount = 1;
    arg.expectedPacketsInUse = 1;

    testSingleTileAppendMemoryCopyThreeKernelsAndL3Flush<gfxCoreFamily>(input, arg);
}
|
||||
|
||||
// Event pool without flags, with the single-tile three-kernel + L3 flush helper:
// expects 1 packet in use, 1 kernel, walker post-sync operation value 0,
// zero post-sync address and exactly one post-sync pipe control.
// input.useFirstEventPacketAddress is deliberately left untouched here; presumably the
// helper still selects the first packet address via input.usePipeControlMultiPacketEventSync == 1
// for this fixture - TODO confirm against the fixture's setUp().
HWTEST2_F(AppendMemoryCopyL3CompactAndSingleKernelPacketEventTest,
          givenCommandListCopyUsingThreeKernelsAndEventWithSignalScopeWhenImmediateProvidedByPipeControlPostSyncPassedToMemoryCopyThenAppendProfilingCalledForL3FlushWithPostSyncAddedOnce,
          IsXeHpOrXeHpgCore) {
    // Copy description.
    input.eventPoolFlags = 0;
    input.size = 0x100002345;
    input.dstPtr = reinterpret_cast<void *>(0x200002345);
    input.srcPtr = reinterpret_cast<void *>(0x1231);

    // Expected results.
    arg.postSyncAddressZero = true;
    arg.expectedPostSyncPipeControls = 1;
    arg.expectedWalkerPostSyncOp = 0;
    arg.expectedKernelCount = 1;
    arg.expectedPacketsInUse = 1;

    testSingleTileAppendMemoryCopyThreeKernelsAndL3Flush<gfxCoreFamily>(input, arg);
}
|
||||
|
||||
// Kernel-timestamp event pool with the single-tile single-kernel + L3 flush helper:
// expects 1 packet in use, 1 kernel, walker post-sync operation value 0,
// zero post-sync address and no post-sync pipe controls.
HWTEST2_F(AppendMemoryCopyL3CompactAndSingleKernelPacketEventTest,
          givenCommandListCopyUsingSingleKernelAndTimestampEventWithSignalScopeWhenTimestampProvidedByRegisterPostSyncPassedToMemoryCopyThenAppendProfilingCalledForL3FlushWithPostSyncAddedOnce,
          IsXeHpOrXeHpgCore) {
    // Copy description.
    input.eventPoolFlags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP;
    input.size = 0x100000000;
    input.dstPtr = reinterpret_cast<void *>(0x20000000);
    input.srcPtr = reinterpret_cast<void *>(0x1000);

    // Expected results.
    arg.postSyncAddressZero = true;
    arg.expectedPostSyncPipeControls = 0;
    arg.expectedWalkerPostSyncOp = 0;
    arg.expectedKernelCount = 1;
    arg.expectedPacketsInUse = 1;

    testSingleTileAppendMemoryCopySingleKernelAndL3Flush<gfxCoreFamily>(input, arg);
}
|
||||
|
||||
// Event pool without flags, with the single-tile single-kernel + L3 flush helper:
// expects 1 packet in use, 1 kernel, walker post-sync operation value 0, zero post-sync
// address and exactly one post-sync pipe control targeting the first event packet address.
HWTEST2_F(AppendMemoryCopyL3CompactAndSingleKernelPacketEventTest,
          givenCommandListCopyUsingSingleKernelAndEventWithSignalScopeWhenImmediateProvidedByPipeControlPostSyncPassedToMemoryCopyThenAppendProfilingCalledForL3FlushWithPostSyncAddedOnce,
          IsXeHpOrXeHpgCore) {
    // Copy description.
    input.useFirstEventPacketAddress = true;
    input.eventPoolFlags = 0;
    input.size = 0x100000000;
    input.dstPtr = reinterpret_cast<void *>(0x20000000);
    input.srcPtr = reinterpret_cast<void *>(0x1000);

    // Expected results.
    arg.postSyncAddressZero = true;
    arg.expectedPostSyncPipeControls = 1;
    arg.expectedWalkerPostSyncOp = 0;
    arg.expectedKernelCount = 1;
    arg.expectedPacketsInUse = 1;

    testSingleTileAppendMemoryCopySingleKernelAndL3Flush<gfxCoreFamily>(input, arg);
}
|
||||
|
||||
// Test suite alias for the memory-copy fixture instantiated with <1, 1, 1>.
// NOTE(review): the fixture's template parameters are not visible here; presumably they enable
// pipe-control multi-kernel event sync, the compact L3 flush event packet and multi-tile — confirm.
using MultiTileAppendMemoryCopyL3CompactAndSingleKernelPacketEventTest = Test<AppendMemoryCopyMultiPacketEventFixture<1, 1, 1>>;
|
||||
|
||||
// Multi-tile copy of an unaligned range: the event is expected to use two packets
// for a single kernel count, with no walker post-sync programmed.
HWTEST2_F(MultiTileAppendMemoryCopyL3CompactAndSingleKernelPacketEventTest,
          givenMultiTileCommandListWhenTimestampProvidedByComputeWalkerPostSyncPassedToMemoryCopyThenAppendProfilingCalledForThreeSeparateMultiTileKernels,
          IsAtLeastXeHpCore) {
    // Copy description.
    input.size = 0x100002345;
    input.dstPtr = reinterpret_cast<void *>(0x200002345);
    input.srcPtr = reinterpret_cast<void *>(0x1231);

    // Expected event accounting and walker programming.
    arg.postSyncAddressZero = true;
    arg.expectedWalkerPostSyncOp = 0;
    arg.expectedKernelCount = 1;
    arg.expectedPacketsInUse = 2;

    testMultiTileAppendMemoryCopyThreeKernels<gfxCoreFamily>(input, arg);
}
|
||||
|
||||
// Multi-tile copy of an aligned range: the walker is expected to carry the post-sync
// (operation value 3, non-zero destination address) with two event packets in use.
HWTEST2_F(MultiTileAppendMemoryCopyL3CompactAndSingleKernelPacketEventTest,
          givenMultiTileCommandListWhenTimestampProvidedByComputeWalkerPostSyncPassedToMemoryCopyThenAppendProfilingCalledForSingleMultiTileKernel,
          IsAtLeastXeHpCore) {
    // Copy description.
    input.size = 0x100000000;
    input.dstPtr = reinterpret_cast<void *>(0x20000000);
    input.srcPtr = reinterpret_cast<void *>(0x1000);

    // Expected event accounting and walker programming.
    arg.postSyncAddressZero = false;
    arg.expectedWalkerPostSyncOp = 3;
    arg.expectedKernelCount = 1;
    arg.expectedPacketsInUse = 2;

    testMultiTileAppendMemoryCopySingleKernel<gfxCoreFamily>(input, arg);
}
|
||||
|
||||
// Multi-tile copy of an unaligned range signalling a kernel-timestamp event:
// two packets, no walker post-sync and no post-sync PIPE_CONTROL are expected.
HWTEST2_F(MultiTileAppendMemoryCopyL3CompactAndSingleKernelPacketEventTest,
          givenMultiTileCommandListCopyUsingThreeKernelsAndTimestampEventWithSignalScopeWhenTimestampProvidedByRegisterPostSyncPassedToMemoryCopyThenAppendProfilingCalledForL3FlushWithPostSyncAddedForScopedEvent,
          IsXeHpOrXeHpgCore) {
    // Copy description and event pool type.
    input.eventPoolFlags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP;
    input.size = 0x100002345;
    input.dstPtr = reinterpret_cast<void *>(0x200002345);
    input.srcPtr = reinterpret_cast<void *>(0x1231);

    // Expected event accounting and command programming.
    arg.postSyncAddressZero = true;
    arg.expectedPostSyncPipeControls = 0;
    arg.expectedWalkerPostSyncOp = 0;
    arg.expectedKernelCount = 1;
    arg.expectedPacketsInUse = 2;

    testMultiTileAppendMemoryCopyThreeKernelsAndL3Flush<gfxCoreFamily>(input, arg);
}
|
||||
|
||||
// Multi-tile copy of an unaligned range signalling a non-timestamp event:
// two packets and exactly one post-sync PIPE_CONTROL are expected.
HWTEST2_F(MultiTileAppendMemoryCopyL3CompactAndSingleKernelPacketEventTest,
          givenMultiTileCommandListCopyUsingThreeKernelsAndEventWithSignalScopeWhenImmdiateProvidedByPipeControlPostSyncPassedToMemoryCopyThenAppendProfilingCalledForL3FlushWithPostSyncAddedForScopedEvent,
          IsXeHpOrXeHpgCore) {
    // Copy description and event pool type.
    input.eventPoolFlags = 0;
    input.size = 0x100002345;
    input.dstPtr = reinterpret_cast<void *>(0x200002345);
    input.srcPtr = reinterpret_cast<void *>(0x1231);

    // Expected event accounting and command programming.
    arg.postSyncAddressZero = true;
    arg.expectedPostSyncPipeControls = 1;
    arg.expectedWalkerPostSyncOp = 0;
    arg.expectedKernelCount = 1;
    arg.expectedPacketsInUse = 2;

    testMultiTileAppendMemoryCopyThreeKernelsAndL3Flush<gfxCoreFamily>(input, arg);
}
|
||||
|
||||
// Multi-tile copy of an aligned range signalling a kernel-timestamp event:
// two packets, no walker post-sync and no post-sync PIPE_CONTROL are expected.
// NOTE(review): the test name says "ThreeKernel" but the single-kernel helper is invoked — confirm the intended name.
HWTEST2_F(MultiTileAppendMemoryCopyL3CompactAndSingleKernelPacketEventTest,
          givenMultiTileCommandListCopyUsingThreeKernelAndTimestampEventWithSignalScopeWhenTimestampProvidedByRegisterPostSyncPassedToMemoryCopyThenAppendProfilingCalledForL3FlushWithPostSyncAddedForScopedEvent,
          IsXeHpOrXeHpgCore) {
    // Copy description and event pool type.
    input.eventPoolFlags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP;
    input.size = 0x100000000;
    input.dstPtr = reinterpret_cast<void *>(0x20000000);
    input.srcPtr = reinterpret_cast<void *>(0x1000);

    // Expected event accounting and command programming.
    arg.postSyncAddressZero = true;
    arg.expectedPostSyncPipeControls = 0;
    arg.expectedWalkerPostSyncOp = 0;
    arg.expectedKernelCount = 1;
    arg.expectedPacketsInUse = 2;

    testMultiTileAppendMemoryCopySingleKernelAndL3Flush<gfxCoreFamily>(input, arg);
}
|
||||
|
||||
// Multi-tile copy of an aligned range signalling a non-timestamp event:
// two packets and one post-sync PIPE_CONTROL targeting the first event packet are expected.
HWTEST2_F(MultiTileAppendMemoryCopyL3CompactAndSingleKernelPacketEventTest,
          givenMultiTileCommandListCopyUsingSingleKernelAndEventWithSignalScopeWhenImmdiateProvidedByPipeControlPostSyncPassedToMemoryCopyThenAppendProfilingCalledForL3FlushWithPostSyncAddedForScopedEvent,
          IsXeHpOrXeHpgCore) {
    // Copy description and event pool type.
    input.useFirstEventPacketAddress = true;
    input.eventPoolFlags = 0;
    input.size = 0x100000000;
    input.dstPtr = reinterpret_cast<void *>(0x20000000);
    input.srcPtr = reinterpret_cast<void *>(0x1000);

    // Expected event accounting and command programming.
    arg.postSyncAddressZero = true;
    arg.expectedPostSyncPipeControls = 1;
    arg.expectedWalkerPostSyncOp = 0;
    arg.expectedKernelCount = 1;
    arg.expectedPacketsInUse = 2;

    testMultiTileAppendMemoryCopySingleKernelAndL3Flush<gfxCoreFamily>(input, arg);
}
|
||||
|
||||
} // namespace ult
|
||||
} // namespace L0
|
||||
|
||||
@@ -26,12 +26,15 @@ struct FillTestInput {
|
||||
void *patternPtr = nullptr;
|
||||
|
||||
ze_event_pool_flags_t eventPoolFlags = 0;
|
||||
|
||||
bool useFirstEventPacketAddress = false;
|
||||
};
|
||||
|
||||
template <int32_t usePipeControlMultiPacketEventSync, uint32_t multiTile>
|
||||
template <int32_t usePipeControlMultiPacketEventSync, int32_t compactL3FlushEventPacket, uint32_t multiTile>
|
||||
struct AppendFillMultiPacketEventFixture : public AppendFillFixture {
|
||||
void setUp() {
|
||||
DebugManager.flags.UsePipeControlMultiKernelEventSync.set(usePipeControlMultiPacketEventSync);
|
||||
DebugManager.flags.CompactL3FlushEventPacket.set(compactL3FlushEventPacket);
|
||||
if (multiTile == 1) {
|
||||
DebugManager.flags.CreateMultipleSubDevices.set(2);
|
||||
DebugManager.flags.EnableImplicitScaling.set(1);
|
||||
@@ -157,6 +160,62 @@ void testSingleTileAppendMemoryFillManyKernels(FillTestInput &input, TestExpecte
|
||||
EXPECT_EQ(secondKernelEventAddress, walkerCmd->getPostSync().getDestinationAddress());
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
void testSingleTileAppendMemoryFillManyKernelsAndL3Flush(FillTestInput &input, TestExpectedValues &arg) {
|
||||
using FamilyType = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
|
||||
using COMPUTE_WALKER = typename FamilyType::COMPUTE_WALKER;
|
||||
using POSTSYNC_DATA = typename FamilyType::POSTSYNC_DATA;
|
||||
using OPERATION = typename POSTSYNC_DATA::OPERATION;
|
||||
|
||||
ze_event_pool_desc_t eventPoolDesc = {};
|
||||
eventPoolDesc.count = 1;
|
||||
eventPoolDesc.flags = input.eventPoolFlags;
|
||||
|
||||
ze_event_desc_t eventDesc = {};
|
||||
eventDesc.index = 0;
|
||||
eventDesc.signal = ZE_EVENT_SCOPE_FLAG_HOST;
|
||||
|
||||
ze_result_t result = ZE_RESULT_SUCCESS;
|
||||
auto eventPool = std::unique_ptr<L0::EventPool>(L0::EventPool::create(input.driver, input.context, 0, nullptr, &eventPoolDesc, result));
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
auto event = std::unique_ptr<L0::Event>(L0::Event::create<uint32_t>(eventPool.get(), &eventDesc, input.device));
|
||||
|
||||
uint64_t firstKernelEventAddress = arg.postSyncAddressZero ? 0 : event->getGpuAddress(input.device);
|
||||
uint64_t secondKernelEventAddress = arg.postSyncAddressZero ? 0 : event->getGpuAddress(input.device) + event->getSinglePacketSize();
|
||||
|
||||
auto commandList = std::make_unique<CommandListCoreFamily<gfxCoreFamily>>();
|
||||
commandList->initialize(input.device, NEO::EngineGroupType::RenderCompute, 0u);
|
||||
auto &commandContainer = commandList->commandContainer;
|
||||
|
||||
size_t usedBefore = commandContainer.getCommandStream()->getUsed();
|
||||
result = commandList->appendMemoryFill(input.dstPtr, input.patternPtr,
|
||||
input.patternSize, input.allocSize, event->toHandle(), 0, nullptr);
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
size_t usedAfter = commandContainer.getCommandStream()->getUsed();
|
||||
|
||||
EXPECT_EQ(arg.expectedPacketsInUse, event->getPacketsInUse());
|
||||
EXPECT_EQ(arg.expectedKernelCount, event->getKernelCount());
|
||||
|
||||
GenCmdList cmdList;
|
||||
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
|
||||
cmdList,
|
||||
ptrOffset(commandContainer.getCommandStream()->getCpuBase(), usedBefore),
|
||||
usedAfter - usedBefore));
|
||||
|
||||
auto itorWalkers = findAll<COMPUTE_WALKER *>(cmdList.begin(), cmdList.end());
|
||||
ASSERT_EQ(2u, itorWalkers.size());
|
||||
auto firstWalker = itorWalkers[0];
|
||||
auto secondWalker = itorWalkers[1];
|
||||
|
||||
auto walkerCmd = genCmdCast<COMPUTE_WALKER *>(*firstWalker);
|
||||
EXPECT_EQ(static_cast<OPERATION>(arg.expectedWalkerPostSyncOp), walkerCmd->getPostSync().getOperation());
|
||||
EXPECT_EQ(firstKernelEventAddress, walkerCmd->getPostSync().getDestinationAddress());
|
||||
|
||||
walkerCmd = genCmdCast<COMPUTE_WALKER *>(*secondWalker);
|
||||
EXPECT_EQ(static_cast<OPERATION>(arg.expectedWalkerPostSyncOp), walkerCmd->getPostSync().getOperation());
|
||||
EXPECT_EQ(secondKernelEventAddress, walkerCmd->getPostSync().getDestinationAddress());
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
void testSingleTileAppendMemoryFillSingleKernel(FillTestInput &input, TestExpectedValues &arg) {
|
||||
using FamilyType = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
|
||||
@@ -220,7 +279,7 @@ void testSingleTileAppendMemoryFillSingleKernelAndL3Flush(FillTestInput &input,
|
||||
|
||||
ze_event_pool_desc_t eventPoolDesc = {};
|
||||
eventPoolDesc.count = 1;
|
||||
eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP;
|
||||
eventPoolDesc.flags = input.eventPoolFlags;
|
||||
|
||||
ze_event_desc_t eventDesc = {};
|
||||
eventDesc.index = 0;
|
||||
@@ -263,7 +322,10 @@ void testSingleTileAppendMemoryFillSingleKernelAndL3Flush(FillTestInput &input,
|
||||
EXPECT_EQ(static_cast<OPERATION>(arg.expectedWalkerPostSyncOp), walkerCmd->getPostSync().getOperation());
|
||||
EXPECT_EQ(firstKernelEventAddress, walkerCmd->getPostSync().getDestinationAddress());
|
||||
|
||||
uint64_t l3FlushPostSyncAddress = firstKernelEventAddress + event->getSinglePacketSize();
|
||||
uint64_t l3FlushPostSyncAddress = event->getGpuAddress(input.device);
|
||||
if (!input.useFirstEventPacketAddress) {
|
||||
l3FlushPostSyncAddress += event->getSinglePacketSize();
|
||||
}
|
||||
if (event->isUsingContextEndOffset()) {
|
||||
l3FlushPostSyncAddress += event->getContextEndOffset();
|
||||
}
|
||||
@@ -452,7 +514,7 @@ void testMultiTileAppendMemoryFillSingleKernelAndL3Flush(FillTestInput &input, T
|
||||
EXPECT_EQ(expectedDcFlush, dcFlushFound);
|
||||
}
|
||||
|
||||
using AppendFillMultiPacketEventTest = Test<AppendFillMultiPacketEventFixture<0, 0>>;
|
||||
using AppendFillMultiPacketEventTest = Test<AppendFillMultiPacketEventFixture<0, 0, 0>>;
|
||||
|
||||
HWTEST2_F(AppendFillMultiPacketEventTest,
|
||||
givenCallToAppendMemoryFillWithImmediateValueWhenTimestampEventUsesComputeWalkerPostSyncThenSeparateKernelsUsesPostSyncProfiling,
|
||||
@@ -506,10 +568,12 @@ HWTEST2_F(AppendFillMultiPacketEventTest,
|
||||
arg.expectedPostSyncPipeControls = 1;
|
||||
arg.postSyncAddressZero = false;
|
||||
|
||||
input.eventPoolFlags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP;
|
||||
|
||||
testSingleTileAppendMemoryFillSingleKernelAndL3Flush<gfxCoreFamily>(input, arg);
|
||||
}
|
||||
|
||||
using AppendFillSinglePacketEventTest = Test<AppendFillMultiPacketEventFixture<1, 0>>;
|
||||
using AppendFillSinglePacketEventTest = Test<AppendFillMultiPacketEventFixture<1, 0, 0>>;
|
||||
|
||||
HWTEST2_F(AppendFillSinglePacketEventTest,
|
||||
givenCallToAppendMemoryFillWithImmediateValueWhenTimestampEventUsesRegisterPostSyncThenSeparateKernelsNotUsesWalkerPostSyncProfiling,
|
||||
@@ -563,10 +627,12 @@ HWTEST2_F(AppendFillSinglePacketEventTest,
|
||||
arg.expectedPostSyncPipeControls = 1;
|
||||
arg.postSyncAddressZero = false;
|
||||
|
||||
input.eventPoolFlags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP;
|
||||
|
||||
testSingleTileAppendMemoryFillSingleKernelAndL3Flush<gfxCoreFamily>(input, arg);
|
||||
}
|
||||
|
||||
using MultiTileAppendFillEventMultiPacketTest = Test<AppendFillMultiPacketEventFixture<0, 1>>;
|
||||
using MultiTileAppendFillEventMultiPacketTest = Test<AppendFillMultiPacketEventFixture<0, 0, 1>>;
|
||||
|
||||
HWTEST2_F(MultiTileAppendFillEventMultiPacketTest,
|
||||
givenMultiTileCmdListCallToAppendMemoryFillWhenSignalScopeTimestampEventUsesComputeWalkerPostSyncThenSeparateKernelsUsesWalkerPostSyncProfilingAndSingleDcFlushWithImmediatePostSync, IsAtLeastXeHpCore) {
|
||||
@@ -646,7 +712,7 @@ HWTEST2_F(MultiTileAppendFillEventMultiPacketTest,
|
||||
testMultiTileAppendMemoryFillSingleKernelAndL3Flush<gfxCoreFamily>(input, arg);
|
||||
}
|
||||
|
||||
using MultiTileAppendFillEventSinglePacketTest = Test<AppendFillMultiPacketEventFixture<1, 1>>;
|
||||
using MultiTileAppendFillEventSinglePacketTest = Test<AppendFillMultiPacketEventFixture<1, 0, 1>>;
|
||||
|
||||
HWTEST2_F(MultiTileAppendFillEventSinglePacketTest,
|
||||
givenMultiTileCmdListCallToAppendMemoryFillWhenSignalScopeTimestampEventUsesRegisterPostSyncThenSeparateKernelsNotUsesWalkerPostSyncProfilingAndDcFlushWithNoPostSync, IsAtLeastXeHpCore) {
|
||||
@@ -685,5 +751,248 @@ HWTEST2_F(MultiTileAppendFillEventSinglePacketTest,
|
||||
testMultiTileAppendMemoryFillManyKernels<gfxCoreFamily>(input, arg);
|
||||
}
|
||||
|
||||
// Fill fixture with usePipeControlMultiPacketEventSync = 0, compactL3FlushEventPacket = 1, multiTile = 0.
using AppendFillCompactL3EventTest = Test<AppendFillMultiPacketEventFixture<0, 1, 0>>;
|
||||
|
||||
// Immediate-pattern fill: two kernels are expected, each using the walker post-sync
// (operation value 3) with its own event packet.
HWTEST2_F(AppendFillCompactL3EventTest,
          givenCallToAppendMemoryFillWithImmediateValueWhenTimestampEventUsesWalkerPostSyncThenSeparateKernelsUsesWalkerPostSyncProfiling,
          IsAtLeastXeHpCore) {
    // Fill description.
    input.patternSize = sizeof(immediatePattern);
    input.patternPtr = &immediatePattern;
    input.allocSize = immediateAllocSize;
    input.dstPtr = immediateDstPtr;

    // Expected event accounting and walker programming.
    arg.postSyncAddressZero = false;
    arg.expectedWalkerPostSyncOp = 3;
    arg.expectedKernelCount = 2;
    arg.expectedPacketsInUse = 2;

    testSingleTileAppendMemoryFillManyImmediateKernels<gfxCoreFamily>(input, arg);
}
|
||||
|
||||
// Pattern fill: two kernels are expected, each using the walker post-sync
// (operation value 3) with its own event packet.
HWTEST2_F(AppendFillCompactL3EventTest,
          givenCallToAppendMemoryFillWhenTimestampEventUsesWalkerPostSyncThenSeparateKernelsUsesWalkerPostSyncProfiling,
          IsAtLeastXeHpCore) {
    // Fill description.
    input.patternSize = patternSize;
    input.patternPtr = pattern;
    input.allocSize = allocSize;
    input.dstPtr = dstPtr;

    // Expected event accounting and walker programming.
    arg.postSyncAddressZero = false;
    arg.expectedWalkerPostSyncOp = 3;
    arg.expectedKernelCount = 2;
    arg.expectedPacketsInUse = 2;

    testSingleTileAppendMemoryFillManyKernels<gfxCoreFamily>(input, arg);
}
|
||||
|
||||
// Single-kernel fill: one packet and a walker post-sync (operation value 3) are expected.
HWTEST2_F(AppendFillCompactL3EventTest,
          givenAppendMemoryFillUsingSinglePacketEventWhenPatternDispatchOneKernelThenUseComputeWalkerPostSync,
          IsAtLeastXeHpCore) {
    arg.postSyncAddressZero = false;
    arg.expectedWalkerPostSyncOp = 3;
    arg.expectedKernelCount = 1;
    arg.expectedPacketsInUse = 1;

    testSingleTileAppendMemoryFillSingleKernel<gfxCoreFamily>(input, arg);
}
|
||||
|
||||
// Single-kernel fill signalling a kernel-timestamp event: one packet,
// no walker post-sync and no post-sync PIPE_CONTROL are expected.
HWTEST2_F(AppendFillCompactL3EventTest,
          givenAppendMemoryFillUsingL3CompactEventWhenPatternDispatchOneKernelThenUseRegisterPostSync,
          IsXeHpOrXeHpgCore) {
    input.eventPoolFlags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP;

    arg.postSyncAddressZero = true;
    arg.expectedPostSyncPipeControls = 0;
    arg.expectedWalkerPostSyncOp = 0;
    arg.expectedKernelCount = 1;
    arg.expectedPacketsInUse = 1;

    testSingleTileAppendMemoryFillSingleKernelAndL3Flush<gfxCoreFamily>(input, arg);
}
|
||||
|
||||
// Single-kernel fill signalling a non-timestamp event: one packet and one
// post-sync PIPE_CONTROL targeting the first event packet are expected.
HWTEST2_F(AppendFillCompactL3EventTest,
          givenCallToAppendMemoryFillWhenL3CompactImmediateEventUsesPipeControlPostSyncThenSinglePipeControlPostSyncUsed,
          IsXeHpOrXeHpgCore) {
    input.useFirstEventPacketAddress = true;
    input.eventPoolFlags = 0;

    arg.postSyncAddressZero = true;
    arg.expectedPostSyncPipeControls = 1;
    arg.expectedWalkerPostSyncOp = 0;
    arg.expectedKernelCount = 1;
    arg.expectedPacketsInUse = 1;

    testSingleTileAppendMemoryFillSingleKernelAndL3Flush<gfxCoreFamily>(input, arg);
}
|
||||
|
||||
// Fill fixture with usePipeControlMultiPacketEventSync = 0, compactL3FlushEventPacket = 1, multiTile = 1.
using MultiTileAppendFillCompactL3EventTest = Test<AppendFillMultiPacketEventFixture<0, 1, 1>>;
|
||||
|
||||
// Multi-tile pattern fill with a kernel-timestamp event. Expectations depend on whether
// the platform reports DC flush as enabled; no post-sync PIPE_CONTROL is expected either way.
HWTEST2_F(MultiTileAppendFillCompactL3EventTest,
          givenMultiTileCmdListCallToAppendMemoryFillWhenPlatformNeedsDcFlushAndL3CompactTimestampEventThenRegisterPostSyncUsedOtherwiseUsesWalkerPostSyncProfiling,
          IsAtLeastXeHpCore) {
    // Fill description and event pool type are identical for both variants.
    input.eventPoolFlags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP;
    input.dstPtr = dstPtr;
    input.allocSize = allocSize;
    input.patternPtr = pattern;
    input.patternSize = patternSize;

    const bool dcFlushRequired = NEO::MemorySynchronizationCommands<FamilyType>::getDcFlushEnable(true, *defaultHwInfo);

    arg.expectedPostSyncPipeControls = 0;
    if (!dcFlushRequired) {
        // Each kernel keeps its own walker post-sync.
        arg.expectedPacketsInUse = 4;
        arg.expectedKernelCount = 2;
        arg.expectedWalkerPostSyncOp = 3;
        arg.postSyncAddressZero = false;
    } else {
        // Walkers carry no post-sync.
        arg.expectedPacketsInUse = 2;
        arg.expectedKernelCount = 1;
        arg.expectedWalkerPostSyncOp = 0;
        arg.postSyncAddressZero = true;
    }

    testMultiTileAppendMemoryFillManyKernels<gfxCoreFamily>(input, arg);
}
|
||||
|
||||
// Multi-tile pattern fill with a non-timestamp event. Expectations depend on whether
// the platform reports DC flush as enabled.
HWTEST2_F(MultiTileAppendFillCompactL3EventTest,
          givenMultiTileCmdListCallToAppendMemoryFillWhenPlatformNeedsDcFlushAndL3CompactImmediateEventThenPipeControlPostSyncUsedOtherwiseUsesWalkerPostSyncProfiling,
          IsAtLeastXeHpCore) {
    // Fill description and event pool type are identical for both variants.
    input.eventPoolFlags = 0;
    input.dstPtr = dstPtr;
    input.allocSize = allocSize;
    input.patternPtr = pattern;
    input.patternSize = patternSize;

    const bool dcFlushRequired = NEO::MemorySynchronizationCommands<FamilyType>::getDcFlushEnable(true, *defaultHwInfo);

    if (!dcFlushRequired) {
        // Each kernel keeps its own walker post-sync; no post-sync PIPE_CONTROL.
        arg.expectedPacketsInUse = 4;
        arg.expectedKernelCount = 2;
        arg.expectedWalkerPostSyncOp = 3;
        arg.expectedPostSyncPipeControls = 0;
        arg.postSyncAddressZero = false;
    } else {
        // Walkers carry no post-sync; a single post-sync PIPE_CONTROL is expected.
        arg.expectedPacketsInUse = 2;
        arg.expectedKernelCount = 1;
        arg.expectedWalkerPostSyncOp = 0;
        arg.expectedPostSyncPipeControls = 1;
        arg.postSyncAddressZero = true;
    }

    testMultiTileAppendMemoryFillManyKernels<gfxCoreFamily>(input, arg);
}
|
||||
|
||||
// Fill fixture with usePipeControlMultiPacketEventSync = 1, compactL3FlushEventPacket = 1, multiTile = 0.
using AppendFillKernelSplitAndCompactL3EventTest = Test<AppendFillMultiPacketEventFixture<1, 1, 0>>;
|
||||
|
||||
// Immediate-pattern fill: the split kernels are expected to share one event packet
// and to carry no walker post-sync.
HWTEST2_F(AppendFillKernelSplitAndCompactL3EventTest,
          givenCallToAppendMemoryFillWithImmediateValueWhenTimestampEventUsesRegisterPostSyncThenSeparateKernelsNotUsesWalkerPostSyncProfiling,
          IsAtLeastXeHpCore) {
    // Fill description.
    input.patternSize = sizeof(immediatePattern);
    input.patternPtr = &immediatePattern;
    input.allocSize = immediateAllocSize;
    input.dstPtr = immediateDstPtr;

    // Expected event accounting and walker programming.
    arg.postSyncAddressZero = true;
    arg.expectedWalkerPostSyncOp = 0;
    arg.expectedKernelCount = 1;
    arg.expectedPacketsInUse = 1;

    testSingleTileAppendMemoryFillManyImmediateKernels<gfxCoreFamily>(input, arg);
}
|
||||
|
||||
// Pattern fill: the split kernels are expected to share one event packet
// and to carry no walker post-sync.
HWTEST2_F(AppendFillKernelSplitAndCompactL3EventTest,
          givenCallToAppendMemoryFillWhenTimestampEventUsesRegisterPostSyncThenSeparateKernelsNotUsesWalkerPostSyncProfiling,
          IsAtLeastXeHpCore) {
    // Fill description.
    input.patternSize = patternSize;
    input.patternPtr = pattern;
    input.allocSize = allocSize;
    input.dstPtr = dstPtr;

    // Expected event accounting and walker programming.
    arg.postSyncAddressZero = true;
    arg.expectedWalkerPostSyncOp = 0;
    arg.expectedKernelCount = 1;
    arg.expectedPacketsInUse = 1;

    testSingleTileAppendMemoryFillManyKernels<gfxCoreFamily>(input, arg);
}
|
||||
|
||||
// Single-kernel fill: one packet and a walker post-sync (operation value 3) are expected.
HWTEST2_F(AppendFillKernelSplitAndCompactL3EventTest,
          givenAppendMemoryFillUsingSinglePacketEventWhenPatternDispatchOneKernelThenUseComputeWalkerPostSync,
          IsAtLeastXeHpCore) {
    arg.postSyncAddressZero = false;
    arg.expectedWalkerPostSyncOp = 3;
    arg.expectedKernelCount = 1;
    arg.expectedPacketsInUse = 1;

    testSingleTileAppendMemoryFillSingleKernel<gfxCoreFamily>(input, arg);
}
|
||||
|
||||
// Single-kernel fill signalling a kernel-timestamp event: one packet,
// no walker post-sync and no post-sync PIPE_CONTROL are expected.
HWTEST2_F(AppendFillKernelSplitAndCompactL3EventTest,
          givenAppendMemoryFillUsingL3CompactTimestampEventWhenPatternDispatchOneKernelThenUseRegisterPostSync,
          IsXeHpOrXeHpgCore) {
    input.eventPoolFlags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP;

    arg.postSyncAddressZero = true;
    arg.expectedPostSyncPipeControls = 0;
    arg.expectedWalkerPostSyncOp = 0;
    arg.expectedKernelCount = 1;
    arg.expectedPacketsInUse = 1;

    testSingleTileAppendMemoryFillSingleKernelAndL3Flush<gfxCoreFamily>(input, arg);
}
|
||||
|
||||
// Single-kernel fill signalling a non-timestamp event: one packet and one
// post-sync PIPE_CONTROL targeting the first event packet are expected.
HWTEST2_F(AppendFillKernelSplitAndCompactL3EventTest,
          givenAppendMemoryFillUsingL3CompactImmediateEventWhenPatternDispatchOneKernelThenUsePipeControlPostSync,
          IsXeHpOrXeHpgCore) {
    input.useFirstEventPacketAddress = true;
    input.eventPoolFlags = 0;

    arg.postSyncAddressZero = true;
    arg.expectedPostSyncPipeControls = 1;
    arg.expectedWalkerPostSyncOp = 0;
    arg.expectedKernelCount = 1;
    arg.expectedPacketsInUse = 1;

    testSingleTileAppendMemoryFillSingleKernelAndL3Flush<gfxCoreFamily>(input, arg);
}
|
||||
|
||||
// Fill fixture with usePipeControlMultiPacketEventSync = 1, compactL3FlushEventPacket = 1, multiTile = 1.
using MultiTileAppendFillKernelSplitAndCompactL3EventTest = Test<AppendFillMultiPacketEventFixture<1, 1, 1>>;
|
||||
|
||||
// Multi-tile pattern fill signalling a kernel-timestamp event: two packets,
// no walker post-sync and no post-sync PIPE_CONTROL are expected.
HWTEST2_F(MultiTileAppendFillKernelSplitAndCompactL3EventTest,
          givenMultiTileCmdListCallToAppendMemoryFillWhenL3CompactTimestampEventUsesRegisterPostSyncThenSeparateKernelsNotUsesWalkerPostSyncProfilingAndDcFlushWithNoPostSync,
          IsAtLeastXeHpCore) {
    // Fill description and event pool type.
    input.patternSize = patternSize;
    input.patternPtr = pattern;
    input.allocSize = allocSize;
    input.dstPtr = dstPtr;
    input.eventPoolFlags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP;

    // Expected event accounting and command programming.
    arg.postSyncAddressZero = true;
    arg.expectedPostSyncPipeControls = 0;
    arg.expectedWalkerPostSyncOp = 0;
    arg.expectedKernelCount = 1;
    arg.expectedPacketsInUse = 2;

    testMultiTileAppendMemoryFillManyKernels<gfxCoreFamily>(input, arg);
}
|
||||
|
||||
// Multi-tile pattern fill signalling a non-timestamp event: two packets,
// no walker post-sync and exactly one post-sync PIPE_CONTROL are expected.
HWTEST2_F(MultiTileAppendFillKernelSplitAndCompactL3EventTest,
          givenMultiTileCmdListCallToAppendMemoryFillWhenL3CompactImmediateEventUsesPipeControlPostSyncThenSeparateKernelsNotUsesWalkerPostSyncProfilingAndDcFlushWithImmediatePostSync,
          IsAtLeastXeHpCore) {
    // Expected event accounting and command programming
    // (expectedPacketsInUse was previously assigned twice with the same value).
    arg.expectedPacketsInUse = 2;
    arg.expectedKernelCount = 1;
    arg.expectedWalkerPostSyncOp = 0;
    arg.expectedPostSyncPipeControls = 1;
    arg.postSyncAddressZero = true;

    input.eventPoolFlags = 0;

    // Fill description.
    input.dstPtr = dstPtr;
    input.allocSize = allocSize;
    input.patternPtr = pattern;
    input.patternSize = patternSize;

    testMultiTileAppendMemoryFillManyKernels<gfxCoreFamily>(input, arg);
}
|
||||
|
||||
} // namespace ult
|
||||
} // namespace L0
|
||||
|
||||
@@ -296,5 +296,250 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenVariousKernelsAndPatchingDisallowe
|
||||
pCommandList->reset();
|
||||
}
|
||||
|
||||
struct AppendKernelTestInput {
|
||||
DriverHandle *driver = nullptr;
|
||||
L0::Context *context = nullptr;
|
||||
L0::Device *device = nullptr;
|
||||
|
||||
ze_event_pool_flags_t eventPoolFlags = 0;
|
||||
|
||||
uint32_t packetOffsetMul = 1;
|
||||
|
||||
bool useFirstEventPacketAddress = false;
|
||||
};
|
||||
|
||||
template <int32_t compactL3FlushEventPacket, uint32_t multiTile>
|
||||
struct CommandListAppendLaunchKernelCompactL3FlushEventFixture : public ModuleFixture {
|
||||
void setUp() {
|
||||
DebugManager.flags.CompactL3FlushEventPacket.set(compactL3FlushEventPacket);
|
||||
if constexpr (multiTile == 1) {
|
||||
DebugManager.flags.CreateMultipleSubDevices.set(2);
|
||||
DebugManager.flags.EnableImplicitScaling.set(1);
|
||||
arg.workloadPartition = true;
|
||||
arg.expectDcFlush = 2; // DC Flush multi-tile platforms require DC Flush + x-tile sync after implicit scaling COMPUTE_WALKER
|
||||
input.packetOffsetMul = 2;
|
||||
} else {
|
||||
arg.expectDcFlush = 1;
|
||||
}
|
||||
ModuleFixture::setUp();
|
||||
|
||||
input.driver = driverHandle.get();
|
||||
input.context = context;
|
||||
input.device = device;
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
void testAppendLaunchKernelAndL3Flush(AppendKernelTestInput &input, TestExpectedValues &arg) {
|
||||
using FamilyType = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
|
||||
using COMPUTE_WALKER = typename FamilyType::COMPUTE_WALKER;
|
||||
using POSTSYNC_DATA = typename FamilyType::POSTSYNC_DATA;
|
||||
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
|
||||
using POST_SYNC_OPERATION = typename FamilyType::PIPE_CONTROL::POST_SYNC_OPERATION;
|
||||
using OPERATION = typename POSTSYNC_DATA::OPERATION;
|
||||
|
||||
Mock<::L0::Kernel> kernel;
|
||||
auto module = std::unique_ptr<Module>(new Mock<Module>(input.device, nullptr));
|
||||
kernel.module = module.get();
|
||||
|
||||
auto commandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
|
||||
auto result = commandList->initialize(device, NEO::EngineGroupType::Compute, 0u);
|
||||
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
|
||||
ze_event_pool_desc_t eventPoolDesc = {};
|
||||
eventPoolDesc.count = 1;
|
||||
eventPoolDesc.flags = input.eventPoolFlags;
|
||||
|
||||
ze_event_desc_t eventDesc = {};
|
||||
eventDesc.index = 0;
|
||||
eventDesc.signal = ZE_EVENT_SCOPE_FLAG_HOST;
|
||||
|
||||
auto eventPool = std::unique_ptr<L0::EventPool>(L0::EventPool::create(input.driver, input.context, 0, nullptr, &eventPoolDesc, result));
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
auto event = std::unique_ptr<L0::Event>(L0::Event::create<uint32_t>(eventPool.get(), &eventDesc, input.device));
|
||||
|
||||
uint64_t firstKernelEventAddress = arg.postSyncAddressZero ? 0 : event->getGpuAddress(input.device);
|
||||
|
||||
ze_group_count_t groupCount{1, 1, 1};
|
||||
CmdListKernelLaunchParams launchParams = {};
|
||||
result = commandList->appendLaunchKernel(kernel.toHandle(), &groupCount, event->toHandle(), 0, nullptr, launchParams);
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
EXPECT_EQ(arg.expectedPacketsInUse, event->getPacketsInUse());
|
||||
EXPECT_EQ(arg.expectedKernelCount, event->getKernelCount());
|
||||
|
||||
GenCmdList cmdList;
|
||||
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
|
||||
cmdList, ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), 0),
|
||||
commandList->commandContainer.getCommandStream()->getUsed()));
|
||||
|
||||
auto itorWalkers = findAll<COMPUTE_WALKER *>(cmdList.begin(), cmdList.end());
|
||||
ASSERT_EQ(1u, itorWalkers.size());
|
||||
auto firstWalker = itorWalkers[0];
|
||||
|
||||
auto walkerCmd = genCmdCast<COMPUTE_WALKER *>(*firstWalker);
|
||||
EXPECT_EQ(static_cast<OPERATION>(arg.expectedWalkerPostSyncOp), walkerCmd->getPostSync().getOperation());
|
||||
EXPECT_EQ(firstKernelEventAddress, walkerCmd->getPostSync().getDestinationAddress());
|
||||
|
||||
uint64_t l3FlushPostSyncAddress = event->getGpuAddress(input.device) + input.packetOffsetMul * event->getSinglePacketSize();
|
||||
if (input.useFirstEventPacketAddress) {
|
||||
l3FlushPostSyncAddress = event->getGpuAddress(input.device);
|
||||
}
|
||||
if (event->isUsingContextEndOffset()) {
|
||||
l3FlushPostSyncAddress += event->getContextEndOffset();
|
||||
}
|
||||
|
||||
auto itorPipeControls = findAll<PIPE_CONTROL *>(firstWalker, cmdList.end());
|
||||
|
||||
uint32_t postSyncPipeControls = 0;
|
||||
uint32_t dcFlushFound = 0;
|
||||
for (auto it : itorPipeControls) {
|
||||
auto cmd = genCmdCast<PIPE_CONTROL *>(*it);
|
||||
if (cmd->getPostSyncOperation() == POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA) {
|
||||
postSyncPipeControls++;
|
||||
EXPECT_EQ(l3FlushPostSyncAddress, NEO::UnitTestHelper<FamilyType>::getPipeControlPostSyncAddress(*cmd));
|
||||
EXPECT_EQ(Event::STATE_SIGNALED, cmd->getImmediateData());
|
||||
if (arg.workloadPartition) {
|
||||
EXPECT_TRUE(cmd->getWorkloadPartitionIdOffsetEnable());
|
||||
} else {
|
||||
EXPECT_FALSE(cmd->getWorkloadPartitionIdOffsetEnable());
|
||||
}
|
||||
}
|
||||
if (cmd->getDcFlushEnable()) {
|
||||
dcFlushFound++;
|
||||
}
|
||||
}
|
||||
EXPECT_EQ(arg.expectedPostSyncPipeControls, postSyncPipeControls);
|
||||
EXPECT_EQ(arg.expectDcFlush, dcFlushFound);
|
||||
}
|
||||
|
||||
DebugManagerStateRestore restorer;
|
||||
|
||||
AppendKernelTestInput input = {};
|
||||
TestExpectedValues arg = {};
|
||||
};
|
||||
|
||||
// Launch-kernel fixture with compactL3FlushEventPacket = 0, multiTile = 0.
using CommandListAppendLaunchKernelCompactL3FlushDisabledTest = Test<CommandListAppendLaunchKernelCompactL3FlushEventFixture<0, 0>>;
|
||||
|
||||
// Kernel-timestamp event with compaction disabled: walker post-sync (operation value 3)
// plus one post-sync PIPE_CONTROL, two packets in total.
HWTEST2_F(CommandListAppendLaunchKernelCompactL3FlushDisabledTest,
          givenAppendKernelWithSignalScopeTimestampEventWhenComputeWalkerTimestampPostsyncAndL3ImmediatePostsyncUsedThenExpectComputeWalkerAndPipeControlPostsync,
          IsXeHpOrXeHpgCore) {
    input.eventPoolFlags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP;

    arg.postSyncAddressZero = false;
    arg.expectedWalkerPostSyncOp = 3;
    arg.expectedPostSyncPipeControls = 1;
    arg.expectedPacketsInUse = 2;
    arg.expectedKernelCount = 1;

    testAppendLaunchKernelAndL3Flush<gfxCoreFamily>(input, arg);
}
|
||||
|
||||
// Non-timestamp event with compaction disabled: walker post-sync plus one post-sync
// PIPE_CONTROL, two packets in total. The expected walker operation depends on whether
// the platform is multi-tile capable.
HWTEST2_F(CommandListAppendLaunchKernelCompactL3FlushDisabledTest,
          givenAppendKernelWithSignalScopeImmediateEventWhenComputeWalkerImmediatePostsyncAndL3ImmediatePostsyncUsedThenExpectComputeWalkerAndPipeControlPostsync,
          IsXeHpOrXeHpgCore) {
    input.eventPoolFlags = 0;

    arg.postSyncAddressZero = false;
    arg.expectedWalkerPostSyncOp = L0HwHelper::get(gfxCoreFamily).multiTileCapablePlatform() ? 3 : 1;
    arg.expectedPostSyncPipeControls = 1;
    arg.expectedPacketsInUse = 2;
    arg.expectedKernelCount = 1;

    testAppendLaunchKernelAndL3Flush<gfxCoreFamily>(input, arg);
}
|
||||
|
||||
// Launch-kernel fixture with compactL3FlushEventPacket = 1, multiTile = 0.
using CommandListAppendLaunchKernelCompactL3FlushEnabledTest = Test<CommandListAppendLaunchKernelCompactL3FlushEventFixture<1, 0>>;
|
||||
|
||||
// Kernel-timestamp event with compaction enabled: one packet, no walker post-sync
// and no post-sync PIPE_CONTROL are expected.
HWTEST2_F(CommandListAppendLaunchKernelCompactL3FlushEnabledTest,
          givenAppendKernelWithSignalScopeTimestampEventWhenRegisterTimestampPostsyncUsedThenExpectNoComputeWalkerAndPipeControlPostsync,
          IsXeHpOrXeHpgCore) {
    input.useFirstEventPacketAddress = true;
    input.eventPoolFlags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP;

    arg.postSyncAddressZero = true;
    arg.expectedWalkerPostSyncOp = 0;
    arg.expectedPostSyncPipeControls = 0;
    arg.expectedPacketsInUse = 1;
    arg.expectedKernelCount = 1;

    testAppendLaunchKernelAndL3Flush<gfxCoreFamily>(input, arg);
}
|
||||
|
||||
// Non-timestamp event with compaction enabled: one packet, no walker post-sync and
// one post-sync PIPE_CONTROL targeting the first event packet are expected.
HWTEST2_F(CommandListAppendLaunchKernelCompactL3FlushEnabledTest,
          givenAppendKernelWithSignalScopeImmediateEventWhenL3ImmediatePostsyncUsedThenExpectPipeControlPostsync,
          IsXeHpOrXeHpgCore) {
    input.useFirstEventPacketAddress = true;
    input.eventPoolFlags = 0;

    arg.postSyncAddressZero = true;
    arg.expectedWalkerPostSyncOp = 0;
    arg.expectedPostSyncPipeControls = 1;
    arg.expectedPacketsInUse = 1;
    arg.expectedKernelCount = 1;

    testAppendLaunchKernelAndL3Flush<gfxCoreFamily>(input, arg);
}
|
||||
|
||||
// Test fixture alias instantiating the compact-L3-flush event fixture with <0, 1>.
// NOTE(review): judging by the sibling alias names, the first argument presumably
// enables the compact L3 flush event packet and the second enables multi-tile;
// confirm against the fixture definition.
using CommandListAppendLaunchKernelMultiTileCompactL3FlushDisabledTest = Test<CommandListAppendLaunchKernelCompactL3FlushEventFixture<0, 1>>;
|
||||
|
||||
HWTEST2_F(CommandListAppendLaunchKernelMultiTileCompactL3FlushDisabledTest,
          givenAppendMultiTileKernelWithSignalScopeTimestampEventWhenComputeWalkerTimestampPostsyncAndL3ImmediatePostsyncUsedThenExpectComputeWalkerAndPipeControlPostsync,
          IsXeHpOrXeHpgCore) {
    // Input: kernel-timestamp event pool.
    input.eventPoolFlags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP;

    // Expectations: four packets in use, one post-sync pipe control,
    // walker post-sync operation code 3 and a non-zero post-sync address.
    arg.postSyncAddressZero = false;
    arg.expectedWalkerPostSyncOp = 3;
    arg.expectedPostSyncPipeControls = 1;
    arg.expectedPacketsInUse = 4;
    arg.expectedKernelCount = 1;

    testAppendLaunchKernelAndL3Flush<gfxCoreFamily>(input, arg);
}
|
||||
|
||||
HWTEST2_F(CommandListAppendLaunchKernelMultiTileCompactL3FlushDisabledTest,
          givenAppendMultiTileKernelWithSignalScopeImmediateEventWhenComputeWalkerImmediatePostsyncAndL3ImmediatePostsyncUsedThenExpectComputeWalkerAndPipeControlPostsync,
          IsXeHpOrXeHpgCore) {
    // Input: event pool created with no flags (the immediate-event case named by this test).
    input.eventPoolFlags = 0;

    // Expectations: four packets in use, one post-sync pipe control,
    // walker post-sync operation code 3 and a non-zero post-sync address.
    arg.postSyncAddressZero = false;
    arg.expectedWalkerPostSyncOp = 3;
    arg.expectedPostSyncPipeControls = 1;
    arg.expectedPacketsInUse = 4;
    arg.expectedKernelCount = 1;

    testAppendLaunchKernelAndL3Flush<gfxCoreFamily>(input, arg);
}
|
||||
|
||||
// Test fixture alias instantiating the compact-L3-flush event fixture with <1, 1>.
// NOTE(review): judging by the sibling alias names, the first argument presumably
// enables the compact L3 flush event packet and the second enables multi-tile;
// confirm against the fixture definition.
using CommandListAppendLaunchKernelMultiTileCompactL3FlushEnabledTest = Test<CommandListAppendLaunchKernelCompactL3FlushEventFixture<1, 1>>;
|
||||
|
||||
HWTEST2_F(CommandListAppendLaunchKernelMultiTileCompactL3FlushEnabledTest,
          givenAppendMultiTileKernelWithSignalScopeTimestampEventWhenRegisterTimestampPostsyncUsedThenExpectNoComputeWalkerAndPipeControlPostsync,
          IsXeHpOrXeHpgCore) {
    // Input: kernel-timestamp event pool, using the first event packet address.
    input.useFirstEventPacketAddress = true;
    input.eventPoolFlags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP;

    // Expectations: two packets in use, zero post-sync pipe controls,
    // walker post-sync operation code 0 and a zero post-sync address.
    arg.postSyncAddressZero = true;
    arg.expectedWalkerPostSyncOp = 0;
    arg.expectedPostSyncPipeControls = 0;
    arg.expectedPacketsInUse = 2;
    arg.expectedKernelCount = 1;

    testAppendLaunchKernelAndL3Flush<gfxCoreFamily>(input, arg);
}
|
||||
|
||||
HWTEST2_F(CommandListAppendLaunchKernelMultiTileCompactL3FlushEnabledTest,
          givenAppendMultiTileKernelWithSignalScopeImmediateEventWhenL3ImmediatePostsyncUsedThenExpectPipeControlPostsync,
          IsXeHpOrXeHpgCore) {
    // Input: event pool created with no flags, using the first event packet address.
    input.useFirstEventPacketAddress = true;
    input.eventPoolFlags = 0;

    // Expectations: two packets in use, one post-sync pipe control,
    // walker post-sync operation code 0 and a zero post-sync address.
    arg.postSyncAddressZero = true;
    arg.expectedWalkerPostSyncOp = 0;
    arg.expectedPostSyncPipeControls = 1;
    arg.expectedPacketsInUse = 2;
    arg.expectedKernelCount = 1;

    testAppendLaunchKernelAndL3Flush<gfxCoreFamily>(input, arg);
}
|
||||
|
||||
} // namespace ult
|
||||
} // namespace L0
|
||||
|
||||
@@ -635,5 +635,11 @@ TEST_F(L0HwHelperTest, givenL0HelperWhenGettingDefaultValueForUsePipeControlMult
|
||||
EXPECT_FALSE(defaultValue);
|
||||
}
|
||||
|
||||
TEST_F(L0HwHelperTest, givenL0HelperWhenGettingDefaultValueForCompactL3FlushEventPacketThenReturnFalse) {
    // Query the helper with a local copy of the default hardware info.
    auto defaultHardwareInfo = *NEO::defaultHwInfo.get();
    bool compactPacketByDefault = L0::L0HwHelper::useCompactL3FlushEventPacket(defaultHardwareInfo);

    // The compact L3 flush event packet is expected to be off by default.
    EXPECT_FALSE(compactPacketByDefault);
}
|
||||
|
||||
} // namespace ult
|
||||
} // namespace L0
|
||||
|
||||
Reference in New Issue
Block a user