Add option to compact event L3 flush packet

Related-To: NEO-7434

Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
Zbigniew Zdanowicz
2022-10-27 11:40:44 +00:00
committed by Compute-Runtime-Automation
parent 709e322a4a
commit 6a6ab80113
13 changed files with 1063 additions and 30 deletions

View File

@@ -287,10 +287,18 @@ struct CommandListCoreFamily : CommandListImp {
size_t dstSize,
CmdListFillKernelArguments &outArguments,
Kernel *kernel);
// Reports whether the compact L3 flush event packet path applies:
// true only when the dcFlush argument is set and this command list
// has compactL3FlushEventPacket enabled.
bool compactL3FlushEvent(bool dcFlush) const {
    if (!dcFlush) {
        return false;
    }
    return this->compactL3FlushEventPacket;
}
// Returns true when either of two conditions holds:
//  - splitKernel is set and pipeControlMultiKernelEventSync is enabled, or
//  - compactL3FlushEvent(dcFlush) reports true.
bool eventSignalPipeControl(bool splitKernel, bool dcFlush) const {
    const bool multiKernelSync = splitKernel && this->pipeControlMultiKernelEventSync;
    if (multiKernelSync) {
        return true;
    }
    return compactL3FlushEvent(dcFlush);
}
size_t cmdListCurrentStartOffset = 0;
bool containsAnyKernel = false;
bool pipeControlMultiKernelEventSync = false;
bool compactL3FlushEventPacket = false;
};
template <PRODUCT_FAMILY gfxProductFamily>

View File

@@ -138,6 +138,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::initialize(Device *device, NEO
this->frontEndStateTracking = L0HwHelper::enableFrontEndStateTracking(hwInfo);
this->pipelineSelectStateTracking = L0HwHelper::enablePipelineSelectStateTracking(hwInfo);
this->pipeControlMultiKernelEventSync = L0HwHelper::usePipeControlMultiKernelEventSync(hwInfo);
this->compactL3FlushEventPacket = L0HwHelper::useCompactL3FlushEventPacket(hwInfo);
if (device->isImplicitScalingCapable() && !this->internalUsage && !isCopyOnly()) {
this->partitionCount = static_cast<uint32_t>(this->device->getNEODevice()->getDeviceBitfield().count());
@@ -1187,11 +1188,12 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopy(void *dstptr,
}
CmdListKernelLaunchParams launchParams = {};
bool dcFlush = false;
Event *signalEvent = nullptr;
if (hSignalEvent) {
signalEvent = Event::fromHandle(hSignalEvent);
launchParams.isHostSignalScopeEvent = !!(signalEvent->signalScope & ZE_EVENT_SCOPE_FLAG_HOST);
dcFlush = getDcFlushRequired(!!signalEvent->signalScope);
}
uint32_t kernelCounter = leftSize > 0 ? 1 : 0;
@@ -1199,7 +1201,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopy(void *dstptr,
kernelCounter += rightSize > 0 ? 1 : 0;
launchParams.isKernelSplitOperation = kernelCounter > 1;
bool singlePipeControlPacket = this->pipeControlMultiKernelEventSync && launchParams.isKernelSplitOperation;
bool singlePipeControlPacket = eventSignalPipeControl(launchParams.isKernelSplitOperation, dcFlush);
appendEventForProfilingAllWalkers(signalEvent, true, singlePipeControlPacket);
@@ -1551,9 +1553,11 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(void *ptr,
CmdListKernelLaunchParams launchParams = {};
Event *signalEvent = nullptr;
bool dcFlush = false;
if (hSignalEvent) {
signalEvent = Event::fromHandle(hSignalEvent);
launchParams.isHostSignalScopeEvent = !!(signalEvent->signalScope & ZE_EVENT_SCOPE_FLAG_HOST);
dcFlush = getDcFlushRequired(!!signalEvent->signalScope);
}
if (isCopyOnly()) {
@@ -1610,7 +1614,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(void *ptr,
setupFillKernelArguments(dstAllocation.offset, patternSize, size, fillArguments, builtinKernel);
launchParams.isKernelSplitOperation = (fillArguments.leftRemainingBytes > 0 || fillArguments.rightRemainingBytes > 0);
bool singlePipeControlPacket = this->pipeControlMultiKernelEventSync && launchParams.isKernelSplitOperation;
bool singlePipeControlPacket = eventSignalPipeControl(launchParams.isKernelSplitOperation, dcFlush);
appendEventForProfilingAllWalkers(signalEvent, true, singlePipeControlPacket);

View File

@@ -163,20 +163,26 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
threadGroupDimensions->groupCountY,
threadGroupDimensions->groupCountZ);
}
NEO::GraphicsAllocation *eventAlloc = nullptr;
uint64_t eventAddress = 0;
bool isTimestampEvent = false;
bool l3FlushEnable = false;
bool isHostSignalScopeEvent = launchParams.isHostSignalScopeEvent;
Event *compactEvent = nullptr;
if (event) {
eventAlloc = &event->getAllocation(this->device);
commandContainer.addToResidencyContainer(eventAlloc);
bool flushRequired = !!event->signalScope &&
!launchParams.isKernelSplitOperation;
l3FlushEnable = getDcFlushRequired(flushRequired);
isTimestampEvent = event->isUsingContextEndOffset();
eventAddress = event->getPacketAddress(this->device);
isHostSignalScopeEvent = !!(event->signalScope & ZE_EVENT_SCOPE_FLAG_HOST);
if (compactL3FlushEvent(getDcFlushRequired(!!event->signalScope))) {
compactEvent = event;
event = nullptr;
} else {
NEO::GraphicsAllocation *eventAlloc = &event->getAllocation(this->device);
commandContainer.addToResidencyContainer(eventAlloc);
bool flushRequired = !!event->signalScope &&
!launchParams.isKernelSplitOperation;
l3FlushEnable = getDcFlushRequired(flushRequired);
isTimestampEvent = event->isUsingContextEndOffset();
eventAddress = event->getPacketAddress(this->device);
}
}
bool isKernelUsingSystemAllocation = false;
@@ -249,6 +255,10 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
std::list<void *> additionalCommands;
if (compactEvent) {
appendEventForProfilingAllWalkers(compactEvent, true, true);
}
NEO::EncodeDispatchKernelArgs dispatchKernelArgs{
eventAddress, // eventAddress
neoDevice, // device
@@ -273,7 +283,9 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
NEO::EncodeDispatchKernel<GfxFamily>::encode(commandContainer, dispatchKernelArgs, getLogicalStateHelper());
this->containsStatelessUncachedResource = dispatchKernelArgs.requiresUncachedMocs;
if (event) {
if (compactEvent) {
appendEventForProfilingAllWalkers(compactEvent, false, true);
} else if (event) {
if (partitionCount > 1) {
event->setPacketsInUse(partitionCount);
}
@@ -404,7 +416,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelSplit(Kernel
Event *event,
const CmdListKernelLaunchParams &launchParams) {
if (event) {
if (this->pipeControlMultiKernelEventSync && launchParams.isKernelSplitOperation) {
if (eventSignalPipeControl(launchParams.isKernelSplitOperation, getDcFlushRequired(!!event->signalScope))) {
event = nullptr;
} else {
event->increaseKernelCount();

View File

@@ -53,4 +53,11 @@ bool L0HwHelper::usePipeControlMultiKernelEventSync(const NEO::HardwareInfo &hwI
return false;
}
// Decides whether the compact L3 flush event packet is used.
// The CompactL3FlushEventPacket debug flag drives the result: a value of -1
// means "no override" and yields false; any other value is converted to bool.
// hwInfo is accepted but not consulted by this implementation.
bool L0HwHelper::useCompactL3FlushEventPacket(const NEO::HardwareInfo &hwInfo) {
    const auto debugOverride = NEO::DebugManager.flags.CompactL3FlushEventPacket.get();
    const bool overrideSet = (debugOverride != -1);
    return overrideSet ? !!debugOverride : false;
}
} // namespace L0

View File

@@ -35,6 +35,7 @@ class L0HwHelper {
static bool enableStateComputeModeTracking(const NEO::HardwareInfo &hwInfo);
static bool enableImmediateCmdListHeapSharing(const NEO::HardwareInfo &hwInfo, bool cmdlistSupport);
static bool usePipeControlMultiKernelEventSync(const NEO::HardwareInfo &hwInfo);
static bool useCompactL3FlushEventPacket(const NEO::HardwareInfo &hwInfo);
virtual void setAdditionalGroupProperty(ze_command_queue_group_properties_t &groupProperty, NEO::EngineGroupT &group) const = 0;
virtual L0::Event *createEvent(L0::EventPool *eventPool, const ze_event_desc_t *desc, L0::Device *device) const = 0;