wa: add dummy blits before command MI_FLUSH_DW

to guarantee that all sub-blits of the previous copy have completed.
Affects Xe HPG.

Temporary change, guarded by the ForceDummyBlitWa flag.

Related-To: NEO-7450

Signed-off-by: Cencelewska, Katarzyna <katarzyna.cencelewska@intel.com>
This commit is contained in:
Cencelewska, Katarzyna
2023-03-06 12:42:09 +00:00
committed by Compute-Runtime-Automation
parent f0c7eef667
commit c274309d7b
42 changed files with 370 additions and 323 deletions

View File

@@ -15,7 +15,7 @@
#include "shared/source/helpers/api_specific_config.h"
#include "shared/source/helpers/blit_commands_helper.h"
#include "shared/source/helpers/blit_properties.h"
#include "shared/source/helpers/definitions/mi_flush_args.h"
#include "shared/source/helpers/definitions/command_encoder_args.h"
#include "shared/source/helpers/gfx_core_helper.h"
#include "shared/source/helpers/logical_state_helper.h"
#include "shared/source/helpers/pipe_control_args.h"
@@ -1866,8 +1866,9 @@ void CommandListCoreFamily<gfxCoreFamily>::appendEventForProfilingCopyCommand(Ev
event->resetKernelCountAndPacketUsedCount();
} else {
NEO::MiFlushArgs args;
const auto &productHelper = this->device->getProductHelper();
NEO::EncodeMiFlushDW<GfxFamily>::programMiFlushDw(*commandContainer.getCommandStream(), 0, 0, args, productHelper);
args.waArgs.isBcs = isCopyOnly();
args.waArgs.rootDeviceEnvironment = &(this->device->getNEODevice()->getRootDeviceEnvironmentRef());
NEO::EncodeMiFlushDW<GfxFamily>::programWithWa(*commandContainer.getCommandStream(), 0, 0, args);
dispatchEventPostSyncOperation(event, Event::STATE_SIGNALED, true, false, false);
}
appendWriteKernelTimestamp(event, beforeWalker, false, false);
@@ -2046,8 +2047,9 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWaitOnEvents(uint32_t nu
if (dcFlushRequired) {
if (isCopyOnly()) {
NEO::MiFlushArgs args;
const auto &productHelper = this->device->getProductHelper();
NEO::EncodeMiFlushDW<GfxFamily>::programMiFlushDw(*commandContainer.getCommandStream(), 0, 0, args, productHelper);
args.waArgs.isBcs = true;
args.waArgs.rootDeviceEnvironment = &(this->device->getNEODevice()->getRootDeviceEnvironmentRef());
NEO::EncodeMiFlushDW<GfxFamily>::programWithWa(*commandContainer.getCommandStream(), 0, 0, args);
} else {
NEO::PipeControlArgs args;
args.dcFlushEnable = true;
@@ -2087,8 +2089,9 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWaitOnEvents(uint32_t nu
if (this->cmdListType == TYPE_IMMEDIATE && isCopyOnly() && trackDependencies) {
NEO::MiFlushArgs args;
args.commandWithPostSync = true;
const auto &productHelper = this->device->getProductHelper();
NEO::EncodeMiFlushDW<GfxFamily>::programMiFlushDw(*commandContainer.getCommandStream(), this->csr->getBarrierCountGpuAddress(), this->csr->getNextBarrierCount() + 1, args, productHelper);
args.waArgs.isBcs = true;
args.waArgs.rootDeviceEnvironment = &(this->device->getNEODevice()->getRootDeviceEnvironmentRef());
NEO::EncodeMiFlushDW<GfxFamily>::programWithWa(*commandContainer.getCommandStream(), this->csr->getBarrierCountGpuAddress(), this->csr->getNextBarrierCount() + 1, args);
commandContainer.addToResidencyContainer(this->csr->getTagAllocation());
}
@@ -2207,12 +2210,12 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWriteGlobalTimestamp(
NEO::MiFlushArgs args;
args.timeStampOperation = true;
args.commandWithPostSync = true;
const auto &productHelper = this->device->getProductHelper();
NEO::EncodeMiFlushDW<GfxFamily>::programMiFlushDw(*commandContainer.getCommandStream(),
reinterpret_cast<uint64_t>(dstptr),
0,
args,
productHelper);
args.waArgs.isBcs = true;
args.waArgs.rootDeviceEnvironment = &(this->device->getNEODevice()->getRootDeviceEnvironmentRef());
NEO::EncodeMiFlushDW<GfxFamily>::programWithWa(*commandContainer.getCommandStream(),
reinterpret_cast<uint64_t>(dstptr),
0,
args);
} else {
NEO::PipeControlArgs args;
@@ -2698,6 +2701,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendBarrier(ze_event_handle_
NEO::MiFlushArgs args;
uint64_t gpuAddress = 0u;
TaskCountType value = 0u;
args.waArgs.isBcs = true;
args.waArgs.rootDeviceEnvironment = &(this->device->getNEODevice()->getRootDeviceEnvironmentRef());
if (this->cmdListType == TYPE_IMMEDIATE) {
args.commandWithPostSync = true;
@@ -2706,8 +2711,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendBarrier(ze_event_handle_
commandContainer.addToResidencyContainer(this->csr->getTagAllocation());
}
const auto &productHelper = this->device->getProductHelper();
NEO::EncodeMiFlushDW<GfxFamily>::programMiFlushDw(*commandContainer.getCommandStream(), gpuAddress, value, args, productHelper);
NEO::EncodeMiFlushDW<GfxFamily>::programWithWa(*commandContainer.getCommandStream(), gpuAddress, value, args);
} else {
appendComputeBarrierCommand();
}
@@ -2870,9 +2874,10 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWriteToMemory(void *desc
if (isCopyOnly()) {
NEO::MiFlushArgs args;
args.commandWithPostSync = true;
const auto &productHelper = this->device->getProductHelper();
NEO::EncodeMiFlushDW<GfxFamily>::programMiFlushDw(*commandContainer.getCommandStream(), gpuAddress,
data, args, productHelper);
args.waArgs.isBcs = true;
args.waArgs.rootDeviceEnvironment = &(this->device->getNEODevice()->getRootDeviceEnvironmentRef());
NEO::EncodeMiFlushDW<GfxFamily>::programWithWa(*commandContainer.getCommandStream(), gpuAddress,
data, args);
} else {
NEO::PipeControlArgs args;
args.dcFlushEnable = getDcFlushRequired(!!descriptor->writeScope);
@@ -2918,13 +2923,13 @@ void CommandListCoreFamily<gfxCoreFamily>::dispatchPostSyncCopy(uint64_t gpuAddr
NEO::MiFlushArgs miFlushArgs;
miFlushArgs.commandWithPostSync = true;
const auto &productHelper = this->device->getProductHelper();
NEO::EncodeMiFlushDW<GfxFamily>::programMiFlushDw(
miFlushArgs.waArgs.isBcs = isCopyOnly();
miFlushArgs.waArgs.rootDeviceEnvironment = &(this->device->getNEODevice()->getRootDeviceEnvironmentRef());
NEO::EncodeMiFlushDW<GfxFamily>::programWithWa(
*commandContainer.getCommandStream(),
gpuAddress,
value,
miFlushArgs,
productHelper);
miFlushArgs);
}
template <GFXCORE_FAMILY gfxCoreFamily>

View File

@@ -18,7 +18,7 @@
#include "shared/source/debugger/debugger_l0.h"
#include "shared/source/device/device.h"
#include "shared/source/execution_environment/root_device_environment.h"
#include "shared/source/helpers/definitions/mi_flush_args.h"
#include "shared/source/helpers/definitions/command_encoder_args.h"
#include "shared/source/helpers/gfx_core_helper.h"
#include "shared/source/helpers/heap_base_address_model.h"
#include "shared/source/helpers/logical_state_helper.h"
@@ -220,8 +220,9 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandListsCopyOnly(
size_t linearStreamSizeEstimate = this->estimateLinearStreamSizeInitial(ctx, phCommandLists, numCommandLists);
this->csr->getResidencyAllocations().reserve(ctx.spaceForResidency);
linearStreamSizeEstimate += NEO::EncodeMiFlushDW<GfxFamily>::getMiFlushDwCmdSizeForDataWrite();
auto isBcs = NEO::EngineHelpers::isBcs(this->csr->getOsContext().getEngineType());
NEO::EncodeDummyBlitWaArgs waArgs{isBcs, &(this->device->getNEODevice()->getRootDeviceEnvironmentRef())};
linearStreamSizeEstimate += NEO::EncodeMiFlushDW<GfxFamily>::getCommandSizeWithWa(waArgs);
NEO::LinearStream child(nullptr);
if (const auto ret = this->makeAlignedChildStreamAndSetGpuBase(child, linearStreamSizeEstimate); ret != ZE_RESULT_SUCCESS) {
@@ -992,8 +993,9 @@ void CommandQueueHw<gfxCoreFamily>::dispatchTaskCountPostSyncByMiFlushDw(
NEO::MiFlushArgs args;
args.commandWithPostSync = true;
args.notifyEnable = this->csr->isUsedNotifyEnableForPostSync();
const auto &productHelper = this->device->getProductHelper();
NEO::EncodeMiFlushDW<GfxFamily>::programMiFlushDw(cmdStream, postSyncAddress, postSyncData, args, productHelper);
args.waArgs.isBcs = NEO::EngineHelpers::isBcs(this->csr->getOsContext().getEngineType());
args.waArgs.rootDeviceEnvironment = &(this->device->getNEODevice()->getRootDeviceEnvironmentRef());
NEO::EncodeMiFlushDW<GfxFamily>::programWithWa(cmdStream, postSyncAddress, postSyncData, args);
}
template <GFXCORE_FAMILY gfxCoreFamily>