refactor: add forceComputeWalkerPostSyncFlushWithWrite function

Signed-off-by: Kamil Kopryk <kamil.kopryk@intel.com>
This commit is contained in:
Kamil Kopryk
2024-07-26 12:59:09 +00:00
committed by Compute-Runtime-Automation
parent a168bf2f33
commit d2546d4062
7 changed files with 62 additions and 1 deletion

View File

@@ -210,6 +210,9 @@ struct EncodeDispatchKernel {
static size_t getInlineDataOffset(EncodeDispatchKernelArgs &args);
static void *getImplicitArgsAddress(EncodeDispatchKernelArgs &args, const KernelDescriptor &kernelDescriptor);
static size_t getScratchPtrOffsetOfImplicitArgs();
// Debug hook invoked on a walker command. Depending on the implementation it is
// either a no-op or, when the ForceComputeWalkerPostSyncFlushWithWrite debug flag
// is not -1, it reprograms the walker's post-sync (flush bits, destination
// address, immediate-data write).
template <typename WalkerType>
static void forceComputeWalkerPostSyncFlushWithWrite(WalkerType &walkerCmd);
};
template <typename GfxFamily>

View File

@@ -638,6 +638,11 @@ inline size_t EncodeDispatchKernel<Family>::getInlineDataOffset(EncodeDispatchKe
return 0;
}
// No-op variant: the body is empty, so walkerCmd is left exactly as the caller passed it.
// NOTE(review): presumably this implementation serves walker types for which the
// debug post-sync override is not applicable — confirm against the platform split.
template <typename Family>
template <typename WalkerType>
void EncodeDispatchKernel<Family>::forceComputeWalkerPostSyncFlushWithWrite(WalkerType &walkerCmd) {
}
template <typename Family>
size_t EncodeStates<Family>::getSshHeapSize() {
return 64 * MemoryConstants::kiloByte;

View File

@@ -28,6 +28,7 @@ template void NEO::EncodeDispatchKernel<Family>::programInlineDataHeapless<false
template void NEO::EncodeDispatchKernel<Family>::encodeEuSchedulingPolicy<Family::INTERFACE_DESCRIPTOR_DATA>(Family::INTERFACE_DESCRIPTOR_DATA *pInterfaceDescriptor, const KernelDescriptor &kernelDesc, int32_t defaultPipelinedThreadArbitrationPolicy);
template uint64_t NEO::EncodeDispatchKernel<Family>::getScratchAddressForImmediatePatching<false>(CommandContainer &container, EncodeDispatchKernelArgs &args);
template void NEO::EncodeDispatchKernel<Family>::patchScratchAddressInImplicitArgs<false>(ImplicitArgs &implicitArgs, uint64_t scratchAddress, bool scratchPtrPatchingRequired);
// Explicit instantiation of forceComputeWalkerPostSyncFlushWithWrite for this family's default walker type.
template void NEO::EncodeDispatchKernel<Family>::forceComputeWalkerPostSyncFlushWithWrite<Family::DefaultWalkerType>(Family::DefaultWalkerType &walkerCmd);
template struct NEO::EncodeStates<Family>;
template struct NEO::EncodeMath<Family>;

View File

@@ -354,6 +354,8 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
EncodeDispatchKernel<Family>::setupPostSyncForInOrderExec<WalkerType>(walkerCmd, args);
} else if (args.eventAddress) {
EncodeDispatchKernel<Family>::setupPostSyncForRegularEvent<WalkerType>(walkerCmd, args);
} else {
EncodeDispatchKernel<Family>::forceComputeWalkerPostSyncFlushWithWrite<WalkerType>(walkerCmd);
}
if (debugManager.flags.ForceComputeWalkerPostSyncFlush.get() == 1) {
@@ -938,6 +940,22 @@ inline size_t EncodeDispatchKernel<Family>::getInlineDataOffset(EncodeDispatchKe
return offsetof(DefaultWalkerType, TheStructure.Common.InlineData);
}
// Debug-only override of the walker's post-sync programming.
//
// When the ForceComputeWalkerPostSyncFlushWithWrite debug flag holds anything
// other than -1, the post-sync of walkerCmd is reprogrammed to:
//   - request a dataport pipeline flush and a dataport subslice cache flush,
//   - write immediate data 0 to the address given by the flag value.
// When the flag is -1 the walker command is not modified.
//
// NOTE(review): the flag is declared as int32_t and documented as "-1" or ">=0";
// any other negative value would pass the check below and be widened by the
// cast into a very large 64-bit address — confirm whether that is intended.
template <typename Family>
template <typename WalkerType>
void EncodeDispatchKernel<Family>::forceComputeWalkerPostSyncFlushWithWrite(WalkerType &walkerCmd) {
    using PostSyncType = typename WalkerType::PostSyncType;
    using OperationType = typename PostSyncType::OPERATION;

    // Read the debug flag once; -1 means "leave the walker alone".
    const auto forcedWriteAddress = debugManager.flags.ForceComputeWalkerPostSyncFlushWithWrite.get();
    if (forcedWriteAddress == -1) {
        return;
    }

    auto &postSyncData = walkerCmd.getPostSync();

    // Force both flush controls on.
    postSyncData.setDataportPipelineFlush(true);
    postSyncData.setDataportSubsliceCacheFlush(true);

    // Redirect the post-sync write to the address supplied through the flag.
    postSyncData.setDestinationAddress(static_cast<uint64_t>(forcedWriteAddress));
    postSyncData.setOperation(OperationType::OPERATION_WRITE_IMMEDIATE_DATA);
    postSyncData.setImmediateData(0u);
}
template <typename Family>
size_t EncodeStates<Family>::getSshHeapSize() {
return 2 * MemoryConstants::megaByte;

View File

@@ -291,6 +291,7 @@ DECLARE_DEBUG_VARIABLE(int32_t, ForceSipClass, -1, "-1: default, otherwise based
DECLARE_DEBUG_VARIABLE(int32_t, ForceScratchAndMTPBufferSizeMode, -1, "-1: default, 0: Full, 1: Min. BMG+: Reduce required memory for Scrach and MTP buffers on CCS context")
DECLARE_DEBUG_VARIABLE(int32_t, CFEStackIDControl, -1, "Set Stack ID Control in CFE_STATE on Xe2+, -1 - do not set")
DECLARE_DEBUG_VARIABLE(int32_t, StandaloneInOrderTimestampAllocationEnabled, -1, "-1: default, 0: disabled, 1: enabled. If enabled, use internal allocations, instead of Event pool for timestamps")
// Read by EncodeDispatchKernel::forceComputeWalkerPostSyncFlushWithWrite; that code treats any value other than -1 as "enabled" and uses it as the post-sync destination address.
DECLARE_DEBUG_VARIABLE(int32_t, ForceComputeWalkerPostSyncFlushWithWrite, -1, "-1: ignore. >=0: Force PostSync cache flush and override postSync immediate write address to given value")
/*LOGGING FLAGS*/
DECLARE_DEBUG_VARIABLE(int32_t, PrintDriverDiagnostics, -1, "prints driver diagnostics messages to standard output, value corresponds to hint level")