refactor: add EncodePostSyncArgs to EncodeDispatchKernelArgs

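Moved the post-sync-related member variables of EncodeDispatchKernelArgs
into a nested EncodePostSyncArgs member (postSyncArgs), which removes the
need for EncodePostSync::createPostSyncArgs.
Changed the command encoder and command list to use the modified
EncodeDispatchKernelArgs.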

Related-To: NEO-13003
Signed-off-by: Young Jin Yoon <young.jin.yoon@intel.com>
This commit is contained in:
Young Jin Yoon 2025-04-14 19:06:51 +00:00 committed by Compute-Runtime-Automation
parent f335295432
commit 40aef1555e
19 changed files with 196 additions and 173 deletions

View File

@ -188,13 +188,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
auto maxWgCountPerTile = kernel->getMaxWgCountPerTile(this->engineGroupType); auto maxWgCountPerTile = kernel->getMaxWgCountPerTile(this->engineGroupType);
NEO::EncodeDispatchKernelArgs dispatchKernelArgs{ NEO::EncodeDispatchKernelArgs dispatchKernelArgs{
.eventAddress = 0,
.postSyncImmValue = static_cast<uint64_t>(Event::STATE_SIGNALED),
.inOrderCounterValue = 0,
.inOrderIncrementGpuAddress = 0,
.inOrderIncrementValue = 0,
.device = neoDevice, .device = neoDevice,
.inOrderExecInfo = nullptr,
.dispatchInterface = kernel, .dispatchInterface = kernel,
.surfaceStateHeap = ssh, .surfaceStateHeap = ssh,
.dynamicStateHeap = dsh, .dynamicStateHeap = dsh,
@ -205,6 +199,22 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
.outImplicitArgsPtr = nullptr, .outImplicitArgsPtr = nullptr,
.additionalCommands = &additionalCommands, .additionalCommands = &additionalCommands,
.extendedArgs = nullptr, .extendedArgs = nullptr,
.postSyncArgs = {
.eventAddress = 0,
.postSyncImmValue = static_cast<uint64_t>(Event::STATE_SIGNALED),
.inOrderCounterValue = 0,
.inOrderIncrementGpuAddress = 0,
.inOrderIncrementValue = 0,
.device = neoDevice,
.inOrderExecInfo = nullptr,
.isTimestampEvent = false,
.isHostScopeSignalEvent = false,
.isKernelUsingSystemAllocation = false,
.dcFlushEnable = this->dcFlushSupport,
.interruptEvent = false,
.isFlushL3ForExternalAllocationRequired = false,
.isFlushL3ForHostUsmRequired = false,
},
.preemptionMode = commandListPreemptionMode, .preemptionMode = commandListPreemptionMode,
.requiredPartitionDim = launchParams.requiredPartitionDim, .requiredPartitionDim = launchParams.requiredPartitionDim,
.requiredDispatchWalkOrder = launchParams.requiredDispatchWalkOrder, .requiredDispatchWalkOrder = launchParams.requiredDispatchWalkOrder,
@ -215,22 +225,15 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
.defaultPipelinedThreadArbitrationPolicy = NEO::ThreadArbitrationPolicy::NotPresent, .defaultPipelinedThreadArbitrationPolicy = NEO::ThreadArbitrationPolicy::NotPresent,
.isIndirect = launchParams.isIndirect, .isIndirect = launchParams.isIndirect,
.isPredicate = launchParams.isPredicate, .isPredicate = launchParams.isPredicate,
.isTimestampEvent = false,
.requiresUncachedMocs = uncachedMocsKernel, .requiresUncachedMocs = uncachedMocsKernel,
.isInternal = internalUsage, .isInternal = internalUsage,
.isCooperative = launchParams.isCooperative, .isCooperative = launchParams.isCooperative,
.isHostScopeSignalEvent = false,
.isKernelUsingSystemAllocation = false,
.isKernelDispatchedFromImmediateCmdList = isImmediateType(), .isKernelDispatchedFromImmediateCmdList = isImmediateType(),
.isRcs = engineGroupType == NEO::EngineGroupType::renderCompute, .isRcs = engineGroupType == NEO::EngineGroupType::renderCompute,
.dcFlushEnable = this->dcFlushSupport,
.isHeaplessModeEnabled = this->heaplessModeEnabled, .isHeaplessModeEnabled = this->heaplessModeEnabled,
.isHeaplessStateInitEnabled = this->heaplessStateInitEnabled, .isHeaplessStateInitEnabled = this->heaplessStateInitEnabled,
.interruptEvent = false,
.immediateScratchAddressPatching = !this->scratchAddressPatchingEnabled, .immediateScratchAddressPatching = !this->scratchAddressPatchingEnabled,
.makeCommandView = false, .makeCommandView = false,
.isFlushL3AfterPostSyncForExternalAllocationRequired = false,
.isFlushL3AfterPostSyncForHostUsmRequired = false,
}; };
NEO::EncodeDispatchKernel<GfxFamily>::encodeCommon(commandContainer, dispatchKernelArgs); NEO::EncodeDispatchKernel<GfxFamily>::encodeCommon(commandContainer, dispatchKernelArgs);

View File

@ -365,13 +365,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
NEO::EncodeKernelArgsExt dispatchKernelArgsExt = {}; NEO::EncodeKernelArgsExt dispatchKernelArgsExt = {};
NEO::EncodeDispatchKernelArgs dispatchKernelArgs{ NEO::EncodeDispatchKernelArgs dispatchKernelArgs{
.eventAddress = eventAddress,
.postSyncImmValue = static_cast<uint64_t>(Event::STATE_SIGNALED),
.inOrderCounterValue = inOrderCounterValue,
.inOrderIncrementGpuAddress = inOrderIncrementGpuAddress,
.inOrderIncrementValue = inOrderIncrementValue,
.device = neoDevice, .device = neoDevice,
.inOrderExecInfo = inOrderExecInfo,
.dispatchInterface = kernel, .dispatchInterface = kernel,
.surfaceStateHeap = ssh, .surfaceStateHeap = ssh,
.dynamicStateHeap = dsh, .dynamicStateHeap = dsh,
@ -382,6 +376,22 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
.outImplicitArgsPtr = nullptr, .outImplicitArgsPtr = nullptr,
.additionalCommands = &additionalCommands, .additionalCommands = &additionalCommands,
.extendedArgs = &dispatchKernelArgsExt, .extendedArgs = &dispatchKernelArgsExt,
.postSyncArgs = {
.eventAddress = eventAddress,
.postSyncImmValue = static_cast<uint64_t>(Event::STATE_SIGNALED),
.inOrderCounterValue = inOrderCounterValue,
.inOrderIncrementGpuAddress = inOrderIncrementGpuAddress,
.inOrderIncrementValue = inOrderIncrementValue,
.device = neoDevice,
.inOrderExecInfo = inOrderExecInfo,
.isTimestampEvent = isTimestampEvent,
.isHostScopeSignalEvent = isHostSignalScopeEvent,
.isKernelUsingSystemAllocation = isKernelUsingSystemAllocation,
.dcFlushEnable = this->dcFlushSupport,
.interruptEvent = interruptEvent,
.isFlushL3ForExternalAllocationRequired = isFlushL3AfterPostSync && isKernelUsingExternalAllocation,
.isFlushL3ForHostUsmRequired = isFlushL3AfterPostSync && isKernelUsingSystemAllocation,
},
.preemptionMode = kernelPreemptionMode, .preemptionMode = kernelPreemptionMode,
.requiredPartitionDim = launchParams.requiredPartitionDim, .requiredPartitionDim = launchParams.requiredPartitionDim,
.requiredDispatchWalkOrder = launchParams.requiredDispatchWalkOrder, .requiredDispatchWalkOrder = launchParams.requiredDispatchWalkOrder,
@ -392,22 +402,15 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
.defaultPipelinedThreadArbitrationPolicy = this->defaultPipelinedThreadArbitrationPolicy, .defaultPipelinedThreadArbitrationPolicy = this->defaultPipelinedThreadArbitrationPolicy,
.isIndirect = launchParams.isIndirect, .isIndirect = launchParams.isIndirect,
.isPredicate = launchParams.isPredicate, .isPredicate = launchParams.isPredicate,
.isTimestampEvent = isTimestampEvent,
.requiresUncachedMocs = uncachedMocsKernel, .requiresUncachedMocs = uncachedMocsKernel,
.isInternal = internalUsage, .isInternal = internalUsage,
.isCooperative = launchParams.isCooperative, .isCooperative = launchParams.isCooperative,
.isHostScopeSignalEvent = isHostSignalScopeEvent,
.isKernelUsingSystemAllocation = isKernelUsingSystemAllocation,
.isKernelDispatchedFromImmediateCmdList = isImmediateType(), .isKernelDispatchedFromImmediateCmdList = isImmediateType(),
.isRcs = engineGroupType == NEO::EngineGroupType::renderCompute, .isRcs = engineGroupType == NEO::EngineGroupType::renderCompute,
.dcFlushEnable = this->dcFlushSupport,
.isHeaplessModeEnabled = this->heaplessModeEnabled, .isHeaplessModeEnabled = this->heaplessModeEnabled,
.isHeaplessStateInitEnabled = this->heaplessStateInitEnabled, .isHeaplessStateInitEnabled = this->heaplessStateInitEnabled,
.interruptEvent = interruptEvent,
.immediateScratchAddressPatching = !this->scratchAddressPatchingEnabled, .immediateScratchAddressPatching = !this->scratchAddressPatchingEnabled,
.makeCommandView = launchParams.makeKernelCommandView, .makeCommandView = launchParams.makeKernelCommandView,
.isFlushL3AfterPostSyncForExternalAllocationRequired = isFlushL3AfterPostSync && isKernelUsingExternalAllocation,
.isFlushL3AfterPostSyncForHostUsmRequired = isFlushL3AfterPostSync && isKernelUsingSystemAllocation,
}; };
setAdditionalDispatchKernelArgsFromLaunchParams(dispatchKernelArgs, launchParams); setAdditionalDispatchKernelArgsFromLaunchParams(dispatchKernelArgs, launchParams);

View File

@ -208,7 +208,7 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenNotEnoughSpaceInCommandStreamWhenA
const uint32_t threadGroupDimensions[3] = {1, 1, 1}; const uint32_t threadGroupDimensions[3] = {1, 1, 1};
auto dispatchKernelArgs = CommandEncodeStatesFixture::createDefaultDispatchKernelArgs(device->getNEODevice(), kernel.get(), threadGroupDimensions, false); auto dispatchKernelArgs = CommandEncodeStatesFixture::createDefaultDispatchKernelArgs(device->getNEODevice(), kernel.get(), threadGroupDimensions, false);
dispatchKernelArgs.dcFlushEnable = commandList->getDcFlushRequired(true); dispatchKernelArgs.postSyncArgs.dcFlushEnable = commandList->getDcFlushRequired(true);
NEO::EncodeDispatchKernel<FamilyType>::template encode<DefaultWalkerType>(commandContainer, dispatchKernelArgs); NEO::EncodeDispatchKernel<FamilyType>::template encode<DefaultWalkerType>(commandContainer, dispatchKernelArgs);

View File

@ -918,7 +918,7 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenNotEnoughSpaceInCommandStreamWhenA
const uint32_t threadGroupDimensions[3] = {1, 1, 1}; const uint32_t threadGroupDimensions[3] = {1, 1, 1};
auto dispatchKernelArgs = CommandEncodeStatesFixture::createDefaultDispatchKernelArgs(device->getNEODevice(), kernel.get(), threadGroupDimensions, false); auto dispatchKernelArgs = CommandEncodeStatesFixture::createDefaultDispatchKernelArgs(device->getNEODevice(), kernel.get(), threadGroupDimensions, false);
dispatchKernelArgs.dcFlushEnable = commandList->getDcFlushRequired(true); dispatchKernelArgs.postSyncArgs.dcFlushEnable = commandList->getDcFlushRequired(true);
EXPECT_THROW(NEO::EncodeDispatchKernel<FamilyType>::template encode<DefaultWalkerType>(commandContainer, dispatchKernelArgs), std::exception); EXPECT_THROW(NEO::EncodeDispatchKernel<FamilyType>::template encode<DefaultWalkerType>(commandContainer, dispatchKernelArgs), std::exception);
} }

View File

@ -49,7 +49,7 @@ struct StateComputeModeProperties;
struct ImplicitArgs; struct ImplicitArgs;
struct EncodeKernelArgsExt; struct EncodeKernelArgsExt;
struct EncodeDispatchKernelArgs { struct EncodePostSyncArgs {
uint64_t eventAddress = 0; uint64_t eventAddress = 0;
uint64_t postSyncImmValue = 0; uint64_t postSyncImmValue = 0;
uint64_t inOrderCounterValue = 0; uint64_t inOrderCounterValue = 0;
@ -57,6 +57,53 @@ struct EncodeDispatchKernelArgs {
uint64_t inOrderIncrementValue = 0; uint64_t inOrderIncrementValue = 0;
Device *device = nullptr; Device *device = nullptr;
NEO::InOrderExecInfo *inOrderExecInfo = nullptr; NEO::InOrderExecInfo *inOrderExecInfo = nullptr;
bool isTimestampEvent = false;
bool isHostScopeSignalEvent = false;
bool isKernelUsingSystemAllocation = false;
bool dcFlushEnable = false;
bool interruptEvent = false;
bool isFlushL3ForExternalAllocationRequired = false;
bool isFlushL3ForHostUsmRequired = false;
bool requiresSystemMemoryFence() const {
return (isHostScopeSignalEvent && isKernelUsingSystemAllocation && !device->getHardwareInfo().capabilityTable.isIntegratedDevice);
}
};
template <typename GfxFamily>
struct EncodePostSync {
static constexpr size_t timestampDestinationAddressAlignment = 16;
static constexpr size_t immWriteDestinationAddressAlignment = 8;
template <typename CommandType>
static void encodeL3Flush(CommandType &cmd, const EncodePostSyncArgs &args);
template <typename CommandType>
static void setupPostSyncForRegularEvent(CommandType &cmd, const EncodePostSyncArgs &args);
template <typename CommandType>
static void setupPostSyncForInOrderExec(CommandType &cmd, const EncodePostSyncArgs &args);
static uint32_t getPostSyncMocs(const RootDeviceEnvironment &rootDeviceEnvironment, const bool dcFlush);
template <typename CommandType>
static auto &getPostSync(CommandType &cmd, size_t index);
template <typename PostSyncT>
static void setPostSyncData(PostSyncT &postSyncData, const typename PostSyncT::OPERATION operation, const uint64_t gpuVa, const uint64_t immData, [[maybe_unused]] const uint32_t atomicOpcode, const uint32_t mocs, [[maybe_unused]] const bool interrupt, const bool requiresSystemMemoryFence);
template <typename PostSyncT>
static void setPostSyncDataCommon(PostSyncT &postSyncData, const typename PostSyncT::OPERATION operation, const uint64_t gpuVa, const uint64_t immData);
template <typename CommandType>
static void setCommandLevelInterrupt(CommandType &cmd, bool interrupt);
template <typename CommandType>
static void adjustTimestampPacket(CommandType &cmd, const EncodePostSyncArgs &args);
};
struct EncodeDispatchKernelArgs {
Device *device = nullptr;
DispatchKernelEncoderI *dispatchInterface = nullptr; DispatchKernelEncoderI *dispatchInterface = nullptr;
IndirectHeap *surfaceStateHeap = nullptr; IndirectHeap *surfaceStateHeap = nullptr;
IndirectHeap *dynamicStateHeap = nullptr; IndirectHeap *dynamicStateHeap = nullptr;
@ -67,6 +114,7 @@ struct EncodeDispatchKernelArgs {
void *outImplicitArgsPtr = nullptr; void *outImplicitArgsPtr = nullptr;
std::list<void *> *additionalCommands = nullptr; std::list<void *> *additionalCommands = nullptr;
EncodeKernelArgsExt *extendedArgs = nullptr; EncodeKernelArgsExt *extendedArgs = nullptr;
NEO::EncodePostSyncArgs postSyncArgs{};
PreemptionMode preemptionMode = PreemptionMode::Initial; PreemptionMode preemptionMode = PreemptionMode::Initial;
NEO::RequiredPartitionDim requiredPartitionDim = NEO::RequiredPartitionDim::none; NEO::RequiredPartitionDim requiredPartitionDim = NEO::RequiredPartitionDim::none;
NEO::RequiredDispatchWalkOrder requiredDispatchWalkOrder = NEO::RequiredDispatchWalkOrder::none; NEO::RequiredDispatchWalkOrder requiredDispatchWalkOrder = NEO::RequiredDispatchWalkOrder::none;
@ -77,26 +125,15 @@ struct EncodeDispatchKernelArgs {
int32_t defaultPipelinedThreadArbitrationPolicy = NEO::ThreadArbitrationPolicy::NotPresent; int32_t defaultPipelinedThreadArbitrationPolicy = NEO::ThreadArbitrationPolicy::NotPresent;
bool isIndirect = false; bool isIndirect = false;
bool isPredicate = false; bool isPredicate = false;
bool isTimestampEvent = false;
bool requiresUncachedMocs = false; bool requiresUncachedMocs = false;
bool isInternal = false; bool isInternal = false;
bool isCooperative = false; bool isCooperative = false;
bool isHostScopeSignalEvent = false;
bool isKernelUsingSystemAllocation = false;
bool isKernelDispatchedFromImmediateCmdList = false; bool isKernelDispatchedFromImmediateCmdList = false;
bool isRcs = false; bool isRcs = false;
bool dcFlushEnable = false;
bool isHeaplessModeEnabled = false; bool isHeaplessModeEnabled = false;
bool isHeaplessStateInitEnabled = false; bool isHeaplessStateInitEnabled = false;
bool interruptEvent = false;
bool immediateScratchAddressPatching = false; bool immediateScratchAddressPatching = false;
bool makeCommandView = false; bool makeCommandView = false;
bool isFlushL3AfterPostSyncForExternalAllocationRequired = false;
bool isFlushL3AfterPostSyncForHostUsmRequired = false;
bool requiresSystemMemoryFence() const {
return (isHostScopeSignalEvent && isKernelUsingSystemAllocation && !device->getHardwareInfo().capabilityTable.isIntegratedDevice);
}
}; };
struct EncodeStoreMMIOParams { struct EncodeStoreMMIOParams {
@ -709,58 +746,4 @@ struct EnodeUserInterrupt {
static void encode(LinearStream &commandStream); static void encode(LinearStream &commandStream);
}; };
struct EncodePostSyncArgs {
uint64_t eventAddress = 0;
uint64_t postSyncImmValue = 0;
uint64_t inOrderCounterValue = 0;
uint64_t inOrderIncrementGpuAddress = 0;
uint64_t inOrderIncrementValue = 0;
Device *device = nullptr;
NEO::InOrderExecInfo *inOrderExecInfo = nullptr;
bool isTimestampEvent = false;
bool isHostScopeSignalEvent = false;
bool isKernelUsingSystemAllocation = false;
bool dcFlushEnable = false;
bool interruptEvent = false;
bool isFlushL3ForExternalAllocationRequired = false;
bool isFlushL3ForHostUsmRequired = false;
bool requiresSystemMemoryFence() const {
return (isHostScopeSignalEvent && isKernelUsingSystemAllocation);
}
};
template <typename GfxFamily>
struct EncodePostSync {
static constexpr size_t timestampDestinationAddressAlignment = 16;
static constexpr size_t immWriteDestinationAddressAlignment = 8;
static EncodePostSyncArgs createPostSyncArgs(const EncodeDispatchKernelArgs &args);
template <typename CommandType>
static void encodeL3Flush(CommandType &cmd, const EncodePostSyncArgs &args);
template <typename CommandType>
static void setupPostSyncForRegularEvent(CommandType &cmd, const EncodePostSyncArgs &args);
template <typename CommandType>
static void setupPostSyncForInOrderExec(CommandType &cmd, const EncodePostSyncArgs &args);
static uint32_t getPostSyncMocs(const RootDeviceEnvironment &rootDeviceEnvironment, const bool dcFlush);
template <typename CommandType>
static auto &getPostSync(CommandType &cmd, size_t index);
template <typename PostSyncT>
static void setPostSyncData(PostSyncT &postSyncData, const typename PostSyncT::OPERATION operation, const uint64_t gpuVa, const uint64_t immData, [[maybe_unused]] const uint32_t atomicOpcode, const uint32_t mocs, [[maybe_unused]] const bool interrupt, const bool requiresSystemMemoryFence);
template <typename PostSyncT>
static void setPostSyncDataCommon(PostSyncT &postSyncData, const typename PostSyncT::OPERATION operation, const uint64_t gpuVa, const uint64_t immData);
template <typename CommandType>
static void setCommandLevelInterrupt(CommandType &cmd, bool interrupt);
template <typename CommandType>
static void adjustTimestampPacket(CommandType &cmd, const EncodePostSyncArgs &args);
};
} // namespace NEO } // namespace NEO

View File

@ -1173,23 +1173,4 @@ void EncodeComputeMode<Family>::adjustPipelineSelect(CommandContainer &container
container.getDevice()->getRootDeviceEnvironment()); container.getDevice()->getRootDeviceEnvironment());
} }
template <typename Family>
EncodePostSyncArgs EncodePostSync<Family>::createPostSyncArgs(const EncodeDispatchKernelArgs &args) {
return EncodePostSyncArgs{
.eventAddress = args.eventAddress,
.postSyncImmValue = args.postSyncImmValue,
.inOrderCounterValue = args.inOrderCounterValue,
.inOrderIncrementGpuAddress = args.inOrderIncrementGpuAddress,
.inOrderIncrementValue = args.inOrderIncrementValue,
.device = args.device,
.inOrderExecInfo = args.inOrderExecInfo,
.isTimestampEvent = args.isTimestampEvent,
.isHostScopeSignalEvent = args.isHostScopeSignalEvent,
.isKernelUsingSystemAllocation = args.isKernelUsingSystemAllocation,
.dcFlushEnable = args.dcFlushEnable,
.interruptEvent = args.interruptEvent,
.isFlushL3ForExternalAllocationRequired = args.isFlushL3AfterPostSyncForExternalAllocationRequired,
.isFlushL3ForHostUsmRequired = args.isFlushL3AfterPostSyncForHostUsmRequired};
}
} // namespace NEO } // namespace NEO

View File

@ -95,10 +95,6 @@ template <typename Family>
template <typename CommandType> template <typename CommandType>
void EncodePostSync<Family>::encodeL3Flush(CommandType &cmd, const EncodePostSyncArgs &args) {} void EncodePostSync<Family>::encodeL3Flush(CommandType &cmd, const EncodePostSyncArgs &args) {}
template <typename Family>
template <typename CommandType>
void EncodePostSync<Family>::setCommandLevelInterrupt(CommandType &cmd, bool interrupt) {}
template <typename Family> template <typename Family>
template <typename WalkerType> template <typename WalkerType>
void EncodeDispatchKernel<Family>::setWalkerRegionSettings(WalkerType &walkerCmd, const NEO::Device &device, uint32_t partitionCount, uint32_t workgroupSize, uint32_t threadGroupCount, uint32_t maxWgCountPerTile, bool requiredDispatchWalkOrder) {} void EncodeDispatchKernel<Family>::setWalkerRegionSettings(WalkerType &walkerCmd, const NEO::Device &device, uint32_t partitionCount, uint32_t workgroupSize, uint32_t threadGroupCount, uint32_t maxWgCountPerTile, bool requiredDispatchWalkOrder) {}

View File

@ -304,7 +304,7 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
args.requiresUncachedMocs) { args.requiresUncachedMocs) {
PipeControlArgs syncArgs; PipeControlArgs syncArgs;
syncArgs.dcFlushEnable = args.dcFlushEnable; syncArgs.dcFlushEnable = args.postSyncArgs.dcFlushEnable;
MemorySynchronizationCommands<Family>::addSingleBarrier(*container.getCommandStream(), syncArgs); MemorySynchronizationCommands<Family>::addSingleBarrier(*container.getCommandStream(), syncArgs);
STATE_BASE_ADDRESS sbaCmd; STATE_BASE_ADDRESS sbaCmd;
auto gmmHelper = container.getDevice()->getGmmHelper(); auto gmmHelper = container.getDevice()->getGmmHelper();
@ -370,11 +370,10 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
requiredWorkgroupOrder, requiredWorkgroupOrder,
rootDeviceEnvironment); rootDeviceEnvironment);
auto postSyncArgs = EncodePostSync<Family>::createPostSyncArgs(args); if (args.postSyncArgs.inOrderExecInfo) {
if (args.inOrderExecInfo) { EncodePostSync<Family>::setupPostSyncForInOrderExec(walkerCmd, args.postSyncArgs);
EncodePostSync<Family>::setupPostSyncForInOrderExec(walkerCmd, postSyncArgs); } else if (args.postSyncArgs.eventAddress) {
} else if (args.eventAddress) { EncodePostSync<Family>::setupPostSyncForRegularEvent(walkerCmd, args.postSyncArgs);
EncodePostSync<Family>::setupPostSyncForRegularEvent(walkerCmd, postSyncArgs);
} else { } else {
EncodeDispatchKernel<Family>::forceComputeWalkerPostSyncFlushWithWrite(walkerCmd); EncodeDispatchKernel<Family>::forceComputeWalkerPostSyncFlushWithWrite(walkerCmd);
} }
@ -416,7 +415,7 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
.requiredDispatchWalkOrder = args.requiredDispatchWalkOrder, .requiredDispatchWalkOrder = args.requiredDispatchWalkOrder,
.localRegionSize = args.localRegionSize, .localRegionSize = args.localRegionSize,
.maxFrontEndThreads = args.device->getDeviceInfo().maxFrontEndThreads, .maxFrontEndThreads = args.device->getDeviceInfo().maxFrontEndThreads,
.requiredSystemFence = args.requiresSystemMemoryFence() && args.device->getGfxCoreHelper().isFenceAllocationRequired(hwInfo), .requiredSystemFence = args.postSyncArgs.requiresSystemMemoryFence() && args.device->getGfxCoreHelper().isFenceAllocationRequired(hwInfo),
.hasSample = kernelDescriptor.kernelAttributes.flags.hasSample, .hasSample = kernelDescriptor.kernelAttributes.flags.hasSample,
.l0DebuggerEnabled = args.device->getL0Debugger() != nullptr}; .l0DebuggerEnabled = args.device->getL0Debugger() != nullptr};
@ -442,7 +441,7 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
args.maxWgCountPerTile, // maxWgCountPerTile args.maxWgCountPerTile, // maxWgCountPerTile
!(container.getFlushTaskUsedForImmediate() || container.isUsingPrimaryBuffer()), // useSecondaryBatchBuffer !(container.getFlushTaskUsedForImmediate() || container.isUsingPrimaryBuffer()), // useSecondaryBatchBuffer
!args.isKernelDispatchedFromImmediateCmdList, // apiSelfCleanup !args.isKernelDispatchedFromImmediateCmdList, // apiSelfCleanup
args.dcFlushEnable, // dcFlush args.postSyncArgs.dcFlushEnable, // dcFlush
EncodeDispatchKernel<Family>::singleTileExecImplicitScalingRequired(args.isCooperative), // forceExecutionOnSingleTile EncodeDispatchKernel<Family>::singleTileExecImplicitScalingRequired(args.isCooperative), // forceExecutionOnSingleTile
args.makeCommandView, // blockDispatchToCommandBuffer args.makeCommandView, // blockDispatchToCommandBuffer
isRequiredDispatchWorkGroupOrder}; // isRequiredDispatchWorkGroupOrder isRequiredDispatchWorkGroupOrder}; // isRequiredDispatchWorkGroupOrder

View File

@ -230,7 +230,7 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
if (flush) { if (flush) {
PipeControlArgs syncArgs; PipeControlArgs syncArgs;
syncArgs.dcFlushEnable = args.dcFlushEnable; syncArgs.dcFlushEnable = args.postSyncArgs.dcFlushEnable;
if (dirtyHeaps) { if (dirtyHeaps) {
syncArgs.hdcPipelineFlush = true; syncArgs.hdcPipelineFlush = true;
} }
@ -298,7 +298,7 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
.requiredDispatchWalkOrder = args.requiredDispatchWalkOrder, .requiredDispatchWalkOrder = args.requiredDispatchWalkOrder,
.localRegionSize = args.localRegionSize, .localRegionSize = args.localRegionSize,
.maxFrontEndThreads = args.device->getDeviceInfo().maxFrontEndThreads, .maxFrontEndThreads = args.device->getDeviceInfo().maxFrontEndThreads,
.requiredSystemFence = args.requiresSystemMemoryFence(), .requiredSystemFence = args.postSyncArgs.requiresSystemMemoryFence(),
.hasSample = false}; .hasSample = false};
using INTERFACE_DESCRIPTOR_DATA = typename Family::INTERFACE_DESCRIPTOR_DATA; using INTERFACE_DESCRIPTOR_DATA = typename Family::INTERFACE_DESCRIPTOR_DATA;

View File

@ -704,6 +704,59 @@ HWCMDTEST_F(IGFX_GEN12LP_CORE, CommandEncoderTests, givenPreXeHpPlatformWhenSetu
EXPECT_EQ(0u, mocs); EXPECT_EQ(0u, mocs);
} }
HWCMDTEST_F(IGFX_GEN12LP_CORE, CommandEncoderTests, givenPreXeHpPlatformWhenCallingAdjustTimestampPacketThenNothingHappen) {
using DefaultWalkerType = typename FamilyType::DefaultWalkerType;
MockExecutionEnvironment executionEnvironment{};
DefaultWalkerType walkerCmd{};
DefaultWalkerType walkerOnStart{};
EncodePostSyncArgs args = {.isTimestampEvent = true};
EncodePostSync<FamilyType>::template adjustTimestampPacket<DefaultWalkerType>(walkerCmd, args);
EXPECT_EQ(0, memcmp(&walkerOnStart, &walkerCmd, sizeof(DefaultWalkerType))); // no change
}
HWCMDTEST_F(IGFX_GEN12LP_CORE, CommandEncoderTests, givenPreXeHpPlatformWhenCallingEncodeL3FlushThenNothingHappen) {
using DefaultWalkerType = typename FamilyType::DefaultWalkerType;
MockExecutionEnvironment executionEnvironment{};
DefaultWalkerType walkerCmd{};
DefaultWalkerType walkerOnStart{};
EncodePostSyncArgs args = {.isFlushL3ForExternalAllocationRequired = true};
EncodePostSync<FamilyType>::template encodeL3Flush<DefaultWalkerType>(walkerCmd, args);
EXPECT_EQ(0, memcmp(&walkerOnStart, &walkerCmd, sizeof(DefaultWalkerType))); // no change
args = {.isFlushL3ForHostUsmRequired = true};
EncodePostSync<FamilyType>::template encodeL3Flush<DefaultWalkerType>(walkerCmd, args);
EXPECT_EQ(0, memcmp(&walkerOnStart, &walkerCmd, sizeof(DefaultWalkerType))); // no change
}
HWCMDTEST_F(IGFX_GEN12LP_CORE, CommandEncoderTests, givenPreXeHpPlatformWhenCallingSetupPostSyncForRegularEventThenNothingHappen) {
using DefaultWalkerType = typename FamilyType::DefaultWalkerType;
MockExecutionEnvironment executionEnvironment{};
DefaultWalkerType walkerCmd{};
DefaultWalkerType walkerOnStart{};
EncodePostSyncArgs args = {.eventAddress = 0x1234};
EncodePostSync<FamilyType>::template setupPostSyncForRegularEvent<DefaultWalkerType>(walkerCmd, args);
EXPECT_EQ(0, memcmp(&walkerOnStart, &walkerCmd, sizeof(DefaultWalkerType))); // no change
}
HWCMDTEST_F(IGFX_GEN12LP_CORE, CommandEncoderTests, givenPreXeHpPlatformWhenCallingSetupPostSyncForInOrderExecThenNothingHappen) {
using DefaultWalkerType = typename FamilyType::DefaultWalkerType;
MockExecutionEnvironment executionEnvironment{};
MockDevice mockDevice;
DefaultWalkerType walkerCmd{};
DefaultWalkerType walkerOnStart{};
MockTagAllocator<DeviceAllocNodeType<true>> deviceTagAllocator(0, mockDevice.getMemoryManager());
auto deviceNode = deviceTagAllocator.getTag();
InOrderExecInfo inOrderExecInfo(deviceNode, nullptr, mockDevice, 2, true, true);
EncodePostSyncArgs args = {.inOrderExecInfo = &inOrderExecInfo};
EncodePostSync<FamilyType>::template setupPostSyncForInOrderExec<DefaultWalkerType>(walkerCmd, args);
EXPECT_EQ(0, memcmp(&walkerOnStart, &walkerCmd, sizeof(DefaultWalkerType))); // no change
}
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncoderTests, givenAtLeastXeHpPlatformWhenSetupPostSyncMocsThenCorrect) { HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncoderTests, givenAtLeastXeHpPlatformWhenSetupPostSyncMocsThenCorrect) {
using DefaultWalkerType = typename FamilyType::DefaultWalkerType; using DefaultWalkerType = typename FamilyType::DefaultWalkerType;

View File

@ -216,12 +216,14 @@ HWTEST2_F(CommandEncoderTest, givenPredicateBitSetWhenProgrammingBbStartThenSetC
HWTEST_F(CommandEncoderTest, givenEncodePostSyncArgsWhenCallingRequiresSystemMemoryFenceThenCorrectValuesAreReturned) { HWTEST_F(CommandEncoderTest, givenEncodePostSyncArgsWhenCallingRequiresSystemMemoryFenceThenCorrectValuesAreReturned) {
EncodePostSyncArgs args{}; EncodePostSyncArgs args{};
bool integrated = pDevice->getHardwareInfo().capabilityTable.isIntegratedDevice;
for (bool hostScopeSignalEvent : {true, false}) { for (bool hostScopeSignalEvent : {true, false}) {
for (bool kernelUsingSystemAllocation : {true, false}) { for (bool kernelUsingSystemAllocation : {true, false}) {
args.device = pDevice;
args.isHostScopeSignalEvent = hostScopeSignalEvent; args.isHostScopeSignalEvent = hostScopeSignalEvent;
args.isKernelUsingSystemAllocation = kernelUsingSystemAllocation; args.isKernelUsingSystemAllocation = kernelUsingSystemAllocation;
if (hostScopeSignalEvent && kernelUsingSystemAllocation) { if (hostScopeSignalEvent && kernelUsingSystemAllocation && !integrated) {
EXPECT_TRUE(args.requiresSystemMemoryFence()); EXPECT_TRUE(args.requiresSystemMemoryFence());
} else { } else {
EXPECT_FALSE(args.requiresSystemMemoryFence()); EXPECT_FALSE(args.requiresSystemMemoryFence());

View File

@ -712,7 +712,7 @@ HWCMDTEST_F(IGFX_GEN12LP_CORE, CommandEncodeStatesTest, givenDirtyHeapsWhenDispa
bool requiresUncachedMocs = false; bool requiresUncachedMocs = false;
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
dispatchArgs.dcFlushEnable = MemorySynchronizationCommands<FamilyType>::getDcFlushEnable(true, pDevice->getRootDeviceEnvironment()); dispatchArgs.postSyncArgs.dcFlushEnable = MemorySynchronizationCommands<FamilyType>::getDcFlushEnable(true, pDevice->getRootDeviceEnvironment());
EncodeDispatchKernel<FamilyType>::template encode<DefaultWalkerType>(*cmdContainer.get(), dispatchArgs); EncodeDispatchKernel<FamilyType>::template encode<DefaultWalkerType>(*cmdContainer.get(), dispatchArgs);

View File

@ -33,8 +33,8 @@ HWTEST2_F(CommandEncodeStatesTestDg2AndLater, givenEventAddressWhenEncodeAndPVCA
bool requiresUncachedMocs = false; bool requiresUncachedMocs = false;
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
dispatchArgs.eventAddress = eventAddress; dispatchArgs.postSyncArgs.eventAddress = eventAddress;
dispatchArgs.isTimestampEvent = true; dispatchArgs.postSyncArgs.isTimestampEvent = true;
EncodeDispatchKernel<FamilyType>::template encode<DefaultWalkerType>(*cmdContainer.get(), dispatchArgs); EncodeDispatchKernel<FamilyType>::template encode<DefaultWalkerType>(*cmdContainer.get(), dispatchArgs);
@ -58,8 +58,8 @@ HWTEST2_F(CommandEncodeStatesTestDg2AndLater, givenDebugVariableToForceL1FlushWh
bool requiresUncachedMocs = false; bool requiresUncachedMocs = false;
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
dispatchArgs.eventAddress = eventAddress; dispatchArgs.postSyncArgs.eventAddress = eventAddress;
dispatchArgs.isTimestampEvent = true; dispatchArgs.postSyncArgs.isTimestampEvent = true;
EncodeDispatchKernel<FamilyType>::template encode<DefaultWalkerType>(*cmdContainer.get(), dispatchArgs); EncodeDispatchKernel<FamilyType>::template encode<DefaultWalkerType>(*cmdContainer.get(), dispatchArgs);
@ -82,9 +82,9 @@ HWTEST2_F(CommandEncodeStatesTestDg2AndLater, givenEventAddressWhenEncodeThenMoc
bool requiresUncachedMocs = false; bool requiresUncachedMocs = false;
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
dispatchArgs.eventAddress = eventAddress; dispatchArgs.postSyncArgs.eventAddress = eventAddress;
dispatchArgs.isTimestampEvent = true; dispatchArgs.postSyncArgs.isTimestampEvent = true;
dispatchArgs.dcFlushEnable = MemorySynchronizationCommands<FamilyType>::getDcFlushEnable(true, pDevice->getRootDeviceEnvironment()); dispatchArgs.postSyncArgs.dcFlushEnable = MemorySynchronizationCommands<FamilyType>::getDcFlushEnable(true, pDevice->getRootDeviceEnvironment());
EncodeDispatchKernel<FamilyType>::template encode<DefaultWalkerType>(*cmdContainer.get(), dispatchArgs); EncodeDispatchKernel<FamilyType>::template encode<DefaultWalkerType>(*cmdContainer.get(), dispatchArgs);

View File

@ -126,8 +126,8 @@ HWTEST2_F(CommandEncodeStatesTestPvcAndLater, givenDebugVariableWhenPostSyncIsPr
auto inOrderExecInfo = InOrderExecInfo::create(deviceTagAllocator.getTag(), nullptr, *pDevice, 1, false); auto inOrderExecInfo = InOrderExecInfo::create(deviceTagAllocator.getTag(), nullptr, *pDevice, 1, false);
dispatchArgs.inOrderExecInfo = inOrderExecInfo.get(); dispatchArgs.postSyncArgs.inOrderExecInfo = inOrderExecInfo.get();
auto postSyncArgs = EncodePostSync<FamilyType>::createPostSyncArgs(dispatchArgs); auto &postSyncArgs = dispatchArgs.postSyncArgs;
EncodePostSync<FamilyType>::template setupPostSyncForInOrderExec<DefaultWalkerType>(walkerCmd, postSyncArgs); EncodePostSync<FamilyType>::template setupPostSyncForInOrderExec<DefaultWalkerType>(walkerCmd, postSyncArgs);
auto &postSyncData = walkerCmd.getPostSync(); auto &postSyncData = walkerCmd.getPostSync();

View File

@ -395,8 +395,8 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesTest, givenEventAllocationWhenDi
bool requiresUncachedMocs = false; bool requiresUncachedMocs = false;
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
dispatchArgs.eventAddress = eventAddress; dispatchArgs.postSyncArgs.eventAddress = eventAddress;
dispatchArgs.isTimestampEvent = true; dispatchArgs.postSyncArgs.isTimestampEvent = true;
EncodeDispatchKernel<FamilyType>::template encode<DefaultWalkerType>(*cmdContainer.get(), dispatchArgs); EncodeDispatchKernel<FamilyType>::template encode<DefaultWalkerType>(*cmdContainer.get(), dispatchArgs);
@ -418,9 +418,9 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesTest, givenEventAddressWhenEncod
bool requiresUncachedMocs = false; bool requiresUncachedMocs = false;
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
dispatchArgs.eventAddress = eventAddress; dispatchArgs.postSyncArgs.eventAddress = eventAddress;
dispatchArgs.isTimestampEvent = true; dispatchArgs.postSyncArgs.isTimestampEvent = true;
dispatchArgs.dcFlushEnable = MemorySynchronizationCommands<FamilyType>::getDcFlushEnable(true, pDevice->getRootDeviceEnvironment()); dispatchArgs.postSyncArgs.dcFlushEnable = MemorySynchronizationCommands<FamilyType>::getDcFlushEnable(true, pDevice->getRootDeviceEnvironment());
EncodeDispatchKernel<FamilyType>::template encode<DefaultWalkerType>(*cmdContainer.get(), dispatchArgs); EncodeDispatchKernel<FamilyType>::template encode<DefaultWalkerType>(*cmdContainer.get(), dispatchArgs);
@ -1208,7 +1208,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesDynamicImplicitScaling, givenImp
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
dispatchArgs.isInternal = isInternal; dispatchArgs.isInternal = isInternal;
dispatchArgs.partitionCount = 2; dispatchArgs.partitionCount = 2;
dispatchArgs.dcFlushEnable = MemorySynchronizationCommands<FamilyType>::getDcFlushEnable(true, pDevice->getRootDeviceEnvironment()); dispatchArgs.postSyncArgs.dcFlushEnable = MemorySynchronizationCommands<FamilyType>::getDcFlushEnable(true, pDevice->getRootDeviceEnvironment());
EncodeDispatchKernel<FamilyType>::template encode<DefaultWalkerType>(*cmdContainer.get(), dispatchArgs); EncodeDispatchKernel<FamilyType>::template encode<DefaultWalkerType>(*cmdContainer.get(), dispatchArgs);
@ -1244,7 +1244,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesDynamicImplicitScaling, givenImp
args.emitPipeControlStall = true; args.emitPipeControlStall = true;
args.partitionCount = dispatchArgs.partitionCount; args.partitionCount = dispatchArgs.partitionCount;
args.emitSelfCleanup = true; args.emitSelfCleanup = true;
args.dcFlushEnable = dispatchArgs.dcFlushEnable; args.dcFlushEnable = dispatchArgs.postSyncArgs.dcFlushEnable;
auto cleanupSectionOffset = WalkerPartition::computeControlSectionOffset<FamilyType, DefaultWalkerType>(args); auto cleanupSectionOffset = WalkerPartition::computeControlSectionOffset<FamilyType, DefaultWalkerType>(args);
uint64_t expectedCleanupGpuVa = cmdContainer->getCommandStream()->getGraphicsAllocation()->getGpuAddress() + uint64_t expectedCleanupGpuVa = cmdContainer->getCommandStream()->getGraphicsAllocation()->getGpuAddress() +
@ -1389,8 +1389,8 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesTest, givenNonTimestampEventWhen
bool requiresUncachedMocs = false; bool requiresUncachedMocs = false;
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
dispatchArgs.eventAddress = eventAddress; dispatchArgs.postSyncArgs.eventAddress = eventAddress;
dispatchArgs.isTimestampEvent = true; dispatchArgs.postSyncArgs.isTimestampEvent = true;
EncodeDispatchKernel<FamilyType>::template encode<DefaultWalkerType>(*cmdContainer.get(), dispatchArgs); EncodeDispatchKernel<FamilyType>::template encode<DefaultWalkerType>(*cmdContainer.get(), dispatchArgs);
@ -1522,7 +1522,7 @@ struct CommandEncodeStatesImplicitScalingPrimaryBufferFixture : public CommandEn
bool requiresUncachedMocs = false; bool requiresUncachedMocs = false;
uint64_t eventAddress = 0xFF112233000; uint64_t eventAddress = 0xFF112233000;
EncodeDispatchKernelArgs dispatchArgs = BaseClass::createDefaultDispatchKernelArgs(BaseClass::pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); EncodeDispatchKernelArgs dispatchArgs = BaseClass::createDefaultDispatchKernelArgs(BaseClass::pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
dispatchArgs.eventAddress = eventAddress; dispatchArgs.postSyncArgs.eventAddress = eventAddress;
dispatchArgs.partitionCount = 2; dispatchArgs.partitionCount = 2;
EncodeDispatchKernel<FamilyType>::template encode<DefaultWalkerType>(*BaseClass::cmdContainer.get(), dispatchArgs); EncodeDispatchKernel<FamilyType>::template encode<DefaultWalkerType>(*BaseClass::cmdContainer.get(), dispatchArgs);

View File

@ -35,13 +35,7 @@ class CommandEncodeStatesFixture : public DeviceFixture {
static EncodeDispatchKernelArgs createDefaultDispatchKernelArgs(Device *device, DispatchKernelEncoderI *dispatchInterface, const void *threadGroupDimensions, bool requiresUncachedMocs) { static EncodeDispatchKernelArgs createDefaultDispatchKernelArgs(Device *device, DispatchKernelEncoderI *dispatchInterface, const void *threadGroupDimensions, bool requiresUncachedMocs) {
EncodeDispatchKernelArgs args{ EncodeDispatchKernelArgs args{
.eventAddress = 0,
.postSyncImmValue = 0,
.inOrderCounterValue = 0,
.inOrderIncrementGpuAddress = 0,
.inOrderIncrementValue = 0,
.device = device, .device = device,
.inOrderExecInfo = nullptr,
.dispatchInterface = dispatchInterface, .dispatchInterface = dispatchInterface,
.surfaceStateHeap = nullptr, .surfaceStateHeap = nullptr,
.dynamicStateHeap = nullptr, .dynamicStateHeap = nullptr,
@ -52,6 +46,22 @@ class CommandEncodeStatesFixture : public DeviceFixture {
.outImplicitArgsPtr = nullptr, .outImplicitArgsPtr = nullptr,
.additionalCommands = nullptr, .additionalCommands = nullptr,
.extendedArgs = nullptr, .extendedArgs = nullptr,
.postSyncArgs = {
.eventAddress = 0,
.postSyncImmValue = 0,
.inOrderCounterValue = 0,
.inOrderIncrementGpuAddress = 0,
.inOrderIncrementValue = 0,
.device = device,
.inOrderExecInfo = nullptr,
.isTimestampEvent = false,
.isHostScopeSignalEvent = false,
.isKernelUsingSystemAllocation = false,
.dcFlushEnable = false,
.interruptEvent = false,
.isFlushL3ForExternalAllocationRequired = false,
.isFlushL3ForHostUsmRequired = false,
},
.preemptionMode = PreemptionMode::Disabled, .preemptionMode = PreemptionMode::Disabled,
.requiredPartitionDim = NEO::RequiredPartitionDim::none, .requiredPartitionDim = NEO::RequiredPartitionDim::none,
.requiredDispatchWalkOrder = NEO::RequiredDispatchWalkOrder::none, .requiredDispatchWalkOrder = NEO::RequiredDispatchWalkOrder::none,
@ -62,22 +72,15 @@ class CommandEncodeStatesFixture : public DeviceFixture {
.defaultPipelinedThreadArbitrationPolicy = NEO::ThreadArbitrationPolicy::NotPresent, .defaultPipelinedThreadArbitrationPolicy = NEO::ThreadArbitrationPolicy::NotPresent,
.isIndirect = false, .isIndirect = false,
.isPredicate = false, .isPredicate = false,
.isTimestampEvent = false,
.requiresUncachedMocs = requiresUncachedMocs, .requiresUncachedMocs = requiresUncachedMocs,
.isInternal = false, .isInternal = false,
.isCooperative = false, .isCooperative = false,
.isHostScopeSignalEvent = false,
.isKernelUsingSystemAllocation = false,
.isKernelDispatchedFromImmediateCmdList = false, .isKernelDispatchedFromImmediateCmdList = false,
.isRcs = false, .isRcs = false,
.dcFlushEnable = false,
.isHeaplessModeEnabled = false, .isHeaplessModeEnabled = false,
.isHeaplessStateInitEnabled = false, .isHeaplessStateInitEnabled = false,
.interruptEvent = false,
.immediateScratchAddressPatching = false, .immediateScratchAddressPatching = false,
.makeCommandView = false, .makeCommandView = false,
.isFlushL3AfterPostSyncForExternalAllocationRequired = false,
.isFlushL3AfterPostSyncForHostUsmRequired = false,
}; };
return args; return args;

View File

@ -373,7 +373,7 @@ XE2_HPG_CORETEST_F(EncodeKernelXe2HpgCoreTest, givenDefaultSettingForFenceWhenKe
dispatchInterface->getCrossThreadDataSizeResult = 0; dispatchInterface->getCrossThreadDataSizeResult = 0;
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, false); EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, false);
dispatchArgs.isKernelUsingSystemAllocation = true; dispatchArgs.postSyncArgs.isKernelUsingSystemAllocation = true;
EncodeDispatchKernel<FamilyType>::template encode<DefaultWalkerType>(*cmdContainer.get(), dispatchArgs); EncodeDispatchKernel<FamilyType>::template encode<DefaultWalkerType>(*cmdContainer.get(), dispatchArgs);
@ -399,7 +399,7 @@ XE2_HPG_CORETEST_F(EncodeKernelXe2HpgCoreTest, givenDefaultSettingForFenceWhenEv
dispatchInterface->getCrossThreadDataSizeResult = 0; dispatchInterface->getCrossThreadDataSizeResult = 0;
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, false); EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, false);
dispatchArgs.isHostScopeSignalEvent = true; dispatchArgs.postSyncArgs.isHostScopeSignalEvent = true;
EncodeDispatchKernel<FamilyType>::template encode<DefaultWalkerType>(*cmdContainer.get(), dispatchArgs); EncodeDispatchKernel<FamilyType>::template encode<DefaultWalkerType>(*cmdContainer.get(), dispatchArgs);
@ -425,8 +425,8 @@ XE2_HPG_CORETEST_F(EncodeKernelXe2HpgCoreTest, givenDefaultSettingForFenceWhenKe
dispatchInterface->getCrossThreadDataSizeResult = 0; dispatchInterface->getCrossThreadDataSizeResult = 0;
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, false); EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, false);
dispatchArgs.isKernelUsingSystemAllocation = true; dispatchArgs.postSyncArgs.isKernelUsingSystemAllocation = true;
dispatchArgs.isHostScopeSignalEvent = true; dispatchArgs.postSyncArgs.isHostScopeSignalEvent = true;
EncodeDispatchKernel<FamilyType>::template encode<DefaultWalkerType>(*cmdContainer.get(), dispatchArgs); EncodeDispatchKernel<FamilyType>::template encode<DefaultWalkerType>(*cmdContainer.get(), dispatchArgs);

View File

@ -314,7 +314,7 @@ XE3_CORETEST_F(EncodeKernelXe3CoreTest, givenDefaultSettingForFenceWhenKernelUse
dispatchInterface->getCrossThreadDataSizeResult = 0; dispatchInterface->getCrossThreadDataSizeResult = 0;
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, false); EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, false);
dispatchArgs.isKernelUsingSystemAllocation = true; dispatchArgs.postSyncArgs.isKernelUsingSystemAllocation = true;
EncodeDispatchKernel<FamilyType>::template encode<DefaultWalkerType>(*cmdContainer.get(), dispatchArgs); EncodeDispatchKernel<FamilyType>::template encode<DefaultWalkerType>(*cmdContainer.get(), dispatchArgs);
@ -340,7 +340,7 @@ XE3_CORETEST_F(EncodeKernelXe3CoreTest, givenDefaultSettingForFenceWhenEventHost
dispatchInterface->getCrossThreadDataSizeResult = 0; dispatchInterface->getCrossThreadDataSizeResult = 0;
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, false); EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, false);
dispatchArgs.isHostScopeSignalEvent = true; dispatchArgs.postSyncArgs.isHostScopeSignalEvent = true;
EncodeDispatchKernel<FamilyType>::template encode<DefaultWalkerType>(*cmdContainer.get(), dispatchArgs); EncodeDispatchKernel<FamilyType>::template encode<DefaultWalkerType>(*cmdContainer.get(), dispatchArgs);
@ -366,8 +366,8 @@ XE3_CORETEST_F(EncodeKernelXe3CoreTest, givenDefaultSettingForFenceWhenKernelUse
dispatchInterface->getCrossThreadDataSizeResult = 0; dispatchInterface->getCrossThreadDataSizeResult = 0;
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, false); EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, false);
dispatchArgs.isKernelUsingSystemAllocation = true; dispatchArgs.postSyncArgs.isKernelUsingSystemAllocation = true;
dispatchArgs.isHostScopeSignalEvent = true; dispatchArgs.postSyncArgs.isHostScopeSignalEvent = true;
EncodeDispatchKernel<FamilyType>::template encode<DefaultWalkerType>(*cmdContainer.get(), dispatchArgs); EncodeDispatchKernel<FamilyType>::template encode<DefaultWalkerType>(*cmdContainer.get(), dispatchArgs);

View File

@ -374,7 +374,7 @@ XE_HPC_CORETEST_F(EncodeKernelXeHpcCoreTest, givenDefaultSettingForFenceWhenKern
bool requiresUncachedMocs = false; bool requiresUncachedMocs = false;
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
dispatchArgs.isKernelUsingSystemAllocation = true; dispatchArgs.postSyncArgs.isKernelUsingSystemAllocation = true;
EncodeDispatchKernel<FamilyType>::template encode<DefaultWalkerType>(*cmdContainer.get(), dispatchArgs); EncodeDispatchKernel<FamilyType>::template encode<DefaultWalkerType>(*cmdContainer.get(), dispatchArgs);
@ -410,7 +410,7 @@ XE_HPC_CORETEST_F(EncodeKernelXeHpcCoreTest, givenDefaultSettingForFenceWhenEven
bool requiresUncachedMocs = false; bool requiresUncachedMocs = false;
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
dispatchArgs.isHostScopeSignalEvent = true; dispatchArgs.postSyncArgs.isHostScopeSignalEvent = true;
EncodeDispatchKernel<FamilyType>::template encode<DefaultWalkerType>(*cmdContainer.get(), dispatchArgs); EncodeDispatchKernel<FamilyType>::template encode<DefaultWalkerType>(*cmdContainer.get(), dispatchArgs);
@ -445,8 +445,8 @@ XE_HPC_CORETEST_F(EncodeKernelXeHpcCoreTest, givenDefaultSettingForFenceWhenKern
dispatchInterface->getCrossThreadDataSizeResult = 0; dispatchInterface->getCrossThreadDataSizeResult = 0;
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, false); EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, false);
dispatchArgs.isKernelUsingSystemAllocation = true; dispatchArgs.postSyncArgs.isKernelUsingSystemAllocation = true;
dispatchArgs.isHostScopeSignalEvent = true; dispatchArgs.postSyncArgs.isHostScopeSignalEvent = true;
EncodeDispatchKernel<FamilyType>::template encode<DefaultWalkerType>(*cmdContainer.get(), dispatchArgs); EncodeDispatchKernel<FamilyType>::template encode<DefaultWalkerType>(*cmdContainer.get(), dispatchArgs);