feature: Add heapless mode programming in l0 1/n

Related-To: NEO-7621
Signed-off-by: Kamil Kopryk <kamil.kopryk@intel.com>
This commit is contained in:
Kamil Kopryk
2023-12-01 14:30:28 +00:00
committed by Compute-Runtime-Automation
parent 7a17df93a6
commit aa950a4a96
31 changed files with 295 additions and 175 deletions

View File

@@ -445,6 +445,7 @@ struct CommandList : _ze_command_list_handle_t {
bool dispatchCmdListBatchBufferAsPrimary = false; bool dispatchCmdListBatchBufferAsPrimary = false;
bool copyThroughLockedPtrEnabled = false; bool copyThroughLockedPtrEnabled = false;
bool useOnlyGlobalTimestamps = false; bool useOnlyGlobalTimestamps = false;
bool heaplessModeEnabled = false;
}; };
using CommandListAllocatorFn = CommandList *(*)(uint32_t); using CommandListAllocatorFn = CommandList *(*)(uint32_t);

View File

@@ -16,6 +16,7 @@
#include "shared/source/helpers/api_specific_config.h" #include "shared/source/helpers/api_specific_config.h"
#include "shared/source/helpers/blit_commands_helper.h" #include "shared/source/helpers/blit_commands_helper.h"
#include "shared/source/helpers/blit_properties.h" #include "shared/source/helpers/blit_properties.h"
#include "shared/source/helpers/compiler_product_helper_base.inl"
#include "shared/source/helpers/definitions/command_encoder_args.h" #include "shared/source/helpers/definitions/command_encoder_args.h"
#include "shared/source/helpers/gfx_core_helper.h" #include "shared/source/helpers/gfx_core_helper.h"
#include "shared/source/helpers/hw_info.h" #include "shared/source/helpers/hw_info.h"
@@ -210,6 +211,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::initialize(Device *device, NEO
auto &rootDeviceEnvironment = neoDevice->getRootDeviceEnvironment(); auto &rootDeviceEnvironment = neoDevice->getRootDeviceEnvironment();
auto &productHelper = rootDeviceEnvironment.getHelper<NEO::ProductHelper>(); auto &productHelper = rootDeviceEnvironment.getHelper<NEO::ProductHelper>();
auto &gfxCoreHelper = neoDevice->getGfxCoreHelper(); auto &gfxCoreHelper = neoDevice->getGfxCoreHelper();
auto &compilerProductHelper = neoDevice->getCompilerProductHelper();
auto gmmHelper = rootDeviceEnvironment.getGmmHelper(); auto gmmHelper = rootDeviceEnvironment.getGmmHelper();
this->dcFlushSupport = NEO::MemorySynchronizationCommands<GfxFamily>::getDcFlushEnable(true, rootDeviceEnvironment); this->dcFlushSupport = NEO::MemorySynchronizationCommands<GfxFamily>::getDcFlushEnable(true, rootDeviceEnvironment);
@@ -230,7 +232,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::initialize(Device *device, NEO
this->dispatchCmdListBatchBufferAsPrimary = L0GfxCoreHelper::dispatchCmdListBatchBufferAsPrimary(rootDeviceEnvironment, this->cmdListType == CommandListType::TYPE_REGULAR); this->dispatchCmdListBatchBufferAsPrimary = L0GfxCoreHelper::dispatchCmdListBatchBufferAsPrimary(rootDeviceEnvironment, this->cmdListType == CommandListType::TYPE_REGULAR);
this->useOnlyGlobalTimestamps = gfxCoreHelper.useOnlyGlobalTimestamps(); this->useOnlyGlobalTimestamps = gfxCoreHelper.useOnlyGlobalTimestamps();
this->maxFillPaternSizeForCopyEngine = gfxCoreHelper.getMaxFillPaternSizeForCopyEngine(); this->maxFillPaternSizeForCopyEngine = gfxCoreHelper.getMaxFillPaternSizeForCopyEngine();
this->heaplessModeEnabled = compilerProductHelper.isHeaplessModeEnabled();
this->requiredStreamState.initSupport(rootDeviceEnvironment); this->requiredStreamState.initSupport(rootDeviceEnvironment);
this->finalStreamState.initSupport(rootDeviceEnvironment); this->finalStreamState.initSupport(rootDeviceEnvironment);
@@ -3179,7 +3181,10 @@ void CommandListCoreFamily<gfxCoreFamily>::programStateBaseAddress(NEO::CommandC
false, // useGlobalAtomics false, // useGlobalAtomics
this->partitionCount > 1, // multiOsContextCapable this->partitionCount > 1, // multiOsContextCapable
isRcs, // isRcs isRcs, // isRcs
this->doubleSbaWa}; // doubleSbaWa this->doubleSbaWa, // doubleSbaWa
this->heaplessModeEnabled // heaplessModeEnabled
};
NEO::EncodeStateBaseAddress<GfxFamily>::encode(encodeStateBaseAddressArgs); NEO::EncodeStateBaseAddress<GfxFamily>::encode(encodeStateBaseAddressArgs);
bool sbaTrackingEnabled = NEO::Debugger::isDebugEnabled(this->internalUsage) && this->device->getL0Debugger(); bool sbaTrackingEnabled = NEO::Debugger::isDebugEnabled(this->internalUsage) && this->device->getL0Debugger();

View File

@@ -212,10 +212,11 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
false, // isKernelUsingSystemAllocation false, // isKernelUsingSystemAllocation
cmdListType == CommandListType::TYPE_IMMEDIATE, // isKernelDispatchedFromImmediateCmdList cmdListType == CommandListType::TYPE_IMMEDIATE, // isKernelDispatchedFromImmediateCmdList
engineGroupType == NEO::EngineGroupType::renderCompute, // isRcs engineGroupType == NEO::EngineGroupType::renderCompute, // isRcs
this->dcFlushSupport // dcFlushEnable this->dcFlushSupport, // dcFlushEnable
this->heaplessModeEnabled // isHeaplessModeEnabled
}; };
NEO::EncodeDispatchKernel<GfxFamily>::encode(commandContainer, dispatchKernelArgs); NEO::EncodeDispatchKernel<GfxFamily>::encodeCommon(commandContainer, dispatchKernelArgs);
if (!this->isFlushTaskSubmissionEnabled) { if (!this->isFlushTaskSubmissionEnabled) {
this->containsStatelessUncachedResource = dispatchKernelArgs.requiresUncachedMocs; this->containsStatelessUncachedResource = dispatchKernelArgs.requiresUncachedMocs;
} }

View File

@@ -298,7 +298,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
isKernelUsingSystemAllocation, // isKernelUsingSystemAllocation isKernelUsingSystemAllocation, // isKernelUsingSystemAllocation
cmdListType == CommandListType::TYPE_IMMEDIATE, // isKernelDispatchedFromImmediateCmdList cmdListType == CommandListType::TYPE_IMMEDIATE, // isKernelDispatchedFromImmediateCmdList
engineGroupType == NEO::EngineGroupType::renderCompute, // isRcs engineGroupType == NEO::EngineGroupType::renderCompute, // isRcs
this->dcFlushSupport // dcFlushEnable this->dcFlushSupport, // dcFlushEnable
this->heaplessModeEnabled // isHeaplessModeEnabled
}; };
bool inOrderExecSignalRequired = (this->isInOrderExecutionEnabled() && !launchParams.isKernelSplitOperation); bool inOrderExecSignalRequired = (this->isInOrderExecutionEnabled() && !launchParams.isKernelSplitOperation);
@@ -313,7 +314,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
} }
} }
NEO::EncodeDispatchKernel<GfxFamily>::encode(commandContainer, dispatchKernelArgs); NEO::EncodeDispatchKernel<GfxFamily>::encodeCommon(commandContainer, dispatchKernelArgs);
if (!this->isFlushTaskSubmissionEnabled) { if (!this->isFlushTaskSubmissionEnabled) {
this->containsStatelessUncachedResource = dispatchKernelArgs.requiresUncachedMocs; this->containsStatelessUncachedResource = dispatchKernelArgs.requiresUncachedMocs;

View File

@@ -161,6 +161,7 @@ HWTEST_F(CommandListAppendLaunchKernel, givenKernelWithThreadArbitrationPolicySe
HWTEST2_F(CommandListAppendLaunchKernel, givenNotEnoughSpaceInCommandStreamWhenAppendingKernelThenBbEndIsAddedAndNewCmdBufferAllocated, IsAtLeastSkl) { HWTEST2_F(CommandListAppendLaunchKernel, givenNotEnoughSpaceInCommandStreamWhenAppendingKernelThenBbEndIsAddedAndNewCmdBufferAllocated, IsAtLeastSkl) {
using MI_BATCH_BUFFER_END = typename FamilyType::MI_BATCH_BUFFER_END; using MI_BATCH_BUFFER_END = typename FamilyType::MI_BATCH_BUFFER_END;
using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
DebugManagerStateRestore restorer; DebugManagerStateRestore restorer;
debugManager.flags.DispatchCmdlistCmdBufferPrimary.set(0); debugManager.flags.DispatchCmdlistCmdBufferPrimary.set(0);
@@ -207,7 +208,7 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenNotEnoughSpaceInCommandStreamWhenA
false, // isRcs false, // isRcs
commandList->getDcFlushRequired(true) // dcFlushEnable commandList->getDcFlushRequired(true) // dcFlushEnable
}; };
NEO::EncodeDispatchKernel<FamilyType>::encode(commandContainer, dispatchKernelArgs); NEO::EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(commandContainer, dispatchKernelArgs);
auto usedSpaceAfter = commandContainer.getCommandStream()->getUsed(); auto usedSpaceAfter = commandContainer.getCommandStream()->getUsed();
ASSERT_GT(usedSpaceAfter, 0u); ASSERT_GT(usedSpaceAfter, 0u);

View File

@@ -616,6 +616,8 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenNotEnoughSpaceInCommandStreamWhenA
DebugManagerStateRestore restorer; DebugManagerStateRestore restorer;
NEO::debugManager.flags.EnableFlushTaskSubmission.set(0); NEO::debugManager.flags.EnableFlushTaskSubmission.set(0);
using MI_BATCH_BUFFER_END = typename FamilyType::MI_BATCH_BUFFER_END; using MI_BATCH_BUFFER_END = typename FamilyType::MI_BATCH_BUFFER_END;
using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
createKernel(); createKernel();
ze_result_t returnValue; ze_result_t returnValue;
@@ -657,7 +659,7 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenNotEnoughSpaceInCommandStreamWhenA
false, // isRcs false, // isRcs
commandList->getDcFlushRequired(true) // dcFlushEnable commandList->getDcFlushRequired(true) // dcFlushEnable
}; };
EXPECT_THROW(NEO::EncodeDispatchKernel<FamilyType>::encode(commandContainer, dispatchKernelArgs), std::exception); EXPECT_THROW(NEO::EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(commandContainer, dispatchKernelArgs), std::exception);
} }
HWTEST_F(CommandListAppendLaunchKernel, givenInvalidKernelWhenAppendingThenReturnErrorInvalidArgument) { HWTEST_F(CommandListAppendLaunchKernel, givenInvalidKernelWhenAppendingThenReturnErrorInvalidArgument) {

View File

@@ -105,12 +105,7 @@ size_t HardwareCommandsHelper<GfxFamily>::sendCrossThreadData(
uint32_t sizeToCopy = sizeCrossThreadData; uint32_t sizeToCopy = sizeCrossThreadData;
if (inlineDataProgrammingRequired == true) { if (inlineDataProgrammingRequired == true) {
using InlineData = typename GfxFamily::INLINE_DATA; constexpr uint32_t inlineDataSize = WalkerType::getInlineDataSize();
uint32_t inlineDataSize = sizeof(InlineData);
if constexpr (heaplessModeEnabled) {
inlineDataSize = 64;
}
sizeToCopy = std::min(inlineDataSize, sizeCrossThreadData); sizeToCopy = std::min(inlineDataSize, sizeCrossThreadData);
dest = reinterpret_cast<char *>(walkerCmd->getInlineDataPointer()); dest = reinterpret_cast<char *>(walkerCmd->getInlineDataPointer());

View File

@@ -65,6 +65,7 @@ struct EncodeDispatchKernelArgs {
bool isKernelDispatchedFromImmediateCmdList = false; bool isKernelDispatchedFromImmediateCmdList = false;
bool isRcs = false; bool isRcs = false;
bool dcFlushEnable = false; bool dcFlushEnable = false;
bool isHeaplessModeEnabled = false;
}; };
enum class MiPredicateType : uint32_t { enum class MiPredicateType : uint32_t {
@@ -94,6 +95,9 @@ struct EncodeDispatchKernel {
using INTERFACE_DESCRIPTOR_DATA = typename GfxFamily::INTERFACE_DESCRIPTOR_DATA; using INTERFACE_DESCRIPTOR_DATA = typename GfxFamily::INTERFACE_DESCRIPTOR_DATA;
using BINDING_TABLE_STATE = typename GfxFamily::BINDING_TABLE_STATE; using BINDING_TABLE_STATE = typename GfxFamily::BINDING_TABLE_STATE;
static void encodeCommon(CommandContainer &container, EncodeDispatchKernelArgs &args);
template <typename WalkerType>
static void encode(CommandContainer &container, EncodeDispatchKernelArgs &args); static void encode(CommandContainer &container, EncodeDispatchKernelArgs &args);
template <typename WalkerType> template <typename WalkerType>
@@ -103,6 +107,21 @@ struct EncodeDispatchKernel {
static void appendAdditionalIDDFields(InterfaceDescriptorType *pInterfaceDescriptor, const RootDeviceEnvironment &rootDeviceEnvironment, static void appendAdditionalIDDFields(InterfaceDescriptorType *pInterfaceDescriptor, const RootDeviceEnvironment &rootDeviceEnvironment,
const uint32_t threadsPerThreadGroup, uint32_t slmTotalSize, SlmPolicy slmPolicy); const uint32_t threadsPerThreadGroup, uint32_t slmTotalSize, SlmPolicy slmPolicy);
template <typename WalkerType>
static void encodeThreadData(WalkerType &walkerCmd,
const uint32_t *startWorkGroup,
const uint32_t *numWorkGroups,
const uint32_t *workGroupSizes,
uint32_t simd,
uint32_t localIdDimensions,
uint32_t threadsPerThreadGroup,
uint32_t threadExecutionMask,
bool localIdsGenerationByRuntime,
bool inlineDataProgrammingRequired,
bool isIndirect,
uint32_t requiredWorkGroupOrder,
const RootDeviceEnvironment &rootDeviceEnvironment);
template <typename InterfaceDescriptorType> template <typename InterfaceDescriptorType>
static void setGrfInfo(InterfaceDescriptorType *pInterfaceDescriptor, uint32_t numGrf, const size_t &sizeCrossThreadData, static void setGrfInfo(InterfaceDescriptorType *pInterfaceDescriptor, uint32_t numGrf, const size_t &sizeCrossThreadData,
const size_t &sizePerThreadData, const RootDeviceEnvironment &rootDeviceEnvironment); const size_t &sizePerThreadData, const RootDeviceEnvironment &rootDeviceEnvironment);
@@ -118,20 +137,6 @@ struct EncodeDispatchKernel {
static bool inlineDataProgrammingRequired(const KernelDescriptor &kernelDesc); static bool inlineDataProgrammingRequired(const KernelDescriptor &kernelDesc);
static void encodeThreadData(WALKER_TYPE &walkerCmd,
const uint32_t *startWorkGroup,
const uint32_t *numWorkGroups,
const uint32_t *workGroupSizes,
uint32_t simd,
uint32_t localIdDimensions,
uint32_t threadsPerThreadGroup,
uint32_t threadExecutionMask,
bool localIdsGenerationByRuntime,
bool inlineDataProgrammingRequired,
bool isIndirect,
uint32_t requiredWorkGroupOrder,
const RootDeviceEnvironment &rootDeviceEnvironment);
template <typename InterfaceDescriptorType> template <typename InterfaceDescriptorType>
static void programBarrierEnable(InterfaceDescriptorType &interfaceDescriptor, uint32_t value, const HardwareInfo &hwInfo); static void programBarrierEnable(InterfaceDescriptorType &interfaceDescriptor, uint32_t value, const HardwareInfo &hwInfo);
@@ -146,7 +151,8 @@ struct EncodeDispatchKernel {
template <typename WalkerType> template <typename WalkerType>
static void setupPostSyncMocs(WalkerType &walkerCmd, const RootDeviceEnvironment &rootDeviceEnvironment, bool dcFlush); static void setupPostSyncMocs(WalkerType &walkerCmd, const RootDeviceEnvironment &rootDeviceEnvironment, bool dcFlush);
static void adjustWalkOrder(WALKER_TYPE &walkerCmd, uint32_t requiredWorkGroupOrder, const RootDeviceEnvironment &rootDeviceEnvironment); template <typename WalkerType>
static void adjustWalkOrder(WalkerType &walkerCmd, uint32_t requiredWorkGroupOrder, const RootDeviceEnvironment &rootDeviceEnvironment);
static size_t getSizeRequiredDsh(const KernelDescriptor &kernelDescriptor, uint32_t iddCount); static size_t getSizeRequiredDsh(const KernelDescriptor &kernelDescriptor, uint32_t iddCount);
static size_t getSizeRequiredSsh(const KernelInfo &kernelInfo); static size_t getSizeRequiredSsh(const KernelInfo &kernelInfo);

View File

@@ -537,6 +537,12 @@ template <typename Family>
void EncodeSurfaceState<Family>::appendParamsForImageFromBuffer(R_SURFACE_STATE *surfaceState) { void EncodeSurfaceState<Family>::appendParamsForImageFromBuffer(R_SURFACE_STATE *surfaceState) {
} }
template <typename Family>
inline void EncodeDispatchKernel<Family>::encodeCommon(CommandContainer &container, EncodeDispatchKernelArgs &args) {
using WALKER_TYPE = typename Family::WALKER_TYPE;
EncodeDispatchKernel<Family>::template encode<WALKER_TYPE>(container, args);
}
template <typename Family> template <typename Family>
void *EncodeDispatchKernel<Family>::getInterfaceDescriptor(CommandContainer &container, IndirectHeap *childDsh, uint32_t &iddOffset) { void *EncodeDispatchKernel<Family>::getInterfaceDescriptor(CommandContainer &container, IndirectHeap *childDsh, uint32_t &iddOffset) {

View File

@@ -48,6 +48,7 @@ void EncodeDispatchKernel<Family>::setGrfInfo(InterfaceDescriptorType *pInterfac
} }
template <typename Family> template <typename Family>
template <typename WalkerType>
void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDispatchKernelArgs &args) { void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDispatchKernelArgs &args) {
using MEDIA_STATE_FLUSH = typename Family::MEDIA_STATE_FLUSH; using MEDIA_STATE_FLUSH = typename Family::MEDIA_STATE_FLUSH;
@@ -347,7 +348,8 @@ inline bool EncodeDispatchKernel<Family>::isRuntimeLocalIdsGenerationRequired(ui
} }
template <typename Family> template <typename Family>
void EncodeDispatchKernel<Family>::encodeThreadData(WALKER_TYPE &walkerCmd, template <typename WalkerType>
void EncodeDispatchKernel<Family>::encodeThreadData(WalkerType &walkerCmd,
const uint32_t *startWorkGroup, const uint32_t *startWorkGroup,
const uint32_t *numWorkGroups, const uint32_t *numWorkGroups,
const uint32_t *workGroupSizes, const uint32_t *workGroupSizes,
@@ -375,7 +377,7 @@ void EncodeDispatchKernel<Family>::encodeThreadData(WALKER_TYPE &walkerCmd,
walkerCmd.setThreadGroupIdStartingResumeZ(static_cast<uint32_t>(startWorkGroup[2])); walkerCmd.setThreadGroupIdStartingResumeZ(static_cast<uint32_t>(startWorkGroup[2]));
} }
walkerCmd.setSimdSize(getSimdConfig<WALKER_TYPE>(simd)); walkerCmd.setSimdSize(getSimdConfig<WalkerType>(simd));
auto localWorkSize = static_cast<uint32_t>(workGroupSizes[0] * workGroupSizes[1] * workGroupSizes[2]); auto localWorkSize = static_cast<uint32_t>(workGroupSizes[0] * workGroupSizes[1] * workGroupSizes[2]);
if (threadsPerThreadGroup == 0) { if (threadsPerThreadGroup == 0) {
@@ -603,7 +605,8 @@ template <typename WalkerType>
void EncodeDispatchKernel<Family>::setupPostSyncMocs(WalkerType &walkerCmd, const RootDeviceEnvironment &rootDeviceEnvironment, bool dcFlush) {} void EncodeDispatchKernel<Family>::setupPostSyncMocs(WalkerType &walkerCmd, const RootDeviceEnvironment &rootDeviceEnvironment, bool dcFlush) {}
template <typename Family> template <typename Family>
void EncodeDispatchKernel<Family>::adjustWalkOrder(WALKER_TYPE &walkerCmd, uint32_t requiredWorkGroupOrder, const RootDeviceEnvironment &rootDeviceEnvironment) {} template <typename WalkerType>
void EncodeDispatchKernel<Family>::adjustWalkOrder(WalkerType &walkerCmd, uint32_t requiredWorkGroupOrder, const RootDeviceEnvironment &rootDeviceEnvironment) {}
template <typename Family> template <typename Family>
size_t EncodeDispatchKernel<Family>::additionalSizeRequiredDsh(uint32_t iddCount) { size_t EncodeDispatchKernel<Family>::additionalSizeRequiredDsh(uint32_t iddCount) {

View File

@@ -44,11 +44,12 @@ void EncodeDispatchKernel<Family>::setGrfInfo(InterfaceDescriptorType *pInterfac
} }
template <typename Family> template <typename Family>
template <typename WalkerType>
void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDispatchKernelArgs &args) { void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDispatchKernelArgs &args) {
using SHARED_LOCAL_MEMORY_SIZE = typename Family::INTERFACE_DESCRIPTOR_DATA::SHARED_LOCAL_MEMORY_SIZE; using SHARED_LOCAL_MEMORY_SIZE = typename WalkerType::InterfaceDescriptorType::SHARED_LOCAL_MEMORY_SIZE;
using STATE_BASE_ADDRESS = typename Family::STATE_BASE_ADDRESS; using STATE_BASE_ADDRESS = typename Family::STATE_BASE_ADDRESS;
using INLINE_DATA = typename Family::INLINE_DATA;
constexpr bool heaplessModeEnabled = Family::template isHeaplessMode<WalkerType>();
const HardwareInfo &hwInfo = args.device->getHardwareInfo(); const HardwareInfo &hwInfo = args.device->getHardwareInfo();
auto &rootDeviceEnvironment = args.device->getRootDeviceEnvironment(); auto &rootDeviceEnvironment = args.device->getRootDeviceEnvironment();
@@ -73,7 +74,7 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
EncodeComputeMode<Family>::adjustPipelineSelect(container, kernelDescriptor); EncodeComputeMode<Family>::adjustPipelineSelect(container, kernelDescriptor);
} }
WALKER_TYPE walkerCmd = Family::cmdInitGpgpuWalker; WalkerType walkerCmd = Family::template getInitGpuWalker<WalkerType>();
auto &idd = walkerCmd.getInterfaceDescriptor(); auto &idd = walkerCmd.getInterfaceDescriptor();
EncodeDispatchKernel<Family>::setGrfInfo(&idd, kernelDescriptor.kernelAttributes.numGrfRequired, sizeCrossThreadData, EncodeDispatchKernel<Family>::setGrfInfo(&idd, kernelDescriptor.kernelAttributes.numGrfRequired, sizeCrossThreadData,
@@ -85,12 +86,21 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
{ {
auto alloc = args.dispatchInterface->getIsaAllocation(); auto alloc = args.dispatchInterface->getIsaAllocation();
UNRECOVERABLE_IF(nullptr == alloc); UNRECOVERABLE_IF(nullptr == alloc);
if constexpr (heaplessModeEnabled) {
auto address = alloc->getGpuAddress() + args.dispatchInterface->getIsaOffsetInParentAllocation();
if (!localIdsGenerationByRuntime) {
address += kernelDescriptor.entryPoints.skipPerThreadDataLoad;
}
idd.setKernelStartPointer(address);
} else {
auto offset = alloc->getGpuAddressToPatch() + args.dispatchInterface->getIsaOffsetInParentAllocation(); auto offset = alloc->getGpuAddressToPatch() + args.dispatchInterface->getIsaOffsetInParentAllocation();
if (!localIdsGenerationByRuntime) { if (!localIdsGenerationByRuntime) {
offset += kernelDescriptor.entryPoints.skipPerThreadDataLoad; offset += kernelDescriptor.entryPoints.skipPerThreadDataLoad;
} }
idd.setKernelStartPointer(offset); idd.setKernelStartPointer(offset);
} }
}
if (args.dispatchInterface->getKernelDescriptor().kernelAttributes.flags.usesAssert && args.device->getL0Debugger() != nullptr) { if (args.dispatchInterface->getKernelDescriptor().kernelAttributes.flags.usesAssert && args.device->getL0Debugger() != nullptr) {
idd.setSoftwareExceptionEnable(1); idd.setSoftwareExceptionEnable(1);
} }
@@ -112,32 +122,17 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
idd.setSharedLocalMemorySize(slmSize); idd.setSharedLocalMemorySize(slmSize);
auto bindingTableStateCount = kernelDescriptor.payloadMappings.bindingTable.numEntries; auto bindingTableStateCount = kernelDescriptor.payloadMappings.bindingTable.numEntries;
bool skipSshProgramming = false; bool sshProgrammingRequired = true;
auto &productHelper = args.device->getProductHelper(); auto &productHelper = args.device->getProductHelper();
if (productHelper.isSkippingStatefulInformationRequired(kernelDescriptor)) { if (productHelper.isSkippingStatefulInformationRequired(kernelDescriptor)) {
bindingTableStateCount = 0u; bindingTableStateCount = 0u;
skipSshProgramming = true; sshProgrammingRequired = false;
} }
uint32_t bindingTablePointer = 0u; if (sshProgrammingRequired) {
bool isBindlessKernel = NEO::KernelDescriptor::isBindlessAddressingKernel(kernelDescriptor); bool isBindlessKernel = NEO::KernelDescriptor::isBindlessAddressingKernel(kernelDescriptor);
if (isBindlessKernel) {
if (!skipSshProgramming) {
if (!isBindlessKernel) {
container.prepareBindfulSsh();
if (bindingTableStateCount > 0u) {
auto ssh = args.surfaceStateHeap;
if (ssh == nullptr) {
ssh = container.getHeapWithRequiredSizeAndAlignment(HeapType::SURFACE_STATE, args.dispatchInterface->getSurfaceStateHeapDataSize(), BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE);
}
bindingTablePointer = static_cast<uint32_t>(EncodeSurfaceState<Family>::pushBindingTableAndSurfaceStates(
*ssh,
args.dispatchInterface->getSurfaceStateHeapData(),
args.dispatchInterface->getSurfaceStateHeapDataSize(), bindingTableStateCount,
kernelDescriptor.payloadMappings.bindingTable.tableOffset));
}
} else {
bool globalBindlessSsh = args.device->getBindlessHeapsHelper() != nullptr; bool globalBindlessSsh = args.device->getBindlessHeapsHelper() != nullptr;
auto sshHeapSize = args.dispatchInterface->getSurfaceStateHeapDataSize(); auto sshHeapSize = args.dispatchInterface->getSurfaceStateHeapDataSize();
@@ -156,15 +151,32 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
memcpy_s(dstSurfaceState, sshHeapSize, args.dispatchInterface->getSurfaceStateHeapData(), sshHeapSize); memcpy_s(dstSurfaceState, sshHeapSize, args.dispatchInterface->getSurfaceStateHeapData(), sshHeapSize);
args.dispatchInterface->patchBindlessOffsetsInCrossThreadData(bindlessSshBaseOffset); args.dispatchInterface->patchBindlessOffsetsInCrossThreadData(bindlessSshBaseOffset);
} }
} else {
if constexpr (heaplessModeEnabled == false) {
container.prepareBindfulSsh();
if (bindingTableStateCount > 0u) {
auto ssh = args.surfaceStateHeap;
if (ssh == nullptr) {
ssh = container.getHeapWithRequiredSizeAndAlignment(HeapType::SURFACE_STATE, args.dispatchInterface->getSurfaceStateHeapDataSize(), BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE);
} }
} auto bindingTablePointer = static_cast<uint32_t>(EncodeSurfaceState<Family>::pushBindingTableAndSurfaceStates(
*ssh,
args.dispatchInterface->getSurfaceStateHeapData(),
args.dispatchInterface->getSurfaceStateHeapDataSize(), bindingTableStateCount,
kernelDescriptor.payloadMappings.bindingTable.tableOffset));
idd.setBindingTablePointer(bindingTablePointer); idd.setBindingTablePointer(bindingTablePointer);
}
}
}
}
PreemptionHelper::programInterfaceDescriptorDataPreemption<Family>(&idd, args.preemptionMode); PreemptionHelper::programInterfaceDescriptorDataPreemption<Family>(&idd, args.preemptionMode);
uint32_t samplerCount = 0; uint32_t samplerCount = 0;
if constexpr (Family::supportsSampler) { if constexpr (Family::supportsSampler && heaplessModeEnabled == false) {
if (args.device->getDeviceInfo().imageSupport) { if (args.device->getDeviceInfo().imageSupport) {
uint32_t samplerStateOffset = 0; uint32_t samplerStateOffset = 0;
@@ -188,10 +200,12 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
} }
} }
if constexpr (heaplessModeEnabled == false) {
EncodeDispatchKernel<Family>::adjustBindingTablePrefetch(idd, samplerCount, bindingTableStateCount); EncodeDispatchKernel<Family>::adjustBindingTablePrefetch(idd, samplerCount, bindingTableStateCount);
}
uint64_t offsetThreadData = 0u; uint64_t offsetThreadData = 0u;
const uint32_t inlineDataSize = sizeof(INLINE_DATA); constexpr uint32_t inlineDataSize = WalkerType::getInlineDataSize();
auto crossThreadData = args.dispatchInterface->getCrossThreadData(); auto crossThreadData = args.dispatchInterface->getCrossThreadData();
uint32_t inlineDataProgrammingOffset = 0u; uint32_t inlineDataProgrammingOffset = 0u;
@@ -231,6 +245,7 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
memcpy_s(ptr, sizeCrossThreadData, memcpy_s(ptr, sizeCrossThreadData,
crossThreadData, sizeCrossThreadData); crossThreadData, sizeCrossThreadData);
} }
if (args.isIndirect) { if (args.isIndirect) {
auto gpuPtr = heap->getGraphicsAllocation()->getGpuAddress() + static_cast<uint64_t>(heap->getUsed() - sizeThreadData - inlineDataProgrammingOffset); auto gpuPtr = heap->getGraphicsAllocation()->getGpuAddress() + static_cast<uint64_t>(heap->getUsed() - sizeThreadData - inlineDataProgrammingOffset);
uint64_t implicitArgsGpuPtr = 0u; uint64_t implicitArgsGpuPtr = 0u;
@@ -272,7 +287,7 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
args.partitionCount > 1, // multiOsContextCapable args.partitionCount > 1, // multiOsContextCapable
args.isRcs, // isRcs args.isRcs, // isRcs
container.doubleSbaWaRef(), // doubleSbaWa container.doubleSbaWaRef(), // doubleSbaWa
false, // heaplessModeEnabled heaplessModeEnabled, // heaplessModeEnabled
}; };
EncodeStateBaseAddress<Family>::encode(encodeStateBaseAddressArgs); EncodeStateBaseAddress<Family>::encode(encodeStateBaseAddressArgs);
container.setDirtyStateForAllHeaps(false); container.setDirtyStateForAllHeaps(false);
@@ -285,8 +300,17 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
EncodeSemaphore<Family>::applyMiSemaphoreWaitCommand(*listCmdBufferStream, *args.additionalCommands); EncodeSemaphore<Family>::applyMiSemaphoreWaitCommand(*listCmdBufferStream, *args.additionalCommands);
} }
if constexpr (heaplessModeEnabled) {
auto inlineDataPointer = reinterpret_cast<char *>(walkerCmd.getInlineDataPointer());
auto indirectDataPointerAddress = kernelDescriptor.payloadMappings.implicitArgs.indirectDataPointerAddress;
auto heap = container.getIndirectHeap(HeapType::INDIRECT_OBJECT);
auto address = heap->getHeapGpuBase() + offsetThreadData;
std::memcpy(inlineDataPointer + indirectDataPointerAddress.offset, &address, indirectDataPointerAddress.pointerSize);
} else {
walkerCmd.setIndirectDataStartAddress(static_cast<uint32_t>(offsetThreadData)); walkerCmd.setIndirectDataStartAddress(static_cast<uint32_t>(offsetThreadData));
walkerCmd.setIndirectDataLength(sizeThreadData); walkerCmd.setIndirectDataLength(sizeThreadData);
}
EncodeDispatchKernel<Family>::encodeThreadData(walkerCmd, EncodeDispatchKernel<Family>::encodeThreadData(walkerCmd,
nullptr, nullptr,
@@ -377,7 +401,7 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
hwInfo); hwInfo);
} else { } else {
args.partitionCount = 1; args.partitionCount = 1;
auto buffer = listCmdBufferStream->getSpaceForCmd<WALKER_TYPE>(); auto buffer = listCmdBufferStream->getSpaceForCmd<WalkerType>();
args.outWalkerPtr = buffer; args.outWalkerPtr = buffer;
*buffer = walkerCmd; *buffer = walkerCmd;
} }
@@ -478,7 +502,8 @@ bool EncodeDispatchKernel<Family>::isRuntimeLocalIdsGenerationRequired(uint32_t
} }
template <typename Family> template <typename Family>
void EncodeDispatchKernel<Family>::encodeThreadData(WALKER_TYPE &walkerCmd, template <typename WalkerType>
void EncodeDispatchKernel<Family>::encodeThreadData(WalkerType &walkerCmd,
const uint32_t *startWorkGroup, const uint32_t *startWorkGroup,
const uint32_t *numWorkGroups, const uint32_t *numWorkGroups,
const uint32_t *workGroupSizes, const uint32_t *workGroupSizes,
@@ -517,7 +542,7 @@ void EncodeDispatchKernel<Family>::encodeThreadData(WALKER_TYPE &walkerCmd,
} }
walkerCmd.setExecutionMask(static_cast<uint32_t>(executionMask)); walkerCmd.setExecutionMask(static_cast<uint32_t>(executionMask));
walkerCmd.setSimdSize(getSimdConfig<WALKER_TYPE>(simd)); walkerCmd.setSimdSize(getSimdConfig<WalkerType>(simd));
walkerCmd.setMessageSimd(walkerCmd.getSimdSize()); walkerCmd.setMessageSimd(walkerCmd.getSimdSize());
@@ -818,7 +843,8 @@ inline void EncodeStoreMMIO<Family>::appendFlags(MI_STORE_REGISTER_MEM *storeReg
} }
template <typename Family> template <typename Family>
void EncodeDispatchKernel<Family>::adjustWalkOrder(WALKER_TYPE &walkerCmd, uint32_t requiredWorkGroupOrder, const RootDeviceEnvironment &rootDeviceEnvironment) {} template <typename WalkerType>
void EncodeDispatchKernel<Family>::adjustWalkOrder(WalkerType &walkerCmd, uint32_t requiredWorkGroupOrder, const RootDeviceEnvironment &rootDeviceEnvironment) {}
template <typename Family> template <typename Family>
size_t EncodeDispatchKernel<Family>::additionalSizeRequiredDsh(uint32_t iddCount) { size_t EncodeDispatchKernel<Family>::additionalSizeRequiredDsh(uint32_t iddCount) {

View File

@@ -79,6 +79,9 @@ template void EncodeDispatchKernel<Family>::appendAdditionalIDDFields<Family::IN
template void EncodeDispatchKernel<Family>::programBarrierEnable<Family::INTERFACE_DESCRIPTOR_DATA>(Family::INTERFACE_DESCRIPTOR_DATA &interfaceDescriptor, uint32_t value, const HardwareInfo &hwInfo); template void EncodeDispatchKernel<Family>::programBarrierEnable<Family::INTERFACE_DESCRIPTOR_DATA>(Family::INTERFACE_DESCRIPTOR_DATA &interfaceDescriptor, uint32_t value, const HardwareInfo &hwInfo);
template void EncodeDispatchKernel<Family>::adjustInterfaceDescriptorData<Family::WALKER_TYPE, Family::INTERFACE_DESCRIPTOR_DATA>(Family::INTERFACE_DESCRIPTOR_DATA &interfaceDescriptor, const Device &device, const HardwareInfo &hwInfo, const uint32_t threadGroupCount, const uint32_t numGrf, Family::WALKER_TYPE &walkerCmd); template void EncodeDispatchKernel<Family>::adjustInterfaceDescriptorData<Family::WALKER_TYPE, Family::INTERFACE_DESCRIPTOR_DATA>(Family::INTERFACE_DESCRIPTOR_DATA &interfaceDescriptor, const Device &device, const HardwareInfo &hwInfo, const uint32_t threadGroupCount, const uint32_t numGrf, Family::WALKER_TYPE &walkerCmd);
template void EncodeDispatchKernel<Family>::setupPostSyncMocs<Family::WALKER_TYPE>(Family::WALKER_TYPE &walkerCmd, const RootDeviceEnvironment &rootDeviceEnvironment, bool dcFlush); template void EncodeDispatchKernel<Family>::setupPostSyncMocs<Family::WALKER_TYPE>(Family::WALKER_TYPE &walkerCmd, const RootDeviceEnvironment &rootDeviceEnvironment, bool dcFlush);
template void EncodeDispatchKernel<Family>::encode<Family::WALKER_TYPE>(CommandContainer &container, EncodeDispatchKernelArgs &args);
template void EncodeDispatchKernel<Family>::encodeThreadData<Family::WALKER_TYPE>(Family::WALKER_TYPE &walkerCmd, const uint32_t *startWorkGroup, const uint32_t *numWorkGroups, const uint32_t *workGroupSizes, uint32_t simd, uint32_t localIdDimensions, uint32_t threadsPerThreadGroup, uint32_t threadExecutionMask, bool localIdsGenerationByRuntime, bool inlineDataProgrammingRequired, bool isIndirect, uint32_t requiredWorkGroupOrder, const RootDeviceEnvironment &rootDeviceEnvironment);
template void EncodeDispatchKernel<Family>::adjustWalkOrder<Family::WALKER_TYPE>(Family::WALKER_TYPE &walkerCmd, uint32_t requiredWorkGroupOrder, const RootDeviceEnvironment &rootDeviceEnvironment);
template struct EncodeStates<Family>; template struct EncodeStates<Family>;
template struct EncodeMath<Family>; template struct EncodeMath<Family>;

View File

@@ -119,6 +119,9 @@ template void EncodeDispatchKernel<Family>::appendAdditionalIDDFields<Family::IN
template void EncodeDispatchKernel<Family>::programBarrierEnable<Family::INTERFACE_DESCRIPTOR_DATA>(Family::INTERFACE_DESCRIPTOR_DATA &interfaceDescriptor, uint32_t value, const HardwareInfo &hwInfo); template void EncodeDispatchKernel<Family>::programBarrierEnable<Family::INTERFACE_DESCRIPTOR_DATA>(Family::INTERFACE_DESCRIPTOR_DATA &interfaceDescriptor, uint32_t value, const HardwareInfo &hwInfo);
template void EncodeDispatchKernel<Family>::adjustInterfaceDescriptorData<Family::WALKER_TYPE, Family::INTERFACE_DESCRIPTOR_DATA>(Family::INTERFACE_DESCRIPTOR_DATA &interfaceDescriptor, const Device &device, const HardwareInfo &hwInfo, const uint32_t threadGroupCount, const uint32_t numGrf, Family::WALKER_TYPE &walkerCmd); template void EncodeDispatchKernel<Family>::adjustInterfaceDescriptorData<Family::WALKER_TYPE, Family::INTERFACE_DESCRIPTOR_DATA>(Family::INTERFACE_DESCRIPTOR_DATA &interfaceDescriptor, const Device &device, const HardwareInfo &hwInfo, const uint32_t threadGroupCount, const uint32_t numGrf, Family::WALKER_TYPE &walkerCmd);
template void EncodeDispatchKernel<Family>::setupPostSyncMocs<Family::WALKER_TYPE>(Family::WALKER_TYPE &walkerCmd, const RootDeviceEnvironment &rootDeviceEnvironment, bool dcFlush); template void EncodeDispatchKernel<Family>::setupPostSyncMocs<Family::WALKER_TYPE>(Family::WALKER_TYPE &walkerCmd, const RootDeviceEnvironment &rootDeviceEnvironment, bool dcFlush);
template void EncodeDispatchKernel<Family>::encode<Family::WALKER_TYPE>(CommandContainer &container, EncodeDispatchKernelArgs &args);
template void EncodeDispatchKernel<Family>::encodeThreadData<Family::WALKER_TYPE>(Family::WALKER_TYPE &walkerCmd, const uint32_t *startWorkGroup, const uint32_t *numWorkGroups, const uint32_t *workGroupSizes, uint32_t simd, uint32_t localIdDimensions, uint32_t threadsPerThreadGroup, uint32_t threadExecutionMask, bool localIdsGenerationByRuntime, bool inlineDataProgrammingRequired, bool isIndirect, uint32_t requiredWorkGroupOrder, const RootDeviceEnvironment &rootDeviceEnvironment);
template void EncodeDispatchKernel<Family>::adjustWalkOrder<Family::WALKER_TYPE>(Family::WALKER_TYPE &walkerCmd, uint32_t requiredWorkGroupOrder, const RootDeviceEnvironment &rootDeviceEnvironment);
template struct EncodeStates<Family>; template struct EncodeStates<Family>;
template struct EncodeMath<Family>; template struct EncodeMath<Family>;

View File

@@ -59,6 +59,9 @@ template void EncodeDispatchKernel<Family>::appendAdditionalIDDFields<Family::IN
template void EncodeDispatchKernel<Family>::programBarrierEnable<Family::INTERFACE_DESCRIPTOR_DATA>(Family::INTERFACE_DESCRIPTOR_DATA &interfaceDescriptor, uint32_t value, const HardwareInfo &hwInfo); template void EncodeDispatchKernel<Family>::programBarrierEnable<Family::INTERFACE_DESCRIPTOR_DATA>(Family::INTERFACE_DESCRIPTOR_DATA &interfaceDescriptor, uint32_t value, const HardwareInfo &hwInfo);
template void EncodeDispatchKernel<Family>::adjustInterfaceDescriptorData<Family::WALKER_TYPE, Family::INTERFACE_DESCRIPTOR_DATA>(Family::INTERFACE_DESCRIPTOR_DATA &interfaceDescriptor, const Device &device, const HardwareInfo &hwInfo, const uint32_t threadGroupCount, const uint32_t numGrf, Family::WALKER_TYPE &walkerCmd); template void EncodeDispatchKernel<Family>::adjustInterfaceDescriptorData<Family::WALKER_TYPE, Family::INTERFACE_DESCRIPTOR_DATA>(Family::INTERFACE_DESCRIPTOR_DATA &interfaceDescriptor, const Device &device, const HardwareInfo &hwInfo, const uint32_t threadGroupCount, const uint32_t numGrf, Family::WALKER_TYPE &walkerCmd);
template void EncodeDispatchKernel<Family>::setupPostSyncMocs<Family::WALKER_TYPE>(Family::WALKER_TYPE &walkerCmd, const RootDeviceEnvironment &rootDeviceEnvironment, bool dcFlush); template void EncodeDispatchKernel<Family>::setupPostSyncMocs<Family::WALKER_TYPE>(Family::WALKER_TYPE &walkerCmd, const RootDeviceEnvironment &rootDeviceEnvironment, bool dcFlush);
template void EncodeDispatchKernel<Family>::encode<Family::WALKER_TYPE>(CommandContainer &container, EncodeDispatchKernelArgs &args);
template void EncodeDispatchKernel<Family>::encodeThreadData<Family::WALKER_TYPE>(Family::WALKER_TYPE &walkerCmd, const uint32_t *startWorkGroup, const uint32_t *numWorkGroups, const uint32_t *workGroupSizes, uint32_t simd, uint32_t localIdDimensions, uint32_t threadsPerThreadGroup, uint32_t threadExecutionMask, bool localIdsGenerationByRuntime, bool inlineDataProgrammingRequired, bool isIndirect, uint32_t requiredWorkGroupOrder, const RootDeviceEnvironment &rootDeviceEnvironment);
template void EncodeDispatchKernel<Family>::adjustWalkOrder<Family::WALKER_TYPE>(Family::WALKER_TYPE &walkerCmd, uint32_t requiredWorkGroupOrder, const RootDeviceEnvironment &rootDeviceEnvironment);
template struct EncodeStates<Family>; template struct EncodeStates<Family>;
template struct EncodeMath<Family>; template struct EncodeMath<Family>;

View File

@@ -64,6 +64,9 @@ template void EncodeDispatchKernel<Family>::appendAdditionalIDDFields<Family::IN
template void EncodeDispatchKernel<Family>::programBarrierEnable<Family::INTERFACE_DESCRIPTOR_DATA>(Family::INTERFACE_DESCRIPTOR_DATA &interfaceDescriptor, uint32_t value, const HardwareInfo &hwInfo); template void EncodeDispatchKernel<Family>::programBarrierEnable<Family::INTERFACE_DESCRIPTOR_DATA>(Family::INTERFACE_DESCRIPTOR_DATA &interfaceDescriptor, uint32_t value, const HardwareInfo &hwInfo);
template void EncodeDispatchKernel<Family>::adjustInterfaceDescriptorData<Family::WALKER_TYPE, Family::INTERFACE_DESCRIPTOR_DATA>(Family::INTERFACE_DESCRIPTOR_DATA &interfaceDescriptor, const Device &device, const HardwareInfo &hwInfo, const uint32_t threadGroupCount, const uint32_t numGrf, Family::WALKER_TYPE &walkerCmd); template void EncodeDispatchKernel<Family>::adjustInterfaceDescriptorData<Family::WALKER_TYPE, Family::INTERFACE_DESCRIPTOR_DATA>(Family::INTERFACE_DESCRIPTOR_DATA &interfaceDescriptor, const Device &device, const HardwareInfo &hwInfo, const uint32_t threadGroupCount, const uint32_t numGrf, Family::WALKER_TYPE &walkerCmd);
template void EncodeDispatchKernel<Family>::setupPostSyncMocs<Family::WALKER_TYPE>(Family::WALKER_TYPE &walkerCmd, const RootDeviceEnvironment &rootDeviceEnvironment, bool dcFlush); template void EncodeDispatchKernel<Family>::setupPostSyncMocs<Family::WALKER_TYPE>(Family::WALKER_TYPE &walkerCmd, const RootDeviceEnvironment &rootDeviceEnvironment, bool dcFlush);
template void EncodeDispatchKernel<Family>::encode<Family::WALKER_TYPE>(CommandContainer &container, EncodeDispatchKernelArgs &args);
template void EncodeDispatchKernel<Family>::encodeThreadData<Family::WALKER_TYPE>(Family::WALKER_TYPE &walkerCmd, const uint32_t *startWorkGroup, const uint32_t *numWorkGroups, const uint32_t *workGroupSizes, uint32_t simd, uint32_t localIdDimensions, uint32_t threadsPerThreadGroup, uint32_t threadExecutionMask, bool localIdsGenerationByRuntime, bool inlineDataProgrammingRequired, bool isIndirect, uint32_t requiredWorkGroupOrder, const RootDeviceEnvironment &rootDeviceEnvironment);
template void EncodeDispatchKernel<Family>::adjustWalkOrder<Family::WALKER_TYPE>(Family::WALKER_TYPE &walkerCmd, uint32_t requiredWorkGroupOrder, const RootDeviceEnvironment &rootDeviceEnvironment);
template struct EncodeStates<Family>; template struct EncodeStates<Family>;
template struct EncodeMath<Family>; template struct EncodeMath<Family>;

View File

@@ -490,6 +490,9 @@ typedef struct tagGPGPU_WALKER {
inline uint32_t getBottomExecutionMask() const { inline uint32_t getBottomExecutionMask() const {
return (TheStructure.Common.BottomExecutionMask); return (TheStructure.Common.BottomExecutionMask);
} }
static constexpr uint32_t getInlineDataSize() { // patched
return 0u;
}
using InterfaceDescriptorType = INTERFACE_DESCRIPTOR_DATA; // patched using InterfaceDescriptorType = INTERFACE_DESCRIPTOR_DATA; // patched
} GPGPU_WALKER; } GPGPU_WALKER;

View File

@@ -510,7 +510,9 @@ typedef struct tagGPGPU_WALKER {
inline uint32_t getBottomExecutionMask() const { inline uint32_t getBottomExecutionMask() const {
return TheStructure.Common.BottomExecutionMask; return TheStructure.Common.BottomExecutionMask;
} }
static constexpr uint32_t getInlineDataSize() { // patched
return 0u;
}
using InterfaceDescriptorType = INTERFACE_DESCRIPTOR_DATA; // patched using InterfaceDescriptorType = INTERFACE_DESCRIPTOR_DATA; // patched
} GPGPU_WALKER; } GPGPU_WALKER;

View File

@@ -476,6 +476,9 @@ typedef struct tagGPGPU_WALKER {
inline uint32_t getBottomExecutionMask() const { inline uint32_t getBottomExecutionMask() const {
return (TheStructure.Common.BottomExecutionMask); return (TheStructure.Common.BottomExecutionMask);
} }
static constexpr uint32_t getInlineDataSize() { // patched
return 0u;
}
using InterfaceDescriptorType = INTERFACE_DESCRIPTOR_DATA; // patched using InterfaceDescriptorType = INTERFACE_DESCRIPTOR_DATA; // patched
} GPGPU_WALKER; } GPGPU_WALKER;
STATIC_ASSERT(60 == sizeof(GPGPU_WALKER)); STATIC_ASSERT(60 == sizeof(GPGPU_WALKER));

View File

@@ -485,6 +485,9 @@ typedef struct tagGPGPU_WALKER {
inline uint32_t getBottomExecutionMask() const { inline uint32_t getBottomExecutionMask() const {
return (TheStructure.Common.BottomExecutionMask); return (TheStructure.Common.BottomExecutionMask);
} }
static constexpr uint32_t getInlineDataSize() { // patched
return 0u;
}
using InterfaceDescriptorType = INTERFACE_DESCRIPTOR_DATA; // patched using InterfaceDescriptorType = INTERFACE_DESCRIPTOR_DATA; // patched
} GPGPU_WALKER; } GPGPU_WALKER;
STATIC_ASSERT(60 == sizeof(GPGPU_WALKER)); STATIC_ASSERT(60 == sizeof(GPGPU_WALKER));

View File

@@ -5808,6 +5808,9 @@ typedef struct tagCOMPUTE_WALKER {
inline uint32_t *getInlineDataPointer() { inline uint32_t *getInlineDataPointer() {
return reinterpret_cast<uint32_t *>(&TheStructure.Common.InlineData); return reinterpret_cast<uint32_t *>(&TheStructure.Common.InlineData);
} }
static constexpr uint32_t getInlineDataSize() { // patched
return 32u;
}
using InterfaceDescriptorType = INTERFACE_DESCRIPTOR_DATA; // patched using InterfaceDescriptorType = INTERFACE_DESCRIPTOR_DATA; // patched
} COMPUTE_WALKER; } COMPUTE_WALKER;
STATIC_ASSERT(156 == sizeof(COMPUTE_WALKER)); STATIC_ASSERT(156 == sizeof(COMPUTE_WALKER));

View File

@@ -5579,6 +5579,9 @@ typedef struct tagCOMPUTE_WALKER {
inline uint32_t *getInlineDataPointer() { inline uint32_t *getInlineDataPointer() {
return reinterpret_cast<uint32_t *>(&TheStructure.Common.InlineData); return reinterpret_cast<uint32_t *>(&TheStructure.Common.InlineData);
} }
static constexpr uint32_t getInlineDataSize() { // patched
return 32u;
}
using InterfaceDescriptorType = INTERFACE_DESCRIPTOR_DATA; // patched using InterfaceDescriptorType = INTERFACE_DESCRIPTOR_DATA; // patched
} COMPUTE_WALKER; } COMPUTE_WALKER;
STATIC_ASSERT(156 == sizeof(COMPUTE_WALKER)); STATIC_ASSERT(156 == sizeof(COMPUTE_WALKER));

View File

@@ -378,6 +378,9 @@ template void EncodeDispatchKernel<Family>::setGrfInfo<Family::INTERFACE_DESCRIP
template void EncodeDispatchKernel<Family>::appendAdditionalIDDFields<Family::INTERFACE_DESCRIPTOR_DATA>(Family::INTERFACE_DESCRIPTOR_DATA *pInterfaceDescriptor, const RootDeviceEnvironment &rootDeviceEnvironment, const uint32_t threadsPerThreadGroup, uint32_t slmTotalSize, SlmPolicy slmPolicy); template void EncodeDispatchKernel<Family>::appendAdditionalIDDFields<Family::INTERFACE_DESCRIPTOR_DATA>(Family::INTERFACE_DESCRIPTOR_DATA *pInterfaceDescriptor, const RootDeviceEnvironment &rootDeviceEnvironment, const uint32_t threadsPerThreadGroup, uint32_t slmTotalSize, SlmPolicy slmPolicy);
template void EncodeDispatchKernel<Family>::adjustInterfaceDescriptorData<Family::WALKER_TYPE, Family::INTERFACE_DESCRIPTOR_DATA>(Family::INTERFACE_DESCRIPTOR_DATA &interfaceDescriptor, const Device &device, const HardwareInfo &hwInfo, const uint32_t threadGroupCount, const uint32_t numGrf, Family::WALKER_TYPE &walkerCmd); template void EncodeDispatchKernel<Family>::adjustInterfaceDescriptorData<Family::WALKER_TYPE, Family::INTERFACE_DESCRIPTOR_DATA>(Family::INTERFACE_DESCRIPTOR_DATA &interfaceDescriptor, const Device &device, const HardwareInfo &hwInfo, const uint32_t threadGroupCount, const uint32_t numGrf, Family::WALKER_TYPE &walkerCmd);
template void EncodeDispatchKernel<Family>::setupPostSyncMocs<Family::WALKER_TYPE>(Family::WALKER_TYPE &walkerCmd, const RootDeviceEnvironment &rootDeviceEnvironment, bool dcFlush); template void EncodeDispatchKernel<Family>::setupPostSyncMocs<Family::WALKER_TYPE>(Family::WALKER_TYPE &walkerCmd, const RootDeviceEnvironment &rootDeviceEnvironment, bool dcFlush);
template void EncodeDispatchKernel<Family>::encode<Family::WALKER_TYPE>(CommandContainer &container, EncodeDispatchKernelArgs &args);
template void EncodeDispatchKernel<Family>::encodeThreadData<Family::WALKER_TYPE>(Family::WALKER_TYPE &walkerCmd, const uint32_t *startWorkGroup, const uint32_t *numWorkGroups, const uint32_t *workGroupSizes, uint32_t simd, uint32_t localIdDimensions, uint32_t threadsPerThreadGroup, uint32_t threadExecutionMask, bool localIdsGenerationByRuntime, bool inlineDataProgrammingRequired, bool isIndirect, uint32_t requiredWorkGroupOrder, const RootDeviceEnvironment &rootDeviceEnvironment);
template void EncodeDispatchKernel<Family>::adjustWalkOrder<Family::WALKER_TYPE>(Family::WALKER_TYPE &walkerCmd, uint32_t requiredWorkGroupOrder, const RootDeviceEnvironment &rootDeviceEnvironment);
template struct EncodeStates<Family>; template struct EncodeStates<Family>;
template struct EncodeMath<Family>; template struct EncodeMath<Family>;

View File

@@ -189,13 +189,14 @@ void EncodeSurfaceState<Family>::appendParamsForImageFromBuffer(R_SURFACE_STATE
} }
template <> template <>
void EncodeDispatchKernel<Family>::adjustWalkOrder(WALKER_TYPE &walkerCmd, uint32_t requiredWorkGroupOrder, const RootDeviceEnvironment &rootDeviceEnvironment) { template <typename WalkerType>
void EncodeDispatchKernel<Family>::adjustWalkOrder(WalkerType &walkerCmd, uint32_t requiredWorkGroupOrder, const RootDeviceEnvironment &rootDeviceEnvironment) {
auto &productHelper = rootDeviceEnvironment.template getHelper<ProductHelper>(); auto &productHelper = rootDeviceEnvironment.template getHelper<ProductHelper>();
if (productHelper.isAdjustWalkOrderAvailable(rootDeviceEnvironment.getReleaseHelper())) { if (productHelper.isAdjustWalkOrderAvailable(rootDeviceEnvironment.getReleaseHelper())) {
if (HwWalkOrderHelper::compatibleDimensionOrders[requiredWorkGroupOrder] == HwWalkOrderHelper::linearWalk) { if (HwWalkOrderHelper::compatibleDimensionOrders[requiredWorkGroupOrder] == HwWalkOrderHelper::linearWalk) {
walkerCmd.setDispatchWalkOrder(WALKER_TYPE::DISPATCH_WALK_ORDER::LINERAR_WALKER); walkerCmd.setDispatchWalkOrder(WalkerType::DISPATCH_WALK_ORDER::LINERAR_WALKER);
} else if (HwWalkOrderHelper::compatibleDimensionOrders[requiredWorkGroupOrder] == HwWalkOrderHelper::yOrderWalk) { } else if (HwWalkOrderHelper::compatibleDimensionOrders[requiredWorkGroupOrder] == HwWalkOrderHelper::yOrderWalk) {
walkerCmd.setDispatchWalkOrder(WALKER_TYPE::DISPATCH_WALK_ORDER::Y_ORDER_WALKER); walkerCmd.setDispatchWalkOrder(WalkerType::DISPATCH_WALK_ORDER::Y_ORDER_WALKER);
} }
} }
} }
@@ -228,6 +229,9 @@ template void EncodeDispatchKernel<Family>::setGrfInfo<Family::INTERFACE_DESCRIP
template void EncodeDispatchKernel<Family>::appendAdditionalIDDFields<Family::INTERFACE_DESCRIPTOR_DATA>(Family::INTERFACE_DESCRIPTOR_DATA *pInterfaceDescriptor, const RootDeviceEnvironment &rootDeviceEnvironment, const uint32_t threadsPerThreadGroup, uint32_t slmTotalSize, SlmPolicy slmPolicy); template void EncodeDispatchKernel<Family>::appendAdditionalIDDFields<Family::INTERFACE_DESCRIPTOR_DATA>(Family::INTERFACE_DESCRIPTOR_DATA *pInterfaceDescriptor, const RootDeviceEnvironment &rootDeviceEnvironment, const uint32_t threadsPerThreadGroup, uint32_t slmTotalSize, SlmPolicy slmPolicy);
template void EncodeDispatchKernel<Family>::adjustInterfaceDescriptorData<Family::WALKER_TYPE, Family::INTERFACE_DESCRIPTOR_DATA>(Family::INTERFACE_DESCRIPTOR_DATA &interfaceDescriptor, const Device &device, const HardwareInfo &hwInfo, const uint32_t threadGroupCount, const uint32_t numGrf, Family::WALKER_TYPE &walkerCmd); template void EncodeDispatchKernel<Family>::adjustInterfaceDescriptorData<Family::WALKER_TYPE, Family::INTERFACE_DESCRIPTOR_DATA>(Family::INTERFACE_DESCRIPTOR_DATA &interfaceDescriptor, const Device &device, const HardwareInfo &hwInfo, const uint32_t threadGroupCount, const uint32_t numGrf, Family::WALKER_TYPE &walkerCmd);
template void EncodeDispatchKernel<Family>::setupPostSyncMocs<Family::WALKER_TYPE>(Family::WALKER_TYPE &walkerCmd, const RootDeviceEnvironment &rootDeviceEnvironment, bool dcFlush); template void EncodeDispatchKernel<Family>::setupPostSyncMocs<Family::WALKER_TYPE>(Family::WALKER_TYPE &walkerCmd, const RootDeviceEnvironment &rootDeviceEnvironment, bool dcFlush);
template void EncodeDispatchKernel<Family>::encode<Family::WALKER_TYPE>(CommandContainer &container, EncodeDispatchKernelArgs &args);
template void EncodeDispatchKernel<Family>::encodeThreadData<Family::WALKER_TYPE>(Family::WALKER_TYPE &walkerCmd, const uint32_t *startWorkGroup, const uint32_t *numWorkGroups, const uint32_t *workGroupSizes, uint32_t simd, uint32_t localIdDimensions, uint32_t threadsPerThreadGroup, uint32_t threadExecutionMask, bool localIdsGenerationByRuntime, bool inlineDataProgrammingRequired, bool isIndirect, uint32_t requiredWorkGroupOrder, const RootDeviceEnvironment &rootDeviceEnvironment);
template void EncodeDispatchKernel<Family>::adjustWalkOrder<Family::WALKER_TYPE>(Family::WALKER_TYPE &walkerCmd, uint32_t requiredWorkGroupOrder, const RootDeviceEnvironment &rootDeviceEnvironment);
template struct EncodeStates<Family>; template struct EncodeStates<Family>;
template struct EncodeMath<Family>; template struct EncodeMath<Family>;

View File

@@ -236,15 +236,15 @@ HWTEST2_F(CommandEncoderTests, givenRequiredWorkGroupOrderWhenCallAdjustWalkOrde
WALKER_TYPE walkerOnStart{}; WALKER_TYPE walkerOnStart{};
uint32_t yOrder = 2u; uint32_t yOrder = 2u;
EncodeDispatchKernel<FamilyType>::adjustWalkOrder(walkerCmd, yOrder, rootDeviceEnvironment); EncodeDispatchKernel<FamilyType>::template adjustWalkOrder<WALKER_TYPE>(walkerCmd, yOrder, rootDeviceEnvironment);
EXPECT_EQ(0, memcmp(&walkerOnStart, &walkerCmd, sizeof(WALKER_TYPE))); // no change EXPECT_EQ(0, memcmp(&walkerOnStart, &walkerCmd, sizeof(WALKER_TYPE))); // no change
uint32_t linearOrder = 0u; uint32_t linearOrder = 0u;
EncodeDispatchKernel<FamilyType>::adjustWalkOrder(walkerCmd, linearOrder, rootDeviceEnvironment); EncodeDispatchKernel<FamilyType>::template adjustWalkOrder<WALKER_TYPE>(walkerCmd, linearOrder, rootDeviceEnvironment);
EXPECT_EQ(0, memcmp(&walkerOnStart, &walkerCmd, sizeof(WALKER_TYPE))); // no change EXPECT_EQ(0, memcmp(&walkerOnStart, &walkerCmd, sizeof(WALKER_TYPE))); // no change
uint32_t fakeOrder = 5u; uint32_t fakeOrder = 5u;
EncodeDispatchKernel<FamilyType>::adjustWalkOrder(walkerCmd, fakeOrder, rootDeviceEnvironment); EncodeDispatchKernel<FamilyType>::template adjustWalkOrder<WALKER_TYPE>(walkerCmd, fakeOrder, rootDeviceEnvironment);
EXPECT_EQ(0, memcmp(&walkerOnStart, &walkerCmd, sizeof(WALKER_TYPE))); // no change EXPECT_EQ(0, memcmp(&walkerOnStart, &walkerCmd, sizeof(WALKER_TYPE))); // no change
} }

View File

@@ -46,6 +46,7 @@ TEST_F(CommandEncodeStatesTest, givenCommandConatinerCreatedWithMaxNumAggregateI
} }
HWTEST_F(CommandEncodeStatesTest, givenDispatchInterfaceWhenDispatchKernelThenWalkerCommandProgrammed) { HWTEST_F(CommandEncodeStatesTest, givenDispatchInterfaceWhenDispatchKernelThenWalkerCommandProgrammed) {
using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
uint32_t dims[] = {2, 1, 1}; uint32_t dims[] = {2, 1, 1};
std::unique_ptr<MockDispatchKernelEncoder> dispatchInterface(new MockDispatchKernelEncoder()); std::unique_ptr<MockDispatchKernelEncoder> dispatchInterface(new MockDispatchKernelEncoder());
bool requiresUncachedMocs = false; bool requiresUncachedMocs = false;
@@ -56,17 +57,17 @@ HWTEST_F(CommandEncodeStatesTest, givenDispatchInterfaceWhenDispatchKernelThenWa
dispatchArgs.dynamicStateHeap = cmdContainer->getIndirectHeap(HeapType::DYNAMIC_STATE); dispatchArgs.dynamicStateHeap = cmdContainer->getIndirectHeap(HeapType::DYNAMIC_STATE);
} }
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs); EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
GenCmdList commands; GenCmdList commands;
CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed()); CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed());
using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
auto itorPC = find<WALKER_TYPE *>(commands.begin(), commands.end()); auto itorPC = find<WALKER_TYPE *>(commands.begin(), commands.end());
ASSERT_NE(itorPC, commands.end()); ASSERT_NE(itorPC, commands.end());
} }
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesTest, givenDebugFlagSetWhenProgrammingWalkerThenSetFlushingBits) { HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesTest, givenDebugFlagSetWhenProgrammingWalkerThenSetFlushingBits) {
using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
DebugManagerStateRestore restore; DebugManagerStateRestore restore;
debugManager.flags.ForceComputeWalkerPostSyncFlush.set(1); debugManager.flags.ForceComputeWalkerPostSyncFlush.set(1);
@@ -75,12 +76,11 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesTest, givenDebugFlagSetWhenProgr
bool requiresUncachedMocs = false; bool requiresUncachedMocs = false;
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs); EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
GenCmdList commands; GenCmdList commands;
CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed()); CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed());
using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
auto itor = find<WALKER_TYPE *>(commands.begin(), commands.end()); auto itor = find<WALKER_TYPE *>(commands.begin(), commands.end());
ASSERT_NE(itor, commands.end()); ASSERT_NE(itor, commands.end());
@@ -91,6 +91,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesTest, givenDebugFlagSetWhenProgr
using CommandEncodeStatesUncachedMocsTests = Test<CommandEncodeStatesFixture>; using CommandEncodeStatesUncachedMocsTests = Test<CommandEncodeStatesFixture>;
HWTEST_F(CommandEncodeStatesUncachedMocsTests, whenEncodingDispatchKernelWithUncachedMocsAndDirtyHeapsThenCorrectMocsIsSet) { HWTEST_F(CommandEncodeStatesUncachedMocsTests, whenEncodingDispatchKernelWithUncachedMocsAndDirtyHeapsThenCorrectMocsIsSet) {
using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
DebugManagerStateRestore restore; DebugManagerStateRestore restore;
debugManager.flags.ForceL1Caching.set(0u); debugManager.flags.ForceL1Caching.set(0u);
@@ -100,7 +101,7 @@ HWTEST_F(CommandEncodeStatesUncachedMocsTests, whenEncodingDispatchKernelWithUnc
bool requiresUncachedMocs = true; bool requiresUncachedMocs = true;
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs); EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
GenCmdList commands; GenCmdList commands;
CmdParse<FamilyType>::parseCommandBuffer(commands, CmdParse<FamilyType>::parseCommandBuffer(commands,
@@ -118,6 +119,7 @@ HWTEST_F(CommandEncodeStatesUncachedMocsTests, whenEncodingDispatchKernelWithUnc
} }
HWTEST_F(CommandEncodeStatesUncachedMocsTests, whenEncodingDispatchKernelWithUncachedMocsAndNonDirtyHeapsThenCorrectMocsIsSet) { HWTEST_F(CommandEncodeStatesUncachedMocsTests, whenEncodingDispatchKernelWithUncachedMocsAndNonDirtyHeapsThenCorrectMocsIsSet) {
using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
DebugManagerStateRestore restore; DebugManagerStateRestore restore;
debugManager.flags.ForceL1Caching.set(0u); debugManager.flags.ForceL1Caching.set(0u);
@@ -127,7 +129,7 @@ HWTEST_F(CommandEncodeStatesUncachedMocsTests, whenEncodingDispatchKernelWithUnc
bool requiresUncachedMocs = true; bool requiresUncachedMocs = true;
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs); EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
GenCmdList commands; GenCmdList commands;
CmdParse<FamilyType>::parseCommandBuffer(commands, CmdParse<FamilyType>::parseCommandBuffer(commands,
@@ -145,6 +147,7 @@ HWTEST_F(CommandEncodeStatesUncachedMocsTests, whenEncodingDispatchKernelWithUnc
} }
HWTEST_F(CommandEncodeStatesUncachedMocsTests, whenEncodingDispatchKernelWithNonUncachedMocsAndDirtyHeapsThenSbaIsNotProgrammed) { HWTEST_F(CommandEncodeStatesUncachedMocsTests, whenEncodingDispatchKernelWithNonUncachedMocsAndDirtyHeapsThenSbaIsNotProgrammed) {
using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
DebugManagerStateRestore restore; DebugManagerStateRestore restore;
debugManager.flags.ForceL1Caching.set(0u); debugManager.flags.ForceL1Caching.set(0u);
@@ -154,7 +157,7 @@ HWTEST_F(CommandEncodeStatesUncachedMocsTests, whenEncodingDispatchKernelWithNon
bool requiresUncachedMocs = false; bool requiresUncachedMocs = false;
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs); EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
EXPECT_FALSE(dispatchArgs.requiresUncachedMocs); EXPECT_FALSE(dispatchArgs.requiresUncachedMocs);
GenCmdList commands; GenCmdList commands;
@@ -173,6 +176,7 @@ HWTEST_F(CommandEncodeStatesUncachedMocsTests, whenEncodingDispatchKernelWithNon
} }
HWTEST_F(CommandEncodeStatesUncachedMocsTests, whenEncodingDispatchKernelWithNonUncachedMocsAndNonDirtyHeapsThenSbaIsNotProgrammed) { HWTEST_F(CommandEncodeStatesUncachedMocsTests, whenEncodingDispatchKernelWithNonUncachedMocsAndNonDirtyHeapsThenSbaIsNotProgrammed) {
using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
DebugManagerStateRestore restore; DebugManagerStateRestore restore;
debugManager.flags.ForceL1Caching.set(0u); debugManager.flags.ForceL1Caching.set(0u);
@@ -182,7 +186,7 @@ HWTEST_F(CommandEncodeStatesUncachedMocsTests, whenEncodingDispatchKernelWithNon
bool requiresUncachedMocs = false; bool requiresUncachedMocs = false;
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs); EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
EXPECT_FALSE(dispatchArgs.requiresUncachedMocs); EXPECT_FALSE(dispatchArgs.requiresUncachedMocs);
GenCmdList commands; GenCmdList commands;
@@ -196,6 +200,7 @@ HWTEST_F(CommandEncodeStatesUncachedMocsTests, whenEncodingDispatchKernelWithNon
} }
HWTEST_F(CommandEncodeStatesUncachedMocsTests, whenEncodingDispatchKernelWithNonUncachedMocsAndNonDirtyHeapsAndSlmSizeThenSbaIsNotProgrammed) { HWTEST_F(CommandEncodeStatesUncachedMocsTests, whenEncodingDispatchKernelWithNonUncachedMocsAndNonDirtyHeapsAndSlmSizeThenSbaIsNotProgrammed) {
using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
DebugManagerStateRestore restore; DebugManagerStateRestore restore;
debugManager.flags.ForceL1Caching.set(0u); debugManager.flags.ForceL1Caching.set(0u);
@@ -209,7 +214,7 @@ HWTEST_F(CommandEncodeStatesUncachedMocsTests, whenEncodingDispatchKernelWithNon
bool requiresUncachedMocs = false; bool requiresUncachedMocs = false;
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs); EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
EXPECT_FALSE(dispatchArgs.requiresUncachedMocs); EXPECT_FALSE(dispatchArgs.requiresUncachedMocs);
GenCmdList commands; GenCmdList commands;
@@ -223,6 +228,7 @@ HWTEST_F(CommandEncodeStatesUncachedMocsTests, whenEncodingDispatchKernelWithNon
} }
HWTEST_F(CommandEncodeStatesTest, givenCommandContainerWithUsedAvailableSizeWhenDispatchKernelThenNextCommandBufferIsAdded) { HWTEST_F(CommandEncodeStatesTest, givenCommandContainerWithUsedAvailableSizeWhenDispatchKernelThenNextCommandBufferIsAdded) {
using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
uint32_t dims[] = {2, 1, 1}; uint32_t dims[] = {2, 1, 1};
std::unique_ptr<MockDispatchKernelEncoder> dispatchInterface(new MockDispatchKernelEncoder()); std::unique_ptr<MockDispatchKernelEncoder> dispatchInterface(new MockDispatchKernelEncoder());
@@ -233,7 +239,7 @@ HWTEST_F(CommandEncodeStatesTest, givenCommandContainerWithUsedAvailableSizeWhen
bool requiresUncachedMocs = false; bool requiresUncachedMocs = false;
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs); EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
auto cmdBuffersCountAfter = cmdContainer->getCmdBufferAllocations().size(); auto cmdBuffersCountAfter = cmdContainer->getCmdBufferAllocations().size();
@@ -241,6 +247,7 @@ HWTEST_F(CommandEncodeStatesTest, givenCommandContainerWithUsedAvailableSizeWhen
} }
HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, givenSlmTotalSizeGraterThanZeroWhenDispatchingKernelThenSharedMemorySizeSetCorrectly) { HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, givenSlmTotalSizeGraterThanZeroWhenDispatchingKernelThenSharedMemorySizeSetCorrectly) {
using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA; using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA;
uint32_t dims[] = {2, 1, 1}; uint32_t dims[] = {2, 1, 1};
std::unique_ptr<MockDispatchKernelEncoder> dispatchInterface(new MockDispatchKernelEncoder()); std::unique_ptr<MockDispatchKernelEncoder> dispatchInterface(new MockDispatchKernelEncoder());
@@ -251,7 +258,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, givenSlmTotalSizeGraterThan
bool requiresUncachedMocs = false; bool requiresUncachedMocs = false;
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs); EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
auto interfaceDescriptorData = static_cast<INTERFACE_DESCRIPTOR_DATA *>(cmdContainer->getIddBlock()); auto interfaceDescriptorData = static_cast<INTERFACE_DESCRIPTOR_DATA *>(cmdContainer->getIddBlock());
auto &gfxcoreHelper = this->getHelper<GfxCoreHelper>(); auto &gfxcoreHelper = this->getHelper<GfxCoreHelper>();
@@ -262,13 +269,14 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, givenSlmTotalSizeGraterThan
} }
HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, whenDispatchingKernelThenSetDenormMode) { HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, whenDispatchingKernelThenSetDenormMode) {
using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA; using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA;
uint32_t dims[] = {2, 1, 1}; uint32_t dims[] = {2, 1, 1};
std::unique_ptr<MockDispatchKernelEncoder> dispatchInterface(new MockDispatchKernelEncoder()); std::unique_ptr<MockDispatchKernelEncoder> dispatchInterface(new MockDispatchKernelEncoder());
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, false); EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, false);
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs); EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
auto interfaceDescriptorData = static_cast<INTERFACE_DESCRIPTOR_DATA *>(cmdContainer->getIddBlock()); auto interfaceDescriptorData = static_cast<INTERFACE_DESCRIPTOR_DATA *>(cmdContainer->getIddBlock());
@@ -276,6 +284,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, whenDispatchingKernelThenSe
} }
HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, givenDebuggingEnabledAndAssertInKernelWhenDispatchingKernelThenSwExceptionsAreEnabled) { HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, givenDebuggingEnabledAndAssertInKernelWhenDispatchingKernelThenSwExceptionsAreEnabled) {
using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA; using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA;
uint32_t dims[] = {2, 1, 1}; uint32_t dims[] = {2, 1, 1};
std::unique_ptr<MockDispatchKernelEncoder> dispatchInterface(new MockDispatchKernelEncoder()); std::unique_ptr<MockDispatchKernelEncoder> dispatchInterface(new MockDispatchKernelEncoder());
@@ -286,13 +295,14 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, givenDebuggingEnabledAndAss
dispatchInterface->kernelDescriptor.kernelAttributes.flags.usesAssert = true; dispatchInterface->kernelDescriptor.kernelAttributes.flags.usesAssert = true;
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, false); EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, false);
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs); EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
auto interfaceDescriptorData = static_cast<INTERFACE_DESCRIPTOR_DATA *>(cmdContainer->getIddBlock()); auto interfaceDescriptorData = static_cast<INTERFACE_DESCRIPTOR_DATA *>(cmdContainer->getIddBlock());
EXPECT_TRUE(interfaceDescriptorData->getSoftwareExceptionEnable()); EXPECT_TRUE(interfaceDescriptorData->getSoftwareExceptionEnable());
} }
HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, givenSlmTotalSizeEqualZeroWhenDispatchingKernelThenSharedMemorySizeSetCorrectly) { HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, givenSlmTotalSizeEqualZeroWhenDispatchingKernelThenSharedMemorySizeSetCorrectly) {
using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA; using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA;
uint32_t dims[] = {2, 1, 1}; uint32_t dims[] = {2, 1, 1};
std::unique_ptr<MockDispatchKernelEncoder> dispatchInterface(new MockDispatchKernelEncoder()); std::unique_ptr<MockDispatchKernelEncoder> dispatchInterface(new MockDispatchKernelEncoder());
@@ -303,7 +313,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, givenSlmTotalSizeEqualZeroW
bool requiresUncachedMocs = false; bool requiresUncachedMocs = false;
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs); EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
auto interfaceDescriptorData = static_cast<INTERFACE_DESCRIPTOR_DATA *>(cmdContainer->getIddBlock()); auto interfaceDescriptorData = static_cast<INTERFACE_DESCRIPTOR_DATA *>(cmdContainer->getIddBlock());
@@ -313,6 +323,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, givenSlmTotalSizeEqualZeroW
} }
HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, givenOneBindingTableEntryWhenDispatchingKernelThenBindingTableOffsetIsCorrect) { HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, givenOneBindingTableEntryWhenDispatchingKernelThenBindingTableOffsetIsCorrect) {
using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
using BINDING_TABLE_STATE = typename FamilyType::BINDING_TABLE_STATE; using BINDING_TABLE_STATE = typename FamilyType::BINDING_TABLE_STATE;
using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA; using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA;
uint32_t numBindingTable = 1; uint32_t numBindingTable = 1;
@@ -337,7 +348,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, givenOneBindingTableEntryWh
bool requiresUncachedMocs = false; bool requiresUncachedMocs = false;
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs); EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
auto interfaceDescriptorData = static_cast<INTERFACE_DESCRIPTOR_DATA *>(cmdContainer->getIddBlock()); auto interfaceDescriptorData = static_cast<INTERFACE_DESCRIPTOR_DATA *>(cmdContainer->getIddBlock());
@@ -345,6 +356,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, givenOneBindingTableEntryWh
} }
HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, giveNumBindingTableZeroWhenDispatchingKernelThenBindingTableOffsetIsZero) { HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, giveNumBindingTableZeroWhenDispatchingKernelThenBindingTableOffsetIsZero) {
using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
using BINDING_TABLE_STATE = typename FamilyType::BINDING_TABLE_STATE; using BINDING_TABLE_STATE = typename FamilyType::BINDING_TABLE_STATE;
using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA; using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA;
uint32_t numBindingTable = 0; uint32_t numBindingTable = 0;
@@ -367,7 +379,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, giveNumBindingTableZeroWhen
bool requiresUncachedMocs = false; bool requiresUncachedMocs = false;
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs); EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
auto interfaceDescriptorData = static_cast<INTERFACE_DESCRIPTOR_DATA *>(cmdContainer->getIddBlock()); auto interfaceDescriptorData = static_cast<INTERFACE_DESCRIPTOR_DATA *>(cmdContainer->getIddBlock());
@@ -375,6 +387,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, giveNumBindingTableZeroWhen
} }
HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, giveNumSamplersOneWhenDispatchingKernelThensamplerStateWasCopied) { HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, giveNumSamplersOneWhenDispatchingKernelThensamplerStateWasCopied) {
using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
using SAMPLER_STATE = typename FamilyType::SAMPLER_STATE; using SAMPLER_STATE = typename FamilyType::SAMPLER_STATE;
using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA; using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA;
uint32_t numSamplers = 1; uint32_t numSamplers = 1;
@@ -398,7 +411,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, giveNumSamplersOneWhenDispa
dispatchArgs.surfaceStateHeap = cmdContainer->getIndirectHeap(HeapType::SURFACE_STATE); dispatchArgs.surfaceStateHeap = cmdContainer->getIndirectHeap(HeapType::SURFACE_STATE);
dispatchArgs.dynamicStateHeap = cmdContainer->getIndirectHeap(HeapType::DYNAMIC_STATE); dispatchArgs.dynamicStateHeap = cmdContainer->getIndirectHeap(HeapType::DYNAMIC_STATE);
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs); EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
auto interfaceDescriptorData = static_cast<INTERFACE_DESCRIPTOR_DATA *>(cmdContainer->getIddBlock()); auto interfaceDescriptorData = static_cast<INTERFACE_DESCRIPTOR_DATA *>(cmdContainer->getIddBlock());
@@ -412,6 +425,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, giveNumSamplersOneWhenDispa
} }
HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, giveNumSamplersZeroWhenDispatchingKernelThensamplerStateWasNotCopied) { HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, giveNumSamplersZeroWhenDispatchingKernelThensamplerStateWasNotCopied) {
using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
using SAMPLER_STATE = typename FamilyType::SAMPLER_STATE; using SAMPLER_STATE = typename FamilyType::SAMPLER_STATE;
using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA; using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA;
uint32_t numSamplers = 0; uint32_t numSamplers = 0;
@@ -433,7 +447,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, giveNumSamplersZeroWhenDisp
bool requiresUncachedMocs = false; bool requiresUncachedMocs = false;
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs); EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
auto interfaceDescriptorData = static_cast<INTERFACE_DESCRIPTOR_DATA *>(cmdContainer->getIddBlock()); auto interfaceDescriptorData = static_cast<INTERFACE_DESCRIPTOR_DATA *>(cmdContainer->getIddBlock());
@@ -447,6 +461,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, giveNumSamplersZeroWhenDisp
} }
HWTEST_F(CommandEncodeStatesTest, givenIndirectOffsetsCountsWhenDispatchingKernelThenCorrestMIStoreOffsetsSet) { HWTEST_F(CommandEncodeStatesTest, givenIndirectOffsetsCountsWhenDispatchingKernelThenCorrestMIStoreOffsetsSet) {
using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
using MI_STORE_REGISTER_MEM = typename FamilyType::MI_STORE_REGISTER_MEM; using MI_STORE_REGISTER_MEM = typename FamilyType::MI_STORE_REGISTER_MEM;
uint32_t dims[] = {2, 1, 1}; uint32_t dims[] = {2, 1, 1};
uint32_t offsets[] = {0x10, 0x20, 0x30}; uint32_t offsets[] = {0x10, 0x20, 0x30};
@@ -459,7 +474,7 @@ HWTEST_F(CommandEncodeStatesTest, givenIndirectOffsetsCountsWhenDispatchingKerne
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
dispatchArgs.isIndirect = true; dispatchArgs.isIndirect = true;
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs); EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
GenCmdList commands; GenCmdList commands;
CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed()); CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed());
@@ -473,6 +488,7 @@ HWTEST_F(CommandEncodeStatesTest, givenIndirectOffsetsCountsWhenDispatchingKerne
} }
HWTEST_F(CommandEncodeStatesTest, givenIndirectOffsetsSizeWhenDispatchingKernelThenMiMathEncoded) { HWTEST_F(CommandEncodeStatesTest, givenIndirectOffsetsSizeWhenDispatchingKernelThenMiMathEncoded) {
using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
using MI_MATH = typename FamilyType::MI_MATH; using MI_MATH = typename FamilyType::MI_MATH;
uint32_t dims[] = {2, 1, 1}; uint32_t dims[] = {2, 1, 1};
uint32_t offsets[] = {0x10, 0x20, 0x30}; uint32_t offsets[] = {0x10, 0x20, 0x30};
@@ -488,7 +504,7 @@ HWTEST_F(CommandEncodeStatesTest, givenIndirectOffsetsSizeWhenDispatchingKernelT
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
dispatchArgs.isIndirect = true; dispatchArgs.isIndirect = true;
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs); EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
GenCmdList commands; GenCmdList commands;
CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed()); CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed());
@@ -498,6 +514,7 @@ HWTEST_F(CommandEncodeStatesTest, givenIndirectOffsetsSizeWhenDispatchingKernelT
} }
HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, givenForceBtpPrefetchModeDebugFlagWhenDispatchingKernelThenValuesAreSetUpCorrectly) { HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, givenForceBtpPrefetchModeDebugFlagWhenDispatchingKernelThenValuesAreSetUpCorrectly) {
using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA; using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA;
using MEDIA_INTERFACE_DESCRIPTOR_LOAD = typename FamilyType::MEDIA_INTERFACE_DESCRIPTOR_LOAD; using MEDIA_INTERFACE_DESCRIPTOR_LOAD = typename FamilyType::MEDIA_INTERFACE_DESCRIPTOR_LOAD;
using SAMPLER_STATE = typename FamilyType::SAMPLER_STATE; using SAMPLER_STATE = typename FamilyType::SAMPLER_STATE;
@@ -530,7 +547,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, givenForceBtpPrefetchModeDe
bool requiresUncachedMocs = false; bool requiresUncachedMocs = false;
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs); EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
auto dsh = cmdContainer->getIndirectHeap(HeapType::DYNAMIC_STATE); auto dsh = cmdContainer->getIndirectHeap(HeapType::DYNAMIC_STATE);
@@ -563,7 +580,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, givenForceBtpPrefetchModeDe
bool requiresUncachedMocs = false; bool requiresUncachedMocs = false;
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs); EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
auto dsh = cmdContainer->getIndirectHeap(HeapType::DYNAMIC_STATE); auto dsh = cmdContainer->getIndirectHeap(HeapType::DYNAMIC_STATE);
@@ -591,7 +608,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, givenForceBtpPrefetchModeDe
bool requiresUncachedMocs = false; bool requiresUncachedMocs = false;
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs); EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
auto dsh = cmdContainer->getIndirectHeap(HeapType::DYNAMIC_STATE); auto dsh = cmdContainer->getIndirectHeap(HeapType::DYNAMIC_STATE);
@@ -612,6 +629,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, givenForceBtpPrefetchModeDe
} }
HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, givenCleanHeapsAndSlmNotChangedWhenDispatchKernelThenFlushNotAdded) { HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, givenCleanHeapsAndSlmNotChangedWhenDispatchKernelThenFlushNotAdded) {
using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
uint32_t dims[] = {2, 1, 1}; uint32_t dims[] = {2, 1, 1};
std::unique_ptr<MockDispatchKernelEncoder> dispatchInterface(new MockDispatchKernelEncoder()); std::unique_ptr<MockDispatchKernelEncoder> dispatchInterface(new MockDispatchKernelEncoder());
@@ -622,7 +640,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, givenCleanHeapsAndSlmNotCha
bool requiresUncachedMocs = false; bool requiresUncachedMocs = false;
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs); EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
GenCmdList commands; GenCmdList commands;
CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed()); CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed());
@@ -632,6 +650,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, givenCleanHeapsAndSlmNotCha
} }
HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, givenCleanHeapsAndSlmNotChangedAndUncachedMocsRequestedThenSBAIsProgrammedAndMocsAreSet) { HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, givenCleanHeapsAndSlmNotChangedAndUncachedMocsRequestedThenSBAIsProgrammedAndMocsAreSet) {
using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
uint32_t dims[] = {2, 1, 1}; uint32_t dims[] = {2, 1, 1};
std::unique_ptr<MockDispatchKernelEncoder> dispatchInterface(new MockDispatchKernelEncoder()); std::unique_ptr<MockDispatchKernelEncoder> dispatchInterface(new MockDispatchKernelEncoder());
cmdContainer->slmSizeRef() = 1; cmdContainer->slmSizeRef() = 1;
@@ -641,7 +660,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, givenCleanHeapsAndSlmNotCha
bool requiresUncachedMocs = true; bool requiresUncachedMocs = true;
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs); EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
GenCmdList commands; GenCmdList commands;
CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed()); CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed());
@@ -657,6 +676,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, givenCleanHeapsAndSlmNotCha
} }
HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, givenDirtyHeapsAndSlmNotChangedWhenDispatchKernelThenHeapsAreCleanAndFlushAdded) { HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, givenDirtyHeapsAndSlmNotChangedWhenDispatchKernelThenHeapsAreCleanAndFlushAdded) {
using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
uint32_t dims[] = {2, 1, 1}; uint32_t dims[] = {2, 1, 1};
@@ -668,7 +688,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, givenDirtyHeapsAndSlmNotCha
bool requiresUncachedMocs = false; bool requiresUncachedMocs = false;
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs); EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
GenCmdList commands; GenCmdList commands;
CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed()); CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed());
@@ -679,6 +699,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, givenDirtyHeapsAndSlmNotCha
} }
HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, givenDirtyHeapsWhenDispatchKernelThenPCIsAddedBeforeSBA) { HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, givenDirtyHeapsWhenDispatchKernelThenPCIsAddedBeforeSBA) {
using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS; using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS;
@@ -692,7 +713,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, givenDirtyHeapsWhenDispatch
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
dispatchArgs.dcFlushEnable = MemorySynchronizationCommands<FamilyType>::getDcFlushEnable(true, pDevice->getRootDeviceEnvironment()); dispatchArgs.dcFlushEnable = MemorySynchronizationCommands<FamilyType>::getDcFlushEnable(true, pDevice->getRootDeviceEnvironment());
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs); EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
GenCmdList cmdList; GenCmdList cmdList;
CmdParse<FamilyType>::parseCommandBuffer(cmdList, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed()); CmdParse<FamilyType>::parseCommandBuffer(cmdList, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed());
@@ -720,6 +741,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, givenDirtyHeapsWhenDispatch
} }
HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, givenCleanHeapsAndSlmChangedWhenDispatchKernelThenFlushAdded) { HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, givenCleanHeapsAndSlmChangedWhenDispatchKernelThenFlushAdded) {
using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
uint32_t dims[] = {2, 1, 1}; uint32_t dims[] = {2, 1, 1};
std::unique_ptr<MockDispatchKernelEncoder> dispatchInterface(new MockDispatchKernelEncoder()); std::unique_ptr<MockDispatchKernelEncoder> dispatchInterface(new MockDispatchKernelEncoder());
@@ -732,7 +754,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, givenCleanHeapsAndSlmChange
bool requiresUncachedMocs = false; bool requiresUncachedMocs = false;
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs); EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
GenCmdList commands; GenCmdList commands;
CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed()); CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed());
@@ -743,6 +765,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, givenCleanHeapsAndSlmChange
} }
HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, giveNextIddInBlockZeroWhenDispatchKernelThenMediaInterfaceDescriptorEncoded) { HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, giveNextIddInBlockZeroWhenDispatchKernelThenMediaInterfaceDescriptorEncoded) {
using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA; using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA;
using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS; using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS;
@@ -757,7 +780,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, giveNextIddInBlockZeroWhenD
bool requiresUncachedMocs = false; bool requiresUncachedMocs = false;
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs); EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
GenCmdList commands; GenCmdList commands;
CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed()); CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed());
@@ -769,6 +792,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, giveNextIddInBlockZeroWhenD
} }
HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, giveNextIddInBlockZeroWhenDispatchKernelAndDynamicStateHeapDirtyThenStateBaseAddressEncodedAndMediaInterfaceDescriptorEncoded) { HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, giveNextIddInBlockZeroWhenDispatchKernelAndDynamicStateHeapDirtyThenStateBaseAddressEncodedAndMediaInterfaceDescriptorEncoded) {
using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA; using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA;
using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS; using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS;
@@ -787,7 +811,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, giveNextIddInBlockZeroWhenD
bool requiresUncachedMocs = false; bool requiresUncachedMocs = false;
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs); EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
GenCmdList commands; GenCmdList commands;
CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed()); CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed());
@@ -799,6 +823,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, giveNextIddInBlockZeroWhenD
} }
HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, giveNumSamplersOneWhenHeapIsDirtyThenSamplerStateWasCopiedAndStateBaseAddressEncoded) { HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, giveNumSamplersOneWhenHeapIsDirtyThenSamplerStateWasCopiedAndStateBaseAddressEncoded) {
using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
using SAMPLER_STATE = typename FamilyType::SAMPLER_STATE; using SAMPLER_STATE = typename FamilyType::SAMPLER_STATE;
using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA; using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA;
using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS; using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS;
@@ -824,7 +849,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, giveNumSamplersOneWhenHeapI
dshBeforeFlush->getSpace(dshBeforeFlush->getAvailableSpace() - NEO::EncodeDispatchKernel<FamilyType>::getSizeRequiredDsh(kernelDescriptor, cmdContainer->getNumIddPerBlock())); dshBeforeFlush->getSpace(dshBeforeFlush->getAvailableSpace() - NEO::EncodeDispatchKernel<FamilyType>::getSizeRequiredDsh(kernelDescriptor, cmdContainer->getNumIddPerBlock()));
auto cpuBaseBeforeFlush = dshBeforeFlush->getCpuBase(); auto cpuBaseBeforeFlush = dshBeforeFlush->getCpuBase();
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs); EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
GenCmdList commands; GenCmdList commands;
CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed()); CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed());
@@ -848,6 +873,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, giveNumSamplersOneWhenHeapI
} }
HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, giveNumSamplersOneAndNextIDDInBlockWhenHeapIsDirtyThenSamplerStateWasCopiedAndStateBaseAddressEncoded) { HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, giveNumSamplersOneAndNextIDDInBlockWhenHeapIsDirtyThenSamplerStateWasCopiedAndStateBaseAddressEncoded) {
using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
using SAMPLER_STATE = typename FamilyType::SAMPLER_STATE; using SAMPLER_STATE = typename FamilyType::SAMPLER_STATE;
using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA; using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA;
using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS; using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS;
@@ -879,7 +905,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, giveNumSamplersOneAndNextID
auto cpuBaseBeforeFlush = dshBeforeFlush->getCpuBase(); auto cpuBaseBeforeFlush = dshBeforeFlush->getCpuBase();
auto usedBefore = cmdContainer->getIndirectHeap(HeapType::SURFACE_STATE)->getUsed(); auto usedBefore = cmdContainer->getIndirectHeap(HeapType::SURFACE_STATE)->getUsed();
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs); EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
GenCmdList commands; GenCmdList commands;
CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed()); CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed());
@@ -903,6 +929,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, giveNumSamplersOneAndNextID
} }
HWTEST_F(CommandEncodeStatesTest, givenPauseOnEnqueueSetToNeverWhenEncodingWalkerThenCommandsToPatchAreNotPresent) { HWTEST_F(CommandEncodeStatesTest, givenPauseOnEnqueueSetToNeverWhenEncodingWalkerThenCommandsToPatchAreNotPresent) {
using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
DebugManagerStateRestore restorer; DebugManagerStateRestore restorer;
debugManager.flags.PauseOnEnqueue.set(-1); debugManager.flags.PauseOnEnqueue.set(-1);
@@ -913,12 +940,13 @@ HWTEST_F(CommandEncodeStatesTest, givenPauseOnEnqueueSetToNeverWhenEncodingWalke
std::list<void *> cmdsToPatch; std::list<void *> cmdsToPatch;
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
dispatchArgs.additionalCommands = &cmdsToPatch; dispatchArgs.additionalCommands = &cmdsToPatch;
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs); EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
EXPECT_EQ(cmdsToPatch.size(), 0u); EXPECT_EQ(cmdsToPatch.size(), 0u);
} }
HWTEST_F(CommandEncodeStatesTest, givenPauseOnEnqueueSetToAlwaysWhenEncodingWalkerThenCommandsToPatchAreFilled) { HWTEST_F(CommandEncodeStatesTest, givenPauseOnEnqueueSetToAlwaysWhenEncodingWalkerThenCommandsToPatchAreFilled) {
using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
DebugManagerStateRestore restorer; DebugManagerStateRestore restorer;
debugManager.flags.PauseOnEnqueue.set(-2); debugManager.flags.PauseOnEnqueue.set(-2);
@@ -929,7 +957,7 @@ HWTEST_F(CommandEncodeStatesTest, givenPauseOnEnqueueSetToAlwaysWhenEncodingWalk
std::list<void *> cmdsToPatch; std::list<void *> cmdsToPatch;
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
dispatchArgs.additionalCommands = &cmdsToPatch; dispatchArgs.additionalCommands = &cmdsToPatch;
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs); EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
EXPECT_EQ(cmdsToPatch.size(), 4u); EXPECT_EQ(cmdsToPatch.size(), 4u);
} }
@@ -939,7 +967,7 @@ using EncodeDispatchKernelTest = Test<CommandEncodeStatesFixture>;
HWTEST2_F(EncodeDispatchKernelTest, givenBindfulKernelWhenDispatchingKernelThenSshFromContainerIsUsed, IsAtLeastSkl) { HWTEST2_F(EncodeDispatchKernelTest, givenBindfulKernelWhenDispatchingKernelThenSshFromContainerIsUsed, IsAtLeastSkl) {
using BINDING_TABLE_STATE = typename FamilyType::BINDING_TABLE_STATE; using BINDING_TABLE_STATE = typename FamilyType::BINDING_TABLE_STATE;
using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA; using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA;
using WALKER = typename FamilyType::WALKER_TYPE; using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
uint32_t numBindingTable = 1; uint32_t numBindingTable = 1;
BINDING_TABLE_STATE bindingTableState = FamilyType::cmdInitBindingTableState; BINDING_TABLE_STATE bindingTableState = FamilyType::cmdInitBindingTableState;
@@ -961,7 +989,7 @@ HWTEST2_F(EncodeDispatchKernelTest, givenBindfulKernelWhenDispatchingKernelThenS
if (EncodeDispatchKernel<FamilyType>::isDshNeeded(pDevice->getDeviceInfo())) { if (EncodeDispatchKernel<FamilyType>::isDshNeeded(pDevice->getDeviceInfo())) {
dispatchArgs.dynamicStateHeap = cmdContainer->getIndirectHeap(HeapType::DYNAMIC_STATE); dispatchArgs.dynamicStateHeap = cmdContainer->getIndirectHeap(HeapType::DYNAMIC_STATE);
} }
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs); EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
auto usedAfter = cmdContainer->getIndirectHeap(HeapType::SURFACE_STATE)->getUsed(); auto usedAfter = cmdContainer->getIndirectHeap(HeapType::SURFACE_STATE)->getUsed();
@@ -971,7 +999,7 @@ HWTEST2_F(EncodeDispatchKernelTest, givenBindfulKernelWhenDispatchingKernelThenS
HWTEST2_F(EncodeDispatchKernelTest, givenBindlessKernelWhenDispatchingKernelThenThenSshFromContainerIsUsed, IsAtLeastSkl) { HWTEST2_F(EncodeDispatchKernelTest, givenBindlessKernelWhenDispatchingKernelThenThenSshFromContainerIsUsed, IsAtLeastSkl) {
using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA; using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA;
using WALKER = typename FamilyType::WALKER_TYPE; using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
uint32_t numBindingTable = 1; uint32_t numBindingTable = 1;
RENDER_SURFACE_STATE state = FamilyType::cmdInitRenderSurfaceState; RENDER_SURFACE_STATE state = FamilyType::cmdInitRenderSurfaceState;
@@ -990,7 +1018,7 @@ HWTEST2_F(EncodeDispatchKernelTest, givenBindlessKernelWhenDispatchingKernelThen
auto usedBefore = cmdContainer->getIndirectHeap(HeapType::SURFACE_STATE)->getUsed(); auto usedBefore = cmdContainer->getIndirectHeap(HeapType::SURFACE_STATE)->getUsed();
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs); EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
auto usedAfter = cmdContainer->getIndirectHeap(HeapType::SURFACE_STATE)->getUsed(); auto usedAfter = cmdContainer->getIndirectHeap(HeapType::SURFACE_STATE)->getUsed();
@@ -999,13 +1027,14 @@ HWTEST2_F(EncodeDispatchKernelTest, givenBindlessKernelWhenDispatchingKernelThen
HWTEST2_F(EncodeDispatchKernelTest, givenKernelsSharingISAParentAllocationsWhenProgrammingWalkerThenKernelStartPointerHasProperOffset, IsBeforeXeHpCore) { HWTEST2_F(EncodeDispatchKernelTest, givenKernelsSharingISAParentAllocationsWhenProgrammingWalkerThenKernelStartPointerHasProperOffset, IsBeforeXeHpCore) {
using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA; using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA;
using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
auto dispatchInterface = std::make_unique<MockDispatchKernelEncoder>(); auto dispatchInterface = std::make_unique<MockDispatchKernelEncoder>();
dispatchInterface->getIsaOffsetInParentAllocationResult = 8 << INTERFACE_DESCRIPTOR_DATA::KERNELSTARTPOINTER_BIT_SHIFT; dispatchInterface->getIsaOffsetInParentAllocationResult = 8 << INTERFACE_DESCRIPTOR_DATA::KERNELSTARTPOINTER_BIT_SHIFT;
uint32_t dims[] = {2, 1, 1}; uint32_t dims[] = {2, 1, 1};
bool requiresUncachedMocs = false; bool requiresUncachedMocs = false;
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs); EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
auto idd = static_cast<INTERFACE_DESCRIPTOR_DATA *>(cmdContainer->getIddBlock()); auto idd = static_cast<INTERFACE_DESCRIPTOR_DATA *>(cmdContainer->getIddBlock());
EXPECT_EQ(idd->getKernelStartPointer(), dispatchInterface->getIsaAllocation()->getGpuAddressToPatch() + dispatchInterface->getIsaOffsetInParentAllocation()); EXPECT_EQ(idd->getKernelStartPointer(), dispatchInterface->getIsaAllocation()->getGpuAddressToPatch() + dispatchInterface->getIsaOffsetInParentAllocation());
@@ -1025,7 +1054,7 @@ HWTEST2_F(EncodeDispatchKernelTest, givenKernelsSharingISAParentAllocationsWhenP
uint32_t dims[] = {2, 1, 1}; uint32_t dims[] = {2, 1, 1};
bool requiresUncachedMocs = false; bool requiresUncachedMocs = false;
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs); EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
GenCmdList commands; GenCmdList commands;
CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed()); CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed());
@@ -1037,6 +1066,7 @@ HWTEST2_F(EncodeDispatchKernelTest, givenKernelsSharingISAParentAllocationsWhenP
} }
HWTEST2_F(EncodeDispatchKernelTest, givenPrintKernelDispatchParametersWhenEncodingKernelThenPrintKernelDispatchParams, IsAtLeastXeHpCore) { HWTEST2_F(EncodeDispatchKernelTest, givenPrintKernelDispatchParametersWhenEncodingKernelThenPrintKernelDispatchParams, IsAtLeastXeHpCore) {
using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
auto dispatchInterface = std::make_unique<MockDispatchKernelEncoder>(); auto dispatchInterface = std::make_unique<MockDispatchKernelEncoder>();
uint32_t dims[] = {2, 1, 1}; uint32_t dims[] = {2, 1, 1};
@@ -1046,7 +1076,7 @@ HWTEST2_F(EncodeDispatchKernelTest, givenPrintKernelDispatchParametersWhenEncodi
DebugManagerStateRestore restore; DebugManagerStateRestore restore;
debugManager.flags.PrintKernelDispatchParameters.set(true); debugManager.flags.PrintKernelDispatchParameters.set(true);
testing::internal::CaptureStdout(); // start capturing testing::internal::CaptureStdout(); // start capturing
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs); EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
std::string outputString = testing::internal::GetCapturedStdout(); // stop capturing std::string outputString = testing::internal::GetCapturedStdout(); // stop capturing
EXPECT_NE(std::string::npos, outputString.find("kernel")); EXPECT_NE(std::string::npos, outputString.find("kernel"));
@@ -1062,6 +1092,7 @@ HWTEST2_F(EncodeDispatchKernelTest, givenPrintKernelDispatchParametersWhenEncodi
HWTEST_F(EncodeDispatchKernelTest, givenNonBindlessOrStatelessArgWhenDispatchingKernelThenSurfaceStateOffsetInCrossThreadDataIsNotPatched) { HWTEST_F(EncodeDispatchKernelTest, givenNonBindlessOrStatelessArgWhenDispatchingKernelThenSurfaceStateOffsetInCrossThreadDataIsNotPatched) {
using BINDING_TABLE_STATE = typename FamilyType::BINDING_TABLE_STATE; using BINDING_TABLE_STATE = typename FamilyType::BINDING_TABLE_STATE;
using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
using DataPortBindlessSurfaceExtendedMessageDescriptor = typename FamilyType::DataPortBindlessSurfaceExtendedMessageDescriptor; using DataPortBindlessSurfaceExtendedMessageDescriptor = typename FamilyType::DataPortBindlessSurfaceExtendedMessageDescriptor;
uint32_t numBindingTable = 1; uint32_t numBindingTable = 1;
BINDING_TABLE_STATE bindingTableState = FamilyType::cmdInitBindingTableState; BINDING_TABLE_STATE bindingTableState = FamilyType::cmdInitBindingTableState;
@@ -1113,7 +1144,7 @@ HWTEST_F(EncodeDispatchKernelTest, givenNonBindlessOrStatelessArgWhenDispatching
bool requiresUncachedMocs = false; bool requiresUncachedMocs = false;
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs); EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
EXPECT_EQ(pattern, *patchLocation); EXPECT_EQ(pattern, *patchLocation);
@@ -1141,7 +1172,7 @@ HWTEST_F(EncodeDispatchKernelTest, givenNonBindlessOrStatelessArgWhenDispatching
memset(ioh->getCpuBase(), 0, ioh->getMaxAvailableSpace()); memset(ioh->getCpuBase(), 0, ioh->getMaxAvailableSpace());
dispatchArgs.dispatchInterface = dispatchInterface.get(); dispatchArgs.dispatchInterface = dispatchInterface.get();
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs); EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
EXPECT_TRUE(memoryZeroed(ptrOffset(ioh->getCpuBase(), iohOffset), ioh->getMaxAvailableSpace() - iohOffset)); EXPECT_TRUE(memoryZeroed(ptrOffset(ioh->getCpuBase(), iohOffset), ioh->getMaxAvailableSpace() - iohOffset));
} }
@@ -1350,7 +1381,7 @@ using BindlessCommandEncodeStatesContainerTest = Test<CommandEncodeStatesFixture
HWTEST_F(BindlessCommandEncodeStatesContainerTest, givenBindlessKernelAndBindlessModeEnabledWhenEncodingKernelThenCmdContainerHasNullptrSSH) { HWTEST_F(BindlessCommandEncodeStatesContainerTest, givenBindlessKernelAndBindlessModeEnabledWhenEncodingKernelThenCmdContainerHasNullptrSSH) {
using BINDING_TABLE_STATE = typename FamilyType::BINDING_TABLE_STATE; using BINDING_TABLE_STATE = typename FamilyType::BINDING_TABLE_STATE;
using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA; using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA;
using WALKER = typename FamilyType::WALKER_TYPE; using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
DebugManagerStateRestore dbgRestorer; DebugManagerStateRestore dbgRestorer;
debugManager.flags.UseBindlessMode.set(1); debugManager.flags.UseBindlessMode.set(1);
debugManager.flags.UseExternalAllocatorForSshAndDsh.set(1); debugManager.flags.UseExternalAllocatorForSshAndDsh.set(1);
@@ -1377,7 +1408,7 @@ HWTEST_F(BindlessCommandEncodeStatesContainerTest, givenBindlessKernelAndBindles
EXPECT_EQ(commandContainer->getIndirectHeap(HeapType::SURFACE_STATE), nullptr); EXPECT_EQ(commandContainer->getIndirectHeap(HeapType::SURFACE_STATE), nullptr);
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
EncodeDispatchKernel<FamilyType>::encode(*commandContainer.get(), dispatchArgs); EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*commandContainer.get(), dispatchArgs);
EXPECT_EQ(commandContainer->getIndirectHeap(HeapType::SURFACE_STATE), nullptr); EXPECT_EQ(commandContainer->getIndirectHeap(HeapType::SURFACE_STATE), nullptr);
} }
@@ -1385,7 +1416,7 @@ HWTEST_F(BindlessCommandEncodeStatesContainerTest, givenBindlessKernelAndBindles
HWTEST_F(BindlessCommandEncodeStatesContainerTest, givenBindfulKernelWhenBindlessModeEnabledThenCmdContainerHasSsh) { HWTEST_F(BindlessCommandEncodeStatesContainerTest, givenBindfulKernelWhenBindlessModeEnabledThenCmdContainerHasSsh) {
using BINDING_TABLE_STATE = typename FamilyType::BINDING_TABLE_STATE; using BINDING_TABLE_STATE = typename FamilyType::BINDING_TABLE_STATE;
using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA; using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA;
using WALKER = typename FamilyType::WALKER_TYPE; using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
DebugManagerStateRestore dbgRestorer; DebugManagerStateRestore dbgRestorer;
debugManager.flags.UseBindlessMode.set(1); debugManager.flags.UseBindlessMode.set(1);
auto commandContainer = std::make_unique<CommandContainer>(); auto commandContainer = std::make_unique<CommandContainer>();
@@ -1414,7 +1445,7 @@ HWTEST_F(BindlessCommandEncodeStatesContainerTest, givenBindfulKernelWhenBindles
EXPECT_NE(commandContainer->getIndirectHeap(HeapType::SURFACE_STATE), nullptr); EXPECT_NE(commandContainer->getIndirectHeap(HeapType::SURFACE_STATE), nullptr);
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
EncodeDispatchKernel<FamilyType>::encode(*commandContainer.get(), dispatchArgs); EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*commandContainer.get(), dispatchArgs);
EXPECT_NE(commandContainer->getIndirectHeap(HeapType::SURFACE_STATE), nullptr); EXPECT_NE(commandContainer->getIndirectHeap(HeapType::SURFACE_STATE), nullptr);
} }
@@ -1423,7 +1454,7 @@ using NgenGeneratorDispatchKernelEncodeTest = Test<CommandEncodeStatesFixture>;
HWTEST2_F(NgenGeneratorDispatchKernelEncodeTest, givenBindfulKernelAndIsNotGeneratedByIgcWhenEncodeDispatchKernelThenCmdContainerDoesNotHaveSsh, IsPVC) { HWTEST2_F(NgenGeneratorDispatchKernelEncodeTest, givenBindfulKernelAndIsNotGeneratedByIgcWhenEncodeDispatchKernelThenCmdContainerDoesNotHaveSsh, IsPVC) {
using BINDING_TABLE_STATE = typename FamilyType::BINDING_TABLE_STATE; using BINDING_TABLE_STATE = typename FamilyType::BINDING_TABLE_STATE;
using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
for (auto isGeneratedByIgc : {false, true}) { for (auto isGeneratedByIgc : {false, true}) {
auto commandContainer = std::make_unique<MyMockCommandContainer>(); auto commandContainer = std::make_unique<MyMockCommandContainer>();
commandContainer->initialize(pDevice, nullptr, HeapSize::defaultHeapSize, true, false); commandContainer->initialize(pDevice, nullptr, HeapSize::defaultHeapSize, true, false);
@@ -1453,7 +1484,7 @@ HWTEST2_F(NgenGeneratorDispatchKernelEncodeTest, givenBindfulKernelAndIsNotGener
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
EncodeDispatchKernel<FamilyType>::encode(*commandContainer.get(), dispatchArgs); EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*commandContainer.get(), dispatchArgs);
if (isGeneratedByIgc) { if (isGeneratedByIgc) {
EXPECT_NE(commandContainer->getIndirectHeap(HeapType::SURFACE_STATE), nullptr); EXPECT_NE(commandContainer->getIndirectHeap(HeapType::SURFACE_STATE), nullptr);
@@ -1548,12 +1579,13 @@ HWTEST_F(CommandEncodeStatesTest, givenKernelInfoOfBindlessKernelWhenGettingRequ
} }
HWTEST_F(CommandEncodeStatesTest, givenCommandContainerWhenIsKernelDispatchedFromImmediateCmdListTrueThenGetHeapWithRequiredSizeAndAlignmentCalled) { HWTEST_F(CommandEncodeStatesTest, givenCommandContainerWhenIsKernelDispatchedFromImmediateCmdListTrueThenGetHeapWithRequiredSizeAndAlignmentCalled) {
using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
std::unique_ptr<MockDispatchKernelEncoder> dispatchInterface(new MockDispatchKernelEncoder()); std::unique_ptr<MockDispatchKernelEncoder> dispatchInterface(new MockDispatchKernelEncoder());
uint32_t dims[] = {1, 1, 1}; uint32_t dims[] = {1, 1, 1};
bool requiresUncachedMocs = false; bool requiresUncachedMocs = false;
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
dispatchArgs.isKernelDispatchedFromImmediateCmdList = true; dispatchArgs.isKernelDispatchedFromImmediateCmdList = true;
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs); EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
EXPECT_NE(0u, cmdContainer->getHeapWithRequiredSizeAndAlignmentCalled); EXPECT_NE(0u, cmdContainer->getHeapWithRequiredSizeAndAlignmentCalled);
} }

View File

@@ -37,7 +37,7 @@ HWTEST2_F(CommandEncodeStatesTestDg2AndLater, givenEventAddressWhenEncodeAndPVCA
dispatchArgs.eventAddress = eventAddress; dispatchArgs.eventAddress = eventAddress;
dispatchArgs.isTimestampEvent = true; dispatchArgs.isTimestampEvent = true;
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs); EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
GenCmdList commands; GenCmdList commands;
CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed()); CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed());
@@ -62,7 +62,7 @@ HWTEST2_F(CommandEncodeStatesTestDg2AndLater, givenEventAddressWhenEncodeThenMoc
dispatchArgs.isTimestampEvent = true; dispatchArgs.isTimestampEvent = true;
dispatchArgs.dcFlushEnable = MemorySynchronizationCommands<FamilyType>::getDcFlushEnable(true, pDevice->getRootDeviceEnvironment()); dispatchArgs.dcFlushEnable = MemorySynchronizationCommands<FamilyType>::getDcFlushEnable(true, pDevice->getRootDeviceEnvironment());
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs); EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
GenCmdList commands; GenCmdList commands;
CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed()); CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed());
@@ -131,7 +131,7 @@ HWTEST2_F(CommandEncodeStatesTestDg2AndLater, givenOverridePreferredSlmAllocatio
bool requiresUncachedMocs = false; bool requiresUncachedMocs = false;
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs); EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
GenCmdList commands; GenCmdList commands;
CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed()); CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed());

View File

@@ -40,7 +40,7 @@ HWTEST2_F(CommandEncodeStatesTestPvcAndLater, givenOverrideSlmTotalSizeDebugVari
cmdContainer->reset(); cmdContainer->reset();
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs); EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
GenCmdList commands; GenCmdList commands;
CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed()); CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed());

View File

@@ -48,7 +48,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesTest, givenSlmTotalSizeGraterTha
bool requiresUncachedMocs = false; bool requiresUncachedMocs = false;
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs); EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
GenCmdList commands; GenCmdList commands;
CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed()); CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed());
@@ -75,7 +75,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesTest, givenXeHpAndLaterWhenDispa
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, false); EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, false);
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs); EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
GenCmdList commands; GenCmdList commands;
CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed()); CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed());
@@ -102,7 +102,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesTest, givenXeHpDebuggingEnabledA
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, false); EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, false);
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs); EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
GenCmdList commands; GenCmdList commands;
CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed()); CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed());
@@ -126,7 +126,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesTest, givenSimdSizeWhenDispatchi
bool requiresUncachedMocs = false; bool requiresUncachedMocs = false;
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs); EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
GenCmdList commands; GenCmdList commands;
CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed()); CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed());
@@ -149,7 +149,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesTest, givenSlmTotalSizeEqualZero
bool requiresUncachedMocs = false; bool requiresUncachedMocs = false;
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs); EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
GenCmdList commands; GenCmdList commands;
CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed()); CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed());
@@ -183,7 +183,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesTest, givenOverrideSlmTotalSizeD
cmdContainer->reset(); cmdContainer->reset();
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs); EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
GenCmdList commands; GenCmdList commands;
CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed()); CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed());
@@ -221,7 +221,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesTest, givenStatelessBufferAndIma
bool requiresUncachedMocs = false; bool requiresUncachedMocs = false;
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs); EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
GenCmdList commands; GenCmdList commands;
CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed()); CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed());
@@ -258,7 +258,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesTest, givennumBindingTableOneWhe
bool requiresUncachedMocs = false; bool requiresUncachedMocs = false;
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs); EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
GenCmdList commands; GenCmdList commands;
CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed()); CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed());
@@ -295,7 +295,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesTest, giveNumBindingTableZeroWhe
bool requiresUncachedMocs = false; bool requiresUncachedMocs = false;
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs); EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
GenCmdList commands; GenCmdList commands;
CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed()); CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed());
@@ -350,7 +350,7 @@ HWTEST2_F(CommandEncodeStatesTest, giveNumSamplersOneWhenDispatchKernelThensampl
dispatchArgs.surfaceStateHeap = cmdContainer->getIndirectHeap(HeapType::SURFACE_STATE); dispatchArgs.surfaceStateHeap = cmdContainer->getIndirectHeap(HeapType::SURFACE_STATE);
dispatchArgs.dynamicStateHeap = cmdContainer->getIndirectHeap(HeapType::DYNAMIC_STATE); dispatchArgs.dynamicStateHeap = cmdContainer->getIndirectHeap(HeapType::DYNAMIC_STATE);
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs); EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
GenCmdList commands; GenCmdList commands;
CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed()); CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed());
@@ -382,7 +382,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesTest, givenEventAllocationWhenDi
dispatchArgs.eventAddress = eventAddress; dispatchArgs.eventAddress = eventAddress;
dispatchArgs.isTimestampEvent = true; dispatchArgs.isTimestampEvent = true;
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs); EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
GenCmdList commands; GenCmdList commands;
CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed()); CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed());
@@ -407,7 +407,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesTest, givenEventAddressWhenEncod
dispatchArgs.isTimestampEvent = true; dispatchArgs.isTimestampEvent = true;
dispatchArgs.dcFlushEnable = MemorySynchronizationCommands<FamilyType>::getDcFlushEnable(true, pDevice->getRootDeviceEnvironment()); dispatchArgs.dcFlushEnable = MemorySynchronizationCommands<FamilyType>::getDcFlushEnable(true, pDevice->getRootDeviceEnvironment());
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs); EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
GenCmdList commands; GenCmdList commands;
CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed()); CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed());
@@ -425,6 +425,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesTest, givenEventAddressWhenEncod
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesTest, givenCleanHeapsWhenDispatchKernelThenFlushNotAdded) { HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesTest, givenCleanHeapsWhenDispatchKernelThenFlushNotAdded) {
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
uint32_t dims[] = {2, 1, 1}; uint32_t dims[] = {2, 1, 1};
std::unique_ptr<MockDispatchKernelEncoder> dispatchInterface(new MockDispatchKernelEncoder()); std::unique_ptr<MockDispatchKernelEncoder> dispatchInterface(new MockDispatchKernelEncoder());
cmdContainer->slmSizeRef() = 1; cmdContainer->slmSizeRef() = 1;
@@ -433,7 +434,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesTest, givenCleanHeapsWhenDispatc
bool requiresUncachedMocs = false; bool requiresUncachedMocs = false;
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs); EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
GenCmdList commands; GenCmdList commands;
CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed()); CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed());
@@ -474,7 +475,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesTest, givenForceBtpPrefetchModeD
bool requiresUncachedMocs = false; bool requiresUncachedMocs = false;
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs); EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
GenCmdList commands; GenCmdList commands;
CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed()); CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed());
@@ -508,7 +509,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesTest, givenForceBtpPrefetchModeD
bool requiresUncachedMocs = false; bool requiresUncachedMocs = false;
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs); EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
GenCmdList commands; GenCmdList commands;
CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed()); CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed());
@@ -533,7 +534,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesTest, givenForceBtpPrefetchModeD
bool requiresUncachedMocs = false; bool requiresUncachedMocs = false;
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs); EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
GenCmdList commands; GenCmdList commands;
CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed()); CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed());
@@ -580,7 +581,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesTest, givenInlineDataRequiredWhe
bool requiresUncachedMocs = false; bool requiresUncachedMocs = false;
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs); EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
GenCmdList commands; GenCmdList commands;
CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed()); CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed());
@@ -607,7 +608,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesTest, givenInlineDataRequiredIsF
bool requiresUncachedMocs = false; bool requiresUncachedMocs = false;
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs); EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
GenCmdList commands; GenCmdList commands;
CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed()); CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed());
@@ -634,7 +635,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesTest, givenInlineDataRequiredAnd
bool requiresUncachedMocs = false; bool requiresUncachedMocs = false;
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs); EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
GenCmdList commands; GenCmdList commands;
CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed()); CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed());
@@ -663,7 +664,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesTest, givenInlineDataRequiredAnd
bool requiresUncachedMocs = false; bool requiresUncachedMocs = false;
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs); EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
GenCmdList commands; GenCmdList commands;
CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed()); CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed());
@@ -1071,7 +1072,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesImplicitScaling,
dispatchArgs.eventAddress = eventAddress; dispatchArgs.eventAddress = eventAddress;
dispatchArgs.partitionCount = 2; dispatchArgs.partitionCount = 2;
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs); EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
size_t usedBuffer = cmdContainer->getCommandStream()->getUsed(); size_t usedBuffer = cmdContainer->getCommandStream()->getUsed();
EXPECT_EQ(2u, dispatchArgs.partitionCount); EXPECT_EQ(2u, dispatchArgs.partitionCount);
@@ -1104,7 +1105,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesImplicitScaling, givenCooperativ
dispatchArgs.isInternal = isInternal; dispatchArgs.isInternal = isInternal;
dispatchArgs.isCooperative = isCooperative; dispatchArgs.isCooperative = isCooperative;
dispatchArgs.partitionCount = 2; dispatchArgs.partitionCount = 2;
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs); EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
size_t containerUsedAfterBase = cmdContainer->getCommandStream()->getUsed(); size_t containerUsedAfterBase = cmdContainer->getCommandStream()->getUsed();
@@ -1146,7 +1147,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesDynamicImplicitScaling, givenImp
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
dispatchArgs.isInternal = isInternal; dispatchArgs.isInternal = isInternal;
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs); EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
size_t containerUsedAfterBase = cmdContainer->getCommandStream()->getUsed(); size_t containerUsedAfterBase = cmdContainer->getCommandStream()->getUsed();
@@ -1159,7 +1160,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesDynamicImplicitScaling, givenImp
EXPECT_EQ(16u, baseWalkerCmd->getThreadGroupIdXDimension()); EXPECT_EQ(16u, baseWalkerCmd->getThreadGroupIdXDimension());
dispatchArgs.partitionCount = 2; dispatchArgs.partitionCount = 2;
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs); EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
size_t total = cmdContainer->getCommandStream()->getUsed(); size_t total = cmdContainer->getCommandStream()->getUsed();
size_t partitionedWalkerSize = total - containerUsedAfterBase; size_t partitionedWalkerSize = total - containerUsedAfterBase;
@@ -1210,7 +1211,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesDynamicImplicitScaling, givenImp
dispatchArgs.partitionCount = 2; dispatchArgs.partitionCount = 2;
dispatchArgs.dcFlushEnable = MemorySynchronizationCommands<FamilyType>::getDcFlushEnable(true, pDevice->getRootDeviceEnvironment()); dispatchArgs.dcFlushEnable = MemorySynchronizationCommands<FamilyType>::getDcFlushEnable(true, pDevice->getRootDeviceEnvironment());
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs); EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
EXPECT_EQ(2u, dispatchArgs.partitionCount); EXPECT_EQ(2u, dispatchArgs.partitionCount);
size_t partitionedWalkerSize = cmdContainer->getCommandStream()->getUsed(); size_t partitionedWalkerSize = cmdContainer->getCommandStream()->getUsed();
@@ -1309,7 +1310,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesDynamicImplicitScaling,
dispatchArgs.isInternal = isInternal; dispatchArgs.isInternal = isInternal;
dispatchArgs.partitionCount = 2; dispatchArgs.partitionCount = 2;
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs); EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
EXPECT_EQ(2u, dispatchArgs.partitionCount); EXPECT_EQ(2u, dispatchArgs.partitionCount);
size_t partitionedWalkerSize = cmdContainer->getCommandStream()->getUsed(); size_t partitionedWalkerSize = cmdContainer->getCommandStream()->getUsed();
@@ -1367,7 +1368,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesDynamicImplicitScaling, givenImp
dispatchArgs.isInternal = isInternal; dispatchArgs.isInternal = isInternal;
dispatchArgs.partitionCount = 2; dispatchArgs.partitionCount = 2;
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs); EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
size_t internalWalkerSize = cmdContainer->getCommandStream()->getUsed(); size_t internalWalkerSize = cmdContainer->getCommandStream()->getUsed();
@@ -1394,7 +1395,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesTest, givenNonTimestampEventWhen
dispatchArgs.eventAddress = eventAddress; dispatchArgs.eventAddress = eventAddress;
dispatchArgs.isTimestampEvent = true; dispatchArgs.isTimestampEvent = true;
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs); EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
GenCmdList commands; GenCmdList commands;
CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed()); CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed());
@@ -1408,6 +1409,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesTest, givenNonTimestampEventWhen
HWTEST2_F(CommandEncodeStatesTest, HWTEST2_F(CommandEncodeStatesTest,
givenDispatchInterfaceWhenDpasRequiredIsNotDefaultThenPipelineSelectCommandAdded, IsWithinXeGfxFamily) { givenDispatchInterfaceWhenDpasRequiredIsNotDefaultThenPipelineSelectCommandAdded, IsWithinXeGfxFamily) {
using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
using PIPELINE_SELECT = typename FamilyType::PIPELINE_SELECT; using PIPELINE_SELECT = typename FamilyType::PIPELINE_SELECT;
uint32_t dims[] = {2, 1, 1}; uint32_t dims[] = {2, 1, 1};
std::unique_ptr<MockDispatchKernelEncoder> dispatchInterface(new MockDispatchKernelEncoder()); std::unique_ptr<MockDispatchKernelEncoder> dispatchInterface(new MockDispatchKernelEncoder());
@@ -1420,7 +1422,7 @@ HWTEST2_F(CommandEncodeStatesTest,
bool requiresUncachedMocs = false; bool requiresUncachedMocs = false;
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs); EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
GenCmdList commands; GenCmdList commands;
CmdParse<FamilyType>::parseCommandBuffer(commands, CmdParse<FamilyType>::parseCommandBuffer(commands,
@@ -1442,7 +1444,7 @@ HWTEST2_F(CommandEncodeStatesTest,
HWTEST2_F(CommandEncodeStatesTest, HWTEST2_F(CommandEncodeStatesTest,
givenDebugVariableWhenEncodeStateIsCalledThenSystolicValueIsOverwritten, IsWithinXeGfxFamily) { givenDebugVariableWhenEncodeStateIsCalledThenSystolicValueIsOverwritten, IsWithinXeGfxFamily) {
DebugManagerStateRestore restorer; DebugManagerStateRestore restorer;
using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
using PIPELINE_SELECT = typename FamilyType::PIPELINE_SELECT; using PIPELINE_SELECT = typename FamilyType::PIPELINE_SELECT;
uint32_t dims[] = {2, 1, 1}; uint32_t dims[] = {2, 1, 1};
std::unique_ptr<MockDispatchKernelEncoder> dispatchInterface(new MockDispatchKernelEncoder()); std::unique_ptr<MockDispatchKernelEncoder> dispatchInterface(new MockDispatchKernelEncoder());
@@ -1455,7 +1457,7 @@ HWTEST2_F(CommandEncodeStatesTest,
bool requiresUncachedMocs = false; bool requiresUncachedMocs = false;
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs); EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
GenCmdList commands; GenCmdList commands;
CmdParse<FamilyType>::parseCommandBuffer(commands, CmdParse<FamilyType>::parseCommandBuffer(commands,
@@ -1477,6 +1479,7 @@ HWTEST2_F(CommandEncodeStatesTest,
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesTest, HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesTest,
givenDispatchInterfaceWhenDpasRequiredIsSameAsDefaultThenPipelineSelectCommandNotAdded) { givenDispatchInterfaceWhenDpasRequiredIsSameAsDefaultThenPipelineSelectCommandNotAdded) {
using PIPELINE_SELECT = typename FamilyType::PIPELINE_SELECT; using PIPELINE_SELECT = typename FamilyType::PIPELINE_SELECT;
using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
uint32_t dims[] = {2, 1, 1}; uint32_t dims[] = {2, 1, 1};
std::unique_ptr<MockDispatchKernelEncoder> dispatchInterface(new MockDispatchKernelEncoder()); std::unique_ptr<MockDispatchKernelEncoder> dispatchInterface(new MockDispatchKernelEncoder());
@@ -1487,7 +1490,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesTest,
bool requiresUncachedMocs = false; bool requiresUncachedMocs = false;
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs); EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
GenCmdList commands; GenCmdList commands;
CmdParse<FamilyType>::parseCommandBuffer(commands, CmdParse<FamilyType>::parseCommandBuffer(commands,
@@ -1513,7 +1516,7 @@ struct CommandEncodeStatesImplicitScalingPrimaryBufferFixture : public CommandEn
template <typename FamilyType> template <typename FamilyType>
void testBodyFindPrimaryBatchBuffer() { void testBodyFindPrimaryBatchBuffer() {
using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START; using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START;
using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
constexpr bool expectPrimary = flushTaskUsedForImmediate || usePrimaryBuffer; constexpr bool expectPrimary = flushTaskUsedForImmediate || usePrimaryBuffer;
uint32_t dims[] = {16, 1, 1}; uint32_t dims[] = {16, 1, 1};
@@ -1525,7 +1528,7 @@ struct CommandEncodeStatesImplicitScalingPrimaryBufferFixture : public CommandEn
dispatchArgs.eventAddress = eventAddress; dispatchArgs.eventAddress = eventAddress;
dispatchArgs.partitionCount = 2; dispatchArgs.partitionCount = 2;
EncodeDispatchKernel<FamilyType>::encode(*BaseClass::cmdContainer.get(), dispatchArgs); EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*BaseClass::cmdContainer.get(), dispatchArgs);
size_t usedBuffer = BaseClass::cmdContainer->getCommandStream()->getUsed(); size_t usedBuffer = BaseClass::cmdContainer->getCommandStream()->getUsed();
EXPECT_EQ(2u, dispatchArgs.partitionCount); EXPECT_EQ(2u, dispatchArgs.partitionCount);

View File

@@ -296,6 +296,7 @@ HWTEST2_F(EncodeConditionalBatchBufferStartTest, whenProgrammingConditionalRegRe
using CommandEncodeStatesXeHpcAndLaterTests = Test<CommandEncodeStatesFixture>; using CommandEncodeStatesXeHpcAndLaterTests = Test<CommandEncodeStatesFixture>;
HWTEST2_F(CommandEncodeStatesXeHpcAndLaterTests, givenDebugFlagSetWhenProgrammingWalkerThenSetFlushingBits, IsAtLeastXeHpcCore) { HWTEST2_F(CommandEncodeStatesXeHpcAndLaterTests, givenDebugFlagSetWhenProgrammingWalkerThenSetFlushingBits, IsAtLeastXeHpcCore) {
using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
DebugManagerStateRestore restore; DebugManagerStateRestore restore;
debugManager.flags.ForceComputeWalkerPostSyncFlush.set(1); debugManager.flags.ForceComputeWalkerPostSyncFlush.set(1);
@@ -304,12 +305,11 @@ HWTEST2_F(CommandEncodeStatesXeHpcAndLaterTests, givenDebugFlagSetWhenProgrammin
bool requiresUncachedMocs = false; bool requiresUncachedMocs = false;
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs); EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
GenCmdList commands; GenCmdList commands;
CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed()); CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed());
using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
auto itor = find<WALKER_TYPE *>(commands.begin(), commands.end()); auto itor = find<WALKER_TYPE *>(commands.begin(), commands.end());
ASSERT_NE(itor, commands.end()); ASSERT_NE(itor, commands.end());

View File

@@ -63,6 +63,7 @@ PVCTEST_F(CommandEncodeStatesPvcTest, GivenSmallSlmTotalSizesWhenSetAdditionalIn
using EncodeKernelPvcTest = Test<CommandEncodeStatesFixture>; using EncodeKernelPvcTest = Test<CommandEncodeStatesFixture>;
PVCTEST_F(EncodeKernelPvcTest, givenRevisionBAndAboveWhenSpecialModeRequiredThenDontReprogramPipelineSelect) { PVCTEST_F(EncodeKernelPvcTest, givenRevisionBAndAboveWhenSpecialModeRequiredThenDontReprogramPipelineSelect) {
using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
bool requiresUncachedMocs = false; bool requiresUncachedMocs = false;
auto hwInfo = pDevice->getRootDeviceEnvironment().getMutableHardwareInfo(); auto hwInfo = pDevice->getRootDeviceEnvironment().getMutableHardwareInfo();
@@ -92,7 +93,7 @@ PVCTEST_F(EncodeKernelPvcTest, givenRevisionBAndAboveWhenSpecialModeRequiredThen
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
dispatchArgs.preemptionMode = NEO::PreemptionMode::Initial; dispatchArgs.preemptionMode = NEO::PreemptionMode::Initial;
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs); EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
EXPECT_EQ(testInput.expectedValue, cmdContainer->lastPipelineSelectModeRequiredRef()); EXPECT_EQ(testInput.expectedValue, cmdContainer->lastPipelineSelectModeRequiredRef());
} }
} }
@@ -195,7 +196,7 @@ PVCTEST_F(EncodeKernelPvcTest, givenDefaultSettingForFenceAsPostSyncOperationInC
dispatchArgs.isKernelUsingSystemAllocation = true; dispatchArgs.isKernelUsingSystemAllocation = true;
dispatchArgs.isHostScopeSignalEvent = true; dispatchArgs.isHostScopeSignalEvent = true;
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs); EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
GenCmdList commands; GenCmdList commands;
CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed()); CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed());

View File

@@ -319,7 +319,7 @@ XE_HPC_CORETEST_F(EncodeKernelXeHpcCoreTest, givenNoFenceAsPostSyncOperationInCo
bool requiresUncachedMocs = false; bool requiresUncachedMocs = false;
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs); EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
GenCmdList commands; GenCmdList commands;
CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed()); CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed());
@@ -345,7 +345,7 @@ XE_HPC_CORETEST_F(EncodeKernelXeHpcCoreTest, givenFenceAsPostSyncOperationInComp
bool requiresUncachedMocs = false; bool requiresUncachedMocs = false;
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs); EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
GenCmdList commands; GenCmdList commands;
CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed()); CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed());
@@ -378,7 +378,7 @@ XE_HPC_CORETEST_F(EncodeKernelXeHpcCoreTest, givenDefaultSettingForFenceWhenKern
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
dispatchArgs.isKernelUsingSystemAllocation = true; dispatchArgs.isKernelUsingSystemAllocation = true;
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs); EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
GenCmdList commands; GenCmdList commands;
CmdParse<FamilyType>::parseCommandBuffer( CmdParse<FamilyType>::parseCommandBuffer(
@@ -414,7 +414,7 @@ XE_HPC_CORETEST_F(EncodeKernelXeHpcCoreTest, givenDefaultSettingForFenceWhenEven
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
dispatchArgs.isHostScopeSignalEvent = true; dispatchArgs.isHostScopeSignalEvent = true;
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs); EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
GenCmdList commands; GenCmdList commands;
CmdParse<FamilyType>::parseCommandBuffer( CmdParse<FamilyType>::parseCommandBuffer(
@@ -450,7 +450,7 @@ XE_HPC_CORETEST_F(EncodeKernelXeHpcCoreTest, givenDefaultSettingForFenceWhenKern
dispatchArgs.isKernelUsingSystemAllocation = true; dispatchArgs.isKernelUsingSystemAllocation = true;
dispatchArgs.isHostScopeSignalEvent = true; dispatchArgs.isHostScopeSignalEvent = true;
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs); EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
GenCmdList commands; GenCmdList commands;
CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed()); CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed());
@@ -464,6 +464,7 @@ XE_HPC_CORETEST_F(EncodeKernelXeHpcCoreTest, givenDefaultSettingForFenceWhenKern
} }
XE_HPC_CORETEST_F(EncodeKernelXeHpcCoreTest, givenCleanHeapsAndSlmNotChangedAndUncachedMocsRequestedThenSBAIsProgrammedAndMocsAreSet) { XE_HPC_CORETEST_F(EncodeKernelXeHpcCoreTest, givenCleanHeapsAndSlmNotChangedAndUncachedMocsRequestedThenSBAIsProgrammedAndMocsAreSet) {
using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
uint32_t dims[] = {2, 1, 1}; uint32_t dims[] = {2, 1, 1};
std::unique_ptr<MockDispatchKernelEncoder> dispatchInterface(new MockDispatchKernelEncoder()); std::unique_ptr<MockDispatchKernelEncoder> dispatchInterface(new MockDispatchKernelEncoder());
cmdContainer->slmSizeRef() = 1; cmdContainer->slmSizeRef() = 1;
@@ -473,7 +474,7 @@ XE_HPC_CORETEST_F(EncodeKernelXeHpcCoreTest, givenCleanHeapsAndSlmNotChangedAndU
bool requiresUncachedMocs = true; bool requiresUncachedMocs = true;
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs); EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
GenCmdList commands; GenCmdList commands;
CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed()); CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed());