feature: Add heapless mode programming in l0 1/n

Related-To: NEO-7621
Signed-off-by: Kamil Kopryk <kamil.kopryk@intel.com>
This commit is contained in:
Kamil Kopryk
2023-12-01 14:30:28 +00:00
committed by Compute-Runtime-Automation
parent 7a17df93a6
commit aa950a4a96
31 changed files with 295 additions and 175 deletions

View File

@@ -445,6 +445,7 @@ struct CommandList : _ze_command_list_handle_t {
bool dispatchCmdListBatchBufferAsPrimary = false;
bool copyThroughLockedPtrEnabled = false;
bool useOnlyGlobalTimestamps = false;
bool heaplessModeEnabled = false;
};
using CommandListAllocatorFn = CommandList *(*)(uint32_t);

View File

@@ -16,6 +16,7 @@
#include "shared/source/helpers/api_specific_config.h"
#include "shared/source/helpers/blit_commands_helper.h"
#include "shared/source/helpers/blit_properties.h"
#include "shared/source/helpers/compiler_product_helper_base.inl"
#include "shared/source/helpers/definitions/command_encoder_args.h"
#include "shared/source/helpers/gfx_core_helper.h"
#include "shared/source/helpers/hw_info.h"
@@ -210,6 +211,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::initialize(Device *device, NEO
auto &rootDeviceEnvironment = neoDevice->getRootDeviceEnvironment();
auto &productHelper = rootDeviceEnvironment.getHelper<NEO::ProductHelper>();
auto &gfxCoreHelper = neoDevice->getGfxCoreHelper();
auto &compilerProductHelper = neoDevice->getCompilerProductHelper();
auto gmmHelper = rootDeviceEnvironment.getGmmHelper();
this->dcFlushSupport = NEO::MemorySynchronizationCommands<GfxFamily>::getDcFlushEnable(true, rootDeviceEnvironment);
@@ -230,7 +232,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::initialize(Device *device, NEO
this->dispatchCmdListBatchBufferAsPrimary = L0GfxCoreHelper::dispatchCmdListBatchBufferAsPrimary(rootDeviceEnvironment, this->cmdListType == CommandListType::TYPE_REGULAR);
this->useOnlyGlobalTimestamps = gfxCoreHelper.useOnlyGlobalTimestamps();
this->maxFillPaternSizeForCopyEngine = gfxCoreHelper.getMaxFillPaternSizeForCopyEngine();
this->heaplessModeEnabled = compilerProductHelper.isHeaplessModeEnabled();
this->requiredStreamState.initSupport(rootDeviceEnvironment);
this->finalStreamState.initSupport(rootDeviceEnvironment);
@@ -3179,7 +3181,10 @@ void CommandListCoreFamily<gfxCoreFamily>::programStateBaseAddress(NEO::CommandC
false, // useGlobalAtomics
this->partitionCount > 1, // multiOsContextCapable
isRcs, // isRcs
this->doubleSbaWa}; // doubleSbaWa
this->doubleSbaWa, // doubleSbaWa
this->heaplessModeEnabled // heaplessModeEnabled
};
NEO::EncodeStateBaseAddress<GfxFamily>::encode(encodeStateBaseAddressArgs);
bool sbaTrackingEnabled = NEO::Debugger::isDebugEnabled(this->internalUsage) && this->device->getL0Debugger();

View File

@@ -212,10 +212,11 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
false, // isKernelUsingSystemAllocation
cmdListType == CommandListType::TYPE_IMMEDIATE, // isKernelDispatchedFromImmediateCmdList
engineGroupType == NEO::EngineGroupType::renderCompute, // isRcs
this->dcFlushSupport // dcFlushEnable
this->dcFlushSupport, // dcFlushEnable
this->heaplessModeEnabled // isHeaplessModeEnabled
};
NEO::EncodeDispatchKernel<GfxFamily>::encode(commandContainer, dispatchKernelArgs);
NEO::EncodeDispatchKernel<GfxFamily>::encodeCommon(commandContainer, dispatchKernelArgs);
if (!this->isFlushTaskSubmissionEnabled) {
this->containsStatelessUncachedResource = dispatchKernelArgs.requiresUncachedMocs;
}

View File

@@ -298,7 +298,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
isKernelUsingSystemAllocation, // isKernelUsingSystemAllocation
cmdListType == CommandListType::TYPE_IMMEDIATE, // isKernelDispatchedFromImmediateCmdList
engineGroupType == NEO::EngineGroupType::renderCompute, // isRcs
this->dcFlushSupport // dcFlushEnable
this->dcFlushSupport, // dcFlushEnable
this->heaplessModeEnabled // isHeaplessModeEnabled
};
bool inOrderExecSignalRequired = (this->isInOrderExecutionEnabled() && !launchParams.isKernelSplitOperation);
@@ -313,7 +314,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
}
}
NEO::EncodeDispatchKernel<GfxFamily>::encode(commandContainer, dispatchKernelArgs);
NEO::EncodeDispatchKernel<GfxFamily>::encodeCommon(commandContainer, dispatchKernelArgs);
if (!this->isFlushTaskSubmissionEnabled) {
this->containsStatelessUncachedResource = dispatchKernelArgs.requiresUncachedMocs;

View File

@@ -161,6 +161,7 @@ HWTEST_F(CommandListAppendLaunchKernel, givenKernelWithThreadArbitrationPolicySe
HWTEST2_F(CommandListAppendLaunchKernel, givenNotEnoughSpaceInCommandStreamWhenAppendingKernelThenBbEndIsAddedAndNewCmdBufferAllocated, IsAtLeastSkl) {
using MI_BATCH_BUFFER_END = typename FamilyType::MI_BATCH_BUFFER_END;
using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
DebugManagerStateRestore restorer;
debugManager.flags.DispatchCmdlistCmdBufferPrimary.set(0);
@@ -207,7 +208,7 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenNotEnoughSpaceInCommandStreamWhenA
false, // isRcs
commandList->getDcFlushRequired(true) // dcFlushEnable
};
NEO::EncodeDispatchKernel<FamilyType>::encode(commandContainer, dispatchKernelArgs);
NEO::EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(commandContainer, dispatchKernelArgs);
auto usedSpaceAfter = commandContainer.getCommandStream()->getUsed();
ASSERT_GT(usedSpaceAfter, 0u);

View File

@@ -616,6 +616,8 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenNotEnoughSpaceInCommandStreamWhenA
DebugManagerStateRestore restorer;
NEO::debugManager.flags.EnableFlushTaskSubmission.set(0);
using MI_BATCH_BUFFER_END = typename FamilyType::MI_BATCH_BUFFER_END;
using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
createKernel();
ze_result_t returnValue;
@@ -657,7 +659,7 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenNotEnoughSpaceInCommandStreamWhenA
false, // isRcs
commandList->getDcFlushRequired(true) // dcFlushEnable
};
EXPECT_THROW(NEO::EncodeDispatchKernel<FamilyType>::encode(commandContainer, dispatchKernelArgs), std::exception);
EXPECT_THROW(NEO::EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(commandContainer, dispatchKernelArgs), std::exception);
}
HWTEST_F(CommandListAppendLaunchKernel, givenInvalidKernelWhenAppendingThenReturnErrorInvalidArgument) {

View File

@@ -105,12 +105,7 @@ size_t HardwareCommandsHelper<GfxFamily>::sendCrossThreadData(
uint32_t sizeToCopy = sizeCrossThreadData;
if (inlineDataProgrammingRequired == true) {
using InlineData = typename GfxFamily::INLINE_DATA;
uint32_t inlineDataSize = sizeof(InlineData);
if constexpr (heaplessModeEnabled) {
inlineDataSize = 64;
}
constexpr uint32_t inlineDataSize = WalkerType::getInlineDataSize();
sizeToCopy = std::min(inlineDataSize, sizeCrossThreadData);
dest = reinterpret_cast<char *>(walkerCmd->getInlineDataPointer());

View File

@@ -65,6 +65,7 @@ struct EncodeDispatchKernelArgs {
bool isKernelDispatchedFromImmediateCmdList = false;
bool isRcs = false;
bool dcFlushEnable = false;
bool isHeaplessModeEnabled = false;
};
enum class MiPredicateType : uint32_t {
@@ -94,6 +95,9 @@ struct EncodeDispatchKernel {
using INTERFACE_DESCRIPTOR_DATA = typename GfxFamily::INTERFACE_DESCRIPTOR_DATA;
using BINDING_TABLE_STATE = typename GfxFamily::BINDING_TABLE_STATE;
static void encodeCommon(CommandContainer &container, EncodeDispatchKernelArgs &args);
template <typename WalkerType>
static void encode(CommandContainer &container, EncodeDispatchKernelArgs &args);
template <typename WalkerType>
@@ -103,6 +107,21 @@ struct EncodeDispatchKernel {
static void appendAdditionalIDDFields(InterfaceDescriptorType *pInterfaceDescriptor, const RootDeviceEnvironment &rootDeviceEnvironment,
const uint32_t threadsPerThreadGroup, uint32_t slmTotalSize, SlmPolicy slmPolicy);
template <typename WalkerType>
static void encodeThreadData(WalkerType &walkerCmd,
const uint32_t *startWorkGroup,
const uint32_t *numWorkGroups,
const uint32_t *workGroupSizes,
uint32_t simd,
uint32_t localIdDimensions,
uint32_t threadsPerThreadGroup,
uint32_t threadExecutionMask,
bool localIdsGenerationByRuntime,
bool inlineDataProgrammingRequired,
bool isIndirect,
uint32_t requiredWorkGroupOrder,
const RootDeviceEnvironment &rootDeviceEnvironment);
template <typename InterfaceDescriptorType>
static void setGrfInfo(InterfaceDescriptorType *pInterfaceDescriptor, uint32_t numGrf, const size_t &sizeCrossThreadData,
const size_t &sizePerThreadData, const RootDeviceEnvironment &rootDeviceEnvironment);
@@ -118,20 +137,6 @@ struct EncodeDispatchKernel {
static bool inlineDataProgrammingRequired(const KernelDescriptor &kernelDesc);
static void encodeThreadData(WALKER_TYPE &walkerCmd,
const uint32_t *startWorkGroup,
const uint32_t *numWorkGroups,
const uint32_t *workGroupSizes,
uint32_t simd,
uint32_t localIdDimensions,
uint32_t threadsPerThreadGroup,
uint32_t threadExecutionMask,
bool localIdsGenerationByRuntime,
bool inlineDataProgrammingRequired,
bool isIndirect,
uint32_t requiredWorkGroupOrder,
const RootDeviceEnvironment &rootDeviceEnvironment);
template <typename InterfaceDescriptorType>
static void programBarrierEnable(InterfaceDescriptorType &interfaceDescriptor, uint32_t value, const HardwareInfo &hwInfo);
@@ -146,7 +151,8 @@ struct EncodeDispatchKernel {
template <typename WalkerType>
static void setupPostSyncMocs(WalkerType &walkerCmd, const RootDeviceEnvironment &rootDeviceEnvironment, bool dcFlush);
static void adjustWalkOrder(WALKER_TYPE &walkerCmd, uint32_t requiredWorkGroupOrder, const RootDeviceEnvironment &rootDeviceEnvironment);
template <typename WalkerType>
static void adjustWalkOrder(WalkerType &walkerCmd, uint32_t requiredWorkGroupOrder, const RootDeviceEnvironment &rootDeviceEnvironment);
static size_t getSizeRequiredDsh(const KernelDescriptor &kernelDescriptor, uint32_t iddCount);
static size_t getSizeRequiredSsh(const KernelInfo &kernelInfo);

View File

@@ -537,6 +537,12 @@ template <typename Family>
void EncodeSurfaceState<Family>::appendParamsForImageFromBuffer(R_SURFACE_STATE *surfaceState) {
}
template <typename Family>
inline void EncodeDispatchKernel<Family>::encodeCommon(CommandContainer &container, EncodeDispatchKernelArgs &args) {
using WALKER_TYPE = typename Family::WALKER_TYPE;
EncodeDispatchKernel<Family>::template encode<WALKER_TYPE>(container, args);
}
template <typename Family>
void *EncodeDispatchKernel<Family>::getInterfaceDescriptor(CommandContainer &container, IndirectHeap *childDsh, uint32_t &iddOffset) {

View File

@@ -48,6 +48,7 @@ void EncodeDispatchKernel<Family>::setGrfInfo(InterfaceDescriptorType *pInterfac
}
template <typename Family>
template <typename WalkerType>
void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDispatchKernelArgs &args) {
using MEDIA_STATE_FLUSH = typename Family::MEDIA_STATE_FLUSH;
@@ -347,7 +348,8 @@ inline bool EncodeDispatchKernel<Family>::isRuntimeLocalIdsGenerationRequired(ui
}
template <typename Family>
void EncodeDispatchKernel<Family>::encodeThreadData(WALKER_TYPE &walkerCmd,
template <typename WalkerType>
void EncodeDispatchKernel<Family>::encodeThreadData(WalkerType &walkerCmd,
const uint32_t *startWorkGroup,
const uint32_t *numWorkGroups,
const uint32_t *workGroupSizes,
@@ -375,7 +377,7 @@ void EncodeDispatchKernel<Family>::encodeThreadData(WALKER_TYPE &walkerCmd,
walkerCmd.setThreadGroupIdStartingResumeZ(static_cast<uint32_t>(startWorkGroup[2]));
}
walkerCmd.setSimdSize(getSimdConfig<WALKER_TYPE>(simd));
walkerCmd.setSimdSize(getSimdConfig<WalkerType>(simd));
auto localWorkSize = static_cast<uint32_t>(workGroupSizes[0] * workGroupSizes[1] * workGroupSizes[2]);
if (threadsPerThreadGroup == 0) {
@@ -603,7 +605,8 @@ template <typename WalkerType>
void EncodeDispatchKernel<Family>::setupPostSyncMocs(WalkerType &walkerCmd, const RootDeviceEnvironment &rootDeviceEnvironment, bool dcFlush) {}
template <typename Family>
void EncodeDispatchKernel<Family>::adjustWalkOrder(WALKER_TYPE &walkerCmd, uint32_t requiredWorkGroupOrder, const RootDeviceEnvironment &rootDeviceEnvironment) {}
template <typename WalkerType>
void EncodeDispatchKernel<Family>::adjustWalkOrder(WalkerType &walkerCmd, uint32_t requiredWorkGroupOrder, const RootDeviceEnvironment &rootDeviceEnvironment) {}
template <typename Family>
size_t EncodeDispatchKernel<Family>::additionalSizeRequiredDsh(uint32_t iddCount) {

View File

@@ -44,11 +44,12 @@ void EncodeDispatchKernel<Family>::setGrfInfo(InterfaceDescriptorType *pInterfac
}
template <typename Family>
template <typename WalkerType>
void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDispatchKernelArgs &args) {
using SHARED_LOCAL_MEMORY_SIZE = typename Family::INTERFACE_DESCRIPTOR_DATA::SHARED_LOCAL_MEMORY_SIZE;
using SHARED_LOCAL_MEMORY_SIZE = typename WalkerType::InterfaceDescriptorType::SHARED_LOCAL_MEMORY_SIZE;
using STATE_BASE_ADDRESS = typename Family::STATE_BASE_ADDRESS;
using INLINE_DATA = typename Family::INLINE_DATA;
constexpr bool heaplessModeEnabled = Family::template isHeaplessMode<WalkerType>();
const HardwareInfo &hwInfo = args.device->getHardwareInfo();
auto &rootDeviceEnvironment = args.device->getRootDeviceEnvironment();
@@ -73,7 +74,7 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
EncodeComputeMode<Family>::adjustPipelineSelect(container, kernelDescriptor);
}
WALKER_TYPE walkerCmd = Family::cmdInitGpgpuWalker;
WalkerType walkerCmd = Family::template getInitGpuWalker<WalkerType>();
auto &idd = walkerCmd.getInterfaceDescriptor();
EncodeDispatchKernel<Family>::setGrfInfo(&idd, kernelDescriptor.kernelAttributes.numGrfRequired, sizeCrossThreadData,
@@ -85,11 +86,20 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
{
auto alloc = args.dispatchInterface->getIsaAllocation();
UNRECOVERABLE_IF(nullptr == alloc);
auto offset = alloc->getGpuAddressToPatch() + args.dispatchInterface->getIsaOffsetInParentAllocation();
if (!localIdsGenerationByRuntime) {
offset += kernelDescriptor.entryPoints.skipPerThreadDataLoad;
if constexpr (heaplessModeEnabled) {
auto address = alloc->getGpuAddress() + args.dispatchInterface->getIsaOffsetInParentAllocation();
if (!localIdsGenerationByRuntime) {
address += kernelDescriptor.entryPoints.skipPerThreadDataLoad;
}
idd.setKernelStartPointer(address);
} else {
auto offset = alloc->getGpuAddressToPatch() + args.dispatchInterface->getIsaOffsetInParentAllocation();
if (!localIdsGenerationByRuntime) {
offset += kernelDescriptor.entryPoints.skipPerThreadDataLoad;
}
idd.setKernelStartPointer(offset);
}
idd.setKernelStartPointer(offset);
}
if (args.dispatchInterface->getKernelDescriptor().kernelAttributes.flags.usesAssert && args.device->getL0Debugger() != nullptr) {
idd.setSoftwareExceptionEnable(1);
@@ -112,32 +122,17 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
idd.setSharedLocalMemorySize(slmSize);
auto bindingTableStateCount = kernelDescriptor.payloadMappings.bindingTable.numEntries;
bool skipSshProgramming = false;
bool sshProgrammingRequired = true;
auto &productHelper = args.device->getProductHelper();
if (productHelper.isSkippingStatefulInformationRequired(kernelDescriptor)) {
bindingTableStateCount = 0u;
skipSshProgramming = true;
sshProgrammingRequired = false;
}
uint32_t bindingTablePointer = 0u;
bool isBindlessKernel = NEO::KernelDescriptor::isBindlessAddressingKernel(kernelDescriptor);
if (!skipSshProgramming) {
if (!isBindlessKernel) {
container.prepareBindfulSsh();
if (bindingTableStateCount > 0u) {
auto ssh = args.surfaceStateHeap;
if (ssh == nullptr) {
ssh = container.getHeapWithRequiredSizeAndAlignment(HeapType::SURFACE_STATE, args.dispatchInterface->getSurfaceStateHeapDataSize(), BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE);
}
bindingTablePointer = static_cast<uint32_t>(EncodeSurfaceState<Family>::pushBindingTableAndSurfaceStates(
*ssh,
args.dispatchInterface->getSurfaceStateHeapData(),
args.dispatchInterface->getSurfaceStateHeapDataSize(), bindingTableStateCount,
kernelDescriptor.payloadMappings.bindingTable.tableOffset));
}
} else {
if (sshProgrammingRequired) {
bool isBindlessKernel = NEO::KernelDescriptor::isBindlessAddressingKernel(kernelDescriptor);
if (isBindlessKernel) {
bool globalBindlessSsh = args.device->getBindlessHeapsHelper() != nullptr;
auto sshHeapSize = args.dispatchInterface->getSurfaceStateHeapDataSize();
@@ -156,15 +151,32 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
memcpy_s(dstSurfaceState, sshHeapSize, args.dispatchInterface->getSurfaceStateHeapData(), sshHeapSize);
args.dispatchInterface->patchBindlessOffsetsInCrossThreadData(bindlessSshBaseOffset);
}
} else {
if constexpr (heaplessModeEnabled == false) {
container.prepareBindfulSsh();
if (bindingTableStateCount > 0u) {
auto ssh = args.surfaceStateHeap;
if (ssh == nullptr) {
ssh = container.getHeapWithRequiredSizeAndAlignment(HeapType::SURFACE_STATE, args.dispatchInterface->getSurfaceStateHeapDataSize(), BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE);
}
auto bindingTablePointer = static_cast<uint32_t>(EncodeSurfaceState<Family>::pushBindingTableAndSurfaceStates(
*ssh,
args.dispatchInterface->getSurfaceStateHeapData(),
args.dispatchInterface->getSurfaceStateHeapDataSize(), bindingTableStateCount,
kernelDescriptor.payloadMappings.bindingTable.tableOffset));
idd.setBindingTablePointer(bindingTablePointer);
}
}
}
}
idd.setBindingTablePointer(bindingTablePointer);
PreemptionHelper::programInterfaceDescriptorDataPreemption<Family>(&idd, args.preemptionMode);
uint32_t samplerCount = 0;
if constexpr (Family::supportsSampler) {
if constexpr (Family::supportsSampler && heaplessModeEnabled == false) {
if (args.device->getDeviceInfo().imageSupport) {
uint32_t samplerStateOffset = 0;
@@ -188,10 +200,12 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
}
}
EncodeDispatchKernel<Family>::adjustBindingTablePrefetch(idd, samplerCount, bindingTableStateCount);
if constexpr (heaplessModeEnabled == false) {
EncodeDispatchKernel<Family>::adjustBindingTablePrefetch(idd, samplerCount, bindingTableStateCount);
}
uint64_t offsetThreadData = 0u;
const uint32_t inlineDataSize = sizeof(INLINE_DATA);
constexpr uint32_t inlineDataSize = WalkerType::getInlineDataSize();
auto crossThreadData = args.dispatchInterface->getCrossThreadData();
uint32_t inlineDataProgrammingOffset = 0u;
@@ -231,6 +245,7 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
memcpy_s(ptr, sizeCrossThreadData,
crossThreadData, sizeCrossThreadData);
}
if (args.isIndirect) {
auto gpuPtr = heap->getGraphicsAllocation()->getGpuAddress() + static_cast<uint64_t>(heap->getUsed() - sizeThreadData - inlineDataProgrammingOffset);
uint64_t implicitArgsGpuPtr = 0u;
@@ -272,7 +287,7 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
args.partitionCount > 1, // multiOsContextCapable
args.isRcs, // isRcs
container.doubleSbaWaRef(), // doubleSbaWa
false, // heaplessModeEnabled
heaplessModeEnabled, // heaplessModeEnabled
};
EncodeStateBaseAddress<Family>::encode(encodeStateBaseAddressArgs);
container.setDirtyStateForAllHeaps(false);
@@ -285,8 +300,17 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
EncodeSemaphore<Family>::applyMiSemaphoreWaitCommand(*listCmdBufferStream, *args.additionalCommands);
}
walkerCmd.setIndirectDataStartAddress(static_cast<uint32_t>(offsetThreadData));
walkerCmd.setIndirectDataLength(sizeThreadData);
if constexpr (heaplessModeEnabled) {
auto inlineDataPointer = reinterpret_cast<char *>(walkerCmd.getInlineDataPointer());
auto indirectDataPointerAddress = kernelDescriptor.payloadMappings.implicitArgs.indirectDataPointerAddress;
auto heap = container.getIndirectHeap(HeapType::INDIRECT_OBJECT);
auto address = heap->getHeapGpuBase() + offsetThreadData;
std::memcpy(inlineDataPointer + indirectDataPointerAddress.offset, &address, indirectDataPointerAddress.pointerSize);
} else {
walkerCmd.setIndirectDataStartAddress(static_cast<uint32_t>(offsetThreadData));
walkerCmd.setIndirectDataLength(sizeThreadData);
}
EncodeDispatchKernel<Family>::encodeThreadData(walkerCmd,
nullptr,
@@ -377,7 +401,7 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
hwInfo);
} else {
args.partitionCount = 1;
auto buffer = listCmdBufferStream->getSpaceForCmd<WALKER_TYPE>();
auto buffer = listCmdBufferStream->getSpaceForCmd<WalkerType>();
args.outWalkerPtr = buffer;
*buffer = walkerCmd;
}
@@ -478,7 +502,8 @@ bool EncodeDispatchKernel<Family>::isRuntimeLocalIdsGenerationRequired(uint32_t
}
template <typename Family>
void EncodeDispatchKernel<Family>::encodeThreadData(WALKER_TYPE &walkerCmd,
template <typename WalkerType>
void EncodeDispatchKernel<Family>::encodeThreadData(WalkerType &walkerCmd,
const uint32_t *startWorkGroup,
const uint32_t *numWorkGroups,
const uint32_t *workGroupSizes,
@@ -517,7 +542,7 @@ void EncodeDispatchKernel<Family>::encodeThreadData(WALKER_TYPE &walkerCmd,
}
walkerCmd.setExecutionMask(static_cast<uint32_t>(executionMask));
walkerCmd.setSimdSize(getSimdConfig<WALKER_TYPE>(simd));
walkerCmd.setSimdSize(getSimdConfig<WalkerType>(simd));
walkerCmd.setMessageSimd(walkerCmd.getSimdSize());
@@ -818,7 +843,8 @@ inline void EncodeStoreMMIO<Family>::appendFlags(MI_STORE_REGISTER_MEM *storeReg
}
template <typename Family>
void EncodeDispatchKernel<Family>::adjustWalkOrder(WALKER_TYPE &walkerCmd, uint32_t requiredWorkGroupOrder, const RootDeviceEnvironment &rootDeviceEnvironment) {}
template <typename WalkerType>
void EncodeDispatchKernel<Family>::adjustWalkOrder(WalkerType &walkerCmd, uint32_t requiredWorkGroupOrder, const RootDeviceEnvironment &rootDeviceEnvironment) {}
template <typename Family>
size_t EncodeDispatchKernel<Family>::additionalSizeRequiredDsh(uint32_t iddCount) {

View File

@@ -79,6 +79,9 @@ template void EncodeDispatchKernel<Family>::appendAdditionalIDDFields<Family::IN
template void EncodeDispatchKernel<Family>::programBarrierEnable<Family::INTERFACE_DESCRIPTOR_DATA>(Family::INTERFACE_DESCRIPTOR_DATA &interfaceDescriptor, uint32_t value, const HardwareInfo &hwInfo);
template void EncodeDispatchKernel<Family>::adjustInterfaceDescriptorData<Family::WALKER_TYPE, Family::INTERFACE_DESCRIPTOR_DATA>(Family::INTERFACE_DESCRIPTOR_DATA &interfaceDescriptor, const Device &device, const HardwareInfo &hwInfo, const uint32_t threadGroupCount, const uint32_t numGrf, Family::WALKER_TYPE &walkerCmd);
template void EncodeDispatchKernel<Family>::setupPostSyncMocs<Family::WALKER_TYPE>(Family::WALKER_TYPE &walkerCmd, const RootDeviceEnvironment &rootDeviceEnvironment, bool dcFlush);
template void EncodeDispatchKernel<Family>::encode<Family::WALKER_TYPE>(CommandContainer &container, EncodeDispatchKernelArgs &args);
template void EncodeDispatchKernel<Family>::encodeThreadData<Family::WALKER_TYPE>(Family::WALKER_TYPE &walkerCmd, const uint32_t *startWorkGroup, const uint32_t *numWorkGroups, const uint32_t *workGroupSizes, uint32_t simd, uint32_t localIdDimensions, uint32_t threadsPerThreadGroup, uint32_t threadExecutionMask, bool localIdsGenerationByRuntime, bool inlineDataProgrammingRequired, bool isIndirect, uint32_t requiredWorkGroupOrder, const RootDeviceEnvironment &rootDeviceEnvironment);
template void EncodeDispatchKernel<Family>::adjustWalkOrder<Family::WALKER_TYPE>(Family::WALKER_TYPE &walkerCmd, uint32_t requiredWorkGroupOrder, const RootDeviceEnvironment &rootDeviceEnvironment);
template struct EncodeStates<Family>;
template struct EncodeMath<Family>;

View File

@@ -119,6 +119,9 @@ template void EncodeDispatchKernel<Family>::appendAdditionalIDDFields<Family::IN
template void EncodeDispatchKernel<Family>::programBarrierEnable<Family::INTERFACE_DESCRIPTOR_DATA>(Family::INTERFACE_DESCRIPTOR_DATA &interfaceDescriptor, uint32_t value, const HardwareInfo &hwInfo);
template void EncodeDispatchKernel<Family>::adjustInterfaceDescriptorData<Family::WALKER_TYPE, Family::INTERFACE_DESCRIPTOR_DATA>(Family::INTERFACE_DESCRIPTOR_DATA &interfaceDescriptor, const Device &device, const HardwareInfo &hwInfo, const uint32_t threadGroupCount, const uint32_t numGrf, Family::WALKER_TYPE &walkerCmd);
template void EncodeDispatchKernel<Family>::setupPostSyncMocs<Family::WALKER_TYPE>(Family::WALKER_TYPE &walkerCmd, const RootDeviceEnvironment &rootDeviceEnvironment, bool dcFlush);
template void EncodeDispatchKernel<Family>::encode<Family::WALKER_TYPE>(CommandContainer &container, EncodeDispatchKernelArgs &args);
template void EncodeDispatchKernel<Family>::encodeThreadData<Family::WALKER_TYPE>(Family::WALKER_TYPE &walkerCmd, const uint32_t *startWorkGroup, const uint32_t *numWorkGroups, const uint32_t *workGroupSizes, uint32_t simd, uint32_t localIdDimensions, uint32_t threadsPerThreadGroup, uint32_t threadExecutionMask, bool localIdsGenerationByRuntime, bool inlineDataProgrammingRequired, bool isIndirect, uint32_t requiredWorkGroupOrder, const RootDeviceEnvironment &rootDeviceEnvironment);
template void EncodeDispatchKernel<Family>::adjustWalkOrder<Family::WALKER_TYPE>(Family::WALKER_TYPE &walkerCmd, uint32_t requiredWorkGroupOrder, const RootDeviceEnvironment &rootDeviceEnvironment);
template struct EncodeStates<Family>;
template struct EncodeMath<Family>;

View File

@@ -59,6 +59,9 @@ template void EncodeDispatchKernel<Family>::appendAdditionalIDDFields<Family::IN
template void EncodeDispatchKernel<Family>::programBarrierEnable<Family::INTERFACE_DESCRIPTOR_DATA>(Family::INTERFACE_DESCRIPTOR_DATA &interfaceDescriptor, uint32_t value, const HardwareInfo &hwInfo);
template void EncodeDispatchKernel<Family>::adjustInterfaceDescriptorData<Family::WALKER_TYPE, Family::INTERFACE_DESCRIPTOR_DATA>(Family::INTERFACE_DESCRIPTOR_DATA &interfaceDescriptor, const Device &device, const HardwareInfo &hwInfo, const uint32_t threadGroupCount, const uint32_t numGrf, Family::WALKER_TYPE &walkerCmd);
template void EncodeDispatchKernel<Family>::setupPostSyncMocs<Family::WALKER_TYPE>(Family::WALKER_TYPE &walkerCmd, const RootDeviceEnvironment &rootDeviceEnvironment, bool dcFlush);
template void EncodeDispatchKernel<Family>::encode<Family::WALKER_TYPE>(CommandContainer &container, EncodeDispatchKernelArgs &args);
template void EncodeDispatchKernel<Family>::encodeThreadData<Family::WALKER_TYPE>(Family::WALKER_TYPE &walkerCmd, const uint32_t *startWorkGroup, const uint32_t *numWorkGroups, const uint32_t *workGroupSizes, uint32_t simd, uint32_t localIdDimensions, uint32_t threadsPerThreadGroup, uint32_t threadExecutionMask, bool localIdsGenerationByRuntime, bool inlineDataProgrammingRequired, bool isIndirect, uint32_t requiredWorkGroupOrder, const RootDeviceEnvironment &rootDeviceEnvironment);
template void EncodeDispatchKernel<Family>::adjustWalkOrder<Family::WALKER_TYPE>(Family::WALKER_TYPE &walkerCmd, uint32_t requiredWorkGroupOrder, const RootDeviceEnvironment &rootDeviceEnvironment);
template struct EncodeStates<Family>;
template struct EncodeMath<Family>;

View File

@@ -64,6 +64,9 @@ template void EncodeDispatchKernel<Family>::appendAdditionalIDDFields<Family::IN
template void EncodeDispatchKernel<Family>::programBarrierEnable<Family::INTERFACE_DESCRIPTOR_DATA>(Family::INTERFACE_DESCRIPTOR_DATA &interfaceDescriptor, uint32_t value, const HardwareInfo &hwInfo);
template void EncodeDispatchKernel<Family>::adjustInterfaceDescriptorData<Family::WALKER_TYPE, Family::INTERFACE_DESCRIPTOR_DATA>(Family::INTERFACE_DESCRIPTOR_DATA &interfaceDescriptor, const Device &device, const HardwareInfo &hwInfo, const uint32_t threadGroupCount, const uint32_t numGrf, Family::WALKER_TYPE &walkerCmd);
template void EncodeDispatchKernel<Family>::setupPostSyncMocs<Family::WALKER_TYPE>(Family::WALKER_TYPE &walkerCmd, const RootDeviceEnvironment &rootDeviceEnvironment, bool dcFlush);
template void EncodeDispatchKernel<Family>::encode<Family::WALKER_TYPE>(CommandContainer &container, EncodeDispatchKernelArgs &args);
template void EncodeDispatchKernel<Family>::encodeThreadData<Family::WALKER_TYPE>(Family::WALKER_TYPE &walkerCmd, const uint32_t *startWorkGroup, const uint32_t *numWorkGroups, const uint32_t *workGroupSizes, uint32_t simd, uint32_t localIdDimensions, uint32_t threadsPerThreadGroup, uint32_t threadExecutionMask, bool localIdsGenerationByRuntime, bool inlineDataProgrammingRequired, bool isIndirect, uint32_t requiredWorkGroupOrder, const RootDeviceEnvironment &rootDeviceEnvironment);
template void EncodeDispatchKernel<Family>::adjustWalkOrder<Family::WALKER_TYPE>(Family::WALKER_TYPE &walkerCmd, uint32_t requiredWorkGroupOrder, const RootDeviceEnvironment &rootDeviceEnvironment);
template struct EncodeStates<Family>;
template struct EncodeMath<Family>;

View File

@@ -490,6 +490,9 @@ typedef struct tagGPGPU_WALKER {
inline uint32_t getBottomExecutionMask() const {
return (TheStructure.Common.BottomExecutionMask);
}
static constexpr uint32_t getInlineDataSize() { // patched
return 0u;
}
using InterfaceDescriptorType = INTERFACE_DESCRIPTOR_DATA; // patched
} GPGPU_WALKER;

View File

@@ -510,7 +510,9 @@ typedef struct tagGPGPU_WALKER {
inline uint32_t getBottomExecutionMask() const {
return TheStructure.Common.BottomExecutionMask;
}
static constexpr uint32_t getInlineDataSize() { // patched
return 0u;
}
using InterfaceDescriptorType = INTERFACE_DESCRIPTOR_DATA; // patched
} GPGPU_WALKER;

View File

@@ -476,6 +476,9 @@ typedef struct tagGPGPU_WALKER {
inline uint32_t getBottomExecutionMask() const {
return (TheStructure.Common.BottomExecutionMask);
}
static constexpr uint32_t getInlineDataSize() { // patched
return 0u;
}
using InterfaceDescriptorType = INTERFACE_DESCRIPTOR_DATA; // patched
} GPGPU_WALKER;
STATIC_ASSERT(60 == sizeof(GPGPU_WALKER));

View File

@@ -485,6 +485,9 @@ typedef struct tagGPGPU_WALKER {
inline uint32_t getBottomExecutionMask() const {
return (TheStructure.Common.BottomExecutionMask);
}
static constexpr uint32_t getInlineDataSize() { // patched
return 0u;
}
using InterfaceDescriptorType = INTERFACE_DESCRIPTOR_DATA; // patched
} GPGPU_WALKER;
STATIC_ASSERT(60 == sizeof(GPGPU_WALKER));

View File

@@ -5808,6 +5808,9 @@ typedef struct tagCOMPUTE_WALKER {
inline uint32_t *getInlineDataPointer() {
return reinterpret_cast<uint32_t *>(&TheStructure.Common.InlineData);
}
static constexpr uint32_t getInlineDataSize() { // patched
return 32u;
}
using InterfaceDescriptorType = INTERFACE_DESCRIPTOR_DATA; // patched
} COMPUTE_WALKER;
STATIC_ASSERT(156 == sizeof(COMPUTE_WALKER));

View File

@@ -5579,6 +5579,9 @@ typedef struct tagCOMPUTE_WALKER {
inline uint32_t *getInlineDataPointer() {
return reinterpret_cast<uint32_t *>(&TheStructure.Common.InlineData);
}
static constexpr uint32_t getInlineDataSize() { // patched
return 32u;
}
using InterfaceDescriptorType = INTERFACE_DESCRIPTOR_DATA; // patched
} COMPUTE_WALKER;
STATIC_ASSERT(156 == sizeof(COMPUTE_WALKER));

View File

@@ -378,6 +378,9 @@ template void EncodeDispatchKernel<Family>::setGrfInfo<Family::INTERFACE_DESCRIP
template void EncodeDispatchKernel<Family>::appendAdditionalIDDFields<Family::INTERFACE_DESCRIPTOR_DATA>(Family::INTERFACE_DESCRIPTOR_DATA *pInterfaceDescriptor, const RootDeviceEnvironment &rootDeviceEnvironment, const uint32_t threadsPerThreadGroup, uint32_t slmTotalSize, SlmPolicy slmPolicy);
template void EncodeDispatchKernel<Family>::adjustInterfaceDescriptorData<Family::WALKER_TYPE, Family::INTERFACE_DESCRIPTOR_DATA>(Family::INTERFACE_DESCRIPTOR_DATA &interfaceDescriptor, const Device &device, const HardwareInfo &hwInfo, const uint32_t threadGroupCount, const uint32_t numGrf, Family::WALKER_TYPE &walkerCmd);
template void EncodeDispatchKernel<Family>::setupPostSyncMocs<Family::WALKER_TYPE>(Family::WALKER_TYPE &walkerCmd, const RootDeviceEnvironment &rootDeviceEnvironment, bool dcFlush);
template void EncodeDispatchKernel<Family>::encode<Family::WALKER_TYPE>(CommandContainer &container, EncodeDispatchKernelArgs &args);
template void EncodeDispatchKernel<Family>::encodeThreadData<Family::WALKER_TYPE>(Family::WALKER_TYPE &walkerCmd, const uint32_t *startWorkGroup, const uint32_t *numWorkGroups, const uint32_t *workGroupSizes, uint32_t simd, uint32_t localIdDimensions, uint32_t threadsPerThreadGroup, uint32_t threadExecutionMask, bool localIdsGenerationByRuntime, bool inlineDataProgrammingRequired, bool isIndirect, uint32_t requiredWorkGroupOrder, const RootDeviceEnvironment &rootDeviceEnvironment);
template void EncodeDispatchKernel<Family>::adjustWalkOrder<Family::WALKER_TYPE>(Family::WALKER_TYPE &walkerCmd, uint32_t requiredWorkGroupOrder, const RootDeviceEnvironment &rootDeviceEnvironment);
template struct EncodeStates<Family>;
template struct EncodeMath<Family>;

View File

@@ -189,13 +189,14 @@ void EncodeSurfaceState<Family>::appendParamsForImageFromBuffer(R_SURFACE_STATE
}
template <>
void EncodeDispatchKernel<Family>::adjustWalkOrder(WALKER_TYPE &walkerCmd, uint32_t requiredWorkGroupOrder, const RootDeviceEnvironment &rootDeviceEnvironment) {
template <typename WalkerType>
void EncodeDispatchKernel<Family>::adjustWalkOrder(WalkerType &walkerCmd, uint32_t requiredWorkGroupOrder, const RootDeviceEnvironment &rootDeviceEnvironment) {
auto &productHelper = rootDeviceEnvironment.template getHelper<ProductHelper>();
if (productHelper.isAdjustWalkOrderAvailable(rootDeviceEnvironment.getReleaseHelper())) {
if (HwWalkOrderHelper::compatibleDimensionOrders[requiredWorkGroupOrder] == HwWalkOrderHelper::linearWalk) {
walkerCmd.setDispatchWalkOrder(WALKER_TYPE::DISPATCH_WALK_ORDER::LINERAR_WALKER);
walkerCmd.setDispatchWalkOrder(WalkerType::DISPATCH_WALK_ORDER::LINERAR_WALKER);
} else if (HwWalkOrderHelper::compatibleDimensionOrders[requiredWorkGroupOrder] == HwWalkOrderHelper::yOrderWalk) {
walkerCmd.setDispatchWalkOrder(WALKER_TYPE::DISPATCH_WALK_ORDER::Y_ORDER_WALKER);
walkerCmd.setDispatchWalkOrder(WalkerType::DISPATCH_WALK_ORDER::Y_ORDER_WALKER);
}
}
}
@@ -228,6 +229,9 @@ template void EncodeDispatchKernel<Family>::setGrfInfo<Family::INTERFACE_DESCRIP
template void EncodeDispatchKernel<Family>::appendAdditionalIDDFields<Family::INTERFACE_DESCRIPTOR_DATA>(Family::INTERFACE_DESCRIPTOR_DATA *pInterfaceDescriptor, const RootDeviceEnvironment &rootDeviceEnvironment, const uint32_t threadsPerThreadGroup, uint32_t slmTotalSize, SlmPolicy slmPolicy);
template void EncodeDispatchKernel<Family>::adjustInterfaceDescriptorData<Family::WALKER_TYPE, Family::INTERFACE_DESCRIPTOR_DATA>(Family::INTERFACE_DESCRIPTOR_DATA &interfaceDescriptor, const Device &device, const HardwareInfo &hwInfo, const uint32_t threadGroupCount, const uint32_t numGrf, Family::WALKER_TYPE &walkerCmd);
template void EncodeDispatchKernel<Family>::setupPostSyncMocs<Family::WALKER_TYPE>(Family::WALKER_TYPE &walkerCmd, const RootDeviceEnvironment &rootDeviceEnvironment, bool dcFlush);
template void EncodeDispatchKernel<Family>::encode<Family::WALKER_TYPE>(CommandContainer &container, EncodeDispatchKernelArgs &args);
template void EncodeDispatchKernel<Family>::encodeThreadData<Family::WALKER_TYPE>(Family::WALKER_TYPE &walkerCmd, const uint32_t *startWorkGroup, const uint32_t *numWorkGroups, const uint32_t *workGroupSizes, uint32_t simd, uint32_t localIdDimensions, uint32_t threadsPerThreadGroup, uint32_t threadExecutionMask, bool localIdsGenerationByRuntime, bool inlineDataProgrammingRequired, bool isIndirect, uint32_t requiredWorkGroupOrder, const RootDeviceEnvironment &rootDeviceEnvironment);
template void EncodeDispatchKernel<Family>::adjustWalkOrder<Family::WALKER_TYPE>(Family::WALKER_TYPE &walkerCmd, uint32_t requiredWorkGroupOrder, const RootDeviceEnvironment &rootDeviceEnvironment);
template struct EncodeStates<Family>;
template struct EncodeMath<Family>;

View File

@@ -236,15 +236,15 @@ HWTEST2_F(CommandEncoderTests, givenRequiredWorkGroupOrderWhenCallAdjustWalkOrde
WALKER_TYPE walkerOnStart{};
uint32_t yOrder = 2u;
EncodeDispatchKernel<FamilyType>::adjustWalkOrder(walkerCmd, yOrder, rootDeviceEnvironment);
EncodeDispatchKernel<FamilyType>::template adjustWalkOrder<WALKER_TYPE>(walkerCmd, yOrder, rootDeviceEnvironment);
EXPECT_EQ(0, memcmp(&walkerOnStart, &walkerCmd, sizeof(WALKER_TYPE))); // no change
uint32_t linearOrder = 0u;
EncodeDispatchKernel<FamilyType>::adjustWalkOrder(walkerCmd, linearOrder, rootDeviceEnvironment);
EncodeDispatchKernel<FamilyType>::template adjustWalkOrder<WALKER_TYPE>(walkerCmd, linearOrder, rootDeviceEnvironment);
EXPECT_EQ(0, memcmp(&walkerOnStart, &walkerCmd, sizeof(WALKER_TYPE))); // no change
uint32_t fakeOrder = 5u;
EncodeDispatchKernel<FamilyType>::adjustWalkOrder(walkerCmd, fakeOrder, rootDeviceEnvironment);
EncodeDispatchKernel<FamilyType>::template adjustWalkOrder<WALKER_TYPE>(walkerCmd, fakeOrder, rootDeviceEnvironment);
EXPECT_EQ(0, memcmp(&walkerOnStart, &walkerCmd, sizeof(WALKER_TYPE))); // no change
}

View File

@@ -46,6 +46,7 @@ TEST_F(CommandEncodeStatesTest, givenCommandConatinerCreatedWithMaxNumAggregateI
}
HWTEST_F(CommandEncodeStatesTest, givenDispatchInterfaceWhenDispatchKernelThenWalkerCommandProgrammed) {
using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
uint32_t dims[] = {2, 1, 1};
std::unique_ptr<MockDispatchKernelEncoder> dispatchInterface(new MockDispatchKernelEncoder());
bool requiresUncachedMocs = false;
@@ -56,17 +57,17 @@ HWTEST_F(CommandEncodeStatesTest, givenDispatchInterfaceWhenDispatchKernelThenWa
dispatchArgs.dynamicStateHeap = cmdContainer->getIndirectHeap(HeapType::DYNAMIC_STATE);
}
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs);
EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
GenCmdList commands;
CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed());
using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
auto itorPC = find<WALKER_TYPE *>(commands.begin(), commands.end());
ASSERT_NE(itorPC, commands.end());
}
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesTest, givenDebugFlagSetWhenProgrammingWalkerThenSetFlushingBits) {
using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
DebugManagerStateRestore restore;
debugManager.flags.ForceComputeWalkerPostSyncFlush.set(1);
@@ -75,12 +76,11 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesTest, givenDebugFlagSetWhenProgr
bool requiresUncachedMocs = false;
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs);
EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
GenCmdList commands;
CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed());
using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
auto itor = find<WALKER_TYPE *>(commands.begin(), commands.end());
ASSERT_NE(itor, commands.end());
@@ -91,6 +91,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesTest, givenDebugFlagSetWhenProgr
using CommandEncodeStatesUncachedMocsTests = Test<CommandEncodeStatesFixture>;
HWTEST_F(CommandEncodeStatesUncachedMocsTests, whenEncodingDispatchKernelWithUncachedMocsAndDirtyHeapsThenCorrectMocsIsSet) {
using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
DebugManagerStateRestore restore;
debugManager.flags.ForceL1Caching.set(0u);
@@ -100,7 +101,7 @@ HWTEST_F(CommandEncodeStatesUncachedMocsTests, whenEncodingDispatchKernelWithUnc
bool requiresUncachedMocs = true;
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs);
EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
GenCmdList commands;
CmdParse<FamilyType>::parseCommandBuffer(commands,
@@ -118,6 +119,7 @@ HWTEST_F(CommandEncodeStatesUncachedMocsTests, whenEncodingDispatchKernelWithUnc
}
HWTEST_F(CommandEncodeStatesUncachedMocsTests, whenEncodingDispatchKernelWithUncachedMocsAndNonDirtyHeapsThenCorrectMocsIsSet) {
using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
DebugManagerStateRestore restore;
debugManager.flags.ForceL1Caching.set(0u);
@@ -127,7 +129,7 @@ HWTEST_F(CommandEncodeStatesUncachedMocsTests, whenEncodingDispatchKernelWithUnc
bool requiresUncachedMocs = true;
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs);
EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
GenCmdList commands;
CmdParse<FamilyType>::parseCommandBuffer(commands,
@@ -145,6 +147,7 @@ HWTEST_F(CommandEncodeStatesUncachedMocsTests, whenEncodingDispatchKernelWithUnc
}
HWTEST_F(CommandEncodeStatesUncachedMocsTests, whenEncodingDispatchKernelWithNonUncachedMocsAndDirtyHeapsThenSbaIsNotProgrammed) {
using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
DebugManagerStateRestore restore;
debugManager.flags.ForceL1Caching.set(0u);
@@ -154,7 +157,7 @@ HWTEST_F(CommandEncodeStatesUncachedMocsTests, whenEncodingDispatchKernelWithNon
bool requiresUncachedMocs = false;
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs);
EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
EXPECT_FALSE(dispatchArgs.requiresUncachedMocs);
GenCmdList commands;
@@ -173,6 +176,7 @@ HWTEST_F(CommandEncodeStatesUncachedMocsTests, whenEncodingDispatchKernelWithNon
}
HWTEST_F(CommandEncodeStatesUncachedMocsTests, whenEncodingDispatchKernelWithNonUncachedMocsAndNonDirtyHeapsThenSbaIsNotProgrammed) {
using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
DebugManagerStateRestore restore;
debugManager.flags.ForceL1Caching.set(0u);
@@ -182,7 +186,7 @@ HWTEST_F(CommandEncodeStatesUncachedMocsTests, whenEncodingDispatchKernelWithNon
bool requiresUncachedMocs = false;
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs);
EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
EXPECT_FALSE(dispatchArgs.requiresUncachedMocs);
GenCmdList commands;
@@ -196,6 +200,7 @@ HWTEST_F(CommandEncodeStatesUncachedMocsTests, whenEncodingDispatchKernelWithNon
}
HWTEST_F(CommandEncodeStatesUncachedMocsTests, whenEncodingDispatchKernelWithNonUncachedMocsAndNonDirtyHeapsAndSlmSizeThenSbaIsNotProgrammed) {
using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
DebugManagerStateRestore restore;
debugManager.flags.ForceL1Caching.set(0u);
@@ -209,7 +214,7 @@ HWTEST_F(CommandEncodeStatesUncachedMocsTests, whenEncodingDispatchKernelWithNon
bool requiresUncachedMocs = false;
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs);
EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
EXPECT_FALSE(dispatchArgs.requiresUncachedMocs);
GenCmdList commands;
@@ -223,6 +228,7 @@ HWTEST_F(CommandEncodeStatesUncachedMocsTests, whenEncodingDispatchKernelWithNon
}
HWTEST_F(CommandEncodeStatesTest, givenCommandContainerWithUsedAvailableSizeWhenDispatchKernelThenNextCommandBufferIsAdded) {
using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
uint32_t dims[] = {2, 1, 1};
std::unique_ptr<MockDispatchKernelEncoder> dispatchInterface(new MockDispatchKernelEncoder());
@@ -233,7 +239,7 @@ HWTEST_F(CommandEncodeStatesTest, givenCommandContainerWithUsedAvailableSizeWhen
bool requiresUncachedMocs = false;
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs);
EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
auto cmdBuffersCountAfter = cmdContainer->getCmdBufferAllocations().size();
@@ -241,6 +247,7 @@ HWTEST_F(CommandEncodeStatesTest, givenCommandContainerWithUsedAvailableSizeWhen
}
HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, givenSlmTotalSizeGraterThanZeroWhenDispatchingKernelThenSharedMemorySizeSetCorrectly) {
using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA;
uint32_t dims[] = {2, 1, 1};
std::unique_ptr<MockDispatchKernelEncoder> dispatchInterface(new MockDispatchKernelEncoder());
@@ -251,7 +258,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, givenSlmTotalSizeGraterThan
bool requiresUncachedMocs = false;
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs);
EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
auto interfaceDescriptorData = static_cast<INTERFACE_DESCRIPTOR_DATA *>(cmdContainer->getIddBlock());
auto &gfxcoreHelper = this->getHelper<GfxCoreHelper>();
@@ -262,13 +269,14 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, givenSlmTotalSizeGraterThan
}
HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, whenDispatchingKernelThenSetDenormMode) {
using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA;
uint32_t dims[] = {2, 1, 1};
std::unique_ptr<MockDispatchKernelEncoder> dispatchInterface(new MockDispatchKernelEncoder());
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, false);
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs);
EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
auto interfaceDescriptorData = static_cast<INTERFACE_DESCRIPTOR_DATA *>(cmdContainer->getIddBlock());
@@ -276,6 +284,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, whenDispatchingKernelThenSe
}
HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, givenDebuggingEnabledAndAssertInKernelWhenDispatchingKernelThenSwExceptionsAreEnabled) {
using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA;
uint32_t dims[] = {2, 1, 1};
std::unique_ptr<MockDispatchKernelEncoder> dispatchInterface(new MockDispatchKernelEncoder());
@@ -286,13 +295,14 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, givenDebuggingEnabledAndAss
dispatchInterface->kernelDescriptor.kernelAttributes.flags.usesAssert = true;
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, false);
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs);
EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
auto interfaceDescriptorData = static_cast<INTERFACE_DESCRIPTOR_DATA *>(cmdContainer->getIddBlock());
EXPECT_TRUE(interfaceDescriptorData->getSoftwareExceptionEnable());
}
HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, givenSlmTotalSizeEqualZeroWhenDispatchingKernelThenSharedMemorySizeSetCorrectly) {
using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA;
uint32_t dims[] = {2, 1, 1};
std::unique_ptr<MockDispatchKernelEncoder> dispatchInterface(new MockDispatchKernelEncoder());
@@ -303,7 +313,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, givenSlmTotalSizeEqualZeroW
bool requiresUncachedMocs = false;
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs);
EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
auto interfaceDescriptorData = static_cast<INTERFACE_DESCRIPTOR_DATA *>(cmdContainer->getIddBlock());
@@ -313,6 +323,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, givenSlmTotalSizeEqualZeroW
}
HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, givenOneBindingTableEntryWhenDispatchingKernelThenBindingTableOffsetIsCorrect) {
using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
using BINDING_TABLE_STATE = typename FamilyType::BINDING_TABLE_STATE;
using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA;
uint32_t numBindingTable = 1;
@@ -337,7 +348,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, givenOneBindingTableEntryWh
bool requiresUncachedMocs = false;
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs);
EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
auto interfaceDescriptorData = static_cast<INTERFACE_DESCRIPTOR_DATA *>(cmdContainer->getIddBlock());
@@ -345,6 +356,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, givenOneBindingTableEntryWh
}
HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, giveNumBindingTableZeroWhenDispatchingKernelThenBindingTableOffsetIsZero) {
using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
using BINDING_TABLE_STATE = typename FamilyType::BINDING_TABLE_STATE;
using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA;
uint32_t numBindingTable = 0;
@@ -367,7 +379,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, giveNumBindingTableZeroWhen
bool requiresUncachedMocs = false;
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs);
EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
auto interfaceDescriptorData = static_cast<INTERFACE_DESCRIPTOR_DATA *>(cmdContainer->getIddBlock());
@@ -375,6 +387,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, giveNumBindingTableZeroWhen
}
HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, giveNumSamplersOneWhenDispatchingKernelThensamplerStateWasCopied) {
using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
using SAMPLER_STATE = typename FamilyType::SAMPLER_STATE;
using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA;
uint32_t numSamplers = 1;
@@ -398,7 +411,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, giveNumSamplersOneWhenDispa
dispatchArgs.surfaceStateHeap = cmdContainer->getIndirectHeap(HeapType::SURFACE_STATE);
dispatchArgs.dynamicStateHeap = cmdContainer->getIndirectHeap(HeapType::DYNAMIC_STATE);
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs);
EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
auto interfaceDescriptorData = static_cast<INTERFACE_DESCRIPTOR_DATA *>(cmdContainer->getIddBlock());
@@ -412,6 +425,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, giveNumSamplersOneWhenDispa
}
HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, giveNumSamplersZeroWhenDispatchingKernelThensamplerStateWasNotCopied) {
using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
using SAMPLER_STATE = typename FamilyType::SAMPLER_STATE;
using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA;
uint32_t numSamplers = 0;
@@ -433,7 +447,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, giveNumSamplersZeroWhenDisp
bool requiresUncachedMocs = false;
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs);
EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
auto interfaceDescriptorData = static_cast<INTERFACE_DESCRIPTOR_DATA *>(cmdContainer->getIddBlock());
@@ -447,6 +461,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, giveNumSamplersZeroWhenDisp
}
HWTEST_F(CommandEncodeStatesTest, givenIndirectOffsetsCountsWhenDispatchingKernelThenCorrestMIStoreOffsetsSet) {
using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
using MI_STORE_REGISTER_MEM = typename FamilyType::MI_STORE_REGISTER_MEM;
uint32_t dims[] = {2, 1, 1};
uint32_t offsets[] = {0x10, 0x20, 0x30};
@@ -459,7 +474,7 @@ HWTEST_F(CommandEncodeStatesTest, givenIndirectOffsetsCountsWhenDispatchingKerne
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
dispatchArgs.isIndirect = true;
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs);
EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
GenCmdList commands;
CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed());
@@ -473,6 +488,7 @@ HWTEST_F(CommandEncodeStatesTest, givenIndirectOffsetsCountsWhenDispatchingKerne
}
HWTEST_F(CommandEncodeStatesTest, givenIndirectOffsetsSizeWhenDispatchingKernelThenMiMathEncoded) {
using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
using MI_MATH = typename FamilyType::MI_MATH;
uint32_t dims[] = {2, 1, 1};
uint32_t offsets[] = {0x10, 0x20, 0x30};
@@ -488,7 +504,7 @@ HWTEST_F(CommandEncodeStatesTest, givenIndirectOffsetsSizeWhenDispatchingKernelT
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
dispatchArgs.isIndirect = true;
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs);
EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
GenCmdList commands;
CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed());
@@ -498,6 +514,7 @@ HWTEST_F(CommandEncodeStatesTest, givenIndirectOffsetsSizeWhenDispatchingKernelT
}
HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, givenForceBtpPrefetchModeDebugFlagWhenDispatchingKernelThenValuesAreSetUpCorrectly) {
using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA;
using MEDIA_INTERFACE_DESCRIPTOR_LOAD = typename FamilyType::MEDIA_INTERFACE_DESCRIPTOR_LOAD;
using SAMPLER_STATE = typename FamilyType::SAMPLER_STATE;
@@ -530,7 +547,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, givenForceBtpPrefetchModeDe
bool requiresUncachedMocs = false;
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs);
EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
auto dsh = cmdContainer->getIndirectHeap(HeapType::DYNAMIC_STATE);
@@ -563,7 +580,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, givenForceBtpPrefetchModeDe
bool requiresUncachedMocs = false;
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs);
EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
auto dsh = cmdContainer->getIndirectHeap(HeapType::DYNAMIC_STATE);
@@ -591,7 +608,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, givenForceBtpPrefetchModeDe
bool requiresUncachedMocs = false;
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs);
EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
auto dsh = cmdContainer->getIndirectHeap(HeapType::DYNAMIC_STATE);
@@ -612,6 +629,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, givenForceBtpPrefetchModeDe
}
HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, givenCleanHeapsAndSlmNotChangedWhenDispatchKernelThenFlushNotAdded) {
using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
uint32_t dims[] = {2, 1, 1};
std::unique_ptr<MockDispatchKernelEncoder> dispatchInterface(new MockDispatchKernelEncoder());
@@ -622,7 +640,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, givenCleanHeapsAndSlmNotCha
bool requiresUncachedMocs = false;
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs);
EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
GenCmdList commands;
CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed());
@@ -632,6 +650,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, givenCleanHeapsAndSlmNotCha
}
HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, givenCleanHeapsAndSlmNotChangedAndUncachedMocsRequestedThenSBAIsProgrammedAndMocsAreSet) {
using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
uint32_t dims[] = {2, 1, 1};
std::unique_ptr<MockDispatchKernelEncoder> dispatchInterface(new MockDispatchKernelEncoder());
cmdContainer->slmSizeRef() = 1;
@@ -641,7 +660,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, givenCleanHeapsAndSlmNotCha
bool requiresUncachedMocs = true;
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs);
EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
GenCmdList commands;
CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed());
@@ -657,6 +676,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, givenCleanHeapsAndSlmNotCha
}
HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, givenDirtyHeapsAndSlmNotChangedWhenDispatchKernelThenHeapsAreCleanAndFlushAdded) {
using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
uint32_t dims[] = {2, 1, 1};
@@ -668,7 +688,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, givenDirtyHeapsAndSlmNotCha
bool requiresUncachedMocs = false;
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs);
EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
GenCmdList commands;
CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed());
@@ -679,6 +699,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, givenDirtyHeapsAndSlmNotCha
}
HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, givenDirtyHeapsWhenDispatchKernelThenPCIsAddedBeforeSBA) {
using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS;
@@ -692,7 +713,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, givenDirtyHeapsWhenDispatch
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
dispatchArgs.dcFlushEnable = MemorySynchronizationCommands<FamilyType>::getDcFlushEnable(true, pDevice->getRootDeviceEnvironment());
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs);
EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
GenCmdList cmdList;
CmdParse<FamilyType>::parseCommandBuffer(cmdList, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed());
@@ -720,6 +741,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, givenDirtyHeapsWhenDispatch
}
HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, givenCleanHeapsAndSlmChangedWhenDispatchKernelThenFlushAdded) {
using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
uint32_t dims[] = {2, 1, 1};
std::unique_ptr<MockDispatchKernelEncoder> dispatchInterface(new MockDispatchKernelEncoder());
@@ -732,7 +754,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, givenCleanHeapsAndSlmChange
bool requiresUncachedMocs = false;
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs);
EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
GenCmdList commands;
CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed());
@@ -743,6 +765,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, givenCleanHeapsAndSlmChange
}
HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, giveNextIddInBlockZeroWhenDispatchKernelThenMediaInterfaceDescriptorEncoded) {
using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA;
using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS;
@@ -757,7 +780,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, giveNextIddInBlockZeroWhenD
bool requiresUncachedMocs = false;
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs);
EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
GenCmdList commands;
CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed());
@@ -769,6 +792,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, giveNextIddInBlockZeroWhenD
}
HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, giveNextIddInBlockZeroWhenDispatchKernelAndDynamicStateHeapDirtyThenStateBaseAddressEncodedAndMediaInterfaceDescriptorEncoded) {
using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA;
using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS;
@@ -787,7 +811,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, giveNextIddInBlockZeroWhenD
bool requiresUncachedMocs = false;
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs);
EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
GenCmdList commands;
CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed());
@@ -799,6 +823,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, giveNextIddInBlockZeroWhenD
}
HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, giveNumSamplersOneWhenHeapIsDirtyThenSamplerStateWasCopiedAndStateBaseAddressEncoded) {
using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
using SAMPLER_STATE = typename FamilyType::SAMPLER_STATE;
using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA;
using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS;
@@ -824,7 +849,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, giveNumSamplersOneWhenHeapI
dshBeforeFlush->getSpace(dshBeforeFlush->getAvailableSpace() - NEO::EncodeDispatchKernel<FamilyType>::getSizeRequiredDsh(kernelDescriptor, cmdContainer->getNumIddPerBlock()));
auto cpuBaseBeforeFlush = dshBeforeFlush->getCpuBase();
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs);
EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
GenCmdList commands;
CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed());
@@ -848,6 +873,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, giveNumSamplersOneWhenHeapI
}
HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, giveNumSamplersOneAndNextIDDInBlockWhenHeapIsDirtyThenSamplerStateWasCopiedAndStateBaseAddressEncoded) {
using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
using SAMPLER_STATE = typename FamilyType::SAMPLER_STATE;
using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA;
using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS;
@@ -879,7 +905,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, giveNumSamplersOneAndNextID
auto cpuBaseBeforeFlush = dshBeforeFlush->getCpuBase();
auto usedBefore = cmdContainer->getIndirectHeap(HeapType::SURFACE_STATE)->getUsed();
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs);
EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
GenCmdList commands;
CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed());
@@ -903,6 +929,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, giveNumSamplersOneAndNextID
}
HWTEST_F(CommandEncodeStatesTest, givenPauseOnEnqueueSetToNeverWhenEncodingWalkerThenCommandsToPatchAreNotPresent) {
using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
DebugManagerStateRestore restorer;
debugManager.flags.PauseOnEnqueue.set(-1);
@@ -913,12 +940,13 @@ HWTEST_F(CommandEncodeStatesTest, givenPauseOnEnqueueSetToNeverWhenEncodingWalke
std::list<void *> cmdsToPatch;
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
dispatchArgs.additionalCommands = &cmdsToPatch;
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs);
EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
EXPECT_EQ(cmdsToPatch.size(), 0u);
}
HWTEST_F(CommandEncodeStatesTest, givenPauseOnEnqueueSetToAlwaysWhenEncodingWalkerThenCommandsToPatchAreFilled) {
using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
DebugManagerStateRestore restorer;
debugManager.flags.PauseOnEnqueue.set(-2);
@@ -929,7 +957,7 @@ HWTEST_F(CommandEncodeStatesTest, givenPauseOnEnqueueSetToAlwaysWhenEncodingWalk
std::list<void *> cmdsToPatch;
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
dispatchArgs.additionalCommands = &cmdsToPatch;
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs);
EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
EXPECT_EQ(cmdsToPatch.size(), 4u);
}
@@ -939,7 +967,7 @@ using EncodeDispatchKernelTest = Test<CommandEncodeStatesFixture>;
HWTEST2_F(EncodeDispatchKernelTest, givenBindfulKernelWhenDispatchingKernelThenSshFromContainerIsUsed, IsAtLeastSkl) {
using BINDING_TABLE_STATE = typename FamilyType::BINDING_TABLE_STATE;
using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA;
using WALKER = typename FamilyType::WALKER_TYPE;
using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
uint32_t numBindingTable = 1;
BINDING_TABLE_STATE bindingTableState = FamilyType::cmdInitBindingTableState;
@@ -961,7 +989,7 @@ HWTEST2_F(EncodeDispatchKernelTest, givenBindfulKernelWhenDispatchingKernelThenS
if (EncodeDispatchKernel<FamilyType>::isDshNeeded(pDevice->getDeviceInfo())) {
dispatchArgs.dynamicStateHeap = cmdContainer->getIndirectHeap(HeapType::DYNAMIC_STATE);
}
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs);
EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
auto usedAfter = cmdContainer->getIndirectHeap(HeapType::SURFACE_STATE)->getUsed();
@@ -971,7 +999,7 @@ HWTEST2_F(EncodeDispatchKernelTest, givenBindfulKernelWhenDispatchingKernelThenS
HWTEST2_F(EncodeDispatchKernelTest, givenBindlessKernelWhenDispatchingKernelThenThenSshFromContainerIsUsed, IsAtLeastSkl) {
using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA;
using WALKER = typename FamilyType::WALKER_TYPE;
using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
uint32_t numBindingTable = 1;
RENDER_SURFACE_STATE state = FamilyType::cmdInitRenderSurfaceState;
@@ -990,7 +1018,7 @@ HWTEST2_F(EncodeDispatchKernelTest, givenBindlessKernelWhenDispatchingKernelThen
auto usedBefore = cmdContainer->getIndirectHeap(HeapType::SURFACE_STATE)->getUsed();
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs);
EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
auto usedAfter = cmdContainer->getIndirectHeap(HeapType::SURFACE_STATE)->getUsed();
@@ -999,13 +1027,14 @@ HWTEST2_F(EncodeDispatchKernelTest, givenBindlessKernelWhenDispatchingKernelThen
HWTEST2_F(EncodeDispatchKernelTest, givenKernelsSharingISAParentAllocationsWhenProgrammingWalkerThenKernelStartPointerHasProperOffset, IsBeforeXeHpCore) {
using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA;
using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
auto dispatchInterface = std::make_unique<MockDispatchKernelEncoder>();
dispatchInterface->getIsaOffsetInParentAllocationResult = 8 << INTERFACE_DESCRIPTOR_DATA::KERNELSTARTPOINTER_BIT_SHIFT;
uint32_t dims[] = {2, 1, 1};
bool requiresUncachedMocs = false;
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs);
EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
auto idd = static_cast<INTERFACE_DESCRIPTOR_DATA *>(cmdContainer->getIddBlock());
EXPECT_EQ(idd->getKernelStartPointer(), dispatchInterface->getIsaAllocation()->getGpuAddressToPatch() + dispatchInterface->getIsaOffsetInParentAllocation());
@@ -1025,7 +1054,7 @@ HWTEST2_F(EncodeDispatchKernelTest, givenKernelsSharingISAParentAllocationsWhenP
uint32_t dims[] = {2, 1, 1};
bool requiresUncachedMocs = false;
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs);
EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
GenCmdList commands;
CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed());
@@ -1037,6 +1066,7 @@ HWTEST2_F(EncodeDispatchKernelTest, givenKernelsSharingISAParentAllocationsWhenP
}
HWTEST2_F(EncodeDispatchKernelTest, givenPrintKernelDispatchParametersWhenEncodingKernelThenPrintKernelDispatchParams, IsAtLeastXeHpCore) {
using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
auto dispatchInterface = std::make_unique<MockDispatchKernelEncoder>();
uint32_t dims[] = {2, 1, 1};
@@ -1046,7 +1076,7 @@ HWTEST2_F(EncodeDispatchKernelTest, givenPrintKernelDispatchParametersWhenEncodi
DebugManagerStateRestore restore;
debugManager.flags.PrintKernelDispatchParameters.set(true);
testing::internal::CaptureStdout(); // start capturing
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs);
EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
std::string outputString = testing::internal::GetCapturedStdout(); // stop capturing
EXPECT_NE(std::string::npos, outputString.find("kernel"));
@@ -1062,6 +1092,7 @@ HWTEST2_F(EncodeDispatchKernelTest, givenPrintKernelDispatchParametersWhenEncodi
HWTEST_F(EncodeDispatchKernelTest, givenNonBindlessOrStatelessArgWhenDispatchingKernelThenSurfaceStateOffsetInCrossThreadDataIsNotPatched) {
using BINDING_TABLE_STATE = typename FamilyType::BINDING_TABLE_STATE;
using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
using DataPortBindlessSurfaceExtendedMessageDescriptor = typename FamilyType::DataPortBindlessSurfaceExtendedMessageDescriptor;
uint32_t numBindingTable = 1;
BINDING_TABLE_STATE bindingTableState = FamilyType::cmdInitBindingTableState;
@@ -1113,7 +1144,7 @@ HWTEST_F(EncodeDispatchKernelTest, givenNonBindlessOrStatelessArgWhenDispatching
bool requiresUncachedMocs = false;
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs);
EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
EXPECT_EQ(pattern, *patchLocation);
@@ -1141,7 +1172,7 @@ HWTEST_F(EncodeDispatchKernelTest, givenNonBindlessOrStatelessArgWhenDispatching
memset(ioh->getCpuBase(), 0, ioh->getMaxAvailableSpace());
dispatchArgs.dispatchInterface = dispatchInterface.get();
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs);
EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
EXPECT_TRUE(memoryZeroed(ptrOffset(ioh->getCpuBase(), iohOffset), ioh->getMaxAvailableSpace() - iohOffset));
}
@@ -1350,7 +1381,7 @@ using BindlessCommandEncodeStatesContainerTest = Test<CommandEncodeStatesFixture
HWTEST_F(BindlessCommandEncodeStatesContainerTest, givenBindlessKernelAndBindlessModeEnabledWhenEncodingKernelThenCmdContainerHasNullptrSSH) {
using BINDING_TABLE_STATE = typename FamilyType::BINDING_TABLE_STATE;
using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA;
using WALKER = typename FamilyType::WALKER_TYPE;
using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
DebugManagerStateRestore dbgRestorer;
debugManager.flags.UseBindlessMode.set(1);
debugManager.flags.UseExternalAllocatorForSshAndDsh.set(1);
@@ -1377,7 +1408,7 @@ HWTEST_F(BindlessCommandEncodeStatesContainerTest, givenBindlessKernelAndBindles
EXPECT_EQ(commandContainer->getIndirectHeap(HeapType::SURFACE_STATE), nullptr);
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
EncodeDispatchKernel<FamilyType>::encode(*commandContainer.get(), dispatchArgs);
EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*commandContainer.get(), dispatchArgs);
EXPECT_EQ(commandContainer->getIndirectHeap(HeapType::SURFACE_STATE), nullptr);
}
@@ -1385,7 +1416,7 @@ HWTEST_F(BindlessCommandEncodeStatesContainerTest, givenBindlessKernelAndBindles
HWTEST_F(BindlessCommandEncodeStatesContainerTest, givenBindfulKernelWhenBindlessModeEnabledThenCmdContainerHasSsh) {
using BINDING_TABLE_STATE = typename FamilyType::BINDING_TABLE_STATE;
using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA;
using WALKER = typename FamilyType::WALKER_TYPE;
using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
DebugManagerStateRestore dbgRestorer;
debugManager.flags.UseBindlessMode.set(1);
auto commandContainer = std::make_unique<CommandContainer>();
@@ -1414,7 +1445,7 @@ HWTEST_F(BindlessCommandEncodeStatesContainerTest, givenBindfulKernelWhenBindles
EXPECT_NE(commandContainer->getIndirectHeap(HeapType::SURFACE_STATE), nullptr);
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
EncodeDispatchKernel<FamilyType>::encode(*commandContainer.get(), dispatchArgs);
EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*commandContainer.get(), dispatchArgs);
EXPECT_NE(commandContainer->getIndirectHeap(HeapType::SURFACE_STATE), nullptr);
}
@@ -1423,7 +1454,7 @@ using NgenGeneratorDispatchKernelEncodeTest = Test<CommandEncodeStatesFixture>;
HWTEST2_F(NgenGeneratorDispatchKernelEncodeTest, givenBindfulKernelAndIsNotGeneratedByIgcWhenEncodeDispatchKernelThenCmdContainerDoesNotHaveSsh, IsPVC) {
using BINDING_TABLE_STATE = typename FamilyType::BINDING_TABLE_STATE;
using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
for (auto isGeneratedByIgc : {false, true}) {
auto commandContainer = std::make_unique<MyMockCommandContainer>();
commandContainer->initialize(pDevice, nullptr, HeapSize::defaultHeapSize, true, false);
@@ -1453,7 +1484,7 @@ HWTEST2_F(NgenGeneratorDispatchKernelEncodeTest, givenBindfulKernelAndIsNotGener
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
EncodeDispatchKernel<FamilyType>::encode(*commandContainer.get(), dispatchArgs);
EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*commandContainer.get(), dispatchArgs);
if (isGeneratedByIgc) {
EXPECT_NE(commandContainer->getIndirectHeap(HeapType::SURFACE_STATE), nullptr);
@@ -1548,12 +1579,13 @@ HWTEST_F(CommandEncodeStatesTest, givenKernelInfoOfBindlessKernelWhenGettingRequ
}
HWTEST_F(CommandEncodeStatesTest, givenCommandContainerWhenIsKernelDispatchedFromImmediateCmdListTrueThenGetHeapWithRequiredSizeAndAlignmentCalled) {
using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
std::unique_ptr<MockDispatchKernelEncoder> dispatchInterface(new MockDispatchKernelEncoder());
uint32_t dims[] = {1, 1, 1};
bool requiresUncachedMocs = false;
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
dispatchArgs.isKernelDispatchedFromImmediateCmdList = true;
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs);
EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
EXPECT_NE(0u, cmdContainer->getHeapWithRequiredSizeAndAlignmentCalled);
}

View File

@@ -37,7 +37,7 @@ HWTEST2_F(CommandEncodeStatesTestDg2AndLater, givenEventAddressWhenEncodeAndPVCA
dispatchArgs.eventAddress = eventAddress;
dispatchArgs.isTimestampEvent = true;
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs);
EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
GenCmdList commands;
CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed());
@@ -62,7 +62,7 @@ HWTEST2_F(CommandEncodeStatesTestDg2AndLater, givenEventAddressWhenEncodeThenMoc
dispatchArgs.isTimestampEvent = true;
dispatchArgs.dcFlushEnable = MemorySynchronizationCommands<FamilyType>::getDcFlushEnable(true, pDevice->getRootDeviceEnvironment());
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs);
EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
GenCmdList commands;
CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed());
@@ -131,7 +131,7 @@ HWTEST2_F(CommandEncodeStatesTestDg2AndLater, givenOverridePreferredSlmAllocatio
bool requiresUncachedMocs = false;
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs);
EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
GenCmdList commands;
CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed());

View File

@@ -40,7 +40,7 @@ HWTEST2_F(CommandEncodeStatesTestPvcAndLater, givenOverrideSlmTotalSizeDebugVari
cmdContainer->reset();
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs);
EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
GenCmdList commands;
CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed());

View File

@@ -48,7 +48,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesTest, givenSlmTotalSizeGraterTha
bool requiresUncachedMocs = false;
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs);
EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
GenCmdList commands;
CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed());
@@ -75,7 +75,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesTest, givenXeHpAndLaterWhenDispa
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, false);
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs);
EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
GenCmdList commands;
CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed());
@@ -102,7 +102,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesTest, givenXeHpDebuggingEnabledA
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, false);
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs);
EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
GenCmdList commands;
CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed());
@@ -126,7 +126,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesTest, givenSimdSizeWhenDispatchi
bool requiresUncachedMocs = false;
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs);
EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
GenCmdList commands;
CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed());
@@ -149,7 +149,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesTest, givenSlmTotalSizeEqualZero
bool requiresUncachedMocs = false;
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs);
EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
GenCmdList commands;
CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed());
@@ -183,7 +183,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesTest, givenOverrideSlmTotalSizeD
cmdContainer->reset();
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs);
EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
GenCmdList commands;
CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed());
@@ -221,7 +221,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesTest, givenStatelessBufferAndIma
bool requiresUncachedMocs = false;
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs);
EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
GenCmdList commands;
CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed());
@@ -258,7 +258,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesTest, givennumBindingTableOneWhe
bool requiresUncachedMocs = false;
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs);
EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
GenCmdList commands;
CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed());
@@ -295,7 +295,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesTest, giveNumBindingTableZeroWhe
bool requiresUncachedMocs = false;
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs);
EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
GenCmdList commands;
CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed());
@@ -350,7 +350,7 @@ HWTEST2_F(CommandEncodeStatesTest, giveNumSamplersOneWhenDispatchKernelThensampl
dispatchArgs.surfaceStateHeap = cmdContainer->getIndirectHeap(HeapType::SURFACE_STATE);
dispatchArgs.dynamicStateHeap = cmdContainer->getIndirectHeap(HeapType::DYNAMIC_STATE);
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs);
EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
GenCmdList commands;
CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed());
@@ -382,7 +382,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesTest, givenEventAllocationWhenDi
dispatchArgs.eventAddress = eventAddress;
dispatchArgs.isTimestampEvent = true;
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs);
EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
GenCmdList commands;
CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed());
@@ -407,7 +407,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesTest, givenEventAddressWhenEncod
dispatchArgs.isTimestampEvent = true;
dispatchArgs.dcFlushEnable = MemorySynchronizationCommands<FamilyType>::getDcFlushEnable(true, pDevice->getRootDeviceEnvironment());
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs);
EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
GenCmdList commands;
CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed());
@@ -425,6 +425,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesTest, givenEventAddressWhenEncod
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesTest, givenCleanHeapsWhenDispatchKernelThenFlushNotAdded) {
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
uint32_t dims[] = {2, 1, 1};
std::unique_ptr<MockDispatchKernelEncoder> dispatchInterface(new MockDispatchKernelEncoder());
cmdContainer->slmSizeRef() = 1;
@@ -433,7 +434,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesTest, givenCleanHeapsWhenDispatc
bool requiresUncachedMocs = false;
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs);
EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
GenCmdList commands;
CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed());
@@ -474,7 +475,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesTest, givenForceBtpPrefetchModeD
bool requiresUncachedMocs = false;
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs);
EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
GenCmdList commands;
CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed());
@@ -508,7 +509,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesTest, givenForceBtpPrefetchModeD
bool requiresUncachedMocs = false;
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs);
EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
GenCmdList commands;
CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed());
@@ -533,7 +534,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesTest, givenForceBtpPrefetchModeD
bool requiresUncachedMocs = false;
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs);
EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
GenCmdList commands;
CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed());
@@ -580,7 +581,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesTest, givenInlineDataRequiredWhe
bool requiresUncachedMocs = false;
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs);
EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
GenCmdList commands;
CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed());
@@ -607,7 +608,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesTest, givenInlineDataRequiredIsF
bool requiresUncachedMocs = false;
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs);
EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
GenCmdList commands;
CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed());
@@ -634,7 +635,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesTest, givenInlineDataRequiredAnd
bool requiresUncachedMocs = false;
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs);
EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
GenCmdList commands;
CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed());
@@ -663,7 +664,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesTest, givenInlineDataRequiredAnd
bool requiresUncachedMocs = false;
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs);
EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
GenCmdList commands;
CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed());
@@ -1071,7 +1072,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesImplicitScaling,
dispatchArgs.eventAddress = eventAddress;
dispatchArgs.partitionCount = 2;
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs);
EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
size_t usedBuffer = cmdContainer->getCommandStream()->getUsed();
EXPECT_EQ(2u, dispatchArgs.partitionCount);
@@ -1104,7 +1105,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesImplicitScaling, givenCooperativ
dispatchArgs.isInternal = isInternal;
dispatchArgs.isCooperative = isCooperative;
dispatchArgs.partitionCount = 2;
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs);
EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
size_t containerUsedAfterBase = cmdContainer->getCommandStream()->getUsed();
@@ -1146,7 +1147,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesDynamicImplicitScaling, givenImp
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
dispatchArgs.isInternal = isInternal;
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs);
EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
size_t containerUsedAfterBase = cmdContainer->getCommandStream()->getUsed();
@@ -1159,7 +1160,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesDynamicImplicitScaling, givenImp
EXPECT_EQ(16u, baseWalkerCmd->getThreadGroupIdXDimension());
dispatchArgs.partitionCount = 2;
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs);
EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
size_t total = cmdContainer->getCommandStream()->getUsed();
size_t partitionedWalkerSize = total - containerUsedAfterBase;
@@ -1210,7 +1211,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesDynamicImplicitScaling, givenImp
dispatchArgs.partitionCount = 2;
dispatchArgs.dcFlushEnable = MemorySynchronizationCommands<FamilyType>::getDcFlushEnable(true, pDevice->getRootDeviceEnvironment());
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs);
EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
EXPECT_EQ(2u, dispatchArgs.partitionCount);
size_t partitionedWalkerSize = cmdContainer->getCommandStream()->getUsed();
@@ -1309,7 +1310,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesDynamicImplicitScaling,
dispatchArgs.isInternal = isInternal;
dispatchArgs.partitionCount = 2;
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs);
EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
EXPECT_EQ(2u, dispatchArgs.partitionCount);
size_t partitionedWalkerSize = cmdContainer->getCommandStream()->getUsed();
@@ -1367,7 +1368,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesDynamicImplicitScaling, givenImp
dispatchArgs.isInternal = isInternal;
dispatchArgs.partitionCount = 2;
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs);
EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
size_t internalWalkerSize = cmdContainer->getCommandStream()->getUsed();
@@ -1394,7 +1395,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesTest, givenNonTimestampEventWhen
dispatchArgs.eventAddress = eventAddress;
dispatchArgs.isTimestampEvent = true;
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs);
EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
GenCmdList commands;
CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed());
@@ -1408,6 +1409,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesTest, givenNonTimestampEventWhen
HWTEST2_F(CommandEncodeStatesTest,
givenDispatchInterfaceWhenDpasRequiredIsNotDefaultThenPipelineSelectCommandAdded, IsWithinXeGfxFamily) {
using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
using PIPELINE_SELECT = typename FamilyType::PIPELINE_SELECT;
uint32_t dims[] = {2, 1, 1};
std::unique_ptr<MockDispatchKernelEncoder> dispatchInterface(new MockDispatchKernelEncoder());
@@ -1420,7 +1422,7 @@ HWTEST2_F(CommandEncodeStatesTest,
bool requiresUncachedMocs = false;
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs);
EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
GenCmdList commands;
CmdParse<FamilyType>::parseCommandBuffer(commands,
@@ -1442,7 +1444,7 @@ HWTEST2_F(CommandEncodeStatesTest,
HWTEST2_F(CommandEncodeStatesTest,
givenDebugVariableWhenEncodeStateIsCalledThenSystolicValueIsOverwritten, IsWithinXeGfxFamily) {
DebugManagerStateRestore restorer;
using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
using PIPELINE_SELECT = typename FamilyType::PIPELINE_SELECT;
uint32_t dims[] = {2, 1, 1};
std::unique_ptr<MockDispatchKernelEncoder> dispatchInterface(new MockDispatchKernelEncoder());
@@ -1455,7 +1457,7 @@ HWTEST2_F(CommandEncodeStatesTest,
bool requiresUncachedMocs = false;
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs);
EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
GenCmdList commands;
CmdParse<FamilyType>::parseCommandBuffer(commands,
@@ -1477,6 +1479,7 @@ HWTEST2_F(CommandEncodeStatesTest,
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesTest,
givenDispatchInterfaceWhenDpasRequiredIsSameAsDefaultThenPipelineSelectCommandNotAdded) {
using PIPELINE_SELECT = typename FamilyType::PIPELINE_SELECT;
using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
uint32_t dims[] = {2, 1, 1};
std::unique_ptr<MockDispatchKernelEncoder> dispatchInterface(new MockDispatchKernelEncoder());
@@ -1487,7 +1490,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesTest,
bool requiresUncachedMocs = false;
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs);
EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
GenCmdList commands;
CmdParse<FamilyType>::parseCommandBuffer(commands,
@@ -1513,7 +1516,7 @@ struct CommandEncodeStatesImplicitScalingPrimaryBufferFixture : public CommandEn
template <typename FamilyType>
void testBodyFindPrimaryBatchBuffer() {
using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START;
using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
constexpr bool expectPrimary = flushTaskUsedForImmediate || usePrimaryBuffer;
uint32_t dims[] = {16, 1, 1};
@@ -1525,7 +1528,7 @@ struct CommandEncodeStatesImplicitScalingPrimaryBufferFixture : public CommandEn
dispatchArgs.eventAddress = eventAddress;
dispatchArgs.partitionCount = 2;
EncodeDispatchKernel<FamilyType>::encode(*BaseClass::cmdContainer.get(), dispatchArgs);
EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*BaseClass::cmdContainer.get(), dispatchArgs);
size_t usedBuffer = BaseClass::cmdContainer->getCommandStream()->getUsed();
EXPECT_EQ(2u, dispatchArgs.partitionCount);

View File

@@ -296,6 +296,7 @@ HWTEST2_F(EncodeConditionalBatchBufferStartTest, whenProgrammingConditionalRegRe
using CommandEncodeStatesXeHpcAndLaterTests = Test<CommandEncodeStatesFixture>;
HWTEST2_F(CommandEncodeStatesXeHpcAndLaterTests, givenDebugFlagSetWhenProgrammingWalkerThenSetFlushingBits, IsAtLeastXeHpcCore) {
using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
DebugManagerStateRestore restore;
debugManager.flags.ForceComputeWalkerPostSyncFlush.set(1);
@@ -304,12 +305,11 @@ HWTEST2_F(CommandEncodeStatesXeHpcAndLaterTests, givenDebugFlagSetWhenProgrammin
bool requiresUncachedMocs = false;
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs);
EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
GenCmdList commands;
CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed());
using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
auto itor = find<WALKER_TYPE *>(commands.begin(), commands.end());
ASSERT_NE(itor, commands.end());

View File

@@ -63,6 +63,7 @@ PVCTEST_F(CommandEncodeStatesPvcTest, GivenSmallSlmTotalSizesWhenSetAdditionalIn
using EncodeKernelPvcTest = Test<CommandEncodeStatesFixture>;
PVCTEST_F(EncodeKernelPvcTest, givenRevisionBAndAboveWhenSpecialModeRequiredThenDontReprogramPipelineSelect) {
using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
bool requiresUncachedMocs = false;
auto hwInfo = pDevice->getRootDeviceEnvironment().getMutableHardwareInfo();
@@ -92,7 +93,7 @@ PVCTEST_F(EncodeKernelPvcTest, givenRevisionBAndAboveWhenSpecialModeRequiredThen
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
dispatchArgs.preemptionMode = NEO::PreemptionMode::Initial;
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs);
EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
EXPECT_EQ(testInput.expectedValue, cmdContainer->lastPipelineSelectModeRequiredRef());
}
}
@@ -195,7 +196,7 @@ PVCTEST_F(EncodeKernelPvcTest, givenDefaultSettingForFenceAsPostSyncOperationInC
dispatchArgs.isKernelUsingSystemAllocation = true;
dispatchArgs.isHostScopeSignalEvent = true;
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs);
EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
GenCmdList commands;
CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed());

View File

@@ -319,7 +319,7 @@ XE_HPC_CORETEST_F(EncodeKernelXeHpcCoreTest, givenNoFenceAsPostSyncOperationInCo
bool requiresUncachedMocs = false;
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs);
EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
GenCmdList commands;
CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed());
@@ -345,7 +345,7 @@ XE_HPC_CORETEST_F(EncodeKernelXeHpcCoreTest, givenFenceAsPostSyncOperationInComp
bool requiresUncachedMocs = false;
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs);
EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
GenCmdList commands;
CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed());
@@ -378,7 +378,7 @@ XE_HPC_CORETEST_F(EncodeKernelXeHpcCoreTest, givenDefaultSettingForFenceWhenKern
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
dispatchArgs.isKernelUsingSystemAllocation = true;
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs);
EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
GenCmdList commands;
CmdParse<FamilyType>::parseCommandBuffer(
@@ -414,7 +414,7 @@ XE_HPC_CORETEST_F(EncodeKernelXeHpcCoreTest, givenDefaultSettingForFenceWhenEven
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
dispatchArgs.isHostScopeSignalEvent = true;
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs);
EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
GenCmdList commands;
CmdParse<FamilyType>::parseCommandBuffer(
@@ -450,7 +450,7 @@ XE_HPC_CORETEST_F(EncodeKernelXeHpcCoreTest, givenDefaultSettingForFenceWhenKern
dispatchArgs.isKernelUsingSystemAllocation = true;
dispatchArgs.isHostScopeSignalEvent = true;
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs);
EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
GenCmdList commands;
CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed());
@@ -464,6 +464,7 @@ XE_HPC_CORETEST_F(EncodeKernelXeHpcCoreTest, givenDefaultSettingForFenceWhenKern
}
XE_HPC_CORETEST_F(EncodeKernelXeHpcCoreTest, givenCleanHeapsAndSlmNotChangedAndUncachedMocsRequestedThenSBAIsProgrammedAndMocsAreSet) {
using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
uint32_t dims[] = {2, 1, 1};
std::unique_ptr<MockDispatchKernelEncoder> dispatchInterface(new MockDispatchKernelEncoder());
cmdContainer->slmSizeRef() = 1;
@@ -473,7 +474,7 @@ XE_HPC_CORETEST_F(EncodeKernelXeHpcCoreTest, givenCleanHeapsAndSlmNotChangedAndU
bool requiresUncachedMocs = true;
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs);
EncodeDispatchKernel<FamilyType>::template encode<WALKER_TYPE>(*cmdContainer.get(), dispatchArgs);
GenCmdList commands;
CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed());