mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-19 06:24:51 +08:00
feature: adding flag to block dispatch implicit scaling commands
- this feature is part of making a compute walker command view
- the compute walker is programmed for implicit scaling but not dispatched
- together with the new flag comes a refactor to reduce the number of arguments

Related-To: NEO-11972
Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
2b71ffa7ce
commit
b33fe6ccf1
@@ -157,24 +157,31 @@ inline void HardwareInterface<GfxFamily>::programWalker(
|
||||
if (partitionWalker) {
|
||||
const uint64_t workPartitionAllocationGpuVa = defaultCsr->getWorkPartitionAllocationGpuAddress();
|
||||
uint32_t partitionCount = 0u;
|
||||
RequiredPartitionDim requiredPartitionDim = kernel.usesImages() ? RequiredPartitionDim::x : RequiredPartitionDim::none;
|
||||
|
||||
ImplicitScalingDispatchCommandArgs implicitScalingArgs{
|
||||
workPartitionAllocationGpuVa, // workPartitionAllocationGpuVa
|
||||
&hwInfo, // hwInfo
|
||||
nullptr, // outWalkerPtr
|
||||
requiredPartitionDim, // requiredPartitionDim
|
||||
partitionCount, // partitionCount
|
||||
false, // useSecondaryBatchBuffer
|
||||
false, // apiSelfCleanup
|
||||
queueCsr.getDcFlushSupport(), // dcFlush
|
||||
kernel.isSingleSubdevicePreferred(), // forceExecutionOnSingleTile
|
||||
false}; // blockDispatchToCommandBuffer
|
||||
|
||||
ImplicitScalingDispatch<GfxFamily>::template dispatchCommands<WalkerType>(commandStream,
|
||||
walkerCmd,
|
||||
nullptr,
|
||||
devices,
|
||||
kernel.usesImages() ? RequiredPartitionDim::x : RequiredPartitionDim::none,
|
||||
partitionCount,
|
||||
false,
|
||||
false,
|
||||
queueCsr.getDcFlushSupport(),
|
||||
kernel.isSingleSubdevicePreferred(),
|
||||
workPartitionAllocationGpuVa,
|
||||
hwInfo);
|
||||
implicitScalingArgs);
|
||||
|
||||
if (queueCsr.isStaticWorkPartitioningEnabled()) {
|
||||
queueCsr.setActivePartitions(std::max(queueCsr.getActivePartitions(), partitionCount));
|
||||
queueCsr.setActivePartitions(std::max(queueCsr.getActivePartitions(), implicitScalingArgs.partitionCount));
|
||||
}
|
||||
|
||||
if (timestampPacketNode) {
|
||||
timestampPacketNode->setPacketsUsed(partitionCount);
|
||||
timestampPacketNode->setPacketsUsed(implicitScalingArgs.partitionCount);
|
||||
}
|
||||
} else {
|
||||
auto computeWalkerOnStream = commandStream.getSpaceForCmd<WalkerType>();
|
||||
|
||||
@@ -412,18 +412,23 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
|
||||
if (args.partitionCount > 1 && !args.isInternal) {
|
||||
const uint64_t workPartitionAllocationGpuVa = args.device->getDefaultEngine().commandStreamReceiver->getWorkPartitionAllocationGpuAddress();
|
||||
|
||||
ImplicitScalingDispatchCommandArgs implicitScalingArgs{
|
||||
workPartitionAllocationGpuVa, // workPartitionAllocationGpuVa
|
||||
&hwInfo, // hwInfo
|
||||
&args.outWalkerPtr, // outWalkerPtr
|
||||
args.requiredPartitionDim, // requiredPartitionDim
|
||||
args.partitionCount, // partitionCount
|
||||
!(container.getFlushTaskUsedForImmediate() || container.isUsingPrimaryBuffer()), // useSecondaryBatchBuffer
|
||||
!args.isKernelDispatchedFromImmediateCmdList, // apiSelfCleanup
|
||||
args.dcFlushEnable, // dcFlush
|
||||
gfxCoreHelper.singleTileExecImplicitScalingRequired(args.isCooperative), // forceExecutionOnSingleTile
|
||||
args.makeCommandView}; // blockDispatchToCommandBuffer
|
||||
|
||||
ImplicitScalingDispatch<Family>::dispatchCommands(*listCmdBufferStream,
|
||||
walkerCmd,
|
||||
&args.outWalkerPtr,
|
||||
args.device->getDeviceBitfield(),
|
||||
args.requiredPartitionDim,
|
||||
args.partitionCount,
|
||||
!(container.getFlushTaskUsedForImmediate() || container.isUsingPrimaryBuffer()),
|
||||
!args.isKernelDispatchedFromImmediateCmdList,
|
||||
args.dcFlushEnable,
|
||||
gfxCoreHelper.singleTileExecImplicitScalingRequired(args.isCooperative),
|
||||
workPartitionAllocationGpuVa,
|
||||
hwInfo);
|
||||
implicitScalingArgs);
|
||||
args.partitionCount = implicitScalingArgs.partitionCount;
|
||||
} else {
|
||||
args.partitionCount = 1;
|
||||
if (!args.makeCommandView) {
|
||||
|
||||
@@ -37,6 +37,21 @@ struct ImplicitScalingHelper {
|
||||
static bool pipeControlBeforeCleanupAtomicSyncRequired();
|
||||
};
|
||||
|
||||
struct ImplicitScalingDispatchCommandArgs {
|
||||
uint64_t workPartitionAllocationGpuVa = 0;
|
||||
const HardwareInfo *hwInfo = nullptr;
|
||||
void **outWalkerPtr = nullptr;
|
||||
|
||||
RequiredPartitionDim requiredPartitionDim = RequiredPartitionDim::none;
|
||||
uint32_t partitionCount = 0;
|
||||
|
||||
bool useSecondaryBatchBuffer = false;
|
||||
bool apiSelfCleanup = false;
|
||||
bool dcFlush = false;
|
||||
bool forceExecutionOnSingleTile = false;
|
||||
bool blockDispatchToCommandBuffer = false;
|
||||
};
|
||||
|
||||
template <typename GfxFamily>
|
||||
struct ImplicitScalingDispatch {
|
||||
using DefaultWalkerType = typename GfxFamily::DefaultWalkerType;
|
||||
@@ -51,16 +66,8 @@ struct ImplicitScalingDispatch {
|
||||
template <typename WalkerType>
|
||||
static void dispatchCommands(LinearStream &commandStream,
|
||||
WalkerType &walkerCmd,
|
||||
void **outWalkerPtr,
|
||||
const DeviceBitfield &devices,
|
||||
RequiredPartitionDim requiredPartitionDim,
|
||||
uint32_t &partitionCount,
|
||||
bool useSecondaryBatchBuffer,
|
||||
bool apiSelfCleanup,
|
||||
bool dcFlush,
|
||||
bool forceExecutionOnSingleTile,
|
||||
uint64_t workPartitionAllocationGpuVa,
|
||||
const HardwareInfo &hwInfo);
|
||||
ImplicitScalingDispatchCommandArgs &dispatchCommandArgs);
|
||||
|
||||
static bool &getPipeControlStallRequired();
|
||||
|
||||
|
||||
@@ -18,9 +18,7 @@ size_t ImplicitScalingDispatch<GfxFamily>::getSize(bool apiSelfCleanup, bool pre
|
||||
|
||||
template <typename GfxFamily>
|
||||
template <typename WalkerType>
|
||||
void ImplicitScalingDispatch<GfxFamily>::dispatchCommands(LinearStream &commandStream, WalkerType &walkerCmd, void **outWalkerPtr, const DeviceBitfield &devices, NEO::RequiredPartitionDim requiredPartitionDim,
|
||||
uint32_t &partitionCount, bool useSecondaryBatchBuffer, bool apiSelfCleanup, bool dcFlush, bool forceExecutionOnSingleTile, uint64_t workPartitionAllocationGpuVa,
|
||||
const HardwareInfo &hwInfo) {
|
||||
void ImplicitScalingDispatch<GfxFamily>::dispatchCommands(LinearStream &commandStream, WalkerType &walkerCmd, const DeviceBitfield &devices, ImplicitScalingDispatchCommandArgs &dispatchCommandArgs) {
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
@@ -74,10 +72,7 @@ template <>
|
||||
bool ImplicitScalingDispatch<Family>::pipeControlStallRequired = true;
|
||||
|
||||
template struct ImplicitScalingDispatch<Family>;
|
||||
template void ImplicitScalingDispatch<Family>::dispatchCommands<Family::DefaultWalkerType>(LinearStream &commandStream, Family::DefaultWalkerType &walkerCmd, void **outWalkerPtr,
|
||||
const DeviceBitfield &devices, RequiredPartitionDim requiredPartitionDim, uint32_t &partitionCount,
|
||||
bool useSecondaryBatchBuffer, bool apiSelfCleanup, bool dcFlush, bool forceExecutionOnSingleTile,
|
||||
uint64_t workPartitionAllocationGpuVa, const HardwareInfo &hwInfo);
|
||||
template void ImplicitScalingDispatch<Family>::dispatchCommands<Family::DefaultWalkerType>(LinearStream &commandStream, Family::DefaultWalkerType &walkerCmd, const DeviceBitfield &devices, ImplicitScalingDispatchCommandArgs &dispatchCommandArgs);
|
||||
template size_t ImplicitScalingDispatch<Family>::getSize<Family::DefaultWalkerType>(bool apiSelfCleanup, bool preferStaticPartitioning, const DeviceBitfield &devices, const Vec3<size_t> &groupStart, const Vec3<size_t> &groupCount);
|
||||
|
||||
} // namespace NEO
|
||||
|
||||
@@ -17,23 +17,18 @@
|
||||
namespace NEO {
|
||||
|
||||
template <typename GfxFamily>
|
||||
WalkerPartition::WalkerPartitionArgs prepareWalkerPartitionArgs(uint64_t workPartitionAllocationGpuVa,
|
||||
WalkerPartition::WalkerPartitionArgs prepareWalkerPartitionArgs(ImplicitScalingDispatchCommandArgs &dispatchCommandArgs,
|
||||
uint32_t tileCount,
|
||||
uint32_t partitionCount,
|
||||
bool emitSelfCleanup,
|
||||
bool preferStaticPartitioning,
|
||||
bool staticPartitioning,
|
||||
bool useSecondaryBatchBuffer,
|
||||
bool dcFlush,
|
||||
bool forceExecutionOnSingleTile) {
|
||||
bool staticPartitioning) {
|
||||
WalkerPartition::WalkerPartitionArgs args = {};
|
||||
|
||||
args.workPartitionAllocationGpuVa = workPartitionAllocationGpuVa;
|
||||
args.partitionCount = partitionCount;
|
||||
args.workPartitionAllocationGpuVa = dispatchCommandArgs.workPartitionAllocationGpuVa;
|
||||
args.partitionCount = dispatchCommandArgs.partitionCount;
|
||||
args.tileCount = tileCount;
|
||||
args.staticPartitioning = staticPartitioning;
|
||||
args.preferredStaticPartitioning = preferStaticPartitioning;
|
||||
args.forceExecutionOnSingleTile = forceExecutionOnSingleTile;
|
||||
args.forceExecutionOnSingleTile = dispatchCommandArgs.forceExecutionOnSingleTile;
|
||||
|
||||
args.useAtomicsForSelfCleanup = ImplicitScalingHelper::isAtomicsUsedForSelfCleanup();
|
||||
args.initializeWparidRegister = ImplicitScalingHelper::isWparidRegisterInitializationRequired();
|
||||
@@ -44,14 +39,16 @@ WalkerPartition::WalkerPartitionArgs prepareWalkerPartitionArgs(uint64_t workPar
|
||||
args.crossTileAtomicSynchronization = ImplicitScalingHelper::isCrossTileAtomicRequired(args.emitPipeControlStall);
|
||||
args.semaphoreProgrammingRequired = ImplicitScalingHelper::isSemaphoreProgrammingRequired();
|
||||
|
||||
args.emitSelfCleanup = ImplicitScalingHelper::isSelfCleanupRequired(args, emitSelfCleanup);
|
||||
args.emitSelfCleanup = ImplicitScalingHelper::isSelfCleanupRequired(args, dispatchCommandArgs.apiSelfCleanup);
|
||||
args.emitBatchBufferEnd = false;
|
||||
args.secondaryBatchBuffer = useSecondaryBatchBuffer;
|
||||
args.secondaryBatchBuffer = dispatchCommandArgs.useSecondaryBatchBuffer;
|
||||
|
||||
args.dcFlushEnable = dcFlush;
|
||||
args.dcFlushEnable = dispatchCommandArgs.dcFlush;
|
||||
|
||||
args.pipeControlBeforeCleanupCrossTileSync = ImplicitScalingHelper::pipeControlBeforeCleanupAtomicSyncRequired();
|
||||
|
||||
args.blockDispatchToCommandBuffer = dispatchCommandArgs.blockDispatchToCommandBuffer;
|
||||
|
||||
return args;
|
||||
}
|
||||
|
||||
@@ -74,15 +71,14 @@ size_t ImplicitScalingDispatch<GfxFamily>::getSize(bool apiSelfCleanup,
|
||||
&partitionType,
|
||||
&staticPartitioning);
|
||||
UNRECOVERABLE_IF(staticPartitioning && (tileCount != partitionCount));
|
||||
WalkerPartition::WalkerPartitionArgs args = prepareWalkerPartitionArgs<GfxFamily>(0u,
|
||||
ImplicitScalingDispatchCommandArgs dispatchCommandArgs = {};
|
||||
dispatchCommandArgs.partitionCount = partitionCount;
|
||||
dispatchCommandArgs.apiSelfCleanup = apiSelfCleanup;
|
||||
|
||||
WalkerPartition::WalkerPartitionArgs args = prepareWalkerPartitionArgs<GfxFamily>(dispatchCommandArgs,
|
||||
tileCount,
|
||||
partitionCount,
|
||||
apiSelfCleanup,
|
||||
preferStaticPartitioning,
|
||||
staticPartitioning,
|
||||
false,
|
||||
false,
|
||||
false);
|
||||
staticPartitioning);
|
||||
|
||||
return static_cast<size_t>(WalkerPartition::estimateSpaceRequiredInCommandBuffer<GfxFamily, WalkerType>(args));
|
||||
}
|
||||
@@ -91,62 +87,58 @@ template <typename GfxFamily>
|
||||
template <typename WalkerType>
|
||||
void ImplicitScalingDispatch<GfxFamily>::dispatchCommands(LinearStream &commandStream,
|
||||
WalkerType &walkerCmd,
|
||||
void **outWalkerPtr,
|
||||
const DeviceBitfield &devices,
|
||||
NEO::RequiredPartitionDim requiredPartitionDim,
|
||||
uint32_t &partitionCount,
|
||||
bool useSecondaryBatchBuffer,
|
||||
bool apiSelfCleanup,
|
||||
bool dcFlush,
|
||||
bool forceExecutionOnSingleTile,
|
||||
uint64_t workPartitionAllocationGpuVa,
|
||||
const HardwareInfo &hwInfo) {
|
||||
ImplicitScalingDispatchCommandArgs &dispatchCommandArgs) {
|
||||
uint32_t totalProgrammedSize = 0u;
|
||||
const uint32_t tileCount = static_cast<uint32_t>(devices.count());
|
||||
const bool preferStaticPartitioning = workPartitionAllocationGpuVa != 0u;
|
||||
const bool preferStaticPartitioning = dispatchCommandArgs.workPartitionAllocationGpuVa != 0u;
|
||||
|
||||
bool staticPartitioning = false;
|
||||
partitionCount = WalkerPartition::computePartitionCountAndSetPartitionType<GfxFamily, WalkerType>(&walkerCmd, requiredPartitionDim, tileCount, preferStaticPartitioning, &staticPartitioning);
|
||||
dispatchCommandArgs.partitionCount = WalkerPartition::computePartitionCountAndSetPartitionType<GfxFamily, WalkerType>(&walkerCmd, dispatchCommandArgs.requiredPartitionDim, tileCount, preferStaticPartitioning, &staticPartitioning);
|
||||
|
||||
WalkerPartition::WalkerPartitionArgs args = prepareWalkerPartitionArgs<GfxFamily>(workPartitionAllocationGpuVa,
|
||||
tileCount,
|
||||
partitionCount,
|
||||
apiSelfCleanup,
|
||||
preferStaticPartitioning,
|
||||
staticPartitioning,
|
||||
useSecondaryBatchBuffer,
|
||||
dcFlush,
|
||||
forceExecutionOnSingleTile);
|
||||
WalkerPartition::WalkerPartitionArgs walkerPartitionArgs = prepareWalkerPartitionArgs<GfxFamily>(dispatchCommandArgs,
|
||||
tileCount,
|
||||
preferStaticPartitioning,
|
||||
staticPartitioning);
|
||||
size_t dispatchCommandsSize = 0;
|
||||
void *commandBuffer = nullptr;
|
||||
uint64_t cmdBufferGpuAddress = 0;
|
||||
|
||||
auto dispatchCommandsSize = getSize<WalkerType>(apiSelfCleanup, preferStaticPartitioning, devices, {walkerCmd.getThreadGroupIdStartingX(), walkerCmd.getThreadGroupIdStartingY(), walkerCmd.getThreadGroupIdStartingZ()}, {walkerCmd.getThreadGroupIdXDimension(), walkerCmd.getThreadGroupIdYDimension(), walkerCmd.getThreadGroupIdZDimension()});
|
||||
void *commandBuffer = commandStream.getSpace(dispatchCommandsSize);
|
||||
uint64_t cmdBufferGpuAddress = commandStream.getGraphicsAllocation()->getGpuAddress() + commandStream.getUsed() - dispatchCommandsSize;
|
||||
if (!dispatchCommandArgs.blockDispatchToCommandBuffer) {
|
||||
dispatchCommandsSize = getSize<WalkerType>(dispatchCommandArgs.apiSelfCleanup,
|
||||
preferStaticPartitioning,
|
||||
devices,
|
||||
{walkerCmd.getThreadGroupIdStartingX(), walkerCmd.getThreadGroupIdStartingY(), walkerCmd.getThreadGroupIdStartingZ()},
|
||||
{walkerCmd.getThreadGroupIdXDimension(), walkerCmd.getThreadGroupIdYDimension(), walkerCmd.getThreadGroupIdZDimension()});
|
||||
commandBuffer = commandStream.getSpace(dispatchCommandsSize);
|
||||
cmdBufferGpuAddress = commandStream.getGraphicsAllocation()->getGpuAddress() + commandStream.getUsed() - dispatchCommandsSize;
|
||||
}
|
||||
|
||||
if (staticPartitioning) {
|
||||
UNRECOVERABLE_IF(tileCount != partitionCount);
|
||||
UNRECOVERABLE_IF(tileCount != dispatchCommandArgs.partitionCount);
|
||||
WalkerPartition::constructStaticallyPartitionedCommandBuffer<GfxFamily, WalkerType>(commandBuffer,
|
||||
outWalkerPtr,
|
||||
dispatchCommandArgs.outWalkerPtr,
|
||||
cmdBufferGpuAddress,
|
||||
&walkerCmd,
|
||||
totalProgrammedSize,
|
||||
args,
|
||||
hwInfo);
|
||||
walkerPartitionArgs,
|
||||
*dispatchCommandArgs.hwInfo);
|
||||
} else {
|
||||
if (debugManager.flags.ExperimentalSetWalkerPartitionCount.get()) {
|
||||
partitionCount = debugManager.flags.ExperimentalSetWalkerPartitionCount.get();
|
||||
if (partitionCount == 1u) {
|
||||
dispatchCommandArgs.partitionCount = debugManager.flags.ExperimentalSetWalkerPartitionCount.get();
|
||||
if (dispatchCommandArgs.partitionCount == 1u) {
|
||||
walkerCmd.setPartitionType(WalkerType::PARTITION_TYPE::PARTITION_TYPE_DISABLED);
|
||||
}
|
||||
args.partitionCount = partitionCount;
|
||||
walkerPartitionArgs.partitionCount = dispatchCommandArgs.partitionCount;
|
||||
}
|
||||
|
||||
WalkerPartition::constructDynamicallyPartitionedCommandBuffer<GfxFamily, WalkerType>(commandBuffer,
|
||||
outWalkerPtr,
|
||||
dispatchCommandArgs.outWalkerPtr,
|
||||
cmdBufferGpuAddress,
|
||||
&walkerCmd,
|
||||
totalProgrammedSize,
|
||||
args,
|
||||
hwInfo);
|
||||
walkerPartitionArgs,
|
||||
*dispatchCommandArgs.hwInfo);
|
||||
}
|
||||
UNRECOVERABLE_IF(totalProgrammedSize != dispatchCommandsSize);
|
||||
}
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2021-2022 Intel Corporation
|
||||
* Copyright (C) 2021-2024 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -32,6 +32,7 @@ struct WalkerPartitionArgs {
|
||||
bool pipeControlBeforeCleanupCrossTileSync = false;
|
||||
bool dcFlushEnable = false;
|
||||
bool forceExecutionOnSingleTile = false;
|
||||
bool blockDispatchToCommandBuffer = false;
|
||||
};
|
||||
|
||||
inline constexpr uint32_t wparidCCSOffset = 0x221C;
|
||||
|
||||
@@ -496,8 +496,12 @@ void *programPartitionedWalker(void *&inputAddress, uint32_t &totalBytesProgramm
|
||||
WalkerType *inputWalker,
|
||||
uint32_t partitionCount,
|
||||
uint32_t tileCount,
|
||||
bool forceExecutionOnSingleTile) {
|
||||
auto computeWalker = putCommand<WalkerType>(inputAddress, totalBytesProgrammed);
|
||||
bool forceExecutionOnSingleTile,
|
||||
bool blockDispatchToCommandBuffer) {
|
||||
WalkerType *computeWalker = nullptr;
|
||||
if (!blockDispatchToCommandBuffer) {
|
||||
computeWalker = putCommand<WalkerType>(inputAddress, totalBytesProgrammed);
|
||||
}
|
||||
|
||||
if (partitionCount > 1) {
|
||||
auto partitionType = inputWalker->getPartitionType();
|
||||
@@ -527,7 +531,9 @@ void *programPartitionedWalker(void *&inputAddress, uint32_t &totalBytesProgramm
|
||||
appendWalkerFields<GfxFamily, WalkerType>(*inputWalker, tileCount, workgroupCount);
|
||||
}
|
||||
|
||||
*computeWalker = *inputWalker;
|
||||
if (!blockDispatchToCommandBuffer) {
|
||||
*computeWalker = *inputWalker;
|
||||
}
|
||||
|
||||
return computeWalker;
|
||||
}
|
||||
@@ -639,7 +645,7 @@ void constructDynamicallyPartitionedCommandBuffer(void *cpuPointer,
|
||||
args.secondaryBatchBuffer);
|
||||
|
||||
// Walker section
|
||||
auto walkerPtr = programPartitionedWalker<GfxFamily, WalkerType>(currentBatchBufferPointer, totalBytesProgrammed, inputWalker, args.partitionCount, args.tileCount, args.forceExecutionOnSingleTile);
|
||||
auto walkerPtr = programPartitionedWalker<GfxFamily, WalkerType>(currentBatchBufferPointer, totalBytesProgrammed, inputWalker, args.partitionCount, args.tileCount, args.forceExecutionOnSingleTile, args.blockDispatchToCommandBuffer);
|
||||
if (outWalkerPtr) {
|
||||
*outWalkerPtr = walkerPtr;
|
||||
}
|
||||
@@ -720,68 +726,74 @@ void constructStaticallyPartitionedCommandBuffer(void *cpuPointer,
|
||||
void *currentBatchBufferPointer = cpuPointer;
|
||||
|
||||
// Get address of the control section
|
||||
const auto controlSectionOffset = computeStaticPartitioningControlSectionOffset<GfxFamily, WalkerType>(args);
|
||||
const auto afterControlSectionOffset = controlSectionOffset + sizeof(StaticPartitioningControlSection);
|
||||
const auto controlSectionOffset = args.blockDispatchToCommandBuffer ? 0u : computeStaticPartitioningControlSectionOffset<GfxFamily, WalkerType>(args);
|
||||
const auto afterControlSectionOffset = args.blockDispatchToCommandBuffer ? 0u : controlSectionOffset + sizeof(StaticPartitioningControlSection);
|
||||
|
||||
// Synchronize tiles before walker
|
||||
if (args.synchronizeBeforeExecution) {
|
||||
const auto atomicAddress = gpuAddressOfAllocation + controlSectionOffset + offsetof(StaticPartitioningControlSection, synchronizeBeforeWalkerCounter);
|
||||
programTilesSynchronizationWithAtomics<GfxFamily>(currentBatchBufferPointer, totalBytesProgrammed, atomicAddress, args.tileCount);
|
||||
if (!args.blockDispatchToCommandBuffer) {
|
||||
// Synchronize tiles before walker
|
||||
if (args.synchronizeBeforeExecution) {
|
||||
const auto atomicAddress = gpuAddressOfAllocation + controlSectionOffset + offsetof(StaticPartitioningControlSection, synchronizeBeforeWalkerCounter);
|
||||
programTilesSynchronizationWithAtomics<GfxFamily>(currentBatchBufferPointer, totalBytesProgrammed, atomicAddress, args.tileCount);
|
||||
}
|
||||
|
||||
// Load partition ID to wparid register and execute walker
|
||||
if (args.initializeWparidRegister) {
|
||||
programMiLoadRegisterMem<GfxFamily>(currentBatchBufferPointer, totalBytesProgrammed, args.workPartitionAllocationGpuVa, wparidCCSOffset);
|
||||
}
|
||||
}
|
||||
|
||||
// Load partition ID to wparid register and execute walker
|
||||
if (args.initializeWparidRegister) {
|
||||
programMiLoadRegisterMem<GfxFamily>(currentBatchBufferPointer, totalBytesProgrammed, args.workPartitionAllocationGpuVa, wparidCCSOffset);
|
||||
}
|
||||
auto walkerPtr = programPartitionedWalker<GfxFamily>(currentBatchBufferPointer, totalBytesProgrammed, inputWalker, args.partitionCount, args.tileCount, args.forceExecutionOnSingleTile);
|
||||
if (outWalkerPtr) {
|
||||
*outWalkerPtr = walkerPtr;
|
||||
}
|
||||
auto walkerPtr = programPartitionedWalker<GfxFamily>(currentBatchBufferPointer, totalBytesProgrammed, inputWalker, args.partitionCount, args.tileCount, args.forceExecutionOnSingleTile, args.blockDispatchToCommandBuffer);
|
||||
|
||||
// Prepare for cleanup section
|
||||
if (args.emitSelfCleanup) {
|
||||
const auto finalSyncTileCountField = gpuAddressOfAllocation + controlSectionOffset + offsetof(StaticPartitioningControlSection, finalSyncTileCounter);
|
||||
programSelfCleanupSection<GfxFamily>(currentBatchBufferPointer, totalBytesProgrammed, finalSyncTileCountField, args.useAtomicsForSelfCleanup);
|
||||
}
|
||||
if (!args.blockDispatchToCommandBuffer) {
|
||||
if (outWalkerPtr) {
|
||||
*outWalkerPtr = walkerPtr;
|
||||
}
|
||||
|
||||
if (args.emitPipeControlStall) {
|
||||
NEO::PipeControlArgs pipeControlArgs;
|
||||
pipeControlArgs.dcFlushEnable = args.dcFlushEnable;
|
||||
programPipeControlCommand<GfxFamily>(currentBatchBufferPointer, totalBytesProgrammed, pipeControlArgs);
|
||||
}
|
||||
// Prepare for cleanup section
|
||||
if (args.emitSelfCleanup) {
|
||||
const auto finalSyncTileCountField = gpuAddressOfAllocation + controlSectionOffset + offsetof(StaticPartitioningControlSection, finalSyncTileCounter);
|
||||
programSelfCleanupSection<GfxFamily>(currentBatchBufferPointer, totalBytesProgrammed, finalSyncTileCountField, args.useAtomicsForSelfCleanup);
|
||||
}
|
||||
|
||||
// Synchronize tiles after walker
|
||||
if (args.semaphoreProgrammingRequired) {
|
||||
programTilesSynchronizationWithPostSyncs<GfxFamily, WalkerType>(currentBatchBufferPointer, totalBytesProgrammed, inputWalker, args.partitionCount);
|
||||
}
|
||||
if (args.emitPipeControlStall) {
|
||||
NEO::PipeControlArgs pipeControlArgs;
|
||||
pipeControlArgs.dcFlushEnable = args.dcFlushEnable;
|
||||
programPipeControlCommand<GfxFamily>(currentBatchBufferPointer, totalBytesProgrammed, pipeControlArgs);
|
||||
}
|
||||
|
||||
if (args.crossTileAtomicSynchronization || args.emitSelfCleanup) {
|
||||
const auto atomicAddress = gpuAddressOfAllocation + controlSectionOffset + offsetof(StaticPartitioningControlSection, synchronizeAfterWalkerCounter);
|
||||
programTilesSynchronizationWithAtomics<GfxFamily>(currentBatchBufferPointer, totalBytesProgrammed, atomicAddress, args.tileCount);
|
||||
}
|
||||
// Synchronize tiles after walker
|
||||
if (args.semaphoreProgrammingRequired) {
|
||||
programTilesSynchronizationWithPostSyncs<GfxFamily, WalkerType>(currentBatchBufferPointer, totalBytesProgrammed, inputWalker, args.partitionCount);
|
||||
}
|
||||
|
||||
// Jump over the control section only when needed
|
||||
if (isStartAndControlSectionRequired<GfxFamily>(args)) {
|
||||
programMiBatchBufferStart<GfxFamily>(currentBatchBufferPointer, totalBytesProgrammed, gpuAddressOfAllocation + afterControlSectionOffset, false, args.secondaryBatchBuffer);
|
||||
if (args.crossTileAtomicSynchronization || args.emitSelfCleanup) {
|
||||
const auto atomicAddress = gpuAddressOfAllocation + controlSectionOffset + offsetof(StaticPartitioningControlSection, synchronizeAfterWalkerCounter);
|
||||
programTilesSynchronizationWithAtomics<GfxFamily>(currentBatchBufferPointer, totalBytesProgrammed, atomicAddress, args.tileCount);
|
||||
}
|
||||
|
||||
// Control section
|
||||
DEBUG_BREAK_IF(totalBytesProgrammed != controlSectionOffset);
|
||||
StaticPartitioningControlSection *controlSection = putCommand<StaticPartitioningControlSection>(currentBatchBufferPointer, totalBytesProgrammed);
|
||||
controlSection->synchronizeBeforeWalkerCounter = 0u;
|
||||
controlSection->synchronizeAfterWalkerCounter = 0u;
|
||||
controlSection->finalSyncTileCounter = 0u;
|
||||
DEBUG_BREAK_IF(totalBytesProgrammed != afterControlSectionOffset);
|
||||
}
|
||||
// Jump over the control section only when needed
|
||||
if (isStartAndControlSectionRequired<GfxFamily>(args)) {
|
||||
programMiBatchBufferStart<GfxFamily>(currentBatchBufferPointer, totalBytesProgrammed, gpuAddressOfAllocation + afterControlSectionOffset, false, args.secondaryBatchBuffer);
|
||||
|
||||
// Cleanup section
|
||||
if (args.emitSelfCleanup) {
|
||||
const auto finalSyncTileCountAddress = gpuAddressOfAllocation + controlSectionOffset + offsetof(StaticPartitioningControlSection, finalSyncTileCounter);
|
||||
programSelfCleanupEndSection<GfxFamily>(currentBatchBufferPointer,
|
||||
totalBytesProgrammed,
|
||||
finalSyncTileCountAddress,
|
||||
gpuAddressOfAllocation + controlSectionOffset,
|
||||
staticPartitioningFieldsForCleanupCount,
|
||||
args);
|
||||
// Control section
|
||||
DEBUG_BREAK_IF(totalBytesProgrammed != controlSectionOffset);
|
||||
StaticPartitioningControlSection *controlSection = putCommand<StaticPartitioningControlSection>(currentBatchBufferPointer, totalBytesProgrammed);
|
||||
controlSection->synchronizeBeforeWalkerCounter = 0u;
|
||||
controlSection->synchronizeAfterWalkerCounter = 0u;
|
||||
controlSection->finalSyncTileCounter = 0u;
|
||||
DEBUG_BREAK_IF(totalBytesProgrammed != afterControlSectionOffset);
|
||||
}
|
||||
|
||||
// Cleanup section
|
||||
if (args.emitSelfCleanup) {
|
||||
const auto finalSyncTileCountAddress = gpuAddressOfAllocation + controlSectionOffset + offsetof(StaticPartitioningControlSection, finalSyncTileCounter);
|
||||
programSelfCleanupEndSection<GfxFamily>(currentBatchBufferPointer,
|
||||
totalBytesProgrammed,
|
||||
finalSyncTileCountAddress,
|
||||
gpuAddressOfAllocation + controlSectionOffset,
|
||||
staticPartitioningFieldsForCleanupCount,
|
||||
args);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -19,10 +19,8 @@ template <>
|
||||
bool ImplicitScalingDispatch<Family>::pipeControlStallRequired = false;
|
||||
|
||||
template struct ImplicitScalingDispatch<Family>;
|
||||
template void ImplicitScalingDispatch<Family>::dispatchCommands<DefaultWalkerType>(LinearStream &commandStream, DefaultWalkerType &walkerCmd, void **outWalkerPtr,
|
||||
const DeviceBitfield &devices, RequiredPartitionDim requiredPartitionDim, uint32_t &partitionCount,
|
||||
bool useSecondaryBatchBuffer, bool apiSelfCleanup, bool dcFlush, bool forceExecutionOnSingleTile,
|
||||
uint64_t workPartitionAllocationGpuVa, const HardwareInfo &hwInfo);
|
||||
template void ImplicitScalingDispatch<Family>::dispatchCommands<DefaultWalkerType>(LinearStream &commandStream, DefaultWalkerType &walkerCmd, const DeviceBitfield &devices,
|
||||
ImplicitScalingDispatchCommandArgs &dispatchCommandArgs);
|
||||
template size_t ImplicitScalingDispatch<Family>::getSize<DefaultWalkerType>(bool apiSelfCleanup, bool preferStaticPartitioning, const DeviceBitfield &devices, const Vec3<size_t> &groupStart, const Vec3<size_t> &groupCount);
|
||||
} // namespace NEO
|
||||
|
||||
|
||||
@@ -32,10 +32,8 @@ bool ImplicitScalingDispatch<Family>::platformSupportsImplicitScaling(const Root
|
||||
}
|
||||
|
||||
template struct ImplicitScalingDispatch<Family>;
|
||||
template void ImplicitScalingDispatch<Family>::dispatchCommands<DefaultWalkerType>(LinearStream &commandStream, DefaultWalkerType &walkerCmd, void **outWalkerPtr,
|
||||
const DeviceBitfield &devices, RequiredPartitionDim requiredPartitionDim, uint32_t &partitionCount,
|
||||
bool useSecondaryBatchBuffer, bool apiSelfCleanup, bool dcFlush, bool forceExecutionOnSingleTile,
|
||||
uint64_t workPartitionAllocationGpuVa, const HardwareInfo &hwInfo);
|
||||
template void ImplicitScalingDispatch<Family>::dispatchCommands<DefaultWalkerType>(LinearStream &commandStream, DefaultWalkerType &walkerCmd, const DeviceBitfield &devices,
|
||||
ImplicitScalingDispatchCommandArgs &dispatchCommandArgs);
|
||||
template size_t ImplicitScalingDispatch<Family>::getSize<DefaultWalkerType>(bool apiSelfCleanup, bool preferStaticPartitioning, const DeviceBitfield &devices, const Vec3<size_t> &groupStart, const Vec3<size_t> &groupCount);
|
||||
} // namespace NEO
|
||||
|
||||
|
||||
@@ -19,10 +19,8 @@ template <>
|
||||
bool ImplicitScalingDispatch<Family>::pipeControlStallRequired = true;
|
||||
|
||||
template struct ImplicitScalingDispatch<Family>;
|
||||
template void ImplicitScalingDispatch<Family>::dispatchCommands<DefaultWalkerType>(LinearStream &commandStream, DefaultWalkerType &walkerCmd, void **outWalkerPtr,
|
||||
const DeviceBitfield &devices, RequiredPartitionDim requiredPartitionDim, uint32_t &partitionCount,
|
||||
bool useSecondaryBatchBuffer, bool apiSelfCleanup, bool dcFlush, bool forceExecutionOnSingleTile,
|
||||
uint64_t workPartitionAllocationGpuVa, const HardwareInfo &hwInfo);
|
||||
template void ImplicitScalingDispatch<Family>::dispatchCommands<DefaultWalkerType>(LinearStream &commandStream, DefaultWalkerType &walkerCmd, const DeviceBitfield &devices,
|
||||
ImplicitScalingDispatchCommandArgs &dispatchCommandArgs);
|
||||
template size_t ImplicitScalingDispatch<Family>::getSize<DefaultWalkerType>(bool apiSelfCleanup, bool preferStaticPartitioning, const DeviceBitfield &devices, const Vec3<size_t> &groupStart, const Vec3<size_t> &groupCount);
|
||||
} // namespace NEO
|
||||
|
||||
|
||||
@@ -720,7 +720,20 @@ HWTEST2_F(CommandEncoderTests, whenAskingForImplicitScalingValuesThenAlwaysRetur
|
||||
EXPECT_EQ(0u, ImplicitScalingDispatch<FamilyType>::template getSize<WalkerType>(false, false, deviceBitField, vec3, vec3));
|
||||
|
||||
void *ptr = nullptr;
|
||||
ImplicitScalingDispatch<FamilyType>::dispatchCommands(linearStream, walkerCmd, &ptr, deviceBitField, RequiredPartitionDim::x, partitionCount, false, false, false, false, 0, *defaultHwInfo);
|
||||
|
||||
ImplicitScalingDispatchCommandArgs args{
|
||||
0, // workPartitionAllocationGpuVa
|
||||
defaultHwInfo.get(), // hwInfo
|
||||
&ptr, // outWalkerPtr
|
||||
RequiredPartitionDim::x, // requiredPartitionDim
|
||||
partitionCount, // partitionCount
|
||||
false, // useSecondaryBatchBuffer
|
||||
false, // apiSelfCleanup
|
||||
false, // dcFlush
|
||||
false, // forceExecutionOnSingleTile
|
||||
false}; // blockDispatchToCommandBuffer
|
||||
|
||||
ImplicitScalingDispatch<FamilyType>::dispatchCommands(linearStream, walkerCmd, deviceBitField, args);
|
||||
EXPECT_EQ(0u, linearStream.getUsed());
|
||||
|
||||
EXPECT_TRUE(ImplicitScalingDispatch<FamilyType>::getPipeControlStallRequired());
|
||||
|
||||
@@ -1702,3 +1702,47 @@ HWTEST2_F(CommandEncodeStatesTest, givenEncodeDispatchKernelWhenRequestingComman
|
||||
|
||||
EXPECT_ANY_THROW(EncodeDispatchKernel<FamilyType>::template encode<DefaultWalkerType>(*cmdContainer.get(), dispatchArgs));
|
||||
}
|
||||
|
||||
struct MultiTileCommandEncodeStatesFixture : public CommandEncodeStatesFixture {
|
||||
void setUp() {
|
||||
debugManager.flags.CreateMultipleSubDevices.set(2);
|
||||
CommandEncodeStatesFixture::setUp();
|
||||
}
|
||||
|
||||
DebugManagerStateRestore restorer;
|
||||
};
|
||||
|
||||
using MultiTileCommandEncodeStatesTest = Test<MultiTileCommandEncodeStatesFixture>;
|
||||
// Encodes a kernel dispatch with makeCommandView set and partitionCount == 2,
// then checks that neither the indirect-object heap nor the command stream of
// the command container reports any additional used space.
HWTEST2_F(MultiTileCommandEncodeStatesTest, givenEncodeDispatchKernelInImplicitScalingWhenRequestingCommandViewThenDoNotConsumeCmdBufferAndHeapSpace, IsAtLeastXeHpCore) {
    using DefaultWalkerType = typename FamilyType::DefaultWalkerType;

    uint32_t groupCounts[] = {1, 1, 1};
    auto kernelEncoder = std::make_unique<MockDispatchKernelEncoder>();

    // Turn on static work partitioning for the default engine's CSR and create
    // its work partition allocation.
    auto defaultEngineCsr = static_cast<UltCommandStreamReceiver<FamilyType> *>(pDevice->getDefaultEngine().commandStreamReceiver);
    defaultEngineCsr->staticWorkPartitioningEnabled = true;
    defaultEngineCsr->createWorkPartitionAllocation(*pDevice);

    // Record heap and command stream usage before encoding.
    auto iohHeap = cmdContainer->getIndirectHeap(HeapType::indirectObject);
    const auto iohUsedBefore = iohHeap->getUsed();

    auto cmdStream = cmdContainer->getCommandStream();
    const auto cmdStreamUsedBefore = cmdStream->getUsed();

    // CPU-side buffers handed to the encoder for the payload and the walker.
    uint8_t cpuPayload[256] = {};
    kernelEncoder->getCrossThreadDataSizeResult = 64;

    auto cpuWalker = std::make_unique<DefaultWalkerType>();

    bool requiresUncachedMocs = false;
    EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, kernelEncoder.get(), groupCounts, requiresUncachedMocs);
    dispatchArgs.makeCommandView = true;
    dispatchArgs.partitionCount = 2;
    dispatchArgs.cpuPayloadBuffer = cpuPayload;
    dispatchArgs.cpuWalkerBuffer = cpuWalker.get();

    EncodeDispatchKernel<FamilyType>::template encode<DefaultWalkerType>(*cmdContainer.get(), dispatchArgs);

    // Usage of both the heap and the command stream must be unchanged.
    EXPECT_EQ(iohUsedBefore, iohHeap->getUsed());
    EXPECT_EQ(cmdStreamUsedBefore, cmdStream->getUsed());
}
|
||||
|
||||
@@ -36,11 +36,12 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests, GivenGetSizeWhenDispatchingCm
|
||||
expectedSize = ImplicitScalingDispatch<FamilyType>::template getSize<DefaultWalkerType>(false, false, twoTile, Vec3<size_t>(0, 0, 0), Vec3<size_t>(32, 1, 1));
|
||||
|
||||
uint32_t partitionCount = 0;
|
||||
ImplicitScalingDispatch<FamilyType>::dispatchCommands(commandStream, walker, nullptr, twoTile, NEO::RequiredPartitionDim::none, partitionCount, true, false, dcFlushFlag,
|
||||
forceExecutionOnSingleTileFlag, 0u, *defaultHwInfo);
|
||||
auto dispatchArgs = createDispatchCommandArgs(0, partitionCount);
|
||||
|
||||
ImplicitScalingDispatch<FamilyType>::dispatchCommands(commandStream, walker, twoTile, dispatchArgs);
|
||||
totalBytesProgrammed = commandStream.getUsed();
|
||||
EXPECT_EQ(expectedSize, totalBytesProgrammed);
|
||||
EXPECT_EQ(2u, partitionCount);
|
||||
EXPECT_EQ(2u, dispatchArgs.partitionCount);
|
||||
|
||||
HardwareParse hwParser;
|
||||
hwParser.parseCommands<FamilyType>(commandStream, 0);
|
||||
@@ -79,11 +80,13 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests, GivenWorkgroupOneAndNoPartiti
|
||||
expectedSize = ImplicitScalingDispatch<FamilyType>::template getSize<DefaultWalkerType>(false, false, twoTile, Vec3<size_t>(0, 0, 0), Vec3<size_t>(1, 1, 1));
|
||||
|
||||
uint32_t partitionCount = 0;
|
||||
ImplicitScalingDispatch<FamilyType>::dispatchCommands(commandStream, walker, nullptr, twoTile, NEO::RequiredPartitionDim::none, partitionCount, false, false, dcFlushFlag,
|
||||
forceExecutionOnSingleTileFlag, 0u, *defaultHwInfo);
|
||||
auto dispatchArgs = createDispatchCommandArgs(0, partitionCount);
|
||||
dispatchArgs.useSecondaryBatchBuffer = false;
|
||||
|
||||
ImplicitScalingDispatch<FamilyType>::dispatchCommands(commandStream, walker, twoTile, dispatchArgs);
|
||||
totalBytesProgrammed = commandStream.getUsed();
|
||||
EXPECT_EQ(expectedSize, totalBytesProgrammed);
|
||||
EXPECT_EQ(1u, partitionCount);
|
||||
EXPECT_EQ(1u, dispatchArgs.partitionCount);
|
||||
|
||||
HardwareParse hwParser;
|
||||
hwParser.parseCommands<FamilyType>(commandStream, 0);
|
||||
@@ -123,11 +126,12 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests, GivenWorkgroupOneAndPartition
|
||||
expectedSize = ImplicitScalingDispatch<FamilyType>::template getSize<DefaultWalkerType>(false, false, twoTile, Vec3<size_t>(0, 0, 0), Vec3<size_t>(1, 1, 1));
|
||||
|
||||
uint32_t partitionCount = 0;
|
||||
ImplicitScalingDispatch<FamilyType>::dispatchCommands(commandStream, walker, nullptr, twoTile, NEO::RequiredPartitionDim::none, partitionCount, true, false, dcFlushFlag,
|
||||
forceExecutionOnSingleTileFlag, 0u, *defaultHwInfo);
|
||||
auto dispatchArgs = createDispatchCommandArgs(0, partitionCount);
|
||||
|
||||
ImplicitScalingDispatch<FamilyType>::dispatchCommands(commandStream, walker, twoTile, dispatchArgs);
|
||||
totalBytesProgrammed = commandStream.getUsed();
|
||||
EXPECT_EQ(expectedSize, totalBytesProgrammed);
|
||||
EXPECT_EQ(1u, partitionCount);
|
||||
EXPECT_EQ(1u, dispatchArgs.partitionCount);
|
||||
|
||||
HardwareParse hwParser;
|
||||
hwParser.parseCommands<FamilyType>(commandStream, 0);
|
||||
@@ -170,11 +174,12 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests, GivenStaticPartitioningWhenDi
|
||||
expectedSize = ImplicitScalingDispatch<FamilyType>::template getSize<DefaultWalkerType>(false, true, twoTile, Vec3<size_t>(0, 0, 0), Vec3<size_t>(32, 1, 1));
|
||||
|
||||
uint32_t partitionCount = 0;
|
||||
ImplicitScalingDispatch<FamilyType>::dispatchCommands(commandStream, walker, nullptr, twoTile, NEO::RequiredPartitionDim::none, partitionCount, true, false, dcFlushFlag,
|
||||
forceExecutionOnSingleTileFlag, workPartitionAllocationAddress, *defaultHwInfo);
|
||||
auto dispatchArgs = createDispatchCommandArgs(workPartitionAllocationAddress, partitionCount);
|
||||
|
||||
ImplicitScalingDispatch<FamilyType>::dispatchCommands(commandStream, walker, twoTile, dispatchArgs);
|
||||
totalBytesProgrammed = commandStream.getUsed();
|
||||
EXPECT_EQ(expectedSize, totalBytesProgrammed);
|
||||
EXPECT_EQ(2u, partitionCount);
|
||||
EXPECT_EQ(2u, dispatchArgs.partitionCount);
|
||||
|
||||
HardwareParse hwParser;
|
||||
hwParser.parseCommands<FamilyType>(commandStream, 0);
|
||||
@@ -222,11 +227,12 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests, GivenStaticPartitioningWhenPa
|
||||
expectedSize = ImplicitScalingDispatch<FamilyType>::template getSize<DefaultWalkerType>(false, true, twoTile, Vec3<size_t>(0, 0, 0), Vec3<size_t>(32, 1, 1));
|
||||
|
||||
uint32_t partitionCount = 0;
|
||||
ImplicitScalingDispatch<FamilyType>::dispatchCommands(commandStream, walker, nullptr, twoTile, NEO::RequiredPartitionDim::none, partitionCount, true, false, dcFlushFlag,
|
||||
forceExecutionOnSingleTileFlag, workPartitionAllocationAddress, *defaultHwInfo);
|
||||
auto dispatchArgs = createDispatchCommandArgs(workPartitionAllocationAddress, partitionCount);
|
||||
|
||||
ImplicitScalingDispatch<FamilyType>::dispatchCommands(commandStream, walker, twoTile, dispatchArgs);
|
||||
totalBytesProgrammed = commandStream.getUsed();
|
||||
EXPECT_EQ(expectedSize, totalBytesProgrammed);
|
||||
EXPECT_EQ(2u, partitionCount);
|
||||
EXPECT_EQ(2u, dispatchArgs.partitionCount);
|
||||
|
||||
HardwareParse hwParser;
|
||||
hwParser.parseCommands<FamilyType>(commandStream, 0);
|
||||
@@ -276,11 +282,12 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests, GivenStaticPartitioningPrefer
|
||||
expectedSize = ImplicitScalingDispatch<FamilyType>::template getSize<DefaultWalkerType>(false, true, twoTile, Vec3<size_t>(0, 0, 0), Vec3<size_t>(1, 1, 1));
|
||||
|
||||
uint32_t partitionCount = 0;
|
||||
ImplicitScalingDispatch<FamilyType>::dispatchCommands(commandStream, walker, nullptr, twoTile, NEO::RequiredPartitionDim::none, partitionCount, true, false, dcFlushFlag,
|
||||
forceExecutionOnSingleTileFlag, workPartitionAllocationAddress, *defaultHwInfo);
|
||||
auto dispatchArgs = createDispatchCommandArgs(workPartitionAllocationAddress, partitionCount);
|
||||
|
||||
ImplicitScalingDispatch<FamilyType>::dispatchCommands(commandStream, walker, twoTile, dispatchArgs);
|
||||
totalBytesProgrammed = commandStream.getUsed();
|
||||
EXPECT_EQ(expectedSize, totalBytesProgrammed);
|
||||
EXPECT_EQ(twoTile.count(), partitionCount);
|
||||
EXPECT_EQ(twoTile.count(), dispatchArgs.partitionCount);
|
||||
|
||||
HardwareParse hwParser;
|
||||
hwParser.parseCommands<FamilyType>(commandStream, 0);
|
||||
@@ -327,11 +334,12 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests, GivenStaticPartitioningPrefer
|
||||
expectedSize = ImplicitScalingDispatch<FamilyType>::template getSize<DefaultWalkerType>(false, true, twoTile, Vec3<size_t>(0, 0, 0), Vec3<size_t>(1, 1, 1));
|
||||
|
||||
uint32_t partitionCount = 0;
|
||||
ImplicitScalingDispatch<FamilyType>::dispatchCommands(commandStream, walker, nullptr, twoTile, NEO::RequiredPartitionDim::none, partitionCount, true, false, dcFlushFlag,
|
||||
forceExecutionOnSingleTileFlag, workPartitionAllocationAddress, *defaultHwInfo);
|
||||
auto dispatchArgs = createDispatchCommandArgs(workPartitionAllocationAddress, partitionCount);
|
||||
|
||||
ImplicitScalingDispatch<FamilyType>::dispatchCommands(commandStream, walker, twoTile, dispatchArgs);
|
||||
totalBytesProgrammed = commandStream.getUsed();
|
||||
EXPECT_EQ(expectedSize, totalBytesProgrammed);
|
||||
EXPECT_EQ(twoTile.count(), partitionCount);
|
||||
EXPECT_EQ(twoTile.count(), dispatchArgs.partitionCount);
|
||||
|
||||
HardwareParse hwParser;
|
||||
hwParser.parseCommands<FamilyType>(commandStream, 0);
|
||||
@@ -364,11 +372,12 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests, GivenStaticPartitioningPrefer
|
||||
expectedSize = ImplicitScalingDispatch<FamilyType>::template getSize<DefaultWalkerType>(false, true, twoTile, Vec3<size_t>(0, 0, 0), Vec3<size_t>(1, 1, 1));
|
||||
|
||||
uint32_t partitionCount = 0;
|
||||
ImplicitScalingDispatch<FamilyType>::dispatchCommands(commandStream, walker, nullptr, twoTile, NEO::RequiredPartitionDim::none, partitionCount, true, false, dcFlushFlag,
|
||||
forceExecutionOnSingleTileFlag, workPartitionAllocationAddress, *defaultHwInfo);
|
||||
auto dispatchArgs = createDispatchCommandArgs(workPartitionAllocationAddress, partitionCount);
|
||||
|
||||
ImplicitScalingDispatch<FamilyType>::dispatchCommands(commandStream, walker, twoTile, dispatchArgs);
|
||||
totalBytesProgrammed = commandStream.getUsed();
|
||||
EXPECT_EQ(expectedSize, totalBytesProgrammed);
|
||||
EXPECT_EQ(twoTile.count(), partitionCount);
|
||||
EXPECT_EQ(twoTile.count(), dispatchArgs.partitionCount);
|
||||
|
||||
HardwareParse hwParser;
|
||||
hwParser.parseCommands<FamilyType>(commandStream, 0);
|
||||
@@ -401,11 +410,12 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests, GivenDynamicPartitioningPrefe
|
||||
expectedSize = ImplicitScalingDispatch<FamilyType>::template getSize<DefaultWalkerType>(false, false, twoTile, Vec3<size_t>(0, 0, 0), Vec3<size_t>(1, 1, 1));
|
||||
|
||||
uint32_t partitionCount = 0;
|
||||
ImplicitScalingDispatch<FamilyType>::dispatchCommands(commandStream, walker, nullptr, twoTile, NEO::RequiredPartitionDim::none, partitionCount, true, false, dcFlushFlag,
|
||||
forceExecutionOnSingleTileFlag, workPartitionAllocationAddress, *defaultHwInfo);
|
||||
auto dispatchArgs = createDispatchCommandArgs(workPartitionAllocationAddress, partitionCount);
|
||||
|
||||
ImplicitScalingDispatch<FamilyType>::dispatchCommands(commandStream, walker, twoTile, dispatchArgs);
|
||||
totalBytesProgrammed = commandStream.getUsed();
|
||||
EXPECT_EQ(expectedSize, totalBytesProgrammed);
|
||||
EXPECT_EQ(twoTile.count(), partitionCount);
|
||||
EXPECT_EQ(twoTile.count(), dispatchArgs.partitionCount);
|
||||
|
||||
HardwareParse hwParser;
|
||||
hwParser.parseCommands<FamilyType>(commandStream, 0);
|
||||
@@ -449,11 +459,13 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests,
|
||||
EXPECT_EQ(expectedSize, estimatedSize);
|
||||
|
||||
uint32_t partitionCount = 0;
|
||||
ImplicitScalingDispatch<FamilyType>::dispatchCommands(commandStream, walker, nullptr, twoTile, NEO::RequiredPartitionDim::none, partitionCount, true, true, dcFlushFlag,
|
||||
forceExecutionOnSingleTileFlag, workPartitionAllocationAddress, *defaultHwInfo);
|
||||
auto dispatchArgs = createDispatchCommandArgs(workPartitionAllocationAddress, partitionCount);
|
||||
dispatchArgs.apiSelfCleanup = true;
|
||||
|
||||
ImplicitScalingDispatch<FamilyType>::dispatchCommands(commandStream, walker, twoTile, dispatchArgs);
|
||||
totalBytesProgrammed = commandStream.getUsed();
|
||||
EXPECT_EQ(expectedSize, totalBytesProgrammed);
|
||||
EXPECT_EQ(twoTile.count(), partitionCount);
|
||||
EXPECT_EQ(twoTile.count(), dispatchArgs.partitionCount);
|
||||
|
||||
HardwareParse hwParser;
|
||||
hwParser.parsePipeControl = true;
|
||||
@@ -517,11 +529,13 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests,
|
||||
EXPECT_EQ(expectedSize, estimatedSize);
|
||||
|
||||
uint32_t partitionCount = 0;
|
||||
ImplicitScalingDispatch<FamilyType>::dispatchCommands(commandStream, walker, nullptr, twoTile, NEO::RequiredPartitionDim::none, partitionCount, true, true, dcFlushFlag,
|
||||
forceExecutionOnSingleTileFlag, workPartitionAllocationAddress, *defaultHwInfo);
|
||||
auto dispatchArgs = createDispatchCommandArgs(workPartitionAllocationAddress, partitionCount);
|
||||
dispatchArgs.apiSelfCleanup = true;
|
||||
|
||||
ImplicitScalingDispatch<FamilyType>::dispatchCommands(commandStream, walker, twoTile, dispatchArgs);
|
||||
totalBytesProgrammed = commandStream.getUsed();
|
||||
EXPECT_EQ(expectedSize, totalBytesProgrammed);
|
||||
EXPECT_EQ(twoTile.count(), partitionCount);
|
||||
EXPECT_EQ(twoTile.count(), dispatchArgs.partitionCount);
|
||||
|
||||
HardwareParse hwParser;
|
||||
hwParser.parsePipeControl = true;
|
||||
@@ -577,11 +591,13 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests,
|
||||
EXPECT_EQ(expectedSize, estimatedSize);
|
||||
|
||||
uint32_t partitionCount = 0;
|
||||
ImplicitScalingDispatch<FamilyType>::dispatchCommands(commandStream, walker, nullptr, twoTile, NEO::RequiredPartitionDim::none, partitionCount, true, true, dcFlushFlag,
|
||||
forceExecutionOnSingleTileFlag, workPartitionAllocationAddress, *defaultHwInfo);
|
||||
auto dispatchArgs = createDispatchCommandArgs(workPartitionAllocationAddress, partitionCount);
|
||||
dispatchArgs.apiSelfCleanup = true;
|
||||
|
||||
ImplicitScalingDispatch<FamilyType>::dispatchCommands(commandStream, walker, twoTile, dispatchArgs);
|
||||
totalBytesProgrammed = commandStream.getUsed();
|
||||
EXPECT_EQ(expectedSize, totalBytesProgrammed);
|
||||
EXPECT_EQ(twoTile.count(), partitionCount);
|
||||
EXPECT_EQ(twoTile.count(), dispatchArgs.partitionCount);
|
||||
|
||||
HardwareParse hwParser;
|
||||
hwParser.parsePipeControl = true;
|
||||
@@ -637,11 +653,13 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests,
|
||||
EXPECT_EQ(expectedSize, estimatedSize);
|
||||
|
||||
uint32_t partitionCount = 0;
|
||||
ImplicitScalingDispatch<FamilyType>::dispatchCommands(commandStream, walker, nullptr, twoTile, NEO::RequiredPartitionDim::none, partitionCount, true, true, dcFlushFlag,
|
||||
forceExecutionOnSingleTileFlag, workPartitionAllocationAddress, *defaultHwInfo);
|
||||
auto dispatchArgs = createDispatchCommandArgs(workPartitionAllocationAddress, partitionCount);
|
||||
dispatchArgs.apiSelfCleanup = true;
|
||||
|
||||
ImplicitScalingDispatch<FamilyType>::dispatchCommands(commandStream, walker, twoTile, dispatchArgs);
|
||||
totalBytesProgrammed = commandStream.getUsed();
|
||||
EXPECT_EQ(expectedSize, totalBytesProgrammed);
|
||||
EXPECT_EQ(twoTile.count(), partitionCount);
|
||||
EXPECT_EQ(twoTile.count(), dispatchArgs.partitionCount);
|
||||
|
||||
HardwareParse hwParser;
|
||||
hwParser.parsePipeControl = true;
|
||||
@@ -704,11 +722,13 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests,
|
||||
EXPECT_EQ(expectedSize, estimatedSize);
|
||||
|
||||
uint32_t partitionCount = 0;
|
||||
ImplicitScalingDispatch<FamilyType>::dispatchCommands(commandStream, walker, nullptr, twoTile, NEO::RequiredPartitionDim::none, partitionCount, true, true, dcFlushFlag,
|
||||
forceExecutionOnSingleTileFlag, workPartitionAllocationAddress, *defaultHwInfo);
|
||||
auto dispatchArgs = createDispatchCommandArgs(workPartitionAllocationAddress, partitionCount);
|
||||
dispatchArgs.apiSelfCleanup = true;
|
||||
|
||||
ImplicitScalingDispatch<FamilyType>::dispatchCommands(commandStream, walker, twoTile, dispatchArgs);
|
||||
totalBytesProgrammed = commandStream.getUsed();
|
||||
EXPECT_EQ(expectedSize, totalBytesProgrammed);
|
||||
EXPECT_EQ(twoTile.count(), partitionCount);
|
||||
EXPECT_EQ(twoTile.count(), dispatchArgs.partitionCount);
|
||||
|
||||
HardwareParse hwParser;
|
||||
hwParser.parsePipeControl = true;
|
||||
@@ -767,11 +787,12 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests,
|
||||
EXPECT_EQ(expectedSize, estimatedSize);
|
||||
|
||||
uint32_t partitionCount = 0;
|
||||
ImplicitScalingDispatch<FamilyType>::dispatchCommands(commandStream, walker, nullptr, twoTile, NEO::RequiredPartitionDim::none, partitionCount, true, false, dcFlushFlag,
|
||||
forceExecutionOnSingleTileFlag, workPartitionAllocationAddress, *defaultHwInfo);
|
||||
auto dispatchArgs = createDispatchCommandArgs(workPartitionAllocationAddress, partitionCount);
|
||||
|
||||
ImplicitScalingDispatch<FamilyType>::dispatchCommands(commandStream, walker, twoTile, dispatchArgs);
|
||||
totalBytesProgrammed = commandStream.getUsed();
|
||||
EXPECT_EQ(expectedSize, totalBytesProgrammed);
|
||||
EXPECT_EQ(twoTile.count(), partitionCount);
|
||||
EXPECT_EQ(twoTile.count(), dispatchArgs.partitionCount);
|
||||
|
||||
HardwareParse hwParser;
|
||||
hwParser.parsePipeControl = true;
|
||||
@@ -832,11 +853,12 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests,
|
||||
EXPECT_EQ(expectedSize, estimatedSize);
|
||||
|
||||
uint32_t partitionCount = 0;
|
||||
ImplicitScalingDispatch<FamilyType>::dispatchCommands(commandStream, walker, nullptr, twoTile, NEO::RequiredPartitionDim::none, partitionCount, true, false, dcFlushFlag,
|
||||
forceExecutionOnSingleTileFlag, workPartitionAllocationAddress, *defaultHwInfo);
|
||||
auto dispatchArgs = createDispatchCommandArgs(workPartitionAllocationAddress, partitionCount);
|
||||
|
||||
ImplicitScalingDispatch<FamilyType>::dispatchCommands(commandStream, walker, twoTile, dispatchArgs);
|
||||
totalBytesProgrammed = commandStream.getUsed();
|
||||
EXPECT_EQ(expectedSize, totalBytesProgrammed);
|
||||
EXPECT_EQ(twoTile.count(), partitionCount);
|
||||
EXPECT_EQ(twoTile.count(), dispatchArgs.partitionCount);
|
||||
|
||||
HardwareParse hwParser;
|
||||
hwParser.parsePipeControl = true;
|
||||
@@ -900,11 +922,13 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests,
|
||||
EXPECT_EQ(expectedSize, estimatedSize);
|
||||
|
||||
uint32_t partitionCount = 0;
|
||||
ImplicitScalingDispatch<FamilyType>::dispatchCommands(commandStream, walker, nullptr, twoTile, NEO::RequiredPartitionDim::none, partitionCount, true, true, dcFlushFlag,
|
||||
forceExecutionOnSingleTileFlag, workPartitionAllocationAddress, *defaultHwInfo);
|
||||
auto dispatchArgs = createDispatchCommandArgs(workPartitionAllocationAddress, partitionCount);
|
||||
dispatchArgs.apiSelfCleanup = true;
|
||||
|
||||
ImplicitScalingDispatch<FamilyType>::dispatchCommands(commandStream, walker, twoTile, dispatchArgs);
|
||||
totalBytesProgrammed = commandStream.getUsed();
|
||||
EXPECT_EQ(expectedSize, totalBytesProgrammed);
|
||||
EXPECT_EQ(twoTile.count(), partitionCount);
|
||||
EXPECT_EQ(twoTile.count(), dispatchArgs.partitionCount);
|
||||
|
||||
HardwareParse hwParser;
|
||||
hwParser.parsePipeControl = true;
|
||||
@@ -967,11 +991,12 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests,
|
||||
EXPECT_EQ(expectedSize, estimatedSize);
|
||||
|
||||
uint32_t partitionCount = 0;
|
||||
ImplicitScalingDispatch<FamilyType>::dispatchCommands(commandStream, walker, nullptr, twoTile, NEO::RequiredPartitionDim::none, partitionCount, true, false, dcFlushFlag,
|
||||
forceExecutionOnSingleTileFlag, workPartitionAllocationAddress, *defaultHwInfo);
|
||||
auto dispatchArgs = createDispatchCommandArgs(workPartitionAllocationAddress, partitionCount);
|
||||
|
||||
ImplicitScalingDispatch<FamilyType>::dispatchCommands(commandStream, walker, twoTile, dispatchArgs);
|
||||
totalBytesProgrammed = commandStream.getUsed();
|
||||
EXPECT_EQ(expectedSize, totalBytesProgrammed);
|
||||
EXPECT_EQ(twoTile.count(), partitionCount);
|
||||
EXPECT_EQ(twoTile.count(), dispatchArgs.partitionCount);
|
||||
|
||||
HardwareParse hwParser;
|
||||
hwParser.parsePipeControl = true;
|
||||
@@ -1564,3 +1589,40 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests,
|
||||
auto bbStart = reinterpret_cast<MI_BATCH_BUFFER_START *>(*bbStartList.begin());
|
||||
EXPECT_EQ(MI_BATCH_BUFFER_START::SECOND_LEVEL_BATCH_BUFFER::SECOND_LEVEL_BATCH_BUFFER_SECOND_LEVEL_BATCH, bbStart->getSecondLevelBatchBuffer());
|
||||
}
|
||||
|
||||
// Dispatches the same walker twice through ImplicitScalingDispatch:
//  1. with blockDispatchToCommandBuffer == true  -> expects no bytes in the
//     command stream, outWalkerPtr left as nullptr, partition count still set;
//  2. with blockDispatchToCommandBuffer == false -> expects outWalkerPtr to be
//     set and the bytes it points at to equal the walker object passed in.
HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests,
            givenStaticPartitioningWhenBlockDispatchFlagIsTrueThenDoNotDispatchAnyCommands) {
    using DefaultWalkerType = typename FamilyType::DefaultWalkerType;
    using PostSyncType = typename DefaultWalkerType::PostSyncType;

    const uint64_t timestampAddress = (1ull << 48) | (1ull << 24);

    // Walker used for the blocked dispatch: 32 groups in X, timestamp post-sync.
    DefaultWalkerType blockedWalker = FamilyType::template getInitGpuWalker<DefaultWalkerType>();
    blockedWalker.setThreadGroupIdXDimension(32);
    auto &postSyncData = blockedWalker.getPostSync();
    postSyncData.setOperation(PostSyncType::OPERATION::OPERATION_WRITE_TIMESTAMP);
    postSyncData.setDestinationAddress(timestampAddress);

    // Copy taken before any dispatch; used for the second, non-blocked dispatch.
    DefaultWalkerType dispatchedWalker = blockedWalker;

    const uint64_t workPartitionAllocationAddress = 0x1000;
    const size_t expectedBytesWhenBlocked = 0;
    void *walkerInStream = nullptr;

    uint32_t initialPartitionCount = 0;
    auto dispatchArgs = createDispatchCommandArgs(workPartitionAllocationAddress, initialPartitionCount);
    dispatchArgs.blockDispatchToCommandBuffer = true;
    dispatchArgs.outWalkerPtr = &walkerInStream;

    // Blocked dispatch.
    ImplicitScalingDispatch<FamilyType>::dispatchCommands(commandStream, blockedWalker, twoTile, dispatchArgs);
    const size_t bytesAfterBlockedDispatch = commandStream.getUsed();
    EXPECT_EQ(expectedBytesWhenBlocked, bytesAfterBlockedDispatch);
    EXPECT_EQ(twoTile.count(), dispatchArgs.partitionCount);
    EXPECT_EQ(nullptr, walkerInStream);

    // Regular dispatch with the very same argument structure.
    dispatchArgs.blockDispatchToCommandBuffer = false;
    ImplicitScalingDispatch<FamilyType>::dispatchCommands(commandStream, dispatchedWalker, twoTile, dispatchArgs);
    ASSERT_NE(nullptr, walkerInStream);

    EXPECT_EQ(0, memcmp(&dispatchedWalker, walkerInStream, sizeof(DefaultWalkerType)));
}
|
||||
|
||||
@@ -426,7 +426,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenProgramComputeWalkerWhen
|
||||
|
||||
walker.setPartitionType(WalkerType::PARTITION_TYPE::PARTITION_TYPE_X);
|
||||
void *walkerCommandAddress = cmdBufferAddress;
|
||||
programPartitionedWalker<FamilyType>(cmdBufferAddress, totalBytesProgrammed, &walker, 2u, 2, false);
|
||||
programPartitionedWalker<FamilyType>(cmdBufferAddress, totalBytesProgrammed, &walker, 2u, 2, false, false);
|
||||
auto walkerCommand = genCmdCast<WalkerType *>(walkerCommandAddress);
|
||||
|
||||
ASSERT_NE(nullptr, walkerCommand);
|
||||
@@ -437,7 +437,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenProgramComputeWalkerWhen
|
||||
|
||||
walker.setPartitionType(WalkerType::PARTITION_TYPE::PARTITION_TYPE_Y);
|
||||
walkerCommandAddress = cmdBufferAddress;
|
||||
programPartitionedWalker<FamilyType>(cmdBufferAddress, totalBytesProgrammed, &walker, 2u, 2, false);
|
||||
programPartitionedWalker<FamilyType>(cmdBufferAddress, totalBytesProgrammed, &walker, 2u, 2, false, false);
|
||||
walkerCommand = genCmdCast<WalkerType *>(walkerCommandAddress);
|
||||
|
||||
ASSERT_NE(nullptr, walkerCommand);
|
||||
@@ -446,7 +446,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenProgramComputeWalkerWhen
|
||||
|
||||
walker.setPartitionType(WalkerType::PARTITION_TYPE::PARTITION_TYPE_Z);
|
||||
walkerCommandAddress = cmdBufferAddress;
|
||||
programPartitionedWalker<FamilyType>(cmdBufferAddress, totalBytesProgrammed, &walker, 2u, 2, false);
|
||||
programPartitionedWalker<FamilyType>(cmdBufferAddress, totalBytesProgrammed, &walker, 2u, 2, false, false);
|
||||
walkerCommand = genCmdCast<WalkerType *>(walkerCommandAddress);
|
||||
|
||||
ASSERT_NE(nullptr, walkerCommand);
|
||||
@@ -456,7 +456,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenProgramComputeWalkerWhen
|
||||
// if we program with partition Count == 1 then do not trigger partition stuff
|
||||
walker.setPartitionType(WalkerType::PARTITION_TYPE::PARTITION_TYPE_DISABLED);
|
||||
walkerCommandAddress = cmdBufferAddress;
|
||||
programPartitionedWalker<FamilyType>(cmdBufferAddress, totalBytesProgrammed, &walker, 1u, 2, false);
|
||||
programPartitionedWalker<FamilyType>(cmdBufferAddress, totalBytesProgrammed, &walker, 1u, 2, false, false);
|
||||
walkerCommand = genCmdCast<WalkerType *>(walkerCommandAddress);
|
||||
|
||||
ASSERT_NE(nullptr, walkerCommand);
|
||||
@@ -1763,7 +1763,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenForceExecutionOnSingleTi
|
||||
bool forceExecutionOnSingleTile = false;
|
||||
walker.setPartitionType(WalkerType::PARTITION_TYPE::PARTITION_TYPE_X);
|
||||
void *walkerCommandAddress = cmdBufferAddress;
|
||||
programPartitionedWalker<FamilyType>(cmdBufferAddress, totalBytesProgrammed, &walker, 2u, 2, forceExecutionOnSingleTile);
|
||||
programPartitionedWalker<FamilyType>(cmdBufferAddress, totalBytesProgrammed, &walker, 2u, 2, forceExecutionOnSingleTile, false);
|
||||
auto walkerCommand = genCmdCast<WalkerType *>(walkerCommandAddress);
|
||||
|
||||
ASSERT_NE(nullptr, walkerCommand);
|
||||
@@ -1773,7 +1773,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenForceExecutionOnSingleTi
|
||||
|
||||
forceExecutionOnSingleTile = true;
|
||||
walkerCommandAddress = cmdBufferAddress;
|
||||
programPartitionedWalker<FamilyType>(cmdBufferAddress, totalBytesProgrammed, &walker, 2u, 2, forceExecutionOnSingleTile);
|
||||
programPartitionedWalker<FamilyType>(cmdBufferAddress, totalBytesProgrammed, &walker, 2u, 2, forceExecutionOnSingleTile, false);
|
||||
walkerCommand = genCmdCast<WalkerType *>(walkerCommandAddress);
|
||||
|
||||
ASSERT_NE(nullptr, walkerCommand);
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2021-2023 Intel Corporation
|
||||
* Copyright (C) 2021-2024 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -39,3 +39,19 @@ void ImplicitScalingFixture::tearDown() {
|
||||
alignedFree(alignedMemory);
|
||||
CommandEncodeStatesFixture::tearDown();
|
||||
}
|
||||
|
||||
// Builds the argument structure the implicit scaling tests pass to
// ImplicitScalingDispatch::dispatchCommands.
//
// The caller supplies the work partition allocation address and the starting
// partition count. The remaining fields use fixed values: no out-walker
// pointer, no required partition dimension, secondary batch buffer enabled,
// API self-cleanup disabled, dispatch not blocked. dcFlush and
// forceExecutionOnSingleTile are taken from the fixture's dcFlushFlag and
// forceExecutionOnSingleTileFlag. Tests overwrite individual fields on the
// returned copy as needed.
ImplicitScalingDispatchCommandArgs ImplicitScalingFixture::createDispatchCommandArgs(uint64_t workPartitionAllocationAddress, uint32_t partitionCount) {
    return ImplicitScalingDispatchCommandArgs{
        workPartitionAllocationAddress,  // workPartitionAllocationGpuVa
        defaultHwInfo.get(),             // hwInfo
        nullptr,                         // outWalkerPtr
        NEO::RequiredPartitionDim::none, // requiredPartitionDim
        partitionCount,                  // partitionCount
        true,                            // useSecondaryBatchBuffer
        false,                           // apiSelfCleanup
        dcFlushFlag,                     // dcFlush
        forceExecutionOnSingleTileFlag,  // forceExecutionOnSingleTile
        false};                          // blockDispatchToCommandBuffer
}
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2021-2022 Intel Corporation
|
||||
* Copyright (C) 2021-2024 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -22,6 +22,8 @@ struct ImplicitScalingFixture : public CommandEncodeStatesFixture {
|
||||
void setUp();
|
||||
void tearDown();
|
||||
|
||||
ImplicitScalingDispatchCommandArgs createDispatchCommandArgs(uint64_t workPartitionAllocationAddress, uint32_t partitionCount);
|
||||
|
||||
static constexpr uint64_t gpuVa = (1ull << 48);
|
||||
static constexpr size_t bufferSize = 1024u;
|
||||
DebugManagerStateRestore restorer;
|
||||
|
||||
Reference in New Issue
Block a user