mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-22 10:17:01 +08:00
performance: avoid reading from gfx memory when modifying walker command
Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
487b02a2ac
commit
8bb92ff445
@@ -162,25 +162,25 @@ inline void HardwareInterface<GfxFamily>::programWalker(
|
|||||||
uint32_t partitionCount = 0u;
|
uint32_t partitionCount = 0u;
|
||||||
RequiredPartitionDim requiredPartitionDim = kernel.usesImages() ? RequiredPartitionDim::x : RequiredPartitionDim::none;
|
RequiredPartitionDim requiredPartitionDim = kernel.usesImages() ? RequiredPartitionDim::x : RequiredPartitionDim::none;
|
||||||
|
|
||||||
void *outWalker = nullptr;
|
|
||||||
|
|
||||||
ImplicitScalingDispatchCommandArgs implicitScalingArgs{
|
ImplicitScalingDispatchCommandArgs implicitScalingArgs{
|
||||||
workPartitionAllocationGpuVa, // workPartitionAllocationGpuVa
|
workPartitionAllocationGpuVa, // workPartitionAllocationGpuVa
|
||||||
&hwInfo, // hwInfo
|
&hwInfo, // hwInfo
|
||||||
&outWalker, // outWalkerPtr
|
nullptr, // outWalkerPtr
|
||||||
requiredPartitionDim, // requiredPartitionDim
|
requiredPartitionDim, // requiredPartitionDim
|
||||||
partitionCount, // partitionCount
|
partitionCount, // partitionCount
|
||||||
|
workgroupSize, // workgroupSize
|
||||||
|
maxWgCountPerTile, // maxWgCountPerTile
|
||||||
false, // useSecondaryBatchBuffer
|
false, // useSecondaryBatchBuffer
|
||||||
false, // apiSelfCleanup
|
false, // apiSelfCleanup
|
||||||
queueCsr.getDcFlushSupport(), // dcFlush
|
queueCsr.getDcFlushSupport(), // dcFlush
|
||||||
kernel.isSingleSubdevicePreferred(), // forceExecutionOnSingleTile
|
kernel.isSingleSubdevicePreferred(), // forceExecutionOnSingleTile
|
||||||
false}; // blockDispatchToCommandBuffer
|
false, // blockDispatchToCommandBuffer
|
||||||
|
requiredWalkOrder != 0}; // isRequiredWorkGroupOrder
|
||||||
|
|
||||||
ImplicitScalingDispatch<GfxFamily>::template dispatchCommands<WalkerType>(commandStream,
|
ImplicitScalingDispatch<GfxFamily>::template dispatchCommands<WalkerType>(commandStream,
|
||||||
walkerCmd,
|
walkerCmd,
|
||||||
devices,
|
devices,
|
||||||
implicitScalingArgs);
|
implicitScalingArgs);
|
||||||
EncodeDispatchKernel<GfxFamily>::setWalkerRegionSettings(*static_cast<WalkerType *>(outWalker), hwInfo, implicitScalingArgs.partitionCount, workgroupSize, maxWgCountPerTile, requiredWalkOrder != 0);
|
|
||||||
|
|
||||||
if (queueCsr.isStaticWorkPartitioningEnabled()) {
|
if (queueCsr.isStaticWorkPartitioningEnabled()) {
|
||||||
queueCsr.setActivePartitions(std::max(queueCsr.getActivePartitions(), implicitScalingArgs.partitionCount));
|
queueCsr.setActivePartitions(std::max(queueCsr.getActivePartitions(), implicitScalingArgs.partitionCount));
|
||||||
|
|||||||
@@ -413,7 +413,7 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
|
|||||||
PreemptionHelper::applyPreemptionWaCmdsBegin<Family>(listCmdBufferStream, *args.device);
|
PreemptionHelper::applyPreemptionWaCmdsBegin<Family>(listCmdBufferStream, *args.device);
|
||||||
|
|
||||||
uint32_t workgroupSize = args.dispatchInterface->getGroupSize()[0] * args.dispatchInterface->getGroupSize()[1] * args.dispatchInterface->getGroupSize()[2];
|
uint32_t workgroupSize = args.dispatchInterface->getGroupSize()[0] * args.dispatchInterface->getGroupSize()[1] * args.dispatchInterface->getGroupSize()[2];
|
||||||
|
bool isRequiredWorkGroupOrder = args.requiredDispatchWalkOrder != NEO::RequiredDispatchWalkOrder::none;
|
||||||
if (args.partitionCount > 1 && !args.isInternal) {
|
if (args.partitionCount > 1 && !args.isInternal) {
|
||||||
const uint64_t workPartitionAllocationGpuVa = args.device->getDefaultEngine().commandStreamReceiver->getWorkPartitionAllocationGpuAddress();
|
const uint64_t workPartitionAllocationGpuVa = args.device->getDefaultEngine().commandStreamReceiver->getWorkPartitionAllocationGpuAddress();
|
||||||
|
|
||||||
@@ -423,24 +423,23 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
|
|||||||
&args.outWalkerPtr, // outWalkerPtr
|
&args.outWalkerPtr, // outWalkerPtr
|
||||||
args.requiredPartitionDim, // requiredPartitionDim
|
args.requiredPartitionDim, // requiredPartitionDim
|
||||||
args.partitionCount, // partitionCount
|
args.partitionCount, // partitionCount
|
||||||
|
workgroupSize, // workgroupSize
|
||||||
|
args.maxWgCountPerTile, // maxWgCountPerTile
|
||||||
!(container.getFlushTaskUsedForImmediate() || container.isUsingPrimaryBuffer()), // useSecondaryBatchBuffer
|
!(container.getFlushTaskUsedForImmediate() || container.isUsingPrimaryBuffer()), // useSecondaryBatchBuffer
|
||||||
!args.isKernelDispatchedFromImmediateCmdList, // apiSelfCleanup
|
!args.isKernelDispatchedFromImmediateCmdList, // apiSelfCleanup
|
||||||
args.dcFlushEnable, // dcFlush
|
args.dcFlushEnable, // dcFlush
|
||||||
EncodeDispatchKernel<Family>::singleTileExecImplicitScalingRequired(args.isCooperative), // forceExecutionOnSingleTile
|
EncodeDispatchKernel<Family>::singleTileExecImplicitScalingRequired(args.isCooperative), // forceExecutionOnSingleTile
|
||||||
args.makeCommandView}; // blockDispatchToCommandBuffer
|
args.makeCommandView, // blockDispatchToCommandBuffer
|
||||||
|
isRequiredWorkGroupOrder}; // isRequiredWorkGroupOrder
|
||||||
|
|
||||||
ImplicitScalingDispatch<Family>::dispatchCommands(*listCmdBufferStream,
|
ImplicitScalingDispatch<Family>::dispatchCommands(*listCmdBufferStream,
|
||||||
walkerCmd,
|
walkerCmd,
|
||||||
args.device->getDeviceBitfield(),
|
args.device->getDeviceBitfield(),
|
||||||
implicitScalingArgs);
|
implicitScalingArgs);
|
||||||
args.partitionCount = implicitScalingArgs.partitionCount;
|
args.partitionCount = implicitScalingArgs.partitionCount;
|
||||||
|
|
||||||
void *walkerToModify = args.outWalkerPtr ? args.outWalkerPtr : &walkerCmd;
|
|
||||||
|
|
||||||
EncodeDispatchKernel<Family>::setWalkerRegionSettings(*static_cast<WalkerType *>(walkerToModify), hwInfo, args.partitionCount, workgroupSize, args.maxWgCountPerTile, args.requiredDispatchWalkOrder != NEO::RequiredDispatchWalkOrder::none);
|
|
||||||
} else {
|
} else {
|
||||||
args.partitionCount = 1;
|
args.partitionCount = 1;
|
||||||
EncodeDispatchKernel<Family>::setWalkerRegionSettings(walkerCmd, hwInfo, args.partitionCount, workgroupSize, args.maxWgCountPerTile, args.requiredDispatchWalkOrder != NEO::RequiredDispatchWalkOrder::none);
|
EncodeDispatchKernel<Family>::setWalkerRegionSettings(walkerCmd, hwInfo, args.partitionCount, workgroupSize, args.maxWgCountPerTile, isRequiredWorkGroupOrder);
|
||||||
|
|
||||||
if (!args.makeCommandView) {
|
if (!args.makeCommandView) {
|
||||||
auto buffer = listCmdBufferStream->getSpaceForCmd<WalkerType>();
|
auto buffer = listCmdBufferStream->getSpaceForCmd<WalkerType>();
|
||||||
|
|||||||
@@ -44,12 +44,15 @@ struct ImplicitScalingDispatchCommandArgs {
|
|||||||
|
|
||||||
RequiredPartitionDim requiredPartitionDim = RequiredPartitionDim::none;
|
RequiredPartitionDim requiredPartitionDim = RequiredPartitionDim::none;
|
||||||
uint32_t partitionCount = 0;
|
uint32_t partitionCount = 0;
|
||||||
|
uint32_t workgroupSize = 0;
|
||||||
|
uint32_t maxWgCountPerTile = 0;
|
||||||
|
|
||||||
bool useSecondaryBatchBuffer = false;
|
bool useSecondaryBatchBuffer = false;
|
||||||
bool apiSelfCleanup = false;
|
bool apiSelfCleanup = false;
|
||||||
bool dcFlush = false;
|
bool dcFlush = false;
|
||||||
bool forceExecutionOnSingleTile = false;
|
bool forceExecutionOnSingleTile = false;
|
||||||
bool blockDispatchToCommandBuffer = false;
|
bool blockDispatchToCommandBuffer = false;
|
||||||
|
bool isRequiredWorkGroupOrder = false;
|
||||||
};
|
};
|
||||||
|
|
||||||
template <typename GfxFamily>
|
template <typename GfxFamily>
|
||||||
|
|||||||
@@ -49,6 +49,10 @@ WalkerPartition::WalkerPartitionArgs prepareWalkerPartitionArgs(ImplicitScalingD
|
|||||||
|
|
||||||
args.blockDispatchToCommandBuffer = dispatchCommandArgs.blockDispatchToCommandBuffer;
|
args.blockDispatchToCommandBuffer = dispatchCommandArgs.blockDispatchToCommandBuffer;
|
||||||
|
|
||||||
|
args.workgroupSize = dispatchCommandArgs.workgroupSize;
|
||||||
|
args.maxWgCountPerTile = dispatchCommandArgs.maxWgCountPerTile;
|
||||||
|
args.isRequiredWorkGroupOrder = dispatchCommandArgs.isRequiredWorkGroupOrder;
|
||||||
|
|
||||||
return args;
|
return args;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -17,6 +17,8 @@ struct WalkerPartitionArgs {
|
|||||||
uint64_t postSyncImmediateValue = 0;
|
uint64_t postSyncImmediateValue = 0;
|
||||||
uint32_t partitionCount = 0;
|
uint32_t partitionCount = 0;
|
||||||
uint32_t tileCount = 0;
|
uint32_t tileCount = 0;
|
||||||
|
uint32_t workgroupSize = 0;
|
||||||
|
uint32_t maxWgCountPerTile = 0;
|
||||||
bool emitBatchBufferEnd = false;
|
bool emitBatchBufferEnd = false;
|
||||||
bool secondaryBatchBuffer = false;
|
bool secondaryBatchBuffer = false;
|
||||||
bool synchronizeBeforeExecution = false;
|
bool synchronizeBeforeExecution = false;
|
||||||
@@ -33,6 +35,7 @@ struct WalkerPartitionArgs {
|
|||||||
bool dcFlushEnable = false;
|
bool dcFlushEnable = false;
|
||||||
bool forceExecutionOnSingleTile = false;
|
bool forceExecutionOnSingleTile = false;
|
||||||
bool blockDispatchToCommandBuffer = false;
|
bool blockDispatchToCommandBuffer = false;
|
||||||
|
bool isRequiredWorkGroupOrder = false;
|
||||||
};
|
};
|
||||||
|
|
||||||
inline constexpr uint32_t wparidCCSOffset = 0x221C;
|
inline constexpr uint32_t wparidCCSOffset = 0x221C;
|
||||||
|
|||||||
@@ -494,16 +494,14 @@ uint64_t computeWalkerSectionStart(WalkerPartitionArgs &args) {
|
|||||||
template <typename GfxFamily, typename WalkerType>
|
template <typename GfxFamily, typename WalkerType>
|
||||||
void *programPartitionedWalker(void *&inputAddress, uint32_t &totalBytesProgrammed,
|
void *programPartitionedWalker(void *&inputAddress, uint32_t &totalBytesProgrammed,
|
||||||
WalkerType *inputWalker,
|
WalkerType *inputWalker,
|
||||||
uint32_t partitionCount,
|
WalkerPartitionArgs &args,
|
||||||
uint32_t tileCount,
|
const NEO::HardwareInfo &hwInfo) {
|
||||||
bool forceExecutionOnSingleTile,
|
|
||||||
bool blockDispatchToCommandBuffer) {
|
|
||||||
WalkerType *computeWalker = nullptr;
|
WalkerType *computeWalker = nullptr;
|
||||||
if (!blockDispatchToCommandBuffer) {
|
if (!args.blockDispatchToCommandBuffer) {
|
||||||
computeWalker = putCommand<WalkerType>(inputAddress, totalBytesProgrammed);
|
computeWalker = putCommand<WalkerType>(inputAddress, totalBytesProgrammed);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (partitionCount > 1) {
|
if (args.partitionCount > 1) {
|
||||||
auto partitionType = inputWalker->getPartitionType();
|
auto partitionType = inputWalker->getPartitionType();
|
||||||
|
|
||||||
assert(inputWalker->getThreadGroupIdStartingX() == 0u);
|
assert(inputWalker->getThreadGroupIdStartingX() == 0u);
|
||||||
@@ -522,16 +520,23 @@ void *programPartitionedWalker(void *&inputAddress, uint32_t &totalBytesProgramm
|
|||||||
workgroupCount = inputWalker->getThreadGroupIdZDimension();
|
workgroupCount = inputWalker->getThreadGroupIdZDimension();
|
||||||
}
|
}
|
||||||
|
|
||||||
if (forceExecutionOnSingleTile) {
|
if (args.forceExecutionOnSingleTile) {
|
||||||
inputWalker->setPartitionSize(workgroupCount);
|
inputWalker->setPartitionSize(workgroupCount);
|
||||||
} else {
|
} else {
|
||||||
inputWalker->setPartitionSize(Math::divideAndRoundUp(workgroupCount, partitionCount));
|
inputWalker->setPartitionSize(Math::divideAndRoundUp(workgroupCount, args.partitionCount));
|
||||||
}
|
}
|
||||||
|
|
||||||
appendWalkerFields<GfxFamily, WalkerType>(*inputWalker, tileCount, workgroupCount);
|
NEO::EncodeDispatchKernel<GfxFamily>::setWalkerRegionSettings(*inputWalker,
|
||||||
|
hwInfo,
|
||||||
|
args.partitionCount,
|
||||||
|
args.workgroupSize,
|
||||||
|
args.maxWgCountPerTile,
|
||||||
|
args.isRequiredWorkGroupOrder);
|
||||||
|
|
||||||
|
appendWalkerFields<GfxFamily, WalkerType>(*inputWalker, args.tileCount, workgroupCount);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!blockDispatchToCommandBuffer) {
|
if (computeWalker != nullptr) {
|
||||||
*computeWalker = *inputWalker;
|
*computeWalker = *inputWalker;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -645,7 +650,7 @@ void constructDynamicallyPartitionedCommandBuffer(void *cpuPointer,
|
|||||||
args.secondaryBatchBuffer);
|
args.secondaryBatchBuffer);
|
||||||
|
|
||||||
// Walker section
|
// Walker section
|
||||||
auto walkerPtr = programPartitionedWalker<GfxFamily, WalkerType>(currentBatchBufferPointer, totalBytesProgrammed, inputWalker, args.partitionCount, args.tileCount, args.forceExecutionOnSingleTile, args.blockDispatchToCommandBuffer);
|
auto walkerPtr = programPartitionedWalker<GfxFamily, WalkerType>(currentBatchBufferPointer, totalBytesProgrammed, inputWalker, args, hwInfo);
|
||||||
if (outWalkerPtr) {
|
if (outWalkerPtr) {
|
||||||
*outWalkerPtr = walkerPtr;
|
*outWalkerPtr = walkerPtr;
|
||||||
}
|
}
|
||||||
@@ -742,7 +747,7 @@ void constructStaticallyPartitionedCommandBuffer(void *cpuPointer,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
auto walkerPtr = programPartitionedWalker<GfxFamily>(currentBatchBufferPointer, totalBytesProgrammed, inputWalker, args.partitionCount, args.tileCount, args.forceExecutionOnSingleTile, args.blockDispatchToCommandBuffer);
|
auto walkerPtr = programPartitionedWalker<GfxFamily>(currentBatchBufferPointer, totalBytesProgrammed, inputWalker, args, hwInfo);
|
||||||
|
|
||||||
if (!args.blockDispatchToCommandBuffer) {
|
if (!args.blockDispatchToCommandBuffer) {
|
||||||
if (outWalkerPtr) {
|
if (outWalkerPtr) {
|
||||||
|
|||||||
@@ -426,7 +426,10 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenProgramComputeWalkerWhen
|
|||||||
|
|
||||||
walker.setPartitionType(WalkerType::PARTITION_TYPE::PARTITION_TYPE_X);
|
walker.setPartitionType(WalkerType::PARTITION_TYPE::PARTITION_TYPE_X);
|
||||||
void *walkerCommandAddress = cmdBufferAddress;
|
void *walkerCommandAddress = cmdBufferAddress;
|
||||||
programPartitionedWalker<FamilyType>(cmdBufferAddress, totalBytesProgrammed, &walker, 2u, 2, false, false);
|
WalkerPartition::WalkerPartitionArgs args = {};
|
||||||
|
args.partitionCount = 2;
|
||||||
|
args.tileCount = 2;
|
||||||
|
programPartitionedWalker<FamilyType>(cmdBufferAddress, totalBytesProgrammed, &walker, args, testHardwareInfo);
|
||||||
auto walkerCommand = genCmdCast<WalkerType *>(walkerCommandAddress);
|
auto walkerCommand = genCmdCast<WalkerType *>(walkerCommandAddress);
|
||||||
|
|
||||||
ASSERT_NE(nullptr, walkerCommand);
|
ASSERT_NE(nullptr, walkerCommand);
|
||||||
@@ -437,7 +440,10 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenProgramComputeWalkerWhen
|
|||||||
|
|
||||||
walker.setPartitionType(WalkerType::PARTITION_TYPE::PARTITION_TYPE_Y);
|
walker.setPartitionType(WalkerType::PARTITION_TYPE::PARTITION_TYPE_Y);
|
||||||
walkerCommandAddress = cmdBufferAddress;
|
walkerCommandAddress = cmdBufferAddress;
|
||||||
programPartitionedWalker<FamilyType>(cmdBufferAddress, totalBytesProgrammed, &walker, 2u, 2, false, false);
|
args = {};
|
||||||
|
args.partitionCount = 2;
|
||||||
|
args.tileCount = 2;
|
||||||
|
programPartitionedWalker<FamilyType>(cmdBufferAddress, totalBytesProgrammed, &walker, args, testHardwareInfo);
|
||||||
walkerCommand = genCmdCast<WalkerType *>(walkerCommandAddress);
|
walkerCommand = genCmdCast<WalkerType *>(walkerCommandAddress);
|
||||||
|
|
||||||
ASSERT_NE(nullptr, walkerCommand);
|
ASSERT_NE(nullptr, walkerCommand);
|
||||||
@@ -446,7 +452,10 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenProgramComputeWalkerWhen
|
|||||||
|
|
||||||
walker.setPartitionType(WalkerType::PARTITION_TYPE::PARTITION_TYPE_Z);
|
walker.setPartitionType(WalkerType::PARTITION_TYPE::PARTITION_TYPE_Z);
|
||||||
walkerCommandAddress = cmdBufferAddress;
|
walkerCommandAddress = cmdBufferAddress;
|
||||||
programPartitionedWalker<FamilyType>(cmdBufferAddress, totalBytesProgrammed, &walker, 2u, 2, false, false);
|
args = {};
|
||||||
|
args.partitionCount = 2;
|
||||||
|
args.tileCount = 2;
|
||||||
|
programPartitionedWalker<FamilyType>(cmdBufferAddress, totalBytesProgrammed, &walker, args, testHardwareInfo);
|
||||||
walkerCommand = genCmdCast<WalkerType *>(walkerCommandAddress);
|
walkerCommand = genCmdCast<WalkerType *>(walkerCommandAddress);
|
||||||
|
|
||||||
ASSERT_NE(nullptr, walkerCommand);
|
ASSERT_NE(nullptr, walkerCommand);
|
||||||
@@ -456,7 +465,10 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenProgramComputeWalkerWhen
|
|||||||
// if we program with partition Count == 1 then do not trigger partition stuff
|
// if we program with partition Count == 1 then do not trigger partition stuff
|
||||||
walker.setPartitionType(WalkerType::PARTITION_TYPE::PARTITION_TYPE_DISABLED);
|
walker.setPartitionType(WalkerType::PARTITION_TYPE::PARTITION_TYPE_DISABLED);
|
||||||
walkerCommandAddress = cmdBufferAddress;
|
walkerCommandAddress = cmdBufferAddress;
|
||||||
programPartitionedWalker<FamilyType>(cmdBufferAddress, totalBytesProgrammed, &walker, 1u, 2, false, false);
|
args = {};
|
||||||
|
args.partitionCount = 1;
|
||||||
|
args.tileCount = 2;
|
||||||
|
programPartitionedWalker<FamilyType>(cmdBufferAddress, totalBytesProgrammed, &walker, args, testHardwareInfo);
|
||||||
walkerCommand = genCmdCast<WalkerType *>(walkerCommandAddress);
|
walkerCommand = genCmdCast<WalkerType *>(walkerCommandAddress);
|
||||||
|
|
||||||
ASSERT_NE(nullptr, walkerCommand);
|
ASSERT_NE(nullptr, walkerCommand);
|
||||||
@@ -1763,7 +1775,11 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenForceExecutionOnSingleTi
|
|||||||
bool forceExecutionOnSingleTile = false;
|
bool forceExecutionOnSingleTile = false;
|
||||||
walker.setPartitionType(WalkerType::PARTITION_TYPE::PARTITION_TYPE_X);
|
walker.setPartitionType(WalkerType::PARTITION_TYPE::PARTITION_TYPE_X);
|
||||||
void *walkerCommandAddress = cmdBufferAddress;
|
void *walkerCommandAddress = cmdBufferAddress;
|
||||||
programPartitionedWalker<FamilyType>(cmdBufferAddress, totalBytesProgrammed, &walker, 2u, 2, forceExecutionOnSingleTile, false);
|
WalkerPartition::WalkerPartitionArgs args = {};
|
||||||
|
args.partitionCount = 2;
|
||||||
|
args.tileCount = 2;
|
||||||
|
args.forceExecutionOnSingleTile = forceExecutionOnSingleTile;
|
||||||
|
programPartitionedWalker<FamilyType>(cmdBufferAddress, totalBytesProgrammed, &walker, args, testHardwareInfo);
|
||||||
auto walkerCommand = genCmdCast<WalkerType *>(walkerCommandAddress);
|
auto walkerCommand = genCmdCast<WalkerType *>(walkerCommandAddress);
|
||||||
|
|
||||||
ASSERT_NE(nullptr, walkerCommand);
|
ASSERT_NE(nullptr, walkerCommand);
|
||||||
@@ -1773,7 +1789,11 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenForceExecutionOnSingleTi
|
|||||||
|
|
||||||
forceExecutionOnSingleTile = true;
|
forceExecutionOnSingleTile = true;
|
||||||
walkerCommandAddress = cmdBufferAddress;
|
walkerCommandAddress = cmdBufferAddress;
|
||||||
programPartitionedWalker<FamilyType>(cmdBufferAddress, totalBytesProgrammed, &walker, 2u, 2, forceExecutionOnSingleTile, false);
|
args = {};
|
||||||
|
args.partitionCount = 2;
|
||||||
|
args.tileCount = 2;
|
||||||
|
args.forceExecutionOnSingleTile = forceExecutionOnSingleTile;
|
||||||
|
programPartitionedWalker<FamilyType>(cmdBufferAddress, totalBytesProgrammed, &walker, args, testHardwareInfo);
|
||||||
walkerCommand = genCmdCast<WalkerType *>(walkerCommandAddress);
|
walkerCommand = genCmdCast<WalkerType *>(walkerCommandAddress);
|
||||||
|
|
||||||
ASSERT_NE(nullptr, walkerCommand);
|
ASSERT_NE(nullptr, walkerCommand);
|
||||||
|
|||||||
@@ -47,11 +47,14 @@ ImplicitScalingDispatchCommandArgs ImplicitScalingFixture::createDispatchCommand
|
|||||||
nullptr, // outWalkerPtr
|
nullptr, // outWalkerPtr
|
||||||
NEO::RequiredPartitionDim::none, // requiredPartitionDim
|
NEO::RequiredPartitionDim::none, // requiredPartitionDim
|
||||||
partitionCount, // partitionCount
|
partitionCount, // partitionCount
|
||||||
|
1, // workgroupSize
|
||||||
|
1, // maxWgCountPerTile
|
||||||
true, // useSecondaryBatchBuffer
|
true, // useSecondaryBatchBuffer
|
||||||
false, // apiSelfCleanup
|
false, // apiSelfCleanup
|
||||||
dcFlushFlag, // dcFlush
|
dcFlushFlag, // dcFlush
|
||||||
forceExecutionOnSingleTileFlag, // forceExecutionOnSingleTile
|
forceExecutionOnSingleTileFlag, // forceExecutionOnSingleTile
|
||||||
false}; // blockDispatchToCommandBuffer
|
false, // blockDispatchToCommandBuffer
|
||||||
|
false}; // isRequiredWorkGroupOrder
|
||||||
|
|
||||||
return args;
|
return args;
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user