refactor: pass extra walker params

Signed-off-by: Bartosz Dunajski <bartosz.dunajski@intel.com>
This commit is contained in:
Bartosz Dunajski
2024-09-10 13:07:00 +00:00
committed by Compute-Runtime-Automation
parent b1558755e7
commit 4f1262645b
29 changed files with 134 additions and 43 deletions

View File

@@ -66,6 +66,7 @@ struct EncodeDispatchKernelArgs {
uint32_t additionalSizeParam = NEO::additionalKernelLaunchSizeParamNotSet;
uint32_t partitionCount = 0u;
uint32_t reserveExtraPayloadSpace = 0;
uint32_t maxWgCountPerTile = 0;
int32_t defaultPipelinedThreadArbitrationPolicy = NEO::ThreadArbitrationPolicy::NotPresent;
bool isIndirect = false;
bool isPredicate = false;
@@ -181,6 +182,9 @@ struct EncodeDispatchKernel {
template <typename WalkerType>
static void setupPostSyncForRegularEvent(WalkerType &walkerCmd, const EncodeDispatchKernelArgs &args);
template <typename WalkerType>
static void setWalkerRegionSettings(WalkerType &walkerCmd, const HardwareInfo &hwInfo, uint32_t partitionCount, uint32_t workgroupSize, uint32_t maxWgCountPerTile, bool requiredWalkOrder);
template <typename WalkerType>
static void setupPostSyncForInOrderExec(WalkerType &walkerCmd, const EncodeDispatchKernelArgs &args);

View File

@@ -598,6 +598,10 @@ template <typename Family>
template <typename WalkerType>
void EncodeDispatchKernel<Family>::adjustTimestampPacket(WalkerType &walkerCmd, const EncodeDispatchKernelArgs &args) {}
template <typename Family>
template <typename WalkerType>
void EncodeDispatchKernel<Family>::setWalkerRegionSettings(WalkerType &walkerCmd, const HardwareInfo &hwInfo, uint32_t partitionCount, uint32_t workgroupSize, uint32_t maxWgCountPerTile, bool requiredWalkOrder) {}
template <typename Family>
void EncodeIndirectParams<Family>::encode(CommandContainer &container, uint64_t crossThreadDataGpuVa, DispatchKernelEncoderI *dispatchInterface, uint64_t implicitArgsGpuPtr) {
const auto &kernelDescriptor = dispatchInterface->getKernelDescriptor();

View File

@@ -29,6 +29,8 @@ template void NEO::EncodeDispatchKernel<Family>::encodeEuSchedulingPolicy<Family
template uint64_t NEO::EncodeDispatchKernel<Family>::getScratchAddressForImmediatePatching<false>(CommandContainer &container, EncodeDispatchKernelArgs &args);
template void NEO::EncodeDispatchKernel<Family>::patchScratchAddressInImplicitArgs<false>(ImplicitArgs &implicitArgs, uint64_t scratchAddress, bool scratchPtrPatchingRequired);
template void NEO::EncodeDispatchKernel<Family>::forceComputeWalkerPostSyncFlushWithWrite<Family::DefaultWalkerType>(Family::DefaultWalkerType &walkerCmd);
template void NEO::EncodeDispatchKernel<Family>::setWalkerRegionSettings<Family::DefaultWalkerType>(Family::DefaultWalkerType &walkerCmd, const HardwareInfo &hwInfo, uint32_t partitionCount,
uint32_t workgroupSize, uint32_t maxWgCountPerTile, bool requiredWalkOrder);
template struct NEO::EncodeStates<Family>;
template struct NEO::EncodeMath<Family>;

View File

@@ -415,6 +415,8 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
PreemptionHelper::applyPreemptionWaCmdsBegin<Family>(listCmdBufferStream, *args.device);
uint32_t workgroupSize = args.dispatchInterface->getGroupSize()[0] * args.dispatchInterface->getGroupSize()[1] * args.dispatchInterface->getGroupSize()[2];
if (args.partitionCount > 1 && !args.isInternal) {
const uint64_t workPartitionAllocationGpuVa = args.device->getDefaultEngine().commandStreamReceiver->getWorkPartitionAllocationGpuAddress();
@@ -435,8 +437,14 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
args.device->getDeviceBitfield(),
implicitScalingArgs);
args.partitionCount = implicitScalingArgs.partitionCount;
void *walkerToModify = args.outWalkerPtr ? args.outWalkerPtr : &walkerCmd;
EncodeDispatchKernel<Family>::setWalkerRegionSettings(*static_cast<WalkerType *>(walkerToModify), hwInfo, args.partitionCount, workgroupSize, args.maxWgCountPerTile, args.requiredDispatchWalkOrder != NEO::RequiredDispatchWalkOrder::none);
} else {
args.partitionCount = 1;
EncodeDispatchKernel<Family>::setWalkerRegionSettings(walkerCmd, hwInfo, args.partitionCount, workgroupSize, args.maxWgCountPerTile, args.requiredDispatchWalkOrder != NEO::RequiredDispatchWalkOrder::none);
if (!args.makeCommandView) {
auto buffer = listCmdBufferStream->getSpaceForCmd<WalkerType>();
args.outWalkerPtr = buffer;
@@ -993,5 +1001,4 @@ void InOrderPatchCommandHelpers::PatchCmd<Family>::patchComputeWalker(uint64_t a
auto &postSync = walkerCmd->getPostSync();
postSync.setImmediateData(baseCounterValue + appendCounterValue);
}
} // namespace NEO