refactor: change encoder for thread group over dispatch 1/n

- change method name to a more meaningful one
- add all inputs of the algorithm as explicit function arguments
- position all implementations accordingly
- rename unit tests to fit the new method name
- fix unit test to have correct initial command values
- fix unit test so that input values are consistent with command values
- fix unit test to change input values together with command values

Related-To: NEO-12639

Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
Zbigniew Zdanowicz
2024-10-25 09:03:09 +00:00
committed by Compute-Runtime-Automation
parent 8c3c703ec0
commit bbdf1ac7b6
11 changed files with 343 additions and 244 deletions

View File

@@ -67,12 +67,7 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
LinearStream *listCmdBufferStream = container.getCommandStream();
auto threadDims = static_cast<const uint32_t *>(args.threadGroupDimensions);
const Vec3<size_t> threadStartVec{0, 0, 0};
Vec3<size_t> threadDimsVec{0, 0, 0};
if (!args.isIndirect) {
threadDimsVec = {threadDims[0], threadDims[1], threadDims[2]};
}
auto threadGroupDims = static_cast<const uint32_t *>(args.threadGroupDimensions);
DefaultWalkerType cmd = Family::cmdInitGpgpuWalker;
auto idd = Family::cmdInitInterfaceDescriptorData;
@@ -267,11 +262,11 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
EncodeDispatchKernel<Family>::encodeThreadData(cmd,
nullptr,
threadDims,
threadGroupDims,
args.dispatchInterface->getGroupSize(),
kernelDescriptor.kernelAttributes.simdSize,
kernelDescriptor.kernelAttributes.numLocalIdChannels,
args.dispatchInterface->getNumThreadsPerThreadGroup(),
numThreadsPerThreadGroup,
args.dispatchInterface->getThreadExecutionMask(),
true,
false,
@@ -282,7 +277,7 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
cmd.setPredicateEnable(args.isPredicate);
auto threadGroupCount = cmd.getThreadGroupIdXDimension() * cmd.getThreadGroupIdYDimension() * cmd.getThreadGroupIdZDimension();
EncodeDispatchKernel<Family>::adjustInterfaceDescriptorData(idd, *args.device, hwInfo, threadGroupCount, kernelDescriptor.kernelAttributes.numGrfRequired, cmd);
EncodeDispatchKernel<Family>::encodeThreadGroupDispatch(idd, *args.device, hwInfo, threadGroupDims, threadGroupCount, kernelDescriptor.kernelAttributes.numGrfRequired, numThreadsPerThreadGroup, cmd);
memcpy_s(iddPtr, sizeof(idd), &idd, sizeof(idd));
@@ -635,4 +630,11 @@ template <typename WalkerType, typename InterfaceDescriptorType>
void EncodeDispatchKernel<Family>::overrideDefaultValues(WalkerType &walkerCmd, InterfaceDescriptorType &interfaceDescriptor) {
}
// Thread-group dispatch encoding step, invoked from encode() with the interface
// descriptor, device, hardware info, thread-group dimensions, total thread-group
// count, GRF count, threads per thread group and the walker command.
//
// This definition has an empty body: none of the arguments are read, and neither
// interfaceDescriptor nor walkerCmd is modified.
// NOTE(review): presumably other implementations of this method do the real
// encoding elsewhere — confirm against the sibling implementations.
template <typename Family>
template <typename WalkerType, typename InterfaceDescriptorType>
void EncodeDispatchKernel<Family>::encodeThreadGroupDispatch(InterfaceDescriptorType &interfaceDescriptor, const Device &device, const HardwareInfo &hwInfo,
const uint32_t *threadGroupDimensions, const uint32_t threadGroupCount, const uint32_t grfCount, const uint32_t threadsPerThreadGroup,
WalkerType &walkerCmd) {
}
} // namespace NEO