fix: limit slm size based on hw information

- add override default values method

Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
Zbigniew Zdanowicz 2024-10-02 09:50:19 +00:00 committed by Compute-Runtime-Automation
parent 19e2dd0846
commit ac6472b105
4 changed files with 18 additions and 1 deletions

View File

@ -226,6 +226,9 @@ struct EncodeDispatchKernel {
static uint32_t computeSlmValues(const HardwareInfo &hwInfo, uint32_t slmSize); static uint32_t computeSlmValues(const HardwareInfo &hwInfo, uint32_t slmSize);
static bool singleTileExecImplicitScalingRequired(bool cooperativeKernel); static bool singleTileExecImplicitScalingRequired(bool cooperativeKernel);
// Static member template taking the walker command and the interface descriptor
// by non-const reference, so a definition may modify either of them in place.
// NOTE(review): which fields (if any) get overridden depends on the definition
// selected for the given Family — confirm against the per-family sources.
template <typename WalkerType, typename InterfaceDescriptorType>
static void overrideDefaultValues(WalkerType &walkerCmd, InterfaceDescriptorType &interfaceDescriptor);
}; };
template <typename GfxFamily> template <typename GfxFamily>

View File

@ -630,4 +630,9 @@ void InOrderPatchCommandHelpers::PatchCmd<Family>::patchComputeWalker(uint64_t a
UNRECOVERABLE_IF(true); UNRECOVERABLE_IF(true);
} }
// No-op definition: the body is empty, so neither walkerCmd nor
// interfaceDescriptor is modified by this version.
// NOTE(review): presumably a placeholder so that other builds/families can supply
// a definition that actually overrides values — confirm.
template <typename Family>
template <typename WalkerType, typename InterfaceDescriptorType>
void EncodeDispatchKernel<Family>::overrideDefaultValues(WalkerType &walkerCmd, InterfaceDescriptorType &interfaceDescriptor) {
}
} // namespace NEO } // namespace NEO

View File

@ -31,6 +31,7 @@ template void NEO::EncodeDispatchKernel<Family>::patchScratchAddressInImplicitAr
template void NEO::EncodeDispatchKernel<Family>::forceComputeWalkerPostSyncFlushWithWrite<Family::DefaultWalkerType>(Family::DefaultWalkerType &walkerCmd); template void NEO::EncodeDispatchKernel<Family>::forceComputeWalkerPostSyncFlushWithWrite<Family::DefaultWalkerType>(Family::DefaultWalkerType &walkerCmd);
template void NEO::EncodeDispatchKernel<Family>::setWalkerRegionSettings<Family::DefaultWalkerType>(Family::DefaultWalkerType &walkerCmd, const HardwareInfo &hwInfo, uint32_t partitionCount, template void NEO::EncodeDispatchKernel<Family>::setWalkerRegionSettings<Family::DefaultWalkerType>(Family::DefaultWalkerType &walkerCmd, const HardwareInfo &hwInfo, uint32_t partitionCount,
uint32_t workgroupSize, uint32_t maxWgCountPerTile, bool requiredWalkOrder); uint32_t workgroupSize, uint32_t maxWgCountPerTile, bool requiredWalkOrder);
// Explicit instantiation of overrideDefaultValues for Family::DefaultWalkerType and Family::INTERFACE_DESCRIPTOR_DATA.
template void NEO::EncodeDispatchKernel<Family>::overrideDefaultValues<Family::DefaultWalkerType, Family::INTERFACE_DESCRIPTOR_DATA>(Family::DefaultWalkerType &walkerCmd, Family::INTERFACE_DESCRIPTOR_DATA &interfaceDescriptor);
template struct NEO::EncodeStates<Family>; template struct NEO::EncodeStates<Family>;
template struct NEO::EncodeMath<Family>; template struct NEO::EncodeMath<Family>;

View File

@ -412,6 +412,8 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
args.device->getDeviceInfo().maxFrontEndThreads}; args.device->getDeviceInfo().maxFrontEndThreads};
EncodeDispatchKernel<Family>::encodeAdditionalWalkerFields(rootDeviceEnvironment, walkerCmd, walkerArgs); EncodeDispatchKernel<Family>::encodeAdditionalWalkerFields(rootDeviceEnvironment, walkerCmd, walkerArgs);
EncodeDispatchKernel<Family>::overrideDefaultValues(walkerCmd, idd);
uint32_t workgroupSize = args.dispatchInterface->getGroupSize()[0] * args.dispatchInterface->getGroupSize()[1] * args.dispatchInterface->getGroupSize()[2]; uint32_t workgroupSize = args.dispatchInterface->getGroupSize()[0] * args.dispatchInterface->getGroupSize()[1] * args.dispatchInterface->getGroupSize()[2];
bool isRequiredWorkGroupOrder = args.requiredDispatchWalkOrder != NEO::RequiredDispatchWalkOrder::none; bool isRequiredWorkGroupOrder = args.requiredDispatchWalkOrder != NEO::RequiredDispatchWalkOrder::none;
if (args.partitionCount > 1 && !args.isInternal) { if (args.partitionCount > 1 && !args.isInternal) {
@ -1076,7 +1078,7 @@ void EncodeDispatchKernel<Family>::setupPreferredSlmSize(InterfaceDescriptorType
break; break;
case SlmPolicy::slmPolicyLargeSlm: case SlmPolicy::slmPolicyLargeSlm:
default: default:
slmSize = slmTotalSize * workGroupCountPerDss; slmSize = std::min(slmTotalSize * workGroupCountPerDss, static_cast<uint32_t>(hwInfo.capabilityTable.slmSize * MemoryConstants::kiloByte));
break; break;
} }
@ -1111,4 +1113,10 @@ void InOrderPatchCommandHelpers::PatchCmd<Family>::patchComputeWalker(uint64_t a
auto &postSync = walkerCmd->getPostSync(); auto &postSync = walkerCmd->getPostSync();
postSync.setImmediateData(baseCounterValue + appendCounterValue); postSync.setImmediateData(baseCounterValue + appendCounterValue);
} }
// No-op definition: the body is empty, so neither walkerCmd nor
// interfaceDescriptor is modified by this version.
// NOTE(review): presumably a placeholder so that other builds/families can supply
// a definition that actually overrides values — confirm.
template <typename Family>
template <typename WalkerType, typename InterfaceDescriptorType>
void EncodeDispatchKernel<Family>::overrideDefaultValues(WalkerType &walkerCmd, InterfaceDescriptorType &interfaceDescriptor) {
}
} // namespace NEO } // namespace NEO