mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-03 23:03:02 +08:00
feature: Add heapless mode programming in ocl
Related-To: NEO-7621 Signed-off-by: Kamil Kopryk <kamil.kopryk@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
c35b13ccae
commit
ce7298d512
@@ -26,15 +26,17 @@ using Family = NEO::XeHpgCoreFamily;
|
||||
namespace NEO {
|
||||
|
||||
template <>
|
||||
void EncodeDispatchKernel<Family>::adjustTimestampPacket(WALKER_TYPE &walkerCmd, const HardwareInfo &hwInfo) {
|
||||
template <typename WalkerType>
|
||||
void EncodeDispatchKernel<Family>::adjustTimestampPacket(WalkerType &walkerCmd, const HardwareInfo &hwInfo) {
|
||||
auto &postSyncData = walkerCmd.getPostSync();
|
||||
|
||||
postSyncData.setDataportSubsliceCacheFlush(true);
|
||||
}
|
||||
|
||||
template <>
|
||||
void EncodeDispatchKernel<Family>::appendAdditionalIDDFields(INTERFACE_DESCRIPTOR_DATA *pInterfaceDescriptor, const RootDeviceEnvironment &rootDeviceEnvironment, const uint32_t threadsPerThreadGroup, uint32_t slmTotalSize, SlmPolicy slmPolicy) {
|
||||
using PREFERRED_SLM_ALLOCATION_SIZE = typename Family::INTERFACE_DESCRIPTOR_DATA::PREFERRED_SLM_ALLOCATION_SIZE;
|
||||
template <typename InterfaceDescriptorType>
|
||||
void EncodeDispatchKernel<Family>::appendAdditionalIDDFields(InterfaceDescriptorType *pInterfaceDescriptor, const RootDeviceEnvironment &rootDeviceEnvironment, const uint32_t threadsPerThreadGroup, uint32_t slmTotalSize, SlmPolicy slmPolicy) {
|
||||
using PREFERRED_SLM_ALLOCATION_SIZE = typename InterfaceDescriptorType::PREFERRED_SLM_ALLOCATION_SIZE;
|
||||
auto &hwInfo = *rootDeviceEnvironment.getHardwareInfo();
|
||||
const uint32_t threadsPerDssCount = hwInfo.gtSystemInfo.ThreadCount / hwInfo.gtSystemInfo.DualSubSliceCount;
|
||||
const uint32_t workGroupCountPerDss = threadsPerDssCount / threadsPerThreadGroup;
|
||||
@@ -92,7 +94,8 @@ void EncodeDispatchKernel<Family>::appendAdditionalIDDFields(INTERFACE_DESCRIPTO
|
||||
}
|
||||
|
||||
template <>
|
||||
void EncodeDispatchKernel<Family>::adjustInterfaceDescriptorData(INTERFACE_DESCRIPTOR_DATA &interfaceDescriptor, const Device &device, const HardwareInfo &hwInfo, const uint32_t threadGroupCount, const uint32_t numGrf, WALKER_TYPE &walkerCmd) {
|
||||
template <typename WalkerType, typename InterfaceDescriptorType>
|
||||
void EncodeDispatchKernel<Family>::adjustInterfaceDescriptorData(InterfaceDescriptorType &interfaceDescriptor, const Device &device, const HardwareInfo &hwInfo, const uint32_t threadGroupCount, const uint32_t numGrf, WalkerType &walkerCmd) {
|
||||
const auto &productHelper = device.getProductHelper();
|
||||
if (productHelper.isDisableOverdispatchAvailable(hwInfo)) {
|
||||
if (interfaceDescriptor.getNumberOfThreadsInGpgpuThreadGroup() == 1) {
|
||||
@@ -108,6 +111,7 @@ void EncodeDispatchKernel<Family>::adjustInterfaceDescriptorData(INTERFACE_DESCR
|
||||
}
|
||||
}
|
||||
|
||||
template <>
|
||||
template <>
|
||||
void EncodeDispatchKernel<Family>::programBarrierEnable(INTERFACE_DESCRIPTOR_DATA &interfaceDescriptor, uint32_t value, const HardwareInfo &hwInfo) {
|
||||
using BARRIERS = INTERFACE_DESCRIPTOR_DATA::NUMBER_OF_BARRIERS;
|
||||
@@ -118,7 +122,8 @@ void EncodeDispatchKernel<Family>::programBarrierEnable(INTERFACE_DESCRIPTOR_DAT
|
||||
}
|
||||
|
||||
template <>
|
||||
void EncodeDispatchKernel<Family>::encodeAdditionalWalkerFields(const RootDeviceEnvironment &rootDeviceEnvironment, WALKER_TYPE &walkerCmd, const EncodeWalkerArgs &walkerArgs) {
|
||||
template <typename WalkerType>
|
||||
void EncodeDispatchKernel<Family>::encodeAdditionalWalkerFields(const RootDeviceEnvironment &rootDeviceEnvironment, WalkerType &walkerCmd, const EncodeWalkerArgs &walkerArgs) {
|
||||
auto *releaseHelper = rootDeviceEnvironment.getReleaseHelper();
|
||||
bool l3PrefetchDisable = releaseHelper->isPrefetchDisablingRequired();
|
||||
int32_t overrideL3PrefetchDisable = DebugManager.flags.ForceL3PrefetchForComputeWalker.get();
|
||||
@@ -217,6 +222,13 @@ size_t EncodeMiFlushDW<Family>::getWaSize(const EncodeDummyBlitWaArgs &waArgs) {
|
||||
template void flushGpuCache<Family>(LinearStream *commandStream, const Range<L3Range> &ranges, uint64_t postSyncAddress, const HardwareInfo &hwInfo);
|
||||
|
||||
template struct EncodeDispatchKernel<Family>;
|
||||
template void EncodeDispatchKernel<Family>::encodeAdditionalWalkerFields<Family::WALKER_TYPE>(const RootDeviceEnvironment &rootDeviceEnvironment, Family::WALKER_TYPE &walkerCmd, const EncodeWalkerArgs &walkerArgs);
|
||||
template void EncodeDispatchKernel<Family>::adjustTimestampPacket<Family::WALKER_TYPE>(Family::WALKER_TYPE &walkerCmd, const HardwareInfo &hwInfo);
|
||||
template void EncodeDispatchKernel<Family>::setGrfInfo<Family::INTERFACE_DESCRIPTOR_DATA>(Family::INTERFACE_DESCRIPTOR_DATA *pInterfaceDescriptor, uint32_t numGrf, const size_t &sizeCrossThreadData, const size_t &sizePerThreadData, const RootDeviceEnvironment &rootDeviceEnvironment);
|
||||
template void EncodeDispatchKernel<Family>::appendAdditionalIDDFields<Family::INTERFACE_DESCRIPTOR_DATA>(Family::INTERFACE_DESCRIPTOR_DATA *pInterfaceDescriptor, const RootDeviceEnvironment &rootDeviceEnvironment, const uint32_t threadsPerThreadGroup, uint32_t slmTotalSize, SlmPolicy slmPolicy);
|
||||
template void EncodeDispatchKernel<Family>::adjustInterfaceDescriptorData<Family::WALKER_TYPE, Family::INTERFACE_DESCRIPTOR_DATA>(Family::INTERFACE_DESCRIPTOR_DATA &interfaceDescriptor, const Device &device, const HardwareInfo &hwInfo, const uint32_t threadGroupCount, const uint32_t numGrf, Family::WALKER_TYPE &walkerCmd);
|
||||
template void EncodeDispatchKernel<Family>::setupPostSyncMocs<Family::WALKER_TYPE>(Family::WALKER_TYPE &walkerCmd, const RootDeviceEnvironment &rootDeviceEnvironment, bool dcFlush);
|
||||
|
||||
template struct EncodeStates<Family>;
|
||||
template struct EncodeMath<Family>;
|
||||
template struct EncodeMathMMIO<Family>;
|
||||
|
||||
@@ -149,6 +149,26 @@ struct XeHpgCoreFamily : public XeHpgCore {
|
||||
static constexpr bool supportsCmdSet(GFXCORE_FAMILY cmdSetBaseFamily) {
|
||||
return cmdSetBaseFamily == IGFX_XE_HP_CORE;
|
||||
}
|
||||
|
||||
template <typename WalkerType = WALKER_TYPE>
|
||||
static WalkerType getInitGpuWalker() {
|
||||
return cmdInitGpgpuWalker;
|
||||
}
|
||||
|
||||
template <typename WalkerType = WALKER_TYPE>
|
||||
static constexpr size_t getInterfaceDescriptorSize() {
|
||||
return sizeof(INTERFACE_DESCRIPTOR_DATA);
|
||||
}
|
||||
|
||||
template <typename InterfaceDescriptorType>
|
||||
static InterfaceDescriptorType getInitInterfaceDescriptor() {
|
||||
return cmdInitInterfaceDescriptorData;
|
||||
}
|
||||
|
||||
template <typename WalkerType>
|
||||
static constexpr bool isHeaplessMode() {
|
||||
return false;
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace NEO
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2021-2022 Intel Corporation
|
||||
* Copyright (C) 2021-2023 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -17,4 +17,7 @@ template <>
|
||||
bool ImplicitScalingDispatch<Family>::pipeControlStallRequired = true;
|
||||
|
||||
template struct ImplicitScalingDispatch<Family>;
|
||||
template void ImplicitScalingDispatch<Family>::dispatchCommands<Family::WALKER_TYPE>(LinearStream &commandStream, Family::WALKER_TYPE &walkerCmd, void **outWalkerPtr, const DeviceBitfield &devices, uint32_t &partitionCount, bool useSecondaryBatchBuffer, bool apiSelfCleanup, bool usesImages, bool dcFlush, bool forceExecutionOnSingleTile, uint64_t workPartitionAllocationGpuVa, const HardwareInfo &hwInfo);
|
||||
template size_t ImplicitScalingDispatch<Family>::getSize<Family::WALKER_TYPE>(bool apiSelfCleanup, bool preferStaticPartitioning, const DeviceBitfield &devices, const Vec3<size_t> &groupStart, const Vec3<size_t> &groupCount);
|
||||
|
||||
} // namespace NEO
|
||||
|
||||
Reference in New Issue
Block a user