mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-03 14:55:24 +08:00
Add adjustMaxWorkGroupCount helper
Signed-off-by: Filip Hazubski <filip.hazubski@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
8d60fb2a07
commit
de1e4e0074
@@ -111,7 +111,7 @@ ZE_APIEXPORT ze_result_t ZE_APICALL
|
|||||||
zeKernelSuggestMaxCooperativeGroupCount(
|
zeKernelSuggestMaxCooperativeGroupCount(
|
||||||
ze_kernel_handle_t hKernel,
|
ze_kernel_handle_t hKernel,
|
||||||
uint32_t *totalGroupCount) {
|
uint32_t *totalGroupCount) {
|
||||||
return L0::Kernel::fromHandle(hKernel)->suggestMaxCooperativeGroupCount(totalGroupCount);
|
return L0::Kernel::fromHandle(hKernel)->suggestMaxCooperativeGroupCount(totalGroupCount, NEO::EngineGroupType::Compute, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
ZE_APIEXPORT ze_result_t ZE_APICALL
|
ZE_APIEXPORT ze_result_t ZE_APICALL
|
||||||
|
|||||||
@@ -1880,7 +1880,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::programSyncBuffer(Kernel &kern
|
|||||||
}
|
}
|
||||||
|
|
||||||
uint32_t maximalNumberOfWorkgroupsAllowed;
|
uint32_t maximalNumberOfWorkgroupsAllowed;
|
||||||
auto ret = kernel.suggestMaxCooperativeGroupCount(&maximalNumberOfWorkgroupsAllowed);
|
auto ret = kernel.suggestMaxCooperativeGroupCount(&maximalNumberOfWorkgroupsAllowed, this->engineGroupType,
|
||||||
|
device.getDefaultEngine().osContext->isEngineInstanced());
|
||||||
UNRECOVERABLE_IF(ret != ZE_RESULT_SUCCESS);
|
UNRECOVERABLE_IF(ret != ZE_RESULT_SUCCESS);
|
||||||
size_t requestedNumberOfWorkgroups = (pThreadGroupDimensions->groupCountX * pThreadGroupDimensions->groupCountY *
|
size_t requestedNumberOfWorkgroups = (pThreadGroupDimensions->groupCountX * pThreadGroupDimensions->groupCountY *
|
||||||
pThreadGroupDimensions->groupCountZ);
|
pThreadGroupDimensions->groupCountZ);
|
||||||
|
|||||||
@@ -118,7 +118,8 @@ struct Kernel : _ze_kernel_handle_t, virtual NEO::DispatchKernelEncoderI {
|
|||||||
|
|
||||||
virtual void patchWorkDim(uint32_t groupCountX, uint32_t groupCountY, uint32_t groupCountZ) = 0;
|
virtual void patchWorkDim(uint32_t groupCountX, uint32_t groupCountY, uint32_t groupCountZ) = 0;
|
||||||
|
|
||||||
virtual ze_result_t suggestMaxCooperativeGroupCount(uint32_t *totalGroupCount) = 0;
|
virtual ze_result_t suggestMaxCooperativeGroupCount(uint32_t *totalGroupCount, NEO::EngineGroupType engineGroupType,
|
||||||
|
bool isEngineInstanced) = 0;
|
||||||
virtual ze_result_t setCacheConfig(ze_cache_config_flags_t flags) = 0;
|
virtual ze_result_t setCacheConfig(ze_cache_config_flags_t flags) = 0;
|
||||||
|
|
||||||
virtual ze_result_t getProfileInfo(zet_profile_properties_t *pProfileProperties) = 0;
|
virtual ze_result_t getProfileInfo(zet_profile_properties_t *pProfileProperties) = 0;
|
||||||
|
|||||||
@@ -393,7 +393,8 @@ ze_result_t KernelImp::suggestGroupSize(uint32_t globalSizeX, uint32_t globalSiz
|
|||||||
return ZE_RESULT_SUCCESS;
|
return ZE_RESULT_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
ze_result_t KernelImp::suggestMaxCooperativeGroupCount(uint32_t *totalGroupCount) {
|
ze_result_t KernelImp::suggestMaxCooperativeGroupCount(uint32_t *totalGroupCount, NEO::EngineGroupType engineGroupType,
|
||||||
|
bool isEngineInstanced) {
|
||||||
UNRECOVERABLE_IF(0 == groupSize[0]);
|
UNRECOVERABLE_IF(0 == groupSize[0]);
|
||||||
UNRECOVERABLE_IF(0 == groupSize[1]);
|
UNRECOVERABLE_IF(0 == groupSize[1]);
|
||||||
UNRECOVERABLE_IF(0 == groupSize[2]);
|
UNRECOVERABLE_IF(0 == groupSize[2]);
|
||||||
@@ -423,6 +424,7 @@ ze_result_t KernelImp::suggestMaxCooperativeGroupCount(uint32_t *totalGroupCount
|
|||||||
hwHelper.getBarriersCountFromHasBarriers(barrierCount),
|
hwHelper.getBarriersCountFromHasBarriers(barrierCount),
|
||||||
workDim,
|
workDim,
|
||||||
localWorkSize);
|
localWorkSize);
|
||||||
|
*totalGroupCount = hwHelper.adjustMaxWorkGroupCount(*totalGroupCount, engineGroupType, hardwareInfo, isEngineInstanced);
|
||||||
return ZE_RESULT_SUCCESS;
|
return ZE_RESULT_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -45,7 +45,8 @@ struct KernelImp : Kernel {
|
|||||||
|
|
||||||
ze_result_t getKernelName(size_t *pSize, char *pName) override;
|
ze_result_t getKernelName(size_t *pSize, char *pName) override;
|
||||||
|
|
||||||
ze_result_t suggestMaxCooperativeGroupCount(uint32_t *totalGroupCount) override;
|
ze_result_t suggestMaxCooperativeGroupCount(uint32_t *totalGroupCount, NEO::EngineGroupType engineGroupType,
|
||||||
|
bool isEngineInstanced) override;
|
||||||
|
|
||||||
const uint8_t *getCrossThreadData() const override { return crossThreadData.get(); }
|
const uint8_t *getCrossThreadData() const override { return crossThreadData.get(); }
|
||||||
uint32_t getCrossThreadDataSize() const override { return crossThreadDataSize; }
|
uint32_t getCrossThreadDataSize() const override { return crossThreadDataSize; }
|
||||||
|
|||||||
@@ -1038,7 +1038,7 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenKernelUsingSyncBufferWhenAppendLau
|
|||||||
{
|
{
|
||||||
VariableBackup<uint32_t> groupCountX{&groupCount.groupCountX};
|
VariableBackup<uint32_t> groupCountX{&groupCount.groupCountX};
|
||||||
uint32_t maximalNumberOfWorkgroupsAllowed;
|
uint32_t maximalNumberOfWorkgroupsAllowed;
|
||||||
kernel.suggestMaxCooperativeGroupCount(&maximalNumberOfWorkgroupsAllowed);
|
kernel.suggestMaxCooperativeGroupCount(&maximalNumberOfWorkgroupsAllowed, NEO::EngineGroupType::Compute, false);
|
||||||
groupCountX = maximalNumberOfWorkgroupsAllowed + 1;
|
groupCountX = maximalNumberOfWorkgroupsAllowed + 1;
|
||||||
pCommandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
|
pCommandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
|
||||||
pCommandList->initialize(device, NEO::EngineGroupType::Compute, 0u);
|
pCommandList->initialize(device, NEO::EngineGroupType::Compute, 0u);
|
||||||
|
|||||||
@@ -1056,15 +1056,18 @@ uint32_t Kernel::getMaxWorkGroupCount(const cl_uint workDim, const size_t *local
|
|||||||
hardwareInfo.gtSystemInfo.EUCount, hardwareInfo.gtSystemInfo.ThreadCount / hardwareInfo.gtSystemInfo.EUCount);
|
hardwareInfo.gtSystemInfo.EUCount, hardwareInfo.gtSystemInfo.ThreadCount / hardwareInfo.gtSystemInfo.EUCount);
|
||||||
|
|
||||||
auto barrierCount = kernelDescriptor.kernelAttributes.barrierCount;
|
auto barrierCount = kernelDescriptor.kernelAttributes.barrierCount;
|
||||||
return KernelHelper::getMaxWorkGroupCount(kernelInfo.getMaxSimdSize(),
|
auto maxWorkGroupCount = KernelHelper::getMaxWorkGroupCount(kernelInfo.getMaxSimdSize(),
|
||||||
availableThreadCount,
|
availableThreadCount,
|
||||||
dssCount,
|
dssCount,
|
||||||
dssCount * KB * hardwareInfo.capabilityTable.slmSize,
|
dssCount * KB * hardwareInfo.capabilityTable.slmSize,
|
||||||
hwHelper.alignSlmSize(slmTotalSize),
|
hwHelper.alignSlmSize(slmTotalSize),
|
||||||
static_cast<uint32_t>(hwHelper.getMaxBarrierRegisterPerSlice()),
|
static_cast<uint32_t>(hwHelper.getMaxBarrierRegisterPerSlice()),
|
||||||
hwHelper.getBarriersCountFromHasBarriers(barrierCount),
|
hwHelper.getBarriersCountFromHasBarriers(barrierCount),
|
||||||
workDim,
|
workDim,
|
||||||
localWorkSize);
|
localWorkSize);
|
||||||
|
auto isEngineInstanced = commandQueue->getCommandStreamReceiver(false).getOsContext().isEngineInstanced();
|
||||||
|
maxWorkGroupCount = hwHelper.adjustMaxWorkGroupCount(maxWorkGroupCount, engineGroupType, hardwareInfo, isEngineInstanced);
|
||||||
|
return maxWorkGroupCount;
|
||||||
}
|
}
|
||||||
|
|
||||||
inline void Kernel::makeArgsResident(CommandStreamReceiver &commandStreamReceiver) {
|
inline void Kernel::makeArgsResident(CommandStreamReceiver &commandStreamReceiver) {
|
||||||
|
|||||||
@@ -126,6 +126,8 @@ class HwHelper {
|
|||||||
virtual bool useSystemMemoryPlacementForISA(const HardwareInfo &hwInfo) const = 0;
|
virtual bool useSystemMemoryPlacementForISA(const HardwareInfo &hwInfo) const = 0;
|
||||||
virtual bool packedFormatsSupported() const = 0;
|
virtual bool packedFormatsSupported() const = 0;
|
||||||
virtual bool isCooperativeDispatchSupported(const EngineGroupType engineGroupType) const = 0;
|
virtual bool isCooperativeDispatchSupported(const EngineGroupType engineGroupType) const = 0;
|
||||||
|
virtual uint32_t adjustMaxWorkGroupCount(uint32_t maxWorkGroupCount, const EngineGroupType engineGroupType,
|
||||||
|
const HardwareInfo &hwInfo, bool isEngineInstanced) const = 0;
|
||||||
virtual size_t getMaxFillPaternSizeForCopyEngine() const = 0;
|
virtual size_t getMaxFillPaternSizeForCopyEngine() const = 0;
|
||||||
virtual bool isCopyOnlyEngineType(EngineGroupType type) const = 0;
|
virtual bool isCopyOnlyEngineType(EngineGroupType type) const = 0;
|
||||||
virtual bool isSipWANeeded(const HardwareInfo &hwInfo) const = 0;
|
virtual bool isSipWANeeded(const HardwareInfo &hwInfo) const = 0;
|
||||||
@@ -334,6 +336,9 @@ class HwHelperHw : public HwHelper {
|
|||||||
|
|
||||||
bool isCooperativeDispatchSupported(const EngineGroupType engineGroupType) const override;
|
bool isCooperativeDispatchSupported(const EngineGroupType engineGroupType) const override;
|
||||||
|
|
||||||
|
uint32_t adjustMaxWorkGroupCount(uint32_t maxWorkGroupCount, const EngineGroupType engineGroupType,
|
||||||
|
const HardwareInfo &hwInfo, bool isEngineInstanced) const override;
|
||||||
|
|
||||||
size_t getMaxFillPaternSizeForCopyEngine() const override;
|
size_t getMaxFillPaternSizeForCopyEngine() const override;
|
||||||
|
|
||||||
bool isKmdMigrationSupported(const HardwareInfo &hwInfo) const override;
|
bool isKmdMigrationSupported(const HardwareInfo &hwInfo) const override;
|
||||||
|
|||||||
@@ -596,6 +596,12 @@ bool HwHelperHw<GfxFamily>::isCooperativeDispatchSupported(const EngineGroupType
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Generic HwHelperHw<GfxFamily> definition of the adjustMaxWorkGroupCount hook.
// As written here it is a pass-through: maxWorkGroupCount is returned unchanged,
// and engineGroupType, hwInfo and isEngineInstanced are not consulted.
// NOTE(review): presumably family-specific specializations reduce the count for
// some engine configurations; none are visible in this diff, so confirm.
template <typename GfxFamily>
|
||||||
|
uint32_t HwHelperHw<GfxFamily>::adjustMaxWorkGroupCount(uint32_t maxWorkGroupCount, const EngineGroupType engineGroupType,
|
||||||
|
const HardwareInfo &hwInfo, bool isEngineInstanced) const {
|
||||||
|
return maxWorkGroupCount;
|
||||||
|
}
|
||||||
|
|
||||||
template <typename GfxFamily>
|
template <typename GfxFamily>
|
||||||
bool HwHelperHw<GfxFamily>::isKmdMigrationSupported(const HardwareInfo &hwInfo) const {
|
bool HwHelperHw<GfxFamily>::isKmdMigrationSupported(const HardwareInfo &hwInfo) const {
|
||||||
return false;
|
return false;
|
||||||
|
|||||||
Reference in New Issue
Block a user