mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-08 05:56:36 +08:00
refactor: unify getMaxWorkGroupCount logic
Signed-off-by: Dunajski, Bartosz <bartosz.dunajski@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
1002cb9f34
commit
6f4ed10919
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2021-2023 Intel Corporation
|
||||
* Copyright (C) 2021-2024 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -70,7 +70,7 @@ uint32_t GfxCoreHelperHw<Family>::adjustMaxWorkGroupCount(uint32_t maxWorkGroupC
|
||||
UNRECOVERABLE_IF(ccsCount == 0);
|
||||
numberOfpartsInTileForConcurrentKernels = std::max(numberOfpartsInTileForConcurrentKernels, ccsCount);
|
||||
}
|
||||
return maxWorkGroupCount / numberOfpartsInTileForConcurrentKernels;
|
||||
return std::max(maxWorkGroupCount / numberOfpartsInTileForConcurrentKernels, 1u);
|
||||
}
|
||||
|
||||
template <typename Family>
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2023 Intel Corporation
|
||||
* Copyright (C) 2019-2024 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -11,20 +11,33 @@
|
||||
#include "shared/source/device/device.h"
|
||||
#include "shared/source/execution_environment/root_device_environment.h"
|
||||
#include "shared/source/helpers/basic_math.h"
|
||||
#include "shared/source/helpers/constants.h"
|
||||
#include "shared/source/helpers/debug_helpers.h"
|
||||
#include "shared/source/helpers/gfx_core_helper.h"
|
||||
#include "shared/source/helpers/hw_info.h"
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
namespace NEO {
|
||||
|
||||
uint32_t KernelHelper::getMaxWorkGroupCount(uint32_t simd, uint32_t availableThreadCount, uint32_t dssCount, uint32_t availableSlmSize,
|
||||
uint32_t usedSlmSize, uint32_t maxBarrierCount, uint32_t numberOfBarriers, uint32_t workDim,
|
||||
const size_t *localWorkSize) {
|
||||
uint32_t KernelHelper::getMaxWorkGroupCount(const RootDeviceEnvironment &rootDeviceEnvironment, const KernelDescriptor &kernelDescriptor,
|
||||
uint32_t usedSlmSize, uint32_t workDim, const size_t *localWorkSize, EngineGroupType engineGroupType, bool isEngineInstanced) {
|
||||
if (debugManager.flags.OverrideMaxWorkGroupCount.get() != -1) {
|
||||
return static_cast<uint32_t>(debugManager.flags.OverrideMaxWorkGroupCount.get());
|
||||
}
|
||||
|
||||
auto &helper = rootDeviceEnvironment.getHelper<NEO::GfxCoreHelper>();
|
||||
auto &hwInfo = *rootDeviceEnvironment.getHardwareInfo();
|
||||
|
||||
auto dssCount = hwInfo.gtSystemInfo.DualSubSliceCount;
|
||||
if (dssCount == 0) {
|
||||
dssCount = hwInfo.gtSystemInfo.SubSliceCount;
|
||||
}
|
||||
|
||||
auto availableThreadCount = helper.calculateAvailableThreadCount(hwInfo, kernelDescriptor.kernelAttributes.numGrfRequired);
|
||||
auto availableSlmSize = static_cast<uint32_t>(dssCount * MemoryConstants::kiloByte * hwInfo.capabilityTable.slmSize);
|
||||
auto maxBarrierCount = static_cast<uint32_t>(helper.getMaxBarrierRegisterPerSlice());
|
||||
|
||||
UNRECOVERABLE_IF((workDim == 0) || (workDim > 3));
|
||||
UNRECOVERABLE_IF(localWorkSize == nullptr);
|
||||
|
||||
@@ -33,11 +46,11 @@ uint32_t KernelHelper::getMaxWorkGroupCount(uint32_t simd, uint32_t availableThr
|
||||
workGroupSize *= localWorkSize[i];
|
||||
}
|
||||
|
||||
auto numThreadsPerThreadGroup = static_cast<uint32_t>(Math::divideAndRoundUp(workGroupSize, simd));
|
||||
auto numThreadsPerThreadGroup = static_cast<uint32_t>(Math::divideAndRoundUp(workGroupSize, kernelDescriptor.kernelAttributes.simdSize));
|
||||
auto maxWorkGroupsCount = availableThreadCount / numThreadsPerThreadGroup;
|
||||
|
||||
if (numberOfBarriers > 0) {
|
||||
auto maxWorkGroupsCountDueToBarrierUsage = dssCount * (maxBarrierCount / numberOfBarriers);
|
||||
if (kernelDescriptor.kernelAttributes.barrierCount > 0) {
|
||||
auto maxWorkGroupsCountDueToBarrierUsage = dssCount * (maxBarrierCount / kernelDescriptor.kernelAttributes.barrierCount);
|
||||
maxWorkGroupsCount = std::min(maxWorkGroupsCount, maxWorkGroupsCountDueToBarrierUsage);
|
||||
}
|
||||
|
||||
@@ -46,7 +59,7 @@ uint32_t KernelHelper::getMaxWorkGroupCount(uint32_t simd, uint32_t availableThr
|
||||
maxWorkGroupsCount = std::min(maxWorkGroupsCount, maxWorkGroupsCountDueToSlm);
|
||||
}
|
||||
|
||||
return maxWorkGroupsCount;
|
||||
return helper.adjustMaxWorkGroupCount(maxWorkGroupsCount, engineGroupType, rootDeviceEnvironment, isEngineInstanced);
|
||||
}
|
||||
|
||||
KernelHelper::ErrorCode KernelHelper::checkIfThereIsSpaceForScratchOrPrivate(KernelDescriptor::KernelAttributes attributes, Device *device) {
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2023 Intel Corporation
|
||||
* Copyright (C) 2019-2024 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -7,6 +7,7 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "shared/source/helpers/definitions/engine_group_types.h"
|
||||
#include "shared/source/kernel/kernel_descriptor.h"
|
||||
|
||||
#include <cstddef>
|
||||
@@ -14,6 +15,7 @@
|
||||
|
||||
namespace NEO {
|
||||
class Device;
|
||||
struct RootDeviceEnvironment;
|
||||
|
||||
struct KernelHelper {
|
||||
enum class ErrorCode {
|
||||
@@ -21,9 +23,8 @@ struct KernelHelper {
|
||||
outOfDeviceMemory = 1,
|
||||
invalidKernel = 2
|
||||
};
|
||||
static uint32_t getMaxWorkGroupCount(uint32_t simd, uint32_t availableThreadCount, uint32_t dssCount, uint32_t availableSlmSize,
|
||||
uint32_t usedSlmSize, uint32_t maxBarrierCount, uint32_t numberOfBarriers, uint32_t workDim,
|
||||
const size_t *localWorkSize);
|
||||
static uint32_t getMaxWorkGroupCount(const RootDeviceEnvironment &rootDeviceEnvironment, const KernelDescriptor &kernelDescriptor,
|
||||
uint32_t usedSlmSize, uint32_t workDim, const size_t *localWorkSize, EngineGroupType engineGroupType, bool isEngineInstanced);
|
||||
static inline uint64_t getPrivateSurfaceSize(uint64_t perHwThreadPrivateMemorySize, uint32_t computeUnitsUsedForScratch) {
|
||||
return perHwThreadPrivateMemorySize * computeUnitsUsedForScratch;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user