mirror of
https://github.com/intel/compute-runtime.git
synced 2025-09-15 13:01:45 +08:00

This API allows querying for execution-related information. The CL_EXECUTION_INFO_MAX_WORKGROUP_COUNT_INTEL parameter queries the maximum number of work groups that can run concurrently on the device. Related-To: NEO-2712 Change-Id: I4e6b4c80aeb06ff966fb543c0a7f05ed54416dab Signed-off-by: Filip Hazubski <filip.hazubski@intel.com>
41 lines
1.3 KiB
C++
41 lines
1.3 KiB
C++
/*
|
|
* Copyright (C) 2019 Intel Corporation
|
|
*
|
|
* SPDX-License-Identifier: MIT
|
|
*
|
|
*/
|
|
|
|
#include "core/helpers/kernel_helpers.h"
|
|
|
|
#include "core/helpers/basic_math.h"
|
|
|
|
#include <algorithm>
|
|
|
|
namespace NEO {
|
|
|
|
uint32_t KernelHelper::getMaxWorkGroupCount(uint32_t simd, uint32_t availableThreadCount, uint32_t dssCount, uint32_t availableSlmSize,
|
|
uint32_t usedSlmSize, uint32_t maxBarrierCount, uint32_t numberOfBarriers, uint32_t workDim,
|
|
const size_t *localWorkSize) {
|
|
size_t workGroupSize = 1;
|
|
for (uint32_t i = 0; i < workDim; i++) {
|
|
workGroupSize *= localWorkSize[i];
|
|
}
|
|
|
|
auto threadsPerThreadGroup = static_cast<uint32_t>(Math::divideAndRoundUp(workGroupSize, simd));
|
|
auto maxWorkGroupsCount = availableThreadCount / threadsPerThreadGroup;
|
|
|
|
if (numberOfBarriers > 0) {
|
|
auto maxWorkGroupsCountDueToBarrierUsage = dssCount * (maxBarrierCount / numberOfBarriers);
|
|
maxWorkGroupsCount = std::min(maxWorkGroupsCount, maxWorkGroupsCountDueToBarrierUsage);
|
|
}
|
|
|
|
if (usedSlmSize > 0) {
|
|
auto maxWorkGroupsCountDueToSlm = availableSlmSize / usedSlmSize;
|
|
maxWorkGroupsCount = std::min(maxWorkGroupsCount, maxWorkGroupsCountDueToSlm);
|
|
}
|
|
|
|
return maxWorkGroupsCount;
|
|
}
|
|
|
|
} // namespace NEO
|