compute-runtime/shared/source/program/work_size_info.cpp

81 lines
2.9 KiB
C++

/*
* Copyright (C) 2023-2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/program/work_size_info.h"
#include "shared/source/debug_settings/debug_settings_manager.h"
#include "shared/source/execution_environment/root_device_environment.h"
#include "shared/source/helpers/gfx_core_helper.h"
#include "shared/source/helpers/hw_info.h"
#include "shared/source/program/kernel_info.h"
#include <cmath>
namespace NEO {
// Builds a WorkSizeInfo from explicitly supplied values.
//
// Every argument except rootDeviceEnvironment and disableEUFusion is copied
// into the member of the same name (yTiledSurface is stored in
// yTiledSurfaces). coreFamily is taken from the hardware info exposed by
// rootDeviceEnvironment. Once all inputs are stored, minWorkGroupSize is
// derived by setMinWorkGroupSize().
WorkSizeInfo::WorkSizeInfo(uint32_t maxWorkGroupSize, bool hasBarriers, uint32_t simdSize, uint32_t slmTotalSize, const RootDeviceEnvironment &rootDeviceEnvironment, uint32_t numThreadsPerSubSlice, uint32_t localMemSize, bool imgUsed, bool yTiledSurface, bool disableEUFusion) {
    // Values describing the device side of the dispatch.
    this->coreFamily = rootDeviceEnvironment.getHardwareInfo()->platform.eRenderCoreFamily;
    this->numThreadsPerSubSlice = numThreadsPerSubSlice;
    this->localMemSize = localMemSize;
    this->maxWorkGroupSize = maxWorkGroupSize;

    // Values describing the kernel.
    this->simdSize = simdSize;
    this->slmTotalSize = slmTotalSize;
    this->hasBarriers = hasBarriers;
    this->imgUsed = imgUsed;
    this->yTiledSurfaces = yTiledSurface;

    // Must run last: it reads the members assigned above.
    setMinWorkGroupSize(rootDeviceEnvironment, disableEUFusion);
}
// Walks the explicit arguments of kernelInfo and, at the first argument that
// reports ArgDescriptor::argTImage, sets both imgUsed and yTiledSurfaces to
// true and stops. When no such argument exists, neither flag is modified.
void WorkSizeInfo::setIfUseImg(const KernelInfo &kernelInfo) {
    const auto &explicitArgs = kernelInfo.kernelDescriptor.payloadMappings.explicitArgs;
    for (const auto &explicitArg : explicitArgs) {
        if (!explicitArg.is<ArgDescriptor::argTImage>()) {
            continue;
        }
        imgUsed = true;
        yTiledSurfaces = true;
        break; // one image argument is enough; no need to look further
    }
}
// Recomputes minWorkGroupSize from the members already stored in this object.
//
// Steps, in order:
//  1. Start from 0.
//  2. If the kernel has barriers, use numThreadsPerSubSlice * simdSize / 32.
//  3. If the kernel uses SLM, raise the value to at least
//     maxWorkGroupSize / (localMemSize / slmTotalSize). Requesting more SLM
//     than localMemSize is reported (when debug messages are enabled) and
//     then treated as unrecoverable.
//  4. If the GfxCoreHelper reports fused EU dispatch as enabled for this
//     hardware and disableEUFusion setting, double the result.
void WorkSizeInfo::setMinWorkGroupSize(const RootDeviceEnvironment &rootDeviceEnvironment, bool disableEUFusion) {
    minWorkGroupSize = 0;

    if (hasBarriers) {
        constexpr uint32_t barriersPerHSliceLimit = 32;
        minWorkGroupSize = numThreadsPerSubSlice * simdSize / barriersPerHSliceLimit;
    }

    if (slmTotalSize > 0) {
        const bool slmExceedsLocalMem = localMemSize < slmTotalSize;
        if (slmExceedsLocalMem) {
            PRINT_DEBUG_STRING(NEO::debugManager.flags.PrintDebugMessages.get(), stderr, "Size of SLM (%u) larger than available (%u)\n", slmTotalSize, localMemSize);
        }
        UNRECOVERABLE_IF(slmExceedsLocalMem);

        // The check above guarantees localMemSize >= slmTotalSize, so this
        // quotient is at least 1 and the division below is safe.
        const auto slmAllocationsThatFit = localMemSize / slmTotalSize;
        const auto slmBoundMinSize = maxWorkGroupSize / slmAllocationsThatFit;
        minWorkGroupSize = std::max(slmBoundMinSize, minWorkGroupSize);
    }

    const auto &gfxCoreHelper = rootDeviceEnvironment.getHelper<GfxCoreHelper>();
    const auto &hwInfo = *rootDeviceEnvironment.getHardwareInfo();
    if (gfxCoreHelper.isFusedEuDispatchEnabled(hwInfo, disableEUFusion)) {
        minWorkGroupSize *= 2;
    }
}
// Decides whether a target ratio should be used and, if so, which one.
//
//  - slmTotalSize > 0: useRatio is enabled, useStrictRatio is cleared and
//    targetRatio becomes log(workItems[0]) - log(workItems[1]), with both
//    work-item counts converted to float first.
//  - otherwise, if yTiledSurfaces is set: useRatio and useStrictRatio are
//    both enabled and targetRatio becomes yTilingRatioValue.
//  - otherwise nothing is modified.
//
// Only workItems[0] and workItems[1] are read.
void WorkSizeInfo::checkRatio(const size_t workItems[3]) {
    if (slmTotalSize > 0) {
        const auto logItems0 = log(static_cast<float>(workItems[0]));
        const auto logItems1 = log(static_cast<float>(workItems[1]));
        useRatio = true;
        useStrictRatio = false;
        targetRatio = logItems0 - logItems1;
        return;
    }

    if (yTiledSurfaces) {
        useRatio = true;
        useStrictRatio = true;
        targetRatio = yTilingRatioValue;
    }
}
void WorkSizeInfo::setPreferredWgCountPerSubslice(uint32_t preferredWgCount) {
preferredWgCountPerSubSlice = preferredWgCount;
}
} // namespace NEO