mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-21 01:04:57 +08:00
refactor: add param rootDeviceEnvironment to calculateNumThreadsPerThreadGroup
Signed-off-by: Katarzyna Cencelewska <katarzyna.cencelewska@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
ec009cf9e3
commit
dd1d52259e
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2022-2023 Intel Corporation
|
||||
* Copyright (C) 2022-2024 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -7,6 +7,7 @@
|
||||
|
||||
#include "shared/source/kernel/implicit_args_helper.h"
|
||||
|
||||
#include "shared/source/execution_environment/root_device_environment.h"
|
||||
#include "shared/source/helpers/aligned_memory.h"
|
||||
#include "shared/source/helpers/basic_math.h"
|
||||
#include "shared/source/helpers/hw_walk_order.h"
|
||||
@@ -43,7 +44,7 @@ uint32_t getGrfSize(uint32_t simd) {
|
||||
return 32u;
|
||||
}
|
||||
|
||||
uint32_t getSizeForImplicitArgsPatching(const ImplicitArgs *pImplicitArgs, const KernelDescriptor &kernelDescriptor, bool isHwLocalIdGeneration, const GfxCoreHelper &gfxCoreHelper) {
|
||||
uint32_t getSizeForImplicitArgsPatching(const ImplicitArgs *pImplicitArgs, const KernelDescriptor &kernelDescriptor, bool isHwLocalIdGeneration, const RootDeviceEnvironment &rootDeviceEnvironment) {
|
||||
if (!pImplicitArgs) {
|
||||
return 0;
|
||||
}
|
||||
@@ -58,15 +59,15 @@ uint32_t getSizeForImplicitArgsPatching(const ImplicitArgs *pImplicitArgs, const
|
||||
auto itemsInGroup = Math::computeTotalElementsCount(localWorkSize);
|
||||
uint32_t localIdsSizeNeeded =
|
||||
alignUp(static_cast<uint32_t>(NEO::PerThreadDataHelper::getPerThreadDataSizeTotal(
|
||||
simdSize, grfSize, 3u, itemsInGroup, isHwLocalIdGeneration, gfxCoreHelper)),
|
||||
simdSize, grfSize, 3u, itemsInGroup, isHwLocalIdGeneration, rootDeviceEnvironment)),
|
||||
MemoryConstants::cacheLineSize);
|
||||
return implicitArgsSize + localIdsSizeNeeded;
|
||||
}
|
||||
}
|
||||
|
||||
void *patchImplicitArgs(void *ptrToPatch, const ImplicitArgs &implicitArgs, const KernelDescriptor &kernelDescriptor, std::optional<std::pair<bool, uint32_t>> hwGenerationOfLocalIdsParams, const GfxCoreHelper &gfxCoreHelper) {
|
||||
void *patchImplicitArgs(void *ptrToPatch, const ImplicitArgs &implicitArgs, const KernelDescriptor &kernelDescriptor, std::optional<std::pair<bool, uint32_t>> hwGenerationOfLocalIdsParams, const RootDeviceEnvironment &rootDeviceEnvironment) {
|
||||
auto localIdsGeneratedByHw = hwGenerationOfLocalIdsParams.has_value() ? hwGenerationOfLocalIdsParams.value().first : false;
|
||||
auto totalSizeToProgram = getSizeForImplicitArgsPatching(&implicitArgs, kernelDescriptor, localIdsGeneratedByHw, gfxCoreHelper);
|
||||
auto totalSizeToProgram = getSizeForImplicitArgsPatching(&implicitArgs, kernelDescriptor, localIdsGeneratedByHw, rootDeviceEnvironment);
|
||||
auto retVal = ptrOffset(ptrToPatch, totalSizeToProgram);
|
||||
|
||||
auto patchImplicitArgsBufferInCrossThread = NEO::isValidOffset<>(kernelDescriptor.payloadMappings.implicitArgs.implicitArgsBuffer);
|
||||
@@ -82,7 +83,7 @@ void *patchImplicitArgs(void *ptrToPatch, const ImplicitArgs &implicitArgs, cons
|
||||
static_cast<uint16_t>(implicitArgs.localSizeY),
|
||||
static_cast<uint16_t>(implicitArgs.localSizeZ)}},
|
||||
dimensionOrder,
|
||||
false, grfSize, gfxCoreHelper);
|
||||
false, grfSize, rootDeviceEnvironment);
|
||||
auto sizeForLocalIdsProgramming = totalSizeToProgram - ImplicitArgs::getSize();
|
||||
ptrToPatch = ptrOffset(ptrToPatch, sizeForLocalIdsProgramming);
|
||||
}
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2021-2023 Intel Corporation
|
||||
* Copyright (C) 2021-2024 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -17,14 +17,14 @@
|
||||
namespace NEO {
|
||||
|
||||
struct KernelDescriptor;
|
||||
class GfxCoreHelper;
|
||||
struct RootDeviceEnvironment;
|
||||
|
||||
inline constexpr const char *implicitArgsRelocationSymbolName = "__INTEL_PATCH_CROSS_THREAD_OFFSET_OFF_R0";
|
||||
|
||||
namespace ImplicitArgsHelper {
|
||||
std::array<uint8_t, 3> getDimensionOrderForLocalIds(const uint8_t *workgroupDimensionsOrder, std::optional<std::pair<bool /* localIdsGeneratedByRuntime */, uint32_t /* walkOrderForHwGenerationOfLocalIds */>> hwGenerationOfLocalIdsParams);
|
||||
uint32_t getGrfSize(uint32_t simd);
|
||||
uint32_t getSizeForImplicitArgsPatching(const ImplicitArgs *pImplicitArgs, const KernelDescriptor &kernelDescriptor, bool localIdsGeneratedByRuntime, const GfxCoreHelper &gfxCoreHelper);
|
||||
void *patchImplicitArgs(void *ptrToPatch, const ImplicitArgs &implicitArgs, const KernelDescriptor &kernelDescriptor, std::optional<std::pair<bool /* localIdsGeneratedByRuntime */, uint32_t /* walkOrderForHwGenerationOfLocalIds */>> hwGenerationOfLocalIdsParams, const GfxCoreHelper &gfxCoreHelper);
|
||||
uint32_t getSizeForImplicitArgsPatching(const ImplicitArgs *pImplicitArgs, const KernelDescriptor &kernelDescriptor, bool localIdsGeneratedByRuntime, const RootDeviceEnvironment &rootDeviceEnvironment);
|
||||
void *patchImplicitArgs(void *ptrToPatch, const ImplicitArgs &implicitArgs, const KernelDescriptor &kernelDescriptor, std::optional<std::pair<bool /* localIdsGeneratedByRuntime */, uint32_t /* walkOrderForHwGenerationOfLocalIds */>> hwGenerationOfLocalIdsParams, const RootDeviceEnvironment &rootDeviceEnvironment);
|
||||
} // namespace ImplicitArgsHelper
|
||||
} // namespace NEO
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2022-2023 Intel Corporation
|
||||
* Copyright (C) 2022-2024 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -7,6 +7,7 @@
|
||||
|
||||
#include "shared/source/kernel/local_ids_cache.h"
|
||||
|
||||
#include "shared/source/execution_environment/root_device_environment.h"
|
||||
#include "shared/source/helpers/aligned_memory.h"
|
||||
#include "shared/source/helpers/basic_math.h"
|
||||
#include "shared/source/helpers/gfx_core_helper.h"
|
||||
@@ -34,12 +35,13 @@ std::unique_lock<std::mutex> LocalIdsCache::lock() {
|
||||
return std::unique_lock<std::mutex>(setLocalIdsMutex);
|
||||
}
|
||||
|
||||
size_t LocalIdsCache::getLocalIdsSizeForGroup(const Vec3<uint16_t> &group, const GfxCoreHelper &gfxCoreHelper) const {
|
||||
size_t LocalIdsCache::getLocalIdsSizeForGroup(const Vec3<uint16_t> &group, const RootDeviceEnvironment &rootDeviceEnvironment) const {
|
||||
const auto numElementsInGroup = static_cast<uint32_t>(Math::computeTotalElementsCount({group[0], group[1], group[2]}));
|
||||
if (isSimd1(simdSize)) {
|
||||
return static_cast<size_t>(numElementsInGroup * localIdsSizePerThread);
|
||||
}
|
||||
const auto numberOfThreads = gfxCoreHelper.calculateNumThreadsPerThreadGroup(simdSize, numElementsInGroup, grfSize, false);
|
||||
auto &gfxCoreHelper = rootDeviceEnvironment.getHelper<NEO::GfxCoreHelper>();
|
||||
const auto numberOfThreads = gfxCoreHelper.calculateNumThreadsPerThreadGroup(simdSize, numElementsInGroup, grfSize, false, rootDeviceEnvironment);
|
||||
return static_cast<size_t>(numberOfThreads * localIdsSizePerThread);
|
||||
}
|
||||
|
||||
@@ -52,7 +54,7 @@ void LocalIdsCache::setLocalIdsForEntry(LocalIdsCacheEntry &entry, void *destina
|
||||
std::memcpy(destination, entry.localIdsData, entry.localIdsSize);
|
||||
}
|
||||
|
||||
void LocalIdsCache::setLocalIdsForGroup(const Vec3<uint16_t> &group, void *destination, const GfxCoreHelper &gfxCoreHelper) {
|
||||
void LocalIdsCache::setLocalIdsForGroup(const Vec3<uint16_t> &group, void *destination, const RootDeviceEnvironment &rootDeviceEnvironment) {
|
||||
auto setLocalIdsLock = lock();
|
||||
LocalIdsCacheEntry *leastAccessedEntry = &cache[0];
|
||||
for (auto &cacheEntry : cache) {
|
||||
@@ -65,12 +67,12 @@ void LocalIdsCache::setLocalIdsForGroup(const Vec3<uint16_t> &group, void *desti
|
||||
}
|
||||
}
|
||||
|
||||
commitNewEntry(*leastAccessedEntry, group, gfxCoreHelper);
|
||||
commitNewEntry(*leastAccessedEntry, group, rootDeviceEnvironment);
|
||||
setLocalIdsForEntry(*leastAccessedEntry, destination);
|
||||
}
|
||||
|
||||
void LocalIdsCache::commitNewEntry(LocalIdsCacheEntry &entry, const Vec3<uint16_t> &group, const GfxCoreHelper &gfxCoreHelper) {
|
||||
entry.localIdsSize = getLocalIdsSizeForGroup(group, gfxCoreHelper);
|
||||
void LocalIdsCache::commitNewEntry(LocalIdsCacheEntry &entry, const Vec3<uint16_t> &group, const RootDeviceEnvironment &rootDeviceEnvironment) {
|
||||
entry.localIdsSize = getLocalIdsSizeForGroup(group, rootDeviceEnvironment);
|
||||
entry.groupSize = group;
|
||||
entry.accessCounter = 0U;
|
||||
if (entry.localIdsSize > entry.localIdsSizeAllocated) {
|
||||
@@ -79,7 +81,7 @@ void LocalIdsCache::commitNewEntry(LocalIdsCacheEntry &entry, const Vec3<uint16_
|
||||
entry.localIdsSizeAllocated = entry.localIdsSize;
|
||||
}
|
||||
NEO::generateLocalIDs(entry.localIdsData, static_cast<uint16_t>(simdSize),
|
||||
{group[0], group[1], group[2]}, wgDimOrder, usesOnlyImages, grfSize, gfxCoreHelper);
|
||||
{group[0], group[1], group[2]}, wgDimOrder, usesOnlyImages, grfSize, rootDeviceEnvironment);
|
||||
}
|
||||
|
||||
} // namespace NEO
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2022-2023 Intel Corporation
|
||||
* Copyright (C) 2022-2024 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -12,7 +12,7 @@
|
||||
#include <mutex>
|
||||
|
||||
namespace NEO {
|
||||
class GfxCoreHelper;
|
||||
struct RootDeviceEnvironment;
|
||||
class LocalIdsCache {
|
||||
public:
|
||||
struct LocalIdsCacheEntry {
|
||||
@@ -30,13 +30,13 @@ class LocalIdsCache {
|
||||
LocalIdsCache(size_t cacheSize, std::array<uint8_t, 3> wgDimOrder, uint8_t simdSize, uint8_t grfSize, bool usesOnlyImages = false);
|
||||
~LocalIdsCache();
|
||||
|
||||
void setLocalIdsForGroup(const Vec3<uint16_t> &group, void *destination, const GfxCoreHelper &gfxCoreHelper);
|
||||
size_t getLocalIdsSizeForGroup(const Vec3<uint16_t> &group, const GfxCoreHelper &gfxCoreHelper) const;
|
||||
void setLocalIdsForGroup(const Vec3<uint16_t> &group, void *destination, const RootDeviceEnvironment &rootDeviceEnvironment);
|
||||
size_t getLocalIdsSizeForGroup(const Vec3<uint16_t> &group, const RootDeviceEnvironment &rootDeviceEnvironment) const;
|
||||
size_t getLocalIdsSizePerThread() const;
|
||||
|
||||
protected:
|
||||
void setLocalIdsForEntry(LocalIdsCacheEntry &entry, void *destination);
|
||||
void commitNewEntry(LocalIdsCacheEntry &entry, const Vec3<uint16_t> &group, const GfxCoreHelper &gfxCoreHelper);
|
||||
void commitNewEntry(LocalIdsCacheEntry &entry, const Vec3<uint16_t> &group, const RootDeviceEnvironment &rootDeviceEnvironment);
|
||||
std::unique_lock<std::mutex> lock();
|
||||
|
||||
StackVec<LocalIdsCacheEntry, 4> cache;
|
||||
|
||||
Reference in New Issue
Block a user