Revert "fix: Unify logic calculating threads per work group part 2"
This reverts commit 1e8a53bd53.
Signed-off-by: Compute-Runtime-Validation <compute-runtime-validation@intel.com>
parent 2d7505c01c
commit 39740da9d1
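The hunks below undo the interface change introduced by the reverted commit: the trailing const GfxCoreHelper & parameter is dropped again from generateLocalIDs, ImplicitArgsHelper::patchImplicitArgs and LocalIdsCache::setLocalIdsForGroup/commitNewEntry, the thread count per work group is computed with getThreadsPerWG instead of GfxCoreHelper::calculateNumThreadsPerThreadGroup, and call sites go back to getDevice().getGfxCoreHelper() where a helper reference is still needed. Condensed before/after of the central declaration, taken from the hunks below (not a complete listing):

// state introduced by 1e8a53bd53 (removed by this revert)
void generateLocalIDs(void *buffer, uint16_t simd, const std::array<uint16_t, 3> &localWorkgroupSize,
                      const std::array<uint8_t, 3> &dimensionsOrder, bool isImageOnlyKernel, uint32_t grfSize, const NEO::GfxCoreHelper &gfxCoreHelper);

// state restored by this revert
void generateLocalIDs(void *buffer, uint16_t simd, const std::array<uint16_t, 3> &localWorkgroupSize,
                      const std::array<uint8_t, 3> &dimensionsOrder, bool isImageOnlyKernel, uint32_t grfSize);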
@@ -363,7 +363,7 @@ ze_result_t KernelImp::setGroupSize(uint32_t groupSizeX, uint32_t groupSizeY,
 static_cast<uint16_t>(groupSizeY),
 static_cast<uint16_t>(groupSizeZ)}},
 std::array<uint8_t, 3>{{0, 1, 2}},
-false, grfSize, gfxCoreHelper);
+false, grfSize);
 }

 this->perThreadDataSize = perThreadDataSizeForWholeThreadGroup / numThreadsPerThreadGroup;
@@ -1023,8 +1023,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CmdlistAppendLaunchKernelWithImplicitArgsTests, giv

 auto grfSize = ImplicitArgsHelper::getGrfSize(expectedImplicitArgs.simdWidth);
 auto expectedLocalIds = alignedMalloc(implicitArgsProgrammingSize - sizeof(ImplicitArgs), MemoryConstants::cacheLineSize);
-const auto &gfxCoreHelper = device->getNEODevice()->getGfxCoreHelper();
-generateLocalIDs(expectedLocalIds, expectedImplicitArgs.simdWidth, localSize, workgroupDimOrder, false, grfSize, gfxCoreHelper);
+generateLocalIDs(expectedLocalIds, expectedImplicitArgs.simdWidth, localSize, workgroupDimOrder, false, grfSize);

 auto localIdsProgrammingSize = implicitArgsProgrammingSize - sizeof(ImplicitArgs);
 size_t sizeForLocalIds = NEO::PerThreadDataHelper::getPerThreadDataSizeTotal(expectedImplicitArgs.simdWidth, grfSize, 3u, totalLocalSize);

@@ -1069,8 +1068,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CmdlistAppendLaunchKernelWithImplicitArgsTests, giv

 auto grfSize = ImplicitArgsHelper::getGrfSize(expectedImplicitArgs.simdWidth);
 auto expectedLocalIds = alignedMalloc(implicitArgsProgrammingSize - sizeof(ImplicitArgs), MemoryConstants::cacheLineSize);
-const auto &gfxCoreHelper = device->getNEODevice()->getGfxCoreHelper();
-generateLocalIDs(expectedLocalIds, expectedImplicitArgs.simdWidth, localSize, expectedDimOrder, false, grfSize, gfxCoreHelper);
+generateLocalIDs(expectedLocalIds, expectedImplicitArgs.simdWidth, localSize, expectedDimOrder, false, grfSize);

 auto localIdsProgrammingSize = implicitArgsProgrammingSize - sizeof(ImplicitArgs);
 size_t sizeForLocalIds = NEO::PerThreadDataHelper::getPerThreadDataSizeTotal(expectedImplicitArgs.simdWidth, grfSize, 3u, totalLocalSize);
@@ -91,8 +91,7 @@ size_t HardwareCommandsHelper<GfxFamily>::sendCrossThreadData(

 auto implicitArgsGpuVA = indirectHeap.getGraphicsAllocation()->getGpuAddress() + indirectHeap.getUsed();
 auto ptrToPatchImplicitArgs = indirectHeap.getSpace(sizeForImplicitArgsProgramming);
-const auto &gfxCoreHelper = kernel.getGfxCoreHelper();
-ImplicitArgsHelper::patchImplicitArgs(ptrToPatchImplicitArgs, *pImplicitArgs, kernelDescriptor, {}, gfxCoreHelper);
+ImplicitArgsHelper::patchImplicitArgs(ptrToPatchImplicitArgs, *pImplicitArgs, kernelDescriptor, {});

 auto implicitArgsCrossThreadPtr = ptrOffset(reinterpret_cast<uint64_t *>(kernel.getCrossThreadData()), kernelDescriptor.payloadMappings.implicitArgs.implicitArgsBuffer);
 *implicitArgsCrossThreadPtr = implicitArgsGpuVA;
@@ -96,8 +96,8 @@ size_t HardwareCommandsHelper<GfxFamily>::sendCrossThreadData(
 kernelAttributes.flags.requiresWorkgroupWalkOrder,
 requiredWalkOrder,
 kernelDescriptor.kernelAttributes.simdSize);
-const auto &gfxCoreHelper = kernel.getGfxCoreHelper();
-ImplicitArgsHelper::patchImplicitArgs(ptrToPatchImplicitArgs, *pImplicitArgs, kernelDescriptor, std::make_pair(generationOfLocalIdsByRuntime, requiredWalkOrder), gfxCoreHelper);
+
+ImplicitArgsHelper::patchImplicitArgs(ptrToPatchImplicitArgs, *pImplicitArgs, kernelDescriptor, std::make_pair(generationOfLocalIdsByRuntime, requiredWalkOrder));
 }

 using InlineData = typename GfxFamily::INLINE_DATA;
@@ -581,7 +581,7 @@ cl_int Kernel::getWorkGroupInfo(cl_kernel_work_group_info paramName,
 cl_ulong privateMemSize;
 size_t maxWorkgroupSize;
 const auto &hwInfo = clDevice.getHardwareInfo();
-auto &gfxCoreHelper = this->getGfxCoreHelper();
+auto &gfxCoreHelper = clDevice.getGfxCoreHelper();
 auto &clGfxCoreHelper = clDevice.getRootDeviceEnvironment().getHelper<ClGfxCoreHelper>();
 GetInfoHelper info(paramValue, paramValueSize, paramValueSizeRet);

@@ -1537,7 +1537,7 @@ cl_int Kernel::setArgBuffer(uint32_t argIndex,
 disableL3, isAuxTranslationKernel, arg.isReadOnly(), pClDevice->getDevice(),
 kernelInfo.kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, areMultipleSubDevicesInContext());
 } else if (isValidOffset(argAsPtr.bindless)) {
-auto &gfxCoreHelper = this->getGfxCoreHelper();
+auto &gfxCoreHelper = getDevice().getGfxCoreHelper();
 auto surfaceStateSize = gfxCoreHelper.getRenderSurfaceStateSize();
 auto surfaceState = ptrOffset(getSurfaceStateHeap(), surfaceStateSize * argIndex);

@@ -1655,7 +1655,7 @@ cl_int Kernel::setArgImageWithMipLevel(uint32_t argIndex,

 void *surfaceState = nullptr;
 if (isValidOffset(argAsImg.bindless)) {
-auto &gfxCoreHelper = this->getGfxCoreHelper();
+auto &gfxCoreHelper = getDevice().getGfxCoreHelper();
 auto surfaceStateSize = gfxCoreHelper.getRenderSurfaceStateSize();
 surfaceState = ptrOffset(getSurfaceStateHeap(), surfaceStateSize * argIndex);
 } else {
@@ -2130,7 +2130,7 @@ uint64_t Kernel::getKernelStartAddress(const bool localIdsGenerationByRuntime, c
 kernelStartOffset += getStartOffset();

 auto &hardwareInfo = getHardwareInfo();
-const auto &gfxCoreHelper = this->getGfxCoreHelper();
+const auto &gfxCoreHelper = getDevice().getGfxCoreHelper();
 const auto &productHelper = getDevice().getProductHelper();

 if (isCssUsed && gfxCoreHelper.isOffsetToSkipSetFFIDGPWARequired(hardwareInfo, productHelper)) {

@@ -2140,7 +2140,7 @@ uint64_t Kernel::getKernelStartAddress(const bool localIdsGenerationByRuntime, c
 return kernelStartOffset;
 }
 void *Kernel::patchBindlessSurfaceState(NEO::GraphicsAllocation *alloc, uint32_t bindless) {
-auto &gfxCoreHelper = this->getGfxCoreHelper();
+auto &gfxCoreHelper = getDevice().getGfxCoreHelper();
 auto surfaceStateSize = gfxCoreHelper.getRenderSurfaceStateSize();
 NEO::BindlessHeapsHelper *bindlessHeapsHelper = getDevice().getDevice().getBindlessHeapsHelper();
 auto ssInHeap = bindlessHeapsHelper->allocateSSInHeap(surfaceStateSize, alloc, NEO::BindlessHeapsHelper::GLOBAL_SSH);

@@ -2159,7 +2159,7 @@ uint32_t Kernel::getAdditionalKernelExecInfo() const {
 }

 bool Kernel::requiresWaDisableRccRhwoOptimization() const {
-auto &gfxCoreHelper = this->getGfxCoreHelper();
+auto &gfxCoreHelper = getDevice().getGfxCoreHelper();
 auto rootDeviceIndex = getDevice().getRootDeviceIndex();

 if (gfxCoreHelper.isWaDisableRccRhwoOptimizationRequired() && isUsingSharedObjArgs()) {

@@ -2270,7 +2270,7 @@ void Kernel::reconfigureKernel() {
 kernelDescriptor.kernelAttributes.simdSize != 32) {
 this->maxKernelWorkGroupSize >>= 1;
 }
-const auto &gfxCoreHelper = this->getGfxCoreHelper();
+const auto &gfxCoreHelper = getDevice().getGfxCoreHelper();
 bool isLocalIdsGeneratedByHw = false; // if local ids generated by runtime then more work groups available
 auto maxWorkGroupSize = static_cast<uint32_t>(kernelInfo.getMaxRequiredWorkGroupSize(getMaxKernelWorkGroupSize()));
@@ -2369,8 +2369,7 @@ void Kernel::initializeLocalIdsCache() {

 void Kernel::setLocalIdsForGroup(const Vec3<uint16_t> &groupSize, void *destination) const {
 UNRECOVERABLE_IF(localIdsCache.get() == nullptr);
-const auto &gfxCoreHelper = this->getGfxCoreHelper();
-localIdsCache->setLocalIdsForGroup(groupSize, destination, gfxCoreHelper);
+localIdsCache->setLocalIdsForGroup(groupSize, destination);
 }

 size_t Kernel::getLocalIdsSizeForGroup(const Vec3<uint16_t> &groupSize) const {
@@ -410,10 +410,6 @@ class Kernel : public ReferenceTrackedObject<Kernel> {
 size_t getLocalIdsSizeForGroup(const Vec3<uint16_t> &groupSize) const;
 size_t getLocalIdsSizePerThread() const;

-const GfxCoreHelper &getGfxCoreHelper() const {
-return getDevice().getGfxCoreHelper();
-}
-
 protected:
 struct KernelConfig {
 Vec3<size_t> gws;
@@ -552,13 +552,12 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, whenSendingIndirectStateThenKe
 ASSERT_LE(expectedIohSize, ioh.getUsed());

 auto expectedLocalIds = alignedMalloc(expectedIohSize, 64);
-const auto &gfxCoreHelper = pDevice->getGfxCoreHelper();
 generateLocalIDs(expectedLocalIds, modifiedKernelInfo.getMaxSimdSize(),
 std::array<uint16_t, 3>{{localWorkSizeX, localWorkSizeY, localWorkSizeZ}},
 std::array<uint8_t, 3>{{modifiedKernelInfo.kernelDescriptor.kernelAttributes.workgroupDimensionsOrder[0],
 modifiedKernelInfo.kernelDescriptor.kernelAttributes.workgroupDimensionsOrder[1],
 modifiedKernelInfo.kernelDescriptor.kernelAttributes.workgroupDimensionsOrder[2]}},
-false, grfSize, gfxCoreHelper);
+false, grfSize);

 EXPECT_EQ(0, memcmp(expectedLocalIds, ioh.getCpuBase(), expectedIohSize));
 alignedFree(expectedLocalIds);
@@ -1378,8 +1377,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, HardwareCommandsImplicitArgsTests, givenKernelWithI

 auto grfSize = ImplicitArgsHelper::getGrfSize(expectedImplicitArgs.simdWidth);
 auto expectedLocalIds = alignedMalloc(implicitArgsProgrammingSize - sizeof(ImplicitArgs), MemoryConstants::cacheLineSize);
-const auto &gfxCoreHelper = pDevice->getGfxCoreHelper();
-generateLocalIDs(expectedLocalIds, expectedImplicitArgs.simdWidth, localSize, workgroupDimOrder, false, grfSize, gfxCoreHelper);
+generateLocalIDs(expectedLocalIds, expectedImplicitArgs.simdWidth, localSize, workgroupDimOrder, false, grfSize);

 auto localIdsProgrammingSize = implicitArgsProgrammingSize - sizeof(ImplicitArgs);
 size_t sizeForLocalIds = PerThreadDataHelper::getPerThreadDataSizeTotal(expectedImplicitArgs.simdWidth, grfSize, 3u, totalLocalSize);

@@ -1412,8 +1410,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, HardwareCommandsImplicitArgsTests, givenKernelWithI

 auto grfSize = ImplicitArgsHelper::getGrfSize(expectedImplicitArgs.simdWidth);
 auto expectedLocalIds = alignedMalloc(implicitArgsProgrammingSize - sizeof(ImplicitArgs), MemoryConstants::cacheLineSize);
-const auto &gfxCoreHelper = pDevice->getGfxCoreHelper();
-generateLocalIDs(expectedLocalIds, expectedImplicitArgs.simdWidth, localSize, expectedDimOrder, false, grfSize, gfxCoreHelper);
+generateLocalIDs(expectedLocalIds, expectedImplicitArgs.simdWidth, localSize, expectedDimOrder, false, grfSize);

 auto localIdsProgrammingSize = implicitArgsProgrammingSize - sizeof(ImplicitArgs);
 size_t sizeForLocalIds = PerThreadDataHelper::getPerThreadDataSizeTotal(expectedImplicitArgs.simdWidth, grfSize, 3u, totalLocalSize);
@@ -183,7 +183,7 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
 auto implicitArgsCrossThreadPtr = ptrOffset(const_cast<uint64_t *>(reinterpret_cast<const uint64_t *>(args.dispatchInterface->getCrossThreadData())), kernelDescriptor.payloadMappings.implicitArgs.implicitArgsBuffer);
 *implicitArgsCrossThreadPtr = implicitArgsGpuVA;

-ptr = NEO::ImplicitArgsHelper::patchImplicitArgs(ptr, *pImplicitArgs, kernelDescriptor, {}, gfxCoreHelper);
+ptr = NEO::ImplicitArgsHelper::patchImplicitArgs(ptr, *pImplicitArgs, kernelDescriptor, {});
 }

 memcpy_s(ptr, sizeCrossThreadData,

@@ -218,7 +218,7 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
 if (pImplicitArgs) {
 offsetThreadData -= sizeof(ImplicitArgs);
 pImplicitArgs->localIdTablePtr = heap->getGraphicsAllocation()->getGpuAddress() + heap->getUsed() - iohRequiredSize;
-ptr = NEO::ImplicitArgsHelper::patchImplicitArgs(ptr, *pImplicitArgs, kernelDescriptor, std::make_pair(localIdsGenerationByRuntime, requiredWorkgroupOrder), gfxCoreHelper);
+ptr = NEO::ImplicitArgsHelper::patchImplicitArgs(ptr, *pImplicitArgs, kernelDescriptor, std::make_pair(localIdsGenerationByRuntime, requiredWorkgroupOrder));
 }

 if (sizeCrossThreadData > 0) {
@@ -8,7 +8,6 @@
 #include "shared/source/helpers/local_id_gen.h"

 #include "shared/source/helpers/aligned_memory.h"
-#include "shared/source/helpers/gfx_core_helper.h"
 #include "shared/source/helpers/local_id_gen_special.inl"
 #include "shared/source/utilities/cpu_info.h"

@@ -42,9 +41,8 @@ LocalIDHelper::LocalIDHelper() {

 LocalIDHelper LocalIDHelper::initializer;

-void generateLocalIDs(void *buffer, uint16_t simd, const std::array<uint16_t, 3> &localWorkgroupSize, const std::array<uint8_t, 3> &dimensionsOrder, bool isImageOnlyKernel, uint32_t grfSize, const GfxCoreHelper &gfxCoreHelper) {
-bool localIdsGeneratedByHw = false;
-auto threadsPerWorkGroup = static_cast<uint16_t>(gfxCoreHelper.calculateNumThreadsPerThreadGroup(simd, static_cast<uint32_t>(localWorkgroupSize[0] * localWorkgroupSize[1] * localWorkgroupSize[2]), grfSize, localIdsGeneratedByHw));
+void generateLocalIDs(void *buffer, uint16_t simd, const std::array<uint16_t, 3> &localWorkgroupSize, const std::array<uint8_t, 3> &dimensionsOrder, bool isImageOnlyKernel, uint32_t grfSize) {
+auto threadsPerWorkGroup = static_cast<uint16_t>(getThreadsPerWG(simd, static_cast<uint32_t>(localWorkgroupSize[0] * localWorkgroupSize[1] * localWorkgroupSize[2])));
 bool useLayoutForImages = isImageOnlyKernel && isCompatibleWithLayoutForImages(localWorkgroupSize, dimensionsOrder, simd);
 if (useLayoutForImages) {
 generateLocalIDsWithLayoutForImages(buffer, localWorkgroupSize, simd);
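For orientation, the restored body above computes the thread count with getThreadsPerWG rather than querying the GfxCoreHelper. The sketch below only illustrates that ceiling division, under the assumption that getThreadsPerWG(simd, lwsTotal) behaves like ceil(lwsTotal / simd); it is not the NEO implementation.

#include <cstdint>

// Illustrative sketch, not NEO code: number of HW threads needed to cover lwsTotal work items.
static uint32_t threadsPerWorkGroupSketch(uint32_t simd, uint32_t lwsTotal) {
    return (lwsTotal + simd - 1) / simd; // e.g. simd = 16, lwsTotal = 2 * 2 * 1 -> 1 thread
}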
@@ -12,7 +12,6 @@
 #include <cstdint>

 namespace NEO {
-class GfxCoreHelper;
 inline uint32_t getGRFsPerThread(uint32_t simd, uint32_t grfSize) {
 return (simd == 32 && grfSize == 32) ? 2 : 1;
 }
@@ -59,7 +58,7 @@ void generateLocalIDsSimd(void *b, const std::array<uint16_t, 3> &localWorkgroup
 const std::array<uint8_t, 3> &dimensionsOrder, bool chooseMaxRowSize);

 void generateLocalIDs(void *buffer, uint16_t simd, const std::array<uint16_t, 3> &localWorkgroupSize,
-const std::array<uint8_t, 3> &dimensionsOrder, bool isImageOnlyKernel, uint32_t grfSize, const NEO::GfxCoreHelper &gfxCoreHelper);
+const std::array<uint8_t, 3> &dimensionsOrder, bool isImageOnlyKernel, uint32_t grfSize);
 void generateLocalIDsWithLayoutForImages(void *b, const std::array<uint16_t, 3> &localWorkgroupSize, uint16_t simd);

 bool isCompatibleWithLayoutForImages(const std::array<uint16_t, 3> &localWorkgroupSize, const std::array<uint8_t, 3> &dimensionsOrder, uint16_t simd);
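With the declaration restored as above, a call site passes only the buffer, SIMD width, local work size, dimension order, image-layout flag and GRF size. A minimal sketch modeled on the local-ID unit tests further down in this diff; the wrapper function and the extra <array> include are illustrative:

#include <array>

#include "shared/source/helpers/aligned_memory.h"
#include "shared/source/helpers/local_id_gen.h"

using namespace NEO;

// Buffer size and constants mirror the variadic-GRF unit test below: SIMD16, GRF size 64,
// a 2x2x1 work group, XYZ dimension order, not an image-only kernel.
void exampleGenerateLocalIdsAfterRevert() {
    auto localIdsPtr = allocateAlignedMemory(3 * 64u, MemoryConstants::cacheLineSize);
    std::array<uint16_t, 3u> localSizes = {{2u, 2u, 1u}};
    std::array<uint8_t, 3u> dimensionsOrder = {{0u, 1u, 2u}};
    generateLocalIDs(localIdsPtr.get(), 16u, localSizes, dimensionsOrder, false, 64u); // no GfxCoreHelper argument
}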
@@ -8,7 +8,6 @@
 #include "shared/source/helpers/local_id_gen.h"

 #include "shared/source/helpers/aligned_memory.h"
-#include "shared/source/helpers/gfx_core_helper.h"
 #include "shared/source/helpers/local_id_gen_special.inl"
 #include "shared/source/utilities/cpu_info.h"

@@ -45,9 +44,8 @@ LocalIDHelper::LocalIDHelper() {
 LocalIDHelper LocalIDHelper::initializer;

 // traditional function to generate local IDs
-void generateLocalIDs(void *buffer, uint16_t simd, const std::array<uint16_t, 3> &localWorkgroupSize, const std::array<uint8_t, 3> &dimensionsOrder, bool isImageOnlyKernel, uint32_t grfSize, const GfxCoreHelper &gfxCoreHelper) {
-bool localIdsGeneratedByHw = false;
-auto threadsPerWorkGroup = static_cast<uint16_t>(gfxCoreHelper.calculateNumThreadsPerThreadGroup(simd, static_cast<uint32_t>(localWorkgroupSize[0] * localWorkgroupSize[1] * localWorkgroupSize[2]), grfSize, localIdsGeneratedByHw));
+void generateLocalIDs(void *buffer, uint16_t simd, const std::array<uint16_t, 3> &localWorkgroupSize, const std::array<uint8_t, 3> &dimensionsOrder, bool isImageOnlyKernel, uint32_t grfSize) {
+auto threadsPerWorkGroup = static_cast<uint16_t>(getThreadsPerWG(simd, static_cast<uint32_t>(localWorkgroupSize[0] * localWorkgroupSize[1] * localWorkgroupSize[2])));
 bool useLayoutForImages = isImageOnlyKernel && isCompatibleWithLayoutForImages(localWorkgroupSize, dimensionsOrder, simd);
 if (useLayoutForImages) {
 generateLocalIDsWithLayoutForImages(buffer, localWorkgroupSize, simd);
@@ -15,7 +15,6 @@
 namespace NEO {

 struct KernelDescriptor;
-class GfxCoreHelper;

 struct ImplicitArgs {
 uint8_t structSize;
@@ -52,6 +51,6 @@ namespace ImplicitArgsHelper {
 std::array<uint8_t, 3> getDimensionOrderForLocalIds(const uint8_t *workgroupDimensionsOrder, std::optional<std::pair<bool /* localIdsGeneratedByRuntime */, uint32_t /* walkOrderForHwGenerationOfLocalIds */>> hwGenerationOfLocalIdsParams);
 uint32_t getGrfSize(uint32_t simd);
 uint32_t getSizeForImplicitArgsPatching(const ImplicitArgs *pImplicitArgs, const KernelDescriptor &kernelDescriptor);
-void *patchImplicitArgs(void *ptrToPatch, const ImplicitArgs &implicitArgs, const KernelDescriptor &kernelDescriptor, std::optional<std::pair<bool /* localIdsGeneratedByRuntime */, uint32_t /* walkOrderForHwGenerationOfLocalIds */>> hwGenerationOfLocalIdsParams, const GfxCoreHelper &gfxCoreHelper);
+void *patchImplicitArgs(void *ptrToPatch, const ImplicitArgs &implicitArgs, const KernelDescriptor &kernelDescriptor, std::optional<std::pair<bool /* localIdsGeneratedByRuntime */, uint32_t /* walkOrderForHwGenerationOfLocalIds */>> hwGenerationOfLocalIdsParams);
 } // namespace ImplicitArgsHelper
 } // namespace NEO
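On the patching side the helper argument disappears as well. A sketch of a post-revert call, modeled on the ImplicitArgsHelper tests later in this diff; implicitArgs and kernelDescriptor are assumed to be populated by the caller, and the wrapper function is illustrative:

#include "shared/source/helpers/aligned_memory.h"
#include "shared/source/kernel/implicit_args.h"
#include "shared/source/kernel/kernel_descriptor.h"

using namespace NEO;

// Sketch only: size the destination with getSizeForImplicitArgsPatching, then patch it
// without a GfxCoreHelper; {} means no parameters for HW generation of local ids.
void patchImplicitArgsSketch(const ImplicitArgs &implicitArgs, const KernelDescriptor &kernelDescriptor) {
    auto totalSizeForPatching = ImplicitArgsHelper::getSizeForImplicitArgsPatching(&implicitArgs, kernelDescriptor);
    auto memoryToPatch = allocateAlignedMemory(totalSizeForPatching, MemoryConstants::cacheLineSize);
    auto *retVal = ImplicitArgsHelper::patchImplicitArgs(memoryToPatch.get(), implicitArgs, kernelDescriptor, {});
    // retVal points just past the patched region: memoryToPatch.get() + totalSizeForPatching.
    (void)retVal;
}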
@@ -62,7 +62,7 @@ uint32_t getSizeForImplicitArgsPatching(const ImplicitArgs *pImplicitArgs, const
 }
 }

-void *patchImplicitArgs(void *ptrToPatch, const ImplicitArgs &implicitArgs, const KernelDescriptor &kernelDescriptor, std::optional<std::pair<bool, uint32_t>> hwGenerationOfLocalIdsParams, const GfxCoreHelper &gfxCoreHelper) {
+void *patchImplicitArgs(void *ptrToPatch, const ImplicitArgs &implicitArgs, const KernelDescriptor &kernelDescriptor, std::optional<std::pair<bool, uint32_t>> hwGenerationOfLocalIdsParams) {

 auto totalSizeToProgram = getSizeForImplicitArgsPatching(&implicitArgs, kernelDescriptor);
 auto retVal = ptrOffset(ptrToPatch, totalSizeToProgram);

@@ -80,7 +80,7 @@ void *patchImplicitArgs(void *ptrToPatch, const ImplicitArgs &implicitArgs, cons
 static_cast<uint16_t>(implicitArgs.localSizeY),
 static_cast<uint16_t>(implicitArgs.localSizeZ)}},
 dimensionOrder,
-false, grfSize, gfxCoreHelper);
+false, grfSize);
 auto sizeForLocalIdsProgramming = totalSizeToProgram - sizeof(NEO::ImplicitArgs);
 ptrToPatch = ptrOffset(ptrToPatch, sizeForLocalIdsProgramming);
 }
@@ -47,7 +47,7 @@ void LocalIdsCache::setLocalIdsForEntry(LocalIdsCacheEntry &entry, void *destina
 std::memcpy(destination, entry.localIdsData, entry.localIdsSize);
 }

-void LocalIdsCache::setLocalIdsForGroup(const Vec3<uint16_t> &group, void *destination, const GfxCoreHelper &gfxCoreHelper) {
+void LocalIdsCache::setLocalIdsForGroup(const Vec3<uint16_t> &group, void *destination) {
 auto setLocalIdsLock = lock();
 LocalIdsCacheEntry *leastAccessedEntry = &cache[0];
 for (auto &cacheEntry : cache) {

@@ -60,11 +60,11 @@ void LocalIdsCache::setLocalIdsForGroup(const Vec3<uint16_t> &group, void *desti
 }
 }

-commitNewEntry(*leastAccessedEntry, group, gfxCoreHelper);
+commitNewEntry(*leastAccessedEntry, group);
 setLocalIdsForEntry(*leastAccessedEntry, destination);
 }

-void LocalIdsCache::commitNewEntry(LocalIdsCacheEntry &entry, const Vec3<uint16_t> &group, const GfxCoreHelper &gfxCoreHelper) {
+void LocalIdsCache::commitNewEntry(LocalIdsCacheEntry &entry, const Vec3<uint16_t> &group) {
 entry.localIdsSize = getLocalIdsSizeForGroup(group);
 entry.groupSize = group;
 entry.accessCounter = 0U;

@@ -74,7 +74,7 @@ void LocalIdsCache::commitNewEntry(LocalIdsCacheEntry &entry, const Vec3<uint16_
 entry.localIdsSizeAllocated = entry.localIdsSize;
 }
 NEO::generateLocalIDs(entry.localIdsData, static_cast<uint16_t>(simdSize),
-{group[0], group[1], group[2]}, wgDimOrder, usesOnlyImages, grfSize, gfxCoreHelper);
+{group[0], group[1], group[2]}, wgDimOrder, usesOnlyImages, grfSize);
 }

 } // namespace NEO
@@ -12,7 +12,7 @@
 #include <mutex>

 namespace NEO {
-class GfxCoreHelper;
+
 class LocalIdsCache {
 public:
 struct LocalIdsCacheEntry {
@@ -30,13 +30,13 @@ class LocalIdsCache {
 LocalIdsCache(size_t cacheSize, std::array<uint8_t, 3> wgDimOrder, uint8_t simdSize, uint8_t grfSize, bool usesOnlyImages = false);
 ~LocalIdsCache();

-void setLocalIdsForGroup(const Vec3<uint16_t> &group, void *destination, const GfxCoreHelper &gfxCoreHelper);
+void setLocalIdsForGroup(const Vec3<uint16_t> &group, void *destination);
 size_t getLocalIdsSizeForGroup(const Vec3<uint16_t> &group) const;
 size_t getLocalIdsSizePerThread() const;

 protected:
 void setLocalIdsForEntry(LocalIdsCacheEntry &entry, void *destination);
-void commitNewEntry(LocalIdsCacheEntry &entry, const Vec3<uint16_t> &group, const GfxCoreHelper &gfxCoreHelper);
+void commitNewEntry(LocalIdsCacheEntry &entry, const Vec3<uint16_t> &group);
 std::unique_lock<std::mutex> lock();

 StackVec<LocalIdsCacheEntry, 4> cache;
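With the cache interface above back to two parameters, callers supply only the group size and a destination buffer sized via getLocalIdsSizeForGroup. A sketch modeled on the LocalIdsCache tests at the end of this diff; the cache object is assumed to be already constructed (the test fixture does that), and the group size is the one those tests use:

#include <cstdint>
#include <vector>

#include "shared/source/kernel/local_ids_cache.h"

// Sketch only: query the per-group size, then let the cache write the local ids into the buffer.
void fillLocalIdsSketch(NEO::LocalIdsCache &localIdsCache) {
    NEO::Vec3<uint16_t> groupSize = {2, 1, 1};
    std::vector<uint8_t> perThreadData(localIdsCache.getLocalIdsSizeForGroup(groupSize));
    localIdsCache.setLocalIdsForGroup(groupSize, perThreadData.data()); // no GfxCoreHelper argument
}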
@@ -7,10 +7,8 @@

 #include "shared/source/helpers/aligned_memory.h"
 #include "shared/source/helpers/basic_math.h"
-#include "shared/source/helpers/gfx_core_helper.h"
 #include "shared/source/helpers/local_id_gen.h"
 #include "shared/source/helpers/ptr_math.h"
-#include "shared/test/common/helpers/default_hw_info.h"
 #include "shared/test/common/helpers/unit_test_helper.h"
 #include "shared/test/common/test_macros/hw_test.h"

@@ -77,16 +75,14 @@ TEST(LocalID, GivenSimd1WhenGettingPerThreadSizeLocalIdsThenValueIsEqualGrfSize)

 EXPECT_EQ(grfSize, getPerThreadSizeLocalIDs(simd, grfSize));
 }

-TEST(LocalIdTest, givenVariadicGrfSizeWhenLocalSizesAreEmittedThenUseFullRowSize) {
+TEST(LocalID, givenVariadicGrfSizeWhenLocalSizesAreEmittedThenUseFullRowSize) {
 auto localIdsPtr = allocateAlignedMemory(3 * 64u, MemoryConstants::cacheLineSize);

 uint16_t *localIdsView = reinterpret_cast<uint16_t *>(localIdsPtr.get());
 std::array<uint16_t, 3u> localSizes = {{2u, 2u, 1u}};
 std::array<uint8_t, 3u> dimensionsOrder = {{0u, 1u, 2u}};
-
-auto gfxCoreHelper = GfxCoreHelper::create(defaultHwInfo->platform.eRenderCoreFamily);
-generateLocalIDs(localIdsPtr.get(), 16u, localSizes, dimensionsOrder, false, 64u, *gfxCoreHelper.get());
+generateLocalIDs(localIdsPtr.get(), 16u, localSizes, dimensionsOrder, false, 64u);
 EXPECT_EQ(localIdsView[0], 0u);
 EXPECT_EQ(localIdsView[1], 1u);
 EXPECT_EQ(localIdsView[2], 0u);
@@ -281,42 +277,37 @@ struct LocalIDFixture : ::testing::TestWithParam<std::tuple<int, int, int, int,
 };

 HWTEST_P(LocalIDFixture, WhenGeneratingLocalIdsThenIdsAreWithinLimits) {
-auto gfxCoreHelper = GfxCoreHelper::create(defaultHwInfo->platform.eRenderCoreFamily);
 generateLocalIDs(buffer, simd, std::array<uint16_t, 3>{{static_cast<uint16_t>(localWorkSizeX), static_cast<uint16_t>(localWorkSizeY), static_cast<uint16_t>(localWorkSizeZ)}},
-std::array<uint8_t, 3>{{0, 1, 2}}, false, grfSize, *gfxCoreHelper.get());
+std::array<uint8_t, 3>{{0, 1, 2}}, false, grfSize);
 validateIDWithinLimits(simd, localWorkSizeX, localWorkSizeY, localWorkSizeZ, UnitTestHelper<FamilyType>::useFullRowForLocalIdsGeneration);
 }

 HWTEST_P(LocalIDFixture, WhenGeneratingLocalIdsThenAllWorkItemsCovered) {
-auto gfxCoreHelper = GfxCoreHelper::create(defaultHwInfo->platform.eRenderCoreFamily);
 generateLocalIDs(buffer, simd, std::array<uint16_t, 3>{{static_cast<uint16_t>(localWorkSizeX), static_cast<uint16_t>(localWorkSizeY), static_cast<uint16_t>(localWorkSizeZ)}},
-std::array<uint8_t, 3>{{0, 1, 2}}, false, grfSize, *gfxCoreHelper.get());
+std::array<uint8_t, 3>{{0, 1, 2}}, false, grfSize);
 validateAllWorkItemsCovered(simd, localWorkSizeX, localWorkSizeY, localWorkSizeZ, UnitTestHelper<FamilyType>::useFullRowForLocalIdsGeneration);
 }

 HWTEST_P(LocalIDFixture, WhenWalkOrderIsXyzThenProperLocalIdsAreGenerated) {
 auto dimensionsOrder = std::array<uint8_t, 3>{{0, 1, 2}};
-auto gfxCoreHelper = GfxCoreHelper::create(defaultHwInfo->platform.eRenderCoreFamily);
 generateLocalIDs(buffer, simd, std::array<uint16_t, 3>{{static_cast<uint16_t>(localWorkSizeX), static_cast<uint16_t>(localWorkSizeY), static_cast<uint16_t>(localWorkSizeZ)}},
-dimensionsOrder, false, grfSize, *gfxCoreHelper.get());
+dimensionsOrder, false, grfSize);
 validateAllWorkItemsCovered(simd, localWorkSizeX, localWorkSizeY, localWorkSizeZ, UnitTestHelper<FamilyType>::useFullRowForLocalIdsGeneration);
 validateWalkOrder(simd, localWorkSizeX, localWorkSizeY, localWorkSizeZ, dimensionsOrder);
 }

 HWTEST_P(LocalIDFixture, WhenWalkOrderIsYxzThenProperLocalIdsAreGenerated) {
 auto dimensionsOrder = std::array<uint8_t, 3>{{1, 0, 2}};
-auto gfxCoreHelper = GfxCoreHelper::create(defaultHwInfo->platform.eRenderCoreFamily);
 generateLocalIDs(buffer, simd, std::array<uint16_t, 3>{{static_cast<uint16_t>(localWorkSizeX), static_cast<uint16_t>(localWorkSizeY), static_cast<uint16_t>(localWorkSizeZ)}},
-dimensionsOrder, false, grfSize, *gfxCoreHelper.get());
+dimensionsOrder, false, grfSize);
 validateAllWorkItemsCovered(simd, localWorkSizeX, localWorkSizeY, localWorkSizeZ, UnitTestHelper<FamilyType>::useFullRowForLocalIdsGeneration);
 validateWalkOrder(simd, localWorkSizeX, localWorkSizeY, localWorkSizeZ, dimensionsOrder);
 }

 HWTEST_P(LocalIDFixture, WhenWalkOrderIsZyxThenProperLocalIdsAreGenerated) {
 auto dimensionsOrder = std::array<uint8_t, 3>{{2, 1, 0}};
-auto gfxCoreHelper = GfxCoreHelper::create(defaultHwInfo->platform.eRenderCoreFamily);
 generateLocalIDs(buffer, simd, std::array<uint16_t, 3>{{static_cast<uint16_t>(localWorkSizeX), static_cast<uint16_t>(localWorkSizeY), static_cast<uint16_t>(localWorkSizeZ)}},
-dimensionsOrder, false, grfSize, *gfxCoreHelper.get());
+dimensionsOrder, false, grfSize);
 validateAllWorkItemsCovered(simd, localWorkSizeX, localWorkSizeY, localWorkSizeZ, UnitTestHelper<FamilyType>::useFullRowForLocalIdsGeneration);
 validateWalkOrder(simd, localWorkSizeX, localWorkSizeY, localWorkSizeZ, dimensionsOrder);
 }
@@ -344,8 +335,8 @@ struct LocalIdsLayoutForImagesTest : ::testing::TestWithParam<std::tuple<uint16_
 rowWidth = simd == 32u ? 32u : 16u;
 xDelta = simd == 8u ? 2u : 4u;
 }
+
 void generateLocalIds() {
-
 auto numGrfs = (localWorkSize.at(0) * localWorkSize.at(1) + (simd - 1)) / simd;
 elemsInBuffer = 3u * simd * numGrfs;
 if (simd == 8u) {
@@ -356,8 +347,7 @@ struct LocalIdsLayoutForImagesTest : ::testing::TestWithParam<std::tuple<uint16_
 memset(memory.get(), 0xff, size);
 buffer = reinterpret_cast<uint16_t *>(memory.get());
 EXPECT_TRUE(isCompatibleWithLayoutForImages(localWorkSize, dimensionsOrder, simd));
-auto gfxCoreHelper = GfxCoreHelper::create(defaultHwInfo->platform.eRenderCoreFamily);
-generateLocalIDs(buffer, simd, localWorkSize, dimensionsOrder, true, grfSize, *gfxCoreHelper.get());
+generateLocalIDs(buffer, simd, localWorkSize, dimensionsOrder, true, grfSize);
 }
 void validateGRF() {
 uint32_t totalLocalIds = localWorkSize.at(0) * localWorkSize.at(1);
@@ -457,9 +447,9 @@ TEST_P(LocalIdsLayoutTest, givenLocalWorkgroupSize4x4x1WhenGenerateLocalIdsThenH
 auto alignedMemory2 = allocateAlignedMemory(size, 32);
 auto buffer2 = reinterpret_cast<uint16_t *>(alignedMemory2.get());
 memset(buffer2, 0xff, size);
-auto gfxCoreHelper = GfxCoreHelper::create(defaultHwInfo->platform.eRenderCoreFamily);
-generateLocalIDs(buffer1, simd, localWorkSize, dimensionsOrder, false, grfSize, *gfxCoreHelper.get());
-generateLocalIDs(buffer2, simd, localWorkSize, dimensionsOrder, true, grfSize, *gfxCoreHelper.get());
+
+generateLocalIDs(buffer1, simd, localWorkSize, dimensionsOrder, false, grfSize);
+generateLocalIDs(buffer2, simd, localWorkSize, dimensionsOrder, true, grfSize);

 for (auto i = 0u; i < elemsInBuffer / rowWidth; i++) {
 for (auto j = 0u; j < rowWidth; j++) {
@@ -6,13 +6,11 @@
 */

 #include "shared/source/helpers/aligned_memory.h"
-#include "shared/source/helpers/gfx_core_helper.h"
 #include "shared/source/helpers/hw_walk_order.h"
 #include "shared/source/helpers/per_thread_data.h"
 #include "shared/source/helpers/ptr_math.h"
 #include "shared/source/kernel/implicit_args.h"
 #include "shared/source/kernel/kernel_descriptor.h"
-#include "shared/test/common/helpers/default_hw_info.h"
 #include "shared/test/common/test_macros/hw_test.h"

 using namespace NEO;
@@ -119,8 +117,8 @@ TEST(ImplicitArgsHelperTest, givenImplicitArgsWithoutImplicitArgsBufferOffsetInP
 uint8_t pattern = 0xcd;

 memset(memoryToPatch.get(), pattern, totalSizeForPatching);
-auto gfxCoreHelper = GfxCoreHelper::create(defaultHwInfo->platform.eRenderCoreFamily);
-auto retVal = ImplicitArgsHelper::patchImplicitArgs(memoryToPatch.get(), implicitArgs, kernelDescriptor, {}, *gfxCoreHelper.get());
+
+auto retVal = ImplicitArgsHelper::patchImplicitArgs(memoryToPatch.get(), implicitArgs, kernelDescriptor, {});

 EXPECT_EQ(retVal, ptrOffset(memoryToPatch.get(), totalSizeForPatching));


@@ -160,8 +158,8 @@ TEST(ImplicitArgsHelperTest, givenImplicitArgsWithImplicitArgsBufferOffsetInPayl
 uint8_t pattern = 0xcd;

 memset(memoryToPatch.get(), pattern, totalSizeForPatching);
-auto gfxCoreHelper = GfxCoreHelper::create(defaultHwInfo->platform.eRenderCoreFamily);
-auto retVal = ImplicitArgsHelper::patchImplicitArgs(memoryToPatch.get(), implicitArgs, kernelDescriptor, {}, *gfxCoreHelper.get());
+
+auto retVal = ImplicitArgsHelper::patchImplicitArgs(memoryToPatch.get(), implicitArgs, kernelDescriptor, {});

 EXPECT_EQ(retVal, ptrOffset(memoryToPatch.get(), totalSizeForPatching));

@@ -7,11 +7,8 @@

 #include "shared/source/command_stream/linear_stream.h"
 #include "shared/source/helpers/aligned_memory.h"
-#include "shared/source/helpers/gfx_core_helper.h"
-#include "shared/source/helpers/hw_info.h"
 #include "shared/source/helpers/per_thread_data.h"
 #include "shared/source/kernel/local_ids_cache.h"
-#include "shared/test/common/helpers/default_hw_info.h"
 #include "shared/test/common/mocks/mock_graphics_allocation.h"
 #include "shared/test/common/test_macros/test.h"

@@ -38,8 +35,7 @@ using LocalIdsCacheTest = Test<LocalIdsCacheFixture>;
 TEST_F(LocalIdsCacheTest, GivenCacheMissWhenGetLocalIdsForGroupThenNewEntryIsCommitedIntoLeastUsedEntry) {
 localIdsCache->cache.resize(2);
 localIdsCache->cache[0].accessCounter = 2U;
-auto gfxCoreHelper = NEO::GfxCoreHelper::create(NEO::defaultHwInfo->platform.eRenderCoreFamily);
-localIdsCache->setLocalIdsForGroup(groupSize, perThreadData.data(), *gfxCoreHelper.get());
+localIdsCache->setLocalIdsForGroup(groupSize, perThreadData.data());

 EXPECT_EQ(groupSize, localIdsCache->cache[1].groupSize);
 EXPECT_NE(nullptr, localIdsCache->cache[1].localIdsData);

@@ -54,8 +50,7 @@ TEST_F(LocalIdsCacheTest, GivenEntryInCacheWhenGetLocalIdsForGroupThenEntryFromC
 localIdsCache->cache[0].localIdsSize = 512U;
 localIdsCache->cache[0].localIdsSizeAllocated = 512U;
 localIdsCache->cache[0].accessCounter = 1U;
-auto gfxCoreHelper = NEO::GfxCoreHelper::create(NEO::defaultHwInfo->platform.eRenderCoreFamily);
-localIdsCache->setLocalIdsForGroup(groupSize, perThreadData.data(), *gfxCoreHelper.get());
+localIdsCache->setLocalIdsForGroup(groupSize, perThreadData.data());
 EXPECT_EQ(2U, localIdsCache->cache[0].accessCounter);
 }

@@ -68,8 +63,7 @@ TEST_F(LocalIdsCacheTest, GivenEntryWithBiggerBufferAllocatedWhenGetLocalIdsForG
 const auto localIdsData = localIdsCache->cache[0].localIdsData;

 groupSize = {2, 1, 1};
-auto gfxCoreHelper = NEO::GfxCoreHelper::create(NEO::defaultHwInfo->platform.eRenderCoreFamily);
-localIdsCache->setLocalIdsForGroup(groupSize, perThreadData.data(), *gfxCoreHelper.get());
+localIdsCache->setLocalIdsForGroup(groupSize, perThreadData.data());
 EXPECT_EQ(1U, localIdsCache->cache[0].accessCounter);
 EXPECT_EQ(192U, localIdsCache->cache[0].localIdsSize);
 EXPECT_EQ(512U, localIdsCache->cache[0].localIdsSizeAllocated);