diff --git a/level_zero/core/source/kernel/kernel_imp.cpp b/level_zero/core/source/kernel/kernel_imp.cpp index c7f369fd50..c882b06643 100644 --- a/level_zero/core/source/kernel/kernel_imp.cpp +++ b/level_zero/core/source/kernel/kernel_imp.cpp @@ -363,7 +363,7 @@ ze_result_t KernelImp::setGroupSize(uint32_t groupSizeX, uint32_t groupSizeY, static_cast(groupSizeY), static_cast(groupSizeZ)}}, std::array{{0, 1, 2}}, - false, grfSize, gfxCoreHelper); + false, grfSize); } this->perThreadDataSize = perThreadDataSizeForWholeThreadGroup / numThreadsPerThreadGroup; diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_2.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_2.cpp index 907aadb0fb..c814e6b6b3 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_2.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_2.cpp @@ -1023,8 +1023,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CmdlistAppendLaunchKernelWithImplicitArgsTests, giv auto grfSize = ImplicitArgsHelper::getGrfSize(expectedImplicitArgs.simdWidth); auto expectedLocalIds = alignedMalloc(implicitArgsProgrammingSize - sizeof(ImplicitArgs), MemoryConstants::cacheLineSize); - const auto &gfxCoreHelper = device->getNEODevice()->getGfxCoreHelper(); - generateLocalIDs(expectedLocalIds, expectedImplicitArgs.simdWidth, localSize, workgroupDimOrder, false, grfSize, gfxCoreHelper); + generateLocalIDs(expectedLocalIds, expectedImplicitArgs.simdWidth, localSize, workgroupDimOrder, false, grfSize); auto localIdsProgrammingSize = implicitArgsProgrammingSize - sizeof(ImplicitArgs); size_t sizeForLocalIds = NEO::PerThreadDataHelper::getPerThreadDataSizeTotal(expectedImplicitArgs.simdWidth, grfSize, 3u, totalLocalSize); @@ -1069,8 +1068,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CmdlistAppendLaunchKernelWithImplicitArgsTests, giv auto grfSize = ImplicitArgsHelper::getGrfSize(expectedImplicitArgs.simdWidth); auto expectedLocalIds = alignedMalloc(implicitArgsProgrammingSize - sizeof(ImplicitArgs), MemoryConstants::cacheLineSize); - const auto &gfxCoreHelper = device->getNEODevice()->getGfxCoreHelper(); - generateLocalIDs(expectedLocalIds, expectedImplicitArgs.simdWidth, localSize, expectedDimOrder, false, grfSize, gfxCoreHelper); + generateLocalIDs(expectedLocalIds, expectedImplicitArgs.simdWidth, localSize, expectedDimOrder, false, grfSize); auto localIdsProgrammingSize = implicitArgsProgrammingSize - sizeof(ImplicitArgs); size_t sizeForLocalIds = NEO::PerThreadDataHelper::getPerThreadDataSizeTotal(expectedImplicitArgs.simdWidth, grfSize, 3u, totalLocalSize); diff --git a/opencl/source/helpers/hardware_commands_helper_bdw_and_later.inl b/opencl/source/helpers/hardware_commands_helper_bdw_and_later.inl index 44199f2dd0..15c5dab2d9 100644 --- a/opencl/source/helpers/hardware_commands_helper_bdw_and_later.inl +++ b/opencl/source/helpers/hardware_commands_helper_bdw_and_later.inl @@ -91,8 +91,7 @@ size_t HardwareCommandsHelper::sendCrossThreadData( auto implicitArgsGpuVA = indirectHeap.getGraphicsAllocation()->getGpuAddress() + indirectHeap.getUsed(); auto ptrToPatchImplicitArgs = indirectHeap.getSpace(sizeForImplicitArgsProgramming); - const auto &gfxCoreHelper = kernel.getGfxCoreHelper(); - ImplicitArgsHelper::patchImplicitArgs(ptrToPatchImplicitArgs, *pImplicitArgs, kernelDescriptor, {}, gfxCoreHelper); + ImplicitArgsHelper::patchImplicitArgs(ptrToPatchImplicitArgs, *pImplicitArgs, kernelDescriptor, {}); auto implicitArgsCrossThreadPtr = ptrOffset(reinterpret_cast(kernel.getCrossThreadData()), kernelDescriptor.payloadMappings.implicitArgs.implicitArgsBuffer); *implicitArgsCrossThreadPtr = implicitArgsGpuVA; diff --git a/opencl/source/helpers/hardware_commands_helper_xehp_and_later.inl b/opencl/source/helpers/hardware_commands_helper_xehp_and_later.inl index 7dd4e52ca4..c463a5a4f9 100644 --- a/opencl/source/helpers/hardware_commands_helper_xehp_and_later.inl +++ b/opencl/source/helpers/hardware_commands_helper_xehp_and_later.inl @@ -96,8 +96,8 @@ size_t HardwareCommandsHelper::sendCrossThreadData( kernelAttributes.flags.requiresWorkgroupWalkOrder, requiredWalkOrder, kernelDescriptor.kernelAttributes.simdSize); - const auto &gfxCoreHelper = kernel.getGfxCoreHelper(); - ImplicitArgsHelper::patchImplicitArgs(ptrToPatchImplicitArgs, *pImplicitArgs, kernelDescriptor, std::make_pair(generationOfLocalIdsByRuntime, requiredWalkOrder), gfxCoreHelper); + + ImplicitArgsHelper::patchImplicitArgs(ptrToPatchImplicitArgs, *pImplicitArgs, kernelDescriptor, std::make_pair(generationOfLocalIdsByRuntime, requiredWalkOrder)); } using InlineData = typename GfxFamily::INLINE_DATA; diff --git a/opencl/source/kernel/kernel.cpp b/opencl/source/kernel/kernel.cpp index b9c6f0414a..66961d203d 100644 --- a/opencl/source/kernel/kernel.cpp +++ b/opencl/source/kernel/kernel.cpp @@ -581,7 +581,7 @@ cl_int Kernel::getWorkGroupInfo(cl_kernel_work_group_info paramName, cl_ulong privateMemSize; size_t maxWorkgroupSize; const auto &hwInfo = clDevice.getHardwareInfo(); - auto &gfxCoreHelper = this->getGfxCoreHelper(); + auto &gfxCoreHelper = clDevice.getGfxCoreHelper(); auto &clGfxCoreHelper = clDevice.getRootDeviceEnvironment().getHelper(); GetInfoHelper info(paramValue, paramValueSize, paramValueSizeRet); @@ -1537,7 +1537,7 @@ cl_int Kernel::setArgBuffer(uint32_t argIndex, disableL3, isAuxTranslationKernel, arg.isReadOnly(), pClDevice->getDevice(), kernelInfo.kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, areMultipleSubDevicesInContext()); } else if (isValidOffset(argAsPtr.bindless)) { - auto &gfxCoreHelper = this->getGfxCoreHelper(); + auto &gfxCoreHelper = getDevice().getGfxCoreHelper(); auto surfaceStateSize = gfxCoreHelper.getRenderSurfaceStateSize(); auto surfaceState = ptrOffset(getSurfaceStateHeap(), surfaceStateSize * argIndex); @@ -1655,7 +1655,7 @@ cl_int Kernel::setArgImageWithMipLevel(uint32_t argIndex, void *surfaceState = nullptr; if (isValidOffset(argAsImg.bindless)) { - auto &gfxCoreHelper = this->getGfxCoreHelper(); + auto &gfxCoreHelper = getDevice().getGfxCoreHelper(); auto surfaceStateSize = gfxCoreHelper.getRenderSurfaceStateSize(); surfaceState = ptrOffset(getSurfaceStateHeap(), surfaceStateSize * argIndex); } else { @@ -2130,7 +2130,7 @@ uint64_t Kernel::getKernelStartAddress(const bool localIdsGenerationByRuntime, c kernelStartOffset += getStartOffset(); auto &hardwareInfo = getHardwareInfo(); - const auto &gfxCoreHelper = this->getGfxCoreHelper(); + const auto &gfxCoreHelper = getDevice().getGfxCoreHelper(); const auto &productHelper = getDevice().getProductHelper(); if (isCssUsed && gfxCoreHelper.isOffsetToSkipSetFFIDGPWARequired(hardwareInfo, productHelper)) { @@ -2140,7 +2140,7 @@ uint64_t Kernel::getKernelStartAddress(const bool localIdsGenerationByRuntime, c return kernelStartOffset; } void *Kernel::patchBindlessSurfaceState(NEO::GraphicsAllocation *alloc, uint32_t bindless) { - auto &gfxCoreHelper = this->getGfxCoreHelper(); + auto &gfxCoreHelper = getDevice().getGfxCoreHelper(); auto surfaceStateSize = gfxCoreHelper.getRenderSurfaceStateSize(); NEO::BindlessHeapsHelper *bindlessHeapsHelper = getDevice().getDevice().getBindlessHeapsHelper(); auto ssInHeap = bindlessHeapsHelper->allocateSSInHeap(surfaceStateSize, alloc, NEO::BindlessHeapsHelper::GLOBAL_SSH); @@ -2159,7 +2159,7 @@ uint32_t Kernel::getAdditionalKernelExecInfo() const { } bool Kernel::requiresWaDisableRccRhwoOptimization() const { - auto &gfxCoreHelper = this->getGfxCoreHelper(); + auto &gfxCoreHelper = getDevice().getGfxCoreHelper(); auto rootDeviceIndex = getDevice().getRootDeviceIndex(); if (gfxCoreHelper.isWaDisableRccRhwoOptimizationRequired() && isUsingSharedObjArgs()) { @@ -2270,7 +2270,7 @@ void Kernel::reconfigureKernel() { kernelDescriptor.kernelAttributes.simdSize != 32) { this->maxKernelWorkGroupSize >>= 1; } - const auto &gfxCoreHelper = this->getGfxCoreHelper(); + const auto &gfxCoreHelper = getDevice().getGfxCoreHelper(); bool isLocalIdsGeneratedByHw = false; // if local ids generated by runtime then more work groups available auto maxWorkGroupSize = static_cast(kernelInfo.getMaxRequiredWorkGroupSize(getMaxKernelWorkGroupSize())); @@ -2369,8 +2369,7 @@ void Kernel::initializeLocalIdsCache() { void Kernel::setLocalIdsForGroup(const Vec3 &groupSize, void *destination) const { UNRECOVERABLE_IF(localIdsCache.get() == nullptr); - const auto &gfxCoreHelper = this->getGfxCoreHelper(); - localIdsCache->setLocalIdsForGroup(groupSize, destination, gfxCoreHelper); + localIdsCache->setLocalIdsForGroup(groupSize, destination); } size_t Kernel::getLocalIdsSizeForGroup(const Vec3 &groupSize) const { diff --git a/opencl/source/kernel/kernel.h b/opencl/source/kernel/kernel.h index b86ba4d0c5..94f100c8fc 100644 --- a/opencl/source/kernel/kernel.h +++ b/opencl/source/kernel/kernel.h @@ -410,10 +410,6 @@ class Kernel : public ReferenceTrackedObject { size_t getLocalIdsSizeForGroup(const Vec3 &groupSize) const; size_t getLocalIdsSizePerThread() const; - const GfxCoreHelper &getGfxCoreHelper() const { - return getDevice().getGfxCoreHelper(); - } - protected: struct KernelConfig { Vec3 gws; diff --git a/opencl/test/unit_test/helpers/hardware_commands_helper_tests.cpp b/opencl/test/unit_test/helpers/hardware_commands_helper_tests.cpp index 76dbfdd63c..7afbccf43f 100644 --- a/opencl/test/unit_test/helpers/hardware_commands_helper_tests.cpp +++ b/opencl/test/unit_test/helpers/hardware_commands_helper_tests.cpp @@ -552,13 +552,12 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, whenSendingIndirectStateThenKe ASSERT_LE(expectedIohSize, ioh.getUsed()); auto expectedLocalIds = alignedMalloc(expectedIohSize, 64); - const auto &gfxCoreHelper = pDevice->getGfxCoreHelper(); generateLocalIDs(expectedLocalIds, modifiedKernelInfo.getMaxSimdSize(), std::array{{localWorkSizeX, localWorkSizeY, localWorkSizeZ}}, std::array{{modifiedKernelInfo.kernelDescriptor.kernelAttributes.workgroupDimensionsOrder[0], modifiedKernelInfo.kernelDescriptor.kernelAttributes.workgroupDimensionsOrder[1], modifiedKernelInfo.kernelDescriptor.kernelAttributes.workgroupDimensionsOrder[2]}}, - false, grfSize, gfxCoreHelper); + false, grfSize); EXPECT_EQ(0, memcmp(expectedLocalIds, ioh.getCpuBase(), expectedIohSize)); alignedFree(expectedLocalIds); @@ -1378,8 +1377,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, HardwareCommandsImplicitArgsTests, givenKernelWithI auto grfSize = ImplicitArgsHelper::getGrfSize(expectedImplicitArgs.simdWidth); auto expectedLocalIds = alignedMalloc(implicitArgsProgrammingSize - sizeof(ImplicitArgs), MemoryConstants::cacheLineSize); - const auto &gfxCoreHelper = pDevice->getGfxCoreHelper(); - generateLocalIDs(expectedLocalIds, expectedImplicitArgs.simdWidth, localSize, workgroupDimOrder, false, grfSize, gfxCoreHelper); + generateLocalIDs(expectedLocalIds, expectedImplicitArgs.simdWidth, localSize, workgroupDimOrder, false, grfSize); auto localIdsProgrammingSize = implicitArgsProgrammingSize - sizeof(ImplicitArgs); size_t sizeForLocalIds = PerThreadDataHelper::getPerThreadDataSizeTotal(expectedImplicitArgs.simdWidth, grfSize, 3u, totalLocalSize); @@ -1412,8 +1410,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, HardwareCommandsImplicitArgsTests, givenKernelWithI auto grfSize = ImplicitArgsHelper::getGrfSize(expectedImplicitArgs.simdWidth); auto expectedLocalIds = alignedMalloc(implicitArgsProgrammingSize - sizeof(ImplicitArgs), MemoryConstants::cacheLineSize); - const auto &gfxCoreHelper = pDevice->getGfxCoreHelper(); - generateLocalIDs(expectedLocalIds, expectedImplicitArgs.simdWidth, localSize, expectedDimOrder, false, grfSize, gfxCoreHelper); + generateLocalIDs(expectedLocalIds, expectedImplicitArgs.simdWidth, localSize, expectedDimOrder, false, grfSize); auto localIdsProgrammingSize = implicitArgsProgrammingSize - sizeof(ImplicitArgs); size_t sizeForLocalIds = PerThreadDataHelper::getPerThreadDataSizeTotal(expectedImplicitArgs.simdWidth, grfSize, 3u, totalLocalSize); diff --git a/shared/source/command_container/command_encoder_bdw_and_later.inl b/shared/source/command_container/command_encoder_bdw_and_later.inl index 19c7e9eef6..0722f99a9b 100644 --- a/shared/source/command_container/command_encoder_bdw_and_later.inl +++ b/shared/source/command_container/command_encoder_bdw_and_later.inl @@ -183,7 +183,7 @@ void EncodeDispatchKernel::encode(CommandContainer &container, EncodeDis auto implicitArgsCrossThreadPtr = ptrOffset(const_cast(reinterpret_cast(args.dispatchInterface->getCrossThreadData())), kernelDescriptor.payloadMappings.implicitArgs.implicitArgsBuffer); *implicitArgsCrossThreadPtr = implicitArgsGpuVA; - ptr = NEO::ImplicitArgsHelper::patchImplicitArgs(ptr, *pImplicitArgs, kernelDescriptor, {}, gfxCoreHelper); + ptr = NEO::ImplicitArgsHelper::patchImplicitArgs(ptr, *pImplicitArgs, kernelDescriptor, {}); } memcpy_s(ptr, sizeCrossThreadData, diff --git a/shared/source/command_container/command_encoder_xehp_and_later.inl b/shared/source/command_container/command_encoder_xehp_and_later.inl index a14be7c67f..d6e711509e 100644 --- a/shared/source/command_container/command_encoder_xehp_and_later.inl +++ b/shared/source/command_container/command_encoder_xehp_and_later.inl @@ -218,7 +218,7 @@ void EncodeDispatchKernel::encode(CommandContainer &container, EncodeDis if (pImplicitArgs) { offsetThreadData -= sizeof(ImplicitArgs); pImplicitArgs->localIdTablePtr = heap->getGraphicsAllocation()->getGpuAddress() + heap->getUsed() - iohRequiredSize; - ptr = NEO::ImplicitArgsHelper::patchImplicitArgs(ptr, *pImplicitArgs, kernelDescriptor, std::make_pair(localIdsGenerationByRuntime, requiredWorkgroupOrder), gfxCoreHelper); + ptr = NEO::ImplicitArgsHelper::patchImplicitArgs(ptr, *pImplicitArgs, kernelDescriptor, std::make_pair(localIdsGenerationByRuntime, requiredWorkgroupOrder)); } if (sizeCrossThreadData > 0) { diff --git a/shared/source/helpers/aarch64/local_id_gen.cpp b/shared/source/helpers/aarch64/local_id_gen.cpp index 4169785bf7..8a3cd04ea7 100644 --- a/shared/source/helpers/aarch64/local_id_gen.cpp +++ b/shared/source/helpers/aarch64/local_id_gen.cpp @@ -8,7 +8,6 @@ #include "shared/source/helpers/local_id_gen.h" #include "shared/source/helpers/aligned_memory.h" -#include "shared/source/helpers/gfx_core_helper.h" #include "shared/source/helpers/local_id_gen_special.inl" #include "shared/source/utilities/cpu_info.h" @@ -42,9 +41,8 @@ LocalIDHelper::LocalIDHelper() { LocalIDHelper LocalIDHelper::initializer; -void generateLocalIDs(void *buffer, uint16_t simd, const std::array &localWorkgroupSize, const std::array &dimensionsOrder, bool isImageOnlyKernel, uint32_t grfSize, const GfxCoreHelper &gfxCoreHelper) { - bool localIdsGeneratedByHw = false; - auto threadsPerWorkGroup = static_cast(gfxCoreHelper.calculateNumThreadsPerThreadGroup(simd, static_cast(localWorkgroupSize[0] * localWorkgroupSize[1] * localWorkgroupSize[2]), grfSize, localIdsGeneratedByHw)); +void generateLocalIDs(void *buffer, uint16_t simd, const std::array &localWorkgroupSize, const std::array &dimensionsOrder, bool isImageOnlyKernel, uint32_t grfSize) { + auto threadsPerWorkGroup = static_cast(getThreadsPerWG(simd, static_cast(localWorkgroupSize[0] * localWorkgroupSize[1] * localWorkgroupSize[2]))); bool useLayoutForImages = isImageOnlyKernel && isCompatibleWithLayoutForImages(localWorkgroupSize, dimensionsOrder, simd); if (useLayoutForImages) { generateLocalIDsWithLayoutForImages(buffer, localWorkgroupSize, simd); diff --git a/shared/source/helpers/local_id_gen.h b/shared/source/helpers/local_id_gen.h index ab88965571..94fb33dd5e 100644 --- a/shared/source/helpers/local_id_gen.h +++ b/shared/source/helpers/local_id_gen.h @@ -12,7 +12,6 @@ #include namespace NEO { -class GfxCoreHelper; inline uint32_t getGRFsPerThread(uint32_t simd, uint32_t grfSize) { return (simd == 32 && grfSize == 32) ? 2 : 1; } @@ -59,7 +58,7 @@ void generateLocalIDsSimd(void *b, const std::array &localWorkgroup const std::array &dimensionsOrder, bool chooseMaxRowSize); void generateLocalIDs(void *buffer, uint16_t simd, const std::array &localWorkgroupSize, - const std::array &dimensionsOrder, bool isImageOnlyKernel, uint32_t grfSize, const NEO::GfxCoreHelper &gfxCoreHelper); + const std::array &dimensionsOrder, bool isImageOnlyKernel, uint32_t grfSize); void generateLocalIDsWithLayoutForImages(void *b, const std::array &localWorkgroupSize, uint16_t simd); bool isCompatibleWithLayoutForImages(const std::array &localWorkgroupSize, const std::array &dimensionsOrder, uint16_t simd); diff --git a/shared/source/helpers/x86_64/local_id_gen.cpp b/shared/source/helpers/x86_64/local_id_gen.cpp index 3c7a9def07..18c464284f 100644 --- a/shared/source/helpers/x86_64/local_id_gen.cpp +++ b/shared/source/helpers/x86_64/local_id_gen.cpp @@ -8,7 +8,6 @@ #include "shared/source/helpers/local_id_gen.h" #include "shared/source/helpers/aligned_memory.h" -#include "shared/source/helpers/gfx_core_helper.h" #include "shared/source/helpers/local_id_gen_special.inl" #include "shared/source/utilities/cpu_info.h" @@ -45,9 +44,8 @@ LocalIDHelper::LocalIDHelper() { LocalIDHelper LocalIDHelper::initializer; // traditional function to generate local IDs -void generateLocalIDs(void *buffer, uint16_t simd, const std::array &localWorkgroupSize, const std::array &dimensionsOrder, bool isImageOnlyKernel, uint32_t grfSize, const GfxCoreHelper &gfxCoreHelper) { - bool localIdsGeneratedByHw = false; - auto threadsPerWorkGroup = static_cast(gfxCoreHelper.calculateNumThreadsPerThreadGroup(simd, static_cast(localWorkgroupSize[0] * localWorkgroupSize[1] * localWorkgroupSize[2]), grfSize, localIdsGeneratedByHw)); +void generateLocalIDs(void *buffer, uint16_t simd, const std::array &localWorkgroupSize, const std::array &dimensionsOrder, bool isImageOnlyKernel, uint32_t grfSize) { + auto threadsPerWorkGroup = static_cast(getThreadsPerWG(simd, static_cast(localWorkgroupSize[0] * localWorkgroupSize[1] * localWorkgroupSize[2]))); bool useLayoutForImages = isImageOnlyKernel && isCompatibleWithLayoutForImages(localWorkgroupSize, dimensionsOrder, simd); if (useLayoutForImages) { generateLocalIDsWithLayoutForImages(buffer, localWorkgroupSize, simd); diff --git a/shared/source/kernel/implicit_args.h b/shared/source/kernel/implicit_args.h index 9046809586..1eb6090cc7 100644 --- a/shared/source/kernel/implicit_args.h +++ b/shared/source/kernel/implicit_args.h @@ -15,7 +15,6 @@ namespace NEO { struct KernelDescriptor; -class GfxCoreHelper; struct ImplicitArgs { uint8_t structSize; @@ -52,6 +51,6 @@ namespace ImplicitArgsHelper { std::array getDimensionOrderForLocalIds(const uint8_t *workgroupDimensionsOrder, std::optional> hwGenerationOfLocalIdsParams); uint32_t getGrfSize(uint32_t simd); uint32_t getSizeForImplicitArgsPatching(const ImplicitArgs *pImplicitArgs, const KernelDescriptor &kernelDescriptor); -void *patchImplicitArgs(void *ptrToPatch, const ImplicitArgs &implicitArgs, const KernelDescriptor &kernelDescriptor, std::optional> hwGenerationOfLocalIdsParams, const GfxCoreHelper &gfxCoreHelper); +void *patchImplicitArgs(void *ptrToPatch, const ImplicitArgs &implicitArgs, const KernelDescriptor &kernelDescriptor, std::optional> hwGenerationOfLocalIdsParams); } // namespace ImplicitArgsHelper } // namespace NEO diff --git a/shared/source/kernel/implicit_args_helper.cpp b/shared/source/kernel/implicit_args_helper.cpp index 7b0b7ecdc8..24f783a705 100644 --- a/shared/source/kernel/implicit_args_helper.cpp +++ b/shared/source/kernel/implicit_args_helper.cpp @@ -62,7 +62,7 @@ uint32_t getSizeForImplicitArgsPatching(const ImplicitArgs *pImplicitArgs, const } } -void *patchImplicitArgs(void *ptrToPatch, const ImplicitArgs &implicitArgs, const KernelDescriptor &kernelDescriptor, std::optional> hwGenerationOfLocalIdsParams, const GfxCoreHelper &gfxCoreHelper) { +void *patchImplicitArgs(void *ptrToPatch, const ImplicitArgs &implicitArgs, const KernelDescriptor &kernelDescriptor, std::optional> hwGenerationOfLocalIdsParams) { auto totalSizeToProgram = getSizeForImplicitArgsPatching(&implicitArgs, kernelDescriptor); auto retVal = ptrOffset(ptrToPatch, totalSizeToProgram); @@ -80,7 +80,7 @@ void *patchImplicitArgs(void *ptrToPatch, const ImplicitArgs &implicitArgs, cons static_cast(implicitArgs.localSizeY), static_cast(implicitArgs.localSizeZ)}}, dimensionOrder, - false, grfSize, gfxCoreHelper); + false, grfSize); auto sizeForLocalIdsProgramming = totalSizeToProgram - sizeof(NEO::ImplicitArgs); ptrToPatch = ptrOffset(ptrToPatch, sizeForLocalIdsProgramming); } diff --git a/shared/source/kernel/local_ids_cache.cpp b/shared/source/kernel/local_ids_cache.cpp index f678b72fbb..6fb149297a 100644 --- a/shared/source/kernel/local_ids_cache.cpp +++ b/shared/source/kernel/local_ids_cache.cpp @@ -47,7 +47,7 @@ void LocalIdsCache::setLocalIdsForEntry(LocalIdsCacheEntry &entry, void *destina std::memcpy(destination, entry.localIdsData, entry.localIdsSize); } -void LocalIdsCache::setLocalIdsForGroup(const Vec3 &group, void *destination, const GfxCoreHelper &gfxCoreHelper) { +void LocalIdsCache::setLocalIdsForGroup(const Vec3 &group, void *destination) { auto setLocalIdsLock = lock(); LocalIdsCacheEntry *leastAccessedEntry = &cache[0]; for (auto &cacheEntry : cache) { @@ -60,11 +60,11 @@ void LocalIdsCache::setLocalIdsForGroup(const Vec3 &group, void *desti } } - commitNewEntry(*leastAccessedEntry, group, gfxCoreHelper); + commitNewEntry(*leastAccessedEntry, group); setLocalIdsForEntry(*leastAccessedEntry, destination); } -void LocalIdsCache::commitNewEntry(LocalIdsCacheEntry &entry, const Vec3 &group, const GfxCoreHelper &gfxCoreHelper) { +void LocalIdsCache::commitNewEntry(LocalIdsCacheEntry &entry, const Vec3 &group) { entry.localIdsSize = getLocalIdsSizeForGroup(group); entry.groupSize = group; entry.accessCounter = 0U; @@ -74,7 +74,7 @@ void LocalIdsCache::commitNewEntry(LocalIdsCacheEntry &entry, const Vec3(simdSize), - {group[0], group[1], group[2]}, wgDimOrder, usesOnlyImages, grfSize, gfxCoreHelper); + {group[0], group[1], group[2]}, wgDimOrder, usesOnlyImages, grfSize); } } // namespace NEO \ No newline at end of file diff --git a/shared/source/kernel/local_ids_cache.h b/shared/source/kernel/local_ids_cache.h index 26955ed6b0..f7fb37d262 100644 --- a/shared/source/kernel/local_ids_cache.h +++ b/shared/source/kernel/local_ids_cache.h @@ -12,7 +12,7 @@ #include namespace NEO { -class GfxCoreHelper; + class LocalIdsCache { public: struct LocalIdsCacheEntry { @@ -30,13 +30,13 @@ class LocalIdsCache { LocalIdsCache(size_t cacheSize, std::array wgDimOrder, uint8_t simdSize, uint8_t grfSize, bool usesOnlyImages = false); ~LocalIdsCache(); - void setLocalIdsForGroup(const Vec3 &group, void *destination, const GfxCoreHelper &gfxCoreHelper); + void setLocalIdsForGroup(const Vec3 &group, void *destination); size_t getLocalIdsSizeForGroup(const Vec3 &group) const; size_t getLocalIdsSizePerThread() const; protected: void setLocalIdsForEntry(LocalIdsCacheEntry &entry, void *destination); - void commitNewEntry(LocalIdsCacheEntry &entry, const Vec3 &group, const GfxCoreHelper &gfxCoreHelper); + void commitNewEntry(LocalIdsCacheEntry &entry, const Vec3 &group); std::unique_lock lock(); StackVec cache; diff --git a/shared/test/unit_test/helpers/local_id_tests.cpp b/shared/test/unit_test/helpers/local_id_tests.cpp index 661277ce9b..4cbf58c52e 100644 --- a/shared/test/unit_test/helpers/local_id_tests.cpp +++ b/shared/test/unit_test/helpers/local_id_tests.cpp @@ -7,10 +7,8 @@ #include "shared/source/helpers/aligned_memory.h" #include "shared/source/helpers/basic_math.h" -#include "shared/source/helpers/gfx_core_helper.h" #include "shared/source/helpers/local_id_gen.h" #include "shared/source/helpers/ptr_math.h" -#include "shared/test/common/helpers/default_hw_info.h" #include "shared/test/common/helpers/unit_test_helper.h" #include "shared/test/common/test_macros/hw_test.h" @@ -77,16 +75,14 @@ TEST(LocalID, GivenSimd1WhenGettingPerThreadSizeLocalIdsThenValueIsEqualGrfSize) EXPECT_EQ(grfSize, getPerThreadSizeLocalIDs(simd, grfSize)); } - -TEST(LocalIdTest, givenVariadicGrfSizeWhenLocalSizesAreEmittedThenUseFullRowSize) { +TEST(LocalID, givenVariadicGrfSizeWhenLocalSizesAreEmittedThenUseFullRowSize) { auto localIdsPtr = allocateAlignedMemory(3 * 64u, MemoryConstants::cacheLineSize); uint16_t *localIdsView = reinterpret_cast(localIdsPtr.get()); std::array localSizes = {{2u, 2u, 1u}}; std::array dimensionsOrder = {{0u, 1u, 2u}}; - auto gfxCoreHelper = GfxCoreHelper::create(defaultHwInfo->platform.eRenderCoreFamily); - generateLocalIDs(localIdsPtr.get(), 16u, localSizes, dimensionsOrder, false, 64u, *gfxCoreHelper.get()); + generateLocalIDs(localIdsPtr.get(), 16u, localSizes, dimensionsOrder, false, 64u); EXPECT_EQ(localIdsView[0], 0u); EXPECT_EQ(localIdsView[1], 1u); EXPECT_EQ(localIdsView[2], 0u); @@ -281,42 +277,37 @@ struct LocalIDFixture : ::testing::TestWithParamplatform.eRenderCoreFamily); generateLocalIDs(buffer, simd, std::array{{static_cast(localWorkSizeX), static_cast(localWorkSizeY), static_cast(localWorkSizeZ)}}, - std::array{{0, 1, 2}}, false, grfSize, *gfxCoreHelper.get()); + std::array{{0, 1, 2}}, false, grfSize); validateIDWithinLimits(simd, localWorkSizeX, localWorkSizeY, localWorkSizeZ, UnitTestHelper::useFullRowForLocalIdsGeneration); } HWTEST_P(LocalIDFixture, WhenGeneratingLocalIdsThenAllWorkItemsCovered) { - auto gfxCoreHelper = GfxCoreHelper::create(defaultHwInfo->platform.eRenderCoreFamily); generateLocalIDs(buffer, simd, std::array{{static_cast(localWorkSizeX), static_cast(localWorkSizeY), static_cast(localWorkSizeZ)}}, - std::array{{0, 1, 2}}, false, grfSize, *gfxCoreHelper.get()); + std::array{{0, 1, 2}}, false, grfSize); validateAllWorkItemsCovered(simd, localWorkSizeX, localWorkSizeY, localWorkSizeZ, UnitTestHelper::useFullRowForLocalIdsGeneration); } HWTEST_P(LocalIDFixture, WhenWalkOrderIsXyzThenProperLocalIdsAreGenerated) { auto dimensionsOrder = std::array{{0, 1, 2}}; - auto gfxCoreHelper = GfxCoreHelper::create(defaultHwInfo->platform.eRenderCoreFamily); generateLocalIDs(buffer, simd, std::array{{static_cast(localWorkSizeX), static_cast(localWorkSizeY), static_cast(localWorkSizeZ)}}, - dimensionsOrder, false, grfSize, *gfxCoreHelper.get()); + dimensionsOrder, false, grfSize); validateAllWorkItemsCovered(simd, localWorkSizeX, localWorkSizeY, localWorkSizeZ, UnitTestHelper::useFullRowForLocalIdsGeneration); validateWalkOrder(simd, localWorkSizeX, localWorkSizeY, localWorkSizeZ, dimensionsOrder); } HWTEST_P(LocalIDFixture, WhenWalkOrderIsYxzThenProperLocalIdsAreGenerated) { auto dimensionsOrder = std::array{{1, 0, 2}}; - auto gfxCoreHelper = GfxCoreHelper::create(defaultHwInfo->platform.eRenderCoreFamily); generateLocalIDs(buffer, simd, std::array{{static_cast(localWorkSizeX), static_cast(localWorkSizeY), static_cast(localWorkSizeZ)}}, - dimensionsOrder, false, grfSize, *gfxCoreHelper.get()); + dimensionsOrder, false, grfSize); validateAllWorkItemsCovered(simd, localWorkSizeX, localWorkSizeY, localWorkSizeZ, UnitTestHelper::useFullRowForLocalIdsGeneration); validateWalkOrder(simd, localWorkSizeX, localWorkSizeY, localWorkSizeZ, dimensionsOrder); } HWTEST_P(LocalIDFixture, WhenWalkOrderIsZyxThenProperLocalIdsAreGenerated) { auto dimensionsOrder = std::array{{2, 1, 0}}; - auto gfxCoreHelper = GfxCoreHelper::create(defaultHwInfo->platform.eRenderCoreFamily); generateLocalIDs(buffer, simd, std::array{{static_cast(localWorkSizeX), static_cast(localWorkSizeY), static_cast(localWorkSizeZ)}}, - dimensionsOrder, false, grfSize, *gfxCoreHelper.get()); + dimensionsOrder, false, grfSize); validateAllWorkItemsCovered(simd, localWorkSizeX, localWorkSizeY, localWorkSizeZ, UnitTestHelper::useFullRowForLocalIdsGeneration); validateWalkOrder(simd, localWorkSizeX, localWorkSizeY, localWorkSizeZ, dimensionsOrder); } @@ -344,8 +335,8 @@ struct LocalIdsLayoutForImagesTest : ::testing::TestWithParam(memory.get()); EXPECT_TRUE(isCompatibleWithLayoutForImages(localWorkSize, dimensionsOrder, simd)); - auto gfxCoreHelper = GfxCoreHelper::create(defaultHwInfo->platform.eRenderCoreFamily); - generateLocalIDs(buffer, simd, localWorkSize, dimensionsOrder, true, grfSize, *gfxCoreHelper.get()); + generateLocalIDs(buffer, simd, localWorkSize, dimensionsOrder, true, grfSize); } void validateGRF() { uint32_t totalLocalIds = localWorkSize.at(0) * localWorkSize.at(1); @@ -457,9 +447,9 @@ TEST_P(LocalIdsLayoutTest, givenLocalWorkgroupSize4x4x1WhenGenerateLocalIdsThenH auto alignedMemory2 = allocateAlignedMemory(size, 32); auto buffer2 = reinterpret_cast(alignedMemory2.get()); memset(buffer2, 0xff, size); - auto gfxCoreHelper = GfxCoreHelper::create(defaultHwInfo->platform.eRenderCoreFamily); - generateLocalIDs(buffer1, simd, localWorkSize, dimensionsOrder, false, grfSize, *gfxCoreHelper.get()); - generateLocalIDs(buffer2, simd, localWorkSize, dimensionsOrder, true, grfSize, *gfxCoreHelper.get()); + + generateLocalIDs(buffer1, simd, localWorkSize, dimensionsOrder, false, grfSize); + generateLocalIDs(buffer2, simd, localWorkSize, dimensionsOrder, true, grfSize); for (auto i = 0u; i < elemsInBuffer / rowWidth; i++) { for (auto j = 0u; j < rowWidth; j++) { diff --git a/shared/test/unit_test/kernel/implicit_args_helper_tests.cpp b/shared/test/unit_test/kernel/implicit_args_helper_tests.cpp index e5401b893d..aefe5d7590 100644 --- a/shared/test/unit_test/kernel/implicit_args_helper_tests.cpp +++ b/shared/test/unit_test/kernel/implicit_args_helper_tests.cpp @@ -6,13 +6,11 @@ */ #include "shared/source/helpers/aligned_memory.h" -#include "shared/source/helpers/gfx_core_helper.h" #include "shared/source/helpers/hw_walk_order.h" #include "shared/source/helpers/per_thread_data.h" #include "shared/source/helpers/ptr_math.h" #include "shared/source/kernel/implicit_args.h" #include "shared/source/kernel/kernel_descriptor.h" -#include "shared/test/common/helpers/default_hw_info.h" #include "shared/test/common/test_macros/hw_test.h" using namespace NEO; @@ -119,8 +117,8 @@ TEST(ImplicitArgsHelperTest, givenImplicitArgsWithoutImplicitArgsBufferOffsetInP uint8_t pattern = 0xcd; memset(memoryToPatch.get(), pattern, totalSizeForPatching); - auto gfxCoreHelper = GfxCoreHelper::create(defaultHwInfo->platform.eRenderCoreFamily); - auto retVal = ImplicitArgsHelper::patchImplicitArgs(memoryToPatch.get(), implicitArgs, kernelDescriptor, {}, *gfxCoreHelper.get()); + + auto retVal = ImplicitArgsHelper::patchImplicitArgs(memoryToPatch.get(), implicitArgs, kernelDescriptor, {}); EXPECT_EQ(retVal, ptrOffset(memoryToPatch.get(), totalSizeForPatching)); @@ -160,8 +158,8 @@ TEST(ImplicitArgsHelperTest, givenImplicitArgsWithImplicitArgsBufferOffsetInPayl uint8_t pattern = 0xcd; memset(memoryToPatch.get(), pattern, totalSizeForPatching); - auto gfxCoreHelper = GfxCoreHelper::create(defaultHwInfo->platform.eRenderCoreFamily); - auto retVal = ImplicitArgsHelper::patchImplicitArgs(memoryToPatch.get(), implicitArgs, kernelDescriptor, {}, *gfxCoreHelper.get()); + + auto retVal = ImplicitArgsHelper::patchImplicitArgs(memoryToPatch.get(), implicitArgs, kernelDescriptor, {}); EXPECT_EQ(retVal, ptrOffset(memoryToPatch.get(), totalSizeForPatching)); diff --git a/shared/test/unit_test/kernel/local_ids_cache_tests.cpp b/shared/test/unit_test/kernel/local_ids_cache_tests.cpp index 602832f91b..89f585ddb8 100644 --- a/shared/test/unit_test/kernel/local_ids_cache_tests.cpp +++ b/shared/test/unit_test/kernel/local_ids_cache_tests.cpp @@ -7,11 +7,8 @@ #include "shared/source/command_stream/linear_stream.h" #include "shared/source/helpers/aligned_memory.h" -#include "shared/source/helpers/gfx_core_helper.h" -#include "shared/source/helpers/hw_info.h" #include "shared/source/helpers/per_thread_data.h" #include "shared/source/kernel/local_ids_cache.h" -#include "shared/test/common/helpers/default_hw_info.h" #include "shared/test/common/mocks/mock_graphics_allocation.h" #include "shared/test/common/test_macros/test.h" @@ -38,8 +35,7 @@ using LocalIdsCacheTest = Test; TEST_F(LocalIdsCacheTest, GivenCacheMissWhenGetLocalIdsForGroupThenNewEntryIsCommitedIntoLeastUsedEntry) { localIdsCache->cache.resize(2); localIdsCache->cache[0].accessCounter = 2U; - auto gfxCoreHelper = NEO::GfxCoreHelper::create(NEO::defaultHwInfo->platform.eRenderCoreFamily); - localIdsCache->setLocalIdsForGroup(groupSize, perThreadData.data(), *gfxCoreHelper.get()); + localIdsCache->setLocalIdsForGroup(groupSize, perThreadData.data()); EXPECT_EQ(groupSize, localIdsCache->cache[1].groupSize); EXPECT_NE(nullptr, localIdsCache->cache[1].localIdsData); @@ -54,8 +50,7 @@ TEST_F(LocalIdsCacheTest, GivenEntryInCacheWhenGetLocalIdsForGroupThenEntryFromC localIdsCache->cache[0].localIdsSize = 512U; localIdsCache->cache[0].localIdsSizeAllocated = 512U; localIdsCache->cache[0].accessCounter = 1U; - auto gfxCoreHelper = NEO::GfxCoreHelper::create(NEO::defaultHwInfo->platform.eRenderCoreFamily); - localIdsCache->setLocalIdsForGroup(groupSize, perThreadData.data(), *gfxCoreHelper.get()); + localIdsCache->setLocalIdsForGroup(groupSize, perThreadData.data()); EXPECT_EQ(2U, localIdsCache->cache[0].accessCounter); } @@ -68,8 +63,7 @@ TEST_F(LocalIdsCacheTest, GivenEntryWithBiggerBufferAllocatedWhenGetLocalIdsForG const auto localIdsData = localIdsCache->cache[0].localIdsData; groupSize = {2, 1, 1}; - auto gfxCoreHelper = NEO::GfxCoreHelper::create(NEO::defaultHwInfo->platform.eRenderCoreFamily); - localIdsCache->setLocalIdsForGroup(groupSize, perThreadData.data(), *gfxCoreHelper.get()); + localIdsCache->setLocalIdsForGroup(groupSize, perThreadData.data()); EXPECT_EQ(1U, localIdsCache->cache[0].accessCounter); EXPECT_EQ(192U, localIdsCache->cache[0].localIdsSize); EXPECT_EQ(512U, localIdsCache->cache[0].localIdsSizeAllocated);