/* * Copyright (C) 2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/basic_math.h" #include "shared/source/helpers/hw_info.h" #include "shared/source/helpers/hw_walk_order.h" #include "shared/source/helpers/per_thread_data.h" #include "shared/source/helpers/string.h" #include "shared/source/helpers/vec.h" #include "shared/source/kernel/implicit_args.h" #include "shared/source/kernel/kernel_descriptor.h" namespace NEO { namespace ImplicitArgsHelper { std::array getDimensionOrderForLocalIds(const uint8_t *workgroupDimensionsOrder, std::optional> hwGenerationOfLocalIdsParams) { auto localIdsGeneratedByRuntime = !hwGenerationOfLocalIdsParams.has_value() || hwGenerationOfLocalIdsParams.value().first; if (localIdsGeneratedByRuntime) { UNRECOVERABLE_IF(!workgroupDimensionsOrder); return {{ workgroupDimensionsOrder[0], workgroupDimensionsOrder[1], workgroupDimensionsOrder[2], }}; } auto walkOrderForHwGenerationOfLocalIds = hwGenerationOfLocalIdsParams.value().second; UNRECOVERABLE_IF(walkOrderForHwGenerationOfLocalIds >= HwWalkOrderHelper::walkOrderPossibilties); return HwWalkOrderHelper::compatibleDimensionOrders[walkOrderForHwGenerationOfLocalIds]; } uint32_t getGrfSize(uint32_t simd, uint32_t grfSize) { if (simd == 1u) { return 3 * sizeof(uint16_t); } return grfSize; } uint32_t getSizeForImplicitArgsPatching(const ImplicitArgs *pImplicitArgs, const KernelDescriptor &kernelDescriptor, const HardwareInfo &hardwareInfo) { if (!pImplicitArgs) { return 0; } auto implicitArgsSize = static_cast(sizeof(NEO::ImplicitArgs)); auto patchImplicitArgsBufferInCrossThread = NEO::isValidOffset<>(kernelDescriptor.payloadMappings.implicitArgs.implicitArgsBuffer); if (patchImplicitArgsBufferInCrossThread) { return alignUp(implicitArgsSize, MemoryConstants::cacheLineSize); } else { auto simdSize = pImplicitArgs->simdWidth; auto grfSize = NEO::ImplicitArgsHelper::getGrfSize(simdSize, hardwareInfo.capabilityTable.grfSize); Vec3 localWorkSize = {pImplicitArgs->localSizeX, pImplicitArgs->localSizeY, pImplicitArgs->localSizeZ}; auto itemsInGroup = Math::computeTotalElementsCount(localWorkSize); uint32_t localIdsSizeNeeded = alignUp(static_cast(NEO::PerThreadDataHelper::getPerThreadDataSizeTotal( simdSize, grfSize, 3u, itemsInGroup)), MemoryConstants::cacheLineSize); return implicitArgsSize + localIdsSizeNeeded; } } void *patchImplicitArgs(void *ptrToPatch, const ImplicitArgs &implicitArgs, const KernelDescriptor &kernelDescriptor, const HardwareInfo &hardwareInfo, std::optional> hwGenerationOfLocalIdsParams) { auto totalSizeToProgram = getSizeForImplicitArgsPatching(&implicitArgs, kernelDescriptor, hardwareInfo); auto retVal = ptrOffset(ptrToPatch, totalSizeToProgram); auto patchImplicitArgsBufferInCrossThread = NEO::isValidOffset<>(kernelDescriptor.payloadMappings.implicitArgs.implicitArgsBuffer); if (!patchImplicitArgsBufferInCrossThread) { auto simdSize = implicitArgs.simdWidth; auto grfSize = getGrfSize(simdSize, hardwareInfo.capabilityTable.grfSize); auto dimensionOrder = getDimensionOrderForLocalIds(kernelDescriptor.kernelAttributes.workgroupDimensionsOrder, hwGenerationOfLocalIdsParams); NEO::generateLocalIDs( ptrToPatch, simdSize, std::array{{static_cast(implicitArgs.localSizeX), static_cast(implicitArgs.localSizeY), static_cast(implicitArgs.localSizeZ)}}, dimensionOrder, false, grfSize); auto sizeForLocalIdsProgramming = totalSizeToProgram - sizeof(NEO::ImplicitArgs); ptrToPatch = ptrOffset(ptrToPatch, sizeForLocalIdsProgramming); } memcpy_s(ptrToPatch, sizeof(NEO::ImplicitArgs), &implicitArgs, sizeof(NEO::ImplicitArgs)); return retVal; } } // namespace ImplicitArgsHelper } // namespace NEO