/*
 * Copyright (C) 2017-2021 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

// NOTE(review): in this copy of the file the text between angle brackets looks
// like it has been lost (bare `#include`, `static_cast(`, `template bool ...`,
// `std::make_unique>(`, `std::unique_ptr ret`). The code tokens below are kept
// exactly as found; restore the template arguments / header names from the
// authoritative source before building.

#include "opencl/source/built_ins/builtins_dispatch_builder.h"

#include "shared/source/built_ins/built_ins.h"
#include "shared/source/built_ins/sip.h"
#include "shared/source/compiler_interface/compiler_interface.h"
#include "shared/source/helpers/basic_math.h"
#include "shared/source/helpers/built_ins_helper.h"
#include "shared/source/helpers/debug_helpers.h"

#include "opencl/source/built_ins/aux_translation_builtin.h"
#include "opencl/source/built_ins/built_ins.inl"
#include "opencl/source/built_ins/vme_dispatch_builder.h"
#include "opencl/source/cl_device/cl_device.h"
#include "opencl/source/helpers/convert_color.h"
#include "opencl/source/helpers/dispatch_info_builder.h"
#include "opencl/source/kernel/kernel.h"
#include "opencl/source/mem_obj/image.h"
#include "opencl/source/program/program.h"

#include "compiler_options.h"

#include // NOTE(review): header name missing in this copy
#include // NOTE(review): header name missing in this copy

namespace NEO {

// Dispatch builder for the buffer-to-buffer copy built-in (kernels are populated
// from EBuiltInOps::CopyBufferToBuffer).
//
// The copy is issued as up to three 1D walkers keyed by the destination address:
//   Left   - bytes before the first cache-line-aligned destination address,
//   Middle - the aligned bulk, one work item per (sizeof(uint32_t) * 4) bytes,
//   Right  - bytes after the last aligned address.
template <>
class BuiltInOp : public BuiltinDispatchInfoBuilder {
  public:
    // Public constructor: delegates to the protected one and asks it to populate the kernels.
    BuiltInOp(BuiltIns &kernelsLib, ClDevice &device) : BuiltInOp(kernelsLib, device, true) {}

    // Fills multiDispatchInfo with the Left/Middle/Right dispatches described by
    // multiDispatchInfo.peekBuiltinOpParams(). Always returns true.
    // NOTE(review): the template parameter list is missing in this copy; the casts
    // applied to the src/dst offsets below presumably target that parameter - confirm.
    template bool buildDispatchInfosTyped(MultiDispatchInfo &multiDispatchInfo) const {
        DispatchInfoBuilder kernelSplit1DBuilder(clDevice);
        auto &operationParams = multiDispatchInfo.peekBuiltinOpParams();

        // Absolute destination start address; the split is computed on dst alignment only.
        uintptr_t start = reinterpret_cast(operationParams.dstPtr) + operationParams.dstOffset.x;

        size_t middleAlignment = MemoryConstants::cacheLineSize;
        size_t middleElSize = sizeof(uint32_t) * 4;

        uintptr_t leftSize = start % middleAlignment;
        leftSize = (leftSize > 0) ? (middleAlignment - leftSize) : 0; // calc left leftover size
        leftSize = std::min(leftSize, operationParams.size.x);        // clamp left leftover size to requested size

        uintptr_t rightSize = (start + operationParams.size.x) % middleAlignment; // calc right leftover size
        rightSize = std::min(rightSize, operationParams.size.x - leftSize);       // clamp

        uintptr_t middleSizeBytes = operationParams.size.x - leftSize - rightSize; // calc middle size

        // corner case - fully optimized kernel requires DWORD alignment. If we don't have it, run slower, misaligned kernel
        const auto srcMiddleStart = reinterpret_cast(operationParams.srcPtr) + operationParams.srcOffset.x + leftSize;
        const auto srcMisalignment = srcMiddleStart % sizeof(uint32_t);
        const auto isSrcMisaligned = srcMisalignment != 0u;

        auto middleSizeEls = middleSizeBytes / middleElSize; // num work items in middle walker

        // Set-up ISA
        kernelSplit1DBuilder.setKernel(SplitDispatch::RegionCoordX::Left, kernLeftLeftover);
        if (isSrcMisaligned) {
            kernelSplit1DBuilder.setKernel(SplitDispatch::RegionCoordX::Middle, kernMiddleMisaligned);
        } else {
            kernelSplit1DBuilder.setKernel(SplitDispatch::RegionCoordX::Middle, kernMiddle);
        }
        kernelSplit1DBuilder.setKernel(SplitDispatch::RegionCoordX::Right, kernRightLeftover);

        // Set-up common kernel args
        // arg0 = source: SVM allocation, else mem object, else a raw pointer covering size + offset bytes.
        if (operationParams.srcSvmAlloc) {
            kernelSplit1DBuilder.setArgSvmAlloc(0, operationParams.srcPtr, operationParams.srcSvmAlloc);
        } else if (operationParams.srcMemObj) {
            kernelSplit1DBuilder.setArg(0, operationParams.srcMemObj);
        } else {
            kernelSplit1DBuilder.setArgSvm(0, operationParams.size.x + operationParams.srcOffset.x, operationParams.srcPtr, nullptr, CL_MEM_READ_ONLY);
        }
        // arg1 = destination, same precedence as the source.
        if (operationParams.dstSvmAlloc) {
            kernelSplit1DBuilder.setArgSvmAlloc(1, operationParams.dstPtr, operationParams.dstSvmAlloc);
        } else if (operationParams.dstMemObj) {
            kernelSplit1DBuilder.setArg(1, operationParams.dstMemObj);
        } else {
            kernelSplit1DBuilder.setArgSvm(1, operationParams.size.x + operationParams.dstOffset.x, operationParams.dstPtr, nullptr, 0u);
        }

        kernelSplit1DBuilder.setUnifiedMemorySyncRequirement(operationParams.unifiedMemoryArgsRequireMemSync);

        // Set-up srcOffset
        // Each region starts where the previous one ended.
        kernelSplit1DBuilder.setArg(SplitDispatch::RegionCoordX::Left, 2, static_cast(operationParams.srcOffset.x));
        kernelSplit1DBuilder.setArg(SplitDispatch::RegionCoordX::Middle, 2, static_cast(operationParams.srcOffset.x + leftSize));
        kernelSplit1DBuilder.setArg(SplitDispatch::RegionCoordX::Right, 2, static_cast(operationParams.srcOffset.x + leftSize + middleSizeBytes));

        // Set-up dstOffset
        kernelSplit1DBuilder.setArg(SplitDispatch::RegionCoordX::Left, 3, static_cast(operationParams.dstOffset.x));
        kernelSplit1DBuilder.setArg(SplitDispatch::RegionCoordX::Middle, 3, static_cast(operationParams.dstOffset.x + leftSize));
        kernelSplit1DBuilder.setArg(SplitDispatch::RegionCoordX::Right, 3, static_cast(operationParams.dstOffset.x + leftSize + middleSizeBytes));

        // The misaligned middle kernel additionally receives the source misalignment in bits (bytes * 8).
        if (isSrcMisaligned) {
            kernelSplit1DBuilder.setArg(SplitDispatch::RegionCoordX::Middle, 4, static_cast(srcMisalignment * 8));
        }

        // Set-up work sizes
        // Note for split walker, it would be just builder.SetDispatchGeometry(GWS, ELWS, OFFSET)
        kernelSplit1DBuilder.setDispatchGeometry(SplitDispatch::RegionCoordX::Left, Vec3{leftSize, 0, 0}, Vec3{0, 0, 0}, Vec3{0, 0, 0});
        kernelSplit1DBuilder.setDispatchGeometry(SplitDispatch::RegionCoordX::Middle, Vec3{middleSizeEls, 0, 0}, Vec3{0, 0, 0}, Vec3{0, 0, 0});
        kernelSplit1DBuilder.setDispatchGeometry(SplitDispatch::RegionCoordX::Right, Vec3{rightSize, 0, 0}, Vec3{0, 0, 0}, Vec3{0, 0, 0});
        kernelSplit1DBuilder.bake(multiDispatchInfo);

        return true;
    }

    // Virtual entry point; forwards to the typed implementation.
    bool buildDispatchInfos(MultiDispatchInfo &multiDispatchInfo) const override {
        return buildDispatchInfosTyped(multiDispatchInfo);
    }

  protected:
    Kernel *kernLeftLeftover = nullptr;
    Kernel *kernMiddle = nullptr;
    Kernel *kernMiddleMisaligned = nullptr;
    Kernel *kernRightLeftover = nullptr;

    // populateKernels == false lets a derived class load its own kernel variants instead.
    BuiltInOp(BuiltIns &kernelsLib, ClDevice &device, bool populateKernels)
        : BuiltinDispatchInfoBuilder(kernelsLib, device) {
        if (populateKernels) {
            populate(EBuiltInOps::CopyBufferToBuffer,
                     "",
                     "CopyBufferToBufferLeftLeftover", kernLeftLeftover,
                     "CopyBufferToBufferMiddle", kernMiddle,
                     "CopyBufferToBufferMiddleMisaligned", kernMiddleMisaligned,
                     "CopyBufferToBufferRightLeftover", kernRightLeftover);
        }
    }
};

// Stateless flavour of the buffer-to-buffer copy: reuses the base class logic but
// populates the same kernel names from EBuiltInOps::CopyBufferToBufferStateless,
// built with CompilerOptions::greaterThan4gbBuffersRequired.
template <>
class BuiltInOp : public BuiltInOp {
  public:
    // The base constructor is told not to populate; this class loads the stateless kernels itself.
    BuiltInOp(BuiltIns &kernelsLib, ClDevice &device)
        : BuiltInOp(kernelsLib, device, false) {
        populate(EBuiltInOps::CopyBufferToBufferStateless,
                 CompilerOptions::greaterThan4gbBuffersRequired,
                 "CopyBufferToBufferLeftLeftover", kernLeftLeftover,
                 "CopyBufferToBufferMiddle", kernMiddle,
                 "CopyBufferToBufferMiddleMisaligned", kernMiddleMisaligned,
                 "CopyBufferToBufferRightLeftover", kernRightLeftover);
    }

    bool buildDispatchInfos(MultiDispatchInfo &multiDispatchInfo) const override {
        return buildDispatchInfosTyped(multiDispatchInfo);
    }
};

// Dispatch builder for the rectangular buffer copy built-in (kernels populated from
// EBuiltInOps::CopyBufferRect). A single, non-split walker is used; the 2D or 3D
// kernel is selected from the region depth and the z offsets.
template <>
class BuiltInOp : public BuiltinDispatchInfoBuilder {
  public:
    BuiltInOp(BuiltIns &kernelsLib, ClDevice &device) : BuiltInOp(kernelsLib, device, true) {}

    // Fills multiDispatchInfo with one dispatch for the rectangular copy described by
    // multiDispatchInfo.peekBuiltinOpParams(). Always returns true.
    // OffsetType is the element type of the origin/pitch arrays passed to the kernel.
    // NOTE(review): the template parameter list is missing in this copy.
    template bool buildDispatchInfosTyped(MultiDispatchInfo &multiDispatchInfo) const {
        DispatchInfoBuilder kernelNoSplit3DBuilder(clDevice);
        auto &operationParams = multiDispatchInfo.peekBuiltinOpParams();

        size_t hostPtrSize = 0;
        bool is3D = false;

        if (operationParams.srcMemObj && operationParams.dstMemObj) {
            // Buffer-to-buffer: no host pointers expected.
            DEBUG_BREAK_IF(!((operationParams.srcPtr == nullptr) && (operationParams.dstPtr == nullptr)));
            is3D = (operationParams.size.z > 1) || (operationParams.srcOffset.z > 0) || (operationParams.dstOffset.z > 0);
        } else {
            // One side is a host pointer: compute its footprint, and decide 2D/3D from the
            // region depth and the z offset of the *other* side only (the host side's z
            // offset is folded into the pointer below when the 2D kernel is used).
            if (operationParams.srcPtr) {
                size_t origin[] = {operationParams.srcOffset.x, operationParams.srcOffset.y, operationParams.srcOffset.z};
                size_t region[] = {operationParams.size.x, operationParams.size.y, operationParams.size.z};
                hostPtrSize = Buffer::calculateHostPtrSize(origin, region, operationParams.srcRowPitch, operationParams.srcSlicePitch);
                is3D = (operationParams.size.z > 1) || (operationParams.dstOffset.z > 0);
            } else if (operationParams.dstPtr) {
                size_t origin[] = {operationParams.dstOffset.x, operationParams.dstOffset.y, operationParams.dstOffset.z};
                size_t region[] = {operationParams.size.x, operationParams.size.y, operationParams.size.z};
                hostPtrSize = Buffer::calculateHostPtrSize(origin, region, operationParams.dstRowPitch, operationParams.dstSlicePitch);
                is3D = (operationParams.size.z > 1) || (operationParams.srcOffset.z > 0);
            } else {
                // Neither two mem objects nor a host pointer: unexpected parameter combination.
                DEBUG_BREAK_IF(!false);
            }
        }

        // Set-up ISA
        // kernelBytes[1] is the 2D kernel, kernelBytes[2] the 3D kernel.
        int dimensions = is3D ? 3 : 2;
        kernelNoSplit3DBuilder.setKernel(kernelBytes[dimensions - 1]);

        size_t srcOffsetFromAlignedPtr = 0;
        size_t dstOffsetFromAlignedPtr = 0;

        // arg0 = src
        if (operationParams.srcMemObj) {
            kernelNoSplit3DBuilder.setArg(0, operationParams.srcMemObj);
        } else {
            void *srcPtrToSet = operationParams.srcPtr;
            if (!is3D) {
                // 2D path: advance to the requested slice, align the pointer down to 4 bytes and
                // carry the remainder into the x origin.
                auto srcPtr = ptrOffset(operationParams.srcPtr, operationParams.srcOffset.z * operationParams.srcSlicePitch);
                srcPtrToSet = alignDown(srcPtr, 4);
                srcOffsetFromAlignedPtr = ptrDiff(srcPtr, srcPtrToSet);
            }
            kernelNoSplit3DBuilder.setArgSvm(0, hostPtrSize, srcPtrToSet, nullptr, CL_MEM_READ_ONLY);
        }

        // arg1 = dst
        if (operationParams.dstMemObj) {
            kernelNoSplit3DBuilder.setArg(1, operationParams.dstMemObj);
        } else {
            void *dstPtrToSet = operationParams.dstPtr;
            if (!is3D) {
                // Same slice/alignment adjustment as for the source.
                auto dstPtr = ptrOffset(operationParams.dstPtr, operationParams.dstOffset.z * operationParams.dstSlicePitch);
                dstPtrToSet = alignDown(dstPtr, 4);
                dstOffsetFromAlignedPtr = ptrDiff(dstPtr, dstPtrToSet);
            }
            kernelNoSplit3DBuilder.setArgSvm(1, hostPtrSize, dstPtrToSet, nullptr, 0u);
        }

        // arg2 = srcOrigin
        OffsetType kSrcOrigin[4] = {static_cast(operationParams.srcOffset.x + srcOffsetFromAlignedPtr), static_cast(operationParams.srcOffset.y), static_cast(operationParams.srcOffset.z), 0};
        kernelNoSplit3DBuilder.setArg(2, sizeof(OffsetType) * 4, kSrcOrigin);

        // arg3 = dstOrigin
        OffsetType kDstOrigin[4] = {static_cast(operationParams.dstOffset.x + dstOffsetFromAlignedPtr), static_cast(operationParams.dstOffset.y), static_cast(operationParams.dstOffset.z), 0};
        kernelNoSplit3DBuilder.setArg(3, sizeof(OffsetType) * 4, kDstOrigin);

        // arg4 = srcPitch
        OffsetType kSrcPitch[2] = {static_cast(operationParams.srcRowPitch), static_cast(operationParams.srcSlicePitch)};
        kernelNoSplit3DBuilder.setArg(4, sizeof(OffsetType) * 2, kSrcPitch);

        // arg5 = dstPitch
        OffsetType kDstPitch[2] = {static_cast(operationParams.dstRowPitch), static_cast(operationParams.dstSlicePitch)};
        kernelNoSplit3DBuilder.setArg(5, sizeof(OffsetType) * 2, kDstPitch);

        // Set-up work sizes
        kernelNoSplit3DBuilder.setDispatchGeometry(operationParams.size, Vec3{0, 0, 0}, Vec3{0, 0, 0});
        kernelNoSplit3DBuilder.bake(multiDispatchInfo);

        return true;
    }

    // Virtual entry point; forwards to the typed implementation.
    bool buildDispatchInfos(MultiDispatchInfo &multiDispatchInfo) const override {
        return buildDispatchInfosTyped(multiDispatchInfo);
    }

  protected:
    // Indexed by (dimensions - 1). Slots 0 and 1 are both populated with the 2D kernel;
    // only slots 1 and 2 are selected by buildDispatchInfosTyped.
    Kernel *kernelBytes[3]{};

    // populateKernels == false lets a derived class load its own kernel variants instead.
    BuiltInOp(BuiltIns &kernelsLib, ClDevice &device, bool populateKernels)
        : BuiltinDispatchInfoBuilder(kernelsLib, device) {
        if (populateKernels) {
            populate(EBuiltInOps::CopyBufferRect,
                     "",
                     "CopyBufferRectBytes2d", kernelBytes[0],
                     "CopyBufferRectBytes2d", kernelBytes[1],
                     "CopyBufferRectBytes3d", kernelBytes[2]);
        }
    }
};

// Stateless flavour of the rectangular buffer copy: same logic, kernels populated from
// EBuiltInOps::CopyBufferRectStateless with CompilerOptions::greaterThan4gbBuffersRequired.
template <>
class BuiltInOp : public BuiltInOp {
  public:
    BuiltInOp(BuiltIns &kernelsLib, ClDevice &device)
        : BuiltInOp(kernelsLib, device, false) {
        populate(EBuiltInOps::CopyBufferRectStateless,
                 CompilerOptions::greaterThan4gbBuffersRequired,
                 "CopyBufferRectBytes2d", kernelBytes[0],
                 "CopyBufferRectBytes2d", kernelBytes[1],
                 "CopyBufferRectBytes3d", kernelBytes[2]);
    }

    bool buildDispatchInfos(MultiDispatchInfo &multiDispatchInfo) const override {
        return buildDispatchInfosTyped(multiDispatchInfo);
    }
};

// Dispatch builder for the buffer fill built-in (kernels populated from
// EBuiltInOps::FillBuffer). Uses the same Left/Middle/Right split on the destination
// address as the buffer copy, but the middle walker handles one uint32_t per work item.
// The fill pattern is read from operationParams.srcMemObj.
template <>
class BuiltInOp : public BuiltinDispatchInfoBuilder {
  public:
    BuiltInOp(BuiltIns &kernelsLib, ClDevice &device) : BuiltInOp(kernelsLib, device, true) {}

    // Fills multiDispatchInfo with the Left/Middle/Right fill dispatches. Always returns true.
    // NOTE(review): the template parameter list is missing in this copy; the casts applied
    // to the dst offsets and pattern sizes below presumably target that parameter - confirm.
    template bool buildDispatchInfosTyped(MultiDispatchInfo &multiDispatchInfo) const {
        DispatchInfoBuilder kernelSplit1DBuilder(clDevice);
        auto &operationParams = multiDispatchInfo.peekBuiltinOpParams();

        // Absolute destination start address drives the split.
        uintptr_t start = reinterpret_cast(operationParams.dstPtr) + operationParams.dstOffset.x;

        size_t middleAlignment = MemoryConstants::cacheLineSize;
        size_t middleElSize = sizeof(uint32_t);

        uintptr_t leftSize = start % middleAlignment;
        leftSize = (leftSize > 0) ? (middleAlignment - leftSize) : 0; // calc left leftover size
        leftSize = std::min(leftSize, operationParams.size.x);        // clamp left leftover size to requested size

        uintptr_t rightSize = (start + operationParams.size.x) % middleAlignment; // calc right leftover size
        rightSize = std::min(rightSize, operationParams.size.x - leftSize);       // clamp

        uintptr_t middleSizeBytes = operationParams.size.x - leftSize - rightSize; // calc middle size

        auto middleSizeEls = middleSizeBytes / middleElSize; // num work items in middle walker

        // Set-up ISA
        kernelSplit1DBuilder.setKernel(SplitDispatch::RegionCoordX::Left, kernLeftLeftover);
        kernelSplit1DBuilder.setKernel(SplitDispatch::RegionCoordX::Middle, kernMiddle);
        kernelSplit1DBuilder.setKernel(SplitDispatch::RegionCoordX::Right, kernRightLeftover);

        // A pattern mem object with zero offset is required, and the destination must be
        // either a mem object or an SVM allocation.
        DEBUG_BREAK_IF((operationParams.srcMemObj == nullptr) || (operationParams.srcOffset != 0));
        DEBUG_BREAK_IF((operationParams.dstMemObj == nullptr) && (operationParams.dstSvmAlloc == nullptr));

        // Set-up dstMemObj with buffer
        if (operationParams.dstSvmAlloc) {
            kernelSplit1DBuilder.setArgSvmAlloc(0, operationParams.dstPtr, operationParams.dstSvmAlloc);
        } else {
            kernelSplit1DBuilder.setArg(0, operationParams.dstMemObj);
        }

        // Set-up dstOffset
        kernelSplit1DBuilder.setArg(SplitDispatch::RegionCoordX::Left, 1, static_cast(operationParams.dstOffset.x));
        kernelSplit1DBuilder.setArg(SplitDispatch::RegionCoordX::Middle, 1, static_cast(operationParams.dstOffset.x + leftSize));
        kernelSplit1DBuilder.setArg(SplitDispatch::RegionCoordX::Right, 1, static_cast(operationParams.dstOffset.x + leftSize + middleSizeBytes));

        // Set-up srcMemObj with pattern
        auto graphicsAllocation = operationParams.srcMemObj->getMultiGraphicsAllocation().getDefaultGraphicsAllocation();
        kernelSplit1DBuilder.setArgSvm(2, operationParams.srcMemObj->getSize(), graphicsAllocation->getUnderlyingBuffer(), graphicsAllocation, CL_MEM_READ_ONLY);

        // Set-up patternSizeInEls
        // Left/Right receive the pattern size as-is; Middle receives it divided by middleElSize.
        kernelSplit1DBuilder.setArg(SplitDispatch::RegionCoordX::Left, 3, static_cast(operationParams.srcMemObj->getSize()));
        kernelSplit1DBuilder.setArg(SplitDispatch::RegionCoordX::Middle, 3, static_cast(operationParams.srcMemObj->getSize() / middleElSize));
        kernelSplit1DBuilder.setArg(SplitDispatch::RegionCoordX::Right, 3, static_cast(operationParams.srcMemObj->getSize()));

        // Set-up work sizes
        // Note for split walker, it would be just builder.SetDispatchGeometry(GWS, ELWS, OFFSET)
        kernelSplit1DBuilder.setDispatchGeometry(SplitDispatch::RegionCoordX::Left, Vec3{leftSize, 0, 0}, Vec3{0, 0, 0}, Vec3{0, 0, 0});
        kernelSplit1DBuilder.setDispatchGeometry(SplitDispatch::RegionCoordX::Middle, Vec3{middleSizeEls, 0, 0}, Vec3{0, 0, 0}, Vec3{0, 0, 0});
        kernelSplit1DBuilder.setDispatchGeometry(SplitDispatch::RegionCoordX::Right, Vec3{rightSize, 0, 0}, Vec3{0, 0, 0}, Vec3{0, 0, 0});
        kernelSplit1DBuilder.bake(multiDispatchInfo);

        return true;
    }

    // Virtual entry point; forwards to the typed implementation.
    bool buildDispatchInfos(MultiDispatchInfo &multiDispatchInfo) const override {
        return buildDispatchInfosTyped(multiDispatchInfo);
    }

  protected:
    Kernel *kernLeftLeftover = nullptr;
    Kernel *kernMiddle = nullptr;
    Kernel *kernRightLeftover = nullptr;

    // populateKernels == false lets a derived class load its own kernel variants instead.
    BuiltInOp(BuiltIns &kernelsLib, ClDevice &device, bool populateKernels)
        : BuiltinDispatchInfoBuilder(kernelsLib, device) {
        if (populateKernels) {
            populate(EBuiltInOps::FillBuffer,
                     "",
                     "FillBufferLeftLeftover", kernLeftLeftover,
                     "FillBufferMiddle", kernMiddle,
                     "FillBufferRightLeftover", kernRightLeftover);
        }
    }
};

// Stateless flavour of the buffer fill: same logic, kernels populated from
// EBuiltInOps::FillBufferStateless with CompilerOptions::greaterThan4gbBuffersRequired.
template <>
class BuiltInOp : public BuiltInOp {
  public:
    BuiltInOp(BuiltIns &kernelsLib, ClDevice &device) : BuiltInOp(kernelsLib, device, false) {
        populate(EBuiltInOps::FillBufferStateless,
                 CompilerOptions::greaterThan4gbBuffersRequired,
                 "FillBufferLeftLeftover", kernLeftLeftover,
                 "FillBufferMiddle", kernMiddle,
                 "FillBufferRightLeftover", kernRightLeftover);
    }

    bool buildDispatchInfos(MultiDispatchInfo &multiDispatchInfos) const override {
        return buildDispatchInfosTyped(multiDispatchInfos);
    }
};

// Dispatch builder for copying a buffer or host pointer into an image (kernels populated
// from EBuiltInOps::CopyBufferToImage3d). The destination image is redescribed so it can
// be written as raw bytes, and the kernel is selected by log2(bytes per pixel).
template <>
class BuiltInOp : public BuiltinDispatchInfoBuilder {
  public:
    BuiltInOp(BuiltIns &kernelsLib, ClDevice &device)
        : BuiltInOp(kernelsLib, device, true) {}

    // Virtual entry point; forwards to the typed implementation.
    bool buildDispatchInfos(MultiDispatchInfo &multiDispatchInfo) const override {
        return buildDispatchInfosTyped(multiDispatchInfo);
    }

  protected:
    // One kernel per pixel size: index = log2(bytesPerPixel), i.e. 1, 2, 4, 8, 16 bytes.
    Kernel *kernelBytes[5] = {nullptr};

    // populateKernels == false lets a derived class load its own kernel variants instead.
    BuiltInOp(BuiltIns &kernelsLib, ClDevice &device, bool populateKernels)
        : BuiltinDispatchInfoBuilder(kernelsLib, device) {
        if (populateKernels) {
            populate(EBuiltInOps::CopyBufferToImage3d,
                     "",
                     "CopyBufferToImage3dBytes", kernelBytes[0],
                     "CopyBufferToImage3d2Bytes", kernelBytes[1],
                     "CopyBufferToImage3d4Bytes", kernelBytes[2],
                     "CopyBufferToImage3d8Bytes", kernelBytes[3],
                     "CopyBufferToImage3d16Bytes", kernelBytes[4]);
        }
    }

    // Fills multiDispatchInfo with one dispatch for the buffer-to-image copy. Always returns true.
    // OffsetType is the element type of the pitch array passed as arg4.
    // NOTE(review): the template parameter list is missing in this copy.
    template bool buildDispatchInfosTyped(MultiDispatchInfo &multiDispatchInfo) const {
        DispatchInfoBuilder kernelNoSplit3DBuilder(clDevice);
        auto &operationParams = multiDispatchInfo.peekBuiltinOpParams();
        // Expect a source (host pointer or mem object) and no destination host pointer.
        DEBUG_BREAK_IF(!(((operationParams.srcPtr != nullptr) || (operationParams.srcMemObj != nullptr)) && (operationParams.dstPtr == nullptr)));

        auto dstImage = castToObjectOrAbort(operationParams.dstMemObj);

        // Redescribe image to be byte-copy
        auto dstImageRedescribed = dstImage->redescribe();
        multiDispatchInfo.pushRedescribedMemObj(std::unique_ptr(dstImageRedescribed)); // life range same as mdi's

        // Calculate srcRowPitch and srcSlicePitch
        // NOTE(review): the source pitches are taken from the dstRowPitch/dstSlicePitch
        // fields of the params - presumably how callers fill them for this op; confirm.
        // Zero means "tightly packed": derive the pitch from the region.
        auto bytesPerPixel = dstImage->getSurfaceFormatInfo().surfaceFormat.ImageElementSizeInBytes;

        size_t region[] = {operationParams.size.x, operationParams.size.y, operationParams.size.z};

        auto srcRowPitch = operationParams.dstRowPitch ? operationParams.dstRowPitch : region[0] * bytesPerPixel;

        auto srcSlicePitch =
            operationParams.dstSlicePitch ? operationParams.dstSlicePitch : ((dstImage->getImageDesc().image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY ? 1 : region[1]) * srcRowPitch);

        // Determine size of host ptr surface for residency purposes
        size_t hostPtrSize = operationParams.srcPtr ? Image::calculateHostPtrSize(region, srcRowPitch, srcSlicePitch, bytesPerPixel, dstImage->getImageDesc().image_type) : 0;
        hostPtrSize += operationParams.srcOffset.x;

        // Set-up kernel
        // kernelBytes has 5 entries, so the exponent must stay below 5; this is only
        // checked through DEBUG_BREAK_IF.
        auto bytesExponent = Math::log2(bytesPerPixel);
        DEBUG_BREAK_IF(bytesExponent >= 5);
        kernelNoSplit3DBuilder.setKernel(kernelBytes[bytesExponent]);

        // Set-up source host ptr / buffer
        if (operationParams.srcPtr) {
            kernelNoSplit3DBuilder.setArgSvm(0, hostPtrSize, operationParams.srcPtr, nullptr, CL_MEM_READ_ONLY);
        } else {
            kernelNoSplit3DBuilder.setArg(0, operationParams.srcMemObj);
        }

        // Set-up destination image
        kernelNoSplit3DBuilder.setArg(1, dstImageRedescribed, operationParams.dstMipLevel);

        // Set-up srcOffset
        kernelNoSplit3DBuilder.setArg(2, static_cast(operationParams.srcOffset.x));

        // Set-up dstOrigin
        {
            uint32_t origin[] = {
                static_cast(operationParams.dstOffset.x),
                static_cast(operationParams.dstOffset.y),
                static_cast(operationParams.dstOffset.z),
                0};
            kernelNoSplit3DBuilder.setArg(3, sizeof(origin), origin);
        }

        // Set-up srcRowPitch
        {
            OffsetType pitch[] = {
                static_cast(srcRowPitch),
                static_cast(srcSlicePitch)};
            kernelNoSplit3DBuilder.setArg(4, sizeof(pitch), pitch);
        }

        // Set-up work sizes
        kernelNoSplit3DBuilder.setDispatchGeometry(operationParams.size, Vec3{0, 0, 0}, Vec3{0, 0, 0});
        kernelNoSplit3DBuilder.bake(multiDispatchInfo);

        return true;
    }
};

// Stateless flavour of the buffer-to-image copy: same logic, kernels populated from
// EBuiltInOps::CopyBufferToImage3dStateless with CompilerOptions::greaterThan4gbBuffersRequired.
template <>
class BuiltInOp : public BuiltInOp {
  public:
    BuiltInOp(BuiltIns &kernelsLib, ClDevice &device)
        : BuiltInOp(kernelsLib, device, false) {
        populate(EBuiltInOps::CopyBufferToImage3dStateless,
                 CompilerOptions::greaterThan4gbBuffersRequired,
                 "CopyBufferToImage3dBytes", kernelBytes[0],
                 "CopyBufferToImage3d2Bytes", kernelBytes[1],
                 "CopyBufferToImage3d4Bytes", kernelBytes[2],
                 "CopyBufferToImage3d8Bytes", kernelBytes[3],
                 "CopyBufferToImage3d16Bytes", kernelBytes[4]);
    }

    bool buildDispatchInfos(MultiDispatchInfo &multiDispatchInfo) const override {
        return buildDispatchInfosTyped(multiDispatchInfo);
    }
};

// Dispatch builder for copying an image into a buffer or host pointer (kernels populated
// from EBuiltInOps::CopyImage3dToBuffer). Mirror image of the buffer-to-image builder:
// the source image is redescribed and the kernel is selected by log2(bytes per pixel).
template <>
class BuiltInOp : public BuiltinDispatchInfoBuilder {
  public:
    BuiltInOp(BuiltIns &kernelsLib, ClDevice &device)
        : BuiltInOp(kernelsLib, device, true) {}

    // Virtual entry point; forwards to the typed implementation.
    bool buildDispatchInfos(MultiDispatchInfo &multiDispatchInfo) const override {
        return buildDispatchInfosTyped(multiDispatchInfo);
    }

  protected:
    // One kernel per pixel size: index = log2(bytesPerPixel), i.e. 1, 2, 4, 8, 16 bytes.
    Kernel *kernelBytes[5] = {nullptr};

    // populateKernels == false lets a derived class load its own kernel variants instead.
    BuiltInOp(BuiltIns &kernelsLib, ClDevice &device, bool populateKernels)
        : BuiltinDispatchInfoBuilder(kernelsLib, device) {
        if (populateKernels) {
            populate(EBuiltInOps::CopyImage3dToBuffer,
                     "",
                     "CopyImage3dToBufferBytes", kernelBytes[0],
                     "CopyImage3dToBuffer2Bytes", kernelBytes[1],
                     "CopyImage3dToBuffer4Bytes", kernelBytes[2],
                     "CopyImage3dToBuffer8Bytes", kernelBytes[3],
                     "CopyImage3dToBuffer16Bytes", kernelBytes[4]);
        }
    }

    // Fills multiDispatchInfo with one dispatch for the image-to-buffer copy. Always returns true.
    // OffsetType is the element type of the pitch array passed as arg4.
    // NOTE(review): the template parameter list is missing in this copy.
    template bool buildDispatchInfosTyped(MultiDispatchInfo &multiDispatchInfo) const {
        DispatchInfoBuilder kernelNoSplit3DBuilder(clDevice);
        auto &operationParams = multiDispatchInfo.peekBuiltinOpParams();
        // Expect no source host pointer and a destination (host pointer or mem object).
        DEBUG_BREAK_IF(!((operationParams.srcPtr == nullptr) && ((operationParams.dstPtr != nullptr) || (operationParams.dstMemObj != nullptr))));

        auto srcImage = castToObjectOrAbort(operationParams.srcMemObj);

        // Redescribe image to be byte-copy
        auto srcImageRedescribed = srcImage->redescribe();
        multiDispatchInfo.pushRedescribedMemObj(std::unique_ptr(srcImageRedescribed)); // life range same as mdi's

        // Calculate dstRowPitch and dstSlicePitch
        // NOTE(review): the destination pitches are taken from the srcRowPitch/srcSlicePitch
        // fields of the params - presumably how callers fill them for this op; confirm.
        // Zero means "tightly packed": derive the pitch from the region.
        auto bytesPerPixel = srcImage->getSurfaceFormatInfo().surfaceFormat.ImageElementSizeInBytes;

        size_t region[] = {operationParams.size.x, operationParams.size.y, operationParams.size.z};

        auto dstRowPitch = operationParams.srcRowPitch ? operationParams.srcRowPitch : region[0] * bytesPerPixel;

        auto dstSlicePitch =
            operationParams.srcSlicePitch ? operationParams.srcSlicePitch : ((srcImage->getImageDesc().image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY ? 1 : region[1]) * dstRowPitch);

        // Determine size of host ptr surface for residency purposes
        size_t hostPtrSize = operationParams.dstPtr ? Image::calculateHostPtrSize(region, dstRowPitch, dstSlicePitch, bytesPerPixel, srcImage->getImageDesc().image_type) : 0;
        hostPtrSize += operationParams.dstOffset.x;

        // Set-up ISA
        // kernelBytes has 5 entries, so the exponent must stay below 5; this is only
        // checked through DEBUG_BREAK_IF.
        auto bytesExponent = Math::log2(bytesPerPixel);
        DEBUG_BREAK_IF(bytesExponent >= 5);
        kernelNoSplit3DBuilder.setKernel(kernelBytes[bytesExponent]);

        // Set-up source image
        kernelNoSplit3DBuilder.setArg(0, srcImageRedescribed, operationParams.srcMipLevel);

        // Set-up destination host ptr / buffer
        if (operationParams.dstPtr) {
            kernelNoSplit3DBuilder.setArgSvm(1, hostPtrSize, operationParams.dstPtr, nullptr, 0u);
        } else {
            kernelNoSplit3DBuilder.setArg(1, operationParams.dstMemObj);
        }

        // Set-up srcOrigin
        {
            uint32_t origin[] = {
                static_cast(operationParams.srcOffset.x),
                static_cast(operationParams.srcOffset.y),
                static_cast(operationParams.srcOffset.z),
                0};
            kernelNoSplit3DBuilder.setArg(2, sizeof(origin), origin);
        }

        // Set-up dstOffset
        kernelNoSplit3DBuilder.setArg(3, static_cast(operationParams.dstOffset.x));

        // Set-up dstRowPitch
        {
            OffsetType pitch[] = {
                static_cast(dstRowPitch),
                static_cast(dstSlicePitch)};
            kernelNoSplit3DBuilder.setArg(4, sizeof(pitch), pitch);
        }

        // Set-up work sizes
        kernelNoSplit3DBuilder.setDispatchGeometry(operationParams.size, Vec3{0, 0, 0}, Vec3{0, 0, 0});
        kernelNoSplit3DBuilder.bake(multiDispatchInfo);

        return true;
    }
};

// Stateless flavour of the image-to-buffer copy: same logic, kernels populated from
// EBuiltInOps::CopyImage3dToBufferStateless with CompilerOptions::greaterThan4gbBuffersRequired.
template <>
class BuiltInOp : public BuiltInOp {
  public:
    BuiltInOp(BuiltIns &kernelsLib, ClDevice &device)
        : BuiltInOp(kernelsLib, device, false) {
        populate(EBuiltInOps::CopyImage3dToBufferStateless,
                 CompilerOptions::greaterThan4gbBuffersRequired,
                 "CopyImage3dToBufferBytes", kernelBytes[0],
                 "CopyImage3dToBuffer2Bytes", kernelBytes[1],
                 "CopyImage3dToBuffer4Bytes", kernelBytes[2],
                 "CopyImage3dToBuffer8Bytes", kernelBytes[3],
                 "CopyImage3dToBuffer16Bytes", kernelBytes[4]);
    }

    bool buildDispatchInfos(MultiDispatchInfo &multiDispatchInfo) const override {
        return buildDispatchInfosTyped(multiDispatchInfo);
    }
};

// Dispatch builder for the image-to-image copy built-in (single kernel
// "CopyImageToImage3d" populated from EBuiltInOps::CopyImageToImage3d).
// Both images are redescribed before being bound to the kernel.
template <>
class BuiltInOp : public BuiltinDispatchInfoBuilder {
  public:
    BuiltInOp(BuiltIns &kernelsLib, ClDevice &device)
        : BuiltinDispatchInfoBuilder(kernelsLib, device) {
        populate(EBuiltInOps::CopyImageToImage3d,
                 "",
                 "CopyImageToImage3d", kernel);
    }

    // Fills multiDispatchInfo with one dispatch for the image copy. Always returns true.
    bool buildDispatchInfos(MultiDispatchInfo &multiDispatchInfo) const override {
        DispatchInfoBuilder kernelNoSplit3DBuilder(clDevice);
        auto &operationParams = multiDispatchInfo.peekBuiltinOpParams();
        // Image-to-image: no host pointers expected on either side.
        DEBUG_BREAK_IF(!((operationParams.srcPtr == nullptr) && (operationParams.dstPtr == nullptr)));

        auto srcImage = castToObjectOrAbort(operationParams.srcMemObj);
        auto dstImage = castToObjectOrAbort(operationParams.dstMemObj);

        // Redescribe images to be byte-copies
        auto srcImageRedescribed = srcImage->redescribe();
        auto dstImageRedescribed = dstImage->redescribe();
        multiDispatchInfo.pushRedescribedMemObj(std::unique_ptr(srcImageRedescribed)); // life range same as mdi's
        multiDispatchInfo.pushRedescribedMemObj(std::unique_ptr(dstImageRedescribed)); // life range same as mdi's

        // Set-up kernel
        kernelNoSplit3DBuilder.setKernel(kernel);

        // Set-up source image
        kernelNoSplit3DBuilder.setArg(0, srcImageRedescribed, operationParams.srcMipLevel);

        // Set-up destination image
        kernelNoSplit3DBuilder.setArg(1, dstImageRedescribed, operationParams.dstMipLevel);

        // Set-up srcOrigin
        {
            uint32_t origin[] = {
                static_cast(operationParams.srcOffset.x),
                static_cast(operationParams.srcOffset.y),
                static_cast(operationParams.srcOffset.z),
                0};
            kernelNoSplit3DBuilder.setArg(2, sizeof(origin), origin);
        }

        // Set-up dstOrigin
        {
            uint32_t origin[] = {
                static_cast(operationParams.dstOffset.x),
                static_cast(operationParams.dstOffset.y),
                static_cast(operationParams.dstOffset.z),
                0};
            kernelNoSplit3DBuilder.setArg(3, sizeof(origin), origin);
        }

        // Set-up work sizes
        kernelNoSplit3DBuilder.setDispatchGeometry(operationParams.size, Vec3{0, 0, 0}, Vec3{0, 0, 0});
        kernelNoSplit3DBuilder.bake(multiDispatchInfo);

        return true;
    }

  protected:
    Kernel *kernel = nullptr;
};

// Dispatch builder for the image fill built-in (single kernel "FillImage3d" populated
// from EBuiltInOps::FillImage3d). The fill colour is read from operationParams.srcPtr
// and converted with convertFillColor before being passed to the kernel.
template <>
class BuiltInOp : public BuiltinDispatchInfoBuilder {
  public:
    BuiltInOp(BuiltIns &kernelsLib, ClDevice &device)
        : BuiltinDispatchInfoBuilder(kernelsLib, device) {
        populate(EBuiltInOps::FillImage3d,
                 "",
                 "FillImage3d", kernel);
    }

    // Fills multiDispatchInfo with one dispatch for the image fill. Always returns true.
    bool buildDispatchInfos(MultiDispatchInfo &multiDispatchInfo) const override {
        DispatchInfoBuilder kernelNoSplit3DBuilder(clDevice);
        auto &operationParams = multiDispatchInfo.peekBuiltinOpParams();
        // Expect the colour in srcPtr, no source mem object and no destination host pointer.
        DEBUG_BREAK_IF(!((operationParams.srcMemObj == nullptr) && (operationParams.srcPtr != nullptr) && (operationParams.dstPtr == nullptr)));

        auto image = castToObjectOrAbort(operationParams.dstMemObj);

        // Redescribe image to be byte-copy
        auto imageRedescribed = image->redescribeFillImage();
        multiDispatchInfo.pushRedescribedMemObj(std::unique_ptr(imageRedescribed));

        // Set-up kernel
        kernelNoSplit3DBuilder.setKernel(kernel);

        // Set-up destination image
        kernelNoSplit3DBuilder.setArg(0, imageRedescribed);

        // Set-up fill color
        // Converted from the original image format to the redescribed image format.
        int iFillColor[4] = {0};
        const void *fillColor = operationParams.srcPtr;
        convertFillColor(fillColor,
                         iFillColor,
                         image->getSurfaceFormatInfo().OCLImageFormat,
                         imageRedescribed->getSurfaceFormatInfo().OCLImageFormat);
        kernelNoSplit3DBuilder.setArg(1, 4 * sizeof(int32_t), iFillColor);

        // Set-up dstOffset
        {
            uint32_t offset[] = {
                static_cast(operationParams.dstOffset.x),
                static_cast(operationParams.dstOffset.y),
                static_cast(operationParams.dstOffset.z),
                0};
            kernelNoSplit3DBuilder.setArg(2, sizeof(offset), offset);
        }

        // Set-up work sizes
        kernelNoSplit3DBuilder.setDispatchGeometry(operationParams.size, Vec3{0, 0, 0}, Vec3{0, 0, 0});
        kernelNoSplit3DBuilder.bake(multiDispatchInfo);

        return true;
    }

  protected:
    Kernel *kernel = nullptr;
};

// Returns the dispatch builder registered for `operation` on the built-ins library of
// `device`, creating it on first use.
//
// Each slot of kernelsLib->BuiltinOpsBuilders is a pair: `.first` holds the builder and
// `.second` is the flag handed to std::call_once, so the builder for a given operation is
// constructed at most once per library. Operations not listed in the switch are forwarded
// to getUnknownDispatchInfoBuilder.
// NOTE(review): the concrete builder type passed to std::make_unique is missing in this
// copy of the file for every case.
BuiltinDispatchInfoBuilder &BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::Type operation, ClDevice &device) {
    uint32_t operationId = static_cast(operation);
    auto kernelsLib = device.getDevice().getBuiltIns();
    auto &operationBuilder = kernelsLib->BuiltinOpsBuilders[operationId];
    switch (operation) {
    case EBuiltInOps::CopyBufferToBuffer:
        std::call_once(operationBuilder.second, [&] { operationBuilder.first = std::make_unique>(*kernelsLib, device); });
        break;
    case EBuiltInOps::CopyBufferToBufferStateless:
        std::call_once(operationBuilder.second, [&] { operationBuilder.first = std::make_unique>(*kernelsLib, device); });
        break;
    case EBuiltInOps::CopyBufferRect:
        std::call_once(operationBuilder.second, [&] { operationBuilder.first = std::make_unique>(*kernelsLib, device); });
        break;
    case EBuiltInOps::CopyBufferRectStateless:
        std::call_once(operationBuilder.second, [&] { operationBuilder.first = std::make_unique>(*kernelsLib, device); });
        break;
    case EBuiltInOps::FillBuffer:
        std::call_once(operationBuilder.second, [&] { operationBuilder.first = std::make_unique>(*kernelsLib, device); });
        break;
    case EBuiltInOps::FillBufferStateless:
        std::call_once(operationBuilder.second, [&] { operationBuilder.first = std::make_unique>(*kernelsLib, device); });
        break;
    case EBuiltInOps::CopyBufferToImage3d:
        std::call_once(operationBuilder.second, [&] { operationBuilder.first = std::make_unique>(*kernelsLib, device); });
        break;
    case EBuiltInOps::CopyBufferToImage3dStateless:
        std::call_once(operationBuilder.second, [&] { operationBuilder.first = std::make_unique>(*kernelsLib, device); });
        break;
    case EBuiltInOps::CopyImage3dToBuffer:
        std::call_once(operationBuilder.second, [&] { operationBuilder.first = std::make_unique>(*kernelsLib, device); });
        break;
    case EBuiltInOps::CopyImage3dToBufferStateless:
        std::call_once(operationBuilder.second, [&] { operationBuilder.first = std::make_unique>(*kernelsLib, device); });
        break;
    case EBuiltInOps::CopyImageToImage3d:
        std::call_once(operationBuilder.second, [&] { operationBuilder.first = std::make_unique>(*kernelsLib, device); });
        break;
    case EBuiltInOps::FillImage3d:
        std::call_once(operationBuilder.second, [&] { operationBuilder.first = std::make_unique>(*kernelsLib, device); });
        break;
    case EBuiltInOps::AuxTranslation:
        std::call_once(operationBuilder.second, [&] { operationBuilder.first = std::make_unique>(*kernelsLib, device); });
        break;
    default:
        return getUnknownDispatchInfoBuilder(operation, device);
    }
    return *operationBuilder.first;
}

// Constructs the wrapper and immediately takes ownership of every kernel used by inputBuilder.
BuiltInOwnershipWrapper::BuiltInOwnershipWrapper(BuiltinDispatchInfoBuilder &inputBuilder) {
    takeOwnership(inputBuilder);
}

// Releases ownership of every kernel used by the attached builder; does nothing when no
// builder was attached.
BuiltInOwnershipWrapper::~BuiltInOwnershipWrapper() {
    if (builder) {
        for (auto &kernel : builder->peekUsedKernels()) {
            kernel->releaseOwnership();
        }
    }
}

// Attaches inputBuilder to this wrapper and calls takeOwnership() on each of its used
// kernels. UNRECOVERABLE_IF fires when a builder is already attached, so a wrapper can
// be bound only once.
void BuiltInOwnershipWrapper::takeOwnership(BuiltinDispatchInfoBuilder &inputBuilder) {
    UNRECOVERABLE_IF(builder);
    builder = &inputBuilder;
    for (auto &kernel : builder->peekUsedKernels()) {
        kernel->takeOwnership();
    }
}

// Creates a Program for the given built-in code blob on the given devices.
//
// Source and Intermediate code both go through Program::createBuiltInFromSource; Binary
// goes through Program::createBuiltInFromGenBinary. Any other code type leaves the
// returned pointer empty. The status written to `err` by the factories is not inspected
// here, so callers must check the returned pointer.
std::unique_ptr BuiltinDispatchInfoBuilder::createProgramFromCode(const BuiltinCode &bc, const ClDeviceVector &deviceVector) {
    std::unique_ptr ret;
    const char *data = bc.resource.data();
    size_t dataLen = bc.resource.size();
    cl_int err = 0;
    switch (bc.type) {
    default:
        break;
    case BuiltinCode::ECodeType::Source:
    case BuiltinCode::ECodeType::Intermediate:
        ret.reset(Program::createBuiltInFromSource(data, nullptr, deviceVector, &err));
        break;
    case BuiltinCode::ECodeType::Binary:
        ret.reset(Program::createBuiltInFromGenBinary(nullptr, deviceVector, data, dataLen, &err));
        break;
    }
    return ret;
}

} // namespace NEO