/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/built_ins/built_ins.h" #include "shared/source/helpers/hw_helper.h" #include "shared/source/memory_manager/graphics_allocation.h" #include "opencl/source/built_ins/builtins_dispatch_builder.h" #include "opencl/source/helpers/dispatch_info_builder.h" #include "opencl/source/kernel/kernel_objects_for_aux_translation.h" #include "opencl/source/mem_obj/buffer.h" #include "pipe_control_args.h" #include namespace NEO { template <> class BuiltInOp : public BuiltinDispatchInfoBuilder { public: BuiltInOp(BuiltIns &kernelsLib, ClDevice &device); template bool buildDispatchInfosForAuxTranslation(MultiDispatchInfo &multiDispatchInfo, const BuiltinOpParams &operationParams) const { size_t kernelInstanceNumber = 0; size_t numKernelObjectsToTranslate = multiDispatchInfo.getKernelObjsForAuxTranslation()->size(); resizeKernelInstances(numKernelObjectsToTranslate); multiDispatchInfo.setBuiltinOpParams(operationParams); for (auto &kernelObj : *multiDispatchInfo.getKernelObjsForAuxTranslation()) { DispatchInfoBuilder builder(clDevice); UNRECOVERABLE_IF(builder.getMaxNumDispatches() != 1); if (kernelInstanceNumber == 0) { // Before Kernel registerPipeControlProgramming(builder.getDispatchInfo(0).dispatchInitCommands, true); } if (kernelInstanceNumber == numKernelObjectsToTranslate - 1) { // After Kernel registerPipeControlProgramming(builder.getDispatchInfo(0).dispatchEpilogueCommands, false); } if (AuxTranslationDirection::AuxToNonAux == operationParams.auxTranslationDirection) { builder.setKernel(convertToNonAuxKernel[kernelInstanceNumber++].get()); } else { UNRECOVERABLE_IF(AuxTranslationDirection::NonAuxToAux != operationParams.auxTranslationDirection); builder.setKernel(convertToAuxKernel[kernelInstanceNumber++].get()); } size_t allocationSize = 0; if (kernelObj.type == KernelObjForAuxTranslation::Type::MEM_OBJ) { auto buffer = static_cast(kernelObj.object); builder.setArg(0, buffer); builder.setArg(1, buffer); allocationSize = alignUp(buffer->getSize(), 512); } else { DEBUG_BREAK_IF(kernelObj.type != KernelObjForAuxTranslation::Type::GFX_ALLOC); auto svmAlloc = static_cast(kernelObj.object); auto svmPtr = reinterpret_cast(svmAlloc->getGpuAddressToPatch()); builder.setArgSvmAlloc(0, svmPtr, svmAlloc); builder.setArgSvmAlloc(1, svmPtr, svmAlloc); allocationSize = alignUp(svmAlloc->getUnderlyingBufferSize(), 512); } size_t xGws = allocationSize / 16; builder.setDispatchGeometry(Vec3{xGws, 0, 0}, Vec3{0, 0, 0}, Vec3{0, 0, 0}); builder.bake(multiDispatchInfo); } return true; } protected: using RegisteredMethodDispatcherT = RegisteredMethodDispatcher; template static void dispatchPipeControl(LinearStream &linearStream, TimestampPacketDependencies *, const HardwareInfo &) { PipeControlArgs args(dcFlush); MemorySynchronizationCommands::addPipeControl(linearStream, args); } template static size_t getSizeForSinglePipeControl(size_t, const HardwareInfo &, bool) { return MemorySynchronizationCommands::getSizeForSinglePipeControl(); } template void registerPipeControlProgramming(RegisteredMethodDispatcherT &dispatcher, bool dcFlush) const { if (dcFlush) { dispatcher.registerMethod(this->dispatchPipeControl); } else { dispatcher.registerMethod(this->dispatchPipeControl); } dispatcher.registerCommandsSizeEstimationMethod(this->getSizeForSinglePipeControl); } void resizeKernelInstances(size_t size) const; MultiDeviceKernel *multiDeviceBaseKernel = nullptr; Kernel *baseKernel = nullptr; mutable std::vector> convertToNonAuxKernel; mutable std::vector> convertToAuxKernel; }; } // namespace NEO