/*
 * Copyright (C) 2018 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#include "runtime/built_ins/aux_translation_builtin.h"
#include "runtime/mem_obj/buffer.h"
#include "runtime/kernel/kernel.h"

// NOTE(review): in this copy of the file the template parameter/argument lists
// appear to be missing (after `template`, `std::forward`, `reinterpret_cast`,
// `DispatchInfoBuilder`, `Vec3`, and on the `BuiltInOp` class name). Confirm
// against the repository copy before relying on the exact spelling below.
namespace OCLRT {

// Loads and builds the built-in program for `op`, then collects its kernels.
//
// Steps, in order:
//  1. Fetch the built-in code for `op` (any code type) for `device` from the
//     built-ins library owned by `kernelsLib`.
//  2. Create a program from that code and store it in `prog`; the pointer is
//     released from the value returned by createProgramFromCode and handed to
//     `prog.reset`.
//  3. Build the program with `options`; the last build argument is the value
//     of `kernelsLib.isCacheingEnabled()`.
//  4. Forward `desc...` to `grabKernels`.
//
// `desc` is presumably a sequence of (kernel name, kernel destination) pairs
// consumed by grabKernels - TODO confirm against its declaration.
template void BuiltinDispatchInfoBuilder::populate(Context &context, Device &device, EBuiltInOps op, const char *options, KernelsDescArgsT &&... desc) {
    auto src = kernelsLib.getBuiltinsLib().getBuiltinCode(op, BuiltinCode::ECodeType::Any, device);
    prog.reset(BuiltinsLib::createProgramFromCode(src, context, device).release());
    prog->build(0, nullptr, options, nullptr, nullptr, kernelsLib.isCacheingEnabled());
    grabKernels(std::forward(desc)...);
}

// Constructs the aux-translation built-in.
//
// Populates the builder for EBuiltInOps::AuxTranslation with empty build
// options, passing "fullCopy" and `baseKernel` as the kernel description
// (presumably: the kernel named "fullCopy" is stored into `baseKernel` -
// verify against grabKernels). Afterwards 5 kernel instances per translation
// direction are pre-created via resizeKernelInstances.
template BuiltInOp::BuiltInOp(BuiltIns &kernelsLib, Context &context, Device &device) : BuiltinDispatchInfoBuilder(kernelsLib) {
    BuiltinDispatchInfoBuilder::populate(context, device, EBuiltInOps::AuxTranslation, "", "fullCopy", baseKernel);
    resizeKernelInstances(5);
}

// Adds one dispatch to `multiDispatchInfo` for every buffer listed in
// `operationParams.buffersForAuxTranslation`.
//
// For each buffer a separate kernel instance is taken from
// `convertToNonAuxKernel` or `convertToAuxKernel`, chosen by
// `operationParams.auxTranslationDirection`. Any direction other than
// AuxToNonAux / NonAuxToAux hits UNRECOVERABLE_IF.
//
// `operationParams.buffersForAuxTranslation` is dereferenced without a null
// check, so callers must supply it.
//
// Always returns true.
template bool BuiltInOp::buildDispatchInfos(MultiDispatchInfo &multiDispatchInfo, const BuiltinOpParams &operationParams) const {
    // Index of the next kernel instance to use; advances once per buffer.
    size_t kernelInstanceNumber = 0;
    // Make sure there is at least one kernel instance per buffer so the
    // `.at()` lookups below stay in range.
    resizeKernelInstances(operationParams.buffersForAuxTranslation->size());

    for (auto &buffer : *operationParams.buffersForAuxTranslation) {
        DispatchInfoBuilder builder;
        auto graphicsAllocation = buffer->getGraphicsAllocation();
        size_t allocationSize = graphicsAllocation->getUnderlyingBufferSize();

        // The same allocation is bound to both kernel arguments: once as the
        // buffer object and once by GPU address through setArgSvm. Which
        // argument index gets which form depends on the direction.
        if (AuxTranslationDirection::AuxToNonAux == operationParams.auxTranslationDirection) {
            builder.setKernel(convertToNonAuxKernel.at(kernelInstanceNumber++).get());
            builder.setArg(0, buffer);
            builder.setArgSvm(1, allocationSize, reinterpret_cast(graphicsAllocation->getGpuAddress()));
        } else {
            UNRECOVERABLE_IF(AuxTranslationDirection::NonAuxToAux != operationParams.auxTranslationDirection);
            builder.setKernel(convertToAuxKernel.at(kernelInstanceNumber++).get());
            builder.setArgSvm(0, allocationSize, reinterpret_cast(graphicsAllocation->getGpuAddress()));
            builder.setArg(1, buffer);
        }

        // Global work size is the allocation size divided by the size of four
        // uint32_t values (integer division, remainder dropped).
        size_t elementSize = sizeof(uint32_t) * 4;
        // NOTE(review): the check uses isAligned<4> while elementSize is
        // 4 * sizeof(uint32_t); a size that passes the check can still leave a
        // remainder in the division below - confirm this is intended.
        DEBUG_BREAK_IF(allocationSize < elementSize || !isAligned<4>(allocationSize));
        size_t xGws = allocationSize / elementSize;

        // Only the first component of the first vector is non-zero; the other
        // two vectors are all zeros (presumably GWS, ELWS and offset - verify
        // against setDispatchGeometry).
        builder.setDispatchGeometry(Vec3{xGws, 0, 0}, Vec3{0, 0, 0}, Vec3{0, 0, 0});
        builder.bake(multiDispatchInfo);
    }
    return true;
}

// Grows `convertToNonAuxKernel` and `convertToAuxKernel` to hold at least
// `size` kernel instances each; never shrinks them.
//
// Each missing slot is filled with a kernel created from `baseKernel`'s
// program and kernel info and then cloned from `baseKernel`. The loop starts
// at `convertToNonAuxKernel.size()` only; within this function both
// containers are always appended to together.
//
// This is a const member that modifies both containers, so they are
// presumably declared `mutable` - confirm in the class definition.
template void BuiltInOp::resizeKernelInstances(size_t size) const {
    convertToNonAuxKernel.reserve(size);
    convertToAuxKernel.reserve(size);

    for (size_t i = convertToNonAuxKernel.size(); i < size; i++) {
        // The third argument to Kernel::create is passed as nullptr and the
        // returned pointers are used without a null check.
        auto clonedKernel1 = Kernel::create(baseKernel->getProgram(), baseKernel->getKernelInfo(), nullptr);
        auto clonedKernel2 = Kernel::create(baseKernel->getProgram(), baseKernel->getKernelInfo(), nullptr);
        clonedKernel1->cloneKernel(baseKernel);
        clonedKernel2->cloneKernel(baseKernel);

        convertToNonAuxKernel.emplace_back(clonedKernel1);
        convertToAuxKernel.emplace_back(clonedKernel2);
    }
}

} // namespace OCLRT