/* * Copyright (C) 2018-2019 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "runtime/helpers/kernel_commands.h" #include "runtime/kernel/kernel.h" namespace OCLRT { template typename KernelCommandsHelper::INTERFACE_DESCRIPTOR_DATA *KernelCommandsHelper::getInterfaceDescriptor( const IndirectHeap &indirectHeap, uint64_t offsetInterfaceDescriptor, KernelCommandsHelper::INTERFACE_DESCRIPTOR_DATA *inlineInterfaceDescriptor) { return static_cast(ptrOffset(indirectHeap.getCpuBase(), (size_t)offsetInterfaceDescriptor)); } template void KernelCommandsHelper::setAdditionalInfo( INTERFACE_DESCRIPTOR_DATA *pInterfaceDescriptor, const size_t &sizeCrossThreadData, const size_t &sizePerThreadData) { DEBUG_BREAK_IF((sizeCrossThreadData % sizeof(GRF)) != 0); auto numGrfCrossThreadData = static_cast(sizeCrossThreadData / sizeof(GRF)); DEBUG_BREAK_IF(numGrfCrossThreadData == 0); pInterfaceDescriptor->setCrossThreadConstantDataReadLength(numGrfCrossThreadData); DEBUG_BREAK_IF((sizePerThreadData % sizeof(GRF)) != 0); auto numGrfPerThreadData = static_cast(sizePerThreadData / sizeof(GRF)); // at least 1 GRF of perThreadData for each thread in a thread group when sizeCrossThreadData != 0 numGrfPerThreadData = std::max(numGrfPerThreadData, 1u); pInterfaceDescriptor->setConstantIndirectUrbEntryReadLength(numGrfPerThreadData); } template uint32_t KernelCommandsHelper::additionalSizeRequiredDsh() { return sizeof(INTERFACE_DESCRIPTOR_DATA); } template size_t KernelCommandsHelper::getSizeRequiredCS(const Kernel *kernel) { size_t size = 2 * sizeof(typename GfxFamily::MEDIA_STATE_FLUSH) + sizeof(typename GfxFamily::MEDIA_INTERFACE_DESCRIPTOR_LOAD); return size; } template size_t KernelCommandsHelper::getSizeRequiredForCacheFlush(const CommandQueue &commandQueue, const Kernel *kernel, uint64_t postSyncAddress, uint64_t postSyncData) { return kernel->requiresCacheFlushCommand(commandQueue) ? sizeof(typename GfxFamily::PIPE_CONTROL) : 0; } template void KernelCommandsHelper::sendMediaStateFlush( LinearStream &commandStream, size_t offsetInterfaceDescriptorData) { typedef typename GfxFamily::MEDIA_STATE_FLUSH MEDIA_STATE_FLUSH; auto pCmd = (MEDIA_STATE_FLUSH *)commandStream.getSpace(sizeof(MEDIA_STATE_FLUSH)); *pCmd = GfxFamily::cmdInitMediaStateFlush; pCmd->setInterfaceDescriptorOffset((uint32_t)offsetInterfaceDescriptorData); } template void KernelCommandsHelper::sendMediaInterfaceDescriptorLoad( LinearStream &commandStream, size_t offsetInterfaceDescriptorData, size_t sizeInterfaceDescriptorData) { { typedef typename GfxFamily::MEDIA_STATE_FLUSH MEDIA_STATE_FLUSH; auto pCmd = (MEDIA_STATE_FLUSH *)commandStream.getSpace(sizeof(MEDIA_STATE_FLUSH)); *pCmd = GfxFamily::cmdInitMediaStateFlush; } { typedef typename GfxFamily::MEDIA_INTERFACE_DESCRIPTOR_LOAD MEDIA_INTERFACE_DESCRIPTOR_LOAD; auto pCmd = (MEDIA_INTERFACE_DESCRIPTOR_LOAD *)commandStream.getSpace(sizeof(MEDIA_INTERFACE_DESCRIPTOR_LOAD)); *pCmd = GfxFamily::cmdInitMediaInterfaceDescriptorLoad; pCmd->setInterfaceDescriptorDataStartAddress((uint32_t)offsetInterfaceDescriptorData); pCmd->setInterfaceDescriptorTotalLength((uint32_t)sizeInterfaceDescriptorData); } } template void KernelCommandsHelper::setKernelStartOffset( uint64_t &kernelStartOffset, bool kernelAllocation, const KernelInfo &kernelInfo, const bool &localIdsGenerationByRuntime, const bool &kernelUsesLocalIds, Kernel &kernel) { if (kernelAllocation) { kernelStartOffset = kernelInfo.getGraphicsAllocation()->getGpuAddressToPatch(); } kernelStartOffset += kernel.getStartOffset(); } template void KernelCommandsHelper::programPerThreadData( size_t &sizePerThreadData, const bool &localIdsGenerationByRuntime, LinearStream &ioh, uint32_t &simd, uint32_t &numChannels, const size_t localWorkSize[3], Kernel &kernel, size_t &sizePerThreadDataTotal, size_t &localWorkItems) { sendPerThreadData( ioh, simd, numChannels, localWorkSize, kernel.getKernelInfo().workgroupDimensionsOrder, kernel.usesOnlyImages()); updatePerThreadDataTotal(sizePerThreadData, simd, numChannels, sizePerThreadDataTotal, localWorkItems); } template size_t KernelCommandsHelper::sendCrossThreadData( IndirectHeap &indirectHeap, Kernel &kernel, bool inlineDataProgrammingRequired, WALKER_TYPE *walkerCmd, uint32_t &sizeCrossThreadData) { indirectHeap.align(WALKER_TYPE::INDIRECTDATASTARTADDRESS_ALIGN_SIZE); auto offsetCrossThreadData = indirectHeap.getUsed(); char *pDest = static_cast(indirectHeap.getSpace(sizeCrossThreadData)); memcpy_s(pDest, sizeCrossThreadData, kernel.getCrossThreadData(), sizeCrossThreadData); if (DebugManager.flags.AddPatchInfoCommentsForAUBDump.get()) { FlatBatchBufferHelper::fixCrossThreadDataInfo(kernel.getPatchInfoDataList(), offsetCrossThreadData, indirectHeap.getGraphicsAllocation()->getGpuAddress()); } return offsetCrossThreadData + static_cast(indirectHeap.getHeapGpuStartOffset()); } template bool KernelCommandsHelper::resetBindingTablePrefetch(Kernel &kernel) { return kernel.isSchedulerKernel || !doBindingTablePrefetch(); } template void KernelCommandsHelper::setInterfaceDescriptorOffset( WALKER_TYPE *walkerCmd, uint32_t &interfaceDescriptorIndex) { walkerCmd->setInterfaceDescriptorOffset(interfaceDescriptorIndex++); } template bool KernelCommandsHelper::isRuntimeLocalIdsGenerationRequired(uint32_t workDim, size_t *gws, size_t *lws) { return true; } template void KernelCommandsHelper::programCacheFlushAfterWalkerCommand(LinearStream *commandStream, const CommandQueue &commandQueue, const Kernel *kernel, uint64_t postSyncAddress, uint64_t postSyncData) { if (kernel->requiresCacheFlushCommand(commandQueue)) { using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL; auto pipeControl = reinterpret_cast(commandStream->getSpace(sizeof(PIPE_CONTROL))); *pipeControl = GfxFamily::cmdInitPipeControl; pipeControl->setCommandStreamerStallEnable(true); pipeControl->setDcFlushEnable(true); } } } // namespace OCLRT