/* * Copyright (C) 2018-2019 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "runtime/command_queue/hardware_interface.h" namespace OCLRT { template inline void HardwareInterface::getDefaultDshSpace( const size_t &offsetInterfaceDescriptorTable, CommandQueue &commandQueue, const MultiDispatchInfo &multiDispatchInfo, size_t &totalInterfaceDescriptorTableSize, Kernel *parentKernel, IndirectHeap *dsh, LinearStream *commandStream) { size_t numDispatches = multiDispatchInfo.size(); totalInterfaceDescriptorTableSize *= numDispatches; if (!parentKernel) { dsh->getSpace(totalInterfaceDescriptorTableSize); } else { dsh->getSpace(commandQueue.getContext().getDefaultDeviceQueue()->getDshOffset() - dsh->getUsed()); } } template inline void HardwareInterface::dispatchWorkarounds( LinearStream *commandStream, CommandQueue &commandQueue, Kernel &kernel, const bool &enable) { if (enable) { PreemptionHelper::applyPreemptionWaCmdsBegin(commandStream, commandQueue.getDevice()); // Implement enabling special WA DisableLSQCROPERFforOCL if needed GpgpuWalkerHelper::applyWADisableLSQCROPERFforOCL(commandStream, kernel, enable); } else { // Implement disabling special WA DisableLSQCROPERFforOCL if needed GpgpuWalkerHelper::applyWADisableLSQCROPERFforOCL(commandStream, kernel, enable); PreemptionHelper::applyPreemptionWaCmdsEnd(commandStream, commandQueue.getDevice()); } } template inline void HardwareInterface::dispatchProfilingPerfStartCommands( const DispatchInfo &dispatchInfo, const MultiDispatchInfo &multiDispatchInfo, TagNode *hwTimeStamps, HwPerfCounter *hwPerfCounter, LinearStream *commandStream, CommandQueue &commandQueue) { if (&dispatchInfo == &*multiDispatchInfo.begin()) { // If hwTimeStampAlloc is passed (not nullptr), then we know that profiling is enabled if (hwTimeStamps != nullptr) { GpgpuWalkerHelper::dispatchProfilingCommandsStart(*hwTimeStamps, commandStream); } if (hwPerfCounter != nullptr) { GpgpuWalkerHelper::dispatchPerfCountersCommandsStart(commandQueue, *hwPerfCounter, commandStream); } } } template inline void HardwareInterface::dispatchProfilingPerfEndCommands( TagNode *hwTimeStamps, HwPerfCounter *hwPerfCounter, LinearStream *commandStream, CommandQueue &commandQueue) { // If hwTimeStamps is passed (not nullptr), then we know that profiling is enabled if (hwTimeStamps != nullptr) { GpgpuWalkerHelper::dispatchProfilingCommandsEnd(*hwTimeStamps, commandStream); } if (hwPerfCounter != nullptr) { GpgpuWalkerHelper::dispatchPerfCountersCommandsEnd(commandQueue, *hwPerfCounter, commandStream); } } template inline void HardwareInterface::programWalker( LinearStream &commandStream, Kernel &kernel, CommandQueue &commandQueue, TimestampPacketContainer *currentTimestampPacketNodes, IndirectHeap &dsh, IndirectHeap &ioh, IndirectHeap &ssh, size_t globalWorkSizes[3], size_t localWorkSizes[3], PreemptionMode preemptionMode, size_t currentDispatchIndex, uint32_t &interfaceDescriptorIndex, const DispatchInfo &dispatchInfo, size_t offsetInterfaceDescriptorTable, Vec3 &numberOfWorkgroups, Vec3 &startOfWorkgroups) { auto walkerCmd = allocateWalkerSpace(commandStream, kernel); uint32_t dim = dispatchInfo.getDim(); uint32_t simd = kernel.getKernelInfo().getMaxSimdSize(); size_t globalOffsets[3] = {dispatchInfo.getOffset().x, dispatchInfo.getOffset().y, dispatchInfo.getOffset().z}; size_t startWorkGroups[3] = {startOfWorkgroups.x, startOfWorkgroups.y, startOfWorkgroups.z}; size_t numWorkGroups[3] = {numberOfWorkgroups.x, numberOfWorkgroups.y, numberOfWorkgroups.z}; if (currentTimestampPacketNodes && commandQueue.getCommandStreamReceiver().peekTimestampPacketWriteEnabled()) { auto timestampPacket = currentTimestampPacketNodes->peekNodes().at(currentDispatchIndex)->tag; GpgpuWalkerHelper::setupTimestampPacket(&commandStream, walkerCmd, timestampPacket, TimestampPacket::WriteOperationType::AfterWalker); } KernelCommandsHelper::sendIndirectState( commandStream, dsh, ioh, ssh, kernel, simd, localWorkSizes, offsetInterfaceDescriptorTable, interfaceDescriptorIndex, preemptionMode, walkerCmd, nullptr, true); GpgpuWalkerHelper::setGpgpuWalkerThreadData(walkerCmd, globalOffsets, startWorkGroups, numWorkGroups, localWorkSizes, simd, dim, false, false, *kernel.getKernelInfo().patchInfo.threadPayload); } } // namespace OCLRT