Files
compute-runtime/runtime/helpers/kernel_commands.h
Chodor, Jaroslaw 044fd1ab81 Fixing IntDescr programming for blocked cmd and MT
Fixing InterfaceDescriptor programming for
blocked commands when MidThread preemption is
enabled
Additionally, fixing a couple of tests that block
global preemption enabling in ULTs

Change-Id: I454c9608f8606f23d7446785ac24c7c7d8701ae0
2018-01-17 12:19:07 +01:00

180 lines
7.5 KiB
C++

/*
* Copyright (c) 2017, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#pragma once
#include "runtime/built_ins/built_ins.h"
#include "runtime/device_queue/device_queue.h"
#include "runtime/helpers/basic_math.h"
#include "runtime/helpers/per_thread_data.h"
#include "runtime/indirect_heap/indirect_heap.h"
#include "runtime/kernel/kernel.h"
#include <cstdint>
#include <cstddef>
#include <algorithm>
namespace OCLRT {
// Forward declarations for types that appear in this header only by name.
// LinearStream and MultiDispatchInfo are used below purely as reference
// parameters, so an incomplete type is sufficient for them.
// NOTE(review): IndirectHeap is also fully declared via the
// "runtime/indirect_heap/indirect_heap.h" include above (its nested Type enum
// is needed below), and CrossThreadInfo is not referenced in this header --
// confirm whether these two declarations are still required.
class LinearStream;
class IndirectHeap;
struct CrossThreadInfo;
struct MultiDispatchInfo;
template <typename GfxFamily>
struct KernelCommandsHelper : public PerThreadDataHelper {
typedef typename GfxFamily::BINDING_TABLE_STATE BINDING_TABLE_STATE;
typedef typename GfxFamily::RENDER_SURFACE_STATE RENDER_SURFACE_STATE;
static uint32_t computeSlmValues(uint32_t valueIn);
static size_t copyKernelBinary(
IndirectHeap &indirectHeap,
const KernelInfo &kernelInfo);
static size_t sendInterfaceDescriptorData(
const IndirectHeap &indirectHeap,
uint64_t offsetInterfaceDescriptor,
uint64_t kernelStartOffset,
size_t sizeCrossThreadData,
size_t sizePerThreadData,
size_t bindingTablePointer,
size_t offsetSamplerState,
uint32_t numSamplers,
uint32_t threadsPerThreadGroup,
uint32_t sizeSlm,
bool barrierEnable);
static void sendMediaStateFlush(
LinearStream &commandStream,
size_t offsetInterfaceDescriptorData);
static void sendMediaInterfaceDescriptorLoad(
LinearStream &commandStream,
size_t offsetInterfaceDescriptorData,
size_t sizeInterfaceDescriptorData);
static size_t sendCrossThreadData(
IndirectHeap &indirectHeap,
const Kernel &kernel);
static size_t pushBindingTableAndSurfaceStates(IndirectHeap &dstHeap, const KernelInfo &srcKernelInfo,
const void *srcKernelSsh, size_t srcKernelSshSize);
static size_t pushBindingTableAndSurfaceStates(IndirectHeap &dstHeap, const KernelInfo &srcKernelInfo) {
return pushBindingTableAndSurfaceStates(dstHeap, srcKernelInfo, srcKernelInfo.heapInfo.pSsh,
srcKernelInfo.heapInfo.pKernelHeader->SurfaceStateHeapSize);
}
static size_t pushBindingTableAndSurfaceStates(IndirectHeap &dstHeap, const Kernel &srcKernel) {
return pushBindingTableAndSurfaceStates(dstHeap, srcKernel.getKernelInfo(),
srcKernel.getSurfaceStateHeap(), srcKernel.getSurfaceStateHeapSize());
}
static size_t sendIndirectState(
LinearStream &commandStream,
IndirectHeap &dsh,
IndirectHeap &ih,
size_t ihReservedBlockSize,
IndirectHeap &ioh,
IndirectHeap &ssh,
const Kernel &kernel,
uint32_t simd,
const size_t localWorkSize[3],
const uint64_t offsetInterfaceDescriptorTable,
const uint32_t interfaceDescriptorIndex);
static size_t getSizeRequiredCS();
static bool isPipeControlWArequired();
static size_t getSizeRequiredDSH(
const Kernel &kernel);
static size_t getSizeRequiredIH(
const Kernel &kernel);
static size_t getSizeRequiredIOH(
const Kernel &kernel,
size_t localWorkSize = 256);
static size_t getSizeRequiredSSH(
const Kernel &kernel);
static size_t getTotalSizeRequiredDSH(
const MultiDispatchInfo &multiDispatchInfo);
static size_t getTotalSizeRequiredIH(
const MultiDispatchInfo &multiDispatchInfo);
static size_t getTotalSizeRequiredIOH(
const MultiDispatchInfo &multiDispatchInfo,
size_t localWorkSize = 256);
static size_t getTotalSizeRequiredSSH(
const MultiDispatchInfo &multiDispatchInfo);
template <IndirectHeap::Type heapType>
static size_t getSizeRequiredForExecutionModel(const Kernel &kernel) {
typedef typename GfxFamily::BINDING_TABLE_STATE BINDING_TABLE_STATE;
size_t totalSize = 0;
if (kernel.isParentKernel) {
BlockKernelManager *blockManager = kernel.getProgram()->getBlockKernelManager();
uint32_t blockCount = static_cast<uint32_t>(blockManager->getCount());
uint32_t maxBindingTableCount = 0;
if (heapType == IndirectHeap::SURFACE_STATE || heapType == IndirectHeap::INSTRUCTION) {
if (heapType == IndirectHeap::SURFACE_STATE) {
totalSize = BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE - 1;
} else {
totalSize = Kernel::kernelBinaryAlignement - 1;
}
for (uint32_t i = 0; i < blockCount; i++) {
const KernelInfo *pBlockInfo = blockManager->getBlockKernelInfo(i);
if (heapType == IndirectHeap::SURFACE_STATE) {
totalSize += pBlockInfo->heapInfo.pKernelHeader->SurfaceStateHeapSize;
totalSize = alignUp(totalSize, BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE);
maxBindingTableCount = std::max(maxBindingTableCount, pBlockInfo->patchInfo.bindingTableState->Count);
} else {
totalSize += pBlockInfo->heapInfo.pKernelHeader->KernelHeapSize;
totalSize = alignUp(totalSize, Kernel::kernelBinaryAlignement);
}
}
}
if (heapType == IndirectHeap::INSTRUCTION || heapType == IndirectHeap::INDIRECT_OBJECT || heapType == IndirectHeap::SURFACE_STATE) {
BuiltIns &builtIns = BuiltIns::getInstance();
SchedulerKernel &scheduler = builtIns.getSchedulerKernel(kernel.getContext());
if (heapType == IndirectHeap::INSTRUCTION) {
totalSize += getSizeRequiredIH(scheduler);
} else if (heapType == IndirectHeap::INDIRECT_OBJECT) {
totalSize += getSizeRequiredIOH(scheduler);
} else {
totalSize += getSizeRequiredSSH(scheduler);
totalSize += maxBindingTableCount * sizeof(BINDING_TABLE_STATE) * DeviceQueue::interfaceDescriptorEntries;
totalSize = alignUp(totalSize, BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE);
}
}
}
return totalSize;
}
static const size_t alignInterfaceDescriptorData = 64 * sizeof(uint8_t);
static const uint32_t alignIndirectStatePointer = 64 * sizeof(uint8_t);
};
} // namespace OCLRT