2017-12-21 07:45:38 +08:00
|
|
|
/*
|
2019-01-10 00:07:11 +08:00
|
|
|
* Copyright (C) 2017-2019 Intel Corporation
|
2017-12-21 07:45:38 +08:00
|
|
|
*
|
2018-09-18 15:11:08 +08:00
|
|
|
* SPDX-License-Identifier: MIT
|
2017-12-21 07:45:38 +08:00
|
|
|
*
|
|
|
|
*/
|
|
|
|
|
|
|
|
#pragma once
|
2018-03-30 23:57:51 +08:00
|
|
|
#include "runtime/command_queue/gpgpu_walker.h"
|
2019-02-27 18:39:32 +08:00
|
|
|
#include "runtime/device_queue/device_queue_hw.h"
|
2019-02-13 00:27:13 +08:00
|
|
|
#include "runtime/helpers/hw_helper.h"
|
2017-12-21 07:45:38 +08:00
|
|
|
#include "runtime/helpers/kernel_commands.h"
|
2017-12-20 20:24:19 +08:00
|
|
|
#include "runtime/helpers/preamble.h"
|
2017-12-21 07:45:38 +08:00
|
|
|
#include "runtime/helpers/string.h"
|
|
|
|
#include "runtime/memory_manager/memory_manager.h"
|
2018-12-21 20:05:21 +08:00
|
|
|
#include "runtime/utilities/tag_allocator.h"
|
2017-12-21 07:45:38 +08:00
|
|
|
|
2019-03-26 18:59:46 +08:00
|
|
|
namespace NEO {
|
2017-12-21 07:45:38 +08:00
|
|
|
template <typename GfxFamily>
|
|
|
|
void DeviceQueueHw<GfxFamily>::allocateSlbBuffer() {
|
|
|
|
auto slbSize = getMinimumSlbSize() + getWaCommandsSize();
|
|
|
|
slbSize *= 128; //num of enqueues
|
2017-12-20 20:24:19 +08:00
|
|
|
slbSize += sizeof(MI_BATCH_BUFFER_START);
|
|
|
|
slbSize = alignUp(slbSize, MemoryConstants::pageSize);
|
|
|
|
slbSize += DeviceQueueHw<GfxFamily>::getExecutionModelCleanupSectionSize();
|
|
|
|
slbSize += (4 * MemoryConstants::pageSize); // +4 pages spec restriction
|
2017-12-21 07:45:38 +08:00
|
|
|
slbSize = alignUp(slbSize, MemoryConstants::pageSize);
|
|
|
|
|
2019-04-16 17:29:19 +08:00
|
|
|
slbBuffer = device->getMemoryManager()->allocateGraphicsMemoryWithProperties({slbSize, GraphicsAllocation::AllocationType::DEVICE_QUEUE_BUFFER});
|
2017-12-21 07:45:38 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
template <typename GfxFamily>
|
|
|
|
void DeviceQueueHw<GfxFamily>::resetDeviceQueue() {
|
|
|
|
auto &caps = device->getDeviceInfo();
|
|
|
|
auto igilEventPool = reinterpret_cast<IGIL_EventPool *>(eventPoolBuffer->getUnderlyingBuffer());
|
|
|
|
|
|
|
|
memset(eventPoolBuffer->getUnderlyingBuffer(), 0x0, eventPoolBuffer->getUnderlyingBufferSize());
|
|
|
|
igilEventPool->m_size = caps.maxOnDeviceEvents;
|
|
|
|
|
|
|
|
auto igilCmdQueue = reinterpret_cast<IGIL_CommandQueue *>(queueBuffer->getUnderlyingBuffer());
|
|
|
|
igilQueue = igilCmdQueue;
|
|
|
|
|
|
|
|
igilCmdQueue->m_controls.m_StackSize =
|
|
|
|
static_cast<uint32_t>((stackBuffer->getUnderlyingBufferSize() / sizeof(cl_uint)) - 1);
|
|
|
|
igilCmdQueue->m_controls.m_StackTop =
|
|
|
|
static_cast<uint32_t>((stackBuffer->getUnderlyingBufferSize() / sizeof(cl_uint)) - 1);
|
|
|
|
igilCmdQueue->m_controls.m_PreviousHead = IGIL_DEVICE_QUEUE_HEAD_INIT;
|
|
|
|
igilCmdQueue->m_controls.m_IDTAfterFirstPhase = 1;
|
|
|
|
igilCmdQueue->m_controls.m_CurrentIDToffset = 1;
|
|
|
|
igilCmdQueue->m_controls.m_PreviousStorageTop = static_cast<uint32_t>(queueStorageBuffer->getUnderlyingBufferSize());
|
|
|
|
igilCmdQueue->m_controls.m_PreviousStackTop =
|
|
|
|
static_cast<uint32_t>((stackBuffer->getUnderlyingBufferSize() / sizeof(cl_uint)) - 1);
|
|
|
|
igilCmdQueue->m_controls.m_DebugNextBlockID = 0xFFFFFFFF;
|
|
|
|
igilCmdQueue->m_controls.m_QstorageSize = static_cast<uint32_t>(queueStorageBuffer->getUnderlyingBufferSize());
|
|
|
|
igilCmdQueue->m_controls.m_QstorageTop = static_cast<uint32_t>(queueStorageBuffer->getUnderlyingBufferSize());
|
|
|
|
igilCmdQueue->m_controls.m_IsProfilingEnabled = static_cast<uint32_t>(isProfilingEnabled());
|
|
|
|
igilCmdQueue->m_controls.m_IsSimulation = static_cast<uint32_t>(device->isSimulation());
|
|
|
|
|
|
|
|
igilCmdQueue->m_controls.m_LastScheduleEventNumber = 0;
|
|
|
|
igilCmdQueue->m_controls.m_PreviousNumberOfQueues = 0;
|
|
|
|
igilCmdQueue->m_controls.m_EnqueueMarkerScheduled = 0;
|
|
|
|
igilCmdQueue->m_controls.m_SecondLevelBatchOffset = 0;
|
|
|
|
igilCmdQueue->m_controls.m_TotalNumberOfQueues = 0;
|
|
|
|
igilCmdQueue->m_controls.m_EventTimestampAddress = 0;
|
|
|
|
igilCmdQueue->m_controls.m_ErrorCode = 0;
|
|
|
|
igilCmdQueue->m_controls.m_CurrentScheduleEventNumber = 0;
|
|
|
|
igilCmdQueue->m_controls.m_DummyAtomicOperationPlaceholder = 0x00;
|
|
|
|
igilCmdQueue->m_controls.m_DebugNextBlockGWS = 0;
|
|
|
|
|
|
|
|
// set first stack element in surface at value "1", it protects Scheduler in corner case when StackTop is empty after Child execution
|
|
|
|
auto stack = static_cast<uint32_t *>(stackBuffer->getUnderlyingBuffer());
|
|
|
|
stack += ((stackBuffer->getUnderlyingBufferSize() / sizeof(cl_uint)) - 1);
|
|
|
|
*stack = 1;
|
|
|
|
|
|
|
|
igilCmdQueue->m_head = IGIL_DEVICE_QUEUE_HEAD_INIT;
|
|
|
|
igilCmdQueue->m_size = static_cast<uint32_t>(queueBuffer->getUnderlyingBufferSize() - sizeof(IGIL_CommandQueue));
|
|
|
|
igilCmdQueue->m_magic = IGIL_MAGIC_NUMBER;
|
|
|
|
|
|
|
|
igilCmdQueue->m_controls.m_SchedulerEarlyReturn = DebugManager.flags.SchedulerSimulationReturnInstance.get();
|
|
|
|
igilCmdQueue->m_controls.m_SchedulerEarlyReturnCounter = 0;
|
|
|
|
|
|
|
|
buildSlbDummyCommands();
|
|
|
|
|
|
|
|
igilCmdQueue->m_controls.m_SLBENDoffsetInBytes = -1;
|
|
|
|
|
|
|
|
igilCmdQueue->m_controls.m_CriticalSection = ExecutionModelCriticalSection::Free;
|
|
|
|
|
|
|
|
resetDSH();
|
|
|
|
}
|
|
|
|
|
|
|
|
template <typename GfxFamily>
|
|
|
|
size_t DeviceQueueHw<GfxFamily>::getMinimumSlbSize() {
|
2018-04-24 21:55:24 +08:00
|
|
|
using MEDIA_STATE_FLUSH = typename GfxFamily::MEDIA_STATE_FLUSH;
|
|
|
|
using MEDIA_INTERFACE_DESCRIPTOR_LOAD = typename GfxFamily::MEDIA_INTERFACE_DESCRIPTOR_LOAD;
|
|
|
|
using GPGPU_WALKER = typename GfxFamily::GPGPU_WALKER;
|
|
|
|
|
2017-12-21 07:45:38 +08:00
|
|
|
return sizeof(MEDIA_STATE_FLUSH) +
|
|
|
|
sizeof(MEDIA_INTERFACE_DESCRIPTOR_LOAD) +
|
|
|
|
sizeof(PIPE_CONTROL) +
|
|
|
|
sizeof(GPGPU_WALKER) +
|
|
|
|
sizeof(MEDIA_STATE_FLUSH) +
|
|
|
|
sizeof(PIPE_CONTROL) +
|
|
|
|
DeviceQueueHw<GfxFamily>::getCSPrefetchSize();
|
|
|
|
}
|
|
|
|
|
|
|
|
template <typename GfxFamily>
|
|
|
|
void DeviceQueueHw<GfxFamily>::initPipeControl(PIPE_CONTROL *pc) {
|
2019-01-18 00:10:12 +08:00
|
|
|
*pc = GfxFamily::cmdInitPipeControl;
|
2017-12-21 07:45:38 +08:00
|
|
|
pc->setStateCacheInvalidationEnable(0x1);
|
|
|
|
pc->setDcFlushEnable(true);
|
|
|
|
pc->setPipeControlFlushEnable(true);
|
|
|
|
pc->setTextureCacheInvalidationEnable(true);
|
|
|
|
pc->setCommandStreamerStallEnable(true);
|
|
|
|
}
|
|
|
|
|
|
|
|
template <typename GfxFamily>
|
|
|
|
void DeviceQueueHw<GfxFamily>::buildSlbDummyCommands() {
|
2018-04-24 21:55:24 +08:00
|
|
|
using MEDIA_STATE_FLUSH = typename GfxFamily::MEDIA_STATE_FLUSH;
|
|
|
|
using MEDIA_INTERFACE_DESCRIPTOR_LOAD = typename GfxFamily::MEDIA_INTERFACE_DESCRIPTOR_LOAD;
|
|
|
|
using GPGPU_WALKER = typename GfxFamily::GPGPU_WALKER;
|
|
|
|
|
2017-12-21 07:45:38 +08:00
|
|
|
auto igilCmdQueue = reinterpret_cast<IGIL_CommandQueue *>(queueBuffer->getUnderlyingBuffer());
|
|
|
|
auto slbEndOffset = igilCmdQueue->m_controls.m_SLBENDoffsetInBytes;
|
|
|
|
size_t commandsSize = getMinimumSlbSize() + getWaCommandsSize();
|
|
|
|
size_t numEnqueues = numberOfDeviceEnqueues;
|
|
|
|
|
|
|
|
// buildSlbDummyCommands is called from resetDeviceQueue() - reset slbCS each time
|
|
|
|
slbCS.replaceBuffer(slbBuffer->getUnderlyingBuffer(), slbBuffer->getUnderlyingBufferSize());
|
|
|
|
|
|
|
|
if (slbEndOffset >= 0) {
|
|
|
|
DEBUG_BREAK_IF(slbEndOffset % commandsSize != 0);
|
|
|
|
//We always overwrite at most one enqueue space with BB_START command pointing to cleanup section
|
|
|
|
//if SLBENDoffset is the at the end then BB_START added after scheduler did not corrupt anything so no need to regenerate
|
|
|
|
numEnqueues = (slbEndOffset == static_cast<int>(commandsSize)) ? 0 : 1;
|
|
|
|
slbCS.getSpace(slbEndOffset);
|
|
|
|
}
|
|
|
|
|
|
|
|
for (size_t i = 0; i < numEnqueues; i++) {
|
|
|
|
auto mediaStateFlush = slbCS.getSpaceForCmd<MEDIA_STATE_FLUSH>();
|
2019-01-18 00:10:12 +08:00
|
|
|
*mediaStateFlush = GfxFamily::cmdInitMediaStateFlush;
|
2017-12-21 07:45:38 +08:00
|
|
|
|
|
|
|
addArbCheckCmdWa();
|
|
|
|
|
|
|
|
addMiAtomicCmdWa((uint64_t)&igilCmdQueue->m_controls.m_DummyAtomicOperationPlaceholder);
|
|
|
|
|
|
|
|
auto mediaIdLoad = slbCS.getSpaceForCmd<MEDIA_INTERFACE_DESCRIPTOR_LOAD>();
|
2019-01-18 00:10:12 +08:00
|
|
|
*mediaIdLoad = GfxFamily::cmdInitMediaInterfaceDescriptorLoad;
|
2017-12-21 07:45:38 +08:00
|
|
|
mediaIdLoad->setInterfaceDescriptorTotalLength(2048);
|
|
|
|
|
|
|
|
auto dataStartAddress = colorCalcStateSize;
|
|
|
|
|
|
|
|
mediaIdLoad->setInterfaceDescriptorDataStartAddress(dataStartAddress + sizeof(INTERFACE_DESCRIPTOR_DATA) * schedulerIDIndex);
|
|
|
|
|
|
|
|
addLriCmdWa(true);
|
|
|
|
|
|
|
|
if (isProfilingEnabled()) {
|
|
|
|
addPipeControlCmdWa();
|
|
|
|
auto pipeControl = slbCS.getSpaceForCmd<PIPE_CONTROL>();
|
|
|
|
initPipeControl(pipeControl);
|
|
|
|
|
|
|
|
} else {
|
|
|
|
auto noop = slbCS.getSpace(sizeof(PIPE_CONTROL));
|
|
|
|
memset(noop, 0x0, sizeof(PIPE_CONTROL));
|
|
|
|
addPipeControlCmdWa(true);
|
|
|
|
}
|
|
|
|
|
|
|
|
auto gpgpuWalker = slbCS.getSpaceForCmd<GPGPU_WALKER>();
|
2019-01-18 00:10:12 +08:00
|
|
|
*gpgpuWalker = GfxFamily::cmdInitGpgpuWalker;
|
2017-12-21 07:45:38 +08:00
|
|
|
gpgpuWalker->setSimdSize(GPGPU_WALKER::SIMD_SIZE::SIMD_SIZE_SIMD16);
|
|
|
|
gpgpuWalker->setThreadGroupIdXDimension(1);
|
|
|
|
gpgpuWalker->setThreadGroupIdYDimension(1);
|
|
|
|
gpgpuWalker->setThreadGroupIdZDimension(1);
|
|
|
|
gpgpuWalker->setRightExecutionMask(0xFFFFFFFF);
|
|
|
|
gpgpuWalker->setBottomExecutionMask(0xFFFFFFFF);
|
|
|
|
|
|
|
|
mediaStateFlush = slbCS.getSpaceForCmd<MEDIA_STATE_FLUSH>();
|
2019-01-18 00:10:12 +08:00
|
|
|
*mediaStateFlush = GfxFamily::cmdInitMediaStateFlush;
|
2017-12-21 07:45:38 +08:00
|
|
|
|
|
|
|
addArbCheckCmdWa();
|
|
|
|
|
|
|
|
addPipeControlCmdWa();
|
|
|
|
|
|
|
|
auto pipeControl2 = slbCS.getSpaceForCmd<PIPE_CONTROL>();
|
|
|
|
initPipeControl(pipeControl2);
|
|
|
|
|
|
|
|
addLriCmdWa(false);
|
|
|
|
|
|
|
|
auto prefetch = slbCS.getSpace(getCSPrefetchSize());
|
|
|
|
memset(prefetch, 0x0, getCSPrefetchSize());
|
|
|
|
}
|
|
|
|
|
|
|
|
// always the same BBStart position (after 128 enqueues)
|
|
|
|
auto bbStartOffset = (commandsSize * 128) - slbCS.getUsed();
|
|
|
|
slbCS.getSpace(bbStartOffset);
|
|
|
|
|
|
|
|
auto bbStart = slbCS.getSpaceForCmd<MI_BATCH_BUFFER_START>();
|
2019-01-18 00:10:12 +08:00
|
|
|
*bbStart = GfxFamily::cmdInitBatchBufferStart;
|
2017-12-21 07:45:38 +08:00
|
|
|
auto slbPtr = reinterpret_cast<uintptr_t>(slbBuffer->getUnderlyingBuffer());
|
|
|
|
bbStart->setBatchBufferStartAddressGraphicsaddress472(slbPtr);
|
|
|
|
|
|
|
|
igilCmdQueue->m_controls.m_CleanupSectionSize = 0;
|
|
|
|
igilQueue->m_controls.m_CleanupSectionAddress = 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
template <typename GfxFamily>
|
2018-12-21 20:05:21 +08:00
|
|
|
void DeviceQueueHw<GfxFamily>::addExecutionModelCleanUpSection(Kernel *parentKernel, TagNode<HwTimeStamps> *hwTimeStamp, uint32_t taskCount) {
|
2017-12-21 07:45:38 +08:00
|
|
|
// CleanUp Section
|
|
|
|
auto offset = slbCS.getUsed();
|
|
|
|
auto alignmentSize = alignUp(offset, MemoryConstants::pageSize) - offset;
|
|
|
|
slbCS.getSpace(alignmentSize);
|
|
|
|
offset = slbCS.getUsed();
|
|
|
|
|
|
|
|
igilQueue->m_controls.m_CleanupSectionAddress = ptrOffset(slbBuffer->getGpuAddress(), slbCS.getUsed());
|
2018-03-30 23:57:51 +08:00
|
|
|
GpgpuWalkerHelper<GfxFamily>::applyWADisableLSQCROPERFforOCL(&slbCS, *parentKernel, true);
|
2017-12-21 07:45:38 +08:00
|
|
|
|
|
|
|
using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL;
|
|
|
|
|
|
|
|
if (hwTimeStamp != nullptr) {
|
2019-04-11 19:47:38 +08:00
|
|
|
uint64_t timeStampAddress = hwTimeStamp->getGpuAddress() + offsetof(HwTimeStamps, ContextCompleteTS);
|
|
|
|
igilQueue->m_controls.m_EventTimestampAddress = timeStampAddress;
|
2017-12-21 07:45:38 +08:00
|
|
|
|
2019-04-11 19:47:38 +08:00
|
|
|
addProfilingEndCmds(timeStampAddress);
|
2017-12-21 07:45:38 +08:00
|
|
|
|
|
|
|
//enable preemption
|
|
|
|
addLriCmd(false);
|
|
|
|
}
|
|
|
|
|
|
|
|
uint64_t criticalSectionAddress = (uint64_t)&igilQueue->m_controls.m_CriticalSection;
|
|
|
|
|
|
|
|
addPipeControlCmdWa();
|
|
|
|
|
2019-03-21 00:08:05 +08:00
|
|
|
PipeControlHelper<GfxFamily>::obtainPipeControlAndProgramPostSyncOperation(&slbCS, PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, criticalSectionAddress, ExecutionModelCriticalSection::Free, false);
|
2017-12-21 07:45:38 +08:00
|
|
|
|
2018-11-29 18:39:10 +08:00
|
|
|
uint64_t tagAddress = reinterpret_cast<uint64_t>(device->getDefaultEngine().commandStreamReceiver->getTagAddress());
|
2017-12-21 07:45:38 +08:00
|
|
|
|
|
|
|
addPipeControlCmdWa();
|
|
|
|
|
2019-03-21 00:08:05 +08:00
|
|
|
PipeControlHelper<GfxFamily>::obtainPipeControlAndProgramPostSyncOperation(&slbCS, PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, tagAddress, taskCount, false);
|
2017-12-21 07:45:38 +08:00
|
|
|
|
2017-12-20 20:24:19 +08:00
|
|
|
addMediaStateClearCmds();
|
|
|
|
|
2017-12-21 07:45:38 +08:00
|
|
|
auto pBBE = slbCS.getSpaceForCmd<MI_BATCH_BUFFER_END>();
|
2019-01-18 00:10:12 +08:00
|
|
|
*pBBE = GfxFamily::cmdInitBatchBufferEnd;
|
2017-12-21 07:45:38 +08:00
|
|
|
|
|
|
|
igilQueue->m_controls.m_CleanupSectionSize = (uint32_t)(slbCS.getUsed() - offset);
|
|
|
|
}
|
|
|
|
|
|
|
|
template <typename GfxFamily>
|
|
|
|
void DeviceQueueHw<GfxFamily>::resetDSH() {
|
|
|
|
if (heaps[IndirectHeap::DYNAMIC_STATE]) {
|
2018-03-05 18:03:38 +08:00
|
|
|
heaps[IndirectHeap::DYNAMIC_STATE]->replaceBuffer(heaps[IndirectHeap::DYNAMIC_STATE]->getCpuBase(), heaps[IndirectHeap::DYNAMIC_STATE]->getMaxAvailableSpace());
|
2017-12-21 07:45:38 +08:00
|
|
|
heaps[IndirectHeap::DYNAMIC_STATE]->getSpace(colorCalcStateSize);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
template <typename GfxFamily>
|
|
|
|
IndirectHeap *DeviceQueueHw<GfxFamily>::getIndirectHeap(IndirectHeap::Type type) {
|
|
|
|
|
|
|
|
if (!heaps[type]) {
|
|
|
|
switch (type) {
|
|
|
|
case IndirectHeap::DYNAMIC_STATE: {
|
|
|
|
heaps[type] = new IndirectHeap(dshBuffer);
|
|
|
|
// get space for colorCalc and 2 ID tables at the beginning
|
|
|
|
heaps[type]->getSpace(colorCalcStateSize);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
default:
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return heaps[type];
|
|
|
|
}
|
|
|
|
|
|
|
|
template <typename GfxFamily>
|
2018-04-05 21:12:28 +08:00
|
|
|
void DeviceQueueHw<GfxFamily>::setupIndirectState(IndirectHeap &surfaceStateHeap, IndirectHeap &dynamicStateHeap, Kernel *parentKernel, uint32_t parentIDCount) {
|
2018-04-24 21:55:24 +08:00
|
|
|
using GPGPU_WALKER = typename GfxFamily::GPGPU_WALKER;
|
2018-04-05 21:12:28 +08:00
|
|
|
void *pDSH = dynamicStateHeap.getCpuBase();
|
2017-12-21 07:45:38 +08:00
|
|
|
|
|
|
|
// Set scheduler ID to last entry in first table, it will have ID == 0, blocks will have following entries.
|
|
|
|
auto igilCmdQueue = reinterpret_cast<IGIL_CommandQueue *>(queueBuffer->getUnderlyingBuffer());
|
|
|
|
igilCmdQueue->m_controls.m_IDTstart = colorCalcStateSize + sizeof(INTERFACE_DESCRIPTOR_DATA) * (interfaceDescriptorEntries - 2);
|
|
|
|
|
|
|
|
// Parent's dsh is located after ColorCalcState and 2 ID tables
|
|
|
|
igilCmdQueue->m_controls.m_DynamicHeapStart = offsetDsh + alignUp((uint32_t)parentKernel->getDynamicStateHeapSize(), GPGPU_WALKER::INDIRECTDATASTARTADDRESS_ALIGN_SIZE);
|
|
|
|
igilCmdQueue->m_controls.m_DynamicHeapSizeInBytes = (uint32_t)dshBuffer->getUnderlyingBufferSize();
|
|
|
|
|
|
|
|
igilCmdQueue->m_controls.m_CurrentDSHoffset = igilCmdQueue->m_controls.m_DynamicHeapStart;
|
|
|
|
igilCmdQueue->m_controls.m_ParentDSHOffset = offsetDsh;
|
|
|
|
|
|
|
|
uint32_t blockIndex = parentIDCount;
|
|
|
|
|
|
|
|
pDSH = ptrOffset(pDSH, colorCalcStateSize);
|
|
|
|
|
|
|
|
INTERFACE_DESCRIPTOR_DATA *pIDDestination = static_cast<INTERFACE_DESCRIPTOR_DATA *>(pDSH);
|
|
|
|
|
|
|
|
BlockKernelManager *blockManager = parentKernel->getProgram()->getBlockKernelManager();
|
|
|
|
uint32_t blockCount = static_cast<uint32_t>(blockManager->getCount());
|
|
|
|
|
|
|
|
uint32_t maxBindingTableCount = 0;
|
|
|
|
uint32_t totalBlockSSHSize = 0;
|
|
|
|
|
|
|
|
igilCmdQueue->m_controls.m_StartBlockID = blockIndex;
|
|
|
|
|
|
|
|
for (uint32_t i = 0; i < blockCount; i++) {
|
|
|
|
const KernelInfo *pBlockInfo = blockManager->getBlockKernelInfo(i);
|
|
|
|
|
2018-03-27 18:55:20 +08:00
|
|
|
auto blockAllocation = pBlockInfo->getGraphicsAllocation();
|
|
|
|
DEBUG_BREAK_IF(!blockAllocation);
|
|
|
|
|
|
|
|
auto gpuAddress = blockAllocation ? blockAllocation->getGpuAddressToPatch() : 0llu;
|
|
|
|
|
2017-12-21 07:45:38 +08:00
|
|
|
auto bindingTableCount = pBlockInfo->patchInfo.bindingTableState->Count;
|
|
|
|
maxBindingTableCount = std::max(maxBindingTableCount, bindingTableCount);
|
|
|
|
|
|
|
|
totalBlockSSHSize += alignUp(pBlockInfo->heapInfo.pKernelHeader->SurfaceStateHeapSize, BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE);
|
|
|
|
|
|
|
|
auto btOffset = KernelCommandsHelper<GfxFamily>::pushBindingTableAndSurfaceStates(surfaceStateHeap, *pBlockInfo);
|
|
|
|
|
|
|
|
parentKernel->setReflectionSurfaceBlockBtOffset(i, static_cast<uint32_t>(btOffset));
|
|
|
|
|
|
|
|
// Determine SIMD size
|
|
|
|
uint32_t simd = pBlockInfo->getMaxSimdSize();
|
|
|
|
DEBUG_BREAK_IF(pBlockInfo->patchInfo.interfaceDescriptorData == nullptr);
|
|
|
|
|
|
|
|
uint32_t idOffset = pBlockInfo->patchInfo.interfaceDescriptorData->Offset;
|
|
|
|
const INTERFACE_DESCRIPTOR_DATA *pBlockID = static_cast<const INTERFACE_DESCRIPTOR_DATA *>(ptrOffset(pBlockInfo->heapInfo.pDsh, idOffset));
|
|
|
|
|
|
|
|
pIDDestination[blockIndex + i] = *pBlockID;
|
2018-03-27 18:55:20 +08:00
|
|
|
pIDDestination[blockIndex + i].setKernelStartPointerHigh(gpuAddress >> 32);
|
|
|
|
pIDDestination[blockIndex + i].setKernelStartPointer((uint32_t)gpuAddress);
|
2017-12-21 07:45:38 +08:00
|
|
|
pIDDestination[blockIndex + i].setBarrierEnable(pBlockInfo->patchInfo.executionEnvironment->HasBarriers > 0);
|
|
|
|
pIDDestination[blockIndex + i].setDenormMode(INTERFACE_DESCRIPTOR_DATA::DENORM_MODE_SETBYKERNEL);
|
|
|
|
|
|
|
|
// Set offset to sampler states, block's DHSOffset is added by scheduler
|
|
|
|
pIDDestination[blockIndex + i].setSamplerStatePointer(static_cast<uint32_t>(pBlockInfo->getBorderColorStateSize()));
|
|
|
|
|
|
|
|
auto threadPayload = pBlockInfo->patchInfo.threadPayload;
|
|
|
|
DEBUG_BREAK_IF(nullptr == threadPayload);
|
|
|
|
|
|
|
|
auto numChannels = PerThreadDataHelper::getNumLocalIdChannels(*threadPayload);
|
|
|
|
auto sizePerThreadData = getPerThreadSizeLocalIDs(simd, numChannels);
|
|
|
|
|
|
|
|
auto numGrfPerThreadData = static_cast<uint32_t>(sizePerThreadData / sizeof(GRF));
|
|
|
|
|
|
|
|
// HW requires a minimum of 1 GRF of perThreadData for each thread in a thread group
|
|
|
|
// when sizeCrossThreadData != 0
|
|
|
|
numGrfPerThreadData = std::max(numGrfPerThreadData, 1u);
|
|
|
|
pIDDestination[blockIndex + i].setConstantIndirectUrbEntryReadLength(numGrfPerThreadData);
|
|
|
|
}
|
|
|
|
|
|
|
|
igilCmdQueue->m_controls.m_BTmaxSize = alignUp(maxBindingTableCount * (uint32_t)sizeof(BINDING_TABLE_STATE), INTERFACE_DESCRIPTOR_DATA::BINDINGTABLEPOINTER::BINDINGTABLEPOINTER_ALIGN_SIZE);
|
|
|
|
igilCmdQueue->m_controls.m_BTbaseOffset = alignUp((uint32_t)surfaceStateHeap.getUsed(), INTERFACE_DESCRIPTOR_DATA::BINDINGTABLEPOINTER::BINDINGTABLEPOINTER_ALIGN_SIZE);
|
|
|
|
igilCmdQueue->m_controls.m_CurrentSSHoffset = igilCmdQueue->m_controls.m_BTbaseOffset;
|
|
|
|
}
|
|
|
|
|
|
|
|
template <typename GfxFamily>
|
|
|
|
size_t DeviceQueueHw<GfxFamily>::setSchedulerCrossThreadData(SchedulerKernel &scheduler) {
|
|
|
|
using INTERFACE_DESCRIPTOR_DATA = typename GfxFamily::INTERFACE_DESCRIPTOR_DATA;
|
|
|
|
size_t offset = dshBuffer->getUnderlyingBufferSize() - scheduler.getCurbeSize() - 4096; // Page size padding
|
|
|
|
|
|
|
|
auto igilCmdQueue = reinterpret_cast<IGIL_CommandQueue *>(queueBuffer->getUnderlyingBuffer());
|
|
|
|
igilCmdQueue->m_controls.m_SchedulerDSHOffset = (uint32_t)offset;
|
|
|
|
igilCmdQueue->m_controls.m_SchedulerConstantBufferSize = (uint32_t)scheduler.getCurbeSize();
|
|
|
|
|
|
|
|
return offset;
|
|
|
|
}
|
|
|
|
|
|
|
|
template <typename GfxFamily>
|
2019-04-01 16:10:01 +08:00
|
|
|
void DeviceQueueHw<GfxFamily>::dispatchScheduler(LinearStream &commandStream, SchedulerKernel &scheduler, PreemptionMode preemptionMode, IndirectHeap *ssh, IndirectHeap *dsh) {
|
|
|
|
GpgpuWalkerHelper<GfxFamily>::dispatchScheduler(commandStream,
|
2018-03-30 23:57:51 +08:00
|
|
|
*this,
|
|
|
|
preemptionMode,
|
2018-04-05 21:12:28 +08:00
|
|
|
scheduler,
|
|
|
|
ssh,
|
|
|
|
dsh);
|
2017-12-21 07:45:38 +08:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
template <typename GfxFamily>
|
|
|
|
size_t DeviceQueueHw<GfxFamily>::getCSPrefetchSize() {
|
|
|
|
return 512;
|
|
|
|
}
|
|
|
|
|
|
|
|
template <typename GfxFamily>
|
|
|
|
void DeviceQueueHw<GfxFamily>::addLriCmd(bool setArbCheck) {
|
|
|
|
using MI_LOAD_REGISTER_IMM = typename GfxFamily::MI_LOAD_REGISTER_IMM;
|
|
|
|
auto lri = slbCS.getSpaceForCmd<MI_LOAD_REGISTER_IMM>();
|
2019-01-18 00:10:12 +08:00
|
|
|
*lri = GfxFamily::cmdInitLoadRegisterImm;
|
2017-12-21 07:45:38 +08:00
|
|
|
lri->setRegisterOffset(0x2248); // CTXT_PREMP_DBG offset
|
|
|
|
if (setArbCheck)
|
|
|
|
lri->setDataDword(0x00000100); // set only bit 8 (Preempt On MI_ARB_CHK Only)
|
|
|
|
else
|
|
|
|
lri->setDataDword(0x0);
|
|
|
|
}
|
|
|
|
|
2017-12-20 20:24:19 +08:00
|
|
|
template <typename GfxFamily>
|
|
|
|
void DeviceQueueHw<GfxFamily>::addMediaStateClearCmds() {
|
|
|
|
typedef typename GfxFamily::MEDIA_VFE_STATE MEDIA_VFE_STATE;
|
|
|
|
|
|
|
|
addPipeControlCmdWa();
|
|
|
|
|
|
|
|
auto pipeControl = slbCS.getSpaceForCmd<PIPE_CONTROL>();
|
2019-01-18 00:10:12 +08:00
|
|
|
*pipeControl = GfxFamily::cmdInitPipeControl;
|
2017-12-20 20:24:19 +08:00
|
|
|
pipeControl->setGenericMediaStateClear(true);
|
|
|
|
pipeControl->setCommandStreamerStallEnable(true);
|
|
|
|
|
2018-05-30 22:13:53 +08:00
|
|
|
addDcFlushToPipeControlWa(pipeControl);
|
|
|
|
|
2017-12-20 20:24:19 +08:00
|
|
|
PreambleHelper<GfxFamily>::programVFEState(&slbCS, device->getHardwareInfo(), 0, 0);
|
|
|
|
}
|
|
|
|
|
|
|
|
template <typename GfxFamily>
|
|
|
|
size_t DeviceQueueHw<GfxFamily>::getMediaStateClearCmdsSize() {
|
2018-04-24 21:55:24 +08:00
|
|
|
using MEDIA_VFE_STATE = typename GfxFamily::MEDIA_VFE_STATE;
|
2017-12-20 20:24:19 +08:00
|
|
|
// PC with GenreicMediaStateClear + WA PC
|
|
|
|
size_t size = 2 * sizeof(PIPE_CONTROL);
|
|
|
|
|
|
|
|
// VFE state cmds
|
|
|
|
size += sizeof(PIPE_CONTROL);
|
|
|
|
size += sizeof(MEDIA_VFE_STATE);
|
|
|
|
return size;
|
|
|
|
}
|
|
|
|
|
|
|
|
template <typename GfxFamily>
|
|
|
|
size_t DeviceQueueHw<GfxFamily>::getExecutionModelCleanupSectionSize() {
|
|
|
|
size_t totalSize = 0;
|
|
|
|
totalSize += sizeof(PIPE_CONTROL) +
|
|
|
|
2 * sizeof(MI_LOAD_REGISTER_REG) +
|
|
|
|
sizeof(MI_LOAD_REGISTER_IMM) +
|
|
|
|
sizeof(PIPE_CONTROL) +
|
|
|
|
sizeof(MI_MATH) +
|
|
|
|
NUM_ALU_INST_FOR_READ_MODIFY_WRITE * sizeof(MI_MATH_ALU_INST_INLINE);
|
|
|
|
|
|
|
|
totalSize += getProfilingEndCmdsSize();
|
|
|
|
totalSize += getMediaStateClearCmdsSize();
|
|
|
|
|
|
|
|
totalSize += 4 * sizeof(PIPE_CONTROL);
|
|
|
|
totalSize += sizeof(MI_BATCH_BUFFER_END);
|
|
|
|
return totalSize;
|
|
|
|
}
|
|
|
|
|
|
|
|
template <typename GfxFamily>
|
|
|
|
size_t DeviceQueueHw<GfxFamily>::getProfilingEndCmdsSize() {
|
|
|
|
size_t size = 0;
|
2018-02-23 16:09:57 +08:00
|
|
|
size += sizeof(PIPE_CONTROL) + sizeof(MI_STORE_REGISTER_MEM);
|
2017-12-20 20:24:19 +08:00
|
|
|
size += sizeof(MI_LOAD_REGISTER_IMM);
|
|
|
|
return size;
|
|
|
|
}
|
|
|
|
|
2018-05-30 22:13:53 +08:00
|
|
|
template <typename GfxFamily>
|
|
|
|
void DeviceQueueHw<GfxFamily>::addDcFlushToPipeControlWa(PIPE_CONTROL *pc) {}
|
|
|
|
|
2019-03-26 18:59:46 +08:00
|
|
|
} // namespace NEO
|