mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-03 06:49:52 +08:00
Extract functions to device_queue_hw_base.inl
Change-Id: I91216453effadf7290b6364bfd442704add97566
Related-To: NEO-3016
Signed-off-by: Maciej Dziuban <maciej.dziuban@intel.com>
This commit is contained in:
committed by
sys_ocldev
parent
2e6e791a1c
commit
e67879ffca
239
runtime/device_queue/device_queue_hw_base.inl
Normal file
239
runtime/device_queue/device_queue_hw_base.inl
Normal file
@@ -0,0 +1,239 @@
|
||||
/*
|
||||
* Copyright (C) 2019 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include "runtime/command_queue/gpgpu_walker.h"
|
||||
#include "runtime/device_queue/device_queue_hw.h"
|
||||
#include "runtime/helpers/hw_helper.h"
|
||||
#include "runtime/helpers/kernel_commands.h"
|
||||
#include "runtime/helpers/preamble.h"
|
||||
#include "runtime/helpers/string.h"
|
||||
#include "runtime/memory_manager/memory_manager.h"
|
||||
#include "runtime/utilities/tag_allocator.h"
|
||||
|
||||
namespace NEO {
|
||||
template <typename GfxFamily>
|
||||
void DeviceQueueHw<GfxFamily>::allocateSlbBuffer() {
|
||||
auto slbSize = getMinimumSlbSize() + getWaCommandsSize();
|
||||
slbSize *= 128; //num of enqueues
|
||||
slbSize += sizeof(MI_BATCH_BUFFER_START);
|
||||
slbSize = alignUp(slbSize, MemoryConstants::pageSize);
|
||||
slbSize += DeviceQueueHw<GfxFamily>::getExecutionModelCleanupSectionSize();
|
||||
slbSize += (4 * MemoryConstants::pageSize); // +4 pages spec restriction
|
||||
slbSize = alignUp(slbSize, MemoryConstants::pageSize);
|
||||
|
||||
slbBuffer = device->getMemoryManager()->allocateGraphicsMemoryWithProperties({slbSize, GraphicsAllocation::AllocationType::DEVICE_QUEUE_BUFFER});
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void DeviceQueueHw<GfxFamily>::resetDeviceQueue() {
|
||||
auto &caps = device->getDeviceInfo();
|
||||
auto igilEventPool = reinterpret_cast<IGIL_EventPool *>(eventPoolBuffer->getUnderlyingBuffer());
|
||||
|
||||
memset(eventPoolBuffer->getUnderlyingBuffer(), 0x0, eventPoolBuffer->getUnderlyingBufferSize());
|
||||
igilEventPool->m_size = caps.maxOnDeviceEvents;
|
||||
|
||||
auto igilCmdQueue = reinterpret_cast<IGIL_CommandQueue *>(queueBuffer->getUnderlyingBuffer());
|
||||
igilQueue = igilCmdQueue;
|
||||
|
||||
igilCmdQueue->m_controls.m_StackSize =
|
||||
static_cast<uint32_t>((stackBuffer->getUnderlyingBufferSize() / sizeof(cl_uint)) - 1);
|
||||
igilCmdQueue->m_controls.m_StackTop =
|
||||
static_cast<uint32_t>((stackBuffer->getUnderlyingBufferSize() / sizeof(cl_uint)) - 1);
|
||||
igilCmdQueue->m_controls.m_PreviousHead = IGIL_DEVICE_QUEUE_HEAD_INIT;
|
||||
igilCmdQueue->m_controls.m_IDTAfterFirstPhase = 1;
|
||||
igilCmdQueue->m_controls.m_CurrentIDToffset = 1;
|
||||
igilCmdQueue->m_controls.m_PreviousStorageTop = static_cast<uint32_t>(queueStorageBuffer->getUnderlyingBufferSize());
|
||||
igilCmdQueue->m_controls.m_PreviousStackTop =
|
||||
static_cast<uint32_t>((stackBuffer->getUnderlyingBufferSize() / sizeof(cl_uint)) - 1);
|
||||
igilCmdQueue->m_controls.m_DebugNextBlockID = 0xFFFFFFFF;
|
||||
igilCmdQueue->m_controls.m_QstorageSize = static_cast<uint32_t>(queueStorageBuffer->getUnderlyingBufferSize());
|
||||
igilCmdQueue->m_controls.m_QstorageTop = static_cast<uint32_t>(queueStorageBuffer->getUnderlyingBufferSize());
|
||||
igilCmdQueue->m_controls.m_IsProfilingEnabled = static_cast<uint32_t>(isProfilingEnabled());
|
||||
igilCmdQueue->m_controls.m_IsSimulation = static_cast<uint32_t>(device->isSimulation());
|
||||
|
||||
igilCmdQueue->m_controls.m_LastScheduleEventNumber = 0;
|
||||
igilCmdQueue->m_controls.m_PreviousNumberOfQueues = 0;
|
||||
igilCmdQueue->m_controls.m_EnqueueMarkerScheduled = 0;
|
||||
igilCmdQueue->m_controls.m_SecondLevelBatchOffset = 0;
|
||||
igilCmdQueue->m_controls.m_TotalNumberOfQueues = 0;
|
||||
igilCmdQueue->m_controls.m_EventTimestampAddress = 0;
|
||||
igilCmdQueue->m_controls.m_ErrorCode = 0;
|
||||
igilCmdQueue->m_controls.m_CurrentScheduleEventNumber = 0;
|
||||
igilCmdQueue->m_controls.m_DummyAtomicOperationPlaceholder = 0x00;
|
||||
igilCmdQueue->m_controls.m_DebugNextBlockGWS = 0;
|
||||
|
||||
// set first stack element in surface at value "1", it protects Scheduler in corner case when StackTop is empty after Child execution
|
||||
auto stack = static_cast<uint32_t *>(stackBuffer->getUnderlyingBuffer());
|
||||
stack += ((stackBuffer->getUnderlyingBufferSize() / sizeof(cl_uint)) - 1);
|
||||
*stack = 1;
|
||||
|
||||
igilCmdQueue->m_head = IGIL_DEVICE_QUEUE_HEAD_INIT;
|
||||
igilCmdQueue->m_size = static_cast<uint32_t>(queueBuffer->getUnderlyingBufferSize() - sizeof(IGIL_CommandQueue));
|
||||
igilCmdQueue->m_magic = IGIL_MAGIC_NUMBER;
|
||||
|
||||
igilCmdQueue->m_controls.m_SchedulerEarlyReturn = DebugManager.flags.SchedulerSimulationReturnInstance.get();
|
||||
igilCmdQueue->m_controls.m_SchedulerEarlyReturnCounter = 0;
|
||||
|
||||
buildSlbDummyCommands();
|
||||
|
||||
igilCmdQueue->m_controls.m_SLBENDoffsetInBytes = -1;
|
||||
|
||||
igilCmdQueue->m_controls.m_CriticalSection = ExecutionModelCriticalSection::Free;
|
||||
|
||||
resetDSH();
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void DeviceQueueHw<GfxFamily>::initPipeControl(PIPE_CONTROL *pc) {
|
||||
*pc = GfxFamily::cmdInitPipeControl;
|
||||
pc->setStateCacheInvalidationEnable(0x1);
|
||||
pc->setDcFlushEnable(true);
|
||||
pc->setPipeControlFlushEnable(true);
|
||||
pc->setTextureCacheInvalidationEnable(true);
|
||||
pc->setCommandStreamerStallEnable(true);
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void DeviceQueueHw<GfxFamily>::addExecutionModelCleanUpSection(Kernel *parentKernel, TagNode<HwTimeStamps> *hwTimeStamp, uint32_t taskCount) {
|
||||
// CleanUp Section
|
||||
auto offset = slbCS.getUsed();
|
||||
auto alignmentSize = alignUp(offset, MemoryConstants::pageSize) - offset;
|
||||
slbCS.getSpace(alignmentSize);
|
||||
offset = slbCS.getUsed();
|
||||
|
||||
igilQueue->m_controls.m_CleanupSectionAddress = ptrOffset(slbBuffer->getGpuAddress(), slbCS.getUsed());
|
||||
GpgpuWalkerHelper<GfxFamily>::applyWADisableLSQCROPERFforOCL(&slbCS, *parentKernel, true);
|
||||
|
||||
using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL;
|
||||
|
||||
if (hwTimeStamp != nullptr) {
|
||||
uint64_t timeStampAddress = hwTimeStamp->getGpuAddress() + offsetof(HwTimeStamps, ContextCompleteTS);
|
||||
igilQueue->m_controls.m_EventTimestampAddress = timeStampAddress;
|
||||
|
||||
addProfilingEndCmds(timeStampAddress);
|
||||
|
||||
//enable preemption
|
||||
addLriCmd(false);
|
||||
}
|
||||
|
||||
uint64_t criticalSectionAddress = (uint64_t)&igilQueue->m_controls.m_CriticalSection;
|
||||
|
||||
addPipeControlCmdWa();
|
||||
|
||||
PipeControlHelper<GfxFamily>::obtainPipeControlAndProgramPostSyncOperation(&slbCS, PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, criticalSectionAddress, ExecutionModelCriticalSection::Free, false);
|
||||
|
||||
uint64_t tagAddress = reinterpret_cast<uint64_t>(device->getDefaultEngine().commandStreamReceiver->getTagAddress());
|
||||
|
||||
addPipeControlCmdWa();
|
||||
|
||||
PipeControlHelper<GfxFamily>::obtainPipeControlAndProgramPostSyncOperation(&slbCS, PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, tagAddress, taskCount, false);
|
||||
|
||||
addMediaStateClearCmds();
|
||||
|
||||
auto pBBE = slbCS.getSpaceForCmd<MI_BATCH_BUFFER_END>();
|
||||
*pBBE = GfxFamily::cmdInitBatchBufferEnd;
|
||||
|
||||
igilQueue->m_controls.m_CleanupSectionSize = (uint32_t)(slbCS.getUsed() - offset);
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void DeviceQueueHw<GfxFamily>::resetDSH() {
|
||||
if (heaps[IndirectHeap::DYNAMIC_STATE]) {
|
||||
heaps[IndirectHeap::DYNAMIC_STATE]->replaceBuffer(heaps[IndirectHeap::DYNAMIC_STATE]->getCpuBase(), heaps[IndirectHeap::DYNAMIC_STATE]->getMaxAvailableSpace());
|
||||
heaps[IndirectHeap::DYNAMIC_STATE]->getSpace(colorCalcStateSize);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
IndirectHeap *DeviceQueueHw<GfxFamily>::getIndirectHeap(IndirectHeap::Type type) {
|
||||
|
||||
if (!heaps[type]) {
|
||||
switch (type) {
|
||||
case IndirectHeap::DYNAMIC_STATE: {
|
||||
heaps[type] = new IndirectHeap(dshBuffer);
|
||||
// get space for colorCalc and 2 ID tables at the beginning
|
||||
heaps[type]->getSpace(colorCalcStateSize);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
return heaps[type];
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
size_t DeviceQueueHw<GfxFamily>::setSchedulerCrossThreadData(SchedulerKernel &scheduler) {
|
||||
using INTERFACE_DESCRIPTOR_DATA = typename GfxFamily::INTERFACE_DESCRIPTOR_DATA;
|
||||
size_t offset = dshBuffer->getUnderlyingBufferSize() - scheduler.getCurbeSize() - 4096; // Page size padding
|
||||
|
||||
auto igilCmdQueue = reinterpret_cast<IGIL_CommandQueue *>(queueBuffer->getUnderlyingBuffer());
|
||||
igilCmdQueue->m_controls.m_SchedulerDSHOffset = (uint32_t)offset;
|
||||
igilCmdQueue->m_controls.m_SchedulerConstantBufferSize = (uint32_t)scheduler.getCurbeSize();
|
||||
|
||||
return offset;
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void DeviceQueueHw<GfxFamily>::dispatchScheduler(LinearStream &commandStream, SchedulerKernel &scheduler, PreemptionMode preemptionMode, IndirectHeap *ssh, IndirectHeap *dsh) {
|
||||
GpgpuWalkerHelper<GfxFamily>::dispatchScheduler(commandStream,
|
||||
*this,
|
||||
preemptionMode,
|
||||
scheduler,
|
||||
ssh,
|
||||
dsh);
|
||||
return;
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
size_t DeviceQueueHw<GfxFamily>::getCSPrefetchSize() {
|
||||
return 512;
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void DeviceQueueHw<GfxFamily>::addLriCmd(bool setArbCheck) {
|
||||
using MI_LOAD_REGISTER_IMM = typename GfxFamily::MI_LOAD_REGISTER_IMM;
|
||||
auto lri = slbCS.getSpaceForCmd<MI_LOAD_REGISTER_IMM>();
|
||||
*lri = GfxFamily::cmdInitLoadRegisterImm;
|
||||
lri->setRegisterOffset(0x2248); // CTXT_PREMP_DBG offset
|
||||
if (setArbCheck)
|
||||
lri->setDataDword(0x00000100); // set only bit 8 (Preempt On MI_ARB_CHK Only)
|
||||
else
|
||||
lri->setDataDword(0x0);
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
size_t DeviceQueueHw<GfxFamily>::getExecutionModelCleanupSectionSize() {
|
||||
size_t totalSize = 0;
|
||||
totalSize += sizeof(PIPE_CONTROL) +
|
||||
2 * sizeof(MI_LOAD_REGISTER_REG) +
|
||||
sizeof(MI_LOAD_REGISTER_IMM) +
|
||||
sizeof(PIPE_CONTROL) +
|
||||
sizeof(MI_MATH) +
|
||||
NUM_ALU_INST_FOR_READ_MODIFY_WRITE * sizeof(MI_MATH_ALU_INST_INLINE);
|
||||
|
||||
totalSize += getProfilingEndCmdsSize();
|
||||
totalSize += getMediaStateClearCmdsSize();
|
||||
|
||||
totalSize += 4 * sizeof(PIPE_CONTROL);
|
||||
totalSize += sizeof(MI_BATCH_BUFFER_END);
|
||||
return totalSize;
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
size_t DeviceQueueHw<GfxFamily>::getProfilingEndCmdsSize() {
|
||||
size_t size = 0;
|
||||
size += sizeof(PIPE_CONTROL) + sizeof(MI_STORE_REGISTER_MEM);
|
||||
size += sizeof(MI_LOAD_REGISTER_IMM);
|
||||
return size;
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void DeviceQueueHw<GfxFamily>::addDcFlushToPipeControlWa(PIPE_CONTROL *pc) {}
|
||||
|
||||
} // namespace NEO
|
||||
Reference in New Issue
Block a user