// compute-runtime/shared/source/command_container/cmdcontainer.cpp
//
// 450 lines
// 19 KiB
// C++

/*
* Copyright (C) 2019-2023 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/command_container/cmdcontainer.h"
#include "shared/source/command_container/command_encoder.h"
#include "shared/source/command_stream/command_stream_receiver.h"
#include "shared/source/command_stream/linear_stream.h"
#include "shared/source/device/device.h"
#include "shared/source/helpers/api_specific_config.h"
#include "shared/source/helpers/debug_helpers.h"
#include "shared/source/helpers/heap_helper.h"
#include "shared/source/helpers/hw_helper.h"
#include "shared/source/helpers/string.h"
#include "shared/source/indirect_heap/indirect_heap.h"
#include "shared/source/memory_manager/allocation_properties.h"
#include "shared/source/memory_manager/allocations_list.h"
#include "shared/source/memory_manager/memory_manager.h"
#include "shared/source/os_interface/os_context.h"
namespace NEO {
// Releases the command buffers and hands heap allocations back to the heap helper.
// Returns immediately when no device was ever assigned to the container.
CommandContainer::~CommandContainer() {
    if (device == nullptr) {
        DEBUG_BREAK_IF(device);
        return;
    }

    // Start index 0: every tracked command buffer is handled, including the first one.
    this->handleCmdBufferAllocations(0u);

    if (heapHelper) {
        for (auto heapAllocation : allocationIndirectHeaps) {
            heapHelper->storeHeapAllocation(heapAllocation);
        }
    }

    // Only heap-like entries of the deallocation container are given to the heap helper.
    for (auto pendingAllocation : deallocationContainer) {
        const auto allocationType = pendingAllocation->getAllocationType();
        const bool isHeapLike = (allocationType == AllocationType::INTERNAL_HEAP) ||
                                (allocationType == AllocationType::LINEAR_STREAM);
        if (isHeapLike) {
            getHeapHelper()->storeHeapAllocation(pendingAllocation);
        }
    }
}
// Creates an empty container: no heaps, no heap allocations, and a residency
// container pre-sized to startingResidencyContainerSize.
CommandContainer::CommandContainer() {
    for (auto &heap : indirectHeaps) {
        heap.reset();
    }
    for (auto &heapAllocation : allocationIndirectHeaps) {
        heapAllocation = nullptr;
    }

    residencyContainer.reserve(startingResidencyContainerSize);

    // -1 leaves isHandleFenceCompletionRequired untouched; any other value
    // enables fence completion handling only when the flag is zero.
    const auto removeUserFence = DebugManager.flags.RemoveUserFenceInCmdlistResetAndDestroy.get();
    if (removeUserFence != -1) {
        isHandleFenceCompletionRequired = (removeUserFence == 0);
    }
}
// Same as the default constructor, then overrides the number of IDDs per block.
CommandContainer::CommandContainer(uint32_t maxNumAggregatedIdds) : CommandContainer() {
    this->numIddsPerBlock = maxNumAggregatedIdds;
}
// Binds the container to a device, obtains the first command buffer and wraps
// it in the command stream. When requireHeaps is set, also creates the heap
// helper and the indirect heaps.
// Returns OUT_OF_DEVICE_MEMORY when the command buffer or a heap allocation
// cannot be obtained, SUCCESS otherwise.
CommandContainer::ErrorCode CommandContainer::initialize(Device *device, AllocationsList *reusableAllocationList, bool requireHeaps) {
    this->device = device;
    this->reusableAllocationList = reusableAllocationList;

    size_t alignedSize = alignUp<size_t>(this->getTotalCmdBufferSize(), MemoryConstants::pageSize64k);

    auto firstCmdBuffer = this->obtainNextCommandBufferAllocation();
    if (firstCmdBuffer == nullptr) {
        return ErrorCode::OUT_OF_DEVICE_MEMORY;
    }
    cmdBufferAllocations.push_back(firstCmdBuffer);

    const auto &hardwareInfo = device->getHardwareInfo();
    auto &gfxCoreHelper = device->getGfxCoreHelper();

    // The stream exposes the buffer minus the reserved tail.
    commandStream = std::make_unique<LinearStream>(firstCmdBuffer->getUnderlyingBuffer(),
                                                   alignedSize - cmdBufferReservedSize, this, gfxCoreHelper.getBatchBufferEndSize());
    commandStream->replaceGraphicsAllocation(firstCmdBuffer);

    if (!getFlushTaskUsedForImmediate()) {
        addToResidencyContainer(firstCmdBuffer);
    }

    if (!requireHeaps) {
        return ErrorCode::SUCCESS;
    }

    size_t heapSize = 65536u;
    const auto forcedHeapSizeInKb = DebugManager.flags.ForceDefaultHeapSize.get();
    if (forcedHeapSizeInKb != -1) {
        heapSize = forcedHeapSizeInKb * MemoryConstants::kiloByte;
    }

    auto *memoryManager = device->getMemoryManager();
    const auto rootDeviceIndex = device->getRootDeviceIndex();

    heapHelper = std::make_unique<HeapHelper>(memoryManager,
                                              device->getDefaultEngine().commandStreamReceiver->getInternalAllocationStorage(),
                                              device->getNumGenericSubDevices() > 1u);

    for (uint32_t heapIndex = 0; heapIndex < IndirectHeap::Type::NUM_TYPES; heapIndex++) {
        // Heaps that are skipped: everything but the indirect object heap in
        // bindless mode, the dynamic state heap when images are unsupported,
        // and heaps shared with the immediate command list CSR.
        const bool heapNotCreatedHere =
            (NEO::ApiSpecificConfig::getBindlessConfiguration() && heapIndex != IndirectHeap::Type::INDIRECT_OBJECT) ||
            (!hardwareInfo.capabilityTable.supportsImages && IndirectHeap::Type::DYNAMIC_STATE == heapIndex) ||
            immediateCmdListSharedHeap(static_cast<HeapType>(heapIndex));
        if (heapNotCreatedHere) {
            continue;
        }

        auto &heapAllocation = allocationIndirectHeaps[heapIndex];
        heapAllocation = heapHelper->getHeapAllocation(heapIndex,
                                                       heapSize,
                                                       alignedSize,
                                                       rootDeviceIndex);
        if (heapAllocation == nullptr) {
            return ErrorCode::OUT_OF_DEVICE_MEMORY;
        }
        residencyContainer.push_back(heapAllocation);

        const bool requireInternalHeap = (IndirectHeap::Type::INDIRECT_OBJECT == heapIndex);
        indirectHeaps[heapIndex] = std::make_unique<IndirectHeap>(heapAllocation, requireInternalHeap);

        if (heapIndex == IndirectHeap::Type::SURFACE_STATE) {
            // Consume the reserved region at the start of the surface state heap.
            indirectHeaps[heapIndex]->getSpace(reservedSshSize);
        }
    }

    const bool iohInLocalMemory = allocationIndirectHeaps[IndirectHeap::Type::INDIRECT_OBJECT]->isAllocatedInLocalMemoryPool();
    indirectObjectHeapBaseAddress = memoryManager->getInternalHeapBaseAddress(rootDeviceIndex, iohInLocalMemory);

    const bool isaInLocalMemory = memoryManager->isLocalMemoryUsedForIsa(rootDeviceIndex);
    instructionHeapBaseAddress = memoryManager->getInternalHeapBaseAddress(rootDeviceIndex, isaInLocalMemory);

    iddBlock = nullptr;
    nextIddInBlock = this->getNumIddPerBlock();

    return ErrorCode::SUCCESS;
}
// Appends an allocation to the residency container; null pointers are ignored.
void CommandContainer::addToResidencyContainer(GraphicsAllocation *alloc) {
    if (alloc != nullptr) {
        this->residencyContainer.push_back(alloc);
    }
}
// Sorts the residency container and drops repeated entries.
// Note that the original insertion order is not preserved.
void CommandContainer::removeDuplicatesFromResidencyContainer() {
    auto &allocations = this->residencyContainer;
    std::sort(allocations.begin(), allocations.end());
    auto firstDuplicate = std::unique(allocations.begin(), allocations.end());
    allocations.erase(firstDuplicate, allocations.end());
}
void CommandContainer::reset() {
setDirtyStateForAllHeaps(true);
slmSize = std::numeric_limits<uint32_t>::max();
getResidencyContainer().clear();
getDeallocationContainer().clear();
sshAllocations.clear();
this->handleCmdBufferAllocations(1u);
cmdBufferAllocations.erase(cmdBufferAllocations.begin() + 1, cmdBufferAllocations.end());
auto cmdlistCmdBufferSize = defaultListCmdBufferSize;
if (DebugManager.flags.OverrideCmdListCmdBufferSizeInKb.get() > 0) {
cmdlistCmdBufferSize = static_cast<size_t>(DebugManager.flags.OverrideCmdListCmdBufferSizeInKb.get()) * MemoryConstants::kiloByte;
}
commandStream->replaceBuffer(cmdBufferAllocations[0]->getUnderlyingBuffer(), cmdlistCmdBufferSize);
commandStream->replaceGraphicsAllocation(cmdBufferAllocations[0]);
addToResidencyContainer(commandStream->getGraphicsAllocation());
for (auto &indirectHeap : indirectHeaps) {
if (indirectHeap != nullptr) {
indirectHeap->replaceBuffer(indirectHeap->getCpuBase(),
indirectHeap->getMaxAvailableSpace());
addToResidencyContainer(indirectHeap->getGraphicsAllocation());
}
}
if (indirectHeaps[IndirectHeap::Type::SURFACE_STATE] != nullptr) {
indirectHeaps[IndirectHeap::Type::SURFACE_STATE]->getSpace(reservedSshSize);
}
iddBlock = nullptr;
nextIddInBlock = this->getNumIddPerBlock();
lastPipelineSelectModeRequired = false;
lastSentUseGlobalAtomics = false;
}
// Returns the command buffer size to allocate. A positive
// OverrideCmdListCmdBufferSizeInKb debug flag replaces the default with
// (flag KiB + cmdBufferReservedSize).
size_t CommandContainer::getTotalCmdBufferSize() {
    auto bufferSize = totalCmdBufferSize;
    const auto overrideSizeInKb = DebugManager.flags.OverrideCmdListCmdBufferSizeInKb.get();
    if (overrideSizeInKb > 0) {
        bufferSize = static_cast<size_t>(overrideSizeInKb) * MemoryConstants::kiloByte + cmdBufferReservedSize;
    }
    return bufferSize;
}
// Reserves `size` bytes in the given heap, letting the heap grow if needed,
// and returns the pointer to the reserved space.
void *CommandContainer::getHeapSpaceAllowGrow(HeapType heapType,
                                              size_t size) {
    auto heap = getHeapWithRequiredSize(heapType, size, 0, true);
    return heap->getSpace(size);
}
// Returns the heap with room for `sizeRequired` bytes at the given alignment,
// without requesting growth beyond the heap's current total size.
IndirectHeap *CommandContainer::getHeapWithRequiredSizeAndAlignment(HeapType heapType, size_t sizeRequired, size_t alignment) {
    constexpr bool allowGrow = false;
    return getHeapWithRequiredSize(heapType, sizeRequired, alignment, allowGrow);
}
// Makes sure the heap of the given type can hold `sizeRequired` bytes (plus
// alignment padding) and returns it, aligned when `alignment` is non-zero.
// A heap shared with the immediate command list CSR is never replaced here;
// running out of space on it is treated as unrecoverable.
IndirectHeap *CommandContainer::getHeapWithRequiredSize(HeapType heapType, size_t sizeRequired, size_t alignment, bool allowGrow) {
    auto heap = getIndirectHeap(heapType);
    UNRECOVERABLE_IF(heap == nullptr);

    // Budget one extra alignment unit when the current write pointer is misaligned.
    auto currentPtr = heap->getSpace(0);
    const bool needsPadding = alignment && (currentPtr != alignUp(currentPtr, alignment));
    const size_t sizeNeeded = needsPadding ? sizeRequired + alignment : sizeRequired;

    if (immediateCmdListSharedHeap(heapType)) {
        UNRECOVERABLE_IF(heap->getAvailableSpace() < sizeNeeded);
    } else if (heap->getAvailableSpace() < sizeNeeded) {
        // Default replacement keeps the current total size; growth is opt-in.
        size_t newSize = heap->getUsed() + heap->getAvailableSpace();
        if (allowGrow) {
            newSize = std::max(newSize, heap->getAvailableSpace() + sizeNeeded);
        }
        newSize = alignUp(newSize, MemoryConstants::pageSize);

        auto previousAllocation = getIndirectHeapAllocation(heapType);
        this->createAndAssignNewHeap(heapType, newSize);

        if (heapType == HeapType::SURFACE_STATE) {
            heap->getSpace(reservedSshSize);
            sshAllocations.push_back(previousAllocation);
        }
    }

    if (alignment) {
        heap->align(alignment);
    }
    return heap;
}
// Swaps the backing allocation of the given heap for a fresh one of `size`
// bytes. The old allocation is either stored for reuse right away (immediate
// command list) or queued in the deallocation container. The heap is marked
// dirty when its GPU base address changed.
void CommandContainer::createAndAssignNewHeap(HeapType heapType, size_t size) {
    auto heap = getIndirectHeap(heapType);
    auto previousAllocation = getIndirectHeapAllocation(heapType);
    auto replacementAllocation = getHeapHelper()->getHeapAllocation(heapType, size, MemoryConstants::pageSize, device->getRootDeviceIndex());
    UNRECOVERABLE_IF(!previousAllocation);
    UNRECOVERABLE_IF(!replacementAllocation);

    const auto gpuBaseBefore = heap->getHeapGpuBase();
    heap->replaceGraphicsAllocation(replacementAllocation);
    heap->replaceBuffer(replacementAllocation->getUnderlyingBuffer(),
                        replacementAllocation->getUnderlyingBufferSize());
    const bool gpuBaseChanged = (gpuBaseBefore != heap->getHeapGpuBase());

    getResidencyContainer().push_back(replacementAllocation);

    if (this->immediateCmdListCsr) {
        this->storeAllocationAndFlushTagUpdate(previousAllocation);
    } else {
        getDeallocationContainer().push_back(previousAllocation);
    }

    setIndirectHeapAllocation(heapType, replacementAllocation);

    if (gpuBaseChanged) {
        setHeapDirty(heapType);
    }
}
// Disposes of the tracked command buffers from `startIndex` onward: they go to
// the reusable allocation list when one is set, otherwise they are freed.
// Entries are not removed from cmdBufferAllocations here.
void CommandContainer::handleCmdBufferAllocations(size_t startIndex) {
    // Move any buffers parked in the immediate list over to the shared list first.
    const bool canSpliceImmediateList = immediateReusableAllocationList != nullptr &&
                                        !immediateReusableAllocationList->peekIsEmpty() &&
                                        reusableAllocationList != nullptr;
    if (canSpliceImmediateList) {
        reusableAllocationList->splice(*immediateReusableAllocationList->detachNodes());
    }

    for (size_t index = startIndex; index < cmdBufferAllocations.size(); index++) {
        auto cmdBuffer = cmdBufferAllocations[index];

        if (!this->reusableAllocationList) {
            this->device->getMemoryManager()->freeGraphicsMemory(cmdBuffer);
            continue;
        }

        if (isHandleFenceCompletionRequired) {
            this->device->getMemoryManager()->handleFenceCompletion(cmdBuffer);
        }
        reusableAllocationList->pushFrontOne(*cmdBuffer);
    }
}
// Returns a command buffer allocation, preferring one detached from the
// reusable list and falling back to a fresh allocation. May return nullptr
// when the fresh allocation fails.
GraphicsAllocation *CommandContainer::obtainNextCommandBufferAllocation() {
    if (this->reusableAllocationList) {
        const size_t alignedSize = alignUp<size_t>(this->getTotalCmdBufferSize(), MemoryConstants::pageSize64k);
        GraphicsAllocation *recycled = this->reusableAllocationList->detachAllocation(alignedSize, nullptr, nullptr, AllocationType::COMMAND_BUFFER).release();
        if (recycled) {
            return recycled;
        }
    }
    return this->allocateCommandBuffer();
}
// Obtains another command buffer, tracks it, and points the command stream at it.
// Failing to obtain a buffer is unrecoverable.
void CommandContainer::allocateNextCommandBuffer() {
    auto nextCmdBuffer = this->obtainNextCommandBufferAllocation();
    UNRECOVERABLE_IF(nextCmdBuffer == nullptr);

    this->cmdBufferAllocations.push_back(nextCmdBuffer);
    this->setCmdBuffer(nextCmdBuffer);
}
// Writes a batch-buffer-end command at the current stream position, then
// switches the stream to a newly obtained command buffer.
void CommandContainer::closeAndAllocateNextCommandBuffer() {
    auto &gfxCoreHelper = device->getGfxCoreHelper();
    const auto batchBufferEndSize = gfxCoreHelper.getBatchBufferEndSize();

    // NOTE(review): the copy lands at the pointer returned for a zero-byte
    // request; presumably the reserved tail of the buffer guarantees room — confirm.
    auto writePtr = commandStream->getSpace(0u);
    memcpy_s(writePtr, batchBufferEndSize, gfxCoreHelper.getBatchBufferEndReference(), batchBufferEndSize);

    allocateNextCommandBuffer();
    currentLinearStreamStartOffset = 0u;
}
// In bindless configuration, lazily creates the surface state heap on first
// use and marks it dirty on every call. Does nothing otherwise.
void CommandContainer::prepareBindfulSsh() {
    if (!ApiSpecificConfig::getBindlessConfiguration()) {
        return;
    }

    auto &sshAllocation = allocationIndirectHeaps[IndirectHeap::Type::SURFACE_STATE];
    if (sshAllocation == nullptr) {
        constexpr size_t heapSize = MemoryConstants::pageSize64k;
        sshAllocation = heapHelper->getHeapAllocation(IndirectHeap::Type::SURFACE_STATE,
                                                      heapSize,
                                                      MemoryConstants::pageSize64k,
                                                      device->getRootDeviceIndex());
        UNRECOVERABLE_IF(!sshAllocation);
        residencyContainer.push_back(sshAllocation);

        auto &sshHeap = indirectHeaps[IndirectHeap::Type::SURFACE_STATE];
        sshHeap = std::make_unique<IndirectHeap>(sshAllocation, false);
        sshHeap->getSpace(reservedSshSize);
    }

    setHeapDirty(IndirectHeap::Type::SURFACE_STATE);
}
// Returns the heap for the given type: the container's own heap, or the
// CSR-shared surface/dynamic state heap when that heap type is shared.
IndirectHeap *CommandContainer::getIndirectHeap(HeapType heapType) {
    if (!immediateCmdListSharedHeap(heapType)) {
        return indirectHeaps[heapType].get();
    }
    if (heapType == HeapType::SURFACE_STATE) {
        return sharedSshCsrHeap;
    }
    return sharedDshCsrHeap;
}
// Ensures the surface state heap (and the dynamic state heap when getDsh is
// set) can hold the requested sizes. With an immediate command list CSR the
// heaps are fetched from the CSR under its ownership lock; otherwise the
// container's own heaps are checked.
void CommandContainer::ensureHeapSizePrepared(size_t sshRequiredSize, size_t dshRequiredSize, bool getDsh) {
    if (!immediateCmdListCsr) {
        this->getHeapWithRequiredSizeAndAlignment(HeapType::SURFACE_STATE, sshRequiredSize, 0);
        if (getDsh) {
            this->getHeapWithRequiredSizeAndAlignment(HeapType::DYNAMIC_STATE, dshRequiredSize, 0);
        }
        return;
    }

    auto csrOwnership = immediateCmdListCsr->obtainUniqueOwnership();
    sharedSshCsrHeap = &immediateCmdListCsr->getIndirectHeap(HeapType::SURFACE_STATE, sshRequiredSize);
    if (getDsh) {
        sharedDshCsrHeap = &immediateCmdListCsr->getIndirectHeap(HeapType::DYNAMIC_STATE, dshRequiredSize);
    }
}
// Tries to recycle a command buffer of the required size: first from the
// immediate reusable list, then from the shared reusable list. A recycled
// buffer is appended to cmdBufferAllocations.
// Returns the recycled allocation, or nullptr when neither list has one.
GraphicsAllocation *CommandContainer::reuseExistingCmdBuffer() {
    size_t alignedSize = alignUp<size_t>(this->getTotalCmdBufferSize(), MemoryConstants::pageSize64k);
    auto cmdBufferAllocation = this->immediateReusableAllocationList->detachAllocation(alignedSize, nullptr, this->immediateCmdListCsr, AllocationType::COMMAND_BUFFER).release();
    if (!cmdBufferAllocation && this->reusableAllocationList) {
        // Keep the detached allocation: discarding the released pointer would
        // leak it and make this fallback never produce a buffer.
        cmdBufferAllocation = this->reusableAllocationList->detachAllocation(alignedSize, nullptr, this->immediateCmdListCsr, AllocationType::COMMAND_BUFFER).release();
    }
    if (cmdBufferAllocation) {
        this->cmdBufferAllocations.push_back(cmdBufferAllocation);
    }
    return cmdBufferAllocation;
}
// Stops tracking the command buffer currently backing the command stream and
// stores it for reuse via storeAllocationAndFlushTagUpdate.
void CommandContainer::addCurrentCommandBufferToReusableAllocationList() {
    auto currentCmdBuffer = this->commandStream->getGraphicsAllocation();

    // Erasing the end iterator is undefined behaviour, so only erase when the
    // buffer is actually tracked.
    auto position = std::find(this->cmdBufferAllocations.begin(), this->cmdBufferAllocations.end(), currentCmdBuffer);
    if (position != this->cmdBufferAllocations.end()) {
        this->cmdBufferAllocations.erase(position);
    }

    this->storeAllocationAndFlushTagUpdate(currentCmdBuffer);
}
// Points the command stream at `cmdBuffer`, exposing the aligned buffer size
// minus the reserved tail. The buffer is added to the residency container
// unless flush-task is used for immediate command lists.
void CommandContainer::setCmdBuffer(GraphicsAllocation *cmdBuffer) {
    const size_t alignedSize = alignUp<size_t>(this->getTotalCmdBufferSize(), MemoryConstants::pageSize64k);

    commandStream->replaceBuffer(cmdBuffer->getUnderlyingBuffer(), alignedSize - cmdBufferReservedSize);
    commandStream->replaceGraphicsAllocation(cmdBuffer);

    if (getFlushTaskUsedForImmediate()) {
        return;
    }
    addToResidencyContainer(cmdBuffer);
}
// Allocates a new COMMAND_BUFFER graphics allocation sized to the total
// command buffer size rounded up to 64 KiB. Returns whatever the memory
// manager returns.
GraphicsAllocation *CommandContainer::allocateCommandBuffer() {
    size_t alignedSize = alignUp<size_t>(this->getTotalCmdBufferSize(), MemoryConstants::pageSize64k);

    const auto rootDeviceIndex = device->getRootDeviceIndex();
    const bool multiOsContextCapable = device->getNumGenericSubDevices() > 1u;

    AllocationProperties properties{rootDeviceIndex,
                                    true /* allocateMemory*/,
                                    alignedSize,
                                    AllocationType::COMMAND_BUFFER,
                                    multiOsContextCapable,
                                    false,
                                    device->getDeviceBitfield()};

    auto memoryManager = device->getMemoryManager();
    return memoryManager->allocateGraphicsMemoryWithProperties(properties);
}
// Creates the immediate reusable allocation list and pre-fills it with command
// buffers; when a heap helper exists, also pre-allocates heap allocations and
// stores them in the helper. The count comes from
// GfxCoreHelper::getAmountOfAllocationsToFill(); zero means no pre-filling.
void CommandContainer::fillReusableAllocationLists() {
    this->immediateReusableAllocationList = std::make_unique<NEO::AllocationsList>();
    const auto &hardwareInfo = device->getHardwareInfo();
    auto &gfxCoreHelper = device->getGfxCoreHelper();
    auto amountToFill = gfxCoreHelper.getAmountOfAllocationsToFill();
    if (amountToFill == 0u) {
        return;
    }

    for (auto i = 0u; i < amountToFill; i++) {
        auto allocToReuse = this->allocateCommandBuffer();
        if (allocToReuse == nullptr) {
            // Allocation failed: never dereference a null allocation. Stop
            // pre-filling command buffers instead of crashing.
            break;
        }
        this->immediateReusableAllocationList->pushTailOne(*allocToReuse);
        this->getResidencyContainer().push_back(allocToReuse);
    }

    if (!this->heapHelper) {
        return;
    }

    constexpr size_t heapSize = 65536u;
    size_t alignedSize = alignUp<size_t>(this->getTotalCmdBufferSize(), MemoryConstants::pageSize64k);
    for (auto i = 0u; i < amountToFill; i++) {
        for (auto heapType = 0u; heapType < IndirectHeap::Type::NUM_TYPES; heapType++) {
            // Same heap selection rules as in initialize().
            if (NEO::ApiSpecificConfig::getBindlessConfiguration() && heapType != IndirectHeap::Type::INDIRECT_OBJECT) {
                continue;
            }
            if (!hardwareInfo.capabilityTable.supportsImages && IndirectHeap::Type::DYNAMIC_STATE == heapType) {
                continue;
            }
            if (immediateCmdListSharedHeap(static_cast<HeapType>(heapType))) {
                continue;
            }
            auto heapToReuse = heapHelper->getHeapAllocation(heapType,
                                                             heapSize,
                                                             alignedSize,
                                                             device->getRootDeviceIndex());
            // The CSR is optional elsewhere in this class, so guard it here too.
            if (heapToReuse != nullptr && this->immediateCmdListCsr) {
                this->immediateCmdListCsr->makeResident(*heapToReuse);
            }
            this->heapHelper->storeHeapAllocation(heapToReuse);
        }
    }
}
// Stamps the allocation with the immediate CSR's next task count, stores it
// for reuse (command buffers go to the immediate reusable list, everything
// else to the heap helper) and flushes a tag update. The CSR ownership lock is
// held for the whole operation.
void CommandContainer::storeAllocationAndFlushTagUpdate(GraphicsAllocation *allocation) {
    auto csrOwnership = this->immediateCmdListCsr->obtainUniqueOwnership();

    const auto nextTaskCount = this->immediateCmdListCsr->peekTaskCount() + 1;
    const auto contextId = this->immediateCmdListCsr->getOsContext().getContextId();
    allocation->updateTaskCount(nextTaskCount, contextId);
    allocation->updateResidencyTaskCount(nextTaskCount, contextId);

    const bool isCommandBuffer = (allocation->getAllocationType() == AllocationType::COMMAND_BUFFER);
    if (isCommandBuffer) {
        this->immediateReusableAllocationList->pushTailOne(*allocation);
    } else {
        getHeapHelper()->storeHeapAllocation(allocation);
    }

    this->immediateCmdListCsr->flushTagUpdate();
}
} // namespace NEO