/*
 * Copyright (C) 2019-2023 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#include "shared/source/command_container/cmdcontainer.h"

#include "shared/source/command_container/command_encoder.h"
#include "shared/source/command_stream/command_stream_receiver.h"
#include "shared/source/command_stream/linear_stream.h"
#include "shared/source/device/device.h"
#include "shared/source/helpers/api_specific_config.h"
#include "shared/source/helpers/debug_helpers.h"
#include "shared/source/helpers/gfx_core_helper.h"
#include "shared/source/helpers/heap_helper.h"
#include "shared/source/helpers/hw_info.h"
#include "shared/source/helpers/string.h"
#include "shared/source/indirect_heap/indirect_heap.h"
#include "shared/source/memory_manager/allocation_properties.h"
#include "shared/source/memory_manager/allocations_list.h"
#include "shared/source/memory_manager/memory_manager.h"
#include "shared/source/os_interface/os_context.h"

#include <algorithm>
#include <limits>
#include <memory>

namespace NEO {

// Hands every command buffer, heap allocation and eligible deferred allocation
// back to the reuse storage (or frees it). Does nothing if the container was
// never bound to a device.
CommandContainer::~CommandContainer() {
    if (!device) {
        // NOTE(review): device is null on this path, so this condition is always false.
        DEBUG_BREAK_IF(device);
        return;
    }

    this->handleCmdBufferAllocations(0u);

    for (auto allocationIndirectHeap : allocationIndirectHeaps) {
        if (heapHelper) {
            heapHelper->storeHeapAllocation(allocationIndirectHeap);
        }
    }
    // Only internal-heap and linear-stream allocations from the deallocation
    // container are handed to the heap helper here.
    for (auto deallocation : deallocationContainer) {
        const auto allocationType = deallocation->getAllocationType();
        if ((allocationType == AllocationType::INTERNAL_HEAP) || (allocationType == AllocationType::LINEAR_STREAM)) {
            getHeapHelper()->storeHeapAllocation(deallocation);
        }
    }
}

// Clears the heap tables, pre-reserves the residency container and applies the
// RemoveUserFenceInCmdlistResetAndDestroy debug override when it is set (!= -1).
CommandContainer::CommandContainer() {
    for (auto &indirectHeap : indirectHeaps) {
        indirectHeap = nullptr;
    }

    for (auto &allocationIndirectHeap : allocationIndirectHeaps) {
        allocationIndirectHeap = nullptr;
    }

    residencyContainer.reserve(startingResidencyContainerSize);

    if (DebugManager.flags.RemoveUserFenceInCmdlistResetAndDestroy.get() != -1) {
        isHandleFenceCompletionRequired = !static_cast<bool>(DebugManager.flags.RemoveUserFenceInCmdlistResetAndDestroy.get());
    }
}

// Same as the default constructor, but overrides the number of IDDs per block.
CommandContainer::CommandContainer(uint32_t maxNumAggregatedIdds) : CommandContainer() {
    numIddsPerBlock = maxNumAggregatedIdds;
}

// Binds the container to a device, obtains the first command buffer (and an
// optional second one requested with forceHostMemory) and, when requireHeaps
// is set, creates the indirect heaps.
// Returns OUT_OF_DEVICE_MEMORY if any required allocation cannot be obtained,
// SUCCESS otherwise.
CommandContainer::ErrorCode CommandContainer::initialize(Device *device, AllocationsList *reusableAllocationList, bool requireHeaps, bool createSecondaryCmdBufferInHostMem) {
    this->device = device;
    this->reusableAllocationList = reusableAllocationList;
    size_t alignedSize = alignUp(this->getTotalCmdBufferSize(), MemoryConstants::pageSize64k);

    auto cmdBufferAllocation = this->obtainNextCommandBufferAllocation();

    if (!cmdBufferAllocation) {
        return ErrorCode::OUT_OF_DEVICE_MEMORY;
    }

    cmdBufferAllocations.push_back(cmdBufferAllocation);

    auto &gfxCoreHelper = device->getGfxCoreHelper();
    // The stream is sized without the reserved tail of the command buffer.
    commandStream = std::make_unique<LinearStream>(cmdBufferAllocation->getUnderlyingBuffer(),
                                                   alignedSize - cmdBufferReservedSize, this, gfxCoreHelper.getBatchBufferEndSize());

    commandStream->replaceGraphicsAllocation(cmdBufferAllocation);

    if (createSecondaryCmdBufferInHostMem) {
        this->useSecondaryCommandStream = true;

        auto cmdBufferAllocationHost = this->obtainNextCommandBufferAllocation(true);
        if (!cmdBufferAllocationHost) {
            return ErrorCode::OUT_OF_DEVICE_MEMORY;
        }
        secondaryCommandStreamForImmediateCmdList = std::make_unique<LinearStream>(cmdBufferAllocationHost->getUnderlyingBuffer(),
                                                                                   alignedSize - cmdBufferReservedSize, this, gfxCoreHelper.getBatchBufferEndSize());
        secondaryCommandStreamForImmediateCmdList->replaceGraphicsAllocation(cmdBufferAllocationHost);
        cmdBufferAllocations.push_back(cmdBufferAllocationHost);
        addToResidencyContainer(cmdBufferAllocationHost);
    }

    addToResidencyContainer(cmdBufferAllocation);
    if (requireHeaps) {
        size_t heapSize = 65536u;
        if (DebugManager.flags.ForceDefaultHeapSize.get() != -1) {
            // The debug flag value is multiplied by kiloByte.
            heapSize = DebugManager.flags.ForceDefaultHeapSize.get() * MemoryConstants::kiloByte;
        }
        heapHelper = std::make_unique<HeapHelper>(device->getMemoryManager(),
                                                  device->getDefaultEngine().commandStreamReceiver->getInternalAllocationStorage(),
                                                  device->getNumGenericSubDevices() > 1u);

        for (uint32_t i = 0; i < IndirectHeap::Type::NUM_TYPES; i++) {
            if (skipHeapAllocationCreation(static_cast<HeapType>(i))) {
                continue;
            }
            allocationIndirectHeaps[i] = heapHelper->getHeapAllocation(i,
                                                                       heapSize,
                                                                       alignedSize,
                                                                       device->getRootDeviceIndex());
            if (!allocationIndirectHeaps[i]) {
                return ErrorCode::OUT_OF_DEVICE_MEMORY;
            }
            residencyContainer.push_back(allocationIndirectHeaps[i]);

            bool requireInternalHeap = false;
            if (IndirectHeap::Type::INDIRECT_OBJECT == i) {
                requireInternalHeap = true;
                indirectHeapInLocalMemory = allocationIndirectHeaps[i]->isAllocatedInLocalMemoryPool();
            }
            indirectHeaps[i] = std::make_unique<IndirectHeap>(allocationIndirectHeaps[i], requireInternalHeap);
            if (i == IndirectHeap::Type::SURFACE_STATE) {
                // Consume the reserved region at the start of the surface state heap.
                indirectHeaps[i]->getSpace(reservedSshSize);
            }
        }

        indirectObjectHeapBaseAddress = device->getMemoryManager()->getInternalHeapBaseAddress(device->getRootDeviceIndex(), indirectHeapInLocalMemory);

        instructionHeapBaseAddress = device->getMemoryManager()->getInternalHeapBaseAddress(device->getRootDeviceIndex(), device->getMemoryManager()->isLocalMemoryUsedForIsa(device->getRootDeviceIndex()));

        iddBlock = nullptr;
        nextIddInBlock = this->getNumIddPerBlock();
    }
    return ErrorCode::SUCCESS;
}

// Appends a non-null allocation to the residency container; null is ignored.
void CommandContainer::addToResidencyContainer(GraphicsAllocation *alloc) {
    if (alloc == nullptr) {
        return;
    }

    this->residencyContainer.push_back(alloc);
}

// Exchanges the primary and secondary command streams.
// Returns true when the swap happened, false when no secondary stream is in use.
bool CommandContainer::swapStreams() {
    if (this->useSecondaryCommandStream) {
        this->commandStream.swap(this->secondaryCommandStreamForImmediateCmdList);
        return true;
    }
    return false;
}

// Sorts the residency container and drops duplicate entries.
void CommandContainer::removeDuplicatesFromResidencyContainer() {
    std::sort(this->residencyContainer.begin(), this->residencyContainer.end());
    this->residencyContainer.erase(std::unique(this->residencyContainer.begin(), this->residencyContainer.end()), this->residencyContainer.end());
}

// Returns the container to its post-initialize state: clears tracking
// containers, releases every command buffer except the first, rewinds the
// command stream and heaps, and resets cached state.
void CommandContainer::reset() {
    setDirtyStateForAllHeaps(true);
    slmSize = std::numeric_limits<decltype(slmSize)>::max();
    getResidencyContainer().clear();
    getDeallocationContainer().clear();
    sshAllocations.clear();

    // Release command buffers from index 1 onwards, then drop them from the list.
    this->handleCmdBufferAllocations(1u);
    cmdBufferAllocations.erase(cmdBufferAllocations.begin() + 1, cmdBufferAllocations.end());

    auto cmdlistCmdBufferSize = defaultListCmdBufferSize;
    if (DebugManager.flags.OverrideCmdListCmdBufferSizeInKb.get() > 0) {
        cmdlistCmdBufferSize = static_cast<size_t>(DebugManager.flags.OverrideCmdListCmdBufferSizeInKb.get()) * MemoryConstants::kiloByte;
    }

    commandStream->replaceBuffer(cmdBufferAllocations[0]->getUnderlyingBuffer(),
                                 cmdlistCmdBufferSize);
    commandStream->replaceGraphicsAllocation(cmdBufferAllocations[0]);
    addToResidencyContainer(commandStream->getGraphicsAllocation());

    for (auto &indirectHeap : indirectHeaps) {
        if (indirectHeap != nullptr) {
            indirectHeap->replaceBuffer(indirectHeap->getCpuBase(),
                                        indirectHeap->getMaxAvailableSpace());
            addToResidencyContainer(indirectHeap->getGraphicsAllocation());
        }
    }
    if (indirectHeaps[IndirectHeap::Type::SURFACE_STATE] != nullptr) {
        indirectHeaps[IndirectHeap::Type::SURFACE_STATE]->getSpace(reservedSshSize);
    }

    iddBlock = nullptr;
    nextIddInBlock = this->getNumIddPerBlock();
    lastPipelineSelectModeRequired = false;
    lastSentUseGlobalAtomics = false;
}

// Returns the command buffer size: totalCmdBufferSize by default, or the
// OverrideCmdListCmdBufferSizeInKb debug value (times kiloByte) plus the
// reserved size when that flag is positive.
size_t CommandContainer::getTotalCmdBufferSize() {
    auto totalCommandBufferSize = totalCmdBufferSize;
    if (DebugManager.flags.OverrideCmdListCmdBufferSizeInKb.get() > 0) {
        totalCommandBufferSize = static_cast<size_t>(DebugManager.flags.OverrideCmdListCmdBufferSizeInKb.get()) * MemoryConstants::kiloByte;
        totalCommandBufferSize += cmdBufferReservedSize;
    }
    return totalCommandBufferSize;
}

// Returns `size` bytes from the given heap, requesting growth (allowGrow = true)
// if the heap has to be replaced to satisfy the request.
void *CommandContainer::getHeapSpaceAllowGrow(HeapType heapType,
                                              size_t size) {
    return getHeapWithRequiredSize(heapType, size, 0, true)->getSpace(size);
}

// Returns the heap with at least sizeRequired bytes available at the given
// alignment; growth is not allowed.
IndirectHeap *CommandContainer::getHeapWithRequiredSizeAndAlignment(HeapType heapType, size_t sizeRequired, size_t alignment) {
    return getHeapWithRequiredSize(heapType, sizeRequired, alignment, false);
}

// Ensures the heap can hold sizeRequired bytes (plus alignment padding when the
// current pointer is misaligned). For shared immediate-cmdlist heaps a shortfall
// is unrecoverable; otherwise the heap is replaced with a new allocation.
// Returns the heap, aligned to `alignment` when it is non-zero.
IndirectHeap *CommandContainer::getHeapWithRequiredSize(HeapType heapType, size_t sizeRequired, size_t alignment, bool allowGrow) {
    auto indirectHeap = getIndirectHeap(heapType);
    UNRECOVERABLE_IF(indirectHeap == nullptr);
    auto sizeRequested = sizeRequired;

    // getSpace(0) yields the current write pointer without consuming space.
    auto heapBuffer = indirectHeap->getSpace(0);
    if (alignment && (heapBuffer != alignUp(heapBuffer, alignment))) {
        sizeRequested += alignment;
    }

    if (immediateCmdListSharedHeap(heapType)) {
        UNRECOVERABLE_IF(indirectHeap->getAvailableSpace() < sizeRequested);
    } else {
        if (indirectHeap->getAvailableSpace() < sizeRequested) {
            size_t newSize = indirectHeap->getUsed() + indirectHeap->getAvailableSpace();
            if (allowGrow) {
                newSize = std::max(newSize, indirectHeap->getAvailableSpace() + sizeRequested);
            }
            newSize = alignUp(newSize, MemoryConstants::pageSize);
            auto oldAlloc = getIndirectHeapAllocation(heapType);
            this->createAndAssignNewHeap(heapType, newSize);
            if (heapType == HeapType::SURFACE_STATE) {
                indirectHeap->getSpace(reservedSshSize);
                sshAllocations.push_back(oldAlloc);
            }
        }
    }

    if (alignment) {
        indirectHeap->align(alignment);
    }

    return indirectHeap;
}

// Replaces the backing allocation of the given heap with a new one of `size`
// bytes. The old allocation is either stored for reuse with a tag update
// (immediate command list) or queued in the deallocation container. The heap
// is marked dirty when its GPU base changed.
void CommandContainer::createAndAssignNewHeap(HeapType heapType, size_t size) {
    auto indirectHeap = getIndirectHeap(heapType);
    auto oldAlloc = getIndirectHeapAllocation(heapType);
    auto newAlloc = getHeapHelper()->getHeapAllocation(heapType, size, MemoryConstants::pageSize, device->getRootDeviceIndex());
    UNRECOVERABLE_IF(!oldAlloc);
    UNRECOVERABLE_IF(!newAlloc);
    auto oldBase = indirectHeap->getHeapGpuBase();
    indirectHeap->replaceGraphicsAllocation(newAlloc);
    indirectHeap->replaceBuffer(newAlloc->getUnderlyingBuffer(),
                                newAlloc->getUnderlyingBufferSize());
    auto newBase = indirectHeap->getHeapGpuBase();
    getResidencyContainer().push_back(newAlloc);
    if (this->immediateCmdListCsr) {
        this->storeAllocationAndFlushTagUpdate(oldAlloc);
    } else {
        getDeallocationContainer().push_back(oldAlloc);
    }
    setIndirectHeapAllocation(heapType, newAlloc);
    if (oldBase != newBase) {
        setHeapDirty(heapType);
    }
}

// Releases command buffers starting at startIndex: pushed to the reusable list
// when one exists (after optional fence completion handling), freed through the
// memory manager otherwise. A non-empty immediate reusable list is first
// spliced into the reusable list.
void CommandContainer::handleCmdBufferAllocations(size_t startIndex) {
    if (immediateReusableAllocationList != nullptr && !immediateReusableAllocationList->peekIsEmpty() && reusableAllocationList != nullptr) {
        reusableAllocationList->splice(*immediateReusableAllocationList->detachNodes());
    }
    for (size_t i = startIndex; i < cmdBufferAllocations.size(); i++) {
        if (this->reusableAllocationList) {
            if (isHandleFenceCompletionRequired) {
                this->device->getMemoryManager()->handleFenceCompletion(cmdBufferAllocations[i]);
            }

            reusableAllocationList->pushFrontOne(*cmdBufferAllocations[i]);
        } else {
            this->device->getMemoryManager()->freeGraphicsMemory(cmdBufferAllocations[i]);
        }
    }
}

// Convenience overload: obtains a command buffer without forcing host memory.
GraphicsAllocation *CommandContainer::obtainNextCommandBufferAllocation() {
    return this->obtainNextCommandBufferAllocation(false);
}

// Returns a command buffer allocation, preferring one detached from the
// reusable list and falling back to a fresh allocation. forceHostMemory is
// honoured only when the secondary command stream is in use.
// May return nullptr if the fallback allocation fails.
GraphicsAllocation *CommandContainer::obtainNextCommandBufferAllocation(bool forceHostMemory) {
    forceHostMemory &= this->useSecondaryCommandStream;
    GraphicsAllocation *cmdBufferAllocation = nullptr;
    if (this->reusableAllocationList) {
        size_t alignedSize = alignUp(this->getTotalCmdBufferSize(), MemoryConstants::pageSize64k);
        cmdBufferAllocation = this->reusableAllocationList->detachAllocation(alignedSize, nullptr, forceHostMemory, nullptr, AllocationType::COMMAND_BUFFER).release();
    }
    if (!cmdBufferAllocation) {
        cmdBufferAllocation = this->allocateCommandBuffer(forceHostMemory);
    }

    return cmdBufferAllocation;
}

// Obtains the next command buffer, records it and makes it the current buffer
// of the command stream. Failure to obtain one is unrecoverable.
void CommandContainer::allocateNextCommandBuffer() {
    auto cmdBufferAllocation = this->obtainNextCommandBufferAllocation();
    UNRECOVERABLE_IF(!cmdBufferAllocation);

    cmdBufferAllocations.push_back(cmdBufferAllocation);

    setCmdBuffer(cmdBufferAllocation);
}

// Copies the batch-buffer-end reference at the current stream position, then
// switches to a newly obtained command buffer and resets the start offset.
void CommandContainer::closeAndAllocateNextCommandBuffer() {
    auto &gfxCoreHelper = device->getGfxCoreHelper();
    auto bbEndSize = gfxCoreHelper.getBatchBufferEndSize();
    // getSpace(0) returns the current position without advancing the stream.
    auto ptr = commandStream->getSpace(0u);
    memcpy_s(ptr, bbEndSize, gfxCoreHelper.getBatchBufferEndReference(), bbEndSize);
    allocateNextCommandBuffer();
    currentLinearStreamStartOffset = 0u;
}

// In bindless configuration, lazily creates the surface state heap (64KB) if it
// does not exist yet and marks that heap dirty. No-op otherwise.
void CommandContainer::prepareBindfulSsh() {
    if (ApiSpecificConfig::getBindlessConfiguration()) {
        if (allocationIndirectHeaps[IndirectHeap::Type::SURFACE_STATE] == nullptr) {
            constexpr size_t heapSize = MemoryConstants::pageSize64k;
            allocationIndirectHeaps[IndirectHeap::Type::SURFACE_STATE] = heapHelper->getHeapAllocation(IndirectHeap::Type::SURFACE_STATE,
                                                                                                       heapSize,
                                                                                                       MemoryConstants::pageSize64k,
                                                                                                       device->getRootDeviceIndex());
            UNRECOVERABLE_IF(!allocationIndirectHeaps[IndirectHeap::Type::SURFACE_STATE]);
            residencyContainer.push_back(allocationIndirectHeaps[IndirectHeap::Type::SURFACE_STATE]);

            indirectHeaps[IndirectHeap::Type::SURFACE_STATE] = std::make_unique<IndirectHeap>(allocationIndirectHeaps[IndirectHeap::Type::SURFACE_STATE], false);
            indirectHeaps[IndirectHeap::Type::SURFACE_STATE]->getSpace(reservedSshSize);
        }
        setHeapDirty(IndirectHeap::Type::SURFACE_STATE);
    }
}

// Returns the heap for heapType: the shared CSR heap (SSH or DSH) when the heap
// is shared for immediate command lists, otherwise the container's own heap.
IndirectHeap *CommandContainer::getIndirectHeap(HeapType heapType) {
    if (immediateCmdListSharedHeap(heapType)) {
        return heapType == HeapType::SURFACE_STATE ? sharedSshCsrHeap : sharedDshCsrHeap;
    } else {
        return indirectHeaps[heapType].get();
    }
}

// Reserves `size` bytes (plus padding needed to reach `alignment`) in the CSR
// heap of the given type and points indirectHeapReservation at the reserved
// window: same CPU base, limited to the consumed size, with the previously
// used bytes already consumed. Returns the CSR heap.
IndirectHeap *CommandContainer::initIndirectHeapReservation(ReservedIndirectHeap *indirectHeapReservation, size_t size, size_t alignment, HeapType heapType) {
    void *currentHeap = immediateCmdListCsr->getIndirectHeapCurrentPtr(heapType);
    auto totalRequiredSize = size + ptrDiff(alignUp(currentHeap, alignment), currentHeap);

    auto baseHeap = &immediateCmdListCsr->getIndirectHeap(heapType, totalRequiredSize);

    // Capture the state before consuming space so the reservation starts at
    // the old "used" offset.
    auto usedSize = baseHeap->getUsed();
    void *heapCpuBase = baseHeap->getCpuBase();
    auto consumedSize = usedSize + totalRequiredSize;

    baseHeap->getSpace(totalRequiredSize);

    indirectHeapReservation->replaceGraphicsAllocation(baseHeap->getGraphicsAllocation());
    indirectHeapReservation->replaceBuffer(heapCpuBase, consumedSize);
    indirectHeapReservation->getSpace(usedSize);
    indirectHeapReservation->setHeapSizeInPages(baseHeap->getHeapSizeInPages());

    return baseHeap;
}

// Makes sure SSH (and DSH when getDsh is set) have the requested space.
// With an immediate-cmdlist CSR the space is reserved in the CSR heaps under
// its ownership lock; otherwise the private heaps are sized and the
// reservation pointers are cleared. A zero size forces alignment 1.
void CommandContainer::reserveSpaceForDispatch(HeapReserveArguments &sshReserveArg, HeapReserveArguments &dshReserveArg, bool getDsh) {
    size_t sshAlignment = sshReserveArg.alignment;
    size_t dshAlignment = dshReserveArg.alignment;
    if (sshReserveArg.size == 0) {
        sshAlignment = 1;
    }
    if (dshReserveArg.size == 0) {
        dshAlignment = 1;
    }
    if (immediateCmdListCsr) {
        auto lock = immediateCmdListCsr->obtainUniqueOwnership();
        sharedSshCsrHeap = this->initIndirectHeapReservation(sshReserveArg.indirectHeapReservation, sshReserveArg.size, sshAlignment, HeapType::SURFACE_STATE);

        if (getDsh) {
            sharedDshCsrHeap = this->initIndirectHeapReservation(dshReserveArg.indirectHeapReservation, dshReserveArg.size, dshAlignment, HeapType::DYNAMIC_STATE);
        }
    } else {
        this->getHeapWithRequiredSizeAndAlignment(HeapType::SURFACE_STATE, sshReserveArg.size, sshAlignment);

        if (getDsh) {
            this->getHeapWithRequiredSizeAndAlignment(HeapType::DYNAMIC_STATE, dshReserveArg.size, dshAlignment);
        }
        // private heaps can be accessed directly
        sshReserveArg.indirectHeapReservation = nullptr;
        dshReserveArg.indirectHeapReservation = nullptr;
    }
}

// Convenience overload: reuses a command buffer without forcing host memory.
GraphicsAllocation *CommandContainer::reuseExistingCmdBuffer() {
    return this->reuseExistingCmdBuffer(false);
}

// Tries to detach a command buffer from the immediate reusable list, then from
// the regular reusable list. On success the allocation is recorded in
// cmdBufferAllocations. Returns the allocation, or nullptr if none was found.
GraphicsAllocation *CommandContainer::reuseExistingCmdBuffer(bool forceHostMemory) {
    forceHostMemory &= this->useSecondaryCommandStream;
    size_t alignedSize = alignUp(this->getTotalCmdBufferSize(), MemoryConstants::pageSize64k);
    auto cmdBufferAllocation = this->immediateReusableAllocationList->detachAllocation(alignedSize, nullptr, forceHostMemory, this->immediateCmdListCsr, AllocationType::COMMAND_BUFFER).release();
    if (!cmdBufferAllocation && this->reusableAllocationList) {
        // The detached allocation must be kept: dropping the released pointer
        // would leak it and make this function report "nothing to reuse".
        cmdBufferAllocation = this->reusableAllocationList->detachAllocation(alignedSize, nullptr, forceHostMemory, this->immediateCmdListCsr, AllocationType::COMMAND_BUFFER).release();
    }

    if (cmdBufferAllocation) {
        this->cmdBufferAllocations.push_back(cmdBufferAllocation);
    }
    return cmdBufferAllocation;
}

// Removes the command stream's current allocation from cmdBufferAllocations
// and stores it for reuse with a tag update.
void CommandContainer::addCurrentCommandBufferToReusableAllocationList() {
    auto currentAllocation = this->commandStream->getGraphicsAllocation();
    auto position = std::find(this->cmdBufferAllocations.begin(), this->cmdBufferAllocations.end(), currentAllocation);
    // Erasing end() is undefined behaviour, so only erase when it was found.
    if (position != this->cmdBufferAllocations.end()) {
        this->cmdBufferAllocations.erase(position);
    }
    this->storeAllocationAndFlushTagUpdate(currentAllocation);
}

// Points the command stream at cmdBuffer (sized without the reserved tail) and
// adds it to the residency container unless flush-task is used for immediate.
void CommandContainer::setCmdBuffer(GraphicsAllocation *cmdBuffer) {
    size_t alignedSize = alignUp(this->getTotalCmdBufferSize(), MemoryConstants::pageSize64k);
    commandStream->replaceBuffer(cmdBuffer->getUnderlyingBuffer(), alignedSize - cmdBufferReservedSize);
    commandStream->replaceGraphicsAllocation(cmdBuffer);

    if (!getFlushTaskUsedForImmediate()) {
        addToResidencyContainer(cmdBuffer);
    }
}

// Convenience overload: allocates a command buffer without forcing host memory.
GraphicsAllocation *CommandContainer::allocateCommandBuffer() {
    return this->allocateCommandBuffer(false);
}

// Allocates a new COMMAND_BUFFER allocation of the 64KB-aligned total command
// buffer size. System memory is forced only when forceHostMemory is set and
// the secondary command stream is in use.
GraphicsAllocation *CommandContainer::allocateCommandBuffer(bool forceHostMemory) {
    size_t alignedSize = alignUp(this->getTotalCmdBufferSize(), MemoryConstants::pageSize64k);
    AllocationProperties properties{device->getRootDeviceIndex(),
                                    true /* allocateMemory*/,
                                    alignedSize,
                                    AllocationType::COMMAND_BUFFER,
                                    (device->getNumGenericSubDevices() > 1u) /* multiOsContextCapable */,
                                    false,
                                    device->getDeviceBitfield()};
    properties.flags.forceSystemMemory = forceHostMemory && this->useSecondaryCommandStream;

    return device->getMemoryManager()->allocateGraphicsMemoryWithProperties(properties);
}

// Creates the immediate reusable allocation list (once) and pre-populates it
// with command buffers; when a heap helper exists, also pre-creates heap
// allocations and stores them in the heap helper. The amount comes from
// GfxCoreHelper::getAmountOfAllocationsToFill().
void CommandContainer::fillReusableAllocationLists() {
    if (this->immediateReusableAllocationList) {
        return;
    }

    this->immediateReusableAllocationList = std::make_unique<AllocationsList>();

    auto &gfxCoreHelper = device->getGfxCoreHelper();
    auto amountToFill = gfxCoreHelper.getAmountOfAllocationsToFill();
    if (amountToFill == 0u) {
        return;
    }

    for (auto i = 0u; i < amountToFill; i++) {
        auto allocToReuse = this->allocateCommandBuffer();
        this->immediateReusableAllocationList->pushTailOne(*allocToReuse);
        this->getResidencyContainer().push_back(allocToReuse);

        if (this->useSecondaryCommandStream) {
            auto hostAllocToReuse = this->allocateCommandBuffer(true);
            this->immediateReusableAllocationList->pushTailOne(*hostAllocToReuse);
            this->getResidencyContainer().push_back(hostAllocToReuse);
        }
    }

    if (!this->heapHelper) {
        return;
    }

    constexpr size_t heapSize = 65536u;
    size_t alignedSize = alignUp(this->getTotalCmdBufferSize(), MemoryConstants::pageSize64k);
    for (auto i = 0u; i < amountToFill; i++) {
        for (auto heapType = 0u; heapType < IndirectHeap::Type::NUM_TYPES; heapType++) {
            if (skipHeapAllocationCreation(static_cast<HeapType>(heapType))) {
                continue;
            }
            auto heapToReuse = heapHelper->getHeapAllocation(heapType,
                                                             heapSize,
                                                             alignedSize,
                                                             device->getRootDeviceIndex());
            if (heapToReuse != nullptr) {
                this->immediateCmdListCsr->makeResident(*heapToReuse);
            }
            this->heapHelper->storeHeapAllocation(heapToReuse);
        }
    }
}

// Under the CSR ownership lock: stamps the allocation with the CSR's next task
// count, stores it for reuse (command buffers in the immediate reusable list,
// everything else via the heap helper) and flushes a tag update.
void CommandContainer::storeAllocationAndFlushTagUpdate(GraphicsAllocation *allocation) {
    auto lock = this->immediateCmdListCsr->obtainUniqueOwnership();
    auto taskCount = this->immediateCmdListCsr->peekTaskCount() + 1;
    auto osContextId = this->immediateCmdListCsr->getOsContext().getContextId();
    allocation->updateTaskCount(taskCount, osContextId);
    allocation->updateResidencyTaskCount(taskCount, osContextId);
    if (allocation->getAllocationType() == AllocationType::COMMAND_BUFFER) {
        this->immediateReusableAllocationList->pushTailOne(*allocation);
    } else {
        getHeapHelper()->storeHeapAllocation(allocation);
    }
    this->immediateCmdListCsr->flushTagUpdate();
}

// Owns a ReservedIndirectHeap (created with no allocation) and exposes it
// through indirectHeapReservation.
HeapReserveData::HeapReserveData() {
    object = std::make_unique<ReservedIndirectHeap>(nullptr, false);
    indirectHeapReservation = object.get();
}

HeapReserveData::~HeapReserveData() {
}

// Tells whether the container should NOT create its own allocation for the
// given heap type. The indirect object heap is always created; other heaps are
// skipped in bindless configuration, when the heap is shared for immediate
// command lists, for DSH when images are unsupported, or when the heap address
// model is not PrivateHeaps.
bool CommandContainer::skipHeapAllocationCreation(HeapType heapType) {
    if (heapType == IndirectHeap::Type::INDIRECT_OBJECT) {
        return false;
    }
    const auto &hardwareInfo = this->device->getHardwareInfo();

    bool skipCreation = NEO::ApiSpecificConfig::getBindlessConfiguration() ||
                        this->immediateCmdListSharedHeap(heapType) ||
                        (!hardwareInfo.capabilityTable.supportsImages && IndirectHeap::Type::DYNAMIC_STATE == heapType) ||
                        (this->heapAddressModel != HeapAddressModel::PrivateHeaps);
    return skipCreation;
}

} // namespace NEO