/*
 * Copyright (C) 2019-2023 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#pragma once

#include "shared/source/helpers/constants.h"
#include "shared/source/helpers/non_copyable_or_moveable.h"
#include "shared/source/indirect_heap/indirect_heap_type.h"

#include <cstdint>
#include <limits>
#include <memory>
#include <vector>

namespace NEO {
class HeapHelper;
class CommandStreamReceiver;
class Device;
class GraphicsAllocation;
class LinearStream;
class AllocationsList;
class IndirectHeap;

using ResidencyContainer = std::vector<GraphicsAllocation *>;
using CmdBufferContainer = std::vector<GraphicsAllocation *>;
using HeapContainer = std::vector<GraphicsAllocation *>;
using HeapType = IndirectHeapType;

namespace CSRequirements {
// The cleanup section usually contains 1-2 pipeControls, a BB end, and room for a BB start,
// which makes 16 * 2 + 4 + 8 = 40 bytes.
// The command buffer is then aligned to a cacheline, which can add up to 63 bytes of padding.
// 40 + 63 = 103 bytes, so a minimal size of 2 x cacheline (128 bytes) guarantees everything fits.
inline constexpr auto minCommandQueueCommandStreamSize = 2 * MemoryConstants::cacheLineSize;
inline constexpr auto csOverfetchSize = MemoryConstants::pageSize;
} // namespace CSRequirements

class CommandContainer : public NonCopyableOrMovableClass {
  public:
    enum class ErrorCode {
        SUCCESS = 0,
        OUT_OF_DEVICE_MEMORY = 1
    };

    static constexpr size_t defaultListCmdBufferSize = 1u * MemoryConstants::megaByte;
    static constexpr size_t cmdBufferReservedSize = MemoryConstants::cacheLineSize +
                                                    CSRequirements::csOverfetchSize;
    static constexpr size_t totalCmdBufferSize = defaultListCmdBufferSize + cmdBufferReservedSize;
    static constexpr size_t startingResidencyContainerSize = 128;

    CommandContainer();

    CommandContainer(uint32_t maxNumAggregatedIdds);

    CmdBufferContainer &getCmdBufferAllocations() { return cmdBufferAllocations; }

    ResidencyContainer &getResidencyContainer() { return residencyContainer; }

    std::vector<GraphicsAllocation *> &getDeallocationContainer() { return deallocationContainer; }

    void addToResidencyContainer(GraphicsAllocation *alloc);
    void removeDuplicatesFromResidencyContainer();

    LinearStream *getCommandStream() { return commandStream.get(); }

    IndirectHeap *getIndirectHeap(HeapType heapType);

    HeapHelper *getHeapHelper() { return heapHelper.get(); }

    GraphicsAllocation *getIndirectHeapAllocation(HeapType heapType) { return allocationIndirectHeaps[heapType]; }

    void setIndirectHeapAllocation(HeapType heapType, GraphicsAllocation *allocation) { allocationIndirectHeaps[heapType] = allocation; }

    uint64_t getInstructionHeapBaseAddress() const { return instructionHeapBaseAddress; }

    uint64_t getIndirectObjectHeapBaseAddress() const { return indirectObjectHeapBaseAddress; }

    void *getHeapSpaceAllowGrow(HeapType heapType, size_t size);

    ErrorCode initialize(Device *device, AllocationsList *reusableAllocationList, bool requireHeaps);

    void prepareBindfulSsh();

    virtual ~CommandContainer();

    Device *getDevice() const { return device; }

    MOCKABLE_VIRTUAL IndirectHeap *getHeapWithRequiredSizeAndAlignment(HeapType heapType, size_t sizeRequired, size_t alignment);
    void allocateNextCommandBuffer();
    void closeAndAllocateNextCommandBuffer();

    void handleCmdBufferAllocations(size_t startIndex);
    GraphicsAllocation *obtainNextCommandBufferAllocation();

    void reset();

    bool isHeapDirty(HeapType heapType) const { return (dirtyHeaps & (1u << heapType)); }
    bool isAnyHeapDirty() const { return dirtyHeaps != 0; }
    void setHeapDirty(HeapType heapType) { dirtyHeaps |= (1u << heapType); }
    void setDirtyStateForAllHeaps(bool dirty) { dirtyHeaps = dirty ? std::numeric_limits<uint32_t>::max() : 0; }
    void setIddBlock(void *iddBlock) { this->iddBlock = iddBlock; }
    void *getIddBlock() { return iddBlock; }
    uint32_t getNumIddPerBlock() const { return numIddsPerBlock; }
    void setNumIddPerBlock(uint32_t value) { numIddsPerBlock = value; }
    void setReservedSshSize(size_t reserveSize) { reservedSshSize = reserveSize; }

    bool getFlushTaskUsedForImmediate() const { return isFlushTaskUsedForImmediate; }
    void setFlushTaskUsedForImmediate(bool flushTaskUsedForImmediate) { isFlushTaskUsedForImmediate = flushTaskUsedForImmediate; }
    void setImmediateCmdListCsr(CommandStreamReceiver *newValue) { this->immediateCmdListCsr = newValue; }
    void enableHeapSharing() { heapSharingEnabled = true; }
    bool immediateCmdListSharedHeap(HeapType heapType) {
        return (heapSharingEnabled && (heapType == HeapType::DYNAMIC_STATE || heapType == HeapType::SURFACE_STATE));
    }
    void ensureHeapSizePrepared(size_t sshRequiredSize, size_t dshRequiredSize);

    GraphicsAllocation *reuseExistingCmdBuffer();
    GraphicsAllocation *allocateCommandBuffer();
    void setCmdBuffer(GraphicsAllocation *cmdBuffer);
    void addCurrentCommandBufferToReusableAllocationList();

    void fillReusableAllocationLists();
    void storeAllocationAndFlushTagUpdate(GraphicsAllocation *allocation);

    HeapContainer sshAllocations;
    uint64_t currentLinearStreamStartOffset = 0u;
    uint32_t slmSize = std::numeric_limits<uint32_t>::max();
    uint32_t nextIddInBlock = 0;
    bool lastPipelineSelectModeRequired = false;
    bool lastSentUseGlobalAtomics = false;
    bool systolicModeSupport = false;

  protected:
    size_t getTotalCmdBufferSize();
    IndirectHeap *getHeapWithRequiredSize(HeapType heapType, size_t sizeRequired, size_t alignment, bool allowGrow);
    void createAndAssignNewHeap(HeapType heapType, size_t size);

    GraphicsAllocation *allocationIndirectHeaps[HeapType::NUM_TYPES] = {};
    std::unique_ptr<IndirectHeap> indirectHeaps[HeapType::NUM_TYPES];

    CmdBufferContainer cmdBufferAllocations;
    ResidencyContainer residencyContainer;
    std::vector<GraphicsAllocation *> deallocationContainer;

    std::unique_ptr<HeapHelper> heapHelper;
    std::unique_ptr<LinearStream> commandStream;

    uint64_t instructionHeapBaseAddress = 0u;
    uint64_t indirectObjectHeapBaseAddress = 0u;

    void *iddBlock = nullptr;
    Device *device = nullptr;
    AllocationsList *reusableAllocationList = nullptr;
    std::unique_ptr<AllocationsList> immediateReusableAllocationList;
    size_t reservedSshSize = 0;
    CommandStreamReceiver *immediateCmdListCsr = nullptr;
    IndirectHeap *sharedSshCsrHeap = nullptr;
    IndirectHeap *sharedDshCsrHeap = nullptr;

    uint32_t dirtyHeaps = std::numeric_limits<uint32_t>::max();
    uint32_t numIddsPerBlock = 64;

    bool isFlushTaskUsedForImmediate = false;
    bool isHandleFenceCompletionRequired = false;
    bool heapSharingEnabled = false;
};

} // namespace NEO
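
// Illustrative usage sketch (not part of this header's interface): a rough outline of how a
// command list implementation might drive CommandContainer. The variables `device`, `reuseList`,
// and `someAllocation` are hypothetical placeholders, and the real runtime's call sites and
// error handling may differ; only methods declared above are used.
//
//   NEO::CommandContainer cmdContainer;
//   auto status = cmdContainer.initialize(device, reuseList, true /* requireHeaps */);
//   if (status != NEO::CommandContainer::ErrorCode::SUCCESS) {
//       // e.g. ErrorCode::OUT_OF_DEVICE_MEMORY
//       return status;
//   }
//
//   NEO::LinearStream *stream = cmdContainer.getCommandStream();
//   // ... encode commands into *stream, registering any allocations the GPU will reference:
//   cmdContainer.addToResidencyContainer(someAllocation);
//
//   // When the current command buffer fills up, close it and switch to a fresh one:
//   cmdContainer.closeAndAllocateNextCommandBuffer();
//
//   // Reuse the container for a new command list:
//   cmdContainer.reset();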