/*
 * Copyright (C) 2023-2025 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#pragma once

#include "shared/source/helpers/common_types.h"
#include "shared/source/helpers/non_copyable_or_moveable.h"
#include "shared/source/helpers/ptr_math.h"
#include "shared/source/memory_manager/allocation_type.h"

#include <algorithm>
#include <cstdint>
#include <memory>
#include <mutex>
#include <vector>

namespace NEO {
class GraphicsAllocation;
class MemoryManager;
class Device;
class TagNodeBase;

template <bool deviceAlloc>
class DeviceAllocNodeType {
  public:
    using ValueT = uint64_t;

    static constexpr size_t defaultAllocatorTagCount = 128;

    static constexpr AllocationType getAllocationType() { return deviceAlloc ? NEO::AllocationType::gpuTimestampDeviceBuffer : NEO::AllocationType::timestampPacketTagBuffer; }

    static constexpr TagNodeType getTagNodeType() { return TagNodeType::counter64b; }

    static constexpr size_t getSinglePacketSize() { return sizeof(uint64_t); }

    void initialize(uint64_t initValue) { data = initValue; }

  protected:
    uint64_t data = {};
};

static_assert(sizeof(uint64_t) == sizeof(DeviceAllocNodeType<true>), "This structure is consumed by GPU and has to follow specific restrictions for padding and size");
static_assert(sizeof(uint64_t) == sizeof(DeviceAllocNodeType<false>), "This structure is consumed by GPU and has to follow specific restrictions for padding and size");

class InOrderExecInfo : public NEO::NonCopyableClass {
  public:
    ~InOrderExecInfo();

    InOrderExecInfo() = delete;

    static std::shared_ptr<InOrderExecInfo> create(TagNodeBase *deviceCounterNode, TagNodeBase *hostCounterNode, NEO::Device &device, uint32_t partitionCount, bool regularCmdList);
    static std::shared_ptr<InOrderExecInfo> createFromExternalAllocation(NEO::Device &device, NEO::GraphicsAllocation *deviceAllocation, uint64_t deviceAddress, NEO::GraphicsAllocation *hostAllocation, uint64_t *hostAddress, uint64_t counterValue, uint32_t devicePartitions, uint32_t hostPartitions);

    InOrderExecInfo(TagNodeBase *deviceCounterNode, TagNodeBase *hostCounterNode, NEO::Device &device, uint32_t partitionCount, bool regularCmdList, bool atomicDeviceSignalling);

    NEO::GraphicsAllocation *getDeviceCounterAllocation() const;
    NEO::GraphicsAllocation *getHostCounterAllocation() const;
    uint64_t *getBaseHostAddress() const { return hostAddress; }
    uint64_t getBaseDeviceAddress() const { return deviceAddress; }
    uint64_t getBaseHostGpuAddress() const;

    uint64_t getCounterValue() const { return counterValue; }
    void addCounterValue(uint64_t addValue) { counterValue += addValue; }
    void resetCounterValue() { counterValue = 0; }

    uint64_t getRegularCmdListSubmissionCounter() const { return regularCmdListSubmissionCounter; }
    void addRegularCmdListSubmissionCounter(uint64_t addValue) { regularCmdListSubmissionCounter += addValue; }

    bool isRegularCmdList() const { return regularCmdList; }
    bool isHostStorageDuplicated() const { return duplicatedHostStorage; }
    bool isAtomicDeviceSignalling() const { return atomicDeviceSignalling; }

    uint32_t getNumDevicePartitionsToWait() const { return numDevicePartitionsToWait; }
    uint32_t getNumHostPartitionsToWait() const { return numHostPartitionsToWait; }

    void setAllocationOffset(uint32_t newOffset) { allocationOffset = newOffset; }

    void initializeAllocationsFromHost();

    uint32_t getAllocationOffset() const { return allocationOffset; }

    void reset();
    bool isExternalMemoryExecInfo() const { return deviceCounterNode == nullptr; }

    void setLastWaitedCounterValue(uint64_t value) {
        if (!isExternalMemoryExecInfo()) {
            lastWaitedCounterValue = std::max(value, lastWaitedCounterValue);
        }
    }
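
    /*
     * Usage sketch (illustrative only; waitForCounter is a hypothetical helper, not
     * part of this header): callers can record the highest counter value they have
     * already waited for and skip redundant waits on later appends:
     *
     *   if (!execInfo->isCounterAlreadyDone(waitValue)) {
     *       waitForCounter(*execInfo, waitValue); // hypothetical wait on the counter allocation
     *       execInfo->setLastWaitedCounterValue(waitValue);
     *   }
     */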
    bool isCounterAlreadyDone(uint64_t waitValue) const {
        return lastWaitedCounterValue >= waitValue && this->allocationOffset == 0u;
    }

    NEO::GraphicsAllocation *getExternalHostAllocation() const { return externalHostAllocation; }
    NEO::GraphicsAllocation *getExternalDeviceAllocation() const { return externalDeviceAllocation; }

    void pushTempTimestampNode(TagNodeBase *node, uint64_t value);
    void releaseNotUsedTempTimestampNodes(bool forceReturn);

  protected:
    void uploadToTbx(TagNodeBase &node, size_t size);

    NEO::Device &device;
    NEO::TagNodeBase *deviceCounterNode = nullptr;
    NEO::TagNodeBase *hostCounterNode = nullptr;
    NEO::GraphicsAllocation *externalHostAllocation = nullptr;
    NEO::GraphicsAllocation *externalDeviceAllocation = nullptr;
    std::vector<std::pair<TagNodeBase *, uint64_t>> tempTimestampNodes;
    std::mutex mutex;
    uint64_t counterValue = 0;
    uint64_t lastWaitedCounterValue = 0;
    uint64_t regularCmdListSubmissionCounter = 0;
    uint64_t deviceAddress = 0;
    uint64_t *hostAddress = nullptr;
    uint32_t numDevicePartitionsToWait = 0;
    uint32_t numHostPartitionsToWait = 0;
    uint32_t allocationOffset = 0;
    uint32_t rootDeviceIndex = 0;
    bool regularCmdList = false;
    bool duplicatedHostStorage = false;
    bool atomicDeviceSignalling = false;
    bool isTbx = false;
};

namespace InOrderPatchCommandHelpers {

inline uint64_t getAppendCounterValue(const InOrderExecInfo &inOrderExecInfo) {
    if (inOrderExecInfo.isRegularCmdList() && inOrderExecInfo.getRegularCmdListSubmissionCounter() > 1) {
        return inOrderExecInfo.getCounterValue() * (inOrderExecInfo.getRegularCmdListSubmissionCounter() - 1);
    }

    return 0;
}

enum class PatchCmdType {
    none,
    lri64b,
    sdi,
    semaphore,
    walker,
    pipeControl
};

template <typename GfxFamily>
struct PatchCmd {
    PatchCmd(std::shared_ptr<InOrderExecInfo> *inOrderExecInfo, void *cmd1, void *cmd2, uint64_t baseCounterValue, PatchCmdType patchCmdType, bool deviceAtomicSignaling, bool duplicatedHostStorage)
        : cmd1(cmd1), cmd2(cmd2), baseCounterValue(baseCounterValue), patchCmdType(patchCmdType), deviceAtomicSignaling(deviceAtomicSignaling), duplicatedHostStorage(duplicatedHostStorage) {
        if (inOrderExecInfo) {
            this->inOrderExecInfo = *inOrderExecInfo;
        }
    }

    void patch(uint64_t appendCounterValue) {
        if (skipPatching) {
            return;
        }

        switch (patchCmdType) {
        case PatchCmdType::sdi:
            patchSdi(appendCounterValue);
            break;
        case PatchCmdType::semaphore:
            patchSemaphore(appendCounterValue);
            break;
        case PatchCmdType::walker:
            patchComputeWalker(appendCounterValue);
            break;
        case PatchCmdType::lri64b:
            patchLri64b(appendCounterValue);
            break;
        case PatchCmdType::pipeControl:
            patchPipeControl(appendCounterValue);
            break;
        default:
            UNRECOVERABLE_IF(true);
            break;
        }
    }

    void updateInOrderExecInfo(std::shared_ptr<InOrderExecInfo> *inOrderExecInfo) {
        this->inOrderExecInfo = *inOrderExecInfo;
    }

    void setSkipPatching(bool value) {
        skipPatching = value;
    }

    bool isExternalDependency() const { return inOrderExecInfo.get(); }

    std::shared_ptr<InOrderExecInfo> inOrderExecInfo;
    void *cmd1 = nullptr;
    void *cmd2 = nullptr;
    const uint64_t baseCounterValue = 0;
    const PatchCmdType patchCmdType = PatchCmdType::none;
    bool deviceAtomicSignaling = false;
    bool duplicatedHostStorage = false;
    bool skipPatching = false;

  protected:
    void patchSdi(uint64_t appendCounterValue) {
        auto sdiCmd = reinterpret_cast<typename GfxFamily::MI_STORE_DATA_IMM *>(cmd1);
        sdiCmd->setDataDword0(getLowPart(baseCounterValue + appendCounterValue));
        sdiCmd->setDataDword1(getHighPart(baseCounterValue + appendCounterValue));
    }
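
    /*
     * Note: for commands that depend on another (external) InOrderExecInfo,
     * isExternalDependency() is true and the append value is recomputed from that
     * exec info's own submission counter via getAppendCounterValue(), instead of
     * using the value passed in by the caller.
     */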
    void patchSemaphore(uint64_t appendCounterValue) {
        if (isExternalDependency()) {
            appendCounterValue = InOrderPatchCommandHelpers::getAppendCounterValue(*inOrderExecInfo);
            if (appendCounterValue == 0) {
                return;
            }
        }

        auto semaphoreCmd = reinterpret_cast<typename GfxFamily::MI_SEMAPHORE_WAIT *>(cmd1);
        semaphoreCmd->setSemaphoreDataDword(static_cast<uint32_t>(baseCounterValue + appendCounterValue));
    }

    void patchComputeWalker(uint64_t appendCounterValue);

    void patchPipeControl(uint64_t appendCounterValue) {
        auto pcCmd = reinterpret_cast<typename GfxFamily::PIPE_CONTROL *>(cmd1);
        pcCmd->setImmediateData(static_cast<uint64_t>(baseCounterValue + appendCounterValue));
    }

    void patchLri64b(uint64_t appendCounterValue) {
        if (isExternalDependency()) {
            appendCounterValue = InOrderPatchCommandHelpers::getAppendCounterValue(*inOrderExecInfo);
            if (appendCounterValue == 0) {
                return;
            }
        }

        const uint64_t counterValue = baseCounterValue + appendCounterValue;

        auto lri1 = reinterpret_cast<typename GfxFamily::MI_LOAD_REGISTER_IMM *>(cmd1);
        lri1->setDataDword(getLowPart(counterValue));

        auto lri2 = reinterpret_cast<typename GfxFamily::MI_LOAD_REGISTER_IMM *>(cmd2);
        lri2->setDataDword(getHighPart(counterValue));
    }

    PatchCmd() = delete;
};

} // namespace InOrderPatchCommandHelpers

template <typename GfxFamily>
using InOrderPatchCommandsContainer = std::vector<NEO::InOrderPatchCommandHelpers::PatchCmd<GfxFamily>>;

} // namespace NEO
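
/*
 * Patching flow sketch (illustrative only; patchCmds and execInfo are assumed to be
 * the container and exec info built while recording a regular command list): on each
 * resubmission, previously recorded commands are re-patched so their counter values
 * account for earlier submissions:
 *
 *   uint64_t appendValue = NEO::InOrderPatchCommandHelpers::getAppendCounterValue(*execInfo);
 *   for (auto &cmd : patchCmds) { // InOrderPatchCommandsContainer<GfxFamily>
 *       cmd.patch(appendValue);
 *   }
 */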