/* * Copyright (C) 2018-2023 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/command_container/command_encoder.h" #include "shared/source/command_stream/csr_deps.h" #include "shared/source/debug_settings/debug_settings_manager.h" #include "shared/source/execution_environment/root_device_environment.h" #include "shared/source/helpers/aux_translation.h" #include "shared/source/helpers/gfx_core_helper.h" #include "shared/source/helpers/non_copyable_or_moveable.h" #include "shared/source/helpers/pipe_control_args.h" #include "shared/source/helpers/string.h" #include "shared/source/helpers/timestamp_packet_constants.h" #include "shared/source/helpers/timestamp_packet_container.h" #include "shared/source/utilities/tag_allocator.h" #include namespace NEO { class CommandStreamReceiver; class LinearStream; #pragma pack(1) template class TimestampPackets : public TagTypeBase { public: static constexpr AllocationType getAllocationType() { return AllocationType::TIMESTAMP_PACKET_TAG_BUFFER; } static constexpr TagNodeType getTagNodeType() { return TagNodeType::TimestampPacket; } static constexpr size_t getSinglePacketSize() { return sizeof(Packet); } void initialize() { for (auto &packet : packets) { packet.contextStart = TimestampPacketConstants::initValue; packet.globalStart = TimestampPacketConstants::initValue; packet.contextEnd = TimestampPacketConstants::initValue; packet.globalEnd = TimestampPacketConstants::initValue; } } void assignDataToAllTimestamps(uint32_t packetIndex, void *source) { memcpy_s(&packets[packetIndex], sizeof(Packet), source, sizeof(Packet)); } static constexpr size_t getGlobalStartOffset() { return offsetof(Packet, globalStart); } static constexpr size_t getContextStartOffset() { return offsetof(Packet, contextStart); } static constexpr size_t getContextEndOffset() { return offsetof(Packet, contextEnd); } static constexpr size_t getGlobalEndOffset() { return offsetof(Packet, globalEnd); } uint64_t getContextStartValue(uint32_t packetIndex) const { return static_cast(packets[packetIndex].contextStart); } uint64_t getGlobalStartValue(uint32_t packetIndex) const { return static_cast(packets[packetIndex].globalStart); } uint64_t getContextEndValue(uint32_t packetIndex) const { return static_cast(packets[packetIndex].contextEnd); } uint64_t getGlobalEndValue(uint32_t packetIndex) const { return static_cast(packets[packetIndex].globalEnd); } void const *getContextEndAddress(uint32_t packetIndex) const { return static_cast(&packets[packetIndex].contextEnd); } void const *getContextStartAddress(uint32_t packetIndex) const { return static_cast(&packets[packetIndex].contextStart); } protected: struct alignas(1) Packet { TSize contextStart = TimestampPacketConstants::initValue; TSize globalStart = TimestampPacketConstants::initValue; TSize contextEnd = TimestampPacketConstants::initValue; TSize globalEnd = TimestampPacketConstants::initValue; }; Packet packets[TimestampPacketConstants::preferredPacketCount]; }; #pragma pack() static_assert(((4 * TimestampPacketConstants::preferredPacketCount) * sizeof(uint32_t)) == sizeof(TimestampPackets), "This structure is consumed by GPU and has to follow specific restrictions for padding and size"); struct TimestampPacketHelper { static uint64_t getContextEndGpuAddress(const TagNodeBase ×tampPacketNode) { return timestampPacketNode.getGpuAddress() + timestampPacketNode.getContextEndOffset(); } static uint64_t getContextStartGpuAddress(const TagNodeBase ×tampPacketNode) { return timestampPacketNode.getGpuAddress() + timestampPacketNode.getContextStartOffset(); } static uint64_t getGlobalEndGpuAddress(const TagNodeBase ×tampPacketNode) { return timestampPacketNode.getGpuAddress() + timestampPacketNode.getGlobalEndOffset(); } static uint64_t getGlobalStartGpuAddress(const TagNodeBase ×tampPacketNode) { return timestampPacketNode.getGpuAddress() + timestampPacketNode.getGlobalStartOffset(); } template static void programSemaphore(LinearStream &cmdStream, TagNodeBase ×tampPacketNode) { using COMPARE_OPERATION = typename GfxFamily::MI_SEMAPHORE_WAIT::COMPARE_OPERATION; if (DebugManager.flags.PrintTimestampPacketUsage.get() == 1) { printf("\nPID: %u, TSP used for Semaphore: 0x%" PRIX64 ", cmdBuffer pos: 0x%" PRIX64, SysCalls::getProcessId(), timestampPacketNode.getGpuAddress(), cmdStream.getCurrentGpuAddressPosition()); } auto compareAddress = getContextEndGpuAddress(timestampPacketNode); for (uint32_t packetId = 0; packetId < timestampPacketNode.getPacketsUsed(); packetId++) { uint64_t compareOffset = packetId * timestampPacketNode.getSinglePacketSize(); EncodeSemaphore::addMiSemaphoreWaitCommand(cmdStream, compareAddress + compareOffset, TimestampPacketConstants::initValue, COMPARE_OPERATION::COMPARE_OPERATION_SAD_NOT_EQUAL_SDD); } } template static void programConditionalBbStartForRelaxedOrdering(LinearStream &cmdStream, TagNodeBase ×tampPacketNode) { auto compareAddress = getContextEndGpuAddress(timestampPacketNode); for (uint32_t packetId = 0; packetId < timestampPacketNode.getPacketsUsed(); packetId++) { uint64_t compareOffset = packetId * timestampPacketNode.getSinglePacketSize(); EncodeBatchBufferStartOrEnd::programConditionalDataMemBatchBufferStart(cmdStream, 0, compareAddress + compareOffset, TimestampPacketConstants::initValue, NEO::CompareOperation::Equal, true); } } template static void programCsrDependenciesForTimestampPacketContainer(LinearStream &cmdStream, const CsrDependencies &csrDependencies, bool relaxedOrderingEnabled) { for (auto timestampPacketContainer : csrDependencies.timestampPacketContainer) { for (auto &node : timestampPacketContainer->peekNodes()) { if (relaxedOrderingEnabled) { TimestampPacketHelper::programConditionalBbStartForRelaxedOrdering(cmdStream, *node); } else { TimestampPacketHelper::programSemaphore(cmdStream, *node); } } } } template static void nonStallingContextEndNodeSignal(LinearStream &cmdStream, const TagNodeBase ×tampPacketNode, bool multiTileOperation) { uint64_t contextEndAddress = getContextEndGpuAddress(timestampPacketNode); NEO::EncodeStoreMemory::programStoreDataImm(cmdStream, contextEndAddress, 0, 0, false, multiTileOperation); } template static void programCsrDependenciesForForMultiRootDeviceSyncContainer(LinearStream &cmdStream, const CsrDependencies &csrDependencies) { for (auto timestampPacketContainer : csrDependencies.multiRootTimeStampSyncContainer) { for (auto &node : timestampPacketContainer->peekNodes()) { TimestampPacketHelper::programSemaphore(cmdStream, *node); } } } template static void programSemaphoreForAuxTranslation(LinearStream &cmdStream, const TimestampPacketDependencies *timestampPacketDependencies, const RootDeviceEnvironment &rootDeviceEnvironment) { auto &container = (auxTranslationDirection == AuxTranslationDirection::AuxToNonAux) ? timestampPacketDependencies->auxToNonAuxNodes : timestampPacketDependencies->nonAuxToAuxNodes; // cache flush after NDR, before NonAuxToAux if (auxTranslationDirection == AuxTranslationDirection::NonAuxToAux && timestampPacketDependencies->cacheFlushNodes.peekNodes().size() > 0) { UNRECOVERABLE_IF(timestampPacketDependencies->cacheFlushNodes.peekNodes().size() != 1); auto cacheFlushTimestampPacketGpuAddress = getContextEndGpuAddress(*timestampPacketDependencies->cacheFlushNodes.peekNodes()[0]); PipeControlArgs args; args.dcFlushEnable = MemorySynchronizationCommands::getDcFlushEnable(true, rootDeviceEnvironment); MemorySynchronizationCommands::addBarrierWithPostSyncOperation( cmdStream, PostSyncMode::ImmediateData, cacheFlushTimestampPacketGpuAddress, 0, rootDeviceEnvironment, args); } for (auto &node : container.peekNodes()) { TimestampPacketHelper::programSemaphore(cmdStream, *node); } } template static size_t getRequiredCmdStreamSizeForAuxTranslationNodeDependency(size_t count, const RootDeviceEnvironment &rootDeviceEnvironment, bool cacheFlushForBcsRequired) { size_t size = count * TimestampPacketHelper::getRequiredCmdStreamSizeForNodeDependencyWithBlitEnqueue(); if (auxTranslationDirection == AuxTranslationDirection::NonAuxToAux && cacheFlushForBcsRequired) { size += MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(rootDeviceEnvironment, false); } return size; } template static size_t getRequiredCmdStreamSizeForNodeDependencyWithBlitEnqueue() { return NEO::EncodeSemaphore::getSizeMiSemaphoreWait(); } template static size_t getRequiredCmdStreamSizeForSemaphoreNodeDependency(TagNodeBase ×tampPacketNode) { return (timestampPacketNode.getPacketsUsed() * NEO::EncodeSemaphore::getSizeMiSemaphoreWait()); } template static size_t getRequiredCmdStreamSizeForRelaxedOrderingNodeDependency(TagNodeBase ×tampPacketNode) { return (timestampPacketNode.getPacketsUsed() * EncodeBatchBufferStartOrEnd::getCmdSizeConditionalDataMemBatchBufferStart()); } template static size_t getRequiredCmdStreamSize(const CsrDependencies &csrDependencies, bool relaxedOrderingEnabled) { size_t totalCommandsSize = 0; for (auto timestampPacketContainer : csrDependencies.timestampPacketContainer) { for (auto &node : timestampPacketContainer->peekNodes()) { if (relaxedOrderingEnabled) { totalCommandsSize += getRequiredCmdStreamSizeForRelaxedOrderingNodeDependency(*node); } else { totalCommandsSize += getRequiredCmdStreamSizeForSemaphoreNodeDependency(*node); } } } return totalCommandsSize; } template static size_t getRequiredCmdStreamSizeForMultiRootDeviceSyncNodesContainer(const CsrDependencies &csrDependencies) { return csrDependencies.multiRootTimeStampSyncContainer.size() * NEO::EncodeSemaphore::getSizeMiSemaphoreWait(); } }; } // namespace NEO