/*
 * Copyright (C) 2021 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#include "shared/source/command_container/implicit_scaling.h"
#include "shared/source/command_container/walker_partition_xehp_plus.h"
#include "shared/source/command_stream/linear_stream.h"

namespace NEO {

// Returns the command-buffer space estimate reported by
// WalkerPartition::estimateSpaceRequiredInCommandBuffer for a walker dispatched
// across the tiles selected in `devices`.
//
// The partition count and the static-partitioning decision are obtained from
// WalkerPartition::computePartitionCountAndPartitionType. When static
// partitioning is chosen, the partition count must equal the tile count;
// otherwise UNRECOVERABLE_IF fires.
//
// NOTE(review): the element type of the Vec3 parameters is assumed to be
// size_t - confirm against the declaration in implicit_scaling.h.
template <typename GfxFamily>
size_t ImplicitScalingDispatch<GfxFamily>::getSize(bool nativeCrossTileAtomicSync,
                                                   bool preferStaticPartitioning,
                                                   const DeviceBitfield &devices,
                                                   Vec3<size_t> groupStart,
                                                   Vec3<size_t> groupCount) {
    // partitionType is only an out-parameter here; its value is not used afterwards.
    typename GfxFamily::COMPUTE_WALKER::PARTITION_TYPE partitionType{};
    bool staticPartitioning = false;
    const uint32_t tileCount = static_cast<uint32_t>(devices.count());

    const uint32_t partitionCount = WalkerPartition::computePartitionCountAndPartitionType<GfxFamily>(tileCount,
                                                                                                      preferStaticPartitioning,
                                                                                                      groupStart,
                                                                                                      groupCount,
                                                                                                      {},
                                                                                                      &partitionType,
                                                                                                      &staticPartitioning);
    UNRECOVERABLE_IF(staticPartitioning && (tileCount != partitionCount));

    auto synchronizeBeforeExecution = ImplicitScalingHelper::isSynchronizeBeforeExecutionRequired();

    // NOTE(review): the literal arguments `false` and `16u` are forwarded as-is;
    // their meaning is defined by the callee's signature - confirm there.
    return static_cast<size_t>(WalkerPartition::estimateSpaceRequiredInCommandBuffer<GfxFamily>(false,
                                                                                                16u,
                                                                                                synchronizeBeforeExecution,
                                                                                                nativeCrossTileAtomicSync,
                                                                                                staticPartitioning));
}

// Writes the partitioned walker command sequence for `walkerCmd` into
// `commandStream` and stores the partition count that was used in
// `partitionCount`.
//
// Static partitioning is preferred whenever `workPartitionAllocationGpuVa` is
// non-zero; the final decision comes from
// WalkerPartition::computePartitionCountAndSetPartitionType. In the dynamic
// path a non-zero ExperimentalSetWalkerPartitionCount debug flag overrides the
// computed partition count.
template <typename GfxFamily>
void ImplicitScalingDispatch<GfxFamily>::dispatchCommands(LinearStream &commandStream,
                                                          WALKER_TYPE &walkerCmd,
                                                          const DeviceBitfield &devices,
                                                          uint32_t &partitionCount,
                                                          bool useSecondaryBatchBuffer,
                                                          bool nativeCrossTileAtomicSync,
                                                          bool usesImages,
                                                          uint64_t workPartitionAllocationGpuVa) {
    uint32_t totalProgrammedSize = 0u;
    const uint32_t tileCount = static_cast<uint32_t>(devices.count());
    const bool preferStaticPartitioning = workPartitionAllocationGpuVa != 0u;

    bool staticPartitioning = false;
    partitionCount = WalkerPartition::computePartitionCountAndSetPartitionType<GfxFamily>(&walkerCmd,
                                                                                          tileCount,
                                                                                          preferStaticPartitioning,
                                                                                          usesImages,
                                                                                          &staticPartitioning);
    const bool synchronizeBeforeExecution = ImplicitScalingHelper::isSynchronizeBeforeExecutionRequired();

    if (staticPartitioning) {
        UNRECOVERABLE_IF(tileCount != partitionCount);
        // getSpace(0u) yields the current write pointer without consuming space;
        // the bytes actually written are reserved after construction (see below).
        WalkerPartition::constructStaticallyPartitionedCommandBuffer<GfxFamily>(commandStream.getSpace(0u),
                                                                                commandStream.getGraphicsAllocation()->getGpuAddress() + commandStream.getUsed(),
                                                                                &walkerCmd,
                                                                                totalProgrammedSize,
                                                                                partitionCount,
                                                                                tileCount,
                                                                                synchronizeBeforeExecution,
                                                                                useSecondaryBatchBuffer,
                                                                                nativeCrossTileAtomicSync,
                                                                                workPartitionAllocationGpuVa);
    } else {
        // Read the debug override once; any non-zero value replaces the computed count.
        const auto forcedPartitionCount = DebugManager.flags.ExperimentalSetWalkerPartitionCount.get();
        if (forcedPartitionCount) {
            partitionCount = forcedPartitionCount;
            if (partitionCount == 1u) {
                // A single partition means partitioning is switched off on the walker.
                walkerCmd.setPartitionType(GfxFamily::COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_DISABLED);
            }
        }
        WalkerPartition::constructDynamicallyPartitionedCommandBuffer<GfxFamily>(commandStream.getSpace(0u),
                                                                                 commandStream.getGraphicsAllocation()->getGpuAddress() + commandStream.getUsed(),
                                                                                 &walkerCmd,
                                                                                 totalProgrammedSize,
                                                                                 partitionCount,
                                                                                 tileCount,
                                                                                 false,
                                                                                 synchronizeBeforeExecution,
                                                                                 useSecondaryBatchBuffer,
                                                                                 nativeCrossTileAtomicSync);
    }

    // Advance the stream past everything the construct* helper reported as programmed.
    commandStream.getSpace(totalProgrammedSize);
}

} // namespace NEO