diff --git a/level_zero/core/source/cmdlist/CMakeLists.txt b/level_zero/core/source/cmdlist/CMakeLists.txt index b5bb76cb0e..81465705b7 100644 --- a/level_zero/core/source/cmdlist/CMakeLists.txt +++ b/level_zero/core/source/cmdlist/CMakeLists.txt @@ -12,7 +12,6 @@ target_sources(${L0_STATIC_LIB_NAME} ${CMAKE_CURRENT_SOURCE_DIR}/cmdlist_extended${BRANCH_DIR_SUFFIX}cmdlist_extended.inl ${CMAKE_CURRENT_SOURCE_DIR}/cmdlist_hw.h ${CMAKE_CURRENT_SOURCE_DIR}/cmdlist_hw.inl - ${CMAKE_CURRENT_SOURCE_DIR}/cmdlist_hw_skl_to_tgllp.inl ${CMAKE_CURRENT_SOURCE_DIR}/cmdlist_hw_immediate.h ${CMAKE_CURRENT_SOURCE_DIR}/cmdlist_hw_immediate.inl ${CMAKE_CURRENT_SOURCE_DIR}/cmdlist_imp.cpp diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_skl_to_tgllp.inl b/level_zero/core/source/cmdlist/cmdlist_hw_skl_to_tgllp.inl deleted file mode 100644 index a811931bfe..0000000000 --- a/level_zero/core/source/cmdlist/cmdlist_hw_skl_to_tgllp.inl +++ /dev/null @@ -1,368 +0,0 @@ -/* - * Copyright (C) 2020-2025 Intel Corporation - * - * SPDX-License-Identifier: MIT - * - */ - -#include "shared/source/command_container/command_encoder.h" -#include "shared/source/command_stream/linear_stream.h" -#include "shared/source/command_stream/preemption.h" -#include "shared/source/helpers/pause_on_gpu_properties.h" -#include "shared/source/helpers/pipe_control_args.h" -#include "shared/source/helpers/register_offsets.h" -#include "shared/source/helpers/simd_helper.h" -#include "shared/source/memory_manager/graphics_allocation.h" -#include "shared/source/memory_manager/memory_manager.h" -#include "shared/source/memory_manager/residency_container.h" -#include "shared/source/program/kernel_info.h" -#include "shared/source/unified_memory/unified_memory.h" -#include "shared/source/utilities/software_tags_manager.h" - -#include "level_zero/core/source/driver/driver_handle_imp.h" -#include "level_zero/core/source/kernel/kernel_imp.h" - -#include "encode_surface_state_args.h" - -#include - -namespace L0 { -struct DeviceImp; - -template -size_t CommandListCoreFamily::getReserveSshSize() { - using RENDER_SURFACE_STATE = typename GfxFamily::RENDER_SURFACE_STATE; - return sizeof(RENDER_SURFACE_STATE); -} - -template -void CommandListCoreFamily::adjustWriteKernelTimestamp(uint64_t globalAddress, uint64_t baseAddress, CommandToPatchContainer *outTimeStampSyncCmds, - bool workloadPartition, bool copyOperation, bool globalTimestamp) {} - -template -bool CommandListCoreFamily::isInOrderNonWalkerSignalingRequired(const Event *event) const { - return false; -} - -template -ze_result_t CommandListCoreFamily::appendLaunchKernelWithParams(Kernel *kernel, - const ze_group_count_t &threadGroupDimensions, - Event *event, - CmdListKernelLaunchParams &launchParams) { - UNRECOVERABLE_IF(kernel == nullptr); - UNRECOVERABLE_IF(launchParams.skipInOrderNonWalkerSignaling); - const auto driverHandle = static_cast(device->getDriverHandle()); - const auto &kernelDescriptor = kernel->getKernelDescriptor(); - if (kernelDescriptor.kernelAttributes.flags.isInvalid) { - return ZE_RESULT_ERROR_INVALID_ARGUMENT; - } - - const auto kernelImmutableData = kernel->getImmutableData(); - auto kernelInfo = kernelImmutableData->getKernelInfo(); - - NEO::IndirectHeap *ssh = nullptr; - NEO::IndirectHeap *dsh = nullptr; - - DBG_LOG(PrintDispatchParameters, "Kernel: ", kernelInfo->kernelDescriptor.kernelMetadata.kernelName, - ", Group size: ", kernel->getGroupSize()[0], ", ", kernel->getGroupSize()[1], ", ", kernel->getGroupSize()[2], - ", Group count: ", threadGroupDimensions.groupCountX, ", ", threadGroupDimensions.groupCountY, ", ", threadGroupDimensions.groupCountZ, - ", SIMD: ", kernelInfo->getMaxSimdSize()); - - if (this->immediateCmdListHeapSharing || this->stateBaseAddressTracking) { - auto &sshReserveConfig = commandContainer.getSurfaceStateHeapReserve(); - NEO::HeapReserveArguments sshReserveArgs = { - sshReserveConfig.indirectHeapReservation, - NEO::EncodeDispatchKernel::getSizeRequiredSsh(*kernelInfo), - NEO::EncodeDispatchKernel::getDefaultSshAlignment()}; - - // update SSH size - when global bindless addressing is used, kernel args may not require ssh space - if (kernel->getSurfaceStateHeapDataSize() == 0) { - sshReserveArgs.size = 0; - } - - auto &dshReserveConfig = commandContainer.getDynamicStateHeapReserve(); - NEO::HeapReserveArguments dshReserveArgs = { - dshReserveConfig.indirectHeapReservation, - NEO::EncodeDispatchKernel::getSizeRequiredDsh(kernelDescriptor, commandContainer.getNumIddPerBlock()), - NEO::EncodeDispatchKernel::getDefaultDshAlignment()}; - - if (launchParams.isKernelSplitOperation) { - // when appendLaunchKernel is called during an operation with kernel split is true, - // then reserve sufficient ssh and dsh heaps during first kernel split, by multiplying, individual - // dsh and ssh heap size retrieved above with number of kernels in split operation. - // And after first kernel split, for remainder kernel split calls, dont estimate heap size. - if (launchParams.numKernelsExecutedInSplitLaunch == 0) { - dshReserveArgs.size = launchParams.numKernelsInSplitLaunch * dshReserveArgs.size; - sshReserveArgs.size = launchParams.numKernelsInSplitLaunch * sshReserveArgs.size; - commandContainer.reserveSpaceForDispatch( - sshReserveArgs, - dshReserveArgs, true); - } - } else { - commandContainer.reserveSpaceForDispatch( - sshReserveArgs, - dshReserveArgs, true); - } - ssh = sshReserveArgs.indirectHeapReservation; - dsh = dshReserveArgs.indirectHeapReservation; - } - - appendEventForProfiling(event, nullptr, true, false, false, false); - - auto perThreadScratchSize = std::max(this->getCommandListPerThreadScratchSize(0u), - kernel->getImmutableData()->getDescriptor().kernelAttributes.perThreadScratchSize[0]); - this->setCommandListPerThreadScratchSize(0u, perThreadScratchSize); - - auto slmEnable = (kernel->getImmutableData()->getDescriptor().kernelAttributes.slmInlineSize > 0); - this->setCommandListSLMEnable(slmEnable); - - auto kernelPreemptionMode = obtainKernelPreemptionMode(kernel); - commandListPreemptionMode = std::min(commandListPreemptionMode, kernelPreemptionMode); - - kernel->patchGlobalOffset(); - - this->allocateOrReuseKernelPrivateMemoryIfNeeded(kernel, kernelDescriptor.kernelAttributes.perHwThreadPrivateMemorySize); - - if (!launchParams.isIndirect) { - kernel->setGroupCount(threadGroupDimensions.groupCountX, - threadGroupDimensions.groupCountY, - threadGroupDimensions.groupCountZ); - } - - if (launchParams.isIndirect) { - prepareIndirectParams(&threadGroupDimensions); - } - - if (kernel->hasIndirectAllocationsAllowed()) { - UnifiedMemoryControls unifiedMemoryControls = kernel->getUnifiedMemoryControls(); - - if (unifiedMemoryControls.indirectDeviceAllocationsAllowed) { - this->unifiedMemoryControls.indirectDeviceAllocationsAllowed = true; - } - if (unifiedMemoryControls.indirectHostAllocationsAllowed) { - this->unifiedMemoryControls.indirectHostAllocationsAllowed = true; - } - if (unifiedMemoryControls.indirectSharedAllocationsAllowed) { - this->unifiedMemoryControls.indirectSharedAllocationsAllowed = true; - } - - this->indirectAllocationsAllowed = true; - } - - containsCooperativeKernelsFlag = (containsCooperativeKernelsFlag || launchParams.isCooperative); - if (kernel->usesSyncBuffer()) { - auto retVal = (launchParams.isCooperative - ? programSyncBuffer(*kernel, *device->getNEODevice(), threadGroupDimensions, launchParams.syncBufferPatchIndex) - : ZE_RESULT_ERROR_INVALID_ARGUMENT); - if (retVal) { - return retVal; - } - } - - KernelImp *kernelImp = static_cast(kernel); - bool uncachedMocsKernel = isKernelUncachedMocsRequired(kernelImp->getKernelRequiresUncachedMocs()); - this->requiresQueueUncachedMocs |= kernelImp->getKernelRequiresQueueUncachedMocs(); - - NEO::Device *neoDevice = device->getNEODevice(); - - auto localMemSize = static_cast(neoDevice->getDeviceInfo().localMemSize); - auto slmTotalSize = kernelImp->getSlmTotalSize(); - if (slmTotalSize > 0 && localMemSize < slmTotalSize) { - CREATE_DEBUG_STRING(str, "Size of SLM (%u) larger than available (%u)\n", slmTotalSize, localMemSize); - driverHandle->setErrorDescription(std::string(str.get())); - PRINT_DEBUG_STRING(NEO::debugManager.flags.PrintDebugMessages.get(), stderr, "Size of SLM (%u) larger than available (%u)\n", slmTotalSize, localMemSize); - return ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY; - } - - if (NEO::debugManager.flags.EnableSWTags.get()) { - neoDevice->getRootDeviceEnvironment().tagsManager->insertTag( - *commandContainer.getCommandStream(), - *neoDevice, - kernel->getKernelDescriptor().kernelMetadata.kernelName.c_str(), 0u); - } - - std::list additionalCommands; - - updateStreamProperties(*kernel, launchParams.isCooperative, threadGroupDimensions, launchParams.isIndirect); - - auto maxWgCountPerTile = kernel->getMaxWgCountPerTile(this->engineGroupType); - - NEO::EncodeDispatchKernelArgs dispatchKernelArgs{ - .device = neoDevice, - .dispatchInterface = kernel, - .surfaceStateHeap = ssh, - .dynamicStateHeap = dsh, - .threadGroupDimensions = reinterpret_cast(&threadGroupDimensions), - .outWalkerPtr = nullptr, - .cpuWalkerBuffer = nullptr, - .cpuPayloadBuffer = nullptr, - .outImplicitArgsPtr = nullptr, - .additionalCommands = &additionalCommands, - .extendedArgs = nullptr, - .postSyncArgs = { - .eventAddress = 0, - .postSyncImmValue = static_cast(Event::STATE_SIGNALED), - .inOrderCounterValue = 0, - .inOrderIncrementGpuAddress = 0, - .inOrderIncrementValue = 0, - .device = neoDevice, - .inOrderExecInfo = nullptr, - .isCounterBasedEvent = false, - .isTimestampEvent = false, - .isHostScopeSignalEvent = false, - .isUsingSystemAllocation = false, - .dcFlushEnable = this->dcFlushSupport, - .interruptEvent = false, - .isFlushL3ForExternalAllocationRequired = false, - .isFlushL3ForHostUsmRequired = false, - }, - .preemptionMode = commandListPreemptionMode, - .requiredPartitionDim = launchParams.requiredPartitionDim, - .requiredDispatchWalkOrder = launchParams.requiredDispatchWalkOrder, - .localRegionSize = launchParams.localRegionSize, - .partitionCount = 0, - .reserveExtraPayloadSpace = launchParams.reserveExtraPayloadSpace, - .maxWgCountPerTile = maxWgCountPerTile, - .defaultPipelinedThreadArbitrationPolicy = NEO::ThreadArbitrationPolicy::NotPresent, - .isIndirect = launchParams.isIndirect, - .isPredicate = launchParams.isPredicate, - .requiresUncachedMocs = uncachedMocsKernel, - .isInternal = internalUsage, - .isCooperative = launchParams.isCooperative, - .isKernelDispatchedFromImmediateCmdList = isImmediateType(), - .isRcs = engineGroupType == NEO::EngineGroupType::renderCompute, - .isHeaplessModeEnabled = this->heaplessModeEnabled, - .isHeaplessStateInitEnabled = this->heaplessStateInitEnabled, - .immediateScratchAddressPatching = !this->scratchAddressPatchingEnabled, - .makeCommandView = false, - }; - - NEO::EncodeDispatchKernel::encodeCommon(commandContainer, dispatchKernelArgs); - if (!isImmediateType()) { - this->containsStatelessUncachedResource = dispatchKernelArgs.requiresUncachedMocs; - } - - if (neoDevice->getDebugger() && !this->immediateCmdListHeapSharing && !neoDevice->getBindlessHeapsHelper()) { - auto *ssh = commandContainer.getIndirectHeap(NEO::HeapType::surfaceState); - auto surfaceStateSpace = neoDevice->getDebugger()->getDebugSurfaceReservedSurfaceState(*ssh); - auto surfaceState = GfxFamily::cmdInitRenderSurfaceState; - - NEO::EncodeSurfaceStateArgs args; - args.outMemory = &surfaceState; - args.graphicsAddress = device->getDebugSurface()->getGpuAddress(); - args.size = device->getDebugSurface()->getUnderlyingBufferSize(); - args.mocs = device->getMOCS(false, false); - args.numAvailableDevices = neoDevice->getNumGenericSubDevices(); - args.allocation = device->getDebugSurface(); - args.gmmHelper = neoDevice->getGmmHelper(); - args.areMultipleSubDevicesInContext = false; - args.isDebuggerActive = true; - NEO::EncodeSurfaceState::encodeBuffer(args); - *reinterpret_cast(surfaceStateSpace) = surfaceState; - } - - appendSignalEventPostWalker(event, nullptr, nullptr, false, false, false); - - commandContainer.addToResidencyContainer(kernelImmutableData->getIsaGraphicsAllocation()); - auto &argumentsResidencyContainer = kernel->getArgumentsResidencyContainer(); - for (auto resource : argumentsResidencyContainer) { - commandContainer.addToResidencyContainer(resource); - } - auto &internalResidencyContainer = kernel->getInternalResidencyContainer(); - for (auto resource : internalResidencyContainer) { - commandContainer.addToResidencyContainer(resource); - } - - if (kernelImp->getPrintfBufferAllocation() != nullptr) { - storePrintfKernel(kernel); - } - - if (kernelDescriptor.kernelAttributes.flags.usesAssert) { - kernelWithAssertAppended = true; - } - - if (NEO::PauseOnGpuProperties::pauseModeAllowed(NEO::debugManager.flags.PauseOnEnqueue.get(), neoDevice->debugExecutionCounter.load(), NEO::PauseOnGpuProperties::PauseMode::BeforeWorkload)) { - commandsToPatch.push_back({.pCommand = additionalCommands.front(), .type = CommandToPatch::PauseOnEnqueuePipeControlStart}); - additionalCommands.pop_front(); - commandsToPatch.push_back({.pCommand = additionalCommands.front(), .type = CommandToPatch::PauseOnEnqueueSemaphoreStart}); - additionalCommands.pop_front(); - } - - if (NEO::PauseOnGpuProperties::pauseModeAllowed(NEO::debugManager.flags.PauseOnEnqueue.get(), neoDevice->debugExecutionCounter.load(), NEO::PauseOnGpuProperties::PauseMode::AfterWorkload)) { - commandsToPatch.push_back({.pCommand = additionalCommands.front(), .type = CommandToPatch::PauseOnEnqueuePipeControlEnd}); - additionalCommands.pop_front(); - commandsToPatch.push_back({.pCommand = additionalCommands.front(), .type = CommandToPatch::PauseOnEnqueueSemaphoreEnd}); - additionalCommands.pop_front(); - } - - if (event != nullptr && kernel->getPrintfBufferAllocation() != nullptr) { - auto module = static_cast(&static_cast(kernel)->getParentModule()); - event->setKernelForPrintf(module->getPrintfKernelWeakPtr(kernel->toHandle())); - event->setKernelWithPrintfDeviceMutex(kernel->getDevicePrintfKernelMutex()); - } - - if (this->isInOrderExecutionEnabled() && !launchParams.isKernelSplitOperation) { - if (!event || !event->getAllocation(this->device)) { - NEO::PipeControlArgs args; - args.dcFlushEnable = getDcFlushRequired(true); - - NEO::MemorySynchronizationCommands::addSingleBarrier(*commandContainer.getCommandStream(), args); - } - appendSignalInOrderDependencyCounter(event, false, false, false); - } - - return ZE_RESULT_SUCCESS; -} - -template -void CommandListCoreFamily::appendMultiPartitionPrologue(uint32_t partitionDataSize) {} - -template -void CommandListCoreFamily::appendMultiPartitionEpilogue() {} - -template -void CommandListCoreFamily::appendComputeBarrierCommand() { - NEO::PipeControlArgs args = createBarrierFlags(); - NEO::MemorySynchronizationCommands::addSingleBarrier(*commandContainer.getCommandStream(), args); -} - -template -inline NEO::PipeControlArgs CommandListCoreFamily::createBarrierFlags() { - NEO::PipeControlArgs args; - args.isWalkerWithProfilingEnqueued = this->getAndClearIsWalkerWithProfilingEnqueued(); - return args; -} - -template -inline void CommandListCoreFamily::appendMultiTileBarrier(NEO::Device &neoDevice) { -} - -template -inline size_t CommandListCoreFamily::estimateBufferSizeMultiTileBarrier(const NEO::RootDeviceEnvironment &rootDeviceEnvironment) { - return 0; -} - -template -ze_result_t CommandListCoreFamily::appendLaunchKernelSplit(Kernel *kernel, - const ze_group_count_t &threadGroupDimensions, - Event *event, - CmdListKernelLaunchParams &launchParams) { - return appendLaunchKernelWithParams(kernel, threadGroupDimensions, nullptr, launchParams); -} - -template -inline NEO::PreemptionMode CommandListCoreFamily::obtainKernelPreemptionMode(Kernel *kernel) { - NEO::PreemptionFlags flags = NEO::PreemptionHelper::createPreemptionLevelFlags(*device->getNEODevice(), &kernel->getImmutableData()->getDescriptor()); - return NEO::PreemptionHelper::taskPreemptionMode(device->getDevicePreemptionMode(), flags); -} - -template -void CommandListCoreFamily::appendDispatchOffsetRegister(bool workloadPartitionEvent, bool beforeProfilingCmds) { -} - -template -bool CommandListCoreFamily::singleEventPacketRequired(bool inputSinglePacketEventRequest) const { - return true; -} - -} // namespace L0 diff --git a/level_zero/core/source/cmdqueue/CMakeLists.txt b/level_zero/core/source/cmdqueue/CMakeLists.txt index 6cf197a484..39d88f100a 100644 --- a/level_zero/core/source/cmdqueue/CMakeLists.txt +++ b/level_zero/core/source/cmdqueue/CMakeLists.txt @@ -1,5 +1,5 @@ # -# Copyright (C) 2020-2024 Intel Corporation +# Copyright (C) 2020-2025 Intel Corporation # # SPDX-License-Identifier: MIT # @@ -11,9 +11,15 @@ target_sources(${L0_STATIC_LIB_NAME} ${CMAKE_CURRENT_SOURCE_DIR}/cmdqueue.h ${CMAKE_CURRENT_SOURCE_DIR}/cmdqueue_hw.h ${CMAKE_CURRENT_SOURCE_DIR}/cmdqueue_hw.inl - ${CMAKE_CURRENT_SOURCE_DIR}/cmdqueue_hw_skl_to_tgllp.inl ${CMAKE_CURRENT_SOURCE_DIR}/cmdqueue_imp.h ) +if(SUPPORT_GEN12LP) + target_sources(${L0_STATIC_LIB_NAME} + PRIVATE + ${CMAKE_CURRENT_SOURCE_DIR}/cmdqueue_hw_gen12lp.inl + ) + +endif() if(SUPPORT_XEHP_AND_LATER) target_sources(${L0_STATIC_LIB_NAME} PRIVATE diff --git a/level_zero/core/source/cmdqueue/cmdqueue_hw_skl_to_tgllp.inl b/level_zero/core/source/cmdqueue/cmdqueue_hw_gen12lp.inl similarity index 99% rename from level_zero/core/source/cmdqueue/cmdqueue_hw_skl_to_tgllp.inl rename to level_zero/core/source/cmdqueue/cmdqueue_hw_gen12lp.inl index b9e3f8edb9..6400fc9099 100644 --- a/level_zero/core/source/cmdqueue/cmdqueue_hw_skl_to_tgllp.inl +++ b/level_zero/core/source/cmdqueue/cmdqueue_hw_gen12lp.inl @@ -13,7 +13,6 @@ #include "shared/source/helpers/cache_policy.h" #include "shared/source/helpers/gfx_core_helper.h" #include "shared/source/helpers/state_base_address.h" -#include "shared/source/helpers/state_base_address_tgllp_and_later.inl" #include "level_zero/core/source/cmdlist/cmdlist.h" #include "level_zero/core/source/cmdqueue/cmdqueue_hw.h" diff --git a/level_zero/core/source/gen12lp/adln/cmdqueue_adln.cpp b/level_zero/core/source/gen12lp/adln/cmdqueue_adln.cpp index 4166169248..5031d0fb40 100644 --- a/level_zero/core/source/gen12lp/adln/cmdqueue_adln.cpp +++ b/level_zero/core/source/gen12lp/adln/cmdqueue_adln.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2022-2024 Intel Corporation + * Copyright (C) 2022-2025 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -9,7 +9,7 @@ #include "shared/source/gen12lp/hw_info_adln.h" #include "level_zero/core/source/cmdqueue/cmdqueue_hw.inl" -#include "level_zero/core/source/cmdqueue/cmdqueue_hw_skl_to_tgllp.inl" +#include "level_zero/core/source/cmdqueue/cmdqueue_hw_gen12lp.inl" namespace L0 { template struct CommandQueueHw; diff --git a/level_zero/core/source/gen12lp/adlp/cmdqueue_adlp.cpp b/level_zero/core/source/gen12lp/adlp/cmdqueue_adlp.cpp index 1c23329cc8..51b1ee5b04 100644 --- a/level_zero/core/source/gen12lp/adlp/cmdqueue_adlp.cpp +++ b/level_zero/core/source/gen12lp/adlp/cmdqueue_adlp.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2021-2024 Intel Corporation + * Copyright (C) 2021-2025 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -9,7 +9,7 @@ #include "shared/source/gen12lp/hw_info_adlp.h" #include "level_zero/core/source/cmdqueue/cmdqueue_hw.inl" -#include "level_zero/core/source/cmdqueue/cmdqueue_hw_skl_to_tgllp.inl" +#include "level_zero/core/source/cmdqueue/cmdqueue_hw_gen12lp.inl" namespace L0 { template struct CommandQueueHw; diff --git a/level_zero/core/source/gen12lp/adls/cmdqueue_adls.cpp b/level_zero/core/source/gen12lp/adls/cmdqueue_adls.cpp index 87a5e59a43..921ba1408b 100644 --- a/level_zero/core/source/gen12lp/adls/cmdqueue_adls.cpp +++ b/level_zero/core/source/gen12lp/adls/cmdqueue_adls.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2021-2024 Intel Corporation + * Copyright (C) 2021-2025 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -9,7 +9,7 @@ #include "shared/source/gen12lp/hw_info_adls.h" #include "level_zero/core/source/cmdqueue/cmdqueue_hw.inl" -#include "level_zero/core/source/cmdqueue/cmdqueue_hw_skl_to_tgllp.inl" +#include "level_zero/core/source/cmdqueue/cmdqueue_hw_gen12lp.inl" namespace L0 { template struct CommandQueueHw; diff --git a/level_zero/core/source/gen12lp/cmdlist_gen12lp.cpp b/level_zero/core/source/gen12lp/cmdlist_gen12lp.cpp index 84c3e277b3..d61d00403d 100644 --- a/level_zero/core/source/gen12lp/cmdlist_gen12lp.cpp +++ b/level_zero/core/source/gen12lp/cmdlist_gen12lp.cpp @@ -5,15 +5,17 @@ * */ +#include "shared/source/command_stream/preemption.h" #include "shared/source/gen12lp/hw_cmds_base.h" #include "shared/source/gen12lp/hw_info.h" +#include "shared/source/helpers/pause_on_gpu_properties.h" +#include "shared/source/program/kernel_info.h" #include "level_zero/core/source/cmdlist/cmdlist_hw.h" #include "level_zero/core/source/cmdlist/cmdlist_hw.inl" #include "level_zero/core/source/cmdlist/cmdlist_hw_gen12lp_to_xe3.inl" #include "level_zero/core/source/cmdlist/cmdlist_hw_immediate.h" #include "level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl" -#include "level_zero/core/source/cmdlist/cmdlist_hw_skl_to_tgllp.inl" #include "level_zero/core/source/gen12lp/definitions/cache_flush_gen12lp.inl" #include "cmdlist_extended.inl" @@ -21,7 +23,344 @@ namespace L0 { -template struct CommandListCoreFamily; -template struct CommandListCoreFamilyImmediate; +constexpr auto gfxCoreFamily = IGFX_GEN12LP_CORE; +template <> +inline NEO::PreemptionMode CommandListCoreFamily::obtainKernelPreemptionMode(Kernel *kernel) { + NEO::PreemptionFlags flags = NEO::PreemptionHelper::createPreemptionLevelFlags(*device->getNEODevice(), &kernel->getImmutableData()->getDescriptor()); + return NEO::PreemptionHelper::taskPreemptionMode(device->getDevicePreemptionMode(), flags); +} + +template <> +inline NEO::PipeControlArgs CommandListCoreFamily::createBarrierFlags() { + NEO::PipeControlArgs args; + args.isWalkerWithProfilingEnqueued = this->getAndClearIsWalkerWithProfilingEnqueued(); + return args; +} + +template <> +void CommandListCoreFamily::appendMultiTileBarrier(NEO::Device &neoDevice) { +} + +template <> +size_t CommandListCoreFamily::estimateBufferSizeMultiTileBarrier(const NEO::RootDeviceEnvironment &rootDeviceEnvironment) { + return 0; +} + +template <> +size_t CommandListCoreFamily::getReserveSshSize() { + using RENDER_SURFACE_STATE = typename GfxFamily::RENDER_SURFACE_STATE; + return sizeof(RENDER_SURFACE_STATE); +} + +template <> +void CommandListCoreFamily::adjustWriteKernelTimestamp(uint64_t globalAddress, uint64_t baseAddress, CommandToPatchContainer *outTimeStampSyncCmds, + bool workloadPartition, bool copyOperation, bool globalTimestamp) {} + +template <> +bool CommandListCoreFamily::isInOrderNonWalkerSignalingRequired(const Event *event) const { + return false; +} + +template <> +ze_result_t CommandListCoreFamily::appendLaunchKernelWithParams(Kernel *kernel, + const ze_group_count_t &threadGroupDimensions, + Event *event, + CmdListKernelLaunchParams &launchParams) { + UNRECOVERABLE_IF(kernel == nullptr); + UNRECOVERABLE_IF(launchParams.skipInOrderNonWalkerSignaling); + const auto driverHandle = static_cast(device->getDriverHandle()); + const auto &kernelDescriptor = kernel->getKernelDescriptor(); + if (kernelDescriptor.kernelAttributes.flags.isInvalid) { + return ZE_RESULT_ERROR_INVALID_ARGUMENT; + } + + const auto kernelImmutableData = kernel->getImmutableData(); + auto kernelInfo = kernelImmutableData->getKernelInfo(); + + NEO::IndirectHeap *ssh = nullptr; + NEO::IndirectHeap *dsh = nullptr; + + DBG_LOG(PrintDispatchParameters, "Kernel: ", kernelInfo->kernelDescriptor.kernelMetadata.kernelName, + ", Group size: ", kernel->getGroupSize()[0], ", ", kernel->getGroupSize()[1], ", ", kernel->getGroupSize()[2], + ", Group count: ", threadGroupDimensions.groupCountX, ", ", threadGroupDimensions.groupCountY, ", ", threadGroupDimensions.groupCountZ, + ", SIMD: ", kernelInfo->getMaxSimdSize()); + + if (this->immediateCmdListHeapSharing || this->stateBaseAddressTracking) { + auto &sshReserveConfig = commandContainer.getSurfaceStateHeapReserve(); + NEO::HeapReserveArguments sshReserveArgs = { + sshReserveConfig.indirectHeapReservation, + NEO::EncodeDispatchKernel::getSizeRequiredSsh(*kernelInfo), + NEO::EncodeDispatchKernel::getDefaultSshAlignment()}; + + // update SSH size - when global bindless addressing is used, kernel args may not require ssh space + if (kernel->getSurfaceStateHeapDataSize() == 0) { + sshReserveArgs.size = 0; + } + + auto &dshReserveConfig = commandContainer.getDynamicStateHeapReserve(); + NEO::HeapReserveArguments dshReserveArgs = { + dshReserveConfig.indirectHeapReservation, + NEO::EncodeDispatchKernel::getSizeRequiredDsh(kernelDescriptor, commandContainer.getNumIddPerBlock()), + NEO::EncodeDispatchKernel::getDefaultDshAlignment()}; + + if (launchParams.isKernelSplitOperation) { + // when appendLaunchKernel is called during an operation with kernel split is true, + // then reserve sufficient ssh and dsh heaps during first kernel split, by multiplying, individual + // dsh and ssh heap size retrieved above with number of kernels in split operation. + // And after first kernel split, for remainder kernel split calls, dont estimate heap size. + if (launchParams.numKernelsExecutedInSplitLaunch == 0) { + dshReserveArgs.size = launchParams.numKernelsInSplitLaunch * dshReserveArgs.size; + sshReserveArgs.size = launchParams.numKernelsInSplitLaunch * sshReserveArgs.size; + commandContainer.reserveSpaceForDispatch( + sshReserveArgs, + dshReserveArgs, true); + } + } else { + commandContainer.reserveSpaceForDispatch( + sshReserveArgs, + dshReserveArgs, true); + } + ssh = sshReserveArgs.indirectHeapReservation; + dsh = dshReserveArgs.indirectHeapReservation; + } + + appendEventForProfiling(event, nullptr, true, false, false, false); + + auto perThreadScratchSize = std::max(this->getCommandListPerThreadScratchSize(0u), + kernel->getImmutableData()->getDescriptor().kernelAttributes.perThreadScratchSize[0]); + this->setCommandListPerThreadScratchSize(0u, perThreadScratchSize); + + auto slmEnable = (kernel->getImmutableData()->getDescriptor().kernelAttributes.slmInlineSize > 0); + this->setCommandListSLMEnable(slmEnable); + + auto kernelPreemptionMode = obtainKernelPreemptionMode(kernel); + commandListPreemptionMode = std::min(commandListPreemptionMode, kernelPreemptionMode); + + kernel->patchGlobalOffset(); + + this->allocateOrReuseKernelPrivateMemoryIfNeeded(kernel, kernelDescriptor.kernelAttributes.perHwThreadPrivateMemorySize); + + if (!launchParams.isIndirect) { + kernel->setGroupCount(threadGroupDimensions.groupCountX, + threadGroupDimensions.groupCountY, + threadGroupDimensions.groupCountZ); + } + + if (launchParams.isIndirect) { + prepareIndirectParams(&threadGroupDimensions); + } + + if (kernel->hasIndirectAllocationsAllowed()) { + UnifiedMemoryControls unifiedMemoryControls = kernel->getUnifiedMemoryControls(); + + if (unifiedMemoryControls.indirectDeviceAllocationsAllowed) { + this->unifiedMemoryControls.indirectDeviceAllocationsAllowed = true; + } + if (unifiedMemoryControls.indirectHostAllocationsAllowed) { + this->unifiedMemoryControls.indirectHostAllocationsAllowed = true; + } + if (unifiedMemoryControls.indirectSharedAllocationsAllowed) { + this->unifiedMemoryControls.indirectSharedAllocationsAllowed = true; + } + + this->indirectAllocationsAllowed = true; + } + + containsCooperativeKernelsFlag = (containsCooperativeKernelsFlag || launchParams.isCooperative); + if (kernel->usesSyncBuffer()) { + auto retVal = (launchParams.isCooperative + ? programSyncBuffer(*kernel, *device->getNEODevice(), threadGroupDimensions, launchParams.syncBufferPatchIndex) + : ZE_RESULT_ERROR_INVALID_ARGUMENT); + if (retVal) { + return retVal; + } + } + + KernelImp *kernelImp = static_cast(kernel); + bool uncachedMocsKernel = isKernelUncachedMocsRequired(kernelImp->getKernelRequiresUncachedMocs()); + this->requiresQueueUncachedMocs |= kernelImp->getKernelRequiresQueueUncachedMocs(); + + NEO::Device *neoDevice = device->getNEODevice(); + + auto localMemSize = static_cast(neoDevice->getDeviceInfo().localMemSize); + auto slmTotalSize = kernelImp->getSlmTotalSize(); + if (slmTotalSize > 0 && localMemSize < slmTotalSize) { + CREATE_DEBUG_STRING(str, "Size of SLM (%u) larger than available (%u)\n", slmTotalSize, localMemSize); + driverHandle->setErrorDescription(std::string(str.get())); + PRINT_DEBUG_STRING(NEO::debugManager.flags.PrintDebugMessages.get(), stderr, "Size of SLM (%u) larger than available (%u)\n", slmTotalSize, localMemSize); + return ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY; + } + + if (NEO::debugManager.flags.EnableSWTags.get()) { + neoDevice->getRootDeviceEnvironment().tagsManager->insertTag( + *commandContainer.getCommandStream(), + *neoDevice, + kernel->getKernelDescriptor().kernelMetadata.kernelName.c_str(), 0u); + } + + std::list additionalCommands; + + updateStreamProperties(*kernel, launchParams.isCooperative, threadGroupDimensions, launchParams.isIndirect); + + auto maxWgCountPerTile = kernel->getMaxWgCountPerTile(this->engineGroupType); + + NEO::EncodeDispatchKernelArgs dispatchKernelArgs{ + .device = neoDevice, + .dispatchInterface = kernel, + .surfaceStateHeap = ssh, + .dynamicStateHeap = dsh, + .threadGroupDimensions = reinterpret_cast(&threadGroupDimensions), + .outWalkerPtr = nullptr, + .cpuWalkerBuffer = nullptr, + .cpuPayloadBuffer = nullptr, + .outImplicitArgsPtr = nullptr, + .additionalCommands = &additionalCommands, + .extendedArgs = nullptr, + .postSyncArgs = { + .eventAddress = 0, + .postSyncImmValue = static_cast(Event::STATE_SIGNALED), + .inOrderCounterValue = 0, + .inOrderIncrementGpuAddress = 0, + .inOrderIncrementValue = 0, + .device = neoDevice, + .inOrderExecInfo = nullptr, + .isCounterBasedEvent = false, + .isTimestampEvent = false, + .isHostScopeSignalEvent = false, + .isUsingSystemAllocation = false, + .dcFlushEnable = this->dcFlushSupport, + .interruptEvent = false, + .isFlushL3ForExternalAllocationRequired = false, + .isFlushL3ForHostUsmRequired = false, + }, + .preemptionMode = commandListPreemptionMode, + .requiredPartitionDim = launchParams.requiredPartitionDim, + .requiredDispatchWalkOrder = launchParams.requiredDispatchWalkOrder, + .localRegionSize = launchParams.localRegionSize, + .partitionCount = 0, + .reserveExtraPayloadSpace = launchParams.reserveExtraPayloadSpace, + .maxWgCountPerTile = maxWgCountPerTile, + .defaultPipelinedThreadArbitrationPolicy = NEO::ThreadArbitrationPolicy::NotPresent, + .isIndirect = launchParams.isIndirect, + .isPredicate = launchParams.isPredicate, + .requiresUncachedMocs = uncachedMocsKernel, + .isInternal = internalUsage, + .isCooperative = launchParams.isCooperative, + .isKernelDispatchedFromImmediateCmdList = isImmediateType(), + .isRcs = engineGroupType == NEO::EngineGroupType::renderCompute, + .isHeaplessModeEnabled = this->heaplessModeEnabled, + .isHeaplessStateInitEnabled = this->heaplessStateInitEnabled, + .immediateScratchAddressPatching = !this->scratchAddressPatchingEnabled, + .makeCommandView = false, + }; + + NEO::EncodeDispatchKernel::encodeCommon(commandContainer, dispatchKernelArgs); + if (!isImmediateType()) { + this->containsStatelessUncachedResource = dispatchKernelArgs.requiresUncachedMocs; + } + + if (neoDevice->getDebugger() && !this->immediateCmdListHeapSharing && !neoDevice->getBindlessHeapsHelper()) { + auto *ssh = commandContainer.getIndirectHeap(NEO::HeapType::surfaceState); + auto surfaceStateSpace = neoDevice->getDebugger()->getDebugSurfaceReservedSurfaceState(*ssh); + auto surfaceState = GfxFamily::cmdInitRenderSurfaceState; + + NEO::EncodeSurfaceStateArgs args; + args.outMemory = &surfaceState; + args.graphicsAddress = device->getDebugSurface()->getGpuAddress(); + args.size = device->getDebugSurface()->getUnderlyingBufferSize(); + args.mocs = device->getMOCS(false, false); + args.numAvailableDevices = neoDevice->getNumGenericSubDevices(); + args.allocation = device->getDebugSurface(); + args.gmmHelper = neoDevice->getGmmHelper(); + args.areMultipleSubDevicesInContext = false; + args.isDebuggerActive = true; + NEO::EncodeSurfaceState::encodeBuffer(args); + *reinterpret_cast(surfaceStateSpace) = surfaceState; + } + + appendSignalEventPostWalker(event, nullptr, nullptr, false, false, false); + + commandContainer.addToResidencyContainer(kernelImmutableData->getIsaGraphicsAllocation()); + auto &argumentsResidencyContainer = kernel->getArgumentsResidencyContainer(); + for (auto resource : argumentsResidencyContainer) { + commandContainer.addToResidencyContainer(resource); + } + auto &internalResidencyContainer = kernel->getInternalResidencyContainer(); + for (auto resource : internalResidencyContainer) { + commandContainer.addToResidencyContainer(resource); + } + + if (kernelImp->getPrintfBufferAllocation() != nullptr) { + storePrintfKernel(kernel); + } + + if (kernelDescriptor.kernelAttributes.flags.usesAssert) { + kernelWithAssertAppended = true; + } + + if (NEO::PauseOnGpuProperties::pauseModeAllowed(NEO::debugManager.flags.PauseOnEnqueue.get(), neoDevice->debugExecutionCounter.load(), NEO::PauseOnGpuProperties::PauseMode::BeforeWorkload)) { + commandsToPatch.push_back({.pCommand = additionalCommands.front(), .type = CommandToPatch::PauseOnEnqueuePipeControlStart}); + additionalCommands.pop_front(); + commandsToPatch.push_back({.pCommand = additionalCommands.front(), .type = CommandToPatch::PauseOnEnqueueSemaphoreStart}); + additionalCommands.pop_front(); + } + + if (NEO::PauseOnGpuProperties::pauseModeAllowed(NEO::debugManager.flags.PauseOnEnqueue.get(), neoDevice->debugExecutionCounter.load(), NEO::PauseOnGpuProperties::PauseMode::AfterWorkload)) { + commandsToPatch.push_back({.pCommand = additionalCommands.front(), .type = CommandToPatch::PauseOnEnqueuePipeControlEnd}); + additionalCommands.pop_front(); + commandsToPatch.push_back({.pCommand = additionalCommands.front(), .type = CommandToPatch::PauseOnEnqueueSemaphoreEnd}); + additionalCommands.pop_front(); + } + + if (event != nullptr && kernel->getPrintfBufferAllocation() != nullptr) { + auto module = static_cast(&static_cast(kernel)->getParentModule()); + event->setKernelForPrintf(module->getPrintfKernelWeakPtr(kernel->toHandle())); + event->setKernelWithPrintfDeviceMutex(kernel->getDevicePrintfKernelMutex()); + } + + if (this->isInOrderExecutionEnabled() && !launchParams.isKernelSplitOperation) { + if (!event || !event->getAllocation(this->device)) { + NEO::PipeControlArgs args; + args.dcFlushEnable = getDcFlushRequired(true); + + NEO::MemorySynchronizationCommands::addSingleBarrier(*commandContainer.getCommandStream(), args); + } + appendSignalInOrderDependencyCounter(event, false, false, false); + } + + return ZE_RESULT_SUCCESS; +} + +template <> +void CommandListCoreFamily::appendMultiPartitionPrologue(uint32_t partitionDataSize) {} + +template <> +void CommandListCoreFamily::appendMultiPartitionEpilogue() {} + +template <> +void CommandListCoreFamily::appendComputeBarrierCommand() { + NEO::PipeControlArgs args = createBarrierFlags(); + NEO::MemorySynchronizationCommands::addSingleBarrier(*commandContainer.getCommandStream(), args); +} + +template <> +ze_result_t CommandListCoreFamily::appendLaunchKernelSplit(Kernel *kernel, + const ze_group_count_t &threadGroupDimensions, + Event *event, + CmdListKernelLaunchParams &launchParams) { + return appendLaunchKernelWithParams(kernel, threadGroupDimensions, nullptr, launchParams); +} + +template <> +void CommandListCoreFamily::appendDispatchOffsetRegister(bool workloadPartitionEvent, bool beforeProfilingCmds) { +} + +template <> +bool CommandListCoreFamily::singleEventPacketRequired(bool inputSinglePacketEventRequest) const { + return true; +} + +template struct CommandListCoreFamily; +template struct CommandListCoreFamilyImmediate; } // namespace L0 diff --git a/level_zero/core/source/gen12lp/dg1/cmdqueue_dg1.cpp b/level_zero/core/source/gen12lp/dg1/cmdqueue_dg1.cpp index 1499fea96f..f83a33dfef 100644 --- a/level_zero/core/source/gen12lp/dg1/cmdqueue_dg1.cpp +++ b/level_zero/core/source/gen12lp/dg1/cmdqueue_dg1.cpp @@ -9,7 +9,7 @@ #include "shared/source/gen12lp/hw_info_dg1.h" #include "level_zero/core/source/cmdqueue/cmdqueue_hw.inl" -#include "level_zero/core/source/cmdqueue/cmdqueue_hw_skl_to_tgllp.inl" +#include "level_zero/core/source/cmdqueue/cmdqueue_hw_gen12lp.inl" #include "neo_igfxfmid.h" diff --git a/level_zero/core/source/gen12lp/l0_gfx_core_helper_gen12lp.cpp b/level_zero/core/source/gen12lp/l0_gfx_core_helper_gen12lp.cpp index dbded5802f..a60cfefb1b 100644 --- a/level_zero/core/source/gen12lp/l0_gfx_core_helper_gen12lp.cpp +++ b/level_zero/core/source/gen12lp/l0_gfx_core_helper_gen12lp.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2020-2023 Intel Corporation + * Copyright (C) 2020-2025 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -8,9 +8,8 @@ #include "shared/source/gen12lp/hw_cmds.h" #include "level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper_base.inl" -#include "level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper_skl_to_pvc.inl" -#include "level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper_skl_to_tgllp.inl" #include "level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper_tgllp_to_dg2.inl" +#include "level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper_tgllp_to_pvc.inl" #include "level_zero/core/source/helpers/l0_populate_factory.h" #include "level_zero/tools/source/debug/eu_thread.h" @@ -19,6 +18,81 @@ namespace L0 { using Family = NEO::Gen12LpFamily; static auto gfxCore = IGFX_GEN12LP_CORE; +template <> +bool L0GfxCoreHelperHw::platformSupportsCmdListHeapSharing() const { + return true; +} + +template <> +bool L0GfxCoreHelperHw::platformSupportsStateComputeModeTracking() const { + return false; +} + +template <> +bool L0GfxCoreHelperHw::platformSupportsFrontEndTracking() const { + return false; +} + +template <> +bool L0GfxCoreHelperHw::platformSupportsPipelineSelectTracking() const { + return false; +} + +template <> +bool L0GfxCoreHelperHw::platformSupportsStateBaseAddressTracking(const NEO::RootDeviceEnvironment &rootDeviceEnvironment) const { + return false; +} + +template <> +uint32_t L0GfxCoreHelperHw::getEventMaxKernelCount(const NEO::HardwareInfo &hwInfo) const { + return 1; +} + +template <> +uint32_t L0GfxCoreHelperHw::getEventBaseMaxPacketCount(const NEO::RootDeviceEnvironment &rootDeviceEnvironment) const { + return 1u; +} + +template <> +bool L0GfxCoreHelperHw::isZebinAllowed(const NEO::Debugger *debugger) const { + return !debugger; +} + +template <> +NEO::HeapAddressModel L0GfxCoreHelperHw::getPlatformHeapAddressModel(const NEO::RootDeviceEnvironment &rootDeviceEnvironment) const { + return NEO::HeapAddressModel::privateHeaps; +} + +template <> +ze_rtas_format_exp_t L0GfxCoreHelperHw::getSupportedRTASFormat() const { + return ZE_RTAS_FORMAT_EXP_INVALID; +} + +template <> +bool L0GfxCoreHelperHw::platformSupportsPrimaryBatchBufferCmdList() const { + return true; +} + +template <> +bool L0GfxCoreHelperHw::platformSupportsImmediateComputeFlushTask() const { + return false; +} + +template <> +ze_mutable_command_exp_flags_t L0GfxCoreHelperHw::getPlatformCmdListUpdateCapabilities() const { + return 0; +} + +template <> +zet_debug_regset_type_intel_gpu_t L0GfxCoreHelperHw::getRegsetTypeForLargeGrfDetection() const { + return ZET_DEBUG_REGSET_TYPE_INVALID_INTEL_GPU; +} + +template <> +uint32_t L0GfxCoreHelperHw::getGrfRegisterCount(uint32_t *regPtr) const { + return 128; +} + #include "level_zero/core/source/helpers/l0_gfx_core_helper_factory_init.inl" template class L0GfxCoreHelperHw; diff --git a/level_zero/core/source/gen12lp/rkl/cmdqueue_rkl.cpp b/level_zero/core/source/gen12lp/rkl/cmdqueue_rkl.cpp index d70ca35ad0..0e420a89eb 100644 --- a/level_zero/core/source/gen12lp/rkl/cmdqueue_rkl.cpp +++ b/level_zero/core/source/gen12lp/rkl/cmdqueue_rkl.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2021-2024 Intel Corporation + * Copyright (C) 2021-2025 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -9,7 +9,7 @@ #include "shared/source/gen12lp/hw_info_rkl.h" #include "level_zero/core/source/cmdqueue/cmdqueue_hw.inl" -#include "level_zero/core/source/cmdqueue/cmdqueue_hw_skl_to_tgllp.inl" +#include "level_zero/core/source/cmdqueue/cmdqueue_hw_gen12lp.inl" namespace L0 { template struct CommandQueueHw; diff --git a/level_zero/core/source/gen12lp/tgllp/cmdqueue_tgllp.cpp b/level_zero/core/source/gen12lp/tgllp/cmdqueue_tgllp.cpp index 02584b71a6..c4b1e26d2e 100644 --- a/level_zero/core/source/gen12lp/tgllp/cmdqueue_tgllp.cpp +++ b/level_zero/core/source/gen12lp/tgllp/cmdqueue_tgllp.cpp @@ -9,7 +9,7 @@ #include "shared/source/gen12lp/hw_info_tgllp.h" #include "level_zero/core/source/cmdqueue/cmdqueue_hw.inl" -#include "level_zero/core/source/cmdqueue/cmdqueue_hw_skl_to_tgllp.inl" +#include "level_zero/core/source/cmdqueue/cmdqueue_hw_gen12lp.inl" #include "neo_igfxfmid.h" diff --git a/level_zero/core/source/gfx_core_helpers/CMakeLists.txt b/level_zero/core/source/gfx_core_helpers/CMakeLists.txt index eaa617fe51..bc3a23f3e2 100644 --- a/level_zero/core/source/gfx_core_helpers/CMakeLists.txt +++ b/level_zero/core/source/gfx_core_helpers/CMakeLists.txt @@ -12,13 +12,6 @@ target_sources(${L0_STATIC_LIB_NAME} ${CMAKE_CURRENT_SOURCE_DIR}/l0_gfx_core_helper.h ) -if(SUPPORT_GEN12LP) - target_sources(${L0_STATIC_LIB_NAME} - PRIVATE - ${CMAKE_CURRENT_SOURCE_DIR}/l0_gfx_core_helper_skl_to_tgllp.inl - ) -endif() - if(SUPPORT_XEHP_AND_LATER) target_sources(${L0_STATIC_LIB_NAME} PRIVATE @@ -50,7 +43,7 @@ endif() if(SUPPORT_GEN12LP OR SUPPORT_XE_HPG_CORE OR SUPPORT_XE_HPC_CORE) target_sources(${L0_STATIC_LIB_NAME} PRIVATE - ${CMAKE_CURRENT_SOURCE_DIR}/l0_gfx_core_helper_skl_to_pvc.inl + ${CMAKE_CURRENT_SOURCE_DIR}/l0_gfx_core_helper_tgllp_to_pvc.inl ) endif() diff --git a/level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper_skl_to_tgllp.inl b/level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper_skl_to_tgllp.inl deleted file mode 100644 index 0cf0c9e277..0000000000 --- a/level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper_skl_to_tgllp.inl +++ /dev/null @@ -1,87 +0,0 @@ -/* - * Copyright (C) 2021-2024 Intel Corporation - * - * SPDX-License-Identifier: MIT - * - */ - -#include "level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper.h" - -namespace L0 { - -template -bool L0GfxCoreHelperHw::platformSupportsCmdListHeapSharing() const { - return true; -} - -template -bool L0GfxCoreHelperHw::platformSupportsStateComputeModeTracking() const { - return false; -} - -template -bool L0GfxCoreHelperHw::platformSupportsFrontEndTracking() const { - return false; -} - -template -bool L0GfxCoreHelperHw::platformSupportsPipelineSelectTracking() const { - return false; -} - -template -bool L0GfxCoreHelperHw::platformSupportsStateBaseAddressTracking(const NEO::RootDeviceEnvironment &rootDeviceEnvironment) const { - return false; -} - -template -uint32_t L0GfxCoreHelperHw::getEventMaxKernelCount(const NEO::HardwareInfo &hwInfo) const { - return 1; -} - -template -uint32_t L0GfxCoreHelperHw::getEventBaseMaxPacketCount(const NEO::RootDeviceEnvironment &rootDeviceEnvironment) const { - return 1u; -} - -template -bool L0GfxCoreHelperHw::isZebinAllowed(const NEO::Debugger *debugger) const { - return !debugger; -} - -template -NEO::HeapAddressModel L0GfxCoreHelperHw::getPlatformHeapAddressModel(const NEO::RootDeviceEnvironment &rootDeviceEnvironment) const { - return NEO::HeapAddressModel::privateHeaps; -} - -template -ze_rtas_format_exp_t L0GfxCoreHelperHw::getSupportedRTASFormat() const { - return ZE_RTAS_FORMAT_EXP_INVALID; -} - -template -bool L0GfxCoreHelperHw::platformSupportsPrimaryBatchBufferCmdList() const { - return true; -} - -template -bool L0GfxCoreHelperHw::platformSupportsImmediateComputeFlushTask() const { - return false; -} - -template -ze_mutable_command_exp_flags_t L0GfxCoreHelperHw::getPlatformCmdListUpdateCapabilities() const { - return 0; -} - -template -zet_debug_regset_type_intel_gpu_t L0GfxCoreHelperHw::getRegsetTypeForLargeGrfDetection() const { - return ZET_DEBUG_REGSET_TYPE_INVALID_INTEL_GPU; -} - -template -uint32_t L0GfxCoreHelperHw::getGrfRegisterCount(uint32_t *regPtr) const { - return 128; -} - -} // namespace L0 diff --git a/level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper_skl_to_pvc.inl b/level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper_tgllp_to_pvc.inl similarity index 100% rename from level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper_skl_to_pvc.inl rename to level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper_tgllp_to_pvc.inl diff --git a/level_zero/core/source/xe_hpc_core/l0_gfx_core_helper_xe_hpc_core.cpp b/level_zero/core/source/xe_hpc_core/l0_gfx_core_helper_xe_hpc_core.cpp index 408420e21b..154a8da8d4 100644 --- a/level_zero/core/source/xe_hpc_core/l0_gfx_core_helper_xe_hpc_core.cpp +++ b/level_zero/core/source/xe_hpc_core/l0_gfx_core_helper_xe_hpc_core.cpp @@ -10,7 +10,7 @@ #include "level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper_base.inl" #include "level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper_pvc_and_later.inl" #include "level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper_pvc_to_xe2.inl" -#include "level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper_skl_to_pvc.inl" +#include "level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper_tgllp_to_pvc.inl" #include "level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper_xe_hpg_and_xe_hpc.inl" #include "level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper_xe_hpg_to_xe2_hpg.inl" #include "level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper_xehp_and_later.inl" diff --git a/level_zero/core/source/xe_hpg_core/l0_gfx_core_helper_xe_hpg_core.cpp b/level_zero/core/source/xe_hpg_core/l0_gfx_core_helper_xe_hpg_core.cpp index bfc83b9954..9edb369e7d 100644 --- a/level_zero/core/source/xe_hpg_core/l0_gfx_core_helper_xe_hpg_core.cpp +++ b/level_zero/core/source/xe_hpg_core/l0_gfx_core_helper_xe_hpg_core.cpp @@ -8,8 +8,8 @@ #include "shared/source/xe_hpg_core/hw_cmds_xe_hpg_core_base.h" #include "level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper_base.inl" -#include "level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper_skl_to_pvc.inl" #include "level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper_tgllp_to_dg2.inl" +#include "level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper_tgllp_to_pvc.inl" #include "level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper_xe_hpg_and_xe_hpc.inl" #include "level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper_xe_hpg_to_xe2_hpg.inl" #include "level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper_xehp_and_later.inl" diff --git a/shared/source/command_stream/CMakeLists.txt b/shared/source/command_stream/CMakeLists.txt index ce49512850..60792dccfb 100644 --- a/shared/source/command_stream/CMakeLists.txt +++ b/shared/source/command_stream/CMakeLists.txt @@ -10,7 +10,6 @@ set(NEO_CORE_COMMAND_STREAM ${CMAKE_CURRENT_SOURCE_DIR}/aub_command_stream_receiver.h ${CMAKE_CURRENT_SOURCE_DIR}/aub_command_stream_receiver_hw.h ${CMAKE_CURRENT_SOURCE_DIR}/aub_command_stream_receiver_hw_base.inl - ${CMAKE_CURRENT_SOURCE_DIR}/aub_command_stream_receiver_hw_bdw_and_later.inl ${CMAKE_CURRENT_SOURCE_DIR}/aub_subcapture_status.h ${CMAKE_CURRENT_SOURCE_DIR}/command_stream_receiver.cpp ${CMAKE_CURRENT_SOURCE_DIR}/command_stream_receiver.h diff --git a/shared/source/command_stream/aub_command_stream_receiver_hw_bdw_and_later.inl b/shared/source/command_stream/aub_command_stream_receiver_hw_bdw_and_later.inl deleted file mode 100644 index e76e5838c7..0000000000 --- a/shared/source/command_stream/aub_command_stream_receiver_hw_bdw_and_later.inl +++ /dev/null @@ -1,22 +0,0 @@ -/* - * Copyright (C) 2019-2024 Intel Corporation - * - * SPDX-License-Identifier: MIT - * - */ - -#include "shared/source/command_stream/aub_command_stream_receiver_hw_base.inl" - -namespace NEO { - -template -constexpr uint32_t AUBCommandStreamReceiverHw::getMaskAndValueForPollForCompletion() { - return 0x100; -} - -template -int AUBCommandStreamReceiverHw::getAddressSpaceFromPTEBits(uint64_t entryBits) const { - return AubMemDump::AddressSpaceValues::TraceNonlocal; -} - -} // namespace NEO diff --git a/shared/source/gen12lp/aub_command_stream_receiver_gen12lp.cpp b/shared/source/gen12lp/aub_command_stream_receiver_gen12lp.cpp index 18c7c9af59..783db79869 100644 --- a/shared/source/gen12lp/aub_command_stream_receiver_gen12lp.cpp +++ b/shared/source/gen12lp/aub_command_stream_receiver_gen12lp.cpp @@ -8,7 +8,7 @@ #include "shared/source/aub_mem_dump/aub_alloc_dump.h" #include "shared/source/command_stream/aub_command_stream_receiver.h" #include "shared/source/command_stream/aub_command_stream_receiver_hw.h" -#include "shared/source/command_stream/aub_command_stream_receiver_hw_bdw_and_later.inl" +#include "shared/source/command_stream/aub_command_stream_receiver_hw_base.inl" #include "shared/source/helpers/array_count.h" #include "shared/source/helpers/populate_factory.h" #include "shared/source/memory_manager/memory_pool.h" @@ -17,6 +17,10 @@ namespace NEO { typedef Gen12LpFamily Family; static auto gfxCore = IGFX_GEN12LP_CORE; +template <> +int AUBCommandStreamReceiverHw::getAddressSpaceFromPTEBits(uint64_t entryBits) const { + return AubMemDump::AddressSpaceValues::TraceNonlocal; +} template <> constexpr uint32_t AUBCommandStreamReceiverHw::getMaskAndValueForPollForCompletion() { return 0x00008000; diff --git a/shared/source/gen12lp/command_stream_receiver_hw_gen12lp.cpp b/shared/source/gen12lp/command_stream_receiver_hw_gen12lp.cpp index 63e5757dc4..a851f976df 100644 --- a/shared/source/gen12lp/command_stream_receiver_hw_gen12lp.cpp +++ b/shared/source/gen12lp/command_stream_receiver_hw_gen12lp.cpp @@ -18,7 +18,6 @@ using Family = NEO::Gen12LpFamily; #include "shared/source/helpers/blit_commands_helper_base.inl" #include "shared/source/helpers/blit_commands_helper_from_gen12lp_to_xe3.inl" #include "shared/source/helpers/populate_factory.h" -#include "shared/source/helpers/state_base_address_tgllp_and_later.inl" namespace NEO { static auto gfxCore = IGFX_GEN12LP_CORE; diff --git a/shared/source/gen12lp/gfx_core_helper_gen12lp.cpp b/shared/source/gen12lp/gfx_core_helper_gen12lp.cpp index ccb929e4f3..823575aeb8 100644 --- a/shared/source/gen12lp/gfx_core_helper_gen12lp.cpp +++ b/shared/source/gen12lp/gfx_core_helper_gen12lp.cpp @@ -14,8 +14,8 @@ using Family = NEO::Gen12LpFamily; #include "shared/source/helpers/flat_batch_buffer_helper_hw.inl" #include "shared/source/helpers/gfx_core_helper.h" #include "shared/source/helpers/gfx_core_helper_base.inl" -#include "shared/source/helpers/gfx_core_helper_bdw_to_dg2.inl" #include "shared/source/helpers/gfx_core_helper_tgllp_and_later.inl" +#include "shared/source/helpers/gfx_core_helper_tgllp_to_dg2.inl" #include "shared/source/helpers/hw_info.h" #include "shared/source/helpers/local_memory_access_modes.h" #include "shared/source/kernel/kernel_descriptor.h" diff --git a/shared/source/gen12lp/state_base_address_gen12lp.cpp b/shared/source/gen12lp/state_base_address_gen12lp.cpp index f905725477..7ad51659bd 100644 --- a/shared/source/gen12lp/state_base_address_gen12lp.cpp +++ b/shared/source/gen12lp/state_base_address_gen12lp.cpp @@ -6,9 +6,75 @@ */ #include "shared/source/gen12lp/hw_cmds_base.h" -#include "shared/source/helpers/state_base_address.h" -#include "shared/source/helpers/state_base_address_icllp_and_later.inl" +#include "shared/source/helpers/state_base_address_base.inl" namespace NEO { -template struct StateBaseAddressHelper; +using Family = Gen12LpFamily; +template <> +uint32_t StateBaseAddressHelper::getMaxBindlessSurfaceStates() { + return (1 << 20) - 1; } + +template <> +void StateBaseAddressHelper::programBindingTableBaseAddress(LinearStream &commandStream, uint64_t baseAddress, uint32_t sizeInPages, GmmHelper *gmmHelper) { +} + +template <> +void StateBaseAddressHelper::appendIohParameters(StateBaseAddressHelperArgs &args) { + if (args.sbaProperties) { + if (args.sbaProperties->indirectObjectBaseAddress.value != StreamProperty64::initValue) { + auto baseAddress = static_cast(args.sbaProperties->indirectObjectBaseAddress.value); + UNRECOVERABLE_IF(!args.gmmHelper); + args.stateBaseAddressCmd->setIndirectObjectBaseAddress(args.gmmHelper->decanonize(baseAddress)); + args.stateBaseAddressCmd->setIndirectObjectBaseAddressModifyEnable(true); + args.stateBaseAddressCmd->setIndirectObjectBufferSizeModifyEnable(true); + args.stateBaseAddressCmd->setIndirectObjectBufferSize(static_cast(args.sbaProperties->indirectObjectSize.value)); + } + } else if (args.useGlobalHeapsBaseAddress) { + args.stateBaseAddressCmd->setIndirectObjectBaseAddressModifyEnable(true); + args.stateBaseAddressCmd->setIndirectObjectBufferSizeModifyEnable(true); + args.stateBaseAddressCmd->setIndirectObjectBaseAddress(args.indirectObjectHeapBaseAddress); + args.stateBaseAddressCmd->setIndirectObjectBufferSize(MemoryConstants::sizeOf4GBinPageEntities); + } else if (args.ioh) { + args.stateBaseAddressCmd->setIndirectObjectBaseAddressModifyEnable(true); + args.stateBaseAddressCmd->setIndirectObjectBufferSizeModifyEnable(true); + args.stateBaseAddressCmd->setIndirectObjectBaseAddress(args.ioh->getHeapGpuBase()); + args.stateBaseAddressCmd->setIndirectObjectBufferSize(args.ioh->getHeapSizeInPages()); + } +} + +template <> +void StateBaseAddressHelper::appendExtraCacheSettings(StateBaseAddressHelperArgs &args) {} + +template <> +void StateBaseAddressHelper::appendStateBaseAddressParameters( + StateBaseAddressHelperArgs &args) { + + if (!args.useGlobalHeapsBaseAddress) { + if (args.bindlessSurfaceStateBaseAddress != 0) { + args.stateBaseAddressCmd->setBindlessSurfaceStateBaseAddress(args.bindlessSurfaceStateBaseAddress); + args.stateBaseAddressCmd->setBindlessSurfaceStateBaseAddressModifyEnable(true); + const auto surfaceStateCount = getMaxBindlessSurfaceStates(); + args.stateBaseAddressCmd->setBindlessSurfaceStateSize(surfaceStateCount); + } else if (args.ssh) { + args.stateBaseAddressCmd->setBindlessSurfaceStateBaseAddressModifyEnable(true); + args.stateBaseAddressCmd->setBindlessSurfaceStateBaseAddress(args.ssh->getHeapGpuBase()); + uint32_t size = uint32_t(args.ssh->getMaxAvailableSpace() / 64) - 1; + args.stateBaseAddressCmd->setBindlessSurfaceStateSize(size); + } + } + + args.stateBaseAddressCmd->setBindlessSamplerStateBaseAddressModifyEnable(true); + + auto l3CacheOnPolicy = GMM_RESOURCE_USAGE_OCL_STATE_HEAP_BUFFER; + + if (args.gmmHelper != nullptr) { + args.stateBaseAddressCmd->setBindlessSurfaceStateMemoryObjectControlState(args.gmmHelper->getMOCS(l3CacheOnPolicy)); + args.stateBaseAddressCmd->setBindlessSamplerStateMemoryObjectControlState(args.gmmHelper->getMOCS(l3CacheOnPolicy)); + } + + StateBaseAddressHelper::appendIohParameters(args); +} + +template struct StateBaseAddressHelper; +} // namespace NEO diff --git a/shared/source/gen12lp/windows/gmm_callbacks_gen12lp.cpp b/shared/source/gen12lp/windows/gmm_callbacks_gen12lp.cpp index 2c4c8927ad..8416347ff4 100644 --- a/shared/source/gen12lp/windows/gmm_callbacks_gen12lp.cpp +++ b/shared/source/gen12lp/windows/gmm_callbacks_gen12lp.cpp @@ -1,12 +1,12 @@ /* - * Copyright (C) 2019-2022 Intel Corporation + * Copyright (C) 2019-2025 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gen12lp/hw_cmds_base.h" -#include "shared/source/helpers/windows/gmm_callbacks_tgllp_and_later.inl" +#include "shared/source/helpers/windows/gmm_callbacks.inl" namespace NEO { template struct DeviceCallbacks; diff --git a/shared/source/helpers/CMakeLists.txt b/shared/source/helpers/CMakeLists.txt index acf2b49e43..0405957672 100644 --- a/shared/source/helpers/CMakeLists.txt +++ b/shared/source/helpers/CMakeLists.txt @@ -91,8 +91,7 @@ set(NEO_CORE_HELPERS ${CMAKE_CURRENT_SOURCE_DIR}/gfx_core_helper.cpp ${CMAKE_CURRENT_SOURCE_DIR}/gfx_core_helper.h ${CMAKE_CURRENT_SOURCE_DIR}/gfx_core_helper_base.inl - ${CMAKE_CURRENT_SOURCE_DIR}/gfx_core_helper_bdw_to_dg2.inl - ${CMAKE_CURRENT_SOURCE_DIR}/gfx_core_helper_bdw_to_icllp.inl + ${CMAKE_CURRENT_SOURCE_DIR}/gfx_core_helper_tgllp_to_dg2.inl ${CMAKE_CURRENT_SOURCE_DIR}/gfx_core_helper_pvc_and_later.inl ${CMAKE_CURRENT_SOURCE_DIR}/gfx_core_helper_tgllp_and_later.inl ${CMAKE_CURRENT_SOURCE_DIR}/gpu_page_fault_helper.cpp @@ -146,11 +145,7 @@ set(NEO_CORE_HELPERS ${CMAKE_CURRENT_SOURCE_DIR}/sleep.h ${CMAKE_CURRENT_SOURCE_DIR}/state_base_address.h ${CMAKE_CURRENT_SOURCE_DIR}/state_base_address_base.inl - ${CMAKE_CURRENT_SOURCE_DIR}/state_base_address_bdw.inl - ${CMAKE_CURRENT_SOURCE_DIR}/state_base_address_tgllp_and_later.inl ${CMAKE_CURRENT_SOURCE_DIR}/state_base_address_helper.h - ${CMAKE_CURRENT_SOURCE_DIR}/state_base_address_icllp_and_later.inl - ${CMAKE_CURRENT_SOURCE_DIR}/state_base_address_skl.inl ${CMAKE_CURRENT_SOURCE_DIR}/stdio.h ${CMAKE_CURRENT_SOURCE_DIR}/string.h ${CMAKE_CURRENT_SOURCE_DIR}/string_helpers.h @@ -253,7 +248,6 @@ set(NEO_CORE_HELPERS_GMM_CALLBACKS_WINDOWS ${CMAKE_CURRENT_SOURCE_DIR}/windows/gmm_callbacks.cpp ${CMAKE_CURRENT_SOURCE_DIR}/windows/gmm_callbacks.h ${CMAKE_CURRENT_SOURCE_DIR}/windows/gmm_callbacks.inl - ${CMAKE_CURRENT_SOURCE_DIR}/windows/gmm_callbacks_tgllp_and_later.inl ) set_property(GLOBAL PROPERTY NEO_CORE_HELPERS_GMM_CALLBACKS_WINDOWS ${NEO_CORE_HELPERS_GMM_CALLBACKS_WINDOWS}) diff --git a/shared/source/helpers/gfx_core_helper_bdw_to_icllp.inl b/shared/source/helpers/gfx_core_helper_bdw_to_icllp.inl deleted file mode 100644 index ecea7a2040..0000000000 --- a/shared/source/helpers/gfx_core_helper_bdw_to_icllp.inl +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Copyright (C) 2020-2025 Intel Corporation - * - * SPDX-License-Identifier: MIT - * - */ - -#include "shared/source/helpers/gfx_core_helper.h" -#include "shared/source/helpers/hw_info.h" - -namespace NEO { - -template -inline bool GfxCoreHelperHw::isFusedEuDispatchEnabled(const HardwareInfo &hwInfo, bool disableEUFusionForKernel) const { - return false; -} - -template -void *LriHelper::program(MI_LOAD_REGISTER_IMM *lriCmd, uint32_t address, uint32_t value, bool remap, bool isBcs) { - MI_LOAD_REGISTER_IMM cmd = GfxFamily::cmdInitLoadRegisterImm; - address += (isBcs && remap) ? RegisterOffsets::bcs0Base : 0x0; - cmd.setRegisterOffset(address); - cmd.setDataDword(value); - - *lriCmd = cmd; - - return lriCmd; -} - -template -bool GfxCoreHelperHw::packedFormatsSupported() const { - return false; -} - -template -size_t GfxCoreHelperHw::getMaxFillPatternSizeForCopyEngine() const { - return sizeof(uint32_t); -} - -} // namespace NEO diff --git a/shared/source/helpers/gfx_core_helper_bdw_to_dg2.inl b/shared/source/helpers/gfx_core_helper_tgllp_to_dg2.inl similarity index 100% rename from shared/source/helpers/gfx_core_helper_bdw_to_dg2.inl rename to shared/source/helpers/gfx_core_helper_tgllp_to_dg2.inl diff --git a/shared/source/helpers/state_base_address_bdw.inl b/shared/source/helpers/state_base_address_bdw.inl deleted file mode 100644 index 0e9c6ba0ec..0000000000 --- a/shared/source/helpers/state_base_address_bdw.inl +++ /dev/null @@ -1,25 +0,0 @@ -/* - * Copyright (C) 2020-2023 Intel Corporation - * - * SPDX-License-Identifier: MIT - * - */ - -#include "shared/source/helpers/state_base_address.h" - -namespace NEO { - -template -void StateBaseAddressHelper::appendStateBaseAddressParameters( - StateBaseAddressHelperArgs &args) { -} - -template -uint32_t StateBaseAddressHelper::getMaxBindlessSurfaceStates() { - return 0; -} - -template <> -void StateBaseAddressHelper::programStateBaseAddress(StateBaseAddressHelperArgs &args); - -} // namespace NEO diff --git a/shared/source/helpers/state_base_address_icllp_and_later.inl b/shared/source/helpers/state_base_address_icllp_and_later.inl deleted file mode 100644 index 990676f887..0000000000 --- a/shared/source/helpers/state_base_address_icllp_and_later.inl +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Copyright (C) 2020-2025 Intel Corporation - * - * SPDX-License-Identifier: MIT - * - */ - -#include "shared/source/command_stream/stream_properties.h" -#include "shared/source/helpers/state_base_address.h" -#include "shared/source/helpers/state_base_address_tgllp_and_later.inl" - -namespace NEO { - -template -void StateBaseAddressHelper::appendStateBaseAddressParameters( - StateBaseAddressHelperArgs &args) { - - if (!args.useGlobalHeapsBaseAddress) { - if (args.bindlessSurfaceStateBaseAddress != 0) { - args.stateBaseAddressCmd->setBindlessSurfaceStateBaseAddress(args.bindlessSurfaceStateBaseAddress); - args.stateBaseAddressCmd->setBindlessSurfaceStateBaseAddressModifyEnable(true); - const auto surfaceStateCount = getMaxBindlessSurfaceStates(); - args.stateBaseAddressCmd->setBindlessSurfaceStateSize(surfaceStateCount); - } else if (args.ssh) { - args.stateBaseAddressCmd->setBindlessSurfaceStateBaseAddressModifyEnable(true); - args.stateBaseAddressCmd->setBindlessSurfaceStateBaseAddress(args.ssh->getHeapGpuBase()); - uint32_t size = uint32_t(args.ssh->getMaxAvailableSpace() / 64) - 1; - args.stateBaseAddressCmd->setBindlessSurfaceStateSize(size); - } - } - - args.stateBaseAddressCmd->setBindlessSamplerStateBaseAddressModifyEnable(true); - - auto l3CacheOnPolicy = GMM_RESOURCE_USAGE_OCL_STATE_HEAP_BUFFER; - - if (args.gmmHelper != nullptr) { - args.stateBaseAddressCmd->setBindlessSurfaceStateMemoryObjectControlState(args.gmmHelper->getMOCS(l3CacheOnPolicy)); - args.stateBaseAddressCmd->setBindlessSamplerStateMemoryObjectControlState(args.gmmHelper->getMOCS(l3CacheOnPolicy)); - } - - StateBaseAddressHelper::appendIohParameters(args); -} - -template -uint32_t StateBaseAddressHelper::getMaxBindlessSurfaceStates() { - return (1 << 20) - 1; -} - -} // namespace NEO diff --git a/shared/source/helpers/state_base_address_skl.inl b/shared/source/helpers/state_base_address_skl.inl deleted file mode 100644 index 59ad9e451b..0000000000 --- a/shared/source/helpers/state_base_address_skl.inl +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Copyright (C) 2020-2023 Intel Corporation - * - * SPDX-License-Identifier: MIT - * - */ - -#include "shared/source/helpers/state_base_address.h" - -namespace NEO { - -template -void StateBaseAddressHelper::appendStateBaseAddressParameters( - StateBaseAddressHelperArgs &args) { - - if (!args.useGlobalHeapsBaseAddress) { - if (args.bindlessSurfaceStateBaseAddress != 0) { - args.stateBaseAddressCmd->setBindlessSurfaceStateBaseAddress(args.bindlessSurfaceStateBaseAddress); - args.stateBaseAddressCmd->setBindlessSurfaceStateBaseAddressModifyEnable(true); - const auto surfaceStateCount = getMaxBindlessSurfaceStates(); - args.stateBaseAddressCmd->setBindlessSurfaceStateSize(surfaceStateCount); - } else if (args.ssh) { - args.stateBaseAddressCmd->setBindlessSurfaceStateBaseAddressModifyEnable(true); - args.stateBaseAddressCmd->setBindlessSurfaceStateBaseAddress(args.ssh->getHeapGpuBase()); - uint32_t size = uint32_t(args.ssh->getMaxAvailableSpace() / 64) - 1; - args.stateBaseAddressCmd->setBindlessSurfaceStateSize(size); - } - } - - StateBaseAddressHelper::appendIohParameters(args); -} - -template -uint32_t StateBaseAddressHelper::getMaxBindlessSurfaceStates() { - return (1 << 20) - 1; -} -} // namespace NEO diff --git a/shared/source/helpers/state_base_address_tgllp_and_later.inl b/shared/source/helpers/state_base_address_tgllp_and_later.inl deleted file mode 100644 index ad77591e1f..0000000000 --- a/shared/source/helpers/state_base_address_tgllp_and_later.inl +++ /dev/null @@ -1,42 +0,0 @@ -/* - * Copyright (C) 2019-2025 Intel Corporation - * - * SPDX-License-Identifier: MIT - * - */ - -#include "shared/source/helpers/state_base_address_base.inl" - -namespace NEO { - -template -void StateBaseAddressHelper::programBindingTableBaseAddress(LinearStream &commandStream, uint64_t baseAddress, uint32_t sizeInPages, GmmHelper *gmmHelper) { -} - -template -void StateBaseAddressHelper::appendIohParameters(StateBaseAddressHelperArgs &args) { - if (args.sbaProperties) { - if (args.sbaProperties->indirectObjectBaseAddress.value != StreamProperty64::initValue) { - auto baseAddress = static_cast(args.sbaProperties->indirectObjectBaseAddress.value); - args.stateBaseAddressCmd->setIndirectObjectBaseAddress(args.gmmHelper->decanonize(baseAddress)); - args.stateBaseAddressCmd->setIndirectObjectBaseAddressModifyEnable(true); - args.stateBaseAddressCmd->setIndirectObjectBufferSizeModifyEnable(true); - args.stateBaseAddressCmd->setIndirectObjectBufferSize(static_cast(args.sbaProperties->indirectObjectSize.value)); - } - } else if (args.useGlobalHeapsBaseAddress) { - args.stateBaseAddressCmd->setIndirectObjectBaseAddressModifyEnable(true); - args.stateBaseAddressCmd->setIndirectObjectBufferSizeModifyEnable(true); - args.stateBaseAddressCmd->setIndirectObjectBaseAddress(args.indirectObjectHeapBaseAddress); - args.stateBaseAddressCmd->setIndirectObjectBufferSize(MemoryConstants::sizeOf4GBinPageEntities); - } else if (args.ioh) { - args.stateBaseAddressCmd->setIndirectObjectBaseAddressModifyEnable(true); - args.stateBaseAddressCmd->setIndirectObjectBufferSizeModifyEnable(true); - args.stateBaseAddressCmd->setIndirectObjectBaseAddress(args.ioh->getHeapGpuBase()); - args.stateBaseAddressCmd->setIndirectObjectBufferSize(args.ioh->getHeapSizeInPages()); - } -} - -template -void StateBaseAddressHelper::appendExtraCacheSettings(StateBaseAddressHelperArgs &args) {} - -} // namespace NEO diff --git a/shared/source/helpers/windows/gmm_callbacks.inl b/shared/source/helpers/windows/gmm_callbacks.inl index 2a0fa7f148..a28a236ad8 100644 --- a/shared/source/helpers/windows/gmm_callbacks.inl +++ b/shared/source/helpers/windows/gmm_callbacks.inl @@ -1,26 +1,54 @@ /* - * Copyright (C) 2020-2023 Intel Corporation + * Copyright (C) 2019-2025 Intel Corporation * * SPDX-License-Identifier: MIT * */ -#include "shared/source/command_stream/linear_stream.h" +#include "shared/source/command_stream/aub_command_stream_receiver_hw.h" +#include "shared/source/command_stream/command_stream_receiver_hw.h" +#include "shared/source/command_stream/command_stream_receiver_with_aub_dump.h" #include "shared/source/helpers/gfx_core_helper.h" #include "shared/source/helpers/windows/gmm_callbacks.h" - -#include +#include "shared/source/os_interface/windows/wddm_device_command_stream.h" namespace NEO { template long __stdcall DeviceCallbacks::notifyAubCapture(void *csrHandle, uint64_t gfxAddress, size_t gfxSize, bool allocate) { - return 0; + auto csr = reinterpret_cast *>(csrHandle); + + if (obtainCsrTypeFromIntegerValue(debugManager.flags.SetCommandStreamReceiver.get(), CommandStreamReceiverType::hardware) == CommandStreamReceiverType::hardwareWithAub) { + auto csrWithAub = static_cast> *>(csr); + auto aubCsr = static_cast *>(csrWithAub->aubCSR.get()); + if (allocate) { + AllocationView externalAllocation(gfxAddress, gfxSize); + aubCsr->makeResidentExternal(externalAllocation); + } else { + aubCsr->makeNonResidentExternal(gfxAddress); + } + } + + return 1; } template int __stdcall TTCallbacks::writeL3Address(void *queueHandle, uint64_t l3GfxAddress, uint64_t regOffset) { - return 0; + auto csr = reinterpret_cast *>(queueHandle); + + LriHelper::program(&csr->getCS(0), + static_cast(regOffset & 0xFFFFFFFF), + static_cast(l3GfxAddress & 0xFFFFFFFF), + true, + false); + + LriHelper::program(&csr->getCS(0), + static_cast(regOffset >> 32), + static_cast(l3GfxAddress >> 32), + true, + false); + + return 1; } } // namespace NEO diff --git a/shared/source/helpers/windows/gmm_callbacks_tgllp_and_later.inl b/shared/source/helpers/windows/gmm_callbacks_tgllp_and_later.inl deleted file mode 100644 index 7a55b35016..0000000000 --- a/shared/source/helpers/windows/gmm_callbacks_tgllp_and_later.inl +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Copyright (C) 2019-2024 Intel Corporation - * - * SPDX-License-Identifier: MIT - * - */ - -#include "shared/source/command_stream/aub_command_stream_receiver_hw.h" -#include "shared/source/command_stream/command_stream_receiver_hw.h" -#include "shared/source/command_stream/command_stream_receiver_with_aub_dump.h" -#include "shared/source/helpers/gfx_core_helper.h" -#include "shared/source/helpers/windows/gmm_callbacks.h" -#include "shared/source/os_interface/windows/wddm_device_command_stream.h" - -namespace NEO { - -template -long __stdcall DeviceCallbacks::notifyAubCapture(void *csrHandle, uint64_t gfxAddress, size_t gfxSize, bool allocate) { - auto csr = reinterpret_cast *>(csrHandle); - - if (obtainCsrTypeFromIntegerValue(debugManager.flags.SetCommandStreamReceiver.get(), CommandStreamReceiverType::hardware) == CommandStreamReceiverType::hardwareWithAub) { - auto csrWithAub = static_cast> *>(csr); - auto aubCsr = static_cast *>(csrWithAub->aubCSR.get()); - if (allocate) { - AllocationView externalAllocation(gfxAddress, gfxSize); - aubCsr->makeResidentExternal(externalAllocation); - } else { - aubCsr->makeNonResidentExternal(gfxAddress); - } - } - - return 1; -} - -template -int __stdcall TTCallbacks::writeL3Address(void *queueHandle, uint64_t l3GfxAddress, uint64_t regOffset) { - auto csr = reinterpret_cast *>(queueHandle); - - LriHelper::program(&csr->getCS(0), - static_cast(regOffset & 0xFFFFFFFF), - static_cast(l3GfxAddress & 0xFFFFFFFF), - true, - false); - - LriHelper::program(&csr->getCS(0), - static_cast(regOffset >> 32), - static_cast(l3GfxAddress >> 32), - true, - false); - - return 1; -} - -} // namespace NEO diff --git a/shared/source/xe2_hpg_core/windows/gmm_callbacks_xe2_hpg_core.cpp b/shared/source/xe2_hpg_core/windows/gmm_callbacks_xe2_hpg_core.cpp index 578b232524..19c4015679 100644 --- a/shared/source/xe2_hpg_core/windows/gmm_callbacks_xe2_hpg_core.cpp +++ b/shared/source/xe2_hpg_core/windows/gmm_callbacks_xe2_hpg_core.cpp @@ -1,11 +1,11 @@ /* - * Copyright (C) 2024 Intel Corporation + * Copyright (C) 2024-2025 Intel Corporation * * SPDX-License-Identifier: MIT * */ -#include "shared/source/helpers/windows/gmm_callbacks_tgllp_and_later.inl" +#include "shared/source/helpers/windows/gmm_callbacks.inl" #include "shared/source/xe2_hpg_core/hw_cmds.h" namespace NEO { diff --git a/shared/source/xe3_core/windows/gmm_callbacks_xe3_core.cpp b/shared/source/xe3_core/windows/gmm_callbacks_xe3_core.cpp index d7cf99996c..7e74cc9835 100644 --- a/shared/source/xe3_core/windows/gmm_callbacks_xe3_core.cpp +++ b/shared/source/xe3_core/windows/gmm_callbacks_xe3_core.cpp @@ -5,7 +5,7 @@ * */ -#include "shared/source/helpers/windows/gmm_callbacks_tgllp_and_later.inl" +#include "shared/source/helpers/windows/gmm_callbacks.inl" #include "shared/source/xe3_core/hw_cmds_base.h" namespace NEO { diff --git a/shared/source/xe_hpc_core/windows/gmm_callbacks_xe_hpc_core.cpp b/shared/source/xe_hpc_core/windows/gmm_callbacks_xe_hpc_core.cpp index da94ae965a..75ed651664 100644 --- a/shared/source/xe_hpc_core/windows/gmm_callbacks_xe_hpc_core.cpp +++ b/shared/source/xe_hpc_core/windows/gmm_callbacks_xe_hpc_core.cpp @@ -1,11 +1,11 @@ /* - * Copyright (C) 2021-2022 Intel Corporation + * Copyright (C) 2021-2025 Intel Corporation * * SPDX-License-Identifier: MIT * */ -#include "shared/source/helpers/windows/gmm_callbacks_tgllp_and_later.inl" +#include "shared/source/helpers/windows/gmm_callbacks.inl" #include "shared/source/xe_hpc_core/hw_cmds_xe_hpc_core_base.h" namespace NEO { diff --git a/shared/source/xe_hpg_core/gfx_core_helper_xe_hpg_core.cpp b/shared/source/xe_hpg_core/gfx_core_helper_xe_hpg_core.cpp index 1d134b6a09..9000f25369 100644 --- a/shared/source/xe_hpg_core/gfx_core_helper_xe_hpg_core.cpp +++ b/shared/source/xe_hpg_core/gfx_core_helper_xe_hpg_core.cpp @@ -17,9 +17,9 @@ using Family = NEO::XeHpgCoreFamily; #include "shared/source/helpers/constants.h" #include "shared/source/helpers/flat_batch_buffer_helper_hw.inl" #include "shared/source/helpers/gfx_core_helper_base.inl" -#include "shared/source/helpers/gfx_core_helper_bdw_to_dg2.inl" #include "shared/source/helpers/gfx_core_helper_dg2_and_later.inl" #include "shared/source/helpers/gfx_core_helper_tgllp_and_later.inl" +#include "shared/source/helpers/gfx_core_helper_tgllp_to_dg2.inl" #include "shared/source/helpers/gfx_core_helper_xehp_and_later.inl" #include "shared/source/helpers/local_memory_access_modes.h" diff --git a/shared/source/xe_hpg_core/windows/gmm_callbacks_xe_hpg_core.cpp b/shared/source/xe_hpg_core/windows/gmm_callbacks_xe_hpg_core.cpp index b257baa835..e349ca72b1 100644 --- a/shared/source/xe_hpg_core/windows/gmm_callbacks_xe_hpg_core.cpp +++ b/shared/source/xe_hpg_core/windows/gmm_callbacks_xe_hpg_core.cpp @@ -1,11 +1,11 @@ /* - * Copyright (C) 2021-2022 Intel Corporation + * Copyright (C) 2021-2025 Intel Corporation * * SPDX-License-Identifier: MIT * */ -#include "shared/source/helpers/windows/gmm_callbacks_tgllp_and_later.inl" +#include "shared/source/helpers/windows/gmm_callbacks.inl" #include "shared/source/xe_hpg_core/hw_cmds_xe_hpg_core_base.h" namespace NEO {