refactor: remove pre-gen12 code

Signed-off-by: Mateusz Jablonski <mateusz.jablonski@intel.com>
This commit is contained in:
Mateusz Jablonski
2025-08-11 17:32:37 +00:00
committed by Compute-Runtime-Automation
parent 4ac82c17f1
commit 22e7aa36d1
38 changed files with 560 additions and 784 deletions

View File

@@ -12,7 +12,6 @@ target_sources(${L0_STATIC_LIB_NAME}
${CMAKE_CURRENT_SOURCE_DIR}/cmdlist_extended${BRANCH_DIR_SUFFIX}cmdlist_extended.inl
${CMAKE_CURRENT_SOURCE_DIR}/cmdlist_hw.h
${CMAKE_CURRENT_SOURCE_DIR}/cmdlist_hw.inl
${CMAKE_CURRENT_SOURCE_DIR}/cmdlist_hw_skl_to_tgllp.inl
${CMAKE_CURRENT_SOURCE_DIR}/cmdlist_hw_immediate.h
${CMAKE_CURRENT_SOURCE_DIR}/cmdlist_hw_immediate.inl
${CMAKE_CURRENT_SOURCE_DIR}/cmdlist_imp.cpp

View File

@@ -1,368 +0,0 @@
/*
* Copyright (C) 2020-2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/command_container/command_encoder.h"
#include "shared/source/command_stream/linear_stream.h"
#include "shared/source/command_stream/preemption.h"
#include "shared/source/helpers/pause_on_gpu_properties.h"
#include "shared/source/helpers/pipe_control_args.h"
#include "shared/source/helpers/register_offsets.h"
#include "shared/source/helpers/simd_helper.h"
#include "shared/source/memory_manager/graphics_allocation.h"
#include "shared/source/memory_manager/memory_manager.h"
#include "shared/source/memory_manager/residency_container.h"
#include "shared/source/program/kernel_info.h"
#include "shared/source/unified_memory/unified_memory.h"
#include "shared/source/utilities/software_tags_manager.h"
#include "level_zero/core/source/driver/driver_handle_imp.h"
#include "level_zero/core/source/kernel/kernel_imp.h"
#include "encode_surface_state_args.h"
#include <algorithm>
namespace L0 {
struct DeviceImp;
// Reports the SSH reserve size in bytes, which is the size of one
// RENDER_SURFACE_STATE of the current GfxFamily.
template <GFXCORE_FAMILY gfxCoreFamily>
size_t CommandListCoreFamily<gfxCoreFamily>::getReserveSshSize() {
    return sizeof(typename GfxFamily::RENDER_SURFACE_STATE);
}
// Hook for adjusting kernel timestamp writes. This implementation is a
// deliberate no-op: none of the arguments are read and nothing is emitted.
template <GFXCORE_FAMILY gfxCoreFamily>
void CommandListCoreFamily<gfxCoreFamily>::adjustWriteKernelTimestamp(uint64_t globalAddress,
                                                                      uint64_t baseAddress,
                                                                      CommandToPatchContainer *outTimeStampSyncCmds,
                                                                      bool workloadPartition,
                                                                      bool copyOperation,
                                                                      bool globalTimestamp) {
    // Intentionally empty.
}
// Tells whether in-order signaling outside of the walker is required for the
// given event. This implementation never requires it, regardless of the event.
template <GFXCORE_FAMILY gfxCoreFamily>
bool CommandListCoreFamily<gfxCoreFamily>::isInOrderNonWalkerSignalingRequired(const Event *event) const {
    // The event is not inspected; the answer is unconditional.
    return false;
}
// Appends a kernel launch to this command list.
//
// In order, the function: validates the kernel descriptor, optionally reserves SSH/DSH space,
// updates command-list state (per-thread scratch size, SLM enable, preemption mode,
// indirect-allocation flags, cooperative flag), encodes the dispatch through
// NEO::EncodeDispatchKernel<GfxFamily>::encodeCommon, and adds the kernel's allocations to the
// command container's residency container.
//
// Parameters:
//   kernel                - kernel to launch; nullptr triggers UNRECOVERABLE_IF.
//   threadGroupDimensions - group counts; set on the kernel directly unless launchParams.isIndirect,
//                           in which case its address is handed to prepareIndirectParams.
//   event                 - optional event forwarded to the profiling/signal helpers; may be nullptr.
//   launchParams          - launch options; skipInOrderNonWalkerSignaling set triggers UNRECOVERABLE_IF.
//
// Returns:
//   ZE_RESULT_SUCCESS on success;
//   ZE_RESULT_ERROR_INVALID_ARGUMENT when the kernel descriptor is flagged invalid, or when the kernel
//     uses a sync buffer but the launch is not cooperative;
//   the non-zero result of programSyncBuffer when that call fails;
//   ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY when the kernel's total SLM size exceeds the device's
//     reported localMemSize.
//
// NOTE(review): the SLM size check returns after the command list has already been modified
// (profiling commands, scratch/SLM/preemption state) - confirm callers tolerate this.
template <GFXCORE_FAMILY gfxCoreFamily>
ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(Kernel *kernel,
const ze_group_count_t &threadGroupDimensions,
Event *event,
CmdListKernelLaunchParams &launchParams) {
UNRECOVERABLE_IF(kernel == nullptr);
UNRECOVERABLE_IF(launchParams.skipInOrderNonWalkerSignaling);
const auto driverHandle = static_cast<DriverHandleImp *>(device->getDriverHandle());
const auto &kernelDescriptor = kernel->getKernelDescriptor();
if (kernelDescriptor.kernelAttributes.flags.isInvalid) {
return ZE_RESULT_ERROR_INVALID_ARGUMENT;
}
const auto kernelImmutableData = kernel->getImmutableData();
auto kernelInfo = kernelImmutableData->getKernelInfo();
// Both heaps stay nullptr unless they are reserved in the block below.
NEO::IndirectHeap *ssh = nullptr;
NEO::IndirectHeap *dsh = nullptr;
DBG_LOG(PrintDispatchParameters, "Kernel: ", kernelInfo->kernelDescriptor.kernelMetadata.kernelName,
", Group size: ", kernel->getGroupSize()[0], ", ", kernel->getGroupSize()[1], ", ", kernel->getGroupSize()[2],
", Group count: ", threadGroupDimensions.groupCountX, ", ", threadGroupDimensions.groupCountY, ", ", threadGroupDimensions.groupCountZ,
", SIMD: ", kernelInfo->getMaxSimdSize());
// Heap space is reserved up front only with immediate heap sharing or state base address tracking.
if (this->immediateCmdListHeapSharing || this->stateBaseAddressTracking) {
auto &sshReserveConfig = commandContainer.getSurfaceStateHeapReserve();
NEO::HeapReserveArguments sshReserveArgs = {
sshReserveConfig.indirectHeapReservation,
NEO::EncodeDispatchKernel<GfxFamily>::getSizeRequiredSsh(*kernelInfo),
NEO::EncodeDispatchKernel<GfxFamily>::getDefaultSshAlignment()};
// Update the SSH size: with global bindless addressing, kernel arguments may need no SSH space.
if (kernel->getSurfaceStateHeapDataSize() == 0) {
sshReserveArgs.size = 0;
}
auto &dshReserveConfig = commandContainer.getDynamicStateHeapReserve();
NEO::HeapReserveArguments dshReserveArgs = {
dshReserveConfig.indirectHeapReservation,
NEO::EncodeDispatchKernel<GfxFamily>::getSizeRequiredDsh(kernelDescriptor, commandContainer.getNumIddPerBlock()),
NEO::EncodeDispatchKernel<GfxFamily>::getDefaultDshAlignment()};
if (launchParams.isKernelSplitOperation) {
// During a kernel-split operation, heap space is reserved once, on the first kernel of the split:
// the per-kernel SSH and DSH sizes computed above are multiplied by the number of kernels in the
// split. The remaining kernels of the same split skip the reservation entirely.
if (launchParams.numKernelsExecutedInSplitLaunch == 0) {
dshReserveArgs.size = launchParams.numKernelsInSplitLaunch * dshReserveArgs.size;
sshReserveArgs.size = launchParams.numKernelsInSplitLaunch * sshReserveArgs.size;
commandContainer.reserveSpaceForDispatch(
sshReserveArgs,
dshReserveArgs, true);
}
} else {
commandContainer.reserveSpaceForDispatch(
sshReserveArgs,
dshReserveArgs, true);
}
ssh = sshReserveArgs.indirectHeapReservation;
dsh = dshReserveArgs.indirectHeapReservation;
}
appendEventForProfiling(event, nullptr, true, false, false, false);
// Keep the larger of the command list's current slot-0 scratch size and this kernel's requirement.
auto perThreadScratchSize = std::max<std::uint32_t>(this->getCommandListPerThreadScratchSize(0u),
kernel->getImmutableData()->getDescriptor().kernelAttributes.perThreadScratchSize[0]);
this->setCommandListPerThreadScratchSize(0u, perThreadScratchSize);
auto slmEnable = (kernel->getImmutableData()->getDescriptor().kernelAttributes.slmInlineSize > 0);
this->setCommandListSLMEnable(slmEnable);
// The command list's preemption mode becomes the minimum of its current mode and this kernel's mode.
auto kernelPreemptionMode = obtainKernelPreemptionMode(kernel);
commandListPreemptionMode = std::min(commandListPreemptionMode, kernelPreemptionMode);
kernel->patchGlobalOffset();
this->allocateOrReuseKernelPrivateMemoryIfNeeded(kernel, kernelDescriptor.kernelAttributes.perHwThreadPrivateMemorySize);
// Direct launches set the group count on the kernel; indirect launches go through prepareIndirectParams.
if (!launchParams.isIndirect) {
kernel->setGroupCount(threadGroupDimensions.groupCountX,
threadGroupDimensions.groupCountY,
threadGroupDimensions.groupCountZ);
}
if (launchParams.isIndirect) {
prepareIndirectParams(&threadGroupDimensions);
}
// Indirect-allocation permissions are sticky: a flag is only ever raised here, never cleared.
if (kernel->hasIndirectAllocationsAllowed()) {
UnifiedMemoryControls unifiedMemoryControls = kernel->getUnifiedMemoryControls();
if (unifiedMemoryControls.indirectDeviceAllocationsAllowed) {
this->unifiedMemoryControls.indirectDeviceAllocationsAllowed = true;
}
if (unifiedMemoryControls.indirectHostAllocationsAllowed) {
this->unifiedMemoryControls.indirectHostAllocationsAllowed = true;
}
if (unifiedMemoryControls.indirectSharedAllocationsAllowed) {
this->unifiedMemoryControls.indirectSharedAllocationsAllowed = true;
}
this->indirectAllocationsAllowed = true;
}
containsCooperativeKernelsFlag = (containsCooperativeKernelsFlag || launchParams.isCooperative);
// A kernel that uses a sync buffer is accepted only for cooperative launches.
if (kernel->usesSyncBuffer()) {
auto retVal = (launchParams.isCooperative
? programSyncBuffer(*kernel, *device->getNEODevice(), threadGroupDimensions, launchParams.syncBufferPatchIndex)
: ZE_RESULT_ERROR_INVALID_ARGUMENT);
// Any non-zero result is treated as a failure and returned unchanged.
if (retVal) {
return retVal;
}
}
KernelImp *kernelImp = static_cast<KernelImp *>(kernel);
bool uncachedMocsKernel = isKernelUncachedMocsRequired(kernelImp->getKernelRequiresUncachedMocs());
this->requiresQueueUncachedMocs |= kernelImp->getKernelRequiresQueueUncachedMocs();
NEO::Device *neoDevice = device->getNEODevice();
// Reject the launch when the kernel's total SLM size exceeds the device's reported localMemSize.
auto localMemSize = static_cast<uint32_t>(neoDevice->getDeviceInfo().localMemSize);
auto slmTotalSize = kernelImp->getSlmTotalSize();
if (slmTotalSize > 0 && localMemSize < slmTotalSize) {
CREATE_DEBUG_STRING(str, "Size of SLM (%u) larger than available (%u)\n", slmTotalSize, localMemSize);
driverHandle->setErrorDescription(std::string(str.get()));
PRINT_DEBUG_STRING(NEO::debugManager.flags.PrintDebugMessages.get(), stderr, "Size of SLM (%u) larger than available (%u)\n", slmTotalSize, localMemSize);
return ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY;
}
// With the EnableSWTags debug flag set, insert a tag carrying the kernel name into the command stream.
if (NEO::debugManager.flags.EnableSWTags.get()) {
neoDevice->getRootDeviceEnvironment().tagsManager->insertTag<GfxFamily, NEO::SWTags::KernelNameTag>(
*commandContainer.getCommandStream(),
*neoDevice,
kernel->getKernelDescriptor().kernelMetadata.kernelName.c_str(), 0u);
}
// Handed to encodeCommon via dispatchKernelArgs; its entries are consumed by the PauseOnEnqueue blocks below.
std::list<void *> additionalCommands;
updateStreamProperties(*kernel, launchParams.isCooperative, threadGroupDimensions, launchParams.isIndirect);
auto maxWgCountPerTile = kernel->getMaxWgCountPerTile(this->engineGroupType);
NEO::EncodeDispatchKernelArgs dispatchKernelArgs{
.device = neoDevice,
.dispatchInterface = kernel,
.surfaceStateHeap = ssh,
.dynamicStateHeap = dsh,
.threadGroupDimensions = reinterpret_cast<const void *>(&threadGroupDimensions),
.outWalkerPtr = nullptr,
.cpuWalkerBuffer = nullptr,
.cpuPayloadBuffer = nullptr,
.outImplicitArgsPtr = nullptr,
.additionalCommands = &additionalCommands,
.extendedArgs = nullptr,
.postSyncArgs = {
.eventAddress = 0,
.postSyncImmValue = static_cast<uint64_t>(Event::STATE_SIGNALED),
.inOrderCounterValue = 0,
.inOrderIncrementGpuAddress = 0,
.inOrderIncrementValue = 0,
.device = neoDevice,
.inOrderExecInfo = nullptr,
.isCounterBasedEvent = false,
.isTimestampEvent = false,
.isHostScopeSignalEvent = false,
.isUsingSystemAllocation = false,
.dcFlushEnable = this->dcFlushSupport,
.interruptEvent = false,
.isFlushL3ForExternalAllocationRequired = false,
.isFlushL3ForHostUsmRequired = false,
},
.preemptionMode = commandListPreemptionMode,
.requiredPartitionDim = launchParams.requiredPartitionDim,
.requiredDispatchWalkOrder = launchParams.requiredDispatchWalkOrder,
.localRegionSize = launchParams.localRegionSize,
.partitionCount = 0,
.reserveExtraPayloadSpace = launchParams.reserveExtraPayloadSpace,
.maxWgCountPerTile = maxWgCountPerTile,
.defaultPipelinedThreadArbitrationPolicy = NEO::ThreadArbitrationPolicy::NotPresent,
.isIndirect = launchParams.isIndirect,
.isPredicate = launchParams.isPredicate,
.requiresUncachedMocs = uncachedMocsKernel,
.isInternal = internalUsage,
.isCooperative = launchParams.isCooperative,
.isKernelDispatchedFromImmediateCmdList = isImmediateType(),
.isRcs = engineGroupType == NEO::EngineGroupType::renderCompute,
.isHeaplessModeEnabled = this->heaplessModeEnabled,
.isHeaplessStateInitEnabled = this->heaplessStateInitEnabled,
.immediateScratchAddressPatching = !this->scratchAddressPatchingEnabled,
.makeCommandView = false,
};
NEO::EncodeDispatchKernel<GfxFamily>::encodeCommon(commandContainer, dispatchKernelArgs);
// For non-immediate lists the flag is assigned (not OR-ed) from the value held in the args after encoding.
if (!isImmediateType()) {
this->containsStatelessUncachedResource = dispatchKernelArgs.requiresUncachedMocs;
}
// With a debugger present and neither immediate heap sharing nor a bindless heaps helper in use,
// encode a surface state for the debug surface into the slot the debugger reserves in the SSH.
if (neoDevice->getDebugger() && !this->immediateCmdListHeapSharing && !neoDevice->getBindlessHeapsHelper()) {
// NOTE(review): this local `ssh` shadows the function-level `ssh` declared above.
auto *ssh = commandContainer.getIndirectHeap(NEO::HeapType::surfaceState);
auto surfaceStateSpace = neoDevice->getDebugger()->getDebugSurfaceReservedSurfaceState(*ssh);
auto surfaceState = GfxFamily::cmdInitRenderSurfaceState;
NEO::EncodeSurfaceStateArgs args;
args.outMemory = &surfaceState;
args.graphicsAddress = device->getDebugSurface()->getGpuAddress();
args.size = device->getDebugSurface()->getUnderlyingBufferSize();
args.mocs = device->getMOCS(false, false);
args.numAvailableDevices = neoDevice->getNumGenericSubDevices();
args.allocation = device->getDebugSurface();
args.gmmHelper = neoDevice->getGmmHelper();
args.areMultipleSubDevicesInContext = false;
args.isDebuggerActive = true;
NEO::EncodeSurfaceState<GfxFamily>::encodeBuffer(args);
*reinterpret_cast<typename GfxFamily::RENDER_SURFACE_STATE *>(surfaceStateSpace) = surfaceState;
}
appendSignalEventPostWalker(event, nullptr, nullptr, false, false, false);
// Register the kernel ISA plus its argument and internal allocations for residency.
commandContainer.addToResidencyContainer(kernelImmutableData->getIsaGraphicsAllocation());
auto &argumentsResidencyContainer = kernel->getArgumentsResidencyContainer();
for (auto resource : argumentsResidencyContainer) {
commandContainer.addToResidencyContainer(resource);
}
auto &internalResidencyContainer = kernel->getInternalResidencyContainer();
for (auto resource : internalResidencyContainer) {
commandContainer.addToResidencyContainer(resource);
}
if (kernelImp->getPrintfBufferAllocation() != nullptr) {
storePrintfKernel(kernel);
}
if (kernelDescriptor.kernelAttributes.flags.usesAssert) {
kernelWithAssertAppended = true;
}
// PauseOnEnqueue: entries are taken from the front of additionalCommands in pairs
// (pipe control first, then semaphore) and recorded in commandsToPatch.
// NOTE(review): front() is called without an emptiness check; this presumably relies on
// encodeCommon having pushed the matching entries whenever the pause mode is allowed - confirm.
if (NEO::PauseOnGpuProperties::pauseModeAllowed(NEO::debugManager.flags.PauseOnEnqueue.get(), neoDevice->debugExecutionCounter.load(), NEO::PauseOnGpuProperties::PauseMode::BeforeWorkload)) {
commandsToPatch.push_back({.pCommand = additionalCommands.front(), .type = CommandToPatch::PauseOnEnqueuePipeControlStart});
additionalCommands.pop_front();
commandsToPatch.push_back({.pCommand = additionalCommands.front(), .type = CommandToPatch::PauseOnEnqueueSemaphoreStart});
additionalCommands.pop_front();
}
if (NEO::PauseOnGpuProperties::pauseModeAllowed(NEO::debugManager.flags.PauseOnEnqueue.get(), neoDevice->debugExecutionCounter.load(), NEO::PauseOnGpuProperties::PauseMode::AfterWorkload)) {
commandsToPatch.push_back({.pCommand = additionalCommands.front(), .type = CommandToPatch::PauseOnEnqueuePipeControlEnd});
additionalCommands.pop_front();
commandsToPatch.push_back({.pCommand = additionalCommands.front(), .type = CommandToPatch::PauseOnEnqueueSemaphoreEnd});
additionalCommands.pop_front();
}
// When an event is given and the kernel has a printf buffer, attach the printf kernel and its mutex to the event.
if (event != nullptr && kernel->getPrintfBufferAllocation() != nullptr) {
auto module = static_cast<const ModuleImp *>(&static_cast<KernelImp *>(kernel)->getParentModule());
event->setKernelForPrintf(module->getPrintfKernelWeakPtr(kernel->toHandle()));
event->setKernelWithPrintfDeviceMutex(kernel->getDevicePrintfKernelMutex());
}
// In-order execution (skipped for kernel-split launches): when there is no event, or the event has no
// allocation for this device, add a barrier before signaling the in-order dependency counter.
if (this->isInOrderExecutionEnabled() && !launchParams.isKernelSplitOperation) {
if (!event || !event->getAllocation(this->device)) {
NEO::PipeControlArgs args;
args.dcFlushEnable = getDcFlushRequired(true);
NEO::MemorySynchronizationCommands<GfxFamily>::addSingleBarrier(*commandContainer.getCommandStream(), args);
}
appendSignalInOrderDependencyCounter(event, false, false, false);
}
return ZE_RESULT_SUCCESS;
}
// Multi-partition prologue hook. This implementation emits nothing and
// ignores partitionDataSize.
template <GFXCORE_FAMILY gfxCoreFamily>
void CommandListCoreFamily<gfxCoreFamily>::appendMultiPartitionPrologue(uint32_t partitionDataSize) {
    // Intentionally empty.
}
// Multi-partition epilogue hook. This implementation emits nothing.
template <GFXCORE_FAMILY gfxCoreFamily>
void CommandListCoreFamily<gfxCoreFamily>::appendMultiPartitionEpilogue() {
    // Intentionally empty.
}
// Appends a single barrier to the command container's command stream, using
// the pipe-control arguments produced by createBarrierFlags().
template <GFXCORE_FAMILY gfxCoreFamily>
void CommandListCoreFamily<gfxCoreFamily>::appendComputeBarrierCommand() {
    // Build the arguments first; createBarrierFlags() also clears the
    // "walker with profiling enqueued" state it reads.
    NEO::PipeControlArgs barrierArgs = createBarrierFlags();
    auto &commandStream = *commandContainer.getCommandStream();
    NEO::MemorySynchronizationCommands<GfxFamily>::addSingleBarrier(commandStream, barrierArgs);
}
// Builds the pipe-control arguments for a barrier. All fields keep their
// defaults except isWalkerWithProfilingEnqueued, which is taken from (and
// cleared by) getAndClearIsWalkerWithProfilingEnqueued().
template <GFXCORE_FAMILY gfxCoreFamily>
inline NEO::PipeControlArgs CommandListCoreFamily<gfxCoreFamily>::createBarrierFlags() {
    NEO::PipeControlArgs barrierArgs;
    barrierArgs.isWalkerWithProfilingEnqueued = this->getAndClearIsWalkerWithProfilingEnqueued();
    return barrierArgs;
}
// Multi-tile barrier hook. This implementation emits nothing and does not
// touch neoDevice.
template <GFXCORE_FAMILY gfxCoreFamily>
inline void CommandListCoreFamily<gfxCoreFamily>::appendMultiTileBarrier(NEO::Device &neoDevice) {
    // Intentionally empty.
}
// Estimates the command buffer space needed by appendMultiTileBarrier().
// Always zero here, matching the empty barrier implementation; the root
// device environment is not consulted.
template <GFXCORE_FAMILY gfxCoreFamily>
inline size_t CommandListCoreFamily<gfxCoreFamily>::estimateBufferSizeMultiTileBarrier(const NEO::RootDeviceEnvironment &rootDeviceEnvironment) {
    constexpr size_t noSpaceRequired = 0;
    return noSpaceRequired;
}
// Appends one kernel of a split launch by delegating to
// appendLaunchKernelWithParams(). The event argument is deliberately not
// forwarded: the delegate always receives a null event.
// Returns whatever appendLaunchKernelWithParams() returns.
template <GFXCORE_FAMILY gfxCoreFamily>
ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelSplit(Kernel *kernel,
                                                                          const ze_group_count_t &threadGroupDimensions,
                                                                          Event *event,
                                                                          CmdListKernelLaunchParams &launchParams) {
    Event *const noEvent = nullptr;
    return appendLaunchKernelWithParams(kernel, threadGroupDimensions, noEvent, launchParams);
}
// Computes the preemption mode for a kernel: preemption-level flags are built
// from the NEO device and the kernel's descriptor, then combined with the
// device preemption mode by PreemptionHelper::taskPreemptionMode().
template <GFXCORE_FAMILY gfxCoreFamily>
inline NEO::PreemptionMode CommandListCoreFamily<gfxCoreFamily>::obtainKernelPreemptionMode(Kernel *kernel) {
    auto &descriptor = kernel->getImmutableData()->getDescriptor();
    NEO::PreemptionFlags preemptionFlags =
        NEO::PreemptionHelper::createPreemptionLevelFlags(*device->getNEODevice(), &descriptor);
    return NEO::PreemptionHelper::taskPreemptionMode(device->getDevicePreemptionMode(), preemptionFlags);
}
// Dispatch-offset register hook. This implementation emits nothing and
// ignores both flags.
template <GFXCORE_FAMILY gfxCoreFamily>
void CommandListCoreFamily<gfxCoreFamily>::appendDispatchOffsetRegister(bool workloadPartitionEvent, bool beforeProfilingCmds) {
    // Intentionally empty.
}
// Tells whether events must use a single packet. This implementation always
// answers true; the caller's request is not taken into account.
template <GFXCORE_FAMILY gfxCoreFamily>
bool CommandListCoreFamily<gfxCoreFamily>::singleEventPacketRequired(bool inputSinglePacketEventRequest) const {
    // inputSinglePacketEventRequest is ignored on purpose.
    return true;
}
} // namespace L0

View File

@@ -1,5 +1,5 @@
#
# Copyright (C) 2020-2024 Intel Corporation
# Copyright (C) 2020-2025 Intel Corporation
#
# SPDX-License-Identifier: MIT
#
@@ -11,9 +11,15 @@ target_sources(${L0_STATIC_LIB_NAME}
${CMAKE_CURRENT_SOURCE_DIR}/cmdqueue.h
${CMAKE_CURRENT_SOURCE_DIR}/cmdqueue_hw.h
${CMAKE_CURRENT_SOURCE_DIR}/cmdqueue_hw.inl
${CMAKE_CURRENT_SOURCE_DIR}/cmdqueue_hw_skl_to_tgllp.inl
${CMAKE_CURRENT_SOURCE_DIR}/cmdqueue_imp.h
)
# Add the Gen12LP-specific command queue implementation only when Gen12LP support is enabled.
if(SUPPORT_GEN12LP)
target_sources(${L0_STATIC_LIB_NAME}
PRIVATE
${CMAKE_CURRENT_SOURCE_DIR}/cmdqueue_hw_gen12lp.inl
)
endif()
if(SUPPORT_XEHP_AND_LATER)
target_sources(${L0_STATIC_LIB_NAME}
PRIVATE

View File

@@ -13,7 +13,6 @@
#include "shared/source/helpers/cache_policy.h"
#include "shared/source/helpers/gfx_core_helper.h"
#include "shared/source/helpers/state_base_address.h"
#include "shared/source/helpers/state_base_address_tgllp_and_later.inl"
#include "level_zero/core/source/cmdlist/cmdlist.h"
#include "level_zero/core/source/cmdqueue/cmdqueue_hw.h"

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2022-2024 Intel Corporation
* Copyright (C) 2022-2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -9,7 +9,7 @@
#include "shared/source/gen12lp/hw_info_adln.h"
#include "level_zero/core/source/cmdqueue/cmdqueue_hw.inl"
#include "level_zero/core/source/cmdqueue/cmdqueue_hw_skl_to_tgllp.inl"
#include "level_zero/core/source/cmdqueue/cmdqueue_hw_gen12lp.inl"
namespace L0 {
template struct CommandQueueHw<IGFX_GEN12LP_CORE>;

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2021-2024 Intel Corporation
* Copyright (C) 2021-2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -9,7 +9,7 @@
#include "shared/source/gen12lp/hw_info_adlp.h"
#include "level_zero/core/source/cmdqueue/cmdqueue_hw.inl"
#include "level_zero/core/source/cmdqueue/cmdqueue_hw_skl_to_tgllp.inl"
#include "level_zero/core/source/cmdqueue/cmdqueue_hw_gen12lp.inl"
namespace L0 {
template struct CommandQueueHw<IGFX_GEN12LP_CORE>;

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2021-2024 Intel Corporation
* Copyright (C) 2021-2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -9,7 +9,7 @@
#include "shared/source/gen12lp/hw_info_adls.h"
#include "level_zero/core/source/cmdqueue/cmdqueue_hw.inl"
#include "level_zero/core/source/cmdqueue/cmdqueue_hw_skl_to_tgllp.inl"
#include "level_zero/core/source/cmdqueue/cmdqueue_hw_gen12lp.inl"
namespace L0 {
template struct CommandQueueHw<IGFX_GEN12LP_CORE>;

View File

@@ -5,15 +5,17 @@
*
*/
#include "shared/source/command_stream/preemption.h"
#include "shared/source/gen12lp/hw_cmds_base.h"
#include "shared/source/gen12lp/hw_info.h"
#include "shared/source/helpers/pause_on_gpu_properties.h"
#include "shared/source/program/kernel_info.h"
#include "level_zero/core/source/cmdlist/cmdlist_hw.h"
#include "level_zero/core/source/cmdlist/cmdlist_hw.inl"
#include "level_zero/core/source/cmdlist/cmdlist_hw_gen12lp_to_xe3.inl"
#include "level_zero/core/source/cmdlist/cmdlist_hw_immediate.h"
#include "level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl"
#include "level_zero/core/source/cmdlist/cmdlist_hw_skl_to_tgllp.inl"
#include "level_zero/core/source/gen12lp/definitions/cache_flush_gen12lp.inl"
#include "cmdlist_extended.inl"
@@ -21,7 +23,344 @@
namespace L0 {
template struct CommandListCoreFamily<IGFX_GEN12LP_CORE>;
template struct CommandListCoreFamilyImmediate<IGFX_GEN12LP_CORE>;
constexpr auto gfxCoreFamily = IGFX_GEN12LP_CORE;
// Computes the preemption mode for a kernel: preemption-level flags are built
// from the NEO device and the kernel's descriptor, then combined with the
// device preemption mode by PreemptionHelper::taskPreemptionMode().
template <>
inline NEO::PreemptionMode CommandListCoreFamily<gfxCoreFamily>::obtainKernelPreemptionMode(Kernel *kernel) {
    auto &descriptor = kernel->getImmutableData()->getDescriptor();
    NEO::PreemptionFlags preemptionFlags =
        NEO::PreemptionHelper::createPreemptionLevelFlags(*device->getNEODevice(), &descriptor);
    return NEO::PreemptionHelper::taskPreemptionMode(device->getDevicePreemptionMode(), preemptionFlags);
}
// Builds the pipe-control arguments for a barrier. All fields keep their
// defaults except isWalkerWithProfilingEnqueued, which is taken from (and
// cleared by) getAndClearIsWalkerWithProfilingEnqueued().
template <>
inline NEO::PipeControlArgs CommandListCoreFamily<gfxCoreFamily>::createBarrierFlags() {
    NEO::PipeControlArgs barrierArgs;
    barrierArgs.isWalkerWithProfilingEnqueued = this->getAndClearIsWalkerWithProfilingEnqueued();
    return barrierArgs;
}
// Multi-tile barrier hook. This specialization emits nothing and does not
// touch neoDevice.
template <>
void CommandListCoreFamily<gfxCoreFamily>::appendMultiTileBarrier(NEO::Device &neoDevice) {
    // Intentionally empty.
}
// Estimates the command buffer space needed by appendMultiTileBarrier().
// Always zero here, matching the empty barrier implementation; the root
// device environment is not consulted.
template <>
size_t CommandListCoreFamily<gfxCoreFamily>::estimateBufferSizeMultiTileBarrier(const NEO::RootDeviceEnvironment &rootDeviceEnvironment) {
    constexpr size_t noSpaceRequired = 0;
    return noSpaceRequired;
}
// Reports the SSH reserve size in bytes, which is the size of one
// RENDER_SURFACE_STATE of the current GfxFamily.
template <>
size_t CommandListCoreFamily<gfxCoreFamily>::getReserveSshSize() {
    return sizeof(typename GfxFamily::RENDER_SURFACE_STATE);
}
// Hook for adjusting kernel timestamp writes. This specialization is a
// deliberate no-op: none of the arguments are read and nothing is emitted.
template <>
void CommandListCoreFamily<gfxCoreFamily>::adjustWriteKernelTimestamp(uint64_t globalAddress,
                                                                      uint64_t baseAddress,
                                                                      CommandToPatchContainer *outTimeStampSyncCmds,
                                                                      bool workloadPartition,
                                                                      bool copyOperation,
                                                                      bool globalTimestamp) {
    // Intentionally empty.
}
// Tells whether in-order signaling outside of the walker is required for the
// given event. This specialization never requires it, regardless of the event.
template <>
bool CommandListCoreFamily<gfxCoreFamily>::isInOrderNonWalkerSignalingRequired(const Event *event) const {
    // The event is not inspected; the answer is unconditional.
    return false;
}
// Appends a kernel launch to this command list.
//
// In order, the function: validates the kernel descriptor, optionally reserves SSH/DSH space,
// updates command-list state (per-thread scratch size, SLM enable, preemption mode,
// indirect-allocation flags, cooperative flag), encodes the dispatch through
// NEO::EncodeDispatchKernel<GfxFamily>::encodeCommon, and adds the kernel's allocations to the
// command container's residency container.
//
// Parameters:
//   kernel                - kernel to launch; nullptr triggers UNRECOVERABLE_IF.
//   threadGroupDimensions - group counts; set on the kernel directly unless launchParams.isIndirect,
//                           in which case its address is handed to prepareIndirectParams.
//   event                 - optional event forwarded to the profiling/signal helpers; may be nullptr.
//   launchParams          - launch options; skipInOrderNonWalkerSignaling set triggers UNRECOVERABLE_IF.
//
// Returns:
//   ZE_RESULT_SUCCESS on success;
//   ZE_RESULT_ERROR_INVALID_ARGUMENT when the kernel descriptor is flagged invalid, or when the kernel
//     uses a sync buffer but the launch is not cooperative;
//   the non-zero result of programSyncBuffer when that call fails;
//   ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY when the kernel's total SLM size exceeds the device's
//     reported localMemSize.
//
// NOTE(review): the SLM size check returns after the command list has already been modified
// (profiling commands, scratch/SLM/preemption state) - confirm callers tolerate this.
template <>
ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(Kernel *kernel,
const ze_group_count_t &threadGroupDimensions,
Event *event,
CmdListKernelLaunchParams &launchParams) {
UNRECOVERABLE_IF(kernel == nullptr);
UNRECOVERABLE_IF(launchParams.skipInOrderNonWalkerSignaling);
const auto driverHandle = static_cast<DriverHandleImp *>(device->getDriverHandle());
const auto &kernelDescriptor = kernel->getKernelDescriptor();
if (kernelDescriptor.kernelAttributes.flags.isInvalid) {
return ZE_RESULT_ERROR_INVALID_ARGUMENT;
}
const auto kernelImmutableData = kernel->getImmutableData();
auto kernelInfo = kernelImmutableData->getKernelInfo();
// Both heaps stay nullptr unless they are reserved in the block below.
NEO::IndirectHeap *ssh = nullptr;
NEO::IndirectHeap *dsh = nullptr;
DBG_LOG(PrintDispatchParameters, "Kernel: ", kernelInfo->kernelDescriptor.kernelMetadata.kernelName,
", Group size: ", kernel->getGroupSize()[0], ", ", kernel->getGroupSize()[1], ", ", kernel->getGroupSize()[2],
", Group count: ", threadGroupDimensions.groupCountX, ", ", threadGroupDimensions.groupCountY, ", ", threadGroupDimensions.groupCountZ,
", SIMD: ", kernelInfo->getMaxSimdSize());
// Heap space is reserved up front only with immediate heap sharing or state base address tracking.
if (this->immediateCmdListHeapSharing || this->stateBaseAddressTracking) {
auto &sshReserveConfig = commandContainer.getSurfaceStateHeapReserve();
NEO::HeapReserveArguments sshReserveArgs = {
sshReserveConfig.indirectHeapReservation,
NEO::EncodeDispatchKernel<GfxFamily>::getSizeRequiredSsh(*kernelInfo),
NEO::EncodeDispatchKernel<GfxFamily>::getDefaultSshAlignment()};
// Update the SSH size: with global bindless addressing, kernel arguments may need no SSH space.
if (kernel->getSurfaceStateHeapDataSize() == 0) {
sshReserveArgs.size = 0;
}
auto &dshReserveConfig = commandContainer.getDynamicStateHeapReserve();
NEO::HeapReserveArguments dshReserveArgs = {
dshReserveConfig.indirectHeapReservation,
NEO::EncodeDispatchKernel<GfxFamily>::getSizeRequiredDsh(kernelDescriptor, commandContainer.getNumIddPerBlock()),
NEO::EncodeDispatchKernel<GfxFamily>::getDefaultDshAlignment()};
if (launchParams.isKernelSplitOperation) {
// During a kernel-split operation, heap space is reserved once, on the first kernel of the split:
// the per-kernel SSH and DSH sizes computed above are multiplied by the number of kernels in the
// split. The remaining kernels of the same split skip the reservation entirely.
if (launchParams.numKernelsExecutedInSplitLaunch == 0) {
dshReserveArgs.size = launchParams.numKernelsInSplitLaunch * dshReserveArgs.size;
sshReserveArgs.size = launchParams.numKernelsInSplitLaunch * sshReserveArgs.size;
commandContainer.reserveSpaceForDispatch(
sshReserveArgs,
dshReserveArgs, true);
}
} else {
commandContainer.reserveSpaceForDispatch(
sshReserveArgs,
dshReserveArgs, true);
}
ssh = sshReserveArgs.indirectHeapReservation;
dsh = dshReserveArgs.indirectHeapReservation;
}
appendEventForProfiling(event, nullptr, true, false, false, false);
// Keep the larger of the command list's current slot-0 scratch size and this kernel's requirement.
auto perThreadScratchSize = std::max<std::uint32_t>(this->getCommandListPerThreadScratchSize(0u),
kernel->getImmutableData()->getDescriptor().kernelAttributes.perThreadScratchSize[0]);
this->setCommandListPerThreadScratchSize(0u, perThreadScratchSize);
auto slmEnable = (kernel->getImmutableData()->getDescriptor().kernelAttributes.slmInlineSize > 0);
this->setCommandListSLMEnable(slmEnable);
// The command list's preemption mode becomes the minimum of its current mode and this kernel's mode.
auto kernelPreemptionMode = obtainKernelPreemptionMode(kernel);
commandListPreemptionMode = std::min(commandListPreemptionMode, kernelPreemptionMode);
kernel->patchGlobalOffset();
this->allocateOrReuseKernelPrivateMemoryIfNeeded(kernel, kernelDescriptor.kernelAttributes.perHwThreadPrivateMemorySize);
// Direct launches set the group count on the kernel; indirect launches go through prepareIndirectParams.
if (!launchParams.isIndirect) {
kernel->setGroupCount(threadGroupDimensions.groupCountX,
threadGroupDimensions.groupCountY,
threadGroupDimensions.groupCountZ);
}
if (launchParams.isIndirect) {
prepareIndirectParams(&threadGroupDimensions);
}
// Indirect-allocation permissions are sticky: a flag is only ever raised here, never cleared.
if (kernel->hasIndirectAllocationsAllowed()) {
UnifiedMemoryControls unifiedMemoryControls = kernel->getUnifiedMemoryControls();
if (unifiedMemoryControls.indirectDeviceAllocationsAllowed) {
this->unifiedMemoryControls.indirectDeviceAllocationsAllowed = true;
}
if (unifiedMemoryControls.indirectHostAllocationsAllowed) {
this->unifiedMemoryControls.indirectHostAllocationsAllowed = true;
}
if (unifiedMemoryControls.indirectSharedAllocationsAllowed) {
this->unifiedMemoryControls.indirectSharedAllocationsAllowed = true;
}
this->indirectAllocationsAllowed = true;
}
containsCooperativeKernelsFlag = (containsCooperativeKernelsFlag || launchParams.isCooperative);
// A kernel that uses a sync buffer is accepted only for cooperative launches.
if (kernel->usesSyncBuffer()) {
auto retVal = (launchParams.isCooperative
? programSyncBuffer(*kernel, *device->getNEODevice(), threadGroupDimensions, launchParams.syncBufferPatchIndex)
: ZE_RESULT_ERROR_INVALID_ARGUMENT);
// Any non-zero result is treated as a failure and returned unchanged.
if (retVal) {
return retVal;
}
}
KernelImp *kernelImp = static_cast<KernelImp *>(kernel);
bool uncachedMocsKernel = isKernelUncachedMocsRequired(kernelImp->getKernelRequiresUncachedMocs());
this->requiresQueueUncachedMocs |= kernelImp->getKernelRequiresQueueUncachedMocs();
NEO::Device *neoDevice = device->getNEODevice();
// Reject the launch when the kernel's total SLM size exceeds the device's reported localMemSize.
auto localMemSize = static_cast<uint32_t>(neoDevice->getDeviceInfo().localMemSize);
auto slmTotalSize = kernelImp->getSlmTotalSize();
if (slmTotalSize > 0 && localMemSize < slmTotalSize) {
CREATE_DEBUG_STRING(str, "Size of SLM (%u) larger than available (%u)\n", slmTotalSize, localMemSize);
driverHandle->setErrorDescription(std::string(str.get()));
PRINT_DEBUG_STRING(NEO::debugManager.flags.PrintDebugMessages.get(), stderr, "Size of SLM (%u) larger than available (%u)\n", slmTotalSize, localMemSize);
return ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY;
}
// With the EnableSWTags debug flag set, insert a tag carrying the kernel name into the command stream.
if (NEO::debugManager.flags.EnableSWTags.get()) {
neoDevice->getRootDeviceEnvironment().tagsManager->insertTag<GfxFamily, NEO::SWTags::KernelNameTag>(
*commandContainer.getCommandStream(),
*neoDevice,
kernel->getKernelDescriptor().kernelMetadata.kernelName.c_str(), 0u);
}
// Handed to encodeCommon via dispatchKernelArgs; its entries are consumed by the PauseOnEnqueue blocks below.
std::list<void *> additionalCommands;
updateStreamProperties(*kernel, launchParams.isCooperative, threadGroupDimensions, launchParams.isIndirect);
auto maxWgCountPerTile = kernel->getMaxWgCountPerTile(this->engineGroupType);
NEO::EncodeDispatchKernelArgs dispatchKernelArgs{
.device = neoDevice,
.dispatchInterface = kernel,
.surfaceStateHeap = ssh,
.dynamicStateHeap = dsh,
.threadGroupDimensions = reinterpret_cast<const void *>(&threadGroupDimensions),
.outWalkerPtr = nullptr,
.cpuWalkerBuffer = nullptr,
.cpuPayloadBuffer = nullptr,
.outImplicitArgsPtr = nullptr,
.additionalCommands = &additionalCommands,
.extendedArgs = nullptr,
.postSyncArgs = {
.eventAddress = 0,
.postSyncImmValue = static_cast<uint64_t>(Event::STATE_SIGNALED),
.inOrderCounterValue = 0,
.inOrderIncrementGpuAddress = 0,
.inOrderIncrementValue = 0,
.device = neoDevice,
.inOrderExecInfo = nullptr,
.isCounterBasedEvent = false,
.isTimestampEvent = false,
.isHostScopeSignalEvent = false,
.isUsingSystemAllocation = false,
.dcFlushEnable = this->dcFlushSupport,
.interruptEvent = false,
.isFlushL3ForExternalAllocationRequired = false,
.isFlushL3ForHostUsmRequired = false,
},
.preemptionMode = commandListPreemptionMode,
.requiredPartitionDim = launchParams.requiredPartitionDim,
.requiredDispatchWalkOrder = launchParams.requiredDispatchWalkOrder,
.localRegionSize = launchParams.localRegionSize,
.partitionCount = 0,
.reserveExtraPayloadSpace = launchParams.reserveExtraPayloadSpace,
.maxWgCountPerTile = maxWgCountPerTile,
.defaultPipelinedThreadArbitrationPolicy = NEO::ThreadArbitrationPolicy::NotPresent,
.isIndirect = launchParams.isIndirect,
.isPredicate = launchParams.isPredicate,
.requiresUncachedMocs = uncachedMocsKernel,
.isInternal = internalUsage,
.isCooperative = launchParams.isCooperative,
.isKernelDispatchedFromImmediateCmdList = isImmediateType(),
.isRcs = engineGroupType == NEO::EngineGroupType::renderCompute,
.isHeaplessModeEnabled = this->heaplessModeEnabled,
.isHeaplessStateInitEnabled = this->heaplessStateInitEnabled,
.immediateScratchAddressPatching = !this->scratchAddressPatchingEnabled,
.makeCommandView = false,
};
NEO::EncodeDispatchKernel<GfxFamily>::encodeCommon(commandContainer, dispatchKernelArgs);
// For non-immediate lists the flag is assigned (not OR-ed) from the value held in the args after encoding.
if (!isImmediateType()) {
this->containsStatelessUncachedResource = dispatchKernelArgs.requiresUncachedMocs;
}
// With a debugger present and neither immediate heap sharing nor a bindless heaps helper in use,
// encode a surface state for the debug surface into the slot the debugger reserves in the SSH.
if (neoDevice->getDebugger() && !this->immediateCmdListHeapSharing && !neoDevice->getBindlessHeapsHelper()) {
// NOTE(review): this local `ssh` shadows the function-level `ssh` declared above.
auto *ssh = commandContainer.getIndirectHeap(NEO::HeapType::surfaceState);
auto surfaceStateSpace = neoDevice->getDebugger()->getDebugSurfaceReservedSurfaceState(*ssh);
auto surfaceState = GfxFamily::cmdInitRenderSurfaceState;
NEO::EncodeSurfaceStateArgs args;
args.outMemory = &surfaceState;
args.graphicsAddress = device->getDebugSurface()->getGpuAddress();
args.size = device->getDebugSurface()->getUnderlyingBufferSize();
args.mocs = device->getMOCS(false, false);
args.numAvailableDevices = neoDevice->getNumGenericSubDevices();
args.allocation = device->getDebugSurface();
args.gmmHelper = neoDevice->getGmmHelper();
args.areMultipleSubDevicesInContext = false;
args.isDebuggerActive = true;
NEO::EncodeSurfaceState<GfxFamily>::encodeBuffer(args);
*reinterpret_cast<typename GfxFamily::RENDER_SURFACE_STATE *>(surfaceStateSpace) = surfaceState;
}
appendSignalEventPostWalker(event, nullptr, nullptr, false, false, false);
// Register the kernel ISA plus its argument and internal allocations for residency.
commandContainer.addToResidencyContainer(kernelImmutableData->getIsaGraphicsAllocation());
auto &argumentsResidencyContainer = kernel->getArgumentsResidencyContainer();
for (auto resource : argumentsResidencyContainer) {
commandContainer.addToResidencyContainer(resource);
}
auto &internalResidencyContainer = kernel->getInternalResidencyContainer();
for (auto resource : internalResidencyContainer) {
commandContainer.addToResidencyContainer(resource);
}
if (kernelImp->getPrintfBufferAllocation() != nullptr) {
storePrintfKernel(kernel);
}
if (kernelDescriptor.kernelAttributes.flags.usesAssert) {
kernelWithAssertAppended = true;
}
// PauseOnEnqueue: entries are taken from the front of additionalCommands in pairs
// (pipe control first, then semaphore) and recorded in commandsToPatch.
// NOTE(review): front() is called without an emptiness check; this presumably relies on
// encodeCommon having pushed the matching entries whenever the pause mode is allowed - confirm.
if (NEO::PauseOnGpuProperties::pauseModeAllowed(NEO::debugManager.flags.PauseOnEnqueue.get(), neoDevice->debugExecutionCounter.load(), NEO::PauseOnGpuProperties::PauseMode::BeforeWorkload)) {
commandsToPatch.push_back({.pCommand = additionalCommands.front(), .type = CommandToPatch::PauseOnEnqueuePipeControlStart});
additionalCommands.pop_front();
commandsToPatch.push_back({.pCommand = additionalCommands.front(), .type = CommandToPatch::PauseOnEnqueueSemaphoreStart});
additionalCommands.pop_front();
}
if (NEO::PauseOnGpuProperties::pauseModeAllowed(NEO::debugManager.flags.PauseOnEnqueue.get(), neoDevice->debugExecutionCounter.load(), NEO::PauseOnGpuProperties::PauseMode::AfterWorkload)) {
commandsToPatch.push_back({.pCommand = additionalCommands.front(), .type = CommandToPatch::PauseOnEnqueuePipeControlEnd});
additionalCommands.pop_front();
commandsToPatch.push_back({.pCommand = additionalCommands.front(), .type = CommandToPatch::PauseOnEnqueueSemaphoreEnd});
additionalCommands.pop_front();
}
// When an event is given and the kernel has a printf buffer, attach the printf kernel and its mutex to the event.
if (event != nullptr && kernel->getPrintfBufferAllocation() != nullptr) {
auto module = static_cast<const ModuleImp *>(&static_cast<KernelImp *>(kernel)->getParentModule());
event->setKernelForPrintf(module->getPrintfKernelWeakPtr(kernel->toHandle()));
event->setKernelWithPrintfDeviceMutex(kernel->getDevicePrintfKernelMutex());
}
// In-order execution (skipped for kernel-split launches): when there is no event, or the event has no
// allocation for this device, add a barrier before signaling the in-order dependency counter.
if (this->isInOrderExecutionEnabled() && !launchParams.isKernelSplitOperation) {
if (!event || !event->getAllocation(this->device)) {
NEO::PipeControlArgs args;
args.dcFlushEnable = getDcFlushRequired(true);
NEO::MemorySynchronizationCommands<GfxFamily>::addSingleBarrier(*commandContainer.getCommandStream(), args);
}
appendSignalInOrderDependencyCounter(event, false, false, false);
}
return ZE_RESULT_SUCCESS;
}
// Multi-partition prologue hook for the core family named by `gfxCoreFamily`.
// This specialization has an empty body: nothing is appended and
// `partitionDataSize` is ignored.
template <>
void CommandListCoreFamily<gfxCoreFamily>::appendMultiPartitionPrologue(uint32_t partitionDataSize) {}
// Multi-partition epilogue hook for the same core family; also an empty body.
template <>
void CommandListCoreFamily<gfxCoreFamily>::appendMultiPartitionEpilogue() {}
// Appends a single barrier to this command list's command stream, using the
// pipe-control arguments produced by createBarrierFlags().
template <>
void CommandListCoreFamily<gfxCoreFamily>::appendComputeBarrierCommand() {
    NEO::PipeControlArgs barrierArgs = createBarrierFlags();
    auto &commandStream = *commandContainer.getCommandStream();
    NEO::MemorySynchronizationCommands<GfxFamily>::addSingleBarrier(commandStream, barrierArgs);
}
// Launches one part of a split kernel operation by forwarding to
// appendLaunchKernelWithParams().
//
// Note: `event` is not forwarded; nullptr is passed in its place, so the
// forwarded launch carries no event.
// Returns whatever appendLaunchKernelWithParams() returns.
template <>
ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelSplit(Kernel *kernel,
                                                                          const ze_group_count_t &threadGroupDimensions,
                                                                          Event *event,
                                                                          CmdListKernelLaunchParams &launchParams) {
    const ze_result_t launchResult = appendLaunchKernelWithParams(kernel, threadGroupDimensions, nullptr, launchParams);
    return launchResult;
}
// Dispatch-offset register hook for the core family named by `gfxCoreFamily`.
// This specialization has an empty body; both flags are ignored.
template <>
void CommandListCoreFamily<gfxCoreFamily>::appendDispatchOffsetRegister(bool workloadPartitionEvent, bool beforeProfilingCmds) {
}
// Always reports that a single event packet is required; the caller's
// `inputSinglePacketEventRequest` value is not consulted.
template <>
bool CommandListCoreFamily<gfxCoreFamily>::singleEventPacketRequired(bool inputSinglePacketEventRequest) const {
return true;
}
template struct CommandListCoreFamily<gfxCoreFamily>;
template struct CommandListCoreFamilyImmediate<gfxCoreFamily>;
} // namespace L0

View File

@@ -9,7 +9,7 @@
#include "shared/source/gen12lp/hw_info_dg1.h"
#include "level_zero/core/source/cmdqueue/cmdqueue_hw.inl"
#include "level_zero/core/source/cmdqueue/cmdqueue_hw_skl_to_tgllp.inl"
#include "level_zero/core/source/cmdqueue/cmdqueue_hw_gen12lp.inl"
#include "neo_igfxfmid.h"

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2020-2023 Intel Corporation
* Copyright (C) 2020-2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -8,9 +8,8 @@
#include "shared/source/gen12lp/hw_cmds.h"
#include "level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper_base.inl"
#include "level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper_skl_to_pvc.inl"
#include "level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper_skl_to_tgllp.inl"
#include "level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper_tgllp_to_dg2.inl"
#include "level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper_tgllp_to_pvc.inl"
#include "level_zero/core/source/helpers/l0_populate_factory.h"
#include "level_zero/tools/source/debug/eu_thread.h"
@@ -19,6 +18,81 @@ namespace L0 {
using Family = NEO::Gen12LpFamily;
static auto gfxCore = IGFX_GEN12LP_CORE;
// Gen12LP (Family == NEO::Gen12LpFamily) capability flags. Each specialization
// returns a fixed value.

// Command-list heap sharing: reported as supported.
template <>
bool L0GfxCoreHelperHw<Family>::platformSupportsCmdListHeapSharing() const {
return true;
}
// State-compute-mode tracking: reported as not supported.
template <>
bool L0GfxCoreHelperHw<Family>::platformSupportsStateComputeModeTracking() const {
return false;
}
// Front-end tracking: reported as not supported.
template <>
bool L0GfxCoreHelperHw<Family>::platformSupportsFrontEndTracking() const {
return false;
}
// Pipeline-select tracking: reported as not supported.
template <>
bool L0GfxCoreHelperHw<Family>::platformSupportsPipelineSelectTracking() const {
return false;
}
// State-base-address tracking: reported as not supported; the root device
// environment argument is not consulted.
template <>
bool L0GfxCoreHelperHw<Family>::platformSupportsStateBaseAddressTracking(const NEO::RootDeviceEnvironment &rootDeviceEnvironment) const {
return false;
}
// Gen12LP: maximum kernel count per event is fixed at 1; `hwInfo` is not consulted.
template <>
uint32_t L0GfxCoreHelperHw<Family>::getEventMaxKernelCount(const NEO::HardwareInfo &hwInfo) const {
return 1;
}
// Gen12LP: base maximum packet count per event is fixed at 1; the root device
// environment argument is not consulted.
template <>
uint32_t L0GfxCoreHelperHw<Family>::getEventBaseMaxPacketCount(const NEO::RootDeviceEnvironment &rootDeviceEnvironment) const {
return 1u;
}
// Gen12LP: zebin is allowed only when no debugger object is supplied
// (returns true for a null `debugger`, false otherwise).
template <>
bool L0GfxCoreHelperHw<Family>::isZebinAllowed(const NEO::Debugger *debugger) const {
return !debugger;
}
// Gen12LP: the platform heap address model is always privateHeaps.
template <>
NEO::HeapAddressModel L0GfxCoreHelperHw<Family>::getPlatformHeapAddressModel(const NEO::RootDeviceEnvironment &rootDeviceEnvironment) const {
return NEO::HeapAddressModel::privateHeaps;
}
// Gen12LP: returns the INVALID RTAS format value.
template <>
ze_rtas_format_exp_t L0GfxCoreHelperHw<Family>::getSupportedRTASFormat() const {
return ZE_RTAS_FORMAT_EXP_INVALID;
}
// Gen12LP: primary-batch-buffer command lists are reported as supported.
template <>
bool L0GfxCoreHelperHw<Family>::platformSupportsPrimaryBatchBufferCmdList() const {
return true;
}
// Gen12LP: immediate compute flush task is reported as not supported.
template <>
bool L0GfxCoreHelperHw<Family>::platformSupportsImmediateComputeFlushTask() const {
return false;
}
// Gen12LP: no mutable-command-list update capability bits are reported (0).
template <>
ze_mutable_command_exp_flags_t L0GfxCoreHelperHw<Family>::getPlatformCmdListUpdateCapabilities() const {
return 0;
}
// Gen12LP: returns the INVALID regset type for large-GRF detection.
template <>
zet_debug_regset_type_intel_gpu_t L0GfxCoreHelperHw<Family>::getRegsetTypeForLargeGrfDetection() const {
return ZET_DEBUG_REGSET_TYPE_INVALID_INTEL_GPU;
}
// Gen12LP: GRF register count is fixed at 128; `regPtr` is not read.
template <>
uint32_t L0GfxCoreHelperHw<Family>::getGrfRegisterCount(uint32_t *regPtr) const {
return 128;
}
#include "level_zero/core/source/helpers/l0_gfx_core_helper_factory_init.inl"
template class L0GfxCoreHelperHw<Family>;

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2021-2024 Intel Corporation
* Copyright (C) 2021-2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -9,7 +9,7 @@
#include "shared/source/gen12lp/hw_info_rkl.h"
#include "level_zero/core/source/cmdqueue/cmdqueue_hw.inl"
#include "level_zero/core/source/cmdqueue/cmdqueue_hw_skl_to_tgllp.inl"
#include "level_zero/core/source/cmdqueue/cmdqueue_hw_gen12lp.inl"
namespace L0 {
template struct CommandQueueHw<IGFX_GEN12LP_CORE>;

View File

@@ -9,7 +9,7 @@
#include "shared/source/gen12lp/hw_info_tgllp.h"
#include "level_zero/core/source/cmdqueue/cmdqueue_hw.inl"
#include "level_zero/core/source/cmdqueue/cmdqueue_hw_skl_to_tgllp.inl"
#include "level_zero/core/source/cmdqueue/cmdqueue_hw_gen12lp.inl"
#include "neo_igfxfmid.h"

View File

@@ -12,13 +12,6 @@ target_sources(${L0_STATIC_LIB_NAME}
${CMAKE_CURRENT_SOURCE_DIR}/l0_gfx_core_helper.h
)
if(SUPPORT_GEN12LP)
target_sources(${L0_STATIC_LIB_NAME}
PRIVATE
${CMAKE_CURRENT_SOURCE_DIR}/l0_gfx_core_helper_skl_to_tgllp.inl
)
endif()
if(SUPPORT_XEHP_AND_LATER)
target_sources(${L0_STATIC_LIB_NAME}
PRIVATE
@@ -50,7 +43,7 @@ endif()
if(SUPPORT_GEN12LP OR SUPPORT_XE_HPG_CORE OR SUPPORT_XE_HPC_CORE)
target_sources(${L0_STATIC_LIB_NAME}
PRIVATE
${CMAKE_CURRENT_SOURCE_DIR}/l0_gfx_core_helper_skl_to_pvc.inl
${CMAKE_CURRENT_SOURCE_DIR}/l0_gfx_core_helper_tgllp_to_pvc.inl
)
endif()

View File

@@ -1,87 +0,0 @@
/*
* Copyright (C) 2021-2024 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper.h"
namespace L0 {
// Generic (per-Family template) L0GfxCoreHelperHw members. Every function
// below returns a fixed value and ignores its arguments unless noted.

// Command-list heap sharing: reported as supported.
template <typename Family>
bool L0GfxCoreHelperHw<Family>::platformSupportsCmdListHeapSharing() const {
return true;
}
// State-compute-mode tracking: reported as not supported.
template <typename Family>
bool L0GfxCoreHelperHw<Family>::platformSupportsStateComputeModeTracking() const {
return false;
}
// Front-end tracking: reported as not supported.
template <typename Family>
bool L0GfxCoreHelperHw<Family>::platformSupportsFrontEndTracking() const {
return false;
}
// Pipeline-select tracking: reported as not supported.
template <typename Family>
bool L0GfxCoreHelperHw<Family>::platformSupportsPipelineSelectTracking() const {
return false;
}
// State-base-address tracking: reported as not supported.
template <typename Family>
bool L0GfxCoreHelperHw<Family>::platformSupportsStateBaseAddressTracking(const NEO::RootDeviceEnvironment &rootDeviceEnvironment) const {
return false;
}
// Maximum kernel count per event: 1.
template <typename Family>
uint32_t L0GfxCoreHelperHw<Family>::getEventMaxKernelCount(const NEO::HardwareInfo &hwInfo) const {
return 1;
}
// Base maximum packet count per event: 1.
template <typename Family>
uint32_t L0GfxCoreHelperHw<Family>::getEventBaseMaxPacketCount(const NEO::RootDeviceEnvironment &rootDeviceEnvironment) const {
return 1u;
}
// Zebin is allowed only when `debugger` is null.
template <typename Family>
bool L0GfxCoreHelperHw<Family>::isZebinAllowed(const NEO::Debugger *debugger) const {
return !debugger;
}
// Platform heap address model: always privateHeaps.
template <typename Family>
NEO::HeapAddressModel L0GfxCoreHelperHw<Family>::getPlatformHeapAddressModel(const NEO::RootDeviceEnvironment &rootDeviceEnvironment) const {
return NEO::HeapAddressModel::privateHeaps;
}
// Supported RTAS format: the INVALID format value.
template <typename Family>
ze_rtas_format_exp_t L0GfxCoreHelperHw<Family>::getSupportedRTASFormat() const {
return ZE_RTAS_FORMAT_EXP_INVALID;
}
// Primary-batch-buffer command lists: reported as supported.
template <typename Family>
bool L0GfxCoreHelperHw<Family>::platformSupportsPrimaryBatchBufferCmdList() const {
return true;
}
// Immediate compute flush task: reported as not supported.
template <typename Family>
bool L0GfxCoreHelperHw<Family>::platformSupportsImmediateComputeFlushTask() const {
return false;
}
// Mutable-command-list update capabilities: no bits set (0).
template <typename Family>
ze_mutable_command_exp_flags_t L0GfxCoreHelperHw<Family>::getPlatformCmdListUpdateCapabilities() const {
return 0;
}
// Regset type for large-GRF detection: the INVALID regset type.
template <typename Family>
zet_debug_regset_type_intel_gpu_t L0GfxCoreHelperHw<Family>::getRegsetTypeForLargeGrfDetection() const {
return ZET_DEBUG_REGSET_TYPE_INVALID_INTEL_GPU;
}
// GRF register count: fixed at 128; `regPtr` is not read.
template <typename Family>
uint32_t L0GfxCoreHelperHw<Family>::getGrfRegisterCount(uint32_t *regPtr) const {
return 128;
}
} // namespace L0

View File

@@ -10,7 +10,7 @@
#include "level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper_base.inl"
#include "level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper_pvc_and_later.inl"
#include "level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper_pvc_to_xe2.inl"
#include "level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper_skl_to_pvc.inl"
#include "level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper_tgllp_to_pvc.inl"
#include "level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper_xe_hpg_and_xe_hpc.inl"
#include "level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper_xe_hpg_to_xe2_hpg.inl"
#include "level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper_xehp_and_later.inl"

View File

@@ -8,8 +8,8 @@
#include "shared/source/xe_hpg_core/hw_cmds_xe_hpg_core_base.h"
#include "level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper_base.inl"
#include "level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper_skl_to_pvc.inl"
#include "level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper_tgllp_to_dg2.inl"
#include "level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper_tgllp_to_pvc.inl"
#include "level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper_xe_hpg_and_xe_hpc.inl"
#include "level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper_xe_hpg_to_xe2_hpg.inl"
#include "level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper_xehp_and_later.inl"

View File

@@ -10,7 +10,6 @@ set(NEO_CORE_COMMAND_STREAM
${CMAKE_CURRENT_SOURCE_DIR}/aub_command_stream_receiver.h
${CMAKE_CURRENT_SOURCE_DIR}/aub_command_stream_receiver_hw.h
${CMAKE_CURRENT_SOURCE_DIR}/aub_command_stream_receiver_hw_base.inl
${CMAKE_CURRENT_SOURCE_DIR}/aub_command_stream_receiver_hw_bdw_and_later.inl
${CMAKE_CURRENT_SOURCE_DIR}/aub_subcapture_status.h
${CMAKE_CURRENT_SOURCE_DIR}/command_stream_receiver.cpp
${CMAKE_CURRENT_SOURCE_DIR}/command_stream_receiver.h

View File

@@ -1,22 +0,0 @@
/*
* Copyright (C) 2019-2024 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/command_stream/aub_command_stream_receiver_hw_base.inl"
namespace NEO {
// Returns the constant 0x100 used as the mask-and-value for the
// poll-for-completion check. NOTE(review): the meaning of this bit is not
// documented in this file — confirm against the hardware register definition.
template <typename GfxFamily>
constexpr uint32_t AUBCommandStreamReceiverHw<GfxFamily>::getMaskAndValueForPollForCompletion() {
return 0x100;
}
// Maps page-table-entry bits to an AUB address space. This generic version
// ignores `entryBits` and always returns TraceNonlocal.
template <typename GfxFamily>
int AUBCommandStreamReceiverHw<GfxFamily>::getAddressSpaceFromPTEBits(uint64_t entryBits) const {
return AubMemDump::AddressSpaceValues::TraceNonlocal;
}
} // namespace NEO

View File

@@ -8,7 +8,7 @@
#include "shared/source/aub_mem_dump/aub_alloc_dump.h"
#include "shared/source/command_stream/aub_command_stream_receiver.h"
#include "shared/source/command_stream/aub_command_stream_receiver_hw.h"
#include "shared/source/command_stream/aub_command_stream_receiver_hw_bdw_and_later.inl"
#include "shared/source/command_stream/aub_command_stream_receiver_hw_base.inl"
#include "shared/source/helpers/array_count.h"
#include "shared/source/helpers/populate_factory.h"
#include "shared/source/memory_manager/memory_pool.h"
@@ -17,6 +17,10 @@ namespace NEO {
typedef Gen12LpFamily Family;
static auto gfxCore = IGFX_GEN12LP_CORE;
// Gen12LP specialization (Family == Gen12LpFamily): maps page-table-entry bits
// to an AUB address space. `entryBits` is ignored; TraceNonlocal is always returned.
template <>
int AUBCommandStreamReceiverHw<Family>::getAddressSpaceFromPTEBits(uint64_t entryBits) const {
return AubMemDump::AddressSpaceValues::TraceNonlocal;
}
template <>
constexpr uint32_t AUBCommandStreamReceiverHw<Family>::getMaskAndValueForPollForCompletion() {
return 0x00008000;

View File

@@ -18,7 +18,6 @@ using Family = NEO::Gen12LpFamily;
#include "shared/source/helpers/blit_commands_helper_base.inl"
#include "shared/source/helpers/blit_commands_helper_from_gen12lp_to_xe3.inl"
#include "shared/source/helpers/populate_factory.h"
#include "shared/source/helpers/state_base_address_tgllp_and_later.inl"
namespace NEO {
static auto gfxCore = IGFX_GEN12LP_CORE;

View File

@@ -14,8 +14,8 @@ using Family = NEO::Gen12LpFamily;
#include "shared/source/helpers/flat_batch_buffer_helper_hw.inl"
#include "shared/source/helpers/gfx_core_helper.h"
#include "shared/source/helpers/gfx_core_helper_base.inl"
#include "shared/source/helpers/gfx_core_helper_bdw_to_dg2.inl"
#include "shared/source/helpers/gfx_core_helper_tgllp_and_later.inl"
#include "shared/source/helpers/gfx_core_helper_tgllp_to_dg2.inl"
#include "shared/source/helpers/hw_info.h"
#include "shared/source/helpers/local_memory_access_modes.h"
#include "shared/source/kernel/kernel_descriptor.h"

View File

@@ -6,9 +6,75 @@
*/
#include "shared/source/gen12lp/hw_cmds_base.h"
#include "shared/source/helpers/state_base_address.h"
#include "shared/source/helpers/state_base_address_icllp_and_later.inl"
#include "shared/source/helpers/state_base_address_base.inl"
namespace NEO {
template struct StateBaseAddressHelper<Gen12LpFamily>;
using Family = Gen12LpFamily;
// Gen12LP: value programmed as the bindless surface state size when a bindless
// base address is supplied: 2^20 - 1 (1048575).
template <>
uint32_t StateBaseAddressHelper<Family>::getMaxBindlessSurfaceStates() {
    constexpr uint32_t maxSurfaceStates = (1u << 20) - 1u;
    return maxSurfaceStates;
}
// Gen12LP: binding-table base address programming hook. This specialization
// has an empty body; nothing is written to `commandStream` and all arguments
// are ignored.
template <>
void StateBaseAddressHelper<Family>::programBindingTableBaseAddress(LinearStream &commandStream, uint64_t baseAddress, uint32_t sizeInPages, GmmHelper *gmmHelper) {
}
// Gen12LP: fills the indirect-object-heap (IOH) fields of the
// STATE_BASE_ADDRESS command held in `args.stateBaseAddressCmd`.
//
// The source of the values is chosen in priority order:
//   1. args.sbaProperties - used only if its indirectObjectBaseAddress has
//      been set (differs from StreamProperty64::initValue); otherwise the
//      command is left untouched. Requires args.gmmHelper to decanonize the
//      address.
//   2. args.useGlobalHeapsBaseAddress - uses args.indirectObjectHeapBaseAddress
//      with a 4GB (in page entities) buffer size.
//   3. args.ioh - uses the heap's GPU base and its size in pages.
// If none applies, nothing is programmed.
template <>
void StateBaseAddressHelper<Family>::appendIohParameters(StateBaseAddressHelperArgs<Family> &args) {
    auto &sbaCmd = args.stateBaseAddressCmd;

    if (args.sbaProperties) {
        const auto &iohBaseProperty = args.sbaProperties->indirectObjectBaseAddress;
        if (iohBaseProperty.value == StreamProperty64::initValue) {
            return;
        }
        const auto canonizedBase = static_cast<uint64_t>(iohBaseProperty.value);
        UNRECOVERABLE_IF(!args.gmmHelper);
        sbaCmd->setIndirectObjectBaseAddress(args.gmmHelper->decanonize(canonizedBase));
        sbaCmd->setIndirectObjectBaseAddressModifyEnable(true);
        sbaCmd->setIndirectObjectBufferSizeModifyEnable(true);
        sbaCmd->setIndirectObjectBufferSize(static_cast<uint32_t>(args.sbaProperties->indirectObjectSize.value));
        return;
    }

    if (args.useGlobalHeapsBaseAddress) {
        sbaCmd->setIndirectObjectBaseAddressModifyEnable(true);
        sbaCmd->setIndirectObjectBufferSizeModifyEnable(true);
        sbaCmd->setIndirectObjectBaseAddress(args.indirectObjectHeapBaseAddress);
        sbaCmd->setIndirectObjectBufferSize(MemoryConstants::sizeOf4GBinPageEntities);
        return;
    }

    if (args.ioh) {
        sbaCmd->setIndirectObjectBaseAddressModifyEnable(true);
        sbaCmd->setIndirectObjectBufferSizeModifyEnable(true);
        sbaCmd->setIndirectObjectBaseAddress(args.ioh->getHeapGpuBase());
        sbaCmd->setIndirectObjectBufferSize(args.ioh->getHeapSizeInPages());
    }
}
// Gen12LP: extra cache settings hook. Empty body; `args` is not modified.
template <>
void StateBaseAddressHelper<Family>::appendExtraCacheSettings(StateBaseAddressHelperArgs<Family> &args) {}
// Gen12LP: programs the bindless surface/sampler state fields of the
// STATE_BASE_ADDRESS command in `args.stateBaseAddressCmd`, then delegates the
// indirect-object-heap fields to appendIohParameters().
//
// Bindless surface state base/size are programmed only when global heaps are
// not in use, from either the explicit bindless base address (preferred) or
// the surface state heap. The bindless sampler modify-enable bit is always
// set. MOCS values are programmed only when a GmmHelper is available.
template <>
void StateBaseAddressHelper<Family>::appendStateBaseAddressParameters(
    StateBaseAddressHelperArgs<Family> &args) {
    auto &sbaCmd = args.stateBaseAddressCmd;
    const bool usesPrivateHeaps = !args.useGlobalHeapsBaseAddress;

    if (usesPrivateHeaps && args.bindlessSurfaceStateBaseAddress != 0) {
        sbaCmd->setBindlessSurfaceStateBaseAddress(args.bindlessSurfaceStateBaseAddress);
        sbaCmd->setBindlessSurfaceStateBaseAddressModifyEnable(true);
        sbaCmd->setBindlessSurfaceStateSize(getMaxBindlessSurfaceStates());
    } else if (usesPrivateHeaps && args.ssh) {
        sbaCmd->setBindlessSurfaceStateBaseAddressModifyEnable(true);
        sbaCmd->setBindlessSurfaceStateBaseAddress(args.ssh->getHeapGpuBase());
        // Size field is "number of 64-byte entries minus one".
        const uint32_t sshEntriesMinusOne = static_cast<uint32_t>(args.ssh->getMaxAvailableSpace() / 64) - 1;
        sbaCmd->setBindlessSurfaceStateSize(sshEntriesMinusOne);
    }

    sbaCmd->setBindlessSamplerStateBaseAddressModifyEnable(true);

    if (args.gmmHelper != nullptr) {
        const auto stateHeapUsage = GMM_RESOURCE_USAGE_OCL_STATE_HEAP_BUFFER;
        sbaCmd->setBindlessSurfaceStateMemoryObjectControlState(args.gmmHelper->getMOCS(stateHeapUsage));
        sbaCmd->setBindlessSamplerStateMemoryObjectControlState(args.gmmHelper->getMOCS(stateHeapUsage));
    }

    StateBaseAddressHelper<Family>::appendIohParameters(args);
}
template struct StateBaseAddressHelper<Gen12LpFamily>;
} // namespace NEO

View File

@@ -1,12 +1,12 @@
/*
* Copyright (C) 2019-2022 Intel Corporation
* Copyright (C) 2019-2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/gen12lp/hw_cmds_base.h"
#include "shared/source/helpers/windows/gmm_callbacks_tgllp_and_later.inl"
#include "shared/source/helpers/windows/gmm_callbacks.inl"
namespace NEO {
template struct DeviceCallbacks<Gen12LpFamily>;

View File

@@ -91,8 +91,7 @@ set(NEO_CORE_HELPERS
${CMAKE_CURRENT_SOURCE_DIR}/gfx_core_helper.cpp
${CMAKE_CURRENT_SOURCE_DIR}/gfx_core_helper.h
${CMAKE_CURRENT_SOURCE_DIR}/gfx_core_helper_base.inl
${CMAKE_CURRENT_SOURCE_DIR}/gfx_core_helper_bdw_to_dg2.inl
${CMAKE_CURRENT_SOURCE_DIR}/gfx_core_helper_bdw_to_icllp.inl
${CMAKE_CURRENT_SOURCE_DIR}/gfx_core_helper_tgllp_to_dg2.inl
${CMAKE_CURRENT_SOURCE_DIR}/gfx_core_helper_pvc_and_later.inl
${CMAKE_CURRENT_SOURCE_DIR}/gfx_core_helper_tgllp_and_later.inl
${CMAKE_CURRENT_SOURCE_DIR}/gpu_page_fault_helper.cpp
@@ -146,11 +145,7 @@ set(NEO_CORE_HELPERS
${CMAKE_CURRENT_SOURCE_DIR}/sleep.h
${CMAKE_CURRENT_SOURCE_DIR}/state_base_address.h
${CMAKE_CURRENT_SOURCE_DIR}/state_base_address_base.inl
${CMAKE_CURRENT_SOURCE_DIR}/state_base_address_bdw.inl
${CMAKE_CURRENT_SOURCE_DIR}/state_base_address_tgllp_and_later.inl
${CMAKE_CURRENT_SOURCE_DIR}/state_base_address_helper.h
${CMAKE_CURRENT_SOURCE_DIR}/state_base_address_icllp_and_later.inl
${CMAKE_CURRENT_SOURCE_DIR}/state_base_address_skl.inl
${CMAKE_CURRENT_SOURCE_DIR}/stdio.h
${CMAKE_CURRENT_SOURCE_DIR}/string.h
${CMAKE_CURRENT_SOURCE_DIR}/string_helpers.h
@@ -253,7 +248,6 @@ set(NEO_CORE_HELPERS_GMM_CALLBACKS_WINDOWS
${CMAKE_CURRENT_SOURCE_DIR}/windows/gmm_callbacks.cpp
${CMAKE_CURRENT_SOURCE_DIR}/windows/gmm_callbacks.h
${CMAKE_CURRENT_SOURCE_DIR}/windows/gmm_callbacks.inl
${CMAKE_CURRENT_SOURCE_DIR}/windows/gmm_callbacks_tgllp_and_later.inl
)
set_property(GLOBAL PROPERTY NEO_CORE_HELPERS_GMM_CALLBACKS_WINDOWS ${NEO_CORE_HELPERS_GMM_CALLBACKS_WINDOWS})

View File

@@ -1,40 +0,0 @@
/*
* Copyright (C) 2020-2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/helpers/gfx_core_helper.h"
#include "shared/source/helpers/hw_info.h"
namespace NEO {

// Fused EU dispatch is reported as disabled; both arguments are ignored.
template <typename GfxFamily>
inline bool GfxCoreHelperHw<GfxFamily>::isFusedEuDispatchEnabled(const HardwareInfo &hwInfo, bool disableEUFusionForKernel) const {
    return false;
}

// Writes an MI_LOAD_REGISTER_IMM command into `lriCmd`.
//
// `address` is the register offset and `value` the dword to load. When both
// `isBcs` and `remap` are set, the offset is shifted by
// RegisterOffsets::bcs0Base before being programmed.
// Returns `lriCmd`.
template <typename GfxFamily>
void *LriHelper<GfxFamily>::program(MI_LOAD_REGISTER_IMM *lriCmd, uint32_t address, uint32_t value, bool remap, bool isBcs) {
    const bool remapToBcs = isBcs && remap;
    if (remapToBcs) {
        address += RegisterOffsets::bcs0Base;
    }

    // Build the command in a local copy, then store it in one assignment.
    MI_LOAD_REGISTER_IMM loadRegisterImm = GfxFamily::cmdInitLoadRegisterImm;
    loadRegisterImm.setRegisterOffset(address);
    loadRegisterImm.setDataDword(value);
    *lriCmd = loadRegisterImm;

    return lriCmd;
}

// Packed formats are reported as not supported.
template <typename GfxFamily>
bool GfxCoreHelperHw<GfxFamily>::packedFormatsSupported() const {
    return false;
}

// The copy engine's maximum fill pattern size is one 32-bit word.
template <typename GfxFamily>
size_t GfxCoreHelperHw<GfxFamily>::getMaxFillPatternSizeForCopyEngine() const {
    constexpr size_t maxFillPatternSize = sizeof(uint32_t);
    return maxFillPatternSize;
}

} // namespace NEO

View File

@@ -1,25 +0,0 @@
/*
* Copyright (C) 2020-2023 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/helpers/state_base_address.h"
namespace NEO {
// Hook for family-specific STATE_BASE_ADDRESS parameters. This version has an
// empty body; `args` is not modified.
template <typename GfxFamily>
void StateBaseAddressHelper<GfxFamily>::appendStateBaseAddressParameters(
StateBaseAddressHelperArgs<GfxFamily> &args) {
}
// Maximum number of bindless surface states: this version returns 0.
template <typename GfxFamily>
uint32_t StateBaseAddressHelper<GfxFamily>::getMaxBindlessSurfaceStates() {
return 0;
}
// Declares an explicit specialization of programStateBaseAddress for
// Gen8Family; its definition is not part of this span.
template <>
void StateBaseAddressHelper<Gen8Family>::programStateBaseAddress(StateBaseAddressHelperArgs<Gen8Family> &args);
} // namespace NEO

View File

@@ -1,49 +0,0 @@
/*
* Copyright (C) 2020-2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/command_stream/stream_properties.h"
#include "shared/source/helpers/state_base_address.h"
#include "shared/source/helpers/state_base_address_tgllp_and_later.inl"
namespace NEO {

// Programs the bindless surface/sampler state fields of the STATE_BASE_ADDRESS
// command in `args.stateBaseAddressCmd`, then delegates the
// indirect-object-heap fields to appendIohParameters().
//
// Bindless surface state base/size are written only when global heaps are not
// in use: from the explicit bindless base address if it is non-zero, otherwise
// from the surface state heap if one is provided. The bindless sampler
// modify-enable bit is always set; MOCS values are set only when a GmmHelper
// is available.
template <typename GfxFamily>
void StateBaseAddressHelper<GfxFamily>::appendStateBaseAddressParameters(
    StateBaseAddressHelperArgs<GfxFamily> &args) {
    auto &sbaCmd = args.stateBaseAddressCmd;
    const bool usesPrivateHeaps = !args.useGlobalHeapsBaseAddress;

    if (usesPrivateHeaps && args.bindlessSurfaceStateBaseAddress != 0) {
        sbaCmd->setBindlessSurfaceStateBaseAddress(args.bindlessSurfaceStateBaseAddress);
        sbaCmd->setBindlessSurfaceStateBaseAddressModifyEnable(true);
        sbaCmd->setBindlessSurfaceStateSize(getMaxBindlessSurfaceStates());
    } else if (usesPrivateHeaps && args.ssh) {
        sbaCmd->setBindlessSurfaceStateBaseAddressModifyEnable(true);
        sbaCmd->setBindlessSurfaceStateBaseAddress(args.ssh->getHeapGpuBase());
        // Size field is "number of 64-byte entries minus one".
        const uint32_t sshEntriesMinusOne = static_cast<uint32_t>(args.ssh->getMaxAvailableSpace() / 64) - 1;
        sbaCmd->setBindlessSurfaceStateSize(sshEntriesMinusOne);
    }

    sbaCmd->setBindlessSamplerStateBaseAddressModifyEnable(true);

    if (args.gmmHelper != nullptr) {
        const auto stateHeapUsage = GMM_RESOURCE_USAGE_OCL_STATE_HEAP_BUFFER;
        sbaCmd->setBindlessSurfaceStateMemoryObjectControlState(args.gmmHelper->getMOCS(stateHeapUsage));
        sbaCmd->setBindlessSamplerStateMemoryObjectControlState(args.gmmHelper->getMOCS(stateHeapUsage));
    }

    StateBaseAddressHelper<GfxFamily>::appendIohParameters(args);
}

// Value programmed as the bindless surface state size when an explicit
// bindless base address is used: 2^20 - 1.
template <typename GfxFamily>
uint32_t StateBaseAddressHelper<GfxFamily>::getMaxBindlessSurfaceStates() {
    constexpr uint32_t maxSurfaceStates = (1u << 20) - 1u;
    return maxSurfaceStates;
}

} // namespace NEO

View File

@@ -1,37 +0,0 @@
/*
* Copyright (C) 2020-2023 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/helpers/state_base_address.h"
namespace NEO {

// Programs the bindless surface state base/size of the STATE_BASE_ADDRESS
// command in `args.stateBaseAddressCmd` (only when global heaps are not in
// use), then delegates the indirect-object-heap fields to
// appendIohParameters().
//
// The explicit bindless base address takes precedence when it is non-zero;
// otherwise the surface state heap is used if one is provided.
template <typename GfxFamily>
void StateBaseAddressHelper<GfxFamily>::appendStateBaseAddressParameters(
    StateBaseAddressHelperArgs<GfxFamily> &args) {
    auto &sbaCmd = args.stateBaseAddressCmd;
    const bool usesPrivateHeaps = !args.useGlobalHeapsBaseAddress;

    if (usesPrivateHeaps && args.bindlessSurfaceStateBaseAddress != 0) {
        sbaCmd->setBindlessSurfaceStateBaseAddress(args.bindlessSurfaceStateBaseAddress);
        sbaCmd->setBindlessSurfaceStateBaseAddressModifyEnable(true);
        sbaCmd->setBindlessSurfaceStateSize(getMaxBindlessSurfaceStates());
    } else if (usesPrivateHeaps && args.ssh) {
        sbaCmd->setBindlessSurfaceStateBaseAddressModifyEnable(true);
        sbaCmd->setBindlessSurfaceStateBaseAddress(args.ssh->getHeapGpuBase());
        // Size field is "number of 64-byte entries minus one".
        const uint32_t sshEntriesMinusOne = static_cast<uint32_t>(args.ssh->getMaxAvailableSpace() / 64) - 1;
        sbaCmd->setBindlessSurfaceStateSize(sshEntriesMinusOne);
    }

    StateBaseAddressHelper<GfxFamily>::appendIohParameters(args);
}

// Value programmed as the bindless surface state size when an explicit
// bindless base address is used: 2^20 - 1.
template <typename GfxFamily>
uint32_t StateBaseAddressHelper<GfxFamily>::getMaxBindlessSurfaceStates() {
    constexpr uint32_t maxSurfaceStates = (1u << 20) - 1u;
    return maxSurfaceStates;
}

} // namespace NEO

View File

@@ -1,42 +0,0 @@
/*
* Copyright (C) 2019-2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/helpers/state_base_address_base.inl"
namespace NEO {

// Binding-table base address programming hook. Empty body: nothing is written
// to `commandStream` and all arguments are ignored.
template <typename GfxFamily>
void StateBaseAddressHelper<GfxFamily>::programBindingTableBaseAddress(LinearStream &commandStream, uint64_t baseAddress, uint32_t sizeInPages, GmmHelper *gmmHelper) {
}

// Fills the indirect-object-heap (IOH) fields of the STATE_BASE_ADDRESS
// command held in `args.stateBaseAddressCmd`.
//
// The source of the values is chosen in priority order:
//   1. args.sbaProperties - used only if its indirectObjectBaseAddress has
//      been set (differs from StreamProperty64::initValue); otherwise the
//      command is left untouched. args.gmmHelper is dereferenced without a
//      null check on this path.
//   2. args.useGlobalHeapsBaseAddress - uses args.indirectObjectHeapBaseAddress
//      with a 4GB (in page entities) buffer size.
//   3. args.ioh - uses the heap's GPU base and its size in pages.
// If none applies, nothing is programmed.
template <typename GfxFamily>
void StateBaseAddressHelper<GfxFamily>::appendIohParameters(StateBaseAddressHelperArgs<GfxFamily> &args) {
    auto &sbaCmd = args.stateBaseAddressCmd;

    if (args.sbaProperties) {
        const auto &iohBaseProperty = args.sbaProperties->indirectObjectBaseAddress;
        if (iohBaseProperty.value == StreamProperty64::initValue) {
            return;
        }
        const auto canonizedBase = static_cast<uint64_t>(iohBaseProperty.value);
        sbaCmd->setIndirectObjectBaseAddress(args.gmmHelper->decanonize(canonizedBase));
        sbaCmd->setIndirectObjectBaseAddressModifyEnable(true);
        sbaCmd->setIndirectObjectBufferSizeModifyEnable(true);
        sbaCmd->setIndirectObjectBufferSize(static_cast<uint32_t>(args.sbaProperties->indirectObjectSize.value));
        return;
    }

    if (args.useGlobalHeapsBaseAddress) {
        sbaCmd->setIndirectObjectBaseAddressModifyEnable(true);
        sbaCmd->setIndirectObjectBufferSizeModifyEnable(true);
        sbaCmd->setIndirectObjectBaseAddress(args.indirectObjectHeapBaseAddress);
        sbaCmd->setIndirectObjectBufferSize(MemoryConstants::sizeOf4GBinPageEntities);
        return;
    }

    if (args.ioh) {
        sbaCmd->setIndirectObjectBaseAddressModifyEnable(true);
        sbaCmd->setIndirectObjectBufferSizeModifyEnable(true);
        sbaCmd->setIndirectObjectBaseAddress(args.ioh->getHeapGpuBase());
        sbaCmd->setIndirectObjectBufferSize(args.ioh->getHeapSizeInPages());
    }
}

// Extra cache settings hook. Empty body; `args` is not modified.
template <typename GfxFamily>
void StateBaseAddressHelper<GfxFamily>::appendExtraCacheSettings(StateBaseAddressHelperArgs<GfxFamily> &args) {}

} // namespace NEO

View File

@@ -1,26 +1,54 @@
/*
* Copyright (C) 2020-2023 Intel Corporation
* Copyright (C) 2019-2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/command_stream/linear_stream.h"
#include "shared/source/command_stream/aub_command_stream_receiver_hw.h"
#include "shared/source/command_stream/command_stream_receiver_hw.h"
#include "shared/source/command_stream/command_stream_receiver_with_aub_dump.h"
#include "shared/source/helpers/gfx_core_helper.h"
#include "shared/source/helpers/windows/gmm_callbacks.h"
#include <cstdint>
#include "shared/source/os_interface/windows/wddm_device_command_stream.h"
namespace NEO {

// GMM device callback: mirrors an external allocation into the AUB capture.
//
// `csrHandle` is reinterpreted as a CommandStreamReceiverHw<GfxFamily>. The
// AUB stream is touched only when the SetCommandStreamReceiver debug flag
// selects the hardware-with-AUB receiver type; in that case the range
// [gfxAddress, gfxAddress + gfxSize) is made resident in the AUB CSR when
// `allocate` is true, or made non-resident (by address) when it is false.
// Returns 1 in all cases.
template <typename GfxFamily>
long __stdcall DeviceCallbacks<GfxFamily>::notifyAubCapture(void *csrHandle, uint64_t gfxAddress, size_t gfxSize, bool allocate) {
    auto csr = reinterpret_cast<CommandStreamReceiverHw<GfxFamily> *>(csrHandle);

    if (obtainCsrTypeFromIntegerValue(debugManager.flags.SetCommandStreamReceiver.get(), CommandStreamReceiverType::hardware) == CommandStreamReceiverType::hardwareWithAub) {
        // With this receiver type the handle is a CSR-with-AUB-dump wrapper
        // around the WDDM CSR; the nested AUB CSR is reached through it.
        auto csrWithAub = static_cast<CommandStreamReceiverWithAUBDump<WddmCommandStreamReceiver<GfxFamily>> *>(csr);
        auto aubCsr = static_cast<AUBCommandStreamReceiverHw<GfxFamily> *>(csrWithAub->aubCSR.get());
        if (allocate) {
            AllocationView externalAllocation(gfxAddress, gfxSize);
            aubCsr->makeResidentExternal(externalAllocation);
        } else {
            aubCsr->makeNonResidentExternal(gfxAddress);
        }
    }
    return 1;
}

// GMM translation-table callback: programs a 64-bit L3 address via two
// register loads on the command stream obtained from `queueHandle`
// (reinterpreted as a CommandStreamReceiverHw<GfxFamily>).
//
// The low 32 bits of `regOffset` give the register for the low dword of
// `l3GfxAddress`; the high 32 bits of `regOffset` give the register for the
// high dword. Both loads are issued with remap = true and isBcs = false.
// Returns 1.
template <typename GfxFamily>
int __stdcall TTCallbacks<GfxFamily>::writeL3Address(void *queueHandle, uint64_t l3GfxAddress, uint64_t regOffset) {
    auto csr = reinterpret_cast<CommandStreamReceiverHw<GfxFamily> *>(queueHandle);

    LriHelper<GfxFamily>::program(&csr->getCS(0),
                                  static_cast<uint32_t>(regOffset & 0xFFFFFFFF),
                                  static_cast<uint32_t>(l3GfxAddress & 0xFFFFFFFF),
                                  true,
                                  false);

    LriHelper<GfxFamily>::program(&csr->getCS(0),
                                  static_cast<uint32_t>(regOffset >> 32),
                                  static_cast<uint32_t>(l3GfxAddress >> 32),
                                  true,
                                  false);

    return 1;
}

} // namespace NEO

View File

@@ -1,54 +0,0 @@
/*
* Copyright (C) 2019-2024 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/command_stream/aub_command_stream_receiver_hw.h"
#include "shared/source/command_stream/command_stream_receiver_hw.h"
#include "shared/source/command_stream/command_stream_receiver_with_aub_dump.h"
#include "shared/source/helpers/gfx_core_helper.h"
#include "shared/source/helpers/windows/gmm_callbacks.h"
#include "shared/source/os_interface/windows/wddm_device_command_stream.h"
namespace NEO {

// GMM device callback: mirrors an external allocation into the AUB capture.
//
// Nothing is done unless the SetCommandStreamReceiver debug flag selects the
// hardware-with-AUB receiver type. In that case `csrHandle` is treated as a
// CSR-with-AUB-dump wrapper around the WDDM CSR, and the range starting at
// `gfxAddress` is made resident (`allocate` true) or non-resident (`allocate`
// false) in the nested AUB CSR. Returns 1 in all cases.
template <typename GfxFamily>
long __stdcall DeviceCallbacks<GfxFamily>::notifyAubCapture(void *csrHandle, uint64_t gfxAddress, size_t gfxSize, bool allocate) {
    const auto selectedCsrType = obtainCsrTypeFromIntegerValue(debugManager.flags.SetCommandStreamReceiver.get(), CommandStreamReceiverType::hardware);
    if (selectedCsrType != CommandStreamReceiverType::hardwareWithAub) {
        return 1;
    }

    auto hwCsr = reinterpret_cast<CommandStreamReceiverHw<GfxFamily> *>(csrHandle);
    auto wrapperCsr = static_cast<CommandStreamReceiverWithAUBDump<WddmCommandStreamReceiver<GfxFamily>> *>(hwCsr);
    auto aubCsr = static_cast<AUBCommandStreamReceiverHw<GfxFamily> *>(wrapperCsr->aubCSR.get());

    if (!allocate) {
        aubCsr->makeNonResidentExternal(gfxAddress);
        return 1;
    }

    AllocationView externalAllocation(gfxAddress, gfxSize);
    aubCsr->makeResidentExternal(externalAllocation);
    return 1;
}

// GMM translation-table callback: programs a 64-bit L3 address via two
// register loads on the command stream obtained from `queueHandle`
// (reinterpreted as a CommandStreamReceiverHw<GfxFamily>).
//
// The low dwords of `regOffset` / `l3GfxAddress` form the first load, the high
// dwords the second. Both use remap = true and isBcs = false. Returns 1.
template <typename GfxFamily>
int __stdcall TTCallbacks<GfxFamily>::writeL3Address(void *queueHandle, uint64_t l3GfxAddress, uint64_t regOffset) {
    auto hwCsr = reinterpret_cast<CommandStreamReceiverHw<GfxFamily> *>(queueHandle);

    const auto lowRegister = static_cast<uint32_t>(regOffset & 0xFFFFFFFF);
    const auto lowAddress = static_cast<uint32_t>(l3GfxAddress & 0xFFFFFFFF);
    const auto highRegister = static_cast<uint32_t>(regOffset >> 32);
    const auto highAddress = static_cast<uint32_t>(l3GfxAddress >> 32);

    // The command stream is fetched separately for each load, as before.
    LriHelper<GfxFamily>::program(&hwCsr->getCS(0), lowRegister, lowAddress, true, false);
    LriHelper<GfxFamily>::program(&hwCsr->getCS(0), highRegister, highAddress, true, false);

    return 1;
}

} // namespace NEO

View File

@@ -1,11 +1,11 @@
/*
* Copyright (C) 2024 Intel Corporation
* Copyright (C) 2024-2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/helpers/windows/gmm_callbacks_tgllp_and_later.inl"
#include "shared/source/helpers/windows/gmm_callbacks.inl"
#include "shared/source/xe2_hpg_core/hw_cmds.h"
namespace NEO {

View File

@@ -5,7 +5,7 @@
*
*/
#include "shared/source/helpers/windows/gmm_callbacks_tgllp_and_later.inl"
#include "shared/source/helpers/windows/gmm_callbacks.inl"
#include "shared/source/xe3_core/hw_cmds_base.h"
namespace NEO {

View File

@@ -1,11 +1,11 @@
/*
* Copyright (C) 2021-2022 Intel Corporation
* Copyright (C) 2021-2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/helpers/windows/gmm_callbacks_tgllp_and_later.inl"
#include "shared/source/helpers/windows/gmm_callbacks.inl"
#include "shared/source/xe_hpc_core/hw_cmds_xe_hpc_core_base.h"
namespace NEO {

View File

@@ -17,9 +17,9 @@ using Family = NEO::XeHpgCoreFamily;
#include "shared/source/helpers/constants.h"
#include "shared/source/helpers/flat_batch_buffer_helper_hw.inl"
#include "shared/source/helpers/gfx_core_helper_base.inl"
#include "shared/source/helpers/gfx_core_helper_bdw_to_dg2.inl"
#include "shared/source/helpers/gfx_core_helper_dg2_and_later.inl"
#include "shared/source/helpers/gfx_core_helper_tgllp_and_later.inl"
#include "shared/source/helpers/gfx_core_helper_tgllp_to_dg2.inl"
#include "shared/source/helpers/gfx_core_helper_xehp_and_later.inl"
#include "shared/source/helpers/local_memory_access_modes.h"

View File

@@ -1,11 +1,11 @@
/*
* Copyright (C) 2021-2022 Intel Corporation
* Copyright (C) 2021-2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/helpers/windows/gmm_callbacks_tgllp_and_later.inl"
#include "shared/source/helpers/windows/gmm_callbacks.inl"
#include "shared/source/xe_hpg_core/hw_cmds_xe_hpg_core_base.h"
namespace NEO {