Initial PVC support
Signed-off-by: Bartosz Dunajski <bartosz.dunajski@intel.com>
Related-To: NEO-5542
This commit is contained in:
parent
3979af1dcc
commit
f20236c7f2
|
@ -10,5 +10,7 @@ list(APPEND ALL_CORE_TYPES "GEN11")
|
|||
list(APPEND ALL_CORE_TYPES "GEN12LP")
|
||||
list(APPEND ALL_CORE_TYPES "XE_HP_CORE")
|
||||
list(APPEND ALL_CORE_TYPES "XE_HPG_CORE")
|
||||
list(APPEND XEHP_AND_LATER_CORE_TYPES "XE_HP_CORE" "XE_HPG_CORE")
|
||||
list(APPEND DG2_AND_LATER_CORE_TYPES "XE_HPG_CORE")
|
||||
list(APPEND ALL_CORE_TYPES "XE_HPC_CORE")
|
||||
list(APPEND XEHP_AND_LATER_CORE_TYPES "XE_HP_CORE" "XE_HPG_CORE" "XE_HPC_CORE")
|
||||
list(APPEND DG2_AND_LATER_CORE_TYPES "XE_HPG_CORE" "XE_HPC_CORE")
|
||||
list(APPEND PVC_AND_LATER_CORE_TYPES "XE_HPC_CORE")
|
||||
|
|
|
@ -34,6 +34,15 @@ foreach(CORE_TYPE ${DG2_AND_LATER_CORE_TYPES})
|
|||
endif()
|
||||
endforeach()
|
||||
|
||||
# Turn on the PVC_AND_LATER switches as soon as any core type listed in
# PVC_AND_LATER_CORE_TYPES has its own TESTS_<core> / SUPPORT_<core> option set.
foreach(CORE_TYPE ${PVC_AND_LATER_CORE_TYPES})
  if(TESTS_${CORE_TYPE})
    set(TESTS_PVC_AND_LATER 1)
  endif()
  if(SUPPORT_${CORE_TYPE})
    set(SUPPORT_PVC_AND_LATER 1)
  endif()
endforeach()
|
||||
|
||||
# Add supported and tested platforms
|
||||
if(SUPPORT_GEN8)
|
||||
set(CORE_GEN8_REVISIONS 0)
|
||||
|
@ -261,3 +270,21 @@ if(SUPPORT_XE_HPG_CORE)
|
|||
endif()
|
||||
endif()
|
||||
endif()
|
||||
|
||||
# XE_HPC_CORE: test-kernel blocklist, core revisions and the PVC product entries.
if(SUPPORT_XE_HPC_CORE)
  set(XE_HPC_CORE_TEST_KERNELS_BLOCKLIST "CopyBuffer_simd8.cl")
  set(CORE_XE_HPC_CORE_REVISIONS 0 3 30 157)
  if(TESTS_XE_HPC_CORE)
    ADD_ITEM_FOR_CORE_TYPE("FAMILY_NAME" "TESTED" "XE_HPC_CORE" "XE_HPC_COREFamily")
  endif()

  # PVC is registered as supported first; the tested entries are added only
  # when TESTS_PVC is set as well.
  if(SUPPORT_PVC)
    ADD_PRODUCT("SUPPORTED" "PVC" "IGFX_PVC")
    ADD_PLATFORM_FOR_CORE_TYPE("SUPPORTED" "XE_HPC_CORE" "PVC" "CORE")
    ADD_PLATFORM_FOR_CORE_TYPE("SUPPORTED_AUX_TRANSLATION" "XE_HPC_CORE" "PVC" "CORE")
    if(TESTS_PVC)
      ADD_ITEM_FOR_CORE_TYPE("PLATFORMS" "TESTED" "XE_HPC_CORE" "PVC")
      ADD_PRODUCT("TESTED" "PVC" "IGFX_PVC")
    endif()
  endif()
endif()
|
||||
|
|
|
@ -0,0 +1,32 @@
|
|||
/*
|
||||
* Copyright (C) 2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/debug_settings/debug_settings_manager.h"
|
||||
#include "shared/source/helpers/engine_node_helper.h"
|
||||
|
||||
#include "level_zero/core/source/hw_helpers/l0_hw_helper.h"
|
||||
|
||||
#include "engine_group_types.h"
|
||||
|
||||
#include <limits>
|
||||
|
||||
namespace L0 {
|
||||
|
||||
template <typename Family>
|
||||
void L0HwHelperHw<Family>::setAdditionalGroupProperty(ze_command_queue_group_properties_t &groupProperty, NEO::EngineGroupType groupType) const {
|
||||
if (groupType == NEO::EngineGroupType::LinkedCopy) {
|
||||
groupProperty.flags = ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COPY;
|
||||
groupProperty.maxMemoryFillPatternSize = sizeof(uint8_t);
|
||||
}
|
||||
|
||||
if (groupType == NEO::EngineGroupType::Copy && NEO::EngineHelpers::isBcsVirtualEngineEnabled()) {
|
||||
groupProperty.flags = ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COPY;
|
||||
groupProperty.maxMemoryFillPatternSize = sizeof(uint8_t);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace L0
|
|
@ -0,0 +1,19 @@
|
|||
#
|
||||
# Copyright (C) 2021 Intel Corporation
|
||||
#
|
||||
# SPDX-License-Identifier: MIT
|
||||
#
|
||||
|
||||
# Level Zero sources for XE_HPC_CORE, added to the static library only when the
# core is enabled. add_subdirectories() is called before target_sources().
# NOTE(review): presumably so that subdirectories can extend
# HW_SOURCES_XE_HPC_CORE through PARENT_SCOPE first - confirm.
if(SUPPORT_XE_HPC_CORE)
  set(HW_SOURCES_XE_HPC_CORE
      ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
      ${CMAKE_CURRENT_SOURCE_DIR}/cmdlist_xe_hpc_core.inl
      ${CMAKE_CURRENT_SOURCE_DIR}/debugger_xe_hpc_core.cpp
      ${CMAKE_CURRENT_SOURCE_DIR}/enable_family_full_l0_xe_hpc_core.cpp
      ${CMAKE_CURRENT_SOURCE_DIR}/l0_hw_helper_xe_hpc_core.cpp
  )

  add_subdirectories()

  target_sources(${L0_STATIC_LIB_NAME} PRIVATE ${HW_SOURCES_XE_HPC_CORE})
endif()
|
|
@ -0,0 +1,49 @@
|
|||
/*
|
||||
* Copyright (C) 2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "level_zero/core/source/cmdlist/cmdlist_hw.h"
|
||||
|
||||
namespace L0 {
|
||||
|
||||
template <>
|
||||
NEO::PipeControlArgs CommandListCoreFamily<IGFX_XE_HPC_CORE>::createBarrierFlags() {
|
||||
NEO::PipeControlArgs args;
|
||||
args.hdcPipelineFlush = true;
|
||||
args.unTypedDataPortCacheFlush = true;
|
||||
return args;
|
||||
}
|
||||
|
||||
template <>
|
||||
ze_result_t CommandListCoreFamily<IGFX_XE_HPC_CORE>::appendMemoryPrefetch(const void *ptr, size_t size) {
|
||||
using MI_BATCH_BUFFER_END = GfxFamily::MI_BATCH_BUFFER_END;
|
||||
auto allocData = device->getDriverHandle()->getSvmAllocsManager()->getSVMAlloc(ptr);
|
||||
|
||||
if (!allocData) {
|
||||
return ZE_RESULT_ERROR_INVALID_ARGUMENT;
|
||||
}
|
||||
|
||||
if (NEO::DebugManager.flags.AddStatePrefetchCmdToMemoryPrefetchAPI.get() != 1) {
|
||||
return ZE_RESULT_SUCCESS;
|
||||
}
|
||||
|
||||
auto gpuAlloc = allocData->gpuAllocations.getGraphicsAllocation(device->getRootDeviceIndex());
|
||||
auto &hwInfo = device->getHwInfo();
|
||||
|
||||
commandContainer.addToResidencyContainer(gpuAlloc);
|
||||
|
||||
size_t offset = ptrDiff(ptr, gpuAlloc->getGpuAddress());
|
||||
|
||||
NEO::LinearStream &cmdStream = *commandContainer.getCommandStream();
|
||||
|
||||
size_t estimatedSizeRequired = NEO::EncodeMemoryPrefetch<GfxFamily>::getSizeForMemoryPrefetch(size);
|
||||
increaseCommandStreamSpace(estimatedSizeRequired);
|
||||
|
||||
NEO::EncodeMemoryPrefetch<GfxFamily>::programMemoryPrefetch(cmdStream, *gpuAlloc, static_cast<uint32_t>(size), offset, hwInfo);
|
||||
|
||||
return ZE_RESULT_SUCCESS;
|
||||
}
|
||||
} // namespace L0
|
|
@ -0,0 +1,19 @@
|
|||
/*
|
||||
* Copyright (C) 2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "level_zero/core/source/debugger/debugger_l0.inl"
|
||||
|
||||
namespace NEO {
|
||||
struct XE_HPC_COREFamily;
|
||||
using GfxFamily = XE_HPC_COREFamily;
|
||||
|
||||
} // namespace NEO
|
||||
|
||||
namespace L0 {
|
||||
template class DebuggerL0Hw<NEO::GfxFamily>;
|
||||
DebuggerL0PopulateFactory<IGFX_XE_HPC_CORE, NEO::GfxFamily> debuggerXeHpcCore;
|
||||
} // namespace L0
|
|
@ -0,0 +1,30 @@
|
|||
/*
|
||||
* Copyright (C) 2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/command_stream/aub_command_stream_receiver_hw.h"
|
||||
#include "shared/source/command_stream/command_stream_receiver_hw.h"
|
||||
#include "shared/source/command_stream/tbx_command_stream_receiver_hw.h"
|
||||
#include "shared/source/helpers/populate_factory.h"
|
||||
|
||||
#include "level_zero/core/source/helpers/l0_populate_factory.h"
|
||||
#include "level_zero/core/source/hw_helpers/l0_hw_helper.h"
|
||||
|
||||
namespace NEO {
|
||||
|
||||
using Family = XE_HPC_COREFamily;
|
||||
|
||||
struct EnableL0XeHpcCore {
|
||||
EnableL0XeHpcCore() {
|
||||
populateFactoryTable<AUBCommandStreamReceiverHw<Family>>();
|
||||
populateFactoryTable<TbxCommandStreamReceiverHw<Family>>();
|
||||
populateFactoryTable<CommandStreamReceiverHw<Family>>();
|
||||
L0::populateFactoryTable<L0::L0HwHelperHw<Family>>();
|
||||
}
|
||||
};
|
||||
|
||||
static EnableL0XeHpcCore enable;
|
||||
} // namespace NEO
|
|
@ -0,0 +1,11 @@
|
|||
/*
|
||||
* Copyright (C) 2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/xe_hpc_core/hw_cmds.h"
|
||||
#include "shared/source/xe_hpc_core/hw_info.h"
|
||||
|
||||
#include "level_zero/core/source/image/image_hw.inl"
|
|
@ -0,0 +1,27 @@
|
|||
/*
|
||||
* Copyright (C) 2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "level_zero/core/source/helpers/l0_populate_factory.h"
|
||||
#include "level_zero/core/source/hw_helpers/l0_hw_helper_base.inl"
|
||||
#include "level_zero/core/source/hw_helpers/l0_hw_helper_pvc_and_later.inl"
|
||||
|
||||
#include "hw_cmds.h"
|
||||
|
||||
namespace L0 {
|
||||
|
||||
using Family = NEO::XE_HPC_COREFamily;
|
||||
static auto gfxCore = IGFX_XE_HPC_CORE;
|
||||
|
||||
template <>
|
||||
void populateFactoryTable<L0HwHelperHw<Family>>() {
|
||||
extern L0HwHelper *l0HwHelperFactory[IGFX_MAX_CORE];
|
||||
l0HwHelperFactory[gfxCore] = &L0HwHelperHw<Family>::get();
|
||||
}
|
||||
|
||||
template class L0HwHelperHw<Family>;
|
||||
|
||||
} // namespace L0
|
|
@ -0,0 +1,17 @@
|
|||
#
|
||||
# Copyright (C) 2021 Intel Corporation
|
||||
#
|
||||
# SPDX-License-Identifier: MIT
|
||||
#
|
||||
|
||||
# Append the PVC product sources to HW_SOURCES_XE_HPC_CORE and publish the
# extended list to the parent directory scope.
if(SUPPORT_PVC)
  set(HW_SOURCES_XE_HPC_CORE
      ${HW_SOURCES_XE_HPC_CORE}
      ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
      ${CMAKE_CURRENT_SOURCE_DIR}/image_pvc.cpp
      ${CMAKE_CURRENT_SOURCE_DIR}/cmdlist_pvc.cpp
      ${CMAKE_CURRENT_SOURCE_DIR}/cmdqueue_pvc.cpp
      ${CMAKE_CURRENT_SOURCE_DIR}/kernel_pvc.cpp
      PARENT_SCOPE
  )
endif()
|
|
@ -0,0 +1,46 @@
|
|||
/*
|
||||
* Copyright (C) 2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "level_zero/core/source/cmdlist/cmdlist_hw.inl"
|
||||
#include "level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl"
|
||||
#include "level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl"
|
||||
#include "level_zero/core/source/xe_hpc_core/cmdlist_xe_hpc_core.inl"
|
||||
|
||||
#include "cmdlist_extended.inl"
|
||||
|
||||
namespace L0 {
|
||||
template <>
|
||||
void CommandListCoreFamily<IGFX_XE_HPC_CORE>::applyMemoryRangesBarrier(uint32_t numRanges,
|
||||
const size_t *pRangeSizes,
|
||||
const void **pRanges) {
|
||||
|
||||
increaseCommandStreamSpace(NEO::MemorySynchronizationCommands<GfxFamily>::getSizeForSinglePipeControl());
|
||||
|
||||
NEO::PipeControlArgs args = {};
|
||||
args.hdcPipelineFlush = true;
|
||||
args.unTypedDataPortCacheFlush = true;
|
||||
NEO::MemorySynchronizationCommands<GfxFamily>::addPipeControl(*commandContainer.getCommandStream(), args);
|
||||
}
|
||||
|
||||
template struct CommandListCoreFamily<IGFX_XE_HPC_CORE>;
|
||||
|
||||
template <>
|
||||
struct CommandListProductFamily<IGFX_PVC> : public CommandListCoreFamily<IGFX_XE_HPC_CORE> {
|
||||
using CommandListCoreFamily::CommandListCoreFamily;
|
||||
};
|
||||
|
||||
static CommandListPopulateFactory<IGFX_PVC, CommandListProductFamily<IGFX_PVC>>
|
||||
populatePVC;
|
||||
|
||||
template <>
|
||||
struct CommandListImmediateProductFamily<IGFX_PVC> : public CommandListCoreFamilyImmediate<IGFX_XE_HPC_CORE> {
|
||||
using CommandListCoreFamilyImmediate::CommandListCoreFamilyImmediate;
|
||||
};
|
||||
|
||||
static CommandListImmediatePopulateFactory<IGFX_PVC, CommandListImmediateProductFamily<IGFX_PVC>>
|
||||
populatePVCImmediate;
|
||||
} // namespace L0
|
|
@ -0,0 +1,20 @@
|
|||
/*
|
||||
* Copyright (C) 2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/xe_hpc_core/hw_cmds.h"
|
||||
#include "shared/source/xe_hpc_core/hw_info.h"
|
||||
|
||||
#include "level_zero/core/source/cmdqueue/cmdqueue_hw.inl"
|
||||
#include "level_zero/core/source/cmdqueue/cmdqueue_xe_hp_core_and_later.inl"
|
||||
|
||||
#include "cmdqueue_extended.inl"
|
||||
namespace L0 {
|
||||
template struct CommandQueueHw<IGFX_XE_HPC_CORE>;
|
||||
static CommandQueuePopulateFactory<IGFX_PVC, CommandQueueHw<IGFX_XE_HPC_CORE>>
|
||||
populatePVC;
|
||||
|
||||
} // namespace L0
|
|
@ -0,0 +1,23 @@
|
|||
/*
|
||||
* Copyright (C) 2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "level_zero/core/source/xe_hpc_core/image_xe_hpc_core.inl"
|
||||
|
||||
namespace L0 {
|
||||
|
||||
template <>
|
||||
struct ImageProductFamily<IGFX_PVC> : public ImageCoreFamily<IGFX_XE_HPC_CORE> {
|
||||
using ImageCoreFamily::ImageCoreFamily;
|
||||
|
||||
ze_result_t initialize(Device *device, const ze_image_desc_t *desc) override {
|
||||
return ImageCoreFamily<IGFX_XE_HPC_CORE>::initialize(device, desc);
|
||||
};
|
||||
};
|
||||
|
||||
static ImagePopulateFactory<IGFX_PVC, ImageProductFamily<IGFX_PVC>> populatePVC;
|
||||
|
||||
} // namespace L0
|
|
@ -0,0 +1,14 @@
|
|||
/*
|
||||
* Copyright (C) 2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "level_zero/core/source/kernel/kernel_hw.h"
|
||||
|
||||
namespace L0 {
|
||||
|
||||
static KernelPopulateFactory<IGFX_PVC, KernelHw<IGFX_XE_HPC_CORE>> populatePVC;
|
||||
|
||||
} // namespace L0
|
|
@ -142,6 +142,7 @@ using cl_unified_shared_memory_capabilities_intel = cl_bitfield;
|
|||
#define CL_KERNEL_EXEC_INFO_THREAD_ARBITRATION_POLICY_ROUND_ROBIN_INTEL 0x10023
|
||||
#define CL_KERNEL_EXEC_INFO_THREAD_ARBITRATION_POLICY_AFTER_DEPENDENCY_ROUND_ROBIN_INTEL 0x10024
|
||||
#define CL_KERNEL_EXEC_INFO_THREAD_ARBITRATION_POLICY_INTEL 0x10025
|
||||
#define CL_KERNEL_EXEC_INFO_THREAD_ARBITRATION_POLICY_STALL_BASED_ROUND_ROBIN_INTEL 0x10026
|
||||
|
||||
/******************************
|
||||
* SLICE COUNT SELECTING *
|
||||
|
|
|
@ -58,6 +58,10 @@ if(SUPPORT_XEHP_AND_LATER)
|
|||
)
|
||||
endif()
|
||||
|
||||
# The PVC-and-later ClHwHelper specializations are listed only when at least
# one PVC-and-later core is supported.
if(SUPPORT_PVC_AND_LATER)
  list(APPEND RUNTIME_SRCS_HELPERS_BASE ${CMAKE_CURRENT_SOURCE_DIR}/cl_hw_helper_pvc_and_later.inl)
endif()
|
||||
|
||||
set(RUNTIME_SRCS_HELPERS_WINDOWS
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/windows/gl_helper.h
|
||||
)
|
||||
|
|
|
@ -0,0 +1,23 @@
|
|||
/*
|
||||
* Copyright (C) 2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "opencl/extensions/public/cl_ext_private.h"
|
||||
|
||||
template <>
|
||||
inline bool ClHwHelperHw<Family>::preferBlitterForLocalToLocalTransfers() const {
|
||||
return true;
|
||||
}
|
||||
|
||||
template <>
|
||||
std::vector<uint32_t> ClHwHelperHw<Family>::getSupportedThreadArbitrationPolicies() const {
|
||||
return std::vector<uint32_t>{CL_KERNEL_EXEC_INFO_THREAD_ARBITRATION_POLICY_OLDEST_FIRST_INTEL, CL_KERNEL_EXEC_INFO_THREAD_ARBITRATION_POLICY_ROUND_ROBIN_INTEL, CL_KERNEL_EXEC_INFO_THREAD_ARBITRATION_POLICY_AFTER_DEPENDENCY_ROUND_ROBIN_INTEL, CL_KERNEL_EXEC_INFO_THREAD_ARBITRATION_POLICY_STALL_BASED_ROUND_ROBIN_INTEL};
|
||||
}
|
||||
|
||||
template <>
|
||||
bool ClHwHelperHw<Family>::allowImageCompression(cl_image_format format) const {
|
||||
return true;
|
||||
}
|
|
@ -0,0 +1,14 @@
|
|||
#
|
||||
# Copyright (C) 2021 Intel Corporation
|
||||
#
|
||||
# SPDX-License-Identifier: MIT
|
||||
#
|
||||
|
||||
# OpenCL runtime sources for XE_HPC_CORE. The list is added to the static
# library and also published as a global property of the same name.
if(SUPPORT_XE_HPC_CORE)
  set(RUNTIME_SRCS_XE_HPC_CORE
      ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
  )

  target_sources(${NEO_STATIC_LIB_NAME} PRIVATE ${RUNTIME_SRCS_XE_HPC_CORE})
  set_property(GLOBAL PROPERTY RUNTIME_SRCS_XE_HPC_CORE ${RUNTIME_SRCS_XE_HPC_CORE})
endif()
|
|
@ -0,0 +1,13 @@
|
|||
#
|
||||
# Copyright (C) 2021 Intel Corporation
|
||||
#
|
||||
# SPDX-License-Identifier: MIT
|
||||
#
|
||||
|
||||
# Additional XE_HPC_CORE definition files. Their directory is put on the
# include path, and the list is both added to the static library and published
# as a global property of the same name.
set(RUNTIME_SRCS_ADDITIONAL_FILES_XE_HPC_CORE
    ${CMAKE_CURRENT_SOURCE_DIR}/xe_hpc_core/definitions${BRANCH_DIR_SUFFIX}gtpin_setup_xe_hpc_core.inl
)
include_directories(${NEO_SOURCE_DIR}/opencl/source/xe_hpc_core/definitions${BRANCH_DIR_SUFFIX})

target_sources(${NEO_STATIC_LIB_NAME} PRIVATE ${RUNTIME_SRCS_ADDITIONAL_FILES_XE_HPC_CORE})
set_property(GLOBAL PROPERTY RUNTIME_SRCS_ADDITIONAL_FILES_XE_HPC_CORE ${RUNTIME_SRCS_ADDITIONAL_FILES_XE_HPC_CORE})
|
|
@ -0,0 +1,20 @@
|
|||
/*
|
||||
* Copyright (C) 2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/xe_hpc_core/hw_cmds.h"
|
||||
|
||||
#include "opencl/source/mem_obj/buffer_base.inl"
|
||||
|
||||
namespace NEO {
|
||||
|
||||
using Family = XE_HPC_COREFamily;
|
||||
static auto gfxCore = IGFX_XE_HPC_CORE;
|
||||
|
||||
template class BufferHw<Family>;
|
||||
|
||||
#include "opencl/source/mem_obj/buffer_factory_init.inl"
|
||||
} // namespace NEO
|
|
@ -0,0 +1,54 @@
|
|||
/*
|
||||
* Copyright (C) 2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/helpers/populate_factory.h"
|
||||
|
||||
#include "opencl/source/helpers/cl_hw_helper_base.inl"
|
||||
#include "opencl/source/helpers/cl_hw_helper_xehp_and_later.inl"
|
||||
|
||||
#include "hw_cmds.h"
|
||||
|
||||
namespace NEO {
|
||||
|
||||
using Family = XE_HPC_COREFamily;
|
||||
static auto gfxCore = IGFX_XE_HPC_CORE;
|
||||
|
||||
#include "opencl/source/helpers/cl_hw_helper_pvc_and_later.inl"
|
||||
|
||||
template <>
|
||||
void populateFactoryTable<ClHwHelperHw<Family>>() {
|
||||
extern ClHwHelper *clHwHelperFactory[IGFX_MAX_CORE];
|
||||
clHwHelperFactory[gfxCore] = &ClHwHelperHw<Family>::get();
|
||||
}
|
||||
|
||||
template <>
|
||||
bool ClHwHelperHw<Family>::requiresAuxResolves(const KernelInfo &kernelInfo, const HardwareInfo &hwInfo) const {
|
||||
return false;
|
||||
}
|
||||
|
||||
template <>
|
||||
inline bool ClHwHelperHw<Family>::getQueueFamilyName(std::string &name, EngineGroupType type) const {
|
||||
switch (type) {
|
||||
case EngineGroupType::RenderCompute:
|
||||
name = "cccs";
|
||||
return true;
|
||||
case EngineGroupType::LinkedCopy:
|
||||
name = "linked bcs";
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
template <>
|
||||
cl_version ClHwHelperHw<Family>::getDeviceIpVersion(const HardwareInfo &hwInfo) const {
|
||||
return makeDeviceIpVersion(12, 8, makeDeviceRevision(hwInfo));
|
||||
}
|
||||
|
||||
template class ClHwHelperHw<Family>;
|
||||
|
||||
} // namespace NEO
|
|
@ -0,0 +1,40 @@
|
|||
/*
|
||||
* Copyright (C) 2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/helpers/populate_factory.h"
|
||||
#include "shared/source/memory_manager/unified_memory_manager.h"
|
||||
|
||||
#include "opencl/source/command_queue/command_queue_hw.h"
|
||||
#include "opencl/source/command_queue/enqueue_resource_barrier.h"
|
||||
|
||||
#include "enqueue_init_dispatch_globals.h"
|
||||
|
||||
namespace NEO {
|
||||
|
||||
using Family = XE_HPC_COREFamily;
|
||||
static auto gfxCore = IGFX_XE_HPC_CORE;
|
||||
} // namespace NEO
|
||||
|
||||
#include "opencl/source/command_queue/command_queue_hw_xehp_and_later.inl"
|
||||
|
||||
namespace NEO {
|
||||
template <>
|
||||
void populateFactoryTable<CommandQueueHw<Family>>() {
|
||||
extern CommandQueueCreateFunc commandQueueFactory[IGFX_MAX_CORE];
|
||||
commandQueueFactory[gfxCore] = CommandQueueHw<Family>::create;
|
||||
}
|
||||
|
||||
template <>
|
||||
bool CommandQueueHw<Family>::isCacheFlushForBcsRequired() const {
|
||||
if (DebugManager.flags.ForceCacheFlushForBcs.get() != -1) {
|
||||
return !!DebugManager.flags.ForceCacheFlushForBcs.get();
|
||||
}
|
||||
return false;
|
||||
}
|
||||
} // namespace NEO
|
||||
|
||||
template class NEO::CommandQueueHw<NEO::Family>;
|
|
@ -0,0 +1,11 @@
|
|||
/*
|
||||
* Copyright (C) 2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
template <>
|
||||
uint32_t GTPinHwHelperHw<Family>::getGenVersion() {
|
||||
return gtpin::GTPIN_GEN_INVALID;
|
||||
}
|
|
@ -0,0 +1,37 @@
|
|||
/*
|
||||
* Copyright (C) 2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/command_stream/aub_command_stream_receiver_hw.h"
|
||||
#include "shared/source/command_stream/tbx_command_stream_receiver_hw.h"
|
||||
#include "shared/source/helpers/populate_factory.h"
|
||||
|
||||
#include "opencl/source/command_queue/command_queue_hw.h"
|
||||
#include "opencl/source/device_queue/device_queue_hw.h"
|
||||
#include "opencl/source/helpers/cl_hw_helper.h"
|
||||
#include "opencl/source/mem_obj/buffer.h"
|
||||
#include "opencl/source/mem_obj/image.h"
|
||||
#include "opencl/source/sampler/sampler.h"
|
||||
|
||||
namespace NEO {
|
||||
|
||||
using Family = XE_HPC_COREFamily;
|
||||
|
||||
struct EnableOCLXeHpcCore {
|
||||
EnableOCLXeHpcCore() {
|
||||
populateFactoryTable<AUBCommandStreamReceiverHw<Family>>();
|
||||
populateFactoryTable<BufferHw<Family>>();
|
||||
populateFactoryTable<ClHwHelperHw<Family>>();
|
||||
populateFactoryTable<CommandQueueHw<Family>>();
|
||||
populateFactoryTable<CommandStreamReceiverHw<Family>>();
|
||||
populateFactoryTable<ImageHw<Family>>();
|
||||
populateFactoryTable<SamplerHw<Family>>();
|
||||
populateFactoryTable<TbxCommandStreamReceiverHw<Family>>();
|
||||
}
|
||||
};
|
||||
|
||||
static EnableOCLXeHpcCore enable;
|
||||
} // namespace NEO
|
|
@ -0,0 +1,19 @@
|
|||
/*
|
||||
* Copyright (C) 2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "opencl/source/command_queue/gpgpu_walker_xehp_and_later.inl"
|
||||
#include "opencl/source/command_queue/hardware_interface_xehp_and_later.inl"
|
||||
|
||||
namespace NEO {
|
||||
|
||||
template class GpgpuWalkerHelper<XE_HPC_COREFamily>;
|
||||
|
||||
template class HardwareInterface<XE_HPC_COREFamily>;
|
||||
|
||||
template struct EnqueueOperation<XE_HPC_COREFamily>;
|
||||
|
||||
} // namespace NEO
|
|
@ -0,0 +1,33 @@
|
|||
/*
|
||||
* Copyright (C) 2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "opencl/source/gtpin/gtpin_hw_helper.h"
|
||||
#include "opencl/source/gtpin/gtpin_hw_helper.inl"
|
||||
#include "opencl/source/gtpin/gtpin_hw_helper_xehp_and_later.inl"
|
||||
|
||||
#include "ocl_igc_shared/gtpin/gtpin_ocl_interface.h"
|
||||
|
||||
namespace NEO {
|
||||
|
||||
extern GTPinHwHelper *gtpinHwHelperFactory[IGFX_MAX_CORE];
|
||||
|
||||
using Family = XE_HPC_COREFamily;
|
||||
static const auto gfxFamily = IGFX_XE_HPC_CORE;
|
||||
|
||||
template class GTPinHwHelperHw<Family>;
|
||||
|
||||
struct GTPinEnableXeHpcCore {
|
||||
GTPinEnableXeHpcCore() {
|
||||
gtpinHwHelperFactory[gfxFamily] = >PinHwHelperHw<Family>::get();
|
||||
}
|
||||
};
|
||||
|
||||
#include "gtpin_setup_xe_hpc_core.inl"
|
||||
|
||||
static GTPinEnableXeHpcCore gtpinEnable;
|
||||
|
||||
} // namespace NEO
|
|
@ -0,0 +1,19 @@
|
|||
/*
|
||||
* Copyright (C) 2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/helpers/cache_flush_xehp_and_later.inl"
|
||||
#include "shared/source/xe_hpc_core/hw_cmds.h"
|
||||
|
||||
#include "opencl/source/helpers/hardware_commands_helper.h"
|
||||
#include "opencl/source/helpers/hardware_commands_helper_base.inl"
|
||||
#include "opencl/source/helpers/hardware_commands_helper_xehp_and_later.inl"
|
||||
|
||||
namespace NEO {
|
||||
using FamilyType = XE_HPC_COREFamily;
|
||||
|
||||
template struct HardwareCommandsHelper<FamilyType>;
|
||||
} // namespace NEO
|
|
@ -0,0 +1,28 @@
|
|||
/*
|
||||
* Copyright (C) 2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/gmm_helper/client_context/gmm_client_context.h"
|
||||
#include "shared/source/xe_hpc_core/hw_cmds.h"
|
||||
|
||||
#include "opencl/source/context/context.h"
|
||||
#include "opencl/source/mem_obj/image.inl"
|
||||
|
||||
namespace NEO {
|
||||
|
||||
using Family = XE_HPC_COREFamily;
|
||||
static auto gfxCore = IGFX_XE_HPC_CORE;
|
||||
|
||||
} // namespace NEO
|
||||
|
||||
#include "opencl/source/mem_obj/image_xehp_and_later.inl"
|
||||
|
||||
namespace NEO {
|
||||
// clang-format off
|
||||
#include "opencl/source/mem_obj/image_tgllp_and_later.inl"
|
||||
#include "opencl/source/mem_obj/image_factory_init.inl"
|
||||
// clang-format on
|
||||
} // namespace NEO
|
|
@ -0,0 +1,11 @@
|
|||
/*
|
||||
* Copyright (C) 2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/xe_hpc_core/hw_cmds_base.h"
|
||||
using Family = NEO::XE_HPC_COREFamily;
|
||||
constexpr static auto gfxCore = IGFX_XE_HPC_CORE;
|
||||
#include "opencl/source/sampler/sampler_tgllp_and_later.inl"
|
|
@ -28,5 +28,11 @@ if(SUPPORT_XEHP_AND_LATER)
|
|||
)
|
||||
endif()
|
||||
|
||||
# The PVC-and-later AUB memory dump definitions are listed only when at least
# one PVC-and-later core is supported.
if(SUPPORT_PVC_AND_LATER)
  list(APPEND NEO_CORE_AUB_MEM_DUMP
       ${CMAKE_CURRENT_SOURCE_DIR}/aub_mem_dump_pvc_and_later.inl
  )
endif()
|
||||
|
||||
set_property(GLOBAL PROPERTY NEO_CORE_AUB_MEM_DUMP ${NEO_CORE_AUB_MEM_DUMP})
|
||||
add_subdirectories()
|
||||
|
|
|
@ -0,0 +1,322 @@
|
|||
/*
|
||||
* Copyright (C) 2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/aub_mem_dump/aub_alloc_dump.inl"
|
||||
#include "shared/source/aub_mem_dump/aub_mem_dump.inl"
|
||||
#include "shared/source/debug_settings/debug_settings_manager.h"
|
||||
#include "shared/source/helpers/array_count.h"
|
||||
#include "shared/source/helpers/completion_stamp.h"
|
||||
#include "shared/source/helpers/hw_helper.h"
|
||||
|
||||
#include "aub_mapper.h"
|
||||
#include "config.h"
|
||||
#include "reg_configs_common.h"
|
||||
|
||||
namespace AubMemDump {
|
||||
|
||||
enum {
|
||||
device = deviceValue
|
||||
};
|
||||
|
||||
// Instantiate these common template implementations.
|
||||
template struct AubDump<Traits<device, 32>>;
|
||||
template struct AubDump<Traits<device, 48>>;
|
||||
|
||||
template struct AubPageTableHelper32<Traits<device, 32>>;
|
||||
template struct AubPageTableHelper64<Traits<device, 48>>;
|
||||
} // namespace AubMemDump
|
||||
|
||||
namespace NEO {
|
||||
|
||||
static const AubMemDump::LrcaHelperRcs rcs(0x002000);
|
||||
static const AubMemDump::LrcaHelperBcs bcs(0x022000);
|
||||
static const AubMemDump::LrcaHelperVcs vcs(0x1c0000);
|
||||
static const AubMemDump::LrcaHelperVecs vecs(0x1c8000);
|
||||
static const AubMemDump::LrcaHelperCcs ccs(0x1a000);
|
||||
static const AubMemDump::LrcaHelperCcs ccs1(0x1c000);
|
||||
static const AubMemDump::LrcaHelperCcs ccs2(0x1e000);
|
||||
static const AubMemDump::LrcaHelperCcs ccs3(0x26000);
|
||||
static const AubMemDump::LrcaHelperCccs cccs(-1);
|
||||
static const AubMemDump::LrcaHelperLinkBcs linkBcs1(-1, 1);
|
||||
static const AubMemDump::LrcaHelperLinkBcs linkBcs2(-1, 2);
|
||||
static const AubMemDump::LrcaHelperLinkBcs linkBcs3(-1, 3);
|
||||
static const AubMemDump::LrcaHelperLinkBcs linkBcs4(-1, 4);
|
||||
static const AubMemDump::LrcaHelperLinkBcs linkBcs5(-1, 5);
|
||||
static const AubMemDump::LrcaHelperLinkBcs linkBcs6(-1, 6);
|
||||
static const AubMemDump::LrcaHelperLinkBcs linkBcs7(-1, 7);
|
||||
static const AubMemDump::LrcaHelperLinkBcs linkBcs8(-1, 8);
|
||||
|
||||
const AubMemDump::LrcaHelper *const AUBFamilyMapper<Family>::csTraits[aub_stream::NUM_ENGINES] = {
|
||||
&rcs,
|
||||
&bcs,
|
||||
&vcs,
|
||||
&vecs,
|
||||
&ccs,
|
||||
&ccs1,
|
||||
&ccs2,
|
||||
&ccs3,
|
||||
&cccs,
|
||||
&linkBcs1,
|
||||
&linkBcs2,
|
||||
&linkBcs3,
|
||||
&linkBcs4,
|
||||
&linkBcs5,
|
||||
&linkBcs6,
|
||||
&linkBcs7,
|
||||
&linkBcs8};
|
||||
|
||||
const MMIOList AUBFamilyMapper<Family>::globalMMIO = {
|
||||
// GLOBAL_MOCS
|
||||
MMIOPair(0x00004000, 0x00000008),
|
||||
MMIOPair(0x00004004, 0x00000038),
|
||||
MMIOPair(0x00004008, 0x00000038),
|
||||
MMIOPair(0x0000400C, 0x00000008),
|
||||
MMIOPair(0x00004010, 0x00000018),
|
||||
MMIOPair(0x00004014, 0x00060038),
|
||||
MMIOPair(0x00004018, 0x00000000),
|
||||
MMIOPair(0x0000401C, 0x00000033),
|
||||
MMIOPair(0x00004020, 0x00060037),
|
||||
MMIOPair(0x00004024, 0x0000003B),
|
||||
MMIOPair(0x00004028, 0x00000032),
|
||||
MMIOPair(0x0000402C, 0x00000036),
|
||||
MMIOPair(0x00004030, 0x0000003A),
|
||||
MMIOPair(0x00004034, 0x00000033),
|
||||
MMIOPair(0x00004038, 0x00000037),
|
||||
MMIOPair(0x0000403C, 0x0000003B),
|
||||
MMIOPair(0x00004040, 0x00000030),
|
||||
MMIOPair(0x00004044, 0x00000034),
|
||||
MMIOPair(0x00004048, 0x00000038),
|
||||
MMIOPair(0x0000404C, 0x00000031),
|
||||
MMIOPair(0x00004050, 0x00000032),
|
||||
MMIOPair(0x00004054, 0x00000036),
|
||||
MMIOPair(0x00004058, 0x0000003A),
|
||||
MMIOPair(0x0000405C, 0x00000033),
|
||||
MMIOPair(0x00004060, 0x00000037),
|
||||
MMIOPair(0x00004064, 0x0000003B),
|
||||
MMIOPair(0x00004068, 0x00000032),
|
||||
MMIOPair(0x0000406C, 0x00000036),
|
||||
MMIOPair(0x00004070, 0x0000003A),
|
||||
MMIOPair(0x00004074, 0x00000033),
|
||||
MMIOPair(0x00004078, 0x00000037),
|
||||
MMIOPair(0x0000407C, 0x0000003B),
|
||||
MMIOPair(0x00004080, 0x00000030),
|
||||
MMIOPair(0x00004084, 0x00000034),
|
||||
MMIOPair(0x00004088, 0x00000038),
|
||||
MMIOPair(0x0000408C, 0x00000031),
|
||||
MMIOPair(0x00004090, 0x00000032),
|
||||
MMIOPair(0x00004094, 0x00000036),
|
||||
MMIOPair(0x00004098, 0x0000003A),
|
||||
MMIOPair(0x0000409C, 0x00000033),
|
||||
MMIOPair(0x000040A0, 0x00000037),
|
||||
MMIOPair(0x000040A4, 0x0000003B),
|
||||
MMIOPair(0x000040A8, 0x00000032),
|
||||
MMIOPair(0x000040AC, 0x00000036),
|
||||
MMIOPair(0x000040B0, 0x0000003A),
|
||||
MMIOPair(0x000040B4, 0x00000033),
|
||||
MMIOPair(0x000040B8, 0x00000037),
|
||||
MMIOPair(0x000040BC, 0x0000003B),
|
||||
MMIOPair(0x000040C0, 0x00000038),
|
||||
MMIOPair(0x000040C4, 0x00000034),
|
||||
MMIOPair(0x000040C8, 0x00000038),
|
||||
MMIOPair(0x000040CC, 0x00000031),
|
||||
MMIOPair(0x000040D0, 0x00000032),
|
||||
MMIOPair(0x000040D4, 0x00000036),
|
||||
MMIOPair(0x000040D8, 0x0000003A),
|
||||
MMIOPair(0x000040DC, 0x00000033),
|
||||
MMIOPair(0x000040E0, 0x00000037),
|
||||
MMIOPair(0x000040E4, 0x0000003B),
|
||||
MMIOPair(0x000040E8, 0x00000032),
|
||||
MMIOPair(0x000040EC, 0x00000036),
|
||||
MMIOPair(0x000040F0, 0x00000038),
|
||||
MMIOPair(0x000040F4, 0x00000038),
|
||||
MMIOPair(0x000040F8, 0x00000038),
|
||||
MMIOPair(0x000040FC, 0x00000038),
|
||||
|
||||
// LNCF_MOCS
|
||||
MMIOPair(0x0000B020, 0x00300010),
|
||||
MMIOPair(0x0000B024, 0x00300010),
|
||||
MMIOPair(0x0000B028, 0x00300030),
|
||||
MMIOPair(0x0000B02C, 0x00000000),
|
||||
MMIOPair(0x0000B030, 0x0030001F),
|
||||
MMIOPair(0x0000B034, 0x00170013),
|
||||
MMIOPair(0x0000B038, 0x0000001F),
|
||||
MMIOPair(0x0000B03C, 0x00000000),
|
||||
MMIOPair(0x0000B040, 0x00100000),
|
||||
MMIOPair(0x0000B044, 0x00170013),
|
||||
MMIOPair(0x0000B048, 0x0010001F),
|
||||
MMIOPair(0x0000B04C, 0x00170013),
|
||||
MMIOPair(0x0000B050, 0x0030001F),
|
||||
MMIOPair(0x0000B054, 0x00170013),
|
||||
MMIOPair(0x0000B058, 0x0000001F),
|
||||
MMIOPair(0x0000B05C, 0x00000000),
|
||||
MMIOPair(0x0000B060, 0x00100000),
|
||||
MMIOPair(0x0000B064, 0x00170013),
|
||||
MMIOPair(0x0000B068, 0x0010001F),
|
||||
MMIOPair(0x0000B06C, 0x00170013),
|
||||
MMIOPair(0x0000B070, 0x0030001F),
|
||||
MMIOPair(0x0000B074, 0x00170013),
|
||||
MMIOPair(0x0000B078, 0x0000001F),
|
||||
MMIOPair(0x0000B07C, 0x00000000),
|
||||
MMIOPair(0x0000B080, 0x00300030),
|
||||
MMIOPair(0x0000B084, 0x00170013),
|
||||
MMIOPair(0x0000B088, 0x0010001F),
|
||||
MMIOPair(0x0000B08C, 0x00170013),
|
||||
MMIOPair(0x0000B090, 0x0030001F),
|
||||
MMIOPair(0x0000B094, 0x00170013),
|
||||
MMIOPair(0x0000B098, 0x00300010),
|
||||
MMIOPair(0x0000B09C, 0x00300010),
|
||||
|
||||
//PAT_INDEX
|
||||
MMIOPair(0x00004100, 0x0000000),
|
||||
MMIOPair(0x00004104, 0x0000000),
|
||||
MMIOPair(0x00004108, 0x0000000),
|
||||
MMIOPair(0x0000410c, 0x0000000),
|
||||
MMIOPair(0x00004110, 0x0000000),
|
||||
MMIOPair(0x00004114, 0x0000000),
|
||||
MMIOPair(0x00004118, 0x0000000),
|
||||
MMIOPair(0x0000411c, 0x0000000),
|
||||
|
||||
MMIOPair(0x00004b80, 0xffff1001), //GACB_PERF_CTRL_REG
|
||||
MMIOPair(0x00007000, 0xffff0000), //CACHE_MODE_0
|
||||
MMIOPair(0x00007004, 0xffff0000), //CACHE_MODE_1
|
||||
MMIOPair(0x000043F8, 0x00000000), //Gen12 (A-step) chicken bit for AuxT granularity
|
||||
MMIOPair(0x00009008, 0x00000200), //IDICR
|
||||
MMIOPair(0x0000900c, 0x00001b40), //SNPCR
|
||||
MMIOPair(0x0000b120, 0x14000002), //LTCDREG
|
||||
MMIOPair(0x00042080, 0x00000000), //CHICKEN_MISC_1
|
||||
MMIOPair(0x000020D4, 0xFFFF0000), //Chicken bit for CSFE
|
||||
MMIOPair(0x0000B0A0, 0x00000000), //SCRATCH 2 for LNCF unit
|
||||
MMIOPair(0x000094D4, 0x00000000), //Slice unit Level Clock Gating Control
|
||||
|
||||
// Capture Perf MMIO register programming
|
||||
MMIOPair(0x0000B004, 0x2FC0100B), //KM_ARBITER_CTRL_REG
|
||||
MMIOPair(0x0000B404, 0x00000160), //KM_GLOBAL_INVALIDATION_REG
|
||||
MMIOPair(0x00008708, 0x00000000), //KM_GEN12_IDI_CONTROL_REGISTER
|
||||
|
||||
// Tiled Resources VA Translation Table L3 Pointer
|
||||
MMIOPair(0x00004410, 0xffffffff), //GEN12_TRTT_NULL_TILE_REG
|
||||
MMIOPair(0x00004414, 0xfffffffe), //GEN12_TRTT_INVD_TILE_REG
|
||||
MMIOPair(0x00004404, 0x000000ff), //GEN12_TRTT_VA_MASKDATA_REG
|
||||
MMIOPair(0x00004408, 0x00000000), //LDWORD GMM_GEN12_TRTT_L3_POINTER
|
||||
MMIOPair(0x0000440C, 0x00000000), //UDWORD GMM_GEN12_TRTT_L3_POINTER
|
||||
MMIOPair(0x00004400, 0x00000001), //GEN12_TRTT_TABLE_CONTROL
|
||||
MMIOPair(0x00004DFC, 0x00000000), //GEN9_TR_CHICKEN_BIT_VECTOR
|
||||
};
|
||||
|
||||
// Per-engine MMIO list for RCS; referenced from AUBFamilyMapper<Family>::perEngineMMIO.
// Entries built with AubMemDump::computeRegisterOffset() are relative to rcs.mmioBase,
// the remaining entries use fixed register addresses.
static const MMIOList mmioListRCS = {
    // Registers addressed relative to the RCS MMIO base
    MMIOPair(AubMemDump::computeRegisterOffset(rcs.mmioBase, 0x00002058), 0x00000000), // CTX_WA_PTR_RCSUNIT
    MMIOPair(AubMemDump::computeRegisterOffset(rcs.mmioBase, 0x000020a8), 0x00000000), // IMR
    MMIOPair(AubMemDump::computeRegisterOffset(rcs.mmioBase, 0x0000229c), 0xffff8280), // GFX_MODE

    // Chicken / debug registers at fixed addresses
    MMIOPair(0x00002090, 0xffff0000), // CHICKEN_PWR_CTX_RASTER_1
    MMIOPair(0x000020e0, 0xffff4000), // FF_SLICE_CS_CHICKEN1_RCSUNIT
    MMIOPair(0x000020e4, 0xffff0000), // FF_SLICE_CS_CHICKEN2_RCSUNIT
    MMIOPair(0x000020ec, 0xffff0051), // CS_DEBUG_MODE1

    // FORCE_TO_NONPRIV (first slot carries 0x00007014, the other eleven 0x0000e000)
    MMIOPair(AubMemDump::computeRegisterOffset(rcs.mmioBase, 0x000024d0), 0x00007014),
    MMIOPair(AubMemDump::computeRegisterOffset(rcs.mmioBase, 0x000024d4), 0x0000e000),
    MMIOPair(AubMemDump::computeRegisterOffset(rcs.mmioBase, 0x000024d8), 0x0000e000),
    MMIOPair(AubMemDump::computeRegisterOffset(rcs.mmioBase, 0x000024dc), 0x0000e000),
    MMIOPair(AubMemDump::computeRegisterOffset(rcs.mmioBase, 0x000024e0), 0x0000e000),
    MMIOPair(AubMemDump::computeRegisterOffset(rcs.mmioBase, 0x000024e4), 0x0000e000),
    MMIOPair(AubMemDump::computeRegisterOffset(rcs.mmioBase, 0x000024e8), 0x0000e000),
    MMIOPair(AubMemDump::computeRegisterOffset(rcs.mmioBase, 0x000024ec), 0x0000e000),
    MMIOPair(AubMemDump::computeRegisterOffset(rcs.mmioBase, 0x000024f0), 0x0000e000),
    MMIOPair(AubMemDump::computeRegisterOffset(rcs.mmioBase, 0x000024f4), 0x0000e000),
    MMIOPair(AubMemDump::computeRegisterOffset(rcs.mmioBase, 0x000024f8), 0x0000e000),
    MMIOPair(AubMemDump::computeRegisterOffset(rcs.mmioBase, 0x000024fc), 0x0000e000),

    MMIOPair(0x00002580, 0xffff0005), // CS_CHICKEN1
    MMIOPair(0x0000e194, 0xffff0002), // CHICKEN_SAMPLER_2

    MMIOPair(0x0000B134, 0xA0000000) // L3ALLOCREG
};
|
||||
|
||||
// Per-engine MMIO lists for BCS, VCS and VECS. Each one holds a single GFX_MODE
// entry (value 0xffff8280) addressed relative to that engine's MMIO base.
static const MMIOList mmioListBCS = {
    MMIOPair(AubMemDump::computeRegisterOffset(bcs.mmioBase, 0x0000229c), 0xffff8280), // GFX_MODE
};

static const MMIOList mmioListVCS = {
    MMIOPair(AubMemDump::computeRegisterOffset(vcs.mmioBase, 0x0000229c), 0xffff8280), // GFX_MODE
};

static const MMIOList mmioListVECS = {
    MMIOPair(AubMemDump::computeRegisterOffset(vecs.mmioBase, 0x0000229c), 0xffff8280), // GFX_MODE
};
|
||||
|
||||
// Builds the MMIO list for one CCS instance.
//
// mmioBase: MMIO base of the instance. GFX_MODE and the FORCE_TO_NONPRIV slots are
//           addressed relative to it through AubMemDump::computeRegisterOffset().
// Returns:  the list by value. All other entries use fixed register addresses and
//           are therefore identical for every instance.
static MMIOList mmioListCCSInstance(uint32_t mmioBase) {
    MMIOList mmioList;

    mmioList.push_back(MMIOPair(0x0000ce90, 0x00030003));                                              // GFX_MULT_CTXT_CTL - enable multi-context with 4CCS
    mmioList.push_back(MMIOPair(0x0000b170, 0x00030003));                                              // MULT_CTXT_CTL - enable multi-context with 4CCS
    mmioList.push_back(MMIOPair(0x00014800, 0xFFFF0001));                                              // RCU_MODE
    mmioList.push_back(MMIOPair(AubMemDump::computeRegisterOffset(mmioBase, 0x0000229c), 0xffff8280)); // GFX_MODE

    // FORCE_TO_NONPRIV: twelve consecutive dword slots (0x24d0 .. 0x24fc), all programmed
    // with the same value, emitted in ascending address order.
    constexpr uint32_t firstNonPrivSlot = 0x000024d0;
    constexpr uint32_t lastNonPrivSlot = 0x000024fc;
    for (uint32_t slotOffset = firstNonPrivSlot; slotOffset <= lastNonPrivSlot; slotOffset += 4u) {
        mmioList.push_back(MMIOPair(AubMemDump::computeRegisterOffset(mmioBase, slotOffset), 0x0000e000));
    }

    mmioList.push_back(MMIOPair(0x0000B234, 0xA0000000)); // L3ALLOCREG_CCS0

    return mmioList;
}
|
||||
|
||||
// One list per CCS instance, each built from that instance's MMIO base.
static const MMIOList mmioListCCS = mmioListCCSInstance(ccs.mmioBase);
static const MMIOList mmioListCCS1 = mmioListCCSInstance(ccs1.mmioBase);
static const MMIOList mmioListCCS2 = mmioListCCSInstance(ccs2.mmioBase);
static const MMIOList mmioListCCS3 = mmioListCCSInstance(ccs3.mmioBase);

// CCCS and the link copy engines have no engine-specific MMIO writes.
static const MMIOList mmioListCCCS = {};
static const MMIOList mmioListLinkBCS = {};

// Table of per-engine MMIO lists with aub_stream::NUM_ENGINES slots.
// The last eight slots all share the (empty) mmioListLinkBCS.
const MMIOList *AUBFamilyMapper<Family>::perEngineMMIO[aub_stream::NUM_ENGINES] = {
    &mmioListRCS,
    &mmioListBCS,
    &mmioListVCS,
    &mmioListVECS,
    &mmioListCCS,
    &mmioListCCS1,
    &mmioListCCS2,
    &mmioListCCS3,
    &mmioListCCCS,
    &mmioListLinkBCS,
    &mmioListLinkBCS,
    &mmioListLinkBCS,
    &mmioListLinkBCS,
    &mmioListLinkBCS,
    &mmioListLinkBCS,
    &mmioListLinkBCS,
    &mmioListLinkBCS};
|
||||
} // namespace NEO
|
||||
|
||||
namespace AubAllocDump {
|
||||
using namespace NEO;
|
||||
|
||||
template SurfaceInfo *getDumpSurfaceInfo<Family>(GraphicsAllocation &gfxAllocation, DumpFormat dumpFormat);
|
||||
|
||||
template uint32_t getImageSurfaceTypeFromGmmResourceType<Family>(GMM_RESOURCE_TYPE gmmResourceType);
|
||||
|
||||
template void dumpBufferInBinFormat<Family>(GraphicsAllocation &gfxAllocation, AubMemDump::AubFileStream *stream, uint32_t context);
|
||||
|
||||
template void dumpImageInBmpFormat<Family>(GraphicsAllocation &gfxAllocation, AubMemDump::AubFileStream *stream, uint32_t context);
|
||||
|
||||
template void dumpBufferInTreFormat<Family>(GraphicsAllocation &gfxAllocation, AubMemDump::AubFileStream *stream, uint32_t context);
|
||||
|
||||
template void dumpImageInTreFormat<Family>(GraphicsAllocation &gfxAllocation, AubMemDump::AubFileStream *stream, uint32_t context);
|
||||
|
||||
template void dumpAllocation<Family>(DumpFormat dumpFormat, GraphicsAllocation &gfxAllocation, AubMemDump::AubFileStream *stream, uint32_t context);
|
||||
} // namespace AubAllocDump
|
|
@ -106,6 +106,7 @@ struct CmdServicesMemTraceVersion {
|
|||
XeHP_SDV = 29,
|
||||
Adlp = 34,
|
||||
Dg2 = 36,
|
||||
Pvc = 39,
|
||||
};
|
||||
};
|
||||
struct RecordingMethodValues {
|
||||
|
|
|
@ -36,6 +36,12 @@ if(SUPPORT_DG2_AND_LATER)
|
|||
)
|
||||
endif()
|
||||
|
||||
if(SUPPORT_PVC_AND_LATER)
|
||||
list(APPEND NEO_CORE_COMMAND_CONTAINER
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/memory_fence_encoder.h
|
||||
)
|
||||
endif()
|
||||
|
||||
set_property(GLOBAL PROPERTY NEO_CORE_COMMAND_CONTAINER ${NEO_CORE_COMMAND_CONTAINER})
|
||||
|
||||
add_subdirectories()
|
||||
|
|
|
@ -0,0 +1,28 @@
|
|||
/*
|
||||
* Copyright (C) 2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "shared/source/command_stream/linear_stream.h"
|
||||
#include "shared/source/memory_manager/graphics_allocation.h"
|
||||
|
||||
namespace NEO {
|
||||
template <typename GfxFamily>
|
||||
struct EncodeMemoryFence {
|
||||
using STATE_SYSTEM_MEM_FENCE_ADDRESS = typename GfxFamily::STATE_SYSTEM_MEM_FENCE_ADDRESS;
|
||||
|
||||
static size_t getSystemMemoryFenceSize() {
|
||||
return sizeof(STATE_SYSTEM_MEM_FENCE_ADDRESS);
|
||||
}
|
||||
static void encodeSystemMemoryFence(LinearStream &commandStream, const GraphicsAllocation *globalFenceAllocation) {
|
||||
auto stateSystemFenceAddressSpace = commandStream.getSpaceForCmd<STATE_SYSTEM_MEM_FENCE_ADDRESS>();
|
||||
STATE_SYSTEM_MEM_FENCE_ADDRESS stateSystemFenceAddress = GfxFamily::cmdInitStateSystemMemFenceAddress;
|
||||
stateSystemFenceAddress.setSystemMemoryFenceAddress(globalFenceAllocation->getGpuAddress());
|
||||
*stateSystemFenceAddressSpace = stateSystemFenceAddress;
|
||||
}
|
||||
};
|
||||
} // namespace NEO
|
|
@ -26,6 +26,12 @@ if(SUPPORT_XEHP_AND_LATER)
|
|||
)
|
||||
endif()
|
||||
|
||||
if(SUPPORT_PVC_AND_LATER)
|
||||
list(APPEND NEO_CORE_DIRECT_SUBMISSION
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/direct_submission_prefetcher_pvc_and_later.inl
|
||||
)
|
||||
endif()
|
||||
|
||||
set_property(GLOBAL PROPERTY NEO_CORE_DIRECT_SUBMISSION ${NEO_CORE_DIRECT_SUBMISSION})
|
||||
|
||||
add_subdirectories()
|
||||
|
|
|
@ -0,0 +1,22 @@
|
|||
/*
|
||||
* Copyright (C) 2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/direct_submission/direct_submission_hw.h"
|
||||
|
||||
namespace NEO {
|
||||
|
||||
template <typename GfxFamily, typename Dispatcher>
|
||||
inline void DirectSubmissionHw<GfxFamily, Dispatcher>::dispatchDisablePrefetcher(bool disable) {
|
||||
using MI_ARB_CHECK = typename GfxFamily::MI_ARB_CHECK;
|
||||
|
||||
MI_ARB_CHECK arbCheck = GfxFamily::cmdInitArbCheck;
|
||||
arbCheck.setPreParserDisable(disable);
|
||||
MI_ARB_CHECK *arbCheckSpace = ringCommandStream.getSpaceForCmd<MI_ARB_CHECK>();
|
||||
*arbCheckSpace = arbCheck;
|
||||
}
|
||||
|
||||
} // namespace NEO
|
|
@ -23,4 +23,7 @@
|
|||
#endif
|
||||
#ifdef SUPPORT_XE_HPG_CORE
|
||||
#include "shared/source/xe_hpg_core/aub_mapper.h"
|
||||
#endif
|
||||
#endif
|
||||
#ifdef SUPPORT_XE_HPC_CORE
|
||||
#include "shared/source/xe_hpc_core/aub_mapper.h"
|
||||
#endif
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -150,6 +150,12 @@ if(SUPPORT_DG2_AND_LATER)
|
|||
)
|
||||
endif()
|
||||
|
||||
if(SUPPORT_PVC_AND_LATER)
|
||||
list(APPEND NEO_CORE_HELPERS
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/hw_helper_pvc_and_later.inl
|
||||
)
|
||||
endif()
|
||||
|
||||
set(NEO_CORE_HELPERS_GMM_CALLBACKS_WINDOWS
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/windows/gmm_callbacks.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/windows/gmm_callbacks.h
|
||||
|
|
|
@ -23,4 +23,7 @@
|
|||
#endif
|
||||
#ifdef SUPPORT_XE_HPG_CORE
|
||||
#include "shared/source/xe_hpg_core/hw_cmds.h"
|
||||
#endif
|
||||
#endif
|
||||
#ifdef SUPPORT_XE_HPC_CORE
|
||||
#include "shared/source/xe_hpc_core/hw_cmds.h"
|
||||
#endif
|
||||
|
|
|
@ -0,0 +1,63 @@
|
|||
/*
|
||||
* Copyright (C) 2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/helpers/hw_helper.h"
|
||||
#include "shared/source/helpers/hw_info.h"
|
||||
|
||||
namespace NEO {
|
||||
|
||||
template <>
|
||||
bool HwHelperHw<Family>::isCpuImageTransferPreferred(const HardwareInfo &hwInfo) const {
|
||||
return !hwInfo.capabilityTable.supportsImages;
|
||||
}
|
||||
|
||||
template <>
|
||||
bool HwHelperHw<Family>::isRcsAvailable(const HardwareInfo &hwInfo) const {
|
||||
auto defaultEngine = getChosenEngineType(hwInfo);
|
||||
return (defaultEngine == aub_stream::EngineType::ENGINE_RCS) ||
|
||||
(defaultEngine == aub_stream::EngineType::ENGINE_CCCS) || hwInfo.featureTable.flags.ftrRcsNode;
|
||||
}
|
||||
|
||||
template <>
|
||||
bool HwHelperHw<Family>::isCooperativeDispatchSupported(const EngineGroupType engineGroupType, const HardwareInfo &hwInfo) const {
|
||||
if (isCooperativeEngineSupported(hwInfo)) {
|
||||
if (engineGroupType == EngineGroupType::RenderCompute) {
|
||||
return false;
|
||||
}
|
||||
|
||||
bool isExclusiveContextUsed = (engineGroupType == EngineGroupType::CooperativeCompute);
|
||||
return !isRcsAvailable(hwInfo) || isExclusiveContextUsed;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
template <>
|
||||
uint32_t HwHelperHw<Family>::adjustMaxWorkGroupCount(uint32_t maxWorkGroupCount, const EngineGroupType engineGroupType,
|
||||
const HardwareInfo &hwInfo, bool isEngineInstanced) const {
|
||||
if (!isCooperativeDispatchSupported(engineGroupType, hwInfo)) {
|
||||
return 1u;
|
||||
}
|
||||
|
||||
bool requiresLimitation = this->isCooperativeEngineSupported(hwInfo) &&
|
||||
(engineGroupType != EngineGroupType::CooperativeCompute) &&
|
||||
(!isEngineInstanced) &&
|
||||
(DebugManager.flags.OverrideMaxWorkGroupCount.get() == -1);
|
||||
if (requiresLimitation) {
|
||||
auto ccsCount = hwInfo.gtSystemInfo.CCSInfo.NumberOfCCSEnabled;
|
||||
UNRECOVERABLE_IF(ccsCount == 0);
|
||||
return maxWorkGroupCount / ccsCount;
|
||||
}
|
||||
return maxWorkGroupCount;
|
||||
}
|
||||
|
||||
template <>
|
||||
bool HwHelperHw<Family>::isEngineTypeRemappingToHwSpecificRequired() const {
|
||||
return true;
|
||||
}
|
||||
|
||||
} // namespace NEO
|
|
@ -0,0 +1,15 @@
|
|||
#
|
||||
# Copyright (C) 2021 Intel Corporation
|
||||
#
|
||||
# SPDX-License-Identifier: MIT
|
||||
#
|
||||
|
||||
if(SUPPORT_XE_HPC_CORE)
|
||||
set(HW_DEFINITIONS_XE_HPC_CORE
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/definitions${BRANCH_DIR_SUFFIX}hw_cmds_pvc.inl
|
||||
)
|
||||
set_property(GLOBAL APPEND PROPERTY CORE_SRCS_GENX_ALL_BASE ${HW_DEFINITIONS_XE_HPC_CORE})
|
||||
|
||||
add_subdirectories()
|
||||
endif()
|
|
@ -0,0 +1,25 @@
|
|||
/*
|
||||
* Copyright (C) 2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/command_stream/aub_command_stream_receiver_hw_xehp_and_later.inl"
|
||||
#include "shared/source/helpers/array_count.h"
|
||||
#include "shared/source/helpers/populate_factory.h"
|
||||
|
||||
namespace NEO {
|
||||
|
||||
using Family = XE_HPC_COREFamily;
|
||||
static auto gfxCore = IGFX_XE_HPC_CORE;
|
||||
|
||||
template <>
|
||||
void populateFactoryTable<AUBCommandStreamReceiverHw<Family>>() {
|
||||
extern AubCommandStreamReceiverCreateFunc aubCommandStreamReceiverFactory[IGFX_MAX_CORE];
|
||||
UNRECOVERABLE_IF(!isInRange(gfxCore, aubCommandStreamReceiverFactory));
|
||||
aubCommandStreamReceiverFactory[gfxCore] = AUBCommandStreamReceiverHw<Family>::create;
|
||||
}
|
||||
|
||||
template class AUBCommandStreamReceiverHw<Family>;
|
||||
} // namespace NEO
|
|
@ -0,0 +1,30 @@
|
|||
/*
|
||||
* Copyright (C) 2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include "shared/source/aub/aub_mapper_base.h"
|
||||
#include "shared/source/helpers/constants.h"
|
||||
|
||||
#include "engine_node.h"
|
||||
|
||||
namespace NEO {
|
||||
struct XE_HPC_COREFamily;
|
||||
|
||||
template <>
|
||||
struct AUBFamilyMapper<XE_HPC_COREFamily> {
|
||||
enum { device = AubMemDump::DeviceValues::Pvc };
|
||||
|
||||
using AubTraits = AubMemDump::Traits<device, MemoryConstants::GfxAddressBits>;
|
||||
|
||||
static const AubMemDump::LrcaHelper *const csTraits[aub_stream::NUM_ENGINES];
|
||||
|
||||
static const MMIOList globalMMIO;
|
||||
static const MMIOList *perEngineMMIO[aub_stream::NUM_ENGINES];
|
||||
|
||||
using AUB = AubMemDump::AubDump<AubTraits>;
|
||||
};
|
||||
} // namespace NEO
|
|
@ -0,0 +1,20 @@
|
|||
/*
|
||||
* Copyright (C) 2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/aub/aub_helper_xehp_and_later.inl"
|
||||
|
||||
#include "aub_mem_dump.h"
|
||||
|
||||
namespace NEO {
|
||||
struct XE_HPC_COREFamily;
|
||||
using Family = NEO::XE_HPC_COREFamily;
|
||||
constexpr static auto deviceValue = AubMemDump::DeviceValues::Pvc;
|
||||
|
||||
template class AubHelperHw<Family>;
|
||||
} // namespace NEO
|
||||
|
||||
#include "shared/source/aub_mem_dump/aub_mem_dump_pvc_and_later.inl"
|
|
@ -0,0 +1,268 @@
|
|||
/*
|
||||
* Copyright (C) 2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/command_container/command_encoder.h"
|
||||
#include "shared/source/command_container/command_encoder.inl"
|
||||
#include "shared/source/command_container/command_encoder_xehp_and_later.inl"
|
||||
#include "shared/source/command_container/encode_compute_mode_tgllp_and_later.inl"
|
||||
#include "shared/source/command_stream/stream_properties.h"
|
||||
#include "shared/source/helpers/constants.h"
|
||||
#include "shared/source/kernel/grf_config.h"
|
||||
#include "shared/source/xe_hpc_core/hw_cmds_base.h"
|
||||
|
||||
namespace NEO {
|
||||
|
||||
using Family = XE_HPC_COREFamily;
|
||||
}
|
||||
|
||||
#include "shared/source/command_container/command_encoder_xe_hpg_core_and_later.inl"
|
||||
#include "shared/source/command_container/image_surface_state/compression_params_tgllp_and_later.inl"
|
||||
#include "shared/source/command_container/image_surface_state/compression_params_xehp_and_later.inl"
|
||||
|
||||
namespace NEO {
|
||||
|
||||
template <>
|
||||
void EncodeDispatchKernel<Family>::adjustTimestampPacket(WALKER_TYPE &walkerCmd, const HardwareInfo &hwInfo) {
|
||||
walkerCmd.getPostSync().setDataportSubsliceCacheFlush(true);
|
||||
}
|
||||
|
||||
template <>
|
||||
void EncodeDispatchKernel<Family>::adjustInterfaceDescriptorData(INTERFACE_DESCRIPTOR_DATA &interfaceDescriptor, const HardwareInfo &hwInfo) {
|
||||
const auto &hwInfoConfig = *HwInfoConfig::get(hwInfo.platform.eProductFamily);
|
||||
if (hwInfoConfig.isDisableOverdispatchAvailable(hwInfo)) {
|
||||
interfaceDescriptor.setThreadGroupDispatchSize(3u);
|
||||
}
|
||||
|
||||
if (DebugManager.flags.ForceThreadGroupDispatchSize.get() != -1) {
|
||||
interfaceDescriptor.setThreadGroupDispatchSize(DebugManager.flags.ForceThreadGroupDispatchSize.get());
|
||||
}
|
||||
}
|
||||
|
||||
template <>
|
||||
inline void EncodeAtomic<Family>::setMiAtomicAddress(MI_ATOMIC &atomic, uint64_t writeAddress) {
|
||||
atomic.setMemoryAddress(writeAddress);
|
||||
}
|
||||
|
||||
template <>
|
||||
void EncodeComputeMode<Family>::adjustComputeMode(LinearStream &csr, void *const stateComputeModePtr, StateComputeModeProperties &properties, const HardwareInfo &hwInfo) {
|
||||
using STATE_COMPUTE_MODE = typename Family::STATE_COMPUTE_MODE;
|
||||
using FORCE_NON_COHERENT = typename STATE_COMPUTE_MODE::FORCE_NON_COHERENT;
|
||||
|
||||
STATE_COMPUTE_MODE stateComputeMode = (stateComputeModePtr != nullptr) ? *(static_cast<STATE_COMPUTE_MODE *>(stateComputeModePtr)) : Family::cmdInitStateComputeMode;
|
||||
auto maskBits = stateComputeMode.getMaskBits();
|
||||
|
||||
if (properties.isCoherencyRequired.isDirty) {
|
||||
FORCE_NON_COHERENT coherencyValue = !properties.isCoherencyRequired.value ? FORCE_NON_COHERENT::FORCE_NON_COHERENT_FORCE_GPU_NON_COHERENT
|
||||
: FORCE_NON_COHERENT::FORCE_NON_COHERENT_FORCE_DISABLED;
|
||||
stateComputeMode.setForceNonCoherent(coherencyValue);
|
||||
maskBits |= Family::stateComputeModeForceNonCoherentMask;
|
||||
}
|
||||
|
||||
if (properties.threadArbitrationPolicy.isDirty) {
|
||||
switch (properties.threadArbitrationPolicy.value) {
|
||||
case ThreadArbitrationPolicy::RoundRobin:
|
||||
stateComputeMode.setEuThreadSchedulingModeOverride(STATE_COMPUTE_MODE::EU_THREAD_SCHEDULING_MODE_OVERRIDE::EU_THREAD_SCHEDULING_MODE_OVERRIDE_ROUND_ROBIN);
|
||||
break;
|
||||
case ThreadArbitrationPolicy::AgeBased:
|
||||
stateComputeMode.setEuThreadSchedulingModeOverride(STATE_COMPUTE_MODE::EU_THREAD_SCHEDULING_MODE_OVERRIDE::EU_THREAD_SCHEDULING_MODE_OVERRIDE_OLDEST_FIRST);
|
||||
break;
|
||||
case ThreadArbitrationPolicy::RoundRobinAfterDependency:
|
||||
stateComputeMode.setEuThreadSchedulingModeOverride(STATE_COMPUTE_MODE::EU_THREAD_SCHEDULING_MODE_OVERRIDE::EU_THREAD_SCHEDULING_MODE_OVERRIDE_STALL_BASED_ROUND_ROBIN);
|
||||
break;
|
||||
default:
|
||||
stateComputeMode.setEuThreadSchedulingModeOverride(STATE_COMPUTE_MODE::EU_THREAD_SCHEDULING_MODE_OVERRIDE::EU_THREAD_SCHEDULING_MODE_OVERRIDE_HW_DEFAULT);
|
||||
}
|
||||
maskBits |= Family::stateComputeModeEuThreadSchedulingModeOverrideMask;
|
||||
}
|
||||
|
||||
if (properties.largeGrfMode.isDirty) {
|
||||
stateComputeMode.setLargeGrfMode(properties.largeGrfMode.value);
|
||||
maskBits |= Family::stateComputeModeLargeGrfModeMask;
|
||||
}
|
||||
|
||||
stateComputeMode.setMaskBits(maskBits);
|
||||
|
||||
auto buffer = csr.getSpaceForCmd<STATE_COMPUTE_MODE>();
|
||||
*buffer = stateComputeMode;
|
||||
}
|
||||
|
||||
template <>
|
||||
void EncodeMemoryPrefetch<Family>::programMemoryPrefetch(LinearStream &commandStream, const GraphicsAllocation &graphicsAllocation, uint32_t size, size_t offset, const HardwareInfo &hwInfo) {
|
||||
using STATE_PREFETCH = typename Family::STATE_PREFETCH;
|
||||
constexpr uint32_t mocsIndexForL3 = (2 << 1);
|
||||
|
||||
bool isBaseDieA0 = (hwInfo.platform.usRevId & Family::pvcBaseDieRevMask) == Family::pvcBaseDieA0Masked;
|
||||
|
||||
bool prefetch = !isBaseDieA0;
|
||||
if (DebugManager.flags.EnableMemoryPrefetch.get() != -1) {
|
||||
prefetch = !!DebugManager.flags.EnableMemoryPrefetch.get();
|
||||
}
|
||||
|
||||
if (!prefetch) {
|
||||
return;
|
||||
}
|
||||
|
||||
uint64_t gpuVa = graphicsAllocation.getGpuAddress() + offset;
|
||||
|
||||
while (size > 0) {
|
||||
uint32_t sizeInBytsToPrefetch = std::min(alignUp(size, MemoryConstants::cacheLineSize),
|
||||
static_cast<uint32_t>(MemoryConstants::pageSize64k));
|
||||
|
||||
// zero based cacheline count (0 == 1 cacheline)
|
||||
uint32_t prefetchSize = (sizeInBytsToPrefetch / MemoryConstants::cacheLineSize) - 1;
|
||||
|
||||
auto statePrefetch = commandStream.getSpaceForCmd<STATE_PREFETCH>();
|
||||
STATE_PREFETCH cmd = Family::cmdInitStatePrefetch;
|
||||
|
||||
cmd.setAddress(gpuVa);
|
||||
cmd.setPrefetchSize(prefetchSize);
|
||||
cmd.setMemoryObjectControlState(mocsIndexForL3);
|
||||
cmd.setKernelInstructionPrefetch(GraphicsAllocation::isIsaAllocationType(graphicsAllocation.getAllocationType()));
|
||||
|
||||
if (DebugManager.flags.ForceCsStallForStatePrefetch.get() == 1) {
|
||||
cmd.setParserStall(true);
|
||||
}
|
||||
|
||||
*statePrefetch = cmd;
|
||||
|
||||
if (sizeInBytsToPrefetch > size) {
|
||||
break;
|
||||
}
|
||||
|
||||
gpuVa += sizeInBytsToPrefetch;
|
||||
size -= sizeInBytsToPrefetch;
|
||||
}
|
||||
}
|
||||
|
||||
template <>
|
||||
size_t EncodeMemoryPrefetch<Family>::getSizeForMemoryPrefetch(size_t size) {
|
||||
if (DebugManager.flags.EnableMemoryPrefetch.get() == 0) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
size = alignUp(size, MemoryConstants::pageSize64k);
|
||||
|
||||
size_t count = size / MemoryConstants::pageSize64k;
|
||||
|
||||
return (count * sizeof(typename Family::STATE_PREFETCH));
|
||||
}
|
||||
|
||||
template <>
|
||||
inline void EncodeMiFlushDW<Family>::appendMiFlushDw(MI_FLUSH_DW *miFlushDwCmd) {
|
||||
miFlushDwCmd->setFlushLlc(1);
|
||||
}
|
||||
|
||||
template <>
|
||||
inline void EncodeMiFlushDW<Family>::programMiFlushDwWA(LinearStream &commandStream) {
|
||||
}
|
||||
|
||||
template <>
|
||||
size_t EncodeMiFlushDW<Family>::getMiFlushDwWaSize() {
|
||||
return 0;
|
||||
}
|
||||
|
||||
template <>
|
||||
void EncodeDispatchKernel<Family>::programBarrierEnable(INTERFACE_DESCRIPTOR_DATA &interfaceDescriptor,
|
||||
uint32_t value,
|
||||
const HardwareInfo &hwInfo) {
|
||||
interfaceDescriptor.setNumberOfBarriers(value);
|
||||
}
|
||||
|
||||
template <>
|
||||
void EncodeDispatchKernel<Family>::encodeAdditionalWalkerFields(const HardwareInfo &hwInfo, WALKER_TYPE &walkerCmd) {
|
||||
auto programGlobalFenceAsPostSyncOperationInComputeWalker = !Family::isXlA0(hwInfo);
|
||||
if (DebugManager.flags.ProgramGlobalFenceAsPostSyncOperationInComputeWalker.get() != -1) {
|
||||
programGlobalFenceAsPostSyncOperationInComputeWalker = !!DebugManager.flags.ProgramGlobalFenceAsPostSyncOperationInComputeWalker.get();
|
||||
}
|
||||
if (programGlobalFenceAsPostSyncOperationInComputeWalker) {
|
||||
auto &postSyncData = walkerCmd.getPostSync();
|
||||
postSyncData.setSystemMemoryFenceRequest(true);
|
||||
}
|
||||
|
||||
if (DebugManager.flags.ForceL3PrefetchForComputeWalker.get() != -1) {
|
||||
walkerCmd.setL3PrefetchDisable(!DebugManager.flags.ForceL3PrefetchForComputeWalker.get());
|
||||
}
|
||||
}
|
||||
|
||||
template <>
|
||||
void EncodeDispatchKernel<Family>::appendAdditionalIDDFields(INTERFACE_DESCRIPTOR_DATA *pInterfaceDescriptor, const HardwareInfo &hwInfo, const uint32_t threadsPerThreadGroup, uint32_t slmTotalSize, SlmPolicy slmPolicy) {
|
||||
using PREFERRED_SLM_SIZE_OVERRIDE = typename Family::INTERFACE_DESCRIPTOR_DATA::PREFERRED_SLM_SIZE_OVERRIDE;
|
||||
using PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS = typename Family::INTERFACE_DESCRIPTOR_DATA::PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS;
|
||||
|
||||
const uint32_t threadsPerDssCount = hwInfo.gtSystemInfo.ThreadCount / hwInfo.gtSystemInfo.DualSubSliceCount;
|
||||
const uint32_t workGroupCountPerDss = static_cast<uint32_t>(Math::divideAndRoundUp(threadsPerDssCount, threadsPerThreadGroup));
|
||||
const uint32_t workgroupSlmSize = HwHelperHw<Family>::get().alignSlmSize(slmTotalSize);
|
||||
|
||||
uint32_t slmSize = 0u;
|
||||
|
||||
switch (slmPolicy) {
|
||||
case SlmPolicy::SlmPolicyLargeData:
|
||||
slmSize = workgroupSlmSize;
|
||||
break;
|
||||
case SlmPolicy::SlmPolicyLargeSlm:
|
||||
default:
|
||||
slmSize = workgroupSlmSize * workGroupCountPerDss;
|
||||
break;
|
||||
}
|
||||
|
||||
struct SizeToPreferredSlmValue {
|
||||
uint32_t upperLimit;
|
||||
PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS valueToProgram;
|
||||
};
|
||||
const std::array<SizeToPreferredSlmValue, 6> ranges = {{
|
||||
// upper limit, retVal
|
||||
{0, PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS::PREFERRED_SLM_SIZE_IS_0K},
|
||||
{16 * KB, PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS::PREFERRED_SLM_SIZE_IS_16K},
|
||||
{32 * KB, PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS::PREFERRED_SLM_SIZE_IS_32K},
|
||||
{64 * KB, PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS::PREFERRED_SLM_SIZE_IS_64K},
|
||||
{96 * KB, PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS::PREFERRED_SLM_SIZE_IS_96K},
|
||||
}};
|
||||
|
||||
auto programmableIdPreferredSlmSize = PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS::PREFERRED_SLM_SIZE_IS_128K;
|
||||
for (auto &range : ranges) {
|
||||
if (slmSize <= range.upperLimit) {
|
||||
programmableIdPreferredSlmSize = range.valueToProgram;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if ((slmSize == 0) && (Family::isXlA0(hwInfo))) {
|
||||
programmableIdPreferredSlmSize = PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS::PREFERRED_SLM_SIZE_IS_16K;
|
||||
}
|
||||
|
||||
pInterfaceDescriptor->setPreferredSlmSizeOverride(PREFERRED_SLM_SIZE_OVERRIDE::PREFERRED_SLM_SIZE_OVERRIDE_IS_ENABLED);
|
||||
pInterfaceDescriptor->setPreferredSlmAllocationSizePerDss(programmableIdPreferredSlmSize);
|
||||
|
||||
if (DebugManager.flags.OverridePreferredSlmAllocationSizePerDss.get() != -1) {
|
||||
auto toProgram =
|
||||
static_cast<PREFERRED_SLM_ALLOCATION_SIZE_PER_DSS>(DebugManager.flags.OverridePreferredSlmAllocationSizePerDss.get());
|
||||
pInterfaceDescriptor->setPreferredSlmAllocationSizePerDss(toProgram);
|
||||
}
|
||||
}
|
||||
|
||||
template struct EncodeDispatchKernel<Family>;
|
||||
template struct EncodeStates<Family>;
|
||||
template struct EncodeMath<Family>;
|
||||
template struct EncodeMathMMIO<Family>;
|
||||
template struct EncodeIndirectParams<Family>;
|
||||
template struct EncodeSetMMIO<Family>;
|
||||
template struct EncodeMediaInterfaceDescriptorLoad<Family>;
|
||||
template struct EncodeStateBaseAddress<Family>;
|
||||
template struct EncodeStoreMMIO<Family>;
|
||||
template struct EncodeSurfaceState<Family>;
|
||||
template struct EncodeComputeMode<Family>;
|
||||
template struct EncodeAtomic<Family>;
|
||||
template struct EncodeSempahore<Family>;
|
||||
template struct EncodeBatchBufferStartOrEnd<Family>;
|
||||
template struct EncodeMiFlushDW<Family>;
|
||||
template struct EncodeMemoryPrefetch<Family>;
|
||||
template struct EncodeMiArbCheck<Family>;
|
||||
template struct EncodeWA<Family>;
|
||||
template struct EncodeEnableRayTracing<Family>;
|
||||
template struct EncodeNoop<Family>;
|
||||
template struct EncodeStoreMemory<Family>;
|
||||
} // namespace NEO
|
|
@ -0,0 +1,240 @@
|
|||
/*
|
||||
* Copyright (C) 2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/command_container/memory_fence_encoder.h"
|
||||
#include "shared/source/xe_hpc_core/hw_cmds.h"
|
||||
#include "shared/source/xe_hpc_core/hw_info.h"
|
||||
|
||||
using Family = NEO::XE_HPC_COREFamily;
|
||||
|
||||
#include "shared/source/command_stream/command_stream_receiver_hw_dg2_and_later.inl"
|
||||
#include "shared/source/command_stream/command_stream_receiver_hw_tgllp_and_later.inl"
|
||||
#include "shared/source/command_stream/command_stream_receiver_hw_xehp_and_later.inl"
|
||||
#include "shared/source/helpers/blit_commands_helper_xehp_and_later.inl"
|
||||
#include "shared/source/helpers/populate_factory.h"
|
||||
#include "shared/source/helpers/state_compute_mode_helper.h"
|
||||
|
||||
namespace NEO {
|
||||
static auto gfxCore = IGFX_XE_HPC_CORE;
|
||||
|
||||
template <>
|
||||
bool ImplicitFlushSettings<Family>::defaultSettingForNewResource = false;
|
||||
template <>
|
||||
bool ImplicitFlushSettings<Family>::defaultSettingForGpuIdle = false;
|
||||
template class ImplicitFlushSettings<Family>;
|
||||
|
||||
template <>
|
||||
void populateFactoryTable<CommandStreamReceiverHw<Family>>() {
|
||||
extern CommandStreamReceiverCreateFunc commandStreamReceiverFactory[2 * IGFX_MAX_CORE];
|
||||
commandStreamReceiverFactory[gfxCore] = DeviceCommandStreamReceiver<Family>::create;
|
||||
}
|
||||
|
||||
template <>
|
||||
void CommandStreamReceiverHw<Family>::programEnginePrologue(LinearStream &csr) {
|
||||
if (!this->isEnginePrologueSent) {
|
||||
if (globalFenceAllocation) {
|
||||
EncodeMemoryFence<Family>::encodeSystemMemoryFence(csr, globalFenceAllocation);
|
||||
}
|
||||
this->isEnginePrologueSent = true;
|
||||
}
|
||||
}
|
||||
|
||||
template <>
|
||||
size_t CommandStreamReceiverHw<Family>::getCmdSizeForPrologue() const {
|
||||
if (!this->isEnginePrologueSent) {
|
||||
if (globalFenceAllocation) {
|
||||
return sizeof(Family::STATE_SYSTEM_MEM_FENCE_ADDRESS);
|
||||
}
|
||||
}
|
||||
return 0u;
|
||||
}
|
||||
|
||||
template <>
|
||||
void BlitCommandsHelper<Family>::appendBlitCommandsForBuffer(const BlitProperties &blitProperites, typename Family::XY_COPY_BLT &blitCmd,
|
||||
const RootDeviceEnvironment &rootDeviceEnvironment) {
|
||||
using MEM_COPY = typename Family::MEM_COPY;
|
||||
|
||||
auto dstAllocation = blitProperites.dstAllocation;
|
||||
auto srcAllocation = blitProperites.srcAllocation;
|
||||
|
||||
if (blitCmd.getDestinationY2CoordinateBottom() > 1) {
|
||||
blitCmd.setCopyType(MEM_COPY::COPY_TYPE::COPY_TYPE_MATRIX_COPY);
|
||||
} else {
|
||||
blitCmd.setCopyType(MEM_COPY::COPY_TYPE::COPY_TYPE_LINEAR_COPY);
|
||||
}
|
||||
|
||||
auto mocsL3enabled = rootDeviceEnvironment.getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER);
|
||||
blitCmd.setDestinationMOCS(mocsL3enabled);
|
||||
blitCmd.setSourceMOCS(mocsL3enabled);
|
||||
if (DebugManager.flags.OverrideBlitterMocs.get() != -1) {
|
||||
blitCmd.setDestinationMOCS(DebugManager.flags.OverrideBlitterMocs.get());
|
||||
blitCmd.setSourceMOCS(DebugManager.flags.OverrideBlitterMocs.get());
|
||||
}
|
||||
|
||||
if (dstAllocation->isCompressionEnabled()) {
|
||||
auto resourceFormat = dstAllocation->getDefaultGmm()->gmmResourceInfo->getResourceFormat();
|
||||
auto compressionFormat = rootDeviceEnvironment.getGmmClientContext()->getSurfaceStateCompressionFormat(resourceFormat);
|
||||
blitCmd.setDestinationCompressible(MEM_COPY::DESTINATION_COMPRESSIBLE::DESTINATION_COMPRESSIBLE_COMPRESSIBLE);
|
||||
blitCmd.setCompressionFormat(compressionFormat);
|
||||
}
|
||||
if (srcAllocation->isCompressionEnabled()) {
|
||||
auto resourceFormat = srcAllocation->getDefaultGmm()->gmmResourceInfo->getResourceFormat();
|
||||
auto compressionFormat = rootDeviceEnvironment.getGmmClientContext()->getSurfaceStateCompressionFormat(resourceFormat);
|
||||
blitCmd.setSourceCompressible(MEM_COPY::SOURCE_COMPRESSIBLE::SOURCE_COMPRESSIBLE_COMPRESSIBLE);
|
||||
blitCmd.setCompressionFormat(compressionFormat);
|
||||
}
|
||||
|
||||
if (DebugManager.flags.EnableStatelessCompressionWithUnifiedMemory.get()) {
|
||||
if (!MemoryPool::isSystemMemoryPool(srcAllocation->getMemoryPool())) {
|
||||
blitCmd.setSourceCompressible(MEM_COPY::SOURCE_COMPRESSIBLE::SOURCE_COMPRESSIBLE_COMPRESSIBLE);
|
||||
blitCmd.setCompressionFormat(DebugManager.flags.FormatForStatelessCompressionWithUnifiedMemory.get());
|
||||
}
|
||||
if (!MemoryPool::isSystemMemoryPool(dstAllocation->getMemoryPool())) {
|
||||
blitCmd.setDestinationCompressible(MEM_COPY::DESTINATION_COMPRESSIBLE::DESTINATION_COMPRESSIBLE_COMPRESSIBLE);
|
||||
blitCmd.setCompressionFormat(DebugManager.flags.FormatForStatelessCompressionWithUnifiedMemory.get());
|
||||
}
|
||||
}
|
||||
|
||||
if (blitCmd.getDestinationCompressible() == MEM_COPY::DESTINATION_COMPRESSIBLE::DESTINATION_COMPRESSIBLE_COMPRESSIBLE &&
|
||||
AuxTranslationDirection::AuxToNonAux != blitProperites.auxTranslationDirection) {
|
||||
blitCmd.setDestinationCompressionEnable(MEM_COPY::DESTINATION_COMPRESSION_ENABLE::DESTINATION_COMPRESSION_ENABLE_ENABLE);
|
||||
} else {
|
||||
blitCmd.setDestinationCompressionEnable(MEM_COPY::DESTINATION_COMPRESSION_ENABLE::DESTINATION_COMPRESSION_ENABLE_DISABLE);
|
||||
}
|
||||
|
||||
DEBUG_BREAK_IF((AuxTranslationDirection::None != blitProperites.auxTranslationDirection) &&
|
||||
(dstAllocation != srcAllocation || !dstAllocation->isCompressionEnabled()));
|
||||
}
|
||||
|
||||
template <>
|
||||
template <>
|
||||
void BlitCommandsHelper<Family>::dispatchBlitMemoryFill<1>(NEO::GraphicsAllocation *dstAlloc, uint64_t offset, uint32_t *pattern, LinearStream &linearStream, size_t size, const RootDeviceEnvironment &rootDeviceEnvironment, COLOR_DEPTH depth) {
|
||||
using MEM_SET = typename Family::MEM_SET;
|
||||
auto blitCmd = Family::cmdInitMemSet;
|
||||
|
||||
auto mocsL3enabled = rootDeviceEnvironment.getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER);
|
||||
blitCmd.setDestinationMOCS(mocsL3enabled);
|
||||
if (DebugManager.flags.OverrideBlitterMocs.get() != -1) {
|
||||
blitCmd.setDestinationMOCS(DebugManager.flags.OverrideBlitterMocs.get());
|
||||
}
|
||||
|
||||
if (dstAlloc->isCompressionEnabled()) {
|
||||
auto resourceFormat = dstAlloc->getDefaultGmm()->gmmResourceInfo->getResourceFormat();
|
||||
auto compressionFormat = rootDeviceEnvironment.getGmmClientContext()->getSurfaceStateCompressionFormat(resourceFormat);
|
||||
blitCmd.setDestinationCompressible(MEM_SET::DESTINATION_COMPRESSIBLE::DESTINATION_COMPRESSIBLE_COMPRESSIBLE);
|
||||
blitCmd.setCompressionFormat40(compressionFormat);
|
||||
}
|
||||
if (DebugManager.flags.EnableStatelessCompressionWithUnifiedMemory.get()) {
|
||||
if (!MemoryPool::isSystemMemoryPool(dstAlloc->getMemoryPool())) {
|
||||
blitCmd.setDestinationCompressible(MEM_SET::DESTINATION_COMPRESSIBLE::DESTINATION_COMPRESSIBLE_COMPRESSIBLE);
|
||||
blitCmd.setCompressionFormat40(DebugManager.flags.FormatForStatelessCompressionWithUnifiedMemory.get());
|
||||
}
|
||||
}
|
||||
|
||||
if (blitCmd.getDestinationCompressible() == MEM_SET::DESTINATION_COMPRESSIBLE::DESTINATION_COMPRESSIBLE_COMPRESSIBLE) {
|
||||
blitCmd.setDestinationCompressionEnable(MEM_SET::DESTINATION_COMPRESSION_ENABLE::DESTINATION_COMPRESSION_ENABLE_ENABLE);
|
||||
} else {
|
||||
blitCmd.setDestinationCompressionEnable(MEM_SET::DESTINATION_COMPRESSION_ENABLE::DESTINATION_COMPRESSION_ENABLE_DISABLE);
|
||||
}
|
||||
blitCmd.setFillData(*pattern);
|
||||
|
||||
auto sizeToFill = size;
|
||||
while (sizeToFill != 0) {
|
||||
auto tmpCmd = blitCmd;
|
||||
tmpCmd.setDestinationStartAddress(ptrOffset(dstAlloc->getGpuAddress(), static_cast<size_t>(offset)));
|
||||
size_t height = 0;
|
||||
size_t width = 0;
|
||||
if (sizeToFill <= BlitterConstants::maxBlitSetWidth) {
|
||||
width = sizeToFill;
|
||||
height = 1;
|
||||
} else {
|
||||
width = BlitterConstants::maxBlitSetWidth;
|
||||
height = std::min<size_t>((sizeToFill / width), BlitterConstants::maxBlitSetHeight);
|
||||
if (height > 1) {
|
||||
tmpCmd.setFillType(MEM_SET::FILL_TYPE::FILL_TYPE_MATRIX_FILL);
|
||||
}
|
||||
}
|
||||
tmpCmd.setFillWidth(static_cast<uint32_t>(width));
|
||||
tmpCmd.setFillHeight(static_cast<uint32_t>(height));
|
||||
tmpCmd.setDestinationPitch(static_cast<uint32_t>(width));
|
||||
|
||||
auto cmd = linearStream.getSpaceForCmd<MEM_SET>();
|
||||
*cmd = tmpCmd;
|
||||
|
||||
auto blitSize = width * height;
|
||||
offset += blitSize;
|
||||
sizeToFill -= blitSize;
|
||||
}
|
||||
}
|
||||
|
||||
template <>
|
||||
void BlitCommandsHelper<Family>::dispatchBlitCommandsRegion(const BlitProperties &blitProperties, LinearStream &linearStream, const RootDeviceEnvironment &rootDeviceEnvironment) {}
|
||||
|
||||
template <>
|
||||
void BlitCommandsHelper<Family>::appendBlitCommandsForImages(const BlitProperties &blitProperties, typename Family::XY_COPY_BLT &blitCmd, const RootDeviceEnvironment &rootDeviceEnvironment, uint32_t &srcSlicePitch, uint32_t &dstSlicePitch) {}
|
||||
|
||||
template <>
|
||||
void BlitCommandsHelper<Family>::appendTilingType(const GMM_TILE_TYPE srcTilingType, const GMM_TILE_TYPE dstTilingType, typename Family::XY_COPY_BLT &blitCmd) {}
|
||||
|
||||
template <>
|
||||
void BlitCommandsHelper<Family>::appendSliceOffsets(const BlitProperties &blitProperties, typename Family::XY_COPY_BLT &blitCmd, uint32_t sliceIndex, const RootDeviceEnvironment &rootDeviceEnvironment, uint32_t srcSlicePitch, uint32_t dstSlicePitch) {}
|
||||
|
||||
template <>
|
||||
void BlitCommandsHelper<Family>::appendSurfaceType(const BlitProperties &blitProperties, typename Family::XY_COPY_BLT &blitCmd) {
|
||||
}
|
||||
|
||||
template <>
|
||||
void BlitCommandsHelper<Family>::appendColorDepth(const BlitProperties &blitProperites, typename Family::XY_COPY_BLT &blitCmd) {}
|
||||
|
||||
template <>
|
||||
bool BlitCommandsHelper<Family>::useOneBlitCopyCommand(const Vec3<size_t> ©Size, uint32_t bytesPerPixel) {
|
||||
return false;
|
||||
}
|
||||
|
||||
template class CommandStreamReceiverHw<Family>;
|
||||
template struct BlitCommandsHelper<Family>;
|
||||
|
||||
const Family::COMPUTE_WALKER Family::cmdInitGpgpuWalker = Family::COMPUTE_WALKER::sInit();
|
||||
const Family::CFE_STATE Family::cmdInitCfeState = Family::CFE_STATE::sInit();
|
||||
const Family::INTERFACE_DESCRIPTOR_DATA Family::cmdInitInterfaceDescriptorData = Family::INTERFACE_DESCRIPTOR_DATA::sInit();
|
||||
const Family::MI_BATCH_BUFFER_START Family::cmdInitBatchBufferStart = Family::MI_BATCH_BUFFER_START::sInit();
|
||||
const Family::MI_BATCH_BUFFER_END Family::cmdInitBatchBufferEnd = Family::MI_BATCH_BUFFER_END::sInit();
|
||||
const Family::PIPE_CONTROL Family::cmdInitPipeControl = Family::PIPE_CONTROL::sInit();
|
||||
const Family::STATE_COMPUTE_MODE Family::cmdInitStateComputeMode = Family::STATE_COMPUTE_MODE::sInit();
|
||||
const Family::_3DSTATE_BINDING_TABLE_POOL_ALLOC Family::cmdInitStateBindingTablePoolAlloc =
|
||||
Family::_3DSTATE_BINDING_TABLE_POOL_ALLOC::sInit();
|
||||
const Family::MI_SEMAPHORE_WAIT Family::cmdInitMiSemaphoreWait = Family::MI_SEMAPHORE_WAIT::sInit();
|
||||
const Family::RENDER_SURFACE_STATE Family::cmdInitRenderSurfaceState = Family::RENDER_SURFACE_STATE::sInit();
|
||||
const Family::POSTSYNC_DATA Family::cmdInitPostSyncData = Family::POSTSYNC_DATA::sInit();
|
||||
const Family::MI_SET_PREDICATE Family::cmdInitSetPredicate = Family::MI_SET_PREDICATE::sInit();
|
||||
const Family::MI_LOAD_REGISTER_IMM Family::cmdInitLoadRegisterImm = Family::MI_LOAD_REGISTER_IMM::sInit();
|
||||
const Family::MI_LOAD_REGISTER_REG Family::cmdInitLoadRegisterReg = Family::MI_LOAD_REGISTER_REG::sInit();
|
||||
const Family::MI_LOAD_REGISTER_MEM Family::cmdInitLoadRegisterMem = Family::MI_LOAD_REGISTER_MEM::sInit();
|
||||
const Family::MI_STORE_DATA_IMM Family::cmdInitStoreDataImm = Family::MI_STORE_DATA_IMM::sInit();
|
||||
const Family::MI_STORE_REGISTER_MEM Family::cmdInitStoreRegisterMem = Family::MI_STORE_REGISTER_MEM::sInit();
|
||||
const Family::MI_NOOP Family::cmdInitNoop = Family::MI_NOOP::sInit();
|
||||
const Family::MI_REPORT_PERF_COUNT Family::cmdInitReportPerfCount = Family::MI_REPORT_PERF_COUNT::sInit();
|
||||
const Family::MI_ATOMIC Family::cmdInitAtomic = Family::MI_ATOMIC::sInit();
|
||||
const Family::PIPELINE_SELECT Family::cmdInitPipelineSelect = Family::PIPELINE_SELECT::sInit();
|
||||
const Family::MI_ARB_CHECK Family::cmdInitArbCheck = Family::MI_ARB_CHECK::sInit();
|
||||
const Family::STATE_BASE_ADDRESS Family::cmdInitStateBaseAddress = Family::STATE_BASE_ADDRESS::sInit();
|
||||
const Family::MEDIA_SURFACE_STATE Family::cmdInitMediaSurfaceState = Family::MEDIA_SURFACE_STATE::sInit();
|
||||
const Family::SAMPLER_STATE Family::cmdInitSamplerState = Family::SAMPLER_STATE::sInit();
|
||||
const Family::BINDING_TABLE_STATE Family::cmdInitBindingTableState = Family::BINDING_TABLE_STATE::sInit();
|
||||
const Family::MI_USER_INTERRUPT Family::cmdInitUserInterrupt = Family::MI_USER_INTERRUPT::sInit();
|
||||
const Family::MI_CONDITIONAL_BATCH_BUFFER_END cmdInitConditionalBatchBufferEnd = Family::MI_CONDITIONAL_BATCH_BUFFER_END::sInit();
|
||||
const Family::MI_FLUSH_DW Family::cmdInitMiFlushDw = Family::MI_FLUSH_DW::sInit();
|
||||
const Family::MEM_COPY Family::cmdInitXyCopyBlt = Family::MEM_COPY::sInit();
|
||||
const Family::XY_FAST_COLOR_BLT Family::cmdInitXyColorBlt = Family::XY_FAST_COLOR_BLT::sInit();
|
||||
const Family::STATE_PREFETCH Family::cmdInitStatePrefetch = Family::STATE_PREFETCH::sInit();
|
||||
const Family::_3DSTATE_BTD Family::cmd3dStateBtd = Family::_3DSTATE_BTD::sInit();
|
||||
const Family::_3DSTATE_BTD_BODY Family::cmd3dStateBtdBody = Family::_3DSTATE_BTD_BODY::sInit();
|
||||
const Family::MI_MEM_FENCE Family::cmdInitMemFence = Family::MI_MEM_FENCE::sInit();
|
||||
const Family::MEM_SET Family::cmdInitMemSet = Family::MEM_SET::sInit();
|
||||
const Family::STATE_SIP Family::cmdInitStateSip = Family::STATE_SIP::sInit();
|
||||
const Family::STATE_SYSTEM_MEM_FENCE_ADDRESS Family::cmdInitStateSystemMemFenceAddress = Family::STATE_SYSTEM_MEM_FENCE_ADDRESS::sInit();
|
||||
} // namespace NEO
|
|
@ -0,0 +1,36 @@
|
|||
/*
|
||||
* Copyright (C) 2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/command_stream/command_stream_receiver_simulated_common_hw_xehp_and_later.inl"
|
||||
|
||||
namespace NEO {
|
||||
using Family = XE_HPC_COREFamily;
|
||||
|
||||
template <>
|
||||
bool CommandStreamReceiverSimulatedCommonHw<Family>::expectMemoryCompressed(void *gfxAddress, const void *srcAddress, size_t length) {
|
||||
auto format = static_cast<uint32_t>(DebugManager.flags.FormatForStatelessCompressionWithUnifiedMemory.get());
|
||||
UNRECOVERABLE_IF(format > 0x1F);
|
||||
|
||||
uint32_t value = (format << 3); // [3:7] compression_format
|
||||
value |= 0; // [0] disable
|
||||
this->writeMMIO(0x519C, value);
|
||||
this->writeMMIO(0xB0F0, value);
|
||||
this->writeMMIO(0xE4C0, value);
|
||||
|
||||
bool ret = this->expectMemory(gfxAddress, srcAddress, length,
|
||||
AubMemDump::CmdServicesMemTraceMemoryCompare::CompareOperationValues::CompareNotEqual);
|
||||
|
||||
value |= 1; // [0] enable
|
||||
this->writeMMIO(0x519C, value);
|
||||
this->writeMMIO(0xB0F0, value);
|
||||
this->writeMMIO(0xE4C0, value);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
template class CommandStreamReceiverSimulatedCommonHw<Family>;
|
||||
} // namespace NEO
|
|
@ -0,0 +1,11 @@
|
|||
/*
|
||||
* Copyright (C) 2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
template <>
|
||||
bool CompilerHwInfoConfigHw<IGFX_PVC>::isForceToStatelessRequired() const {
|
||||
return true;
|
||||
}
|
|
@ -0,0 +1,22 @@
|
|||
/*
|
||||
* Copyright (C) 2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/command_stream/device_command_stream.h"
|
||||
|
||||
#include "create_command_stream_receiver.inl"
|
||||
|
||||
namespace NEO {
|
||||
|
||||
template <>
|
||||
CommandStreamReceiver *createDeviceCommandStreamReceiver<XE_HPC_COREFamily>(bool withAubDump,
|
||||
ExecutionEnvironment &executionEnvironment,
|
||||
uint32_t rootDeviceIndex,
|
||||
const DeviceBitfield deviceBitfield) {
|
||||
return createCommandStreamReceiver<XE_HPC_COREFamily>(withAubDump, executionEnvironment, rootDeviceIndex, deviceBitfield);
|
||||
}
|
||||
|
||||
} // namespace NEO
|
|
@ -0,0 +1,6 @@
|
|||
/*
|
||||
* Copyright (C) 2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
|
@ -0,0 +1,20 @@
|
|||
/*
|
||||
* Copyright (C) 2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
const HardwareInfo PVC::hwInfo = PVC_CONFIG::hwInfo;
|
||||
const uint64_t PVC::defaultHardwareInfoConfig = 0;
|
||||
|
||||
// Fills `hwInfo` for PVC. Only the default config value (0x0) is handled, by
// delegating to PVC_CONFIG::setupHardwareInfo; any other `hwInfoConfig`
// triggers UNRECOVERABLE_IF.
void setupPVCHardwareInfoImpl(HardwareInfo *hwInfo, bool setupFeatureTableAndWorkaroundTable, uint64_t hwInfoConfig) {
    const bool isDefaultConfig = (hwInfoConfig == 0x0);
    if (!isDefaultConfig) {
        UNRECOVERABLE_IF(true);
        return;
    }

    // Default config
    PVC_CONFIG::setupHardwareInfo(hwInfo, setupFeatureTableAndWorkaroundTable);
}
|
||||
|
||||
void (*PVC::setupHardwareInfo)(HardwareInfo *, bool, const uint64_t) = setupPVCHardwareInfoImpl;
|
|
@ -0,0 +1,25 @@
|
|||
/*
|
||||
* Copyright (C) 2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/helpers/hw_helper.h"
|
||||
#include "shared/source/xe_hpc_core/hw_cmds.h"
|
||||
|
||||
namespace NEO {
|
||||
|
||||
extern HwHelper *hwHelperFactory[IGFX_MAX_CORE];
|
||||
|
||||
using Family = XE_HPC_COREFamily;
|
||||
static auto gfxFamily = IGFX_XE_HPC_CORE;
|
||||
|
||||
struct EnableCoreXeHpcCore {
|
||||
EnableCoreXeHpcCore() {
|
||||
hwHelperFactory[gfxFamily] = &HwHelperHw<Family>::get();
|
||||
}
|
||||
};
|
||||
|
||||
static EnableCoreXeHpcCore enable;
|
||||
} // namespace NEO
|
|
@ -0,0 +1,15 @@
|
|||
/*
|
||||
* Copyright (C) 2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/os_interface/hw_info_config.h"
|
||||
#include "shared/source/xe_hpc_core/hw_cmds.h"
|
||||
|
||||
namespace NEO {
|
||||
|
||||
static EnableProductHwInfoConfig<IGFX_PVC> enablePVC;
|
||||
|
||||
} // namespace NEO
|
|
@ -0,0 +1,21 @@
|
|||
/*
|
||||
* Copyright (C) 2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/helpers/compiler_hw_info_config_base.inl"
|
||||
#include "shared/source/helpers/compiler_hw_info_config_bdw_and_later.inl"
|
||||
#include "shared/source/helpers/enable_product.inl"
|
||||
#include "shared/source/os_interface/hw_info_config.h"
|
||||
#include "shared/source/xe_hpc_core/hw_cmds.h"
|
||||
|
||||
namespace NEO {
|
||||
|
||||
#ifdef SUPPORT_PVC
|
||||
static EnableGfxProductHw<IGFX_PVC> enableGfxProductHwPVC;
|
||||
static EnableCompilerHwInfoConfig<IGFX_PVC> enableCompilerHwInfoConfigPVC;
|
||||
#endif
|
||||
|
||||
} // namespace NEO
|
|
@ -0,0 +1,24 @@
|
|||
/*
|
||||
* Copyright (C) 2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/command_stream/experimental_command_buffer.inl"
|
||||
|
||||
namespace NEO {
|
||||
using GfxFamily = XE_HPC_COREFamily;
|
||||
|
||||
template void ExperimentalCommandBuffer::injectBufferStart<GfxFamily>(LinearStream &parentStream, size_t cmdBufferOffset);
|
||||
template size_t ExperimentalCommandBuffer::getRequiredInjectionSize<GfxFamily>() noexcept;
|
||||
|
||||
template size_t ExperimentalCommandBuffer::programExperimentalCommandBuffer<GfxFamily>();
|
||||
template size_t ExperimentalCommandBuffer::getTotalExperimentalSize<GfxFamily>() noexcept;
|
||||
|
||||
template void ExperimentalCommandBuffer::addTimeStampPipeControl<GfxFamily>();
|
||||
template size_t ExperimentalCommandBuffer::getTimeStampPipeControlSize<GfxFamily>() noexcept;
|
||||
|
||||
template void ExperimentalCommandBuffer::addExperimentalCommands<GfxFamily>();
|
||||
template size_t ExperimentalCommandBuffer::getExperimentalCommandsSize<GfxFamily>() noexcept;
|
||||
} // namespace NEO
|
|
@ -0,0 +1,11 @@
|
|||
/*
|
||||
* Copyright (C) 2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#ifdef SUPPORT_PVC
|
||||
#include "hw_cmds_pvc.h"
|
||||
#endif
|
|
@ -0,0 +1,130 @@
|
|||
/*
|
||||
* Copyright (C) 2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include "shared/source/commands/bxml_generator_glue.h"
|
||||
#include "shared/source/helpers/debug_helpers.h"
|
||||
#include "shared/source/xe_hpc_core/hw_info.h"
|
||||
|
||||
#include "igfxfmid.h"
|
||||
|
||||
#include <cstddef>
|
||||
#include <type_traits>
|
||||
|
||||
template <class T>
|
||||
struct CmdParse;
|
||||
namespace NEO {
|
||||
|
||||
struct XE_HPC_CORE {
|
||||
#include "shared/source/generated/xe_hpc_core/hw_cmds_generated_xe_hpc_core.inl"
|
||||
|
||||
static constexpr uint32_t stateComputeModeForceNonCoherentMask = (0b11u << 3);
|
||||
static constexpr uint32_t stateComputeModeEuThreadSchedulingModeOverrideMask = (0b11u << 13);
|
||||
static constexpr uint32_t stateComputeModeLargeGrfModeMask = (1u << 15);
|
||||
|
||||
static constexpr uint8_t pvcBaseDieRevMask = 0b111000; // [3:5]
|
||||
static constexpr uint8_t pvcBaseDieA0Masked = 0; // [3:5] == 0
|
||||
static constexpr uint32_t pvcSteppingBits = 0b111;
|
||||
static constexpr uint32_t pvcXlDeviceId = 0x0BD0;
|
||||
static constexpr uint32_t pvcXtDeviceIds[5] = {0x0BD5, 0x0BD6, 0x0BD7, 0x0BD8, 0x0BE1};
|
||||
static constexpr uint32_t pvcXtTemporaryDeviceId = 0x0BE5;
|
||||
|
||||
static constexpr bool isUsingL3Control = false;
|
||||
|
||||
static bool isXlA0(const HardwareInfo &hwInfo) {
|
||||
auto revId = hwInfo.platform.usRevId & pvcSteppingBits;
|
||||
return (revId < 0x3) && !isXtTemporary(hwInfo);
|
||||
}
|
||||
|
||||
static bool isAtMostXtA0(const HardwareInfo &hwInfo) {
|
||||
auto revId = hwInfo.platform.usRevId & pvcSteppingBits;
|
||||
return (revId <= 0x3) && !isXtTemporary(hwInfo);
|
||||
}
|
||||
|
||||
static bool isXtTemporary(const HardwareInfo &hwInfo) {
|
||||
return hwInfo.platform.usDeviceID == pvcXtTemporaryDeviceId;
|
||||
}
|
||||
|
||||
// Extended message descriptor carrying a bindless surface offset.
// The offset is stored in 64-byte units (byte offset >> 6), so 26 bits are
// needed to represent any 32-bit byte offset; together with the 6 reserved
// bits the bit-fields cover the whole 32-bit `packed` word.
struct DataPortBindlessSurfaceExtendedMessageDescriptor {
    union {
        struct {
            uint32_t bindlessSurfaceOffset : 26;
            uint32_t reserved : 6;
        };
        uint32_t packed;
    };

    // Starts with all bits cleared.
    DataPortBindlessSurfaceExtendedMessageDescriptor() {
        packed = 0;
    }

    // Stores a byte offset into the bindless surface heap; the low 6 bits of
    // the offset are discarded.
    void setBindlessSurfaceOffset(uint32_t offsetInBindlessSurfaceHeapInBytes) {
        bindlessSurfaceOffset = offsetInBindlessSurfaceHeapInBytes >> 6;
    }

    // Returns the stored offset converted back to bytes (value << 6).
    uint32_t getBindlessSurfaceOffsetToPatch() {
        // Cast first: the bit-field would otherwise be promoted to int and the
        // shift could overflow a signed value.
        return static_cast<uint32_t>(bindlessSurfaceOffset) << 6;
    }
};
|
||||
|
||||
static_assert(sizeof(DataPortBindlessSurfaceExtendedMessageDescriptor) == sizeof(DataPortBindlessSurfaceExtendedMessageDescriptor::packed), "");
|
||||
};
|
||||
|
||||
struct XE_HPC_COREFamily : public XE_HPC_CORE {
|
||||
using PARSE = CmdParse<XE_HPC_COREFamily>;
|
||||
using GfxFamily = XE_HPC_COREFamily;
|
||||
using WALKER_TYPE = COMPUTE_WALKER;
|
||||
using VFE_STATE_TYPE = CFE_STATE;
|
||||
using XY_COPY_BLT = typename GfxFamily::MEM_COPY;
|
||||
using XY_COLOR_BLT = typename GfxFamily::XY_FAST_COLOR_BLT;
|
||||
using MI_STORE_REGISTER_MEM_CMD = typename GfxFamily::MI_STORE_REGISTER_MEM;
|
||||
using TimestampPacketType = uint32_t;
|
||||
static const COMPUTE_WALKER cmdInitGpgpuWalker;
|
||||
static const CFE_STATE cmdInitCfeState;
|
||||
static const INTERFACE_DESCRIPTOR_DATA cmdInitInterfaceDescriptorData;
|
||||
static const MI_BATCH_BUFFER_END cmdInitBatchBufferEnd;
|
||||
static const MI_BATCH_BUFFER_START cmdInitBatchBufferStart;
|
||||
static const PIPE_CONTROL cmdInitPipeControl;
|
||||
static const STATE_COMPUTE_MODE cmdInitStateComputeMode;
|
||||
static const _3DSTATE_BINDING_TABLE_POOL_ALLOC cmdInitStateBindingTablePoolAlloc;
|
||||
static const MI_SEMAPHORE_WAIT cmdInitMiSemaphoreWait;
|
||||
static const RENDER_SURFACE_STATE cmdInitRenderSurfaceState;
|
||||
static const POSTSYNC_DATA cmdInitPostSyncData;
|
||||
static const MI_SET_PREDICATE cmdInitSetPredicate;
|
||||
static const MI_LOAD_REGISTER_IMM cmdInitLoadRegisterImm;
|
||||
static const MI_LOAD_REGISTER_REG cmdInitLoadRegisterReg;
|
||||
static const MI_LOAD_REGISTER_MEM cmdInitLoadRegisterMem;
|
||||
static const MI_STORE_DATA_IMM cmdInitStoreDataImm;
|
||||
static const MI_STORE_REGISTER_MEM cmdInitStoreRegisterMem;
|
||||
static const MI_NOOP cmdInitNoop;
|
||||
static const MI_REPORT_PERF_COUNT cmdInitReportPerfCount;
|
||||
static const MI_ATOMIC cmdInitAtomic;
|
||||
static const PIPELINE_SELECT cmdInitPipelineSelect;
|
||||
static const MI_ARB_CHECK cmdInitArbCheck;
|
||||
static const STATE_BASE_ADDRESS cmdInitStateBaseAddress;
|
||||
static const MEDIA_SURFACE_STATE cmdInitMediaSurfaceState;
|
||||
static const SAMPLER_STATE cmdInitSamplerState;
|
||||
static const BINDING_TABLE_STATE cmdInitBindingTableState;
|
||||
static const MI_USER_INTERRUPT cmdInitUserInterrupt;
|
||||
static const MI_CONDITIONAL_BATCH_BUFFER_END cmdInitConditionalBatchBufferEnd;
|
||||
static const MI_FLUSH_DW cmdInitMiFlushDw;
|
||||
static const MEM_COPY cmdInitXyCopyBlt;
|
||||
static const XY_FAST_COLOR_BLT cmdInitXyColorBlt;
|
||||
static const STATE_PREFETCH cmdInitStatePrefetch;
|
||||
static const _3DSTATE_BTD cmd3dStateBtd;
|
||||
static const _3DSTATE_BTD_BODY cmd3dStateBtdBody;
|
||||
static const MI_MEM_FENCE cmdInitMemFence;
|
||||
static const MEM_SET cmdInitMemSet;
|
||||
static const STATE_SIP cmdInitStateSip;
|
||||
static const STATE_SYSTEM_MEM_FENCE_ADDRESS cmdInitStateSystemMemFenceAddress;
|
||||
|
||||
// True only when the queried base command-set family is IGFX_XE_HP_CORE.
static constexpr bool supportsCmdSet(GFXCORE_FAMILY cmdSetBaseFamily) {
    return IGFX_XE_HP_CORE == cmdSetBaseFamily;
}
|
||||
};
|
||||
|
||||
} // namespace NEO
|
|
@ -0,0 +1,42 @@
|
|||
/*
|
||||
* Copyright (C) 2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include "shared/source/xe_hpc_core/hw_cmds_base.h"
|
||||
namespace NEO {
|
||||
|
||||
struct PVC : public XE_HPC_COREFamily {
|
||||
static const PLATFORM platform;
|
||||
static const HardwareInfo hwInfo;
|
||||
static const uint64_t defaultHardwareInfoConfig;
|
||||
static FeatureTable featureTable;
|
||||
static WorkaroundTable workaroundTable;
|
||||
// Initial non-zero values for unit tests
|
||||
static const uint32_t threadsPerEu = 8;
|
||||
static const uint32_t maxEuPerSubslice = 8;
|
||||
static const uint32_t maxSlicesSupported = 8;
|
||||
static const uint32_t maxSubslicesSupported = 64;
|
||||
static const uint32_t maxDualSubslicesSupported = 64;
|
||||
static const RuntimeCapabilityTable capabilityTable;
|
||||
static void (*setupHardwareInfo)(HardwareInfo *hwInfo, bool setupFeatureTableAndWorkaroundTable, uint64_t hwInfoConfig);
|
||||
static void setupFeatureAndWorkaroundTable(HardwareInfo *hwInfo);
|
||||
static void setupHardwareInfoBase(HardwareInfo *hwInfo, bool setupFeatureTableAndWorkaroundTable, bool setupMultiTile);
|
||||
static void adjustHardwareInfo(HardwareInfo *hwInfo);
|
||||
};
|
||||
|
||||
class PVC_CONFIG : public PVC {
|
||||
public:
|
||||
static void setupHardwareInfo(HardwareInfo *hwInfo, bool setupFeatureTableAndWorkaroundTable);
|
||||
static void setupHardwareInfoMultiTile(HardwareInfo *hwInfo, bool setupFeatureTableAndWorkaroundTable, bool setupMultiTile);
|
||||
static const HardwareInfo hwInfo;
|
||||
|
||||
private:
|
||||
static GT_SYSTEM_INFO gtSystemInfo;
|
||||
};
|
||||
|
||||
#include "hw_cmds_pvc.inl"
|
||||
} // namespace NEO
|
|
@ -0,0 +1,434 @@
|
|||
/*
|
||||
* Copyright (C) 2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/xe_hpc_core/aub_mapper.h"
|
||||
#include "shared/source/xe_hpc_core/hw_cmds.h"
|
||||
|
||||
using Family = NEO::XE_HPC_COREFamily;
|
||||
|
||||
#include "shared/source/command_container/command_encoder.h"
|
||||
#include "shared/source/debug_settings/debug_settings_manager.h"
|
||||
#include "shared/source/helpers/flat_batch_buffer_helper_hw.inl"
|
||||
#include "shared/source/helpers/hw_helper_base.inl"
|
||||
#include "shared/source/helpers/hw_helper_dg2_and_later.inl"
|
||||
#include "shared/source/helpers/hw_helper_tgllp_and_later.inl"
|
||||
#include "shared/source/helpers/hw_helper_xehp_and_later.inl"
|
||||
|
||||
namespace NEO {
|
||||
|
||||
template <>
|
||||
const AuxTranslationMode HwHelperHw<Family>::defaultAuxTranslationMode = AuxTranslationMode::Blit;
|
||||
|
||||
template <>
|
||||
bool HwHelperHw<Family>::isCooperativeEngineSupported(const HardwareInfo &hwInfo) const {
|
||||
return (HwInfoConfig::get(hwInfo.platform.eProductFamily)->getSteppingFromHwRevId(hwInfo) >= REVISION_B);
|
||||
}
|
||||
|
||||
template <>
|
||||
const EngineInstancesContainer HwHelperHw<Family>::getGpgpuEngineInstances(const HardwareInfo &hwInfo) const {
|
||||
auto defaultEngine = getChosenEngineType(hwInfo);
|
||||
|
||||
EngineInstancesContainer engines;
|
||||
|
||||
if (hwInfo.featureTable.flags.ftrCCSNode) {
|
||||
for (uint32_t i = 0; i < hwInfo.gtSystemInfo.CCSInfo.NumberOfCCSEnabled; i++) {
|
||||
engines.push_back({static_cast<aub_stream::EngineType>(i + aub_stream::ENGINE_CCS), EngineUsage::Regular});
|
||||
if (isCooperativeEngineSupported(hwInfo)) {
|
||||
engines.push_back({static_cast<aub_stream::EngineType>(i + aub_stream::ENGINE_CCS), EngineUsage::Cooperative});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if ((DebugManager.flags.NodeOrdinal.get() == static_cast<int32_t>(aub_stream::EngineType::ENGINE_CCCS)) ||
|
||||
hwInfo.featureTable.flags.ftrRcsNode) {
|
||||
engines.push_back({aub_stream::ENGINE_CCCS, EngineUsage::Regular});
|
||||
}
|
||||
|
||||
engines.push_back({defaultEngine, EngineUsage::LowPriority});
|
||||
engines.push_back({defaultEngine, EngineUsage::Internal});
|
||||
|
||||
if (hwInfo.capabilityTable.blitterOperationsSupported) {
|
||||
if (hwInfo.featureTable.ftrBcsInfo.test(0)) {
|
||||
engines.push_back({aub_stream::EngineType::ENGINE_BCS, EngineUsage::Regular}); // Main copy engine
|
||||
engines.push_back({aub_stream::EngineType::ENGINE_BCS, EngineUsage::Internal}); // internal usage
|
||||
}
|
||||
|
||||
for (uint32_t i = 1; i < hwInfo.featureTable.ftrBcsInfo.size(); i++) {
|
||||
if (hwInfo.featureTable.ftrBcsInfo.test(i)) {
|
||||
auto engineType = static_cast<aub_stream::EngineType>((i - 1) + aub_stream::ENGINE_BCS1); // Link copy engine
|
||||
engines.push_back({engineType, EngineUsage::Regular});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return engines;
|
||||
};
|
||||
|
||||
template <>
|
||||
EngineGroupType HwHelperHw<Family>::getEngineGroupType(aub_stream::EngineType engineType, EngineUsage engineUsage, const HardwareInfo &hwInfo) const {
|
||||
if (engineType == aub_stream::ENGINE_CCCS) {
|
||||
return EngineGroupType::RenderCompute;
|
||||
}
|
||||
if (engineType >= aub_stream::ENGINE_CCS && engineType < (aub_stream::ENGINE_CCS + hwInfo.gtSystemInfo.CCSInfo.NumberOfCCSEnabled)) {
|
||||
if (engineUsage == EngineUsage::Cooperative) {
|
||||
return EngineGroupType::CooperativeCompute;
|
||||
}
|
||||
return EngineGroupType::Compute;
|
||||
}
|
||||
if (engineType == aub_stream::ENGINE_BCS) {
|
||||
return EngineGroupType::Copy;
|
||||
}
|
||||
if (engineType >= aub_stream::ENGINE_BCS1 && engineType < aub_stream::ENGINE_BCS1 + hwInfo.featureTable.ftrBcsInfo.size() - 1) {
|
||||
return EngineGroupType::LinkedCopy;
|
||||
}
|
||||
UNRECOVERABLE_IF(true);
|
||||
}
|
||||
|
||||
template <>
|
||||
void HwHelperHw<Family>::adjustDefaultEngineType(HardwareInfo *pHwInfo) {
|
||||
if (!pHwInfo->featureTable.flags.ftrCCSNode) {
|
||||
pHwInfo->capabilityTable.defaultEngineType = aub_stream::EngineType::ENGINE_CCCS;
|
||||
}
|
||||
}
|
||||
|
||||
template <>
|
||||
bool HwHelperHw<Family>::tilingAllowed(bool isSharedContext, bool isImage1d, bool forceLinearStorage) {
|
||||
return false;
|
||||
}
|
||||
|
||||
template <>
|
||||
uint32_t HwHelperHw<Family>::getBarriersCountFromHasBarriers(uint32_t hasBarriers) {
|
||||
static constexpr uint32_t possibleBarriersCounts[] = {
|
||||
0u, // 0
|
||||
1u, // 1
|
||||
2u, // 2
|
||||
4u, // 3
|
||||
8u, // 4
|
||||
16u, // 5
|
||||
24u, // 6
|
||||
32u, // 7
|
||||
};
|
||||
return possibleBarriersCounts[hasBarriers];
|
||||
}
|
||||
|
||||
template <>
|
||||
uint32_t HwHelperHw<Family>::calculateAvailableThreadCount(PRODUCT_FAMILY family, uint32_t grfCount, uint32_t euCount,
|
||||
uint32_t threadsPerEu) {
|
||||
auto maxThreadsPerEuCount = 1024u / grfCount;
|
||||
return maxThreadsPerEuCount * euCount;
|
||||
}
|
||||
|
||||
template <>
|
||||
uint32_t HwHelperHw<Family>::getMetricsLibraryGenId() const {
|
||||
return static_cast<uint32_t>(MetricsLibraryApi::ClientGen::XeHPC);
|
||||
}
|
||||
|
||||
template <>
|
||||
uint32_t HwHelperHw<Family>::getMinimalSIMDSize() {
|
||||
return 16u;
|
||||
}
|
||||
|
||||
template <>
|
||||
bool HwHelperHw<Family>::isFenceAllocationRequired(const HardwareInfo &hwInfo) const {
|
||||
if ((DebugManager.flags.ProgramGlobalFenceAsMiMemFenceCommandInCommandStream.get() == 0) &&
|
||||
(DebugManager.flags.ProgramGlobalFenceAsPostSyncOperationInComputeWalker.get() == 0) &&
|
||||
(DebugManager.flags.ProgramGlobalFenceAsKernelInstructionInEUKernel.get() == 0)) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
template <>
|
||||
uint32_t HwHelperHw<Family>::getMocsIndex(const GmmHelper &gmmHelper, bool l3enabled, bool l1enabled) const {
|
||||
if (l3enabled) {
|
||||
return gmmHelper.getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER) >> 1;
|
||||
}
|
||||
return gmmHelper.getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED) >> 1;
|
||||
}
|
||||
|
||||
template <>
|
||||
const StackVec<size_t, 3> HwHelperHw<Family>::getDeviceSubGroupSizes() const {
|
||||
return {16, 32};
|
||||
}
|
||||
|
||||
template <>
|
||||
const StackVec<uint32_t, 6> HwHelperHw<Family>::getThreadsPerEUConfigs() const {
|
||||
return {4, 8};
|
||||
}
|
||||
|
||||
template <>
|
||||
uint32_t HwHelperHw<Family>::getMaxNumSamplers() const {
|
||||
return 0;
|
||||
}
|
||||
|
||||
template <>
|
||||
size_t HwHelperHw<Family>::getPaddingForISAAllocation() const {
|
||||
return 3584;
|
||||
}
|
||||
|
||||
template <>
|
||||
size_t MemorySynchronizationCommands<Family>::getSizeForSingleAdditionalSynchronization(const HardwareInfo &hwInfo) {
|
||||
auto programGlobalFenceAsMiMemFenceCommandInCommandStream = !Family::isXlA0(hwInfo);
|
||||
if (DebugManager.flags.ProgramGlobalFenceAsMiMemFenceCommandInCommandStream.get() != -1) {
|
||||
programGlobalFenceAsMiMemFenceCommandInCommandStream = !!DebugManager.flags.ProgramGlobalFenceAsMiMemFenceCommandInCommandStream.get();
|
||||
}
|
||||
|
||||
if (programGlobalFenceAsMiMemFenceCommandInCommandStream) {
|
||||
return sizeof(Family::MI_MEM_FENCE);
|
||||
} else {
|
||||
return EncodeSempahore<Family>::getSizeMiSemaphoreWait();
|
||||
}
|
||||
}
|
||||
|
||||
template <>
|
||||
void MemorySynchronizationCommands<Family>::setAdditionalSynchronization(void *&commandsBuffer, uint64_t gpuAddress, const HardwareInfo &hwInfo) {
|
||||
using MI_MEM_FENCE = typename Family::MI_MEM_FENCE;
|
||||
using MI_SEMAPHORE_WAIT = typename Family::MI_SEMAPHORE_WAIT;
|
||||
auto programGlobalFenceAsMiMemFenceCommandInCommandStream = !Family::isXlA0(hwInfo);
|
||||
if (DebugManager.flags.ProgramGlobalFenceAsMiMemFenceCommandInCommandStream.get() != -1) {
|
||||
programGlobalFenceAsMiMemFenceCommandInCommandStream = !!DebugManager.flags.ProgramGlobalFenceAsMiMemFenceCommandInCommandStream.get();
|
||||
}
|
||||
if (programGlobalFenceAsMiMemFenceCommandInCommandStream) {
|
||||
MI_MEM_FENCE miMemFence = Family::cmdInitMemFence;
|
||||
miMemFence.setFenceType(Family::MI_MEM_FENCE::FENCE_TYPE::FENCE_TYPE_RELEASE);
|
||||
*reinterpret_cast<MI_MEM_FENCE *>(commandsBuffer) = miMemFence;
|
||||
commandsBuffer = ptrOffset(commandsBuffer, sizeof(MI_MEM_FENCE));
|
||||
} else {
|
||||
EncodeSempahore<Family>::programMiSemaphoreWait(reinterpret_cast<MI_SEMAPHORE_WAIT *>(commandsBuffer),
|
||||
gpuAddress,
|
||||
EncodeSempahore<Family>::invalidHardwareTag,
|
||||
MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_NOT_EQUAL_SDD,
|
||||
false);
|
||||
commandsBuffer = ptrOffset(commandsBuffer, EncodeSempahore<Family>::getSizeMiSemaphoreWait());
|
||||
}
|
||||
}
|
||||
|
||||
template <>
|
||||
bool MemorySynchronizationCommands<Family>::isPipeControlWArequired(const HardwareInfo &hwInfo) {
|
||||
if (DebugManager.flags.DisablePipeControlPrecedingPostSyncCommand.get() == 1) {
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
template <>
|
||||
bool MemorySynchronizationCommands<Family>::isDcFlushAllowed() {
|
||||
return false;
|
||||
}
|
||||
|
||||
template <>
|
||||
size_t MemorySynchronizationCommands<Family>::getSizeForAdditonalSynchronization(const HardwareInfo &hwInfo) {
|
||||
return (DebugManager.flags.DisablePipeControlPrecedingPostSyncCommand.get() == 1 ? 2 : 1) * getSizeForSingleAdditionalSynchronization(hwInfo);
|
||||
}
|
||||
|
||||
template <>
|
||||
void HwHelperHw<Family>::setL1CachePolicy(bool useL1Cache, typename Family::RENDER_SURFACE_STATE *surfaceState, const HardwareInfo *hwInfo) {
|
||||
if (useL1Cache) {
|
||||
surfaceState->setL1CachePolicyL1CacheControl(Family::RENDER_SURFACE_STATE::L1_CACHE_POLICY_WB);
|
||||
if (DebugManager.flags.OverrideL1CacheControlInSurfaceStateForScratchSpace.get() != -1) {
|
||||
surfaceState->setL1CachePolicyL1CacheControl(static_cast<typename Family::RENDER_SURFACE_STATE::L1_CACHE_POLICY>(DebugManager.flags.OverrideL1CacheControlInSurfaceStateForScratchSpace.get()));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <>
|
||||
void HwHelperHw<Family>::setExtraAllocationData(AllocationData &allocationData, const AllocationProperties &properties, const HardwareInfo &hwInfo) const {
|
||||
if (properties.allocationType == GraphicsAllocation::AllocationType::TIMESTAMP_PACKET_TAG_BUFFER || properties.allocationType == GraphicsAllocation::AllocationType::COMMAND_BUFFER) {
|
||||
allocationData.flags.useSystemMemory = false;
|
||||
}
|
||||
if (properties.allocationType == GraphicsAllocation::AllocationType::COMMAND_BUFFER && properties.subDevicesBitfield.count() == 1) {
|
||||
allocationData.flags.useSystemMemory = true;
|
||||
}
|
||||
allocationData.cacheRegion = properties.cacheRegion;
|
||||
|
||||
if (allocationData.flags.requiresCpuAccess && !allocationData.flags.useSystemMemory &&
|
||||
(allocationData.storageInfo.getMemoryBanks() > 1)) {
|
||||
|
||||
bool bdA0 = ((hwInfo.platform.usRevId & Family::pvcBaseDieRevMask) == Family::pvcBaseDieA0Masked);
|
||||
bool applyWa = ((DebugManager.flags.ForceTile0PlacementForTile1ResourcesWaActive.get() == 1) || bdA0);
|
||||
applyWa &= (DebugManager.flags.ForceTile0PlacementForTile1ResourcesWaActive.get() != 0);
|
||||
|
||||
if (applyWa) {
|
||||
allocationData.storageInfo.memoryBanks = 1; // force Tile0
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <>
|
||||
uint32_t HwHelperHw<Family>::getNumCacheRegions() const {
|
||||
if (DebugManager.flags.ClosEnabled.get() == 0) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
constexpr uint32_t numSharedCacheRegions = 1;
|
||||
constexpr uint32_t numReservedCacheRegions = 2;
|
||||
constexpr uint32_t numTotalCacheRegions = numSharedCacheRegions + numReservedCacheRegions;
|
||||
return numTotalCacheRegions;
|
||||
}
|
||||
|
||||
template <>
|
||||
std::string HwHelperHw<Family>::getExtensions() const {
|
||||
std::string extensions;
|
||||
|
||||
extensions += "cl_intel_create_buffer_with_properties ";
|
||||
extensions += "cl_intel_dot_accumulate ";
|
||||
extensions += "cl_intel_global_float_atomics ";
|
||||
extensions += "cl_intel_subgroup_local_block_io ";
|
||||
extensions += "cl_intel_subgroup_matrix_multiply_accumulate_for_PVC ";
|
||||
extensions += "cl_khr_subgroup_named_barrier ";
|
||||
extensions += "cl_intel_subgroup_extended_block_read ";
|
||||
|
||||
return extensions;
|
||||
}
|
||||
|
||||
template <>
|
||||
uint32_t HwHelperHw<Family>::alignSlmSize(uint32_t slmSize) {
|
||||
const uint32_t alignedSlmSizes[] = {
|
||||
0u,
|
||||
1u * KB,
|
||||
2u * KB,
|
||||
4u * KB,
|
||||
8u * KB,
|
||||
16u * KB,
|
||||
24u * KB,
|
||||
32u * KB,
|
||||
48u * KB,
|
||||
64u * KB,
|
||||
96u * KB,
|
||||
128u * KB,
|
||||
};
|
||||
|
||||
for (auto &alignedSlmSize : alignedSlmSizes) {
|
||||
if (slmSize <= alignedSlmSize) {
|
||||
return alignedSlmSize;
|
||||
}
|
||||
}
|
||||
|
||||
UNRECOVERABLE_IF(true);
|
||||
return 0;
|
||||
}
|
||||
|
||||
template <>
|
||||
uint32_t HwHelperHw<Family>::computeSlmValues(const HardwareInfo &hwInfo, uint32_t slmSize) {
|
||||
using SHARED_LOCAL_MEMORY_SIZE = typename Family::INTERFACE_DESCRIPTOR_DATA::SHARED_LOCAL_MEMORY_SIZE;
|
||||
if (slmSize == 0u) {
|
||||
return SHARED_LOCAL_MEMORY_SIZE::SHARED_LOCAL_MEMORY_SIZE_ENCODES_0K;
|
||||
}
|
||||
|
||||
UNRECOVERABLE_IF(slmSize > 128u * KB);
|
||||
|
||||
if (slmSize > 96u * KB) {
|
||||
return SHARED_LOCAL_MEMORY_SIZE::SHARED_LOCAL_MEMORY_SIZE_ENCODES_128K;
|
||||
}
|
||||
if (slmSize > 64u * KB) {
|
||||
return SHARED_LOCAL_MEMORY_SIZE::SHARED_LOCAL_MEMORY_SIZE_ENCODES_96K;
|
||||
}
|
||||
if (slmSize > 48u * KB) {
|
||||
return SHARED_LOCAL_MEMORY_SIZE::SHARED_LOCAL_MEMORY_SIZE_ENCODES_64K;
|
||||
}
|
||||
if (slmSize > 32u * KB) {
|
||||
return SHARED_LOCAL_MEMORY_SIZE::SHARED_LOCAL_MEMORY_SIZE_ENCODES_48K;
|
||||
}
|
||||
if (slmSize > 24u * KB) {
|
||||
return SHARED_LOCAL_MEMORY_SIZE::SHARED_LOCAL_MEMORY_SIZE_ENCODES_32K;
|
||||
}
|
||||
if (slmSize > 16u * KB) {
|
||||
return SHARED_LOCAL_MEMORY_SIZE::SHARED_LOCAL_MEMORY_SIZE_ENCODES_24K;
|
||||
}
|
||||
if (slmSize > 8u * KB) {
|
||||
return SHARED_LOCAL_MEMORY_SIZE::SHARED_LOCAL_MEMORY_SIZE_ENCODES_16K;
|
||||
}
|
||||
if (slmSize > 4u * KB) {
|
||||
return SHARED_LOCAL_MEMORY_SIZE::SHARED_LOCAL_MEMORY_SIZE_ENCODES_8K;
|
||||
}
|
||||
if (slmSize > 2u * KB) {
|
||||
return SHARED_LOCAL_MEMORY_SIZE::SHARED_LOCAL_MEMORY_SIZE_ENCODES_4K;
|
||||
}
|
||||
if (slmSize > 1u * KB) {
|
||||
return SHARED_LOCAL_MEMORY_SIZE::SHARED_LOCAL_MEMORY_SIZE_ENCODES_2K;
|
||||
}
|
||||
return SHARED_LOCAL_MEMORY_SIZE::SHARED_LOCAL_MEMORY_SIZE_ENCODES_1K;
|
||||
}
|
||||
|
||||
template <>
|
||||
uint32_t HwHelperHw<Family>::getDefaultThreadArbitrationPolicy() const {
|
||||
return ThreadArbitrationPolicy::RoundRobin;
|
||||
}
|
||||
|
||||
template <>
|
||||
bool HwHelperHw<Family>::isCopyOnlyEngineType(EngineGroupType type) const {
|
||||
return (EngineGroupType::Copy == type || EngineGroupType::LinkedCopy == type);
|
||||
}
|
||||
|
||||
template <>
|
||||
bool HwHelperHw<Family>::isSubDeviceEngineSupported(const HardwareInfo &hwInfo, const DeviceBitfield &deviceBitfield, aub_stream::EngineType engineType) const {
|
||||
constexpr uint64_t tile1Bitfield = 0b10;
|
||||
|
||||
bool isBaseDieA0 = (hwInfo.platform.usRevId & Family::pvcBaseDieRevMask) == Family::pvcBaseDieA0Masked;
|
||||
bool affectedEngine = (deviceBitfield.to_ulong() == tile1Bitfield) &&
|
||||
(aub_stream::ENGINE_BCS == engineType ||
|
||||
aub_stream::ENGINE_BCS1 == engineType ||
|
||||
aub_stream::ENGINE_BCS3 == engineType);
|
||||
|
||||
if (affectedEngine) {
|
||||
if (DebugManager.flags.DoNotReportTile1BscWaActive.get() != -1) {
|
||||
return !DebugManager.flags.DoNotReportTile1BscWaActive.get();
|
||||
}
|
||||
|
||||
return !isBaseDieA0;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
template <>
|
||||
inline bool HwHelperHw<Family>::isBlitCopyRequiredForLocalMemory(const HardwareInfo &hwInfo, const GraphicsAllocation &allocation) const {
|
||||
if (!allocation.isAllocatedInLocalMemoryPool()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (HwInfoConfig::get(hwInfo.platform.eProductFamily)->getLocalMemoryAccessMode(hwInfo) == LocalMemoryAccessMode::CpuAccessDisallowed) {
|
||||
// Regular L3 WA
|
||||
return true;
|
||||
}
|
||||
|
||||
if (!allocation.isAllocationLockable()) {
|
||||
return true;
|
||||
}
|
||||
|
||||
bool isBaseDieA0 = (hwInfo.platform.usRevId & Family::pvcBaseDieRevMask) == Family::pvcBaseDieA0Masked;
|
||||
bool isOtherTileThan0Accessed = allocation.storageInfo.memoryBanks.to_ulong() > 1u;
|
||||
if (isBaseDieA0 && isOtherTileThan0Accessed) {
|
||||
// Tile1 CPU access
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
template <>
|
||||
uint32_t HwHelperHw<Family>::getComputeUnitsUsedForScratch(const HardwareInfo *pHwInfo) const {
|
||||
if (DebugManager.flags.OverrideNumComputeUnitsForScratch.get() != -1) {
|
||||
return static_cast<uint32_t>(DebugManager.flags.OverrideNumComputeUnitsForScratch.get());
|
||||
}
|
||||
|
||||
auto revId = pHwInfo->platform.usRevId & Family::pvcSteppingBits;
|
||||
uint32_t threadEuRatio = ((0x3 <= revId) && (revId <= 0x6) && !Family::isXtTemporary(*pHwInfo)) ? 16 : 8;
|
||||
|
||||
return pHwInfo->gtSystemInfo.MaxSubSlicesSupported * pHwInfo->gtSystemInfo.MaxEuPerSubSlice * threadEuRatio;
|
||||
}
|
||||
|
||||
} // namespace NEO
|
||||
|
||||
#include "shared/source/helpers/hw_helper_pvc_and_later.inl"
|
||||
|
||||
namespace NEO {
|
||||
template class HwHelperHw<Family>;
|
||||
template class FlatBatchBufferHelperHw<Family>;
|
||||
template struct MemorySynchronizationCommands<Family>;
|
||||
template struct LriHelper<Family>;
|
||||
} // namespace NEO
|
|
@ -0,0 +1,11 @@
|
|||
/*
|
||||
* Copyright (C) 2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#ifdef SUPPORT_PVC
|
||||
#include "hw_info_pvc.h"
|
||||
#endif
|
|
@ -0,0 +1,193 @@
|
|||
/*
|
||||
* Copyright (C) 2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/aub_mem_dump/definitions/aub_services.h"
|
||||
#include "shared/source/debug_settings/debug_settings_manager.h"
|
||||
#include "shared/source/helpers/constants.h"
|
||||
#include "shared/source/unified_memory/usm_memory_support.h"
|
||||
#include "shared/source/xe_hpc_core/hw_cmds.h"
|
||||
|
||||
#include "engine_node.h"
|
||||
|
||||
namespace NEO {
|
||||
|
||||
const char *HwMapper<IGFX_PVC>::abbreviation = "pvc";
|
||||
|
||||
// Reports whether deviceId identifies a simulated PVC device.
// No device id is treated as a simulation here, so the result is always false.
// (The stray ';' that followed the function body was an empty declaration and has been dropped.)
bool isSimulationPVC(unsigned short deviceId) {
    return false;
}
|
||||
|
||||
// Default PLATFORM description for PVC: product IGFX_PVC on the IGFX_XE_HPC_CORE family,
// with device and revision ids zero-initialized.
const PLATFORM PVC::platform = {
    IGFX_PVC,
    PCH_UNKNOWN,
    IGFX_XE_HPC_CORE,
    IGFX_XE_HPC_CORE,
    PLATFORM_NONE, // default init
    0,             // usDeviceID
    0,             // usRevId. 0 sets the stepping to A0
    0,             // usDeviceID_PCH
    0,             // usRevId_PCH
    GTTYPE_UNDEFINED};
|
||||
|
||||
// Default runtime capability table for PVC. Each initializer is annotated with the
// RuntimeCapabilityTable field it populates.
const RuntimeCapabilityTable PVC::capabilityTable{
    EngineDirectSubmissionInitVec{
        {aub_stream::ENGINE_CCS, {true, false, false, true}},
        {aub_stream::ENGINE_CCS1, {true, false, true, true}},
        {aub_stream::ENGINE_CCS2, {true, false, true, true}},
        {aub_stream::ENGINE_CCS3, {true, false, true, true}},
        {aub_stream::ENGINE_BCS1, {true, false, true, true}},
        {aub_stream::ENGINE_BCS2, {true, false, true, true}},
        {aub_stream::ENGINE_BCS3, {true, false, true, true}},
        {aub_stream::ENGINE_BCS4, {true, false, true, true}},
        {aub_stream::ENGINE_BCS5, {true, false, true, true}},
        {aub_stream::ENGINE_BCS6, {true, false, true, true}},
        {aub_stream::ENGINE_BCS7, {true, false, true, true}},
        {aub_stream::ENGINE_BCS8, {true, false, true, true}}}, // directSubmissionEngines
    {0, 0, 0, false, false, false},                            // kmdNotifyProperties
    maxNBitValue(57),                                          // gpuAddressSpace
    0,                                                         // sharedSystemMemCapabilities
    83.333,                                                    // defaultProfilingTimerResolution
    MemoryConstants::pageSize,                                 // requiredPreemptionSurfaceSize
    &isSimulationPVC,                                          // isSimulation
    PreemptionMode::ThreadGroup,                               // defaultPreemptionMode
    aub_stream::ENGINE_CCS,                                    // defaultEngineType
    0,                                                         // maxRenderFrequency
    30,                                                        // clVersionSupport
    CmdServicesMemTraceVersion::DeviceValues::Pvc,             // aubDeviceId
    0,                                                         // extraQuantityThreadsPerEU
    128,                                                       // slmSize
    sizeof(PVC::GRF),                                          // grfSize
    36u,                                                       // timestampValidBits
    32u,                                                       // kernelTimestampValidBits
    false,                                                     // blitterOperationsSupported
    true,                                                      // ftrSupportsInteger64BitAtomics
    true,                                                      // ftrSupportsFP64
    true,                                                      // ftrSupports64BitMath
    true,                                                      // ftrSvm
    false,                                                     // ftrSupportsCoherency
    false,                                                     // ftrSupportsVmeAvcTextureSampler
    false,                                                     // ftrSupportsVmeAvcPreemption
    false,                                                     // ftrRenderCompressedBuffers
    false,                                                     // ftrRenderCompressedImages
    true,                                                      // ftr64KBpages
    true,                                                      // instrumentationEnabled
    "core",                                                    // platformType
    "",                                                        // deviceName
    false,                                                     // sourceLevelDebuggerSupported
    false,                                                     // supportsVme
    false,                                                     // supportCacheFlushAfterWalker
    false,                                                     // supportsImages
    false,                                                     // supportsDeviceEnqueue
    false,                                                     // supportsPipes
    true,                                                      // supportsOcl21Features
    true,                                                      // supportsOnDemandPageFaults
    true,                                                      // supportsIndependentForwardProgress
    false,                                                     // hostPtrTrackingEnabled
    true,                                                      // levelZeroSupported
    false,                                                     // isIntegratedDevice
    false,                                                     // supportsMediaBlock
    false                                                      // fusedEuEnabled
};
|
||||
|
||||
// Populates the feature and workaround tables of hwInfo with the PVC defaults.
// Only the flags listed below are written; every other flag keeps its current value.
// ftrL3IACoherency used to be assigned twice with the same value; it is now set once.
void PVC::setupFeatureAndWorkaroundTable(HardwareInfo *hwInfo) {
    FeatureTable *featureTable = &hwInfo->featureTable;
    WorkaroundTable *workaroundTable = &hwInfo->workaroundTable;

    featureTable->flags.ftrL3IACoherency = true;
    featureTable->flags.ftrLocalMemory = true;
    featureTable->flags.ftrLinearCCS = true;
    featureTable->flags.ftrFlatPhysCCS = true;
    featureTable->flags.ftrE2ECompression = false;
    featureTable->flags.ftrCCSNode = true;
    featureTable->flags.ftrCCSRing = true;
    featureTable->flags.ftrMultiTileArch = true;
    featureTable->flags.ftrCCSMultiInstance = true;

    featureTable->flags.ftrPPGTT = true;
    featureTable->flags.ftrSVM = true;
    featureTable->flags.ftrIA32eGfxPTEs = true;
    featureTable->flags.ftrStandardMipTailFormat = true;
    featureTable->flags.ftrTranslationTable = true;
    featureTable->flags.ftrUserModeTranslationTable = true;
    featureTable->flags.ftrTileMappedResource = true;
    featureTable->flags.ftrEnableGuC = true;
    featureTable->flags.ftrFbc = true;
    featureTable->flags.ftrFbc2AddressTranslation = true;
    featureTable->flags.ftrFbcBlitterTracking = true;
    featureTable->flags.ftrAstcHdr2D = true;
    featureTable->flags.ftrAstcLdr2D = true;

    featureTable->flags.ftr3dMidBatchPreempt = true;
    featureTable->flags.ftrGpGpuMidBatchPreempt = true;
    featureTable->flags.ftrGpGpuThreadGroupLevelPreempt = true;
    featureTable->flags.ftrPerCtxtPreemptionGranularityControl = true;

    featureTable->flags.ftrTileY = false;
    // Nine low bits set in the BCS info mask.
    featureTable->ftrBcsInfo = maxNBitValue(9);

    workaroundTable->flags.wa4kAlignUVOffsetNV12LinearSurface = true;
    workaroundTable->flags.waEnablePreemptionGranularityControlByUMD = true;
}
|
||||
|
||||
// Sets the shared system memory capabilities in hwInfo's capability table to
// access, concurrent access and concurrent atomic access.
void PVC::adjustHardwareInfo(HardwareInfo *hwInfo) {
    auto &capabilityTable = hwInfo->capabilityTable;
    capabilityTable.sharedSystemMemCapabilities = (UNIFIED_SHARED_MEMORY_ACCESS |
                                                   UNIFIED_SHARED_MEMORY_CONCURRENT_ACCESS |
                                                   UNIFIED_SHARED_MEMORY_CONCURRENT_ATOMIC_ACCESS);
}
|
||||
// Fills the GT system info of hwInfo with values common to PVC configurations and then
// calls PVC::adjustHardwareInfo.
// ThreadCount is derived from the EUCount already stored in hwInfo. The tile count is 1 unless the
// CreateMultipleSubDevices debug flag is positive, and the tile mask has that many low bits set.
// setupFeatureTableAndWorkaroundTable is not consulted here.
void PVC::setupHardwareInfoBase(HardwareInfo *hwInfo, bool setupFeatureTableAndWorkaroundTable, bool setupMultiTile) {
    auto &gtSysInfo = hwInfo->gtSystemInfo;

    gtSysInfo.ThreadCount = gtSysInfo.EUCount * PVC::threadsPerEu;
    gtSysInfo.MaxFillRate = 128;
    gtSysInfo.TotalVsThreads = 336;
    gtSysInfo.TotalHsThreads = 336;
    gtSysInfo.TotalDsThreads = 336;
    gtSysInfo.TotalGsThreads = 336;
    gtSysInfo.TotalPsThreadsWindowerRange = 64;
    gtSysInfo.CsrSizeInMb = 8;
    gtSysInfo.MaxEuPerSubSlice = PVC::maxEuPerSubslice;
    gtSysInfo.MaxSlicesSupported = PVC::maxSlicesSupported;
    gtSysInfo.MaxSubSlicesSupported = PVC::maxSubslicesSupported;
    gtSysInfo.MaxDualSubSlicesSupported = PVC::maxDualSubslicesSupported;
    gtSysInfo.IsL3HashModeEnabled = false;
    gtSysInfo.IsDynamicallyPopulated = false;

    auto &tileInfo = gtSysInfo.MultiTileArchInfo;
    tileInfo.IsValid = setupMultiTile;
    tileInfo.TileCount = 1;
    const auto requestedSubDevices = DebugManager.flags.CreateMultipleSubDevices.get();
    if (requestedSubDevices > 0) {
        tileInfo.TileCount = requestedSubDevices;
    }
    tileInfo.TileMask = static_cast<uint8_t>(maxNBitValue(tileInfo.TileCount));

    PVC::adjustHardwareInfo(hwInfo);
}
|
||||
|
||||
FeatureTable PVC::featureTable;
|
||||
WorkaroundTable PVC::workaroundTable;
|
||||
|
||||
// HardwareInfo for the PVC_CONFIG configuration, built from the PVC platform, feature table,
// workaround table and capability table together with this configuration's GT system info.
const HardwareInfo PVC_CONFIG::hwInfo = {
    &PVC::platform,
    &PVC::featureTable,
    &PVC::workaroundTable,
    &PVC_CONFIG::gtSystemInfo,
    PVC::capabilityTable,
};
|
||||
|
||||
GT_SYSTEM_INFO PVC_CONFIG::gtSystemInfo = {0};
|
||||
// Sets up hwInfo for this configuration by forwarding to setupHardwareInfoMultiTile
// with the multi-tile setup disabled.
void PVC_CONFIG::setupHardwareInfo(HardwareInfo *hwInfo, bool setupFeatureTableAndWorkaroundTable) {
    constexpr bool setupMultiTile = false;
    PVC_CONFIG::setupHardwareInfoMultiTile(hwInfo, setupFeatureTableAndWorkaroundTable, setupMultiTile);
}
|
||||
// Initializes the GT system info fields specific to this configuration and, when
// setupFeatureTableAndWorkaroundTable is true, fills the feature and workaround tables.
// setupMultiTile is not consulted here.
// (The stray ';' that followed the function body was an empty declaration and has been dropped.)
void PVC_CONFIG::setupHardwareInfoMultiTile(HardwareInfo *hwInfo, bool setupFeatureTableAndWorkaroundTable, bool setupMultiTile) {
    GT_SYSTEM_INFO *gtSysInfo = &hwInfo->gtSystemInfo;
    gtSysInfo->CsrSizeInMb = 8;
    gtSysInfo->IsL3HashModeEnabled = false;
    gtSysInfo->IsDynamicallyPopulated = false;

    if (setupFeatureTableAndWorkaroundTable) {
        PVC::setupFeatureAndWorkaroundTable(hwInfo);
    }
}
|
||||
|
||||
#include "hw_info_setup_pvc.inl"
|
||||
} // namespace NEO
|
|
@ -0,0 +1,23 @@
|
|||
/*
|
||||
* Copyright (C) 2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include "hw_info_xe_hpc_core.h"
|
||||
|
||||
namespace NEO {
|
||||
|
||||
struct PVC;
|
||||
|
||||
template <>
|
||||
struct HwMapper<IGFX_PVC> {
|
||||
enum { gfxFamily = IGFX_XE_HPC_CORE };
|
||||
|
||||
static const char *abbreviation;
|
||||
using GfxFamily = GfxFamilyMapper<static_cast<GFXCORE_FAMILY>(gfxFamily)>::GfxFamily;
|
||||
using GfxProduct = PVC;
|
||||
};
|
||||
} // namespace NEO
|
|
@ -0,0 +1,12 @@
|
|||
/*
|
||||
* Copyright (C) 2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/xe_hpc_core/hw_info_xe_hpc_core.h"
|
||||
|
||||
namespace NEO {
|
||||
const char *GfxFamilyMapper<IGFX_XE_HPC_CORE>::name = "XE_HPC_CORE";
|
||||
} // namespace NEO
|
|
@ -0,0 +1,20 @@
|
|||
/*
|
||||
* Copyright (C) 2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include "shared/source/helpers/hw_info.h"
|
||||
|
||||
namespace NEO {
|
||||
|
||||
struct XE_HPC_COREFamily;
|
||||
|
||||
template <>
|
||||
struct GfxFamilyMapper<IGFX_XE_HPC_CORE> {
|
||||
using GfxFamily = XE_HPC_COREFamily;
|
||||
static const char *name;
|
||||
};
|
||||
} // namespace NEO
|
|
@ -0,0 +1,19 @@
|
|||
/*
|
||||
* Copyright (C) 2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/image/image_surface_state.h"
|
||||
#include "shared/source/xe_hpc_core/hw_cmds_base.h"
|
||||
|
||||
namespace NEO {
|
||||
|
||||
using Family = XE_HPC_COREFamily;
|
||||
|
||||
// clang-format off
|
||||
#include "shared/source/image/image_bdw_and_later.inl"
|
||||
#include "shared/source/image/image_skl_and_later.inl"
|
||||
// clang-format on
|
||||
} // namespace NEO
|
|
@ -0,0 +1,19 @@
|
|||
/*
|
||||
* Copyright (C) 2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/command_container/implicit_scaling.h"
|
||||
#include "shared/source/command_container/implicit_scaling_xehp_and_later.inl"
|
||||
|
||||
namespace NEO {
|
||||
|
||||
using Family = XE_HPC_COREFamily;
|
||||
|
||||
template <>
|
||||
bool ImplicitScalingDispatch<Family>::pipeControlStallRequired = true;
|
||||
|
||||
template struct ImplicitScalingDispatch<Family>;
|
||||
} // namespace NEO
|
|
@ -0,0 +1,18 @@
|
|||
/*
|
||||
* Copyright (C) 2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/command_stream/command_stream_receiver_with_aub_dump.inl"
|
||||
#include "shared/source/os_interface/linux/device_command_stream.inl"
|
||||
#include "shared/source/os_interface/linux/drm_command_stream.inl"
|
||||
#include "shared/source/os_interface/linux/drm_command_stream_xehp_and_later.inl"
|
||||
|
||||
namespace NEO {
|
||||
|
||||
template class DeviceCommandStreamReceiver<XE_HPC_COREFamily>;
|
||||
template class DrmCommandStreamReceiver<XE_HPC_COREFamily>;
|
||||
template class CommandStreamReceiverWithAUBDump<DrmCommandStreamReceiver<XE_HPC_COREFamily>>;
|
||||
} // namespace NEO
|
|
@ -0,0 +1,31 @@
|
|||
/*
|
||||
* Copyright (C) 2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/direct_submission/direct_submission_hw.inl"
|
||||
#include "shared/source/direct_submission/direct_submission_prefetch_mitigation_xe_hp_core_and_later.inl"
|
||||
#include "shared/source/direct_submission/direct_submission_prefetcher_pvc_and_later.inl"
|
||||
#include "shared/source/direct_submission/direct_submission_xe_hp_core_and_later.inl"
|
||||
#include "shared/source/direct_submission/dispatchers/blitter_dispatcher.inl"
|
||||
#include "shared/source/direct_submission/dispatchers/dispatcher.inl"
|
||||
#include "shared/source/direct_submission/dispatchers/render_dispatcher.inl"
|
||||
#include "shared/source/direct_submission/linux/drm_direct_submission.inl"
|
||||
|
||||
#include "hw_cmds.h"
|
||||
|
||||
namespace NEO {
|
||||
using GfxFamily = XE_HPC_COREFamily;
|
||||
|
||||
template class Dispatcher<GfxFamily>;
|
||||
template class BlitterDispatcher<GfxFamily>;
|
||||
template class RenderDispatcher<GfxFamily>;
|
||||
|
||||
template class DirectSubmissionHw<GfxFamily, BlitterDispatcher<GfxFamily>>;
|
||||
template class DirectSubmissionHw<GfxFamily, RenderDispatcher<GfxFamily>>;
|
||||
|
||||
template class DrmDirectSubmission<GfxFamily, BlitterDispatcher<GfxFamily>>;
|
||||
template class DrmDirectSubmission<GfxFamily, RenderDispatcher<GfxFamily>>;
|
||||
} // namespace NEO
|
|
@ -0,0 +1,42 @@
|
|||
/*
|
||||
* Copyright (C) 2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/helpers/constants.h"
|
||||
#include "shared/source/helpers/hw_info.h"
|
||||
#include "shared/source/kernel/kernel_properties.h"
|
||||
#include "shared/source/os_interface/hw_info_config.h"
|
||||
#include "shared/source/os_interface/hw_info_config.inl"
|
||||
#include "shared/source/os_interface/hw_info_config_dg2_and_later.inl"
|
||||
#include "shared/source/os_interface/hw_info_config_xehp_and_later.inl"
|
||||
|
||||
namespace NEO {
|
||||
constexpr static auto gfxProduct = IGFX_PVC;
|
||||
|
||||
#include "shared/source/xe_hpc_core/os_agnostic_hw_info_config_pvc.inl"
|
||||
|
||||
template <>
|
||||
int HwInfoConfigHw<gfxProduct>::configureHardwareCustom(HardwareInfo *hwInfo, OSInterface *osIface) {
|
||||
enableCompression(hwInfo);
|
||||
|
||||
hwInfo->featureTable.embargoFlags.ftr57bGPUAddressing = (hwInfo->capabilityTable.gpuAddressSpace == maxNBitValue(57));
|
||||
|
||||
enableBlitterOperationsSupport(hwInfo);
|
||||
|
||||
hwInfo->featureTable.flags.ftrRcsNode = false;
|
||||
if (DebugManager.flags.NodeOrdinal.get() == static_cast<int32_t>(aub_stream::EngineType::ENGINE_CCCS)) {
|
||||
hwInfo->featureTable.flags.ftrRcsNode = true;
|
||||
}
|
||||
|
||||
auto &kmdNotifyProperties = hwInfo->capabilityTable.kmdNotifyProperties;
|
||||
kmdNotifyProperties.enableKmdNotify = true;
|
||||
kmdNotifyProperties.delayKmdNotifyMicroseconds = 20;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
template class HwInfoConfigHw<gfxProduct>;
|
||||
} // namespace NEO
|
|
@ -0,0 +1,112 @@
|
|||
/*
|
||||
* Copyright (C) 2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
template <>
|
||||
void HwInfoConfigHw<gfxProduct>::getKernelExtendedProperties(uint32_t *fp16, uint32_t *fp32, uint32_t *fp64) {
|
||||
*fp16 = (FP_ATOMIC_EXT_FLAG_GLOBAL_LOAD_STORE | FP_ATOMIC_EXT_FLAG_GLOBAL_ADD | FP_ATOMIC_EXT_FLAG_GLOBAL_MIN_MAX);
|
||||
*fp32 = (FP_ATOMIC_EXT_FLAG_GLOBAL_LOAD_STORE | FP_ATOMIC_EXT_FLAG_GLOBAL_ADD | FP_ATOMIC_EXT_FLAG_GLOBAL_MIN_MAX);
|
||||
*fp64 = (FP_ATOMIC_EXT_FLAG_GLOBAL_LOAD_STORE | FP_ATOMIC_EXT_FLAG_GLOBAL_ADD | FP_ATOMIC_EXT_FLAG_GLOBAL_MIN_MAX);
|
||||
}
|
||||
|
||||
template <>
|
||||
bool HwInfoConfigHw<gfxProduct>::isMaxThreadsForWorkgroupWARequired(const HardwareInfo &hwInfo) const {
|
||||
auto deviceId = hwInfo.platform.usDeviceID;
|
||||
return XE_HPC_COREFamily::pvcXlDeviceId == deviceId;
|
||||
}
|
||||
|
||||
template <>
|
||||
uint32_t HwInfoConfigHw<gfxProduct>::getHwRevIdFromStepping(uint32_t stepping, const HardwareInfo &hwInfo) const {
|
||||
switch (hwInfo.platform.usDeviceID) {
|
||||
default:
|
||||
case XE_HPC_COREFamily::pvcXtTemporaryDeviceId:
|
||||
case XE_HPC_COREFamily::pvcXlDeviceId:
|
||||
switch (stepping) {
|
||||
case REVISION_A0:
|
||||
return 0x0;
|
||||
case REVISION_B:
|
||||
return 0x6;
|
||||
case REVISION_C:
|
||||
DEBUG_BREAK_IF(true);
|
||||
return 0x7;
|
||||
}
|
||||
break;
|
||||
case XE_HPC_COREFamily::pvcXtDeviceIds[0]:
|
||||
case XE_HPC_COREFamily::pvcXtDeviceIds[1]:
|
||||
case XE_HPC_COREFamily::pvcXtDeviceIds[2]:
|
||||
switch (stepping) {
|
||||
case REVISION_A0:
|
||||
return 0x3;
|
||||
case REVISION_B:
|
||||
return 0x9D;
|
||||
case REVISION_C:
|
||||
return 0x7;
|
||||
}
|
||||
}
|
||||
return CommonConstants::invalidStepping;
|
||||
}
|
||||
|
||||
template <>
|
||||
uint32_t HwInfoConfigHw<gfxProduct>::getSteppingFromHwRevId(const HardwareInfo &hwInfo) const {
|
||||
switch (hwInfo.platform.usRevId & XE_HPC_COREFamily::pvcSteppingBits) {
|
||||
case 0x0:
|
||||
case 0x1:
|
||||
case 0x3:
|
||||
return REVISION_A0;
|
||||
case 0x5:
|
||||
case 0x6:
|
||||
return REVISION_B;
|
||||
case 0x7:
|
||||
return REVISION_C;
|
||||
}
|
||||
return CommonConstants::invalidStepping;
|
||||
}
|
||||
|
||||
template <>
|
||||
void HwInfoConfigHw<gfxProduct>::adjustSamplerState(void *sampler, const HardwareInfo &hwInfo) {
|
||||
using SAMPLER_STATE = typename XE_HPC_COREFamily::SAMPLER_STATE;
|
||||
|
||||
auto samplerState = reinterpret_cast<SAMPLER_STATE *>(sampler);
|
||||
if (DebugManager.flags.ForceSamplerLowFilteringPrecision.get()) {
|
||||
samplerState->setLowQualityFilter(SAMPLER_STATE::LOW_QUALITY_FILTER_ENABLE);
|
||||
}
|
||||
}
|
||||
|
||||
template <>
|
||||
uint32_t HwInfoConfigHw<gfxProduct>::getDeviceMemoryMaxClkRate(const HardwareInfo *hwInfo) {
|
||||
bool isBaseDieA0 = (hwInfo->platform.usRevId & XE_HPC_COREFamily::pvcBaseDieRevMask) == XE_HPC_COREFamily::pvcBaseDieA0Masked;
|
||||
if (isBaseDieA0) {
|
||||
// For IGFX_PVC REV A0 HBM frequency would be 3.2 GT/s = 3.2 * 1000 MT/s = 3200 MT/s
|
||||
return 3200u;
|
||||
}
|
||||
return 0u;
|
||||
}
|
||||
|
||||
template <>
|
||||
bool HwInfoConfigHw<gfxProduct>::isDirectSubmissionSupported(const HardwareInfo &hwInfo) const {
|
||||
return true;
|
||||
}
|
||||
|
||||
template <>
|
||||
std::string HwInfoConfigHw<gfxProduct>::getDeviceMemoryName() const {
|
||||
return "HBM";
|
||||
}
|
||||
|
||||
template <>
|
||||
bool HwInfoConfigHw<gfxProduct>::isDisableOverdispatchAvailable(const HardwareInfo &hwInfo) const {
|
||||
return getSteppingFromHwRevId(hwInfo) >= REVISION_B;
|
||||
}
|
||||
|
||||
template <>
|
||||
bool HwInfoConfigHw<gfxProduct>::isPipeControlPriorToNonPipelinedStateCommandsWARequired(const HardwareInfo &hwInfo, bool isRcs) const {
|
||||
bool required = hwInfo.gtSystemInfo.CCSInfo.NumberOfCCSEnabled > 1;
|
||||
|
||||
if (DebugManager.flags.ProgramPipeControlPriorToNonPipelinedStateCommand.get() != -1) {
|
||||
required = DebugManager.flags.ProgramPipeControlPriorToNonPipelinedStateCommand.get();
|
||||
}
|
||||
|
||||
return required;
|
||||
}
|
|
@ -0,0 +1,64 @@
|
|||
/*
|
||||
* Copyright (C) 2020-2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
namespace NEO {
|
||||
struct XE_HPC_COREFamily;
|
||||
using Family = XE_HPC_COREFamily;
|
||||
} // namespace NEO
|
||||
|
||||
#include "shared/source/command_stream/stream_properties.h"
|
||||
#include "shared/source/helpers/preamble_xehp_and_later.inl"
|
||||
#include "shared/source/os_interface/hw_info_config.h"
|
||||
|
||||
namespace NEO {
|
||||
|
||||
using CFE_STATE = typename Family::CFE_STATE;
|
||||
template <>
|
||||
void PreambleHelper<Family>::appendProgramVFEState(const HardwareInfo &hwInfo, const StreamProperties &streamProperties, void *cmd) {
|
||||
auto command = static_cast<CFE_STATE *>(cmd);
|
||||
|
||||
command->setComputeOverdispatchDisable(streamProperties.frontEndState.disableOverdispatch.value == 1);
|
||||
command->setSingleSliceDispatchCcsMode(streamProperties.frontEndState.singleSliceDispatchCcsMode.value == 1);
|
||||
|
||||
if (HwInfoConfig::get(hwInfo.platform.eProductFamily)->getSteppingFromHwRevId(hwInfo) >= REVISION_B) {
|
||||
if (streamProperties.frontEndState.computeDispatchAllWalkerEnable.value > 0) {
|
||||
command->setComputeDispatchAllWalkerEnable(true);
|
||||
command->setSingleSliceDispatchCcsMode(true);
|
||||
}
|
||||
}
|
||||
|
||||
if (DebugManager.flags.CFEComputeDispatchAllWalkerEnable.get() != -1) {
|
||||
command->setComputeDispatchAllWalkerEnable(DebugManager.flags.CFEComputeDispatchAllWalkerEnable.get());
|
||||
}
|
||||
|
||||
if (DebugManager.flags.CFEComputeOverdispatchDisable.get() != -1) {
|
||||
command->setComputeOverdispatchDisable(DebugManager.flags.CFEComputeOverdispatchDisable.get());
|
||||
}
|
||||
if (DebugManager.flags.CFESingleSliceDispatchCCSMode.get() != -1) {
|
||||
command->setSingleSliceDispatchCcsMode(DebugManager.flags.CFESingleSliceDispatchCCSMode.get());
|
||||
}
|
||||
|
||||
command->setNumberOfWalkers(1);
|
||||
if (DebugManager.flags.CFENumberOfWalkers.get() != -1) {
|
||||
command->setNumberOfWalkers(DebugManager.flags.CFENumberOfWalkers.get());
|
||||
}
|
||||
}
|
||||
|
||||
template <>
|
||||
bool PreambleHelper<Family>::isSystolicModeConfigurable(const HardwareInfo &hwInfo) {
|
||||
return Family::isAtMostXtA0(hwInfo);
|
||||
}
|
||||
|
||||
template <>
|
||||
bool PreambleHelper<Family>::isSpecialPipelineSelectModeChanged(bool lastSpecialPipelineSelectMode, bool newSpecialPipelineSelectMode,
|
||||
const HardwareInfo &hwInfo) {
|
||||
return (lastSpecialPipelineSelectMode != newSpecialPipelineSelectMode) && Family::isAtMostXtA0(hwInfo);
|
||||
}
|
||||
|
||||
template struct PreambleHelper<Family>;
|
||||
|
||||
} // namespace NEO
|
|
@ -0,0 +1,24 @@
|
|||
/*
|
||||
* Copyright (C) 2020-2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/command_stream/preemption.h"
|
||||
#include "shared/source/command_stream/preemption.inl"
|
||||
|
||||
namespace NEO {
|
||||
|
||||
// Family alias used by every explicit instantiation in this namespace block.
using GfxFamily = XE_HPC_COREFamily;
|
||||
|
||||
// Included inside namespace NEO and after the GfxFamily alias.
// NOTE(review): presumably the .inl relies on GfxFamily being defined at this point - confirm.
#include "shared/source/command_stream/preemption_xehp_and_later.inl"
|
||||
|
||||
// Explicit instantiations of the PreemptionHelper member templates for GfxFamily.
template void PreemptionHelper::programCmdStream<GfxFamily>(LinearStream &cmdStream, PreemptionMode newPreemptionMode,
|
||||
PreemptionMode oldPreemptionMode, GraphicsAllocation *preemptionCsr);
|
||||
template size_t PreemptionHelper::getRequiredCmdStreamSize<GfxFamily>(PreemptionMode newPreemptionMode, PreemptionMode oldPreemptionMode);
|
||||
template size_t PreemptionHelper::getPreemptionWaCsSize<GfxFamily>(const Device &device);
|
||||
template void PreemptionHelper::applyPreemptionWaCmdsBegin<GfxFamily>(LinearStream *pCommandStream, const Device &device);
|
||||
template void PreemptionHelper::applyPreemptionWaCmdsEnd<GfxFamily>(LinearStream *pCommandStream, const Device &device);
|
||||
template void PreemptionHelper::programInterfaceDescriptorDataPreemption<GfxFamily>(INTERFACE_DESCRIPTOR_DATA<GfxFamily> *idd, PreemptionMode preemptionMode);
|
||||
} // namespace NEO
|
|
@ -0,0 +1,21 @@
|
|||
/*
|
||||
* Copyright (C) 2020-2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/helpers/state_base_address_xehp_and_later.inl"
|
||||
|
||||
namespace NEO {
|
||||
template <>
|
||||
void StateBaseAddressHelper<XE_HPC_COREFamily>::appendExtraCacheSettings(STATE_BASE_ADDRESS *stateBaseAddress, GmmHelper *gmmHelper) {
|
||||
stateBaseAddress->setL1CachePolicyL1CacheControl(STATE_BASE_ADDRESS::L1_CACHE_POLICY_WBP);
|
||||
|
||||
if (DebugManager.flags.ForceStatelessL1CachingPolicy.get() != -1) {
|
||||
stateBaseAddress->setL1CachePolicyL1CacheControl(static_cast<typename STATE_BASE_ADDRESS::L1_CACHE_POLICY>(DebugManager.flags.ForceStatelessL1CachingPolicy.get()));
|
||||
}
|
||||
}
|
||||
|
||||
template struct StateBaseAddressHelper<XE_HPC_COREFamily>;
|
||||
} // namespace NEO
|
|
@ -0,0 +1,17 @@
|
|||
/*
|
||||
* Copyright (C) 2020-2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/helpers/state_compute_mode_helper.h"
|
||||
|
||||
namespace NEO {
|
||||
|
||||
template <>
|
||||
bool StateComputeModeHelper<XE_HPC_COREFamily>::isStateComputeModeRequired(const CsrSizeRequestFlags &csrSizeRequestFlags, bool isThreadArbitionPolicyProgrammed) {
|
||||
return csrSizeRequestFlags.coherencyRequestChanged || csrSizeRequestFlags.numGrfRequiredChanged || isThreadArbitionPolicyProgrammed;
|
||||
}
|
||||
|
||||
} // namespace NEO
|
|
@ -0,0 +1,32 @@
|
|||
/*
|
||||
* Copyright (C) 2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/command_stream/command_stream_receiver_with_aub_dump.inl"
|
||||
#include "shared/source/command_stream/tbx_command_stream_receiver_hw.h"
|
||||
#include "shared/source/command_stream/tbx_command_stream_receiver_hw.inl"
|
||||
#include "shared/source/helpers/array_count.h"
|
||||
#include "shared/source/helpers/populate_factory.h"
|
||||
#include "shared/source/memory_manager/memory_banks.h"
|
||||
#include "shared/source/memory_manager/memory_pool.h"
|
||||
#include "shared/source/xe_hpc_core/hw_cmds.h"
|
||||
|
||||
namespace NEO {
|
||||
using Family = XE_HPC_COREFamily;
|
||||
static auto gfxCore = IGFX_XE_HPC_CORE;
|
||||
|
||||
template <>
|
||||
void populateFactoryTable<TbxCommandStreamReceiverHw<Family>>() {
|
||||
extern TbxCommandStreamReceiverCreateFunc tbxCommandStreamReceiverFactory[IGFX_MAX_CORE];
|
||||
UNRECOVERABLE_IF(!isInRange(gfxCore, tbxCommandStreamReceiverFactory));
|
||||
tbxCommandStreamReceiverFactory[gfxCore] = TbxCommandStreamReceiverHw<Family>::create;
|
||||
}
|
||||
|
||||
#include "shared/source/command_stream/tbx_command_stream_receiver_xehp_and_later.inl"
|
||||
|
||||
template class TbxCommandStreamReceiverHw<Family>;
|
||||
template class CommandStreamReceiverWithAUBDump<TbxCommandStreamReceiverHw<Family>>;
|
||||
} // namespace NEO
|
|
@ -0,0 +1,17 @@
|
|||
/*
|
||||
* Copyright (C) 2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/command_stream/command_stream_receiver_with_aub_dump.inl"
|
||||
#include "shared/source/os_interface/windows/device_command_stream.inl"
|
||||
#include "shared/source/os_interface/windows/wddm_device_command_stream.inl"
|
||||
|
||||
namespace NEO {
|
||||
|
||||
// Explicit instantiations of the device / WDDM command stream receiver templates
// for XE_HPC_COREFamily.
template class DeviceCommandStreamReceiver<XE_HPC_COREFamily>;
|
||||
template class WddmCommandStreamReceiver<XE_HPC_COREFamily>;
|
||||
// AUB-dump wrapper instantiated around the WDDM receiver.
template class CommandStreamReceiverWithAUBDump<WddmCommandStreamReceiver<XE_HPC_COREFamily>>;
|
||||
} // namespace NEO
|
|
@ -0,0 +1,31 @@
|
|||
/*
|
||||
* Copyright (C) 2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/direct_submission/direct_submission_hw.inl"
|
||||
#include "shared/source/direct_submission/direct_submission_prefetch_mitigation_xe_hp_core_and_later.inl"
|
||||
#include "shared/source/direct_submission/direct_submission_prefetcher_pvc_and_later.inl"
|
||||
#include "shared/source/direct_submission/direct_submission_xe_hp_core_and_later.inl"
|
||||
#include "shared/source/direct_submission/dispatchers/blitter_dispatcher.inl"
|
||||
#include "shared/source/direct_submission/dispatchers/dispatcher.inl"
|
||||
#include "shared/source/direct_submission/dispatchers/render_dispatcher.inl"
|
||||
#include "shared/source/direct_submission/windows/wddm_direct_submission.inl"
|
||||
|
||||
#include "hw_cmds.h"
|
||||
|
||||
namespace NEO {
|
||||
// Family alias used by every explicit instantiation in this namespace block.
using GfxFamily = XE_HPC_COREFamily;
|
||||
|
||||
// Dispatcher templates instantiated for GfxFamily.
template class Dispatcher<GfxFamily>;
|
||||
template class BlitterDispatcher<GfxFamily>;
|
||||
template class RenderDispatcher<GfxFamily>;
|
||||
|
||||
// DirectSubmissionHw instantiated once per dispatcher kind (blitter, render).
template class DirectSubmissionHw<GfxFamily, BlitterDispatcher<GfxFamily>>;
|
||||
template class DirectSubmissionHw<GfxFamily, RenderDispatcher<GfxFamily>>;
|
||||
|
||||
// WddmDirectSubmission instantiated for the same two dispatcher kinds.
template class WddmDirectSubmission<GfxFamily, BlitterDispatcher<GfxFamily>>;
|
||||
template class WddmDirectSubmission<GfxFamily, RenderDispatcher<GfxFamily>>;
|
||||
} // namespace NEO
|
|
@ -0,0 +1,16 @@
|
|||
/*
|
||||
* Copyright (C) 2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/helpers/windows/gmm_callbacks_tgllp_and_later.inl"
|
||||
#include "shared/source/xe_hpc_core/hw_cmds.h"
|
||||
|
||||
namespace NEO {
|
||||
|
||||
// Explicit instantiations of the GMM callback structs for XE_HPC_COREFamily.
// NOTE(review): the template definitions presumably come from
// gmm_callbacks_tgllp_and_later.inl included above - confirm.
template struct DeviceCallbacks<XE_HPC_COREFamily>;
|
||||
template struct TTCallbacks<XE_HPC_COREFamily>;
|
||||
|
||||
} // namespace NEO
|
|
@ -0,0 +1,36 @@
|
|||
/*
|
||||
* Copyright (C) 2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/kernel/kernel_properties.h"
|
||||
#include "shared/source/os_interface/hw_info_config.h"
|
||||
#include "shared/source/os_interface/hw_info_config.inl"
|
||||
#include "shared/source/os_interface/hw_info_config_dg2_and_later.inl"
|
||||
#include "shared/source/os_interface/hw_info_config_xehp_and_later.inl"
|
||||
|
||||
#include "hw_cmds.h"
|
||||
|
||||
namespace NEO {
|
||||
constexpr static auto gfxProduct = IGFX_PVC;
|
||||
|
||||
#include "shared/source/xe_hpc_core/os_agnostic_hw_info_config_pvc.inl"
|
||||
|
||||
template <>
|
||||
int HwInfoConfigHw<gfxProduct>::configureHardwareCustom(HardwareInfo *hwInfo, OSInterface *osIface) {
|
||||
enableCompression(hwInfo);
|
||||
enableBlitterOperationsSupport(hwInfo);
|
||||
|
||||
hwInfo->featureTable.flags.ftrRcsNode = false;
|
||||
if (DebugManager.flags.NodeOrdinal.get() == static_cast<int32_t>(aub_stream::EngineType::ENGINE_CCCS)) {
|
||||
hwInfo->featureTable.flags.ftrRcsNode = true;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
template class HwInfoConfigHw<gfxProduct>;
|
||||
|
||||
} // namespace NEO
|
|
@ -19,6 +19,15 @@ enum EngineType : uint32_t {
|
|||
ENGINE_CCS1,
|
||||
ENGINE_CCS2,
|
||||
ENGINE_CCS3,
|
||||
ENGINE_CCCS,
|
||||
ENGINE_BCS1,
|
||||
ENGINE_BCS2,
|
||||
ENGINE_BCS3,
|
||||
ENGINE_BCS4,
|
||||
ENGINE_BCS5,
|
||||
ENGINE_BCS6,
|
||||
ENGINE_BCS7,
|
||||
ENGINE_BCS8,
|
||||
NUM_ENGINES
|
||||
};
|
||||
|
||||
|
|
Loading…
Reference in New Issue