feature: add support for Panther Lake platform

Related-To: NEO-12803

Signed-off-by: Mateusz Jablonski <mateusz.jablonski@intel.com>
This commit is contained in:
Mateusz Jablonski
2025-01-03 12:50:18 +00:00
committed by Compute-Runtime-Automation
parent 8e41928eb8
commit bb1a125f0c
169 changed files with 9419 additions and 24 deletions

View File

@@ -1,5 +1,5 @@
#
# Copyright (C) 2020-2024 Intel Corporation
# Copyright (C) 2020-2025 Intel Corporation
#
# SPDX-License-Identifier: MIT
#
@@ -9,7 +9,9 @@ list(APPEND ALL_CORE_TYPES "XE_HP_CORE")
# Register each core type in ALL_CORE_TYPES, then build the cumulative
# *_AND_LATER lists used to gate sources shared by a core and its successors.
list(APPEND ALL_CORE_TYPES "XE_HPG_CORE")
list(APPEND ALL_CORE_TYPES "XE_HPC_CORE")
list(APPEND ALL_CORE_TYPES "XE2_HPG_CORE")
list(APPEND XE2_AND_LATER_CORE_TYPES "XE2_HPG_CORE")
list(APPEND ALL_CORE_TYPES "XE3_CORE")
# Each *_AND_LATER list receives the contents of the next-newer list plus its
# own core type. ${...} is expanded when the command runs, so the newer list
# has to be populated first (newest to oldest).
list(APPEND XE3_AND_LATER_CORE_TYPES "XE3_CORE")
list(APPEND XE2_AND_LATER_CORE_TYPES ${XE3_AND_LATER_CORE_TYPES} "XE2_HPG_CORE")
list(APPEND PVC_AND_LATER_CORE_TYPES ${XE2_AND_LATER_CORE_TYPES} "XE_HPC_CORE")
list(APPEND DG2_AND_LATER_CORE_TYPES ${PVC_AND_LATER_CORE_TYPES} "XE_HPG_CORE")
# MTL adds no core type of its own; it mirrors the DG2-and-later set.
list(APPEND MTL_AND_LATER_CORE_TYPES ${DG2_AND_LATER_CORE_TYPES})

View File

@@ -1,5 +1,5 @@
#
# Copyright (C) 2020-2024 Intel Corporation
# Copyright (C) 2020-2025 Intel Corporation
#
# SPDX-License-Identifier: MIT
#
@@ -13,6 +13,12 @@ SET_FLAGS_FOR_CURRENT("XE_HPG_CORE" "DG2" "MTL" "ARL")
SET_FLAGS_FOR_CURRENT("XE_HPC_CORE" "PVC")
SET_FLAGS_FOR_CURRENT("XE2_HPG_CORE" "BMG" "LNL")
# XE3_CORE / PTL flags are set only when pointers are 8 bytes wide (64-bit
# build); every other pointer size goes through DISABLE_32BIT_FLAGS_FOR.
if(CMAKE_SIZEOF_VOID_P EQUAL 8)
  SET_FLAGS_FOR_CURRENT("XE3_CORE" "PTL")
else()
  DISABLE_32BIT_FLAGS_FOR("XE3_CORE" "PTL")
endif()
DISABLE_WDDM_LINUX_FOR("XE_HPC_CORE" "PVC")
foreach(CORE_TYPE ${XE_HPC_AND_BEFORE_CORE_TYPES})
@@ -280,3 +286,25 @@ if(SUPPORT_XE2_HPG_CORE)
endif()
endif()
endif()
# XE3 core family configuration. Nothing below runs unless XE3 core support
# is enabled.
if(SUPPORT_XE3_CORE)
  ADD_AOT_DEFINITION(XE3)

  # Kernel file placed on the XE3 core test-kernel blocklist.
  set(XE3_CORE_TEST_KERNELS_BLOCKLIST "CopyBuffer_simd8.cl")

  if(TESTS_XE3_CORE)
    ADD_ITEM_FOR_CORE_TYPE("FAMILY_NAME" "TESTED" "XE3_CORE" "Xe3CoreFamily")
  endif()

  # Panther Lake (PTL): the product built on the XE3 core.
  if(SUPPORT_PTL)
    ADD_AOT_DEFINITION(XE3_LPG)
    ADD_AOT_DEFINITION(PTL)

    # Release versions associated with PTL on the XE3 core.
    set(PTL_XE3_CORE_RELEASES "30.0.0" "30.0.4" "30.1.0" "30.1.1")

    ADD_PRODUCT("SUPPORTED" "PTL" "IGFX_PTL")
    ADD_PLATFORM_FOR_CORE_TYPE("SUPPORTED" "XE3_CORE" "PTL")
    ADD_PLATFORM_FOR_CORE_TYPE("SUPPORTED_IMAGES" "XE3_CORE" "PTL")
    ADD_PLATFORM_FOR_CORE_TYPE("SUPPORTED_STATELESS" "XE3_CORE" "PTL")

    # Test registration is separate from support and needs TESTS_PTL as well.
    if(TESTS_PTL)
      ADD_ITEM_FOR_CORE_TYPE("PLATFORMS" "TESTED" "XE3_CORE" "PTL")
      ADD_PRODUCT("TESTED" "PTL" "IGFX_PTL")
    endif()
  endif()
endif()

View File

@@ -1,5 +1,5 @@
#
# Copyright (C) 2023-2024 Intel Corporation
# Copyright (C) 2023-2025 Intel Corporation
#
# SPDX-License-Identifier: MIT
#
@@ -66,3 +66,8 @@ if(SUPPORT_XE2_AND_LATER)
${CMAKE_CURRENT_SOURCE_DIR}/l0_gfx_core_helper_xe2_hpg_and_later.inl
)
endif()
# Helper implementation shared by XE3 and newer cores; attached to the L0
# static library only when that range is enabled.
if(SUPPORT_XE3_AND_LATER)
  target_sources(${L0_STATIC_LIB_NAME}
                 PRIVATE
                 ${CMAKE_CURRENT_SOURCE_DIR}/l0_gfx_core_helper_xe3_and_later.inl
  )
endif()

View File

@@ -0,0 +1,21 @@
/*
* Copyright (C) 2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper.h"
namespace L0 {

// Returns the RTAS format for this family: the internal "version2" device
// format, converted to the public extension enum type.
template <typename Family>
ze_rtas_format_exp_t L0GfxCoreHelperHw<Family>::getSupportedRTASFormat() const {
    const auto internalFormat = RTASDeviceFormatInternal::version2;
    return static_cast<ze_rtas_format_exp_t>(internalFormat);
}

// Returns the debug register set used for large-GRF detection: the SR set.
template <typename Family>
zet_debug_regset_type_intel_gpu_t L0GfxCoreHelperHw<Family>::getRegsetTypeForLargeGrfDetection() const {
    const zet_debug_regset_type_intel_gpu_t regsetType = ZET_DEBUG_REGSET_TYPE_SR_INTEL_GPU;
    return regsetType;
}

} // namespace L0

View File

@@ -0,0 +1,19 @@
#
# Copyright (C) 2025 Intel Corporation
#
# SPDX-License-Identifier: MIT
#
# Level Zero sources for the XE3 core family; added to the L0 static library
# only when XE3 core support is enabled.
if(SUPPORT_XE3_CORE)
  target_sources(${L0_STATIC_LIB_NAME}
                 PRIVATE
                 ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
                 ${CMAKE_CURRENT_SOURCE_DIR}/cmdlist_xe3_core.cpp
                 ${CMAKE_CURRENT_SOURCE_DIR}/cmdlist_xe3_core.h
                 ${CMAKE_CURRENT_SOURCE_DIR}/enable_family_full_l0_xe3_core.cpp
                 ${CMAKE_CURRENT_SOURCE_DIR}/image_xe3_core.inl
                 ${CMAKE_CURRENT_SOURCE_DIR}/l0_gfx_core_helper_xe3_core.cpp
  )

  # Project helper; presumably descends into the child directories (e.g. the
  # per-product folders) - confirm against its definition.
  add_subdirectories()
endif()

View File

@@ -0,0 +1,24 @@
/*
* Copyright (C) 2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/xe3_core/hw_cmds_base.h"
#include "shared/source/xe3_core/hw_info_xe3_core.h"
#include "level_zero/core/source/cmdlist/cmdlist_hw.inl"
#include "level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl"
#include "level_zero/core/source/cmdlist/cmdlist_hw_xe2_hpg_and_later.inl"
#include "level_zero/core/source/cmdlist/cmdlist_hw_xe_hpc_and_later.inl"
#include "level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl"
#include "cmdlist_extended.inl"
namespace L0 {
// Explicit instantiations of the regular and immediate command-list templates
// for the XE3 core, built from the template definitions in the .inl files
// included above.
template struct CommandListCoreFamily<IGFX_XE3_CORE>;
template struct CommandListCoreFamilyImmediate<IGFX_XE3_CORE>;
} // namespace L0

View File

@@ -0,0 +1,24 @@
/*
* Copyright (C) 2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include "shared/source/xe3_core/hw_cmds_base.h"
#include "level_zero/core/source/cmdlist/cmdlist_hw.h"
#include "level_zero/core/source/cmdlist/cmdlist_hw_immediate.h"
namespace L0 {
// Per-product command list for XE3-based products. Every product family maps
// onto the XE3 core implementation and inherits its constructors unchanged.
template <PRODUCT_FAMILY productFamily>
struct CommandListProductFamily : public CommandListCoreFamily<IGFX_XE3_CORE> {
using CommandListCoreFamily::CommandListCoreFamily;
};
// Immediate command-list counterpart of CommandListProductFamily; likewise a
// thin alias over the XE3 core immediate implementation.
template <PRODUCT_FAMILY gfxProductFamily>
struct CommandListImmediateProductFamily : public CommandListCoreFamilyImmediate<IGFX_XE3_CORE> {
using CommandListCoreFamilyImmediate::CommandListCoreFamilyImmediate;
};
} // namespace L0

View File

@@ -0,0 +1,24 @@
/*
* Copyright (C) 2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/xe3_core/hw_cmds_base.h"
#include "level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper.h"
#include "level_zero/core/source/helpers/l0_populate_factory.h"
namespace NEO {
using Family = Xe3CoreFamily;
// Registration helper: its constructor calls L0::populateFactoryTable for the
// XE3 L0GfxCoreHelperHw (presumably filling the per-core helper factory -
// confirm against l0_populate_factory.h).
struct EnableL0Xe3Core {
EnableL0Xe3Core() {
L0::populateFactoryTable<L0::L0GfxCoreHelperHw<Family>>();
}
};
// File-local instance; its constructor runs during static initialization of
// this translation unit, which is what triggers the registration.
static EnableL0Xe3Core enable;
} // namespace NEO

View File

@@ -0,0 +1,11 @@
/*
* Copyright (C) 2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/xe3_core/hw_cmds_base.h"
#include "shared/source/xe3_core/hw_info_xe3_core.h"
#include "level_zero/core/source/image/image_hw.inl"

View File

@@ -0,0 +1,29 @@
/*
* Copyright (C) 2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/xe3_core/hw_cmds_base.h"
#include "level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper_base.inl"
#include "level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper_pvc_and_later.inl"
#include "level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper_xe2_hpg_and_later.inl"
#include "level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper_xe3_and_later.inl"
#include "level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper_xehp_and_later.inl"
#include "level_zero/core/source/helpers/l0_populate_factory.h"
namespace L0 {
// Family and gfxCore are declared before the factory-init .inl is included;
// presumably the .inl refers to both names, so the order matters - confirm.
using Family = NEO::Xe3CoreFamily;
static auto gfxCore = IGFX_XE3_CORE;
#include "level_zero/core/source/helpers/l0_gfx_core_helper_factory_init.inl"
// XE3 specialization: reads dword index 4 of the register data and keeps its
// low 9 bits (mask 0x1FF). regPtr is not null-checked and must point to at
// least five dwords.
template <>
uint32_t L0GfxCoreHelperHw<Family>::getGrfRegisterCount(uint32_t *regPtr) const {
return (regPtr[4] & 0x1FF);
}
// Explicit instantiation of the helper for the XE3 family.
template class L0GfxCoreHelperHw<Family>;
} // namespace L0

View File

@@ -0,0 +1,17 @@
#
# Copyright (C) 2025 Intel Corporation
#
# SPDX-License-Identifier: MIT
#
# Level Zero sources specific to the PTL product; added to the L0 static
# library only when PTL support is enabled.
if(SUPPORT_PTL)
  target_sources(${L0_STATIC_LIB_NAME}
                 PRIVATE
                 ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
                 ${CMAKE_CURRENT_SOURCE_DIR}/cmdlist_ptl.cpp
                 ${CMAKE_CURRENT_SOURCE_DIR}/cmdqueue_ptl.cpp
                 ${CMAKE_CURRENT_SOURCE_DIR}/image_ptl.cpp
                 ${CMAKE_CURRENT_SOURCE_DIR}/kernel_ptl.cpp
                 ${CMAKE_CURRENT_SOURCE_DIR}/sampler_ptl.cpp
  )

  # Project helper; presumably descends into any child directories - confirm
  # against its definition.
  add_subdirectories()
endif()

View File

@@ -0,0 +1,20 @@
/*
* Copyright (C) 2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/xe3_core/hw_info_xe3_core.h"
#include "level_zero/core/source/xe3_core/cmdlist_xe3_core.h"
namespace L0 {
// File-local registrar objects for IGFX_PTL: one for the regular command list
// and one for the immediate command list. Presumably their constructors add
// the product to the command-list factories during static init - confirm.
static CommandListPopulateFactory<IGFX_PTL, CommandListProductFamily<IGFX_PTL>>
populatePTL;
static CommandListImmediatePopulateFactory<IGFX_PTL, CommandListImmediateProductFamily<IGFX_PTL>>
populatePTLImmediate;
} // namespace L0

View File

@@ -0,0 +1,19 @@
/*
* Copyright (C) 2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/xe3_core/hw_cmds_base.h"
#include "shared/source/xe3_core/hw_info_xe3_core.h"
#include "level_zero/core/source/cmdqueue/cmdqueue_hw.inl"
#include "level_zero/core/source/cmdqueue/cmdqueue_xe_hp_core_and_later.inl"
namespace L0 {
// Explicit instantiation of the command-queue implementation for the XE3 core.
template struct CommandQueueHw<IGFX_XE3_CORE>;
// File-local registrar tying IGFX_PTL to the XE3 core command queue;
// presumably registers it in the command-queue factory at static init - confirm.
static CommandQueuePopulateFactory<IGFX_PTL, CommandQueueHw<IGFX_XE3_CORE>>
populatePTL;
} // namespace L0

View File

@@ -0,0 +1,23 @@
/*
* Copyright (C) 2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "level_zero/core/source/xe3_core/image_xe3_core.inl"
namespace L0 {

// PTL image implementation: reuses the XE3 core image family and inherits its
// constructors unchanged.
template <>
struct ImageProductFamily<IGFX_PTL> : public ImageCoreFamily<IGFX_XE3_CORE> {
    using ImageCoreFamily::ImageCoreFamily;

    // Forwards to the core-family implementation; PTL adds no product-specific
    // image setup. Returns whatever the core implementation returns.
    ze_result_t initialize(Device *device, const ze_image_desc_t *desc) override {
        return ImageCoreFamily<IGFX_XE3_CORE>::initialize(device, desc);
    }
};

// File-local registrar for IGFX_PTL; presumably adds the product to the image
// factory during static initialization - confirm against ImagePopulateFactory.
static ImagePopulateFactory<IGFX_PTL, ImageProductFamily<IGFX_PTL>> populatePTL;

} // namespace L0

View File

@@ -0,0 +1,17 @@
/*
* Copyright (C) 2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/xe3_core/hw_cmds_base.h"
#include "shared/source/xe3_core/hw_info_xe3_core.h"
#include "level_zero/core/source/kernel/kernel_hw.h"
namespace L0 {
// File-local registrar tying IGFX_PTL to the XE3 core kernel implementation;
// presumably registers it in the kernel factory at static init - confirm.
static KernelPopulateFactory<IGFX_PTL, KernelHw<IGFX_XE3_CORE>> populatePTL;
} // namespace L0

View File

@@ -0,0 +1,22 @@
/*
* Copyright (C) 2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/xe3_core/hw_cmds_base.h"
#include "shared/source/xe3_core/hw_info_xe3_core.h"
#include "level_zero/core/source/sampler/sampler_hw.inl"
namespace L0 {
// PTL sampler implementation: reuses the XE3 core sampler family and inherits
// its constructors unchanged.
template <>
struct SamplerProductFamily<IGFX_PTL> : public SamplerCoreFamily<IGFX_XE3_CORE> {
using SamplerCoreFamily::SamplerCoreFamily;
};
// File-local registrar for IGFX_PTL; presumably adds the product to the
// sampler factory during static initialization - confirm.
static SamplerPopulateFactory<IGFX_PTL, SamplerProductFamily<IGFX_PTL>> populatePTL;
} // namespace L0

View File

@@ -0,0 +1,12 @@
#
# Copyright (C) 2025 Intel Corporation
#
# SPDX-License-Identifier: MIT
#
# XE3 core AUB tests; added to the enclosing test target only when XE3 core
# tests are enabled.
if(TESTS_XE3_CORE)
  target_sources(${TARGET_NAME}
                 PRIVATE
                 ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
                 ${CMAKE_CURRENT_SOURCE_DIR}/test_variable_register_per_thread_xe3_core.cpp
  )
endif()

View File

@@ -0,0 +1,106 @@
/*
* Copyright (C) 2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/helpers/file_io.h"
#include "shared/test/common/helpers/default_hw_info.h"
#include "shared/test/common/helpers/test_files.h"
#include "shared/test/common/test_macros/hw_test.h"
#include "shared/test/common/test_macros/test.h"
#include "level_zero/core/source/cmdqueue/cmdqueue.h"
#include "level_zero/core/source/context/context_imp.h"
#include "level_zero/core/source/driver/driver_handle_imp.h"
#include "level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper.h"
#include "level_zero/core/test/aub_tests/fixtures/aub_fixture.h"
#include "level_zero/core/test/unit_tests/mocks/mock_cmdlist.h"
#include "level_zero/driver_experimental/zex_module.h"
namespace L0::ult {
// AUB fixture with helpers for querying supported GRF sizes and allocating
// device memory through the public Level Zero entry points.
struct AUBVariableRegisterPerThreadL0 : Test<AUBFixtureL0> {
    // Returns the register-file (GRF) sizes reported by the device.
    // Uses the usual two-call pattern: the first query obtains the count, the
    // second fills a buffer of that size. Failures are reported through
    // non-fatal expectations, so the result may be empty or zero-filled.
    std::vector<uint32_t> getGrfSizes(ze_device_handle_t device) {
        ze_device_module_properties_t deviceModuleProperties{};
        // Identify the structure as the API requires, matching how every other
        // descriptor in this file is set up.
        deviceModuleProperties.stype = ZE_STRUCTURE_TYPE_DEVICE_MODULE_PROPERTIES;

        // NOTE(review): the extension struct relies on value-initialization for
        // its stype - confirm its declaration provides the right default.
        zex_device_module_register_file_exp_t deviceModuleRegisterFile{};
        deviceModuleProperties.pNext = &deviceModuleRegisterFile;

        // First call: only the number of supported sizes is needed.
        EXPECT_EQ(ZE_RESULT_SUCCESS, zeDeviceGetModuleProperties(device, &deviceModuleProperties));

        // Second call: provide storage so the driver can write the sizes.
        std::vector<uint32_t> result(deviceModuleRegisterFile.registerFileSizesCount);
        deviceModuleRegisterFile.registerFileSizes = result.data();
        EXPECT_EQ(ZE_RESULT_SUCCESS, zeDeviceGetModuleProperties(device, &deviceModuleProperties));

        return result;
    }

    // Allocates `size` bytes of device memory with the given alignment.
    // Returns nullptr if the allocation fails (the failure is also reported as
    // a non-fatal expectation).
    void *allocateDeviceMemory(ze_context_handle_t context, ze_device_handle_t device, size_t size, size_t alignment) {
        void *result = nullptr;
        ze_device_mem_alloc_desc_t descriptor{};
        descriptor.stype = ZE_STRUCTURE_TYPE_DEVICE_MEM_ALLOC_DESC;
        EXPECT_EQ(ZE_RESULT_SUCCESS, zeMemAllocDevice(context, &descriptor, size, alignment, device, &result));
        return result;
    }
};
// For every GRF size the device reports, builds the matching kernel with
// -ze-exp-register-file-size, checks that the kernel descriptor records that
// size, runs the kernel and verifies the output buffer.
XE3_CORETEST_F(AUBVariableRegisterPerThreadL0, givenZeOptRegisterFileSizeOptionWhenExecutingKernelThenCorrectValuesAreReturned) {
    // Number of int32 elements; also used as the work-group size.
    constexpr auto bufferSize = 256u;
    // Byte size of the host vectors below. Allocation, copy and comparison all
    // use this so the device buffers cover every element.
    // NOTE(review): assumes the kernel touches one int32 element per
    // work-item - confirm against the kernel source.
    constexpr size_t bufferSizeInBytes = bufferSize * sizeof(int32_t);

    const auto grfSizes = getGrfSizes(device);
    const auto &expectedGrfSizes = device->getProductHelper().getSupportedNumGrfs(device->getNEODevice()->getReleaseHelper());
    EXPECT_NE(0u, grfSizes.size());
    EXPECT_EQ(expectedGrfSizes, grfSizes);

    for (const auto &grfSize : grfSizes) {
        std::string filename = "grf_" + std::to_string(grfSize) + "_kernel_variable_register_per_thread";
        std::string buildFlags = "-ze-exp-register-file-size " + std::to_string(grfSize);
        ze_module_handle_t module = createModuleFromFile(filename, context, device, buildFlags);
        ASSERT_NE(nullptr, module);

        // The handle is dereferenced right after creation, so creation failure
        // must stop the test instead of continuing with an invalid handle.
        ze_kernel_handle_t kernel = nullptr;
        ze_kernel_desc_t kernelDescriptor{};
        kernelDescriptor.stype = ZE_STRUCTURE_TYPE_KERNEL_DESC;
        kernelDescriptor.pKernelName = "kernelVariableRegisterPerThread";
        ASSERT_EQ(ZE_RESULT_SUCCESS, zeKernelCreate(module, &kernelDescriptor, &kernel));
        ASSERT_NE(nullptr, kernel);

        const auto numGrfRequired = Kernel::fromHandle(kernel)->getKernelDescriptor().kernelAttributes.numGrfRequired;
        EXPECT_EQ(grfSize, numGrfRequired);

        ze_command_list_desc_t commandListDescriptor{};
        commandListDescriptor.stype = ZE_STRUCTURE_TYPE_COMMAND_LIST_DESC;
        ze_command_list_handle_t commandList{};
        ASSERT_EQ(ZE_RESULT_SUCCESS, zeCommandListCreate(context, device, &commandListDescriptor, &commandList));

        const std::vector<int32_t> input(bufferSize, 1);
        const std::vector<int32_t> expectedOutput(bufferSize, 2);

        auto *inputBuffer = allocateDeviceMemory(context, device, bufferSizeInBytes, 1u);
        auto *outputBuffer = allocateDeviceMemory(context, device, bufferSizeInBytes, 1u);
        ASSERT_NE(nullptr, inputBuffer);
        ASSERT_NE(nullptr, outputBuffer);

        // Upload the input, then run the kernel once the copy has completed.
        EXPECT_EQ(ZE_RESULT_SUCCESS, zeCommandListAppendMemoryCopy(commandList, inputBuffer, input.data(), bufferSizeInBytes, nullptr, 0u, nullptr));
        EXPECT_EQ(ZE_RESULT_SUCCESS, zeCommandListAppendBarrier(commandList, nullptr, 0u, nullptr));

        EXPECT_EQ(ZE_RESULT_SUCCESS, zeKernelSetArgumentValue(kernel, 0u, sizeof(inputBuffer), &inputBuffer));
        EXPECT_EQ(ZE_RESULT_SUCCESS, zeKernelSetArgumentValue(kernel, 1u, sizeof(outputBuffer), &outputBuffer));
        EXPECT_EQ(ZE_RESULT_SUCCESS, zeKernelSetGroupSize(kernel, bufferSize, 1u, 1u));

        ze_group_count_t groupCount{1u, 1u, 1u};
        EXPECT_EQ(ZE_RESULT_SUCCESS, zeCommandListAppendLaunchKernel(commandList, kernel, &groupCount, nullptr, 0u, nullptr));
        EXPECT_EQ(ZE_RESULT_SUCCESS, zeCommandListAppendBarrier(commandList, nullptr, 0u, nullptr));
        EXPECT_EQ(ZE_RESULT_SUCCESS, zeCommandListClose(commandList));

        EXPECT_EQ(ZE_RESULT_SUCCESS, zeCommandQueueExecuteCommandLists(pCmdq, 1u, &commandList, nullptr));
        EXPECT_EQ(ZE_RESULT_SUCCESS, zeCommandQueueSynchronize(pCmdq, UINT64_MAX));

        expectMemory<FamilyType>(outputBuffer, expectedOutput.data(), bufferSizeInBytes);

        // Release per-iteration resources before moving to the next GRF size.
        EXPECT_EQ(ZE_RESULT_SUCCESS, zeMemFree(context, inputBuffer));
        EXPECT_EQ(ZE_RESULT_SUCCESS, zeMemFree(context, outputBuffer));
        EXPECT_EQ(ZE_RESULT_SUCCESS, zeCommandListDestroy(commandList));
        EXPECT_EQ(ZE_RESULT_SUCCESS, zeKernelDestroy(kernel));
        EXPECT_EQ(ZE_RESULT_SUCCESS, zeModuleDestroy(module));
    }
}
} // namespace L0::ult

View File

@@ -0,0 +1,16 @@
#
# Copyright (C) 2025 Intel Corporation
#
# SPDX-License-Identifier: MIT
#
# XE3 core unit tests; added to the enclosing test target only when XE3 core
# tests are enabled.
if(TESTS_XE3_CORE)
  target_sources(${TARGET_NAME}
                 PRIVATE
                 ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
                 ${CMAKE_CURRENT_SOURCE_DIR}/excludes_l0_xe3_core.cpp
                 ${CMAKE_CURRENT_SOURCE_DIR}/test_cmdlist_xe3_core.cpp
                 ${CMAKE_CURRENT_SOURCE_DIR}/test_device_xe3_core.cpp
                 ${CMAKE_CURRENT_SOURCE_DIR}/test_l0_gfx_core_helper_xe3_core.cpp
                 ${CMAKE_CURRENT_SOURCE_DIR}/test_module_xe3_core.cpp
  )
endif()

View File

@@ -0,0 +1,8 @@
/*
* Copyright (C) 2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/test/common/test_macros/hw_test_base.h"

View File

@@ -0,0 +1,488 @@
/*
* Copyright (C) 2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/command_stream/scratch_space_controller.h"
#include "shared/source/helpers/compiler_product_helper.h"
#include "shared/source/kernel/implicit_args_helper.h"
#include "shared/source/os_interface/product_helper.h"
#include "shared/test/common/cmd_parse/gen_cmd_parse.h"
#include "shared/test/common/helpers/debug_manager_state_restore.h"
#include "shared/test/common/helpers/unit_test_helper.h"
#include "shared/test/common/mocks/mock_compiler_product_helper.h"
#include "shared/test/common/test_macros/hw_test.h"
#include "level_zero/core/source/event/event.h"
#include "level_zero/core/test/unit_tests/fixtures/module_fixture.h"
#include "level_zero/core/test/unit_tests/mocks/mock_cmdlist.h"
#include "level_zero/core/test/unit_tests/mocks/mock_cmdqueue.h"
#include "level_zero/core/test/unit_tests/mocks/mock_module.h"
namespace L0 {
namespace ult {
// Exclude two generic AppendMemoryCopyTests cases for IGFX_XE3_CORE.
// NOTE(review): the reason for the exclusion is not visible in this file -
// presumably the generic expectations do not hold on XE3; confirm.
HWTEST_EXCLUDE_PRODUCT(AppendMemoryCopyTests, givenCopyCommandListWhenTimestampPassedToMemoryCopyThenAppendProfilingCalledOnceBeforeAndAfterCommand_MatchAny, IGFX_XE3_CORE);
HWTEST_EXCLUDE_PRODUCT(AppendMemoryCopyTests, givenCopyCommandListWhenTimestampPassedToMemoryCopyRegionBlitThenTimeStampRegistersAreAdded_MatchAny, IGFX_XE3_CORE);
// Suite alias: the tests using this name run on the plain ModuleFixture.
using CommandListAppendLaunchKernelXe3 = Test<ModuleFixture>;
// With VFE-state patching allowed, walks through several orderings of regular
// and cooperative kernels and verifies that no commands-to-patch are ever
// recorded on XE3.
HWTEST2_F(CommandListAppendLaunchKernelXe3, givenVariousKernelsWhenUpdateStreamPropertiesIsCalledThenRequiredStateFinalStateAndCommandsToPatchAreCorrectlySet, IsXe3Core) {
    DebugManagerStateRestore stateRestore;
    debugManager.flags.AllowPatchingVfeStateInCommandLists.set(1);

    Mock<::L0::KernelImp> regularKernel;
    std::unique_ptr<Module> regularModule(new Mock<Module>(device, nullptr));
    regularKernel.module = regularModule.get();

    Mock<::L0::KernelImp> syncBufferKernel;
    std::unique_ptr<Module> syncBufferModule(new Mock<Module>(device, nullptr));
    syncBufferKernel.module = syncBufferModule.get();
    syncBufferKernel.immutableData.kernelDescriptor->kernelAttributes.flags.usesSyncBuffer = true;

    auto cmdList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
    const auto initStatus = cmdList->initialize(device, NEO::EngineGroupType::compute, 0u);
    ASSERT_EQ(ZE_RESULT_SUCCESS, initStatus);
    EXPECT_EQ(0u, cmdList->commandsToPatch.size());

    const ze_group_count_t emptyGroupCount = {};

    // Shorthands for the two kinds of update exercised below.
    auto updateRegular = [&]() {
        cmdList->updateStreamProperties(regularKernel, false, emptyGroupCount, false);
    };
    auto updateCooperative = [&]() {
        cmdList->updateStreamProperties(syncBufferKernel, true, emptyGroupCount, false);
    };
    // Every scenario ends the same way: nothing to patch, then start over.
    auto expectNothingToPatchAndReset = [&]() {
        EXPECT_EQ(0u, cmdList->commandsToPatch.size());
        cmdList->reset();
    };

    // Single regular kernel.
    updateRegular();
    expectNothingToPatchAndReset();

    // Cooperative kernel twice in a row.
    updateCooperative();
    updateCooperative();
    expectNothingToPatchAndReset();

    // Regular followed by cooperative.
    updateRegular();
    updateCooperative();
    expectNothingToPatchAndReset();

    // Cooperative, regular, cooperative.
    updateCooperative();
    updateRegular();
    updateCooperative();
    expectNothingToPatchAndReset();

    // Regular kernel twice in a row.
    updateRegular();
    updateRegular();
    expectNothingToPatchAndReset();

    // Still empty after the final reset.
    EXPECT_EQ(0u, cmdList->commandsToPatch.size());
}
// Runs a regular-then-cooperative update sequence twice - first with the
// patching flag at its default, then with it set to 1 - and verifies that
// commandsToPatch stays empty both times.
HWTEST2_F(CommandListAppendLaunchKernelXe3, givenVariousKernelsAndPatchingDisallowedWhenUpdateStreamPropertiesIsCalledThenCommandsToPatchAreEmpty, IsXe3Core) {
    DebugManagerStateRestore stateRestore;

    Mock<::L0::KernelImp> regularKernel;
    std::unique_ptr<Module> regularModule(new Mock<Module>(device, nullptr));
    regularKernel.module = regularModule.get();

    Mock<::L0::KernelImp> syncBufferKernel;
    std::unique_ptr<Module> syncBufferModule(new Mock<Module>(device, nullptr));
    syncBufferKernel.module = syncBufferModule.get();
    syncBufferKernel.immutableData.kernelDescriptor->kernelAttributes.flags.usesSyncBuffer = true;

    auto cmdList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
    const auto initStatus = cmdList->initialize(device, NEO::EngineGroupType::compute, 0u);
    ASSERT_EQ(ZE_RESULT_SUCCESS, initStatus);

    const ze_group_count_t emptyGroupCount = {};

    // Regular kernel followed by a cooperative one, then check and reset.
    auto runSequenceAndExpectNothingToPatch = [&]() {
        cmdList->updateStreamProperties(regularKernel, false, emptyGroupCount, false);
        cmdList->updateStreamProperties(syncBufferKernel, true, emptyGroupCount, false);
        EXPECT_EQ(0u, cmdList->commandsToPatch.size());
        cmdList->reset();
    };

    // Flag left at its default value.
    runSequenceAndExpectNothingToPatch();

    // Same sequence with the flag explicitly enabled.
    debugManager.flags.AllowPatchingVfeStateInCommandLists.set(1);
    runSequenceAndExpectNothingToPatch();
}
// ModuleFixture variant that sets the EnableLocalMemory debug flag to 1
// before the base fixture is set up.
struct LocalMemoryModuleFixture : public ModuleFixture {
// Sets the flag first so that ModuleFixture::setUp() runs with it in effect.
void setUp() {
debugManager.flags.EnableLocalMemory.set(1);
ModuleFixture::setUp();
}
// Presumably snapshots the debug flags on construction and restores them on
// destruction, undoing the override above - confirm.
DebugManagerStateRestore restore;
};
// Suite alias: the tests using this name run on the local-memory fixture.
using CommandListAppendLaunchKernelXe3Core = Test<LocalMemoryModuleFixture>;
// Kernel argument in USM device memory, event signalled with device scope:
// the walker's post-sync must not request a system memory fence.
HWTEST2_F(CommandListAppendLaunchKernelXe3Core, givenAppendKernelWhenKernelNotUsingSystemMemoryAllocationsAndEventNotHostSignalScopeThenExpectsNoSystemFenceUsed, IsXe3Core) {
    using WalkerType = typename FamilyType::DefaultWalkerType;
    using TimestampType = typename FamilyType::TimestampPacketType;

    // 4 KiB of device memory to place in the kernel's residency container.
    constexpr size_t allocSize = 4096u;
    constexpr size_t allocAlignment = 4096u;
    void *deviceMem = nullptr;
    ze_device_mem_alloc_desc_t deviceMemDesc = {};
    ze_result_t status = context->allocDeviceMem(device->toHandle(), &deviceMemDesc, allocSize, allocAlignment, &deviceMem);
    EXPECT_EQ(ZE_RESULT_SUCCESS, status);
    EXPECT_NE(nullptr, deviceMem);

    Mock<::L0::KernelImp> mockKernel;
    std::unique_ptr<Module> moduleOwner(new Mock<Module>(device, nullptr));
    mockKernel.module = moduleOwner.get();

    auto *svmData = driverHandle->getSvmAllocsManager()->getSVMAlloc(deviceMem);
    ASSERT_NE(nullptr, svmData);
    auto *gpuAllocation = svmData->gpuAllocations.getGraphicsAllocation(device->getRootDeviceIndex());
    ASSERT_NE(nullptr, gpuAllocation);
    mockKernel.argumentsResidencyContainer.push_back(gpuAllocation);

    // Single-entry pool holding a device-scope signal event.
    ze_event_pool_desc_t poolDesc = {};
    poolDesc.count = 1;
    std::unique_ptr<L0::EventPool> pool(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &poolDesc, status));
    EXPECT_EQ(ZE_RESULT_SUCCESS, status);

    ze_event_desc_t signalEventDesc = {};
    signalEventDesc.index = 0;
    signalEventDesc.signal = ZE_EVENT_SCOPE_FLAG_DEVICE;
    signalEventDesc.wait = 0;
    std::unique_ptr<L0::Event> signalEvent(L0::Event::create<TimestampType>(pool.get(), &signalEventDesc, device));

    mockKernel.setGroupSize(1, 1, 1);
    ze_group_count_t dispatchSize{8, 1, 1};

    auto cmdList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
    status = cmdList->initialize(device, NEO::EngineGroupType::compute, 0u);
    ASSERT_EQ(ZE_RESULT_SUCCESS, status);

    CmdListKernelLaunchParams kernelLaunchParams = {};
    status = cmdList->appendLaunchKernelWithParams(&mockKernel, dispatchSize, signalEvent.get(), kernelLaunchParams);
    EXPECT_EQ(ZE_RESULT_SUCCESS, status);

    // Parse what was written to the command stream and locate the walker.
    auto stream = cmdList->getCmdContainer().getCommandStream();
    GenCmdList parsedCommands;
    ASSERT_TRUE(CmdParse<FamilyType>::parseCommandBuffer(parsedCommands, stream->getCpuBase(), stream->getUsed()));

    auto walkerIt = find<WalkerType *>(parsedCommands.begin(), parsedCommands.end());
    ASSERT_NE(walkerIt, parsedCommands.end());

    auto walker = genCmdCast<WalkerType *>(*walkerIt);
    auto &postSync = walker->getPostSync();
    EXPECT_FALSE(postSync.getSystemMemoryFenceRequest());

    status = context->freeMem(deviceMem);
    ASSERT_EQ(status, ZE_RESULT_SUCCESS);
}
// Kernel argument in USM host memory, event signalled with device scope:
// the walker's post-sync must not request a system memory fence.
HWTEST2_F(CommandListAppendLaunchKernelXe3Core,
          givenAppendKernelWhenKernelUsingUsmHostMemoryAllocationsAndEventNotHostSignalScopeThenExpectsNoSystemFenceUsed, IsXe3Core) {
    using WalkerType = typename FamilyType::DefaultWalkerType;
    using TimestampType = typename FamilyType::TimestampPacketType;

    // 4 KiB of host memory to place in the kernel's residency container.
    constexpr size_t allocSize = 4096u;
    constexpr size_t allocAlignment = 4096u;
    void *hostMem = nullptr;
    ze_host_mem_alloc_desc_t hostMemDesc = {};
    ze_result_t status = context->allocHostMem(&hostMemDesc, allocSize, allocAlignment, &hostMem);
    EXPECT_EQ(ZE_RESULT_SUCCESS, status);
    EXPECT_NE(nullptr, hostMem);

    Mock<::L0::KernelImp> mockKernel;
    std::unique_ptr<Module> moduleOwner(new Mock<Module>(device, nullptr));
    mockKernel.module = moduleOwner.get();

    auto *svmData = driverHandle->getSvmAllocsManager()->getSVMAlloc(hostMem);
    ASSERT_NE(nullptr, svmData);
    auto *gpuAllocation = svmData->gpuAllocations.getGraphicsAllocation(device->getRootDeviceIndex());
    ASSERT_NE(nullptr, gpuAllocation);
    mockKernel.argumentsResidencyContainer.push_back(gpuAllocation);

    // Single-entry pool holding a device-scope signal event.
    ze_event_pool_desc_t poolDesc = {};
    poolDesc.count = 1;
    std::unique_ptr<L0::EventPool> pool(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &poolDesc, status));
    EXPECT_EQ(ZE_RESULT_SUCCESS, status);

    ze_event_desc_t signalEventDesc = {};
    signalEventDesc.index = 0;
    signalEventDesc.signal = ZE_EVENT_SCOPE_FLAG_DEVICE;
    signalEventDesc.wait = 0;
    std::unique_ptr<L0::Event> signalEvent(L0::Event::create<TimestampType>(pool.get(), &signalEventDesc, device));

    mockKernel.setGroupSize(1, 1, 1);
    ze_group_count_t dispatchSize{8, 1, 1};

    auto cmdList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
    status = cmdList->initialize(device, NEO::EngineGroupType::compute, 0u);
    ASSERT_EQ(ZE_RESULT_SUCCESS, status);

    CmdListKernelLaunchParams kernelLaunchParams = {};
    status = cmdList->appendLaunchKernelWithParams(&mockKernel, dispatchSize, signalEvent.get(), kernelLaunchParams);
    EXPECT_EQ(ZE_RESULT_SUCCESS, status);

    // Parse what was written to the command stream and locate the walker.
    auto stream = cmdList->getCmdContainer().getCommandStream();
    GenCmdList parsedCommands;
    ASSERT_TRUE(CmdParse<FamilyType>::parseCommandBuffer(parsedCommands, stream->getCpuBase(), stream->getUsed()));

    auto walkerIt = find<WalkerType *>(parsedCommands.begin(), parsedCommands.end());
    ASSERT_NE(walkerIt, parsedCommands.end());

    auto walker = genCmdCast<WalkerType *>(*walkerIt);
    auto &postSync = walker->getPostSync();
    EXPECT_FALSE(postSync.getSystemMemoryFenceRequest());

    status = context->freeMem(hostMem);
    ASSERT_EQ(status, ZE_RESULT_SUCCESS);
}
// Page-fault copy from the GPU side to the CPU side of a shared allocation on
// a compute command list: the built-in kernel's walker must not request a
// system memory fence.
HWTEST2_F(CommandListAppendLaunchKernelXe3Core,
          givenAppendKernelWhenMigrationOnComputeUsingUsmSharedCpuMemoryAllocationsAndEventNotHostSignalScopeThenExpectsNoSystemFenceUsed, IsXe3Core) {
    using WalkerType = typename FamilyType::DefaultWalkerType;

    // 4 KiB shared allocation providing both a CPU and a GPU allocation.
    constexpr size_t allocSize = 4096u;
    constexpr size_t allocAlignment = 4096u;
    void *sharedMem = nullptr;
    ze_host_mem_alloc_desc_t hostMemDesc = {};
    ze_device_mem_alloc_desc_t deviceMemDesc = {};
    ze_result_t status = context->allocSharedMem(device->toHandle(), &deviceMemDesc, &hostMemDesc, allocSize, allocAlignment, &sharedMem);
    EXPECT_EQ(ZE_RESULT_SUCCESS, status);
    EXPECT_NE(nullptr, sharedMem);

    auto *svmData = driverHandle->getSvmAllocsManager()->getSVMAlloc(sharedMem);
    ASSERT_NE(nullptr, svmData);

    // Destination is the CPU allocation, source is the GPU allocation.
    auto cpuSide = svmData->cpuAllocation;
    ASSERT_NE(nullptr, cpuSide);
    auto gpuSide = svmData->gpuAllocations.getGraphicsAllocation(device->getRootDeviceIndex());
    ASSERT_NE(nullptr, gpuSide);

    auto cmdList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
    status = cmdList->initialize(device, NEO::EngineGroupType::compute, 0u);
    ASSERT_EQ(ZE_RESULT_SUCCESS, status);

    status = cmdList->appendPageFaultCopy(cpuSide, gpuSide, allocSize, false);
    EXPECT_EQ(ZE_RESULT_SUCCESS, status);

    // Launch parameters recorded by the white-box command list for the copy.
    const auto &recordedParams = cmdList->usedKernelLaunchParams;
    EXPECT_TRUE(recordedParams.isBuiltInKernel);
    EXPECT_FALSE(recordedParams.isKernelSplitOperation);
    EXPECT_TRUE(recordedParams.isDestinationAllocationInSystemMemory);

    // Parse what was written to the command stream and locate the walker.
    auto stream = cmdList->getCmdContainer().getCommandStream();
    GenCmdList parsedCommands;
    ASSERT_TRUE(CmdParse<FamilyType>::parseCommandBuffer(parsedCommands, stream->getCpuBase(), stream->getUsed()));

    auto walkerIt = find<WalkerType *>(parsedCommands.begin(), parsedCommands.end());
    ASSERT_NE(walkerIt, parsedCommands.end());

    auto walker = genCmdCast<WalkerType *>(*walkerIt);
    auto &postSync = walker->getPostSync();
    EXPECT_FALSE(postSync.getSystemMemoryFenceRequest());

    status = context->freeMem(sharedMem);
    ASSERT_EQ(status, ZE_RESULT_SUCCESS);
}
// Kernel allowed to access host allocations indirectly, event signalled with
// device scope: the walker's post-sync must not request a system memory fence.
HWTEST2_F(CommandListAppendLaunchKernelXe3Core,
          givenAppendKernelWhenKernelUsingIndirectSystemMemoryAllocationsAndEventNotHostSignalScopeThenExpectsNoSystemFenceUsed, IsXe3Core) {
    using WalkerType = typename FamilyType::DefaultWalkerType;
    using TimestampType = typename FamilyType::TimestampPacketType;

    // 4 KiB of device memory to place in the kernel's residency container.
    constexpr size_t allocSize = 4096u;
    constexpr size_t allocAlignment = 4096u;
    void *deviceMem = nullptr;
    ze_device_mem_alloc_desc_t deviceMemDesc = {};
    ze_result_t status = context->allocDeviceMem(device->toHandle(), &deviceMemDesc, allocSize, allocAlignment, &deviceMem);
    EXPECT_EQ(ZE_RESULT_SUCCESS, status);
    EXPECT_NE(nullptr, deviceMem);

    Mock<::L0::KernelImp> mockKernel;
    std::unique_ptr<Module> moduleOwner(new Mock<Module>(device, nullptr));
    mockKernel.module = moduleOwner.get();

    auto *svmData = driverHandle->getSvmAllocsManager()->getSVMAlloc(deviceMem);
    ASSERT_NE(nullptr, svmData);
    auto *gpuAllocation = svmData->gpuAllocations.getGraphicsAllocation(device->getRootDeviceIndex());
    ASSERT_NE(nullptr, gpuAllocation);
    mockKernel.argumentsResidencyContainer.push_back(gpuAllocation);

    // The condition under test: indirect host allocations are allowed.
    mockKernel.unifiedMemoryControls.indirectHostAllocationsAllowed = true;

    // Single-entry pool holding a device-scope signal event.
    ze_event_pool_desc_t poolDesc = {};
    poolDesc.count = 1;
    std::unique_ptr<L0::EventPool> pool(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &poolDesc, status));
    EXPECT_EQ(ZE_RESULT_SUCCESS, status);

    ze_event_desc_t signalEventDesc = {};
    signalEventDesc.index = 0;
    signalEventDesc.signal = ZE_EVENT_SCOPE_FLAG_DEVICE;
    signalEventDesc.wait = 0;
    std::unique_ptr<L0::Event> signalEvent(L0::Event::create<TimestampType>(pool.get(), &signalEventDesc, device));

    mockKernel.setGroupSize(1, 1, 1);
    ze_group_count_t dispatchSize{8, 1, 1};

    auto cmdList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
    status = cmdList->initialize(device, NEO::EngineGroupType::compute, 0u);
    ASSERT_EQ(ZE_RESULT_SUCCESS, status);

    CmdListKernelLaunchParams kernelLaunchParams = {};
    status = cmdList->appendLaunchKernelWithParams(&mockKernel, dispatchSize, signalEvent.get(), kernelLaunchParams);
    EXPECT_EQ(ZE_RESULT_SUCCESS, status);

    // Parse what was written to the command stream and locate the walker.
    auto stream = cmdList->getCmdContainer().getCommandStream();
    GenCmdList parsedCommands;
    ASSERT_TRUE(CmdParse<FamilyType>::parseCommandBuffer(parsedCommands, stream->getCpuBase(), stream->getUsed()));

    auto walkerIt = find<WalkerType *>(parsedCommands.begin(), parsedCommands.end());
    ASSERT_NE(walkerIt, parsedCommands.end());

    auto walker = genCmdCast<WalkerType *>(*walkerIt);
    auto &postSync = walker->getPostSync();
    EXPECT_FALSE(postSync.getSystemMemoryFenceRequest());

    status = context->freeMem(deviceMem);
    ASSERT_EQ(status, ZE_RESULT_SUCCESS);
}
// Kernel argument in USM device memory, event signalled with host scope:
// the walker's post-sync must still not request a system memory fence.
HWTEST2_F(CommandListAppendLaunchKernelXe3Core,
          givenAppendKernelWhenKernelUsingDeviceMemoryAllocationsAndEventHostSignalScopeThenExpectsNoSystemFenceUsed, IsXe3Core) {
    using WalkerType = typename FamilyType::DefaultWalkerType;
    using TimestampType = typename FamilyType::TimestampPacketType;

    // 4 KiB of device memory to place in the kernel's residency container.
    constexpr size_t allocSize = 4096u;
    constexpr size_t allocAlignment = 4096u;
    void *deviceMem = nullptr;
    ze_device_mem_alloc_desc_t deviceMemDesc = {};
    ze_result_t status = context->allocDeviceMem(device->toHandle(), &deviceMemDesc, allocSize, allocAlignment, &deviceMem);
    EXPECT_EQ(ZE_RESULT_SUCCESS, status);
    EXPECT_NE(nullptr, deviceMem);

    Mock<::L0::KernelImp> mockKernel;
    std::unique_ptr<Module> moduleOwner(new Mock<Module>(device, nullptr));
    mockKernel.module = moduleOwner.get();

    auto *svmData = driverHandle->getSvmAllocsManager()->getSVMAlloc(deviceMem);
    ASSERT_NE(nullptr, svmData);
    auto *gpuAllocation = svmData->gpuAllocations.getGraphicsAllocation(device->getRootDeviceIndex());
    ASSERT_NE(nullptr, gpuAllocation);
    mockKernel.argumentsResidencyContainer.push_back(gpuAllocation);

    // Single-entry pool holding a host-scope signal event.
    ze_event_pool_desc_t poolDesc = {};
    poolDesc.count = 1;
    std::unique_ptr<L0::EventPool> pool(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &poolDesc, status));
    EXPECT_EQ(ZE_RESULT_SUCCESS, status);

    ze_event_desc_t signalEventDesc = {};
    signalEventDesc.index = 0;
    signalEventDesc.signal = ZE_EVENT_SCOPE_FLAG_HOST;
    signalEventDesc.wait = 0;
    std::unique_ptr<L0::Event> signalEvent(L0::Event::create<TimestampType>(pool.get(), &signalEventDesc, device));

    mockKernel.setGroupSize(1, 1, 1);
    ze_group_count_t dispatchSize{8, 1, 1};

    auto cmdList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
    status = cmdList->initialize(device, NEO::EngineGroupType::compute, 0u);
    ASSERT_EQ(ZE_RESULT_SUCCESS, status);

    CmdListKernelLaunchParams kernelLaunchParams = {};
    status = cmdList->appendLaunchKernelWithParams(&mockKernel, dispatchSize, signalEvent.get(), kernelLaunchParams);
    EXPECT_EQ(ZE_RESULT_SUCCESS, status);

    // Parse what was written to the command stream and locate the walker.
    auto stream = cmdList->getCmdContainer().getCommandStream();
    GenCmdList parsedCommands;
    ASSERT_TRUE(CmdParse<FamilyType>::parseCommandBuffer(parsedCommands, stream->getCpuBase(), stream->getUsed()));

    auto walkerIt = find<WalkerType *>(parsedCommands.begin(), parsedCommands.end());
    ASSERT_NE(walkerIt, parsedCommands.end());

    auto walker = genCmdCast<WalkerType *>(*walkerIt);
    auto &postSync = walker->getPostSync();
    EXPECT_FALSE(postSync.getSystemMemoryFenceRequest());

    status = context->freeMem(deviceMem);
    ASSERT_EQ(status, ZE_RESULT_SUCCESS);
}
HWTEST2_F(CommandListAppendLaunchKernelXe3Core,
          givenAppendKernelWhenKernelUsingUsmHostMemoryAllocationsAndEventHostSignalScopeThenExpectsSystemFenceUsed, IsXe3Core) {
    // Scenario: a kernel whose residency container holds a single host USM allocation is
    // appended together with an event that has host signal scope. The walker emitted into the
    // command stream must request a system memory fence in its post-sync data.
    using DefaultWalkerType = typename FamilyType::DefaultWalkerType;

    ze_result_t result = ZE_RESULT_SUCCESS;

    constexpr size_t size = 4096u;
    constexpr size_t alignment = 4096u;
    void *ptr = nullptr;

    ze_host_mem_alloc_desc_t hostDesc = {};
    result = context->allocHostMem(&hostDesc, size, alignment, &ptr);
    // Setup steps use ASSERT_*: the statements below dereference these objects, so continuing
    // after a failed precondition would crash instead of reporting a clean test failure.
    ASSERT_EQ(ZE_RESULT_SUCCESS, result);
    ASSERT_NE(nullptr, ptr);

    Mock<::L0::KernelImp> kernel;
    auto mockModule = std::unique_ptr<Module>(new Mock<Module>(device, nullptr));
    kernel.module = mockModule.get();

    // Make the host allocation the only resident kernel argument.
    auto allocData = driverHandle->getSvmAllocsManager()->getSVMAlloc(ptr);
    ASSERT_NE(nullptr, allocData);
    auto kernelAllocation = allocData->gpuAllocations.getGraphicsAllocation(device->getRootDeviceIndex());
    ASSERT_NE(nullptr, kernelAllocation);
    kernel.argumentsResidencyContainer.push_back(kernelAllocation);

    ze_event_pool_desc_t eventPoolDesc = {};
    eventPoolDesc.count = 1;
    auto eventPool = std::unique_ptr<L0::EventPool>(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result));
    ASSERT_EQ(ZE_RESULT_SUCCESS, result);
    ASSERT_NE(nullptr, eventPool.get());

    ze_event_desc_t eventDesc = {};
    eventDesc.index = 0;
    eventDesc.signal = ZE_EVENT_SCOPE_FLAG_HOST;
    eventDesc.wait = 0;
    auto event = std::unique_ptr<L0::Event>(L0::Event::create<typename FamilyType::TimestampPacketType>(eventPool.get(), &eventDesc, device));
    ASSERT_NE(nullptr, event.get());

    kernel.setGroupSize(1, 1, 1);
    ze_group_count_t groupCount{8, 1, 1};

    auto commandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
    result = commandList->initialize(device, NEO::EngineGroupType::compute, 0u);
    ASSERT_EQ(ZE_RESULT_SUCCESS, result);

    CmdListKernelLaunchParams launchParams = {};
    result = commandList->appendLaunchKernelWithParams(&kernel, groupCount, event.get(), launchParams);
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);

    // Parse the generated command stream and locate the walker command.
    GenCmdList commands;
    ASSERT_TRUE(CmdParse<FamilyType>::parseCommandBuffer(
        commands,
        commandList->getCmdContainer().getCommandStream()->getCpuBase(),
        commandList->getCmdContainer().getCommandStream()->getUsed()));

    auto itor = find<DefaultWalkerType *>(commands.begin(), commands.end());
    ASSERT_NE(itor, commands.end());

    auto walkerCmd = genCmdCast<DefaultWalkerType *>(*itor);
    auto &postSyncData = walkerCmd->getPostSync();
    EXPECT_TRUE(postSyncData.getSystemMemoryFenceRequest());

    result = context->freeMem(ptr);
    ASSERT_EQ(result, ZE_RESULT_SUCCESS);
}
} // namespace ult
} // namespace L0

View File

@@ -0,0 +1,278 @@
/*
* Copyright (C) 2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/test/common/mocks/mock_device.h"
#include "shared/test/common/test_macros/hw_test.h"
#include "level_zero/core/test/unit_tests/fixtures/device_fixture.h"
#include "level_zero/core/test/unit_tests/mocks/mock_device.h"
namespace L0 {
namespace ult {
// Xe3 device-level tests run on the common L0 device fixture.
using DeviceXe3CoreTest = Test<DeviceFixture>;

HWTEST2_F(DeviceXe3CoreTest, whenCallingGetMemoryPropertiesWithNonNullPtrThenPropertiesAreReturned, IsXe3Core) {
    // First call passes no output buffer and only queries the number of entries.
    uint32_t propertiesCount = 0;
    EXPECT_EQ(device->getMemoryProperties(&propertiesCount, nullptr), ZE_RESULT_SUCCESS);
    EXPECT_EQ(1u, propertiesCount);

    // Second call fetches the single entry, which is then compared with the NEO device info.
    ze_device_memory_properties_t memoryProperties = {};
    EXPECT_EQ(device->getMemoryProperties(&propertiesCount, &memoryProperties), ZE_RESULT_SUCCESS);
    EXPECT_EQ(1u, propertiesCount);

    const auto &deviceInfo = this->neoDevice->getDeviceInfo();
    EXPECT_EQ(memoryProperties.maxClockRate, 0u);
    EXPECT_EQ(memoryProperties.maxBusWidth, deviceInfo.addressBits);
    EXPECT_EQ(memoryProperties.totalSize, deviceInfo.globalMemSize);
}
// Command-queue-group property tests run on the common L0 device fixture.
using CommandQueueGroupTest = Test<DeviceFixture>;

HWTEST2_F(CommandQueueGroupTest, givenNoBlitterSupportAndNoCCSThenOneQueueGroupIsReturned, IsXe3Core) {
    // Hardware description with both the CCS node and blitter operations switched off.
    NEO::HardwareInfo hardwareInfo = *NEO::defaultHwInfo.get();
    hardwareInfo.featureTable.flags.ftrCCSNode = false;
    hardwareInfo.capabilityTable.blitterOperationsSupported = false;

    const uint32_t rootDeviceIndex = 0u;
    auto *mockNeoDevice = NEO::MockDevice::createWithNewExecutionEnvironment<NEO::MockDevice>(&hardwareInfo, rootDeviceIndex);
    MockDeviceImp l0Device(mockNeoDevice, mockNeoDevice->getExecutionEnvironment());

    // Only the number of queue groups is queried (null output buffer).
    uint32_t groupCount = 0;
    EXPECT_EQ(ZE_RESULT_SUCCESS, l0Device.getCommandQueueGroupProperties(&groupCount, nullptr));
    EXPECT_GE(groupCount, 1u);
}
HWTEST2_F(CommandQueueGroupTest, givenNoBlitterSupportAndCCSThenTwoQueueGroupsAreReturned, IsXe3Core) {
    // Hardware description with the CCS node enabled and blitter operations switched off.
    NEO::HardwareInfo hardwareInfo = *NEO::defaultHwInfo.get();
    hardwareInfo.featureTable.flags.ftrCCSNode = true;
    hardwareInfo.capabilityTable.blitterOperationsSupported = false;

    const uint32_t rootDeviceIndex = 0u;
    auto *mockNeoDevice = NEO::MockDevice::createWithNewExecutionEnvironment<NEO::MockDevice>(&hardwareInfo, rootDeviceIndex);
    MockDeviceImp l0Device(mockNeoDevice, mockNeoDevice->getExecutionEnvironment());

    // Only the number of queue groups is queried (null output buffer).
    uint32_t groupCount = 0;
    EXPECT_EQ(ZE_RESULT_SUCCESS, l0Device.getCommandQueueGroupProperties(&groupCount, nullptr));
    EXPECT_GE(groupCount, 2u);
}
HWTEST2_F(CommandQueueGroupTest, givenBlitterSupportAndCCSThenFourQueueGroupsAreReturned, IsXe3Core) {
    // Hardware description with CCS, blitter operations and the whole BCS mask enabled.
    NEO::HardwareInfo hardwareInfo = *NEO::defaultHwInfo.get();
    hardwareInfo.featureTable.flags.ftrCCSNode = true;
    hardwareInfo.capabilityTable.blitterOperationsSupported = true;
    hardwareInfo.featureTable.ftrBcsInfo.set();

    const uint32_t rootDeviceIndex = 0u;
    auto *mockNeoDevice = NEO::MockDevice::createWithNewExecutionEnvironment<NEO::MockDevice>(&hardwareInfo, rootDeviceIndex);
    MockDeviceImp l0Device(mockNeoDevice, mockNeoDevice->getExecutionEnvironment());

    // Query the group count first, then fetch that many property entries.
    uint32_t groupCount = 0;
    ze_result_t status = l0Device.getCommandQueueGroupProperties(&groupCount, nullptr);
    EXPECT_EQ(ZE_RESULT_SUCCESS, status);
    EXPECT_GE(groupCount, 4u);

    std::vector<ze_command_queue_group_properties_t> groupProperties(groupCount);
    status = l0Device.getCommandQueueGroupProperties(&groupCount, groupProperties.data());
    EXPECT_EQ(ZE_RESULT_SUCCESS, status);

    // Property entry idx is validated against the engine group stored at the same index.
    auto &engineGroups = mockNeoDevice->getRegularEngineGroups();
    for (uint32_t idx = 0; idx < groupCount; idx++) {
        const auto &props = groupProperties[idx];
        const auto groupType = engineGroups[idx].engineGroupType;

        if (groupType == NEO::EngineGroupType::renderCompute) {
            EXPECT_TRUE(props.flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COMPUTE);
            EXPECT_TRUE(props.flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COPY);
            EXPECT_TRUE(props.flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COOPERATIVE_KERNELS);
            EXPECT_TRUE(props.flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_METRICS);
            EXPECT_EQ(props.numQueues, 1u);
            EXPECT_EQ(props.maxMemoryFillPatternSize, std::numeric_limits<size_t>::max());
        } else if (groupType == NEO::EngineGroupType::compute) {
            EXPECT_TRUE(props.flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COMPUTE);
            EXPECT_TRUE(props.flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COPY);
            EXPECT_TRUE(props.flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COOPERATIVE_KERNELS);
            const uint32_t enabledCcsCount = hardwareInfo.gtSystemInfo.CCSInfo.NumberOfCCSEnabled;
            EXPECT_EQ(props.numQueues, enabledCcsCount);
            EXPECT_EQ(props.maxMemoryFillPatternSize, std::numeric_limits<size_t>::max());
        } else if (groupType == NEO::EngineGroupType::copy) {
            EXPECT_TRUE(props.flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COPY);
            EXPECT_EQ(props.numQueues, 1u);
            EXPECT_EQ(props.maxMemoryFillPatternSize, sizeof(uint8_t));
        } else if (groupType == NEO::EngineGroupType::linkedCopy) {
            // One queue per BCS bit except one, which is excluded from the linked-copy group.
            EXPECT_TRUE(props.flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COPY);
            EXPECT_EQ(props.numQueues, hardwareInfo.featureTable.ftrBcsInfo.count() - 1u);
            EXPECT_EQ(props.maxMemoryFillPatternSize, sizeof(uint8_t));
        }
    }
}
HWTEST2_F(CommandQueueGroupTest, givenBlitterSupportCCSAndLinkedBcsDisabledThenThreeQueueGroupsAreReturned, IsXe3Core) {
    // Hardware description with CCS and blitter operations enabled; bit 0 of the BCS mask is set.
    NEO::HardwareInfo hardwareInfo = *NEO::defaultHwInfo.get();
    hardwareInfo.featureTable.flags.ftrCCSNode = true;
    hardwareInfo.capabilityTable.blitterOperationsSupported = true;
    hardwareInfo.featureTable.ftrBcsInfo.set(0);

    const uint32_t rootDeviceIndex = 0u;
    auto *mockNeoDevice = NEO::MockDevice::createWithNewExecutionEnvironment<NEO::MockDevice>(&hardwareInfo, rootDeviceIndex);
    MockDeviceImp l0Device(mockNeoDevice, mockNeoDevice->getExecutionEnvironment());

    // Query the group count first, then fetch that many property entries.
    uint32_t groupCount = 0;
    ze_result_t status = l0Device.getCommandQueueGroupProperties(&groupCount, nullptr);
    EXPECT_EQ(ZE_RESULT_SUCCESS, status);
    EXPECT_GE(groupCount, 3u);

    std::vector<ze_command_queue_group_properties_t> groupProperties(groupCount);
    status = l0Device.getCommandQueueGroupProperties(&groupCount, groupProperties.data());
    EXPECT_EQ(ZE_RESULT_SUCCESS, status);

    // Property entry idx is validated against the engine group stored at the same index.
    auto &engineGroups = mockNeoDevice->getRegularEngineGroups();
    for (uint32_t idx = 0; idx < groupCount; idx++) {
        const auto &props = groupProperties[idx];
        const auto groupType = engineGroups[idx].engineGroupType;

        if (groupType == NEO::EngineGroupType::renderCompute) {
            EXPECT_TRUE(props.flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COMPUTE);
            EXPECT_TRUE(props.flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COPY);
            EXPECT_TRUE(props.flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COOPERATIVE_KERNELS);
            EXPECT_TRUE(props.flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_METRICS);
            EXPECT_EQ(props.numQueues, 1u);
            EXPECT_EQ(props.maxMemoryFillPatternSize, std::numeric_limits<size_t>::max());
        } else if (groupType == NEO::EngineGroupType::compute) {
            EXPECT_TRUE(props.flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COMPUTE);
            EXPECT_TRUE(props.flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COPY);
            EXPECT_TRUE(props.flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COOPERATIVE_KERNELS);
            const uint32_t enabledCcsCount = hardwareInfo.gtSystemInfo.CCSInfo.NumberOfCCSEnabled;
            EXPECT_EQ(props.numQueues, enabledCcsCount);
            EXPECT_EQ(props.maxMemoryFillPatternSize, std::numeric_limits<size_t>::max());
        } else if (groupType == NEO::EngineGroupType::copy) {
            EXPECT_TRUE(props.flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COPY);
            EXPECT_EQ(props.numQueues, 1u);
            EXPECT_EQ(props.maxMemoryFillPatternSize, sizeof(uint8_t));
        }
    }
}
HWTEST2_F(CommandQueueGroupTest, givenBlitterDisabledAndAllBcsSetThenTwoQueueGroupsAreReturned, IsXe3Core) {
    // Blitter support is switched off through the debug flag while the BCS mask is fully set.
    DebugManagerStateRestore stateRestore;
    debugManager.flags.EnableBlitterOperationsSupport.set(0);

    NEO::HardwareInfo hardwareInfo = *NEO::defaultHwInfo.get();
    hardwareInfo.featureTable.flags.ftrCCSNode = true;
    hardwareInfo.featureTable.ftrBcsInfo.set();

    const uint32_t rootDeviceIndex = 0u;
    auto *mockNeoDevice = NEO::MockDevice::createWithNewExecutionEnvironment<NEO::MockDevice>(&hardwareInfo, rootDeviceIndex);
    MockDeviceImp l0Device(mockNeoDevice, mockNeoDevice->getExecutionEnvironment());

    // Exactly two queue groups are expected for this configuration.
    uint32_t groupCount = 0;
    EXPECT_EQ(ZE_RESULT_SUCCESS, l0Device.getCommandQueueGroupProperties(&groupCount, nullptr));
    EXPECT_EQ(groupCount, 2u);
}
// Device fixture variant that sets the EnableBlitterOperationsSupport debug flag to 0
// before the base DeviceFixture is set up.
class DeviceCopyQueueGroupXe3CoreFixture : public DeviceFixture {
  public:
    // NOTE(review): presumably restores the debug flags when the fixture is destroyed - confirm.
    DebugManagerStateRestore restorer;

    void setUp() {
        // The flag has to be written before the base setUp() runs.
        debugManager.flags.EnableBlitterOperationsSupport.set(0);
        DeviceFixture::setUp();
    }

    void tearDown() {
        DeviceFixture::tearDown();
    }
};
// Tests using the fixture that zeroes EnableBlitterOperationsSupport during setUp().
using DeviceCopyQueueGroupXe3CoreTest = Test<DeviceCopyQueueGroupXe3CoreFixture>;

HWTEST2_F(DeviceCopyQueueGroupXe3CoreTest,
          givenBlitterSupportAndEnableBlitterOperationsSupportSetToZeroThenNoCopyEngineIsReturned, IsXe3Core) {
    // The hardware info advertises blitter support (BCS bit 0, no CCS node).
    NEO::HardwareInfo hardwareInfo = *NEO::defaultHwInfo.get();
    hardwareInfo.featureTable.flags.ftrCCSNode = false;
    hardwareInfo.capabilityTable.blitterOperationsSupported = true;
    hardwareInfo.featureTable.ftrBcsInfo.set(0);

    const uint32_t rootDeviceIndex = 0u;
    auto *mockNeoDevice = NEO::MockDevice::createWithNewExecutionEnvironment<NEO::MockDevice>(&hardwareInfo,
                                                                                              rootDeviceIndex);
    MockDeviceImp l0Device(mockNeoDevice, mockNeoDevice->getExecutionEnvironment());

    // Both query forms (count only, then with an output buffer) must succeed.
    uint32_t groupCount = 0;
    ze_result_t status = l0Device.getCommandQueueGroupProperties(&groupCount, nullptr);
    EXPECT_EQ(ZE_RESULT_SUCCESS, status);

    std::vector<ze_command_queue_group_properties_t> groupProperties(groupCount);
    status = l0Device.getCommandQueueGroupProperties(&groupCount, groupProperties.data());
    EXPECT_EQ(ZE_RESULT_SUCCESS, status);

    // None of the regular engine groups may be a copy group.
    for (auto &group : mockNeoDevice->getRegularEngineGroups()) {
        EXPECT_NE(NEO::EngineGroupType::copy, group.engineGroupType);
    }
}
// Value-parameterized fixture: combines DeviceFixture with gtest's TestWithParam<uint32_t>
// and forwards gtest's SetUp()/TearDown() to the fixture's setUp()/tearDown().
class CommandQueueGroupTestXe3Core : public DeviceFixture, public testing::TestWithParam<uint32_t> {
public:
// Called by gtest before each test body.
void SetUp() override {
DeviceFixture::setUp();
}
// Called by gtest after each test body.
void TearDown() override {
DeviceFixture::tearDown();
}
};
HWTEST2_P(CommandQueueGroupTestXe3Core, givenVaryingBlitterSupportAndCCSThenBCSGroupContainsCorrectNumberOfEngines, IsXe3Core) {
    // Start from the BCS mask given by maxNBitValue(2) and additionally set the bit selected
    // by the test parameter.
    NEO::HardwareInfo hardwareInfo = *NEO::defaultHwInfo.get();
    hardwareInfo.featureTable.flags.ftrCCSNode = true;
    hardwareInfo.capabilityTable.blitterOperationsSupported = true;
    hardwareInfo.featureTable.ftrBcsInfo = maxNBitValue(2);
    hardwareInfo.featureTable.ftrBcsInfo.set(GetParam());

    const uint32_t rootDeviceIndex = 0u;
    auto *mockNeoDevice = NEO::MockDevice::createWithNewExecutionEnvironment<NEO::MockDevice>(&hardwareInfo, rootDeviceIndex);
    MockDeviceImp l0Device(mockNeoDevice, mockNeoDevice->getExecutionEnvironment());

    // Query the group count first, then fetch that many property entries.
    uint32_t groupCount = 0;
    ze_result_t status = l0Device.getCommandQueueGroupProperties(&groupCount, nullptr);
    EXPECT_EQ(ZE_RESULT_SUCCESS, status);
    EXPECT_GE(groupCount, 3u);

    std::vector<ze_command_queue_group_properties_t> groupProperties(groupCount);
    status = l0Device.getCommandQueueGroupProperties(&groupCount, groupProperties.data());
    EXPECT_EQ(ZE_RESULT_SUCCESS, status);

    // Property entry idx is validated against the engine group stored at the same index.
    auto &engineGroups = mockNeoDevice->getRegularEngineGroups();
    for (uint32_t idx = 0; idx < groupCount; idx++) {
        const auto &props = groupProperties[idx];
        const auto groupType = engineGroups[idx].engineGroupType;

        if (groupType == NEO::EngineGroupType::renderCompute) {
            EXPECT_TRUE(props.flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COMPUTE);
            EXPECT_TRUE(props.flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COPY);
            EXPECT_TRUE(props.flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COOPERATIVE_KERNELS);
            EXPECT_TRUE(props.flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_METRICS);
            EXPECT_EQ(props.numQueues, 1u);
            EXPECT_EQ(props.maxMemoryFillPatternSize, std::numeric_limits<size_t>::max());
        } else if (groupType == NEO::EngineGroupType::compute) {
            EXPECT_TRUE(props.flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COMPUTE);
            EXPECT_TRUE(props.flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COPY);
            EXPECT_TRUE(props.flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COOPERATIVE_KERNELS);
            const uint32_t enabledCcsCount = hardwareInfo.gtSystemInfo.CCSInfo.NumberOfCCSEnabled;
            EXPECT_EQ(props.numQueues, enabledCcsCount);
            EXPECT_EQ(props.maxMemoryFillPatternSize, std::numeric_limits<size_t>::max());
        } else if (groupType == NEO::EngineGroupType::copy) {
            EXPECT_TRUE(props.flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COPY);
            EXPECT_EQ(props.numQueues, 1u);
            EXPECT_EQ(props.maxMemoryFillPatternSize, sizeof(uint8_t));
        } else if (groupType == NEO::EngineGroupType::linkedCopy) {
            // One queue per BCS bit except one, which is excluded from the linked-copy group.
            EXPECT_TRUE(props.flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COPY);
            EXPECT_EQ(props.numQueues, hardwareInfo.featureTable.ftrBcsInfo.count() - 1u);
            EXPECT_EQ(props.maxMemoryFillPatternSize, sizeof(uint8_t));
        }
    }
}
// Each value is consumed by the parameterized test as the index of the BCS bit to set.
INSTANTIATE_TEST_SUITE_P(CommandQueueGroupTestXe3CoreValues,
                         CommandQueueGroupTestXe3Core,
                         testing::Values(0, 1, 2, 3));
HWTEST2_F(DeviceXe3CoreTest, givenReturnedDevicePropertiesThenExpectedPageFaultSupportReturned, IsXe3Core) {
    ze_device_properties_t deviceProps = {ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES};

    // Check the query result so a failing getProperties() is reported as such instead of
    // surfacing only as a missing flag below.
    EXPECT_EQ(ZE_RESULT_SUCCESS, device->getProperties(&deviceProps));

    // The on-demand paging flag must be reported in the device property flags.
    EXPECT_NE(0u, deviceProps.flags & ZE_DEVICE_PROPERTY_FLAG_ONDEMANDPAGING);
}
} // namespace ult
} // namespace L0

View File

@@ -0,0 +1,134 @@
/*
* Copyright (C) 2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/test/common/helpers/debug_manager_state_restore.h"
#include "shared/test/common/helpers/default_hw_info.h"
#include "shared/test/common/mocks/mock_execution_environment.h"
#include "shared/test/common/test_macros/header/per_product_test_definitions.h"
#include "shared/test/common/test_macros/test.h"
#include "level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper.h"
#include "level_zero/core/test/unit_tests/fixtures/device_fixture.h"
#include "hw_cmds_xe3_core.h"
namespace L0 {
namespace ult {
// Xe3 L0 gfx core helper tests run on the common L0 device fixture.
using L0GfxCoreHelperTestXe3 = Test<DeviceFixture>;

// The generic "returns false" variant is excluded for Xe3; the Xe3-specific test below replaces it.
HWTEST_EXCLUDE_PRODUCT(L0GfxCoreHelperTest, givenL0GfxCoreHelperWhenAskingForImageCompressionSupportThenReturnFalse, IGFX_XE3_CORE);

XE3_CORETEST_F(L0GfxCoreHelperTestXe3, givenL0GfxCoreHelperWhenAskingForImageCompressionSupportThenReturnCorrectValue) {
    DebugManagerStateRestore stateRestore;
    auto &helper = getHelper<L0GfxCoreHelper>();
    HardwareInfo hardwareInfo = *NEO::defaultHwInfo;

    // With the debug flag untouched, the result follows the capability table.
    hardwareInfo.capabilityTable.ftrRenderCompressedImages = true;
    EXPECT_TRUE(helper.imageCompressionSupported(hardwareInfo));

    hardwareInfo.capabilityTable.ftrRenderCompressedImages = false;
    EXPECT_FALSE(helper.imageCompressionSupported(hardwareInfo));

    // Once set, the debug flag decides the result regardless of the capability table.
    NEO::debugManager.flags.RenderCompressedImagesEnabled.set(1);
    EXPECT_TRUE(helper.imageCompressionSupported(hardwareInfo));

    hardwareInfo.capabilityTable.ftrRenderCompressedImages = true;
    NEO::debugManager.flags.RenderCompressedImagesEnabled.set(0);
    EXPECT_FALSE(helper.imageCompressionSupported(hardwareInfo));
}
XE3_CORETEST_F(L0GfxCoreHelperTestXe3, givenL0GfxCoreHelperWhenAskingForUsmCompressionSupportThenReturnCorrectValue) {
    DebugManagerStateRestore stateRestore;
    auto &helper = getHelper<L0GfxCoreHelper>();

    EXPECT_TRUE(helper.forceDefaultUsmCompressionSupport());

    HardwareInfo hardwareInfo = *NEO::defaultHwInfo;

    // With the debug flag untouched, the result follows the capability table.
    hardwareInfo.capabilityTable.ftrRenderCompressedBuffers = true;
    EXPECT_TRUE(helper.usmCompressionSupported(hardwareInfo));

    hardwareInfo.capabilityTable.ftrRenderCompressedBuffers = false;
    EXPECT_FALSE(helper.usmCompressionSupported(hardwareInfo));

    // Once set, the debug flag decides the result regardless of the capability table.
    NEO::debugManager.flags.RenderCompressedBuffersEnabled.set(1);
    EXPECT_TRUE(helper.usmCompressionSupported(hardwareInfo));

    hardwareInfo.capabilityTable.ftrRenderCompressedBuffers = true;
    NEO::debugManager.flags.RenderCompressedBuffersEnabled.set(0);
    EXPECT_FALSE(helper.usmCompressionSupported(hardwareInfo));
}
XE3_CORETEST_F(L0GfxCoreHelperTestXe3, GivenXe3WhenCheckingL0HelperForCmdListHeapSharingSupportThenReturnTrue) {
    // Command-list heap sharing is expected to be reported as supported on Xe3.
    EXPECT_TRUE(getHelper<L0GfxCoreHelper>().platformSupportsCmdListHeapSharing());
}
XE3_CORETEST_F(L0GfxCoreHelperTestXe3, GivenXe3WhenCheckingL0HelperForStateComputeModeTrackingSupportThenReturnTrue) {
    // State-compute-mode tracking is expected to be reported as supported on Xe3.
    EXPECT_TRUE(getHelper<L0GfxCoreHelper>().platformSupportsStateComputeModeTracking());
}
XE3_CORETEST_F(L0GfxCoreHelperTestXe3, GivenXe3WhenCheckingL0HelperForFrontEndTrackingSupportThenReturnTrue) {
    // Front-end tracking is expected to be reported as supported on Xe3.
    EXPECT_TRUE(getHelper<L0GfxCoreHelper>().platformSupportsFrontEndTracking());
}
XE3_CORETEST_F(L0GfxCoreHelperTestXe3, GivenXe3WhenCheckingL0HelperForPipelineSelectTrackingSupportThenReturnTrue) {
    // Pipeline-select tracking is expected to be reported as supported on Xe3.
    EXPECT_TRUE(getHelper<L0GfxCoreHelper>().platformSupportsPipelineSelectTracking());
}
XE3_CORETEST_F(L0GfxCoreHelperTestXe3, GivenXe3WhenCheckingL0HelperForStateBaseAddressTrackingSupportThenReturnFalse) {
    // State-base-address tracking is the one tracking feature expected to be unsupported here.
    auto &helper = getHelper<L0GfxCoreHelper>();
    auto &rootDeviceEnvironment = device->getNEODevice()->getRootDeviceEnvironment();
    EXPECT_FALSE(helper.platformSupportsStateBaseAddressTracking(rootDeviceEnvironment));
}
XE3_CORETEST_F(L0GfxCoreHelperTestXe3, GivenXe3CoreWhenGettingPlatformDefaultHeapAddressModelThenReturnPrivateHeaps) {
    // The platform heap address model is expected to be private heaps.
    auto &helper = getHelper<L0GfxCoreHelper>();
    auto &rootDeviceEnvironment = device->getNEODevice()->getRootDeviceEnvironment();
    EXPECT_EQ(NEO::HeapAddressModel::privateHeaps, helper.getPlatformHeapAddressModel(rootDeviceEnvironment));
}
XE3_CORETEST_F(L0GfxCoreHelperTestXe3, GivenXe3CoreWhenCheckingL0HelperForCmdlistPrimaryBufferSupportThenReturnTrue) {
    // Primary batch buffer command lists are expected to be reported as supported on Xe3.
    EXPECT_TRUE(getHelper<L0GfxCoreHelper>().platformSupportsPrimaryBatchBufferCmdList());
}
XE3_CORETEST_F(L0GfxCoreHelperTestXe3, GivenXe3WhenCheckingL0HelperForPlatformSupportsImmediateFlushTaskThenReturnTrue) {
    // The immediate compute flush task is expected to be reported as supported on Xe3.
    EXPECT_TRUE(getHelper<L0GfxCoreHelper>().platformSupportsImmediateComputeFlushTask());
}
XE3_CORETEST_F(L0GfxCoreHelperTestXe3, GivenXe3CoreWhenGettingSupportedRTASFormatThenExpectedFormatIsReturned) {
    const auto &helper = getHelper<L0GfxCoreHelper>();

    // The helper's return value is cast to the internal enum before comparing with version2.
    const auto reportedFormat = static_cast<RTASDeviceFormatInternal>(helper.getSupportedRTASFormat());
    EXPECT_EQ(RTASDeviceFormatInternal::version2, reportedFormat);
}
XE3_CORETEST_F(L0GfxCoreHelperTestXe3, GivenXe3WhenGettingCmdlistUpdateCapabilityThenReturnCorrectValue) {
    // 127 == 0x7F, i.e. the seven lowest capability bits are expected to be set.
    EXPECT_EQ(127u, getHelper<L0GfxCoreHelper>().getPlatformCmdListUpdateCapabilities());
}
XE3_CORETEST_F(L0GfxCoreHelperTestXe3, GivenXe3WhenGetRegsetTypeForLargeGrfDetectionIsCalledThenSrRegsetTypeIsRetuned) {
    // Large-GRF detection is expected to use the SR register set on Xe3.
    EXPECT_EQ(ZET_DEBUG_REGSET_TYPE_SR_INTEL_GPU, getHelper<L0GfxCoreHelper>().getRegsetTypeForLargeGrfDetection());
}
XE3_CORETEST_F(L0GfxCoreHelperTestXe3, GivenXe3WhenGetGrfRegisterCountIsCalledThenCorrectMaskIsRetuned) {
    auto &helper = getHelper<L0GfxCoreHelper>();

    // Eight zeroed dwords with all bits of dword 4 set; 0x1FF is expected back from the helper.
    std::vector<uint32_t> registerValues(8u, 0u);
    registerValues[4] = 0xFFFFFFFF;

    constexpr uint32_t expectedMask = 0x1FF;
    EXPECT_EQ(expectedMask, helper.getGrfRegisterCount(registerValues.data()));
}
} // namespace ult
} // namespace L0

View File

@@ -0,0 +1,196 @@
/*
* Copyright (C) 2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/kernel/kernel_properties.h"
#include "shared/test/common/mocks/mock_device.h"
#include "shared/test/common/test_macros/hw_test.h"
#include "level_zero/core/test/unit_tests/fixtures/device_fixture.h"
#include "level_zero/core/test/unit_tests/mocks/mock_device.h"
#include "level_zero/core/test/unit_tests/mocks/mock_kernel.h"
#include "level_zero/core/test/unit_tests/mocks/mock_module.h"
namespace L0 {
namespace ult {
// Kernel property tests run on the common L0 device fixture.
using KernelPropertyTest = Test<DeviceFixture>;

HWTEST2_F(KernelPropertyTest, givenKernelExtendedPropertiesStructureWhenKernelPropertiesCalledThenPropertiesAreCorrectlySet, IsXe3Core) {
    // Chain the float-atomics extension structure behind the module properties.
    ze_float_atomic_ext_properties_t floatAtomicProperties = {};
    floatAtomicProperties.stype = ZE_STRUCTURE_TYPE_FLOAT_ATOMIC_EXT_PROPERTIES;

    ze_device_module_properties_t moduleProperties = {};
    moduleProperties.pNext = &floatAtomicProperties;

    EXPECT_EQ(device->getKernelProperties(&moduleProperties), ZE_RESULT_SUCCESS);

    // fp16: load/store and min/max are expected, add is not.
    const auto &fp16Flags = floatAtomicProperties.fp16Flags;
    EXPECT_TRUE(fp16Flags & FpAtomicExtFlags::globalLoadStore);
    EXPECT_TRUE(fp16Flags & FpAtomicExtFlags::localLoadStore);
    EXPECT_TRUE(fp16Flags & FpAtomicExtFlags::globalMinMax);
    EXPECT_TRUE(fp16Flags & FpAtomicExtFlags::localMinMax);
    EXPECT_FALSE(fp16Flags & FpAtomicExtFlags::globalAdd);
    EXPECT_FALSE(fp16Flags & FpAtomicExtFlags::localAdd);

    // fp32: every checked capability is expected.
    const auto &fp32Flags = floatAtomicProperties.fp32Flags;
    EXPECT_TRUE(fp32Flags & FpAtomicExtFlags::globalLoadStore);
    EXPECT_TRUE(fp32Flags & FpAtomicExtFlags::localLoadStore);
    EXPECT_TRUE(fp32Flags & FpAtomicExtFlags::globalMinMax);
    EXPECT_TRUE(fp32Flags & FpAtomicExtFlags::localMinMax);
    EXPECT_TRUE(fp32Flags & FpAtomicExtFlags::globalAdd);
    EXPECT_TRUE(fp32Flags & FpAtomicExtFlags::localAdd);

    // fp64: every checked capability is expected.
    const auto &fp64Flags = floatAtomicProperties.fp64Flags;
    EXPECT_TRUE(fp64Flags & FpAtomicExtFlags::globalLoadStore);
    EXPECT_TRUE(fp64Flags & FpAtomicExtFlags::localLoadStore);
    EXPECT_TRUE(fp64Flags & FpAtomicExtFlags::globalMinMax);
    EXPECT_TRUE(fp64Flags & FpAtomicExtFlags::localMinMax);
    EXPECT_TRUE(fp64Flags & FpAtomicExtFlags::globalAdd);
    EXPECT_TRUE(fp64Flags & FpAtomicExtFlags::localAdd);
}
using Xe3KernelSetupTests = ::testing::Test;

XE3_CORETEST_F(Xe3KernelSetupTests, givenParamsWhenSetupGroupSizeThenNumThreadsPerThreadGroupAreCorrectly) {
    // Verifies numThreadsPerThreadGroup computed by setGroupSize() for every combination of
    // required GRF count, SIMT size and HW local-id generation listed in the table below.
    DebugManagerStateRestore restore;
    VariableBackup<HardwareInfo> backupHwInfo(defaultHwInfo.get());

    // Each row: {SIMT Size, HW local-id generation, Max Num of threads}
    using ThreadCountCases = std::array<std::array<uint32_t, 3>, 4>;
    struct GrfScenario {
        uint16_t numGrfRequired;
        ThreadCountCases values;
    };

    // One scenario per GRF count; a fresh device, kernel and module are created for each.
    const GrfScenario scenarios[] = {
        {128u, {{{16u, 0u, 64u}, {16u, 1u, 64u}, {32u, 1u, 32u}, {32u, 0u, 64u}}}},
        {160u, {{{16u, 0u, 48u}, {16u, 1u, 48u}, {32u, 1u, 32u}, {32u, 0u, 48u}}}},
        {192u, {{{16u, 0u, 40u}, {16u, 1u, 40u}, {32u, 1u, 32u}, {32u, 0u, 40u}}}},
        {256u, {{{16u, 0u, 32u}, {16u, 1u, 32u}, {32u, 1u, 32u}, {32u, 0u, 32u}}}},
        {512u, {{{16u, 0u, 16u}, {16u, 1u, 16u}, {32u, 1u, 16u}, {32u, 0u, 16u}}}},
    };

    for (const auto &scenario : scenarios) {
        NEO::Device *mockNeoDevice(NEO::MockDevice::createWithNewExecutionEnvironment<NEO::MockDevice>(NEO::defaultHwInfo.get(), 0));
        MockDeviceImp l0Device(mockNeoDevice, mockNeoDevice->getExecutionEnvironment());

        Mock<KernelImp> kernel;
        kernel.descriptor.kernelAttributes.numGrfRequired = scenario.numGrfRequired;
        kernel.enableForcingOfGenerateLocalIdByHw = true;

        Mock<Module> module(&l0Device, nullptr);
        module.getMaxGroupSizeResult = UINT32_MAX;
        kernel.module = &module;

        for (const auto &[simtSize, isHwLocalIdGeneration, expectedNumThreadsPerThreadGroup] : scenario.values) {
            kernel.descriptor.kernelAttributes.simdSize = simtSize;
            kernel.forceGenerateLocalIdByHw = isHwLocalIdGeneration;

            kernel.setGroupSize(1024u, 1024u, 1024u);
            // The message identifies the failing table row, since all rows share this assertion.
            EXPECT_EQ(expectedNumThreadsPerThreadGroup, kernel.numThreadsPerThreadGroup)
                << "numGrfRequired=" << scenario.numGrfRequired
                << " simdSize=" << simtSize
                << " hwLocalIdGeneration=" << isHwLocalIdGeneration;

            // Reset the stored group size before the next table row is evaluated.
            kernel.groupSize[0] = kernel.groupSize[1] = kernel.groupSize[2] = 0;
        }
    }
}
} // namespace ult
} // namespace L0

View File

@@ -0,0 +1,7 @@
#
# Copyright (C) 2025 Intel Corporation
#
# SPDX-License-Identifier: MIT
#
# NOTE(review): add_subdirectories() is a project-defined helper, not a CMake built-in;
# presumably it adds the child directories of this folder to the build - confirm against its definition.
add_subdirectories()

View File

@@ -0,0 +1,14 @@
#
# Copyright (C) 2025 Intel Corporation
#
# SPDX-License-Identifier: MIT
#
# The PTL sysman product helper sources are added to the L0 static library only when
# PTL support is enabled.
if(SUPPORT_PTL)
  target_sources(${L0_STATIC_LIB_NAME} PRIVATE
                 ${CMAKE_CURRENT_SOURCE_DIR}/enable_sysman_product_helper_ptl.cpp
                 ${CMAKE_CURRENT_SOURCE_DIR}/sysman_product_helper_ptl.cpp
  )
endif()

View File

@@ -0,0 +1,16 @@
/*
* Copyright (C) 2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "level_zero/sysman/source/shared/linux/product_helper/sysman_product_helper_hw.h"
namespace L0 {
namespace Sysman {

// File-local static object; NOTE(review): its construction presumably registers the PTL
// sysman product helper - confirm against EnableSysmanProductHelper.
static EnableSysmanProductHelper<IGFX_PTL> enableSysmanProductHelperPtl;

} // namespace Sysman
} // namespace L0

View File

@@ -0,0 +1,35 @@
/*
* Copyright (C) 2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "level_zero/sysman/source/shared/linux/product_helper/sysman_product_helper_hw.h"
#include "level_zero/sysman/source/shared/linux/product_helper/sysman_product_helper_hw.inl"
namespace L0 {
namespace Sysman {
// Product that every specialization in this file targets.
constexpr static auto gfxProduct = IGFX_PTL;
// NOTE(review): included inside the namespace and after gfxProduct is defined; presumably the
// .inl supplies further SysmanProductHelperHw<gfxProduct> specializations - confirm against the .inl.
#include "level_zero/sysman/source/shared/linux/product_helper/sysman_product_helper_xe_hp_and_later.inl"
// Selects netlink as the GT RAS utility interface for PTL.
template <>
RasInterfaceType SysmanProductHelperHw<gfxProduct>::getGtRasUtilInterface() {
return RasInterfaceType::netlink;
}
// Selects netlink as the HBM RAS utility interface for PTL.
template <>
RasInterfaceType SysmanProductHelperHw<gfxProduct>::getHbmRasUtilInterface() {
return RasInterfaceType::netlink;
}
// zesInit is reported as supported on PTL.
template <>
bool SysmanProductHelperHw<gfxProduct>::isZesInitSupported() {
return true;
}
// Explicit instantiation; it has to stay after the explicit specializations above.
template class SysmanProductHelperHw<gfxProduct>;
} // namespace Sysman
} // namespace L0

View File

@@ -0,0 +1,7 @@
#
# Copyright (C) 2025 Intel Corporation
#
# SPDX-License-Identifier: MIT
#
# Pull in the child directories of this folder through the project's
# add_subdirectories() helper. NOTE(review): the helper is defined elsewhere in
# the build system; presumably it calls add_subdirectory() for each child - confirm.
add_subdirectories()

View File

@@ -0,0 +1,14 @@
#
# Copyright (C) 2025 Intel Corporation
#
# SPDX-License-Identifier: MIT
#
# Add the PTL sysman product helper sources of this directory to the target
# named by L0_STATIC_LIB_NAME, but only when SUPPORT_PTL is enabled.
if(SUPPORT_PTL)
  target_sources(${L0_STATIC_LIB_NAME}
                 PRIVATE
                 ${CMAKE_CURRENT_SOURCE_DIR}/enable_sysman_product_helper_ptl.cpp
                 ${CMAKE_CURRENT_SOURCE_DIR}/sysman_product_helper_ptl.cpp
  )
endif()

View File

@@ -0,0 +1,16 @@
/*
* Copyright (C) 2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "level_zero/sysman/source/shared/windows/product_helper/sysman_product_helper_hw.h"
namespace L0 {
namespace Sysman {
// File-local static object instantiated for the IGFX_PTL product.
// NOTE(review): EnableSysmanProductHelper is declared elsewhere; presumably its
// constructor registers the PTL sysman product helper during static
// initialization - confirm against sysman_product_helper_hw.h.
static EnableSysmanProductHelper<IGFX_PTL> enablePtl;
} // namespace Sysman
} // namespace L0

View File

@@ -0,0 +1,23 @@
/*
* Copyright (C) 2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "level_zero/sysman/source/shared/windows/product_helper/sysman_product_helper_hw.h"
#include "level_zero/sysman/source/shared/windows/product_helper/sysman_product_helper_hw.inl"
namespace L0 {
namespace Sysman {
// Product id used as the template argument for the specialization below.
constexpr static auto gfxProduct = IGFX_PTL;
// PTL specialization: zesInit is reported as supported.
template <>
bool SysmanProductHelperHw<gfxProduct>::isZesInitSupported() {
return true;
}
// Explicit instantiation of the helper for PTL; kept after the specialization above.
template class SysmanProductHelperHw<gfxProduct>;
} // namespace Sysman
} // namespace L0

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2024 Intel Corporation
* Copyright (C) 2024-2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -35,6 +35,11 @@ HWTEST2_F(SysmanProductHelperSysmanInitTest, GivenValidProductHelperHandleWhenQu
EXPECT_FALSE(pSysmanProductHelper->isZesInitSupported());
}
// On PTL the sysman product helper created for the default hardware info must
// report that zesInit is supported.
HWTEST2_F(SysmanProductHelperSysmanInitTest, GivenValidProductHelperHandleWhenQueryingForZesInitSupportThenTrueIsReturned, IsPTL) {
    const auto productFamily = defaultHwInfo->platform.eProductFamily;
    auto pSysmanProductHelper = L0::Sysman::SysmanProductHelper::create(productFamily);

    EXPECT_TRUE(pSysmanProductHelper->isZesInitSupported());
}
class SysmanNewPlatformInitTest : public ::testing::Test {
public:
void SetUp() override{};

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2024 Intel Corporation
* Copyright (C) 2024-2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -30,6 +30,11 @@ HWTEST2_F(SysmanProductHelperSysmanInitTest, GivenValidProductHelperHandleWhenQu
EXPECT_FALSE(pSysmanProductHelper->isZesInitSupported());
}
// On PTL the sysman product helper created for the default hardware info must
// report that zesInit is supported.
HWTEST2_F(SysmanProductHelperSysmanInitTest, GivenValidProductHelperHandleWhenQueryingForZesInitSupportThenTrueIsReturned, IsPTL) {
    const auto productFamily = defaultHwInfo->platform.eProductFamily;
    auto pSysmanProductHelper = L0::Sysman::SysmanProductHelper::create(productFamily);

    EXPECT_TRUE(pSysmanProductHelper->isZesInitSupported());
}
} // namespace ult
} // namespace Sysman
} // namespace L0

View File

@@ -1,5 +1,5 @@
#
# Copyright (C) 2021-2023 Intel Corporation
# Copyright (C) 2021-2025 Intel Corporation
#
# SPDX-License-Identifier: MIT
#
@@ -16,5 +16,11 @@ target_sources(${TARGET_NAME} PRIVATE
${CMAKE_CURRENT_SOURCE_DIR}/debug_session_registers_access.cpp
${CMAKE_CURRENT_SOURCE_DIR}/debug_session_registers_access.h
)
# Xe3-and-later debug session tests are added to the test target only when
# SUPPORT_XE3_AND_LATER is set.
if(SUPPORT_XE3_AND_LATER)
  target_sources(${TARGET_NAME} PRIVATE
                 ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
                 ${CMAKE_CURRENT_SOURCE_DIR}/debug_session_tests_xe3_and_later.cpp
  )
endif()
add_subdirectories()

View File

@@ -0,0 +1,110 @@
/*
* Copyright (C) 2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/debugger/debugger_l0.h"
#include "shared/test/common/mocks/mock_device.h"
#include "shared/test/common/mocks/mock_sip.h"
#include "shared/test/common/test_macros/hw_test.h"
#include "level_zero/core/test/unit_tests/mocks/mock_built_ins.h"
#include "level_zero/tools/source/debug/debug_session_imp.h"
#include "level_zero/tools/test/unit_tests/sources/debug/debug_session_common.h"
#include "level_zero/tools/test/unit_tests/sources/debug/debug_session_registers_access.h"
#include "level_zero/tools/test/unit_tests/sources/debug/mock_debug_session.h"
#include "level_zero/zet_intel_gpu_debug.h"
namespace L0 {
namespace ult {
// Fixture for the Xe3-and-later debug-session register tests. It extends
// DebugSessionRegistersAccessV3 with mock builtins, an enlarged state save area
// and cached handles (thread, SR register descriptor) used by the tests.
struct Xe3AndLaterCoreDebugSessionTest : public L0::ult::DebugSessionRegistersAccessV3 {
    void setUp() override {
        L0::ult::DebugSessionRegistersAccessV3::setUp();

        // Replace the device builtins with a mock that carries a state save area
        // header created with arguments (3, 128).
        // NOTE(review): presumably header version 3 and 128 GRFs - confirm.
        auto mockBuiltins = new L0::ult::MockBuiltins();
        mockBuiltins->stateSaveAreaHeader = NEO::MockSipData::createStateSaveAreaHeader(3, 128);
        MockRootDeviceEnvironment::resetBuiltins(neoDevice->executionEnvironment->rootDeviceEnvironments[0].get(), mockBuiltins);

        {
            // Grow the session's copy of the state save area using sizes read from
            // its V3 header. The header pointer is scoped to this block because
            // resize() may reallocate the underlying storage.
            auto pStateSaveAreaHeader = reinterpret_cast<NEO::StateSaveAreaHeader *>(session->stateSaveAreaHeader.data());
            auto size = pStateSaveAreaHeader->versionHeader.size * 8 +
                        pStateSaveAreaHeader->regHeaderV3.state_area_offset +
                        pStateSaveAreaHeader->regHeaderV3.state_save_size * 16;
            session->stateSaveAreaHeader.resize(size);
        }

        thread = stoppedThread;
        // Fetched after the resize so it points into the final storage.
        regdesc = &(reinterpret_cast<NEO::StateSaveAreaHeader *>(session->stateSaveAreaHeader.data()))->regHeaderV3.sr;
    }

    // Scratch SR register contents written by the tests.
    uint32_t sr0[8] = {0, 0, 0, 0, 0, 0, 0, 0};
    // Descriptor of the SR register set inside the session's state save area header.
    SIP::regset_desc *regdesc = nullptr;
    // Value-initialized so the member is never indeterminate before setUp() assigns it.
    ze_device_thread_t thread = {};
};
using Xe3AndLaterCoreDebugSessionTestFixture = Test<Xe3AndLaterCoreDebugSessionTest>;

// For each candidate register count, encode it into the low bits of sr0[4], write
// the SR register through registersAccessHelper, and check that the first entry
// reported by zetDebugGetThreadRegisterSetProperties has that count.
HWTEST2_F(Xe3AndLaterCoreDebugSessionTestFixture,
          GivenSrRegisterWhenGetThreadRegisterSetPropertiesCalledThenCorrectRegisterCountIsReported, IsXe3Core) {
    const std::vector<uint32_t> checkRegCount{32, 64, 96, 128, 160, 192, 256};

    for (const auto expectedRegCount : checkRegCount) {
        sr0[4] = 0xFFFFFC00 | expectedRegCount;
        session->registersAccessHelper(session->allThreads[stoppedThreadId].get(), regdesc, 0, 1, sr0, true);

        uint32_t threadCount = 0;
        // Fatal checks: entry 0 is read below, so a failed or empty count query
        // must stop the test instead of indexing an empty vector.
        ASSERT_EQ(ZE_RESULT_SUCCESS, zetDebugGetThreadRegisterSetProperties(session->toHandle(), thread, &threadCount, nullptr));
        ASSERT_NE(0u, threadCount);

        std::vector<zet_debug_regset_properties_t> threadRegsetProps(threadCount);
        ASSERT_EQ(ZE_RESULT_SUCCESS, zetDebugGetThreadRegisterSetProperties(session->toHandle(), thread, &threadCount, threadRegsetProps.data()));
        EXPECT_EQ(expectedRegCount, threadRegsetProps[0].count);
    }
}
// Xe3p variant: same flow as the Xe3 test, with 512 added to the list of
// register counts that are encoded into sr0[4] and expected back.
HWTEST2_F(Xe3AndLaterCoreDebugSessionTestFixture,
          GivenSrRegisterWhenGetThreadRegisterSetPropertiesCalledThenCorrectRegisterCountIsReported, IsXe3pCore) {
    const std::vector<uint32_t> checkRegCount{32, 64, 96, 128, 160, 192, 256, 512};

    for (const auto expectedRegCount : checkRegCount) {
        sr0[4] = 0xFFFFFC00 | expectedRegCount;
        session->registersAccessHelper(session->allThreads[stoppedThreadId].get(), regdesc, 0, 1, sr0, true);

        uint32_t threadCount = 0;
        // Fatal checks: entry 0 is read below, so a failed or empty count query
        // must stop the test instead of indexing an empty vector.
        ASSERT_EQ(ZE_RESULT_SUCCESS, zetDebugGetThreadRegisterSetProperties(session->toHandle(), thread, &threadCount, nullptr));
        ASSERT_NE(0u, threadCount);

        std::vector<zet_debug_regset_properties_t> threadRegsetProps(threadCount);
        ASSERT_EQ(ZE_RESULT_SUCCESS, zetDebugGetThreadRegisterSetProperties(session->toHandle(), thread, &threadCount, threadRegsetProps.data()));
        EXPECT_EQ(expectedRegCount, threadRegsetProps[0].count);
    }
}
using DebugSessionRegistersAccessTesXe3AndLaterSpecfic = Test<DebugSessionRegistersAccess>;

// Compares the per-device register set properties with the per-thread ones:
// every compared field must match, except that `count` is not compared for the
// GRF register set.
HWTEST2_F(DebugSessionRegistersAccessTesXe3AndLaterSpecfic, GivenGetThreadRegisterSetPropertiesCalledExceptGrfCountOtherPropertieAreSameAsGetRegisterSetProperties,
          IsAtLeastXe3Core) {
    auto builtins = new MockBuiltins();
    builtins->stateSaveAreaHeader = MockSipData::createStateSaveAreaHeader(2);
    MockRootDeviceEnvironment::resetBuiltins(neoDevice->executionEnvironment->rootDeviceEnvironments[0].get(), builtins);

    ze_device_thread_t queriedThread = stoppedThread;
    uint32_t deviceRegsetCount = 0;
    uint32_t threadRegsetCount = 0;

    // Count-only queries first; both APIs must agree on the number of register sets.
    EXPECT_EQ(ZE_RESULT_SUCCESS, zetDebugGetRegisterSetProperties(session->getConnectedDevice(), &deviceRegsetCount, nullptr));
    EXPECT_EQ(13u, deviceRegsetCount);
    EXPECT_EQ(ZE_RESULT_SUCCESS, zetDebugGetThreadRegisterSetProperties(session->toHandle(), queriedThread, &threadRegsetCount, nullptr));
    ASSERT_EQ(threadRegsetCount, deviceRegsetCount);

    std::vector<zet_debug_regset_properties_t> deviceProps(deviceRegsetCount);
    ASSERT_EQ(ZE_RESULT_SUCCESS, zetDebugGetRegisterSetProperties(session->getConnectedDevice(), &deviceRegsetCount, deviceProps.data()));
    std::vector<zet_debug_regset_properties_t> threadProps(deviceRegsetCount);
    ASSERT_EQ(ZE_RESULT_SUCCESS, zetDebugGetThreadRegisterSetProperties(session->toHandle(), queriedThread, &deviceRegsetCount, threadProps.data()));

    for (size_t idx = 0; idx < deviceRegsetCount; ++idx) {
        const auto &fromDevice = deviceProps[idx];
        const auto &fromThread = threadProps[idx];

        EXPECT_EQ(fromDevice.stype, fromThread.stype);
        EXPECT_EQ(fromDevice.pNext, fromThread.pNext);
        EXPECT_EQ(fromDevice.version, fromThread.version);
        EXPECT_EQ(fromDevice.generalFlags, fromThread.generalFlags);
        EXPECT_EQ(fromDevice.deviceFlags, fromThread.deviceFlags);

        const bool isGrfRegset = (fromDevice.type == ZET_DEBUG_REGSET_TYPE_GRF_INTEL_GPU);
        if (!isGrfRegset) {
            EXPECT_EQ(fromDevice.count, fromThread.count);
        }

        EXPECT_EQ(fromDevice.bitSize, fromThread.bitSize);
        EXPECT_EQ(fromDevice.byteSize, fromThread.byteSize);
    }
}
} // namespace ult
} // namespace L0

View File

@@ -36,7 +36,7 @@ components:
dest_dir: kernels_bin
type: git
branch: kernels_bin
revision: 3265-3281
revision: 3265-3282
kmdaf:
branch: kmdaf
dest_dir: kmdaf

View File

@@ -0,0 +1,15 @@
#
# Copyright (C) 2025 Intel Corporation
#
# SPDX-License-Identifier: MIT
#
# When Xe3 core support is enabled, collect this directory's sources in
# RUNTIME_SRCS_XE3_CORE, attach them to the NEO static library target and
# publish the same list as a GLOBAL property.
if(SUPPORT_XE3_CORE)
  set(RUNTIME_SRCS_XE3_CORE
      ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
  )

  target_sources(${NEO_STATIC_LIB_NAME} PRIVATE ${RUNTIME_SRCS_XE3_CORE})
  set_property(GLOBAL PROPERTY RUNTIME_SRCS_XE3_CORE ${RUNTIME_SRCS_XE3_CORE})
endif()

View File

@@ -0,0 +1,20 @@
/*
* Copyright (C) 2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/xe3_core/hw_cmds_base.h"
#include "opencl/source/mem_obj/buffer_base.inl"
namespace NEO {
// Family alias and core id for Xe3.
// NOTE(review): presumably both names are consumed by buffer_factory_init.inl,
// which is included below inside this namespace - confirm.
using Family = Xe3CoreFamily;
static auto gfxCore = IGFX_XE3_CORE;
// Explicit instantiation of the buffer implementation for the Xe3 family.
template class BufferHw<Family>;
// Textual include; must stay after the Family/gfxCore definitions above.
#include "opencl/source/mem_obj/buffer_factory_init.inl"
} // namespace NEO

View File

@@ -0,0 +1,29 @@
/*
* Copyright (C) 2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/helpers/populate_factory.h"
#include "shared/source/xe3_core/hw_cmds_base.h"
#include "opencl/source/helpers/cl_gfx_core_helper_base.inl"
#include "opencl/source/helpers/cl_gfx_core_helper_xehp_and_later.inl"
namespace NEO {
// Family alias and core id for Xe3.
// NOTE(review): presumably both names are consumed by the .inl files included
// below inside this namespace - confirm.
using Family = Xe3CoreFamily;
static auto gfxCore = IGFX_XE3_CORE;
// Textual includes; must stay after the Family/gfxCore definitions above.
#include "opencl/source/helpers/cl_gfx_core_helper_factory_init.inl"
#include "opencl/source/helpers/cl_gfx_core_helper_pvc_and_later.inl"
// Xe3 specialization: always reports that no aux resolves are required,
// regardless of the kernel passed in.
template <>
bool ClGfxCoreHelperHw<Family>::requiresAuxResolves(const KernelInfo &kernelInfo) const {
return false;
}
// Explicit instantiation of the helper for the Xe3 family.
template class ClGfxCoreHelperHw<Family>;
} // namespace NEO

View File

@@ -0,0 +1,42 @@
/*
* Copyright (C) 2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/helpers/populate_factory.h"
#include "shared/source/memory_manager/unified_memory_manager.h"
#include "shared/source/xe3_core/hw_cmds_base.h"
#include "opencl/source/command_queue/command_queue_hw.h"
#include "opencl/source/command_queue/enqueue_resource_barrier.h"
namespace NEO {
// Family alias and core id for Xe3. gfxCore indexes commandQueueFactory further
// down in this file. NOTE(review): the .inl included right after this namespace
// presumably relies on Family as well - confirm.
using Family = Xe3CoreFamily;
static auto gfxCore = IGFX_XE3_CORE;
} // namespace NEO
#include "opencl/source/command_queue/command_queue_hw_xehp_and_later.inl"
namespace NEO {
// Stores the Xe3 command queue creation function in the factory table slot
// indexed by gfxCore.
template <>
void populateFactoryTable<CommandQueueHw<Family>>() {
    extern CommandQueueCreateFunc commandQueueFactory[IGFX_MAX_CORE];
    commandQueueFactory[gfxCore] = CommandQueueHw<Family>::create;
}

// Decides whether a cache flush is needed for BCS. The ForceCacheFlushForBcs
// debug flag wins whenever it is set to something other than -1; otherwise the
// answer comes from the product helper's isDcFlushAllowed().
template <>
bool CommandQueueHw<Family>::isCacheFlushForBcsRequired() const {
    const auto forcedSetting = debugManager.flags.ForceCacheFlushForBcs.get();
    if (forcedSetting != -1) {
        return forcedSetting != 0;
    }

    return this->device->getProductHelper().isDcFlushAllowed();
}
} // namespace NEO

// Explicit instantiation of the command queue for the Xe3 family.
template class NEO::CommandQueueHw<NEO::Family>;

View File

@@ -0,0 +1,32 @@
/*
* Copyright (C) 2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/helpers/populate_factory.h"
#include "shared/source/xe3_core/hw_cmds_base.h"
#include "opencl/source/command_queue/command_queue_hw.h"
#include "opencl/source/helpers/cl_gfx_core_helper.h"
#include "opencl/source/mem_obj/buffer.h"
#include "opencl/source/mem_obj/image.h"
#include "opencl/source/sampler/sampler.h"
namespace NEO {
using Family = Xe3CoreFamily;
// Helper whose constructor calls populateFactoryTable for the Xe3 buffer,
// cl gfx core helper, command queue, image and sampler types.
struct EnableOCLXe3Core {
EnableOCLXe3Core() {
populateFactoryTable<BufferHw<Family>>();
populateFactoryTable<ClGfxCoreHelperHw<Family>>();
populateFactoryTable<CommandQueueHw<Family>>();
populateFactoryTable<ImageHw<Family>>();
populateFactoryTable<SamplerHw<Family>>();
}
};
// File-local instance: its constructor runs the registrations above during
// static initialization of this translation unit.
static EnableOCLXe3Core enable;
} // namespace NEO

View File

@@ -0,0 +1,32 @@
/*
* Copyright (C) 2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/command_container/encode_surface_state.h"
#include "shared/source/helpers/gfx_core_helper.h"
#include "shared/source/xe3_core/hw_cmds_base.h"
#include "opencl/source/command_queue/gpgpu_walker_xehp_and_later.inl"
#include "opencl/source/command_queue/hardware_interface_xehp_and_later.inl"
namespace NEO {
using Family = Xe3CoreFamily;
// Explicit instantiations for the Xe3 family. The class templates are
// instantiated first, then the member function templates that take a walker
// type, each with Family::DefaultWalkerType.
template class GpgpuWalkerHelper<Family>;
template void GpgpuWalkerHelper<Family>::setupTimestampPacket<Family::DefaultWalkerType>(LinearStream *cmdStream, Family::DefaultWalkerType *walkerCmd, TagNodeBase *timestampPacketNode, const RootDeviceEnvironment &rootDeviceEnvironment);
template size_t GpgpuWalkerHelper<Family>::setGpgpuWalkerThreadData<Family::DefaultWalkerType>(Family::DefaultWalkerType *walkerCmd, const KernelDescriptor &kernelDescriptor, const size_t startWorkGroups[3],
const size_t numWorkGroups[3], const size_t localWorkSizesIn[3], uint32_t simd, uint32_t workDim, bool localIdsGenerationByRuntime, bool inlineDataProgrammingRequired, uint32_t requiredWorkGroupOrder);
template class HardwareInterface<Family>;
template void HardwareInterface<Family>::dispatchWalker<Family::DefaultWalkerType>(CommandQueue &commandQueue, const MultiDispatchInfo &multiDispatchInfo, const CsrDependencies &csrDependencies, HardwareInterfaceWalkerArgs &walkerArgs);
template void HardwareInterface<Family>::programWalker<Family::DefaultWalkerType>(LinearStream &commandStream, Kernel &kernel, CommandQueue &commandQueue, IndirectHeap &dsh, IndirectHeap &ioh, IndirectHeap &ssh, const DispatchInfo &dispatchInfo, HardwareInterfaceWalkerArgs &walkerArgs);
template void HardwareInterface<Family>::dispatchKernelCommands<Family::DefaultWalkerType>(CommandQueue &commandQueue, const DispatchInfo &dispatchInfo, LinearStream &commandStream, IndirectHeap &dsh, IndirectHeap &ioh, IndirectHeap &ssh, HardwareInterfaceWalkerArgs &walkerArgs);
template Family::DefaultWalkerType *HardwareInterface<Family>::allocateWalkerSpace<Family::DefaultWalkerType>(LinearStream &commandStream, const Kernel &kernel);
template struct EnqueueOperation<Family>;
} // namespace NEO

View File

@@ -0,0 +1,41 @@
/*
* Copyright (C) 2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/compiler_interface/external_functions.h"
#include "shared/source/kernel/implicit_args_helper.h"
#include "shared/source/xe3_core/hw_cmds_base.h"
#include "opencl/source/gtpin/gtpin_gfx_core_helper.h"
#include "opencl/source/gtpin/gtpin_gfx_core_helper.inl"
#include "opencl/source/gtpin/gtpin_gfx_core_helper_xehp_and_later.inl"
#include "ocl_igc_shared/gtpin/gtpin_ocl_interface.h"
namespace NEO {
// Factory table defined elsewhere; indexed by core id.
extern GTPinGfxCoreHelperCreateFunctionType gtpinGfxCoreHelperFactory[IGFX_MAX_CORE];
using Family = Xe3CoreFamily;
static const auto gfxFamily = IGFX_XE3_CORE;
// Xe3 specialization of the GTPin gen version query.
// NOTE(review): this unconditionally hits DEBUG_BREAK_IF(true) and then returns
// the XE_HPG value, which looks like a placeholder until a dedicated Xe3 GTPin
// enum value exists - confirm.
template <>
uint32_t GTPinGfxCoreHelperHw<Family>::getGenVersion() const {
DEBUG_BREAK_IF(true);
return gtpin::GTPIN_XE_HPG_CORE;
}
// Explicit instantiation of the GTPin helper for the Xe3 family.
template class GTPinGfxCoreHelperHw<Family>;
// Helper whose constructor stores the Xe3 GTPin helper creator in the factory
// table slot indexed by gfxFamily.
struct GTPinEnableXe3Core {
GTPinEnableXe3Core() {
gtpinGfxCoreHelperFactory[gfxFamily] = GTPinGfxCoreHelperHw<Family>::create;
}
};
// File-local instance: its constructor performs the registration during static
// initialization of this translation unit.
static GTPinEnableXe3Core gtpinEnable;
} // namespace NEO

View File

@@ -0,0 +1,19 @@
/*
* Copyright (C) 2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/helpers/cache_flush_xehp_and_later.inl"
#include "shared/source/xe3_core/hw_cmds_base.h"
#include "opencl/source/helpers/hardware_commands_helper.h"
#include "opencl/source/helpers/hardware_commands_helper_base.inl"
#include "opencl/source/helpers/hardware_commands_helper_xehp_and_later.inl"
namespace NEO {
// Family alias for Xe3.
// NOTE(review): presumably consumed by enable_hardware_commands_helper_cw.inl,
// which is included right after this namespace - confirm.
using FamilyType = Xe3CoreFamily;
} // namespace NEO
#include "opencl/source/helpers/enable_hardware_commands_helper_cw.inl"

View File

@@ -0,0 +1,22 @@
/*
* Copyright (C) 2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/xe3_core/hw_cmds_base.h"
#include "opencl/source/mem_obj/image.inl"
namespace NEO {
// Family alias and core id for Xe3.
// NOTE(review): presumably consumed by the image .inl files included after this
// namespace (tgllp-and-later, xe2-and-later, factory init) - confirm.
using Family = Xe3CoreFamily;
static auto gfxCore = IGFX_XE3_CORE;
} // namespace NEO
#include "opencl/source/mem_obj/image_tgllp_and_later.inl"
#include "opencl/source/mem_obj/image_xe2_and_later.inl"
// factory initializer
#include "opencl/source/mem_obj/image_factory_init.inl"

View File

@@ -0,0 +1,11 @@
/*
* Copyright (C) 2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/xe3_core/hw_cmds_base.h"
// Family alias and core id for Xe3, defined at global scope.
// NOTE(review): presumably consumed by sampler_tgllp_and_later.inl, which is
// included on the next line - confirm.
using Family = NEO::Xe3CoreFamily;
constexpr static auto gfxCore = IGFX_XE3_CORE;
#include "opencl/source/sampler/sampler_tgllp_and_later.inl"

View File

@@ -0,0 +1,16 @@
#
# Copyright (C) 2025 Intel Corporation
#
# SPDX-License-Identifier: MIT
#
# Xe3 core AUB tests: add this directory's test sources to igdrcl_aub_tests and
# descend into subdirectories, but only when TESTS_XE3_CORE is enabled.
if(TESTS_XE3_CORE)
  target_sources(igdrcl_aub_tests PRIVATE
                 ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
                 ${CMAKE_CURRENT_SOURCE_DIR}/aub_mem_dump_tests_xe3_core.cpp
                 ${CMAKE_CURRENT_SOURCE_DIR}/copy_engine_aub_tests_xe3_core.cpp
                 ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_kernel_aub_tests_xe3_core.cpp
                 ${CMAKE_CURRENT_SOURCE_DIR}/system_memfence_aub_tests_xe3_core.cpp
  )

  add_subdirectories()
endif()

View File

@@ -0,0 +1,17 @@
/*
* Copyright (C) 2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/test/common/mocks/mock_device.h"
#include "opencl/test/unit_test/aub_tests/command_stream/aub_mem_dump_tests.h"
#include "opencl/test/unit_test/fixtures/cl_device_fixture.h"
using Xe3CoreAubMemDumpTests = Test<NEO::ClDeviceFixture>;

// Runs the shared setupAUB helper for the CCS engine on the fixture's device.
XE3_CORETEST_F(Xe3CoreAubMemDumpTests, GivenCcsThenExpectationsAreMet) {
    const auto engineUnderTest = aub_stream::ENGINE_CCS;
    setupAUB<FamilyType>(pDevice, engineUnderTest);
}

View File

@@ -0,0 +1,126 @@
/*
* Copyright (C) 2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/test/common/test_macros/header/per_product_test_definitions.h"
#include "shared/test/common/test_macros/test.h"
#include "opencl/source/helpers/hardware_commands_helper.h"
#include "opencl/test/unit_test/aub_tests/command_stream/copy_engine_aub_tests_xehp_and_later.h"
#include "hw_cmds_xe3_core.h"
using namespace NEO;
// Copy engine AUB fixture for Xe3: reuses CopyEngineXeHPAndLater and selects
// ENGINE_BCS as the blitter engine before the base fixture is set up.
template <uint32_t numTiles, typename FamilyType, bool useLocalMemory>
struct CopyEnginesXe3CoreFixture : public CopyEngineXeHPAndLater<numTiles, useLocalMemory> {
    using MEM_COPY = typename FamilyType::MEM_COPY;

    void SetUp() override {
        // The engine type has to be assigned before the base SetUp() runs.
        this->bcsEngineType = aub_stream::EngineType::ENGINE_BCS;
        CopyEngineXeHPAndLater<numTiles, useLocalMemory>::SetUp();
    }
};
// Single-tile copy engine tests with useLocalMemory = true.
// Each test below only forwards to the same-named *Impl<FamilyType>() helper.
// NOTE(review): the helpers are presumably inherited from CopyEngineXeHPAndLater,
// which is not visible here - confirm.
using SingleTileXe3CoreTests = CopyEnginesXe3CoreFixture<1, Xe3CoreFamily, true>;
XE3_CORETEST_F(SingleTileXe3CoreTests, givenNotCompressedBufferWhenBltExecutedThenCompressDataAndResolve) {
givenNotCompressedBufferWhenBltExecutedThenCompressDataAndResolveImpl<FamilyType>();
}
XE3_CORETEST_F(SingleTileXe3CoreTests, givenHostPtrWhenBlitCommandToCompressedBufferIsDispatchedThenCopiedDataIsValid) {
givenHostPtrWhenBlitCommandToCompressedBufferIsDispatchedThenCopiedDataIsValidImpl<FamilyType>();
}
XE3_CORETEST_F(SingleTileXe3CoreTests, givenDstHostPtrWhenBlitCommandFromCompressedBufferIsDispatchedThenCopiedDataIsValid) {
givenDstHostPtrWhenBlitCommandFromCompressedBufferIsDispatchedThenCopiedDataIsValidImpl<FamilyType>();
}
XE3_CORETEST_F(SingleTileXe3CoreTests, givenDstHostPtrWhenBlitCommandFromNotCompressedBufferIsDispatchedThenCopiedDataIsValid) {
givenDstHostPtrWhenBlitCommandFromNotCompressedBufferIsDispatchedThenCopiedDataIsValidImpl<FamilyType>();
}
XE3_CORETEST_F(SingleTileXe3CoreTests, givenSrcHostPtrWhenBlitCommandToNotCompressedBufferIsDispatchedThenCopiedDataIsValid) {
givenSrcHostPtrWhenBlitCommandToNotCompressedBufferIsDispatchedThenCopiedDataIsValidImpl<FamilyType>();
}
XE3_CORETEST_F(SingleTileXe3CoreTests, givenBufferWithOffsetWhenHostPtrBlitCommandIsDispatchedFromHostPtrThenDataIsCorrectlyCopied) {
givenBufferWithOffsetWhenHostPtrBlitCommandIsDispatchedFromHostPtrThenDataIsCorrectlyCopiedImpl<FamilyType>();
}
XE3_CORETEST_F(SingleTileXe3CoreTests, givenBufferWithOffsetWhenHostPtrBlitCommandIsDispatchedToHostPtrThenDataIsCorrectlyCopied) {
givenBufferWithOffsetWhenHostPtrBlitCommandIsDispatchedToHostPtrThenDataIsCorrectlyCopiedImpl<FamilyType>();
}
XE3_CORETEST_F(SingleTileXe3CoreTests, givenOffsetsWhenBltExecutedThenCopiedDataIsValid) {
givenOffsetsWhenBltExecutedThenCopiedDataIsValidImpl<FamilyType>();
}
XE3_CORETEST_F(SingleTileXe3CoreTests, givenSrcCompressedBufferWhenBlitCommandToDstCompressedBufferIsDispatchedThenCopiedDataIsValid) {
givenSrcCompressedBufferWhenBlitCommandToDstCompressedBufferIsDispatchedThenCopiedDataIsValidImpl<FamilyType>();
}
XE3_CORETEST_F(SingleTileXe3CoreTests, givenCompressedBufferWhenAuxTranslationCalledThenResolveAndCompress) {
givenCompressedBufferWhenAuxTranslationCalledThenResolveAndCompressImpl<FamilyType>();
}
// Single-tile copy engine tests with useLocalMemory = false.
// Each test below only forwards to the same-named *Impl<FamilyType>() helper.
// NOTE(review): the helpers are presumably inherited from CopyEngineXeHPAndLater,
// which is not visible here - confirm.
using SingleTileSystemMemoryXe3CoreTests = CopyEnginesXe3CoreFixture<1, Xe3CoreFamily, false>;
XE3_CORETEST_F(SingleTileSystemMemoryXe3CoreTests, givenSrcSystemBufferWhenBlitCommandToDstSystemBufferIsDispatchedThenCopiedDataIsValid) {
givenSrcSystemBufferWhenBlitCommandToDstSystemBufferIsDispatchedThenCopiedDataIsValidImpl<FamilyType>();
}
XE3_CORETEST_F(SingleTileSystemMemoryXe3CoreTests, givenNotCompressedBufferWhenBltExecutedThenCompressDataAndResolve) {
givenNotCompressedBufferWhenBltExecutedThenCompressDataAndResolveImpl<FamilyType>();
}
XE3_CORETEST_F(SingleTileSystemMemoryXe3CoreTests, givenHostPtrWhenBlitCommandToCompressedBufferIsDispatchedThenCopiedDataIsValid) {
givenHostPtrWhenBlitCommandToCompressedBufferIsDispatchedThenCopiedDataIsValidImpl<FamilyType>();
}
XE3_CORETEST_F(SingleTileSystemMemoryXe3CoreTests, givenDstHostPtrWhenBlitCommandFromCompressedBufferIsDispatchedThenCopiedDataIsValid) {
givenDstHostPtrWhenBlitCommandFromCompressedBufferIsDispatchedThenCopiedDataIsValidImpl<FamilyType>();
}
XE3_CORETEST_F(SingleTileSystemMemoryXe3CoreTests, givenDstHostPtrWhenBlitCommandFromNotCompressedBufferIsDispatchedThenCopiedDataIsValid) {
givenDstHostPtrWhenBlitCommandFromNotCompressedBufferIsDispatchedThenCopiedDataIsValidImpl<FamilyType>();
}
XE3_CORETEST_F(SingleTileSystemMemoryXe3CoreTests, givenSrcHostPtrWhenBlitCommandToNotCompressedBufferIsDispatchedThenCopiedDataIsValid) {
givenSrcHostPtrWhenBlitCommandToNotCompressedBufferIsDispatchedThenCopiedDataIsValidImpl<FamilyType>();
}
XE3_CORETEST_F(SingleTileSystemMemoryXe3CoreTests, givenBufferWithOffsetWhenHostPtrBlitCommandIsDispatchedFromHostPtrThenDataIsCorrectlyCopied) {
givenBufferWithOffsetWhenHostPtrBlitCommandIsDispatchedFromHostPtrThenDataIsCorrectlyCopiedImpl<FamilyType>();
}
XE3_CORETEST_F(SingleTileSystemMemoryXe3CoreTests, givenBufferWithOffsetWhenHostPtrBlitCommandIsDispatchedToHostPtrThenDataIsCorrectlyCopied) {
givenBufferWithOffsetWhenHostPtrBlitCommandIsDispatchedToHostPtrThenDataIsCorrectlyCopiedImpl<FamilyType>();
}
XE3_CORETEST_F(SingleTileSystemMemoryXe3CoreTests, givenOffsetsWhenBltExecutedThenCopiedDataIsValid) {
givenOffsetsWhenBltExecutedThenCopiedDataIsValidImpl<FamilyType>();
}
XE3_CORETEST_F(SingleTileSystemMemoryXe3CoreTests, givenSrcCompressedBufferWhenBlitCommandToDstCompressedBufferIsDispatchedThenCopiedDataIsValid) {
givenSrcCompressedBufferWhenBlitCommandToDstCompressedBufferIsDispatchedThenCopiedDataIsValidImpl<FamilyType>();
}
XE3_CORETEST_F(SingleTileSystemMemoryXe3CoreTests, givenCompressedBufferWhenAuxTranslationCalledThenResolveAndCompress) {
givenCompressedBufferWhenAuxTranslationCalledThenResolveAndCompressImpl<FamilyType>();
}
// Buffer-rect variants (read / write / copy) on the SingleTileXe3CoreTests
// fixture; each test only forwards to the same-named *Impl<FamilyType>() helper.
XE3_CORETEST_F(SingleTileXe3CoreTests, givenReadBufferRectWithOffsetWhenHostPtrBlitCommandIsDispatchedToHostPtrThenDataIsCorrectlyCopied) {
givenReadBufferRectWithOffsetWhenHostPtrBlitCommandIsDispatchedToHostPtrThenDataIsCorrectlyCopiedImpl<FamilyType>();
}
XE3_CORETEST_F(SingleTileXe3CoreTests, givenWriteBufferRectWithOffsetWhenHostPtrBlitCommandIsDispatchedToHostPtrThenDataIsCorrectlyCopied) {
givenWriteBufferRectWithOffsetWhenHostPtrBlitCommandIsDispatchedToHostPtrThenDataIsCorrectlyCopiedImpl<FamilyType>();
}
XE3_CORETEST_F(SingleTileXe3CoreTests, givenCopyBufferRectWithOffsetWhenHostPtrBlitCommandIsDispatchedToHostPtrThenDataIsCorrectlyCopied) {
givenCopyBufferRectWithOffsetWhenHostPtrBlitCommandIsDispatchedToHostPtrThenDataIsCorrectlyCopiedImpl<FamilyType>();
}

View File

@@ -0,0 +1,49 @@
/*
* Copyright (C) 2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/test/common/helpers/debug_manager_state_restore.h"
#include "shared/test/common/test_macros/hw_test.h"
#include "opencl/source/command_queue/command_queue.h"
#include "opencl/source/mem_obj/buffer.h"
#include "opencl/test/unit_test/aub_tests/fixtures/aub_fixture.h"
#include "opencl/test/unit_test/fixtures/simple_arg_fixture.h"
using namespace NEO;
extern const HardwareInfo *defaultHwInfo;
using AUBSimpleKernelStatelessTestXe3Core = Test<KernelAUBFixture<SimpleKernelStatelessFixture>>;

// With EnableMemoryPrefetch forced to 1, enqueue the fixture's kernel over a
// page-sized, zero-initialized USE_HOST_PTR buffer and expect every byte of the
// buffer's GPU memory to equal 0xCD afterwards.
HWTEST2_F(AUBSimpleKernelStatelessTestXe3Core, givenPrefetchEnabledWhenEnqueuedKernelThenDataIsCorrect, IsXe3Core) {
    DebugManagerStateRestore restore;
    debugManager.flags.EnableMemoryPrefetch.set(1);

    constexpr size_t bufferSize = MemoryConstants::pageSize;
    cl_uint workDim = 1;
    size_t globalWorkOffset[3] = {0, 0, 0};
    size_t globalWorkSize[3] = {bufferSize, 1, 1};
    size_t localWorkSize[3] = {1, 1, 1};

    uint8_t bufferData[bufferSize] = {};
    uint8_t bufferExpected[bufferSize];
    memset(bufferExpected, 0xCD, bufferSize);

    auto buffer = std::unique_ptr<Buffer>(Buffer::create(context, CL_MEM_USE_HOST_PTR | CL_MEM_ALLOW_UNRESTRICTED_SIZE_INTEL,
                                                         bufferSize, bufferData, retVal));
    ASSERT_NE(nullptr, buffer);

    kernel->setArg(0, buffer.get());

    retVal = this->pCmdQ->enqueueKernel(kernel.get(), workDim, globalWorkOffset, globalWorkSize,
                                        localWorkSize, 0, nullptr, nullptr);
    // The enqueue result used to be stored and never inspected; stop here on
    // failure so the memory expectation below is not evaluated for a kernel
    // that was never submitted.
    ASSERT_EQ(CL_SUCCESS, retVal);

    this->pCmdQ->flush();

    expectMemory<FamilyType>(addrToPtr(ptrOffset(buffer->getGraphicsAllocation(rootDeviceIndex)->getGpuAddress(), buffer->getOffset())),
                             bufferExpected, bufferSize);
}

View File

@@ -0,0 +1,174 @@
/*
* Copyright (C) 2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/os_interface/product_helper.h"
#include "shared/test/common/helpers/debug_manager_state_restore.h"
#include "shared/test/common/mocks/mock_execution_environment.h"
#include "shared/test/common/test_macros/header/per_product_test_definitions.h"
#include "shared/test/common/test_macros/test.h"
#include "shared/test/common/utilities/base_object_utils.h"
#include "opencl/source/api/api.h"
#include "opencl/test/unit_test/aub_tests/fixtures/aub_fixture.h"
#include "opencl/test/unit_test/aub_tests/fixtures/multicontext_ocl_aub_fixture.h"
#include "opencl/test/unit_test/mocks/mock_kernel.h"
#include "hw_cmds_xe3_core.h"
using namespace NEO;
// AUB fixture that selects the MI_MEM_FENCE command-stream variant of the
// global fence: only ProgramGlobalFenceAsMiMemFenceCommandInCommandStream is
// set to 1, the other two fence flags are set to 0. The debugRestorer member
// restores the debug flags when the fixture is destroyed.
class SystemMemFenceViaMiMemFence : public AUBFixture,
                                    public ::testing::Test {
  public:
    void SetUp() override {
        debugManager.flags.ProgramGlobalFenceAsMiMemFenceCommandInCommandStream.set(1);
        debugManager.flags.ProgramGlobalFenceAsPostSyncOperationInComputeWalker.set(0);
        debugManager.flags.ProgramGlobalFenceAsKernelInstructionInEUKernel.set(0);

        // Flags are configured first; the AUB fixture is set up afterwards.
        AUBFixture::setUp(defaultHwInfo.get());
    }

    void TearDown() override {
        AUBFixture::tearDown();
    }

    DebugManagerStateRestore debugRestorer;
    cl_int retVal = CL_SUCCESS;
};
using SystemMemFenceViaMiMemFenceXe3Core = SystemMemFenceViaMiMemFence;

// Copies a 0x11 pattern host -> device USM -> host USM with blocking memcpy
// calls and expects both allocations to hold the pattern afterwards.
XE3_CORETEST_F(SystemMemFenceViaMiMemFenceXe3Core, WhenGeneratedAsMiMemFenceCommandInCommandStreamThenWritesToSystemMemoryAreGloballyObservable) {
    const size_t allocSize = MemoryConstants::kiloByte;
    std::vector<char> pattern(allocSize, 0x11);

    // Stage 1: pattern -> device allocation.
    auto deviceUsm = clDeviceMemAllocINTEL(this->context, this->device.get(), nullptr, allocSize, 0, &retVal);
    EXPECT_EQ(CL_SUCCESS, retVal);
    ASSERT_NE(nullptr, deviceUsm);

    retVal = clEnqueueMemcpyINTEL(this->pCmdQ, true, deviceUsm, pattern.data(), allocSize, 0, nullptr, nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);
    expectMemory<FamilyType>(deviceUsm, pattern.data(), allocSize);

    // Stage 2: device allocation -> host allocation.
    auto hostUsm = clHostMemAllocINTEL(this->context, nullptr, allocSize, 0, &retVal);
    EXPECT_EQ(CL_SUCCESS, retVal);
    ASSERT_NE(nullptr, hostUsm);

    retVal = clEnqueueMemcpyINTEL(this->pCmdQ, true, hostUsm, deviceUsm, allocSize, 0, nullptr, nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);
    expectMemory<FamilyType>(hostUsm, pattern.data(), allocSize);

    // Release both allocations.
    retVal = clMemFreeINTEL(this->context, deviceUsm);
    EXPECT_EQ(CL_SUCCESS, retVal);
    retVal = clMemFreeINTEL(this->context, hostUsm);
    EXPECT_EQ(CL_SUCCESS, retVal);
}
// AUB fixture that selects the compute-walker post-sync variant of the global
// fence: only ProgramGlobalFenceAsPostSyncOperationInComputeWalker is set to 1,
// the other two fence flags are set to 0. The debugRestorer member restores the
// debug flags when the fixture is destroyed.
class SystemMemFenceViaComputeWalker : public AUBFixture,
                                       public ::testing::Test {
  public:
    void SetUp() override {
        debugManager.flags.ProgramGlobalFenceAsMiMemFenceCommandInCommandStream.set(0);
        debugManager.flags.ProgramGlobalFenceAsPostSyncOperationInComputeWalker.set(1);
        debugManager.flags.ProgramGlobalFenceAsKernelInstructionInEUKernel.set(0);

        // Flags are configured first; the AUB fixture is set up afterwards.
        AUBFixture::setUp(defaultHwInfo.get());
    }

    void TearDown() override {
        AUBFixture::tearDown();
    }

    DebugManagerStateRestore debugRestorer;
    cl_int retVal = CL_SUCCESS;
};
using SystemMemFenceViaComputeWalkerXe3Core = SystemMemFenceViaComputeWalker;

// Copies a 0x11 pattern host -> device USM -> host USM with blocking memcpy
// calls and expects both allocations to hold the pattern afterwards.
XE3_CORETEST_F(SystemMemFenceViaComputeWalkerXe3Core, WhenGeneratedAsPostSyncOperationInWalkerThenWritesToSystemMemoryAreGloballyObservable) {
    const size_t allocSize = MemoryConstants::kiloByte;
    std::vector<char> pattern(allocSize, 0x11);

    // Stage 1: pattern -> device allocation.
    auto deviceUsm = clDeviceMemAllocINTEL(this->context, this->device.get(), nullptr, allocSize, 0, &retVal);
    EXPECT_EQ(CL_SUCCESS, retVal);
    ASSERT_NE(nullptr, deviceUsm);

    retVal = clEnqueueMemcpyINTEL(this->pCmdQ, true, deviceUsm, pattern.data(), allocSize, 0, nullptr, nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);
    expectMemory<FamilyType>(deviceUsm, pattern.data(), allocSize);

    // Stage 2: device allocation -> host allocation.
    auto hostUsm = clHostMemAllocINTEL(this->context, nullptr, allocSize, 0, &retVal);
    EXPECT_EQ(CL_SUCCESS, retVal);
    ASSERT_NE(nullptr, hostUsm);

    retVal = clEnqueueMemcpyINTEL(this->pCmdQ, true, hostUsm, deviceUsm, allocSize, 0, nullptr, nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);
    expectMemory<FamilyType>(hostUsm, pattern.data(), allocSize);

    // Release both allocations.
    retVal = clMemFreeINTEL(this->context, deviceUsm);
    EXPECT_EQ(CL_SUCCESS, retVal);
    retVal = clMemFreeINTEL(this->context, hostUsm);
    EXPECT_EQ(CL_SUCCESS, retVal);
}
// Multi-context AUB fixture for the blitter flavour of the MI_MEM_FENCE test.
// It enables the MI_MEM_FENCE global fence variant plus blitter support and
// blitter use for enqueue operations, and skips the test when the product
// helper reports no blitter preference for the default hardware info.
class SystemMemFenceWithBlitterXe3Core : public MulticontextOclAubFixture,
                                         public ::testing::Test {
  public:
    void SetUp() override {
        debugManager.flags.ProgramGlobalFenceAsMiMemFenceCommandInCommandStream.set(1);
        debugManager.flags.ProgramGlobalFenceAsPostSyncOperationInComputeWalker.set(0);
        debugManager.flags.ProgramGlobalFenceAsKernelInstructionInEUKernel.set(0);
        debugManager.flags.EnableBlitterOperationsSupport.set(1);
        debugManager.flags.EnableBlitterForEnqueueOperations.set(1);

        // A throw-away mock environment is used only to reach the product helper.
        MockExecutionEnvironment mockExecutionEnvironment{};
        auto &productHelper = mockExecutionEnvironment.rootDeviceEnvironments[0]->getHelper<ProductHelper>();
        const bool blitterPreferred = productHelper.obtainBlitterPreference(*defaultHwInfo);
        if (!blitterPreferred) {
            // NOTE(review): GoogleTest still calls TearDown() after a skipped
            // SetUp(), so tearDown() runs without a matching setUp() - confirm
            // that MulticontextOclAubFixture::tearDown() tolerates that.
            GTEST_SKIP();
        }

        MulticontextOclAubFixture::setUp(1, EnabledCommandStreamers::single, true);
    }

    void TearDown() override {
        MulticontextOclAubFixture::tearDown();
    }

    DebugManagerStateRestore debugRestorer;
    cl_int retVal = CL_SUCCESS;
};
// Blitter flavour: copies a 0x11 pattern host -> device USM -> host USM through
// commandQueues[0][0] with blocking memcpy calls and expects both allocations
// to hold the pattern afterwards.
XE3_CORETEST_F(SystemMemFenceWithBlitterXe3Core, givenSystemMemFenceWhenGeneratedAsMiMemFenceCmdInBCSThenWritesToSystemMemoryAreGloballyObservable) {
    const size_t allocSize = MemoryConstants::kiloByte;
    std::vector<char> pattern(allocSize, 0x11);

    // Stage 1: pattern -> device allocation on the first tile device.
    auto deviceUsm = clDeviceMemAllocINTEL(context.get(), tileDevices[0], nullptr, allocSize, 0, &retVal);
    EXPECT_EQ(CL_SUCCESS, retVal);
    ASSERT_NE(nullptr, deviceUsm);

    retVal = clEnqueueMemcpyINTEL(commandQueues[0][0].get(), true, deviceUsm, pattern.data(), allocSize, 0, nullptr, nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);
    expectMemory<FamilyType>(deviceUsm, pattern.data(), allocSize, 0, 0);

    // Stage 2: device allocation -> host allocation.
    auto hostUsm = clHostMemAllocINTEL(context.get(), nullptr, allocSize, 0, &retVal);
    EXPECT_EQ(CL_SUCCESS, retVal);
    ASSERT_NE(nullptr, hostUsm);

    retVal = clEnqueueMemcpyINTEL(commandQueues[0][0].get(), true, hostUsm, deviceUsm, allocSize, 0, nullptr, nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);
    expectMemory<FamilyType>(hostUsm, pattern.data(), allocSize, 0, 0);

    // Release both allocations.
    retVal = clMemFreeINTEL(context.get(), deviceUsm);
    EXPECT_EQ(CL_SUCCESS, retVal);
    retVal = clMemFreeINTEL(context.get(), hostUsm);
    EXPECT_EQ(CL_SUCCESS, retVal);
}

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2019-2023 Intel Corporation
* Copyright (C) 2019-2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -10,6 +10,8 @@
#include "opencl/test/unit_test/mocks/mock_cl_device.h"
#include <cstring>
namespace NEO {
struct GetDeviceInfoMemCapabilitiesTest : ::testing::Test {

View File

@@ -1,5 +1,5 @@
#
# Copyright (C) 2018-2023 Intel Corporation
# Copyright (C) 2018-2025 Intel Corporation
#
# SPDX-License-Identifier: MIT
#
@@ -41,6 +41,11 @@ if(TESTS_DG2_AND_LATER)
${CMAKE_CURRENT_SOURCE_DIR}/test_preamble_dg2_and_later.cpp
)
endif()
# Add the Xe3-and-later preamble test source to the helpers test list,
# but only when TESTS_XE3_AND_LATER is enabled.
if(TESTS_XE3_AND_LATER)
list(APPEND IGDRCL_SRCS_tests_helpers
${CMAKE_CURRENT_SOURCE_DIR}/test_preamble_xe3_and_later.cpp
)
endif()
get_property(NEO_CORE_PREAMBLE_TESTS GLOBAL PROPERTY NEO_CORE_PREAMBLE_TESTS)

View File

@@ -0,0 +1,48 @@
/*
* Copyright (C) 2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/command_stream/stream_properties.h"
#include "shared/test/common/fixtures/preamble_fixture.h"
#include "shared/test/common/helpers/debug_manager_state_restore.h"
#include "test_traits_common.h"
using namespace NEO;
// Fixture alias for CFE_STATE preamble tests on Xe3 and later cores.
using PreambleCfeStateXe3AndLater = PreambleFixture;
// Overrides CFE-related debug flags, programs the VFE/CFE state through
// PreambleHelper and checks the values found in the parsed CFE_STATE command.
HWTEST2_F(PreambleCfeStateXe3AndLater, givenSetDebugFlagWhenPreambleCfeStateIsProgrammedThenCFEStateParamsHaveSetValue, IsAtLeastXe3Core) {
    using CFE_STATE = typename FamilyType::CFE_STATE;

    uint32_t expectedValue1 = 1u;
    uint32_t expectedValue2 = 2u;

    DebugManagerStateRestore dbgRestore;

    // CFEFusedEUDispatch and CFESingleSliceDispatchCCSMode are set here, but no
    // matching CFE_STATE field is verified at the end of this test.
    debugManager.flags.CFEFusedEUDispatch.set(expectedValue1);
    debugManager.flags.OverDispatchControl.set(expectedValue1);
    debugManager.flags.CFESingleSliceDispatchCCSMode.set(expectedValue1);
    debugManager.flags.CFENumberOfWalkers.set(expectedValue2);
    debugManager.flags.MaximumNumberOfThreads.set(expectedValue2);

    // Shift in 64-bit arithmetic: a plain `1 << n` is evaluated as int and only
    // then widened, which would overflow for shift amounts of 31 or more.
    uint64_t expectedAddress = static_cast<uint64_t>(1) << CFE_STATE::SCRATCHSPACEBUFFER_BIT_SHIFT;

    auto pVfeCmd = PreambleHelper<FamilyType>::getSpaceForVfeState(&linearStream, *defaultHwInfo, EngineGroupType::renderCompute);
    StreamProperties emptyProperties{};
    PreambleHelper<FamilyType>::programVfeState(pVfeCmd, pDevice->getRootDeviceEnvironment(), 0u, expectedAddress, 16u, emptyProperties);

    parseCommands<FamilyType>(linearStream);
    auto cfeStateIt = find<CFE_STATE *>(cmdList.begin(), cmdList.end());
    ASSERT_NE(cmdList.end(), cfeStateIt);
    auto cfeState = reinterpret_cast<CFE_STATE *>(*cfeStateIt);

    EXPECT_EQ(expectedValue1, static_cast<uint32_t>(cfeState->getOverDispatchControl()));
    // The walker count is only checked on cores whose test traits declare support for it.
    if constexpr (TestTraits<gfxCoreFamily>::numberOfWalkersInCfeStateSupported) {
        EXPECT_EQ(expectedValue2, cfeState->getNumberOfWalkers());
    }
    EXPECT_EQ(expectedValue2, cfeState->getMaximumNumberOfThreads());
}

View File

@@ -0,0 +1,13 @@
#
# Copyright (C) 2025 Intel Corporation
#
# SPDX-License-Identifier: MIT
#
# Xe3 core offline-compiler tests: only configured when TESTS_XE3_CORE is enabled.
if(TESTS_XE3_CORE)
# At this level the source list holds only this CMakeLists.txt.
set(IGDRCL_SRCS_offline_compiler_tests_xe3_core
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
)
target_sources(ocloc_tests PRIVATE ${IGDRCL_SRCS_offline_compiler_tests_xe3_core})
# add_subdirectories() is a project-defined helper; presumably it descends into
# the child directories (e.g. per-product tests) - confirm against its definition.
add_subdirectories()
endif()

View File

@@ -0,0 +1,13 @@
#
# Copyright (C) 2025 Intel Corporation
#
# SPDX-License-Identifier: MIT
#
# Panther Lake (PTL) offline-compiler tests: added to ocloc_tests only when TESTS_PTL is enabled.
if(TESTS_PTL)
target_sources(ocloc_tests PRIVATE
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
${CMAKE_CURRENT_SOURCE_DIR}/offline_compiler_tests_ptl.cpp
)
# add_subdirectories() is a project-defined helper; presumably it descends into
# any child directories - confirm against its definition.
add_subdirectories()
endif()

View File

@@ -0,0 +1,59 @@
/*
* Copyright (C) 2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/offline_compiler/source/ocloc_arg_helper.h"
#include "shared/source/helpers/product_config_helper.h"
#include "shared/source/xe3_core/hw_info_xe3_core.h"
#include "shared/test/common/test_macros/header/per_product_test_definitions.h"
#include "shared/test/common/test_macros/test.h"
#include "opencl/test/unit_test/offline_compiler/mock/mock_offline_compiler.h"
#include "device_ids_configs_ptl.h"
#include "platforms.h"
using namespace NEO;
using PtlOfflineCompilerTests = ::testing::Test;
// For every id in ptlHDeviceIds: pass the id as a hex string to initHardwareInfo
// and check the stdout message, the stored device id and the device config (PTL_H_A0).
PTLTEST_F(PtlOfflineCompilerTests, givenPtlHDeviceIdValueWhenInitHwInfoThenCorrectValuesAreSet) {
    MockOfflineCompiler offlineCompiler;
    HardwareIpVersion expectedConfig = AOT::PTL_H_A0;

    for (const auto &id : ptlHDeviceIds) {
        // Device id formatted as "0x<hex>".
        std::stringstream hexId;
        hexId << "0x" << std::hex << id;

        testing::internal::CaptureStdout();
        offlineCompiler.initHardwareInfo(hexId.str());
        std::string capturedOutput = testing::internal::GetCapturedStdout();

        std::stringstream expectedMessage;
        expectedMessage << "Auto-detected target based on " << hexId.str() << " device id: ptl-h-a0\n";

        EXPECT_STREQ(capturedOutput.c_str(), expectedMessage.str().c_str());
        EXPECT_EQ(offlineCompiler.hwInfo.platform.usDeviceID, id);
        EXPECT_EQ(offlineCompiler.deviceConfig, expectedConfig.value);
    }
}
// For every id in ptlUDeviceIds: pass the id as a hex string to initHardwareInfo
// and check the stdout message, the stored device id and the device config (PTL_U_A0).
PTLTEST_F(PtlOfflineCompilerTests, givenPtlUDeviceIdValueWhenInitHwInfoThenCorrectValuesAreSet) {
    MockOfflineCompiler offlineCompiler;
    HardwareIpVersion expectedConfig = AOT::PTL_U_A0;

    for (const auto &id : ptlUDeviceIds) {
        // Device id formatted as "0x<hex>".
        std::stringstream hexId;
        hexId << "0x" << std::hex << id;

        testing::internal::CaptureStdout();
        offlineCompiler.initHardwareInfo(hexId.str());
        std::string capturedOutput = testing::internal::GetCapturedStdout();

        std::stringstream expectedMessage;
        expectedMessage << "Auto-detected target based on " << hexId.str() << " device id: ptl-u-a0\n";

        EXPECT_STREQ(capturedOutput.c_str(), expectedMessage.str().c_str());
        EXPECT_EQ(offlineCompiler.hwInfo.platform.usDeviceID, id);
        EXPECT_EQ(offlineCompiler.deviceConfig, expectedConfig.value);
    }
}

View File

@@ -0,0 +1,33 @@
#
# Copyright (C) 2025 Intel Corporation
#
# SPDX-License-Identifier: MIT
#
# Xe3 core OpenCL unit tests: only configured when TESTS_XE3_CORE is enabled.
if(TESTS_XE3_CORE)
# Test-exclusion source for this core; it is published through the global
# IGDRCL_SRCS_tests_excludes property and also compiled into igdrcl_tests below.
set(IGDRCL_SRCS_tests_xe3_core_excludes
${CMAKE_CURRENT_SOURCE_DIR}/excludes_ocl_xe3_core.cpp
)
set_property(GLOBAL APPEND PROPERTY IGDRCL_SRCS_tests_excludes ${IGDRCL_SRCS_tests_xe3_core_excludes})
# Sources that live in this directory.
set(IGDRCL_SRCS_tests_xe3_core
${IGDRCL_SRCS_tests_xe3_core_excludes}
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
${CMAKE_CURRENT_SOURCE_DIR}/cl_gfx_core_helper_tests_xe3_core.cpp
${CMAKE_CURRENT_SOURCE_DIR}/cmd_parse_tests_xe3_core.cpp
${CMAKE_CURRENT_SOURCE_DIR}/command_stream_receiver_hw_tests_xe3_core.cpp
${CMAKE_CURRENT_SOURCE_DIR}/copy_engine_tests_xe3_core.cpp
${CMAKE_CURRENT_SOURCE_DIR}/device_info_tests_xe3_core.cpp
${CMAKE_CURRENT_SOURCE_DIR}/dispatch_walker_tests_xe3_core.cpp
${CMAKE_CURRENT_SOURCE_DIR}/test_cmds_programming_xe3_core.cpp
${CMAKE_CURRENT_SOURCE_DIR}/test_cl_device_caps_xe3_core.cpp
${CMAKE_CURRENT_SOURCE_DIR}/test_platform_caps_xe3_core.cpp
${CMAKE_CURRENT_SOURCE_DIR}/test_sample_xe3_core.cpp
)
# Append whatever sources were registered in the NEO_CORE_TESTS_XE3_CORE global property.
get_property(NEO_CORE_TESTS_XE3_CORE GLOBAL PROPERTY NEO_CORE_TESTS_XE3_CORE)
list(APPEND IGDRCL_SRCS_tests_xe3_core ${NEO_CORE_TESTS_XE3_CORE})
target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_xe3_core})
# add_subdirectories() is a project-defined helper; presumably it descends into
# the child directories - confirm against its definition.
add_subdirectories()
endif()

View File

@@ -0,0 +1,27 @@
/*
* Copyright (C) 2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/program/kernel_info.h"
#include "shared/test/common/test_macros/hw_test.h"
#include "opencl/source/helpers/cl_gfx_core_helper.h"
#include "opencl/test/unit_test/fixtures/cl_device_fixture.h"
using namespace NEO;
// Fixture alias for Xe3 ClGfxCoreHelper tests.
using ClGfxCoreHelperTestsXe3Core = Test<ClDeviceFixture>;
// A default-constructed KernelInfo must not require aux resolves on Xe3.
XE3_CORETEST_F(ClGfxCoreHelperTestsXe3Core, givenXe3CoreThenAuxTranslationIsNotRequired) {
    KernelInfo defaultKernelInfo{};
    const bool auxResolvesRequired = getHelper<ClGfxCoreHelper>().requiresAuxResolves(defaultKernelInfo);
    EXPECT_FALSE(auxResolvesRequired);
}
// Checks the blitter preference for local-to-local transfers on Xe3.
// NOTE(review): the test name ends with "ThenReturnTrue" while the assertion
// expects false - confirm which one reflects the intended behavior.
XE3_CORETEST_F(ClGfxCoreHelperTestsXe3Core, WhenCheckingPreferenceForBlitterForLocalToLocalTransfersThenReturnTrue) {
    const bool blitterPreferred = getHelper<ClGfxCoreHelper>().preferBlitterForLocalToLocalTransfers();
    EXPECT_FALSE(blitterPreferred);
}

View File

@@ -0,0 +1,61 @@
/*
* Copyright (C) 2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/test/common/cmd_parse/gen_cmd_parse.h"
#include "shared/test/common/cmd_parse/hw_parse.h"
#include "shared/test/common/test_macros/header/per_product_test_definitions.h"
#include "shared/test/common/test_macros/test.h"
#include "opencl/test/unit_test/fixtures/cl_device_fixture.h"
#include "hw_cmds_xe3_core.h"
using namespace NEO;
// Fixture alias for Xe3 command-parser tests (no shared state needed).
using CmdParseTestsXe3Core = ::testing::Test;
// Writes a single initialized MI_MEM_FENCE into a local stream and checks that
// the parser recognizes it by type, by name, and as exactly one command.
XE3_CORETEST_F(CmdParseTestsXe3Core, givenMiMemFenceCmdWhenParsingThenFindCommandAndsItsName) {
    using MI_MEM_FENCE = typename FamilyType::MI_MEM_FENCE;

    uint32_t buffer[64] = {};
    LinearStream cmdStream(buffer, sizeof(buffer));

    auto miMemFenceCmd = cmdStream.getSpaceForCmd<MI_MEM_FENCE>();
    miMemFenceCmd->init();

    EXPECT_NE(nullptr, genCmdCast<MI_MEM_FENCE *>(buffer));

    // EXPECT_STREQ tolerates a null name and prints both strings on mismatch,
    // unlike comparing the result of strcmp against 0.
    auto commandName = CmdParse<FamilyType>::getCommandName(buffer);
    EXPECT_STREQ("MI_MEM_FENCE", commandName);

    HardwareParse hwParser;
    hwParser.parseCommands<FamilyType>(cmdStream, 0);
    EXPECT_EQ(1u, hwParser.cmdList.size());
}
// Writes a single initialized STATE_SYSTEM_MEM_FENCE_ADDRESS into a local stream
// and checks that the parser recognizes it by type, by name, and as exactly one command.
XE3_CORETEST_F(CmdParseTestsXe3Core, givenStateSystemMemFenceAddrCmdWhenParsingThenFindCommandAndsItsName) {
    using STATE_SYSTEM_MEM_FENCE_ADDRESS = typename FamilyType::STATE_SYSTEM_MEM_FENCE_ADDRESS;

    uint32_t buffer[64] = {};
    LinearStream cmdStream(buffer, sizeof(buffer));

    auto stateSystemMemFenceCmd = cmdStream.getSpaceForCmd<STATE_SYSTEM_MEM_FENCE_ADDRESS>();
    stateSystemMemFenceCmd->init();

    EXPECT_NE(nullptr, genCmdCast<STATE_SYSTEM_MEM_FENCE_ADDRESS *>(buffer));

    // EXPECT_STREQ tolerates a null name and prints both strings on mismatch,
    // unlike comparing the result of strcmp against 0.
    auto commandName = CmdParse<FamilyType>::getCommandName(buffer);
    EXPECT_STREQ("STATE_SYSTEM_MEM_FENCE_ADDRESS", commandName);

    HardwareParse hwParser;
    hwParser.parseCommands<FamilyType>(cmdStream, 0);
    EXPECT_EQ(1u, hwParser.cmdList.size());
}

View File

@@ -0,0 +1,740 @@
/*
* Copyright (C) 2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/command_stream/linear_stream.h"
#include "shared/source/gmm_helper/client_context/gmm_client_context.h"
#include "shared/source/gmm_helper/resource_info.h"
#include "shared/source/helpers/blit_commands_helper.h"
#include "shared/source/os_interface/device_factory.h"
#include "shared/test/common/cmd_parse/gen_cmd_parse.h"
#include "shared/test/common/helpers/debug_manager_state_restore.h"
#include "shared/test/common/helpers/engine_descriptor_helper.h"
#include "shared/test/common/libult/ult_aub_command_stream_receiver.h"
#include "shared/test/common/mocks/mock_timestamp_container.h"
#include "shared/test/common/test_macros/header/per_product_test_definitions.h"
#include "shared/test/common/test_macros/test.h"
#include "shared/test/common/utilities/base_object_utils.h"
#include "opencl/source/command_queue/gpgpu_walker.h"
#include "opencl/source/command_queue/hardware_interface.h"
#include "opencl/source/helpers/cl_memory_properties_helpers.h"
#include "opencl/test/unit_test/command_queue/hardware_interface_helper.h"
#include "opencl/test/unit_test/fixtures/ult_command_stream_receiver_fixture.h"
#include "opencl/test/unit_test/mocks/mock_cl_device.h"
#include "opencl/test/unit_test/mocks/mock_command_queue.h"
#include "opencl/test/unit_test/mocks/mock_kernel.h"
#include "opencl/test/unit_test/mocks/mock_mdi.h"
#include "opencl/test/unit_test/mocks/mock_platform.h"
#include "hw_cmds_xe3_core.h"
#include <type_traits>
using namespace NEO;
// Fixture: local memory enabled and all three "program global fence as ..."
// debug flags set to 0 before the base fixture is set up.
// The restore member is presumably what reverts the flags on destruction.
struct MemorySynchronizationViaMiSemaphoreWaitTest : public UltCommandStreamReceiverTest {
    void SetUp() override {
        auto &flags = debugManager.flags;
        flags.EnableLocalMemory.set(1);
        flags.ProgramGlobalFenceAsMiMemFenceCommandInCommandStream.set(0);
        flags.ProgramGlobalFenceAsPostSyncOperationInComputeWalker.set(0);
        flags.ProgramGlobalFenceAsKernelInstructionInEUKernel.set(0);

        UltCommandStreamReceiverTest::SetUp();
    }

    DebugManagerStateRestore restore;
};
// Fixture alias for the Xe3 command stream receiver tests.
using CommandStreamReceiverXe3CoreTests = UltCommandStreamReceiverTest;
// Flushes one host-pointer-to-buffer blit that carries an output timestamp packet
// through a BCS command stream receiver, then checks that MI_STORE_REGISTER_MEM
// commands for the start timestamps precede the copy and those for the end
// timestamps follow the MI_FLUSH_DW after the copy.
XE3_CORETEST_F(CommandStreamReceiverXe3CoreTests, givenProfilingEnabledWhenBlitBufferThenCommandBufferIsConstructedProperly) {
using MI_STORE_REGISTER_MEM = typename FamilyType::MI_STORE_REGISTER_MEM;
// Stand-alone BCS OS context and command stream receiver, independent of the fixture's default CSR.
auto bcsOsContext = std::unique_ptr<OsContext>(OsContext::create(nullptr, pDevice->getRootDeviceIndex(), 0, EngineDescriptorHelper::getDefaultDescriptor({aub_stream::ENGINE_BCS, EngineUsage::regular}, pDevice->getDeviceBitfield())));
auto bcsCsr = std::make_unique<UltCommandStreamReceiver<FamilyType>>(*pDevice->getExecutionEnvironment(), pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
bcsCsr->setupContext(*bcsOsContext);
bcsCsr->initializeTagAllocation();
auto &gfxCoreHelper = pDevice->getGfxCoreHelper();
// The raw pointer is handed to bcsCsr->timestampPacketAllocator.reset() right below.
auto timestampPacketAllocator = new MockTagAllocator<TimestampPackets<uint64_t, TimestampPacketConstants::preferredPacketCount>>(0, pDevice->getMemoryManager(), bcsCsr->getPreferredTagPoolSize(), gfxCoreHelper.getTimestampPacketAllocatorAlignment(),
sizeof(TimestampPackets<uint64_t, TimestampPacketConstants::preferredPacketCount>), false, bcsOsContext->getDeviceBitfield());
bcsCsr->timestampPacketAllocator.reset(timestampPacketAllocator);
auto context = std::make_unique<MockContext>(pClDevice);
cl_int retVal = CL_SUCCESS;
auto buffer = clUniquePtr<Buffer>(Buffer::create(context.get(), CL_MEM_READ_WRITE, 1, nullptr, retVal));
// Arbitrary host address used as the blit source.
void *hostPtr = reinterpret_cast<void *>(0x12340000);
auto graphicsAllocation = buffer->getGraphicsAllocation(pDevice->getRootDeviceIndex());
auto blitProperties = BlitProperties::constructPropertiesForReadWrite(BlitterConstants::BlitDirection::hostPtrToBuffer,
*bcsCsr, graphicsAllocation, nullptr, hostPtr,
graphicsAllocation->getGpuAddress(), 0,
0, 0, {1, 1, 1}, 0, 0, 0, 0);
// Attach a timestamp packet node so the flush emits the profiling register stores.
MockTimestampPacketContainer timestamp(*bcsCsr->getTimestampPacketAllocator(), 1u);
blitProperties.outputTimestampPacket = timestamp.getNode(0);
// GPU addresses the register stores are expected to target.
auto timestampContextStartGpuAddress = TimestampPacketHelper::getContextStartGpuAddress(*blitProperties.outputTimestampPacket);
auto timestampGlobalStartAddress = TimestampPacketHelper::getGlobalStartGpuAddress(*blitProperties.outputTimestampPacket);
auto timestampContextEndGpuAddress = TimestampPacketHelper::getContextEndGpuAddress(*blitProperties.outputTimestampPacket);
auto timestampGlobalEndAddress = TimestampPacketHelper::getGlobalEndGpuAddress(*blitProperties.outputTimestampPacket);
BlitPropertiesContainer blitPropertiesContainer;
blitPropertiesContainer.push_back(blitProperties);
bcsCsr->flushBcsTask(blitPropertiesContainer, false, true, *pDevice);
HardwareParse hwParser;
hwParser.parseCommands<FamilyType>(bcsCsr->commandStream);
auto &cmdList = hwParser.cmdList;
auto cmdIterator = cmdList.begin();
// Despite the "Lri" name, this finds the next MI_STORE_REGISTER_MEM at or after itBegin,
// leaves cmdIterator pointing at it, and checks its register address (relative to
// RegisterOffsets::bcs0Base) and its destination memory address.
auto verifyLri = [&](const GenCmdList::iterator &itBegin, uint32_t expectRegister, uint64_t expectedAddress) {
cmdIterator = find<MI_STORE_REGISTER_MEM *>(itBegin, cmdList.end());
ASSERT_NE(cmdList.end(), cmdIterator);
auto lriCmd = genCmdCast<MI_STORE_REGISTER_MEM *>(*cmdIterator);
EXPECT_EQ(expectRegister + RegisterOffsets::bcs0Base, lriCmd->getRegisterAddress());
EXPECT_EQ(expectedAddress, lriCmd->getMemoryAddress());
};
{
// Start timestamps: the two high dwords (stored at address + 4) come first, then the two low dwords.
verifyLri(cmdIterator, RegisterOffsets::gpThreadTimeRegAddressOffsetHigh, timestampContextStartGpuAddress + sizeof(uint32_t));
verifyLri(++cmdIterator, RegisterOffsets::globalTimestampUn, timestampGlobalStartAddress + sizeof(uint32_t));
verifyLri(++cmdIterator, RegisterOffsets::gpThreadTimeRegAddressOffsetLow, timestampContextStartGpuAddress);
verifyLri(++cmdIterator, RegisterOffsets::globalTimestampLdw, timestampGlobalStartAddress);
}
// The copy command, followed by an MI_FLUSH_DW, must appear after the start timestamps.
cmdIterator = find<typename FamilyType::XY_COPY_BLT *>(++cmdIterator, cmdList.end());
ASSERT_NE(cmdList.end(), cmdIterator);
cmdIterator = find<typename FamilyType::MI_FLUSH_DW *>(++cmdIterator, cmdList.end());
ASSERT_NE(cmdList.end(), cmdIterator);
{
// End timestamps: same register order as above, targeting the end addresses.
verifyLri(++cmdIterator, RegisterOffsets::gpThreadTimeRegAddressOffsetHigh, timestampContextEndGpuAddress + sizeof(uint32_t));
verifyLri(++cmdIterator, RegisterOffsets::globalTimestampUn, timestampGlobalEndAddress + sizeof(uint32_t));
verifyLri(++cmdIterator, RegisterOffsets::gpThreadTimeRegAddressOffsetLow, timestampContextEndGpuAddress);
verifyLri(++cmdIterator, RegisterOffsets::globalTimestampLdw, timestampGlobalEndAddress);
}
}
// Xe3 alias of the fixture that turns all global-fence programming flags off.
using MemorySynchronizationViaMiSemaphoreWaitTestXe3Core = MemorySynchronizationViaMiSemaphoreWaitTest;
// With every fence flag disabled the engine prologue needs zero bytes and emits
// no commands, yet the CSR still records the prologue as sent.
XE3_CORETEST_F(MemorySynchronizationViaMiSemaphoreWaitTestXe3Core, givenMemorySynchronizationViaMiSemaphoreWaitWhenProgramEnginePrologueIsCalledThenNoCommandIsProgrammed) {
    auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
    EXPECT_FALSE(csr.isEnginePrologueSent);

    auto prologueSize = csr.getCmdSizeForPrologue();
    EXPECT_EQ(0u, prologueSize);

    StackVec<char, 4096> storage(prologueSize);
    LinearStream prologueStream(storage.begin(), storage.size());
    csr.programEnginePrologue(prologueStream);
    EXPECT_TRUE(csr.isEnginePrologueSent);

    HardwareParse parser;
    parser.parseCommands<FamilyType>(prologueStream);
    EXPECT_EQ(0u, parser.cmdList.size());
}
// Fixture: local memory enabled and the global fence programmed as an
// MI_MEM_FENCE command in the command stream.
// The restore member is presumably what reverts the flags on destruction.
struct SystemMemoryFenceViaMiMemFenceTest : public UltCommandStreamReceiverTest {
    void SetUp() override {
        auto &flags = debugManager.flags;
        flags.EnableLocalMemory.set(1);
        flags.ProgramGlobalFenceAsMiMemFenceCommandInCommandStream.set(1);

        UltCommandStreamReceiverTest::SetUp();
    }

    DebugManagerStateRestore restore;
};
// Xe3 alias of the MI_MEM_FENCE fence fixture.
using SystemMemoryFenceViaMiMemFenceTestXe3Core = SystemMemoryFenceViaMiMemFenceTest;
// Programming the engine prologue flips isEnginePrologueSent from false to true.
XE3_CORETEST_F(SystemMemoryFenceViaMiMemFenceTestXe3Core, givenCommadStreamReceiverWhenProgramEnginePrologueIsCalledThenIsEnginePrologueSentIsSetToTrue) {
    auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
    EXPECT_FALSE(csr.isEnginePrologueSent);

    auto prologueSize = csr.getCmdSizeForPrologue();
    StackVec<char, 4096> storage(prologueSize);
    LinearStream prologueStream(storage.begin(), storage.size());

    csr.programEnginePrologue(prologueStream);
    EXPECT_TRUE(csr.isEnginePrologueSent);
}
// The required command stream size shrinks by exactly the prologue size once
// the CSR is marked as having sent the engine prologue.
XE3_CORETEST_F(SystemMemoryFenceViaMiMemFenceTestXe3Core, givenIsEnginePrologueSentIsSetToTrueWhenGetRequiredCmdStreamSizeIsCalledThenSizeForEnginePrologueIsNotIncluded) {
    auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
    auto dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags();
    EXPECT_FALSE(csr.isEnginePrologueSent);

    auto prologueSize = csr.getCmdSizeForPrologue();

    auto sizeBeforePrologueSent = csr.getRequiredCmdStreamSize(dispatchFlags, *pDevice);
    csr.isEnginePrologueSent = true;
    auto sizeAfterPrologueSent = csr.getRequiredCmdStreamSize(dispatchFlags, *pDevice);

    EXPECT_EQ(prologueSize, sizeBeforePrologueSent - sizeAfterPrologueSent);
}
// Fixture: local memory enabled and, of the three global-fence flags, only the
// "post-sync operation in compute walker" variant set to 1.
// The restore member is presumably what reverts the flags on destruction.
struct SystemMemoryFenceViaComputeWalkerTest : public UltCommandStreamReceiverTest {
    void SetUp() override {
        auto &flags = debugManager.flags;
        flags.EnableLocalMemory.set(1);
        flags.ProgramGlobalFenceAsMiMemFenceCommandInCommandStream.set(0);
        flags.ProgramGlobalFenceAsPostSyncOperationInComputeWalker.set(1);
        flags.ProgramGlobalFenceAsKernelInstructionInEUKernel.set(0);

        UltCommandStreamReceiverTest::SetUp();
    }

    DebugManagerStateRestore restore;
};
// Xe3 alias of the compute-walker fence fixture.
using SystemMemoryFenceViaComputeWalkerTestXe3Core = SystemMemoryFenceViaComputeWalkerTest;
// The engine prologue must contain a STATE_SYSTEM_MEM_FENCE_ADDRESS command that
// points at the CSR's global fence allocation.
XE3_CORETEST_F(SystemMemoryFenceViaComputeWalkerTestXe3Core, givenSystemMemoryFenceGeneratedAsPostSyncOperationInComputeWalkerWhenProgramEnginePrologueIsCalledThenSystemMemFenceAddressIsProgrammed) {
    using STATE_SYSTEM_MEM_FENCE_ADDRESS = typename FamilyType::STATE_SYSTEM_MEM_FENCE_ADDRESS;

    auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
    EXPECT_FALSE(csr.isEnginePrologueSent);

    auto prologueSize = csr.getCmdSizeForPrologue();
    StackVec<char, 4096> storage(prologueSize);
    LinearStream prologueStream(storage.begin(), storage.size());

    csr.programEnginePrologue(prologueStream);
    EXPECT_TRUE(csr.isEnginePrologueSent);

    HardwareParse parser;
    parser.parseCommands<FamilyType>(prologueStream);

    auto fenceAddressIt = find<STATE_SYSTEM_MEM_FENCE_ADDRESS *>(parser.cmdList.begin(), parser.cmdList.end());
    ASSERT_NE(parser.cmdList.end(), fenceAddressIt);

    auto fenceAddressCmd = genCmdCast<STATE_SYSTEM_MEM_FENCE_ADDRESS *>(*fenceAddressIt);
    EXPECT_EQ(csr.globalFenceAllocation->getGpuAddress(), fenceAddressCmd->getSystemMemoryFenceAddress());
}
// Fixture: local memory enabled and, of the three global-fence flags, only the
// "kernel instruction in EU kernel" variant set to 1.
// The restore member is presumably what reverts the flags on destruction.
struct SystemMemoryFenceViaKernelInstructionTest : public UltCommandStreamReceiverTest {
    void SetUp() override {
        auto &flags = debugManager.flags;
        flags.EnableLocalMemory.set(1);
        flags.ProgramGlobalFenceAsMiMemFenceCommandInCommandStream.set(0);
        flags.ProgramGlobalFenceAsPostSyncOperationInComputeWalker.set(0);
        flags.ProgramGlobalFenceAsKernelInstructionInEUKernel.set(1);

        UltCommandStreamReceiverTest::SetUp();
    }

    DebugManagerStateRestore restore;
};
// Xe3 alias of the kernel-instruction fence fixture.
using SystemMemoryFenceViaKernelInstructionTestXe3Core = SystemMemoryFenceViaKernelInstructionTest;
// The engine prologue must contain a STATE_SYSTEM_MEM_FENCE_ADDRESS command that
// points at the CSR's global fence allocation.
XE3_CORETEST_F(SystemMemoryFenceViaKernelInstructionTestXe3Core, givenSystemMemoryFenceGeneratedAsKernelInstructionInKernelCodeWhenProgramEnginePrologueIsCalledThenSystemMemFenceAddressIsProgrammed) {
    using STATE_SYSTEM_MEM_FENCE_ADDRESS = typename FamilyType::STATE_SYSTEM_MEM_FENCE_ADDRESS;

    auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
    EXPECT_FALSE(csr.isEnginePrologueSent);

    auto prologueSize = csr.getCmdSizeForPrologue();
    StackVec<char, 4096> storage(prologueSize);
    LinearStream prologueStream(storage.begin(), storage.size());

    csr.programEnginePrologue(prologueStream);
    EXPECT_TRUE(csr.isEnginePrologueSent);

    HardwareParse parser;
    parser.parseCommands<FamilyType>(prologueStream);

    auto fenceAddressIt = find<STATE_SYSTEM_MEM_FENCE_ADDRESS *>(parser.cmdList.begin(), parser.cmdList.end());
    ASSERT_NE(parser.cmdList.end(), fenceAddressIt);

    auto fenceAddressCmd = genCmdCast<STATE_SYSTEM_MEM_FENCE_ADDRESS *>(*fenceAddressIt);
    EXPECT_EQ(csr.globalFenceAllocation->getGpuAddress(), fenceAddressCmd->getSystemMemoryFenceAddress());
}
// Fixture that forces mid-thread preemption before the base fixture is set up.
// The restore member is presumably what reverts the flag on destruction.
struct Xe3MidThreadCommandStreamReceiverTest : public UltCommandStreamReceiverTest {
    void SetUp() override {
        const auto forcedMode = static_cast<int32_t>(PreemptionMode::MidThread);
        debugManager.flags.ForcePreemptionMode.set(forcedMode);

        UltCommandStreamReceiverTest::SetUp();
    }

    DebugManagerStateRestore restore;
};
// The preemption allocation's underlying buffer address must be aligned to
// STATE_CONTEXT_DATA_BASE_ADDRESS's CONTEXTDATABASEADDRESS_ALIGN_SIZE.
XE3_CORETEST_F(Xe3MidThreadCommandStreamReceiverTest, givenMidThreadPreemptionWhenCreatingPreemptionAllocationThenExpectProperAlignment) {
    using STATE_CONTEXT_DATA_BASE_ADDRESS = typename FamilyType::STATE_CONTEXT_DATA_BASE_ADDRESS;
    constexpr size_t alignmentMask = STATE_CONTEXT_DATA_BASE_ADDRESS::CONTEXTDATABASEADDRESS::CONTEXTDATABASEADDRESS_ALIGN_SIZE - 1;

    GraphicsAllocation *preemptionAllocation = pDevice->getUltCommandStreamReceiver<FamilyType>().getPreemptionAllocation();
    ASSERT_NE(nullptr, preemptionAllocation);

    // No bit below the alignment boundary may be set in the address.
    size_t bufferAddress = reinterpret_cast<size_t>(preemptionAllocation->getUnderlyingBuffer());
    EXPECT_EQ(0u, alignmentMask & bufferAddress);
}
// Fixture alias for Xe3 flushTask tests.
using Xe3CommandStreamReceiverFlushTaskTests = UltCommandStreamReceiverTest;
// The OverrideThreadArbitrationPolicy debug flag is reflected in the CSR's stream
// properties after a flush; before the flush the tracked value is -1.
XE3_CORETEST_F(Xe3CommandStreamReceiverFlushTaskTests, givenOverrideThreadArbitrationPolicyDebugVariableSetWhenFlushingThenRequestRequiredMode) {
    DebugManagerStateRestore restore;
    auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
    debugManager.flags.OverrideThreadArbitrationPolicy.set(ThreadArbitrationPolicy::RoundRobin);

    auto &trackedPolicy = csr.streamProperties.stateComputeMode.threadArbitrationPolicy;
    EXPECT_EQ(-1, trackedPolicy.value);

    flushTask(csr);

    EXPECT_EQ(ThreadArbitrationPolicy::RoundRobin, trackedPolicy.value);
}
// Flushes with a thread arbitration policy value (-2) that is not a known policy
// and checks that the CSR's stream properties carry that same value afterwards.
// NOTE(review): the test name mentions the "default policy", while the assertion
// expects the requested value to be stored unchanged - confirm the intent.
XE3_CORETEST_F(Xe3CommandStreamReceiverFlushTaskTests, givenNotExistPolicyWhenFlushingThenDefaultPolicyIsProgrammed) {
    auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();

    int32_t notExistPolicy = -2;
    // flushTaskFlags is the fixture-owned dispatch flags object consumed by flushTask().
    flushTaskFlags.threadArbitrationPolicy = notExistPolicy;

    flushTask(commandStreamReceiver);

    EXPECT_EQ(notExistPolicy, commandStreamReceiver.streamProperties.stateComputeMode.threadArbitrationPolicy.value);
}
// With the preamble already sent and systolic mode requested through the flush
// flags, the CSR's lastSystolicPipelineSelectMode stays false after the flush.
XE3_CORETEST_F(Xe3CommandStreamReceiverFlushTaskTests, givenLastSystolicPipelineSelectModeWhenFlushTaskIsCalledThenDontReprogramPipelineSelect) {
    auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
    csr.isPreambleSent = true;
    csr.lastMediaSamplerConfig = false;

    auto &pipelineSelectArgs = flushTaskFlags.pipelineSelectArgs;
    pipelineSelectArgs.mediaSamplerRequired = false;
    pipelineSelectArgs.systolicPipelineSelectMode = true;

    flushTask(csr);

    EXPECT_FALSE(csr.lastSystolicPipelineSelectMode);
}
// Fixture for Xe3 blitter tests: enables local memory, sets up the base CSR
// fixture and creates a MockContext on the fixture's device. The context is
// released before the base fixture is torn down.
struct Xe3BcsTests : public UltCommandStreamReceiverTest {
    void SetUp() override {
        debugManager.flags.EnableLocalMemory.set(true);
        UltCommandStreamReceiverTest::SetUp();
        context = std::make_unique<MockContext>(pClDevice);
    }

    void TearDown() override {
        context.reset();
        UltCommandStreamReceiverTest::TearDown();
    }

    // Presumably reverts the debug flags changed by the fixture and its tests on destruction.
    DebugManagerStateRestore restore;
    std::unique_ptr<MockContext> context;
    cl_int retVal = CL_SUCCESS;
};
// A buffer outside the system memory pool, copied onto itself with stateless
// compression enabled, must get the compression format taken from the
// FormatForStatelessCompressionWithUnifiedMemory debug flag.
XE3_CORETEST_F(Xe3BcsTests, givenBufferInDeviceMemoryWhenStatelessCompressionIsEnabledThenBlitterForBufferUsesStatelessCompressedSettings) {
    using MEM_COPY = typename Xe3CoreFamily::MEM_COPY;

    char cmdStorage[1024] = {0};
    LinearStream cmdStream(cmdStorage, 1024);
    MockGraphicsAllocation clearColorAlloc;

    auto deviceBuffer = clUniquePtr<Buffer>(Buffer::create(context.get(), {}, MemoryConstants::pageSize64k, nullptr, retVal));
    EXPECT_EQ(CL_SUCCESS, retVal);

    auto deviceAllocation = deviceBuffer->getGraphicsAllocation(pClDevice->getRootDeviceIndex());
    EXPECT_FALSE(MemoryPoolHelper::isSystemMemoryPool(deviceAllocation->getMemoryPool()));

    auto blitProperties = BlitProperties::constructPropertiesForCopy(deviceAllocation, deviceAllocation,
                                                                     0, 0, {BlitterConstants::maxBlitWidth - 1, 1, 1}, 0, 0, 0, 0, &clearColorAlloc);

    auto copyCmd = cmdStream.getSpaceForCmd<MEM_COPY>();
    *copyCmd = Xe3CoreFamily::cmdInitXyCopyBlt;

    debugManager.flags.EnableStatelessCompressionWithUnifiedMemory.set(true);

    // Run with the platform list cleared, i.e. with no global platform available.
    platformsImpl->clear();
    EXPECT_EQ(platform(), nullptr);

    BlitCommandsHelper<Xe3CoreFamily>::appendBlitCommandsForBuffer<MEM_COPY>(blitProperties, *copyCmd, context->getDevice(0)->getRootDeviceEnvironment());

    const auto expectedFormat = static_cast<uint32_t>(debugManager.flags.FormatForStatelessCompressionWithUnifiedMemory.get());
    EXPECT_EQ(expectedFormat, copyCmd->getCompressionFormat());
}
// Destination outside the system memory pool, source in system memory: with
// stateless compression enabled the blit command must carry the compression
// format taken from the FormatForStatelessCompressionWithUnifiedMemory debug flag.
XE3_CORETEST_F(Xe3BcsTests, givenDstBufferInDeviceAndSrcInSystemMemoryWhenStatelessCompressionIsEnabledThenBlitterForBufferUsesStatelessCompressedSettings) {
    using MEM_COPY = typename Xe3CoreFamily::MEM_COPY;

    debugManager.flags.FormatForStatelessCompressionWithUnifiedMemory.set(0x1);

    char buff[1024] = {0};
    LinearStream stream(buff, 1024);
    MockGraphicsAllocation clearColorAlloc;

    // Check retVal after each creation; otherwise the second create would
    // overwrite the status of the first one before it is ever inspected.
    auto bufferDst = clUniquePtr<Buffer>(Buffer::create(context.get(), {}, MemoryConstants::pageSize64k, nullptr, retVal));
    EXPECT_EQ(CL_SUCCESS, retVal);
    auto bufferSrc = clUniquePtr<Buffer>(Buffer::create(context.get(), CL_MEM_FORCE_HOST_MEMORY_INTEL, MemoryConstants::pageSize64k, nullptr, retVal));
    EXPECT_EQ(CL_SUCCESS, retVal);

    auto allocationDst = bufferDst->getGraphicsAllocation(pClDevice->getRootDeviceIndex());
    auto allocationSrc = bufferSrc->getGraphicsAllocation(pClDevice->getRootDeviceIndex());
    EXPECT_FALSE(MemoryPoolHelper::isSystemMemoryPool(allocationDst->getMemoryPool()));
    EXPECT_TRUE(MemoryPoolHelper::isSystemMemoryPool(allocationSrc->getMemoryPool()));

    auto blitProperties = BlitProperties::constructPropertiesForCopy(allocationDst, allocationSrc,
                                                                     0, 0, {BlitterConstants::maxBlitWidth - 1, 1, 1}, 0, 0, 0, 0, &clearColorAlloc);

    auto bltCmd = stream.getSpaceForCmd<MEM_COPY>();
    *bltCmd = Xe3CoreFamily::cmdInitXyCopyBlt;

    debugManager.flags.EnableStatelessCompressionWithUnifiedMemory.set(true);

    BlitCommandsHelper<Xe3CoreFamily>::appendBlitCommandsForBuffer(blitProperties, *bltCmd, context->getDevice(0)->getRootDeviceEnvironment());

    EXPECT_EQ(static_cast<uint32_t>(debugManager.flags.FormatForStatelessCompressionWithUnifiedMemory.get()), bltCmd->getCompressionFormat());
}
// A system-memory buffer copied onto itself must keep compression format 0
// even when stateless compression is enabled.
XE3_CORETEST_F(Xe3BcsTests, givenBufferInSystemMemoryWhenStatelessCompressionIsEnabledThenBlitterForBufferDoesntUseStatelessCompressedSettings) {
    using MEM_COPY = typename Xe3CoreFamily::MEM_COPY;

    char cmdStorage[1024] = {0};
    LinearStream cmdStream(cmdStorage, 1024);
    MockGraphicsAllocation clearColorAlloc;

    auto hostBuffer = clUniquePtr<Buffer>(Buffer::create(context.get(), CL_MEM_FORCE_HOST_MEMORY_INTEL, MemoryConstants::pageSize64k, nullptr, retVal));
    EXPECT_EQ(CL_SUCCESS, retVal);

    auto hostAllocation = hostBuffer->getGraphicsAllocation(pClDevice->getRootDeviceIndex());
    EXPECT_TRUE(MemoryPoolHelper::isSystemMemoryPool(hostAllocation->getMemoryPool()));

    auto blitProperties = BlitProperties::constructPropertiesForCopy(hostAllocation, hostAllocation,
                                                                     0, 0, {BlitterConstants::maxBlitWidth - 1, 1, 1}, 0, 0, 0, 0, &clearColorAlloc);

    auto copyCmd = cmdStream.getSpaceForCmd<MEM_COPY>();
    *copyCmd = Xe3CoreFamily::cmdInitXyCopyBlt;

    debugManager.flags.EnableStatelessCompressionWithUnifiedMemory.set(true);

    // Run with the platform list cleared, i.e. with no global platform available.
    platformsImpl->clear();
    EXPECT_EQ(platform(), nullptr);

    BlitCommandsHelper<Xe3CoreFamily>::appendBlitCommandsForBuffer(blitProperties, *copyCmd, context->getDevice(0)->getRootDeviceEnvironment());

    EXPECT_EQ(0u, copyCmd->getCompressionFormat());
}
// Compressed destination, uncompressed source: the blit command must carry the
// compression format that the GMM client context reports for the resource format.
// NOTE(review): the expected format is derived from the source allocation's
// resource info although the destination is the compressed one - presumably
// both buffers share the same resource format; confirm.
XE3_CORETEST_F(Xe3BcsTests, givenCompressibleDstBuffersWhenAppendBlitCommandsForBufferCalledThenSetCompressionFormat) {
    debugManager.flags.RenderCompressedBuffersEnabled.set(1);

    using MEM_COPY = typename FamilyType::MEM_COPY;

    char cmdStorage[1024] = {0};
    LinearStream cmdStream(cmdStorage, 1024);
    MockGraphicsAllocation clearColorAlloc;

    auto srcBuffer = clUniquePtr<Buffer>(Buffer::create(context.get(), {}, MemoryConstants::pageSize64k, nullptr, retVal));
    EXPECT_EQ(CL_SUCCESS, retVal);
    auto dstBuffer = clUniquePtr<Buffer>(Buffer::create(context.get(), {}, MemoryConstants::pageSize64k, nullptr, retVal));
    EXPECT_EQ(CL_SUCCESS, retVal);

    // Turn compression off on the source only; the destination is expected to be compressed as created.
    auto srcAllocation = srcBuffer->getGraphicsAllocation(pClDevice->getRootDeviceIndex());
    srcAllocation->getDefaultGmm()->setCompressionEnabled(false);
    auto dstAllocation = dstBuffer->getGraphicsAllocation(pClDevice->getRootDeviceIndex());
    EXPECT_TRUE(dstAllocation->getDefaultGmm()->isCompressionEnabled());

    auto blitProperties = BlitProperties::constructPropertiesForCopy(dstAllocation, srcAllocation, 0, 0,
                                                                     {BlitterConstants::maxBlitWidth - 1, 1, 1}, 0, 0, 0, 0, &clearColorAlloc);

    auto copyCmd = cmdStream.getSpaceForCmd<MEM_COPY>();
    *copyCmd = FamilyType::cmdInitXyCopyBlt;

    const auto &rootDeviceEnvironment = context->getDevice(0)->getRootDeviceEnvironment();
    BlitCommandsHelper<FamilyType>::appendBlitCommandsForBuffer(blitProperties, *copyCmd, rootDeviceEnvironment);

    auto resourceFormat = srcAllocation->getDefaultGmm()->gmmResourceInfo->getResourceFormat();
    auto expectedFormat = rootDeviceEnvironment.getGmmClientContext()->getSurfaceStateCompressionFormat(resourceFormat);
    EXPECT_EQ(expectedFormat, copyCmd->getCompressionFormat());
}
// Compressed source, uncompressed destination: the blit command must carry the
// compression format that the GMM client context reports for the source's
// resource format.
XE3_CORETEST_F(Xe3BcsTests, givenCompressibleSrcBuffersWhenAppendBlitCommandsForBufferCalledThenSetCompressionFormat) {
    debugManager.flags.RenderCompressedBuffersEnabled.set(1);

    using MEM_COPY = typename FamilyType::MEM_COPY;

    char cmdStorage[1024] = {0};
    LinearStream cmdStream(cmdStorage, 1024);
    MockGraphicsAllocation clearColorAlloc;

    auto srcBuffer = clUniquePtr<Buffer>(Buffer::create(context.get(), {}, MemoryConstants::pageSize64k, nullptr, retVal));
    EXPECT_EQ(CL_SUCCESS, retVal);
    auto dstBuffer = clUniquePtr<Buffer>(Buffer::create(context.get(), {}, MemoryConstants::pageSize64k, nullptr, retVal));
    EXPECT_EQ(CL_SUCCESS, retVal);

    // The source is expected to be compressed as created; compression is turned off on the destination only.
    auto srcAllocation = srcBuffer->getGraphicsAllocation(pClDevice->getRootDeviceIndex());
    EXPECT_TRUE(srcAllocation->getDefaultGmm()->isCompressionEnabled());
    auto dstAllocation = dstBuffer->getGraphicsAllocation(pClDevice->getRootDeviceIndex());
    dstAllocation->getDefaultGmm()->setCompressionEnabled(false);

    auto blitProperties = BlitProperties::constructPropertiesForCopy(dstAllocation, srcAllocation, 0, 0,
                                                                     {BlitterConstants::maxBlitWidth - 1, 1, 1}, 0, 0, 0, 0, &clearColorAlloc);

    auto copyCmd = cmdStream.getSpaceForCmd<MEM_COPY>();
    *copyCmd = FamilyType::cmdInitXyCopyBlt;

    const auto &rootDeviceEnvironment = context->getDevice(0)->getRootDeviceEnvironment();
    BlitCommandsHelper<FamilyType>::appendBlitCommandsForBuffer(blitProperties, *copyCmd, rootDeviceEnvironment);

    auto resourceFormat = srcAllocation->getDefaultGmm()->gmmResourceInfo->getResourceFormat();
    auto expectedFormat = rootDeviceEnvironment.getGmmClientContext()->getSurfaceStateCompressionFormat(resourceFormat);
    EXPECT_EQ(expectedFormat, copyCmd->getCompressionFormat());
}
// Block-copy variant: the source stays compressed, the destination does not.
// The XY_BLOCK_COPY_BLT source compression format must match the value derived
// from the source allocation's resource format.
XE3_CORETEST_F(Xe3BcsTests, givenCompressibleSrcBuffersWhenAppendBlitCommandsBlockCopyIsCalledThenSetCompressionFormat) {
    using XY_BLOCK_COPY_BLT = typename FamilyType::XY_BLOCK_COPY_BLT;
    debugManager.flags.RenderCompressedBuffersEnabled.set(1);

    // Scratch storage the blit command is written into.
    char cmdStorage[1024] = {0};
    LinearStream cmdStream(cmdStorage, sizeof(cmdStorage));
    MockGraphicsAllocation clearColor;

    auto source = clUniquePtr<Buffer>(Buffer::create(context.get(), {}, MemoryConstants::pageSize64k, nullptr, retVal));
    EXPECT_EQ(CL_SUCCESS, retVal);
    auto destination = clUniquePtr<Buffer>(Buffer::create(context.get(), {}, MemoryConstants::pageSize64k, nullptr, retVal));
    EXPECT_EQ(CL_SUCCESS, retVal);

    // Only the source keeps compression enabled.
    auto *sourceAllocation = source->getGraphicsAllocation(pClDevice->getRootDeviceIndex());
    EXPECT_TRUE(sourceAllocation->getDefaultGmm()->isCompressionEnabled());
    auto *destinationAllocation = destination->getGraphicsAllocation(pClDevice->getRootDeviceIndex());
    destinationAllocation->getDefaultGmm()->setCompressionEnabled(false);

    auto copyProperties = BlitProperties::constructPropertiesForCopy(destinationAllocation, sourceAllocation, 0, 0,
                                                                     {BlitterConstants::maxBlitWidth - 1, 1, 1}, 0, 0, 0, 0, &clearColor);
    auto *blockCopyCmd = cmdStream.getSpaceForCmd<XY_BLOCK_COPY_BLT>();
    *blockCopyCmd = FamilyType::cmdInitXyBlockCopyBlt;
    blockCopyCmd->setDestinationX2CoordinateRight(1);
    blockCopyCmd->setDestinationY2CoordinateBottom(1);

    const auto &rootDeviceEnv = context->getDevice(0)->getRootDeviceEnvironment();
    BlitCommandsHelper<FamilyType>::appendBlitCommandsBlockCopy(copyProperties, *blockCopyCmd, rootDeviceEnv);

    auto sourceFormat = sourceAllocation->getDefaultGmm()->gmmResourceInfo->getResourceFormat();
    auto expectedFormat = rootDeviceEnv.getGmmClientContext()->getSurfaceStateCompressionFormat(sourceFormat);
    EXPECT_EQ(expectedFormat, blockCopyCmd->getSourceCompressionFormat());
}
// Block-copy variant: the destination stays compressed, the source does not.
// The XY_BLOCK_COPY_BLT destination compression format must match the value
// derived from the compressed (destination) allocation.
XE3_CORETEST_F(Xe3BcsTests, givenCompressibleDstBuffersWhenAppendBlitCommandsBlockCopyIsCalledThenSetCompressionFormat) {
    debugManager.flags.RenderCompressedBuffersEnabled.set(1);
    using XY_BLOCK_COPY_BLT = typename FamilyType::XY_BLOCK_COPY_BLT;

    // Scratch storage the blit command is written into.
    char buff[1024] = {0};
    LinearStream stream(buff, 1024);
    MockGraphicsAllocation clearColorAlloc;

    auto srcBuffer = clUniquePtr<Buffer>(Buffer::create(context.get(), {}, MemoryConstants::pageSize64k, nullptr, retVal));
    EXPECT_EQ(CL_SUCCESS, retVal);
    auto dstBuffer = clUniquePtr<Buffer>(Buffer::create(context.get(), {}, MemoryConstants::pageSize64k, nullptr, retVal));
    EXPECT_EQ(CL_SUCCESS, retVal);

    // Only the destination keeps compression enabled.
    auto srcAllocation = srcBuffer->getGraphicsAllocation(pClDevice->getRootDeviceIndex());
    srcAllocation->getDefaultGmm()->setCompressionEnabled(false);
    auto dstAllocation = dstBuffer->getGraphicsAllocation(pClDevice->getRootDeviceIndex());
    EXPECT_TRUE(dstAllocation->getDefaultGmm()->isCompressionEnabled());

    auto blitProperties = BlitProperties::constructPropertiesForCopy(dstAllocation, srcAllocation, 0, 0,
                                                                     {BlitterConstants::maxBlitWidth - 1, 1, 1}, 0, 0, 0, 0, &clearColorAlloc);
    auto bltCmd = stream.getSpaceForCmd<XY_BLOCK_COPY_BLT>();
    *bltCmd = FamilyType::cmdInitXyBlockCopyBlt;
    bltCmd->setDestinationX2CoordinateRight(1);
    bltCmd->setDestinationY2CoordinateBottom(1);
    const auto &rootDeviceEnvironment = context->getDevice(0)->getRootDeviceEnvironment();
    BlitCommandsHelper<FamilyType>::appendBlitCommandsBlockCopy(blitProperties, *bltCmd, rootDeviceEnvironment);

    // Derive the expectation from the allocation that is actually compressed
    // (the destination), not from the source whose compression was disabled above.
    auto resourceFormat = dstAllocation->getDefaultGmm()->gmmResourceInfo->getResourceFormat();
    auto compressionFormat = rootDeviceEnvironment.getGmmClientContext()->getSurfaceStateCompressionFormat(resourceFormat);
    EXPECT_EQ(compressionFormat, bltCmd->getDestinationCompressionFormat());
}
// With a compressed allocation used as both source and destination, the value
// of the ForceBufferCompressionFormat debug flag must be programmed into both
// compression-format fields of XY_BLOCK_COPY_BLT.
XE3_CORETEST_F(Xe3BcsTests, givenCompressibleBuffersWhenBufferCompressionFormatIsForcedThenCompressionFormatIsSet) {
    using XY_BLOCK_COPY_BLT = typename FamilyType::XY_BLOCK_COPY_BLT;
    debugManager.flags.RenderCompressedBuffersEnabled.set(1);

    // Scratch storage the blit command is written into.
    char cmdStorage[1024] = {0};
    LinearStream cmdStream(cmdStorage, sizeof(cmdStorage));
    MockGraphicsAllocation clearColor;

    auto compressedBuffer = clUniquePtr<Buffer>(Buffer::create(context.get(), {}, MemoryConstants::pageSize64k, nullptr, retVal));
    EXPECT_EQ(CL_SUCCESS, retVal);
    auto *compressedAllocation = compressedBuffer->getGraphicsAllocation(pClDevice->getRootDeviceIndex());
    EXPECT_TRUE(compressedAllocation->getDefaultGmm()->isCompressionEnabled());

    auto copyProperties = BlitProperties::constructPropertiesForCopy(compressedAllocation, compressedAllocation, 0, 0,
                                                                     {BlitterConstants::maxBlitWidth - 1, 1, 1}, 0, 0, 0, 0, &clearColor);
    auto *blockCopyCmd = cmdStream.getSpaceForCmd<XY_BLOCK_COPY_BLT>();
    *blockCopyCmd = FamilyType::cmdInitXyBlockCopyBlt;
    blockCopyCmd->setDestinationX2CoordinateRight(1);
    blockCopyCmd->setDestinationY2CoordinateBottom(1);

    // Force the format only after the command has been initialised.
    uint32_t forcedFormat = 1;
    debugManager.flags.ForceBufferCompressionFormat.set(static_cast<int32_t>(forcedFormat));

    const auto &rootDeviceEnv = context->getDevice(0)->getRootDeviceEnvironment();
    BlitCommandsHelper<FamilyType>::appendBlitCommandsBlockCopy(copyProperties, *blockCopyCmd, rootDeviceEnv);

    EXPECT_EQ(forcedFormat, blockCopyCmd->getDestinationCompressionFormat());
    EXPECT_EQ(forcedFormat, blockCopyCmd->getSourceCompressionFormat());
}
// With compression disabled on the allocation, the ForceBufferCompressionFormat
// debug flag must have no effect: both compression-format fields stay zero.
XE3_CORETEST_F(Xe3BcsTests, givenNotCompressibleBuffersWhenBufferCompressionFormatIsForcedThenCompressionFormatIsNotSet) {
    using XY_BLOCK_COPY_BLT = typename FamilyType::XY_BLOCK_COPY_BLT;
    debugManager.flags.RenderCompressedBuffersEnabled.set(1);

    // Scratch storage the blit command is written into.
    char cmdStorage[1024] = {0};
    LinearStream cmdStream(cmdStorage, sizeof(cmdStorage));
    MockGraphicsAllocation clearColor;

    auto plainBuffer = clUniquePtr<Buffer>(Buffer::create(context.get(), {}, MemoryConstants::pageSize64k, nullptr, retVal));
    EXPECT_EQ(CL_SUCCESS, retVal);
    auto *plainAllocation = plainBuffer->getGraphicsAllocation(pClDevice->getRootDeviceIndex());
    plainAllocation->getDefaultGmm()->setCompressionEnabled(false);

    auto copyProperties = BlitProperties::constructPropertiesForCopy(plainAllocation, plainAllocation, 0, 0,
                                                                     {BlitterConstants::maxBlitWidth - 1, 1, 1}, 0, 0, 0, 0, &clearColor);
    auto *blockCopyCmd = cmdStream.getSpaceForCmd<XY_BLOCK_COPY_BLT>();
    *blockCopyCmd = FamilyType::cmdInitXyBlockCopyBlt;
    blockCopyCmd->setDestinationX2CoordinateRight(1);
    blockCopyCmd->setDestinationY2CoordinateBottom(1);

    // Force the format only after the command has been initialised.
    uint32_t forcedFormat = 1;
    debugManager.flags.ForceBufferCompressionFormat.set(static_cast<int32_t>(forcedFormat));

    const auto &rootDeviceEnv = context->getDevice(0)->getRootDeviceEnvironment();
    BlitCommandsHelper<FamilyType>::appendBlitCommandsBlockCopy(copyProperties, *blockCopyCmd, rootDeviceEnv);

    EXPECT_EQ(0u, blockCopyCmd->getDestinationCompressionFormat());
    EXPECT_EQ(0u, blockCopyCmd->getSourceCompressionFormat());
}
// OverrideBlitterTargetMemory == 0: both source and destination target-memory
// fields of XY_BLOCK_COPY_BLT are expected to be TARGET_MEMORY_SYSTEM_MEM.
XE3_CORETEST_F(Xe3BcsTests, givenOverriddenBlitterTargetToZeroWhenAppendBlitCommandsBlockCopyThenUseSystemMem) {
    debugManager.flags.OverrideBlitterTargetMemory.set(0);
    using XY_BLOCK_COPY_BLT = typename FamilyType::XY_BLOCK_COPY_BLT;

    // Scratch storage the blit command is written into.
    char cmdStorage[1024] = {0};
    LinearStream cmdStream(cmdStorage, sizeof(cmdStorage));
    MockGraphicsAllocation clearColor;

    auto testBuffer = clUniquePtr<Buffer>(Buffer::create(context.get(), {}, MemoryConstants::pageSize64k, nullptr, retVal));
    EXPECT_EQ(CL_SUCCESS, retVal);
    auto *testAllocation = testBuffer->getGraphicsAllocation(pClDevice->getRootDeviceIndex());

    auto copyProperties = BlitProperties::constructPropertiesForCopy(testAllocation, testAllocation, 0, 0,
                                                                     {BlitterConstants::maxBlitWidth - 1, 1, 1}, 0, 0, 0, 0, &clearColor);
    auto *blockCopyCmd = cmdStream.getSpaceForCmd<XY_BLOCK_COPY_BLT>();
    *blockCopyCmd = FamilyType::cmdInitXyBlockCopyBlt;
    blockCopyCmd->setDestinationX2CoordinateRight(1);
    blockCopyCmd->setDestinationY2CoordinateBottom(1);

    const auto &rootDeviceEnv = context->getDevice(0)->getRootDeviceEnvironment();
    BlitCommandsHelper<FamilyType>::appendBlitCommandsBlockCopy(copyProperties, *blockCopyCmd, rootDeviceEnv);

    EXPECT_EQ(blockCopyCmd->getDestinationTargetMemory(), XY_BLOCK_COPY_BLT::TARGET_MEMORY::TARGET_MEMORY_SYSTEM_MEM);
    EXPECT_EQ(blockCopyCmd->getSourceTargetMemory(), XY_BLOCK_COPY_BLT::TARGET_MEMORY::TARGET_MEMORY_SYSTEM_MEM);
}
// OverrideBlitterTargetMemory == 1: both source and destination target-memory
// fields of XY_BLOCK_COPY_BLT are expected to be TARGET_MEMORY_LOCAL_MEM.
XE3_CORETEST_F(Xe3BcsTests, givenOverriddenBlitterTargetToOneWhenAppendBlitCommandsBlockCopyThenUseLocalMem) {
    debugManager.flags.OverrideBlitterTargetMemory.set(1);
    using XY_BLOCK_COPY_BLT = typename FamilyType::XY_BLOCK_COPY_BLT;

    // Scratch storage the blit command is written into.
    char cmdStorage[1024] = {0};
    LinearStream cmdStream(cmdStorage, sizeof(cmdStorage));
    MockGraphicsAllocation clearColor;

    auto testBuffer = clUniquePtr<Buffer>(Buffer::create(context.get(), {}, MemoryConstants::pageSize64k, nullptr, retVal));
    EXPECT_EQ(CL_SUCCESS, retVal);
    auto *testAllocation = testBuffer->getGraphicsAllocation(pClDevice->getRootDeviceIndex());

    auto copyProperties = BlitProperties::constructPropertiesForCopy(testAllocation, testAllocation, 0, 0,
                                                                     {BlitterConstants::maxBlitWidth - 1, 1, 1}, 0, 0, 0, 0, &clearColor);
    auto *blockCopyCmd = cmdStream.getSpaceForCmd<XY_BLOCK_COPY_BLT>();
    *blockCopyCmd = FamilyType::cmdInitXyBlockCopyBlt;
    blockCopyCmd->setDestinationX2CoordinateRight(1);
    blockCopyCmd->setDestinationY2CoordinateBottom(1);

    const auto &rootDeviceEnv = context->getDevice(0)->getRootDeviceEnvironment();
    BlitCommandsHelper<FamilyType>::appendBlitCommandsBlockCopy(copyProperties, *blockCopyCmd, rootDeviceEnv);

    EXPECT_EQ(blockCopyCmd->getDestinationTargetMemory(), XY_BLOCK_COPY_BLT::TARGET_MEMORY::TARGET_MEMORY_LOCAL_MEM);
    EXPECT_EQ(blockCopyCmd->getSourceTargetMemory(), XY_BLOCK_COPY_BLT::TARGET_MEMORY::TARGET_MEMORY_LOCAL_MEM);
}
// OverrideBlitterTargetMemory == 2: both target-memory fields are expected to
// be TARGET_MEMORY_LOCAL_MEM.
// NOTE(review): the test name says "UseDefaultMem" while the assertions check
// for LOCAL_MEM - presumably local memory is the default here; confirm.
XE3_CORETEST_F(Xe3BcsTests, givenOverriddenBlitterTargetToTwoWhenAppendBlitCommandsBlockCopyThenUseDefaultMem) {
    debugManager.flags.OverrideBlitterTargetMemory.set(2);
    using XY_BLOCK_COPY_BLT = typename FamilyType::XY_BLOCK_COPY_BLT;

    // Scratch storage the blit command is written into.
    char cmdStorage[1024] = {0};
    LinearStream cmdStream(cmdStorage, sizeof(cmdStorage));
    MockGraphicsAllocation clearColor;

    auto testBuffer = clUniquePtr<Buffer>(Buffer::create(context.get(), {}, MemoryConstants::pageSize64k, nullptr, retVal));
    EXPECT_EQ(CL_SUCCESS, retVal);
    auto *testAllocation = testBuffer->getGraphicsAllocation(pClDevice->getRootDeviceIndex());

    auto copyProperties = BlitProperties::constructPropertiesForCopy(testAllocation, testAllocation, 0, 0,
                                                                     {BlitterConstants::maxBlitWidth - 1, 1, 1}, 0, 0, 0, 0, &clearColor);
    auto *blockCopyCmd = cmdStream.getSpaceForCmd<XY_BLOCK_COPY_BLT>();
    *blockCopyCmd = FamilyType::cmdInitXyBlockCopyBlt;
    blockCopyCmd->setDestinationX2CoordinateRight(1);
    blockCopyCmd->setDestinationY2CoordinateBottom(1);

    const auto &rootDeviceEnv = context->getDevice(0)->getRootDeviceEnvironment();
    BlitCommandsHelper<FamilyType>::appendBlitCommandsBlockCopy(copyProperties, *blockCopyCmd, rootDeviceEnv);

    EXPECT_EQ(blockCopyCmd->getDestinationTargetMemory(), XY_BLOCK_COPY_BLT::TARGET_MEMORY::TARGET_MEMORY_LOCAL_MEM);
    EXPECT_EQ(blockCopyCmd->getSourceTargetMemory(), XY_BLOCK_COPY_BLT::TARGET_MEMORY::TARGET_MEMORY_LOCAL_MEM);
}
// The OverrideBlitterMocs debug flag is set to a value different from the MOCS
// that GmmHelper reports for GMM_RESOURCE_USAGE_OCL_BUFFER; both MOCS fields of
// XY_BLOCK_COPY_BLT are expected to carry the overridden value.
XE3_CORETEST_F(Xe3BcsTests, givenOverriddenMocksValueWhenAppendBlitCommandsBlockCopyThenMocksValueIsSet) {
    using XY_BLOCK_COPY_BLT = typename FamilyType::XY_BLOCK_COPY_BLT;

    // Scratch storage the blit command is written into.
    char cmdStorage[1024] = {0};
    LinearStream cmdStream(cmdStorage, sizeof(cmdStorage));
    MockGraphicsAllocation clearColor;

    auto testBuffer = clUniquePtr<Buffer>(Buffer::create(context.get(), {}, MemoryConstants::pageSize64k, nullptr, retVal));
    EXPECT_EQ(CL_SUCCESS, retVal);
    auto *testAllocation = testBuffer->getGraphicsAllocation(pClDevice->getRootDeviceIndex());

    auto copyProperties = BlitProperties::constructPropertiesForCopy(testAllocation, testAllocation, 0, 0,
                                                                     {BlitterConstants::maxBlitWidth - 1, 1, 1}, 0, 0, 0, 0, &clearColor);
    auto *blockCopyCmd = cmdStream.getSpaceForCmd<XY_BLOCK_COPY_BLT>();
    *blockCopyCmd = FamilyType::cmdInitXyBlockCopyBlt;
    blockCopyCmd->setDestinationX2CoordinateRight(1);
    blockCopyCmd->setDestinationY2CoordinateBottom(1);

    // Pick an override that differs from the regular OCL buffer MOCS by one.
    uint32_t regularMocs = context->getDevice(0)->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER);
    uint32_t overriddenMocs = regularMocs + 1;
    debugManager.flags.OverrideBlitterMocs.set(overriddenMocs);

    const auto &rootDeviceEnv = context->getDevice(0)->getRootDeviceEnvironment();
    BlitCommandsHelper<FamilyType>::appendBlitCommandsBlockCopy(copyProperties, *blockCopyCmd, rootDeviceEnv);

    EXPECT_EQ(blockCopyCmd->getDestinationMOCS(), overriddenMocs);
    EXPECT_EQ(blockCopyCmd->getSourceMOCS(), overriddenMocs);
}

View File

@@ -0,0 +1,195 @@
/*
* Copyright (C) 2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/gmm_helper/gmm_helper.h"
#include "shared/test/common/cmd_parse/hw_parse.h"
#include "shared/test/common/helpers/debug_manager_state_restore.h"
#include "shared/test/common/libult/ult_command_stream_receiver.h"
#include "shared/test/common/mocks/mock_device.h"
#include "shared/test/common/mocks/mock_graphics_allocation.h"
#include "shared/test/common/test_macros/header/per_product_test_definitions.h"
#include "shared/test/common/test_macros/test.h"
#include "shared/test/common/utilities/base_object_utils.h"
#include "opencl/source/command_queue/command_queue_hw.h"
#include "opencl/source/mem_obj/buffer.h"
#include "opencl/test/unit_test/mocks/mock_cl_device.h"
#include "opencl/test/unit_test/mocks/mock_context.h"
#include "hw_cmds_xe3_core.h"
using namespace NEO;
// Fixture for Xe3 core blitter tests.
// SetUp skips the test on 32-bit builds, enables the compressed-buffer and
// local-memory debug flags and creates a MockClDevice whose hardware info
// reports blitter operations as supported. Debug flags are restored by the
// DebugManagerStateRestore member.
struct BlitXe3CoreTests : public ::testing::Test {
    void SetUp() override {
        if (is32bit) {
            GTEST_SKIP();
        }

        debugManager.flags.RenderCompressedBuffersEnabled.set(true);
        debugManager.flags.EnableLocalMemory.set(true);

        // Start from the default hardware info and switch the blitter on.
        HardwareInfo blitterHwInfo = *defaultHwInfo;
        blitterHwInfo.capabilityTable.blitterOperationsSupported = true;
        clDevice = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(&blitterHwInfo));
    }

    // Wraps a single BlitProperties in a container and flushes it on the given
    // CSR; returns whatever csr->flushBcsTask returns.
    std::optional<TaskCountType> flushBcsTask(CommandStreamReceiver *csr, const BlitProperties &blitProperties, bool blocking, Device &device) {
        BlitPropertiesContainer singleBlit;
        singleBlit.push_back(blitProperties);
        return csr->flushBcsTask(singleBlit, blocking, false, device);
    }

    std::unique_ptr<MockClDevice> clDevice;
    TimestampPacketContainer timestampPacketContainer;
    CsrDependencies csrDependencies;
    DebugManagerStateRestore debugRestorer;
};
// Flushes a one-byte buffer-to-itself copy on the BCS engine and checks the
// MOCS fields of the resulting MEM_COPY command: zero when GmmHelper defers
// MOCS to the PAT index, 0x10 otherwise.
XE3_CORETEST_F(BlitXe3CoreTests, givenBufferWhenProgrammingBltCommandThenSetMocs) {
    using MEM_COPY = typename FamilyType::MEM_COPY;

    auto &copyEngine = clDevice->getEngine(aub_stream::EngineType::ENGINE_BCS, EngineUsage::regular);
    auto *bcsCsr = static_cast<UltCommandStreamReceiver<FamilyType> *>(copyEngine.commandStreamReceiver);

    MockContext context(clDevice.get());
    MockGraphicsAllocation clearColor;
    cl_int retVal = CL_SUCCESS;
    auto buffer = clUniquePtr<Buffer>(Buffer::create(&context, CL_MEM_READ_WRITE, 1, nullptr, retVal));

    auto copyProperties = BlitProperties::constructPropertiesForCopy(buffer->getGraphicsAllocation(clDevice->getRootDeviceIndex()),
                                                                     buffer->getGraphicsAllocation(clDevice->getRootDeviceIndex()),
                                                                     0, 0, {1, 1, 1}, 0, 0, 0, 0, &clearColor);
    flushBcsTask(bcsCsr, copyProperties, true, clDevice->getDevice());

    HardwareParse parser;
    parser.parseCommands<FamilyType>(bcsCsr->commandStream);
    auto memCopyIt = find<MEM_COPY *>(parser.cmdList.begin(), parser.cmdList.end());
    ASSERT_NE(parser.cmdList.end(), memCopyIt);
    auto *memCopyCmd = static_cast<MEM_COPY *>(*memCopyIt);

    // 0x10 is the value the original test names "mocsL3Enabled".
    const auto expectedMocs = clDevice->getGmmHelper()->deferMOCSToPatIndex() ? 0u : 0x10u;
    EXPECT_EQ(expectedMocs, memCopyCmd->getDestinationMOCS());
    EXPECT_EQ(expectedMocs, memCopyCmd->getSourceMOCS());
}
// With OverrideBlitterMocs set to 0, the MEM_COPY command produced by a BCS
// flush must carry that value in both MOCS fields.
XE3_CORETEST_F(BlitXe3CoreTests, givenBufferWhenProgrammingBltCommandThenSetMocsToValueOfDebugKey) {
    using MEM_COPY = typename FamilyType::MEM_COPY;

    DebugManagerStateRestore restorer;
    uint32_t overriddenMocs = 0;
    debugManager.flags.OverrideBlitterMocs.set(overriddenMocs);

    auto &copyEngine = clDevice->getEngine(aub_stream::EngineType::ENGINE_BCS, EngineUsage::regular);
    auto *bcsCsr = static_cast<UltCommandStreamReceiver<FamilyType> *>(copyEngine.commandStreamReceiver);

    MockContext context(clDevice.get());
    MockGraphicsAllocation clearColor;
    cl_int retVal = CL_SUCCESS;
    auto buffer = clUniquePtr<Buffer>(Buffer::create(&context, CL_MEM_READ_WRITE, 1, nullptr, retVal));

    auto copyProperties = BlitProperties::constructPropertiesForCopy(buffer->getGraphicsAllocation(clDevice->getRootDeviceIndex()),
                                                                     buffer->getGraphicsAllocation(clDevice->getRootDeviceIndex()),
                                                                     0, 0, {1, 1, 1}, 0, 0, 0, 0, &clearColor);
    flushBcsTask(bcsCsr, copyProperties, true, clDevice->getDevice());

    HardwareParse parser;
    parser.parseCommands<FamilyType>(bcsCsr->commandStream);
    auto memCopyIt = find<MEM_COPY *>(parser.cmdList.begin(), parser.cmdList.end());
    ASSERT_NE(parser.cmdList.end(), memCopyIt);
    auto *memCopyCmd = static_cast<MEM_COPY *>(*memCopyIt);

    EXPECT_EQ(overriddenMocs, memCopyCmd->getDestinationMOCS());
    EXPECT_EQ(overriddenMocs, memCopyCmd->getSourceMOCS());
}
// Flushes two copies on the BCS engine and checks the copy type programmed in
// the first MEM_COPY command of each flush:
//  - width maxBlitWidth - 1           -> COPY_TYPE_LINEAR_COPY
//  - width 2 * maxBlitWidth + 1       -> COPY_TYPE_MATRIX_COPY
XE3_CORETEST_F(BlitXe3CoreTests, given2dBlitCommandWhenDispatchingThenSetValidSurfaceType) {
    using MEM_COPY = typename FamilyType::MEM_COPY;
    auto &bcsEngine = clDevice->getEngine(aub_stream::EngineType::ENGINE_BCS, EngineUsage::regular);
    auto csr = static_cast<UltCommandStreamReceiver<FamilyType> *>(bcsEngine.commandStreamReceiver);
    MockContext context(clDevice.get());
    cl_int retVal = CL_SUCCESS;
    auto buffer = clUniquePtr<Buffer>(Buffer::create(&context, CL_MEM_READ_WRITE, 1, nullptr, retVal));
    auto allocation = buffer->getGraphicsAllocation(clDevice->getRootDeviceIndex());
    MockGraphicsAllocation clearColorAlloc;
    // Number of bytes already used in the command stream after the first flush;
    // the second parse starts from here so it only sees the second flush.
    size_t offset = 0;
    {
        // 1D
        auto blitProperties = BlitProperties::constructPropertiesForCopy(allocation, allocation,
                                                                         0, 0, {BlitterConstants::maxBlitWidth - 1, 1, 1}, 0, 0, 0, 0, &clearColorAlloc);
        flushBcsTask(csr, blitProperties, false, clDevice->getDevice());
        HardwareParse hwParser;
        hwParser.parseCommands<FamilyType>(csr->commandStream);
        auto cmdIterator = find<MEM_COPY *>(hwParser.cmdList.begin(), hwParser.cmdList.end());
        ASSERT_NE(hwParser.cmdList.end(), cmdIterator);
        auto bltCmd = genCmdCast<MEM_COPY *>(*cmdIterator);
        // Fatal assertion: bltCmd is dereferenced on the next line.
        ASSERT_NE(nullptr, bltCmd);
        EXPECT_EQ(MEM_COPY::COPY_TYPE::COPY_TYPE_LINEAR_COPY, bltCmd->getCopyType());
        offset = csr->commandStream.getUsed();
    }
    {
        // 2D
        auto blitProperties = BlitProperties::constructPropertiesForCopy(allocation, allocation,
                                                                         0, 0, {(2 * BlitterConstants::maxBlitWidth) + 1, 1, 1}, 0, 0, 0, 0, &clearColorAlloc);
        flushBcsTask(csr, blitProperties, false, clDevice->getDevice());
        HardwareParse hwParser;
        hwParser.parseCommands<FamilyType>(csr->commandStream, offset);
        auto cmdIterator = find<MEM_COPY *>(hwParser.cmdList.begin(), hwParser.cmdList.end());
        ASSERT_NE(hwParser.cmdList.end(), cmdIterator);
        auto bltCmd = genCmdCast<MEM_COPY *>(*cmdIterator);
        // Fatal assertion: bltCmd is dereferenced on the next line.
        ASSERT_NE(nullptr, bltCmd);
        EXPECT_EQ(MEM_COPY::COPY_TYPE::COPY_TYPE_MATRIX_COPY, bltCmd->getCopyType());
    }
}
// Plain gtest fixture alias used by the Xe3 copy-engine tests.
using Xe3CoreCopyEngineTests = ::testing::Test;

// isCacheFlushForBcsRequired() on the command queue must agree with
// ProductHelper::isDcFlushAllowed() for the same device.
XE3_CORETEST_F(Xe3CoreCopyEngineTests, givenCommandQueueWhenAskingForCacheFlushOnBcsThenReturnCorrectValue) {
    auto mockClDevice = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(defaultHwInfo.get()));
    MockContext mockContext(mockClDevice.get());

    cl_int errorCode = CL_SUCCESS;
    std::unique_ptr<CommandQueue> queue(CommandQueue::create(&mockContext, mockClDevice.get(), nullptr, false, errorCode));
    auto *queueHw = static_cast<CommandQueueHw<FamilyType> *>(queue.get());

    const auto &helper = mockClDevice->getProductHelper();
    EXPECT_EQ(helper.isDcFlushAllowed(), queueHw->isCacheFlushForBcsRequired());
}
// The ForceCacheFlushForBcs debug flag drives isCacheFlushForBcsRequired():
// 0 -> false, 1 -> true.
XE3_CORETEST_F(Xe3CoreCopyEngineTests, givenDebugFlagSetWhenCheckingBcsCacheFlushRequirementThenReturnCorrectValue) {
    DebugManagerStateRestore stateRestore;

    auto mockClDevice = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(defaultHwInfo.get()));
    MockContext mockContext(mockClDevice.get());

    cl_int errorCode = CL_SUCCESS;
    std::unique_ptr<CommandQueue> queue(CommandQueue::create(&mockContext, mockClDevice.get(), nullptr, false, errorCode));
    auto *queueHw = static_cast<CommandQueueHw<FamilyType> *>(queue.get());

    debugManager.flags.ForceCacheFlushForBcs.set(0);
    EXPECT_FALSE(queueHw->isCacheFlushForBcsRequired());

    debugManager.flags.ForceCacheFlushForBcs.set(1);
    EXPECT_TRUE(queueHw->isCacheFlushForBcsRequired());
}

View File

@@ -0,0 +1,31 @@
/*
* Copyright (C) 2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/test/common/test_macros/header/per_product_test_definitions.h"
#include "shared/test/common/test_macros/test.h"
#include "opencl/test/unit_test/fixtures/device_info_fixture.h"
#include "hw_cmds_xe3_core.h"
// Reuses the shared memory-capabilities fixture for the Xe3-only test below.
using GetDeviceInfoMemCapabilitiesTestXe3Device = NEO::GetDeviceInfoMemCapabilitiesTest;

// Host, device, single-device-shared and cross-device-shared memory are each
// expected to report ACCESS | ATOMIC_ACCESS; shared system memory reports 0.
XE3_CORETEST_F(GetDeviceInfoMemCapabilitiesTestXe3Device, GivenValidParametersWhenGetDeviceInfoIsCalledForXe3DeviceLinuxThenClSuccessIsReturned) {
    constexpr auto accessAndAtomicAccess = (CL_UNIFIED_SHARED_MEMORY_ACCESS_INTEL | CL_UNIFIED_SHARED_MEMORY_ATOMIC_ACCESS_INTEL);

    std::vector<TestParams> expectedCapabilities = {
        {CL_DEVICE_HOST_MEM_CAPABILITIES_INTEL, accessAndAtomicAccess},
        {CL_DEVICE_DEVICE_MEM_CAPABILITIES_INTEL, accessAndAtomicAccess},
        {CL_DEVICE_SINGLE_DEVICE_SHARED_MEM_CAPABILITIES_INTEL, accessAndAtomicAccess},
        {CL_DEVICE_CROSS_DEVICE_SHARED_MEM_CAPABILITIES_INTEL, accessAndAtomicAccess},
        {CL_DEVICE_SHARED_SYSTEM_MEM_CAPABILITIES_INTEL, 0}};

    check(expectedCapabilities);
}

View File

@@ -0,0 +1,80 @@
/*
* Copyright (C) 2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/command_container/command_encoder.h"
#include "shared/source/kernel/kernel_descriptor.h"
#include "shared/source/os_interface/product_helper.h"
#include "shared/test/common/helpers/debug_manager_state_restore.h"
#include "shared/test/common/helpers/default_hw_info.h"
#include "shared/test/common/mocks/mock_execution_environment.h"
#include "shared/test/common/test_macros/header/per_product_test_definitions.h"
#include "shared/test/common/test_macros/test.h"
#include "hw_cmds_xe3_core.h"
using namespace NEO;
// Plain gtest fixture alias used by the Xe3 walker-dispatch tests.
using WalkerDispatchTestsXe3Core = ::testing::Test;

// encodeComputeDispatchAllWalker must enable "compute dispatch all walker"
// (a) for a concurrent kernel execution type and (b) for the default execution
// type when ComputeDispatchAllWalkerEnableInComputeWalker is set to 1.
// NOTE(review): the same walker command is reused for (b), so the bit is
// already set from (a) when the second check runs.
XE3_CORETEST_F(WalkerDispatchTestsXe3Core, whenEncodeAdditionalWalkerFieldsIsCalledThenComputeDispatchAllIsCorrectlySet) {
    DebugManagerStateRestore stateRestore;

    KernelDescriptor kernelDescriptor;
    auto computeWalker = FamilyType::cmdInitGpgpuWalker;
    EncodeWalkerArgs encodeArgs{
        kernelDescriptor,                     // kernelDescriptor
        KernelExecutionType::concurrent,      // kernelExecutionType
        NEO::RequiredDispatchWalkOrder::none, // requiredDispatchWalkOrder
        0,                                    // localRegionSize
        0,                                    // maxFrontEndThreads
        true};                                // requiredSystemFence

    // (a) concurrent execution type
    EncodeDispatchKernel<FamilyType>::encodeComputeDispatchAllWalker(computeWalker, encodeArgs);
    EXPECT_TRUE(computeWalker.getComputeDispatchAllWalkerEnable());

    // (b) default execution type with the debug flag forcing the feature on
    encodeArgs.kernelExecutionType = KernelExecutionType::defaultType;
    debugManager.flags.ComputeDispatchAllWalkerEnableInComputeWalker.set(1);
    EncodeDispatchKernel<FamilyType>::encodeComputeDispatchAllWalker(computeWalker, encodeArgs);
    EXPECT_TRUE(computeWalker.getComputeDispatchAllWalkerEnable());
}
// encodeAdditionalWalkerFields and the kernel's hasSample attribute:
//  - hasSample == false: walk order is not MORTON_WALK, batch size is TG_BATCH_1
//  - hasSample == true:  walk order is MORTON_WALK,     batch size is TG_BATCH_4
XE3_CORETEST_F(WalkerDispatchTestsXe3Core, givenHasSampleSetWhenEncodingExtraParamsThenSetCorrectFields) {
    using COMPUTE_WALKER = typename FamilyType::COMPUTE_WALKER;
    using DISPATCH_WALK_ORDER = typename COMPUTE_WALKER::DISPATCH_WALK_ORDER;
    using THREAD_GROUP_BATCH_SIZE = typename COMPUTE_WALKER::THREAD_GROUP_BATCH_SIZE;

    auto computeWalker = FamilyType::cmdInitGpgpuWalker;
    MockExecutionEnvironment executionEnvironment{};
    auto &rootDeviceEnv = *executionEnvironment.rootDeviceEnvironments[0];

    KernelDescriptor kernelDescriptor;
    EncodeWalkerArgs encodeArgs{
        kernelDescriptor,                     // kernelDescriptor
        KernelExecutionType::defaultType,     // kernelExecutionType
        NEO::RequiredDispatchWalkOrder::none, // requiredDispatchWalkOrder
        0,                                    // localRegionSize
        0,                                    // maxFrontEndThreads
        false};                               // requiredSystemFence

    // Kernel without sampler usage.
    kernelDescriptor.kernelAttributes.flags.hasSample = false;
    EncodeDispatchKernel<FamilyType>::encodeAdditionalWalkerFields(rootDeviceEnv, computeWalker, encodeArgs);
    EXPECT_NE(DISPATCH_WALK_ORDER::DISPATCH_WALK_ORDER_MORTON_WALK, computeWalker.getDispatchWalkOrder());
    EXPECT_EQ(THREAD_GROUP_BATCH_SIZE::THREAD_GROUP_BATCH_SIZE_TG_BATCH_1, computeWalker.getThreadGroupBatchSize());

    // Kernel with sampler usage.
    kernelDescriptor.kernelAttributes.flags.hasSample = true;
    EncodeDispatchKernel<FamilyType>::encodeAdditionalWalkerFields(rootDeviceEnv, computeWalker, encodeArgs);
    EXPECT_EQ(DISPATCH_WALK_ORDER::DISPATCH_WALK_ORDER_MORTON_WALK, computeWalker.getDispatchWalkOrder());
    EXPECT_EQ(THREAD_GROUP_BATCH_SIZE::THREAD_GROUP_BATCH_SIZE_TG_BATCH_4, computeWalker.getThreadGroupBatchSize());
}

View File

@@ -0,0 +1,21 @@
/*
* Copyright (C) 2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/test/common/test_macros/hw_test_base.h"
// Generic hardware tests excluded for IGFX_XE3_CORE.
// Each entry names a test suite and a test case, followed by the core the
// exclusion applies to.
// NOTE(review): presumably these are excluded because their expectations do not
// hold on Xe3 or because an Xe3-specific variant exists - confirm per entry.

// Buffer surface-state programming.
HWTEST_EXCLUDE_PRODUCT(BufferSetSurfaceTests, givenAlignedCacheableReadOnlyBufferThenChoseOclBufferPolicy, IGFX_XE3_CORE);
HWTEST_EXCLUDE_PRODUCT(BufferSetSurfaceTests, givenBufferSetSurfaceThatMemoryIsUnalignedToCachelineButReadOnlyThenL3CacheShouldBeStillOn, IGFX_XE3_CORE);
// Image arguments and image surface-state programming.
HWTEST_EXCLUDE_PRODUCT(ImageSetArgTest, givenMcsAllocationWhenSetArgIsCalledWithUnifiedAuxCapabilityThenProgramAuxFieldsForCcs, IGFX_XE3_CORE);
HWTEST_EXCLUDE_PRODUCT(ImageSetArgTest, givenCompressedResourceWhenSettingImgArgThenSetCorrectAuxParams, IGFX_XE3_CORE);
HWTEST_EXCLUDE_PRODUCT(XeHPAndLaterImageTests, givenCompressionEnabledWhenAppendingSurfaceStateParamsThenProgramCompressionFormat, IGFX_XE3_CORE);
// Device capabilities and queue family names.
HWTEST_EXCLUDE_PRODUCT(QueueFamilyNameTest, givenRcsWhenGettingQueueFamilyNameThenReturnProperValue, IGFX_XE3_CORE);
HWTEST_EXCLUDE_PRODUCT(DeviceGetCapsTest, givenDeviceWhenAskingForSubGroupSizesThenReturnCorrectValues, IGFX_XE3_CORE);
HWTEST_EXCLUDE_PRODUCT(DeviceGetCapsTest, givenDisabledFtrPooledEuWhenCalculatingMaxEuPerSSThenIgnoreEuCountPerPoolMin, IGFX_XE3_CORE);
HWTEST_EXCLUDE_PRODUCT(DeviceGetCapsTest, givenEnabledFtrPooledEuWhenCalculatingMaxEuPerSSThenDontIgnoreEuCountPerPoolMin, IGFX_XE3_CORE);
// Command queue, command stream receiver and helpers.
HWTEST_EXCLUDE_PRODUCT(CommandQueueHwTest, givenCommandQueueWhenAskingForCacheFlushOnBcsThenReturnTrue, IGFX_XE3_CORE);
HWTEST_EXCLUDE_PRODUCT(CommandStreamReceiverFlushTaskTests, givenOverrideThreadArbitrationPolicyDebugVariableSetWhenFlushingThenRequestRequiredMode, IGFX_XE3_CORE);
HWTEST_EXCLUDE_PRODUCT(ClGfxCoreHelperTest, givenKernelInfoWhenCheckingRequiresAuxResolvesThenCorrectValuesAreReturned, IGFX_XE3_CORE);

View File

@@ -0,0 +1,14 @@
#
# Copyright (C) 2025 Intel Corporation
#
# SPDX-License-Identifier: MIT
#
# PTL-specific OpenCL unit tests: only contribute sources when TESTS_PTL is set.
if(TESTS_PTL)
  set(IGDRCL_SRCS_tests_xe3_core_ptl
      "${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt"
  )

  target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_xe3_core_ptl})

  # Project helper defined elsewhere; presumably descends into the child directories.
  add_subdirectories()
endif()

View File

@@ -0,0 +1,9 @@
#
# Copyright (C) 2025 Intel Corporation
#
# SPDX-License-Identifier: MIT
#
# Child directories of this folder are only processed on UNIX hosts.
if(UNIX)
  # Project helper defined elsewhere; presumably descends into the child directories.
  add_subdirectories()
endif()

View File

@@ -0,0 +1,12 @@
#
# Copyright (C) 2025 Intel Corporation
#
# SPDX-License-Identifier: MIT
#
# Sources of the PTL device-id tests, added to the Linux DLL test target.
set(IGDRCL_SRCS_linux_dll_tests_xe3_core_ptl
    "${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt"
    "${CMAKE_CURRENT_SOURCE_DIR}/device_id_tests_ptl.cpp"
)

target_sources(igdrcl_linux_dll_tests PRIVATE ${IGDRCL_SRCS_linux_dll_tests_xe3_core_ptl})

# Project helper defined elsewhere; presumably descends into the child directories.
add_subdirectories()

View File

@@ -0,0 +1,22 @@
/*
* Copyright (C) 2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/test/common/fixtures/linux/device_id_fixture.h"
using namespace NEO;
// Each of the seven PTL device IDs listed below must be paired with the PTL
// hardware info and setup function; the check itself is done by the fixture's
// testImpl().
TEST_F(DeviceIdTests, GivenPtlSupportedDeviceIdThenDeviceDescriptorTableExists) {
    std::array<DeviceDescriptor, 7> ptlDescriptors = {{
        {0xB080, &PtlHwConfig::hwInfo, &PtlHwConfig::setupHardwareInfo},
        {0xB081, &PtlHwConfig::hwInfo, &PtlHwConfig::setupHardwareInfo},
        {0xB082, &PtlHwConfig::hwInfo, &PtlHwConfig::setupHardwareInfo},
        {0xB083, &PtlHwConfig::hwInfo, &PtlHwConfig::setupHardwareInfo},
        {0xB08F, &PtlHwConfig::hwInfo, &PtlHwConfig::setupHardwareInfo},
        {0xB090, &PtlHwConfig::hwInfo, &PtlHwConfig::setupHardwareInfo},
        {0xB0A0, &PtlHwConfig::hwInfo, &PtlHwConfig::setupHardwareInfo},
    }};

    testImpl(ptlDescriptors);
}

View File

@@ -0,0 +1,68 @@
/*
* Copyright (C) 2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/helpers/basic_math.h"
#include "shared/test/common/helpers/gtest_helpers.h"
#include "shared/test/common/test_macros/header/per_product_test_definitions.h"
#include "opencl/test/unit_test/fixtures/cl_device_fixture.h"
#include "opencl/test/unit_test/fixtures/device_info_fixture.h"
#include "hw_cmds_xe3_core.h"
using namespace NEO;
// ClDeviceFixture-based suite for Xe3 core device capability tests.
using Xe3CoreClDeviceCaps = Test<ClDeviceFixture>;

// The extension string must contain cl_khr_subgroups and
// cl_intel_bfloat16_conversions.
// NOTE(review): the test name says "DoesNotReport" while the assertions require
// the extensions to be present - confirm which one is intended.
XE3_CORETEST_F(Xe3CoreClDeviceCaps, givenXe3CoreWhenCheckExtensionsThenDeviceDoesNotReportClKhrSubgroupsExtension) {
    const auto &deviceInfo = pClDevice->getDeviceInfo();
    auto reportsExtension = [&deviceInfo](const char *extensionName) {
        return hasSubstr(deviceInfo.deviceExtensions, std::string(extensionName));
    };

    EXPECT_TRUE(reportsExtension("cl_khr_subgroups"));
    EXPECT_TRUE(reportsExtension("cl_intel_bfloat16_conversions"));
}
// The device info must report independentForwardProgress as true.
// NOTE(review): the test name says "DoesNotSupport" while the assertion
// requires the capability to be set - confirm which one is intended.
XE3_CORETEST_F(Xe3CoreClDeviceCaps, givenXe3CoreWhenCheckingCapsThenDeviceDoesNotSupportIndependentForwardProgress) {
    const auto &deviceInfo = pClDevice->getDeviceInfo();
    const auto &forwardProgress = deviceInfo.independentForwardProgress;
    EXPECT_TRUE(forwardProgress);
}
// With ftrPooledEuEnabled set, the expected max work-group size is computed
// from EuCountPerPoolMin: EuCountPerPoolMin * threadsPerEu * 8, rounded down
// to a power of two and capped at 2048.
XE3_CORETEST_F(Xe3CoreClDeviceCaps, givenXe3CoreWhenCalculatingMaxEuPerSSThenDontIgnoreEuCountPerPoolMin) {
    HardwareInfo pooledEuHwInfo = *defaultHwInfo;
    GT_SYSTEM_INFO &sysInfo = pooledEuHwInfo.gtSystemInfo;

    sysInfo.EUCount = 20;
    sysInfo.EuCountPerPoolMin = 99999;
    pooledEuHwInfo.featureTable.flags.ftrPooledEuEnabled = 1;

    std::unique_ptr<Device> mockDevice(MockDevice::createWithNewExecutionEnvironment<MockDevice>(&pooledEuHwInfo));

    auto uncappedMaxWgs = sysInfo.EuCountPerPoolMin * (sysInfo.ThreadCount / sysInfo.EUCount) * 8;
    auto expectedMaxWgs = std::min(Math::prevPowerOfTwo(uncappedMaxWgs), 2048u);

    EXPECT_EQ(expectedMaxWgs, mockDevice->getDeviceInfo().maxWorkGroupSize);
}
// The device extension string must contain each of the five extensions
// checked below.
XE3_CORETEST_F(Xe3CoreClDeviceCaps, givenDeviceExtensionsWhenDeviceCapsInitializedThenAddProperExtensions) {
    const auto &deviceInfo = pClDevice->getDeviceInfo();
    auto reportsExtension = [&deviceInfo](const char *extensionName) {
        return hasSubstr(deviceInfo.deviceExtensions, std::string(extensionName));
    };

    EXPECT_TRUE(reportsExtension("cl_intel_create_buffer_with_properties"));
    EXPECT_TRUE(reportsExtension("cl_intel_subgroup_local_block_io"));
    EXPECT_TRUE(reportsExtension("cl_intel_subgroup_matrix_multiply_accumulate"));
    EXPECT_TRUE(reportsExtension("cl_khr_subgroup_named_barrier"));
    EXPECT_TRUE(reportsExtension("cl_intel_subgroup_extended_block_read"));
}
// Alias of the shared QueueFamilyNameTest fixture for the Xe3-only tests below.
using QueueFamilyNameTestXe3Core = QueueFamilyNameTest;
// Passes the renderCompute engine group and the expected name "cccs" to the
// fixture's verify() helper (defined outside this file).
XE3_CORETEST_F(QueueFamilyNameTestXe3Core, givenCccsWhenGettingQueueFamilyNameThenReturnProperValue) {
verify(EngineGroupType::renderCompute, "cccs");
}
// Passes the linkedCopy engine group and the expected name "linked bcs" to the
// fixture's verify() helper (defined outside this file).
XE3_CORETEST_F(QueueFamilyNameTestXe3Core, givenLinkedBcsWhenGettingQueueFamilyNameThenReturnProperValue) {
verify(EngineGroupType::linkedCopy, "linked bcs");
}

View File

@@ -0,0 +1,109 @@
/*
* Copyright (C) 2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/command_container/encode_surface_state.h"
#include "shared/source/gmm_helper/client_context/gmm_client_context.h"
#include "shared/source/gmm_helper/gmm_helper.h"
#include "shared/source/helpers/state_base_address.h"
#include "shared/source/memory_manager/allocation_properties.h"
#include "shared/test/common/helpers/debug_manager_state_restore.h"
#include "shared/test/common/libult/ult_command_stream_receiver.h"
#include "shared/test/common/test_macros/header/per_product_test_definitions.h"
#include "shared/test/common/test_macros/test.h"
#include "opencl/source/mem_obj/buffer.h"
#include "opencl/test/unit_test/fixtures/ult_command_stream_receiver_fixture.h"
#include "opencl/test/unit_test/mocks/mock_cl_device.h"
#include "opencl/test/unit_test/mocks/mock_context.h"
#include "encode_surface_state_args.h"
#include "hw_cmds_xe3_core.h"
using namespace NEO;
using CmdsProgrammingTestsXe3Core = UltCommandStreamReceiverTest;
XE3_CORETEST_F(CmdsProgrammingTestsXe3Core, givenL3ToL1DebugFlagWhenStatelessMocsIsProgrammedThenItHasL1CachingOn) {
    using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS;

    // Override the ForceL1Caching debug flag for this test.
    DebugManagerStateRestore restore;
    debugManager.flags.ForceL1Caching.set(1u);

    // Flush once so the command stream receiver emits its state commands.
    auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
    flushTask(csr);

    // Locate the STATE_BASE_ADDRESS command in the CSR command stream.
    HardwareParse parser;
    parser.parseCommands<FamilyType>(csr.commandStream, 0);
    parser.findHardwareCommands<FamilyType>();
    ASSERT_NE(nullptr, parser.cmdStateBaseAddress);

    auto sbaCmd = static_cast<STATE_BASE_ADDRESS *>(parser.cmdStateBaseAddress);
    const uint8_t expectedL1CachePolicy = 0;
    auto actualL1CachePolicy = static_cast<uint8_t>(sbaCmd->getL1CacheControlCachePolicy());
    EXPECT_EQ(expectedL1CachePolicy, actualL1CachePolicy);
}
// Encodes a buffer RENDER_SURFACE_STATE for a page-sized buffer allocation and checks the
// L1 cache control policy written into it.
// NOTE(review): the test name says "Wt", but the assertion below expects L1_CACHE_CONTROL_WBP —
// confirm which of the two is intended.
XE3_CORETEST_F(CmdsProgrammingTestsXe3Core, whenAppendingRssThenProgramWtL1CachePolicy) {
auto memoryManager = pDevice->getExecutionEnvironment()->memoryManager.get();
size_t allocationSize = MemoryConstants::pageSize;
AllocationProperties properties(pDevice->getRootDeviceIndex(), allocationSize, AllocationType::buffer, pDevice->getDeviceBitfield());
auto allocation = memoryManager->allocateGraphicsMemoryWithProperties(properties);
// Local copy of the family's initial surface state; encodeBuffer writes into this copy.
auto rssCmd = FamilyType::cmdInitRenderSurfaceState;
MockContext context(pClDevice);
// The allocation is handed to the buffer through a MultiGraphicsAllocation
// (presumably the buffer then owns and releases it — confirm).
auto multiGraphicsAllocation = MultiGraphicsAllocation(pClDevice->getRootDeviceIndex());
multiGraphicsAllocation.addAllocation(allocation);
std::unique_ptr<BufferHw<FamilyType>> buffer(static_cast<BufferHw<FamilyType> *>(
BufferHw<FamilyType>::create(&context, {}, 0, 0, allocationSize, nullptr, nullptr, std::move(multiGraphicsAllocation), false, false, false)));
// Fill the encoder arguments from the allocation, the buffer's MOCS value and the device.
NEO::EncodeSurfaceStateArgs args;
args.outMemory = &rssCmd;
args.graphicsAddress = allocation->getGpuAddress();
args.size = allocation->getUnderlyingBufferSize();
args.mocs = buffer->getMocsValue(false, false, pClDevice->getRootDeviceIndex());
args.numAvailableDevices = pClDevice->getNumGenericSubDevices();
args.allocation = allocation;
args.gmmHelper = pClDevice->getGmmHelper();
args.areMultipleSubDevicesInContext = true;
EncodeSurfaceState<FamilyType>::encodeBuffer(args);
EXPECT_EQ(FamilyType::RENDER_SURFACE_STATE::L1_CACHE_CONTROL_WBP, rssCmd.getL1CacheControlCachePolicy());
}
// Creates a read-only CL_MEM_USE_HOST_PTR buffer on page-aligned host memory, programs a
// stateful surface state for it, and checks that the surface state carries the MOCS of
// GMM_RESOURCE_USAGE_OCL_BUFFER_CONST and an L1 cache control policy of 0.
XE3_CORETEST_F(CmdsProgrammingTestsXe3Core, givenAlignedCacheableReadOnlyBufferThenChoseOclBufferConstPolicy) {
    MockContext context;
    const auto size = MemoryConstants::pageSize;

    // The host memory is owned by RAII and declared before the buffer: locals are destroyed
    // in reverse order, so the buffer built on this pointer is released first, and the
    // memory is freed even when a fatal assertion leaves the test early.
    std::unique_ptr<void, decltype(&alignedFree)> hostMemory(alignedMalloc(size * 2, MemoryConstants::pageSize), &alignedFree);
    ASSERT_NE(nullptr, hostMemory.get());

    const auto flags = CL_MEM_USE_HOST_PTR | CL_MEM_READ_ONLY;
    auto retVal = CL_SUCCESS;
    auto buffer = std::unique_ptr<Buffer>(Buffer::create(
        &context,
        flags,
        size,
        hostMemory.get(),
        retVal));
    EXPECT_EQ(CL_SUCCESS, retVal);
    // Stop here if creation failed; the buffer is dereferenced below.
    ASSERT_NE(nullptr, buffer.get());

    typename FamilyType::RENDER_SURFACE_STATE surfaceState = {};
    buffer->setArgStateful(&surfaceState, false, false, false, false, context.getDevice(0)->getDevice(), false);

    const auto expectedMocs = context.getDevice(0)->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CONST);
    const auto actualMocs = surfaceState.getMemoryObjectControlState();
    EXPECT_EQ(expectedMocs, actualMocs);

    const uint8_t expectedL1CachePolicy = 0;
    const auto actualL1CachePolicy = static_cast<uint8_t>(surfaceState.getL1CacheControlCachePolicy());
    EXPECT_EQ(expectedL1CachePolicy, actualL1CachePolicy);
}

View File

@@ -0,0 +1,32 @@
/*
* Copyright (C) 2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/test/common/test_macros/header/per_product_test_definitions.h"
#include "shared/test/common/test_macros/test.h"
#include "opencl/source/platform/platform_info.h"
#include "opencl/test/unit_test/fixtures/platform_fixture.h"
#include "hw_cmds_xe3_core.h"
using namespace NEO;
// GoogleTest fixture that adapts PlatformFixture to the ::testing::Test interface:
// gtest's SetUp/TearDown forward to PlatformFixture's setUp/tearDown.
struct Xe3CorePlatformCaps : public PlatformFixture, public ::testing::Test {
void SetUp() override {
PlatformFixture::setUp();
}
void TearDown() override {
PlatformFixture::tearDown();
}
};
// The platform extension string must contain "cl_khr_fp64"
// (find() returning npos would mean the extension is absent).
XE3_CORETEST_F(Xe3CorePlatformCaps, givenXe3CoreSkusThenItSupportFP64) {
const auto &caps = pPlatform->getPlatformInfo();
EXPECT_NE(std::string::npos, caps.extensions.find(std::string("cl_khr_fp64")));
}

View File

@@ -0,0 +1,22 @@
/*
* Copyright (C) 2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/test/common/mocks/mock_device.h"
#include "shared/test/common/test_macros/header/per_product_test_definitions.h"
#include "shared/test/common/test_macros/test.h"
#include "opencl/test/unit_test/fixtures/cl_device_fixture.h"
#include "hw_cmds_xe3_core.h"
using namespace NEO;
// Fixture alias: the generic Test<> wrapper around ClDeviceFixture.
using Xe3CoreOnlyTest = Test<ClDeviceFixture>;
// The device created by the fixture must report IGFX_XE3_CORE as its render core family.
XE3_CORETEST_F(Xe3CoreOnlyTest, WhenGettingRenderCoreFamilyThenOnlyXe3CoreIsReturned) {
EXPECT_EQ(IGFX_XE3_CORE, pDevice->getRenderCoreFamily());
}

View File

@@ -0,0 +1,20 @@
/*
* Copyright (C) 2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/ail/ail_configuration_base.inl"
#include <map>
#include <vector>
namespace NEO {
// Static object whose construction enables the AIL configuration for IGFX_PTL
// (presumably by registering it in a per-product table — confirm in EnableAIL).
static EnableAIL<IGFX_PTL> enableAILPTL;
// Application-name -> AIL enumeration map for PTL; intentionally empty here.
std::map<std::string_view, std::vector<AILEnumeration>> applicationMapPTL = {};
// Explicit instantiation of the AIL configuration for PTL.
template class AILConfigurationHw<IGFX_PTL>;
} // namespace NEO

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2018-2024 Intel Corporation
* Copyright (C) 2018-2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -89,6 +89,7 @@ struct CmdServicesMemTraceVersion {
Bmg = 43,
Lnl = 45,
Arl = 46,
Ptl = 51,
};
};
struct RecordingMethodValues {

View File

@@ -1,5 +1,5 @@
#
# Copyright (C) 2019-2024 Intel Corporation
# Copyright (C) 2019-2025 Intel Corporation
#
# SPDX-License-Identifier: MIT
#
@@ -70,6 +70,12 @@ if(SUPPORT_XE2_AND_LATER)
)
endif()
# Add the Xe3-and-later command encoder implementation to the command container
# source list only when SUPPORT_XE3_AND_LATER is set.
if(SUPPORT_XE3_AND_LATER)
list(APPEND NEO_CORE_COMMAND_CONTAINER
${CMAKE_CURRENT_SOURCE_DIR}/command_encoder_from_xe3_and_later.inl
)
endif()
if(SUPPORT_XE_HPC_AND_BEFORE)
list(APPEND NEO_CORE_COMMAND_CONTAINER
${CMAKE_CURRENT_SOURCE_DIR}/command_encoder_pre_xe2_hpg_core.inl

View File

@@ -0,0 +1,78 @@
/*
* Copyright (C) 2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/command_container/command_encoder.h"
#include "shared/source/execution_environment/root_device_environment.h"
#include "shared/source/helpers/pipe_control_args.h"
namespace NEO {
// Xe3-and-later variant: no extra synchronization commands are emitted around the compute
// mode command. The call forwards straight to programComputeModeCommand; the args,
// hasSharedHandles, isRcs and dcFlush parameters are not used here.
template <typename Family>
void EncodeComputeMode<Family>::programComputeModeCommandWithSynchronization(LinearStream &csr, StateComputeModeProperties &properties, const PipelineSelectArgs &args,
bool hasSharedHandles, const RootDeviceEnvironment &rootDeviceEnvironment, bool isRcs, bool dcFlush) {
programComputeModeCommand(csr, properties, rootDeviceEnvironment);
}
/// Applies optional debug-flag overrides to an already initialized 3DSTATE_BTD command and
/// programs its 64-bit RT memory structures mode.
///
/// @param ptr3dStateBtd pointer to the _3DSTATE_BTD command to modify in place.
template <typename Family>
void EncodeEnableRayTracing<Family>::append3dStateBtd(void *ptr3dStateBtd) {
    using _3DSTATE_BTD = typename Family::_3DSTATE_BTD;
    using DISPATCH_TIMEOUT_COUNTER = typename Family::_3DSTATE_BTD::DISPATCH_TIMEOUT_COUNTER;
    using CONTROLS_THE_MAXIMUM_NUMBER_OF_OUTSTANDING_RAYQUERIES_PER_SS = typename Family::_3DSTATE_BTD::CONTROLS_THE_MAXIMUM_NUMBER_OF_OUTSTANDING_RAYQUERIES_PER_SS;

    auto btdCmd = static_cast<_3DSTATE_BTD *>(ptr3dStateBtd);

    // -1 means "keep the command's default" for both debug flags.
    const auto forcedRayQueriesLimit = debugManager.flags.ForceTheMaximumNumberOfOutstandingRayqueriesPerSs.get();
    if (forcedRayQueriesLimit != -1) {
        auto rayQueriesLimit = static_cast<CONTROLS_THE_MAXIMUM_NUMBER_OF_OUTSTANDING_RAYQUERIES_PER_SS>(forcedRayQueriesLimit);
        DEBUG_BREAK_IF(rayQueriesLimit > 3);
        btdCmd->setControlsTheMaximumNumberOfOutstandingRayqueriesPerSs(rayQueriesLimit);
    }

    const auto forcedTimeoutCounter = debugManager.flags.ForceDispatchTimeoutCounter.get();
    if (forcedTimeoutCounter != -1) {
        auto timeoutCounter = static_cast<DISPATCH_TIMEOUT_COUNTER>(forcedTimeoutCounter);
        DEBUG_BREAK_IF(timeoutCounter > 7);
        btdCmd->setDispatchTimeoutCounter(timeoutCounter);
    }

    // 64-bit mode is enabled exactly when 48-bit resources are not required.
    const bool needs48bResources = is48bResourceNeededForRayTracing();
    btdCmd->setRtMemStructures64bModeEnable(!needs48bResources);
}
// Programs the "registers per thread" field of an interface descriptor.
//
// The product helper supplies the GRF counts the product supports. The first supported count
// that is >= grfCount and has an encoding in the table below is written to the descriptor.
// If no supported count qualifies, the function aborts through UNRECOVERABLE_IF.
//
// sizeCrossThreadData and sizePerThreadData are not used by this implementation.
template <typename Family>
template <typename InterfaceDescriptorType>
void EncodeDispatchKernel<Family>::setGrfInfo(InterfaceDescriptorType *pInterfaceDescriptor, uint32_t grfCount,
const size_t &sizeCrossThreadData, const size_t &sizePerThreadData,
const RootDeviceEnvironment &rootDeviceEnvironment) {
using REGISTERS_PER_THREAD = typename InterfaceDescriptorType::REGISTERS_PER_THREAD;
// Pairs a GRF count with its descriptor enum value; operator== lets std::find search by count.
struct NumGrfsForIdd {
bool operator==(uint32_t grfCount) const { return this->grfCount == grfCount; }
uint32_t grfCount;
REGISTERS_PER_THREAD valueForIdd;
};
const std::array<NumGrfsForIdd, 8> validNumGrfsForIdd{{{32u, REGISTERS_PER_THREAD::REGISTERS_PER_THREAD_REGISTERS_32},
{64u, REGISTERS_PER_THREAD::REGISTERS_PER_THREAD_REGISTERS_64},
{96u, REGISTERS_PER_THREAD::REGISTERS_PER_THREAD_REGISTERS_96},
{128u, REGISTERS_PER_THREAD::REGISTERS_PER_THREAD_REGISTERS_128},
{160u, REGISTERS_PER_THREAD::REGISTERS_PER_THREAD_REGISTERS_160},
{192u, REGISTERS_PER_THREAD::REGISTERS_PER_THREAD_REGISTERS_192},
{256u, REGISTERS_PER_THREAD::REGISTERS_PER_THREAD_REGISTERS_256},
{512u, REGISTERS_PER_THREAD::REGISTERS_PER_THREAD_REGISTERS_512}}};
const auto &productHelper = rootDeviceEnvironment.template getHelper<ProductHelper>();
const auto supportedNumGrfs = productHelper.getSupportedNumGrfs(rootDeviceEnvironment.getReleaseHelper());
// NOTE(review): picking the smallest fitting size relies on supportedNumGrfs being in
// ascending order — confirm against getSupportedNumGrfs.
for (const auto &supportedNumGrf : supportedNumGrfs) {
if (grfCount <= supportedNumGrf) {
auto value = std::find(validNumGrfsForIdd.begin(), validNumGrfsForIdd.end(), supportedNumGrf);
if (value != validNumGrfsForIdd.end()) {
pInterfaceDescriptor->setRegistersPerThread(value->valueForIdd);
return;
}
}
}
UNRECOVERABLE_IF(true); // out of expected range
}
} // namespace NEO

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2018-2024 Intel Corporation
* Copyright (C) 2018-2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -562,6 +562,8 @@ DECLARE_DEBUG_VARIABLE(int32_t, EnableFtrTile64Optimization, -1, "Control featur
DECLARE_DEBUG_VARIABLE(int32_t, ForceTheMaximumNumberOfOutstandingRayqueriesPerSs, -1, "Set the maximum number of outstanding RayQueries per SS, -1: default, 0: 128, 1: 256, 2: 512, 3: 1024")
DECLARE_DEBUG_VARIABLE(int32_t, ForceDispatchTimeoutCounter, -1, "Set timeout for Synchronous Ray Tracing, -1: default, 0: 64, 1: 128, 2: 192, 3: 256, 4: 512, 5: 1024, 6: 2048, 7: 4096")
DECLARE_DEBUG_VARIABLE(int32_t, Enable10ThreadsPerEu, -1, "Enable 10 threads per EU HSD-18022695913, -1: default, 0: disabled, 1: enabled")
DECLARE_DEBUG_VARIABLE(int32_t, Enable64bAddressingForRayTracing, -1, "-1: default, 0: disabled, 1: enabled. Enable support for 64 bit addressing for RayTracing HSD-14016042915")
DECLARE_DEBUG_VARIABLE(int32_t, EnableXe3VariableRegisterSizeAllocation, -1, "When enabled, use new Xe3 Variable Register per Thread (VRT) feature, -1: default, 0: disabled, 1: enabled")
/* IMPLICIT SCALING */
DECLARE_DEBUG_VARIABLE(int32_t, EnableWalkerPartition, -1, "-1: default, 0: disable, 1: enable, Enables Walker Partitioning via WPARID.")

View File

@@ -1,10 +1,22 @@
/*
* Copyright (C) 2018-2024 Intel Corporation
* Copyright (C) 2018-2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
// Panther Lake (PTL) PCI device IDs, all mapped to PtlHwConfig. The entries are emitted only
// when SUPPORT_XE3_CORE evaluates to true and SUPPORT_PTL is defined.
#if SUPPORT_XE3_CORE
#ifdef SUPPORT_PTL
DEVICE(0xB080, PtlHwConfig)
DEVICE(0xB081, PtlHwConfig)
DEVICE(0xB082, PtlHwConfig)
DEVICE(0xB083, PtlHwConfig)
DEVICE(0xB08F, PtlHwConfig)
DEVICE(0xB090, PtlHwConfig)
DEVICE(0xB0A0, PtlHwConfig)
#endif
#endif
#if SUPPORT_XE2_HPG_CORE
#ifdef SUPPORT_BMG
DEVICE(0xE202, BmgHwConfig)

View File

@@ -1,10 +1,19 @@
/*
* Copyright (C) 2021-2024 Intel Corporation
* Copyright (C) 2021-2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
// Panther Lake (PTL) product configurations: the H and U variants, each in two steppings.
// All of them use PtlHwConfig with the XE3 family and the XE3_LPG release.
#if SUPPORT_XE3_CORE
#ifdef SUPPORT_PTL
DEVICE_CONFIG(PTL_H_A0, PtlHwConfig, ptlHDeviceIds, XE3_FAMILY, XE3_LPG_RELEASE)
DEVICE_CONFIG(PTL_H_B0, PtlHwConfig, ptlHDeviceIds, XE3_FAMILY, XE3_LPG_RELEASE)
DEVICE_CONFIG(PTL_U_A0, PtlHwConfig, ptlUDeviceIds, XE3_FAMILY, XE3_LPG_RELEASE)
DEVICE_CONFIG(PTL_U_A1, PtlHwConfig, ptlUDeviceIds, XE3_FAMILY, XE3_LPG_RELEASE)
#endif
#endif
#if SUPPORT_XE2_HPG_CORE
#ifdef SUPPORT_BMG
DEVICE_CONFIG(BMG_G21_A0, BmgHwConfig, bmgG21DeviceIds, XE2_FAMILY, XE2_HPG_RELEASE)

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2020-2024 Intel Corporation
* Copyright (C) 2020-2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -18,3 +18,6 @@
#ifdef SUPPORT_XE2_HPG_CORE
#include "shared/source/xe2_hpg_core/aub_mapper.h"
#endif
#ifdef SUPPORT_XE3_CORE
#include "shared/source/xe3_core/aub_mapper.h"
#endif

View File

@@ -1,5 +1,5 @@
#
# Copyright (C) 2019-2024 Intel Corporation
# Copyright (C) 2019-2025 Intel Corporation
#
# SPDX-License-Identifier: MIT
#
@@ -226,6 +226,13 @@ if(SUPPORT_XE2_CORE OR SUPPORT_XE_2HPG_CORE)
)
endif()
# Add the Xe3-and-later GfxCoreHelper and preamble implementations to the helpers
# source list only when SUPPORT_XE3_AND_LATER is set.
if(SUPPORT_XE3_AND_LATER)
list(APPEND NEO_CORE_HELPERS
${CMAKE_CURRENT_SOURCE_DIR}/gfx_core_helper_xe3_and_later.inl
${CMAKE_CURRENT_SOURCE_DIR}/preamble_xe3_and_later.inl
)
endif()
set(NEO_CORE_HELPERS_GMM_CALLBACKS_WINDOWS
${CMAKE_CURRENT_SOURCE_DIR}/windows/gmm_callbacks.cpp
${CMAKE_CURRENT_SOURCE_DIR}/windows/gmm_callbacks.h

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2022-2024 Intel Corporation
* Copyright (C) 2022-2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -7,6 +7,12 @@
#pragma once
#if SUPPORT_XE3_CORE
#ifdef SUPPORT_PTL
#include "device_ids_configs_ptl.h"
#endif
#endif
#if SUPPORT_XE2_HPG_CORE
#ifdef SUPPORT_BMG
#include "device_ids_configs_bmg.h"

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2019-2024 Intel Corporation
* Copyright (C) 2019-2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -18,3 +18,6 @@
#ifdef SUPPORT_XE2_HPG_CORE
#include "shared/source/xe2_hpg_core/hw_cmds.h"
#endif
#ifdef SUPPORT_XE3_CORE
#include "hw_cmds_xe3_core.h"
#endif

View File

@@ -0,0 +1,28 @@
/*
* Copyright (C) 2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/helpers/gfx_core_helper.h"
#include "shared/source/helpers/hw_info.h"
namespace NEO {
/// Computes how many hardware threads are available for a kernel using grfCount registers.
///
/// The per-EU thread limit is taken from the first tier whose GRF bound is >= grfCount
/// (1 thread per EU when grfCount exceeds every tier). The result is that limit times the
/// EU count, capped by the device's total thread count.
template <>
uint32_t GfxCoreHelperHw<Family>::calculateAvailableThreadCount(const HardwareInfo &hwInfo, uint32_t grfCount) const {
    // Inclusive GRF upper bound -> threads per EU, ordered by ascending bound.
    struct GrfTier {
        uint32_t maxGrfCount;
        uint32_t threadsPerEu;
    };
    constexpr GrfTier grfTiers[] = {
        {96u, 10u},
        {128u, 8u},
        {160u, 6u},
        {192u, 5u},
        {256u, 4u},
    };

    uint32_t threadsPerEu = 1u;
    for (const auto &tier : grfTiers) {
        if (grfCount <= tier.maxGrfCount) {
            threadsPerEu = tier.threadsPerEu;
            break;
        }
    }

    const auto &sysInfo = hwInfo.gtSystemInfo;
    return std::min(sysInfo.ThreadCount, threadsPerEu * sysInfo.EUCount);
}
} // namespace NEO

View File

@@ -0,0 +1,55 @@
/*
* Copyright (C) 2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/command_stream/linear_stream.h"
#include "shared/source/command_stream/stream_properties.h"
#include "shared/source/debug_settings/debug_settings_manager.h"
#include "shared/source/helpers/pipeline_select_helper.h"
#include "shared/source/helpers/preamble.h"
namespace NEO {
/// Returns the command stream space needed by programPipelineSelect:
/// sizeof(PIPELINE_SELECT) when the PipelinedPipelineSelect debug flag is set, otherwise 0.
template <typename Family>
size_t PreambleHelper<Family>::getCmdSizeForPipelineSelect(const RootDeviceEnvironment &rootDeviceEnvironment) {
    if (!debugManager.flags.PipelinedPipelineSelect.get()) {
        // No PIPELINE_SELECT is programmed unless the debug flag asks for it.
        return 0;
    }
    using PIPELINE_SELECT = typename Family::PIPELINE_SELECT;
    return sizeof(PIPELINE_SELECT);
}
/// Emits a PIPELINE_SELECT command selecting the GPGPU pipeline, but only when the
/// PipelinedPipelineSelect debug flag is set; otherwise nothing is written to the stream.
/// pipelineSelectArgs and rootDeviceEnvironment are not used by this implementation.
template <typename Family>
void PreambleHelper<Family>::programPipelineSelect(LinearStream *pCommandStream,
                                                   const PipelineSelectArgs &pipelineSelectArgs,
                                                   const RootDeviceEnvironment &rootDeviceEnvironment) {
    if (!debugManager.flags.PipelinedPipelineSelect.get()) {
        return;
    }

    using PIPELINE_SELECT = typename Family::PIPELINE_SELECT;

    // Reserve space in the stream, then build the command locally and copy it in.
    auto destination = pCommandStream->getSpaceForCmd<PIPELINE_SELECT>();

    auto cmd = Family::cmdInitPipelineSelect;
    cmd.setMaskBits(pipelineSelectEnablePipelineSelectMaskBits);
    cmd.setPipelineSelection(PIPELINE_SELECT::PIPELINE_SELECTION_GPGPU);

    *destination = cmd;
}
/// Applies the CFEStackIDControl debug override to a CFE_STATE command.
/// When the flag is -1 the command is left untouched.
///
/// @param cmd pointer to the CFE_STATE command to modify in place.
template <typename Family>
void PreambleHelper<Family>::appendProgramVFEState(const RootDeviceEnvironment &rootDeviceEnvironment, const StreamProperties &streamProperties, void *cmd) {
    const auto stackIdControlOverride = debugManager.flags.CFEStackIDControl.get();
    if (stackIdControlOverride == -1) {
        return;
    }

    using CFE_STATE = typename Family::CFE_STATE;
    using STACK_ID_CONTROL = typename CFE_STATE::STACK_ID_CONTROL;

    auto cfeState = static_cast<CFE_STATE *>(cmd);
    cfeState->setStackIdControl(static_cast<STACK_ID_CONTROL>(stackIdControlOverride));
}
} // namespace NEO

View File

@@ -0,0 +1,26 @@
/*
* Copyright (C) 2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/command_stream/aub_command_stream_receiver.h"
#include "shared/source/command_stream/aub_command_stream_receiver_hw_xehp_and_later.inl"
#include "shared/source/helpers/array_count.h"
#include "shared/source/helpers/populate_factory.h"
namespace NEO {
using Family = Xe3CoreFamily;
static auto gfxCore = IGFX_XE3_CORE;
// Registers the Xe3 AUB command stream receiver creator in the global factory table,
// in the slot indexed by IGFX_XE3_CORE. Aborts if that index is outside the table.
template <>
void populateFactoryTable<AUBCommandStreamReceiverHw<Family>>() {
extern AubCommandStreamReceiverCreateFunc aubCommandStreamReceiverFactory[IGFX_MAX_CORE];
UNRECOVERABLE_IF(!isInRange(gfxCore, aubCommandStreamReceiverFactory));
aubCommandStreamReceiverFactory[gfxCore] = AUBCommandStreamReceiverHw<Family>::create;
}
template class AUBCommandStreamReceiverHw<Family>;
} // namespace NEO

View File

@@ -0,0 +1,31 @@
/*
* Copyright (C) 2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include "shared/source/aub/aub_mapper_base.h"
#include "shared/source/helpers/constants.h"
#include "shared/source/xe3_core/hw_cmds_base.h"
#include "aubstream/engine_node.h"
namespace NEO {
struct Xe3CoreFamily;
// AUB mapping for the Xe3 core family: ties the family to the Ptl AUB device value and
// declares the per-engine trait and MMIO tables (they are defined outside this header).
template <>
struct AUBFamilyMapper<Xe3CoreFamily> {
// Device value for the AUB traits below.
enum { device = AubMemDump::DeviceValues::Ptl };
using AubTraits = AubMemDump::Traits<device, MemoryConstants::gfxAddressBits>;
// One entry per aub_stream engine.
static const AubMemDump::LrcaHelper *const csTraits[aub_stream::NUM_ENGINES];
static const MMIOList globalMMIO;
static const MMIOList *perEngineMMIO[aub_stream::NUM_ENGINES];
using AUB = AubMemDump::AubDump<AubTraits>;
};
} // namespace NEO

View File

@@ -0,0 +1,20 @@
/*
* Copyright (C) 2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/aub/aub_helper_xehp_and_later.inl"
#include "shared/source/aub_mem_dump/aub_mem_dump.h"
#include "shared/source/xe3_core/hw_cmds_base.h"
namespace NEO {
struct Xe3CoreFamily;
// Family and deviceValue are defined ahead of the aub_mem_dump_pvc_and_later.inl include
// that follows this namespace (presumably that file consumes them — confirm).
using Family = NEO::Xe3CoreFamily;
constexpr static auto deviceValue = AubMemDump::DeviceValues::Ptl;
// Explicit instantiation of the AUB helper for the Xe3 family.
template class AubHelperHw<Family>;
} // namespace NEO
#include "shared/source/aub_mem_dump/aub_mem_dump_pvc_and_later.inl"

View File

@@ -0,0 +1,146 @@
/*
* Copyright (C) 2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/command_container/command_encoder.h"
#include "shared/source/command_container/command_encoder.inl"
#include "shared/source/command_container/command_encoder_from_xe3_and_later.inl"
#include "shared/source/command_container/command_encoder_from_xe_hpg_core_to_xe3_core.inl"
#include "shared/source/command_container/command_encoder_heap_addressing.inl"
#include "shared/source/command_container/command_encoder_tgllp_and_later.inl"
#include "shared/source/command_container/command_encoder_xe2_hpg_core_and_later.inl"
#include "shared/source/command_container/command_encoder_xe_hpc_core_and_later.inl"
#include "shared/source/command_container/command_encoder_xehp_and_later.inl"
#include "shared/source/command_stream/stream_properties.h"
#include "shared/source/helpers/cache_policy.h"
#include "shared/source/helpers/constants.h"
#include "shared/source/kernel/grf_config.h"
#include "shared/source/release_helper/release_helper.h"
#include "shared/source/xe3_core/hw_cmds_base.h"
using Family = NEO::Xe3CoreFamily;
namespace NEO {
// Builds a STATE_COMPUTE_MODE command from the dirty compute mode properties and writes it
// into the given stream.
//
// Only properties marked dirty are programmed. For each one, the matching enable bit is
// OR-ed into mask 1. Mask 2 is read from the initial command and written back unchanged.
// The command is emitted even when no property is dirty.
template <>
void EncodeComputeMode<Family>::programComputeModeCommand(LinearStream &csr, StateComputeModeProperties &properties, const RootDeviceEnvironment &rootDeviceEnvironment) {
using STATE_COMPUTE_MODE = typename Family::STATE_COMPUTE_MODE;
STATE_COMPUTE_MODE stateComputeMode = Family::cmdInitStateComputeMode;
auto maskBits = stateComputeMode.getMask1();
auto maskBits2 = stateComputeMode.getMask2();
if (properties.threadArbitrationPolicy.isDirty) {
// Translate the runtime arbitration policy to the EU thread scheduling override;
// any value not listed falls back to the hardware default.
switch (properties.threadArbitrationPolicy.value) {
case ThreadArbitrationPolicy::RoundRobin:
stateComputeMode.setEuThreadSchedulingModeOverride(STATE_COMPUTE_MODE::EU_THREAD_SCHEDULING_MODE_OVERRIDE::EU_THREAD_SCHEDULING_MODE_OVERRIDE_ROUND_ROBIN);
break;
case ThreadArbitrationPolicy::AgeBased:
stateComputeMode.setEuThreadSchedulingModeOverride(STATE_COMPUTE_MODE::EU_THREAD_SCHEDULING_MODE_OVERRIDE::EU_THREAD_SCHEDULING_MODE_OVERRIDE_OLDEST_FIRST);
break;
case ThreadArbitrationPolicy::RoundRobinAfterDependency:
stateComputeMode.setEuThreadSchedulingModeOverride(STATE_COMPUTE_MODE::EU_THREAD_SCHEDULING_MODE_OVERRIDE::EU_THREAD_SCHEDULING_MODE_OVERRIDE_STALL_BASED_ROUND_ROBIN);
break;
default:
stateComputeMode.setEuThreadSchedulingModeOverride(STATE_COMPUTE_MODE::EU_THREAD_SCHEDULING_MODE_OVERRIDE::EU_THREAD_SCHEDULING_MODE_OVERRIDE_HW_DEFAULT);
}
maskBits |= Family::stateComputeModeEuThreadSchedulingModeOverrideMask;
}
if (properties.largeGrfMode.isDirty) {
stateComputeMode.setLargeGrfMode(properties.largeGrfMode.value);
maskBits |= Family::stateComputeModeLargeGrfModeMask;
}
if (properties.enableVariableRegisterSizeAllocation.isDirty) {
stateComputeMode.setEnableVariableRegisterSizeAllocationVrt(properties.enableVariableRegisterSizeAllocation.value);
maskBits |= Family::stateComputeModeEnableVariableRegisterSizeAllocationMask;
}
stateComputeMode.setMask1(maskBits);
stateComputeMode.setMask2(maskBits2);
// Reserve space in the stream and copy the finished command into it.
auto buffer = csr.getSpaceForCmd<STATE_COMPUTE_MODE>();
*buffer = stateComputeMode;
}
// Emits STATE_PREFETCH commands covering `size` bytes of the allocation, starting at
// `offset` from its GPU address. Nothing is emitted when the product helper disallows
// memory prefetch.
//
// The range is split into chunks of at most 64 KB. Each chunk is rounded up to a whole
// number of cache lines, and the prefetch size is programmed in cache lines.
template <>
void EncodeMemoryPrefetch<Family>::programMemoryPrefetch(LinearStream &commandStream, const GraphicsAllocation &graphicsAllocation, uint32_t size, size_t offset, const RootDeviceEnvironment &rootDeviceEnvironment) {
using STATE_PREFETCH = typename Family::STATE_PREFETCH;
// NOTE(review): hard-coded MOCS value; presumably MOCS index 2 shifted into the field's encoding — confirm.
constexpr uint32_t mocsIndexForL3 = (2 << 1);
auto &hwInfo = *rootDeviceEnvironment.getHardwareInfo();
auto &productHelper = rootDeviceEnvironment.getHelper<ProductHelper>();
bool prefetch = productHelper.allowMemoryPrefetch(hwInfo);
if (!prefetch) {
return;
}
uint64_t gpuVa = graphicsAllocation.getGpuAddress() + offset;
while (size > 0) {
uint32_t sizeInBytesToPrefetch = std::min(alignUp(size, MemoryConstants::cacheLineSize),
static_cast<uint32_t>(MemoryConstants::pageSize64k));
uint32_t prefetchSize = sizeInBytesToPrefetch / MemoryConstants::cacheLineSize;
auto statePrefetch = commandStream.getSpaceForCmd<STATE_PREFETCH>();
STATE_PREFETCH cmd = Family::cmdInitStatePrefetch;
cmd.setAddress(gpuVa);
cmd.setPrefetchSize(prefetchSize);
cmd.setMemoryObjectControlState(mocsIndexForL3);
// Flag the prefetch as an instruction prefetch when the allocation holds ISA.
cmd.setKernelInstructionPrefetch(GraphicsAllocation::isIsaAllocationType(graphicsAllocation.getAllocationType()));
if (debugManager.flags.ForceCsStallForStatePrefetch.get() == 1) {
cmd.setParserStall(true);
}
*statePrefetch = cmd;
// The last chunk was rounded up past the remaining size: stop here so the unsigned
// subtraction below cannot wrap around.
if (sizeInBytesToPrefetch > size) {
break;
}
gpuVa += sizeInBytesToPrefetch;
size -= sizeInBytesToPrefetch;
}
}
// Sets the auxiliary surface mode to AUX_NONE when the release helper reports that MSAA
// must be disabled; otherwise the surface state is left unchanged.
// A null releaseHelper is treated as an unrecoverable error.
template <>
void EncodeSurfaceState<Family>::setAuxParamsForMCSCCS(R_SURFACE_STATE *surfaceState, const ReleaseHelper *releaseHelper) {
UNRECOVERABLE_IF(releaseHelper == nullptr);
if (releaseHelper->isDisablingMsaaRequired()) {
surfaceState->setAuxiliarySurfaceMode(AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_NONE);
}
}
// Xe3-specific walker tuning: when the kernel descriptor's hasSample flag is set, the walker
// is programmed with Morton walk order and a thread-group batch size of 4.
// Other kernels leave the walker command untouched. rootDeviceEnvironment is not used.
// NOTE(review): the enum values come from DefaultWalkerType rather than WalkerType —
// confirm they are interchangeable for every WalkerType instantiated.
template <>
template <typename WalkerType>
void EncodeDispatchKernel<Family>::encodeAdditionalWalkerFields(const RootDeviceEnvironment &rootDeviceEnvironment, WalkerType &walkerCmd, const EncodeWalkerArgs &walkerArgs) {
if (walkerArgs.kernelDescriptor.kernelAttributes.flags.hasSample) {
walkerCmd.setDispatchWalkOrder(DefaultWalkerType::DISPATCH_WALK_ORDER::DISPATCH_WALK_ORDER_MORTON_WALK);
walkerCmd.setThreadGroupBatchSize(DefaultWalkerType::THREAD_GROUP_BATCH_SIZE::THREAD_GROUP_BATCH_SIZE_TG_BATCH_4);
}
}
/// Reports whether ray tracing resources must stay within 48-bit addressing.
///
/// By default (Enable64bAddressingForRayTracing == -1) this returns false. An explicit flag
/// value is inverted: 48-bit resources are needed exactly when the flag is 0.
template <>
bool EncodeEnableRayTracing<Family>::is48bResourceNeededForRayTracing() {
    const auto enable64bAddressing = debugManager.flags.Enable64bAddressingForRayTracing.get();
    const bool flagOverridden = (enable64bAddressing != -1);
    return flagOverridden && !enable64bAddressing;
}
} // namespace NEO
#include "shared/source/command_container/command_encoder_enablers.inl"
namespace NEO {
// Explicit instantiation of the in-order compute walker patching routine for the Xe3 family.
template void InOrderPatchCommandHelpers::PatchCmd<Family>::patchComputeWalker(uint64_t appendCounterValue);
} // namespace NEO

View File

@@ -0,0 +1,322 @@
/*
* Copyright (C) 2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/gmm_helper/client_context/gmm_client_context.h"
#include "shared/source/xe3_core/hw_cmds_base.h"
using Family = NEO::Xe3CoreFamily;
#include "shared/source/command_stream/command_stream_receiver_hw_dg2_and_later.inl"
#include "shared/source/command_stream/command_stream_receiver_hw_heap_addressing.inl"
#include "shared/source/command_stream/command_stream_receiver_hw_xehp_and_later.inl"
#include "shared/source/gmm_helper/gmm.h"
#include "shared/source/helpers/blit_commands_helper_xehp_and_later.inl"
#include "shared/source/helpers/constants.h"
#include "shared/source/helpers/populate_factory.h"
#include "shared/source/helpers/register_offsets.h"
#include "shared/source/memory_manager/allocation_properties.h"
namespace NEO {
static auto gfxCore = IGFX_XE3_CORE;
// Xe3 defaults for implicit flushing: both settings (for a new resource and for GPU idle)
// are initialized to false.
template <>
bool ImplicitFlushSettings<Family>::defaultSettingForNewResource = false;
template <>
bool ImplicitFlushSettings<Family>::defaultSettingForGpuIdle = false;
template class ImplicitFlushSettings<Family>;
// Registers the Xe3 device command stream receiver creator in the global factory table,
// in the slot indexed by IGFX_XE3_CORE.
// NOTE(review): unlike the AUB factory registration, the index is not range-checked here.
template <>
void populateFactoryTable<CommandStreamReceiverHw<Family>>() {
extern CommandStreamReceiverCreateFunc commandStreamReceiverFactory[2 * IGFX_MAX_CORE];
commandStreamReceiverFactory[gfxCore] = DeviceCommandStreamReceiver<Family>::create;
}
/// Programs the one-time engine prologue into the given stream.
///
/// On the first call a system memory fence is encoded when a global fence allocation exists,
/// and the prologue is marked as sent. Later calls do nothing.
template <>
void CommandStreamReceiverHw<Family>::programEnginePrologue(LinearStream &csr) {
    if (this->isEnginePrologueSent) {
        return;
    }

    if (auto globalFenceAllocation = getGlobalFenceAllocation()) {
        EncodeMemoryFence<Family>::encodeSystemMemoryFence(csr, globalFenceAllocation);
    }

    // Marked as sent even when there was no fence allocation to program.
    this->isEnginePrologueSent = true;
}
/// Returns the stream space programEnginePrologue will consume: the system memory fence size
/// while the prologue is still pending and a global fence allocation exists, otherwise 0.
template <>
size_t CommandStreamReceiverHw<Family>::getCmdSizeForPrologue() const {
    // Short-circuit keeps the allocation lookup from running once the prologue has been sent.
    const bool fencePending = !this->isEnginePrologueSent && getGlobalFenceAllocation();
    if (fencePending) {
        return EncodeMemoryFence<Family>::getSystemMemoryFenceSize();
    }
    return 0u;
}
// Fills the Xe3-specific fields of an XY_BLOCK_COPY_BLT command: compression formats,
// source/destination target memory, surface dimensions and type, and MOCS.
//
// Several debug flags can override the computed values: ForceBufferCompressionFormat,
// OverrideBlitterTargetMemory and OverrideBlitterMocs.
template <>
void BlitCommandsHelper<Family>::appendBlitCommandsBlockCopy(const BlitProperties &blitProperties, typename Family::XY_BLOCK_COPY_BLT &blitCmd, const RootDeviceEnvironment &rootDeviceEnvironment) {
using XY_BLOCK_COPY_BLT = typename Family::XY_BLOCK_COPY_BLT;
// Compression format stays 0 for an allocation that is not compressed.
uint8_t srcCompressionFormat = 0;
uint8_t dstCompressionFormat = 0;
auto dstAllocation = blitProperties.dstAllocation;
auto srcAllocation = blitProperties.srcAllocation;
if (srcAllocation->isCompressionEnabled()) {
auto resourceFormat = srcAllocation->getDefaultGmm()->gmmResourceInfo->getResourceFormat();
srcCompressionFormat = rootDeviceEnvironment.getGmmClientContext()->getSurfaceStateCompressionFormat(resourceFormat);
}
if (dstAllocation->isCompressionEnabled()) {
auto resourceFormat = dstAllocation->getDefaultGmm()->gmmResourceInfo->getResourceFormat();
dstCompressionFormat = rootDeviceEnvironment.getGmmClientContext()->getSurfaceStateCompressionFormat(resourceFormat);
}
// Debug override: applied only to the side(s) that are actually compressed.
if (debugManager.flags.ForceBufferCompressionFormat.get() != -1) {
if (srcAllocation->isCompressionEnabled()) {
srcCompressionFormat = static_cast<uint8_t>(debugManager.flags.ForceBufferCompressionFormat.get());
}
if (dstAllocation->isCompressionEnabled()) {
dstCompressionFormat = static_cast<uint8_t>(debugManager.flags.ForceBufferCompressionFormat.get());
}
}
// Aux translation is only expected as an in-place operation on a compressed allocation.
DEBUG_BREAK_IF((AuxTranslationDirection::none != blitProperties.auxTranslationDirection) &&
(blitProperties.dstAllocation != blitProperties.srcAllocation || !blitProperties.dstAllocation->isCompressionEnabled()));
blitCmd.setSourceCompressionFormat(static_cast<XY_BLOCK_COPY_BLT::SOURCE_COMPRESSION_FORMAT>(srcCompressionFormat));
blitCmd.setDestinationCompressionFormat(static_cast<XY_BLOCK_COPY_BLT::DESTINATION_COMPRESSION_FORMAT>(dstCompressionFormat));
// Target memory is switched to system memory for allocations from a system memory pool;
// otherwise the value already present in the command is kept.
if (MemoryPoolHelper::isSystemMemoryPool(blitProperties.dstAllocation->getMemoryPool())) {
blitCmd.setDestinationTargetMemory(XY_BLOCK_COPY_BLT::TARGET_MEMORY::TARGET_MEMORY_SYSTEM_MEM);
}
if (MemoryPoolHelper::isSystemMemoryPool(blitProperties.srcAllocation->getMemoryPool())) {
blitCmd.setSourceTargetMemory(XY_BLOCK_COPY_BLT::TARGET_MEMORY::TARGET_MEMORY_SYSTEM_MEM);
}
// Debug override: 0 forces system memory and 1 forces local memory, for both sides.
if (debugManager.flags.OverrideBlitterTargetMemory.get() != -1) {
if (debugManager.flags.OverrideBlitterTargetMemory.get() == 0u) {
blitCmd.setDestinationTargetMemory(XY_BLOCK_COPY_BLT::TARGET_MEMORY::TARGET_MEMORY_SYSTEM_MEM);
blitCmd.setSourceTargetMemory(XY_BLOCK_COPY_BLT::TARGET_MEMORY::TARGET_MEMORY_SYSTEM_MEM);
} else if (debugManager.flags.OverrideBlitterTargetMemory.get() == 1u) {
blitCmd.setDestinationTargetMemory(XY_BLOCK_COPY_BLT::TARGET_MEMORY::TARGET_MEMORY_LOCAL_MEM);
blitCmd.setSourceTargetMemory(XY_BLOCK_COPY_BLT::TARGET_MEMORY::TARGET_MEMORY_LOCAL_MEM);
}
}
appendExtraMemoryProperties(blitCmd, rootDeviceEnvironment);
// Source and destination surface dimensions are both taken from the destination
// rectangle's right/bottom coordinates already programmed in the command.
blitCmd.setSourceSurfaceWidth(blitCmd.getDestinationX2CoordinateRight());
blitCmd.setSourceSurfaceHeight(blitCmd.getDestinationY2CoordinateBottom());
blitCmd.setDestinationSurfaceWidth(blitCmd.getDestinationX2CoordinateRight());
blitCmd.setDestinationSurfaceHeight(blitCmd.getDestinationY2CoordinateBottom());
// More than one row means a 2D surface; a single row is a 1D surface.
if (blitCmd.getDestinationY2CoordinateBottom() > 1) {
blitCmd.setDestinationSurfaceType(XY_BLOCK_COPY_BLT::SURFACE_TYPE::SURFACE_TYPE_SURFTYPE_2D);
blitCmd.setSourceSurfaceType(XY_BLOCK_COPY_BLT::SURFACE_TYPE::SURFACE_TYPE_SURFTYPE_2D);
} else {
blitCmd.setDestinationSurfaceType(XY_BLOCK_COPY_BLT::SURFACE_TYPE::SURFACE_TYPE_SURFTYPE_1D);
blitCmd.setSourceSurfaceType(XY_BLOCK_COPY_BLT::SURFACE_TYPE::SURFACE_TYPE_SURFTYPE_1D);
}
// Both sides share the OCL buffer MOCS unless the debug override is set.
auto mocs = rootDeviceEnvironment.getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER);
if (debugManager.flags.OverrideBlitterMocs.get() != -1) {
mocs = static_cast<uint32_t>(debugManager.flags.OverrideBlitterMocs.get());
}
blitCmd.setDestinationMOCS(mocs);
blitCmd.setSourceMOCS(mocs);
}
// Buffer blits on Xe3 take the mem-copy path: the call forwards unchanged to
// appendBlitCommandsMemCopy.
template <>
template <typename T>
void BlitCommandsHelper<Family>::appendBlitCommandsForBuffer(const BlitProperties &blitProperties, T &blitCmd, const RootDeviceEnvironment &rootDeviceEnvironment) {
appendBlitCommandsMemCopy(blitProperties, blitCmd, rootDeviceEnvironment);
}
// Always returns 1 for Xe3; the copy size, origins and sizes passed in are ignored.
template <>
uint32_t BlitCommandsHelper<Family>::getAvailableBytesPerPixel(size_t copySize, uint32_t srcOrigin, uint32_t dstOrigin, size_t srcSize, size_t dstSize) {
return 1;
}
// Fills the Xe3-specific fields of a mem-copy blit command: copy type, MOCS and
// compression format.
// NOTE(review): the command is typed as Family::XY_COPY_BLT while the enums come from
// Family::MEM_COPY — presumably XY_COPY_BLT aliases MEM_COPY for this family; confirm.
template <>
void BlitCommandsHelper<Family>::appendBlitCommandsMemCopy(const BlitProperties &blitProperties, typename Family::XY_COPY_BLT &blitCmd,
const RootDeviceEnvironment &rootDeviceEnvironment) {
using MEM_COPY = typename Family::MEM_COPY;
using COMPRESSION_FORMAT30 = typename MEM_COPY::COMPRESSION_FORMAT30;
auto dstAllocation = blitProperties.dstAllocation;
auto srcAllocation = blitProperties.srcAllocation;
// More than one row in the destination rectangle selects a matrix copy, otherwise linear.
if (blitCmd.getDestinationY2CoordinateBottom() > 1) {
blitCmd.setCopyType(MEM_COPY::COPY_TYPE::COPY_TYPE_MATRIX_COPY);
} else {
blitCmd.setCopyType(MEM_COPY::COPY_TYPE::COPY_TYPE_LINEAR_COPY);
}
// Both sides share the OCL buffer MOCS unless the debug override is set.
auto mocs = rootDeviceEnvironment.getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER);
if (debugManager.flags.OverrideBlitterMocs.get() != -1) {
mocs = static_cast<uint32_t>(debugManager.flags.OverrideBlitterMocs.get());
}
blitCmd.setDestinationMOCS(mocs);
blitCmd.setSourceMOCS(mocs);
// A single compression format is programmed. The destination's format is used when the
// destination is compressed, otherwise the source's; it stays 0 when neither is compressed.
uint8_t compressionFormat = 0;
if (dstAllocation->isCompressionEnabled()) {
auto resourceFormat = dstAllocation->getDefaultGmm()->gmmResourceInfo->getResourceFormat();
compressionFormat = rootDeviceEnvironment.getGmmClientContext()->getSurfaceStateCompressionFormat(resourceFormat);
} else if (srcAllocation->isCompressionEnabled()) {
auto resourceFormat = srcAllocation->getDefaultGmm()->gmmResourceInfo->getResourceFormat();
compressionFormat = rootDeviceEnvironment.getGmmClientContext()->getSurfaceStateCompressionFormat(resourceFormat);
}
// Debug path: with stateless compression enabled, the format is forced whenever at least
// one side is not in a system memory pool.
if (debugManager.flags.EnableStatelessCompressionWithUnifiedMemory.get()) {
if (!MemoryPoolHelper::isSystemMemoryPool(srcAllocation->getMemoryPool()) || !MemoryPoolHelper::isSystemMemoryPool(dstAllocation->getMemoryPool())) {
compressionFormat = static_cast<uint8_t>(debugManager.flags.FormatForStatelessCompressionWithUnifiedMemory.get());
}
}
blitCmd.setCompressionFormat(static_cast<COMPRESSION_FORMAT30>(compressionFormat));
// Aux translation is not expected on this path.
DEBUG_BREAK_IF(AuxTranslationDirection::none != blitProperties.auxTranslationDirection);
}
// Fills `size` bytes of dstAlloc, starting `offset` bytes past its GPU address,
// by emitting one or more MEM_SET commands into linearStream.
//
// Parameters:
//   dstAlloc              - allocation to fill; its GPU address is the base.
//   offset                - byte offset into dstAlloc where the fill starts.
//   pattern               - pointer to the fill value; *pattern is programmed as fill data.
//   linearStream          - command stream that receives the MEM_SET commands.
//   size                  - total number of bytes to fill.
//   rootDeviceEnvironment - source of the GMM helper / client context.
//   depth                 - not used by this specialization.
//
// The fill is split into chunks: a chunk is either a single row of at most
// maxBlitSetWidth bytes, or a matrix of full-width rows capped at
// maxBlitSetHeight. Any remainder is handled by the next loop iteration.
template <>
template <>
void BlitCommandsHelper<Family>::dispatchBlitMemoryFill<1>(NEO::GraphicsAllocation *dstAlloc, uint64_t offset, uint32_t *pattern, LinearStream &linearStream, size_t size, RootDeviceEnvironment &rootDeviceEnvironment, COLOR_DEPTH depth) {
    using MEM_SET = typename Family::MEM_SET;
    using COMPRESSION_FORMAT30 = typename MEM_SET::COMPRESSION_FORMAT30;
    auto blitCmd = Family::cmdInitMemSet;

    // Default MOCS for buffers; a debug flag value other than -1 overrides it.
    auto mocs = rootDeviceEnvironment.getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER);
    if (debugManager.flags.OverrideBlitterMocs.get() != -1) {
        mocs = static_cast<uint32_t>(debugManager.flags.OverrideBlitterMocs.get());
    }
    blitCmd.setDestinationMOCS(mocs);

    if (dstAlloc->isCompressionEnabled()) {
        auto resourceFormat = dstAlloc->getDefaultGmm()->gmmResourceInfo->getResourceFormat();
        auto compressionFormat = static_cast<COMPRESSION_FORMAT30>(rootDeviceEnvironment.getGmmClientContext()->getSurfaceStateCompressionFormat(resourceFormat));
        blitCmd.setCompressionFormat(compressionFormat);
    }

    // Debug override for destinations outside the system memory pool. The flag
    // value is cast explicitly, matching the compressed-allocation path above
    // and appendBlitCommandsMemCopy.
    if (debugManager.flags.EnableStatelessCompressionWithUnifiedMemory.get()) {
        if (!MemoryPoolHelper::isSystemMemoryPool(dstAlloc->getMemoryPool())) {
            auto overriddenFormat = static_cast<COMPRESSION_FORMAT30>(debugManager.flags.FormatForStatelessCompressionWithUnifiedMemory.get());
            blitCmd.setCompressionFormat(overriddenFormat);
        }
    }

    blitCmd.setFillData(*pattern);

    auto sizeToFill = size;
    while (sizeToFill != 0) {
        // Each chunk starts from the common template and gets its own address and dimensions.
        auto tmpCmd = blitCmd;
        tmpCmd.setDestinationStartAddress(ptrOffset(dstAlloc->getGpuAddress(), static_cast<size_t>(offset)));

        size_t height = 0;
        size_t width = 0;
        if (sizeToFill <= BlitterConstants::maxBlitSetWidth) {
            // Everything left fits in a single row.
            width = sizeToFill;
            height = 1;
        } else {
            // Use full-width rows; only whole rows are covered by this chunk.
            width = BlitterConstants::maxBlitSetWidth;
            height = std::min<size_t>((sizeToFill / width), BlitterConstants::maxBlitSetHeight);
            if (height > 1) {
                tmpCmd.setFillType(MEM_SET::FILL_TYPE::FILL_TYPE_MATRIX_FILL);
            }
        }
        tmpCmd.setFillWidth(static_cast<uint32_t>(width));
        tmpCmd.setFillHeight(static_cast<uint32_t>(height));
        tmpCmd.setDestinationPitch(static_cast<uint32_t>(width));

        auto cmd = linearStream.getSpaceForCmd<MEM_SET>();
        *cmd = tmpCmd;

        // Advance past the bytes covered by this chunk.
        auto blitSize = width * height;
        offset += blitSize;
        sizeToFill -= blitSize;
    }
}
// Emits four register-to-memory stores that capture the "start" timestamps
// of a timestamp packet: for each of the context and global timestamps the
// upper register is written to (address + 4) and the lower register to the
// address itself. The emission order is: context high, global high,
// context low, global low.
template <>
void BlitCommandsHelper<Family>::encodeProfilingStartMmios(LinearStream &cmdStream, const TagNodeBase &timestampPacketNode) {
    const auto contextStartLow = TimestampPacketHelper::getContextStartGpuAddress(timestampPacketNode);
    const auto globalStartLow = TimestampPacketHelper::getGlobalStartGpuAddress(timestampPacketNode);

    // The upper dword lives directly after the lower one.
    const auto contextStartHigh = contextStartLow + sizeof(uint32_t);
    const auto globalStartHigh = globalStartLow + sizeof(uint32_t);

    EncodeStoreMMIO<Family>::encode(cmdStream, RegisterOffsets::gpThreadTimeRegAddressOffsetHigh, contextStartHigh, false, nullptr, true);
    EncodeStoreMMIO<Family>::encode(cmdStream, RegisterOffsets::globalTimestampUn, globalStartHigh, false, nullptr, true);
    EncodeStoreMMIO<Family>::encode(cmdStream, RegisterOffsets::gpThreadTimeRegAddressOffsetLow, contextStartLow, false, nullptr, true);
    EncodeStoreMMIO<Family>::encode(cmdStream, RegisterOffsets::globalTimestampLdw, globalStartLow, false, nullptr, true);
}
// Emits four register-to-memory stores that capture the "end" timestamps
// of a timestamp packet: for each of the context and global timestamps the
// upper register is written to (address + 4) and the lower register to the
// address itself. The emission order is: context high, global high,
// context low, global low.
template <>
void BlitCommandsHelper<Family>::encodeProfilingEndMmios(LinearStream &cmdStream, const TagNodeBase &timestampPacketNode) {
    const auto contextEndLow = TimestampPacketHelper::getContextEndGpuAddress(timestampPacketNode);
    const auto globalEndLow = TimestampPacketHelper::getGlobalEndGpuAddress(timestampPacketNode);

    // The upper dword lives directly after the lower one.
    const auto contextEndHigh = contextEndLow + sizeof(uint32_t);
    const auto globalEndHigh = globalEndLow + sizeof(uint32_t);

    EncodeStoreMMIO<Family>::encode(cmdStream, RegisterOffsets::gpThreadTimeRegAddressOffsetHigh, contextEndHigh, false, nullptr, true);
    EncodeStoreMMIO<Family>::encode(cmdStream, RegisterOffsets::globalTimestampUn, globalEndHigh, false, nullptr, true);
    EncodeStoreMMIO<Family>::encode(cmdStream, RegisterOffsets::gpThreadTimeRegAddressOffsetLow, contextEndLow, false, nullptr, true);
    EncodeStoreMMIO<Family>::encode(cmdStream, RegisterOffsets::globalTimestampLdw, globalEndLow, false, nullptr, true);
}
// Returns the size in bytes of eight MI_STORE_REGISTER_MEM commands, i.e.
// four stores for encodeProfilingStartMmios plus four for
// encodeProfilingEndMmios.
template <>
size_t BlitCommandsHelper<Family>::getProfilingMmioCmdsSize() {
    using MI_STORE_REGISTER_MEM = typename Family::MI_STORE_REGISTER_MEM;
    constexpr size_t storesForStart = 4u;
    constexpr size_t storesForEnd = 4u;
    return (storesForStart + storesForEnd) * sizeof(MI_STORE_REGISTER_MEM);
}
// Intentionally empty for this family: the XY_COLOR_BLT command is left
// untouched, i.e. no compression parameters are programmed for fill operations.
template <>
void setCompressionParamsForFillOperation<Family>(typename Family::XY_COLOR_BLT &xyColorBlt) {
}
// Explicit instantiations for this family. They come after the explicit
// specializations above so that those specializations are the ones used.
template class CommandStreamReceiverHw<Family>;
template struct BlitCommandsHelper<Family>;
// Member templates are not covered by the class instantiation above, so the
// variants listed here are instantiated one by one.
template void BlitCommandsHelper<Family>::appendColorDepth<typename Family::XY_BLOCK_COPY_BLT>(const BlitProperties &blitProperties, typename Family::XY_BLOCK_COPY_BLT &blitCmd);
template void BlitCommandsHelper<Family>::appendColorDepth<typename Family::XY_COPY_BLT>(const BlitProperties &blitProperties, typename Family::XY_COPY_BLT &blitCmd);
template void BlitCommandsHelper<Family>::appendBlitCommandsForBuffer<typename Family::XY_COPY_BLT>(const BlitProperties &blitProperties, typename Family::XY_COPY_BLT &blitCmd, const RootDeviceEnvironment &rootDeviceEnvironment);
// Definitions of the family's static "initial command" templates. Each one is
// initialized from the corresponding command type's sInit() and serves as the
// starting value that callers copy before programming individual fields
// (see e.g. the use of Family::cmdInitMemSet in dispatchBlitMemoryFill).
const Family::COMPUTE_WALKER Family::cmdInitGpgpuWalker = Family::COMPUTE_WALKER::sInit();
const Family::CFE_STATE Family::cmdInitCfeState = Family::CFE_STATE::sInit();
const Family::INTERFACE_DESCRIPTOR_DATA Family::cmdInitInterfaceDescriptorData = Family::INTERFACE_DESCRIPTOR_DATA::sInit();
const Family::MI_BATCH_BUFFER_START Family::cmdInitBatchBufferStart = Family::MI_BATCH_BUFFER_START::sInit();
const Family::MI_BATCH_BUFFER_END Family::cmdInitBatchBufferEnd = Family::MI_BATCH_BUFFER_END::sInit();
const Family::PIPE_CONTROL Family::cmdInitPipeControl = Family::PIPE_CONTROL::sInit();
const Family::STATE_COMPUTE_MODE Family::cmdInitStateComputeMode = Family::STATE_COMPUTE_MODE::sInit();
const Family::_3DSTATE_BINDING_TABLE_POOL_ALLOC Family::cmdInitStateBindingTablePoolAlloc =
Family::_3DSTATE_BINDING_TABLE_POOL_ALLOC::sInit();
const Family::MI_SEMAPHORE_WAIT Family::cmdInitMiSemaphoreWait = Family::MI_SEMAPHORE_WAIT::sInit();
const Family::RENDER_SURFACE_STATE Family::cmdInitRenderSurfaceState = Family::RENDER_SURFACE_STATE::sInit();
const Family::POSTSYNC_DATA Family::cmdInitPostSyncData = Family::POSTSYNC_DATA::sInit();
const Family::MI_SET_PREDICATE Family::cmdInitSetPredicate = Family::MI_SET_PREDICATE::sInit();
const Family::MI_LOAD_REGISTER_IMM Family::cmdInitLoadRegisterImm = Family::MI_LOAD_REGISTER_IMM::sInit();
const Family::MI_LOAD_REGISTER_REG Family::cmdInitLoadRegisterReg = Family::MI_LOAD_REGISTER_REG::sInit();
const Family::MI_LOAD_REGISTER_MEM Family::cmdInitLoadRegisterMem = Family::MI_LOAD_REGISTER_MEM::sInit();
const Family::MI_STORE_DATA_IMM Family::cmdInitStoreDataImm = Family::MI_STORE_DATA_IMM::sInit();
const Family::MI_STORE_REGISTER_MEM Family::cmdInitStoreRegisterMem = Family::MI_STORE_REGISTER_MEM::sInit();
const Family::MI_NOOP Family::cmdInitNoop = Family::MI_NOOP::sInit();
const Family::MI_REPORT_PERF_COUNT Family::cmdInitReportPerfCount = Family::MI_REPORT_PERF_COUNT::sInit();
const Family::MI_ATOMIC Family::cmdInitAtomic = Family::MI_ATOMIC::sInit();
const Family::PIPELINE_SELECT Family::cmdInitPipelineSelect = Family::PIPELINE_SELECT::sInit();
const Family::MI_ARB_CHECK Family::cmdInitArbCheck = Family::MI_ARB_CHECK::sInit();
const Family::STATE_BASE_ADDRESS Family::cmdInitStateBaseAddress = Family::STATE_BASE_ADDRESS::sInit();
const Family::MEDIA_SURFACE_STATE Family::cmdInitMediaSurfaceState = Family::MEDIA_SURFACE_STATE::sInit();
const Family::SAMPLER_STATE Family::cmdInitSamplerState = Family::SAMPLER_STATE::sInit();
const Family::BINDING_TABLE_STATE Family::cmdInitBindingTableState = Family::BINDING_TABLE_STATE::sInit();
const Family::MI_USER_INTERRUPT Family::cmdInitUserInterrupt = Family::MI_USER_INTERRUPT::sInit();
// NOTE(review): unlike every other line in this list, the variable below is NOT
// qualified with `Family::`, so it defines a namespace-scope constant rather
// than a static member of the family. Confirm against the family header whether
// a `cmdInitConditionalBatchBufferEnd` member is declared and was meant here.
const Family::MI_CONDITIONAL_BATCH_BUFFER_END cmdInitConditionalBatchBufferEnd = Family::MI_CONDITIONAL_BATCH_BUFFER_END::sInit();
const Family::MI_FLUSH_DW Family::cmdInitMiFlushDw = Family::MI_FLUSH_DW::sInit();
const Family::XY_BLOCK_COPY_BLT Family::cmdInitXyBlockCopyBlt = Family::XY_BLOCK_COPY_BLT::sInit();
// The "XyCopyBlt" and "XyColorBlt" templates are built from MEM_COPY and
// XY_FAST_COLOR_BLT respectively.
const Family::MEM_COPY Family::cmdInitXyCopyBlt = Family::MEM_COPY::sInit();
const Family::XY_FAST_COLOR_BLT Family::cmdInitXyColorBlt = Family::XY_FAST_COLOR_BLT::sInit();
const Family::STATE_PREFETCH Family::cmdInitStatePrefetch = Family::STATE_PREFETCH::sInit();
const Family::_3DSTATE_BTD Family::cmd3dStateBtd = Family::_3DSTATE_BTD::sInit();
const Family::MI_MEM_FENCE Family::cmdInitMemFence = Family::MI_MEM_FENCE::sInit();
const Family::MEM_SET Family::cmdInitMemSet = Family::MEM_SET::sInit();
const Family::STATE_SIP Family::cmdInitStateSip = Family::STATE_SIP::sInit();
const Family::STATE_CONTEXT_DATA_BASE_ADDRESS Family::cmdInitStateContextDataBaseAddress = Family::STATE_CONTEXT_DATA_BASE_ADDRESS::sInit();
const Family::STATE_SYSTEM_MEM_FENCE_ADDRESS Family::cmdInitStateSystemMemFenceAddress = Family::STATE_SYSTEM_MEM_FENCE_ADDRESS::sInit();
} // namespace NEO

View File

@@ -0,0 +1,35 @@
/*
* Copyright (C) 2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/command_stream/command_stream_receiver_simulated_common_hw_xehp_and_later.inl"
#include "shared/source/gmm_helper/gmm_helper.h"
#include "shared/source/helpers/ptr_math.h"
namespace NEO {
using Family = Xe3CoreFamily;

// Verifies that the memory at gfxAddress equals the `length` bytes at
// srcAddress. The address is decanonized before being handed to expectMemory.
template <>
bool CommandStreamReceiverSimulatedCommonHw<Family>::expectMemoryEqual(void *gfxAddress, const void *srcAddress, size_t length) {
    const auto decanonizedAddress = peekGmmHelper()->decanonize(castToUint64(gfxAddress));
    auto *targetMemory = reinterpret_cast<void *>(decanonizedAddress);
    return this->expectMemory(targetMemory, srcAddress, length,
                              AubMemDump::CmdServicesMemTraceMemoryCompare::CompareOperationValues::CompareEqual);
}

// Verifies that the memory at gfxAddress differs from the `length` bytes at
// srcAddress. The address is decanonized before being handed to expectMemory.
template <>
bool CommandStreamReceiverSimulatedCommonHw<Family>::expectMemoryNotEqual(void *gfxAddress, const void *srcAddress, size_t length) {
    const auto decanonizedAddress = peekGmmHelper()->decanonize(castToUint64(gfxAddress));
    auto *targetMemory = reinterpret_cast<void *>(decanonizedAddress);
    return this->expectMemory(targetMemory, srcAddress, length,
                              AubMemDump::CmdServicesMemTraceMemoryCompare::CompareOperationValues::CompareNotEqual);
}

// Compressed-memory expectation: implemented as a "not equal" comparison
// against srcAddress (presumably because compressed contents are expected to
// differ from the uncompressed reference - confirm).
template <>
bool CommandStreamReceiverSimulatedCommonHw<Family>::expectMemoryCompressed(void *gfxAddress, const void *srcAddress, size_t length) {
    const auto decanonizedAddress = peekGmmHelper()->decanonize(castToUint64(gfxAddress));
    auto *targetMemory = reinterpret_cast<void *>(decanonizedAddress);
    return this->expectMemory(targetMemory, srcAddress, length,
                              AubMemDump::CmdServicesMemTraceMemoryCompare::CompareOperationValues::CompareNotEqual);
}

template class CommandStreamReceiverSimulatedCommonHw<Family>;
} // namespace NEO

View File

@@ -0,0 +1,24 @@
/*
* Copyright (C) 2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/command_stream/device_command_stream.h"
#include "shared/source/execution_environment/execution_environment.h"
#include "shared/source/xe3_core/hw_cmds_base.h"
#include "create_command_stream_receiver.inl"
namespace NEO {

// Xe3 entry point for creating a device command stream receiver.
// All arguments are passed straight through to createCommandStreamReceiver.
template <>
CommandStreamReceiver *createDeviceCommandStreamReceiver<Xe3CoreFamily>(bool withAubDump,
                                                                        ExecutionEnvironment &executionEnvironment,
                                                                        uint32_t rootDeviceIndex,
                                                                        const DeviceBitfield deviceBitfield) {
    CommandStreamReceiver *commandStreamReceiver =
        createCommandStreamReceiver<Xe3CoreFamily>(withAubDump, executionEnvironment, rootDeviceIndex, deviceBitfield);
    return commandStreamReceiver;
}

} // namespace NEO

View File

@@ -0,0 +1,23 @@
/*
* Copyright (C) 2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/debugger/debugger_l0.inl"
#include "shared/source/debugger/debugger_l0_tgllp_and_later.inl"
#include "shared/source/helpers/populate_factory.h"
#include "shared/source/xe3_core/hw_cmds_base.h"
namespace NEO {
using GfxFamily = NEO::Xe3CoreFamily;
static auto coreFamily = IGFX_XE3_CORE;

// Stores the Xe3 L0 debugger allocation function in the per-core factory
// table, at the slot indexed by IGFX_XE3_CORE.
template <>
void populateFactoryTable<DebuggerL0Hw<GfxFamily>>() {
    extern DebugerL0CreateFn debuggerL0Factory[IGFX_MAX_CORE];
    auto &factorySlot = debuggerL0Factory[coreFamily];
    factorySlot = DebuggerL0Hw<GfxFamily>::allocate;
}

template class DebuggerL0Hw<GfxFamily>;
} // namespace NEO

View File

@@ -0,0 +1,11 @@
/*
* Copyright (C) 2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
// Everything below is compiled only when the build defines SUPPORT_PTL.
#ifdef SUPPORT_PTL
// Explicit instantiation of the L1 cache policy helper for the PTL product.
template struct L1CachePolicyHelper<IGFX_PTL>;
// File-local static object for PTL; presumably its constructor registers the
// PTL product helper at static-initialization time - confirm against
// EnableGfxProductHw.
static EnableGfxProductHw<IGFX_PTL> enableGfxProductHwPTL;
#endif

View File

@@ -0,0 +1,6 @@
/*
* Copyright (C) 2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/

View File

@@ -0,0 +1,13 @@
/*
* Copyright (C) 2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#ifdef SUPPORT_PTL
#include "shared/source/xe3_core/hw_cmds_ptl.h"
#include "shared/source/xe3_core/hw_info_ptl.h"
#endif

View File

@@ -0,0 +1,34 @@
/*
* Copyright (C) 2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/helpers/compiler_product_helper.h"
#include "shared/source/helpers/compiler_product_helper_base.inl"
#include "shared/source/helpers/compiler_product_helper_enable_subgroup_local_block_io.inl"
#include "shared/source/helpers/compiler_product_helper_mtl_and_later.inl"
#include "shared/source/helpers/compiler_product_helper_tgllp_and_later.inl"
#include "shared/source/helpers/compiler_product_helper_xe_hp_and_later.inl"
#include "shared/source/helpers/compiler_product_helper_xe_hpc_and_later.inl"
#include "platforms.h"
#include "wmtp_setup_ptl.inl"
constexpr auto gfxProduct = IGFX_PTL;

namespace NEO {

// Default hardware IP version reported for PTL.
template <>
uint32_t CompilerProductHelperHw<gfxProduct>::getDefaultHwIpVersion() const {
    constexpr auto defaultIpVersion = AOT::PTL_H_A0;
    return defaultIpVersion;
}

// Mid-thread preemption is supported only when both the hardware feature flag
// ftrWalkerMTP is set and wmtpSupported (from the included wmtp setup file)
// is true.
template <>
bool CompilerProductHelperHw<gfxProduct>::isMidThreadPreemptionSupported(const HardwareInfo &hwInfo) const {
    const bool walkerMtpEnabled = hwInfo.featureTable.flags.ftrWalkerMTP;
    return walkerMtpEnabled && wmtpSupported;
}

static EnableCompilerProductHelper<gfxProduct> enableCompilerProductHelperPTL;

} // namespace NEO

View File

@@ -0,0 +1,34 @@
/*
* Copyright (C) 2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/command_stream/aub_command_stream_receiver_hw.h"
#include "shared/source/command_stream/command_stream_receiver_hw.h"
#include "shared/source/command_stream/tbx_command_stream_receiver_hw.h"
#include "shared/source/debugger/debugger_l0.h"
#include "shared/source/helpers/gfx_core_helper.h"
#include "shared/source/helpers/populate_factory.h"
#include "shared/source/xe3_core/hw_cmds_base.h"
namespace NEO {
extern GfxCoreHelperCreateFunctionType gfxCoreHelperFactory[IGFX_MAX_CORE];

using Family = Xe3CoreFamily;
static auto gfxFamily = IGFX_XE3_CORE;

// Registers the Xe3 core with the runtime's factory tables. The work happens
// in the constructor, which runs when the file-local static instance below is
// initialized.
struct EnableCoreXe3Core {
    EnableCoreXe3Core() {
        // Core helper creation function for IGFX_XE3_CORE.
        auto &coreHelperSlot = gfxCoreHelperFactory[gfxFamily];
        coreHelperSlot = GfxCoreHelperHw<Family>::create;

        // Command stream receiver flavours (AUB, hardware, TBX) and the L0 debugger.
        populateFactoryTable<AUBCommandStreamReceiverHw<Family>>();
        populateFactoryTable<CommandStreamReceiverHw<Family>>();
        populateFactoryTable<TbxCommandStreamReceiverHw<Family>>();
        populateFactoryTable<DebuggerL0Hw<Family>>();
    }
};

static EnableCoreXe3Core enable;
} // namespace NEO

Some files were not shown because too many files have changed in this diff Show More