feature: add support for Panther Lake platform

Related-To: NEO-12803

Signed-off-by: Mateusz Jablonski <mateusz.jablonski@intel.com>
This commit is contained in:
Mateusz Jablonski
2025-01-03 12:50:18 +00:00
committed by Compute-Runtime-Automation
parent 8e41928eb8
commit bb1a125f0c
169 changed files with 9419 additions and 24 deletions

View File

@@ -1,5 +1,5 @@
#
# Copyright (C) 2023-2024 Intel Corporation
# Copyright (C) 2023-2025 Intel Corporation
#
# SPDX-License-Identifier: MIT
#
@@ -66,3 +66,8 @@ if(SUPPORT_XE2_AND_LATER)
${CMAKE_CURRENT_SOURCE_DIR}/l0_gfx_core_helper_xe2_hpg_and_later.inl
)
endif()
# When SUPPORT_XE3_AND_LATER is set, add the Xe3-and-later L0 gfx core helper
# implementation file to the Level Zero static library target.
if(SUPPORT_XE3_AND_LATER)
target_sources(${L0_STATIC_LIB_NAME} PRIVATE
${CMAKE_CURRENT_SOURCE_DIR}/l0_gfx_core_helper_xe3_and_later.inl
)
endif()

View File

@@ -0,0 +1,21 @@
/*
* Copyright (C) 2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper.h"
namespace L0 {
// Returns the RTAS format supported by families that include this file:
// the internal version2 format, converted to the public ze_rtas_format_exp_t type.
template <typename Family>
ze_rtas_format_exp_t L0GfxCoreHelperHw<Family>::getSupportedRTASFormat() const {
    const auto internalFormat = RTASDeviceFormatInternal::version2;
    return static_cast<ze_rtas_format_exp_t>(internalFormat);
}
// Returns the debug register set type to use for large GRF detection.
// For families that include this file it is always ZET_DEBUG_REGSET_TYPE_SR_INTEL_GPU.
template <typename Family>
zet_debug_regset_type_intel_gpu_t L0GfxCoreHelperHw<Family>::getRegsetTypeForLargeGrfDetection() const {
    const zet_debug_regset_type_intel_gpu_t regsetType = ZET_DEBUG_REGSET_TYPE_SR_INTEL_GPU;
    return regsetType;
}
} // namespace L0

View File

@@ -0,0 +1,19 @@
#
# Copyright (C) 2025 Intel Corporation
#
# SPDX-License-Identifier: MIT
#
# Xe3 core sources of the Level Zero static library; added only when
# SUPPORT_XE3_CORE is set.
if(SUPPORT_XE3_CORE)
target_sources(${L0_STATIC_LIB_NAME} PRIVATE
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
${CMAKE_CURRENT_SOURCE_DIR}/cmdlist_xe3_core.cpp
${CMAKE_CURRENT_SOURCE_DIR}/cmdlist_xe3_core.h
${CMAKE_CURRENT_SOURCE_DIR}/enable_family_full_l0_xe3_core.cpp
${CMAKE_CURRENT_SOURCE_DIR}/image_xe3_core.inl
${CMAKE_CURRENT_SOURCE_DIR}/l0_gfx_core_helper_xe3_core.cpp
)
# NOTE(review): add_subdirectories() is defined outside this file; presumably it
# descends into the child directories of this one - confirm against the macro.
add_subdirectories()
endif()

View File

@@ -0,0 +1,24 @@
/*
* Copyright (C) 2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/xe3_core/hw_cmds_base.h"
#include "shared/source/xe3_core/hw_info_xe3_core.h"
#include "level_zero/core/source/cmdlist/cmdlist_hw.inl"
#include "level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl"
#include "level_zero/core/source/cmdlist/cmdlist_hw_xe2_hpg_and_later.inl"
#include "level_zero/core/source/cmdlist/cmdlist_hw_xe_hpc_and_later.inl"
#include "level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl"
#include "cmdlist_extended.inl"
namespace L0 {
// Explicit instantiations of the regular and the immediate command-list templates
// for the Xe3 core family (IGFX_XE3_CORE).
template struct CommandListCoreFamily<IGFX_XE3_CORE>;
template struct CommandListCoreFamilyImmediate<IGFX_XE3_CORE>;
} // namespace L0

View File

@@ -0,0 +1,24 @@
/*
* Copyright (C) 2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include "shared/source/xe3_core/hw_cmds_base.h"
#include "level_zero/core/source/cmdlist/cmdlist_hw.h"
#include "level_zero/core/source/cmdlist/cmdlist_hw_immediate.h"
namespace L0 {
// Command list type selected per product family. Every product maps onto the
// Xe3 core command list and reuses its constructors unchanged.
template <PRODUCT_FAMILY productFamily>
struct CommandListProductFamily : CommandListCoreFamily<IGFX_XE3_CORE> {
    using CommandListCoreFamily<IGFX_XE3_CORE>::CommandListCoreFamily;
};
// Immediate command list type selected per product family. Every product maps onto
// the Xe3 core immediate command list and reuses its constructors unchanged.
template <PRODUCT_FAMILY productFamily>
struct CommandListImmediateProductFamily : CommandListCoreFamilyImmediate<IGFX_XE3_CORE> {
    using CommandListCoreFamilyImmediate<IGFX_XE3_CORE>::CommandListCoreFamilyImmediate;
};
} // namespace L0

View File

@@ -0,0 +1,24 @@
/*
* Copyright (C) 2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/xe3_core/hw_cmds_base.h"
#include "level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper.h"
#include "level_zero/core/source/helpers/l0_populate_factory.h"
namespace NEO {
using Family = Xe3CoreFamily;
struct EnableL0Xe3Core {
EnableL0Xe3Core() {
L0::populateFactoryTable<L0::L0GfxCoreHelperHw<Family>>();
}
};
static EnableL0Xe3Core enable;
} // namespace NEO

View File

@@ -0,0 +1,11 @@
/*
* Copyright (C) 2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/xe3_core/hw_cmds_base.h"
#include "shared/source/xe3_core/hw_info_xe3_core.h"
#include "level_zero/core/source/image/image_hw.inl"

View File

@@ -0,0 +1,29 @@
/*
* Copyright (C) 2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/xe3_core/hw_cmds_base.h"
#include "level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper_base.inl"
#include "level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper_pvc_and_later.inl"
#include "level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper_xe2_hpg_and_later.inl"
#include "level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper_xe3_and_later.inl"
#include "level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper_xehp_and_later.inl"
#include "level_zero/core/source/helpers/l0_populate_factory.h"
namespace L0 {
using Family = NEO::Xe3CoreFamily;
static auto gfxCore = IGFX_XE3_CORE;
#include "level_zero/core/source/helpers/l0_gfx_core_helper_factory_init.inl"
// Xe3 core specialization: extracts the GRF register count from the register dump.
// The count is read from the low 9 bits (mask 0x1FF) of the dword at index 4.
// regPtr must therefore point to at least five readable dwords.
template <>
uint32_t L0GfxCoreHelperHw<Family>::getGrfRegisterCount(uint32_t *regPtr) const {
    constexpr uint32_t grfCountDwordIndex = 4u;
    constexpr uint32_t grfCountMask = 0x1FFu;
    return regPtr[grfCountDwordIndex] & grfCountMask;
}
template class L0GfxCoreHelperHw<Family>;
} // namespace L0

View File

@@ -0,0 +1,17 @@
#
# Copyright (C) 2025 Intel Corporation
#
# SPDX-License-Identifier: MIT
#
# Panther Lake (PTL) product sources of the Level Zero static library; added only
# when SUPPORT_PTL is set.
if(SUPPORT_PTL)
target_sources(${L0_STATIC_LIB_NAME} PRIVATE
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
${CMAKE_CURRENT_SOURCE_DIR}/cmdlist_ptl.cpp
${CMAKE_CURRENT_SOURCE_DIR}/cmdqueue_ptl.cpp
${CMAKE_CURRENT_SOURCE_DIR}/image_ptl.cpp
${CMAKE_CURRENT_SOURCE_DIR}/kernel_ptl.cpp
${CMAKE_CURRENT_SOURCE_DIR}/sampler_ptl.cpp
)
# NOTE(review): add_subdirectories() is defined outside this file; presumably it
# descends into the child directories of this one - confirm against the macro.
add_subdirectories()
endif()

View File

@@ -0,0 +1,20 @@
/*
* Copyright (C) 2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/xe3_core/hw_info_xe3_core.h"
#include "level_zero/core/source/xe3_core/cmdlist_xe3_core.h"
namespace L0 {
// File-local static objects pairing IGFX_PTL with the Xe3-core-based command list
// types (regular and immediate).
// NOTE(review): the populate-factory templates are declared elsewhere; presumably
// their constructors register the type in a per-product factory table - confirm.
static CommandListPopulateFactory<IGFX_PTL, CommandListProductFamily<IGFX_PTL>>
populatePTL;
static CommandListImmediatePopulateFactory<IGFX_PTL, CommandListImmediateProductFamily<IGFX_PTL>>
populatePTLImmediate;
} // namespace L0

View File

@@ -0,0 +1,19 @@
/*
* Copyright (C) 2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/xe3_core/hw_cmds_base.h"
#include "shared/source/xe3_core/hw_info_xe3_core.h"
#include "level_zero/core/source/cmdqueue/cmdqueue_hw.inl"
#include "level_zero/core/source/cmdqueue/cmdqueue_xe_hp_core_and_later.inl"
namespace L0 {
// Explicitly instantiate the command queue template for the Xe3 core family.
template struct CommandQueueHw<IGFX_XE3_CORE>;
// File-local static object pairing IGFX_PTL with the Xe3 core command queue type.
// NOTE(review): presumably its constructor registers the type in a per-product
// factory table - the template is declared elsewhere, confirm there.
static CommandQueuePopulateFactory<IGFX_PTL, CommandQueueHw<IGFX_XE3_CORE>>
populatePTL;
} // namespace L0

View File

@@ -0,0 +1,23 @@
/*
* Copyright (C) 2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "level_zero/core/source/xe3_core/image_xe3_core.inl"
namespace L0 {
// PTL image type: derives from the Xe3 core image implementation and reuses its
// constructors.
template <>
struct ImageProductFamily<IGFX_PTL> : public ImageCoreFamily<IGFX_XE3_CORE> {
    using ImageCoreFamily::ImageCoreFamily;

    // Forwards to the Xe3 core implementation without any PTL-specific handling.
    // Returns whatever the core family's initialize() returns.
    ze_result_t initialize(Device *device, const ze_image_desc_t *desc) override {
        return ImageCoreFamily<IGFX_XE3_CORE>::initialize(device, desc);
    }
};

// File-local static object pairing IGFX_PTL with the PTL image type.
// NOTE(review): presumably its constructor registers the type in a per-product
// factory table - the template is declared elsewhere, confirm there.
static ImagePopulateFactory<IGFX_PTL, ImageProductFamily<IGFX_PTL>> populatePTL;
} // namespace L0

View File

@@ -0,0 +1,17 @@
/*
* Copyright (C) 2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/xe3_core/hw_cmds_base.h"
#include "shared/source/xe3_core/hw_info_xe3_core.h"
#include "level_zero/core/source/kernel/kernel_hw.h"
namespace L0 {
// File-local static object pairing IGFX_PTL with the Xe3 core kernel type.
// NOTE(review): presumably its constructor registers the type in a per-product
// factory table - the template is declared elsewhere, confirm there.
static KernelPopulateFactory<IGFX_PTL, KernelHw<IGFX_XE3_CORE>> populatePTL;
} // namespace L0

View File

@@ -0,0 +1,22 @@
/*
* Copyright (C) 2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/xe3_core/hw_cmds_base.h"
#include "shared/source/xe3_core/hw_info_xe3_core.h"
#include "level_zero/core/source/sampler/sampler_hw.inl"
namespace L0 {
// PTL sampler type: the Xe3 core sampler with its constructors inherited as-is.
template <>
struct SamplerProductFamily<IGFX_PTL> : SamplerCoreFamily<IGFX_XE3_CORE> {
    using SamplerCoreFamily<IGFX_XE3_CORE>::SamplerCoreFamily;
};

// File-local static object pairing IGFX_PTL with the PTL sampler type.
static SamplerPopulateFactory<IGFX_PTL, SamplerProductFamily<IGFX_PTL>> populatePTL;
} // namespace L0

View File

@@ -0,0 +1,12 @@
#
# Copyright (C) 2025 Intel Corporation
#
# SPDX-License-Identifier: MIT
#
# Xe3 core AUB test sources; added to ${TARGET_NAME} only when TESTS_XE3_CORE is set.
if(TESTS_XE3_CORE)
target_sources(${TARGET_NAME} PRIVATE
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
${CMAKE_CURRENT_SOURCE_DIR}/test_variable_register_per_thread_xe3_core.cpp
)
endif()

View File

@@ -0,0 +1,106 @@
/*
* Copyright (C) 2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/helpers/file_io.h"
#include "shared/test/common/helpers/default_hw_info.h"
#include "shared/test/common/helpers/test_files.h"
#include "shared/test/common/test_macros/hw_test.h"
#include "shared/test/common/test_macros/test.h"
#include "level_zero/core/source/cmdqueue/cmdqueue.h"
#include "level_zero/core/source/context/context_imp.h"
#include "level_zero/core/source/driver/driver_handle_imp.h"
#include "level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper.h"
#include "level_zero/core/test/aub_tests/fixtures/aub_fixture.h"
#include "level_zero/core/test/unit_tests/mocks/mock_cmdlist.h"
#include "level_zero/driver_experimental/zex_module.h"
namespace L0::ult {
// AUB fixture with helpers for querying the supported register-file sizes and for
// allocating device memory through the Level Zero API.
struct AUBVariableRegisterPerThreadL0 : Test<AUBFixtureL0> {
    // Returns the register-file sizes reported by zeDeviceGetModuleProperties via the
    // zex_device_module_register_file_exp_t extension structure.
    // The API is called twice: first to obtain the count, then to fill the array.
    std::vector<uint32_t> getGrfSizes(ze_device_handle_t device) {
        zex_device_module_register_file_exp_t registerFileExt{};
        ze_device_module_properties_t moduleProperties{};
        moduleProperties.pNext = &registerFileExt;

        // First call: only the number of sizes is needed.
        EXPECT_EQ(ZE_RESULT_SUCCESS, zeDeviceGetModuleProperties(device, &moduleProperties));

        // Second call: storage is attached, so the sizes are written into it.
        std::vector<uint32_t> sizes(registerFileExt.registerFileSizesCount);
        registerFileExt.registerFileSizes = sizes.data();
        EXPECT_EQ(ZE_RESULT_SUCCESS, zeDeviceGetModuleProperties(device, &moduleProperties));

        return sizes;
    }

    // Allocates `size` bytes of device memory with the given alignment.
    // The result stays nullptr unless zeMemAllocDevice writes a pointer; a failing
    // status is reported as a non-fatal test failure.
    void *allocateDeviceMemory(ze_context_handle_t context, ze_device_handle_t device, size_t size, size_t alignment) {
        ze_device_mem_alloc_desc_t allocDesc{};
        allocDesc.stype = ZE_STRUCTURE_TYPE_DEVICE_MEM_ALLOC_DESC;

        void *allocation = nullptr;
        EXPECT_EQ(ZE_RESULT_SUCCESS, zeMemAllocDevice(context, &allocDesc, size, alignment, device, &allocation));
        return allocation;
    }
};
// For every register-file (GRF) size reported by the device: builds the kernel with
// "-ze-exp-register-file-size <size>", checks that the kernel descriptor reports that
// GRF count, then copies data in, launches the kernel and compares the output buffer
// with the expected data.
XE3_CORETEST_F(AUBVariableRegisterPerThreadL0, givenZeOptRegisterFileSizeOptionWhenExecutingKernelThenCorrectValuesAreReturned) {
    constexpr auto bufferSize = 256u;
    const auto grfSizes = getGrfSizes(device);
    const auto &expectedGrfSizes = device->getProductHelper().getSupportedNumGrfs(device->getNEODevice()->getReleaseHelper());
    EXPECT_NE(0u, grfSizes.size());
    EXPECT_EQ(expectedGrfSizes, grfSizes);

    for (const auto &grfSize : grfSizes) {
        std::string filename = "grf_" + std::to_string(grfSize) + "_kernel_variable_register_per_thread";
        std::string buildFlags = "-ze-exp-register-file-size " + std::to_string(grfSize);
        ze_module_handle_t module = createModuleFromFile(filename, context, device, buildFlags);
        ASSERT_NE(nullptr, module);

        ze_kernel_handle_t kernel = nullptr;
        ze_kernel_desc_t kernelDescriptor{};
        kernelDescriptor.stype = ZE_STRUCTURE_TYPE_KERNEL_DESC;
        kernelDescriptor.pKernelName = "kernelVariableRegisterPerThread";
        // Fatal on failure: the handle is dereferenced immediately below.
        ASSERT_EQ(ZE_RESULT_SUCCESS, zeKernelCreate(module, &kernelDescriptor, &kernel));
        ASSERT_NE(nullptr, kernel);
        const auto numGrfRequired = Kernel::fromHandle(kernel)->getKernelDescriptor().kernelAttributes.numGrfRequired;
        EXPECT_EQ(grfSize, numGrfRequired);

        ze_command_list_desc_t commandListDescriptor{};
        commandListDescriptor.stype = ZE_STRUCTURE_TYPE_COMMAND_LIST_DESC;
        ze_command_list_handle_t commandList{};
        // Fatal on failure: every following append call needs a valid command list.
        ASSERT_EQ(ZE_RESULT_SUCCESS, zeCommandListCreate(context, device, &commandListDescriptor, &commandList));
        ASSERT_NE(nullptr, commandList);

        // NOTE(review): the host vectors hold bufferSize int32_t elements, while only
        // bufferSize bytes are allocated, copied and passed to expectMemory - confirm
        // this is intended.
        const std::vector<int32_t> input(bufferSize, 1);
        const std::vector<int32_t> expectedOutput(bufferSize, 2);
        auto *inputBuffer = allocateDeviceMemory(context, device, bufferSize, 1u);
        auto *outputBuffer = allocateDeviceMemory(context, device, bufferSize, 1u);
        // allocateDeviceMemory only reports failures non-fatally, so stop here
        // instead of submitting work on null buffers.
        ASSERT_NE(nullptr, inputBuffer);
        ASSERT_NE(nullptr, outputBuffer);

        EXPECT_EQ(ZE_RESULT_SUCCESS, zeCommandListAppendMemoryCopy(commandList, inputBuffer, input.data(), bufferSize, nullptr, 0u, nullptr));
        EXPECT_EQ(ZE_RESULT_SUCCESS, zeCommandListAppendBarrier(commandList, nullptr, 0u, nullptr));

        EXPECT_EQ(ZE_RESULT_SUCCESS, zeKernelSetArgumentValue(kernel, 0u, sizeof(inputBuffer), &inputBuffer));
        EXPECT_EQ(ZE_RESULT_SUCCESS, zeKernelSetArgumentValue(kernel, 1u, sizeof(outputBuffer), &outputBuffer));
        EXPECT_EQ(ZE_RESULT_SUCCESS, zeKernelSetGroupSize(kernel, bufferSize, 1u, 1u));

        ze_group_count_t groupCount{1u, 1u, 1u};
        EXPECT_EQ(ZE_RESULT_SUCCESS, zeCommandListAppendLaunchKernel(commandList, kernel, &groupCount, nullptr, 0u, nullptr));
        EXPECT_EQ(ZE_RESULT_SUCCESS, zeCommandListAppendBarrier(commandList, nullptr, 0u, nullptr));
        EXPECT_EQ(ZE_RESULT_SUCCESS, zeCommandListClose(commandList));

        EXPECT_EQ(ZE_RESULT_SUCCESS, zeCommandQueueExecuteCommandLists(pCmdq, 1u, &commandList, nullptr));
        EXPECT_EQ(ZE_RESULT_SUCCESS, zeCommandQueueSynchronize(pCmdq, UINT64_MAX));

        expectMemory<FamilyType>(outputBuffer, expectedOutput.data(), bufferSize);

        // Release everything created in this iteration.
        EXPECT_EQ(ZE_RESULT_SUCCESS, zeMemFree(context, inputBuffer));
        EXPECT_EQ(ZE_RESULT_SUCCESS, zeMemFree(context, outputBuffer));
        EXPECT_EQ(ZE_RESULT_SUCCESS, zeCommandListDestroy(commandList));
        EXPECT_EQ(ZE_RESULT_SUCCESS, zeKernelDestroy(kernel));
        EXPECT_EQ(ZE_RESULT_SUCCESS, zeModuleDestroy(module));
    }
}
} // namespace L0::ult

View File

@@ -0,0 +1,16 @@
#
# Copyright (C) 2025 Intel Corporation
#
# SPDX-License-Identifier: MIT
#
# Xe3 core unit test sources; added to ${TARGET_NAME} only when TESTS_XE3_CORE is set.
if(TESTS_XE3_CORE)
target_sources(${TARGET_NAME} PRIVATE
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
${CMAKE_CURRENT_SOURCE_DIR}/excludes_l0_xe3_core.cpp
${CMAKE_CURRENT_SOURCE_DIR}/test_cmdlist_xe3_core.cpp
${CMAKE_CURRENT_SOURCE_DIR}/test_device_xe3_core.cpp
${CMAKE_CURRENT_SOURCE_DIR}/test_l0_gfx_core_helper_xe3_core.cpp
${CMAKE_CURRENT_SOURCE_DIR}/test_module_xe3_core.cpp
)
endif()

View File

@@ -0,0 +1,8 @@
/*
* Copyright (C) 2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/test/common/test_macros/hw_test_base.h"

View File

@@ -0,0 +1,488 @@
/*
* Copyright (C) 2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/command_stream/scratch_space_controller.h"
#include "shared/source/helpers/compiler_product_helper.h"
#include "shared/source/kernel/implicit_args_helper.h"
#include "shared/source/os_interface/product_helper.h"
#include "shared/test/common/cmd_parse/gen_cmd_parse.h"
#include "shared/test/common/helpers/debug_manager_state_restore.h"
#include "shared/test/common/helpers/unit_test_helper.h"
#include "shared/test/common/mocks/mock_compiler_product_helper.h"
#include "shared/test/common/test_macros/hw_test.h"
#include "level_zero/core/source/event/event.h"
#include "level_zero/core/test/unit_tests/fixtures/module_fixture.h"
#include "level_zero/core/test/unit_tests/mocks/mock_cmdlist.h"
#include "level_zero/core/test/unit_tests/mocks/mock_cmdqueue.h"
#include "level_zero/core/test/unit_tests/mocks/mock_module.h"
namespace L0 {
namespace ult {
// Register two AppendMemoryCopyTests timestamp cases with HWTEST_EXCLUDE_PRODUCT for
// IGFX_XE3_CORE.
// NOTE(review): presumably this skips them on Xe3 core - confirm against the macro.
HWTEST_EXCLUDE_PRODUCT(AppendMemoryCopyTests, givenCopyCommandListWhenTimestampPassedToMemoryCopyThenAppendProfilingCalledOnceBeforeAndAfterCommand_MatchAny, IGFX_XE3_CORE);
HWTEST_EXCLUDE_PRODUCT(AppendMemoryCopyTests, givenCopyCommandListWhenTimestampPassedToMemoryCopyRegionBlitThenTimeStampRegistersAreAdded_MatchAny, IGFX_XE3_CORE);
using CommandListAppendLaunchKernelXe3 = Test<ModuleFixture>;
// With AllowPatchingVfeStateInCommandLists set to 1, calls updateStreamProperties with
// different orderings of a default kernel and a kernel that has usesSyncBuffer set.
// After every sequence the command list's commandsToPatch container must be empty.
HWTEST2_F(CommandListAppendLaunchKernelXe3, givenVariousKernelsWhenUpdateStreamPropertiesIsCalledThenRequiredStateFinalStateAndCommandsToPatchAreCorrectlySet, IsXe3Core) {
DebugManagerStateRestore restorer;
debugManager.flags.AllowPatchingVfeStateInCommandLists.set(1);
// Two mock kernels, each backed by its own mock module.
Mock<::L0::KernelImp> defaultKernel;
auto pMockModule1 = std::unique_ptr<Module>(new Mock<Module>(device, nullptr));
defaultKernel.module = pMockModule1.get();
Mock<::L0::KernelImp> cooperativeKernel;
auto pMockModule2 = std::unique_ptr<Module>(new Mock<Module>(device, nullptr));
cooperativeKernel.module = pMockModule2.get();
cooperativeKernel.immutableData.kernelDescriptor->kernelAttributes.flags.usesSyncBuffer = true;
auto pCommandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
auto result = pCommandList->initialize(device, NEO::EngineGroupType::compute, 0u);
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
EXPECT_EQ(0u, pCommandList->commandsToPatch.size());
const ze_group_count_t launchKernelArgs = {};
// Single default kernel.
pCommandList->updateStreamProperties(defaultKernel, false, launchKernelArgs, false);
EXPECT_EQ(0u, pCommandList->commandsToPatch.size());
pCommandList->reset();
// Cooperative kernel twice.
pCommandList->updateStreamProperties(cooperativeKernel, true, launchKernelArgs, false);
pCommandList->updateStreamProperties(cooperativeKernel, true, launchKernelArgs, false);
EXPECT_EQ(0u, pCommandList->commandsToPatch.size());
pCommandList->reset();
// Default kernel followed by cooperative kernel.
pCommandList->updateStreamProperties(defaultKernel, false, launchKernelArgs, false);
pCommandList->updateStreamProperties(cooperativeKernel, true, launchKernelArgs, false);
EXPECT_EQ(0u, pCommandList->commandsToPatch.size());
pCommandList->reset();
// Cooperative, default, cooperative.
pCommandList->updateStreamProperties(cooperativeKernel, true, launchKernelArgs, false);
pCommandList->updateStreamProperties(defaultKernel, false, launchKernelArgs, false);
pCommandList->updateStreamProperties(cooperativeKernel, true, launchKernelArgs, false);
EXPECT_EQ(0u, pCommandList->commandsToPatch.size());
pCommandList->reset();
// Default kernel twice.
pCommandList->updateStreamProperties(defaultKernel, false, launchKernelArgs, false);
pCommandList->updateStreamProperties(defaultKernel, false, launchKernelArgs, false);
EXPECT_EQ(0u, pCommandList->commandsToPatch.size());
pCommandList->reset();
EXPECT_EQ(0u, pCommandList->commandsToPatch.size());
}
// Runs a default kernel followed by a kernel with usesSyncBuffer set through
// updateStreamProperties twice: first without touching the
// AllowPatchingVfeStateInCommandLists flag, then after setting it to 1.
// In both cases commandsToPatch must stay empty.
HWTEST2_F(CommandListAppendLaunchKernelXe3, givenVariousKernelsAndPatchingDisallowedWhenUpdateStreamPropertiesIsCalledThenCommandsToPatchAreEmpty, IsXe3Core) {
DebugManagerStateRestore restorer;
Mock<::L0::KernelImp> defaultKernel;
auto pMockModule1 = std::unique_ptr<Module>(new Mock<Module>(device, nullptr));
defaultKernel.module = pMockModule1.get();
Mock<::L0::KernelImp> cooperativeKernel;
auto pMockModule2 = std::unique_ptr<Module>(new Mock<Module>(device, nullptr));
cooperativeKernel.module = pMockModule2.get();
cooperativeKernel.immutableData.kernelDescriptor->kernelAttributes.flags.usesSyncBuffer = true;
auto pCommandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
auto result = pCommandList->initialize(device, NEO::EngineGroupType::compute, 0u);
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
const ze_group_count_t launchKernelArgs = {};
// Flag not set by this test.
pCommandList->updateStreamProperties(defaultKernel, false, launchKernelArgs, false);
pCommandList->updateStreamProperties(cooperativeKernel, true, launchKernelArgs, false);
EXPECT_EQ(0u, pCommandList->commandsToPatch.size());
pCommandList->reset();
// Same sequence with the flag explicitly enabled.
debugManager.flags.AllowPatchingVfeStateInCommandLists.set(1);
pCommandList->updateStreamProperties(defaultKernel, false, launchKernelArgs, false);
pCommandList->updateStreamProperties(cooperativeKernel, true, launchKernelArgs, false);
EXPECT_EQ(0u, pCommandList->commandsToPatch.size());
pCommandList->reset();
}
// Module fixture variant that sets the EnableLocalMemory debug flag to 1 before the
// base ModuleFixture setup runs.
struct LocalMemoryModuleFixture : ModuleFixture {
    // NOTE(review): presumably restores the debug flags when the fixture is
    // destroyed - confirm against DebugManagerStateRestore.
    DebugManagerStateRestore restore;

    void setUp() {
        debugManager.flags.EnableLocalMemory.set(1);
        ModuleFixture::setUp();
    }
};
using CommandListAppendLaunchKernelXe3Core = Test<LocalMemoryModuleFixture>;
// Kernel with a device-memory allocation in its residency container and an event whose
// signal scope is ZE_EVENT_SCOPE_FLAG_DEVICE: the walker's post-sync data must not
// request a system memory fence.
HWTEST2_F(CommandListAppendLaunchKernelXe3Core, givenAppendKernelWhenKernelNotUsingSystemMemoryAllocationsAndEventNotHostSignalScopeThenExpectsNoSystemFenceUsed, IsXe3Core) {
using DefaultWalkerType = typename FamilyType::DefaultWalkerType;
ze_result_t result = ZE_RESULT_SUCCESS;
constexpr size_t size = 4096u;
constexpr size_t alignment = 4096u;
void *ptr = nullptr;
ze_device_mem_alloc_desc_t deviceDesc = {};
result = context->allocDeviceMem(device->toHandle(),
&deviceDesc,
size, alignment, &ptr);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
EXPECT_NE(nullptr, ptr);
Mock<::L0::KernelImp> kernel;
auto mockModule = std::unique_ptr<Module>(new Mock<Module>(device, nullptr));
kernel.module = mockModule.get();
// Make the allocation part of the kernel's argument residency.
auto allocData = driverHandle->getSvmAllocsManager()->getSVMAlloc(ptr);
ASSERT_NE(nullptr, allocData);
auto kernelAllocation = allocData->gpuAllocations.getGraphicsAllocation(device->getRootDeviceIndex());
ASSERT_NE(nullptr, kernelAllocation);
kernel.argumentsResidencyContainer.push_back(kernelAllocation);
ze_event_pool_desc_t eventPoolDesc = {};
eventPoolDesc.count = 1;
auto eventPool = std::unique_ptr<L0::EventPool>(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result));
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
ze_event_desc_t eventDesc = {};
eventDesc.index = 0;
eventDesc.signal = ZE_EVENT_SCOPE_FLAG_DEVICE;
eventDesc.wait = 0;
auto event = std::unique_ptr<L0::Event>(L0::Event::create<typename FamilyType::TimestampPacketType>(eventPool.get(), &eventDesc, device));
kernel.setGroupSize(1, 1, 1);
ze_group_count_t groupCount{8, 1, 1};
auto commandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
result = commandList->initialize(device, NEO::EngineGroupType::compute, 0u);
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
CmdListKernelLaunchParams launchParams = {};
result = commandList->appendLaunchKernelWithParams(&kernel, groupCount, event.get(), launchParams);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
// Parse the generated command stream and inspect the first walker command.
GenCmdList commands;
ASSERT_TRUE(CmdParse<FamilyType>::parseCommandBuffer(
commands,
commandList->getCmdContainer().getCommandStream()->getCpuBase(),
commandList->getCmdContainer().getCommandStream()->getUsed()));
auto itor = find<DefaultWalkerType *>(commands.begin(), commands.end());
ASSERT_NE(itor, commands.end());
auto walkerCmd = genCmdCast<DefaultWalkerType *>(*itor);
auto &postSyncData = walkerCmd->getPostSync();
EXPECT_FALSE(postSyncData.getSystemMemoryFenceRequest());
result = context->freeMem(ptr);
ASSERT_EQ(result, ZE_RESULT_SUCCESS);
}
// Kernel with a host-memory allocation in its residency container and an event whose
// signal scope is ZE_EVENT_SCOPE_FLAG_DEVICE: the walker's post-sync data must not
// request a system memory fence.
HWTEST2_F(CommandListAppendLaunchKernelXe3Core,
givenAppendKernelWhenKernelUsingUsmHostMemoryAllocationsAndEventNotHostSignalScopeThenExpectsNoSystemFenceUsed, IsXe3Core) {
using DefaultWalkerType = typename FamilyType::DefaultWalkerType;
ze_result_t result = ZE_RESULT_SUCCESS;
constexpr size_t size = 4096u;
constexpr size_t alignment = 4096u;
void *ptr = nullptr;
ze_host_mem_alloc_desc_t hostDesc = {};
result = context->allocHostMem(&hostDesc, size, alignment, &ptr);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
EXPECT_NE(nullptr, ptr);
Mock<::L0::KernelImp> kernel;
auto mockModule = std::unique_ptr<Module>(new Mock<Module>(device, nullptr));
kernel.module = mockModule.get();
// Make the allocation part of the kernel's argument residency.
auto allocData = driverHandle->getSvmAllocsManager()->getSVMAlloc(ptr);
ASSERT_NE(nullptr, allocData);
auto kernelAllocation = allocData->gpuAllocations.getGraphicsAllocation(device->getRootDeviceIndex());
ASSERT_NE(nullptr, kernelAllocation);
kernel.argumentsResidencyContainer.push_back(kernelAllocation);
ze_event_pool_desc_t eventPoolDesc = {};
eventPoolDesc.count = 1;
auto eventPool = std::unique_ptr<L0::EventPool>(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result));
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
ze_event_desc_t eventDesc = {};
eventDesc.index = 0;
eventDesc.signal = ZE_EVENT_SCOPE_FLAG_DEVICE;
eventDesc.wait = 0;
auto event = std::unique_ptr<L0::Event>(L0::Event::create<typename FamilyType::TimestampPacketType>(eventPool.get(), &eventDesc, device));
kernel.setGroupSize(1, 1, 1);
ze_group_count_t groupCount{8, 1, 1};
auto commandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
result = commandList->initialize(device, NEO::EngineGroupType::compute, 0u);
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
CmdListKernelLaunchParams launchParams = {};
result = commandList->appendLaunchKernelWithParams(&kernel, groupCount, event.get(), launchParams);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
// Parse the generated command stream and inspect the first walker command.
GenCmdList commands;
ASSERT_TRUE(CmdParse<FamilyType>::parseCommandBuffer(
commands,
commandList->getCmdContainer().getCommandStream()->getCpuBase(),
commandList->getCmdContainer().getCommandStream()->getUsed()));
auto itor = find<DefaultWalkerType *>(commands.begin(), commands.end());
ASSERT_NE(itor, commands.end());
auto walkerCmd = genCmdCast<DefaultWalkerType *>(*itor);
auto &postSyncData = walkerCmd->getPostSync();
EXPECT_FALSE(postSyncData.getSystemMemoryFenceRequest());
result = context->freeMem(ptr);
ASSERT_EQ(result, ZE_RESULT_SUCCESS);
}
// Shared allocation: appends a page-fault copy from its GPU allocation to its CPU
// allocation on a compute command list, checks the launch parameters recorded by the
// command list, and expects the walker's post-sync data not to request a system
// memory fence. No event is passed in this test.
HWTEST2_F(CommandListAppendLaunchKernelXe3Core,
givenAppendKernelWhenMigrationOnComputeUsingUsmSharedCpuMemoryAllocationsAndEventNotHostSignalScopeThenExpectsNoSystemFenceUsed, IsXe3Core) {
using DefaultWalkerType = typename FamilyType::DefaultWalkerType;
ze_result_t result = ZE_RESULT_SUCCESS;
constexpr size_t size = 4096u;
constexpr size_t alignment = 4096u;
void *ptr = nullptr;
ze_host_mem_alloc_desc_t hostDesc = {};
ze_device_mem_alloc_desc_t deviceDesc = {};
result = context->allocSharedMem(device->toHandle(), &deviceDesc, &hostDesc, size, alignment, &ptr);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
EXPECT_NE(nullptr, ptr);
auto allocData = driverHandle->getSvmAllocsManager()->getSVMAlloc(ptr);
ASSERT_NE(nullptr, allocData);
// Destination is the CPU-side allocation, source is the GPU-side allocation.
auto dstAllocation = allocData->cpuAllocation;
ASSERT_NE(nullptr, dstAllocation);
auto srcAllocation = allocData->gpuAllocations.getGraphicsAllocation(device->getRootDeviceIndex());
ASSERT_NE(nullptr, srcAllocation);
auto commandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
result = commandList->initialize(device, NEO::EngineGroupType::compute, 0u);
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
result = commandList->appendPageFaultCopy(dstAllocation, srcAllocation, size, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
EXPECT_TRUE(commandList->usedKernelLaunchParams.isBuiltInKernel);
EXPECT_FALSE(commandList->usedKernelLaunchParams.isKernelSplitOperation);
EXPECT_TRUE(commandList->usedKernelLaunchParams.isDestinationAllocationInSystemMemory);
// Parse the generated command stream and inspect the first walker command.
GenCmdList commands;
ASSERT_TRUE(CmdParse<FamilyType>::parseCommandBuffer(
commands,
commandList->getCmdContainer().getCommandStream()->getCpuBase(),
commandList->getCmdContainer().getCommandStream()->getUsed()));
auto itor = find<DefaultWalkerType *>(commands.begin(), commands.end());
ASSERT_NE(itor, commands.end());
auto walkerCmd = genCmdCast<DefaultWalkerType *>(*itor);
auto &postSyncData = walkerCmd->getPostSync();
EXPECT_FALSE(postSyncData.getSystemMemoryFenceRequest());
result = context->freeMem(ptr);
ASSERT_EQ(result, ZE_RESULT_SUCCESS);
}
// Kernel with a device-memory allocation in its residency container and
// indirectHostAllocationsAllowed set, plus an event whose signal scope is
// ZE_EVENT_SCOPE_FLAG_DEVICE: the walker's post-sync data must not request a system
// memory fence.
HWTEST2_F(CommandListAppendLaunchKernelXe3Core,
givenAppendKernelWhenKernelUsingIndirectSystemMemoryAllocationsAndEventNotHostSignalScopeThenExpectsNoSystemFenceUsed, IsXe3Core) {
using DefaultWalkerType = typename FamilyType::DefaultWalkerType;
ze_result_t result = ZE_RESULT_SUCCESS;
constexpr size_t size = 4096u;
constexpr size_t alignment = 4096u;
void *ptr = nullptr;
ze_device_mem_alloc_desc_t deviceDesc = {};
result = context->allocDeviceMem(device->toHandle(),
&deviceDesc,
size, alignment, &ptr);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
EXPECT_NE(nullptr, ptr);
Mock<::L0::KernelImp> kernel;
auto mockModule = std::unique_ptr<Module>(new Mock<Module>(device, nullptr));
kernel.module = mockModule.get();
// Make the allocation part of the kernel's argument residency.
auto allocData = driverHandle->getSvmAllocsManager()->getSVMAlloc(ptr);
ASSERT_NE(nullptr, allocData);
auto kernelAllocation = allocData->gpuAllocations.getGraphicsAllocation(device->getRootDeviceIndex());
ASSERT_NE(nullptr, kernelAllocation);
kernel.argumentsResidencyContainer.push_back(kernelAllocation);
// The only difference from the plain device-memory case: indirect host access is allowed.
kernel.unifiedMemoryControls.indirectHostAllocationsAllowed = true;
ze_event_pool_desc_t eventPoolDesc = {};
eventPoolDesc.count = 1;
auto eventPool = std::unique_ptr<L0::EventPool>(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result));
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
ze_event_desc_t eventDesc = {};
eventDesc.index = 0;
eventDesc.signal = ZE_EVENT_SCOPE_FLAG_DEVICE;
eventDesc.wait = 0;
auto event = std::unique_ptr<L0::Event>(L0::Event::create<typename FamilyType::TimestampPacketType>(eventPool.get(), &eventDesc, device));
kernel.setGroupSize(1, 1, 1);
ze_group_count_t groupCount{8, 1, 1};
auto commandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
result = commandList->initialize(device, NEO::EngineGroupType::compute, 0u);
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
CmdListKernelLaunchParams launchParams = {};
result = commandList->appendLaunchKernelWithParams(&kernel, groupCount, event.get(), launchParams);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
// Parse the generated command stream and inspect the first walker command.
GenCmdList commands;
ASSERT_TRUE(CmdParse<FamilyType>::parseCommandBuffer(
commands,
commandList->getCmdContainer().getCommandStream()->getCpuBase(),
commandList->getCmdContainer().getCommandStream()->getUsed()));
auto itor = find<DefaultWalkerType *>(commands.begin(), commands.end());
ASSERT_NE(itor, commands.end());
auto walkerCmd = genCmdCast<DefaultWalkerType *>(*itor);
auto &postSyncData = walkerCmd->getPostSync();
EXPECT_FALSE(postSyncData.getSystemMemoryFenceRequest());
result = context->freeMem(ptr);
ASSERT_EQ(result, ZE_RESULT_SUCCESS);
}
// Kernel with a device-memory allocation in its residency container and an event whose
// signal scope is ZE_EVENT_SCOPE_FLAG_HOST: the walker's post-sync data must still not
// request a system memory fence.
HWTEST2_F(CommandListAppendLaunchKernelXe3Core,
givenAppendKernelWhenKernelUsingDeviceMemoryAllocationsAndEventHostSignalScopeThenExpectsNoSystemFenceUsed, IsXe3Core) {
using DefaultWalkerType = typename FamilyType::DefaultWalkerType;
ze_result_t result = ZE_RESULT_SUCCESS;
constexpr size_t size = 4096u;
constexpr size_t alignment = 4096u;
void *ptr = nullptr;
ze_device_mem_alloc_desc_t deviceDesc = {};
result = context->allocDeviceMem(device->toHandle(),
&deviceDesc,
size, alignment, &ptr);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
EXPECT_NE(nullptr, ptr);
Mock<::L0::KernelImp> kernel;
auto mockModule = std::unique_ptr<Module>(new Mock<Module>(device, nullptr));
kernel.module = mockModule.get();
// Make the allocation part of the kernel's argument residency.
auto allocData = driverHandle->getSvmAllocsManager()->getSVMAlloc(ptr);
ASSERT_NE(nullptr, allocData);
auto kernelAllocation = allocData->gpuAllocations.getGraphicsAllocation(device->getRootDeviceIndex());
ASSERT_NE(nullptr, kernelAllocation);
kernel.argumentsResidencyContainer.push_back(kernelAllocation);
ze_event_pool_desc_t eventPoolDesc = {};
eventPoolDesc.count = 1;
auto eventPool = std::unique_ptr<L0::EventPool>(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result));
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
ze_event_desc_t eventDesc = {};
eventDesc.index = 0;
// Host signal scope this time.
eventDesc.signal = ZE_EVENT_SCOPE_FLAG_HOST;
eventDesc.wait = 0;
auto event = std::unique_ptr<L0::Event>(L0::Event::create<typename FamilyType::TimestampPacketType>(eventPool.get(), &eventDesc, device));
kernel.setGroupSize(1, 1, 1);
ze_group_count_t groupCount{8, 1, 1};
auto commandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
result = commandList->initialize(device, NEO::EngineGroupType::compute, 0u);
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
CmdListKernelLaunchParams launchParams = {};
result = commandList->appendLaunchKernelWithParams(&kernel, groupCount, event.get(), launchParams);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
// Parse the generated command stream and inspect the first walker command.
GenCmdList commands;
ASSERT_TRUE(CmdParse<FamilyType>::parseCommandBuffer(
commands,
commandList->getCmdContainer().getCommandStream()->getCpuBase(),
commandList->getCmdContainer().getCommandStream()->getUsed()));
auto itor = find<DefaultWalkerType *>(commands.begin(), commands.end());
ASSERT_NE(itor, commands.end());
auto walkerCmd = genCmdCast<DefaultWalkerType *>(*itor);
auto &postSyncData = walkerCmd->getPostSync();
EXPECT_FALSE(postSyncData.getSystemMemoryFenceRequest());
result = context->freeMem(ptr);
ASSERT_EQ(result, ZE_RESULT_SUCCESS);
}
// Kernel with a host-memory allocation in its residency container and an event whose
// signal scope is ZE_EVENT_SCOPE_FLAG_HOST: this is the one combination in this file
// where the walker's post-sync data is expected to request a system memory fence.
HWTEST2_F(CommandListAppendLaunchKernelXe3Core,
givenAppendKernelWhenKernelUsingUsmHostMemoryAllocationsAndEventHostSignalScopeThenExpectsSystemFenceUsed, IsXe3Core) {
using DefaultWalkerType = typename FamilyType::DefaultWalkerType;
ze_result_t result = ZE_RESULT_SUCCESS;
constexpr size_t size = 4096u;
constexpr size_t alignment = 4096u;
void *ptr = nullptr;
ze_host_mem_alloc_desc_t hostDesc = {};
result = context->allocHostMem(&hostDesc, size, alignment, &ptr);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
EXPECT_NE(nullptr, ptr);
Mock<::L0::KernelImp> kernel;
auto mockModule = std::unique_ptr<Module>(new Mock<Module>(device, nullptr));
kernel.module = mockModule.get();
// Make the allocation part of the kernel's argument residency.
auto allocData = driverHandle->getSvmAllocsManager()->getSVMAlloc(ptr);
ASSERT_NE(nullptr, allocData);
auto kernelAllocation = allocData->gpuAllocations.getGraphicsAllocation(device->getRootDeviceIndex());
ASSERT_NE(nullptr, kernelAllocation);
kernel.argumentsResidencyContainer.push_back(kernelAllocation);
ze_event_pool_desc_t eventPoolDesc = {};
eventPoolDesc.count = 1;
auto eventPool = std::unique_ptr<L0::EventPool>(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result));
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
ze_event_desc_t eventDesc = {};
eventDesc.index = 0;
// Host signal scope combined with host memory.
eventDesc.signal = ZE_EVENT_SCOPE_FLAG_HOST;
eventDesc.wait = 0;
auto event = std::unique_ptr<L0::Event>(L0::Event::create<typename FamilyType::TimestampPacketType>(eventPool.get(), &eventDesc, device));
kernel.setGroupSize(1, 1, 1);
ze_group_count_t groupCount{8, 1, 1};
auto commandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
result = commandList->initialize(device, NEO::EngineGroupType::compute, 0u);
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
CmdListKernelLaunchParams launchParams = {};
result = commandList->appendLaunchKernelWithParams(&kernel, groupCount, event.get(), launchParams);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
// Parse the generated command stream and inspect the first walker command.
GenCmdList commands;
ASSERT_TRUE(CmdParse<FamilyType>::parseCommandBuffer(
commands,
commandList->getCmdContainer().getCommandStream()->getCpuBase(),
commandList->getCmdContainer().getCommandStream()->getUsed()));
auto itor = find<DefaultWalkerType *>(commands.begin(), commands.end());
ASSERT_NE(itor, commands.end());
auto walkerCmd = genCmdCast<DefaultWalkerType *>(*itor);
auto &postSyncData = walkerCmd->getPostSync();
EXPECT_TRUE(postSyncData.getSystemMemoryFenceRequest());
result = context->freeMem(ptr);
ASSERT_EQ(result, ZE_RESULT_SUCCESS);
}
} // namespace ult
} // namespace L0

View File

@@ -0,0 +1,278 @@
/*
* Copyright (C) 2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/test/common/mocks/mock_device.h"
#include "shared/test/common/test_macros/hw_test.h"
#include "level_zero/core/test/unit_tests/fixtures/device_fixture.h"
#include "level_zero/core/test/unit_tests/mocks/mock_device.h"
namespace L0 {
namespace ult {
// Fixture alias for the Xe3 core device tests in this file.
using DeviceXe3CoreTest = Test<DeviceFixture>;

// Queries memory properties twice (count only, then with storage) and compares
// the reported values with the NEO device info.
HWTEST2_F(DeviceXe3CoreTest, whenCallingGetMemoryPropertiesWithNonNullPtrThenPropertiesAreReturned, IsXe3Core) {
    // First call: no output buffer, only the number of entries is requested.
    uint32_t memoryCount = 0;
    ze_result_t status = device->getMemoryProperties(&memoryCount, nullptr);
    EXPECT_EQ(status, ZE_RESULT_SUCCESS);
    EXPECT_EQ(1u, memoryCount);

    // Second call: fetch the single entry.
    ze_device_memory_properties_t properties = {};
    status = device->getMemoryProperties(&memoryCount, &properties);
    EXPECT_EQ(status, ZE_RESULT_SUCCESS);
    EXPECT_EQ(1u, memoryCount);

    const auto &deviceInfo = this->neoDevice->getDeviceInfo();
    EXPECT_EQ(properties.maxClockRate, 0u);
    EXPECT_EQ(properties.maxBusWidth, deviceInfo.addressBits);
    EXPECT_EQ(properties.totalSize, deviceInfo.globalMemSize);
}
// Fixture alias for the command queue group property tests.
using CommandQueueGroupTest = Test<DeviceFixture>;

// With CCS and blitter both switched off in the hardware info, at least one
// command queue group must still be reported.
HWTEST2_F(CommandQueueGroupTest, givenNoBlitterSupportAndNoCCSThenOneQueueGroupIsReturned, IsXe3Core) {
    NEO::HardwareInfo hardwareInfo = *NEO::defaultHwInfo.get();
    hardwareInfo.capabilityTable.blitterOperationsSupported = false;
    hardwareInfo.featureTable.flags.ftrCCSNode = false;

    const uint32_t rootIndex = 0u;
    auto *mockNeoDevice = NEO::MockDevice::createWithNewExecutionEnvironment<NEO::MockDevice>(&hardwareInfo, rootIndex);
    MockDeviceImp l0Device(mockNeoDevice, mockNeoDevice->getExecutionEnvironment());

    uint32_t groupCount = 0;
    const ze_result_t status = l0Device.getCommandQueueGroupProperties(&groupCount, nullptr);
    EXPECT_EQ(ZE_RESULT_SUCCESS, status);
    EXPECT_GE(groupCount, 1u);
}
// With CCS enabled and blitter disabled in the hardware info, at least two
// command queue groups must be reported.
HWTEST2_F(CommandQueueGroupTest, givenNoBlitterSupportAndCCSThenTwoQueueGroupsAreReturned, IsXe3Core) {
    NEO::HardwareInfo hardwareInfo = *NEO::defaultHwInfo.get();
    hardwareInfo.capabilityTable.blitterOperationsSupported = false;
    hardwareInfo.featureTable.flags.ftrCCSNode = true;

    const uint32_t rootIndex = 0u;
    auto *mockNeoDevice = NEO::MockDevice::createWithNewExecutionEnvironment<NEO::MockDevice>(&hardwareInfo, rootIndex);
    MockDeviceImp l0Device(mockNeoDevice, mockNeoDevice->getExecutionEnvironment());

    uint32_t groupCount = 0;
    const ze_result_t status = l0Device.getCommandQueueGroupProperties(&groupCount, nullptr);
    EXPECT_EQ(ZE_RESULT_SUCCESS, status);
    EXPECT_GE(groupCount, 2u);
}
// With CCS, blitter support and every BCS bit set, at least four queue groups
// are expected; each reported group is then checked against the flags, queue
// count and fill-pattern size expected for its engine group type.
HWTEST2_F(CommandQueueGroupTest, givenBlitterSupportAndCCSThenFourQueueGroupsAreReturned, IsXe3Core) {
    NEO::HardwareInfo hwInfo = *NEO::defaultHwInfo.get();
    hwInfo.featureTable.flags.ftrCCSNode = true;
    hwInfo.capabilityTable.blitterOperationsSupported = true;
    hwInfo.featureTable.ftrBcsInfo.set();

    const uint32_t rootIndex = 0u;
    auto *mockNeoDevice = NEO::MockDevice::createWithNewExecutionEnvironment<NEO::MockDevice>(&hwInfo, rootIndex);
    MockDeviceImp l0Device(mockNeoDevice, mockNeoDevice->getExecutionEnvironment());

    // Count-only query first, then fetch the properties themselves.
    uint32_t groupCount = 0;
    ze_result_t status = l0Device.getCommandQueueGroupProperties(&groupCount, nullptr);
    EXPECT_EQ(ZE_RESULT_SUCCESS, status);
    EXPECT_GE(groupCount, 4u);

    std::vector<ze_command_queue_group_properties_t> groupProperties(groupCount);
    status = l0Device.getCommandQueueGroupProperties(&groupCount, groupProperties.data());
    EXPECT_EQ(ZE_RESULT_SUCCESS, status);

    auto &engineGroups = mockNeoDevice->getRegularEngineGroups();
    for (uint32_t groupIndex = 0; groupIndex < groupCount; groupIndex++) {
        const auto &reported = groupProperties[groupIndex];
        switch (engineGroups[groupIndex].engineGroupType) {
        case NEO::EngineGroupType::renderCompute:
            EXPECT_TRUE(reported.flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COMPUTE);
            EXPECT_TRUE(reported.flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COPY);
            EXPECT_TRUE(reported.flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COOPERATIVE_KERNELS);
            EXPECT_TRUE(reported.flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_METRICS);
            EXPECT_EQ(reported.numQueues, 1u);
            EXPECT_EQ(reported.maxMemoryFillPatternSize, std::numeric_limits<size_t>::max());
            break;
        case NEO::EngineGroupType::compute: {
            EXPECT_TRUE(reported.flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COMPUTE);
            EXPECT_TRUE(reported.flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COPY);
            EXPECT_TRUE(reported.flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COOPERATIVE_KERNELS);
            uint32_t enabledCcsCount = hwInfo.gtSystemInfo.CCSInfo.NumberOfCCSEnabled;
            EXPECT_EQ(reported.numQueues, enabledCcsCount);
            EXPECT_EQ(reported.maxMemoryFillPatternSize, std::numeric_limits<size_t>::max());
            break;
        }
        case NEO::EngineGroupType::copy:
            EXPECT_TRUE(reported.flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COPY);
            EXPECT_EQ(reported.numQueues, 1u);
            EXPECT_EQ(reported.maxMemoryFillPatternSize, sizeof(uint8_t));
            break;
        case NEO::EngineGroupType::linkedCopy:
            // One BCS bit is accounted for separately, hence count() - 1.
            EXPECT_TRUE(reported.flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COPY);
            EXPECT_EQ(reported.numQueues, hwInfo.featureTable.ftrBcsInfo.count() - 1u);
            EXPECT_EQ(reported.maxMemoryFillPatternSize, sizeof(uint8_t));
            break;
        default:
            // Other engine group types are not checked by this test.
            break;
        }
    }
}
// With CCS and blitter support but only BCS bit 0 set on top of the default
// hardware info, at least three queue groups are expected; render-compute,
// compute and copy groups are then checked individually.
HWTEST2_F(CommandQueueGroupTest, givenBlitterSupportCCSAndLinkedBcsDisabledThenThreeQueueGroupsAreReturned, IsXe3Core) {
    NEO::HardwareInfo hwInfo = *NEO::defaultHwInfo.get();
    hwInfo.featureTable.flags.ftrCCSNode = true;
    hwInfo.capabilityTable.blitterOperationsSupported = true;
    hwInfo.featureTable.ftrBcsInfo.set(0);

    const uint32_t rootIndex = 0u;
    auto *mockNeoDevice = NEO::MockDevice::createWithNewExecutionEnvironment<NEO::MockDevice>(&hwInfo, rootIndex);
    MockDeviceImp l0Device(mockNeoDevice, mockNeoDevice->getExecutionEnvironment());

    // Count-only query first, then fetch the properties themselves.
    uint32_t groupCount = 0;
    ze_result_t status = l0Device.getCommandQueueGroupProperties(&groupCount, nullptr);
    EXPECT_EQ(ZE_RESULT_SUCCESS, status);
    EXPECT_GE(groupCount, 3u);

    std::vector<ze_command_queue_group_properties_t> groupProperties(groupCount);
    status = l0Device.getCommandQueueGroupProperties(&groupCount, groupProperties.data());
    EXPECT_EQ(ZE_RESULT_SUCCESS, status);

    auto &engineGroups = mockNeoDevice->getRegularEngineGroups();
    for (uint32_t groupIndex = 0; groupIndex < groupCount; groupIndex++) {
        const auto &reported = groupProperties[groupIndex];
        switch (engineGroups[groupIndex].engineGroupType) {
        case NEO::EngineGroupType::renderCompute:
            EXPECT_TRUE(reported.flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COMPUTE);
            EXPECT_TRUE(reported.flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COPY);
            EXPECT_TRUE(reported.flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COOPERATIVE_KERNELS);
            EXPECT_TRUE(reported.flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_METRICS);
            EXPECT_EQ(reported.numQueues, 1u);
            EXPECT_EQ(reported.maxMemoryFillPatternSize, std::numeric_limits<size_t>::max());
            break;
        case NEO::EngineGroupType::compute: {
            EXPECT_TRUE(reported.flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COMPUTE);
            EXPECT_TRUE(reported.flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COPY);
            EXPECT_TRUE(reported.flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COOPERATIVE_KERNELS);
            uint32_t enabledCcsCount = hwInfo.gtSystemInfo.CCSInfo.NumberOfCCSEnabled;
            EXPECT_EQ(reported.numQueues, enabledCcsCount);
            EXPECT_EQ(reported.maxMemoryFillPatternSize, std::numeric_limits<size_t>::max());
            break;
        }
        case NEO::EngineGroupType::copy:
            EXPECT_TRUE(reported.flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COPY);
            EXPECT_EQ(reported.numQueues, 1u);
            EXPECT_EQ(reported.maxMemoryFillPatternSize, sizeof(uint8_t));
            break;
        default:
            // Other engine group types are not checked by this test.
            break;
        }
    }
}
// Blitter is turned off through the EnableBlitterOperationsSupport debug flag
// while every BCS bit is set in the hardware info; exactly two queue groups are
// expected.
HWTEST2_F(CommandQueueGroupTest, givenBlitterDisabledAndAllBcsSetThenTwoQueueGroupsAreReturned, IsXe3Core) {
    // Guard object kept alive for the whole test while the debug flag is modified.
    DebugManagerStateRestore flagsGuard;
    debugManager.flags.EnableBlitterOperationsSupport.set(0);

    NEO::HardwareInfo hardwareInfo = *NEO::defaultHwInfo.get();
    hardwareInfo.featureTable.flags.ftrCCSNode = true;
    hardwareInfo.featureTable.ftrBcsInfo.set();

    const uint32_t rootIndex = 0u;
    auto *mockNeoDevice = NEO::MockDevice::createWithNewExecutionEnvironment<NEO::MockDevice>(&hardwareInfo, rootIndex);
    MockDeviceImp l0Device(mockNeoDevice, mockNeoDevice->getExecutionEnvironment());

    uint32_t groupCount = 0;
    const ze_result_t status = l0Device.getCommandQueueGroupProperties(&groupCount, nullptr);
    EXPECT_EQ(ZE_RESULT_SUCCESS, status);
    EXPECT_EQ(groupCount, 2u);
}
// Device fixture variant that sets the EnableBlitterOperationsSupport debug flag
// to 0 before the base fixture is set up.
class DeviceCopyQueueGroupXe3CoreFixture : public DeviceFixture {
  public:
    // Guard object that lives as long as the fixture; declared as a member so it
    // spans setUp() through tearDown().
    DebugManagerStateRestore restorer;

    // The flag must be written before DeviceFixture::setUp() runs.
    void setUp() {
        debugManager.flags.EnableBlitterOperationsSupport.set(0);
        DeviceFixture::setUp();
    }

    // Nothing extra to undo here; simply forwards to the base fixture.
    void tearDown() {
        DeviceFixture::tearDown();
    }
};
// Test alias built on the fixture that disables blitter operations via debug flag.
using DeviceCopyQueueGroupXe3CoreTest = Test<DeviceCopyQueueGroupXe3CoreFixture>;

// Even though the hardware info advertises blitter support and BCS bit 0, the
// debug flag set by the fixture must leave no copy engine group on the device.
HWTEST2_F(DeviceCopyQueueGroupXe3CoreTest,
          givenBlitterSupportAndEnableBlitterOperationsSupportSetToZeroThenNoCopyEngineIsReturned, IsXe3Core) {
    NEO::HardwareInfo hardwareInfo = *NEO::defaultHwInfo.get();
    hardwareInfo.capabilityTable.blitterOperationsSupported = true;
    hardwareInfo.featureTable.flags.ftrCCSNode = false;
    hardwareInfo.featureTable.ftrBcsInfo.set(0);

    const uint32_t rootIndex = 0u;
    auto *mockNeoDevice = NEO::MockDevice::createWithNewExecutionEnvironment<NEO::MockDevice>(&hardwareInfo, rootIndex);
    MockDeviceImp l0Device(mockNeoDevice, mockNeoDevice->getExecutionEnvironment());

    // Both forms of the query (count only, then with storage) must succeed.
    uint32_t groupCount = 0;
    ze_result_t status = l0Device.getCommandQueueGroupProperties(&groupCount, nullptr);
    EXPECT_EQ(ZE_RESULT_SUCCESS, status);

    std::vector<ze_command_queue_group_properties_t> groupProperties(groupCount);
    status = l0Device.getCommandQueueGroupProperties(&groupCount, groupProperties.data());
    EXPECT_EQ(ZE_RESULT_SUCCESS, status);

    for (auto &group : mockNeoDevice->getRegularEngineGroups()) {
        EXPECT_NE(NEO::EngineGroupType::copy, group.engineGroupType);
    }
}
// Value-parameterized fixture: the uint32_t parameter is read by the tests via
// GetParam(). GoogleTest's SetUp/TearDown hooks are bridged to the
// DeviceFixture setUp/tearDown pair.
class CommandQueueGroupTestXe3Core : public DeviceFixture,
                                     public testing::TestWithParam<uint32_t> {
  public:
    // Called by GoogleTest before each test body.
    void SetUp() override {
        DeviceFixture::setUp();
    }

    // Called by GoogleTest after each test body.
    void TearDown() override {
        DeviceFixture::tearDown();
    }
};
// Starts from a BCS mask of maxNBitValue(2), additionally sets the bit selected
// by the test parameter, and checks the properties reported for each engine
// group type.
HWTEST2_P(CommandQueueGroupTestXe3Core, givenVaryingBlitterSupportAndCCSThenBCSGroupContainsCorrectNumberOfEngines, IsXe3Core) {
    NEO::HardwareInfo hwInfo = *NEO::defaultHwInfo.get();
    hwInfo.featureTable.flags.ftrCCSNode = true;
    hwInfo.capabilityTable.blitterOperationsSupported = true;
    hwInfo.featureTable.ftrBcsInfo = maxNBitValue(2);
    hwInfo.featureTable.ftrBcsInfo.set(GetParam());

    const uint32_t rootIndex = 0u;
    auto *mockNeoDevice = NEO::MockDevice::createWithNewExecutionEnvironment<NEO::MockDevice>(&hwInfo, rootIndex);
    MockDeviceImp l0Device(mockNeoDevice, mockNeoDevice->getExecutionEnvironment());

    // Count-only query first, then fetch the properties themselves.
    uint32_t groupCount = 0;
    ze_result_t status = l0Device.getCommandQueueGroupProperties(&groupCount, nullptr);
    EXPECT_EQ(ZE_RESULT_SUCCESS, status);
    EXPECT_GE(groupCount, 3u);

    std::vector<ze_command_queue_group_properties_t> groupProperties(groupCount);
    status = l0Device.getCommandQueueGroupProperties(&groupCount, groupProperties.data());
    EXPECT_EQ(ZE_RESULT_SUCCESS, status);

    auto &engineGroups = mockNeoDevice->getRegularEngineGroups();
    for (uint32_t groupIndex = 0; groupIndex < groupCount; groupIndex++) {
        const auto &reported = groupProperties[groupIndex];
        switch (engineGroups[groupIndex].engineGroupType) {
        case NEO::EngineGroupType::renderCompute:
            EXPECT_TRUE(reported.flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COMPUTE);
            EXPECT_TRUE(reported.flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COPY);
            EXPECT_TRUE(reported.flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COOPERATIVE_KERNELS);
            EXPECT_TRUE(reported.flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_METRICS);
            EXPECT_EQ(reported.numQueues, 1u);
            EXPECT_EQ(reported.maxMemoryFillPatternSize, std::numeric_limits<size_t>::max());
            break;
        case NEO::EngineGroupType::compute: {
            EXPECT_TRUE(reported.flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COMPUTE);
            EXPECT_TRUE(reported.flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COPY);
            EXPECT_TRUE(reported.flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COOPERATIVE_KERNELS);
            uint32_t enabledCcsCount = hwInfo.gtSystemInfo.CCSInfo.NumberOfCCSEnabled;
            EXPECT_EQ(reported.numQueues, enabledCcsCount);
            EXPECT_EQ(reported.maxMemoryFillPatternSize, std::numeric_limits<size_t>::max());
            break;
        }
        case NEO::EngineGroupType::copy:
            EXPECT_TRUE(reported.flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COPY);
            EXPECT_EQ(reported.numQueues, 1u);
            EXPECT_EQ(reported.maxMemoryFillPatternSize, sizeof(uint8_t));
            break;
        case NEO::EngineGroupType::linkedCopy:
            // One BCS bit is accounted for separately, hence count() - 1.
            EXPECT_TRUE(reported.flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COPY);
            EXPECT_EQ(reported.numQueues, hwInfo.featureTable.ftrBcsInfo.count() - 1u);
            EXPECT_EQ(reported.maxMemoryFillPatternSize, sizeof(uint8_t));
            break;
        default:
            // Other engine group types are not checked by this test.
            break;
        }
    }
}

// Runs the parameterized test once for each extra BCS bit index 0..3.
INSTANTIATE_TEST_SUITE_P(CommandQueueGroupTestXe3CoreValues,
                         CommandQueueGroupTestXe3Core,
                         testing::Values(0, 1, 2, 3));
// The device properties reported for Xe3 must carry the on-demand paging flag.
HWTEST2_F(DeviceXe3CoreTest, givenReturnedDevicePropertiesThenExpectedPageFaultSupportReturned, IsXe3Core) {
    ze_device_properties_t properties = {};
    properties.stype = ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES;
    device->getProperties(&properties);

    const auto onDemandPagingBit = properties.flags & ZE_DEVICE_PROPERTY_FLAG_ONDEMANDPAGING;
    EXPECT_NE(0u, onDemandPagingBit);
}
} // namespace ult
} // namespace L0

View File

@@ -0,0 +1,134 @@
/*
* Copyright (C) 2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/test/common/helpers/debug_manager_state_restore.h"
#include "shared/test/common/helpers/default_hw_info.h"
#include "shared/test/common/mocks/mock_execution_environment.h"
#include "shared/test/common/test_macros/header/per_product_test_definitions.h"
#include "shared/test/common/test_macros/test.h"
#include "level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper.h"
#include "level_zero/core/test/unit_tests/fixtures/device_fixture.h"
#include "hw_cmds_xe3_core.h"
namespace L0 {
namespace ult {
// Fixture alias for the Xe3 L0GfxCoreHelper tests in this file.
using L0GfxCoreHelperTestXe3 = Test<DeviceFixture>;

// The generic image-compression test is excluded for IGFX_XE3_CORE; the test
// below covers the same query for Xe3.
HWTEST_EXCLUDE_PRODUCT(L0GfxCoreHelperTest, givenL0GfxCoreHelperWhenAskingForImageCompressionSupportThenReturnFalse, IGFX_XE3_CORE);

// Image compression support follows the capability table entry unless the
// RenderCompressedImagesEnabled debug flag is set, in which case the flag wins.
XE3_CORETEST_F(L0GfxCoreHelperTestXe3, givenL0GfxCoreHelperWhenAskingForImageCompressionSupportThenReturnCorrectValue) {
    DebugManagerStateRestore restore;
    auto &helper = getHelper<L0GfxCoreHelper>();
    HardwareInfo hardwareInfo = *NEO::defaultHwInfo;
    auto &overrideFlag = NEO::debugManager.flags.RenderCompressedImagesEnabled;

    // No override: the result mirrors the capability table.
    hardwareInfo.capabilityTable.ftrRenderCompressedImages = true;
    EXPECT_TRUE(helper.imageCompressionSupported(hardwareInfo));

    hardwareInfo.capabilityTable.ftrRenderCompressedImages = false;
    EXPECT_FALSE(helper.imageCompressionSupported(hardwareInfo));

    // Override set to 1 while the capability is still false.
    overrideFlag.set(1);
    EXPECT_TRUE(helper.imageCompressionSupported(hardwareInfo));

    // Override set to 0 while the capability is true.
    hardwareInfo.capabilityTable.ftrRenderCompressedImages = true;
    overrideFlag.set(0);
    EXPECT_FALSE(helper.imageCompressionSupported(hardwareInfo));
}
// USM compression support follows the capability table entry unless the
// RenderCompressedBuffersEnabled debug flag is set, in which case the flag wins.
XE3_CORETEST_F(L0GfxCoreHelperTestXe3, givenL0GfxCoreHelperWhenAskingForUsmCompressionSupportThenReturnCorrectValue) {
    DebugManagerStateRestore restore;
    auto &helper = getHelper<L0GfxCoreHelper>();
    EXPECT_TRUE(helper.forceDefaultUsmCompressionSupport());

    HardwareInfo hardwareInfo = *NEO::defaultHwInfo;
    auto &overrideFlag = NEO::debugManager.flags.RenderCompressedBuffersEnabled;

    // No override: the result mirrors the capability table.
    hardwareInfo.capabilityTable.ftrRenderCompressedBuffers = true;
    EXPECT_TRUE(helper.usmCompressionSupported(hardwareInfo));

    hardwareInfo.capabilityTable.ftrRenderCompressedBuffers = false;
    EXPECT_FALSE(helper.usmCompressionSupported(hardwareInfo));

    // Override set to 1 while the capability is still false.
    overrideFlag.set(1);
    EXPECT_TRUE(helper.usmCompressionSupported(hardwareInfo));

    // Override set to 0 while the capability is true.
    hardwareInfo.capabilityTable.ftrRenderCompressedBuffers = true;
    overrideFlag.set(0);
    EXPECT_FALSE(helper.usmCompressionSupported(hardwareInfo));
}
// Command-list heap sharing must be reported as supported on Xe3.
XE3_CORETEST_F(L0GfxCoreHelperTestXe3, GivenXe3WhenCheckingL0HelperForCmdListHeapSharingSupportThenReturnTrue) {
    const auto heapSharingSupported = getHelper<L0GfxCoreHelper>().platformSupportsCmdListHeapSharing();
    EXPECT_TRUE(heapSharingSupported);
}
// State compute mode tracking must be reported as supported on Xe3.
XE3_CORETEST_F(L0GfxCoreHelperTestXe3, GivenXe3WhenCheckingL0HelperForStateComputeModeTrackingSupportThenReturnTrue) {
    const auto scmTrackingSupported = getHelper<L0GfxCoreHelper>().platformSupportsStateComputeModeTracking();
    EXPECT_TRUE(scmTrackingSupported);
}
// Front-end tracking must be reported as supported on Xe3.
XE3_CORETEST_F(L0GfxCoreHelperTestXe3, GivenXe3WhenCheckingL0HelperForFrontEndTrackingSupportThenReturnTrue) {
    const auto frontEndTrackingSupported = getHelper<L0GfxCoreHelper>().platformSupportsFrontEndTracking();
    EXPECT_TRUE(frontEndTrackingSupported);
}
// Pipeline select tracking must be reported as supported on Xe3.
XE3_CORETEST_F(L0GfxCoreHelperTestXe3, GivenXe3WhenCheckingL0HelperForPipelineSelectTrackingSupportThenReturnTrue) {
    const auto pipelineSelectTrackingSupported = getHelper<L0GfxCoreHelper>().platformSupportsPipelineSelectTracking();
    EXPECT_TRUE(pipelineSelectTrackingSupported);
}
// State base address tracking must be reported as NOT supported for the
// fixture device's root device environment.
XE3_CORETEST_F(L0GfxCoreHelperTestXe3, GivenXe3WhenCheckingL0HelperForStateBaseAddressTrackingSupportThenReturnFalse) {
    auto &helper = getHelper<L0GfxCoreHelper>();
    const auto &rootDeviceEnvironment = device->getNEODevice()->getRootDeviceEnvironment();
    EXPECT_FALSE(helper.platformSupportsStateBaseAddressTracking(rootDeviceEnvironment));
}
// The heap address model reported for the fixture device's root device
// environment must be private heaps.
XE3_CORETEST_F(L0GfxCoreHelperTestXe3, GivenXe3CoreWhenGettingPlatformDefaultHeapAddressModelThenReturnPrivateHeaps) {
    auto &helper = getHelper<L0GfxCoreHelper>();
    const auto &rootDeviceEnvironment = device->getNEODevice()->getRootDeviceEnvironment();
    EXPECT_EQ(NEO::HeapAddressModel::privateHeaps, helper.getPlatformHeapAddressModel(rootDeviceEnvironment));
}
// Primary batch buffer command lists must be reported as supported on Xe3.
XE3_CORETEST_F(L0GfxCoreHelperTestXe3, GivenXe3CoreWhenCheckingL0HelperForCmdlistPrimaryBufferSupportThenReturnTrue) {
    const auto primaryBufferSupported = getHelper<L0GfxCoreHelper>().platformSupportsPrimaryBatchBufferCmdList();
    EXPECT_TRUE(primaryBufferSupported);
}
// Immediate compute flush task must be reported as supported on Xe3.
XE3_CORETEST_F(L0GfxCoreHelperTestXe3, GivenXe3WhenCheckingL0HelperForPlatformSupportsImmediateFlushTaskThenReturnTrue) {
    const auto immediateFlushTaskSupported = getHelper<L0GfxCoreHelper>().platformSupportsImmediateComputeFlushTask();
    EXPECT_TRUE(immediateFlushTaskSupported);
}
// The supported RTAS format, viewed as the internal enum, must be version2.
XE3_CORETEST_F(L0GfxCoreHelperTestXe3, GivenXe3CoreWhenGettingSupportedRTASFormatThenExpectedFormatIsReturned) {
    const auto &helper = getHelper<L0GfxCoreHelper>();
    const auto reportedFormat = static_cast<RTASDeviceFormatInternal>(helper.getSupportedRTASFormat());
    EXPECT_EQ(RTASDeviceFormatInternal::version2, reportedFormat);
}
// The command-list update capabilities value must equal 127 (0x7F, i.e. the
// seven lowest bits set).
XE3_CORETEST_F(L0GfxCoreHelperTestXe3, GivenXe3WhenGettingCmdlistUpdateCapabilityThenReturnCorrectValue) {
    const auto updateCapabilities = getHelper<L0GfxCoreHelper>().getPlatformCmdListUpdateCapabilities();
    EXPECT_EQ(127u, updateCapabilities);
}
// Large-GRF detection must use the SR register set type on Xe3.
XE3_CORETEST_F(L0GfxCoreHelperTestXe3, GivenXe3WhenGetRegsetTypeForLargeGrfDetectionIsCalledThenSrRegsetTypeIsRetuned) {
    const auto regsetType = getHelper<L0GfxCoreHelper>().getRegsetTypeForLargeGrfDetection();
    EXPECT_EQ(ZET_DEBUG_REGSET_TYPE_SR_INTEL_GPU, regsetType);
}
// Feeds eight dwords with only element 4 set to all ones and expects the GRF
// register count to come back as 0x1FF.
XE3_CORETEST_F(L0GfxCoreHelperTestXe3, GivenXe3WhenGetGrfRegisterCountIsCalledThenCorrectMaskIsRetuned) {
    auto &helper = getHelper<L0GfxCoreHelper>();

    std::vector<uint32_t> registerDwords(8u, 0u);
    registerDwords[4] = 0xFFFFFFFF;

    constexpr uint32_t expectedMask = 0x1FF;
    EXPECT_EQ(expectedMask, helper.getGrfRegisterCount(registerDwords.data()));
}
} // namespace ult
} // namespace L0

View File

@@ -0,0 +1,196 @@
/*
* Copyright (C) 2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/kernel/kernel_properties.h"
#include "shared/test/common/mocks/mock_device.h"
#include "shared/test/common/test_macros/hw_test.h"
#include "level_zero/core/test/unit_tests/fixtures/device_fixture.h"
#include "level_zero/core/test/unit_tests/mocks/mock_device.h"
#include "level_zero/core/test/unit_tests/mocks/mock_kernel.h"
#include "level_zero/core/test/unit_tests/mocks/mock_module.h"
namespace L0 {
namespace ult {
// Fixture alias for the kernel property tests.
using KernelPropertyTest = Test<DeviceFixture>;

// Chains a float-atomic extension structure to the module properties query and
// checks the fp16 / fp32 / fp64 atomic capability bits reported for Xe3.
HWTEST2_F(KernelPropertyTest, givenKernelExtendedPropertiesStructureWhenKernelPropertiesCalledThenPropertiesAreCorrectlySet, IsXe3Core) {
    ze_float_atomic_ext_properties_t floatAtomicProperties = {};
    floatAtomicProperties.stype = ZE_STRUCTURE_TYPE_FLOAT_ATOMIC_EXT_PROPERTIES;

    ze_device_module_properties_t moduleProperties = {};
    moduleProperties.pNext = &floatAtomicProperties;

    const ze_result_t status = device->getKernelProperties(&moduleProperties);
    EXPECT_EQ(status, ZE_RESULT_SUCCESS);

    // fp16: load/store and min/max are expected, add is not.
    const auto &fp16 = floatAtomicProperties.fp16Flags;
    EXPECT_TRUE(fp16 & FpAtomicExtFlags::globalLoadStore);
    EXPECT_TRUE(fp16 & FpAtomicExtFlags::localLoadStore);
    EXPECT_TRUE(fp16 & FpAtomicExtFlags::globalMinMax);
    EXPECT_TRUE(fp16 & FpAtomicExtFlags::localMinMax);
    EXPECT_FALSE(fp16 & FpAtomicExtFlags::globalAdd);
    EXPECT_FALSE(fp16 & FpAtomicExtFlags::localAdd);

    // fp32: every checked capability is expected.
    const auto &fp32 = floatAtomicProperties.fp32Flags;
    EXPECT_TRUE(fp32 & FpAtomicExtFlags::globalLoadStore);
    EXPECT_TRUE(fp32 & FpAtomicExtFlags::localLoadStore);
    EXPECT_TRUE(fp32 & FpAtomicExtFlags::globalMinMax);
    EXPECT_TRUE(fp32 & FpAtomicExtFlags::localMinMax);
    EXPECT_TRUE(fp32 & FpAtomicExtFlags::globalAdd);
    EXPECT_TRUE(fp32 & FpAtomicExtFlags::localAdd);

    // fp64: every checked capability is expected.
    const auto &fp64 = floatAtomicProperties.fp64Flags;
    EXPECT_TRUE(fp64 & FpAtomicExtFlags::globalLoadStore);
    EXPECT_TRUE(fp64 & FpAtomicExtFlags::localLoadStore);
    EXPECT_TRUE(fp64 & FpAtomicExtFlags::globalMinMax);
    EXPECT_TRUE(fp64 & FpAtomicExtFlags::localMinMax);
    EXPECT_TRUE(fp64 & FpAtomicExtFlags::globalAdd);
    EXPECT_TRUE(fp64 & FpAtomicExtFlags::localAdd);
}
// Plain GoogleTest fixture; each scenario below builds its own mock device.
using Xe3KernelSetupTests = ::testing::Test;

// For every GRF count, and for every (SIMT size, HW local-id generation) pair,
// requests a 1024x1024x1024 group size and checks the resulting number of
// threads per thread group. Scenarios run in table order, each one on a fresh
// device / kernel / module.
XE3_CORETEST_F(Xe3KernelSetupTests, givenParamsWhenSetupGroupSizeThenNumThreadsPerThreadGroupAreCorrectly) {
    DebugManagerStateRestore restore;
    VariableBackup<HardwareInfo> backupHwInfo(defaultHwInfo.get());

    struct SimdCase {
        uint32_t simtSize;
        uint32_t hwLocalIdGeneration;
        uint32_t expectedThreadsPerThreadGroup;
    };
    struct GrfScenario {
        uint16_t numGrfRequired;
        SimdCase simdCases[4];
    };

    // Columns of each inner entry: SIMT Size, HW local-id generation, Max Num of threads
    const GrfScenario scenarios[] = {
        {128u, {{16u, 0u, 64u}, {16u, 1u, 64u}, {32u, 1u, 32u}, {32u, 0u, 64u}}},
        {160u, {{16u, 0u, 48u}, {16u, 1u, 48u}, {32u, 1u, 32u}, {32u, 0u, 48u}}},
        {192u, {{16u, 0u, 40u}, {16u, 1u, 40u}, {32u, 1u, 32u}, {32u, 0u, 40u}}},
        {256u, {{16u, 0u, 32u}, {16u, 1u, 32u}, {32u, 1u, 32u}, {32u, 0u, 32u}}},
        {512u, {{16u, 0u, 16u}, {16u, 1u, 16u}, {32u, 1u, 16u}, {32u, 0u, 16u}}},
    };

    for (const auto &scenario : scenarios) {
        NEO::Device *mockNeoDevice(NEO::MockDevice::createWithNewExecutionEnvironment<NEO::MockDevice>(NEO::defaultHwInfo.get(), 0));
        MockDeviceImp l0Device(mockNeoDevice, mockNeoDevice->getExecutionEnvironment());

        Mock<KernelImp> kernel;
        kernel.descriptor.kernelAttributes.numGrfRequired = scenario.numGrfRequired;
        kernel.enableForcingOfGenerateLocalIdByHw = true;

        Mock<Module> module(&l0Device, nullptr);
        module.getMaxGroupSizeResult = UINT32_MAX;
        kernel.module = &module;

        for (const auto &simdCase : scenario.simdCases) {
            kernel.descriptor.kernelAttributes.simdSize = simdCase.simtSize;
            kernel.forceGenerateLocalIdByHw = simdCase.hwLocalIdGeneration;
            kernel.setGroupSize(1024u, 1024u, 1024u);
            EXPECT_EQ(simdCase.expectedThreadsPerThreadGroup, kernel.numThreadsPerThreadGroup);

            // Clear the stored group size before the next case.
            // NOTE(review): presumably so the next identical setGroupSize request is not short-circuited - confirm.
            kernel.groupSize[0] = kernel.groupSize[1] = kernel.groupSize[2] = 0;
        }
    }
}
} // namespace ult
} // namespace L0