performance: Set dispatch all for small TG

Resolves: NEO-11814

Signed-off-by: Lukasz Jobczyk <lukasz.jobczyk@intel.com>
This commit is contained in:
Lukasz Jobczyk
2024-09-27 09:54:41 +00:00
committed by Compute-Runtime-Automation
parent ef1b569a85
commit 0dc2870513
19 changed files with 110 additions and 69 deletions

View File

@@ -13,6 +13,7 @@ if(TESTS_XE2_HPG_CORE)
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
${CMAKE_CURRENT_SOURCE_DIR}/aub_command_stream_receiver_2_tests_xe2_hpg_core.cpp
${CMAKE_CURRENT_SOURCE_DIR}/compute_mode_tests_xe2_hpg_core.cpp
${CMAKE_CURRENT_SOURCE_DIR}/dispatch_walker_tests_xe2_hpg_core.cpp
${CMAKE_CURRENT_SOURCE_DIR}/gfx_core_helper_tests_xe2_hpg_core.cpp
${CMAKE_CURRENT_SOURCE_DIR}/hw_cmds_xe2_hpg_core_tests.cpp
${CMAKE_CURRENT_SOURCE_DIR}/image_surface_state_tests_xe2_hpg_core.cpp

View File

@@ -0,0 +1,77 @@
/*
* Copyright (C) 2024 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/command_container/command_encoder.h"
#include "shared/source/kernel/kernel_descriptor.h"
#include "shared/source/os_interface/product_helper.h"
#include "shared/source/xe2_hpg_core/hw_cmds.h"
#include "shared/test/common/helpers/debug_manager_state_restore.h"
#include "shared/test/common/helpers/default_hw_info.h"
#include "shared/test/common/mocks/mock_execution_environment.h"
#include "shared/test/common/test_macros/header/per_product_test_definitions.h"
#include "shared/test/common/test_macros/test.h"
using namespace NEO;
using WalkerDispatchTestsXe2HpGCore = ::testing::Test;
XE2_HPG_CORETEST_F(WalkerDispatchTestsXe2HpGCore, whenEncodeAdditionalWalkerFieldsIsCalledThenComputeDispatchAllIsCorrectlySet) {
using COMPUTE_WALKER = typename FamilyType::COMPUTE_WALKER;
DebugManagerStateRestore debugRestorer;
auto walkerCmd = FamilyType::cmdInitGpgpuWalker;
MockExecutionEnvironment mockExecutionEnvironment{};
auto &rootDeviceEnvironment = *mockExecutionEnvironment.rootDeviceEnvironments[0];
KernelDescriptor kernelDescriptor;
EncodeWalkerArgs walkerArgs{KernelExecutionType::concurrent, true, kernelDescriptor, NEO::RequiredDispatchWalkOrder::none, 0, 113};
{
EncodeDispatchKernel<FamilyType>::encodeAdditionalWalkerFields(rootDeviceEnvironment, walkerCmd, &walkerCmd.getInterfaceDescriptor(), walkerArgs);
EXPECT_TRUE(walkerCmd.getComputeDispatchAllWalkerEnable());
}
auto backupCcsNumber = rootDeviceEnvironment.getNonLimitedNumberOfCcs();
rootDeviceEnvironment.setNonLimitedNumberOfCcs(2);
walkerArgs.kernelExecutionType = KernelExecutionType::defaultType;
{
walkerCmd.getInterfaceDescriptor().setThreadGroupDispatchSize(FamilyType::INTERFACE_DESCRIPTOR_DATA::THREAD_GROUP_DISPATCH_SIZE_TG_SIZE_1);
EncodeDispatchKernel<FamilyType>::encodeAdditionalWalkerFields(rootDeviceEnvironment, walkerCmd, &walkerCmd.getInterfaceDescriptor(), walkerArgs);
EXPECT_TRUE(walkerCmd.getComputeDispatchAllWalkerEnable());
}
{
walkerArgs.maxFrontEndThreads = 0u;
EncodeDispatchKernel<FamilyType>::encodeAdditionalWalkerFields(rootDeviceEnvironment, walkerCmd, &walkerCmd.getInterfaceDescriptor(), walkerArgs);
EXPECT_FALSE(walkerCmd.getComputeDispatchAllWalkerEnable());
}
{
walkerCmd.getInterfaceDescriptor().setThreadGroupDispatchSize(FamilyType::INTERFACE_DESCRIPTOR_DATA::THREAD_GROUP_DISPATCH_SIZE_TG_SIZE_2);
EncodeDispatchKernel<FamilyType>::encodeAdditionalWalkerFields(rootDeviceEnvironment, walkerCmd, &walkerCmd.getInterfaceDescriptor(), walkerArgs);
EXPECT_FALSE(walkerCmd.getComputeDispatchAllWalkerEnable());
}
{
walkerCmd.getInterfaceDescriptor().setThreadGroupDispatchSize(FamilyType::INTERFACE_DESCRIPTOR_DATA::THREAD_GROUP_DISPATCH_SIZE_TG_SIZE_1);
EncodeDispatchKernel<FamilyType>::template encodeAdditionalWalkerFields<COMPUTE_WALKER, typename FamilyType::INTERFACE_DESCRIPTOR_DATA>(rootDeviceEnvironment, walkerCmd, nullptr, walkerArgs);
EXPECT_FALSE(walkerCmd.getComputeDispatchAllWalkerEnable());
}
{
rootDeviceEnvironment.setNonLimitedNumberOfCcs(1);
EncodeDispatchKernel<FamilyType>::encodeAdditionalWalkerFields(rootDeviceEnvironment, walkerCmd, &walkerCmd.getInterfaceDescriptor(), walkerArgs);
EXPECT_FALSE(walkerCmd.getComputeDispatchAllWalkerEnable());
}
{
rootDeviceEnvironment.setNonLimitedNumberOfCcs(backupCcsNumber);
debugManager.flags.ComputeDispatchAllWalkerEnableInComputeWalker.set(1);
EncodeDispatchKernel<FamilyType>::encodeAdditionalWalkerFields(rootDeviceEnvironment, walkerCmd, &walkerCmd.getInterfaceDescriptor(), walkerArgs);
EXPECT_TRUE(walkerCmd.getComputeDispatchAllWalkerEnable());
}
}

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2021-2023 Intel Corporation
* Copyright (C) 2021-2024 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -28,13 +28,13 @@ XE_HPC_CORETEST_F(WalkerDispatchTestsXeHpcCore, givenXeHpcWhenEncodeAdditionalWa
KernelDescriptor kernelDescriptor;
EncodeWalkerArgs walkerArgs{KernelExecutionType::defaultType, true, kernelDescriptor, NEO::RequiredDispatchWalkOrder::none, 0};
{
EncodeDispatchKernel<FamilyType>::encodeAdditionalWalkerFields(rootDeviceEnvironment, walkerCmd, walkerArgs);
EncodeDispatchKernel<FamilyType>::encodeAdditionalWalkerFields(rootDeviceEnvironment, walkerCmd, &walkerCmd.getInterfaceDescriptor(), walkerArgs);
EXPECT_FALSE(walkerCmd.getComputeDispatchAllWalkerEnable());
}
{
debugManager.flags.ComputeDispatchAllWalkerEnableInComputeWalker.set(1);
EncodeDispatchKernel<FamilyType>::encodeAdditionalWalkerFields(rootDeviceEnvironment, walkerCmd, walkerArgs);
EncodeDispatchKernel<FamilyType>::encodeAdditionalWalkerFields(rootDeviceEnvironment, walkerCmd, &walkerCmd.getInterfaceDescriptor(), walkerArgs);
EXPECT_TRUE(walkerCmd.getComputeDispatchAllWalkerEnable());
}
}

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2022-2023 Intel Corporation
* Copyright (C) 2022-2024 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -53,7 +53,7 @@ PVCTEST_F(WalkerDispatchTestsPvc, givenPvcWhenEncodeAdditionalWalkerFieldsThenPo
testInput.programGlobalFenceAsPostSyncOperationInComputeWalker);
postSyncData.setSystemMemoryFenceRequest(false);
EncodeDispatchKernel<FamilyType>::encodeAdditionalWalkerFields(rootDeviceEnvironment, walkerCmd, walkerArgs);
EncodeDispatchKernel<FamilyType>::encodeAdditionalWalkerFields(rootDeviceEnvironment, walkerCmd, &walkerCmd.getInterfaceDescriptor(), walkerArgs);
EXPECT_EQ(testInput.expectSystemMemoryFenceRequest, postSyncData.getSystemMemoryFenceRequest());
}
}
@@ -75,7 +75,7 @@ PVCTEST_F(WalkerDispatchTestsPvc, givenPvcSupportsSystemMemoryFenceWhenNoSystemF
hwInfo.platform.usDeviceID = deviceId;
postSyncData.setSystemMemoryFenceRequest(true);
EncodeDispatchKernel<FamilyType>::encodeAdditionalWalkerFields(rootDeviceEnvironment, walkerCmd, walkerArgs);
EncodeDispatchKernel<FamilyType>::encodeAdditionalWalkerFields(rootDeviceEnvironment, walkerCmd, &walkerCmd.getInterfaceDescriptor(), walkerArgs);
EXPECT_FALSE(postSyncData.getSystemMemoryFenceRequest());
}
}

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2021-2023 Intel Corporation
* Copyright (C) 2021-2024 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -86,7 +86,7 @@ DG2TEST_F(CommandEncodeDG2Test, whenProgramComputeWalkerThenApplyL3WAForDg2G10A0
hwInfo.ipVersion = compilerProductHelper.getHwIpVersion(hwInfo);
rootDeviceEnvironment.releaseHelper = ReleaseHelper::create(hwInfo.ipVersion);
EncodeDispatchKernel<FamilyType>::encodeAdditionalWalkerFields(rootDeviceEnvironment, walkerCmd, walkerArgs);
EncodeDispatchKernel<FamilyType>::encodeAdditionalWalkerFields(rootDeviceEnvironment, walkerCmd, &walkerCmd.getInterfaceDescriptor(), walkerArgs);
if (DG2::isG10(hwInfo) && revisionID < revIdB0) {
EXPECT_TRUE(walkerCmd.getL3PrefetchDisable());