mirror of
https://github.com/intel/compute-runtime.git
synced 2025-09-10 12:53:42 +08:00
Adjust preferred wgs multiple for specific configs
Change-Id: Ib7e788760f0400b983e03044386f04637e12727e Signed-off-by: Kamil Kopryk <kamil.kopryk@intel.com> Related-To: NEO-4331
This commit is contained in:
@ -5,8 +5,13 @@
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/gen12lp/hw_cmds.h"
|
||||
|
||||
using Family = NEO::TGLLPFamily;
|
||||
|
||||
#include "shared/source/helpers/flat_batch_buffer_helper_hw.inl"
|
||||
#include "shared/source/helpers/hw_helper_bdw_plus.inl"
|
||||
#include "shared/source/helpers/hw_helper_tgllp_plus.inl"
|
||||
|
||||
#include "opencl/source/aub/aub_helper_bdw_plus.inl"
|
||||
#include "opencl/source/gen12lp/helpers_gen12lp.h"
|
||||
@ -14,7 +19,6 @@
|
||||
#include "engine_node.h"
|
||||
|
||||
namespace NEO {
|
||||
typedef TGLLPFamily Family;
|
||||
|
||||
template <>
|
||||
bool HwHelperHw<Family>::isOffsetToSkipSetFFIDGPWARequired(const HardwareInfo &hwInfo) const {
|
||||
|
@ -581,7 +581,8 @@ cl_int Kernel::getWorkGroupInfo(cl_device_id device, cl_kernel_work_group_info p
|
||||
cl_ulong scratchSize;
|
||||
cl_ulong privateMemSize;
|
||||
size_t maxWorkgroupSize;
|
||||
|
||||
const auto &hwInfo = getDevice().getHardwareInfo();
|
||||
auto &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily);
|
||||
GetInfoHelper info(paramValue, paramValueSize, paramValueSizeRet);
|
||||
|
||||
switch (paramName) {
|
||||
@ -612,6 +613,9 @@ cl_int Kernel::getWorkGroupInfo(cl_device_id device, cl_kernel_work_group_info p
|
||||
case CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE:
|
||||
DEBUG_BREAK_IF(!patchInfo.executionEnvironment);
|
||||
preferredWorkGroupSizeMultiple = patchInfo.executionEnvironment->LargestCompiledSIMDSize;
|
||||
if (hwHelper.isFusedEuDispatchEnabled(hwInfo)) {
|
||||
preferredWorkGroupSizeMultiple *= 2;
|
||||
}
|
||||
retVal = changeGetInfoStatusToCLResultType((info.set<size_t>(preferredWorkGroupSizeMultiple)));
|
||||
break;
|
||||
|
||||
|
@ -5,6 +5,8 @@
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/test/unit_test/helpers/debug_manager_state_restore.h"
|
||||
|
||||
#include "opencl/test/unit_test/gen12lp/special_ult_helper_gen12lp.h"
|
||||
#include "opencl/test/unit_test/helpers/hw_helper_tests.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_context.h"
|
||||
@ -144,6 +146,30 @@ GEN12LPTEST_F(HwHelperTestGen12Lp, givenFtrCcsNodeSetAndDefaultRcsWhenGetGpgpuEn
|
||||
EXPECT_EQ(aub_stream::ENGINE_CCS, engines[3]);
|
||||
}
|
||||
|
||||
GEN12LPTEST_F(HwHelperTestGen12Lp, givenTgllpWhenIsFusedEuDispatchEnabledIsCalledThenResultIsCorrect) {
|
||||
DebugManagerStateRestore restorer;
|
||||
auto &helper = HwHelper::get(renderCoreFamily);
|
||||
auto &waTable = hardwareInfo.workaroundTable;
|
||||
bool wa;
|
||||
int32_t debugKey;
|
||||
size_t expectedResult;
|
||||
|
||||
const std::array<std::tuple<bool, bool, int32_t>, 6> testParams{std::make_tuple(true, false, -1),
|
||||
std::make_tuple(false, true, -1),
|
||||
std::make_tuple(true, false, 0),
|
||||
std::make_tuple(true, true, 0),
|
||||
std::make_tuple(false, false, 1),
|
||||
std::make_tuple(false, true, 1)};
|
||||
|
||||
for (const auto &params : testParams) {
|
||||
std::tie(expectedResult, wa, debugKey) = params;
|
||||
waTable.waDisableFusedThreadScheduling = wa;
|
||||
DebugManager.flags.CFEFusedEUDispatch.set(debugKey);
|
||||
|
||||
EXPECT_EQ(expectedResult, helper.isFusedEuDispatchEnabled(hardwareInfo));
|
||||
}
|
||||
}
|
||||
|
||||
class HwHelperTestsGen12LpBuffer : public ::testing::Test {
|
||||
public:
|
||||
void SetUp() override {
|
||||
|
@ -816,6 +816,14 @@ HWTEST_F(HwHelperTest, givenDefaultHwHelperHwWhenMinimalSIMDSizeIsQueriedThen8Is
|
||||
EXPECT_EQ(8u, helper.getMinimalSIMDSize());
|
||||
}
|
||||
|
||||
// Verifies that the HwHelper obtained for the current render core family
// reports fused EU dispatch as disabled.
HWCMDTEST_F(IGFX_GEN8_CORE, HwHelperTest, WhenIsFusedEuDispatchEnabledIsCalledThenFalseIsReturned) {
|
||||
// The Gen12LP core family is excluded: this test is skipped for it.
if (hardwareInfo.platform.eRenderCoreFamily == IGFX_GEN12LP_CORE) {
|
||||
GTEST_SKIP();
|
||||
}
|
||||
auto &helper = HwHelper::get(renderCoreFamily);
|
||||
EXPECT_FALSE(helper.isFusedEuDispatchEnabled(hardwareInfo));
|
||||
}
|
||||
|
||||
HWTEST_F(PipeControlHelperTests, WhenGettingPipeControSizeForCacheFlushThenReturnCorrectValue) {
|
||||
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
|
||||
size_t actualSize = MemorySynchronizationCommands<FamilyType>::getSizeForFullCacheFlush();
|
||||
|
@ -39,7 +39,7 @@ DECLARE_DEBUG_VARIABLE(int32_t, SchedulerSimulationReturnInstance, 0, "prints ex
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, SchedulerGWS, 0, "Forces gws of scheduler kernel, only multiple of 24 allowed or 0 - default selected")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, EnableExperimentalCommandBuffer, 0, "Enables injection of experimental command buffer")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, OverrideStatelessMocsIndex, -1, "-1: feature inactive, >=0 : following MOCS index will be programmed for stateless accesses in state base address")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, CFEFusedEUDispatch, -1, "Set Fused EU dispatch in FrontEnd State command. -1 - do not set")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, CFEFusedEUDispatch, -1, "Set Fused EU dispatch in FrontEnd State command. -1 - default, 0 - enabled, 1 - disabled")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, ForceAuxTranslationMode, -1, "-1: Default, 0: Builtin, 1: Blit")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, OverrideGpuAddressSpace, -1, "-1: Default, !=-1: GPU address space range in bits")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, OverrideMaxWorkgroupSize, -1, "-1: Default, !=-1: Overrides max worgkroup size to this value")
|
||||
|
@ -79,13 +79,12 @@ uint32_t PreambleHelper<TGLLPFamily>::getUrbEntryAllocationSize() {
|
||||
|
||||
template <>
|
||||
void PreambleHelper<TGLLPFamily>::programAdditionalFieldsInVfeState(VFE_STATE_TYPE *mediaVfeState, const HardwareInfo &hwInfo) {
|
||||
mediaVfeState->setDisableSlice0Subslice2(hwInfo.workaroundTable.waDisableFusedThreadScheduling);
|
||||
|
||||
if (DebugManager.flags.CFEFusedEUDispatch.get() != -1) {
|
||||
mediaVfeState->setDisableSlice0Subslice2(DebugManager.flags.CFEFusedEUDispatch.get());
|
||||
auto &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily);
|
||||
if (!hwHelper.isFusedEuDispatchEnabled(hwInfo)) {
|
||||
mediaVfeState->setDisableSlice0Subslice2(true);
|
||||
}
|
||||
}
|
||||
// Explicitly instantiate PreambleHelper for TGLLP device family
|
||||
|
||||
// Explicitly instantiate PreambleHelper for TGLLP device family
|
||||
template struct PreambleHelper<TGLLPFamily>;
|
||||
} // namespace NEO
|
||||
|
@ -48,6 +48,7 @@ set(NEO_CORE_HELPERS
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/hw_helper_base.inl
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/hw_helper_bdw_plus.inl
|
||||
${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}/hw_helper_extended.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/hw_helper_tgllp_plus.inl
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/hw_info.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/hw_info.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/interlocked_max.h
|
||||
|
@ -79,6 +79,7 @@ class HwHelper {
|
||||
virtual bool isForceEmuInt32DivRemSPWARequired(const HardwareInfo &hwInfo) = 0;
|
||||
virtual uint32_t getMinimalSIMDSize() = 0;
|
||||
virtual bool isOffsetToSkipSetFFIDGPWARequired(const HardwareInfo &hwInfo) const = 0;
|
||||
virtual bool isFusedEuDispatchEnabled(const HardwareInfo &hwInfo) const = 0;
|
||||
|
||||
static uint32_t getSubDevicesCount(const HardwareInfo *pHwInfo);
|
||||
static uint32_t getEnginesCount(const HardwareInfo &hwInfo);
|
||||
@ -200,6 +201,8 @@ class HwHelperHw : public HwHelper {
|
||||
|
||||
bool isOffsetToSkipSetFFIDGPWARequired(const HardwareInfo &hwInfo) const override;
|
||||
|
||||
bool isFusedEuDispatchEnabled(const HardwareInfo &hwInfo) const override;
|
||||
|
||||
static bool isForceDefaultRCSEngineWARequired(const HardwareInfo &hwInfo);
|
||||
|
||||
bool isForceEmuInt32DivRemSPWARequired(const HardwareInfo &hwInfo) override;
|
||||
|
@ -306,6 +306,11 @@ uint32_t HwHelperHw<GfxFamily>::getMaxThreadsForWorkgroup(const HardwareInfo &hw
|
||||
return HwHelper::getMaxThreadsForWorkgroup(hwInfo, maxNumEUsPerSubSlice);
|
||||
}
|
||||
|
||||
// Generic implementation of isFusedEuDispatchEnabled for any GfxFamily:
// always returns false. The hwInfo argument is not inspected here.
template <typename GfxFamily>
|
||||
inline bool HwHelperHw<GfxFamily>::isFusedEuDispatchEnabled(const HardwareInfo &hwInfo) const {
|
||||
return false;
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
size_t MemorySynchronizationCommands<GfxFamily>::getSizeForFullCacheFlush() {
|
||||
return sizeof(typename GfxFamily::PIPE_CONTROL);
|
||||
|
19
shared/source/helpers/hw_helper_tgllp_plus.inl
Normal file
19
shared/source/helpers/hw_helper_tgllp_plus.inl
Normal file
@ -0,0 +1,19 @@
|
||||
/*
|
||||
* Copyright (C) 2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
namespace NEO {
|
||||
|
||||
// Specialization of isFusedEuDispatchEnabled for `Family`.
// NOTE(review): `Family` is not declared in this file; presumably the including
// translation unit defines it before this .inl is included - confirm.
//
// Returns true when fused EU dispatch should be treated as enabled:
//  - by default, the negation of workaroundTable.waDisableFusedThreadScheduling;
//  - if the CFEFusedEUDispatch debug flag is not -1, the flag decides instead
//    and only the value 0 yields true.
template <>
|
||||
inline bool HwHelperHw<Family>::isFusedEuDispatchEnabled(const HardwareInfo &hwInfo) const {
|
||||
// Start from the hardware workaround table: the workaround disables the feature.
auto fusedEuDispatchEnabled = !hwInfo.workaroundTable.waDisableFusedThreadScheduling;
|
||||
// A debug flag value other than -1 overrides the workaround-based default.
if (DebugManager.flags.CFEFusedEUDispatch.get() != -1) {
|
||||
// Only 0 enables the feature; any other non-default value disables it.
fusedEuDispatchEnabled = (DebugManager.flags.CFEFusedEUDispatch.get() == 0);
|
||||
}
|
||||
return fusedEuDispatchEnabled;
|
||||
}
|
||||
|
||||
} // namespace NEO
|
Reference in New Issue
Block a user