Adjust preferred wgs multiple for specific configs

Change-Id: Ib7e788760f0400b983e03044386f04637e12727e
Signed-off-by: Kamil Kopryk <kamil.kopryk@intel.com>
Related-To: NEO-4331
This commit is contained in:
Kamil Kopryk
2020-03-13 13:14:28 +01:00
committed by sys_ocldev
parent 8f2a29b55c
commit e55d4bf234
10 changed files with 77 additions and 8 deletions

View File

@ -5,8 +5,13 @@
*
*/
#include "shared/source/gen12lp/hw_cmds.h"
using Family = NEO::TGLLPFamily;
#include "shared/source/helpers/flat_batch_buffer_helper_hw.inl"
#include "shared/source/helpers/hw_helper_bdw_plus.inl"
#include "shared/source/helpers/hw_helper_tgllp_plus.inl"
#include "opencl/source/aub/aub_helper_bdw_plus.inl"
#include "opencl/source/gen12lp/helpers_gen12lp.h"
@ -14,7 +19,6 @@
#include "engine_node.h"
namespace NEO {
typedef TGLLPFamily Family;
template <>
bool HwHelperHw<Family>::isOffsetToSkipSetFFIDGPWARequired(const HardwareInfo &hwInfo) const {

View File

@ -581,7 +581,8 @@ cl_int Kernel::getWorkGroupInfo(cl_device_id device, cl_kernel_work_group_info p
cl_ulong scratchSize;
cl_ulong privateMemSize;
size_t maxWorkgroupSize;
const auto &hwInfo = getDevice().getHardwareInfo();
auto &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily);
GetInfoHelper info(paramValue, paramValueSize, paramValueSizeRet);
switch (paramName) {
@ -612,6 +613,9 @@ cl_int Kernel::getWorkGroupInfo(cl_device_id device, cl_kernel_work_group_info p
case CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE:
DEBUG_BREAK_IF(!patchInfo.executionEnvironment);
preferredWorkGroupSizeMultiple = patchInfo.executionEnvironment->LargestCompiledSIMDSize;
if (hwHelper.isFusedEuDispatchEnabled(hwInfo)) {
preferredWorkGroupSizeMultiple *= 2;
}
retVal = changeGetInfoStatusToCLResultType((info.set<size_t>(preferredWorkGroupSizeMultiple)));
break;

View File

@ -5,6 +5,8 @@
*
*/
#include "shared/test/unit_test/helpers/debug_manager_state_restore.h"
#include "opencl/test/unit_test/gen12lp/special_ult_helper_gen12lp.h"
#include "opencl/test/unit_test/helpers/hw_helper_tests.h"
#include "opencl/test/unit_test/mocks/mock_context.h"
@ -144,6 +146,30 @@ GEN12LPTEST_F(HwHelperTestGen12Lp, givenFtrCcsNodeSetAndDefaultRcsWhenGetGpgpuEn
EXPECT_EQ(aub_stream::ENGINE_CCS, engines[3]);
}
// Checks HwHelper::isFusedEuDispatchEnabled on Gen12LP for combinations of the
// waDisableFusedThreadScheduling workaround and the CFEFusedEUDispatch debug key.
GEN12LPTEST_F(HwHelperTestGen12Lp, givenTgllpWhenIsFusedEuDispatchEnabledIsCalledThenResultIsCorrect) {
    // NOTE(review): presumably restores the debug flags modified below when the test ends - confirm.
    DebugManagerStateRestore restorer;
    auto &helper = HwHelper::get(renderCoreFamily);
    auto &waTable = hardwareInfo.workaroundTable;

    // Each entry: {expected result, waDisableFusedThreadScheduling, CFEFusedEUDispatch}.
    // Key -1: result is the negated workaround; key 0: always true; key 1: always false.
    const std::array<std::tuple<bool, bool, int32_t>, 6> testParams{std::make_tuple(true, false, -1),
                                                                    std::make_tuple(false, true, -1),
                                                                    std::make_tuple(true, false, 0),
                                                                    std::make_tuple(true, true, 0),
                                                                    std::make_tuple(false, false, 1),
                                                                    std::make_tuple(false, true, 1)};

    for (const auto &params : testParams) {
        // The expectation is a bool (same type as the tuple element and the helper's
        // return value), so the comparison below does not rely on integral conversions.
        bool expectedResult = false;
        bool wa = false;
        int32_t debugKey = -1;
        std::tie(expectedResult, wa, debugKey) = params;

        waTable.waDisableFusedThreadScheduling = wa;
        DebugManager.flags.CFEFusedEUDispatch.set(debugKey);

        EXPECT_EQ(expectedResult, helper.isFusedEuDispatchEnabled(hardwareInfo));
    }
}
class HwHelperTestsGen12LpBuffer : public ::testing::Test {
public:
void SetUp() override {

View File

@ -816,6 +816,14 @@ HWTEST_F(HwHelperTest, givenDefaultHwHelperHwWhenMinimalSIMDSizeIsQueriedThen8Is
EXPECT_EQ(8u, helper.getMinimalSIMDSize());
}
// The generic HwHelper implementation must report fused EU dispatch as disabled.
HWCMDTEST_F(IGFX_GEN8_CORE, HwHelperTest, WhenIsFusedEuDispatchEnabledIsCalledThenFalseIsReturned) {
    // Gen12LP has its own implementation and a dedicated test, so it is skipped here.
    const bool isGen12Lp = (IGFX_GEN12LP_CORE == hardwareInfo.platform.eRenderCoreFamily);
    if (isGen12Lp) {
        GTEST_SKIP();
    }

    const auto &hwHelper = HwHelper::get(renderCoreFamily);
    EXPECT_FALSE(hwHelper.isFusedEuDispatchEnabled(hardwareInfo));
}
HWTEST_F(PipeControlHelperTests, WhenGettingPipeControSizeForCacheFlushThenReturnCorrectValue) {
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
size_t actualSize = MemorySynchronizationCommands<FamilyType>::getSizeForFullCacheFlush();

View File

@ -39,7 +39,7 @@ DECLARE_DEBUG_VARIABLE(int32_t, SchedulerSimulationReturnInstance, 0, "prints ex
DECLARE_DEBUG_VARIABLE(int32_t, SchedulerGWS, 0, "Forces gws of scheduler kernel, only multiple of 24 allowed or 0 - default selected")
DECLARE_DEBUG_VARIABLE(int32_t, EnableExperimentalCommandBuffer, 0, "Enables injection of experimental command buffer")
DECLARE_DEBUG_VARIABLE(int32_t, OverrideStatelessMocsIndex, -1, "-1: feature inactive, >=0 : following MOCS index will be programmed for stateless accesses in state base address")
DECLARE_DEBUG_VARIABLE(int32_t, CFEFusedEUDispatch, -1, "Set Fused EU dispatch in FrontEnd State command. -1 - do not set")
DECLARE_DEBUG_VARIABLE(int32_t, CFEFusedEUDispatch, -1, "Set Fused EU dispatch in FrontEnd State command. -1 - default, 0 - enabled, 1 - disabled")
DECLARE_DEBUG_VARIABLE(int32_t, ForceAuxTranslationMode, -1, "-1: Default, 0: Builtin, 1: Blit")
DECLARE_DEBUG_VARIABLE(int32_t, OverrideGpuAddressSpace, -1, "-1: Default, !=-1: GPU address space range in bits")
DECLARE_DEBUG_VARIABLE(int32_t, OverrideMaxWorkgroupSize, -1, "-1: Default, !=-1: Overrides max worgkroup size to this value")

View File

@ -79,13 +79,12 @@ uint32_t PreambleHelper<TGLLPFamily>::getUrbEntryAllocationSize() {
template <>
void PreambleHelper<TGLLPFamily>::programAdditionalFieldsInVfeState(VFE_STATE_TYPE *mediaVfeState, const HardwareInfo &hwInfo) {
mediaVfeState->setDisableSlice0Subslice2(hwInfo.workaroundTable.waDisableFusedThreadScheduling);
if (DebugManager.flags.CFEFusedEUDispatch.get() != -1) {
mediaVfeState->setDisableSlice0Subslice2(DebugManager.flags.CFEFusedEUDispatch.get());
auto &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily);
if (!hwHelper.isFusedEuDispatchEnabled(hwInfo)) {
mediaVfeState->setDisableSlice0Subslice2(true);
}
}
// Explicitly instantiate PreambleHelper for TGLLP device family
// Explicitly instantiate PreambleHelper for TGLLP device family
template struct PreambleHelper<TGLLPFamily>;
} // namespace NEO

View File

@ -48,6 +48,7 @@ set(NEO_CORE_HELPERS
${CMAKE_CURRENT_SOURCE_DIR}/hw_helper_base.inl
${CMAKE_CURRENT_SOURCE_DIR}/hw_helper_bdw_plus.inl
${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}/hw_helper_extended.cpp
${CMAKE_CURRENT_SOURCE_DIR}/hw_helper_tgllp_plus.inl
${CMAKE_CURRENT_SOURCE_DIR}/hw_info.cpp
${CMAKE_CURRENT_SOURCE_DIR}/hw_info.h
${CMAKE_CURRENT_SOURCE_DIR}/interlocked_max.h

View File

@ -79,6 +79,7 @@ class HwHelper {
virtual bool isForceEmuInt32DivRemSPWARequired(const HardwareInfo &hwInfo) = 0;
virtual uint32_t getMinimalSIMDSize() = 0;
virtual bool isOffsetToSkipSetFFIDGPWARequired(const HardwareInfo &hwInfo) const = 0;
virtual bool isFusedEuDispatchEnabled(const HardwareInfo &hwInfo) const = 0;
static uint32_t getSubDevicesCount(const HardwareInfo *pHwInfo);
static uint32_t getEnginesCount(const HardwareInfo &hwInfo);
@ -200,6 +201,8 @@ class HwHelperHw : public HwHelper {
bool isOffsetToSkipSetFFIDGPWARequired(const HardwareInfo &hwInfo) const override;
bool isFusedEuDispatchEnabled(const HardwareInfo &hwInfo) const override;
static bool isForceDefaultRCSEngineWARequired(const HardwareInfo &hwInfo);
bool isForceEmuInt32DivRemSPWARequired(const HardwareInfo &hwInfo) override;

View File

@ -306,6 +306,11 @@ uint32_t HwHelperHw<GfxFamily>::getMaxThreadsForWorkgroup(const HardwareInfo &hw
return HwHelper::getMaxThreadsForWorkgroup(hwInfo, maxNumEUsPerSubSlice);
}
// Generic implementation: always reports fused EU dispatch as disabled.
// The hwInfo argument is not consulted here; a family-specific explicit
// specialization can supply a different answer.
template <typename GfxFamily>
inline bool HwHelperHw<GfxFamily>::isFusedEuDispatchEnabled(const HardwareInfo &hwInfo) const {
return false;
}
template <typename GfxFamily>
size_t MemorySynchronizationCommands<GfxFamily>::getSizeForFullCacheFlush() {
return sizeof(typename GfxFamily::PIPE_CONTROL);

View File

@ -0,0 +1,19 @@
/*
* Copyright (C) 2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
namespace NEO {
// Reports whether fused EU dispatch is enabled for this family.
// The CFEFusedEUDispatch debug key takes precedence when it is not -1:
// a value of 0 means enabled, any other value means disabled.
// Otherwise the result is the negation of waDisableFusedThreadScheduling.
template <>
inline bool HwHelperHw<Family>::isFusedEuDispatchEnabled(const HardwareInfo &hwInfo) const {
    const auto debugOverride = DebugManager.flags.CFEFusedEUDispatch.get();
    if (debugOverride != -1) {
        return debugOverride == 0;
    }
    return !hwInfo.workaroundTable.waDisableFusedThreadScheduling;
}
} // namespace NEO