From 8f85d4b8f81f597078caa150ee02965fded0c88b Mon Sep 17 00:00:00 2001
From: Michal Mrozek
Date: Thu, 3 Feb 2022 09:07:39 +0000
Subject: [PATCH] Add debug variable to override message simd.

Signed-off-by: Michal Mrozek
---
Review notes (placed under the three-dash line, so they are not part of the
commit message):
  * ForceSimdMessageSizeInWalker is declared with a default of -1. When it
    holds any other value, that value is passed to setMessageSimd() right
    after the regular setMessageSimd(getSimdSize()) call, in both
    GpgpuWalkerHelper::setGpgpuWalkerThreadData and
    EncodeDispatchKernel::encodeThreadData.
  * Neither call site range-checks the flag value before programming it.
  * The flag description text says "SIMD size", while the code in this patch
    only overrides the message SIMD field.
  * NOTE(review): casts and templated names show up without template
    arguments in this copy (e.g. "static_cast(...)"); presumably lost in
    extraction - confirm against the upstream commit before applying.

 .../gpgpu_walker_xehp_and_later.inl           |  4 +++
 .../dispatch_walker_tests_xehp_and_later.cpp  | 25 +++++++++++++++++++
 .../test/unit_test/test_files/igdrcl.config   |  1 +
 .../command_encoder_xehp_and_later.inl        |  4 +++
 .../debug_settings/debug_variables_base.inl   |  2 +-
 ..._encode_dispatch_kernel_xehp_and_later.cpp | 15 +++++++++++
 6 files changed, 50 insertions(+), 1 deletion(-)

diff --git a/opencl/source/command_queue/gpgpu_walker_xehp_and_later.inl b/opencl/source/command_queue/gpgpu_walker_xehp_and_later.inl
index 4d4aaaa753..ee1cfcb27f 100644
--- a/opencl/source/command_queue/gpgpu_walker_xehp_and_later.inl
+++ b/opencl/source/command_queue/gpgpu_walker_xehp_and_later.inl
@@ -53,6 +53,10 @@ size_t GpgpuWalkerHelper::setGpgpuWalkerThreadData(
     walkerCmd->setSimdSize(getSimdConfig(simd));
     walkerCmd->setMessageSimd(walkerCmd->getSimdSize());
 
+    if (DebugManager.flags.ForceSimdMessageSizeInWalker.get() != -1) {
+        walkerCmd->setMessageSimd(DebugManager.flags.ForceSimdMessageSizeInWalker.get());
+    }
+
     walkerCmd->setThreadGroupIdStartingX(static_cast(startWorkGroups[0]));
     walkerCmd->setThreadGroupIdStartingY(static_cast(startWorkGroups[1]));
     walkerCmd->setThreadGroupIdStartingZ(static_cast(startWorkGroups[2]));
diff --git a/opencl/test/unit_test/command_queue/dispatch_walker_tests_xehp_and_later.cpp b/opencl/test/unit_test/command_queue/dispatch_walker_tests_xehp_and_later.cpp
index 7307307d68..37b938df9e 100644
--- a/opencl/test/unit_test/command_queue/dispatch_walker_tests_xehp_and_later.cpp
+++ b/opencl/test/unit_test/command_queue/dispatch_walker_tests_xehp_and_later.cpp
@@ -243,6 +243,31 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, givenDifferent
     }
 }
 
+HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, givenDebugFlagWhenItIsSetThenMessageSimdIsOverwritten) {
+    DebugManagerStateRestore restorer;
+    DebugManager.flags.ForceSimdMessageSizeInWalker.set(1);
+    using COMPUTE_WALKER = typename FamilyType::COMPUTE_WALKER;
+    COMPUTE_WALKER *computeWalker = static_cast(linearStream.getSpace(sizeof(COMPUTE_WALKER)));
+    *computeWalker = FamilyType::cmdInitGpgpuWalker;
+
+    kernel->kernelInfo.setLocalIds({0, 0, 1});
+    localWorkSizesIn[2] = 16;
+    localWorkSizesIn[0] = localWorkSizesIn[1] = 1;
+
+    uint32_t simdProgramming[3] = {32, 16, 8};
+    bool walkerInput[4][2] = {{false, false}, {true, false}, {false, true}, {true, true}}; // {runtime local ids, inline data}
+
+    for (uint32_t i = 0; i < 4; i++) {
+        for (uint32_t j = 0; j < 3; j++) {
+            *computeWalker = FamilyType::cmdInitGpgpuWalker;
+            GpgpuWalkerHelper::setGpgpuWalkerThreadData(computeWalker, kernel->kernelInfo.kernelDescriptor, globalOffsets, startWorkGroups, numWorkGroups,
+                                                        localWorkSizesIn, simdProgramming[j], 2,
+                                                        walkerInput[i][0], walkerInput[i][1], 0u);
+            EXPECT_EQ(1u, computeWalker->getMessageSimd());
+        }
+    }
+}
+
 HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, givenWorkDimTwoWhenAskHwForLocalIdsThenExpectGenerationFieldsSet) {
     using COMPUTE_WALKER = typename FamilyType::COMPUTE_WALKER;
     COMPUTE_WALKER *computeWalker = static_cast(linearStream.getSpace(sizeof(COMPUTE_WALKER)));
diff --git a/opencl/test/unit_test/test_files/igdrcl.config b/opencl/test/unit_test/test_files/igdrcl.config
index c8e19534fd..d12712f688 100644
--- a/opencl/test/unit_test/test_files/igdrcl.config
+++ b/opencl/test/unit_test/test_files/igdrcl.config
@@ -370,3 +370,4 @@ Force2dImageAsArray = -1
 ForceExtendedBufferSize = -1
 MakeIndirectAllocationsResidentAsPack = -1
 EnableChipsetUniqueUUID = -1
+ForceSimdMessageSizeInWalker = -1
diff --git a/shared/source/command_container/command_encoder_xehp_and_later.inl b/shared/source/command_container/command_encoder_xehp_and_later.inl
index b33a302d4a..651c7ce6cb 100644
--- a/shared/source/command_container/command_encoder_xehp_and_later.inl
+++ b/shared/source/command_container/command_encoder_xehp_and_later.inl
@@ -419,6 +419,10 @@ void EncodeDispatchKernel::encodeThreadData(WALKER_TYPE &walkerCmd,
 
     walkerCmd.setMessageSimd(walkerCmd.getSimdSize());
 
+    if (DebugManager.flags.ForceSimdMessageSizeInWalker.get() != -1) {
+        walkerCmd.setMessageSimd(DebugManager.flags.ForceSimdMessageSizeInWalker.get());
+    }
+
     //1) cross-thread inline data will be put into R1, but if kernel uses local ids, then cross-thread should be put further back
     //so whenever local ids are driver or hw generated, reserve space by setting right values for emitLocalIds
     //2) Auto-generation of local ids should be possible, when in fact local ids are used
diff --git a/shared/source/debug_settings/debug_variables_base.inl b/shared/source/debug_settings/debug_variables_base.inl
index 7adabf512c..763272a675 100644
--- a/shared/source/debug_settings/debug_variables_base.inl
+++ b/shared/source/debug_settings/debug_variables_base.inl
@@ -178,7 +178,7 @@ DECLARE_DEBUG_VARIABLE(int32_t, EngineUsageHint, -1, "-1: default, >=0: engine u
 DECLARE_DEBUG_VARIABLE(int32_t, ForceBcsEngineIndex, -1, "-1: default, >=0 Copy Engine index")
 DECLARE_DEBUG_VARIABLE(int32_t, Force2dImageAsArray, -1, "-1: default, 0: WA Disabled, 1: Forces surface state of 2dImage to array")
 DECLARE_DEBUG_VARIABLE(int32_t, ForceExtendedBufferSize, -1, "-1: default, 0: disabled, >=1: Forces extended buffer size by specify pageSize number in clCreateBuffer, clCreateBufferWithProperties and clCreateBufferWithPropertiesINTEL calls")
-
+DECLARE_DEBUG_VARIABLE(int32_t, ForceSimdMessageSizeInWalker, -1, "-1: default, >=0 Program given value in Walker command for SIMD size")
 /*LOGGING FLAGS*/
 DECLARE_DEBUG_VARIABLE(int32_t, PrintDriverDiagnostics, -1, "prints driver diagnostics messages to standard output, value corresponds to hint level")
 DECLARE_DEBUG_VARIABLE(bool, PrintOsContextInitializations, false, "print initialized OsContexts to standard output")
diff --git a/shared/test/unit_test/encoders/test_encode_dispatch_kernel_xehp_and_later.cpp b/shared/test/unit_test/encoders/test_encode_dispatch_kernel_xehp_and_later.cpp
index 220ca03cef..b00349af16 100644
--- a/shared/test/unit_test/encoders/test_encode_dispatch_kernel_xehp_and_later.cpp
+++ b/shared/test/unit_test/encoders/test_encode_dispatch_kernel_xehp_and_later.cpp
@@ -810,6 +810,21 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerThreadTestXeHPAndLater, givenLocalIdGeneratio
     EXPECT_FALSE(walkerCmd.getEmitInlineParameter());
 }
 
+HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerThreadTestXeHPAndLater, givenDebugVariableToOverrideSimdMessageSizeWhenWalkerIsProgrammedItIsOverwritten) {
+    DebugManagerStateRestore restorer;
+    DebugManager.flags.ForceSimdMessageSizeInWalker.set(1);
+
+    using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
+
+    WALKER_TYPE walkerCmd = FamilyType::cmdInitGpgpuWalker;
+    requiredWorkGroupOrder = 2u;
+    workGroupSizes[1] = workGroupSizes[2] = 2u;
+
+    EncodeDispatchKernel::encodeThreadData(walkerCmd, nullptr, numWorkGroups, workGroupSizes, simd, localIdDimensions,
+                                           0, 0, false, false, false, requiredWorkGroupOrder);
+    EXPECT_EQ(1u, walkerCmd.getMessageSimd());
+}
+
 HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerThreadTestXeHPAndLater, WhenInlineDataIsTrueThenExpectInlineDataProgramming) {
     using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
 