diff --git a/opencl/source/helpers/hardware_commands_helper_base.inl b/opencl/source/helpers/hardware_commands_helper_base.inl index de8ffb2216..79286c84fd 100644 --- a/opencl/source/helpers/hardware_commands_helper_base.inl +++ b/opencl/source/helpers/hardware_commands_helper_base.inl @@ -197,7 +197,8 @@ size_t HardwareCommandsHelper::sendInterfaceDescriptorData( } EncodeDispatchKernel::encodeEuSchedulingPolicy(&interfaceDescriptor, kernelDescriptor, defaultPipelinedThreadArbitrationPolicy); const uint32_t threadGroupDimensions[] = {walkerCmd->getThreadGroupIdXDimension(), walkerCmd->getThreadGroupIdYDimension(), walkerCmd->getThreadGroupIdXDimension()}; - EncodeDispatchKernel::encodeThreadGroupDispatch(interfaceDescriptor, device, hardwareInfo, threadGroupDimensions, threadGroupCount, kernelDescriptor.kernelAttributes.numGrfRequired, threadsPerThreadGroup, *walkerCmd); + EncodeDispatchKernel::encodeThreadGroupDispatch(interfaceDescriptor, device, hardwareInfo, threadGroupDimensions, threadGroupCount, kernelDescriptor.kernelMetadata.requiredThreadGroupDispatchSize, + kernelDescriptor.kernelAttributes.numGrfRequired, threadsPerThreadGroup, *walkerCmd); *pInterfaceDescriptor = interfaceDescriptor; return (size_t)offsetInterfaceDescriptor; diff --git a/shared/source/command_container/command_encoder.h b/shared/source/command_container/command_encoder.h index 86caf35037..79c13bc1de 100644 --- a/shared/source/command_container/command_encoder.h +++ b/shared/source/command_container/command_encoder.h @@ -271,8 +271,8 @@ struct EncodeDispatchKernel : public EncodeDispatchKernelBase { template static void encodeThreadGroupDispatch(InterfaceDescriptorType &interfaceDescriptor, const Device &device, const HardwareInfo &hwInfo, - const uint32_t *threadGroupDimensions, const uint32_t threadGroupCount, const uint32_t grfCount, const uint32_t threadsPerThreadGroup, - WalkerType &walkerCmd); + const uint32_t *threadGroupDimensions, const uint32_t threadGroupCount, const uint32_t requiredThreadGroupDispatchSize, + const uint32_t grfCount, const uint32_t threadsPerThreadGroup, WalkerType &walkerCmd); template static void setWalkerRegionSettings(WalkerType &walkerCmd, const NEO::Device &device, uint32_t partitionCount, uint32_t workgroupSize, uint32_t threadGroupCount, uint32_t maxWgCountPerTile, bool requiredDispatchWalkOrder); diff --git a/shared/source/command_container/command_encoder_enablers.inl b/shared/source/command_container/command_encoder_enablers.inl index 0c1ee36460..e050cc6cf9 100644 --- a/shared/source/command_container/command_encoder_enablers.inl +++ b/shared/source/command_container/command_encoder_enablers.inl @@ -11,7 +11,7 @@ template struct NEO::EncodeDispatchKernel; template void NEO::EncodeDispatchKernel::encodeAdditionalWalkerFields(const RootDeviceEnvironment &rootDeviceEnvironment, Family::DefaultWalkerType &walkerCmd, const EncodeWalkerArgs &walkerArgs); template void NEO::EncodeDispatchKernel::setGrfInfo(Family::DefaultWalkerType::InterfaceDescriptorType *pInterfaceDescriptor, uint32_t grfCount, const size_t &sizeCrossThreadData, const size_t &sizePerThreadData, const RootDeviceEnvironment &rootDeviceEnvironment); template void NEO::EncodeDispatchKernel::setupPreferredSlmSize(Family::DefaultWalkerType::InterfaceDescriptorType *pInterfaceDescriptor, const RootDeviceEnvironment &rootDeviceEnvironment, const uint32_t threadsPerThreadGroup, uint32_t slmTotalSize, SlmPolicy slmPolicy); -template void NEO::EncodeDispatchKernel::encodeThreadGroupDispatch(Family::DefaultWalkerType::InterfaceDescriptorType &interfaceDescriptor, const Device &device, const HardwareInfo &hwInfo, const uint32_t *threadGroupDimensions, const uint32_t threadGroupCount, const uint32_t grfCount, const uint32_t threadsPerThreadGroup, Family::DefaultWalkerType &walkerCmd); +template void NEO::EncodeDispatchKernel::encodeThreadGroupDispatch(Family::DefaultWalkerType::InterfaceDescriptorType &interfaceDescriptor, const Device &device, const HardwareInfo &hwInfo, const uint32_t *threadGroupDimensions, const uint32_t threadGroupCount, const uint32_t requiredThreadGroupDispatchSize, const uint32_t grfCount, const uint32_t threadsPerThreadGroup, Family::DefaultWalkerType &walkerCmd); template void NEO::EncodeDispatchKernel::encode(CommandContainer &container, EncodeDispatchKernelArgs &args); template void NEO::EncodeDispatchKernel::encodeThreadData(Family::DefaultWalkerType &walkerCmd, const uint32_t *startWorkGroup, const uint32_t *numWorkGroups, const uint32_t *workGroupSizes, uint32_t simd, uint32_t localIdDimensions, uint32_t threadsPerThreadGroup, uint32_t threadExecutionMask, bool localIdsGenerationByRuntime, bool inlineDataProgrammingRequired, bool isIndirect, uint32_t requiredWorkGroupOrder, const RootDeviceEnvironment &rootDeviceEnvironment); template void NEO::EncodeDispatchKernel::adjustWalkOrder(Family::DefaultWalkerType &walkerCmd, uint32_t requiredWorkGroupOrder, const RootDeviceEnvironment &rootDeviceEnvironment); diff --git a/shared/source/command_container/command_encoder_xehp_and_later.inl b/shared/source/command_container/command_encoder_xehp_and_later.inl index d6c28b8ad4..a0ef1f498b 100644 --- a/shared/source/command_container/command_encoder_xehp_and_later.inl +++ b/shared/source/command_container/command_encoder_xehp_and_later.inl @@ -387,7 +387,8 @@ void EncodeDispatchKernel::encode(CommandContainer &container, EncodeDis walkerCmd.setPredicateEnable(args.isPredicate); auto threadGroupCount = walkerCmd.getThreadGroupIdXDimension() * walkerCmd.getThreadGroupIdYDimension() * walkerCmd.getThreadGroupIdZDimension(); - EncodeDispatchKernel::encodeThreadGroupDispatch(idd, *args.device, hwInfo, threadDimsVec, threadGroupCount, kernelDescriptor.kernelAttributes.numGrfRequired, threadsPerThreadGroup, walkerCmd); + EncodeDispatchKernel::encodeThreadGroupDispatch(idd, *args.device, hwInfo, threadDimsVec, threadGroupCount, + kernelDescriptor.kernelMetadata.requiredThreadGroupDispatchSize, kernelDescriptor.kernelAttributes.numGrfRequired, threadsPerThreadGroup, walkerCmd); if (debugManager.flags.PrintKernelDispatchParameters.get()) { fprintf(stdout, "kernel, %s, grfCount, %d, simdSize, %d, tilesCount, %d, implicitScaling, %s, threadGroupCount, %d, numberOfThreadsInGpgpuThreadGroup, %d, threadGroupDimensions, %d, %d, %d, threadGroupDispatchSize enum, %d\n", kernelDescriptor.kernelMetadata.kernelName.c_str(), @@ -1064,10 +1065,13 @@ void EncodeDispatchKernel::overrideDefaultValues(WalkerType &walkerCmd, template template void EncodeDispatchKernel::encodeThreadGroupDispatch(InterfaceDescriptorType &interfaceDescriptor, const Device &device, const HardwareInfo &hwInfo, - const uint32_t *threadGroupDimensions, const uint32_t threadGroupCount, const uint32_t grfCount, const uint32_t threadsPerThreadGroup, WalkerType &walkerCmd) { + const uint32_t *threadGroupDimensions, const uint32_t threadGroupCount, const uint32_t requiredThreadGroupDispatchSize, + const uint32_t grfCount, const uint32_t threadsPerThreadGroup, WalkerType &walkerCmd) { const auto &productHelper = device.getProductHelper(); - if (productHelper.isDisableOverdispatchAvailable(hwInfo)) { + if (requiredThreadGroupDispatchSize != 0) { + interfaceDescriptor.setThreadGroupDispatchSize(static_cast(requiredThreadGroupDispatchSize)); + } else if (productHelper.isDisableOverdispatchAvailable(hwInfo)) { interfaceDescriptor.setThreadGroupDispatchSize(InterfaceDescriptorType::THREAD_GROUP_DISPATCH_SIZE_TG_SIZE_1); bool adjustTGDispatchSize = true; diff --git a/shared/source/device_binary_format/zebin/zeinfo.h b/shared/source/device_binary_format/zebin/zeinfo.h index 155b2cbc49..c2e9c7d34b 100644 --- a/shared/source/device_binary_format/zebin/zeinfo.h +++ b/shared/source/device_binary_format/zebin/zeinfo.h @@ -80,6 +80,7 @@ inline constexpr ConstStringRef invalidKernel("invalid_kernel"); inline constexpr ConstStringRef vecTypeHint("vec_type_hint"); inline constexpr ConstStringRef workgroupSizeHint("work_group_size_hint"); inline constexpr ConstStringRef hintSuffix("_hint"); +inline constexpr ConstStringRef intelReqdThreadgroupDispatchSize("intel_reqd_thread_group_dispatch_size"); } // namespace Attributes namespace DebugEnv { @@ -459,12 +460,14 @@ using ReqdWorkgroupSizeT = std::array; using InvalidKernelT = ConstStringRef; using WorkgroupSizeHint = std::array; using VecTypeHintT = ConstStringRef; +using IntelReqdThreadgroupDispatchSizeT = int32_t; namespace Defaults { inline constexpr IntelReqdSubgroupSizeT intelReqdSubgroupSize = 0; inline constexpr IntelReqdWorkgroupWalkOrder intelReqdWorkgroupWalkOrder = {0, 0, 0}; inline constexpr ReqdWorkgroupSizeT reqdWorkgroupSize = {0, 0, 0}; inline constexpr WorkgroupSizeHint workgroupSizeHint = {0, 0, 0}; +inline constexpr IntelReqdThreadgroupDispatchSizeT intelReqdThreadgroupDispatchSize = 0; } // namespace Defaults struct AttributesBaseT { @@ -474,6 +477,7 @@ struct AttributesBaseT { std::optional invalidKernel; std::optional workgroupSizeHint; std::optional vecTypeHint; + std::optional intelReqdThreadgroupDispatchSize; std::vector> otherHints; }; } // namespace Attributes diff --git a/shared/source/device_binary_format/zebin/zeinfo_decoder.cpp b/shared/source/device_binary_format/zebin/zeinfo_decoder.cpp index 41c15b1257..8fba7350f2 100644 --- a/shared/source/device_binary_format/zebin/zeinfo_decoder.cpp +++ b/shared/source/device_binary_format/zebin/zeinfo_decoder.cpp @@ -801,6 +801,9 @@ DecodeError readZeInfoAttributes(const Yaml::YamlParser &parser, const Yaml::Nod outAttributes.invalidKernel = parser.readValue(attributesMetadataNd); } else if (key == Tags::Kernel::Attributes::vecTypeHint) { outAttributes.vecTypeHint = parser.readValue(attributesMetadataNd); + } else if (key == Tags::Kernel::Attributes::intelReqdThreadgroupDispatchSize) { + outAttributes.intelReqdThreadgroupDispatchSize = AttributeTypes::Defaults::intelReqdThreadgroupDispatchSize; + validAttributes &= readZeInfoValueChecked(parser, attributesMetadataNd, *outAttributes.intelReqdThreadgroupDispatchSize, context, outErrReason); } else if (key.contains(Tags::Kernel::Attributes::hintSuffix.data())) { outAttributes.otherHints.push_back({key, parser.readValue(attributesMetadataNd)}); } else { @@ -849,10 +852,12 @@ void populateKernelSourceAttributes(NEO::KernelDescriptor &dst, const KernelAttr appendAttributeIfSet(languageAttributes, AttributeTags::workgroupSizeHint, attributes.workgroupSizeHint); appendAttributeIfSet(languageAttributes, AttributeTags::vecTypeHint, attributes.vecTypeHint); appendAttributeIfSet(languageAttributes, AttributeTags::invalidKernel, attributes.invalidKernel); + appendAttributeIfSet(languageAttributes, AttributeTags::intelReqdThreadgroupDispatchSize, attributes.intelReqdThreadgroupDispatchSize); dst.kernelAttributes.flags.isInvalid = attributes.invalidKernel.has_value(); dst.kernelAttributes.flags.requiresWorkgroupWalkOrder = attributes.intelReqdWorkgroupWalkOrder.has_value(); dst.kernelMetadata.requiredSubGroupSize = static_cast(attributes.intelReqdSubgroupSize.value_or(0U)); + dst.kernelMetadata.requiredThreadGroupDispatchSize = static_cast(attributes.intelReqdThreadgroupDispatchSize.value_or(0U)); } DecodeError decodeZeInfoKernelDebugEnvironment(KernelDescriptor &dst, Yaml::YamlParser &parser, const ZeInfoKernelSections &kernelSections, std::string &outErrReason, std::string &outWarning) { diff --git a/shared/source/gen12lp/command_encoder_gen12lp.cpp b/shared/source/gen12lp/command_encoder_gen12lp.cpp index 2ab48164cd..308262cbdd 100644 --- a/shared/source/gen12lp/command_encoder_gen12lp.cpp +++ b/shared/source/gen12lp/command_encoder_gen12lp.cpp @@ -291,7 +291,7 @@ void EncodeDispatchKernel::encode(CommandContainer &container, EncodeDis cmd.setPredicateEnable(args.isPredicate); auto threadGroupCount = cmd.getThreadGroupIdXDimension() * cmd.getThreadGroupIdYDimension() * cmd.getThreadGroupIdZDimension(); - EncodeDispatchKernel::encodeThreadGroupDispatch(idd, *args.device, hwInfo, threadGroupDims, threadGroupCount, kernelDescriptor.kernelAttributes.numGrfRequired, numThreadsPerThreadGroup, cmd); + EncodeDispatchKernel::encodeThreadGroupDispatch(idd, *args.device, hwInfo, threadGroupDims, threadGroupCount, 0, kernelDescriptor.kernelAttributes.numGrfRequired, numThreadsPerThreadGroup, cmd); EncodeWalkerArgs walkerArgs{ .kernelExecutionType = KernelExecutionType::defaultType, @@ -669,8 +669,8 @@ void EncodeDispatchKernel::overrideDefaultValues(WalkerType &walkerCmd, template template void EncodeDispatchKernel::encodeThreadGroupDispatch(InterfaceDescriptorType &interfaceDescriptor, const Device &device, const HardwareInfo &hwInfo, - const uint32_t *threadGroupDimensions, const uint32_t threadGroupCount, const uint32_t grfCount, const uint32_t threadsPerThreadGroup, - WalkerType &walkerCmd) { + const uint32_t *threadGroupDimensions, const uint32_t threadGroupCount, const uint32_t requiredThreadGroupDispatchSize, + const uint32_t grfCount, const uint32_t threadsPerThreadGroup, WalkerType &walkerCmd) { } template diff --git a/shared/source/kernel/kernel_descriptor.h b/shared/source/kernel/kernel_descriptor.h index c5fdc20a3e..5778a97e6f 100644 --- a/shared/source/kernel/kernel_descriptor.h +++ b/shared/source/kernel/kernel_descriptor.h @@ -266,6 +266,7 @@ struct KernelDescriptor : NEO::NonCopyableAndNonMovableClass { uint16_t compiledSubGroupsNumber = 0U; uint8_t requiredSubGroupSize = 0U; + uint8_t requiredThreadGroupDispatchSize = 0U; bool isGeneratedByIgc = true; } kernelMetadata; diff --git a/shared/source/kernel/kernel_descriptor_from_patchtokens.cpp b/shared/source/kernel/kernel_descriptor_from_patchtokens.cpp index 8bb5589d2e..b94fbca599 100644 --- a/shared/source/kernel/kernel_descriptor_from_patchtokens.cpp +++ b/shared/source/kernel/kernel_descriptor_from_patchtokens.cpp @@ -126,6 +126,18 @@ void populateKernelDescriptor(KernelDescriptor &dst, const SPatchKernelAttribute } } + constexpr ConstStringRef attributeReqdThreadGroupDispatchSizeBeg = "intel_reqd_thread_group_dispatch_size("; + it = attributes.find(attributeReqdThreadGroupDispatchSizeBeg.begin()); + if (it != std::string::npos) { + it += attributeReqdThreadGroupDispatchSizeBeg.size(); + dst.kernelMetadata.requiredThreadGroupDispatchSize = 0U; + while ((attributes[it] >= '0') && (attributes[it] <= '9')) { + dst.kernelMetadata.requiredThreadGroupDispatchSize *= 10; + dst.kernelMetadata.requiredThreadGroupDispatchSize += attributes[it] - '0'; + ++it; + } + } + constexpr ConstStringRef invalidKernelAttrBeg = "invalid_kernel("; dst.kernelAttributes.flags.isInvalid = (attributes.find(invalidKernelAttrBeg.data()) != std::string::npos); } diff --git a/shared/source/xe_hpg_core/command_encoder_xe_hpg_core.cpp b/shared/source/xe_hpg_core/command_encoder_xe_hpg_core.cpp index b17a0cc13e..426ff86e6d 100644 --- a/shared/source/xe_hpg_core/command_encoder_xe_hpg_core.cpp +++ b/shared/source/xe_hpg_core/command_encoder_xe_hpg_core.cpp @@ -31,9 +31,12 @@ namespace NEO { template <> template void EncodeDispatchKernel::encodeThreadGroupDispatch(InterfaceDescriptorType &interfaceDescriptor, const Device &device, const HardwareInfo &hwInfo, - const uint32_t *threadGroupDimensions, const uint32_t threadGroupCount, const uint32_t grfCount, const uint32_t threadsPerThreadGroup, WalkerType &walkerCmd) { + const uint32_t *threadGroupDimensions, const uint32_t threadGroupCount, const uint32_t requiredThreadGroupDispatchSize, + const uint32_t grfCount, const uint32_t threadsPerThreadGroup, WalkerType &walkerCmd) { const auto &productHelper = device.getProductHelper(); - if (productHelper.isDisableOverdispatchAvailable(hwInfo)) { + if (requiredThreadGroupDispatchSize != 0) { + interfaceDescriptor.setThreadGroupDispatchSize(static_cast(requiredThreadGroupDispatchSize)); + } else if (productHelper.isDisableOverdispatchAvailable(hwInfo)) { if (threadsPerThreadGroup == 1) { interfaceDescriptor.setThreadGroupDispatchSize(static_cast(2u)); } else { diff --git a/shared/test/unit_test/device_binary_format/zebin_decoder_tests.cpp b/shared/test/unit_test/device_binary_format/zebin_decoder_tests.cpp index 24236436bb..01b37e1590 100644 --- a/shared/test/unit_test/device_binary_format/zebin_decoder_tests.cpp +++ b/shared/test/unit_test/device_binary_format/zebin_decoder_tests.cpp @@ -2094,6 +2094,7 @@ kernels: work_group_size_hint: [256, 2, 1] new_user_hint: new_user_hint_value invalid_kernel: invalid_kernel_reason + intel_reqd_thread_group_dispatch_size: 8 ... )==="; @@ -2125,6 +2126,7 @@ kernels: EXPECT_TRUE(equals(attributes.otherHints[0].first, "new_user_hint")); EXPECT_TRUE(equals(attributes.otherHints[0].second, "new_user_hint_value")); EXPECT_TRUE(equals(attributes.invalidKernel.value(), "invalid_kernel_reason")); + EXPECT_EQ(8, attributes.intelReqdThreadgroupDispatchSize.value()); } TEST(ReadZeInfoDebugEnvironment, givenSipSurfaceBtiEntryThenSetProperMembers) { @@ -2366,6 +2368,7 @@ kernels: intel_reqd_sub_group_size: 16 intel_reqd_workgroup_walk_order: [0, 1, 2] reqd_work_group_size: [256, 2, 1] + intel_reqd_thread_group_dispatch_size: 8 vec_type_hint: uint work_group_size_hint: [256, 2, 1] new_user_hint: new_user_hint_value @@ -2376,8 +2379,9 @@ kernels: EXPECT_TRUE(warnings.empty()) << warnings; EXPECT_TRUE(errors.empty()) << errors; - EXPECT_STREQ("new_user_hint(new_user_hint_value) intel_reqd_sub_group_size(16) intel_reqd_workgroup_walk_order(0,1,2) reqd_work_group_size(256,2,1) work_group_size_hint(256,2,1) vec_type_hint(uint)", kernelDescriptor->kernelMetadata.kernelLanguageAttributes.c_str()); + EXPECT_STREQ("new_user_hint(new_user_hint_value) intel_reqd_sub_group_size(16) intel_reqd_workgroup_walk_order(0,1,2) reqd_work_group_size(256,2,1) work_group_size_hint(256,2,1) vec_type_hint(uint) intel_reqd_thread_group_dispatch_size(8)", kernelDescriptor->kernelMetadata.kernelLanguageAttributes.c_str()); EXPECT_EQ(16U, kernelDescriptor->kernelMetadata.requiredSubGroupSize); + EXPECT_EQ(8U, kernelDescriptor->kernelMetadata.requiredThreadGroupDispatchSize); EXPECT_FALSE(kernelDescriptor->kernelAttributes.flags.isInvalid); } diff --git a/shared/test/unit_test/encoders/command_encoder_tests_dg2.cpp b/shared/test/unit_test/encoders/command_encoder_tests_dg2.cpp index 7cfd058137..d767fc2aa5 100644 --- a/shared/test/unit_test/encoders/command_encoder_tests_dg2.cpp +++ b/shared/test/unit_test/encoders/command_encoder_tests_dg2.cpp @@ -85,7 +85,7 @@ HWTEST2_F(DG2CommandEncoderTest, givenInterfaceDescriptorDataWhenForceThreadGrou for (auto numberOfThreadsInGroup : {1u, 4u, 16u}) { iddArg.setNumberOfThreadsInGpgpuThreadGroup(numberOfThreadsInGroup); - EncodeDispatchKernel::encodeThreadGroupDispatch(iddArg, *pDevice, hwInfo, threadGroups, 0, 0, numberOfThreadsInGroup, walkerCmd); + EncodeDispatchKernel::encodeThreadGroupDispatch(iddArg, *pDevice, hwInfo, threadGroups, 0, 0, 0, numberOfThreadsInGroup, walkerCmd); if (productHelper.isDisableOverdispatchAvailable(hwInfo)) { if (numberOfThreadsInGroup == 1) { diff --git a/shared/test/unit_test/encoders/test_encode_dispatch_kernel_pvc_and_later.cpp b/shared/test/unit_test/encoders/test_encode_dispatch_kernel_pvc_and_later.cpp index 5316d67184..05651ec6da 100644 --- a/shared/test/unit_test/encoders/test_encode_dispatch_kernel_pvc_and_later.cpp +++ b/shared/test/unit_test/encoders/test_encode_dispatch_kernel_pvc_and_later.cpp @@ -148,7 +148,7 @@ HWTEST2_F(CommandEncodeStatesTestPvcAndLater, givenDispatchSizeSmallerOrEqualToA uint32_t threadGroups[] = {walkerCmd.getThreadGroupIdXDimension(), walkerCmd.getThreadGroupIdYDimension(), walkerCmd.getThreadGroupIdZDimension()}; for (const auto threadGroupCount : {1u, 2u}) { - EncodeDispatchKernel::encodeThreadGroupDispatch(iddArg, *pDevice, hwInfo, threadGroups, threadGroupCount, numGrf, 1u, walkerCmd); + EncodeDispatchKernel::encodeThreadGroupDispatch(iddArg, *pDevice, hwInfo, threadGroups, threadGroupCount, 0u, numGrf, 1u, walkerCmd); EXPECT_EQ(InterfaceDescriptorType::THREAD_GROUP_DISPATCH_SIZE_TG_SIZE_1, iddArg.getThreadGroupDispatchSize()); } @@ -168,15 +168,16 @@ HWTEST2_F(CommandEncodeStatesTestPvcAndLater, givenMultipleTilesAndImplicitScali auto &gfxCoreHelper = pDevice->getGfxCoreHelper(); const uint32_t threadGroupCount = gfxCoreHelper.calculateAvailableThreadCount(hwInfo, numGrf) / 32u; uint32_t threadsPerThreadGroup = 64u; + const uint32_t requiredThreadGroupDispatchSize = 0u; iddArg.setNumberOfThreadsInGpgpuThreadGroup(threadsPerThreadGroup); uint32_t threadGroups[] = {walkerCmd.getThreadGroupIdXDimension(), walkerCmd.getThreadGroupIdYDimension(), walkerCmd.getThreadGroupIdZDimension()}; - EncodeDispatchKernel::encodeThreadGroupDispatch(iddArg, *pDevice, hwInfo, threadGroups, threadGroupCount, numGrf, threadsPerThreadGroup, walkerCmd); + EncodeDispatchKernel::encodeThreadGroupDispatch(iddArg, *pDevice, hwInfo, threadGroups, threadGroupCount, requiredThreadGroupDispatchSize, numGrf, threadsPerThreadGroup, walkerCmd); ASSERT_EQ(InterfaceDescriptorType::THREAD_GROUP_DISPATCH_SIZE_TG_SIZE_1, iddArg.getThreadGroupDispatchSize()); debugManager.flags.EnableWalkerPartition.set(1); pDevice->numSubDevices = 2; - EncodeDispatchKernel::encodeThreadGroupDispatch(iddArg, *pDevice, hwInfo, threadGroups, threadGroupCount, numGrf, threadsPerThreadGroup, walkerCmd); + EncodeDispatchKernel::encodeThreadGroupDispatch(iddArg, *pDevice, hwInfo, threadGroups, threadGroupCount, requiredThreadGroupDispatchSize, numGrf, threadsPerThreadGroup, walkerCmd); EXPECT_EQ(InterfaceDescriptorType::THREAD_GROUP_DISPATCH_SIZE_TG_SIZE_1, iddArg.getThreadGroupDispatchSize()); } @@ -190,6 +191,7 @@ HWTEST2_F(CommandEncodeStatesTestPvcAndLater, givenNumberOfThreadsInThreadGroupW InterfaceDescriptorType iddArg = FamilyType::template getInitInterfaceDescriptor(); const uint32_t threadGroupCount = 512u; + const uint32_t requiredThreadGroupDispatchSize = 0u; const uint32_t numGrf = GrfConfig::defaultGrfNumber; std::array, 3> testParams = {{{16u, InterfaceDescriptorType::THREAD_GROUP_DISPATCH_SIZE_TG_SIZE_8}, {32u, InterfaceDescriptorType::THREAD_GROUP_DISPATCH_SIZE_TG_SIZE_4}, @@ -198,7 +200,7 @@ HWTEST2_F(CommandEncodeStatesTestPvcAndLater, givenNumberOfThreadsInThreadGroupW for (const auto &[numberOfThreadsInThreadGroup, expectedThreadGroupDispatchSize] : testParams) { iddArg.setNumberOfThreadsInGpgpuThreadGroup(numberOfThreadsInThreadGroup); - EncodeDispatchKernel::encodeThreadGroupDispatch(iddArg, *pDevice, hwInfo, threadGroups, threadGroupCount, numGrf, numberOfThreadsInThreadGroup, walkerCmd); + EncodeDispatchKernel::encodeThreadGroupDispatch(iddArg, *pDevice, hwInfo, threadGroups, threadGroupCount, requiredThreadGroupDispatchSize, numGrf, numberOfThreadsInThreadGroup, walkerCmd); EXPECT_EQ(expectedThreadGroupDispatchSize, iddArg.getThreadGroupDispatchSize()); } @@ -214,13 +216,14 @@ HWTEST2_F(CommandEncodeStatesTestPvcAndLater, givenNumberOfThreadsInThreadGroupA InterfaceDescriptorType iddArg = FamilyType::template getInitInterfaceDescriptor(); const uint32_t threadGroupCount = 512u; + const uint32_t requiredThreadGroupDispatchSize = 0u; const uint32_t numGrf = GrfConfig::defaultGrfNumber; uint32_t threadsPerThreadGroup = 16; iddArg.setNumberOfThreadsInGpgpuThreadGroup(threadsPerThreadGroup); { uint32_t threadGroups[] = {walkerCmd.getThreadGroupIdXDimension(), walkerCmd.getThreadGroupIdYDimension(), walkerCmd.getThreadGroupIdZDimension()}; - EncodeDispatchKernel::encodeThreadGroupDispatch(iddArg, *pDevice, hwInfo, threadGroups, threadGroupCount, numGrf, threadsPerThreadGroup, walkerCmd); + EncodeDispatchKernel::encodeThreadGroupDispatch(iddArg, *pDevice, hwInfo, threadGroups, threadGroupCount, requiredThreadGroupDispatchSize, numGrf, threadsPerThreadGroup, walkerCmd); EXPECT_EQ(InterfaceDescriptorType::THREAD_GROUP_DISPATCH_SIZE_TG_SIZE_8, iddArg.getThreadGroupDispatchSize()); } walkerCmd.setThreadGroupIdYDimension(2); @@ -228,19 +231,19 @@ HWTEST2_F(CommandEncodeStatesTestPvcAndLater, givenNumberOfThreadsInThreadGroupA { walkerCmd.setThreadGroupIdXDimension(4); uint32_t threadGroups[] = {walkerCmd.getThreadGroupIdXDimension(), walkerCmd.getThreadGroupIdYDimension(), walkerCmd.getThreadGroupIdZDimension()}; - EncodeDispatchKernel::encodeThreadGroupDispatch(iddArg, *pDevice, hwInfo, threadGroups, threadGroupCount, numGrf, threadsPerThreadGroup, walkerCmd); + EncodeDispatchKernel::encodeThreadGroupDispatch(iddArg, *pDevice, hwInfo, threadGroups, threadGroupCount, requiredThreadGroupDispatchSize, numGrf, threadsPerThreadGroup, walkerCmd); EXPECT_EQ(InterfaceDescriptorType::THREAD_GROUP_DISPATCH_SIZE_TG_SIZE_4, iddArg.getThreadGroupDispatchSize()); } { walkerCmd.setThreadGroupIdXDimension(2); uint32_t threadGroups[] = {walkerCmd.getThreadGroupIdXDimension(), walkerCmd.getThreadGroupIdYDimension(), walkerCmd.getThreadGroupIdZDimension()}; - EncodeDispatchKernel::encodeThreadGroupDispatch(iddArg, *pDevice, hwInfo, threadGroups, threadGroupCount, numGrf, threadsPerThreadGroup, walkerCmd); + EncodeDispatchKernel::encodeThreadGroupDispatch(iddArg, *pDevice, hwInfo, threadGroups, threadGroupCount, requiredThreadGroupDispatchSize, numGrf, threadsPerThreadGroup, walkerCmd); EXPECT_EQ(InterfaceDescriptorType::THREAD_GROUP_DISPATCH_SIZE_TG_SIZE_2, iddArg.getThreadGroupDispatchSize()); } { walkerCmd.setThreadGroupIdXDimension(1); uint32_t threadGroups[] = {walkerCmd.getThreadGroupIdXDimension(), walkerCmd.getThreadGroupIdYDimension(), walkerCmd.getThreadGroupIdZDimension()}; - EncodeDispatchKernel::encodeThreadGroupDispatch(iddArg, *pDevice, hwInfo, threadGroups, threadGroupCount, numGrf, threadsPerThreadGroup, walkerCmd); + EncodeDispatchKernel::encodeThreadGroupDispatch(iddArg, *pDevice, hwInfo, threadGroups, threadGroupCount, requiredThreadGroupDispatchSize, numGrf, threadsPerThreadGroup, walkerCmd); EXPECT_EQ(InterfaceDescriptorType::THREAD_GROUP_DISPATCH_SIZE_TG_SIZE_8, iddArg.getThreadGroupDispatchSize()); } walkerCmd.setThreadGroupIdYDimension(1); @@ -248,19 +251,19 @@ HWTEST2_F(CommandEncodeStatesTestPvcAndLater, givenNumberOfThreadsInThreadGroupA { walkerCmd.setThreadGroupIdXDimension(4); uint32_t threadGroups[] = {walkerCmd.getThreadGroupIdXDimension(), walkerCmd.getThreadGroupIdYDimension(), walkerCmd.getThreadGroupIdZDimension()}; - EncodeDispatchKernel::encodeThreadGroupDispatch(iddArg, *pDevice, hwInfo, threadGroups, threadGroupCount, numGrf, threadsPerThreadGroup, walkerCmd); + EncodeDispatchKernel::encodeThreadGroupDispatch(iddArg, *pDevice, hwInfo, threadGroups, threadGroupCount, requiredThreadGroupDispatchSize, numGrf, threadsPerThreadGroup, walkerCmd); EXPECT_EQ(InterfaceDescriptorType::THREAD_GROUP_DISPATCH_SIZE_TG_SIZE_4, iddArg.getThreadGroupDispatchSize()); } { walkerCmd.setThreadGroupIdXDimension(2); uint32_t threadGroups[] = {walkerCmd.getThreadGroupIdXDimension(), walkerCmd.getThreadGroupIdYDimension(), walkerCmd.getThreadGroupIdZDimension()}; - EncodeDispatchKernel::encodeThreadGroupDispatch(iddArg, *pDevice, hwInfo, threadGroups, threadGroupCount, numGrf, threadsPerThreadGroup, walkerCmd); + EncodeDispatchKernel::encodeThreadGroupDispatch(iddArg, *pDevice, hwInfo, threadGroups, threadGroupCount, requiredThreadGroupDispatchSize, numGrf, threadsPerThreadGroup, walkerCmd); EXPECT_EQ(InterfaceDescriptorType::THREAD_GROUP_DISPATCH_SIZE_TG_SIZE_2, iddArg.getThreadGroupDispatchSize()); } { walkerCmd.setThreadGroupIdXDimension(1); uint32_t threadGroups[] = {walkerCmd.getThreadGroupIdXDimension(), walkerCmd.getThreadGroupIdYDimension(), walkerCmd.getThreadGroupIdZDimension()}; - EncodeDispatchKernel::encodeThreadGroupDispatch(iddArg, *pDevice, hwInfo, threadGroups, threadGroupCount, numGrf, threadsPerThreadGroup, walkerCmd); + EncodeDispatchKernel::encodeThreadGroupDispatch(iddArg, *pDevice, hwInfo, threadGroups, threadGroupCount, requiredThreadGroupDispatchSize, numGrf, threadsPerThreadGroup, walkerCmd); EXPECT_EQ(InterfaceDescriptorType::THREAD_GROUP_DISPATCH_SIZE_TG_SIZE_8, iddArg.getThreadGroupDispatchSize()); } walkerCmd.setThreadGroupIdYDimension(1); @@ -268,19 +271,19 @@ HWTEST2_F(CommandEncodeStatesTestPvcAndLater, givenNumberOfThreadsInThreadGroupA { walkerCmd.setThreadGroupIdXDimension(4); uint32_t threadGroups[] = {walkerCmd.getThreadGroupIdXDimension(), walkerCmd.getThreadGroupIdYDimension(), walkerCmd.getThreadGroupIdZDimension()}; - EncodeDispatchKernel::encodeThreadGroupDispatch(iddArg, *pDevice, hwInfo, threadGroups, threadGroupCount, numGrf, threadsPerThreadGroup, walkerCmd); + EncodeDispatchKernel::encodeThreadGroupDispatch(iddArg, *pDevice, hwInfo, threadGroups, threadGroupCount, requiredThreadGroupDispatchSize, numGrf, threadsPerThreadGroup, walkerCmd); EXPECT_EQ(InterfaceDescriptorType::THREAD_GROUP_DISPATCH_SIZE_TG_SIZE_8, iddArg.getThreadGroupDispatchSize()); } { walkerCmd.setThreadGroupIdXDimension(2); uint32_t threadGroups[] = {walkerCmd.getThreadGroupIdXDimension(), walkerCmd.getThreadGroupIdYDimension(), walkerCmd.getThreadGroupIdZDimension()}; - EncodeDispatchKernel::encodeThreadGroupDispatch(iddArg, *pDevice, hwInfo, threadGroups, threadGroupCount, numGrf, threadsPerThreadGroup, walkerCmd); + EncodeDispatchKernel::encodeThreadGroupDispatch(iddArg, *pDevice, hwInfo, threadGroups, threadGroupCount, requiredThreadGroupDispatchSize, numGrf, threadsPerThreadGroup, walkerCmd); EXPECT_EQ(InterfaceDescriptorType::THREAD_GROUP_DISPATCH_SIZE_TG_SIZE_8, iddArg.getThreadGroupDispatchSize()); } { walkerCmd.setThreadGroupIdXDimension(1); uint32_t threadGroups[] = {walkerCmd.getThreadGroupIdXDimension(), walkerCmd.getThreadGroupIdYDimension(), walkerCmd.getThreadGroupIdZDimension()}; - EncodeDispatchKernel::encodeThreadGroupDispatch(iddArg, *pDevice, hwInfo, threadGroups, threadGroupCount, numGrf, threadsPerThreadGroup, walkerCmd); + EncodeDispatchKernel::encodeThreadGroupDispatch(iddArg, *pDevice, hwInfo, threadGroups, threadGroupCount, requiredThreadGroupDispatchSize, numGrf, threadsPerThreadGroup, walkerCmd); EXPECT_EQ(InterfaceDescriptorType::THREAD_GROUP_DISPATCH_SIZE_TG_SIZE_8, iddArg.getThreadGroupDispatchSize()); } walkerCmd.setThreadGroupIdXDimension(1); @@ -288,19 +291,19 @@ HWTEST2_F(CommandEncodeStatesTestPvcAndLater, givenNumberOfThreadsInThreadGroupA { walkerCmd.setThreadGroupIdYDimension(4); uint32_t threadGroups[] = {walkerCmd.getThreadGroupIdXDimension(), walkerCmd.getThreadGroupIdYDimension(), walkerCmd.getThreadGroupIdZDimension()}; - EncodeDispatchKernel::encodeThreadGroupDispatch(iddArg, *pDevice, hwInfo, threadGroups, threadGroupCount, numGrf, threadsPerThreadGroup, walkerCmd); + EncodeDispatchKernel::encodeThreadGroupDispatch(iddArg, *pDevice, hwInfo, threadGroups, threadGroupCount, requiredThreadGroupDispatchSize, numGrf, threadsPerThreadGroup, walkerCmd); EXPECT_EQ(InterfaceDescriptorType::THREAD_GROUP_DISPATCH_SIZE_TG_SIZE_4, iddArg.getThreadGroupDispatchSize()); } { walkerCmd.setThreadGroupIdYDimension(2); uint32_t threadGroups[] = {walkerCmd.getThreadGroupIdXDimension(), walkerCmd.getThreadGroupIdYDimension(), walkerCmd.getThreadGroupIdZDimension()}; - EncodeDispatchKernel::encodeThreadGroupDispatch(iddArg, *pDevice, hwInfo, threadGroups, threadGroupCount, numGrf, threadsPerThreadGroup, walkerCmd); + EncodeDispatchKernel::encodeThreadGroupDispatch(iddArg, *pDevice, hwInfo, threadGroups, threadGroupCount, requiredThreadGroupDispatchSize, numGrf, threadsPerThreadGroup, walkerCmd); EXPECT_EQ(InterfaceDescriptorType::THREAD_GROUP_DISPATCH_SIZE_TG_SIZE_2, iddArg.getThreadGroupDispatchSize()); } { walkerCmd.setThreadGroupIdYDimension(1); uint32_t threadGroups[] = {walkerCmd.getThreadGroupIdXDimension(), walkerCmd.getThreadGroupIdYDimension(), walkerCmd.getThreadGroupIdZDimension()}; - EncodeDispatchKernel::encodeThreadGroupDispatch(iddArg, *pDevice, hwInfo, threadGroups, threadGroupCount, numGrf, threadsPerThreadGroup, walkerCmd); + EncodeDispatchKernel::encodeThreadGroupDispatch(iddArg, *pDevice, hwInfo, threadGroups, threadGroupCount, requiredThreadGroupDispatchSize, numGrf, threadsPerThreadGroup, walkerCmd); EXPECT_EQ(InterfaceDescriptorType::THREAD_GROUP_DISPATCH_SIZE_TG_SIZE_8, iddArg.getThreadGroupDispatchSize()); } } @@ -313,6 +316,7 @@ HWTEST2_F(CommandEncodeStatesTestPvcAndLater, givenDifferentNumGrfWhenCallingEnc InterfaceDescriptorType iddArg = FamilyType::template getInitInterfaceDescriptor(); const uint32_t numberOfThreadsInThreadGroup = 1u; + const uint32_t requiredThreadGroupDispatchSize = 0u; walkerCmd.setThreadGroupIdXDimension(1); walkerCmd.setThreadGroupIdYDimension(1); @@ -323,7 +327,7 @@ HWTEST2_F(CommandEncodeStatesTestPvcAndLater, givenDifferentNumGrfWhenCallingEnc const uint32_t threadGroupCount = 1; iddArg.setNumberOfThreadsInGpgpuThreadGroup(numberOfThreadsInThreadGroup); uint32_t threadGroups[] = {walkerCmd.getThreadGroupIdXDimension(), walkerCmd.getThreadGroupIdYDimension(), walkerCmd.getThreadGroupIdZDimension()}; - EncodeDispatchKernel::encodeThreadGroupDispatch(iddArg, *pDevice, hwInfo, threadGroups, threadGroupCount, numGrf, numberOfThreadsInThreadGroup, walkerCmd); + EncodeDispatchKernel::encodeThreadGroupDispatch(iddArg, *pDevice, hwInfo, threadGroups, threadGroupCount, requiredThreadGroupDispatchSize, numGrf, numberOfThreadsInThreadGroup, walkerCmd); ASSERT_EQ(InterfaceDescriptorType::THREAD_GROUP_DISPATCH_SIZE_TG_SIZE_1, iddArg.getThreadGroupDispatchSize()); } @@ -332,7 +336,7 @@ HWTEST2_F(CommandEncodeStatesTestPvcAndLater, givenDifferentNumGrfWhenCallingEnc const uint32_t threadGroupCount = 1; iddArg.setNumberOfThreadsInGpgpuThreadGroup(numberOfThreadsInThreadGroup); uint32_t threadGroups[] = {walkerCmd.getThreadGroupIdXDimension(), walkerCmd.getThreadGroupIdYDimension(), walkerCmd.getThreadGroupIdZDimension()}; - EncodeDispatchKernel::encodeThreadGroupDispatch(iddArg, *pDevice, hwInfo, threadGroups, threadGroupCount, numGrf, numberOfThreadsInThreadGroup, walkerCmd); + EncodeDispatchKernel::encodeThreadGroupDispatch(iddArg, *pDevice, hwInfo, threadGroups, threadGroupCount, requiredThreadGroupDispatchSize, numGrf, numberOfThreadsInThreadGroup, walkerCmd); EXPECT_EQ(InterfaceDescriptorType::THREAD_GROUP_DISPATCH_SIZE_TG_SIZE_1, iddArg.getThreadGroupDispatchSize()); } } @@ -348,6 +352,7 @@ HWTEST2_F(CommandEncodeStatesTestPvcAndLater, givenVariousDispatchParamtersWhenA mutableHwInfo->gtSystemInfo.ThreadCount = 4096u; auto hwInfo = pDevice->getHardwareInfo(); + const uint32_t requiredThreadGroupDispatchSize = 0u; uint32_t numGrf = GrfConfig::defaultGrfNumber; InterfaceDescriptorType iddArg = FamilyType::template getInitInterfaceDescriptor(); @@ -361,7 +366,7 @@ HWTEST2_F(CommandEncodeStatesTestPvcAndLater, givenVariousDispatchParamtersWhenA { const uint32_t threadGroupCount = walkerCmd.getThreadGroupIdXDimension() * walkerCmd.getThreadGroupIdYDimension() * walkerCmd.getThreadGroupIdZDimension(); uint32_t threadGroups[] = {walkerCmd.getThreadGroupIdXDimension(), walkerCmd.getThreadGroupIdYDimension(), walkerCmd.getThreadGroupIdZDimension()}; - EncodeDispatchKernel::encodeThreadGroupDispatch(iddArg, *pDevice, hwInfo, threadGroups, threadGroupCount, numGrf, numberOfThreadsInThreadGroup, walkerCmd); + EncodeDispatchKernel::encodeThreadGroupDispatch(iddArg, *pDevice, hwInfo, threadGroups, threadGroupCount, requiredThreadGroupDispatchSize, numGrf, numberOfThreadsInThreadGroup, walkerCmd); EXPECT_EQ(InterfaceDescriptorType::THREAD_GROUP_DISPATCH_SIZE_TG_SIZE_1, iddArg.getThreadGroupDispatchSize()); } @@ -373,7 +378,7 @@ HWTEST2_F(CommandEncodeStatesTestPvcAndLater, givenVariousDispatchParamtersWhenA { const uint32_t threadGroupCount = walkerCmd.getThreadGroupIdXDimension() * walkerCmd.getThreadGroupIdYDimension() * walkerCmd.getThreadGroupIdZDimension(); uint32_t threadGroups[] = {walkerCmd.getThreadGroupIdXDimension(), walkerCmd.getThreadGroupIdYDimension(), walkerCmd.getThreadGroupIdZDimension()}; - EncodeDispatchKernel::encodeThreadGroupDispatch(iddArg, *pDevice, hwInfo, threadGroups, threadGroupCount, numGrf, numberOfThreadsInThreadGroup, walkerCmd); + EncodeDispatchKernel::encodeThreadGroupDispatch(iddArg, *pDevice, hwInfo, threadGroups, threadGroupCount, requiredThreadGroupDispatchSize, numGrf, numberOfThreadsInThreadGroup, walkerCmd); EXPECT_EQ(InterfaceDescriptorType::THREAD_GROUP_DISPATCH_SIZE_TG_SIZE_4, iddArg.getThreadGroupDispatchSize()); } @@ -386,7 +391,7 @@ HWTEST2_F(CommandEncodeStatesTestPvcAndLater, givenVariousDispatchParamtersWhenA { const uint32_t threadGroupCount = walkerCmd.getThreadGroupIdXDimension() * walkerCmd.getThreadGroupIdYDimension() * walkerCmd.getThreadGroupIdZDimension(); uint32_t threadGroups[] = {walkerCmd.getThreadGroupIdXDimension(), walkerCmd.getThreadGroupIdYDimension(), walkerCmd.getThreadGroupIdZDimension()}; - EncodeDispatchKernel::encodeThreadGroupDispatch(iddArg, *pDevice, hwInfo, threadGroups, threadGroupCount, numGrf, numberOfThreadsInThreadGroup, walkerCmd); + EncodeDispatchKernel::encodeThreadGroupDispatch(iddArg, *pDevice, hwInfo, threadGroups, threadGroupCount, requiredThreadGroupDispatchSize, numGrf, numberOfThreadsInThreadGroup, walkerCmd); EXPECT_EQ(InterfaceDescriptorType::THREAD_GROUP_DISPATCH_SIZE_TG_SIZE_1, iddArg.getThreadGroupDispatchSize()); } @@ -399,7 +404,7 @@ HWTEST2_F(CommandEncodeStatesTestPvcAndLater, givenVariousDispatchParamtersWhenA { const uint32_t threadGroupCount = walkerCmd.getThreadGroupIdXDimension() * walkerCmd.getThreadGroupIdYDimension() * walkerCmd.getThreadGroupIdZDimension(); uint32_t threadGroups[] = {walkerCmd.getThreadGroupIdXDimension(), walkerCmd.getThreadGroupIdYDimension(), walkerCmd.getThreadGroupIdZDimension()}; - EncodeDispatchKernel::encodeThreadGroupDispatch(iddArg, *pDevice, hwInfo, threadGroups, threadGroupCount, numGrf, numberOfThreadsInThreadGroup, walkerCmd); + EncodeDispatchKernel::encodeThreadGroupDispatch(iddArg, *pDevice, hwInfo, threadGroups, threadGroupCount, requiredThreadGroupDispatchSize, numGrf, numberOfThreadsInThreadGroup, walkerCmd); EXPECT_EQ(InterfaceDescriptorType::THREAD_GROUP_DISPATCH_SIZE_TG_SIZE_8, iddArg.getThreadGroupDispatchSize()); } @@ -412,7 +417,7 @@ HWTEST2_F(CommandEncodeStatesTestPvcAndLater, givenVariousDispatchParamtersWhenA { const uint32_t threadGroupCount = walkerCmd.getThreadGroupIdXDimension() * walkerCmd.getThreadGroupIdYDimension() * walkerCmd.getThreadGroupIdZDimension(); uint32_t threadGroups[] = {walkerCmd.getThreadGroupIdXDimension(), walkerCmd.getThreadGroupIdYDimension(), walkerCmd.getThreadGroupIdZDimension()}; - EncodeDispatchKernel::encodeThreadGroupDispatch(iddArg, *pDevice, hwInfo, threadGroups, threadGroupCount, numGrf, numberOfThreadsInThreadGroup, walkerCmd); + EncodeDispatchKernel::encodeThreadGroupDispatch(iddArg, *pDevice, hwInfo, threadGroups, threadGroupCount, requiredThreadGroupDispatchSize, numGrf, numberOfThreadsInThreadGroup, walkerCmd); EXPECT_EQ(InterfaceDescriptorType::THREAD_GROUP_DISPATCH_SIZE_TG_SIZE_2, iddArg.getThreadGroupDispatchSize()); } @@ -425,7 +430,7 @@ HWTEST2_F(CommandEncodeStatesTestPvcAndLater, givenVariousDispatchParamtersWhenA { const uint32_t threadGroupCount = walkerCmd.getThreadGroupIdXDimension() * walkerCmd.getThreadGroupIdYDimension() * walkerCmd.getThreadGroupIdZDimension(); uint32_t threadGroups[] = {walkerCmd.getThreadGroupIdXDimension(), walkerCmd.getThreadGroupIdYDimension(), walkerCmd.getThreadGroupIdZDimension()}; - EncodeDispatchKernel::encodeThreadGroupDispatch(iddArg, *pDevice, hwInfo, threadGroups, threadGroupCount, numGrf, numberOfThreadsInThreadGroup, walkerCmd); + EncodeDispatchKernel::encodeThreadGroupDispatch(iddArg, *pDevice, hwInfo, threadGroups, threadGroupCount, requiredThreadGroupDispatchSize, numGrf, numberOfThreadsInThreadGroup, walkerCmd); EXPECT_EQ(InterfaceDescriptorType::THREAD_GROUP_DISPATCH_SIZE_TG_SIZE_8, iddArg.getThreadGroupDispatchSize()); } @@ -438,7 +443,7 @@ HWTEST2_F(CommandEncodeStatesTestPvcAndLater, givenVariousDispatchParamtersWhenA { const uint32_t threadGroupCount = walkerCmd.getThreadGroupIdXDimension() * walkerCmd.getThreadGroupIdYDimension() * walkerCmd.getThreadGroupIdZDimension(); uint32_t threadGroups[] = {walkerCmd.getThreadGroupIdXDimension(), walkerCmd.getThreadGroupIdYDimension(), walkerCmd.getThreadGroupIdZDimension()}; - EncodeDispatchKernel::encodeThreadGroupDispatch(iddArg, *pDevice, hwInfo, threadGroups, threadGroupCount, numGrf, numberOfThreadsInThreadGroup, walkerCmd); + EncodeDispatchKernel::encodeThreadGroupDispatch(iddArg, *pDevice, hwInfo, threadGroups, threadGroupCount, requiredThreadGroupDispatchSize, numGrf, numberOfThreadsInThreadGroup, walkerCmd); EXPECT_EQ(InterfaceDescriptorType::THREAD_GROUP_DISPATCH_SIZE_TG_SIZE_2, iddArg.getThreadGroupDispatchSize()); } @@ -451,7 +456,7 @@ HWTEST2_F(CommandEncodeStatesTestPvcAndLater, givenVariousDispatchParamtersWhenA { const uint32_t threadGroupCount = walkerCmd.getThreadGroupIdXDimension() * walkerCmd.getThreadGroupIdYDimension() * walkerCmd.getThreadGroupIdZDimension(); uint32_t threadGroups[] = {walkerCmd.getThreadGroupIdXDimension(), walkerCmd.getThreadGroupIdYDimension(), walkerCmd.getThreadGroupIdZDimension()}; - EncodeDispatchKernel::encodeThreadGroupDispatch(iddArg, *pDevice, hwInfo, threadGroups, threadGroupCount, numGrf, numberOfThreadsInThreadGroup, walkerCmd); + EncodeDispatchKernel::encodeThreadGroupDispatch(iddArg, *pDevice, hwInfo, threadGroups, threadGroupCount, requiredThreadGroupDispatchSize, numGrf, numberOfThreadsInThreadGroup, walkerCmd); EXPECT_EQ(InterfaceDescriptorType::THREAD_GROUP_DISPATCH_SIZE_TG_SIZE_1, iddArg.getThreadGroupDispatchSize()); } @@ -464,7 +469,7 @@ HWTEST2_F(CommandEncodeStatesTestPvcAndLater, givenVariousDispatchParamtersWhenA { const uint32_t threadGroupCount = walkerCmd.getThreadGroupIdXDimension() * walkerCmd.getThreadGroupIdYDimension() * walkerCmd.getThreadGroupIdZDimension(); uint32_t threadGroups[] = {walkerCmd.getThreadGroupIdXDimension(), walkerCmd.getThreadGroupIdYDimension(), walkerCmd.getThreadGroupIdZDimension()}; - EncodeDispatchKernel::encodeThreadGroupDispatch(iddArg, *pDevice, hwInfo, threadGroups, threadGroupCount, numGrf, numberOfThreadsInThreadGroup, walkerCmd); + EncodeDispatchKernel::encodeThreadGroupDispatch(iddArg, *pDevice, hwInfo, threadGroups, threadGroupCount, requiredThreadGroupDispatchSize, numGrf, numberOfThreadsInThreadGroup, walkerCmd); EXPECT_EQ(InterfaceDescriptorType::THREAD_GROUP_DISPATCH_SIZE_TG_SIZE_2, iddArg.getThreadGroupDispatchSize()); } @@ -477,7 +482,7 @@ HWTEST2_F(CommandEncodeStatesTestPvcAndLater, givenVariousDispatchParamtersWhenA { const uint32_t threadGroupCount = walkerCmd.getThreadGroupIdXDimension() * walkerCmd.getThreadGroupIdYDimension() * walkerCmd.getThreadGroupIdZDimension(); uint32_t threadGroups[] = {walkerCmd.getThreadGroupIdXDimension(), walkerCmd.getThreadGroupIdYDimension(), walkerCmd.getThreadGroupIdZDimension()}; - EncodeDispatchKernel::encodeThreadGroupDispatch(iddArg, *pDevice, hwInfo, threadGroups, threadGroupCount, numGrf, numberOfThreadsInThreadGroup, walkerCmd); + EncodeDispatchKernel::encodeThreadGroupDispatch(iddArg, *pDevice, hwInfo, threadGroups, threadGroupCount, requiredThreadGroupDispatchSize, numGrf, numberOfThreadsInThreadGroup, walkerCmd); EXPECT_EQ(InterfaceDescriptorType::THREAD_GROUP_DISPATCH_SIZE_TG_SIZE_4, iddArg.getThreadGroupDispatchSize()); } @@ -490,7 +495,7 @@ HWTEST2_F(CommandEncodeStatesTestPvcAndLater, givenVariousDispatchParamtersWhenA { const uint32_t threadGroupCount = walkerCmd.getThreadGroupIdXDimension() * walkerCmd.getThreadGroupIdYDimension() * walkerCmd.getThreadGroupIdZDimension(); uint32_t threadGroups[] = {walkerCmd.getThreadGroupIdXDimension(), walkerCmd.getThreadGroupIdYDimension(), walkerCmd.getThreadGroupIdZDimension()}; - EncodeDispatchKernel::encodeThreadGroupDispatch(iddArg, *pDevice, hwInfo, threadGroups, threadGroupCount, numGrf, numberOfThreadsInThreadGroup, walkerCmd); + EncodeDispatchKernel::encodeThreadGroupDispatch(iddArg, *pDevice, hwInfo, threadGroups, threadGroupCount, requiredThreadGroupDispatchSize, numGrf, numberOfThreadsInThreadGroup, walkerCmd); EXPECT_EQ(InterfaceDescriptorType::THREAD_GROUP_DISPATCH_SIZE_TG_SIZE_2, iddArg.getThreadGroupDispatchSize()); } @@ -503,7 +508,7 @@ HWTEST2_F(CommandEncodeStatesTestPvcAndLater, givenVariousDispatchParamtersWhenA { const uint32_t threadGroupCount = walkerCmd.getThreadGroupIdXDimension() * walkerCmd.getThreadGroupIdYDimension() * walkerCmd.getThreadGroupIdZDimension(); uint32_t threadGroups[] = {walkerCmd.getThreadGroupIdXDimension(), walkerCmd.getThreadGroupIdYDimension(), walkerCmd.getThreadGroupIdZDimension()}; - EncodeDispatchKernel::encodeThreadGroupDispatch(iddArg, *pDevice, hwInfo, threadGroups, threadGroupCount, numGrf, numberOfThreadsInThreadGroup, walkerCmd); + EncodeDispatchKernel::encodeThreadGroupDispatch(iddArg, *pDevice, hwInfo, threadGroups, threadGroupCount, requiredThreadGroupDispatchSize, numGrf, numberOfThreadsInThreadGroup, walkerCmd); EXPECT_EQ(InterfaceDescriptorType::THREAD_GROUP_DISPATCH_SIZE_TG_SIZE_2, iddArg.getThreadGroupDispatchSize()); } @@ -516,7 +521,7 @@ HWTEST2_F(CommandEncodeStatesTestPvcAndLater, givenVariousDispatchParamtersWhenA { const uint32_t threadGroupCount = walkerCmd.getThreadGroupIdXDimension() * walkerCmd.getThreadGroupIdYDimension() * walkerCmd.getThreadGroupIdZDimension(); uint32_t threadGroups[] = {walkerCmd.getThreadGroupIdXDimension(), walkerCmd.getThreadGroupIdYDimension(), walkerCmd.getThreadGroupIdZDimension()}; - EncodeDispatchKernel::encodeThreadGroupDispatch(iddArg, *pDevice, hwInfo, threadGroups, threadGroupCount, numGrf, numberOfThreadsInThreadGroup, walkerCmd); + EncodeDispatchKernel::encodeThreadGroupDispatch(iddArg, *pDevice, hwInfo, threadGroups, threadGroupCount, requiredThreadGroupDispatchSize, numGrf, numberOfThreadsInThreadGroup, walkerCmd); EXPECT_EQ(InterfaceDescriptorType::THREAD_GROUP_DISPATCH_SIZE_TG_SIZE_1, iddArg.getThreadGroupDispatchSize()); } @@ -529,7 +534,7 @@ HWTEST2_F(CommandEncodeStatesTestPvcAndLater, givenVariousDispatchParamtersWhenA { const uint32_t threadGroupCount = walkerCmd.getThreadGroupIdXDimension() * walkerCmd.getThreadGroupIdYDimension() * walkerCmd.getThreadGroupIdZDimension(); uint32_t threadGroups[] = {walkerCmd.getThreadGroupIdXDimension(), walkerCmd.getThreadGroupIdYDimension(), walkerCmd.getThreadGroupIdZDimension()}; - EncodeDispatchKernel::encodeThreadGroupDispatch(iddArg, *pDevice, hwInfo, threadGroups, threadGroupCount, numGrf, numberOfThreadsInThreadGroup, walkerCmd); + EncodeDispatchKernel::encodeThreadGroupDispatch(iddArg, *pDevice, hwInfo, threadGroups, threadGroupCount, requiredThreadGroupDispatchSize, numGrf, numberOfThreadsInThreadGroup, walkerCmd); EXPECT_EQ(InterfaceDescriptorType::THREAD_GROUP_DISPATCH_SIZE_TG_SIZE_4, iddArg.getThreadGroupDispatchSize()); } @@ -542,7 +547,7 @@ HWTEST2_F(CommandEncodeStatesTestPvcAndLater, givenVariousDispatchParamtersWhenA { const uint32_t threadGroupCount = walkerCmd.getThreadGroupIdXDimension() * walkerCmd.getThreadGroupIdYDimension() * walkerCmd.getThreadGroupIdZDimension(); uint32_t threadGroups[] = {walkerCmd.getThreadGroupIdXDimension(), walkerCmd.getThreadGroupIdYDimension(), walkerCmd.getThreadGroupIdZDimension()}; - EncodeDispatchKernel::encodeThreadGroupDispatch(iddArg, *pDevice, hwInfo, threadGroups, threadGroupCount, numGrf, numberOfThreadsInThreadGroup, walkerCmd); + EncodeDispatchKernel::encodeThreadGroupDispatch(iddArg, *pDevice, hwInfo, threadGroups, threadGroupCount, requiredThreadGroupDispatchSize, numGrf, numberOfThreadsInThreadGroup, walkerCmd); EXPECT_EQ(InterfaceDescriptorType::THREAD_GROUP_DISPATCH_SIZE_TG_SIZE_2, iddArg.getThreadGroupDispatchSize()); } } @@ -557,6 +562,7 @@ HWTEST2_F(CommandEncodeStatesTestPvcAndLater, givenDualSubSliceCountNotEqualToMa mutableHwInfo->gtSystemInfo.ThreadCount = 2048u; auto hwInfo = pDevice->getHardwareInfo(); + const uint32_t requiredThreadGroupDispatchSize = 0u; uint32_t numGrf = GrfConfig::defaultGrfNumber; InterfaceDescriptorType iddArg = FamilyType::template getInitInterfaceDescriptor(); @@ -569,7 +575,7 @@ HWTEST2_F(CommandEncodeStatesTestPvcAndLater, givenDualSubSliceCountNotEqualToMa walkerCmd.setThreadGroupIdYDimension(1); walkerCmd.setThreadGroupIdZDimension(1); uint32_t threadGroups[] = {walkerCmd.getThreadGroupIdXDimension(), walkerCmd.getThreadGroupIdYDimension(), walkerCmd.getThreadGroupIdZDimension()}; - EncodeDispatchKernel::encodeThreadGroupDispatch(iddArg, *pDevice, hwInfo, threadGroups, threadGroupCount, numGrf, numberOfThreadsInThreadGroup, walkerCmd); + EncodeDispatchKernel::encodeThreadGroupDispatch(iddArg, *pDevice, hwInfo, threadGroups, threadGroupCount, requiredThreadGroupDispatchSize, numGrf, numberOfThreadsInThreadGroup, walkerCmd); EXPECT_EQ(InterfaceDescriptorType::THREAD_GROUP_DISPATCH_SIZE_TG_SIZE_1, iddArg.getThreadGroupDispatchSize()); } @@ -586,6 +592,7 @@ HWTEST2_F(CommandEncodeStatesTestPvcAndLater, givenNumberOfThreadsInThreadGroupA InterfaceDescriptorType iddArg = FamilyType::template getInitInterfaceDescriptor(); const uint32_t threadGroupCount = 1u; + const uint32_t requiredThreadGroupDispatchSize = 0; const uint32_t numGrf = GrfConfig::defaultGrfNumber; std::array, 3> testParams = {{{16u, InterfaceDescriptorType::THREAD_GROUP_DISPATCH_SIZE_TG_SIZE_1}, {32u, InterfaceDescriptorType::THREAD_GROUP_DISPATCH_SIZE_TG_SIZE_1}, @@ -594,7 +601,7 @@ HWTEST2_F(CommandEncodeStatesTestPvcAndLater, givenNumberOfThreadsInThreadGroupA for (const auto &[numberOfThreadsInThreadGroup, expectedThreadGroupDispatchSize] : testParams) { iddArg.setNumberOfThreadsInGpgpuThreadGroup(numberOfThreadsInThreadGroup); - EncodeDispatchKernel::encodeThreadGroupDispatch(iddArg, *pDevice, hwInfo, threadGroups, threadGroupCount, numGrf, numberOfThreadsInThreadGroup, walkerCmd); + EncodeDispatchKernel::encodeThreadGroupDispatch(iddArg, *pDevice, hwInfo, threadGroups, threadGroupCount, requiredThreadGroupDispatchSize, numGrf, numberOfThreadsInThreadGroup, walkerCmd); EXPECT_EQ(expectedThreadGroupDispatchSize, iddArg.getThreadGroupDispatchSize()); } @@ -610,12 +617,13 @@ HWTEST2_F(CommandEncodeStatesTestPvcAndLater, givenThreadGroupCountZeroWhenCalli auto hwInfo = pDevice->getHardwareInfo(); const uint32_t threadGroupCount = 1u; + const uint32_t requiredThreadGroupDispatchSize = 0u; const uint32_t numGrf = GrfConfig::defaultGrfNumber; InterfaceDescriptorType iddArg = FamilyType::template getInitInterfaceDescriptor(); uint32_t numberOfThreadsInThreadGroup = 1; iddArg.setNumberOfThreadsInGpgpuThreadGroup(numberOfThreadsInThreadGroup); uint32_t threadGroups[] = {walkerCmd.getThreadGroupIdXDimension(), walkerCmd.getThreadGroupIdYDimension(), walkerCmd.getThreadGroupIdZDimension()}; - EncodeDispatchKernel::encodeThreadGroupDispatch(iddArg, *pDevice, hwInfo, threadGroups, threadGroupCount, numGrf, numberOfThreadsInThreadGroup, walkerCmd); + EncodeDispatchKernel::encodeThreadGroupDispatch(iddArg, *pDevice, hwInfo, threadGroups, threadGroupCount, requiredThreadGroupDispatchSize, numGrf, numberOfThreadsInThreadGroup, walkerCmd); EXPECT_EQ(InterfaceDescriptorType::THREAD_GROUP_DISPATCH_SIZE_TG_SIZE_1, iddArg.getThreadGroupDispatchSize()); } diff --git a/shared/test/unit_test/encoders/test_encode_dispatch_kernel_xehp_and_later.cpp b/shared/test/unit_test/encoders/test_encode_dispatch_kernel_xehp_and_later.cpp index abdfeb0b7b..cbfdbe82c9 100644 --- a/shared/test/unit_test/encoders/test_encode_dispatch_kernel_xehp_and_later.cpp +++ b/shared/test/unit_test/encoders/test_encode_dispatch_kernel_xehp_and_later.cpp @@ -720,7 +720,7 @@ HWTEST2_F(CommandEncodeStatesTest, givenInterfaceDescriptorDataWhenForceThreadGr uint32_t threadsPerThreadGroup = 4; for (auto revision : revisions) { hwInfo.platform.usRevId = productHelper.getHwRevIdFromStepping(revision, hwInfo); - EncodeDispatchKernel::encodeThreadGroupDispatch(iddArg, *pDevice, hwInfo, threadGroups, threadGroupCount, 0, threadsPerThreadGroup, walkerCmd); + EncodeDispatchKernel::encodeThreadGroupDispatch(iddArg, *pDevice, hwInfo, threadGroups, threadGroupCount, 0, 0, threadsPerThreadGroup, walkerCmd); if (productHelper.isDisableOverdispatchAvailable(hwInfo)) { EXPECT_EQ(INTERFACE_DESCRIPTOR_DATA::THREAD_GROUP_DISPATCH_SIZE_TG_SIZE_1, iddArg.getThreadGroupDispatchSize()); @@ -748,7 +748,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesTest, givenInterfaceDescriptorDa DebugManagerStateRestore restorer; debugManager.flags.ForceThreadGroupDispatchSize.set(forceThreadGroupDispatchSize); uint32_t threadGroups[] = {walkerCmd.getThreadGroupIdXDimension(), walkerCmd.getThreadGroupIdYDimension(), walkerCmd.getThreadGroupIdZDimension()}; - EncodeDispatchKernel::encodeThreadGroupDispatch(iddArg, *pDevice, pDevice->getHardwareInfo(), threadGroups, threadGroupCount, 1, 1, walkerCmd); + EncodeDispatchKernel::encodeThreadGroupDispatch(iddArg, *pDevice, pDevice->getHardwareInfo(), threadGroups, threadGroupCount, 0, 1, 1, walkerCmd); EXPECT_NE(defaultThreadGroupDispatchSize, iddArg.getThreadGroupDispatchSize()); EXPECT_EQ(forceThreadGroupDispatchSize, iddArg.getThreadGroupDispatchSize()); diff --git a/shared/test/unit_test/kernel/kernel_descriptor_from_patchtokens_tests.cpp b/shared/test/unit_test/kernel/kernel_descriptor_from_patchtokens_tests.cpp index 89b4877418..c5452c6955 100644 --- a/shared/test/unit_test/kernel/kernel_descriptor_from_patchtokens_tests.cpp +++ b/shared/test/unit_test/kernel/kernel_descriptor_from_patchtokens_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2020-2024 Intel Corporation + * Copyright (C) 2020-2025 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -444,6 +444,7 @@ TEST(KernelDescriptorFromPatchtokens, GivenhKernelAttributesThenPopulatesStrings NEO::populateKernelDescriptor(kernelDescriptor, kernelTokens, 4); EXPECT_TRUE(kernelDescriptor.kernelMetadata.kernelLanguageAttributes.empty()); EXPECT_EQ(0U, kernelDescriptor.kernelMetadata.requiredSubGroupSize); + EXPECT_EQ(0U, kernelDescriptor.kernelMetadata.requiredThreadGroupDispatchSize); iOpenCL::SPatchKernelAttributesInfo kernelAttributesToken; kernelAttributesToken.AttributesSize = 0U; @@ -451,8 +452,9 @@ TEST(KernelDescriptorFromPatchtokens, GivenhKernelAttributesThenPopulatesStrings NEO::populateKernelDescriptor(kernelDescriptor, kernelTokens, 4); EXPECT_TRUE(kernelDescriptor.kernelMetadata.kernelLanguageAttributes.empty()); EXPECT_EQ(0U, kernelDescriptor.kernelMetadata.requiredSubGroupSize); + EXPECT_EQ(0U, kernelDescriptor.kernelMetadata.requiredThreadGroupDispatchSize); - std::string attribute = "intel_reqd_sub_group_size(32)"; + std::string attribute = "intel_reqd_sub_group_size(32) intel_reqd_thread_group_dispatch_size(8)"; kernelAttributesToken.AttributesSize = static_cast(attribute.size()); std::vector tokenStorage; tokenStorage.insert(tokenStorage.end(), reinterpret_cast(&kernelAttributesToken), reinterpret_cast(&kernelAttributesToken + 1)); @@ -462,6 +464,7 @@ TEST(KernelDescriptorFromPatchtokens, GivenhKernelAttributesThenPopulatesStrings NEO::populateKernelDescriptor(kernelDescriptor, kernelTokens, 4); EXPECT_EQ(attribute, kernelDescriptor.kernelMetadata.kernelLanguageAttributes); EXPECT_EQ(32U, kernelDescriptor.kernelMetadata.requiredSubGroupSize); + EXPECT_EQ(8U, kernelDescriptor.kernelMetadata.requiredThreadGroupDispatchSize); EXPECT_FALSE(kernelDescriptor.kernelAttributes.flags.isInvalid); } diff --git a/shared/test/unit_test/kernel/kernel_descriptor_tests.cpp b/shared/test/unit_test/kernel/kernel_descriptor_tests.cpp index a62592dc75..cf4be7f4eb 100644 --- a/shared/test/unit_test/kernel/kernel_descriptor_tests.cpp +++ b/shared/test/unit_test/kernel/kernel_descriptor_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2020-2024 Intel Corporation + * Copyright (C) 2020-2025 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -86,6 +86,7 @@ TEST(KernelDescriptor, WhenDefaultInitializedThenValuesAreCleared) { EXPECT_TRUE(desc.kernelMetadata.printfStringsMap.empty()); EXPECT_EQ(0U, desc.kernelMetadata.compiledSubGroupsNumber); EXPECT_EQ(0U, desc.kernelMetadata.requiredSubGroupSize); + EXPECT_EQ(0U, desc.kernelMetadata.requiredThreadGroupDispatchSize); EXPECT_EQ(nullptr, desc.external.debugData.get()); EXPECT_EQ(nullptr, desc.external.igcInfoForGtpin); } @@ -271,4 +272,4 @@ TEST(KernelDescriptor, GivenDescriptorWithoutStatefulArgsWhenInitBindlessOffsets desc.initBindlessOffsetToSurfaceState(); EXPECT_EQ(0u, desc.bindlessArgsMap.size()); -} \ No newline at end of file +} diff --git a/shared/test/unit_test/xe3_core/test_encode_xe3_core.cpp b/shared/test/unit_test/xe3_core/test_encode_xe3_core.cpp index cdc4616e48..d3bdc20dde 100644 --- a/shared/test/unit_test/xe3_core/test_encode_xe3_core.cpp +++ b/shared/test/unit_test/xe3_core/test_encode_xe3_core.cpp @@ -67,7 +67,7 @@ XE3_CORETEST_F(CommandEncodeXe3CoreTest, givenInterfaceDescriptorDataWhenAdjustI MockDevice mockDevice; uint32_t threadsPerGroup = 1; uint32_t threadGroups[] = {walkerCmd.getThreadGroupIdXDimension(), walkerCmd.getThreadGroupIdYDimension(), walkerCmd.getThreadGroupIdZDimension()}; - EncodeDispatchKernel::encodeThreadGroupDispatch(iddArg, mockDevice, *defaultHwInfo, threadGroups, 1, 0, threadsPerGroup, walkerCmd); + EncodeDispatchKernel::encodeThreadGroupDispatch(iddArg, mockDevice, *defaultHwInfo, threadGroups, 0, 1, 0, threadsPerGroup, walkerCmd); EXPECT_EQ(2u, iddArg.getBindingTableEntryCount()); EXPECT_EQ(INTERFACE_DESCRIPTOR_DATA::SAMPLER_COUNT_BETWEEN_1_AND_4_SAMPLERS_USED, iddArg.getSamplerCount()); }