diff --git a/opencl/source/command_queue/cl_local_work_size.cpp b/opencl/source/command_queue/cl_local_work_size.cpp index 1028cc4257..0ece9b98f2 100644 --- a/opencl/source/command_queue/cl_local_work_size.cpp +++ b/opencl/source/command_queue/cl_local_work_size.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2021-2023 Intel Corporation + * Copyright (C) 2021-2025 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -101,6 +101,7 @@ WorkSizeInfo createWorkSizeInfoFromDispatchInfo(const DispatchInfo &dispatchInfo kernelInfo.kernelDescriptor.kernelAttributes.flags.requiresDisabledEUFusion); wsInfo.setIfUseImg(kernelInfo); + wsInfo.setPreferredWgCountPerSubslice(device.getProductHelper().getPreferredWorkgroupCountPerSubslice()); return wsInfo; } diff --git a/opencl/test/unit_test/command_queue/local_work_size_tests.cpp b/opencl/test/unit_test/command_queue/local_work_size_tests.cpp index 5ad9c515da..b09e4f82b9 100644 --- a/opencl/test/unit_test/command_queue/local_work_size_tests.cpp +++ b/opencl/test/unit_test/command_queue/local_work_size_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2024 Intel Corporation + * Copyright (C) 2018-2025 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -309,6 +309,257 @@ TEST_F(LocalWorkSizeTest, given2DimWorkGroupAndSimdEqual32WhenComputeCalledThenL EXPECT_EQ(workGroupSize[2], 1u); } +TEST_F(LocalWorkSizeTest, givenSimdEqual32AndPreferredWgCountPerSubslice2WhenComputeCalledThenLocalGroupSizeIsLimited) { + DebugManagerStateRestore dbgRestore; + debugManager.flags.EnableComputeWorkSizeSquared.set(false); + WorkSizeInfo wsInfo(1024, 0u, 32, 0u, rootDeviceEnvironment, 32u, 0u, false, false, false); + wsInfo.setPreferredWgCountPerSubslice(2); + + constexpr uint32_t maxLws = 32 * 32 / 2; // expected cap: simdSize (32) * numThreadsPerSubSlice (32) / preferredWgCountPerSubslice (2) + + uint32_t workDim = 2; + size_t workGroup[3] = {384, 96, 1}; // work items per dimension, passed to computeWorkgroupSizeND as workItems + size_t workGroupSize[3]; // receives the computed local work size + + NEO::computeWorkgroupSizeND(wsInfo, workGroupSize, workGroup, workDim); + 
EXPECT_EQ(workGroupSize[0], 128u); + EXPECT_EQ(workGroupSize[1], 2u); + EXPECT_EQ(workGroupSize[2], 1u); + EXPECT_TRUE(workGroupSize[0] * workGroupSize[1] < maxLws); + + workGroup[0] = 1024 * 256; + NEO::computeWorkgroupSizeND(wsInfo, workGroupSize, workGroup, workDim); + EXPECT_EQ(workGroupSize[0], 256u); + EXPECT_EQ(workGroupSize[1], 1u); + EXPECT_EQ(workGroupSize[2], 1u); + EXPECT_TRUE(workGroupSize[0] * workGroupSize[1] <= maxLws); + + workGroup[0] = 48; + NEO::computeWorkgroupSizeND(wsInfo, workGroupSize, workGroup, workDim); + EXPECT_EQ(workGroupSize[0], 16u); + EXPECT_EQ(workGroupSize[1], 32u); + EXPECT_EQ(workGroupSize[2], 1u); + EXPECT_TRUE(workGroupSize[0] * workGroupSize[1] <= maxLws); + + workGroup[0] = 512; + workGroup[1] = 1; + NEO::computeWorkgroupSizeND(wsInfo, workGroupSize, workGroup, workDim); + EXPECT_EQ(workGroupSize[0], 512u); + EXPECT_EQ(workGroupSize[1], 1u); + EXPECT_EQ(workGroupSize[2], 1u); + EXPECT_TRUE(workGroupSize[0] * workGroupSize[1] <= maxLws); + + workGroup[0] = 12; + workGroup[1] = 512; + NEO::computeWorkgroupSizeND(wsInfo, workGroupSize, workGroup, workDim); + EXPECT_EQ(workGroupSize[0], 4u); + EXPECT_EQ(workGroupSize[1], 64u); + EXPECT_EQ(workGroupSize[2], 1u); + EXPECT_TRUE(workGroupSize[0] * workGroupSize[1] <= maxLws); + + workGroup[0] = 1; + workGroup[1] = 384; + NEO::computeWorkgroupSizeND(wsInfo, workGroupSize, workGroup, workDim); + EXPECT_EQ(workGroupSize[0], 1u); + EXPECT_EQ(workGroupSize[1], 128u); + EXPECT_EQ(workGroupSize[2], 1u); + EXPECT_TRUE(workGroupSize[0] * workGroupSize[1] <= maxLws); + + workGroup[0] = 128; + workGroup[1] = 4; + wsInfo.imgUsed = true; + NEO::computeWorkgroupSizeND(wsInfo, workGroupSize, workGroup, workDim); + EXPECT_EQ(workGroupSize[0], 128u); + EXPECT_EQ(workGroupSize[1], 4u); + EXPECT_EQ(workGroupSize[2], 1u); + EXPECT_TRUE(workGroupSize[0] * workGroupSize[1] <= maxLws); + + workGroup[0] = 64; + workGroup[1] = 8; + wsInfo.imgUsed = false; + NEO::computeWorkgroupSizeND(wsInfo, 
workGroupSize, workGroup, workDim); + EXPECT_EQ(workGroupSize[0], 64u); + EXPECT_EQ(workGroupSize[1], 8u); + EXPECT_EQ(workGroupSize[2], 1u); + EXPECT_TRUE(workGroupSize[0] * workGroupSize[1] <= maxLws); + + workGroup[0] = 1024; + workGroup[1] = 9; + wsInfo.imgUsed = true; + NEO::computeWorkgroupSizeND(wsInfo, workGroupSize, workGroup, workDim); + EXPECT_EQ(workGroupSize[0], 512u); + EXPECT_EQ(workGroupSize[1], 1u); + EXPECT_EQ(workGroupSize[2], 1u); + EXPECT_TRUE(workGroupSize[0] * workGroupSize[1] <= maxLws); +} + +TEST_F(LocalWorkSizeTest, givenSimdEqual32AndPreferredWgCountPerSubslice4WhenComputeCalledThenLocalGroupSizeIsLimited) { + DebugManagerStateRestore dbgRestore; + debugManager.flags.EnableComputeWorkSizeSquared.set(false); + WorkSizeInfo wsInfo(1024, 0u, 32, 0u, rootDeviceEnvironment, 32u, 0u, false, false, false); + wsInfo.setPreferredWgCountPerSubslice(4); + + constexpr uint32_t maxLws = 32 * 32 / 4; // expected cap: simdSize (32) * numThreadsPerSubSlice (32) / preferredWgCountPerSubslice (4) + + uint32_t workDim = 2; + size_t workGroup[3] = {384, 96, 1}; // work items per dimension, passed to computeWorkgroupSizeND as workItems + size_t workGroupSize[3]; // receives the computed local work size + + NEO::computeWorkgroupSizeND(wsInfo, workGroupSize, workGroup, workDim); + EXPECT_EQ(workGroupSize[0], 128u); + EXPECT_EQ(workGroupSize[1], 2u); + EXPECT_EQ(workGroupSize[2], 1u); + EXPECT_TRUE(workGroupSize[0] * workGroupSize[1] <= maxLws); + + workGroup[0] = 1024 * 256; + NEO::computeWorkgroupSizeND(wsInfo, workGroupSize, workGroup, workDim); + EXPECT_EQ(workGroupSize[0], 256u); + EXPECT_EQ(workGroupSize[1], 1u); + EXPECT_EQ(workGroupSize[2], 1u); + EXPECT_TRUE(workGroupSize[0] * workGroupSize[1] <= maxLws); + + workGroup[0] = 48; + NEO::computeWorkgroupSizeND(wsInfo, workGroupSize, workGroup, workDim); + EXPECT_EQ(workGroupSize[0], 16u); + EXPECT_EQ(workGroupSize[1], 16u); + EXPECT_EQ(workGroupSize[2], 1u); + EXPECT_TRUE(workGroupSize[0] * workGroupSize[1] <= maxLws); + + workGroup[0] = 512; + workGroup[1] = 1; + NEO::computeWorkgroupSizeND(wsInfo, workGroupSize, workGroup, workDim); + 
EXPECT_EQ(workGroupSize[0], 256u); + EXPECT_EQ(workGroupSize[1], 1u); + EXPECT_EQ(workGroupSize[2], 1u); + EXPECT_TRUE(workGroupSize[0] * workGroupSize[1] <= maxLws); + + workGroup[0] = 12; + workGroup[1] = 512; + NEO::computeWorkgroupSizeND(wsInfo, workGroupSize, workGroup, workDim); + EXPECT_EQ(workGroupSize[0], 4u); + EXPECT_EQ(workGroupSize[1], 64u); + EXPECT_EQ(workGroupSize[2], 1u); + EXPECT_TRUE(workGroupSize[0] * workGroupSize[1] <= maxLws); + + workGroup[0] = 1; + workGroup[1] = 384; + NEO::computeWorkgroupSizeND(wsInfo, workGroupSize, workGroup, workDim); + EXPECT_EQ(workGroupSize[0], 1u); + EXPECT_EQ(workGroupSize[1], 128u); + EXPECT_EQ(workGroupSize[2], 1u); + EXPECT_TRUE(workGroupSize[0] * workGroupSize[1] <= maxLws); + + workGroup[0] = 128; + workGroup[1] = 4; + wsInfo.imgUsed = true; + NEO::computeWorkgroupSizeND(wsInfo, workGroupSize, workGroup, workDim); + EXPECT_EQ(workGroupSize[0], 64u); + EXPECT_EQ(workGroupSize[1], 4u); + EXPECT_EQ(workGroupSize[2], 1u); + EXPECT_TRUE(workGroupSize[0] * workGroupSize[1] <= maxLws); + + workGroup[0] = 64; + workGroup[1] = 8; + wsInfo.imgUsed = false; + NEO::computeWorkgroupSizeND(wsInfo, workGroupSize, workGroup, workDim); + EXPECT_EQ(workGroupSize[0], 64u); + EXPECT_EQ(workGroupSize[1], 4u); + EXPECT_EQ(workGroupSize[2], 1u); + EXPECT_TRUE(workGroupSize[0] * workGroupSize[1] <= maxLws); + + workGroup[0] = 1024; + workGroup[1] = 9; + wsInfo.imgUsed = true; + NEO::computeWorkgroupSizeND(wsInfo, workGroupSize, workGroup, workDim); + EXPECT_EQ(workGroupSize[0], 256u); + EXPECT_EQ(workGroupSize[1], 1u); + EXPECT_EQ(workGroupSize[2], 1u); + EXPECT_TRUE(workGroupSize[0] * workGroupSize[1] <= maxLws); + + workGroup[0] = 2048; + workGroup[1] = 1; + wsInfo.imgUsed = false; + NEO::computeWorkgroupSizeND(wsInfo, workGroupSize, workGroup, workDim); + EXPECT_EQ(workGroupSize[0], 256u); + EXPECT_EQ(workGroupSize[1], 1u); + EXPECT_EQ(workGroupSize[2], 1u); + EXPECT_TRUE(workGroupSize[0] * workGroupSize[1] <= maxLws); +} + 
+TEST_F(LocalWorkSizeTest, givenSimdEqual32AndPreferredWgCountPerSubslice4WhenBarriersOrSlmUsedThenLocalGroupSizeIsNotLimited) { + DebugManagerStateRestore dbgRestore; + debugManager.flags.EnableComputeWorkSizeSquared.set(false); + WorkSizeInfo wsInfo(1024, 0u, 32, 0u, rootDeviceEnvironment, 32u, 0u, false, false, false); + wsInfo.setPreferredWgCountPerSubslice(4); + + for (uint32_t i = 0; i < 2; i++) { + if (i == 0) { + wsInfo.hasBarriers = true; + } else if (i == 1) { + wsInfo.hasBarriers = false; + wsInfo.slmTotalSize = 256; + } + + uint32_t workDim = 2; + size_t workGroup[3] = {384, 96, 1}; + size_t workGroupSize[3]; + + NEO::computeWorkgroupSizeND(wsInfo, workGroupSize, workGroup, workDim); + if (wsInfo.slmTotalSize == 0) { + EXPECT_EQ(workGroupSize[0], 384u); + EXPECT_EQ(workGroupSize[1], 2u); + EXPECT_EQ(workGroupSize[2], 1u); + } else { + // SLM iteration (slmTotalSize = 256): the original note says the ratio-based algorithm is used here; NOTE(review): that path is not shown in this diff, confirm + EXPECT_EQ(workGroupSize[0], 64u); + EXPECT_EQ(workGroupSize[1], 16u); + EXPECT_EQ(workGroupSize[2], 1u); + } + + workGroup[0] = 1024 * 256; + NEO::computeWorkgroupSizeND(wsInfo, workGroupSize, workGroup, workDim); + EXPECT_EQ(workGroupSize[0], 512u); + EXPECT_EQ(workGroupSize[1], 2u); + EXPECT_EQ(workGroupSize[2], 1u); + + workGroup[0] = 48; + NEO::computeWorkgroupSizeND(wsInfo, workGroupSize, workGroup, workDim); + EXPECT_EQ(workGroupSize[0], 48u); + EXPECT_EQ(workGroupSize[1], 16u); + EXPECT_EQ(workGroupSize[2], 1u); + + workGroup[0] = 128; + workGroup[1] = 4; + wsInfo.imgUsed = true; + NEO::computeWorkgroupSizeND(wsInfo, workGroupSize, workGroup, workDim); + EXPECT_EQ(workGroupSize[0], 128u); + EXPECT_EQ(workGroupSize[1], 4u); + EXPECT_EQ(workGroupSize[2], 1u); + + workGroup[0] = 1024; + workGroup[1] = 9; + wsInfo.imgUsed = false; + NEO::computeWorkgroupSizeND(wsInfo, workGroupSize, workGroup, workDim); + if (wsInfo.slmTotalSize == 0) { + EXPECT_EQ(workGroupSize[0], 512u); + EXPECT_EQ(workGroupSize[1], 1u); + EXPECT_EQ(workGroupSize[2], 1u); + } else { + EXPECT_EQ(workGroupSize[0], 256u); + 
EXPECT_EQ(workGroupSize[1], 3u); + EXPECT_EQ(workGroupSize[2], 1u); + } + + workGroup[0] = 2048; + workGroup[1] = 2; + wsInfo.imgUsed = false; + NEO::computeWorkgroupSizeND(wsInfo, workGroupSize, workGroup, workDim); + EXPECT_EQ(workGroupSize[0], 512u); + EXPECT_EQ(workGroupSize[1], 2u); + EXPECT_EQ(workGroupSize[2], 1u); + } +} + TEST_F(LocalWorkSizeTest, given3DimWorkGroupAndSimdEqual8WhenComputeCalledThenLocalGroupComputed) { WorkSizeInfo wsInfo(256, 0u, 8, 0u, rootDeviceEnvironment, 56u, 0u, false, false, false); uint32_t workDim = 3; diff --git a/shared/source/helpers/local_work_size.cpp b/shared/source/helpers/local_work_size.cpp index 9c3b0d6321..c92649f935 100644 --- a/shared/source/helpers/local_work_size.cpp +++ b/shared/source/helpers/local_work_size.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2023 Intel Corporation + * Copyright (C) 2018-2025 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -360,6 +360,9 @@ void computeWorkgroupSizeND(WorkSizeInfo &wsInfo, size_t workGroupSize[3], const workGroupSize[i] = 1; UNRECOVERABLE_IF(wsInfo.simdSize == 0); + uint64_t totalNumberOfItems = workItems[0] * workItems[1] * workItems[2]; + auto optimalWgThreadCount = optimalHardwareThreadCountGeneric[0]; + bool totalRequiredThreadGroupsMoreThanSingleThreadGroup = totalNumberOfItems > wsInfo.simdSize * optimalWgThreadCount; // Find biggest power of two which devide each dimension size if (wsInfo.slmTotalSize == 0 && !wsInfo.hasBarriers) { @@ -367,9 +370,14 @@ void computeWorkgroupSizeND(WorkSizeInfo &wsInfo, size_t workGroupSize[3], const return computeWorkgroupSizeSquared(wsInfo.maxWorkGroupSize, workGroupSize, workItems, wsInfo.simdSize, workDim); } + if (wsInfo.preferredWgCountPerSubSlice != 0 && wsInfo.simdSize == 32 && totalRequiredThreadGroupsMoreThanSingleThreadGroup) { // limit only SIMD32 dispatches with more items than one full-size work group + optimalWgThreadCount = std::min(optimalWgThreadCount, wsInfo.numThreadsPerSubSlice / wsInfo.preferredWgCountPerSubSlice); // numThreadsPerSubSlice split across the preferred work-group count; NOTE(review): the quotient is 0 if preferredWgCountPerSubSlice exceeds numThreadsPerSubSlice + wsInfo.maxWorkGroupSize = wsInfo.simdSize * 
optimalWgThreadCount; + } + size_t itemsPowerOfTwoDivisors[3] = {1, 1, 1}; for (auto i = 0u; i < workDim; i++) { - uint32_t requiredWorkItemsCount = uint32_t(wsInfo.simdSize * optimalHardwareThreadCountGeneric[0]); + uint32_t requiredWorkItemsCount = uint32_t(wsInfo.simdSize * optimalWgThreadCount); while (requiredWorkItemsCount > 1 && !(Math::isDivisibleByPowerOfTwoDivisor(uint32_t(workItems[i]), requiredWorkItemsCount))) requiredWorkItemsCount >>= 1; itemsPowerOfTwoDivisors[i] = requiredWorkItemsCount; @@ -382,7 +390,7 @@ void computeWorkgroupSizeND(WorkSizeInfo &wsInfo, size_t workGroupSize[3], const // If computed dimension sizes which are powers of two are creating group which is // bigger than maxWorkGroupSize or this group would create more than optimal hardware threads then downsize it uint64_t allItems = itemsPowerOfTwoDivisors[0] * itemsPowerOfTwoDivisors[1] * itemsPowerOfTwoDivisors[2]; - if (allItems > wsInfo.simdSize && (allItems > wsInfo.maxWorkGroupSize || allItems > wsInfo.simdSize * optimalHardwareThreadCountGeneric[0])) { + if (allItems > wsInfo.simdSize && (allItems > wsInfo.maxWorkGroupSize || allItems > wsInfo.simdSize * optimalWgThreadCount)) { return computePowerOfTwoLWS(itemsPowerOfTwoDivisors, wsInfo, workGroupSize, workDim, canUseNx4); } // If coputed workgroup is at this point in correct size @@ -394,9 +402,8 @@ void computeWorkgroupSizeND(WorkSizeInfo &wsInfo, size_t workGroupSize[3], const } } - uint64_t totalNuberOfItems = workItems[0] * workItems[1] * workItems[2]; // If dimensions are not powers of two but total number of items is less than max work group size - if (totalNuberOfItems <= wsInfo.maxWorkGroupSize) { + if (totalNumberOfItems <= wsInfo.maxWorkGroupSize) { for (auto i = 0u; i < workDim; i++) workGroupSize[i] = workItems[i]; return; diff --git a/shared/source/os_interface/product_helper.h b/shared/source/os_interface/product_helper.h index e1dabb0723..39a279f046 100644 --- a/shared/source/os_interface/product_helper.h +++ 
b/shared/source/os_interface/product_helper.h @@ -95,6 +95,7 @@ class ProductHelper { virtual bool isMaxThreadsForWorkgroupWARequired(const HardwareInfo &hwInfo) const = 0; virtual uint32_t getMaxThreadsForWorkgroupInDSSOrSS(const HardwareInfo &hwInfo, uint32_t maxNumEUsPerSubSlice, uint32_t maxNumEUsPerDualSubSlice) const = 0; virtual uint32_t getMaxThreadsForWorkgroup(const HardwareInfo &hwInfo, uint32_t maxNumEUsPerSubSlice) const = 0; + virtual uint32_t getPreferredWorkgroupCountPerSubslice() const = 0; virtual void setForceNonCoherent(void *const commandPtr, const StateComputeModeProperties &properties) const = 0; virtual void updateScmCommand(void *const commandPtr, const StateComputeModeProperties &properties) const = 0; virtual bool obtainBlitterPreference(const HardwareInfo &hwInfo) const = 0; diff --git a/shared/source/os_interface/product_helper.inl b/shared/source/os_interface/product_helper.inl index b6e7c97fe7..e67ba456d9 100644 --- a/shared/source/os_interface/product_helper.inl +++ b/shared/source/os_interface/product_helper.inl @@ -247,6 +247,11 @@ uint32_t ProductHelperHw::getMaxThreadsForWorkgroup(const HardwareIn return maxNumEUsPerSubSlice * numThreadsPerEU; } +template +uint32_t ProductHelperHw::getPreferredWorkgroupCountPerSubslice() const { + return 0; +} + template void ProductHelperHw::setForceNonCoherent(void *const commandPtr, const StateComputeModeProperties &properties) const {} diff --git a/shared/source/os_interface/product_helper_hw.h b/shared/source/os_interface/product_helper_hw.h index 5609ba1372..b9335f6f54 100644 --- a/shared/source/os_interface/product_helper_hw.h +++ b/shared/source/os_interface/product_helper_hw.h @@ -35,6 +35,7 @@ class ProductHelperHw : public ProductHelper { bool isMaxThreadsForWorkgroupWARequired(const HardwareInfo &hwInfo) const override; uint32_t getMaxThreadsForWorkgroupInDSSOrSS(const HardwareInfo &hwInfo, uint32_t maxNumEUsPerSubSlice, uint32_t maxNumEUsPerDualSubSlice) const override; uint32_t 
getMaxThreadsForWorkgroup(const HardwareInfo &hwInfo, uint32_t maxNumEUsPerSubSlice) const override; + uint32_t getPreferredWorkgroupCountPerSubslice() const override; void setForceNonCoherent(void *const commandPtr, const StateComputeModeProperties &properties) const override; void updateScmCommand(void *const commandPtr, const StateComputeModeProperties &properties) const override; bool obtainBlitterPreference(const HardwareInfo &hwInfo) const override; diff --git a/shared/source/program/work_size_info.cpp b/shared/source/program/work_size_info.cpp index df760ba206..9bb5c976a8 100644 --- a/shared/source/program/work_size_info.cpp +++ b/shared/source/program/work_size_info.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2023-2024 Intel Corporation + * Copyright (C) 2023-2025 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -73,4 +73,8 @@ void WorkSizeInfo::checkRatio(const size_t workItems[3]) { } } +void WorkSizeInfo::setPreferredWgCountPerSubslice(uint32_t preferredWgCount) { + preferredWgCountPerSubSlice = preferredWgCount; +} + } // namespace NEO diff --git a/shared/source/program/work_size_info.h b/shared/source/program/work_size_info.h index e8478458f6..09a20bfe9f 100644 --- a/shared/source/program/work_size_info.h +++ b/shared/source/program/work_size_info.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2023 Intel Corporation + * Copyright (C) 2023-2025 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -29,12 +29,14 @@ struct WorkSizeInfo { bool useRatio = false; bool useStrictRatio = false; float targetRatio = 0; + uint32_t preferredWgCountPerSubSlice = 0; WorkSizeInfo(uint32_t maxWorkGroupSize, bool hasBarriers, uint32_t simdSize, uint32_t slmTotalSize, const RootDeviceEnvironment &rootDeviceEnvironment, uint32_t numThreadsPerSubSlice, uint32_t localMemSize, bool imgUsed, bool yTiledSurface, bool disableEUFusion); void setIfUseImg(const KernelInfo &kernelInfo); void setMinWorkGroupSize(const RootDeviceEnvironment &rootDeviceEnvironment, bool 
disableEUFusion); void checkRatio(const size_t workItems[3]); + void setPreferredWgCountPerSubslice(uint32_t preferredWgCount); }; } // namespace NEO \ No newline at end of file diff --git a/shared/test/unit_test/os_interface/product_helper_tests.cpp b/shared/test/unit_test/os_interface/product_helper_tests.cpp index a7812d2bfb..aed485a51f 100644 --- a/shared/test/unit_test/os_interface/product_helper_tests.cpp +++ b/shared/test/unit_test/os_interface/product_helper_tests.cpp @@ -1166,3 +1166,7 @@ HWTEST2_F(ProductHelperTest, givenProductHelperWhenCallingIsResourceUncachedForC } } } + +HWTEST_F(ProductHelperTest, givenProductHelperWhenGettingPreferredWorkgroupCountPerSubsliceThenZeroReturned) { + EXPECT_EQ(0u, productHelper->getPreferredWorkgroupCountPerSubslice()); // the generic ProductHelperHw implementation added by this patch returns 0 +}