From d2677bed2ec645cf56ab30eb7e9bcba896a37722 Mon Sep 17 00:00:00 2001
From: Jaime Arteaga
Date: Wed, 19 Aug 2020 09:12:49 -0700
Subject: [PATCH] Fix computation of workgroup sizes for large sizes (2)

In computeWorkgroupSize2D, localWSWaste and localEuThrdsDispatched are
uint64_t but were initialised with the 32-bit value 0xffffffff.
Initialise both with 0xffffffffffffffff, the largest uint64_t value.
(Presumably these act as "best so far" sentinels; the code comparing
against them lies outside this hunk - confirm in local_work_size.cpp.)

Add a unit test that enables EnableComputeWorkSizeSquared and calls
computeWorkgroupSizeND for 2D global sizes 10003 x 10003 and 21 x 3000,
expecting local sizes 7 x 7 x 1 and 21 x 8 x 1 respectively.

Change-Id: I31bb300a2cfae2df9806ae812bd2acbc62f88191
Signed-off-by: Jaime Arteaga
---
 .../source/command_queue/local_work_size.cpp 4 ++--
 .../command_queue/local_work_size_tests.cpp 23 +++++++++++++++++++
 2 files changed, 25 insertions(+), 2 deletions(-)

diff --git a/opencl/source/command_queue/local_work_size.cpp b/opencl/source/command_queue/local_work_size.cpp
index 706bf12933..ce8597b86e 100644
--- a/opencl/source/command_queue/local_work_size.cpp
+++ b/opencl/source/command_queue/local_work_size.cpp
@@ -245,9 +245,9 @@ void computeWorkgroupSize2D(uint32_t maxWorkGroupSize, size_t workGroupSize[3],
     uint32_t xFactorsLen = 0;
     uint32_t yFactorsLen = 0;
     uint64_t waste;
-    uint64_t localWSWaste = 0xffffffff;
+    uint64_t localWSWaste = 0xffffffffffffffff;
     uint64_t euThrdsDispatched;
-    uint64_t localEuThrdsDispatched = 0xffffffff;
+    uint64_t localEuThrdsDispatched = 0xffffffffffffffff;
     uint64_t workGroups;
     uint32_t xDim;
     uint32_t yDim;
diff --git a/opencl/test/unit_test/command_queue/local_work_size_tests.cpp b/opencl/test/unit_test/command_queue/local_work_size_tests.cpp
index 50920ab335..3685c94625 100644
--- a/opencl/test/unit_test/command_queue/local_work_size_tests.cpp
+++ b/opencl/test/unit_test/command_queue/local_work_size_tests.cpp
@@ -37,6 +37,29 @@ TEST(localWorkSizeTest, given3DimWorkGroupAndSimdEqual8AndBarriersWhenComputeCal
     EXPECT_EQ(workGroupSize[2], 1u);
 }
 
+TEST(localWorkSizeTest, given2DimWorkGroupAndSimdEqual8AndNoBarriersWhenComputeCalledThenLocalGroupComputedCorrectly) {
+    DebugManagerStateRestore dbgRestore;
+    DebugManager.flags.EnableComputeWorkSizeSquared.set(true);
+
+    //wsInfo maxWorkGroupSize, hasBarriers, simdSize, slmTotalSize, coreFamily, numThreadsPerSubSlice, localMemorySize, imgUsed, yTiledSurface
+    WorkSizeInfo wsInfo(256, 0u, 8, 0u, defaultHwInfo->platform.eRenderCoreFamily, 32u, 0u, false, false);
+    uint32_t workDim = 2;
+    size_t workGroup[3] = {10003, 10003, 1};
+    size_t workGroupSize[3];
+
+    NEO::computeWorkgroupSizeND(wsInfo, workGroupSize, workGroup, workDim);
+    EXPECT_EQ(workGroupSize[0], 7u);
+    EXPECT_EQ(workGroupSize[1], 7u);
+    EXPECT_EQ(workGroupSize[2], 1u);
+
+    workGroup[0] = 21;
+    workGroup[1] = 3000;
+    NEO::computeWorkgroupSizeND(wsInfo, workGroupSize, workGroup, workDim);
+    EXPECT_EQ(workGroupSize[0], 21u);
+    EXPECT_EQ(workGroupSize[1], 8u);
+    EXPECT_EQ(workGroupSize[2], 1u);
+}
+
 TEST(localWorkSizeTest, given1DimWorkGroupAndSimdEqual8WhenComputeCalledThenLocalGroupComputed) {
     //wsInfo maxWorkGroupSize, hasBariers, simdSize, slmTotalSize, coreFamily, numThreadsPerSubSlice, localMemorySize, imgUsed, yTiledSurface
     WorkSizeInfo wsInfo(256, 0u, 8, 0u, defaultHwInfo->platform.eRenderCoreFamily, 32u, 0u, false, false);