diff --git a/opencl/source/command_queue/local_work_size.cpp b/opencl/source/command_queue/local_work_size.cpp
index 706bf12933..ce8597b86e 100644
--- a/opencl/source/command_queue/local_work_size.cpp
+++ b/opencl/source/command_queue/local_work_size.cpp
@@ -245,9 +245,9 @@ void computeWorkgroupSize2D(uint32_t maxWorkGroupSize, size_t workGroupSize[3],
     uint32_t xFactorsLen = 0;
     uint32_t yFactorsLen = 0;
     uint64_t waste;
-    uint64_t localWSWaste = 0xffffffff;
+    uint64_t localWSWaste = 0xffffffffffffffff;
     uint64_t euThrdsDispatched;
-    uint64_t localEuThrdsDispatched = 0xffffffff;
+    uint64_t localEuThrdsDispatched = 0xffffffffffffffff;
     uint64_t workGroups;
     uint32_t xDim;
     uint32_t yDim;
diff --git a/opencl/test/unit_test/command_queue/local_work_size_tests.cpp b/opencl/test/unit_test/command_queue/local_work_size_tests.cpp
index 50920ab335..3685c94625 100644
--- a/opencl/test/unit_test/command_queue/local_work_size_tests.cpp
+++ b/opencl/test/unit_test/command_queue/local_work_size_tests.cpp
@@ -37,6 +37,36 @@ TEST(localWorkSizeTest, given3DimWorkGroupAndSimdEqual8AndBarriersWhenComputeCal
     EXPECT_EQ(workGroupSize[2], 1u);
 }
 
+// Exercises computeWorkgroupSizeND on a 2D range with SIMD8, no barriers and no SLM.
+// NOTE(review): presumably the regression test for the 64-bit sentinel fix in
+// computeWorkgroupSize2D made by this same change - confirm.
+TEST(localWorkSizeTest, given2DimWorkGroupAndSimdEqual8AndNoBarriersWhenComputeCalledThenLocalGroupComputedCorrectly) {
+    DebugManagerStateRestore dbgRestore;
+    // Squared heuristic off. NOTE(review): assumes that, when enabled, this flag makes
+    // computeWorkgroupSizeND bypass computeWorkgroupSize2D for 2D ranges - confirm.
+    DebugManager.flags.EnableComputeWorkSizeSquared.set(false);
+
+    //wsInfo maxWorkGroupSize, hasBarriers, simdSize, slmTotalSize, coreFamily, numThreadsPerSubSlice, localMemorySize, imgUsed, yTiledSurface
+    WorkSizeInfo wsInfo(256, 0u, 8, 0u, defaultHwInfo->platform.eRenderCoreFamily, 32u, 0u, false, false);
+    uint32_t workDim = 2;
+    // 10003 = 7 * 1429 and 1429 is prime, so 7 is the only divisor in (1, 256].
+    size_t workGroup[3] = {10003, 10003, 1};
+    size_t workGroupSize[3];
+
+    NEO::computeWorkgroupSizeND(wsInfo, workGroupSize, workGroup, workDim);
+    EXPECT_EQ(workGroupSize[0], 7u);
+    EXPECT_EQ(workGroupSize[1], 7u);
+    EXPECT_EQ(workGroupSize[2], 1u);
+
+    // Re-run with a narrow, non-square range using the same wsInfo.
+    workGroup[0] = 21;
+    workGroup[1] = 3000;
+    NEO::computeWorkgroupSizeND(wsInfo, workGroupSize, workGroup, workDim);
+    EXPECT_EQ(workGroupSize[0], 21u);
+    EXPECT_EQ(workGroupSize[1], 8u);
+    EXPECT_EQ(workGroupSize[2], 1u);
+}
+
 TEST(localWorkSizeTest, given1DimWorkGroupAndSimdEqual8WhenComputeCalledThenLocalGroupComputed) {
     //wsInfo maxWorkGroupSize, hasBariers, simdSize, slmTotalSize, coreFamily, numThreadsPerSubSlice, localMemorySize, imgUsed, yTiledSurface
     WorkSizeInfo wsInfo(256, 0u, 8, 0u, defaultHwInfo->platform.eRenderCoreFamily, 32u, 0u, false, false);