diff --git a/opencl/test/unit_test/command_queue/sync_buffer_handler_tests.cpp b/opencl/test/unit_test/command_queue/sync_buffer_handler_tests.cpp index d0f2aca252..edc6ea74c9 100644 --- a/opencl/test/unit_test/command_queue/sync_buffer_handler_tests.cpp +++ b/opencl/test/unit_test/command_queue/sync_buffer_handler_tests.cpp @@ -104,10 +104,9 @@ class SyncBufferHandlerTest : public SyncBufferEnqueueHandlerTest { const cl_uint workDim = 1; const size_t gwOffset[3] = {0, 0, 0}; - const size_t lws[3] = {10, 1, 1}; - size_t workgroupCount[3] = {10, 1, 1}; - size_t globalWorkSize[3] = {100, 1, 1}; - size_t workItemsCount = 10; + const size_t workItemsCount = 16; + const size_t lws[3] = {workItemsCount, 1, 1}; + size_t workgroupCount[3] = {workItemsCount, 1, 1}; std::unique_ptr kernelInternals; MockKernel *kernel; MockCommandQueue *commandQueue; @@ -128,6 +127,19 @@ HWTEST_TEMPLATED_F(SyncBufferHandlerTest, GivenAllocateSyncBufferPatchAndConcurr static_cast *>(pCsr)->latestSentTaskCount); } +HWTEST_TEMPLATED_F(SyncBufferHandlerTest, GivenAllocateSyncBufferPatchAndConcurrentKernelWhenEnqueuingKernelThenSyncBufferOffsetIsProperlyAligned) { + patchAllocateSyncBuffer(); + + workgroupCount[0] = 1; + enqueueNDCount(); + + auto syncBufferHandler = getSyncBufferHandler(); + EXPECT_EQ(CommonConstants::maximalSizeOfAtomicType, syncBufferHandler->usedBufferSize); + + enqueueNDCount(); + EXPECT_EQ(2u * CommonConstants::maximalSizeOfAtomicType, syncBufferHandler->usedBufferSize); +} + HWTEST_TEMPLATED_F(SyncBufferHandlerTest, GivenConcurrentKernelWithoutAllocateSyncBufferPatchWhenEnqueuingConcurrentKernelThenSyncBufferIsNotCreated) { auto retVal = enqueueNDCount(); EXPECT_EQ(CL_SUCCESS, retVal); @@ -153,7 +165,6 @@ HWTEST_TEMPLATED_F(SyncBufferHandlerTest, GivenConcurrentKernelWithAllocateSyncB HWTEST_TEMPLATED_F(SyncBufferHandlerTest, GivenMaxWorkgroupCountWhenEnqueuingConcurrentKernelThenSuccessIsReturned) { auto maxWorkGroupCount = kernel->getMaxWorkGroupCount(workDim, lws, commandQueue); workgroupCount[0] = maxWorkGroupCount; - globalWorkSize[0] = maxWorkGroupCount * lws[0]; auto retVal = enqueueNDCount(); EXPECT_EQ(CL_SUCCESS, retVal); @@ -162,7 +173,6 @@ HWTEST_TEMPLATED_F(SyncBufferHandlerTest, GivenMaxWorkgroupCountWhenEnqueuingCon HWTEST_TEMPLATED_F(SyncBufferHandlerTest, GivenTooHighWorkgroupCountWhenEnqueuingConcurrentKernelThenErrorIsReturned) { size_t maxWorkGroupCount = kernel->getMaxWorkGroupCount(workDim, lws, commandQueue); workgroupCount[0] = maxWorkGroupCount + 1; - globalWorkSize[0] = maxWorkGroupCount * lws[0]; auto retVal = enqueueNDCount(); EXPECT_EQ(CL_INVALID_VALUE, retVal); diff --git a/shared/source/helpers/constants.h b/shared/source/helpers/constants.h index 912d7fc9c6..cd7303a8a7 100644 --- a/shared/source/helpers/constants.h +++ b/shared/source/helpers/constants.h @@ -88,6 +88,7 @@ namespace CommonConstants { constexpr uint32_t unspecifiedDeviceIndex = std::numeric_limits::max(); constexpr uint32_t invalidStepping = std::numeric_limits::max(); constexpr uint32_t maximalSimdSize = 32; +constexpr uint32_t maximalSizeOfAtomicType = 8; constexpr uint32_t engineGroupCount = static_cast(NEO::EngineGroupType::MaxEngineGroups); constexpr uint32_t partitionAddressOffsetDwords = 2u; constexpr uint32_t partitionAddressOffset = sizeof(uint32_t) * partitionAddressOffsetDwords; diff --git a/shared/source/program/sync_buffer_handler.inl b/shared/source/program/sync_buffer_handler.inl index 351b6b40d9..3d3038d920 100644 --- a/shared/source/program/sync_buffer_handler.inl +++ b/shared/source/program/sync_buffer_handler.inl @@ -9,7 +9,7 @@ template void NEO::SyncBufferHandler::prepareForEnqueue(size_t workGroupsCount, KernelT &kernel) { - auto requiredSize = workGroupsCount; + auto requiredSize = alignUp(workGroupsCount, CommonConstants::maximalSizeOfAtomicType); std::lock_guard guard(this->mutex); bool isCurrentBufferFull = (usedBufferSize + requiredSize > bufferSize);