// core/helpers/kernel_helpers.h
namespace NEO {

// Stateless helpers for kernel dispatch calculations.
struct KernelHelper {
    // Returns how many work groups of the given local size can be resident at
    // once, i.e. the lowest of the thread, barrier and SLM limits.
    // Returns 0 when no work group can be formed from the inputs (simd == 0,
    // a zero-sized dimension, or workDim > 0 with localWorkSize == nullptr).
    static uint32_t getMaxWorkGroupCount(uint32_t simd, uint32_t availableThreadCount, uint32_t dssCount, uint32_t availableSlmSize,
                                         uint32_t usedSlmSize, uint32_t maxBarrierCount, uint32_t numberOfBarriers, uint32_t workDim,
                                         const size_t *localWorkSize);
};

} // namespace NEO

// core/helpers/kernel_helpers.cpp
namespace NEO {

uint32_t KernelHelper::getMaxWorkGroupCount(uint32_t simd, uint32_t availableThreadCount, uint32_t dssCount, uint32_t availableSlmSize,
                                            uint32_t usedSlmSize, uint32_t maxBarrierCount, uint32_t numberOfBarriers, uint32_t workDim,
                                            const size_t *localWorkSize) {
    // Reject inputs that would otherwise lead to a null dereference or a
    // division by zero below.
    if (simd == 0u) {
        return 0u;
    }
    if ((workDim > 0u) && (localWorkSize == nullptr)) {
        return 0u;
    }

    // Total number of work items in one work group (1 when workDim == 0).
    size_t workGroupSize = 1;
    for (uint32_t i = 0; i < workDim; i++) {
        workGroupSize *= localWorkSize[i];
    }
    if (workGroupSize == 0u) {
        return 0u;
    }

    // Ceiling division kept in size_t: narrowing the thread count to 32 bits
    // before dividing could truncate it to zero.
    const size_t threadsPerThreadGroup = workGroupSize / simd + (((workGroupSize % simd) != 0u) ? 1u : 0u);

    // The quotient never exceeds availableThreadCount, so it fits in uint32_t.
    auto maxWorkGroupsCount = static_cast<uint32_t>(availableThreadCount / threadsPerThreadGroup);

    if (numberOfBarriers > 0) {
        // Barrier limit: groups per DSS times the number of DSS.
        auto maxWorkGroupsCountDueToBarrierUsage = dssCount * (maxBarrierCount / numberOfBarriers);
        maxWorkGroupsCount = std::min(maxWorkGroupsCount, maxWorkGroupsCountDueToBarrierUsage);
    }

    if (usedSlmSize > 0) {
        // SLM limit: how many groups' worth of SLM fit in the available pool.
        auto maxWorkGroupsCountDueToSlm = availableSlmSize / usedSlmSize;
        maxWorkGroupsCount = std::min(maxWorkGroupsCount, maxWorkGroupsCountDueToSlm);
    }

    return maxWorkGroupsCount;
}

} // namespace NEO
+using namespace NEO; + +struct KernelHelperMaxWorkGroupsTests : ::testing::Test { + uint32_t simd = 8; + uint32_t threadCount = 8 * 1024; + uint32_t dssCount = 16; + uint32_t availableSlm = 64 * KB; + uint32_t usedSlm = 0; + uint32_t maxBarrierCount = 32; + uint32_t numberOfBarriers = 0; + uint32_t workDim = 3; + size_t lws[3] = {10, 10, 10}; + + uint32_t getMaxWorkGroupCount() { + return KernelHelper::getMaxWorkGroupCount(simd, threadCount, dssCount, availableSlm, usedSlm, + maxBarrierCount, numberOfBarriers, workDim, lws); + } +}; + +TEST_F(KernelHelperMaxWorkGroupsTests, GivenNoBarriersOrSlmUsedWhenCalculatingMaxWorkGroupsCountThenResultIsCalculatedWithSimd) { + auto workGroupSize = lws[0] * lws[1] * lws[2]; + auto expected = threadCount / Math::divideAndRoundUp(workGroupSize, simd); + EXPECT_EQ(expected, getMaxWorkGroupCount()); +} + +TEST_F(KernelHelperMaxWorkGroupsTests, GivenBarriersWhenCalculatingMaxWorkGroupsCountThenResultIsCalculatedWithRegardToBarriersCount) { + numberOfBarriers = 16; + + auto expected = dssCount * (maxBarrierCount / numberOfBarriers); + EXPECT_EQ(expected, getMaxWorkGroupCount()); +} + +TEST_F(KernelHelperMaxWorkGroupsTests, GivenUsedSlmSizeWhenCalculatingMaxWorkGroupsCountThenResultIsCalculatedWithRegardToUsedSlmSize) { + usedSlm = 4 * KB; + + auto expected = availableSlm / usedSlm; + EXPECT_EQ(expected, getMaxWorkGroupCount()); +} + +TEST_F(KernelHelperMaxWorkGroupsTests, GivenVariousValuesWhenCalculatingMaxWorkGroupsCountThenLowestResultIsAlwaysReturned) { + usedSlm = 1 * KB; + numberOfBarriers = 1; + dssCount = 1; + + workDim = 1; + lws[0] = simd; + threadCount = 1; + EXPECT_EQ(1u, getMaxWorkGroupCount()); + + threadCount = 1024; + EXPECT_NE(1u, getMaxWorkGroupCount()); + + numberOfBarriers = 32; + EXPECT_EQ(1u, getMaxWorkGroupCount()); + + numberOfBarriers = 1; + EXPECT_NE(1u, getMaxWorkGroupCount()); + + usedSlm = availableSlm; + EXPECT_EQ(1u, getMaxWorkGroupCount()); +} diff --git a/public/cl_ext_private.h 
b/public/cl_ext_private.h index b75da984f9..7f1b4d0504 100644 --- a/public/cl_ext_private.h +++ b/public/cl_ext_private.h @@ -68,6 +68,9 @@ using cl_unified_shared_memory_capabilities_intel = cl_bitfield; //Used with createBuffer #define CL_MEM_ALLOW_UNRESTRICTED_SIZE_INTEL (1 << 23) +typedef cl_uint cl_execution_info_intel; +#define CL_EXECUTION_INFO_MAX_WORKGROUP_COUNT_INTEL 0x10100 + /****************************** * UNIFIED MEMORY * *******************************/ diff --git a/runtime/api/api.cpp b/runtime/api/api.cpp index 86b35ffed1..96a501bcdf 100644 --- a/runtime/api/api.cpp +++ b/runtime/api/api.cpp @@ -8,6 +8,7 @@ #include "api.h" #include "core/helpers/aligned_memory.h" +#include "core/helpers/kernel_helpers.h" #include "core/memory_manager/unified_memory_manager.h" #include "core/utilities/stackvec.h" #include "runtime/accelerators/intel_motion_estimation.h" @@ -3873,6 +3874,7 @@ void *CL_API_CALL clGetExtensionFunctionAddress(const char *funcName) { RETURN_FUNC_PTR_IF_EXIST(clEnqueueMemAdviseINTEL); RETURN_FUNC_PTR_IF_EXIST(clGetDeviceFunctionPointerINTEL); RETURN_FUNC_PTR_IF_EXIST(clGetDeviceGlobalVariablePointerINTEL); + RETURN_FUNC_PTR_IF_EXIST(clGetExecutionInfoIntel); void *ret = sharingFactory.getExtensionFunctionAddress(funcName); if (ret != nullptr) { @@ -5074,3 +5076,54 @@ cl_int CL_API_CALL clSetProgramSpecializationConstant(cl_program program, cl_uin return retVal; } + +cl_int CL_API_CALL clGetExecutionInfoIntel(cl_command_queue commandQueue, + cl_kernel kernel, + cl_uint workDim, + const size_t *globalWorkOffset, + const size_t *localWorkSize, + cl_execution_info_intel paramName, + size_t paramValueSize, + void *paramValue, + size_t *paramValueSizeRet) { + + cl_int retVal = CL_SUCCESS; + API_ENTER(&retVal); + DBG_LOG_INPUTS("commandQueue", commandQueue, "cl_kernel", kernel, + "globalWorkOffset[0]", DebugManager.getInput(globalWorkOffset, 0), + "globalWorkOffset[1]", DebugManager.getInput(globalWorkOffset, 1), + "globalWorkOffset[2]", 
DebugManager.getInput(globalWorkOffset, 2), + "localWorkSize", DebugManager.getSizes(localWorkSize, workDim, true), + "paramName", paramName, "paramValueSize", paramValueSize, + "paramValue", paramValue, "paramValueSizeRet", paramValueSizeRet); + + retVal = validateObjects(commandQueue, kernel); + + if (CL_SUCCESS != retVal) { + return retVal; + } + + auto pKernel = castToObjectOrAbort(kernel); + if (!pKernel->isPatched()) { + retVal = CL_INVALID_KERNEL; + return retVal; + } + + TakeOwnershipWrapper kernelOwnership(*pKernel, gtpinIsGTPinInitialized()); + switch (paramName) { + case CL_EXECUTION_INFO_MAX_WORKGROUP_COUNT_INTEL: + if ((paramValueSize < sizeof(uint32_t)) || (paramValue == nullptr)) { + retVal = CL_INVALID_VALUE; + return retVal; + } + *reinterpret_cast(paramValue) = pKernel->getMaxWorkGroupCount(workDim, localWorkSize); + if (paramValueSizeRet != nullptr) { + *paramValueSizeRet = sizeof(uint32_t); + } + break; + default: + retVal = CL_INVALID_VALUE; + } + + return retVal; +} diff --git a/runtime/api/api.h b/runtime/api/api.h index ef2141ac6a..0a3893df4c 100644 --- a/runtime/api/api.h +++ b/runtime/api/api.h @@ -1015,6 +1015,17 @@ cl_int CL_API_CALL clGetDeviceGlobalVariablePointerINTEL( size_t *globalVariableSizeRet, void **globalVariablePointerRet); +cl_int CL_API_CALL clGetExecutionInfoIntel( + cl_command_queue commandQueue, + cl_kernel kernel, + cl_uint workDim, + const size_t *globalWorkOffset, + const size_t *localWorkSize, + cl_execution_info_intel paramName, + size_t paramValueSize, + void *paramValue, + size_t *paramValueSizeRet); + // OpenCL 2.2 cl_int CL_API_CALL clSetProgramSpecializationConstant( diff --git a/runtime/gen8/hardware_commands_helper_gen8.cpp b/runtime/gen8/hardware_commands_helper_gen8.cpp index 85d2efe263..5e36fcd7af 100644 --- a/runtime/gen8/hardware_commands_helper_gen8.cpp +++ b/runtime/gen8/hardware_commands_helper_gen8.cpp @@ -17,12 +17,22 @@ namespace NEO { static uint32_t slmSizeId[] = {0, 1, 2, 4, 4, 8, 8, 8, 8, 16, 
16, 16, 16, 16, 16, 16}; template <> -uint32_t HardwareCommandsHelper::computeSlmValues(uint32_t valueIn) { - valueIn += (4 * KB - 1); - valueIn = valueIn >> 12; - valueIn = std::min(valueIn, 15u); - valueIn = slmSizeId[valueIn]; - return valueIn; +uint32_t HardwareCommandsHelper::alignSlmSize(uint32_t slmSize) { + if (slmSize == 0u) { + return 0u; + } + slmSize = std::max(slmSize, 4096u); + slmSize = Math::nextPowerOfTwo(slmSize); + return slmSize; +} + +template <> +uint32_t HardwareCommandsHelper::computeSlmValues(uint32_t slmSize) { + slmSize += (4 * KB - 1); + slmSize = slmSize >> 12; + slmSize = std::min(slmSize, 15u); + slmSize = slmSizeId[slmSize]; + return slmSize; } // Explicitly instantiate HardwareCommandsHelper for BDW device family diff --git a/runtime/helpers/hardware_commands_helper.h b/runtime/helpers/hardware_commands_helper.h index 72a07c1175..b6338cff2f 100644 --- a/runtime/helpers/hardware_commands_helper.h +++ b/runtime/helpers/hardware_commands_helper.h @@ -35,7 +35,8 @@ struct HardwareCommandsHelper : public PerThreadDataHelper { using INTERFACE_DESCRIPTOR_DATA = typename GfxFamily::INTERFACE_DESCRIPTOR_DATA; using MI_ATOMIC = typename GfxFamily::MI_ATOMIC; - static uint32_t computeSlmValues(uint32_t valueIn); + static uint32_t alignSlmSize(uint32_t slmSize); + static uint32_t computeSlmValues(uint32_t slmSize); static INTERFACE_DESCRIPTOR_DATA *getInterfaceDescriptor( const IndirectHeap &indirectHeap, diff --git a/runtime/helpers/hardware_commands_helper.inl b/runtime/helpers/hardware_commands_helper.inl index e88c6e04df..bf377962a9 100644 --- a/runtime/helpers/hardware_commands_helper.inl +++ b/runtime/helpers/hardware_commands_helper.inl @@ -31,13 +31,24 @@ bool HardwareCommandsHelper::isPipeControlPriorToPipelineSelectWArequ } template -uint32_t HardwareCommandsHelper::computeSlmValues(uint32_t valueIn) { - auto value = std::max(valueIn, 1024u); +uint32_t HardwareCommandsHelper::alignSlmSize(uint32_t slmSize) { + if (slmSize == 0u) { + 
return 0u; + } + slmSize = std::max(slmSize, 1024u); + slmSize = Math::nextPowerOfTwo(slmSize); + UNRECOVERABLE_IF(slmSize > 64u * KB); + return slmSize; +} + +template +uint32_t HardwareCommandsHelper::computeSlmValues(uint32_t slmSize) { + auto value = std::max(slmSize, 1024u); value = Math::nextPowerOfTwo(value); value = Math::getMinLsbSet(value); value = value - 9; DEBUG_BREAK_IF(value > 7); - return value * !!valueIn; + return value * !!slmSize; } template diff --git a/runtime/helpers/hw_helper.h b/runtime/helpers/hw_helper.h index 95d714162f..0f1a28293b 100644 --- a/runtime/helpers/hw_helper.h +++ b/runtime/helpers/hw_helper.h @@ -69,6 +69,10 @@ class HwHelper { virtual uint32_t getMocsIndex(GmmHelper &gmmHelper, bool l3enabled, bool l1enabled) const = 0; virtual bool requiresAuxResolves() const = 0; virtual bool tilingAllowed(bool isSharedContext, const cl_image_desc &imgDesc, bool forceLinearStorage) = 0; + virtual uint32_t getBarriersCountFromHasBarriers(uint32_t hasBarriers) = 0; + virtual uint32_t calculateAvailableThreadCount(PRODUCT_FAMILY family, uint32_t grfCount, uint32_t euCount, + uint32_t threadsPerEu) = 0; + virtual uint32_t alignSlmSize(uint32_t slmSize) = 0; static constexpr uint32_t lowPriorityGpgpuEngineIndex = 1; @@ -170,6 +174,12 @@ class HwHelperHw : public HwHelper { bool tilingAllowed(bool isSharedContext, const cl_image_desc &imgDesc, bool forceLinearStorage) override; + uint32_t getBarriersCountFromHasBarriers(uint32_t hasBarriers) override; + + uint32_t calculateAvailableThreadCount(PRODUCT_FAMILY family, uint32_t grfCount, uint32_t euCount, uint32_t threadsPerEu) override; + + uint32_t alignSlmSize(uint32_t slmSize) override; + static AuxTranslationMode getAuxTranslationMode(); protected: diff --git a/runtime/helpers/hw_helper_base.inl b/runtime/helpers/hw_helper_base.inl index 2c645f3c81..b8e52bb602 100644 --- a/runtime/helpers/hw_helper_base.inl +++ b/runtime/helpers/hw_helper_base.inl @@ -242,4 +242,15 @@ bool 
HwHelperHw::tilingAllowed(bool isSharedContext, const cl_image_d return !(imageType == CL_MEM_OBJECT_IMAGE1D || imageType == CL_MEM_OBJECT_IMAGE1D_ARRAY || imageType == CL_MEM_OBJECT_IMAGE1D_BUFFER || buffer); } + +template +uint32_t HwHelperHw::alignSlmSize(uint32_t slmSize) { + return HardwareCommandsHelper::alignSlmSize(slmSize); +} + +template +uint32_t HwHelperHw::getBarriersCountFromHasBarriers(uint32_t hasBarriers) { + return hasBarriers; +} + } // namespace NEO diff --git a/runtime/helpers/hw_helper_bdw_plus.inl b/runtime/helpers/hw_helper_bdw_plus.inl index 983b950009..0c997e32a8 100644 --- a/runtime/helpers/hw_helper_bdw_plus.inl +++ b/runtime/helpers/hw_helper_bdw_plus.inl @@ -65,4 +65,10 @@ uint32_t HwHelperHw::getMocsIndex(GmmHelper &gmmHelper, bool l3enable return gmmHelper.getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED) >> 1; } +template +uint32_t HwHelperHw::calculateAvailableThreadCount(PRODUCT_FAMILY family, uint32_t grfCount, uint32_t euCount, + uint32_t threadsPerEu) { + return threadsPerEu * euCount; +} + } // namespace NEO diff --git a/runtime/kernel/kernel.cpp b/runtime/kernel/kernel.cpp index 5320693609..44ac390c77 100644 --- a/runtime/kernel/kernel.cpp +++ b/runtime/kernel/kernel.cpp @@ -10,6 +10,7 @@ #include "core/helpers/aligned_memory.h" #include "core/helpers/basic_math.h" #include "core/helpers/debug_helpers.h" +#include "core/helpers/kernel_helpers.h" #include "core/helpers/ptr_math.h" #include "core/memory_manager/unified_memory_manager.h" #include "runtime/accelerators/intel_accelerator.h" @@ -974,6 +975,31 @@ void Kernel::clearUnifiedMemoryExecInfo() { kernelUnifiedMemoryGfxAllocations.clear(); } +uint32_t Kernel::getMaxWorkGroupCount(const cl_uint workDim, const size_t *localWorkSize) const { + auto &hardwareInfo = getDevice().getHardwareInfo(); + auto executionEnvironment = kernelInfo.patchInfo.executionEnvironment; + auto dssCount = hardwareInfo.gtSystemInfo.DualSubSliceCount; + if (dssCount == 0) { + dssCount = 
hardwareInfo.gtSystemInfo.SubSliceCount; + } + auto &hwHelper = HwHelper::get(hardwareInfo.platform.eRenderCoreFamily); + auto availableThreadCount = hwHelper.calculateAvailableThreadCount( + hardwareInfo.platform.eProductFamily, + ((executionEnvironment != nullptr) ? executionEnvironment->NumGRFRequired : GrfConfig::DefaultGrfNumber), + hardwareInfo.gtSystemInfo.EUCount, hardwareInfo.gtSystemInfo.ThreadCount / hardwareInfo.gtSystemInfo.EUCount); + + auto hasBarriers = ((executionEnvironment != nullptr) ? executionEnvironment->HasBarriers : 0u); + return KernelHelper::getMaxWorkGroupCount(kernelInfo.getMaxSimdSize(), + availableThreadCount, + dssCount, + dssCount * KB * hardwareInfo.capabilityTable.slmSize, + hwHelper.alignSlmSize(slmTotalSize), + static_cast(hwHelper.getMaxBarrierRegisterPerSlice()), + hwHelper.getBarriersCountFromHasBarriers(hasBarriers), + workDim, + localWorkSize); +} + inline void Kernel::makeArgsResident(CommandStreamReceiver &commandStreamReceiver) { auto numArgs = kernelInfo.kernelArgInfo.size(); for (decltype(numArgs) argIndex = 0; argIndex < numArgs; argIndex++) { diff --git a/runtime/kernel/kernel.h b/runtime/kernel/kernel.h index f413305f5b..b9ed56e989 100644 --- a/runtime/kernel/kernel.h +++ b/runtime/kernel/kernel.h @@ -398,6 +398,8 @@ class Kernel : public BaseObject<_cl_kernel> { bool areStatelessWritesUsed() { return containsStatelessWrites; } + uint32_t getMaxWorkGroupCount(const cl_uint workDim, const size_t *localWorkSize) const; + protected: struct ObjectCounts { uint32_t imageCount; diff --git a/unit_tests/api/CMakeLists.txt b/unit_tests/api/CMakeLists.txt index 25add1868c..84be664b38 100644 --- a/unit_tests/api/CMakeLists.txt +++ b/unit_tests/api/CMakeLists.txt @@ -74,6 +74,7 @@ set(IGDRCL_SRCS_tests_api ${CMAKE_CURRENT_SOURCE_DIR}/cl_get_device_ids_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_get_device_info_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_get_event_profiling_info_tests.inl + 
${CMAKE_CURRENT_SOURCE_DIR}/cl_get_execution_info_intel_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_get_extension_function_address_for_platform_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_get_extension_function_address_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_get_image_info_tests.inl @@ -93,8 +94,8 @@ set(IGDRCL_SRCS_tests_api ${CMAKE_CURRENT_SOURCE_DIR}/cl_get_supported_image_formats_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_icd_get_platform_ids_khr_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_intel_accelerator_tests.inl - ${CMAKE_CURRENT_SOURCE_DIR}/cl_intel_tracing_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_intel_motion_estimation.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/cl_intel_tracing_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_link_program_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_mem_locally_uncached_resource_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/cl_release_command_queue_tests.inl diff --git a/unit_tests/api/api_tests_wrapper3.cpp b/unit_tests/api/api_tests_wrapper3.cpp index 84d2c8593e..4245f74574 100644 --- a/unit_tests/api/api_tests_wrapper3.cpp +++ b/unit_tests/api/api_tests_wrapper3.cpp @@ -5,6 +5,7 @@ * */ +#include "unit_tests/api/cl_get_execution_info_intel_tests.inl" #include "unit_tests/api/cl_get_kernel_work_group_info_tests.inl" #include "unit_tests/api/cl_get_mem_object_info_tests.inl" #include "unit_tests/api/cl_get_pipe_info_tests.inl" diff --git a/unit_tests/api/cl_get_execution_info_intel_tests.inl b/unit_tests/api/cl_get_execution_info_intel_tests.inl new file mode 100644 index 0000000000..96dd46ea0b --- /dev/null +++ b/unit_tests/api/cl_get_execution_info_intel_tests.inl @@ -0,0 +1,69 @@ +/* + * Copyright (C) 2017-2019 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "runtime/command_queue/command_queue.h" +#include "unit_tests/mocks/mock_kernel.h" + +#include "cl_api_tests.h" + +using namespace NEO; + +using clGetExecutionInfoTests = api_tests; + +namespace ULT { + +TEST_F(clGetExecutionInfoTests, 
GivenInvalidInputWhenCallingGetExecutionInfoThenErrorIsReturned) { + retVal = clGetExecutionInfoIntel(nullptr, pKernel, 0, nullptr, nullptr, 0, 0, nullptr, nullptr); + EXPECT_NE(CL_SUCCESS, retVal); + + retVal = clGetExecutionInfoIntel(pCommandQueue, nullptr, 0, nullptr, nullptr, 0, 0, nullptr, nullptr); + EXPECT_NE(CL_SUCCESS, retVal); + + pKernel->isPatchedOverride = false; + retVal = clGetExecutionInfoIntel(pCommandQueue, pKernel, 0, nullptr, nullptr, 0, 0, nullptr, nullptr); + EXPECT_NE(CL_SUCCESS, retVal); + pKernel->isPatchedOverride = true; + + auto invalidParamName = 0xFFFF; + retVal = clGetExecutionInfoIntel(pCommandQueue, pKernel, 0, nullptr, nullptr, invalidParamName, 0, nullptr, nullptr); + EXPECT_NE(CL_SUCCESS, retVal); + + uint32_t queryResult; + retVal = clGetExecutionInfoIntel(pCommandQueue, pKernel, 0, nullptr, nullptr, CL_EXECUTION_INFO_MAX_WORKGROUP_COUNT_INTEL, + sizeof(queryResult), nullptr, nullptr); + EXPECT_NE(CL_SUCCESS, retVal); + + retVal = clGetExecutionInfoIntel(pCommandQueue, pKernel, 0, nullptr, nullptr, CL_EXECUTION_INFO_MAX_WORKGROUP_COUNT_INTEL, + 0, &queryResult, nullptr); + EXPECT_NE(CL_SUCCESS, retVal); +} + +TEST_F(clGetExecutionInfoTests, GivenVariousInputWhenGettingMaxWorkGroupCountThenCorrectValuesAreReturned) { + uint32_t queryResult; + retVal = clGetExecutionInfoIntel(pCommandQueue, pKernel, 0, nullptr, nullptr, CL_EXECUTION_INFO_MAX_WORKGROUP_COUNT_INTEL, + sizeof(queryResult), &queryResult, nullptr); + EXPECT_EQ(CL_SUCCESS, retVal); + EXPECT_NE(0u, queryResult); + + uint64_t queryResult64 = 0; + size_t queryResultSize; + retVal = clGetExecutionInfoIntel(pCommandQueue, pKernel, 0, nullptr, nullptr, CL_EXECUTION_INFO_MAX_WORKGROUP_COUNT_INTEL, + sizeof(queryResult64), &queryResult64, &queryResultSize); + EXPECT_EQ(CL_SUCCESS, retVal); + EXPECT_EQ(queryResult, queryResult64); + EXPECT_EQ(sizeof(queryResult), queryResultSize); + + std::unique_ptr 
pKernelWithExecutionEnvironmentPatch(MockKernel::create(pCommandQueue->getDevice(), pProgram)); + uint32_t queryResultWithExecutionEnvironment; + retVal = clGetExecutionInfoIntel(pCommandQueue, pKernelWithExecutionEnvironmentPatch.get(), 0, nullptr, nullptr, + CL_EXECUTION_INFO_MAX_WORKGROUP_COUNT_INTEL, + sizeof(queryResultWithExecutionEnvironment), &queryResultWithExecutionEnvironment, nullptr); + EXPECT_EQ(CL_SUCCESS, retVal); + EXPECT_EQ(queryResult, queryResultWithExecutionEnvironment); +} + +} // namespace ULT diff --git a/unit_tests/helpers/CMakeLists.txt b/unit_tests/helpers/CMakeLists.txt index 5da39acec6..1310ac9a07 100644 --- a/unit_tests/helpers/CMakeLists.txt +++ b/unit_tests/helpers/CMakeLists.txt @@ -18,8 +18,8 @@ set(IGDRCL_SRCS_tests_helpers ${CMAKE_CURRENT_SOURCE_DIR}/deferred_deleter_helpers_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/dirty_state_helpers_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/dispatch_flags_helper.h - ${CMAKE_CURRENT_SOURCE_DIR}/dispatch_info_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/dispatch_info_builder_tests.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/dispatch_info_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/extendable_enum_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/flush_stamp_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/get_gpgpu_engines_tests.inl @@ -34,14 +34,14 @@ set(IGDRCL_SRCS_tests_helpers ${CMAKE_CURRENT_SOURCE_DIR}/hw_parse.inl ${CMAKE_CURRENT_SOURCE_DIR}/kernel_filename_helper.h ${CMAKE_CURRENT_SOURCE_DIR}/kmd_notify_tests.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/memory_properties_flags_helpers_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/mem_properties_parser_helper_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/memory_management_tests.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/memory_properties_flags_helpers_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/mipmap_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/per_thread_data_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/ptr_math_tests.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/raii_hw_helper.h ${CMAKE_CURRENT_SOURCE_DIR}/queue_helpers_tests.cpp + 
${CMAKE_CURRENT_SOURCE_DIR}/raii_hw_helper.h ${CMAKE_CURRENT_SOURCE_DIR}/sampler_helpers_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/task_information_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_debug_variables.inl diff --git a/unit_tests/helpers/hardware_commands_helper_tests.cpp b/unit_tests/helpers/hardware_commands_helper_tests.cpp index d51dcab44b..ca6d5e08a7 100644 --- a/unit_tests/helpers/hardware_commands_helper_tests.cpp +++ b/unit_tests/helpers/hardware_commands_helper_tests.cpp @@ -927,7 +927,47 @@ HWTEST_F(HardwareCommandsTest, setBindingTableStatesForNoSurfaces) { delete pKernel; } -HWTEST_F(HardwareCommandsTest, slmValueScenarios) { +HWTEST_F(HardwareCommandsTest, GivenVariousValuesWhenAlignSlmSizeIsCalledThenCorrectValueIsReturned) { + if (::renderCoreFamily == IGFX_GEN8_CORE) { + EXPECT_EQ(0u, HardwareCommandsHelper::alignSlmSize(0)); + EXPECT_EQ(4096u, HardwareCommandsHelper::alignSlmSize(1)); + EXPECT_EQ(4096u, HardwareCommandsHelper::alignSlmSize(1024)); + EXPECT_EQ(4096u, HardwareCommandsHelper::alignSlmSize(1025)); + EXPECT_EQ(4096u, HardwareCommandsHelper::alignSlmSize(2048)); + EXPECT_EQ(4096u, HardwareCommandsHelper::alignSlmSize(2049)); + EXPECT_EQ(4096u, HardwareCommandsHelper::alignSlmSize(4096)); + EXPECT_EQ(8192u, HardwareCommandsHelper::alignSlmSize(4097)); + EXPECT_EQ(8192u, HardwareCommandsHelper::alignSlmSize(8192)); + EXPECT_EQ(16384u, HardwareCommandsHelper::alignSlmSize(8193)); + EXPECT_EQ(16384u, HardwareCommandsHelper::alignSlmSize(12288)); + EXPECT_EQ(16384u, HardwareCommandsHelper::alignSlmSize(16384)); + EXPECT_EQ(32768u, HardwareCommandsHelper::alignSlmSize(16385)); + EXPECT_EQ(32768u, HardwareCommandsHelper::alignSlmSize(24576)); + EXPECT_EQ(32768u, HardwareCommandsHelper::alignSlmSize(32768)); + EXPECT_EQ(65536u, HardwareCommandsHelper::alignSlmSize(32769)); + EXPECT_EQ(65536u, HardwareCommandsHelper::alignSlmSize(49152)); + EXPECT_EQ(65536u, HardwareCommandsHelper::alignSlmSize(65535)); + EXPECT_EQ(65536u, 
HardwareCommandsHelper::alignSlmSize(65536)); + } else { + EXPECT_EQ(0u, HardwareCommandsHelper::alignSlmSize(0)); + EXPECT_EQ(1024u, HardwareCommandsHelper::alignSlmSize(1)); + EXPECT_EQ(1024u, HardwareCommandsHelper::alignSlmSize(1024)); + EXPECT_EQ(2048u, HardwareCommandsHelper::alignSlmSize(1025)); + EXPECT_EQ(2048u, HardwareCommandsHelper::alignSlmSize(2048)); + EXPECT_EQ(4096u, HardwareCommandsHelper::alignSlmSize(2049)); + EXPECT_EQ(4096u, HardwareCommandsHelper::alignSlmSize(4096)); + EXPECT_EQ(8192u, HardwareCommandsHelper::alignSlmSize(4097)); + EXPECT_EQ(8192u, HardwareCommandsHelper::alignSlmSize(8192)); + EXPECT_EQ(16384u, HardwareCommandsHelper::alignSlmSize(8193)); + EXPECT_EQ(16384u, HardwareCommandsHelper::alignSlmSize(16384)); + EXPECT_EQ(32768u, HardwareCommandsHelper::alignSlmSize(16385)); + EXPECT_EQ(32768u, HardwareCommandsHelper::alignSlmSize(32768)); + EXPECT_EQ(65536u, HardwareCommandsHelper::alignSlmSize(32769)); + EXPECT_EQ(65536u, HardwareCommandsHelper::alignSlmSize(65536)); + } +} + +HWTEST_F(HardwareCommandsTest, GivenVariousValuesWhenComputeSlmSizeIsCalledThenCorrectValueIsReturned) { if (::renderCoreFamily == IGFX_GEN8_CORE) { EXPECT_EQ(0u, HardwareCommandsHelper::computeSlmValues(0)); EXPECT_EQ(1u, HardwareCommandsHelper::computeSlmValues(1)); diff --git a/unit_tests/helpers/hw_helper_tests.cpp b/unit_tests/helpers/hw_helper_tests.cpp index b26b21f341..25ce245ff8 100644 --- a/unit_tests/helpers/hw_helper_tests.cpp +++ b/unit_tests/helpers/hw_helper_tests.cpp @@ -717,3 +717,19 @@ HWTEST_F(HwHelperTest, givenHwHelperWhenAskingForTilingSupportThenReturnValidVal EXPECT_FALSE(helper.tilingAllowed(false, imgDesc, false)); } } + +HWCMDTEST_F(IGFX_GEN8_CORE, HwHelperTest, GivenVariousValuesWhenCallingGetBarriersCountFromHasBarrierThenCorrectValueIsReturned) { + auto &hwHelper = HwHelper::get(hardwareInfo.platform.eRenderCoreFamily); + EXPECT_EQ(0u, hwHelper.getBarriersCountFromHasBarriers(0u)); + EXPECT_EQ(1u, 
hwHelper.getBarriersCountFromHasBarriers(1u)); +} + +HWCMDTEST_F(IGFX_GEN8_CORE, HwHelperTest, GivenVariousValuesWhenCallingCalculateAvailableThreadCountThenCorrectValueIsReturned) { + auto &hwHelper = HwHelper::get(hardwareInfo.platform.eRenderCoreFamily); + auto result = hwHelper.calculateAvailableThreadCount( + hardwareInfo.platform.eProductFamily, + 0, + hardwareInfo.gtSystemInfo.EUCount, + hardwareInfo.gtSystemInfo.ThreadCount / hardwareInfo.gtSystemInfo.EUCount); + EXPECT_EQ(hardwareInfo.gtSystemInfo.ThreadCount, result); +} diff --git a/unit_tests/mocks/mock_kernel.cpp b/unit_tests/mocks/mock_kernel.cpp index acbb2aee75..a85bc4a73e 100644 --- a/unit_tests/mocks/mock_kernel.cpp +++ b/unit_tests/mocks/mock_kernel.cpp @@ -37,7 +37,7 @@ void Kernel::ReflectionSurfaceHelper::patchBlocksCurbe(void *reflectionSur template void Kernel::patchReflectionSurface(DeviceQueue *, PrintfHandler *); bool MockKernel::isPatched() const { - return true; + return isPatchedOverride; } bool MockKernel::canTransformImages() const { diff --git a/unit_tests/mocks/mock_kernel.h b/unit_tests/mocks/mock_kernel.h index 7947a0999d..5a0eca41ba 100644 --- a/unit_tests/mocks/mock_kernel.h +++ b/unit_tests/mocks/mock_kernel.h @@ -246,6 +246,7 @@ class MockKernel : public Kernel { mutable uint32_t releaseOwnershipCalls = 0; bool canKernelTransformImages = true; + bool isPatchedOverride = true; protected: KernelInfo *kernelInfoAllocated = nullptr;