mirror of
https://github.com/intel/compute-runtime.git
synced 2025-09-15 13:01:45 +08:00
Add clGetKernelMaxConcurrentWorkGroupCountINTEL
clGetKernelMaxConcurrentWorkGroupCountINTEL replaces clGetExecutionInfoINTEL function. Change-Id: I7e3461695de7ee4c0e43c3e9770724b025c0e2be Signed-off-by: Filip Hazubski <filip.hazubski@intel.com>
This commit is contained in:

committed by
sys_ocldev

parent
2c568542f1
commit
9c16c1a425
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2019 Intel Corporation
|
||||
* Copyright (C) 2019-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@ -8,6 +8,7 @@
|
||||
#include "core/helpers/kernel_helpers.h"
|
||||
|
||||
#include "core/helpers/basic_math.h"
|
||||
#include "core/helpers/debug_helpers.h"
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
@ -16,8 +17,11 @@ namespace NEO {
|
||||
uint32_t KernelHelper::getMaxWorkGroupCount(uint32_t simd, uint32_t availableThreadCount, uint32_t dssCount, uint32_t availableSlmSize,
|
||||
uint32_t usedSlmSize, uint32_t maxBarrierCount, uint32_t numberOfBarriers, uint32_t workDim,
|
||||
const size_t *localWorkSize) {
|
||||
size_t workGroupSize = 1;
|
||||
for (uint32_t i = 0; i < workDim; i++) {
|
||||
UNRECOVERABLE_IF((workDim == 0) || (workDim > 3));
|
||||
UNRECOVERABLE_IF(localWorkSize == nullptr);
|
||||
|
||||
size_t workGroupSize = localWorkSize[0];
|
||||
for (uint32_t i = 1; i < workDim; i++) {
|
||||
workGroupSize *= localWorkSize[i];
|
||||
}
|
||||
|
||||
|
@ -78,9 +78,6 @@ using cl_unified_shared_memory_capabilities_intel = cl_bitfield;
|
||||
//Used with createBuffer
|
||||
#define CL_MEM_ALLOW_UNRESTRICTED_SIZE_INTEL (1 << 23)
|
||||
|
||||
typedef cl_uint cl_execution_info_intel;
|
||||
#define CL_EXECUTION_INFO_MAX_WORKGROUP_COUNT_INTEL 0x10100
|
||||
|
||||
/******************************
|
||||
* UNIFIED MEMORY *
|
||||
*******************************/
|
||||
|
@ -3977,8 +3977,8 @@ void *CL_API_CALL clGetExtensionFunctionAddress(const char *funcName) {
|
||||
RETURN_FUNC_PTR_IF_EXIST(clEnqueueMemAdviseINTEL);
|
||||
RETURN_FUNC_PTR_IF_EXIST(clGetDeviceFunctionPointerINTEL);
|
||||
RETURN_FUNC_PTR_IF_EXIST(clGetDeviceGlobalVariablePointerINTEL);
|
||||
RETURN_FUNC_PTR_IF_EXIST(clGetKernelMaxConcurrentWorkGroupCountINTEL);
|
||||
RETURN_FUNC_PTR_IF_EXIST(clGetKernelSuggestedLocalWorkSizeINTEL);
|
||||
RETURN_FUNC_PTR_IF_EXIST(clGetExecutionInfoINTEL);
|
||||
RETURN_FUNC_PTR_IF_EXIST(clEnqueueNDCountKernelINTEL);
|
||||
|
||||
void *ret = sharingFactory.getExtensionFunctionAddress(funcName);
|
||||
@ -5252,15 +5252,12 @@ cl_int CL_API_CALL clGetKernelSuggestedLocalWorkSizeINTEL(cl_command_queue comma
|
||||
return retVal;
|
||||
}
|
||||
|
||||
cl_int CL_API_CALL clGetExecutionInfoINTEL(cl_command_queue commandQueue,
|
||||
cl_kernel kernel,
|
||||
cl_uint workDim,
|
||||
const size_t *globalWorkOffset,
|
||||
const size_t *localWorkSize,
|
||||
cl_execution_info_intel paramName,
|
||||
size_t paramValueSize,
|
||||
void *paramValue,
|
||||
size_t *paramValueSizeRet) {
|
||||
cl_int CL_API_CALL clGetKernelMaxConcurrentWorkGroupCountINTEL(cl_command_queue commandQueue,
|
||||
cl_kernel kernel,
|
||||
cl_uint workDim,
|
||||
const size_t *globalWorkOffset,
|
||||
const size_t *localWorkSize,
|
||||
size_t *suggestedWorkGroupCount) {
|
||||
|
||||
cl_int retVal = CL_SUCCESS;
|
||||
API_ENTER(&retVal);
|
||||
@ -5269,8 +5266,7 @@ cl_int CL_API_CALL clGetExecutionInfoINTEL(cl_command_queue commandQueue,
|
||||
"globalWorkOffset[1]", NEO::FileLoggerInstance().getInput(globalWorkOffset, 1),
|
||||
"globalWorkOffset[2]", NEO::FileLoggerInstance().getInput(globalWorkOffset, 2),
|
||||
"localWorkSize", NEO::FileLoggerInstance().getSizes(localWorkSize, workDim, true),
|
||||
"paramName", paramName, "paramValueSize", paramValueSize,
|
||||
"paramValue", paramValue, "paramValueSizeRet", paramValueSizeRet);
|
||||
"suggestedWorkGroupCount", suggestedWorkGroupCount);
|
||||
|
||||
retVal = validateObjects(commandQueue, kernel);
|
||||
|
||||
@ -5278,28 +5274,34 @@ cl_int CL_API_CALL clGetExecutionInfoINTEL(cl_command_queue commandQueue,
|
||||
return retVal;
|
||||
}
|
||||
|
||||
if ((workDim == 0) || (workDim > 3)) {
|
||||
retVal = CL_INVALID_WORK_DIMENSION;
|
||||
return retVal;
|
||||
}
|
||||
|
||||
if (globalWorkOffset == nullptr) {
|
||||
retVal = CL_INVALID_GLOBAL_OFFSET;
|
||||
return retVal;
|
||||
}
|
||||
|
||||
if (localWorkSize == nullptr) {
|
||||
retVal = CL_INVALID_WORK_GROUP_SIZE;
|
||||
return retVal;
|
||||
}
|
||||
|
||||
auto pKernel = castToObjectOrAbort<Kernel>(kernel);
|
||||
if (!pKernel->isPatched()) {
|
||||
retVal = CL_INVALID_KERNEL;
|
||||
return retVal;
|
||||
}
|
||||
|
||||
TakeOwnershipWrapper<Kernel> kernelOwnership(*pKernel, gtpinIsGTPinInitialized());
|
||||
switch (paramName) {
|
||||
case CL_EXECUTION_INFO_MAX_WORKGROUP_COUNT_INTEL:
|
||||
if ((paramValueSize < sizeof(uint32_t)) || (paramValue == nullptr)) {
|
||||
retVal = CL_INVALID_VALUE;
|
||||
return retVal;
|
||||
}
|
||||
*reinterpret_cast<uint32_t *>(paramValue) = pKernel->getMaxWorkGroupCount(workDim, localWorkSize);
|
||||
if (paramValueSizeRet != nullptr) {
|
||||
*paramValueSizeRet = sizeof(uint32_t);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
if (suggestedWorkGroupCount == nullptr) {
|
||||
retVal = CL_INVALID_VALUE;
|
||||
return retVal;
|
||||
}
|
||||
|
||||
*suggestedWorkGroupCount = pKernel->getMaxWorkGroupCount(workDim, localWorkSize);
|
||||
|
||||
return retVal;
|
||||
}
|
||||
|
||||
|
@ -1034,16 +1034,13 @@ cl_int CL_API_CALL clGetKernelSuggestedLocalWorkSizeINTEL(
|
||||
const size_t *globalWorkSize,
|
||||
size_t *suggestedLocalWorkSize);
|
||||
|
||||
cl_int CL_API_CALL clGetExecutionInfoINTEL(
|
||||
cl_int CL_API_CALL clGetKernelMaxConcurrentWorkGroupCountINTEL(
|
||||
cl_command_queue commandQueue,
|
||||
cl_kernel kernel,
|
||||
cl_uint workDim,
|
||||
const size_t *globalWorkOffset,
|
||||
const size_t *localWorkSize,
|
||||
cl_execution_info_intel paramName,
|
||||
size_t paramValueSize,
|
||||
void *paramValue,
|
||||
size_t *paramValueSizeRet);
|
||||
size_t *suggestedWorkGroupCount);
|
||||
|
||||
cl_int CL_API_CALL clEnqueueNDCountKernelINTEL(
|
||||
cl_command_queue commandQueue,
|
||||
|
@ -74,13 +74,13 @@ set(IGDRCL_SRCS_tests_api
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/cl_get_device_ids_tests.inl
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/cl_get_device_info_tests.inl
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/cl_get_event_profiling_info_tests.inl
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/cl_get_execution_info_intel_tests.inl
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/cl_get_extension_function_address_for_platform_tests.inl
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/cl_get_extension_function_address_tests.inl
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/cl_get_image_info_tests.inl
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/cl_get_image_params_tests.inl
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/cl_get_kernel_arg_info_tests.inl
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/cl_get_kernel_info_tests.inl
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/cl_get_kernel_max_concurrent_work_group_count_intel_tests.inl
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/cl_get_kernel_sub_group_info_khr_tests.inl
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/cl_get_kernel_sub_group_info_tests.inl
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/cl_get_kernel_suggested_local_work_size_intel_tests.inl
|
||||
|
@ -5,7 +5,7 @@
|
||||
*
|
||||
*/
|
||||
|
||||
#include "unit_tests/api/cl_get_execution_info_intel_tests.inl"
|
||||
#include "unit_tests/api/cl_get_kernel_max_concurrent_work_group_count_intel_tests.inl"
|
||||
#include "unit_tests/api/cl_get_kernel_suggested_local_work_size_intel_tests.inl"
|
||||
#include "unit_tests/api/cl_get_kernel_work_group_info_tests.inl"
|
||||
#include "unit_tests/api/cl_get_mem_object_info_tests.inl"
|
||||
|
@ -1,69 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2017-2019 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "runtime/command_queue/command_queue.h"
|
||||
#include "unit_tests/mocks/mock_kernel.h"
|
||||
|
||||
#include "cl_api_tests.h"
|
||||
|
||||
using namespace NEO;
|
||||
|
||||
using clGetExecutionInfoTests = api_tests;
|
||||
|
||||
namespace ULT {
|
||||
|
||||
TEST_F(clGetExecutionInfoTests, GivenInvalidInputWhenCallingGetExecutionInfoThenErrorIsReturned) {
|
||||
retVal = clGetExecutionInfoINTEL(nullptr, pKernel, 0, nullptr, nullptr, 0, 0, nullptr, nullptr);
|
||||
EXPECT_NE(CL_SUCCESS, retVal);
|
||||
|
||||
retVal = clGetExecutionInfoINTEL(pCommandQueue, nullptr, 0, nullptr, nullptr, 0, 0, nullptr, nullptr);
|
||||
EXPECT_NE(CL_SUCCESS, retVal);
|
||||
|
||||
pKernel->isPatchedOverride = false;
|
||||
retVal = clGetExecutionInfoINTEL(pCommandQueue, pKernel, 0, nullptr, nullptr, 0, 0, nullptr, nullptr);
|
||||
EXPECT_NE(CL_SUCCESS, retVal);
|
||||
pKernel->isPatchedOverride = true;
|
||||
|
||||
auto invalidParamName = 0xFFFF;
|
||||
retVal = clGetExecutionInfoINTEL(pCommandQueue, pKernel, 0, nullptr, nullptr, invalidParamName, 0, nullptr, nullptr);
|
||||
EXPECT_NE(CL_SUCCESS, retVal);
|
||||
|
||||
uint32_t queryResult;
|
||||
retVal = clGetExecutionInfoINTEL(pCommandQueue, pKernel, 0, nullptr, nullptr, CL_EXECUTION_INFO_MAX_WORKGROUP_COUNT_INTEL,
|
||||
sizeof(queryResult), nullptr, nullptr);
|
||||
EXPECT_NE(CL_SUCCESS, retVal);
|
||||
|
||||
retVal = clGetExecutionInfoINTEL(pCommandQueue, pKernel, 0, nullptr, nullptr, CL_EXECUTION_INFO_MAX_WORKGROUP_COUNT_INTEL,
|
||||
0, &queryResult, nullptr);
|
||||
EXPECT_NE(CL_SUCCESS, retVal);
|
||||
}
|
||||
|
||||
TEST_F(clGetExecutionInfoTests, GivenVariousInputWhenGettingMaxWorkGroupCountThenCorrectValuesAreReturned) {
|
||||
uint32_t queryResult;
|
||||
retVal = clGetExecutionInfoINTEL(pCommandQueue, pKernel, 0, nullptr, nullptr, CL_EXECUTION_INFO_MAX_WORKGROUP_COUNT_INTEL,
|
||||
sizeof(queryResult), &queryResult, nullptr);
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
EXPECT_NE(0u, queryResult);
|
||||
|
||||
uint64_t queryResult64 = 0;
|
||||
size_t queryResultSize;
|
||||
retVal = clGetExecutionInfoINTEL(pCommandQueue, pKernel, 0, nullptr, nullptr, CL_EXECUTION_INFO_MAX_WORKGROUP_COUNT_INTEL,
|
||||
sizeof(queryResult64), &queryResult64, &queryResultSize);
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
EXPECT_EQ(queryResult, queryResult64);
|
||||
EXPECT_EQ(sizeof(queryResult), queryResultSize);
|
||||
|
||||
std::unique_ptr<MockKernel> pKernelWithExecutionEnvironmentPatch(MockKernel::create(pCommandQueue->getDevice(), pProgram));
|
||||
uint32_t queryResultWithExecutionEnvironment;
|
||||
retVal = clGetExecutionInfoINTEL(pCommandQueue, pKernelWithExecutionEnvironmentPatch.get(), 0, nullptr, nullptr,
|
||||
CL_EXECUTION_INFO_MAX_WORKGROUP_COUNT_INTEL,
|
||||
sizeof(queryResultWithExecutionEnvironment), &queryResultWithExecutionEnvironment, nullptr);
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
EXPECT_EQ(queryResult, queryResultWithExecutionEnvironment);
|
||||
}
|
||||
|
||||
} // namespace ULT
|
@ -83,9 +83,9 @@ TEST_F(clGetExtensionFunctionAddressForPlatformTests, GivenClGetKernelSuggestedL
|
||||
EXPECT_EQ(retVal, reinterpret_cast<void *>(clGetKernelSuggestedLocalWorkSizeINTEL));
|
||||
}
|
||||
|
||||
TEST_F(clGetExtensionFunctionAddressForPlatformTests, GivenClGetExecutionInfoINTELWhenGettingExtensionFunctionThenCorrectAddressIsReturned) {
|
||||
auto retVal = clGetExtensionFunctionAddressForPlatform(pPlatform, "clGetExecutionInfoINTEL");
|
||||
EXPECT_EQ(retVal, reinterpret_cast<void *>(clGetExecutionInfoINTEL));
|
||||
TEST_F(clGetExtensionFunctionAddressForPlatformTests, GivenClGetKernelMaxConcurrentWorkGroupCountINTELWhenGettingExtensionFunctionThenCorrectAddressIsReturned) {
|
||||
auto retVal = clGetExtensionFunctionAddressForPlatform(pPlatform, "clGetKernelMaxConcurrentWorkGroupCountINTEL");
|
||||
EXPECT_EQ(retVal, reinterpret_cast<void *>(clGetKernelMaxConcurrentWorkGroupCountINTEL));
|
||||
}
|
||||
|
||||
TEST_F(clGetExtensionFunctionAddressForPlatformTests, GivenClEnqueueNDCountKernelINTELWhenGettingExtensionFunctionThenCorrectAddressIsReturned) {
|
||||
|
@ -168,9 +168,9 @@ TEST_F(clGetExtensionFunctionAddressTests, GivenClGetKernelSuggestedLocalWorkSiz
|
||||
EXPECT_EQ(retVal, reinterpret_cast<void *>(clGetKernelSuggestedLocalWorkSizeINTEL));
|
||||
}
|
||||
|
||||
TEST_F(clGetExtensionFunctionAddressTests, GivenClGetExecutionInfoINTELWhenGettingExtensionFunctionThenCorrectAddressIsReturned) {
|
||||
auto retVal = clGetExtensionFunctionAddress("clGetExecutionInfoINTEL");
|
||||
EXPECT_EQ(retVal, reinterpret_cast<void *>(clGetExecutionInfoINTEL));
|
||||
TEST_F(clGetExtensionFunctionAddressTests, GivenClGetKernelMaxConcurrentWorkGroupCountINTELWhenGettingExtensionFunctionThenCorrectAddressIsReturned) {
|
||||
auto retVal = clGetExtensionFunctionAddress("clGetKernelMaxConcurrentWorkGroupCountINTEL");
|
||||
EXPECT_EQ(retVal, reinterpret_cast<void *>(clGetKernelMaxConcurrentWorkGroupCountINTEL));
|
||||
}
|
||||
|
||||
TEST_F(clGetExtensionFunctionAddressTests, GivenClEnqueueNDCountKernelINTELWhenGettingExtensionFunctionThenCorrectAddressIsReturned) {
|
||||
|
@ -0,0 +1,79 @@
|
||||
/*
|
||||
* Copyright (C) 2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "runtime/command_queue/command_queue.h"
|
||||
#include "unit_tests/mocks/mock_kernel.h"
|
||||
|
||||
#include "cl_api_tests.h"
|
||||
|
||||
using namespace NEO;
|
||||
|
||||
using clGetKernelMaxConcurrentWorkGroupCountTests = api_tests;
|
||||
|
||||
namespace ULT {
|
||||
|
||||
// Negative-path test: calls clGetKernelMaxConcurrentWorkGroupCountINTEL once per
// kind of invalid argument and checks the exact OpenCL error code returned.
// Uses the fixture-provided pCommandQueue, pKernel and retVal.
// NOTE(review): globalWorkOffset, localWorkSize and suggestedWorkGroupCount are
// declared without initializers. Every call here is expected to fail, but confirm
// that nothing on the way (e.g. API input logging) reads the array contents.
TEST_F(clGetKernelMaxConcurrentWorkGroupCountTests, GivenInvalidInputWhenCallingGetKernelMaxConcurrentWorkGroupCountThenErrorIsReturned) {
|
||||
size_t globalWorkOffset[3];
|
||||
size_t localWorkSize[3];
|
||||
size_t suggestedWorkGroupCount;
|
||||
cl_uint workDim = 1;
|
||||
// Null command queue -> CL_INVALID_COMMAND_QUEUE.
retVal = clGetKernelMaxConcurrentWorkGroupCountINTEL(nullptr, pKernel, workDim,
|
||||
globalWorkOffset, localWorkSize, &suggestedWorkGroupCount);
|
||||
EXPECT_EQ(CL_INVALID_COMMAND_QUEUE, retVal);
|
||||
|
||||
// Null kernel -> CL_INVALID_KERNEL.
retVal = clGetKernelMaxConcurrentWorkGroupCountINTEL(pCommandQueue, nullptr, workDim,
|
||||
globalWorkOffset, localWorkSize, &suggestedWorkGroupCount);
|
||||
EXPECT_EQ(CL_INVALID_KERNEL, retVal);
|
||||
|
||||
// Kernel reported as not patched -> CL_INVALID_KERNEL. The override is restored
// to true right after so the remaining cases run against a patched kernel.
pKernel->isPatchedOverride = false;
|
||||
retVal = clGetKernelMaxConcurrentWorkGroupCountINTEL(pCommandQueue, pKernel, workDim,
|
||||
globalWorkOffset, localWorkSize, &suggestedWorkGroupCount);
|
||||
EXPECT_EQ(CL_INVALID_KERNEL, retVal);
|
||||
pKernel->isPatchedOverride = true;
|
||||
|
||||
// Null output pointer -> CL_INVALID_VALUE.
retVal = clGetKernelMaxConcurrentWorkGroupCountINTEL(pCommandQueue, pKernel, workDim,
|
||||
globalWorkOffset, localWorkSize, nullptr);
|
||||
EXPECT_EQ(CL_INVALID_VALUE, retVal);
|
||||
|
||||
// workDim of 0 -> CL_INVALID_WORK_DIMENSION.
retVal = clGetKernelMaxConcurrentWorkGroupCountINTEL(pCommandQueue, pKernel, 0,
|
||||
globalWorkOffset, localWorkSize, &suggestedWorkGroupCount);
|
||||
EXPECT_EQ(CL_INVALID_WORK_DIMENSION, retVal);
|
||||
|
||||
// workDim of 4 (above the maximum of 3) -> CL_INVALID_WORK_DIMENSION.
retVal = clGetKernelMaxConcurrentWorkGroupCountINTEL(pCommandQueue, pKernel, 4,
|
||||
globalWorkOffset, localWorkSize, &suggestedWorkGroupCount);
|
||||
EXPECT_EQ(CL_INVALID_WORK_DIMENSION, retVal);
|
||||
|
||||
// Null globalWorkOffset -> CL_INVALID_GLOBAL_OFFSET.
retVal = clGetKernelMaxConcurrentWorkGroupCountINTEL(pCommandQueue, pKernel, workDim,
|
||||
nullptr, localWorkSize, &suggestedWorkGroupCount);
|
||||
EXPECT_EQ(CL_INVALID_GLOBAL_OFFSET, retVal);
|
||||
|
||||
// Null localWorkSize -> CL_INVALID_WORK_GROUP_SIZE.
retVal = clGetKernelMaxConcurrentWorkGroupCountINTEL(pCommandQueue, pKernel, workDim,
|
||||
globalWorkOffset, nullptr, &suggestedWorkGroupCount);
|
||||
EXPECT_EQ(CL_INVALID_WORK_GROUP_SIZE, retVal);
|
||||
}
|
||||
|
||||
// Positive-path test: with valid 3-dimensional arguments the API must return
// CL_SUCCESS and write the same value that Kernel::getMaxWorkGroupCount reports
// for the same workDim / localWorkSize. Checked for the fixture kernel and for a
// second kernel created inside the test.
TEST_F(clGetKernelMaxConcurrentWorkGroupCountTests, GivenVariousInputWhenGettingMaxConcurrentWorkGroupCountThenCorrectValuesAreReturned) {
|
||||
cl_uint workDim = 3;
|
||||
size_t globalWorkOffset[] = {0, 0, 0};
|
||||
size_t localWorkSize[] = {8, 8, 8};
|
||||
size_t maxConcurrentWorkGroupCount = 0;
|
||||
// Case 1: the fixture's kernel.
retVal = clGetKernelMaxConcurrentWorkGroupCountINTEL(pCommandQueue, pKernel, workDim, globalWorkOffset, localWorkSize,
|
||||
&maxConcurrentWorkGroupCount);
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
// The expected value is taken from the kernel object itself, so this checks that
// the API forwards the kernel's answer rather than pinning a hard-coded number.
size_t expectedMaxConcurrentWorkGroupCount = pKernel->getMaxWorkGroupCount(workDim, localWorkSize);
|
||||
EXPECT_EQ(expectedMaxConcurrentWorkGroupCount, maxConcurrentWorkGroupCount);
|
||||
|
||||
// Case 2: a MockKernel created here from the queue's device and the fixture's
// program; owned by unique_ptr so it is released when the test ends.
std::unique_ptr<MockKernel> pKernelWithExecutionEnvironmentPatch(MockKernel::create(pCommandQueue->getDevice(), pProgram));
|
||||
retVal = clGetKernelMaxConcurrentWorkGroupCountINTEL(pCommandQueue, pKernelWithExecutionEnvironmentPatch.get(), workDim,
|
||||
globalWorkOffset, localWorkSize,
|
||||
&maxConcurrentWorkGroupCount);
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
expectedMaxConcurrentWorkGroupCount = pKernelWithExecutionEnvironmentPatch->getMaxWorkGroupCount(workDim, localWorkSize);
|
||||
EXPECT_EQ(expectedMaxConcurrentWorkGroupCount, maxConcurrentWorkGroupCount);
|
||||
}
|
||||
|
||||
} // namespace ULT
|
Reference in New Issue
Block a user