Add clGetExecutionInfoIntel API

This API allows querying for execution-related information.
The CL_EXECUTION_INFO_MAX_WORKGROUP_COUNT_INTEL parameter queries the
maximum number of work groups that can run concurrently on the device.

Related-To: NEO-2712

Change-Id: I4e6b4c80aeb06ff966fb543c0a7f05ed54416dab
Signed-off-by: Filip Hazubski <filip.hazubski@intel.com>
This commit is contained in:
Filip Hazubski
2019-11-07 18:49:46 +01:00
committed by sys_ocldev
parent 340e80bb1a
commit 4b2a8e99d8
24 changed files with 423 additions and 17 deletions

View File

@@ -17,12 +17,14 @@ set(NEO_CORE_HELPERS
${CMAKE_CURRENT_SOURCE_DIR}/file_io.h
${CMAKE_CURRENT_SOURCE_DIR}/hash.h
${CMAKE_CURRENT_SOURCE_DIR}/interlocked_max.h
${CMAKE_CURRENT_SOURCE_DIR}/kernel_helpers.cpp
${CMAKE_CURRENT_SOURCE_DIR}/kernel_helpers.h
${CMAKE_CURRENT_SOURCE_DIR}/non_copyable_or_moveable.h
${CMAKE_CURRENT_SOURCE_DIR}/pipeline_select_args.h
${CMAKE_CURRENT_SOURCE_DIR}/ptr_math.h
${CMAKE_CURRENT_SOURCE_DIR}/preamble.h
${CMAKE_CURRENT_SOURCE_DIR}/preamble_base.inl
${CMAKE_CURRENT_SOURCE_DIR}/preamble_bdw_plus.inl
${CMAKE_CURRENT_SOURCE_DIR}/ptr_math.h
${CMAKE_CURRENT_SOURCE_DIR}/register_offsets.h
${CMAKE_CURRENT_SOURCE_DIR}/simd_helper.h
${CMAKE_CURRENT_SOURCE_DIR}/stdio.h

View File

@@ -0,0 +1,40 @@
/*
* Copyright (C) 2019 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "core/helpers/kernel_helpers.h"
#include "core/helpers/basic_math.h"
#include <algorithm>
namespace NEO {
/*
 * Computes the largest number of work groups that satisfies every resource limit passed in.
 *
 * The result is the minimum of:
 *  - availableThreadCount divided by the threads one work group needs
 *    (work-group size, i.e. the product of the first workDim entries of localWorkSize,
 *    divided by simd via Math::divideAndRoundUp),
 *  - dssCount * (maxBarrierCount / numberOfBarriers), applied only when numberOfBarriers > 0,
 *  - availableSlmSize / usedSlmSize, applied only when usedSlmSize > 0.
 *
 * workDim == 0 is treated as a single work item (the size product stays 1).
 *
 * Returns 0 when the inputs cannot describe a launchable work group: simd == 0,
 * localWorkSize == nullptr while workDim > 0, or a work-group size that yields zero
 * threads per group (e.g. one dimension is 0). Those inputs previously led to a null
 * dereference or an integer division by zero.
 */
uint32_t KernelHelper::getMaxWorkGroupCount(uint32_t simd, uint32_t availableThreadCount, uint32_t dssCount, uint32_t availableSlmSize,
                                            uint32_t usedSlmSize, uint32_t maxBarrierCount, uint32_t numberOfBarriers, uint32_t workDim,
                                            const size_t *localWorkSize) {
    // Reject inputs that cannot be evaluated before touching the array or dividing by simd.
    if ((simd == 0u) || ((workDim > 0u) && (localWorkSize == nullptr))) {
        return 0u;
    }

    size_t workGroupSize = 1;
    for (uint32_t i = 0; i < workDim; i++) {
        workGroupSize *= localWorkSize[i];
    }

    auto threadsPerThreadGroup = static_cast<uint32_t>(Math::divideAndRoundUp(workGroupSize, simd));
    // Zero here means an empty work group (or a wrapped narrowing cast); dividing by it is undefined.
    if (threadsPerThreadGroup == 0u) {
        return 0u;
    }
    auto maxWorkGroupsCount = availableThreadCount / threadsPerThreadGroup;

    // The barrier limit only applies to kernels that actually use barriers.
    if (numberOfBarriers > 0) {
        auto maxWorkGroupsCountDueToBarrierUsage = dssCount * (maxBarrierCount / numberOfBarriers);
        maxWorkGroupsCount = std::min(maxWorkGroupsCount, maxWorkGroupsCountDueToBarrierUsage);
    }

    // The SLM limit only applies to kernels that actually use shared local memory.
    if (usedSlmSize > 0) {
        auto maxWorkGroupsCountDueToSlm = availableSlmSize / usedSlmSize;
        maxWorkGroupsCount = std::min(maxWorkGroupsCount, maxWorkGroupsCountDueToSlm);
    }

    return maxWorkGroupsCount;
}
} // namespace NEO

View File

@@ -0,0 +1,19 @@
/*
* Copyright (C) 2019 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include <cstddef>
#include <cstdint>
namespace NEO {

// Groups kernel-related helper computations; holds no state of its own.
struct KernelHelper {
    // Returns the number of work groups allowed by the most restrictive of the
    // thread, barrier and SLM limits described by the arguments.
    // localWorkSize is read for the first workDim entries.
    static uint32_t getMaxWorkGroupCount(
        uint32_t simd,
        uint32_t availableThreadCount,
        uint32_t dssCount,
        uint32_t availableSlmSize,
        uint32_t usedSlmSize,
        uint32_t maxBarrierCount,
        uint32_t numberOfBarriers,
        uint32_t workDim,
        const size_t *localWorkSize);
};

} // namespace NEO

View File

@@ -9,6 +9,7 @@ set(NEO_CORE_HELPERS_TESTS
${CMAKE_CURRENT_SOURCE_DIR}/debug_manager_state_restore.h
${CMAKE_CURRENT_SOURCE_DIR}/file_io_tests.cpp
${CMAKE_CURRENT_SOURCE_DIR}/hash_tests.cpp
${CMAKE_CURRENT_SOURCE_DIR}/kernel_helpers_tests.cpp
${CMAKE_CURRENT_SOURCE_DIR}/memory_leak_listener.h
${CMAKE_CURRENT_SOURCE_DIR}/memory_management.h
${CMAKE_CURRENT_SOURCE_DIR}/simd_helper_tests.inl

View File

@@ -0,0 +1,72 @@
/*
* Copyright (C) 2019 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "core/helpers/basic_math.h"
#include "core/helpers/kernel_helpers.h"
#include "test.h"
using namespace NEO;
// Fixture providing one default argument set for KernelHelper::getMaxWorkGroupCount.
// Individual tests overwrite the members they care about before querying.
struct KernelHelperMaxWorkGroupsTests : ::testing::Test {
    uint32_t simd = 8u;
    uint32_t threadCount = 8u * 1024u;
    uint32_t dssCount = 16u;
    uint32_t availableSlm = 64 * KB;
    uint32_t usedSlm = 0u;          // 0 disables the SLM limit
    uint32_t maxBarrierCount = 32u;
    uint32_t numberOfBarriers = 0u; // 0 disables the barrier limit
    uint32_t workDim = 3u;
    size_t lws[3] = {10, 10, 10};

    // Calls the helper under test with the current fixture state.
    uint32_t getMaxWorkGroupCount() {
        const uint32_t maxWorkGroupCount = KernelHelper::getMaxWorkGroupCount(
            simd, threadCount, dssCount, availableSlm, usedSlm,
            maxBarrierCount, numberOfBarriers, workDim, lws);
        return maxWorkGroupCount;
    }
};
TEST_F(KernelHelperMaxWorkGroupsTests, GivenNoBarriersOrSlmUsedWhenCalculatingMaxWorkGroupsCountThenResultIsCalculatedWithSimd) {
    // With the fixture defaults neither barriers nor SLM are used, so only the thread budget limits the result.
    size_t workItemsPerGroup = 1;
    for (const auto dimensionSize : lws) {
        workItemsPerGroup *= dimensionSize;
    }
    const auto threadsPerGroup = Math::divideAndRoundUp(workItemsPerGroup, simd);
    const auto expectedCount = threadCount / threadsPerGroup;

    EXPECT_EQ(expectedCount, getMaxWorkGroupCount());
}
TEST_F(KernelHelperMaxWorkGroupsTests, GivenBarriersWhenCalculatingMaxWorkGroupsCountThenResultIsCalculatedWithRegardToBarriersCount) {
    // Enabling barriers makes the barrier limit the smallest one for the fixture defaults.
    numberOfBarriers = 16;

    const auto barrierLimitedGroupsPerDss = maxBarrierCount / numberOfBarriers;
    const auto expectedCount = dssCount * barrierLimitedGroupsPerDss;

    EXPECT_EQ(expectedCount, getMaxWorkGroupCount());
}
TEST_F(KernelHelperMaxWorkGroupsTests, GivenUsedSlmSizeWhenCalculatingMaxWorkGroupsCountThenResultIsCalculatedWithRegardToUsedSlmSize) {
    // Using SLM makes the SLM limit the smallest one for the fixture defaults.
    usedSlm = 4 * KB;

    const auto expectedCount = availableSlm / usedSlm;

    EXPECT_EQ(expectedCount, getMaxWorkGroupCount());
}
// Makes each limit (threads, barriers, SLM) the most restrictive one in turn and
// checks that the helper always reports the lowest of them.
TEST_F(KernelHelperMaxWorkGroupsTests, GivenVariousValuesWhenCalculatingMaxWorkGroupsCountThenLowestResultIsAlwaysReturned) {
// Baseline: one work group needs exactly one thread (lws[0] == simd, workDim == 1),
// while the barrier and SLM limits both allow more than one work group.
usedSlm = 1 * KB;
numberOfBarriers = 1;
dssCount = 1;
workDim = 1;
lws[0] = simd;
// A single available thread is the lowest limit.
threadCount = 1;
EXPECT_EQ(1u, getMaxWorkGroupCount());
// With plenty of threads no limit evaluates to one any more.
threadCount = 1024;
EXPECT_NE(1u, getMaxWorkGroupCount());
// numberOfBarriers == maxBarrierCount (with dssCount == 1) brings the barrier limit down to one.
numberOfBarriers = 32;
EXPECT_EQ(1u, getMaxWorkGroupCount());
numberOfBarriers = 1;
EXPECT_NE(1u, getMaxWorkGroupCount());
// Using all of the available SLM brings the SLM limit down to one.
usedSlm = availableSlm;
EXPECT_EQ(1u, getMaxWorkGroupCount());
}

View File

@@ -68,6 +68,9 @@ using cl_unified_shared_memory_capabilities_intel = cl_bitfield;
//Used with createBuffer
#define CL_MEM_ALLOW_UNRESTRICTED_SIZE_INTEL (1 << 23)
// Selector type for the paramName argument of clGetExecutionInfoIntel.
using cl_execution_info_intel = cl_uint;
// Query selector: maximal work group count.
#define CL_EXECUTION_INFO_MAX_WORKGROUP_COUNT_INTEL 0x10100
/******************************
* UNIFIED MEMORY *
*******************************/

View File

@@ -8,6 +8,7 @@
#include "api.h"
#include "core/helpers/aligned_memory.h"
#include "core/helpers/kernel_helpers.h"
#include "core/memory_manager/unified_memory_manager.h"
#include "core/utilities/stackvec.h"
#include "runtime/accelerators/intel_motion_estimation.h"
@@ -3873,6 +3874,7 @@ void *CL_API_CALL clGetExtensionFunctionAddress(const char *funcName) {
RETURN_FUNC_PTR_IF_EXIST(clEnqueueMemAdviseINTEL);
RETURN_FUNC_PTR_IF_EXIST(clGetDeviceFunctionPointerINTEL);
RETURN_FUNC_PTR_IF_EXIST(clGetDeviceGlobalVariablePointerINTEL);
RETURN_FUNC_PTR_IF_EXIST(clGetExecutionInfoIntel);
void *ret = sharingFactory.getExtensionFunctionAddress(funcName);
if (ret != nullptr) {
@@ -5074,3 +5076,54 @@ cl_int CL_API_CALL clSetProgramSpecializationConstant(cl_program program, cl_uin
return retVal;
}
/*
 * Queries execution-related information for a kernel.
 *
 * commandQueue / kernel  - validated with validateObjects; the kernel must also be patched,
 *                          otherwise CL_INVALID_KERNEL is returned.
 * workDim, localWorkSize - work-group shape the query refers to; localWorkSize is read for
 *                          workDim entries. workDim == 0 (no sizes supplied) is accepted.
 * globalWorkOffset       - only logged, not used by the query.
 * paramName              - CL_EXECUTION_INFO_MAX_WORKGROUP_COUNT_INTEL is the only supported
 *                          value; anything else yields CL_INVALID_VALUE.
 * paramValueSize/Value   - output buffer; must be non-null and hold at least a uint32_t,
 *                          otherwise CL_INVALID_VALUE is returned.
 * paramValueSizeRet      - optional; receives the number of bytes written.
 *
 * For the supported query, CL_INVALID_WORK_GROUP_SIZE is returned when workDim > 0 and
 * localWorkSize is null or contains a zero entry: such input cannot be forwarded to
 * Kernel::getMaxWorkGroupCount, which reads the array and divides by the resulting size.
 */
cl_int CL_API_CALL clGetExecutionInfoIntel(cl_command_queue commandQueue,
                                           cl_kernel kernel,
                                           cl_uint workDim,
                                           const size_t *globalWorkOffset,
                                           const size_t *localWorkSize,
                                           cl_execution_info_intel paramName,
                                           size_t paramValueSize,
                                           void *paramValue,
                                           size_t *paramValueSizeRet) {
    cl_int retVal = CL_SUCCESS;
    API_ENTER(&retVal);
    DBG_LOG_INPUTS("commandQueue", commandQueue, "cl_kernel", kernel,
                   "globalWorkOffset[0]", DebugManager.getInput(globalWorkOffset, 0),
                   "globalWorkOffset[1]", DebugManager.getInput(globalWorkOffset, 1),
                   "globalWorkOffset[2]", DebugManager.getInput(globalWorkOffset, 2),
                   "localWorkSize", DebugManager.getSizes(localWorkSize, workDim, true),
                   "paramName", paramName, "paramValueSize", paramValueSize,
                   "paramValue", paramValue, "paramValueSizeRet", paramValueSizeRet);

    retVal = validateObjects(commandQueue, kernel);
    if (CL_SUCCESS != retVal) {
        return retVal;
    }

    auto pKernel = castToObjectOrAbort<Kernel>(kernel);
    if (!pKernel->isPatched()) {
        retVal = CL_INVALID_KERNEL;
        return retVal;
    }

    TakeOwnershipWrapper<Kernel> kernelOwnership(*pKernel, gtpinIsGTPinInitialized());

    switch (paramName) {
    case CL_EXECUTION_INFO_MAX_WORKGROUP_COUNT_INTEL:
        if ((paramValueSize < sizeof(uint32_t)) || (paramValue == nullptr)) {
            retVal = CL_INVALID_VALUE;
            return retVal;
        }
        // A non-zero workDim requires a readable size array...
        if ((workDim > 0u) && (localWorkSize == nullptr)) {
            retVal = CL_INVALID_WORK_GROUP_SIZE;
            return retVal;
        }
        // ...whose entries are all non-zero, otherwise the computation would divide by zero.
        for (cl_uint dim = 0u; dim < workDim; dim++) {
            if (localWorkSize[dim] == 0u) {
                retVal = CL_INVALID_WORK_GROUP_SIZE;
                return retVal;
            }
        }
        *reinterpret_cast<uint32_t *>(paramValue) = pKernel->getMaxWorkGroupCount(workDim, localWorkSize);
        if (paramValueSizeRet != nullptr) {
            *paramValueSizeRet = sizeof(uint32_t);
        }
        break;
    default:
        retVal = CL_INVALID_VALUE;
    }

    return retVal;
}

View File

@@ -1015,6 +1015,17 @@ cl_int CL_API_CALL clGetDeviceGlobalVariablePointerINTEL(
size_t *globalVariableSizeRet,
void **globalVariablePointerRet);
// Queries execution-related information for a kernel.
// paramName selects the query (cl_execution_info_intel); the result is written to
// paramValue (paramValueSize bytes available) and, when paramValueSizeRet is not null,
// the number of bytes written is stored there.
// workDim and localWorkSize describe the work-group shape the query refers to.
cl_int CL_API_CALL clGetExecutionInfoIntel(
cl_command_queue commandQueue,
cl_kernel kernel,
cl_uint workDim,
const size_t *globalWorkOffset,
const size_t *localWorkSize,
cl_execution_info_intel paramName,
size_t paramValueSize,
void *paramValue,
size_t *paramValueSizeRet);
// OpenCL 2.2
cl_int CL_API_CALL clSetProgramSpecializationConstant(

View File

@@ -17,12 +17,22 @@ namespace NEO {
static uint32_t slmSizeId[] = {0, 1, 2, 4, 4, 8, 8, 8, 8, 16, 16, 16, 16, 16, 16, 16};
template <>
uint32_t HardwareCommandsHelper<BDWFamily>::computeSlmValues(uint32_t valueIn) {
valueIn += (4 * KB - 1);
valueIn = valueIn >> 12;
valueIn = std::min(valueIn, 15u);
valueIn = slmSizeId[valueIn];
return valueIn;
// Rounds a non-zero SLM size up to a power of two of at least 4096 bytes; zero stays zero.
uint32_t HardwareCommandsHelper<BDWFamily>::alignSlmSize(uint32_t slmSize) {
    constexpr uint32_t smallestAllocation = 4096u;

    if (slmSize != 0u) {
        const uint32_t atLeastSmallest = (slmSize < smallestAllocation) ? smallestAllocation : slmSize;
        slmSize = Math::nextPowerOfTwo(atLeastSmallest);
    }
    return slmSize;
}
// Translates an SLM size in bytes into the value stored in the slmSizeId lookup table.
template <>
uint32_t HardwareCommandsHelper<BDWFamily>::computeSlmValues(uint32_t slmSize) {
    // Round up to whole 4 KB units; the sum is narrowed to 32 bits before shifting.
    const uint32_t unitsOf4Kb = static_cast<uint32_t>(slmSize + (4 * KB - 1)) >> 12;
    // Anything above the table range maps to its last entry (index 15).
    const uint32_t tableIndex = std::min(unitsOf4Kb, 15u);
    return slmSizeId[tableIndex];
}
// Explicitly instantiate HardwareCommandsHelper for BDW device family

View File

@@ -35,7 +35,8 @@ struct HardwareCommandsHelper : public PerThreadDataHelper {
using INTERFACE_DESCRIPTOR_DATA = typename GfxFamily::INTERFACE_DESCRIPTOR_DATA;
using MI_ATOMIC = typename GfxFamily::MI_ATOMIC;
static uint32_t computeSlmValues(uint32_t valueIn);
static uint32_t alignSlmSize(uint32_t slmSize);
static uint32_t computeSlmValues(uint32_t slmSize);
static INTERFACE_DESCRIPTOR_DATA *getInterfaceDescriptor(
const IndirectHeap &indirectHeap,

View File

@@ -31,13 +31,24 @@ bool HardwareCommandsHelper<GfxFamily>::isPipeControlPriorToPipelineSelectWArequ
}
template <typename GfxFamily>
uint32_t HardwareCommandsHelper<GfxFamily>::computeSlmValues(uint32_t valueIn) {
auto value = std::max(valueIn, 1024u);
// Rounds a non-zero SLM size up to a power of two of at least 1024 bytes; zero stays zero.
// Aborts through UNRECOVERABLE_IF when the aligned size exceeds 64 KB.
uint32_t HardwareCommandsHelper<GfxFamily>::alignSlmSize(uint32_t slmSize) {
    constexpr uint32_t smallestAllocation = 1024u;

    if (slmSize == 0u) {
        return 0u;
    }

    const uint32_t atLeastSmallest = (slmSize < smallestAllocation) ? smallestAllocation : slmSize;
    const uint32_t alignedSize = Math::nextPowerOfTwo(atLeastSmallest);
    UNRECOVERABLE_IF(alignedSize > 64u * KB);
    return alignedSize;
}
template <typename GfxFamily>
uint32_t HardwareCommandsHelper<GfxFamily>::computeSlmValues(uint32_t slmSize) {
auto value = std::max(slmSize, 1024u);
value = Math::nextPowerOfTwo(value);
value = Math::getMinLsbSet(value);
value = value - 9;
DEBUG_BREAK_IF(value > 7);
return value * !!valueIn;
return value * !!slmSize;
}
template <typename GfxFamily>

View File

@@ -69,6 +69,10 @@ class HwHelper {
virtual uint32_t getMocsIndex(GmmHelper &gmmHelper, bool l3enabled, bool l1enabled) const = 0;
virtual bool requiresAuxResolves() const = 0;
virtual bool tilingAllowed(bool isSharedContext, const cl_image_desc &imgDesc, bool forceLinearStorage) = 0;
virtual uint32_t getBarriersCountFromHasBarriers(uint32_t hasBarriers) = 0;
virtual uint32_t calculateAvailableThreadCount(PRODUCT_FAMILY family, uint32_t grfCount, uint32_t euCount,
uint32_t threadsPerEu) = 0;
virtual uint32_t alignSlmSize(uint32_t slmSize) = 0;
static constexpr uint32_t lowPriorityGpgpuEngineIndex = 1;
@@ -170,6 +174,12 @@ class HwHelperHw : public HwHelper {
bool tilingAllowed(bool isSharedContext, const cl_image_desc &imgDesc, bool forceLinearStorage) override;
uint32_t getBarriersCountFromHasBarriers(uint32_t hasBarriers) override;
uint32_t calculateAvailableThreadCount(PRODUCT_FAMILY family, uint32_t grfCount, uint32_t euCount, uint32_t threadsPerEu) override;
uint32_t alignSlmSize(uint32_t slmSize) override;
static AuxTranslationMode getAuxTranslationMode();
protected:

View File

@@ -242,4 +242,15 @@ bool HwHelperHw<GfxFamily>::tilingAllowed(bool isSharedContext, const cl_image_d
return !(imageType == CL_MEM_OBJECT_IMAGE1D || imageType == CL_MEM_OBJECT_IMAGE1D_ARRAY ||
imageType == CL_MEM_OBJECT_IMAGE1D_BUFFER || buffer);
}
// Forwards to the family-specific HardwareCommandsHelper implementation.
template <typename GfxFamily>
uint32_t HwHelperHw<GfxFamily>::alignSlmSize(uint32_t slmSize) {
    using FamilyCommandsHelper = HardwareCommandsHelper<GfxFamily>;
    return FamilyCommandsHelper::alignSlmSize(slmSize);
}
// In this implementation the hasBarriers value is used directly as the barrier count.
template <typename GfxFamily>
uint32_t HwHelperHw<GfxFamily>::getBarriersCountFromHasBarriers(uint32_t hasBarriers) {
    const uint32_t barriersCount = hasBarriers;
    return barriersCount;
}
} // namespace NEO

View File

@@ -65,4 +65,10 @@ uint32_t HwHelperHw<GfxFamily>::getMocsIndex(GmmHelper &gmmHelper, bool l3enable
return gmmHelper.getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED) >> 1;
}
// Returns threadsPerEu * euCount; family and grfCount do not influence the result here.
template <typename GfxFamily>
uint32_t HwHelperHw<GfxFamily>::calculateAvailableThreadCount(PRODUCT_FAMILY family, uint32_t grfCount, uint32_t euCount,
                                                              uint32_t threadsPerEu) {
    const uint32_t availableThreads = threadsPerEu * euCount;
    return availableThreads;
}
} // namespace NEO

View File

@@ -10,6 +10,7 @@
#include "core/helpers/aligned_memory.h"
#include "core/helpers/basic_math.h"
#include "core/helpers/debug_helpers.h"
#include "core/helpers/kernel_helpers.h"
#include "core/helpers/ptr_math.h"
#include "core/memory_manager/unified_memory_manager.h"
#include "runtime/accelerators/intel_accelerator.h"
@@ -974,6 +975,31 @@ void Kernel::clearUnifiedMemoryExecInfo() {
kernelUnifiedMemoryGfxAllocations.clear();
}
// Gathers the device limits and this kernel's resource usage, then delegates the
// actual computation to KernelHelper::getMaxWorkGroupCount.
uint32_t Kernel::getMaxWorkGroupCount(const cl_uint workDim, const size_t *localWorkSize) const {
    const auto &hwInfo = getDevice().getHardwareInfo();
    const auto &gtInfo = hwInfo.gtSystemInfo;
    auto &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily);

    // Fall back to the sub-slice count when no dual sub-slices are reported.
    auto dssCount = gtInfo.DualSubSliceCount;
    if (dssCount == 0) {
        dssCount = gtInfo.SubSliceCount;
    }

    // The execution environment patch is optional; defaults are used when it is absent.
    const auto executionEnvironment = kernelInfo.patchInfo.executionEnvironment;
    const bool hasExecutionEnvironment = (executionEnvironment != nullptr);
    const auto grfCount = (hasExecutionEnvironment ? executionEnvironment->NumGRFRequired : GrfConfig::DefaultGrfNumber);
    const auto hasBarriers = (hasExecutionEnvironment ? executionEnvironment->HasBarriers : 0u);

    const auto threadsPerEu = gtInfo.ThreadCount / gtInfo.EUCount;
    const auto availableThreadCount = hwHelper.calculateAvailableThreadCount(
        hwInfo.platform.eProductFamily, grfCount, gtInfo.EUCount, threadsPerEu);

    const auto availableSlmSize = dssCount * KB * hwInfo.capabilityTable.slmSize;
    const auto usedSlmSize = hwHelper.alignSlmSize(slmTotalSize);
    const auto maxBarrierCount = static_cast<uint32_t>(hwHelper.getMaxBarrierRegisterPerSlice());
    const auto numberOfBarriers = hwHelper.getBarriersCountFromHasBarriers(hasBarriers);

    return KernelHelper::getMaxWorkGroupCount(kernelInfo.getMaxSimdSize(),
                                              availableThreadCount,
                                              dssCount,
                                              availableSlmSize,
                                              usedSlmSize,
                                              maxBarrierCount,
                                              numberOfBarriers,
                                              workDim,
                                              localWorkSize);
}
inline void Kernel::makeArgsResident(CommandStreamReceiver &commandStreamReceiver) {
auto numArgs = kernelInfo.kernelArgInfo.size();
for (decltype(numArgs) argIndex = 0; argIndex < numArgs; argIndex++) {

View File

@@ -398,6 +398,8 @@ class Kernel : public BaseObject<_cl_kernel> {
bool areStatelessWritesUsed() { return containsStatelessWrites; }
// Returns the maximal work group count for this kernel on its device, for a work group
// described by the first workDim entries of localWorkSize.
uint32_t getMaxWorkGroupCount(const cl_uint workDim, const size_t *localWorkSize) const;
protected:
struct ObjectCounts {
uint32_t imageCount;

View File

@@ -74,6 +74,7 @@ set(IGDRCL_SRCS_tests_api
${CMAKE_CURRENT_SOURCE_DIR}/cl_get_device_ids_tests.inl
${CMAKE_CURRENT_SOURCE_DIR}/cl_get_device_info_tests.inl
${CMAKE_CURRENT_SOURCE_DIR}/cl_get_event_profiling_info_tests.inl
${CMAKE_CURRENT_SOURCE_DIR}/cl_get_execution_info_intel_tests.inl
${CMAKE_CURRENT_SOURCE_DIR}/cl_get_extension_function_address_for_platform_tests.inl
${CMAKE_CURRENT_SOURCE_DIR}/cl_get_extension_function_address_tests.inl
${CMAKE_CURRENT_SOURCE_DIR}/cl_get_image_info_tests.inl
@@ -93,8 +94,8 @@ set(IGDRCL_SRCS_tests_api
${CMAKE_CURRENT_SOURCE_DIR}/cl_get_supported_image_formats_tests.inl
${CMAKE_CURRENT_SOURCE_DIR}/cl_icd_get_platform_ids_khr_tests.inl
${CMAKE_CURRENT_SOURCE_DIR}/cl_intel_accelerator_tests.inl
${CMAKE_CURRENT_SOURCE_DIR}/cl_intel_tracing_tests.inl
${CMAKE_CURRENT_SOURCE_DIR}/cl_intel_motion_estimation.cpp
${CMAKE_CURRENT_SOURCE_DIR}/cl_intel_tracing_tests.inl
${CMAKE_CURRENT_SOURCE_DIR}/cl_link_program_tests.inl
${CMAKE_CURRENT_SOURCE_DIR}/cl_mem_locally_uncached_resource_tests.cpp
${CMAKE_CURRENT_SOURCE_DIR}/cl_release_command_queue_tests.inl

View File

@@ -5,6 +5,7 @@
*
*/
#include "unit_tests/api/cl_get_execution_info_intel_tests.inl"
#include "unit_tests/api/cl_get_kernel_work_group_info_tests.inl"
#include "unit_tests/api/cl_get_mem_object_info_tests.inl"
#include "unit_tests/api/cl_get_pipe_info_tests.inl"

View File

@@ -0,0 +1,69 @@
/*
* Copyright (C) 2017-2019 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "runtime/command_queue/command_queue.h"
#include "unit_tests/mocks/mock_kernel.h"
#include "cl_api_tests.h"
using namespace NEO;
using clGetExecutionInfoTests = api_tests;
namespace ULT {
// Checks that every kind of invalid argument makes clGetExecutionInfoIntel report an error.
TEST_F(clGetExecutionInfoTests, GivenInvalidInputWhenCallingGetExecutionInfoThenErrorIsReturned) {
// Null command queue.
retVal = clGetExecutionInfoIntel(nullptr, pKernel, 0, nullptr, nullptr, 0, 0, nullptr, nullptr);
EXPECT_NE(CL_SUCCESS, retVal);
// Null kernel.
retVal = clGetExecutionInfoIntel(pCommandQueue, nullptr, 0, nullptr, nullptr, 0, 0, nullptr, nullptr);
EXPECT_NE(CL_SUCCESS, retVal);
// Kernel reporting itself as not patched; the override is restored right after the call.
pKernel->isPatchedOverride = false;
retVal = clGetExecutionInfoIntel(pCommandQueue, pKernel, 0, nullptr, nullptr, 0, 0, nullptr, nullptr);
EXPECT_NE(CL_SUCCESS, retVal);
pKernel->isPatchedOverride = true;
// Unsupported paramName.
auto invalidParamName = 0xFFFF;
retVal = clGetExecutionInfoIntel(pCommandQueue, pKernel, 0, nullptr, nullptr, invalidParamName, 0, nullptr, nullptr);
EXPECT_NE(CL_SUCCESS, retVal);
// Supported paramName but null paramValue.
uint32_t queryResult;
retVal = clGetExecutionInfoIntel(pCommandQueue, pKernel, 0, nullptr, nullptr, CL_EXECUTION_INFO_MAX_WORKGROUP_COUNT_INTEL,
sizeof(queryResult), nullptr, nullptr);
EXPECT_NE(CL_SUCCESS, retVal);
// Supported paramName but paramValueSize too small for the result.
retVal = clGetExecutionInfoIntel(pCommandQueue, pKernel, 0, nullptr, nullptr, CL_EXECUTION_INFO_MAX_WORKGROUP_COUNT_INTEL,
0, &queryResult, nullptr);
EXPECT_NE(CL_SUCCESS, retVal);
}
TEST_F(clGetExecutionInfoTests, GivenVariousInputWhenGettingMaxWorkGroupCountThenCorrectValuesAreReturned) {
    // Exactly sized output buffer, no size-return pointer.
    uint32_t count32;
    retVal = clGetExecutionInfoIntel(pCommandQueue, pKernel, 0, nullptr, nullptr,
                                     CL_EXECUTION_INFO_MAX_WORKGROUP_COUNT_INTEL,
                                     sizeof(count32), &count32, nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_NE(0u, count32);

    // Oversized output buffer: the same value is reported and only sizeof(uint32_t) bytes are returned.
    uint64_t count64 = 0;
    size_t reportedSize;
    retVal = clGetExecutionInfoIntel(pCommandQueue, pKernel, 0, nullptr, nullptr,
                                     CL_EXECUTION_INFO_MAX_WORKGROUP_COUNT_INTEL,
                                     sizeof(count64), &count64, &reportedSize);
    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_EQ(count32, count64);
    EXPECT_EQ(sizeof(count32), reportedSize);

    // A second kernel created through MockKernel::create must yield the same result.
    std::unique_ptr<MockKernel> secondKernel(MockKernel::create(pCommandQueue->getDevice(), pProgram));
    uint32_t countForSecondKernel;
    retVal = clGetExecutionInfoIntel(pCommandQueue, secondKernel.get(), 0, nullptr, nullptr,
                                     CL_EXECUTION_INFO_MAX_WORKGROUP_COUNT_INTEL,
                                     sizeof(countForSecondKernel), &countForSecondKernel, nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_EQ(count32, countForSecondKernel);
}
} // namespace ULT

View File

@@ -18,8 +18,8 @@ set(IGDRCL_SRCS_tests_helpers
${CMAKE_CURRENT_SOURCE_DIR}/deferred_deleter_helpers_tests.cpp
${CMAKE_CURRENT_SOURCE_DIR}/dirty_state_helpers_tests.cpp
${CMAKE_CURRENT_SOURCE_DIR}/dispatch_flags_helper.h
${CMAKE_CURRENT_SOURCE_DIR}/dispatch_info_tests.cpp
${CMAKE_CURRENT_SOURCE_DIR}/dispatch_info_builder_tests.cpp
${CMAKE_CURRENT_SOURCE_DIR}/dispatch_info_tests.cpp
${CMAKE_CURRENT_SOURCE_DIR}/extendable_enum_tests.cpp
${CMAKE_CURRENT_SOURCE_DIR}/flush_stamp_tests.cpp
${CMAKE_CURRENT_SOURCE_DIR}/get_gpgpu_engines_tests.inl
@@ -34,14 +34,14 @@ set(IGDRCL_SRCS_tests_helpers
${CMAKE_CURRENT_SOURCE_DIR}/hw_parse.inl
${CMAKE_CURRENT_SOURCE_DIR}/kernel_filename_helper.h
${CMAKE_CURRENT_SOURCE_DIR}/kmd_notify_tests.cpp
${CMAKE_CURRENT_SOURCE_DIR}/memory_properties_flags_helpers_tests.cpp
${CMAKE_CURRENT_SOURCE_DIR}/mem_properties_parser_helper_tests.cpp
${CMAKE_CURRENT_SOURCE_DIR}/memory_management_tests.cpp
${CMAKE_CURRENT_SOURCE_DIR}/memory_properties_flags_helpers_tests.cpp
${CMAKE_CURRENT_SOURCE_DIR}/mipmap_tests.cpp
${CMAKE_CURRENT_SOURCE_DIR}/per_thread_data_tests.cpp
${CMAKE_CURRENT_SOURCE_DIR}/ptr_math_tests.cpp
${CMAKE_CURRENT_SOURCE_DIR}/raii_hw_helper.h
${CMAKE_CURRENT_SOURCE_DIR}/queue_helpers_tests.cpp
${CMAKE_CURRENT_SOURCE_DIR}/raii_hw_helper.h
${CMAKE_CURRENT_SOURCE_DIR}/sampler_helpers_tests.cpp
${CMAKE_CURRENT_SOURCE_DIR}/task_information_tests.cpp
${CMAKE_CURRENT_SOURCE_DIR}/test_debug_variables.inl

View File

@@ -927,7 +927,47 @@ HWTEST_F(HardwareCommandsTest, setBindingTableStatesForNoSurfaces) {
delete pKernel;
}
HWTEST_F(HardwareCommandsTest, slmValueScenarios) {
HWTEST_F(HardwareCommandsTest, GivenVariousValuesWhenAlignSlmSizeIsCalledThenCorrectValueIsReturned) {
    // Each entry pairs a requested SLM size with the aligned size the helper must return.
    struct AlignmentCase {
        uint32_t requestedSize;
        uint32_t expectedSize;
    };

    if (::renderCoreFamily == IGFX_GEN8_CORE) {
        // Gen8 expectations: non-zero sizes align to at least 4096.
        const AlignmentCase gen8Cases[] = {
            {0u, 0u},
            {1u, 4096u},
            {1024u, 4096u},
            {1025u, 4096u},
            {2048u, 4096u},
            {2049u, 4096u},
            {4096u, 4096u},
            {4097u, 8192u},
            {8192u, 8192u},
            {8193u, 16384u},
            {12288u, 16384u},
            {16384u, 16384u},
            {16385u, 32768u},
            {24576u, 32768u},
            {32768u, 32768u},
            {32769u, 65536u},
            {49152u, 65536u},
            {65535u, 65536u},
            {65536u, 65536u},
        };
        for (const auto &testCase : gen8Cases) {
            EXPECT_EQ(testCase.expectedSize, HardwareCommandsHelper<FamilyType>::alignSlmSize(testCase.requestedSize));
        }
    } else {
        // Other families: non-zero sizes align to at least 1024.
        const AlignmentCase defaultCases[] = {
            {0u, 0u},
            {1u, 1024u},
            {1024u, 1024u},
            {1025u, 2048u},
            {2048u, 2048u},
            {2049u, 4096u},
            {4096u, 4096u},
            {4097u, 8192u},
            {8192u, 8192u},
            {8193u, 16384u},
            {16384u, 16384u},
            {16385u, 32768u},
            {32768u, 32768u},
            {32769u, 65536u},
            {65536u, 65536u},
        };
        for (const auto &testCase : defaultCases) {
            EXPECT_EQ(testCase.expectedSize, HardwareCommandsHelper<FamilyType>::alignSlmSize(testCase.requestedSize));
        }
    }
}
HWTEST_F(HardwareCommandsTest, GivenVariousValuesWhenComputeSlmSizeIsCalledThenCorrectValueIsReturned) {
if (::renderCoreFamily == IGFX_GEN8_CORE) {
EXPECT_EQ(0u, HardwareCommandsHelper<FamilyType>::computeSlmValues(0));
EXPECT_EQ(1u, HardwareCommandsHelper<FamilyType>::computeSlmValues(1));

View File

@@ -717,3 +717,19 @@ HWTEST_F(HwHelperTest, givenHwHelperWhenAskingForTilingSupportThenReturnValidVal
EXPECT_FALSE(helper.tilingAllowed(false, imgDesc, false));
}
}
HWCMDTEST_F(IGFX_GEN8_CORE, HwHelperTest, GivenVariousValuesWhenCallingGetBarriersCountFromHasBarrierThenCorrectValueIsReturned) {
    auto &helper = HwHelper::get(hardwareInfo.platform.eRenderCoreFamily);

    // The helper is expected to hand back the hasBarriers value unchanged.
    const uint32_t hasBarriersValues[] = {0u, 1u};
    for (const auto hasBarriers : hasBarriersValues) {
        EXPECT_EQ(hasBarriers, helper.getBarriersCountFromHasBarriers(hasBarriers));
    }
}
HWCMDTEST_F(IGFX_GEN8_CORE, HwHelperTest, GivenVariousValuesWhenCallingCalculateAvailableThreadCountThenCorrectValueIsReturned) {
    const auto &gtInfo = hardwareInfo.gtSystemInfo;
    const auto threadsPerEu = gtInfo.ThreadCount / gtInfo.EUCount;

    auto &helper = HwHelper::get(hardwareInfo.platform.eRenderCoreFamily);
    // A GRF count of 0 is passed; the expected result is the full thread count of the device.
    const auto availableThreads = helper.calculateAvailableThreadCount(
        hardwareInfo.platform.eProductFamily, 0, gtInfo.EUCount, threadsPerEu);

    EXPECT_EQ(gtInfo.ThreadCount, availableThreads);
}

View File

@@ -37,7 +37,7 @@ void Kernel::ReflectionSurfaceHelper::patchBlocksCurbe<true>(void *reflectionSur
template void Kernel::patchReflectionSurface<true>(DeviceQueue *, PrintfHandler *);
bool MockKernel::isPatched() const {
return true;
return isPatchedOverride;
}
bool MockKernel::canTransformImages() const {

View File

@@ -246,6 +246,7 @@ class MockKernel : public Kernel {
mutable uint32_t releaseOwnershipCalls = 0;
bool canKernelTransformImages = true;
bool isPatchedOverride = true;
protected:
KernelInfo *kernelInfoAllocated = nullptr;