mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-21 01:04:57 +08:00
Add clGetExecutionInfoIntel API
This API allows querying execution-related information. The CL_EXECUTION_INFO_MAX_WORKGROUP_COUNT_INTEL parameter queries the maximum number of work groups that can run concurrently on the device. Related-To: NEO-2712 Change-Id: I4e6b4c80aeb06ff966fb543c0a7f05ed54416dab Signed-off-by: Filip Hazubski <filip.hazubski@intel.com>
This commit is contained in:
committed by
sys_ocldev
parent
340e80bb1a
commit
4b2a8e99d8
@@ -17,12 +17,14 @@ set(NEO_CORE_HELPERS
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/file_io.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/hash.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/interlocked_max.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/kernel_helpers.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/kernel_helpers.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/non_copyable_or_moveable.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/pipeline_select_args.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/ptr_math.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/preamble.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/preamble_base.inl
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/preamble_bdw_plus.inl
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/ptr_math.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/register_offsets.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/simd_helper.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/stdio.h
|
||||
|
||||
40
core/helpers/kernel_helpers.cpp
Normal file
40
core/helpers/kernel_helpers.cpp
Normal file
@@ -0,0 +1,40 @@
|
||||
/*
|
||||
* Copyright (C) 2019 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "core/helpers/kernel_helpers.h"
|
||||
|
||||
#include "core/helpers/basic_math.h"
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
namespace NEO {
|
||||
|
||||
uint32_t KernelHelper::getMaxWorkGroupCount(uint32_t simd, uint32_t availableThreadCount, uint32_t dssCount, uint32_t availableSlmSize,
|
||||
uint32_t usedSlmSize, uint32_t maxBarrierCount, uint32_t numberOfBarriers, uint32_t workDim,
|
||||
const size_t *localWorkSize) {
|
||||
size_t workGroupSize = 1;
|
||||
for (uint32_t i = 0; i < workDim; i++) {
|
||||
workGroupSize *= localWorkSize[i];
|
||||
}
|
||||
|
||||
auto threadsPerThreadGroup = static_cast<uint32_t>(Math::divideAndRoundUp(workGroupSize, simd));
|
||||
auto maxWorkGroupsCount = availableThreadCount / threadsPerThreadGroup;
|
||||
|
||||
if (numberOfBarriers > 0) {
|
||||
auto maxWorkGroupsCountDueToBarrierUsage = dssCount * (maxBarrierCount / numberOfBarriers);
|
||||
maxWorkGroupsCount = std::min(maxWorkGroupsCount, maxWorkGroupsCountDueToBarrierUsage);
|
||||
}
|
||||
|
||||
if (usedSlmSize > 0) {
|
||||
auto maxWorkGroupsCountDueToSlm = availableSlmSize / usedSlmSize;
|
||||
maxWorkGroupsCount = std::min(maxWorkGroupsCount, maxWorkGroupsCountDueToSlm);
|
||||
}
|
||||
|
||||
return maxWorkGroupsCount;
|
||||
}
|
||||
|
||||
} // namespace NEO
|
||||
19
core/helpers/kernel_helpers.h
Normal file
19
core/helpers/kernel_helpers.h
Normal file
@@ -0,0 +1,19 @@
|
||||
/*
|
||||
* Copyright (C) 2019 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once

#include <cstddef>
#include <cstdint>

namespace NEO {

// Stateless helpers for kernel execution queries.
struct KernelHelper {
    // Returns how many work groups of the given local size can run on the
    // device at once: the minimum of the thread limit
    // (availableThreadCount / threads needed per group for the given simd),
    // the barrier limit (dssCount * (maxBarrierCount / numberOfBarriers), only
    // when numberOfBarriers > 0) and the SLM limit
    // (availableSlmSize / usedSlmSize, only when usedSlmSize > 0).
    // localWorkSize must hold at least workDim entries.
    static uint32_t getMaxWorkGroupCount(uint32_t simd, uint32_t availableThreadCount, uint32_t dssCount, uint32_t availableSlmSize,
                                         uint32_t usedSlmSize, uint32_t maxBarrierCount, uint32_t numberOfBarriers, uint32_t workDim,
                                         const size_t *localWorkSize);
};

} // namespace NEO
|
||||
@@ -9,6 +9,7 @@ set(NEO_CORE_HELPERS_TESTS
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/debug_manager_state_restore.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/file_io_tests.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/hash_tests.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/kernel_helpers_tests.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/memory_leak_listener.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/memory_management.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/simd_helper_tests.inl
|
||||
|
||||
72
core/unit_tests/helpers/kernel_helpers_tests.cpp
Normal file
72
core/unit_tests/helpers/kernel_helpers_tests.cpp
Normal file
@@ -0,0 +1,72 @@
|
||||
/*
|
||||
* Copyright (C) 2019 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "core/helpers/basic_math.h"
|
||||
#include "core/helpers/kernel_helpers.h"
|
||||
#include "test.h"
|
||||
|
||||
using namespace NEO;
|
||||
|
||||
struct KernelHelperMaxWorkGroupsTests : ::testing::Test {
|
||||
uint32_t simd = 8;
|
||||
uint32_t threadCount = 8 * 1024;
|
||||
uint32_t dssCount = 16;
|
||||
uint32_t availableSlm = 64 * KB;
|
||||
uint32_t usedSlm = 0;
|
||||
uint32_t maxBarrierCount = 32;
|
||||
uint32_t numberOfBarriers = 0;
|
||||
uint32_t workDim = 3;
|
||||
size_t lws[3] = {10, 10, 10};
|
||||
|
||||
uint32_t getMaxWorkGroupCount() {
|
||||
return KernelHelper::getMaxWorkGroupCount(simd, threadCount, dssCount, availableSlm, usedSlm,
|
||||
maxBarrierCount, numberOfBarriers, workDim, lws);
|
||||
}
|
||||
};
|
||||
|
||||
TEST_F(KernelHelperMaxWorkGroupsTests, GivenNoBarriersOrSlmUsedWhenCalculatingMaxWorkGroupsCountThenResultIsCalculatedWithSimd) {
    // Fixture defaults use neither barriers nor SLM, so only the thread limit applies.
    size_t totalWorkItems = 1;
    for (const auto dimensionSize : lws) {
        totalWorkItems *= dimensionSize;
    }
    const auto threadsPerGroup = Math::divideAndRoundUp(totalWorkItems, simd);

    EXPECT_EQ(threadCount / threadsPerGroup, getMaxWorkGroupCount());
}
|
||||
|
||||
TEST_F(KernelHelperMaxWorkGroupsTests, GivenBarriersWhenCalculatingMaxWorkGroupsCountThenResultIsCalculatedWithRegardToBarriersCount) {
    numberOfBarriers = 16;

    // With the fixture defaults the barrier limit is lower than the thread limit.
    const auto workGroupsPerDss = maxBarrierCount / numberOfBarriers;
    const auto barrierLimitedCount = dssCount * workGroupsPerDss;

    EXPECT_EQ(barrierLimitedCount, getMaxWorkGroupCount());
}
|
||||
|
||||
TEST_F(KernelHelperMaxWorkGroupsTests, GivenUsedSlmSizeWhenCalculatingMaxWorkGroupsCountThenResultIsCalculatedWithRegardToUsedSlmSize) {
    usedSlm = 4 * KB;

    // With the fixture defaults the SLM limit is lower than the thread limit.
    const auto slmLimitedCount = availableSlm / usedSlm;

    EXPECT_EQ(slmLimitedCount, getMaxWorkGroupCount());
}
|
||||
|
||||
TEST_F(KernelHelperMaxWorkGroupsTests, GivenVariousValuesWhenCalculatingMaxWorkGroupsCountThenLowestResultIsAlwaysReturned) {
    // One thread per work group; barrier and SLM limits are both above 1.
    workDim = 1;
    lws[0] = simd;
    dssCount = 1;
    numberOfBarriers = 1;
    usedSlm = 1 * KB;

    // Thread limit is the lowest: a single thread is available.
    threadCount = 1;
    EXPECT_EQ(1u, getMaxWorkGroupCount());

    // Relaxing the thread limit lets the result grow.
    threadCount = 1024;
    EXPECT_NE(1u, getMaxWorkGroupCount());

    // Barrier limit is the lowest: every barrier register is used by one group.
    numberOfBarriers = 32;
    EXPECT_EQ(1u, getMaxWorkGroupCount());

    // Relaxing the barrier limit lets the result grow again.
    numberOfBarriers = 1;
    EXPECT_NE(1u, getMaxWorkGroupCount());

    // SLM limit is the lowest: one group consumes all available SLM.
    usedSlm = availableSlm;
    EXPECT_EQ(1u, getMaxWorkGroupCount());
}
|
||||
@@ -68,6 +68,9 @@ using cl_unified_shared_memory_capabilities_intel = cl_bitfield;
|
||||
//Used with createBuffer
|
||||
#define CL_MEM_ALLOW_UNRESTRICTED_SIZE_INTEL (1 << 23)
|
||||
|
||||
// Selector type for the paramName argument of clGetExecutionInfoIntel.
using cl_execution_info_intel = cl_uint;
// Queries the maximal work group count for a kernel; the result is written as a 32-bit unsigned value.
#define CL_EXECUTION_INFO_MAX_WORKGROUP_COUNT_INTEL 0x10100
|
||||
|
||||
/******************************
|
||||
* UNIFIED MEMORY *
|
||||
*******************************/
|
||||
|
||||
@@ -8,6 +8,7 @@
|
||||
#include "api.h"
|
||||
|
||||
#include "core/helpers/aligned_memory.h"
|
||||
#include "core/helpers/kernel_helpers.h"
|
||||
#include "core/memory_manager/unified_memory_manager.h"
|
||||
#include "core/utilities/stackvec.h"
|
||||
#include "runtime/accelerators/intel_motion_estimation.h"
|
||||
@@ -3873,6 +3874,7 @@ void *CL_API_CALL clGetExtensionFunctionAddress(const char *funcName) {
|
||||
RETURN_FUNC_PTR_IF_EXIST(clEnqueueMemAdviseINTEL);
|
||||
RETURN_FUNC_PTR_IF_EXIST(clGetDeviceFunctionPointerINTEL);
|
||||
RETURN_FUNC_PTR_IF_EXIST(clGetDeviceGlobalVariablePointerINTEL);
|
||||
RETURN_FUNC_PTR_IF_EXIST(clGetExecutionInfoIntel);
|
||||
|
||||
void *ret = sharingFactory.getExtensionFunctionAddress(funcName);
|
||||
if (ret != nullptr) {
|
||||
@@ -5074,3 +5076,54 @@ cl_int CL_API_CALL clSetProgramSpecializationConstant(cl_program program, cl_uin
|
||||
|
||||
return retVal;
|
||||
}
|
||||
|
||||
// Queries execution related information for a kernel on a command queue.
//
// Supported paramName values:
//   CL_EXECUTION_INFO_MAX_WORKGROUP_COUNT_INTEL - writes a uint32_t with the
//   value of Kernel::getMaxWorkGroupCount(workDim, localWorkSize) to paramValue.
//
// Returns:
//   the validateObjects() error when commandQueue or kernel is rejected,
//   CL_INVALID_KERNEL when the kernel is not patched,
//   CL_INVALID_VALUE  when paramName is unknown, paramValue is null,
//                     paramValueSize is smaller than sizeof(uint32_t), or
//                     localWorkSize is null while workDim is non-zero,
//   CL_SUCCESS        otherwise; *paramValueSizeRet (when provided) is set to
//                     sizeof(uint32_t).
//
// globalWorkOffset is only logged. retVal is assigned before every return
// because its address was handed to API_ENTER (presumably so the exit trace
// can report the final status - confirm against the macro).
cl_int CL_API_CALL clGetExecutionInfoIntel(cl_command_queue commandQueue,
                                           cl_kernel kernel,
                                           cl_uint workDim,
                                           const size_t *globalWorkOffset,
                                           const size_t *localWorkSize,
                                           cl_execution_info_intel paramName,
                                           size_t paramValueSize,
                                           void *paramValue,
                                           size_t *paramValueSizeRet) {

    cl_int retVal = CL_SUCCESS;
    API_ENTER(&retVal);
    DBG_LOG_INPUTS("commandQueue", commandQueue, "cl_kernel", kernel,
                   "globalWorkOffset[0]", DebugManager.getInput(globalWorkOffset, 0),
                   "globalWorkOffset[1]", DebugManager.getInput(globalWorkOffset, 1),
                   "globalWorkOffset[2]", DebugManager.getInput(globalWorkOffset, 2),
                   "localWorkSize", DebugManager.getSizes(localWorkSize, workDim, true),
                   "paramName", paramName, "paramValueSize", paramValueSize,
                   "paramValue", paramValue, "paramValueSizeRet", paramValueSizeRet);

    retVal = validateObjects(commandQueue, kernel);

    if (CL_SUCCESS != retVal) {
        return retVal;
    }

    auto pKernel = castToObjectOrAbort<Kernel>(kernel);
    if (!pKernel->isPatched()) {
        retVal = CL_INVALID_KERNEL;
        return retVal;
    }

    TakeOwnershipWrapper<Kernel> kernelOwnership(*pKernel, gtpinIsGTPinInitialized());
    switch (paramName) {
    case CL_EXECUTION_INFO_MAX_WORKGROUP_COUNT_INTEL:
        if ((paramValueSize < sizeof(uint32_t)) || (paramValue == nullptr)) {
            retVal = CL_INVALID_VALUE;
            return retVal;
        }
        // The work group size is computed from workDim entries of
        // localWorkSize; reject a missing array instead of dereferencing it.
        if ((workDim > 0u) && (localWorkSize == nullptr)) {
            retVal = CL_INVALID_VALUE;
            return retVal;
        }
        *reinterpret_cast<uint32_t *>(paramValue) = pKernel->getMaxWorkGroupCount(workDim, localWorkSize);
        if (paramValueSizeRet != nullptr) {
            *paramValueSizeRet = sizeof(uint32_t);
        }
        break;
    default:
        retVal = CL_INVALID_VALUE;
    }

    return retVal;
}
|
||||
|
||||
@@ -1015,6 +1015,17 @@ cl_int CL_API_CALL clGetDeviceGlobalVariablePointerINTEL(
|
||||
size_t *globalVariableSizeRet,
|
||||
void **globalVariablePointerRet);
|
||||
|
||||
// Intel extension: queries execution related information selected by
// paramName for the given kernel and command queue. The result is written to
// paramValue (paramValueSize bytes available); the number of bytes written is
// optionally reported through paramValueSizeRet.
cl_int CL_API_CALL clGetExecutionInfoIntel(
    cl_command_queue commandQueue,
    cl_kernel kernel,
    cl_uint workDim,
    const size_t *globalWorkOffset,
    const size_t *localWorkSize,
    cl_execution_info_intel paramName,
    size_t paramValueSize,
    void *paramValue,
    size_t *paramValueSizeRet);
|
||||
|
||||
// OpenCL 2.2
|
||||
|
||||
cl_int CL_API_CALL clSetProgramSpecializationConstant(
|
||||
|
||||
@@ -17,12 +17,22 @@ namespace NEO {
|
||||
static uint32_t slmSizeId[] = {0, 1, 2, 4, 4, 8, 8, 8, 8, 16, 16, 16, 16, 16, 16, 16};
|
||||
|
||||
template <>
|
||||
uint32_t HardwareCommandsHelper<BDWFamily>::computeSlmValues(uint32_t valueIn) {
|
||||
valueIn += (4 * KB - 1);
|
||||
valueIn = valueIn >> 12;
|
||||
valueIn = std::min(valueIn, 15u);
|
||||
valueIn = slmSizeId[valueIn];
|
||||
return valueIn;
|
||||
// BDW variant: a zero request stays zero; any other request is raised to at
// least 4096 bytes and then passed through Math::nextPowerOfTwo.
uint32_t HardwareCommandsHelper<BDWFamily>::alignSlmSize(uint32_t slmSize) {
    constexpr uint32_t minimalSlmSize = 4096u;
    if (slmSize != 0u) {
        slmSize = Math::nextPowerOfTwo(std::max(slmSize, minimalSlmSize));
    }
    return slmSize;
}
|
||||
|
||||
template <>
|
||||
uint32_t HardwareCommandsHelper<BDWFamily>::computeSlmValues(uint32_t slmSize) {
|
||||
slmSize += (4 * KB - 1);
|
||||
slmSize = slmSize >> 12;
|
||||
slmSize = std::min(slmSize, 15u);
|
||||
slmSize = slmSizeId[slmSize];
|
||||
return slmSize;
|
||||
}
|
||||
|
||||
// Explicitly instantiate HardwareCommandsHelper for BDW device family
|
||||
|
||||
@@ -35,7 +35,8 @@ struct HardwareCommandsHelper : public PerThreadDataHelper {
|
||||
using INTERFACE_DESCRIPTOR_DATA = typename GfxFamily::INTERFACE_DESCRIPTOR_DATA;
|
||||
using MI_ATOMIC = typename GfxFamily::MI_ATOMIC;
|
||||
|
||||
// Rounds a non-zero SLM size up to a supported allocation size; 0 stays 0.
static uint32_t alignSlmSize(uint32_t slmSize);
// Converts an SLM size in bytes into a per-family encoded value
// (presumably the value programmed into hardware state - confirm at call sites).
static uint32_t computeSlmValues(uint32_t slmSize);
|
||||
|
||||
static INTERFACE_DESCRIPTOR_DATA *getInterfaceDescriptor(
|
||||
const IndirectHeap &indirectHeap,
|
||||
|
||||
@@ -31,13 +31,24 @@ bool HardwareCommandsHelper<GfxFamily>::isPipeControlPriorToPipelineSelectWArequ
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
uint32_t HardwareCommandsHelper<GfxFamily>::computeSlmValues(uint32_t valueIn) {
|
||||
auto value = std::max(valueIn, 1024u);
|
||||
// Generic variant: a zero request stays zero; any other request is raised to
// at least 1024 bytes and passed through Math::nextPowerOfTwo. A result above
// 64 KB is treated as unrecoverable.
uint32_t HardwareCommandsHelper<GfxFamily>::alignSlmSize(uint32_t slmSize) {
    if (slmSize != 0u) {
        const uint32_t atLeastMinimum = std::max(slmSize, 1024u);
        slmSize = Math::nextPowerOfTwo(atLeastMinimum);
        UNRECOVERABLE_IF(slmSize > 64u * KB);
    }
    return slmSize;
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
uint32_t HardwareCommandsHelper<GfxFamily>::computeSlmValues(uint32_t slmSize) {
|
||||
auto value = std::max(slmSize, 1024u);
|
||||
value = Math::nextPowerOfTwo(value);
|
||||
value = Math::getMinLsbSet(value);
|
||||
value = value - 9;
|
||||
DEBUG_BREAK_IF(value > 7);
|
||||
return value * !!valueIn;
|
||||
return value * !!slmSize;
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
|
||||
@@ -69,6 +69,10 @@ class HwHelper {
|
||||
virtual uint32_t getMocsIndex(GmmHelper &gmmHelper, bool l3enabled, bool l1enabled) const = 0;
|
||||
virtual bool requiresAuxResolves() const = 0;
|
||||
virtual bool tilingAllowed(bool isSharedContext, const cl_image_desc &imgDesc, bool forceLinearStorage) = 0;
|
||||
virtual uint32_t getBarriersCountFromHasBarriers(uint32_t hasBarriers) = 0;
|
||||
virtual uint32_t calculateAvailableThreadCount(PRODUCT_FAMILY family, uint32_t grfCount, uint32_t euCount,
|
||||
uint32_t threadsPerEu) = 0;
|
||||
virtual uint32_t alignSlmSize(uint32_t slmSize) = 0;
|
||||
|
||||
static constexpr uint32_t lowPriorityGpgpuEngineIndex = 1;
|
||||
|
||||
@@ -170,6 +174,12 @@ class HwHelperHw : public HwHelper {
|
||||
|
||||
bool tilingAllowed(bool isSharedContext, const cl_image_desc &imgDesc, bool forceLinearStorage) override;
|
||||
|
||||
uint32_t getBarriersCountFromHasBarriers(uint32_t hasBarriers) override;
|
||||
|
||||
uint32_t calculateAvailableThreadCount(PRODUCT_FAMILY family, uint32_t grfCount, uint32_t euCount, uint32_t threadsPerEu) override;
|
||||
|
||||
uint32_t alignSlmSize(uint32_t slmSize) override;
|
||||
|
||||
static AuxTranslationMode getAuxTranslationMode();
|
||||
|
||||
protected:
|
||||
|
||||
@@ -242,4 +242,15 @@ bool HwHelperHw<GfxFamily>::tilingAllowed(bool isSharedContext, const cl_image_d
|
||||
return !(imageType == CL_MEM_OBJECT_IMAGE1D || imageType == CL_MEM_OBJECT_IMAGE1D_ARRAY ||
|
||||
imageType == CL_MEM_OBJECT_IMAGE1D_BUFFER || buffer);
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
uint32_t HwHelperHw<GfxFamily>::alignSlmSize(uint32_t slmSize) {
|
||||
return HardwareCommandsHelper<GfxFamily>::alignSlmSize(slmSize);
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
uint32_t HwHelperHw<GfxFamily>::getBarriersCountFromHasBarriers(uint32_t hasBarriers) {
|
||||
return hasBarriers;
|
||||
}
|
||||
|
||||
} // namespace NEO
|
||||
|
||||
@@ -65,4 +65,10 @@ uint32_t HwHelperHw<GfxFamily>::getMocsIndex(GmmHelper &gmmHelper, bool l3enable
|
||||
return gmmHelper.getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED) >> 1;
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
uint32_t HwHelperHw<GfxFamily>::calculateAvailableThreadCount(PRODUCT_FAMILY family, uint32_t grfCount, uint32_t euCount,
|
||||
uint32_t threadsPerEu) {
|
||||
return threadsPerEu * euCount;
|
||||
}
|
||||
|
||||
} // namespace NEO
|
||||
|
||||
@@ -10,6 +10,7 @@
|
||||
#include "core/helpers/aligned_memory.h"
|
||||
#include "core/helpers/basic_math.h"
|
||||
#include "core/helpers/debug_helpers.h"
|
||||
#include "core/helpers/kernel_helpers.h"
|
||||
#include "core/helpers/ptr_math.h"
|
||||
#include "core/memory_manager/unified_memory_manager.h"
|
||||
#include "runtime/accelerators/intel_accelerator.h"
|
||||
@@ -974,6 +975,31 @@ void Kernel::clearUnifiedMemoryExecInfo() {
|
||||
kernelUnifiedMemoryGfxAllocations.clear();
|
||||
}
|
||||
|
||||
uint32_t Kernel::getMaxWorkGroupCount(const cl_uint workDim, const size_t *localWorkSize) const {
|
||||
auto &hardwareInfo = getDevice().getHardwareInfo();
|
||||
auto executionEnvironment = kernelInfo.patchInfo.executionEnvironment;
|
||||
auto dssCount = hardwareInfo.gtSystemInfo.DualSubSliceCount;
|
||||
if (dssCount == 0) {
|
||||
dssCount = hardwareInfo.gtSystemInfo.SubSliceCount;
|
||||
}
|
||||
auto &hwHelper = HwHelper::get(hardwareInfo.platform.eRenderCoreFamily);
|
||||
auto availableThreadCount = hwHelper.calculateAvailableThreadCount(
|
||||
hardwareInfo.platform.eProductFamily,
|
||||
((executionEnvironment != nullptr) ? executionEnvironment->NumGRFRequired : GrfConfig::DefaultGrfNumber),
|
||||
hardwareInfo.gtSystemInfo.EUCount, hardwareInfo.gtSystemInfo.ThreadCount / hardwareInfo.gtSystemInfo.EUCount);
|
||||
|
||||
auto hasBarriers = ((executionEnvironment != nullptr) ? executionEnvironment->HasBarriers : 0u);
|
||||
return KernelHelper::getMaxWorkGroupCount(kernelInfo.getMaxSimdSize(),
|
||||
availableThreadCount,
|
||||
dssCount,
|
||||
dssCount * KB * hardwareInfo.capabilityTable.slmSize,
|
||||
hwHelper.alignSlmSize(slmTotalSize),
|
||||
static_cast<uint32_t>(hwHelper.getMaxBarrierRegisterPerSlice()),
|
||||
hwHelper.getBarriersCountFromHasBarriers(hasBarriers),
|
||||
workDim,
|
||||
localWorkSize);
|
||||
}
|
||||
|
||||
inline void Kernel::makeArgsResident(CommandStreamReceiver &commandStreamReceiver) {
|
||||
auto numArgs = kernelInfo.kernelArgInfo.size();
|
||||
for (decltype(numArgs) argIndex = 0; argIndex < numArgs; argIndex++) {
|
||||
|
||||
@@ -398,6 +398,8 @@ class Kernel : public BaseObject<_cl_kernel> {
|
||||
|
||||
bool areStatelessWritesUsed() { return containsStatelessWrites; }
|
||||
|
||||
uint32_t getMaxWorkGroupCount(const cl_uint workDim, const size_t *localWorkSize) const;
|
||||
|
||||
protected:
|
||||
struct ObjectCounts {
|
||||
uint32_t imageCount;
|
||||
|
||||
@@ -74,6 +74,7 @@ set(IGDRCL_SRCS_tests_api
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/cl_get_device_ids_tests.inl
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/cl_get_device_info_tests.inl
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/cl_get_event_profiling_info_tests.inl
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/cl_get_execution_info_intel_tests.inl
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/cl_get_extension_function_address_for_platform_tests.inl
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/cl_get_extension_function_address_tests.inl
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/cl_get_image_info_tests.inl
|
||||
@@ -93,8 +94,8 @@ set(IGDRCL_SRCS_tests_api
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/cl_get_supported_image_formats_tests.inl
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/cl_icd_get_platform_ids_khr_tests.inl
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/cl_intel_accelerator_tests.inl
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/cl_intel_tracing_tests.inl
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/cl_intel_motion_estimation.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/cl_intel_tracing_tests.inl
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/cl_link_program_tests.inl
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/cl_mem_locally_uncached_resource_tests.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/cl_release_command_queue_tests.inl
|
||||
|
||||
@@ -5,6 +5,7 @@
|
||||
*
|
||||
*/
|
||||
|
||||
#include "unit_tests/api/cl_get_execution_info_intel_tests.inl"
|
||||
#include "unit_tests/api/cl_get_kernel_work_group_info_tests.inl"
|
||||
#include "unit_tests/api/cl_get_mem_object_info_tests.inl"
|
||||
#include "unit_tests/api/cl_get_pipe_info_tests.inl"
|
||||
|
||||
69
unit_tests/api/cl_get_execution_info_intel_tests.inl
Normal file
69
unit_tests/api/cl_get_execution_info_intel_tests.inl
Normal file
@@ -0,0 +1,69 @@
|
||||
/*
|
||||
* Copyright (C) 2017-2019 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "runtime/command_queue/command_queue.h"
|
||||
#include "unit_tests/mocks/mock_kernel.h"
|
||||
|
||||
#include "cl_api_tests.h"
|
||||
|
||||
using namespace NEO;
|
||||
|
||||
using clGetExecutionInfoTests = api_tests;
|
||||
|
||||
namespace ULT {
|
||||
|
||||
TEST_F(clGetExecutionInfoTests, GivenInvalidInputWhenCallingGetExecutionInfoThenErrorIsReturned) {
    const cl_execution_info_intel maxWorkGroupCountQuery = CL_EXECUTION_INFO_MAX_WORKGROUP_COUNT_INTEL;

    // Missing command queue.
    retVal = clGetExecutionInfoIntel(nullptr, pKernel, 0, nullptr, nullptr, 0, 0, nullptr, nullptr);
    EXPECT_NE(CL_SUCCESS, retVal);

    // Missing kernel.
    retVal = clGetExecutionInfoIntel(pCommandQueue, nullptr, 0, nullptr, nullptr, 0, 0, nullptr, nullptr);
    EXPECT_NE(CL_SUCCESS, retVal);

    // Kernel that reports itself as not patched.
    pKernel->isPatchedOverride = false;
    retVal = clGetExecutionInfoIntel(pCommandQueue, pKernel, 0, nullptr, nullptr, 0, 0, nullptr, nullptr);
    EXPECT_NE(CL_SUCCESS, retVal);
    pKernel->isPatchedOverride = true;

    // Unknown parameter name.
    const cl_execution_info_intel invalidParamName = 0xFFFF;
    retVal = clGetExecutionInfoIntel(pCommandQueue, pKernel, 0, nullptr, nullptr, invalidParamName, 0, nullptr, nullptr);
    EXPECT_NE(CL_SUCCESS, retVal);

    uint32_t queryResult;

    // Valid size but no output buffer.
    retVal = clGetExecutionInfoIntel(pCommandQueue, pKernel, 0, nullptr, nullptr, maxWorkGroupCountQuery,
                                     sizeof(queryResult), nullptr, nullptr);
    EXPECT_NE(CL_SUCCESS, retVal);

    // Output buffer present but its declared size is too small.
    retVal = clGetExecutionInfoIntel(pCommandQueue, pKernel, 0, nullptr, nullptr, maxWorkGroupCountQuery,
                                     0, &queryResult, nullptr);
    EXPECT_NE(CL_SUCCESS, retVal);
}
|
||||
|
||||
TEST_F(clGetExecutionInfoTests, GivenVariousInputWhenGettingMaxWorkGroupCountThenCorrectValuesAreReturned) {
    const cl_execution_info_intel maxWorkGroupCountQuery = CL_EXECUTION_INFO_MAX_WORKGROUP_COUNT_INTEL;

    // Exact-size output buffer, no size-return pointer.
    uint32_t queryResult;
    retVal = clGetExecutionInfoIntel(pCommandQueue, pKernel, 0, nullptr, nullptr, maxWorkGroupCountQuery,
                                     sizeof(queryResult), &queryResult, nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_NE(0u, queryResult);

    // Larger output buffer: the same value is written and the reported size is that of a uint32_t.
    uint64_t queryResult64 = 0;
    size_t queryResultSize;
    retVal = clGetExecutionInfoIntel(pCommandQueue, pKernel, 0, nullptr, nullptr, maxWorkGroupCountQuery,
                                     sizeof(queryResult64), &queryResult64, &queryResultSize);
    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_EQ(queryResult, queryResult64);
    EXPECT_EQ(sizeof(queryResult), queryResultSize);

    // A kernel created through MockKernel::create must report the same count.
    std::unique_ptr<MockKernel> pKernelWithExecutionEnvironmentPatch(MockKernel::create(pCommandQueue->getDevice(), pProgram));
    cl_kernel kernelWithPatch = pKernelWithExecutionEnvironmentPatch.get();
    uint32_t queryResultWithExecutionEnvironment;
    retVal = clGetExecutionInfoIntel(pCommandQueue, kernelWithPatch, 0, nullptr, nullptr,
                                     maxWorkGroupCountQuery,
                                     sizeof(queryResultWithExecutionEnvironment), &queryResultWithExecutionEnvironment, nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_EQ(queryResult, queryResultWithExecutionEnvironment);
}
|
||||
|
||||
} // namespace ULT
|
||||
@@ -18,8 +18,8 @@ set(IGDRCL_SRCS_tests_helpers
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/deferred_deleter_helpers_tests.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/dirty_state_helpers_tests.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/dispatch_flags_helper.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/dispatch_info_tests.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/dispatch_info_builder_tests.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/dispatch_info_tests.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/extendable_enum_tests.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/flush_stamp_tests.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/get_gpgpu_engines_tests.inl
|
||||
@@ -34,14 +34,14 @@ set(IGDRCL_SRCS_tests_helpers
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/hw_parse.inl
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/kernel_filename_helper.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/kmd_notify_tests.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/memory_properties_flags_helpers_tests.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/mem_properties_parser_helper_tests.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/memory_management_tests.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/memory_properties_flags_helpers_tests.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/mipmap_tests.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/per_thread_data_tests.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/ptr_math_tests.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/raii_hw_helper.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/queue_helpers_tests.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/raii_hw_helper.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/sampler_helpers_tests.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/task_information_tests.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/test_debug_variables.inl
|
||||
|
||||
@@ -927,7 +927,47 @@ HWTEST_F(HardwareCommandsTest, setBindingTableStatesForNoSurfaces) {
|
||||
delete pKernel;
|
||||
}
|
||||
|
||||
HWTEST_F(HardwareCommandsTest, slmValueScenarios) {
|
||||
HWTEST_F(HardwareCommandsTest, GivenVariousValuesWhenAlignSlmSizeIsCalledThenCorrectValueIsReturned) {
    // Requested SLM size paired with the aligned size the helper must return.
    struct SlmAlignmentCase {
        uint32_t requestedSize;
        uint32_t expectedAlignedSize;
    };

    if (::renderCoreFamily == IGFX_GEN8_CORE) {
        // Gen8: the smallest non-zero allocation is 4096 bytes.
        const SlmAlignmentCase gen8Cases[] = {
            {0, 0u},
            {1, 4096u},
            {1024, 4096u},
            {1025, 4096u},
            {2048, 4096u},
            {2049, 4096u},
            {4096, 4096u},
            {4097, 8192u},
            {8192, 8192u},
            {8193, 16384u},
            {12288, 16384u},
            {16384, 16384u},
            {16385, 32768u},
            {24576, 32768u},
            {32768, 32768u},
            {32769, 65536u},
            {49152, 65536u},
            {65535, 65536u},
            {65536, 65536u},
        };
        for (const auto &testCase : gen8Cases) {
            EXPECT_EQ(testCase.expectedAlignedSize, HardwareCommandsHelper<FamilyType>::alignSlmSize(testCase.requestedSize));
        }
    } else {
        // Other families: the smallest non-zero allocation is 1024 bytes.
        const SlmAlignmentCase defaultCases[] = {
            {0, 0u},
            {1, 1024u},
            {1024, 1024u},
            {1025, 2048u},
            {2048, 2048u},
            {2049, 4096u},
            {4096, 4096u},
            {4097, 8192u},
            {8192, 8192u},
            {8193, 16384u},
            {16384, 16384u},
            {16385, 32768u},
            {32768, 32768u},
            {32769, 65536u},
            {65536, 65536u},
        };
        for (const auto &testCase : defaultCases) {
            EXPECT_EQ(testCase.expectedAlignedSize, HardwareCommandsHelper<FamilyType>::alignSlmSize(testCase.requestedSize));
        }
    }
}
|
||||
|
||||
HWTEST_F(HardwareCommandsTest, GivenVariousValuesWhenComputeSlmSizeIsCalledThenCorrectValueIsReturned) {
|
||||
if (::renderCoreFamily == IGFX_GEN8_CORE) {
|
||||
EXPECT_EQ(0u, HardwareCommandsHelper<FamilyType>::computeSlmValues(0));
|
||||
EXPECT_EQ(1u, HardwareCommandsHelper<FamilyType>::computeSlmValues(1));
|
||||
|
||||
@@ -717,3 +717,19 @@ HWTEST_F(HwHelperTest, givenHwHelperWhenAskingForTilingSupportThenReturnValidVal
|
||||
EXPECT_FALSE(helper.tilingAllowed(false, imgDesc, false));
|
||||
}
|
||||
}
|
||||
|
||||
HWCMDTEST_F(IGFX_GEN8_CORE, HwHelperTest, GivenVariousValuesWhenCallingGetBarriersCountFromHasBarrierThenCorrectValueIsReturned) {
    auto &hwHelper = HwHelper::get(hardwareInfo.platform.eRenderCoreFamily);

    // The HasBarriers value is expected to come back unchanged.
    const auto countWithoutBarriers = hwHelper.getBarriersCountFromHasBarriers(0u);
    EXPECT_EQ(0u, countWithoutBarriers);

    const auto countWithBarriers = hwHelper.getBarriersCountFromHasBarriers(1u);
    EXPECT_EQ(1u, countWithBarriers);
}
|
||||
|
||||
HWCMDTEST_F(IGFX_GEN8_CORE, HwHelperTest, GivenVariousValuesWhenCallingCalculateAvailableThreadCountThenCorrectValueIsReturned) {
    const auto &gtSystemInfo = hardwareInfo.gtSystemInfo;
    const auto euCount = gtSystemInfo.EUCount;
    const auto threadsPerEu = gtSystemInfo.ThreadCount / gtSystemInfo.EUCount;

    auto &hwHelper = HwHelper::get(hardwareInfo.platform.eRenderCoreFamily);
    // A GRF count of 0 is passed; the expected result is the device's full thread count.
    auto availableThreads = hwHelper.calculateAvailableThreadCount(hardwareInfo.platform.eProductFamily, 0, euCount, threadsPerEu);

    EXPECT_EQ(gtSystemInfo.ThreadCount, availableThreads);
}
|
||||
|
||||
@@ -37,7 +37,7 @@ void Kernel::ReflectionSurfaceHelper::patchBlocksCurbe<true>(void *reflectionSur
|
||||
template void Kernel::patchReflectionSurface<true>(DeviceQueue *, PrintfHandler *);
|
||||
|
||||
bool MockKernel::isPatched() const {
|
||||
return true;
|
||||
return isPatchedOverride;
|
||||
}
|
||||
|
||||
bool MockKernel::canTransformImages() const {
|
||||
|
||||
@@ -246,6 +246,7 @@ class MockKernel : public Kernel {
|
||||
mutable uint32_t releaseOwnershipCalls = 0;
|
||||
|
||||
bool canKernelTransformImages = true;
|
||||
bool isPatchedOverride = true;
|
||||
|
||||
protected:
|
||||
KernelInfo *kernelInfoAllocated = nullptr;
|
||||
|
||||
Reference in New Issue
Block a user