Add threads count queries

Signed-off-by: Dunajski, Bartosz <bartosz.dunajski@intel.com>
This commit is contained in:
Dunajski, Bartosz
2022-08-24 11:10:22 +00:00
committed by Compute-Runtime-Automation
parent 9763d42379
commit 1a28c2a154
12 changed files with 50 additions and 62 deletions

View File

@@ -338,3 +338,7 @@ typedef cl_bitfield cl_command_queue_mdapi_properties_intel;
#if !defined(cl_khr_external_memory_dma_buf)
#define CL_EXTERNAL_MEMORY_HANDLE_DMA_BUF_KHR 0x2067
#endif
// cl_intel_variable_eu_thread_count
// Query tokens for EU thread counts. Both numeric values are marked "placeholder"
// (presumably not final enum values - confirm before relying on them).
// Device-info token: ClDevice::getDeviceInfo answers it with the shared device info's
// threadsPerEUConfigs array; the reported size is element count * sizeof(uint32_t).
#define CL_DEVICE_EU_THREAD_COUNTS_INTEL 0x1000A // placeholder
// Kernel work-group-info token: Kernel::getWorkGroupInfo answers it with the kernel
// descriptor's kernelAttributes.numThreadsRequired, reported with size sizeof(cl_uint).
#define CL_KERNEL_EU_THREAD_COUNT_INTEL 0x1000B // placeholder

View File

@@ -17,7 +17,6 @@
#include "opencl/source/cl_device/cl_device_get_cap.inl"
#include "opencl/source/cl_device/cl_device_info_map.h"
#include "opencl/source/cl_device/cl_device_vector.h"
#include "opencl/source/helpers/cl_device_helpers.h"
#include "opencl/source/helpers/cl_hw_helper.h"
#include "opencl/source/helpers/get_info_status_mapper.h"
#include "opencl/source/platform/platform.h"
@@ -325,12 +324,15 @@ cl_int ClDevice::getDeviceInfo(cl_device_info paramName,
src = &param.uint;
retSize = srcSize = sizeof(cl_uint);
break;
case CL_DEVICE_EU_THREAD_COUNTS_INTEL:
src = getSharedDeviceInfo().threadsPerEUConfigs.begin();
retSize = srcSize = (getSharedDeviceInfo().threadsPerEUConfigs.size() * sizeof(uint32_t));
break;
default:
if (getDeviceInfoForImage(paramName, src, srcSize, retSize) && !getSharedDeviceInfo().imageSupport) {
src = &value;
break;
}
ClDeviceHelper::getExtraDeviceInfo(*this, paramName, param, src, srcSize, retSize);
}
auto getInfoStatus = GetInfo::getInfo(paramValue, paramValueSize, src, srcSize);

View File

@@ -9,8 +9,6 @@ set(RUNTIME_SRCS_HELPERS_BASE
${CMAKE_CURRENT_SOURCE_DIR}/base_object.cpp
${CMAKE_CURRENT_SOURCE_DIR}/base_object.h
${CMAKE_CURRENT_SOURCE_DIR}/cl_blit_properties.h
${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}cl_device_helpers.cpp
${CMAKE_CURRENT_SOURCE_DIR}/cl_device_helpers.h
${CMAKE_CURRENT_SOURCE_DIR}/cl_helper.h
${CMAKE_CURRENT_SOURCE_DIR}/cl_hw_helper.cpp
${CMAKE_CURRENT_SOURCE_DIR}/cl_hw_helper.h

View File

@@ -1,12 +0,0 @@
/*
* Copyright (C) 2020-2022 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "opencl/source/helpers/cl_device_helpers.h"
namespace NEO {
// Extension point invoked from the default branch of ClDevice::getDeviceInfo's switch.
// This implementation has an empty body: param, src, size and retSize are all left
// exactly as the caller passed them in.
void ClDeviceHelper::getExtraDeviceInfo(const ClDevice &clDevice, cl_device_info paramName, ClDeviceInfoParam &param, const void *&src, size_t &size, size_t &retSize) {}
} // namespace NEO

View File

@@ -1,22 +0,0 @@
/*
* Copyright (C) 2020-2022 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include "opencl/extensions/public/cl_ext_private.h"
#include "CL/cl.h"
#include <cstdint>
namespace NEO {
// Forward declarations only; this header does not need the full definitions.
class ClDevice;
struct ClDeviceInfoParam;
struct HardwareInfo;
// Free-function helpers for ClDevice.
namespace ClDeviceHelper {
// Hook for additional device-info queries.
//   clDevice  - device being queried
//   paramName - requested cl_device_info token
//   param     - scratch storage the implementation may fill
//   src       - in/out pointer to the data to be returned to the caller
//   size      - in/out size of the data pointed to by src
//   retSize   - in/out size to be reported back to the caller
// NOTE(review): parameter roles are inferred from the signature and from the call in
// ClDevice::getDeviceInfo - confirm against the implementation in use.
void getExtraDeviceInfo(const ClDevice &clDevice, cl_device_info paramName, ClDeviceInfoParam &param, const void *&src, size_t &size, size_t &retSize);
}; // namespace ClDeviceHelper
} // namespace NEO

View File

@@ -7,7 +7,6 @@
#include "shared/source/os_interface/hw_info_config.h"
#include "opencl/source/helpers/cl_device_helpers.h"
#include "opencl/source/helpers/cl_hw_helper.h"
namespace NEO {

View File

@@ -6,7 +6,6 @@
set(RUNTIME_SRCS_KERNEL
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}get_additional_kernel_info.cpp
${CMAKE_CURRENT_SOURCE_DIR}/image_transformer.cpp
${CMAKE_CURRENT_SOURCE_DIR}/image_transformer.h
${CMAKE_CURRENT_SOURCE_DIR}/kernel.cpp

View File

@@ -1,16 +0,0 @@
/*
* Copyright (C) 2019-2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "opencl/source/kernel/kernel.h"
namespace NEO {
// Extension point invoked from the default branch of Kernel::getInfo's switch.
// Empty body: paramValue and paramValueSizeRet are left unchanged.
void Kernel::getAdditionalInfo(cl_kernel_info paramName, const void *&paramValue, size_t &paramValueSizeRet) const {
}
// Extension point invoked from the default branch of Kernel::getWorkGroupInfo's switch.
// Empty body: paramValue and paramValueSizeRet are left unchanged.
void Kernel::getAdditionalWorkGroupInfo(cl_kernel_work_group_info paramName, const void *&paramValue, size_t &paramValueSizeRet) const {
}
} // namespace NEO

View File

@@ -449,7 +449,6 @@ cl_int Kernel::getInfo(cl_kernel_info paramName, size_t paramValueSize,
srcSize = sizeof(nonCannonizedGpuAddress);
break;
default:
getAdditionalInfo(paramName, pSrc, srcSize);
break;
}
@@ -583,8 +582,11 @@ cl_int Kernel::getWorkGroupInfo(cl_kernel_work_group_info paramName,
srcSize = sizeof(privateMemSize);
pSrc = &privateMemSize;
break;
case CL_KERNEL_EU_THREAD_COUNT_INTEL:
srcSize = sizeof(cl_uint);
pSrc = &this->getKernelInfo().kernelDescriptor.kernelAttributes.numThreadsRequired;
break;
default:
getAdditionalWorkGroupInfo(paramName, pSrc, srcSize);
break;
}

View File

@@ -156,8 +156,6 @@ class Kernel : public ReferenceTrackedObject<Kernel> {
cl_int getInfo(cl_kernel_info paramName, size_t paramValueSize,
void *paramValue, size_t *paramValueSizeRet) const;
void getAdditionalInfo(cl_kernel_info paramName, const void *&paramValue, size_t &paramValueSizeRet) const;
void getAdditionalWorkGroupInfo(cl_kernel_work_group_info paramName, const void *&paramValue, size_t &paramValueSizeRet) const;
cl_int getArgInfo(cl_uint argIndx, cl_kernel_arg_info paramName,
size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) const;

View File

@@ -178,6 +178,24 @@ HWTEST_F(DeviceTest, WhenDeviceIsCreatedThenActualEngineTypeIsSameAsDefault) {
EXPECT_EQ(defaultCounter, 1);
}
// Verifies that querying CL_DEVICE_EU_THREAD_COUNTS_INTEL first reports the byte size of
// the device's threadsPerEUConfigs array and then returns its contents.
TEST_F(DeviceTest, givenDeviceWithThreadsPerEUConfigsWhenQueryingEuThreadCountsThenConfigsAreReturned) {
    auto device = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(NEO::defaultHwInfo.get(), 0));

    const StackVec<uint32_t, 6> configs = {123U, 456U};
    device->sharedDeviceInfo.threadsPerEUConfigs = configs;

    // Size-only query. Zero-initialized so a failing call cannot leave an indeterminate
    // value to be read by the checks below.
    size_t paramRetSize = 0u;
    cl_int retVal = device->getDeviceInfo(CL_DEVICE_EU_THREAD_COUNTS_INTEL, 0, nullptr, &paramRetSize);
    EXPECT_EQ(CL_SUCCESS, retVal);
    // Fatal on mismatch: the buffer below is sized from paramRetSize and is indexed at
    // [0] and [1], so continuing with a wrong size would read out of bounds.
    ASSERT_EQ(configs.size() * sizeof(cl_uint), paramRetSize);

    // Data query into a buffer of exactly the reported size.
    auto euThreadCounts = std::make_unique<uint32_t[]>(paramRetSize / sizeof(cl_uint));
    retVal = device->getDeviceInfo(CL_DEVICE_EU_THREAD_COUNTS_INTEL, paramRetSize, euThreadCounts.get(), nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_EQ(123U, euThreadCounts[0]);
    EXPECT_EQ(456U, euThreadCounts[1]);
}
HWTEST_F(DeviceTest, givenNoHwCsrTypeAndModifiedDefaultEngineIndexWhenIsSimulationIsCalledThenTrueIsReturned) {
EXPECT_FALSE(pDevice->isSimulation());
auto csr = TbxCommandStreamReceiver::create("", false, *pDevice->executionEnvironment, 0, 1);

View File

@@ -3282,3 +3282,21 @@ TEST_F(KernelTests, GivenCorrectAllocationTypeThenFunctionCheckingSystemMemoryRe
}
}
}
// Verifies that Kernel::getWorkGroupInfo(CL_KERNEL_EU_THREAD_COUNT_INTEL) returns the
// numThreadsRequired value stored in the kernel descriptor, with size sizeof(cl_uint).
TEST(KernelTest, givenKernelWithNumThreadsRequiredPatchTokenWhenQueryingEuThreadCountThenEuThreadCountIsReturned) {
    KernelInfo kernelInfo = {};
    kernelInfo.kernelDescriptor.kernelAttributes.numThreadsRequired = 123U;

    auto rootDeviceIndex = 0u;
    auto device = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(NEO::defaultHwInfo.get(), rootDeviceIndex));
    auto program = std::make_unique<MockProgram>(toClDeviceVector(*device));
    MockKernel kernel(program.get(), kernelInfo, *device);

    // Outputs are zero-initialized so that, if the query fails and writes nothing, the
    // expectations below compare well-defined values instead of indeterminate ones.
    cl_uint euThreadCount = 0u;
    size_t paramRetSize = 0u;
    const cl_int retVal = kernel.getWorkGroupInfo(CL_KERNEL_EU_THREAD_COUNT_INTEL, sizeof(cl_uint), &euThreadCount, &paramRetSize);

    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_EQ(sizeof(cl_uint), paramRetSize);
    EXPECT_EQ(123U, euThreadCount);
}