Add info about supported thread arbitration policies

Signed-off-by: Katarzyna Cencelewska <katarzyna.cencelewska@intel.com>
This commit is contained in:
Katarzyna Cencelewska 2021-04-06 09:51:30 +00:00 committed by Compute-Runtime-Automation
parent aff7b7cdd1
commit 43a032d764
11 changed files with 168 additions and 12 deletions

View File

@ -118,6 +118,20 @@ using cl_unified_shared_memory_capabilities_intel = cl_bitfield;
#define CL_MEM_TYPE_DEVICE_INTEL 0x4198
#define CL_MEM_TYPE_SHARED_INTEL 0x4199
/* cl_command_type */
#define CL_COMMAND_MEMSET_INTEL 0x4204
#define CL_COMMAND_MEMFILL_INTEL 0x4204
#define CL_COMMAND_MEMCPY_INTEL 0x4205
#define CL_COMMAND_MIGRATEMEM_INTEL 0x4206
#define CL_COMMAND_MEMADVISE_INTEL 0x4207
/******************************
* THREAD ARBITRATION POLICY *
*******************************/
/* cl_device_info */
#define CL_DEVICE_SUPPORTED_THREAD_ARBITRATION_POLICY_INTEL 0x4208
/* cl_kernel_exec_info */
#define CL_KERNEL_EXEC_INFO_INDIRECT_HOST_ACCESS_INTEL 0x4200
#define CL_KERNEL_EXEC_INFO_INDIRECT_DEVICE_ACCESS_INTEL 0x4201
@ -129,13 +143,6 @@ using cl_unified_shared_memory_capabilities_intel = cl_bitfield;
#define CL_KERNEL_EXEC_INFO_THREAD_ARBITRATION_POLICY_AFTER_DEPENDENCY_ROUND_ROBIN_INTEL 0x10024
#define CL_KERNEL_EXEC_INFO_THREAD_ARBITRATION_POLICY_INTEL 0x10025
/* cl_command_type */
#define CL_COMMAND_MEMSET_INTEL 0x4204
#define CL_COMMAND_MEMFILL_INTEL 0x4204
#define CL_COMMAND_MEMCPY_INTEL 0x4205
#define CL_COMMAND_MIGRATEMEM_INTEL 0x4206
#define CL_COMMAND_MEMADVISE_INTEL 0x4207
/******************************
* SLICE COUNT SELECTING *
*******************************/

View File

@ -15,6 +15,7 @@
#include "shared/source/os_interface/hw_info_config.h"
#include "opencl/source/cl_device/cl_device.h"
#include "opencl/source/helpers/cl_hw_helper.h"
#include "opencl/source/platform/extensions.h"
#include "opencl/source/sharings/sharing_factory.h"
@ -373,7 +374,12 @@ void ClDevice::initializeCaps() {
deviceInfo.queueFamilyProperties.push_back(properties);
}
}
auto &clHwHelper = NEO::ClHwHelper::get(hwInfo.platform.eRenderCoreFamily);
const std::vector<uint32_t> &supportedThreadArbitrationPolicies = clHwHelper.getSupportedThreadArbitrationPolicies();
deviceInfo.supportedThreadArbitrationPolicies.resize(supportedThreadArbitrationPolicies.size());
for (size_t policy = 0u; policy < supportedThreadArbitrationPolicies.size(); policy++) {
deviceInfo.supportedThreadArbitrationPolicies[policy] = supportedThreadArbitrationPolicies[policy];
}
deviceInfo.preemptionSupported = false;
deviceInfo.maxGlobalVariableSize = ocl21FeaturesEnabled ? 64 * KB : 0;
deviceInfo.globalVariablePreferredTotalSize = ocl21FeaturesEnabled ? static_cast<size_t>(sharedDeviceInfo.maxMemAllocSize) : 0;

View File

@ -17,6 +17,7 @@
#include "opencl/source/cl_device/cl_device_info_map.h"
#include "opencl/source/cl_device/cl_device_vector.h"
#include "opencl/source/helpers/cl_device_helpers.h"
#include "opencl/source/helpers/cl_hw_helper.h"
#include "opencl/source/helpers/get_info_status_mapper.h"
#include "opencl/source/platform/platform.h"
@ -237,6 +238,10 @@ cl_int ClDevice::getDeviceInfo(cl_device_info paramName,
src = deviceInfo.extensionsWithVersion.data();
retSize = srcSize = deviceInfo.extensionsWithVersion.size() * sizeof(cl_name_version);
break;
case CL_DEVICE_SUPPORTED_THREAD_ARBITRATION_POLICY_INTEL:
src = deviceInfo.supportedThreadArbitrationPolicies.data();
retSize = srcSize = deviceInfo.supportedThreadArbitrationPolicies.size() * sizeof(cl_uint);
break;
default:
if (getDeviceInfoForImage(paramName, src, srcSize, retSize) && !getSharedDeviceInfo().imageSupport) {
src = &value;

View File

@ -132,6 +132,7 @@ struct ClDeviceInfo {
cl_unified_shared_memory_capabilities_intel singleDeviceSharedMemCapabilities;
cl_unified_shared_memory_capabilities_intel crossDeviceSharedMemCapabilities;
cl_unified_shared_memory_capabilities_intel sharedSystemMemCapabilities;
StackVec<uint32_t, 4> supportedThreadArbitrationPolicies;
};
// clang-format on

View File

@ -13,6 +13,7 @@
#include "igfxfmid.h"
#include <string>
#include <vector>
namespace NEO {
@ -31,6 +32,8 @@ class ClHwHelper {
virtual bool getQueueFamilyName(std::string &name, EngineGroupType type) const = 0;
virtual cl_ulong getKernelPrivateMemSize(const KernelInfo &kernelInfo) const = 0;
virtual bool preferBlitterForLocalToLocalTransfers() const = 0;
virtual bool isSupportedKernelThreadArbitrationPolicy() const = 0;
virtual std::vector<uint32_t> getSupportedThreadArbitrationPolicies() const = 0;
protected:
virtual bool hasStatelessAccessToBuffer(const KernelInfo &kernelInfo) const = 0;
@ -52,10 +55,11 @@ class ClHwHelperHw : public ClHwHelper {
bool getQueueFamilyName(std::string &name, EngineGroupType type) const override;
cl_ulong getKernelPrivateMemSize(const KernelInfo &kernelInfo) const override;
bool preferBlitterForLocalToLocalTransfers() const override;
bool isSupportedKernelThreadArbitrationPolicy() const override;
std::vector<uint32_t> getSupportedThreadArbitrationPolicies() const override;
protected:
bool hasStatelessAccessToBuffer(const KernelInfo &kernelInfo) const override;
ClHwHelperHw() = default;
};

View File

@ -43,5 +43,12 @@ template <typename GfxFamily>
inline bool ClHwHelperHw<GfxFamily>::preferBlitterForLocalToLocalTransfers() const {
return false;
}
// Tells callers whether a kernel thread arbitration policy may be selected for
// this GfxFamily. The generic implementation always answers true.
template <typename GfxFamily>
bool ClHwHelperHw<GfxFamily>::isSupportedKernelThreadArbitrationPolicy() const {
    return true;
}
// Returns the thread arbitration policy values reported for this GfxFamily.
// The generic implementation lists, in this order: oldest-first, round-robin
// and after-dependency round-robin.
template <typename GfxFamily>
std::vector<uint32_t> ClHwHelperHw<GfxFamily>::getSupportedThreadArbitrationPolicies() const {
    return {
        CL_KERNEL_EXEC_INFO_THREAD_ARBITRATION_POLICY_OLDEST_FIRST_INTEL,
        CL_KERNEL_EXEC_INFO_THREAD_ARBITRATION_POLICY_ROUND_ROBIN_INTEL,
        CL_KERNEL_EXEC_INFO_THREAD_ARBITRATION_POLICY_AFTER_DEPENDENCY_ROUND_ROBIN_INTEL,
    };
}
} // namespace NEO

View File

@ -7,6 +7,7 @@
#include "shared/source/debug_settings/debug_settings_manager.h"
#include "opencl/source/helpers/cl_hw_helper.h"
#include "opencl/source/kernel/kernel.h"
namespace NEO {
@ -16,7 +17,12 @@ bool Kernel::requiresCacheFlushCommand(const CommandQueue &commandQueue) const {
void Kernel::reconfigureKernel() {
}
int Kernel::setKernelThreadArbitrationPolicy(uint32_t policy) {
if (policy == CL_KERNEL_EXEC_INFO_THREAD_ARBITRATION_POLICY_ROUND_ROBIN_INTEL) {
auto hwInfo = clDevice.getHardwareInfo();
auto &hwHelper = NEO::ClHwHelper::get(hwInfo.platform.eRenderCoreFamily);
if (!hwHelper.isSupportedKernelThreadArbitrationPolicy()) {
this->threadArbitrationPolicy = ThreadArbitrationPolicy::NotPresent;
return CL_INVALID_DEVICE;
} else if (policy == CL_KERNEL_EXEC_INFO_THREAD_ARBITRATION_POLICY_ROUND_ROBIN_INTEL) {
this->threadArbitrationPolicy = ThreadArbitrationPolicy::RoundRobin;
} else if (policy == CL_KERNEL_EXEC_INFO_THREAD_ARBITRATION_POLICY_OLDEST_FIRST_INTEL) {
this->threadArbitrationPolicy = ThreadArbitrationPolicy::AgeBased;

View File

@ -8,6 +8,8 @@
#include "shared/source/device/device.h"
#include "shared/source/helpers/hw_info.h"
#include "opencl/source/helpers/cl_hw_helper.h"
#include "cl_api_tests.h"
#include <cstring>
@ -273,6 +275,49 @@ TEST_F(clGetDeviceInfoTests, GivenClDeviceIlVersionParamWhenGettingDeviceInfoThe
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_STREQ("SPIR-V_1.2 ", paramValue.get());
}
using matcherAtMostGen12lp = IsAtMostGfxCore<IGFX_GEN12LP_CORE>;
// Queries CL_DEVICE_SUPPORTED_THREAD_ARBITRATION_POLICY_INTEL and checks that
// the call succeeds, reports the size of three cl_uint entries, and returns
// oldest-first, round-robin and after-dependency round-robin in that order.
HWTEST2_F(clGetDeviceInfoTests, givenClDeviceSupportedThreadArbitrationPolicyIntelWhenCallClGetDeviceInfoThenProperArrayIsReturned, matcherAtMostGen12lp) {
    const cl_uint expectedRetValue[] = {CL_KERNEL_EXEC_INFO_THREAD_ARBITRATION_POLICY_OLDEST_FIRST_INTEL,
                                        CL_KERNEL_EXEC_INFO_THREAD_ARBITRATION_POLICY_ROUND_ROBIN_INTEL,
                                        CL_KERNEL_EXEC_INFO_THREAD_ARBITRATION_POLICY_AFTER_DEPENDENCY_ROUND_ROBIN_INTEL};

    // Zero-fill the output buffer: if the query fails or writes fewer bytes
    // than expected, the memcmp below must compare against defined contents
    // (zero differs from every expected entry) rather than indeterminate stack memory.
    cl_uint paramValue[3] = {};
    static_assert(sizeof(paramValue) == sizeof(expectedRetValue), "output buffer must hold exactly the expected policies");

    const cl_device_info paramName = CL_DEVICE_SUPPORTED_THREAD_ARBITRATION_POLICY_INTEL;
    const size_t paramSize = sizeof(paramValue);
    size_t paramRetSize = 0;

    retVal = clGetDeviceInfo(
        testedClDevice,
        paramName,
        paramSize,
        paramValue,
        &paramRetSize);

    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_EQ(sizeof(expectedRetValue), paramRetSize);
    EXPECT_TRUE(memcmp(expectedRetValue, paramValue, sizeof(expectedRetValue)) == 0);
}
// Runs only when the helper for the default hardware reports that kernel
// thread arbitration policy is NOT supported (skipped otherwise). In that case
// the device-info query must still succeed but report a returned size of zero.
HWTEST_F(clGetDeviceInfoTests, givenClDeviceSupportedThreadArbitrationPolicyIntelWhenThreadArbitrationPolicyChangeNotSupportedAndCallClGetDeviceInfoThenParamRetSizeIsZero) {
    const auto renderCoreFamily = defaultHwInfo->platform.eRenderCoreFamily;
    if (NEO::ClHwHelper::get(renderCoreFamily).isSupportedKernelThreadArbitrationPolicy()) {
        GTEST_SKIP();
    }

    // Buffer large enough for three entries; its contents are never inspected.
    cl_uint policies[3];
    size_t reportedSize = 0;

    retVal = clGetDeviceInfo(testedClDevice,
                             CL_DEVICE_SUPPORTED_THREAD_ARBITRATION_POLICY_INTEL,
                             sizeof(policies),
                             policies,
                             &reportedSize);

    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_EQ(0u, reportedSize);
}
//------------------------------------------------------------------------------
struct GetDeviceInfoP : public ApiFixture<>,

View File

@ -5,6 +5,7 @@
*
*/
#include "opencl/source/helpers/cl_hw_helper.h"
#include "opencl/test/unit_test/command_stream/thread_arbitration_policy_helper.h"
#include "opencl/test/unit_test/mocks/mock_kernel.h"
#include "test.h"
@ -66,6 +67,10 @@ TEST_F(clSetKernelExecInfoTests, GivenNullKernelWhenSettingAdditionalKernelInfoT
}
TEST_F(clSetKernelExecInfoTests, GivenDeviceNotSupportingSvmWhenSettingKernelExecInfoThenErrorIsReturnedOnSvmRelatedParams) {
auto &hwHelper = NEO::ClHwHelper::get(pDevice->getHardwareInfo().platform.eRenderCoreFamily);
if (!hwHelper.isSupportedKernelThreadArbitrationPolicy()) {
GTEST_SKIP();
}
auto hwInfo = executionEnvironment->rootDeviceEnvironments[ApiFixture::testedRootDeviceIndex]->getMutableHardwareInfo();
VariableBackup<bool> ftrSvm{&hwInfo->capabilityTable.ftrSvm, false};
@ -298,6 +303,10 @@ TEST_F(clSetKernelExecInfoTests, givenNonExistingParamNameWithValuesWhenSettingA
}
HWTEST_F(clSetKernelExecInfoTests, givenKernelExecInfoThreadArbitrationPolicyWhenSettingAdditionalKernelInfoThenSuccessIsReturned) {
auto &hwHelper = NEO::ClHwHelper::get(pDevice->getHardwareInfo().platform.eRenderCoreFamily);
if (!hwHelper.isSupportedKernelThreadArbitrationPolicy()) {
GTEST_SKIP();
}
uint32_t newThreadArbitrationPolicy = CL_KERNEL_EXEC_INFO_THREAD_ARBITRATION_POLICY_ROUND_ROBIN_INTEL;
size_t ptrSizeInBytes = sizeof(uint32_t *);
@ -312,7 +321,28 @@ HWTEST_F(clSetKernelExecInfoTests, givenKernelExecInfoThreadArbitrationPolicyWhe
EXPECT_EQ(getNewKernelArbitrationPolicy(newThreadArbitrationPolicy), pMockKernel->getThreadArbitrationPolicy());
}
// Runs only when the helper for the tested device reports that kernel thread
// arbitration policy is NOT supported (skipped otherwise). Requesting the
// round-robin policy through clSetKernelExecInfo must then fail with
// CL_INVALID_DEVICE.
HWTEST_F(clSetKernelExecInfoTests, givenKernelExecInfoThreadArbitrationPolicyWhenNotSupportedAndSettingAdditionalKernelInfoThenClInvalidDeviceIsReturned) {
    const auto &hardwareInfo = pDevice->getHardwareInfo();
    if (NEO::ClHwHelper::get(hardwareInfo.platform.eRenderCoreFamily).isSupportedKernelThreadArbitrationPolicy()) {
        GTEST_SKIP();
    }

    uint32_t requestedPolicy = CL_KERNEL_EXEC_INFO_THREAD_ARBITRATION_POLICY_ROUND_ROBIN_INTEL;
    // The size handed to the API is that of a uint32_t pointer.
    const size_t valueSize = sizeof(uint32_t *);

    retVal = clSetKernelExecInfo(pMockMultiDeviceKernel,
                                 CL_KERNEL_EXEC_INFO_THREAD_ARBITRATION_POLICY_INTEL,
                                 valueSize,
                                 &requestedPolicy);

    EXPECT_EQ(CL_INVALID_DEVICE, retVal);
}
HWTEST_F(clSetKernelExecInfoTests, givenInvalidThreadArbitrationPolicyWhenSettingAdditionalKernelInfoThenClInvalidValueIsReturned) {
auto &hwHelper = NEO::ClHwHelper::get(pDevice->getHardwareInfo().platform.eRenderCoreFamily);
if (!hwHelper.isSupportedKernelThreadArbitrationPolicy()) {
GTEST_SKIP();
}
uint32_t invalidThreadArbitrationPolicy = 0;
size_t ptrSizeInBytes = 1 * sizeof(uint32_t *);

View File

@ -12,6 +12,7 @@
#include "shared/test/unit_test/utilities/base_object_utils.h"
#include "opencl/source/event/user_event.h"
#include "opencl/source/helpers/cl_hw_helper.h"
#include "opencl/source/platform/platform.h"
#include "opencl/test/unit_test/command_stream/thread_arbitration_policy_helper.h"
#include "opencl/test/unit_test/fixtures/enqueue_handler_fixture.h"
@ -498,8 +499,12 @@ HWTEST_F(EnqueueHandlerTest, givenEnqueueHandlerWhenSubCaptureIsOnThenActivateSu
mockCmdQ->release();
}
HWTEST_F(EnqueueHandlerTest, givenEnqueueHandlerWhenClSetKernelExecInfoAlreadysetKernelThreadArbitrationPolicyThenRequiredThreadArbitrationPolicyIsSetProperly) {
HWTEST_F(EnqueueHandlerTest, givenEnqueueHandlerWhenClSetKernelExecInfoAlreadySetKernelThreadArbitrationPolicyThenRequiredThreadArbitrationPolicyIsSetProperly) {
REQUIRE_SVM_OR_SKIP(pClDevice);
auto &hwHelper = NEO::ClHwHelper::get(pClDevice->getHardwareInfo().platform.eRenderCoreFamily);
if (!hwHelper.isSupportedKernelThreadArbitrationPolicy()) {
GTEST_SKIP();
}
DebugManagerStateRestore stateRestore;
DebugManager.flags.AUBDumpSubCaptureMode.set(static_cast<int32_t>(AubSubCaptureManager::SubCaptureMode::Filter));
@ -529,6 +534,42 @@ HWTEST_F(EnqueueHandlerTest, givenEnqueueHandlerWhenClSetKernelExecInfoAlreadyse
mockCmdQ->release();
}
// Runs only when the helper for the tested device reports that kernel thread
// arbitration policy is NOT supported (skipped otherwise). Verifies that
// clSetKernelExecInfo rejects the round-robin request with CL_INVALID_DEVICE
// and that, after enqueueing the kernel, the command stream receiver's
// requiredThreadArbitrationPolicy is 0 rather than the requested policy.
HWTEST_F(EnqueueHandlerTest, givenEnqueueHandlerWhenNotSupportedPolicyChangeThenRequiredThreadArbitrationPolicyNotChangedAndIsSetAsDefault) {
    const auto &hardwareInfo = pClDevice->getHardwareInfo();
    if (NEO::ClHwHelper::get(hardwareInfo.platform.eRenderCoreFamily).isSupportedKernelThreadArbitrationPolicy()) {
        GTEST_SKIP();
    }

    // Restores the debug flags modified below when the test body exits.
    DebugManagerStateRestore stateRestore;
    DebugManager.flags.AUBDumpSubCaptureMode.set(static_cast<int32_t>(AubSubCaptureManager::SubCaptureMode::Filter));

    MockKernelWithInternals kernelInternals(*pClDevice, context);
    Kernel *kernel = kernelInternals.mockKernel;
    MockMultiDispatchInfo multiDispatchInfo(pClDevice, kernel);

    uint32_t requestedPolicy = CL_KERNEL_EXEC_INFO_THREAD_ARBITRATION_POLICY_ROUND_ROBIN_INTEL;
    const size_t valueSize = 1 * sizeof(uint32_t *);
    const auto setInfoStatus = clSetKernelExecInfo(kernelInternals.mockMultiDeviceKernel,
                                                   CL_KERNEL_EXEC_INFO_THREAD_ARBITRATION_POLICY_INTEL,
                                                   valueSize,
                                                   &requestedPolicy);
    EXPECT_EQ(CL_INVALID_DEVICE, setInfoStatus);

    auto mockCmdQ = new MockCommandQueueHw<FamilyType>(context, pClDevice, 0);
    mockCmdQ->template enqueueHandler<CL_COMMAND_NDRANGE_KERNEL>(nullptr, 0, false, multiDispatchInfo, 0, nullptr, nullptr);

    // Read the receiver state only after the enqueue has been processed.
    const auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
    EXPECT_NE(getNewKernelArbitrationPolicy(requestedPolicy), csr.requiredThreadArbitrationPolicy);
    EXPECT_EQ(0u, csr.requiredThreadArbitrationPolicy);

    mockCmdQ->release();
}
HWTEST_F(EnqueueHandlerTest, givenKernelUsingSyncBufferWhenEnqueuingKernelThenSshIsCorrectlyProgrammed) {
using BINDING_TABLE_STATE = typename FamilyType::BINDING_TABLE_STATE;
using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;

View File

@ -3211,7 +3211,11 @@ TEST(KernelTest, givenDefaultKernelWhenItIsCreatedThenItReportsStatelessWrites)
}
TEST(KernelTest, givenPolicyWhensetKernelThreadArbitrationPolicyThenExpectedClValueIsReturned) {
auto device = clUniquePtr(new MockClDevice(MockDevice::createWithNewExecutionEnvironment<MockDevice>(defaultHwInfo.get())));
auto &hwHelper = NEO::ClHwHelper::get(defaultHwInfo->platform.eRenderCoreFamily);
if (!hwHelper.isSupportedKernelThreadArbitrationPolicy()) {
GTEST_SKIP();
}
auto device = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(defaultHwInfo.get()));
MockKernelWithInternals kernel(*device);
EXPECT_EQ(CL_SUCCESS, kernel.mockKernel->setKernelThreadArbitrationPolicy(CL_KERNEL_EXEC_INFO_THREAD_ARBITRATION_POLICY_ROUND_ROBIN_INTEL));
EXPECT_EQ(CL_SUCCESS, kernel.mockKernel->setKernelThreadArbitrationPolicy(CL_KERNEL_EXEC_INFO_THREAD_ARBITRATION_POLICY_OLDEST_FIRST_INTEL));