Add debug flag to expose each subdevice as a separate cl device

Signed-off-by: Lukasz Jobczyk <lukasz.jobczyk@intel.com>
This commit is contained in:
Lukasz Jobczyk
2022-05-10 07:19:29 +00:00
committed by Compute-Runtime-Automation
parent 943ad0e1eb
commit 17aac9d1bf
4 changed files with 74 additions and 11 deletions

View File

@ -160,6 +160,19 @@ cl_int CL_API_CALL clGetPlatformInfo(cl_platform_id platform,
return retVal;
}
// Accounts for a single device while building the clGetDeviceIDs result.
//
// A device whose type does not intersect the deviceType mask is ignored.
// A matching device always bumps retNum; when an output array is supplied
// it is also stored at index retNum first.
//
// Returns false only when an output array is supplied and it already holds
// numEntries devices (the device is neither stored nor counted in that case);
// returns true otherwise so the caller can continue enumerating.
bool checkDeviceTypeAndFillDeviceID(ClDevice &device, cl_device_type deviceType, cl_device_id *devices, cl_uint numEntries, cl_uint &retNum) {
    const bool typeMismatch = !(deviceType & device.getDeviceInfo().deviceType);
    if (typeMismatch) {
        // Nothing to store or count, but enumeration may go on.
        return true;
    }

    if (devices != nullptr) {
        const bool outputFull = retNum >= numEntries;
        if (outputFull) {
            return false;
        }
        devices[retNum] = &device;
    }

    ++retNum;
    return true;
}
cl_int CL_API_CALL clGetDeviceIDs(cl_platform_id platform,
cl_device_type deviceType,
cl_uint numEntries,
@ -233,18 +246,33 @@ cl_int CL_API_CALL clGetDeviceIDs(cl_platform_id platform,
cl_uint retNum = 0;
for (auto platformDeviceIndex = 0u; platformDeviceIndex < numDev; platformDeviceIndex++) {
bool exposeSubDevices = false;
if (DebugManager.flags.ReturnSubDevicesAsClDeviceIDs.get() != -1) {
exposeSubDevices = DebugManager.flags.ReturnSubDevicesAsClDeviceIDs.get();
}
ClDevice *device = pPlatform->getClDevice(platformDeviceIndex);
UNRECOVERABLE_IF(device == nullptr);
if (deviceType & device->getDeviceInfo().deviceType) {
if (devices) {
if (retNum >= numEntries) {
exposeSubDevices &= device->getNumGenericSubDevices() > 0u;
if (exposeSubDevices) {
bool numEntriesReached = false;
for (uint32_t subDeviceIndex = 0u; subDeviceIndex < device->getNumGenericSubDevices(); subDeviceIndex++) {
auto subDevice = device->getSubDevice(subDeviceIndex);
numEntriesReached = checkDeviceTypeAndFillDeviceID(*subDevice, deviceType, devices, numEntries, retNum);
if (!numEntriesReached) {
break;
}
devices[retNum] = device;
}
retNum++;
if (!numEntriesReached) {
break;
}
} else {
if (!checkDeviceTypeAndFillDeviceID(*device, deviceType, devices, numEntries, retNum)) {
break;
}
}
}
@ -2058,10 +2086,10 @@ cl_int CL_API_CALL clGetEventInfo(cl_event event,
if (neoEvent->isUserEvent()) {
auto executionStatus = neoEvent->peekExecutionStatus();
//Spec requires initial state to be queued
//our current design relies heavily on SUBMITTED status which directly corresponds
//to command being able to be submitted, to overcome this we set initial status to queued
//and we override the value stored with the value required by the spec.
// Spec requires initial state to be queued
// our current design relies heavily on SUBMITTED status which directly corresponds
// to command being able to be submitted, to overcome this we set initial status to queued
// and we override the value stored with the value required by the spec.
if (executionStatus == CL_QUEUED) {
executionStatus = CL_SUBMITTED;
}
@ -4371,7 +4399,7 @@ void *CL_API_CALL clGetExtensionFunctionAddress(const char *funcName) {
// Support an internal call by the ICD
RETURN_FUNC_PTR_IF_EXIST(clIcdGetPlatformIDsKHR);
//perf counters
// perf counters
RETURN_FUNC_PTR_IF_EXIST(clCreatePerfCountersCommandQueueINTEL);
RETURN_FUNC_PTR_IF_EXIST(clSetPerformanceConfigurationINTEL);
// Support device extensions

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2018-2021 Intel Corporation
* Copyright (C) 2018-2022 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -146,6 +146,39 @@ TEST(clGetDeviceIDsTest, givenMultipleRootDevicesWhenGetDeviceIdsButNumEntriesIs
EXPECT_EQ(devices[numEntries], dummyDevice);
}
// Checks that with ReturnSubDevicesAsClDeviceIDs set to 1 clGetDeviceIDs reports
// every sub-device of every root device as its own cl_device_id, ordered root
// device by root device, and that it never writes past numEntries entries.
TEST(clGetDeviceIDsTest, givenReturnSubDevicesAsClDeviceIDsWhenCallClGetDeviceIDsThenSubDevicesAreReturnedAsSeparateClDevices) {
    platformsImpl->clear();
    constexpr auto numRootDevices = 3u;
    // Kept as a separate constant so the index arithmetic below does not
    // conflate the root-device count with the per-root sub-device count.
    constexpr auto numSubDevices = 3u;
    constexpr auto expectedNumDevices = numRootDevices * numSubDevices;

    VariableBackup<UltHwConfig> backup(&ultHwConfig);
    ultHwConfig.useMockedPrepareDeviceEnvironmentsFunc = false;
    DebugManagerStateRestore restorer;
    DebugManager.flags.CreateMultipleRootDevices.set(numRootDevices);
    DebugManager.flags.CreateMultipleSubDevices.set(numSubDevices);
    DebugManager.flags.ReturnSubDevicesAsClDeviceIDs.set(1);

    // Initialized so a failing query cannot leave an indeterminate value behind.
    cl_uint maxNumDevices = 0;
    auto retVal = clGetDeviceIDs(nullptr, CL_DEVICE_TYPE_ALL, 0, nullptr, &maxNumDevices);
    EXPECT_EQ(retVal, CL_SUCCESS);
    // Fatal on purpose: the reported count sizes numEntries and every access to
    // the fixed-size array below, so continuing with a wrong count is unsafe.
    ASSERT_EQ(expectedNumDevices, maxNumDevices);

    cl_uint numDevices = 0;
    // Ask for one device fewer than available to exercise the numEntries limit.
    cl_uint numEntries = maxNumDevices - 1;
    cl_device_id devices[expectedNumDevices];
    const auto dummyDevice = reinterpret_cast<cl_device_id>(0x1357);
    for (auto i = 0u; i < expectedNumDevices; i++) {
        devices[i] = dummyDevice;
    }

    retVal = clGetDeviceIDs(nullptr, CL_DEVICE_TYPE_ALL, numEntries, devices, &numDevices);
    EXPECT_EQ(retVal, CL_SUCCESS);
    EXPECT_LT(numDevices, maxNumDevices);
    EXPECT_EQ(numEntries, numDevices);
    for (auto i = 0u; i < numEntries; i++) {
        EXPECT_EQ(devices[i], platform()->getClDevice(i / numSubDevices)->getSubDevice(i % numSubDevices));
    }
    // The slot right after the requested entries must still hold the sentinel.
    EXPECT_EQ(devices[numEntries], dummyDevice);
}
TEST(clGetDeviceIDsTest, givenMultipleRootDevicesAndLimitedNumberOfReturnedDevicesWhenGetDeviceIdsThenLimitedNumberOfRootDevicesIsReturned) {
platformsImpl->clear();
constexpr auto numRootDevices = 3u;

View File

@ -146,6 +146,7 @@ EnableLocalMemory = -1
EnableStatelessToStatefulBufferOffsetOpt = -1
CreateMultipleRootDevices = 0
CreateMultipleSubDevices = 0
ReturnSubDevicesAsClDeviceIDs = -1
LimitAmountOfReturnedDevices = 0
Enable64kbpages = -1
OverrideEnableKmdNotify = -1

View File

@ -326,6 +326,7 @@ DECLARE_DEBUG_VARIABLE(bool, EnableFreeMemory, false, "Enable freeMemory in memo
DECLARE_DEBUG_VARIABLE(bool, ForceSamplerLowFilteringPrecision, false, "Force Low Filtering Precision Sampler mode")
DECLARE_DEBUG_VARIABLE(bool, EngineInstancedSubDevices, false, "Create subdevices assigned to specific engine")
DECLARE_DEBUG_VARIABLE(bool, AllowSingleTileEngineInstancedSubDevices, false, "Create subdevices assigned to specific engine on single tile config")
DECLARE_DEBUG_VARIABLE(int32_t, ReturnSubDevicesAsClDeviceIDs, -1, "Expose each subdevice as a separate device during clGetDeviceIDs API call, -1: default(disabled), 0: disabled, 1: enabled")
DECLARE_DEBUG_VARIABLE(int32_t, ForceRunAloneContext, -1, "Control creation of run-alone HW context, -1:default, 0:disable, 1:enable")
DECLARE_DEBUG_VARIABLE(int32_t, AddClGlSharing, -1, "Add cl-gl extension")
DECLARE_DEBUG_VARIABLE(int32_t, EnableKernelTunning, -1, "Perform a tunning of enqueue kernel, -1:default(disabled), 0:disable, 1:enable simple kernel tunning, 2:enable full kernel tunning")