diff --git a/opencl/source/api/api.cpp b/opencl/source/api/api.cpp
index 68fc0a8412..ecda610b89 100644
--- a/opencl/source/api/api.cpp
+++ b/opencl/source/api/api.cpp
@@ -160,6 +160,22 @@ cl_int CL_API_CALL clGetPlatformInfo(cl_platform_id platform,
     return retVal;
 }
 
+// Counts `device` in `retNum` when its type matches `deviceType` and, if `devices` is not null,
+// stores it at devices[retNum] first. Returns false only when a matching device cannot be stored
+// because `devices` already holds `numEntries` entries; returns true in every other case.
+bool checkDeviceTypeAndFillDeviceID(ClDevice &device, cl_device_type deviceType, cl_device_id *devices, cl_uint numEntries, cl_uint &retNum) {
+    if (deviceType & device.getDeviceInfo().deviceType) {
+        if (devices) {
+            if (retNum >= numEntries) {
+                return false;
+            }
+            devices[retNum] = &device;
+        }
+        retNum++;
+    }
+    return true;
+}
+
 cl_int CL_API_CALL clGetDeviceIDs(cl_platform_id platform,
                                   cl_device_type deviceType,
                                   cl_uint numEntries,
@@ -233,18 +249,34 @@ cl_int CL_API_CALL clGetDeviceIDs(cl_platform_id platform,
 
     cl_uint retNum = 0;
     for (auto platformDeviceIndex = 0u; platformDeviceIndex < numDev; platformDeviceIndex++) {
+        bool exposeSubDevices = false;
+
+        if (DebugManager.flags.ReturnSubDevicesAsClDeviceIDs.get() != -1) {
+            exposeSubDevices = DebugManager.flags.ReturnSubDevicesAsClDeviceIDs.get();
+        }
 
         ClDevice *device = pPlatform->getClDevice(platformDeviceIndex);
         UNRECOVERABLE_IF(device == nullptr);
 
-        if (deviceType & device->getDeviceInfo().deviceType) {
-            if (devices) {
-                if (retNum >= numEntries) {
+        exposeSubDevices &= device->getNumGenericSubDevices() > 0u;
+
+        if (exposeSubDevices) {
+            bool outputHasRoom = true;
+            for (uint32_t subDeviceIndex = 0u; subDeviceIndex < device->getNumGenericSubDevices(); subDeviceIndex++) {
+                auto subDevice = device->getSubDevice(subDeviceIndex);
+                UNRECOVERABLE_IF(subDevice == nullptr);
+                outputHasRoom = checkDeviceTypeAndFillDeviceID(*subDevice, deviceType, devices, numEntries, retNum);
+                if (!outputHasRoom) {
                     break;
                 }
-                devices[retNum] = device;
             }
-            retNum++;
+            if (!outputHasRoom) {
+                break;
+            }
+        } else {
+            if (!checkDeviceTypeAndFillDeviceID(*device, deviceType, devices, numEntries, retNum)) {
+                break;
+            }
         }
     }
 
@@ -2058,10 +2090,10 @@ cl_int CL_API_CALL clGetEventInfo(cl_event event,
 
         if (neoEvent->isUserEvent()) {
             auto executionStatus = neoEvent->peekExecutionStatus();
-            //Spec requires initial state to be queued
-            //our current design relies heavily on SUBMITTED status which directly corresponds
-            //to command being able to be submitted, to overcome this we set initial status to queued
-            //and we override the value stored with the value required by the spec.
+            // Spec requires initial state to be queued
+            // our current design relies heavily on SUBMITTED status which directly corresponds
+            // to command being able to be submitted, to overcome this we set initial status to queued
+            // and we override the value stored with the value required by the spec.
             if (executionStatus == CL_QUEUED) {
                 executionStatus = CL_SUBMITTED;
             }
@@ -4371,7 +4403,7 @@ void *CL_API_CALL clGetExtensionFunctionAddress(const char *funcName) {
     // Support an internal call by the ICD
     RETURN_FUNC_PTR_IF_EXIST(clIcdGetPlatformIDsKHR);
 
-    //perf counters
+    // perf counters
     RETURN_FUNC_PTR_IF_EXIST(clCreatePerfCountersCommandQueueINTEL);
     RETURN_FUNC_PTR_IF_EXIST(clSetPerformanceConfigurationINTEL);
     // Support device extensions
diff --git a/opencl/test/unit_test/api/cl_get_device_ids_tests.inl b/opencl/test/unit_test/api/cl_get_device_ids_tests.inl
index b95555494d..c209dfb4c0 100644
--- a/opencl/test/unit_test/api/cl_get_device_ids_tests.inl
+++ b/opencl/test/unit_test/api/cl_get_device_ids_tests.inl
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2018-2021 Intel Corporation
+ * Copyright (C) 2018-2022 Intel Corporation
  *
  * SPDX-License-Identifier: MIT
  *
@@ -146,6 +146,39 @@ TEST(clGetDeviceIDsTest, givenMultipleRootDevicesWhenGetDeviceIdsButNumEntriesIs
     EXPECT_EQ(devices[numEntries], dummyDevice);
 }
 
+TEST(clGetDeviceIDsTest, givenReturnSubDevicesAsClDeviceIDsWhenCallClGetDeviceIDsThenSubDevicesAreReturnedAsSeparateClDevices) {
+    platformsImpl->clear();
+    constexpr auto numRootDevices = 3u;
+    VariableBackup backup(&ultHwConfig);
+    ultHwConfig.useMockedPrepareDeviceEnvironmentsFunc = false;
+    DebugManagerStateRestore restorer;
+    DebugManager.flags.CreateMultipleRootDevices.set(numRootDevices);
+    DebugManager.flags.CreateMultipleSubDevices.set(numRootDevices);
+    DebugManager.flags.ReturnSubDevicesAsClDeviceIDs.set(1);
+    cl_uint maxNumDevices = 0;
+    auto retVal = clGetDeviceIDs(nullptr, CL_DEVICE_TYPE_ALL, 0, nullptr, &maxNumDevices);
+    EXPECT_EQ(retVal, CL_SUCCESS);
+    ASSERT_EQ(numRootDevices * numRootDevices, maxNumDevices);
+
+    cl_uint numDevices = 0;
+    cl_uint numEntries = maxNumDevices - 1;
+    cl_device_id devices[numRootDevices * numRootDevices];
+
+    const auto dummyDevice = reinterpret_cast<cl_device_id>(0x1357);
+    for (auto i = 0u; i < maxNumDevices; i++) {
+        devices[i] = dummyDevice;
+    }
+
+    retVal = clGetDeviceIDs(nullptr, CL_DEVICE_TYPE_ALL, numEntries, devices, &numDevices);
+    EXPECT_EQ(retVal, CL_SUCCESS);
+    EXPECT_LT(numDevices, maxNumDevices);
+    EXPECT_EQ(numEntries, numDevices);
+    for (auto i = 0u; i < numEntries; i++) {
+        EXPECT_EQ(devices[i], platform()->getClDevice(i / numRootDevices)->getSubDevice(i % numRootDevices));
+    }
+    EXPECT_EQ(devices[numEntries], dummyDevice);
+}
+
 TEST(clGetDeviceIDsTest, givenMultipleRootDevicesAndLimitedNumberOfReturnedDevicesWhenGetDeviceIdsThenLimitedNumberOfRootDevicesIsReturned) {
     platformsImpl->clear();
     constexpr auto numRootDevices = 3u;
diff --git a/opencl/test/unit_test/test_files/igdrcl.config b/opencl/test/unit_test/test_files/igdrcl.config
index 059894128d..a8487f3433 100644
--- a/opencl/test/unit_test/test_files/igdrcl.config
+++ b/opencl/test/unit_test/test_files/igdrcl.config
@@ -146,6 +146,7 @@ EnableLocalMemory = -1
 EnableStatelessToStatefulBufferOffsetOpt = -1
 CreateMultipleRootDevices = 0
 CreateMultipleSubDevices = 0
+ReturnSubDevicesAsClDeviceIDs = -1
 LimitAmountOfReturnedDevices = 0
 Enable64kbpages = -1
 OverrideEnableKmdNotify = -1
diff --git a/shared/source/debug_settings/debug_variables_base.inl b/shared/source/debug_settings/debug_variables_base.inl
index bc60657ed6..8f9dce9f20 100644
--- a/shared/source/debug_settings/debug_variables_base.inl
+++ b/shared/source/debug_settings/debug_variables_base.inl
@@ -326,6 +326,7 @@ DECLARE_DEBUG_VARIABLE(bool, EnableFreeMemory, false, "Enable freeMemory in memo
 DECLARE_DEBUG_VARIABLE(bool, ForceSamplerLowFilteringPrecision, false, "Force Low Filtering Precision Sampler mode")
 DECLARE_DEBUG_VARIABLE(bool, EngineInstancedSubDevices, false, "Create subdevices assigned to specific engine")
 DECLARE_DEBUG_VARIABLE(bool, AllowSingleTileEngineInstancedSubDevices, false, "Create subdevices assigned to specific engine on single tile config")
+DECLARE_DEBUG_VARIABLE(int32_t, ReturnSubDevicesAsClDeviceIDs, -1, "Expose each subdevice as a separate device during clGetDeviceIDs API call")
 DECLARE_DEBUG_VARIABLE(int32_t, ForceRunAloneContext, -1, "Control creation of run-alone HW context, -1:default, 0:disable, 1:enable")
 DECLARE_DEBUG_VARIABLE(int32_t, AddClGlSharing, -1, "Add cl-gl extension")
 DECLARE_DEBUG_VARIABLE(int32_t, EnableKernelTunning, -1, "Perform a tunning of enqueue kernel, -1:default(disabled), 0:disable, 1:enable simple kernel tunning, 2:enable full kernel tunning")