Add new EngineUsage

Simplify verification of EngineUsage in the tryGetEngine function.
Remove the unused getGpgpuEnginesCount function.

Related-To: NEO-4940
Signed-off-by: Filip Hazubski <filip.hazubski@intel.com>
2025-09-15 13:01:45 +08:00 · 2021-08-16 18:24:13 +00:00
parent fccd22e3c7
commit 63f8c9d98b
29 changed files with 89 additions and 97 deletions
--- a/opencl/source/api/api.cpp
+++ b/opencl/source/api/api.cpp
@@ -5932,13 +5932,6 @@ cl_int CL_API_CALL clEnqueueNDCountKernelINTEL(cl_command_queue commandQueue,

    auto &device = pCommandQueue->getClDevice();
    auto rootDeviceIndex = device.getRootDeviceIndex();
-    auto &hardwareInfo = device.getHardwareInfo();
-    auto &hwHelper = HwHelper::get(hardwareInfo.platform.eRenderCoreFamily);
-    auto engineGroupType = hwHelper.getEngineGroupType(pCommandQueue->getGpgpuEngine().getEngineType(), hardwareInfo);
-    if (!hwHelper.isCooperativeDispatchSupported(engineGroupType, hardwareInfo.platform.eProductFamily)) {
-        retVal = CL_INVALID_COMMAND_QUEUE;
-        return retVal;
-    }

    pKernel = pMultiDeviceKernel->getKernel(rootDeviceIndex);
    size_t globalWorkSize[3];
@@ -5946,6 +5939,22 @@ cl_int CL_API_CALL clEnqueueNDCountKernelINTEL(cl_command_queue commandQueue,
        globalWorkSize[i] = workgroupCount[i] * localWorkSize[i];
    }

+    if (pKernel->usesSyncBuffer()) {
+        if (pKernel->getExecutionType() != KernelExecutionType::Concurrent) {
+            retVal = CL_INVALID_KERNEL;
+            return retVal;
+        }
+
+        auto &hardwareInfo = device.getHardwareInfo();
+        auto &hwHelper = HwHelper::get(hardwareInfo.platform.eRenderCoreFamily);
+        auto engineGroupType = hwHelper.getEngineGroupType(pCommandQueue->getGpgpuEngine().getEngineType(),
+                                                           pCommandQueue->getGpgpuEngine().getEngineUsage(), hardwareInfo);
+        if (!hwHelper.isCooperativeDispatchSupported(engineGroupType)) {
+            retVal = CL_INVALID_COMMAND_QUEUE;
+            return retVal;
+        }
+    }
+
    if (pKernel->getExecutionType() == KernelExecutionType::Concurrent) {
        size_t requestedNumberOfWorkgroups = 1;
        for (size_t i = 0; i < workDim; i++) {
@@ -5958,20 +5967,15 @@ cl_int CL_API_CALL clEnqueueNDCountKernelINTEL(cl_command_queue commandQueue,
        }
    }

-    if (pKernel->usesSyncBuffer()) {
-        if (pKernel->getExecutionType() != KernelExecutionType::Concurrent) {
-            retVal = CL_INVALID_KERNEL;
-            return retVal;
-        }
-
-        device.getDevice().allocateSyncBufferHandler();
-    }
-
    if (!pCommandQueue->validateCapabilityForOperation(CL_QUEUE_CAPABILITY_KERNEL_INTEL, numEventsInWaitList, eventWaitList, event)) {
        retVal = CL_INVALID_OPERATION;
        return retVal;
    }

+    if (pKernel->usesSyncBuffer()) {
+        device.getDevice().allocateSyncBufferHandler();
+    }
+
    TakeOwnershipWrapper<MultiDeviceKernel> kernelOwnership(*pMultiDeviceKernel, gtpinIsGTPinInitialized());
    if (gtpinIsGTPinInitialized()) {
        gtpinNotifyKernelSubmit(kernel, pCommandQueue);