Correct device usage in kernel methods

pass device to substituteKernelHeap
use proper device when iterating over devices

Related-To: NEO-5001
Signed-off-by: Mateusz Jablonski <mateusz.jablonski@intel.com>
2026-01-03 06:49:52 +08:00 · 2020-12-17 09:54:45 +00:00
parent 26dc1de24d
commit ee0523ae23
7 changed files with 58 additions and 34 deletions
--- a/opencl/source/gtpin/gtpin_callbacks.cpp
+++ b/opencl/source/gtpin/gtpin_callbacks.cpp
@@ -63,11 +63,11 @@ void gtpinNotifyKernelCreate(cl_kernel kernel) {
    }
    if (isGTPinInitialized) {
        auto pKernel = castToObjectOrAbort<Kernel>(kernel);
-        auto device = pKernel->getDevices()[0];
-        auto rootDeviceIndex = device->getRootDeviceIndex();
+        auto &device = pKernel->getDevices()[0]->getDevice();
+        auto rootDeviceIndex = device.getRootDeviceIndex();
        size_t gtpinBTI = pKernel->getNumberOfBindingTableStates(rootDeviceIndex);
        // Enlarge local copy of SSH by 1 SS
-        GFXCORE_FAMILY genFamily = device->getHardwareInfo().platform.eRenderCoreFamily;
+        GFXCORE_FAMILY genFamily = device.getHardwareInfo().platform.eRenderCoreFamily;
        GTPinHwHelper &gtpinHelper = GTPinHwHelper::get(genFamily);
        if (pKernel->isParentKernel || !gtpinHelper.addSurfaceState(pKernel, rootDeviceIndex)) {
            // Kernel with no SSH or Kernel EM, not supported
@@ -97,7 +97,7 @@ void gtpinNotifyKernelCreate(cl_kernel kernel) {
        instrument_params_out_t paramsOut = {0};
        (*GTPinCallbacks.onKernelCreate)((context_handle_t)(cl_context)context, &paramsIn, &paramsOut);
        // Substitute ISA of created kernel with instrumented code
-        pKernel->substituteKernelHeap(rootDeviceIndex, paramsOut.inst_kernel_binary, paramsOut.inst_kernel_size);
+        pKernel->substituteKernelHeap(device, paramsOut.inst_kernel_binary, paramsOut.inst_kernel_size);
        pKernel->setKernelId(rootDeviceIndex, paramsOut.kernel_id);
    }
 }
--- a/opencl/source/kernel/kernel.cpp
+++ b/opencl/source/kernel/kernel.cpp
@@ -794,7 +794,8 @@ size_t Kernel::getKernelHeapSize(uint32_t rootDeviceIndex) const {
    return getKernelInfo(rootDeviceIndex).heapInfo.KernelHeapSize;
 }

-void Kernel::substituteKernelHeap(uint32_t rootDeviceIndex, void *newKernelHeap, size_t newKernelHeapSize) {
+void Kernel::substituteKernelHeap(const Device &device, void *newKernelHeap, size_t newKernelHeapSize) {
+    auto rootDeviceIndex = device.getRootDeviceIndex();
    KernelInfo *pKernelInfo = const_cast<KernelInfo *>(&getKernelInfo(rootDeviceIndex));
    void **pKernelHeap = const_cast<void **>(&pKernelInfo->heapInfo.pKernelHeap);
    *pKernelHeap = newKernelHeap;
@@ -810,7 +811,7 @@ void Kernel::substituteKernelHeap(uint32_t rootDeviceIndex, void *newKernelHeap,
    } else {
        memoryManager->checkGpuUsageAndDestroyGraphicsAllocations(pKernelInfo->kernelAllocation);
        pKernelInfo->kernelAllocation = nullptr;
-        status = pKernelInfo->createKernelAllocation(getDevice().getDevice(), isBuiltIn);
+        status = pKernelInfo->createKernelAllocation(device, isBuiltIn);
    }
    UNRECOVERABLE_IF(!status);
 }
@@ -2300,21 +2301,21 @@ void Kernel::provideInitializationHints() {
    Context *context = program->getContextPtr();
    if (context == nullptr || !context->isProvidingPerformanceHints())
        return;
-    for (auto i = 0u; i < kernelDeviceInfos.size(); i++) {
-        if (!kernelInfos[i]) {
-            continue;
-        }
-        if (kernelDeviceInfos[i].privateSurfaceSize) {
+
+    for (auto &pClDevice : getDevices()) {
+        auto rootDeviceIndex = pClDevice->getRootDeviceIndex();
+        if (kernelDeviceInfos[rootDeviceIndex].privateSurfaceSize) {
            context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL, PRIVATE_MEMORY_USAGE_TOO_HIGH,
-                                            kernelInfos[i]->kernelDescriptor.kernelMetadata.kernelName.c_str(), kernelDeviceInfos[i].privateSurfaceSize);
+                                            kernelInfos[rootDeviceIndex]->kernelDescriptor.kernelMetadata.kernelName.c_str(),
+                                            kernelDeviceInfos[rootDeviceIndex].privateSurfaceSize);
        }
-        const auto &patchInfo = kernelInfos[i]->patchInfo;
+        const auto &patchInfo = kernelInfos[rootDeviceIndex]->patchInfo;
        if (patchInfo.mediavfestate) {
            auto scratchSize = patchInfo.mediavfestate->PerThreadScratchSpace;
-            scratchSize *= getDevice().getSharedDeviceInfo().computeUnitsUsedForScratch * getKernelInfo(i).getMaxSimdSize();
+            scratchSize *= pClDevice->getSharedDeviceInfo().computeUnitsUsedForScratch * getKernelInfo(rootDeviceIndex).getMaxSimdSize();
            if (scratchSize > 0) {
                context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL, REGISTER_PRESSURE_TOO_HIGH,
-                                                kernelInfos[i]->kernelDescriptor.kernelMetadata.kernelName.c_str(), scratchSize);
+                                                kernelInfos[rootDeviceIndex]->kernelDescriptor.kernelMetadata.kernelName.c_str(), scratchSize);
            }
        }
    }
--- a/opencl/source/kernel/kernel.h
+++ b/opencl/source/kernel/kernel.h
@@ -170,7 +170,7 @@ class Kernel : public BaseObject<_cl_kernel> {

    void resizeSurfaceStateHeap(uint32_t rootDeviceIndex, void *pNewSsh, size_t newSshSize, size_t newBindingTableCount, size_t newBindingTableOffset);

-    void substituteKernelHeap(uint32_t rootDeviceIndex, void *newKernelHeap, size_t newKernelHeapSize);
+    void substituteKernelHeap(const Device &device, void *newKernelHeap, size_t newKernelHeapSize);
    bool isKernelHeapSubstituted(uint32_t rootDeviceIndex) const;
    uint64_t getKernelId(uint32_t rootDeviceIndex) const;
    void setKernelId(uint32_t rootDeviceIndex, uint64_t newKernelId);