diff --git a/opencl/source/gtpin/gtpin_callbacks.cpp b/opencl/source/gtpin/gtpin_callbacks.cpp
index 917040fa18..b56dca29bf 100644
--- a/opencl/source/gtpin/gtpin_callbacks.cpp
+++ b/opencl/source/gtpin/gtpin_callbacks.cpp
@@ -63,11 +63,11 @@ void gtpinNotifyKernelCreate(cl_kernel kernel) {
     }
     if (isGTPinInitialized) {
         auto pKernel = castToObjectOrAbort<Kernel>(kernel);
-        auto device = pKernel->getDevices()[0];
-        auto rootDeviceIndex = device->getRootDeviceIndex();
+        auto &device = pKernel->getDevices()[0]->getDevice(); // Device behind the first entry of the kernel's device list; held by reference for the calls below
+        auto rootDeviceIndex = device.getRootDeviceIndex();
         size_t gtpinBTI = pKernel->getNumberOfBindingTableStates(rootDeviceIndex);
         // Enlarge local copy of SSH by 1 SS
-        GFXCORE_FAMILY genFamily = device->getHardwareInfo().platform.eRenderCoreFamily;
+        GFXCORE_FAMILY genFamily = device.getHardwareInfo().platform.eRenderCoreFamily;
         GTPinHwHelper &gtpinHelper = GTPinHwHelper::get(genFamily);
         if (pKernel->isParentKernel || !gtpinHelper.addSurfaceState(pKernel, rootDeviceIndex)) {
             // Kernel with no SSH or Kernel EM, not supported
@@ -97,7 +97,7 @@ void gtpinNotifyKernelCreate(cl_kernel kernel) {
         instrument_params_out_t paramsOut = {0};
         (*GTPinCallbacks.onKernelCreate)((context_handle_t)(cl_context)context, &paramsIn, &paramsOut);
         // Substitute ISA of created kernel with instrumented code
-        pKernel->substituteKernelHeap(rootDeviceIndex, paramsOut.inst_kernel_binary, paramsOut.inst_kernel_size);
+        pKernel->substituteKernelHeap(device, paramsOut.inst_kernel_binary, paramsOut.inst_kernel_size); // takes the Device itself; the root device index is derived from it inside the call
         pKernel->setKernelId(rootDeviceIndex, paramsOut.kernel_id);
     }
 }
diff --git a/opencl/source/kernel/kernel.cpp b/opencl/source/kernel/kernel.cpp
index 67b477cc3f..87f01bef5e 100644
--- a/opencl/source/kernel/kernel.cpp
+++ b/opencl/source/kernel/kernel.cpp
@@ -794,7 +794,8 @@ size_t Kernel::getKernelHeapSize(uint32_t rootDeviceIndex) const {
     return getKernelInfo(rootDeviceIndex).heapInfo.KernelHeapSize;
 }
 
-void Kernel::substituteKernelHeap(uint32_t rootDeviceIndex, void *newKernelHeap, size_t newKernelHeapSize) {
+void Kernel::substituteKernelHeap(const Device &device, void *newKernelHeap, size_t newKernelHeapSize) { // swaps the heap pointer stored in the KernelInfo selected by device's root device index
+    auto rootDeviceIndex = device.getRootDeviceIndex(); // the KernelInfo lookup below is keyed by the caller-supplied device
     KernelInfo *pKernelInfo = const_cast<KernelInfo *>(&getKernelInfo(rootDeviceIndex));
     void **pKernelHeap = const_cast<void **>(&pKernelInfo->heapInfo.pKernelHeap);
     *pKernelHeap = newKernelHeap;
@@ -810,7 +811,7 @@ void Kernel::substituteKernelHeap(uint32_t rootDeviceIndex, void *newKernelHeap,
     } else {
         memoryManager->checkGpuUsageAndDestroyGraphicsAllocations(pKernelInfo->kernelAllocation);
         pKernelInfo->kernelAllocation = nullptr;
-        status = pKernelInfo->createKernelAllocation(getDevice().getDevice(), isBuiltIn);
+        status = pKernelInfo->createKernelAllocation(device, isBuiltIn); // re-create the allocation with the caller-supplied device instead of getDevice()
     }
     UNRECOVERABLE_IF(!status);
 }
@@ -2300,21 +2301,21 @@ void Kernel::provideInitializationHints() {
     Context *context = program->getContextPtr();
     if (context == nullptr || !context->isProvidingPerformanceHints())
         return;
-    for (auto i = 0u; i < kernelDeviceInfos.size(); i++) {
-        if (!kernelInfos[i]) {
-            continue;
-        }
-        if (kernelDeviceInfos[i].privateSurfaceSize) {
+
+    for (auto &pClDevice : getDevices()) { // one pass per device returned by getDevices()
+        auto rootDeviceIndex = pClDevice->getRootDeviceIndex(); // kernelInfos and kernelDeviceInfos are indexed by root device index below
+        if (kernelDeviceInfos[rootDeviceIndex].privateSurfaceSize) {
             context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL, PRIVATE_MEMORY_USAGE_TOO_HIGH,
-                                            kernelInfos[i]->kernelDescriptor.kernelMetadata.kernelName.c_str(), kernelDeviceInfos[i].privateSurfaceSize);
+                                            kernelInfos[rootDeviceIndex]->kernelDescriptor.kernelMetadata.kernelName.c_str(),
+                                            kernelDeviceInfos[rootDeviceIndex].privateSurfaceSize);
         }
-        const auto &patchInfo = kernelInfos[i]->patchInfo;
+        const auto &patchInfo = kernelInfos[rootDeviceIndex]->patchInfo; // NOTE(review): dereferenced without the null check the removed loop had — presumably every device in getDevices() has a KernelInfo; confirm
         if (patchInfo.mediavfestate) {
             auto scratchSize = patchInfo.mediavfestate->PerThreadScratchSpace;
-            scratchSize *= getDevice().getSharedDeviceInfo().computeUnitsUsedForScratch * getKernelInfo(i).getMaxSimdSize();
+            scratchSize *= pClDevice->getSharedDeviceInfo().computeUnitsUsedForScratch * getKernelInfo(rootDeviceIndex).getMaxSimdSize(); // scale by this device's computeUnitsUsedForScratch and this kernel info's max SIMD size
             if (scratchSize > 0) {
                 context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL, REGISTER_PRESSURE_TOO_HIGH,
-                                                kernelInfos[i]->kernelDescriptor.kernelMetadata.kernelName.c_str(), scratchSize);
+                                                kernelInfos[rootDeviceIndex]->kernelDescriptor.kernelMetadata.kernelName.c_str(), scratchSize);
             }
         }
     }
diff --git a/opencl/source/kernel/kernel.h b/opencl/source/kernel/kernel.h
index 3061944ddb..878b269834 100644
--- a/opencl/source/kernel/kernel.h
+++ b/opencl/source/kernel/kernel.h
@@ -170,7 +170,7 @@ class Kernel : public BaseObject<_cl_kernel> {
 
     void resizeSurfaceStateHeap(uint32_t rootDeviceIndex, void *pNewSsh, size_t newSshSize, size_t newBindingTableCount, size_t newBindingTableOffset);
 
-    void substituteKernelHeap(uint32_t rootDeviceIndex, void *newKernelHeap, size_t newKernelHeapSize);
+    void substituteKernelHeap(const Device &device, void *newKernelHeap, size_t newKernelHeapSize); // device selects the per-root-device KernelInfo and is passed to createKernelAllocation when the allocation is re-created
     bool isKernelHeapSubstituted(uint32_t rootDeviceIndex) const;
     uint64_t getKernelId(uint32_t rootDeviceIndex) const;
     void setKernelId(uint32_t rootDeviceIndex, uint64_t newKernelId);
diff --git a/opencl/test/unit_test/context/driver_diagnostics_tests.cpp b/opencl/test/unit_test/context/driver_diagnostics_tests.cpp
index 5c06ecc61b..ca156332df 100644
--- a/opencl/test/unit_test/context/driver_diagnostics_tests.cpp
+++ b/opencl/test/unit_test/context/driver_diagnostics_tests.cpp
@@ -707,22 +707,30 @@ TEST_F(PerformanceHintTest, givenUncompressedImageWhenItsCreatedThenProperPerfor
 
 TEST_P(PerformanceHintKernelTest, GivenSpillFillWhenKernelIsInitializedThenContextProvidesProperHint) {
 
-    auto pDevice = castToObject<ClDevice>(devices[0]);
-    auto rootDeviceIndex = pDevice->getRootDeviceIndex();
     auto size = zeroSized ? 0 : 1024;
-    MockKernelWithInternals mockKernel(*pDevice, context);
+    MockKernelWithInternals mockKernel(context->getDevices(), context); // build the mock kernel from the context's whole device vector
     SPatchMediaVFEState mediaVFEstate;
 
     mediaVFEstate.PerThreadScratchSpace = size;
 
+    uint32_t computeUnitsForScratch[] = {0x10, 0x20}; // injected per-device values, looked up by root device index below
+
+    for (auto &pClDevice : context->getDevices()) {
+        auto &deviceInfo = const_cast<DeviceInfo &>(pClDevice->getSharedDeviceInfo()); // cast away const so the test can overwrite the field
+        deviceInfo.computeUnitsUsedForScratch = computeUnitsForScratch[pClDevice->getRootDeviceIndex()]; // NOTE(review): the array has two entries — assumes root device indices are only 0 and 1; confirm against the fixture
+    }
+
     mockKernel.kernelInfo.patchInfo.mediavfestate = &mediaVFEstate;
-    size *= pDevice->getSharedDeviceInfo().computeUnitsUsedForScratch * mockKernel.mockKernel->getKernelInfo(rootDeviceIndex).getMaxSimdSize();
 
     mockKernel.mockKernel->initialize();
 
-    snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[REGISTER_PRESSURE_TOO_HIGH],
-             mockKernel.mockKernel->getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelMetadata.kernelName.c_str(), size);
-    EXPECT_EQ(!zeroSized, containsHint(expectedHint, userData));
+    for (auto &pClDevice : context->getDevices()) { // verify the hint separately for every device in the context
+        auto rootDeviceIndex = pClDevice->getRootDeviceIndex();
+        auto expectedSize = size * pClDevice->getSharedDeviceInfo().computeUnitsUsedForScratch * mockKernel.mockKernel->getKernelInfo(rootDeviceIndex).getMaxSimdSize(); // per-thread scratch size * this device's compute units * max SIMD size
+        snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[REGISTER_PRESSURE_TOO_HIGH],
+                 mockKernel.mockKernel->getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelMetadata.kernelName.c_str(), expectedSize);
+        EXPECT_EQ(!zeroSized, containsHint(expectedHint, userData)); // the hint must be present exactly when the test parameter is not zero-sized
+    }
 }
 
 TEST_P(PerformanceHintKernelTest, GivenPrivateSurfaceWhenKernelIsInitializedThenContextProvidesProperHint) {
diff --git a/opencl/test/unit_test/gtpin/gtpin_tests.cpp b/opencl/test/unit_test/gtpin/gtpin_tests.cpp
index 0c335f41d7..b4f7988864 100644
--- a/opencl/test/unit_test/gtpin/gtpin_tests.cpp
+++ b/opencl/test/unit_test/gtpin/gtpin_tests.cpp
@@ -2328,7 +2328,7 @@ TEST_F(GTPinTests, givenKernelThenVerifyThatKernelCodeSubstitutionWorksWell) {
     // Substitute new kernel code
     constexpr size_t newCodeSize = 64;
     uint8_t newCode[newCodeSize] = {0x0, 0x1, 0x2, 0x3, 0x4};
-    pKernel->substituteKernelHeap(rootDeviceIndex, &newCode[0], newCodeSize);
+    pKernel->substituteKernelHeap(pDevice->getDevice(), &newCode[0], newCodeSize);
 
     // Verify that substitution went properly
     isKernelCodeSubstituted = pKernel->isKernelHeapSubstituted(rootDeviceIndex);
diff --git a/opencl/test/unit_test/kernel/substitute_kernel_heap_tests.cpp b/opencl/test/unit_test/kernel/substitute_kernel_heap_tests.cpp
index 92d58afccc..510dd1024e 100644
--- a/opencl/test/unit_test/kernel/substitute_kernel_heap_tests.cpp
+++ b/opencl/test/unit_test/kernel/substitute_kernel_heap_tests.cpp
@@ -34,7 +34,7 @@ TEST_F(KernelSubstituteTest, givenKernelWhenSubstituteKernelHeapWithGreaterSizeT
     const size_t newHeapSize = initialHeapSize + 1;
     char newHeap[newHeapSize];
 
-    kernel.mockKernel->substituteKernelHeap(rootDeviceIndex, newHeap, newHeapSize);
+    kernel.mockKernel->substituteKernelHeap(*pDevice, newHeap, newHeapSize);
     auto secondAllocation = kernel.kernelInfo.kernelAllocation;
     EXPECT_NE(nullptr, secondAllocation);
     auto secondAllocationSize = secondAllocation->getUnderlyingBufferSize();
@@ -64,7 +64,7 @@ TEST_F(KernelSubstituteTest, givenKernelWhenSubstituteKernelHeapWithSameSizeThen
     const size_t newHeapSize = initialHeapSize;
     char newHeap[newHeapSize];
 
-    kernel.mockKernel->substituteKernelHeap(rootDeviceIndex, newHeap, newHeapSize);
+    kernel.mockKernel->substituteKernelHeap(*pDevice, newHeap, newHeapSize);
     auto secondAllocation = kernel.kernelInfo.kernelAllocation;
     EXPECT_NE(nullptr, secondAllocation);
     auto secondAllocationSize = secondAllocation->getUnderlyingBufferSize();
@@ -93,7 +93,7 @@ TEST_F(KernelSubstituteTest, givenKernelWhenSubstituteKernelHeapWithSmallerSizeT
     const size_t newHeapSize = initialHeapSize - 1;
     char newHeap[newHeapSize];
 
-    kernel.mockKernel->substituteKernelHeap(rootDeviceIndex, newHeap, newHeapSize);
+    kernel.mockKernel->substituteKernelHeap(*pDevice, newHeap, newHeapSize);
     auto secondAllocation = kernel.kernelInfo.kernelAllocation;
     EXPECT_NE(nullptr, secondAllocation);
     auto secondAllocationSize = secondAllocation->getUnderlyingBufferSize();
@@ -125,7 +125,7 @@ TEST_F(KernelSubstituteTest, givenKernelWithUsedKernelAllocationWhenSubstituteKe
 
     EXPECT_TRUE(commandStreamReceiver.getTemporaryAllocations().peekIsEmpty());
 
-    kernel.mockKernel->substituteKernelHeap(rootDeviceIndex, newHeap, newHeapSize);
+    kernel.mockKernel->substituteKernelHeap(*pDevice, newHeap, newHeapSize);
     auto secondAllocation = kernel.kernelInfo.kernelAllocation;
 
     EXPECT_FALSE(commandStreamReceiver.getTemporaryAllocations().peekIsEmpty());
diff --git a/opencl/test/unit_test/mocks/mock_kernel.h b/opencl/test/unit_test/mocks/mock_kernel.h
index 24c79f84e2..d800a10407 100644
--- a/opencl/test/unit_test/mocks/mock_kernel.h
+++ b/opencl/test/unit_test/mocks/mock_kernel.h
@@ -254,7 +254,7 @@ class MockKernel : public Kernel {
 //class below have enough internals to service Enqueue operation.
 class MockKernelWithInternals {
   public:
-    MockKernelWithInternals(ClDevice &deviceArg, Context *context = nullptr, bool addDefaultArg = false, SPatchExecutionEnvironment execEnv = {}) {
+    MockKernelWithInternals(const ClDeviceVector &deviceVector, Context *context = nullptr, bool addDefaultArg = false, SPatchExecutionEnvironment execEnv = {}) { // primary constructor: sets the mock kernel up for every device in deviceVector
         memset(&kernelHeader, 0, sizeof(SKernelBinaryHeaderCommon));
         memset(&threadPayload, 0, sizeof(SPatchThreadPayload));
         memset(&dataParameterStream, 0, sizeof(SPatchDataParameterStream));
@@ -284,15 +284,27 @@ class MockKernelWithInternals {
             context->incRefInternal();
             mockContext = context;
         }
-        ClDeviceVector deviceVector;
-        deviceVector.push_back(&deviceArg);
-        kernelInfos.resize(deviceArg.getRootDeviceIndex() + 1);
-        kernelInfos[deviceArg.getRootDeviceIndex()] = &kernelInfo;
+        auto maxRootDeviceIndex = 0u; // kernelInfos is indexed by root device index, so it is sized from the highest index present
+
+        for (const auto &pClDevice : deviceVector) {
+            if (pClDevice->getRootDeviceIndex() > maxRootDeviceIndex) {
+                maxRootDeviceIndex = pClDevice->getRootDeviceIndex();
+            }
+        }
+
+        kernelInfos.resize(maxRootDeviceIndex + 1);
+
+        for (const auto &pClDevice : deviceVector) {
+            kernelInfos[pClDevice->getRootDeviceIndex()] = &kernelInfo; // every listed device points at the single kernelInfo member
+        }
 
         mockProgram = new MockProgram(context, false, deviceVector);
         mockKernel = new MockKernel(mockProgram, kernelInfos);
         mockKernel->setCrossThreadData(&crossThreadData, sizeof(crossThreadData));
-        mockKernel->setSshLocal(&sshLocal, sizeof(sshLocal), deviceArg.getRootDeviceIndex());
+
+        for (const auto &pClDevice : deviceVector) {
+            mockKernel->setSshLocal(&sshLocal, sizeof(sshLocal), pClDevice->getRootDeviceIndex()); // the same sshLocal buffer is passed for each root device index
+        }
 
         if (addDefaultArg) {
             defaultKernelArguments.resize(2);
@@ -323,6 +335,9 @@ class MockKernelWithInternals {
             kernelInfo.kernelArgInfo[0].offsetHeap = 64;
         }
     }
+
+    MockKernelWithInternals(ClDevice &deviceArg, Context *context = nullptr, bool addDefaultArg = false, SPatchExecutionEnvironment execEnv = {}) : MockKernelWithInternals(toClDeviceVector(deviceArg), context, addDefaultArg, execEnv) { // single-device overload kept for existing callers; delegates to the ClDeviceVector constructor
+    }
     MockKernelWithInternals(ClDevice &deviceArg, SPatchExecutionEnvironment execEnv) : MockKernelWithInternals(deviceArg, nullptr, false, execEnv) {
         mockKernel->initialize();
     }