From b85a8ace68e8b30a2ca533ab7076788dcc8cb406 Mon Sep 17 00:00:00 2001
From: Mateusz Jablonski <mateusz.jablonski@intel.com>
Date: Tue, 24 Nov 2020 16:07:54 +0000
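Subject: [PATCH] Pass root device index to Kernel's methods

Kernel::getResidency, getAllocationsForCacheFlush, getKernelStartOffset and
requiresWaDisableRccRhwoOptimization no longer derive the root device index
from getDevice(); the caller passes it in explicitly.
requiresLimitedWorkgroupSize and
GpgpuWalkerHelper::getSizeForWaDisableRccRhwoOptimization take the same
parameter. Hardware info is now looked up through the new
Kernel::getHardwareInfo(rootDeviceIndex) helper, and Kernel::getDevices()
returns the program's device vector.

Related-To: NEO-5001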
Signed-off-by: Mateusz Jablonski <mateusz.jablonski@intel.com>
---
 opencl/source/command_queue/enqueue_common.h  |  2 +-
 opencl/source/command_queue/gpgpu_walker.h    |  2 +-
 .../command_queue/gpgpu_walker_base.inl       |  2 +-
 .../command_queue/gpgpu_walker_bdw_plus.inl   |  4 +--
 .../hardware_interface_bdw_plus.inl           |  2 +-
 .../source/command_queue/local_work_size.cpp  |  2 +-
 .../source/gen12lp/gpgpu_walker_gen12lp.cpp   |  6 ++--
 opencl/source/kernel/kernel.cpp               | 23 ++++++++-------
 opencl/source/kernel/kernel.h                 | 16 +++++++----
 opencl/source/kernel/kernel_extra.cpp         |  2 +-
 .../accelerators/media_image_arg_tests.cpp    |  4 +--
 .../command_queue/enqueue_svm_tests.cpp       |  4 +--
 .../unit_test/gen11/kernel_tests_gen11.cpp    |  2 +-
 .../gen12lp/gpgpu_walker_tests_gen12lp.cpp    |  6 ++--
 .../gen12lp/kernel_tests_gen12lp.inl          |  4 +--
 .../gen12lp/tgllp/kernel_tests_tgllp.cpp      |  3 +-
 .../hardware_commands_helper_tests.cpp        | 28 +++++++++----------
 opencl/test/unit_test/kernel/kernel_tests.cpp | 13 +++++----
 .../mem_obj/buffer_set_arg_tests.cpp          |  4 +--
 .../unit_test/mem_obj/image_set_arg_tests.cpp | 16 +++++------
 opencl/test/unit_test/mocks/mock_kernel.cpp   |  4 +--
 opencl/test/unit_test/mocks/mock_kernel.h     |  2 +-
 .../test/unit_test/program/program_tests.cpp  |  2 +-
 .../sampler/sampler_set_arg_tests.cpp         |  2 +-
 24 files changed, 83 insertions(+), 72 deletions(-)
diff --git a/opencl/source/command_queue/enqueue_common.h b/opencl/source/command_queue/enqueue_common.h
index f5e9a6ae42..3ded661a81 100644
--- a/opencl/source/command_queue/enqueue_common.h
+++ b/opencl/source/command_queue/enqueue_common.h
@@ -868,7 +868,7 @@ void CommandQueueHw<GfxFamily>::enqueueBlocked(
             } else {
                 continue;
             }
-            kernel->getResidency(allSurfaces);
+            kernel->getResidency(allSurfaces, device->getRootDeviceIndex());
         }
         for (auto &surface : CreateRange(surfaces, surfaceCount)) {
             allSurfaces.push_back(surface->duplicate());
diff --git a/opencl/source/command_queue/gpgpu_walker.h b/opencl/source/command_queue/gpgpu_walker.h
index fcfc72c1f9..e68c8bd684 100644
--- a/opencl/source/command_queue/gpgpu_walker.h
+++ b/opencl/source/command_queue/gpgpu_walker.h
@@ -101,7 +101,7 @@ class GpgpuWalkerHelper {
                                                bool disablePerfMode);
 
     static size_t getSizeForWADisableLSQCROPERFforOCL(const Kernel *pKernel);
-    static size_t getSizeForWaDisableRccRhwoOptimization(const Kernel *pKernel);
+    static size_t getSizeForWaDisableRccRhwoOptimization(const Kernel *pKernel, uint32_t rootDeviceIndex);
 
     static size_t setGpgpuWalkerThreadData(
         WALKER_TYPE<GfxFamily> *walkerCmd,
diff --git a/opencl/source/command_queue/gpgpu_walker_base.inl b/opencl/source/command_queue/gpgpu_walker_base.inl
index e9e8d1751b..a524c96015 100644
--- a/opencl/source/command_queue/gpgpu_walker_base.inl
+++ b/opencl/source/command_queue/gpgpu_walker_base.inl
@@ -172,7 +172,7 @@ size_t GpgpuWalkerHelper<GfxFamily>::getSizeForWADisableLSQCROPERFforOCL(const K
 }
 
 template <typename GfxFamily>
-size_t GpgpuWalkerHelper<GfxFamily>::getSizeForWaDisableRccRhwoOptimization(const Kernel *pKernel) {
+size_t GpgpuWalkerHelper<GfxFamily>::getSizeForWaDisableRccRhwoOptimization(const Kernel *pKernel, uint32_t rootDeviceIndex) {
     return 0u;
 }
 
diff --git a/opencl/source/command_queue/gpgpu_walker_bdw_plus.inl b/opencl/source/command_queue/gpgpu_walker_bdw_plus.inl
index fc8b866896..ddeafd683b 100644
--- a/opencl/source/command_queue/gpgpu_walker_bdw_plus.inl
+++ b/opencl/source/command_queue/gpgpu_walker_bdw_plus.inl
@@ -141,7 +141,7 @@ void GpgpuWalkerHelper<GfxFamily>::dispatchScheduler(
         *ioh,
         *ssh,
         scheduler,
-        scheduler.getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed),
+        scheduler.getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed, devQueueHw.getDevice().getRootDeviceIndex()),
         simd,
         localWorkSizes,
         offsetInterfaceDescriptorTable,
@@ -211,7 +211,7 @@ size_t EnqueueOperation<GfxFamily>::getSizeRequiredCSKernel(bool reserveProfilin
     }
     size += PerformanceCounters::getGpuCommandsSize(commandQueue, reservePerfCounters);
     size += GpgpuWalkerHelper<GfxFamily>::getSizeForWADisableLSQCROPERFforOCL(pKernel);
-    size += GpgpuWalkerHelper<GfxFamily>::getSizeForWaDisableRccRhwoOptimization(pKernel);
+    size += GpgpuWalkerHelper<GfxFamily>::getSizeForWaDisableRccRhwoOptimization(pKernel, commandQueue.getDevice().getRootDeviceIndex());
 
     return size;
 }
diff --git a/opencl/source/command_queue/hardware_interface_bdw_plus.inl b/opencl/source/command_queue/hardware_interface_bdw_plus.inl
index 1ae6f449ee..58d428bc90 100644
--- a/opencl/source/command_queue/hardware_interface_bdw_plus.inl
+++ b/opencl/source/command_queue/hardware_interface_bdw_plus.inl
@@ -93,7 +93,7 @@ inline void HardwareInterface<GfxFamily>::programWalker(
         ioh,
         ssh,
         kernel,
-        kernel.getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed),
+        kernel.getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed, commandQueue.getDevice().getRootDeviceIndex()),
         simd,
         localWorkSizes,
         offsetInterfaceDescriptorTable,
diff --git a/opencl/source/command_queue/local_work_size.cpp b/opencl/source/command_queue/local_work_size.cpp
index 2d663e4f9d..f422e636b9 100644
--- a/opencl/source/command_queue/local_work_size.cpp
+++ b/opencl/source/command_queue/local_work_size.cpp
@@ -419,7 +419,7 @@ Vec3<size_t> computeWorkgroupSize(const DispatchInfo &dispatchInfo) {
         const auto &hwInfo = device.getHardwareInfo();
         auto &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily);
         auto isSimulation = device.isSimulation();
-        if (kernel->requiresLimitedWorkgroupSize() && hwHelper.isSpecialWorkgroupSizeRequired(hwInfo, isSimulation)) {
+        if (kernel->requiresLimitedWorkgroupSize(device.getRootDeviceIndex()) && hwHelper.isSpecialWorkgroupSizeRequired(hwInfo, isSimulation)) {
             setSpecialWorkgroupSize(workGroupSize);
         } else if (DebugManager.flags.EnableComputeWorkSizeND.get()) {
             WorkSizeInfo wsInfo(dispatchInfo);
diff --git a/opencl/source/gen12lp/gpgpu_walker_gen12lp.cpp b/opencl/source/gen12lp/gpgpu_walker_gen12lp.cpp
index 46f77333dc..ab33b249a8 100644
--- a/opencl/source/gen12lp/gpgpu_walker_gen12lp.cpp
+++ b/opencl/source/gen12lp/gpgpu_walker_gen12lp.cpp
@@ -64,7 +64,7 @@ void HardwareInterface<TGLLPFamily>::dispatchWorkarounds(
     using MI_LOAD_REGISTER_IMM = typename TGLLPFamily::MI_LOAD_REGISTER_IMM;
     using PIPE_CONTROL = typename TGLLPFamily::PIPE_CONTROL;
 
-    if (kernel.requiresWaDisableRccRhwoOptimization()) {
+    if (kernel.requiresWaDisableRccRhwoOptimization(commandQueue.getDevice().getRootDeviceIndex())) {
 
         PIPE_CONTROL cmdPipeControl = TGLLPFamily::cmdInitPipeControl;
         cmdPipeControl.setCommandStreamerStallEnable(true);
@@ -80,8 +80,8 @@ void HardwareInterface<TGLLPFamily>::dispatchWorkarounds(
 }
 
 template <>
-size_t GpgpuWalkerHelper<TGLLPFamily>::getSizeForWaDisableRccRhwoOptimization(const Kernel *pKernel) {
-    if (pKernel->requiresWaDisableRccRhwoOptimization()) {
+size_t GpgpuWalkerHelper<TGLLPFamily>::getSizeForWaDisableRccRhwoOptimization(const Kernel *pKernel, uint32_t rootDeviceIndex) {
+    if (pKernel->requiresWaDisableRccRhwoOptimization(rootDeviceIndex)) {
         return (2 * (sizeof(TGLLPFamily::PIPE_CONTROL) + sizeof(TGLLPFamily::MI_LOAD_REGISTER_IMM)));
     }
     return 0u;
diff --git a/opencl/source/kernel/kernel.cpp b/opencl/source/kernel/kernel.cpp
index 130262cb37..6e201460fd 100644
--- a/opencl/source/kernel/kernel.cpp
+++ b/opencl/source/kernel/kernel.cpp
@@ -1142,8 +1142,7 @@ void Kernel::makeResident(CommandStreamReceiver &commandStreamReceiver) {
     }
 }
 
-void Kernel::getResidency(std::vector<Surface *> &dst) {
-    auto rootDeviceIndex = getDevice().getRootDeviceIndex();
+void Kernel::getResidency(std::vector<Surface *> &dst, uint32_t rootDeviceIndex) {
     if (kernelDeviceInfos[rootDeviceIndex].privateSurface) {
         GeneralSurface *surface = new GeneralSurface(kernelDeviceInfos[rootDeviceIndex].privateSurface);
         dst.push_back(surface);
@@ -2386,8 +2385,8 @@ void Kernel::fillWithBuffersForAuxTranslation(MemObjsForAuxTranslation &memObjsF
     }
 }
 
-void Kernel::getAllocationsForCacheFlush(CacheFlushAllocationsVec &out) const {
-    if (false == HwHelper::cacheFlushAfterWalkerSupported(getDevice().getHardwareInfo())) {
+void Kernel::getAllocationsForCacheFlush(CacheFlushAllocationsVec &out, uint32_t rootDeviceIndex) const {
+    if (false == HwHelper::cacheFlushAfterWalkerSupported(getHardwareInfo(rootDeviceIndex))) {
         return;
     }
     for (GraphicsAllocation *alloc : this->kernelArgRequiresCacheFlush) {
@@ -2398,7 +2397,7 @@ void Kernel::getAllocationsForCacheFlush(CacheFlushAllocationsVec &out) const {
         out.push_back(alloc);
     }
 
-    auto global = getProgram()->getGlobalSurface(getDevice().getRootDeviceIndex());
+    auto global = getProgram()->getGlobalSurface(rootDeviceIndex);
     if (global != nullptr) {
         out.push_back(global);
     }
@@ -2440,7 +2439,8 @@ bool Kernel::checkIfIsParentKernelAndBlocksUsesPrintf() {
 uint64_t Kernel::getKernelStartOffset(
     const bool localIdsGenerationByRuntime,
     const bool kernelUsesLocalIds,
-    const bool isCssUsed) const {
+    const bool isCssUsed,
+    uint32_t rootDeviceIndex) const {
 
     uint64_t kernelStartOffset = 0;
 
@@ -2453,7 +2453,7 @@ uint64_t Kernel::getKernelStartOffset(
 
     kernelStartOffset += getStartOffset();
 
-    auto &hardwareInfo = getDevice().getHardwareInfo();
+    auto &hardwareInfo = getHardwareInfo(rootDeviceIndex);
     auto &hwHelper = HwHelper::get(hardwareInfo.platform.eRenderCoreFamily);
 
     if (isCssUsed && hwHelper.isOffsetToSkipSetFFIDGPWARequired(hardwareInfo)) {
@@ -2493,8 +2493,8 @@ uint32_t Kernel::getAdditionalKernelExecInfo() const {
     return this->additionalKernelExecInfo;
 }
 
-bool Kernel::requiresWaDisableRccRhwoOptimization() const {
-    auto &hardwareInfo = getDevice().getHardwareInfo();
+bool Kernel::requiresWaDisableRccRhwoOptimization(uint32_t rootDeviceIndex) const {
+    auto &hardwareInfo = getHardwareInfo(rootDeviceIndex);
     auto &hwHelper = HwHelper::get(hardwareInfo.platform.eRenderCoreFamily);
 
     if (hwHelper.isWaDisableRccRhwoOptimizationRequired() && isUsingSharedObjArgs()) {
@@ -2502,7 +2502,7 @@ bool Kernel::requiresWaDisableRccRhwoOptimization() const {
             auto clMemObj = static_cast<cl_mem>(arg.object);
             auto memObj = castToObject<MemObj>(clMemObj);
             if (memObj && memObj->peekSharingHandler()) {
-                auto allocation = memObj->getGraphicsAllocation(getDevice().getRootDeviceIndex());
+                auto allocation = memObj->getGraphicsAllocation(rootDeviceIndex);
                 for (uint32_t handleId = 0u; handleId < allocation->getNumGmms(); handleId++) {
                     if (allocation->getGmm(handleId)->gmmResourceInfo->getResourceFlags()->Info.MediaCompressed) {
                         return true;
@@ -2514,4 +2514,7 @@ bool Kernel::requiresWaDisableRccRhwoOptimization() const {
     return false;
 }
 
+const HardwareInfo &Kernel::getHardwareInfo(uint32_t rootDeviceIndex) const {
+    return *executionEnvironment.rootDeviceEnvironments[rootDeviceIndex]->getHardwareInfo();
+}
 } // namespace NEO
diff --git a/opencl/source/kernel/kernel.h b/opencl/source/kernel/kernel.h
index e2f6d89e07..a96111f077 100644
--- a/opencl/source/kernel/kernel.h
+++ b/opencl/source/kernel/kernel.h
@@ -283,7 +283,7 @@ class Kernel : public BaseObject<_cl_kernel> {
 
     //residency for kernel surfaces
     MOCKABLE_VIRTUAL void makeResident(CommandStreamReceiver &commandStreamReceiver);
-    MOCKABLE_VIRTUAL void getResidency(std::vector<Surface *> &dst);
+    MOCKABLE_VIRTUAL void getResidency(std::vector<Surface *> &dst, uint32_t rootDeviceIndex);
     bool requiresCoherency();
     void resetSharedObjectsPatchAddresses();
     bool isUsingSharedObjArgs() const { return usingSharedObjArgs; }
@@ -379,7 +379,7 @@ class Kernel : public BaseObject<_cl_kernel> {
     MOCKABLE_VIRTUAL bool requiresCacheFlushCommand(const CommandQueue &commandQueue) const;
 
     using CacheFlushAllocationsVec = StackVec<GraphicsAllocation *, 32>;
-    void getAllocationsForCacheFlush(CacheFlushAllocationsVec &out) const;
+    void getAllocationsForCacheFlush(CacheFlushAllocationsVec &out, uint32_t rootDeviceIndex) const;
 
     void setAuxTranslationDirection(AuxTranslationDirection auxTranslationDirection) {
         this->auxTranslationDirection = auxTranslationDirection;
@@ -404,17 +404,18 @@ class Kernel : public BaseObject<_cl_kernel> {
     uint64_t getKernelStartOffset(
         const bool localIdsGenerationByRuntime,
         const bool kernelUsesLocalIds,
-        const bool isCssUsed) const;
+        const bool isCssUsed,
+        uint32_t rootDeviceIndex) const;
 
     bool requiresPerDssBackedBuffer() const;
-    bool requiresLimitedWorkgroupSize() const;
+    bool requiresLimitedWorkgroupSize(uint32_t rootDeviceIndex) const;
     bool isKernelDebugEnabled() const { return debugEnabled; }
     int32_t setAdditionalKernelExecInfoWithParam(uint32_t paramName, size_t paramValueSize, const void *paramValue);
     void setAdditionalKernelExecInfo(uint32_t additionalKernelExecInfo);
     uint32_t getAdditionalKernelExecInfo() const;
-    MOCKABLE_VIRTUAL bool requiresWaDisableRccRhwoOptimization() const;
+    MOCKABLE_VIRTUAL bool requiresWaDisableRccRhwoOptimization(uint32_t rootDeviceIndex) const;
     const ClDeviceVector &getDevices() const {
-        return deviceVector;
+        return program->getDevices();
     }
 
   protected:
@@ -507,9 +508,12 @@ class Kernel : public BaseObject<_cl_kernel> {
     void addAllocationToCacheFlushVector(uint32_t argIndex, GraphicsAllocation *argAllocation);
     bool allocationForCacheFlush(GraphicsAllocation *argAllocation) const;
 
+    const HardwareInfo &getHardwareInfo(uint32_t rootDeviceIndex) const;
+
     const ClDevice &getDevice() const {
         return *deviceVector[0];
     }
+
     const ExecutionEnvironment &executionEnvironment;
     Program *program;
     const ClDeviceVector &deviceVector;
diff --git a/opencl/source/kernel/kernel_extra.cpp b/opencl/source/kernel/kernel_extra.cpp
index 9ed6844598..9712a23310 100644
--- a/opencl/source/kernel/kernel_extra.cpp
+++ b/opencl/source/kernel/kernel_extra.cpp
@@ -33,7 +33,7 @@ bool Kernel::requiresPerDssBackedBuffer() const {
     return DebugManager.flags.ForcePerDssBackedBufferProgramming.get();
 }
 
-bool Kernel::requiresLimitedWorkgroupSize() const {
+bool Kernel::requiresLimitedWorkgroupSize(uint32_t rootDeviceIndex) const {
     return this->isBuiltIn;
 }
 
diff --git a/opencl/test/unit_test/accelerators/media_image_arg_tests.cpp b/opencl/test/unit_test/accelerators/media_image_arg_tests.cpp
index 87f57c7a24..45469c8e2c 100644
--- a/opencl/test/unit_test/accelerators/media_image_arg_tests.cpp
+++ b/opencl/test/unit_test/accelerators/media_image_arg_tests.cpp
@@ -93,7 +93,7 @@ HWTEST_F(MediaImageSetArgTest, WhenSettingMediaImageArgThenArgsSetCorrectly) {
               pSurfaceState->getSurfaceBaseAddress());
 
     std::vector<Surface *> surfaces;
-    pKernel->getResidency(surfaces);
+    pKernel->getResidency(surfaces, rootDeviceIndex);
     EXPECT_EQ(0u, surfaces.size());
 }
 
@@ -131,7 +131,7 @@ HWTEST_F(MediaImageSetArgTest, WhenSettingKernelArgImageThenArgsSetCorrectly) {
     EXPECT_EQ(MEDIA_SURFACE_STATE::PICTURE_STRUCTURE_FRAME_PICTURE, pSurfaceState->getPictureStructure());
 
     std::vector<Surface *> surfaces;
-    pKernel->getResidency(surfaces);
+    pKernel->getResidency(surfaces, rootDeviceIndex);
 
     for (auto &surface : surfaces) {
         delete surface;
diff --git a/opencl/test/unit_test/command_queue/enqueue_svm_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_svm_tests.cpp
index 587a56b706..510984ecdb 100644
--- a/opencl/test/unit_test/command_queue/enqueue_svm_tests.cpp
+++ b/opencl/test/unit_test/command_queue/enqueue_svm_tests.cpp
@@ -769,7 +769,7 @@ TEST_F(EnqueueSvmTest, givenEnqueueTaskBlockedOnUserEventWhenItIsEnqueuedThenSur
     auto kernel = clUniquePtr(Kernel::create<MockKernel>(program.get(), *program->getKernelInfo("FillBufferBytes"), &retVal));
 
     std::vector<Surface *> allSurfaces;
-    kernel->getResidency(allSurfaces);
+    kernel->getResidency(allSurfaces, rootDeviceIndex);
     EXPECT_EQ(1u, allSurfaces.size());
 
     kernel->setSvmKernelExecInfo(pSvmAlloc);
@@ -789,7 +789,7 @@ TEST_F(EnqueueSvmTest, givenEnqueueTaskBlockedOnUserEventWhenItIsEnqueuedThenSur
         nullptr);
     EXPECT_EQ(CL_SUCCESS, retVal);
 
-    kernel->getResidency(allSurfaces);
+    kernel->getResidency(allSurfaces, rootDeviceIndex);
     EXPECT_EQ(3u, allSurfaces.size());
 
     for (auto &surface : allSurfaces)
diff --git a/opencl/test/unit_test/gen11/kernel_tests_gen11.cpp b/opencl/test/unit_test/gen11/kernel_tests_gen11.cpp
index 4830033d90..01e50274d5 100644
--- a/opencl/test/unit_test/gen11/kernel_tests_gen11.cpp
+++ b/opencl/test/unit_test/gen11/kernel_tests_gen11.cpp
@@ -21,5 +21,5 @@ GEN11TEST_F(Gen11KernelTest, givenKernelWhenCanTransformImagesIsCalledThenReturn
 
 GEN11TEST_F(Gen11KernelTest, GivenKernelWhenNotRunningOnGen12lpThenWaDisableRccRhwoOptimizationIsNotRequired) {
     MockKernelWithInternals kernel(*pClDevice);
-    EXPECT_FALSE(kernel.mockKernel->requiresWaDisableRccRhwoOptimization());
+    EXPECT_FALSE(kernel.mockKernel->requiresWaDisableRccRhwoOptimization(rootDeviceIndex));
 }
diff --git a/opencl/test/unit_test/gen12lp/gpgpu_walker_tests_gen12lp.cpp b/opencl/test/unit_test/gen12lp/gpgpu_walker_tests_gen12lp.cpp
index be8b9402a4..90b142b441 100644
--- a/opencl/test/unit_test/gen12lp/gpgpu_walker_tests_gen12lp.cpp
+++ b/opencl/test/unit_test/gen12lp/gpgpu_walker_tests_gen12lp.cpp
@@ -38,7 +38,7 @@ GEN12LPTEST_F(GpgpuWalkerTests, givenMiStoreRegMemWhenAdjustMiStoreRegMemModeThe
 class MockKernelWithApplicableWa : public MockKernel {
   public:
     MockKernelWithApplicableWa(Program *program, KernelInfo &kernelInfo) : MockKernel(program, kernelInfo) {}
-    bool requiresWaDisableRccRhwoOptimization() const override {
+    bool requiresWaDisableRccRhwoOptimization(uint32_t rootDeviceIndex) const override {
         return waApplicable;
     }
     bool waApplicable = false;
@@ -135,14 +135,14 @@ GEN12LPTEST_F(HardwareInterfaceTests, GivenKernelWithApplicableWaDisableRccRhwoO
     using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM;
 
     pKernel->waApplicable = true;
-    auto cmdSize = GpgpuWalkerHelper<FamilyType>::getSizeForWaDisableRccRhwoOptimization(pKernel);
+    auto cmdSize = GpgpuWalkerHelper<FamilyType>::getSizeForWaDisableRccRhwoOptimization(pKernel, rootDeviceIndex);
     size_t expectedSize = 2 * (sizeof(PIPE_CONTROL) + sizeof(MI_LOAD_REGISTER_IMM));
     EXPECT_EQ(expectedSize, cmdSize);
 }
 
 GEN12LPTEST_F(HardwareInterfaceTests, GivenKernelWithoutApplicableWaDisableRccRhwoOptimizationWhenCalculatingCommandsSizeThenZeroIsReturned) {
     pKernel->waApplicable = false;
-    auto cmdSize = GpgpuWalkerHelper<FamilyType>::getSizeForWaDisableRccRhwoOptimization(pKernel);
+    auto cmdSize = GpgpuWalkerHelper<FamilyType>::getSizeForWaDisableRccRhwoOptimization(pKernel, rootDeviceIndex);
     EXPECT_EQ(0u, cmdSize);
 }
 
diff --git a/opencl/test/unit_test/gen12lp/kernel_tests_gen12lp.inl b/opencl/test/unit_test/gen12lp/kernel_tests_gen12lp.inl
index edcb96b3ad..84f8e562ac 100644
--- a/opencl/test/unit_test/gen12lp/kernel_tests_gen12lp.inl
+++ b/opencl/test/unit_test/gen12lp/kernel_tests_gen12lp.inl
@@ -22,7 +22,7 @@ GEN12LPTEST_F(Gen12LpKernelTest, givenKernelWhenCanTransformImagesIsCalledThenRe
 
 GEN12LPTEST_F(Gen12LpKernelTest, GivenKernelWhenNotUsingSharedObjArgsThenWaDisableRccRhwoOptimizationIsNotRequired) {
     MockKernelWithInternals kernel(*pClDevice);
-    EXPECT_FALSE(kernel.mockKernel->requiresWaDisableRccRhwoOptimization());
+    EXPECT_FALSE(kernel.mockKernel->requiresWaDisableRccRhwoOptimization(rootDeviceIndex));
 }
 
 GEN12LPTEST_F(Gen12LpKernelTest, GivenKernelWhenAtLeastOneArgIsMediaCompressedThenWaDisableRccRhwoOptimizationIsRequired) {
@@ -56,5 +56,5 @@ GEN12LPTEST_F(Gen12LpKernelTest, GivenKernelWhenAtLeastOneArgIsMediaCompressedTh
     cl_mem clMem2 = &bufferMediaCompressed;
     kernel.mockKernel->setArgBuffer(2, sizeof(cl_mem *), &clMem2);
 
-    EXPECT_TRUE(kernel.mockKernel->requiresWaDisableRccRhwoOptimization());
+    EXPECT_TRUE(kernel.mockKernel->requiresWaDisableRccRhwoOptimization(rootDeviceIndex));
 }
diff --git a/opencl/test/unit_test/gen12lp/tgllp/kernel_tests_tgllp.cpp b/opencl/test/unit_test/gen12lp/tgllp/kernel_tests_tgllp.cpp
index e30d7938d4..6351ddb713 100644
--- a/opencl/test/unit_test/gen12lp/tgllp/kernel_tests_tgllp.cpp
+++ b/opencl/test/unit_test/gen12lp/tgllp/kernel_tests_tgllp.cpp
@@ -28,11 +28,12 @@ TGLLPTEST_F(KernelTgllpTests, GivenUseOffsetToSkipSetFFIDGPWorkaroundActiveWhenS
 
         hwInfo.platform.usRevId = stepping;
         auto device = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(&hwInfo));
+        auto rootDeviceIndex = device->getRootDeviceIndex();
         MockKernelWithInternals mockKernelWithInternals{*device};
         mockKernelWithInternals.kernelInfo.patchInfo.threadPayload = &threadPayload;
 
         for (auto isCcsUsed : ::testing::Bool()) {
-            uint64_t kernelStartOffset = mockKernelWithInternals.mockKernel->getKernelStartOffset(false, false, isCcsUsed);
+            uint64_t kernelStartOffset = mockKernelWithInternals.mockKernel->getKernelStartOffset(false, false, isCcsUsed, rootDeviceIndex);
 
             if (stepping == REVISION_A0 && isCcsUsed) {
                 EXPECT_EQ(defaultKernelStartOffset + additionalOffsetDueToFfid, kernelStartOffset);
diff --git a/opencl/test/unit_test/helpers/hardware_commands_helper_tests.cpp b/opencl/test/unit_test/helpers/hardware_commands_helper_tests.cpp
index 04b4f53b93..833664ac77 100644
--- a/opencl/test/unit_test/helpers/hardware_commands_helper_tests.cpp
+++ b/opencl/test/unit_test/helpers/hardware_commands_helper_tests.cpp
@@ -345,7 +345,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, WhenAllocatingIndirectStateRes
         ioh,
         ssh,
         *kernel,
-        kernel->getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed),
+        kernel->getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed, rootDeviceIndex),
         kernel->getKernelInfo().getMaxSimdSize(),
         localWorkSizes,
         IDToffset,
@@ -400,7 +400,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, givenKernelWithFourBindingTabl
         ioh,
         ssh,
         *mockKernelWithInternal->mockKernel,
-        mockKernelWithInternal->mockKernel->getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed),
+        mockKernelWithInternal->mockKernel->getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed, rootDeviceIndex),
         mockKernelWithInternal->mockKernel->getKernelInfo().getMaxSimdSize(),
         localWorkSizes,
         0,
@@ -448,7 +448,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, givenKernelThatIsSchedulerWhen
         ioh,
         ssh,
         *mockKernelWithInternal->mockKernel,
-        mockKernelWithInternal->mockKernel->getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed),
+        mockKernelWithInternal->mockKernel->getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed, rootDeviceIndex),
         mockKernelWithInternal->mockKernel->getKernelInfo().getMaxSimdSize(),
         localWorkSizes,
         0,
@@ -490,7 +490,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, givenKernelWith100BindingTable
         ioh,
         ssh,
         *mockKernelWithInternal->mockKernel,
-        mockKernelWithInternal->mockKernel->getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed),
+        mockKernelWithInternal->mockKernel->getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed, rootDeviceIndex),
         mockKernelWithInternal->mockKernel->getKernelInfo().getMaxSimdSize(),
         localWorkSizes,
         0,
@@ -568,7 +568,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, whenSendingIndirectStateThenKe
         ioh,
         ssh,
         mockKernel,
-        mockKernel.getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed),
+        mockKernel.getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed, rootDeviceIndex),
         modifiedKernelInfo.getMaxSimdSize(),
         localWorkSizes,
         IDToffset,
@@ -659,7 +659,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, WhenSendingIndirectStateThenBi
         ioh,
         ssh,
         *kernel,
-        kernel->getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed),
+        kernel->getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed, rootDeviceIndex),
         kernel->getKernelInfo().getMaxSimdSize(),
         localWorkSizes,
         0,
@@ -819,7 +819,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, WhenGettingBindingTableStateTh
             ioh,
             ssh,
             *pKernel,
-            pKernel->getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed),
+            pKernel->getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed, rootDeviceIndex),
             pKernel->getKernelInfo().getMaxSimdSize(),
             localWorkSizes,
             0,
@@ -1016,7 +1016,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, GivenKernelWithSamplersWhenInd
         ioh,
         ssh,
         *mockKernelWithInternal->mockKernel,
-        mockKernelWithInternal->mockKernel->getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed),
+        mockKernelWithInternal->mockKernel->getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed, rootDeviceIndex),
         8,
         localWorkSizes,
         interfaceDescriptorTableOffset,
@@ -1196,7 +1196,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, givenCacheFlushAfterWalkerEnab
     mockKernelWithInternal->mockProgram->setGlobalSurface(&globalAllocation);
 
     Kernel::CacheFlushAllocationsVec allocs;
-    mockKernelWithInternal->mockKernel->getAllocationsForCacheFlush(allocs);
+    mockKernelWithInternal->mockKernel->getAllocationsForCacheFlush(allocs, rootDeviceIndex);
     EXPECT_NE(allocs.end(), std::find(allocs.begin(), allocs.end(), &globalAllocation));
 
     size_t expectedSize = sizeof(PIPE_CONTROL);
@@ -1235,7 +1235,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, givenCacheFlushAfterWalkerEnab
     mockKernelWithInternal->mockKernel->svmAllocationsRequireCacheFlush = true;
 
     Kernel::CacheFlushAllocationsVec allocs;
-    mockKernelWithInternal->mockKernel->getAllocationsForCacheFlush(allocs);
+    mockKernelWithInternal->mockKernel->getAllocationsForCacheFlush(allocs, rootDeviceIndex);
     EXPECT_NE(allocs.end(), std::find(allocs.begin(), allocs.end(), &svmAllocation1));
     EXPECT_EQ(allocs.end(), std::find(allocs.begin(), allocs.end(), &svmAllocation2));
 
@@ -1270,7 +1270,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, givenCacheFlushAfterWalkerEnab
     mockKernelWithInternal->mockKernel->kernelArgRequiresCacheFlush[0] = &cacheRequiringAllocation;
 
     Kernel::CacheFlushAllocationsVec allocs;
-    mockKernelWithInternal->mockKernel->getAllocationsForCacheFlush(allocs);
+    mockKernelWithInternal->mockKernel->getAllocationsForCacheFlush(allocs, rootDeviceIndex);
     EXPECT_NE(allocs.end(), std::find(allocs.begin(), allocs.end(), &cacheRequiringAllocation));
 
     size_t expectedSize = sizeof(PIPE_CONTROL);
@@ -1306,7 +1306,7 @@ TEST_F(HardwareCommandsTest, givenCacheFlushAfterWalkerEnabledWhenPlatformNotSup
     hardwareInfo.capabilityTable.supportCacheFlushAfterWalker = false;
 
     StackVec<GraphicsAllocation *, 32> allocationsForCacheFlush;
-    mockKernelWithInternal->mockKernel->getAllocationsForCacheFlush(allocationsForCacheFlush);
+    mockKernelWithInternal->mockKernel->getAllocationsForCacheFlush(allocationsForCacheFlush, rootDeviceIndex);
     EXPECT_EQ(0U, allocationsForCacheFlush.size());
 }
 
@@ -1324,13 +1324,13 @@ HWTEST_F(KernelCacheFlushTests, givenLocallyUncachedBufferWhenGettingAllocations
 
     using CacheFlushAllocationsVec = StackVec<GraphicsAllocation *, 32>;
     CacheFlushAllocationsVec cacheFlushVec;
-    kernel->getAllocationsForCacheFlush(cacheFlushVec);
+    kernel->getAllocationsForCacheFlush(cacheFlushVec, rootDeviceIndex);
     EXPECT_EQ(0u, cacheFlushVec.size());
 
     auto bufferRegular = clCreateBufferWithPropertiesINTEL(context, nullptr, 0, 1, nullptr, nullptr);
     kernel->setArg(1, sizeof(bufferRegular), &bufferRegular);
 
-    kernel->getAllocationsForCacheFlush(cacheFlushVec);
+    kernel->getAllocationsForCacheFlush(cacheFlushVec, rootDeviceIndex);
     size_t expectedCacheFlushVecSize = (hardwareInfo.capabilityTable.supportCacheFlushAfterWalker ? 1u : 0u);
     EXPECT_EQ(expectedCacheFlushVecSize, cacheFlushVec.size());
 
diff --git a/opencl/test/unit_test/kernel/kernel_tests.cpp b/opencl/test/unit_test/kernel/kernel_tests.cpp
index 28dabfbc6d..42f791ffd8 100644
--- a/opencl/test/unit_test/kernel/kernel_tests.cpp
+++ b/opencl/test/unit_test/kernel/kernel_tests.cpp
@@ -1675,7 +1675,7 @@ HWTEST_F(KernelResidencyTest, givenKernelWhenMakeResidentIsCalledThenExportedFun
 
     // check getResidency as well
     std::vector<NEO::Surface *> residencySurfaces;
-    pKernel->getResidency(residencySurfaces);
+    pKernel->getResidency(residencySurfaces, rootDeviceIndex);
     std::unique_ptr<NEO::ExecutionEnvironment> mockCsrExecEnv;
     {
         CommandStreamReceiverMock csrMock;
@@ -1711,7 +1711,7 @@ HWTEST_F(KernelResidencyTest, givenKernelWhenMakeResidentIsCalledThenGlobalBuffe
     EXPECT_TRUE(commandStreamReceiver.isMadeResident(program.buildInfos[pDevice->getRootDeviceIndex()].globalSurface));
 
     std::vector<NEO::Surface *> residencySurfaces;
-    pKernel->getResidency(residencySurfaces);
+    pKernel->getResidency(residencySurfaces, rootDeviceIndex);
     std::unique_ptr<NEO::ExecutionEnvironment> mockCsrExecEnv;
     {
         CommandStreamReceiverMock csrMock;
@@ -3059,6 +3059,7 @@ TEST(KernelTest, GivenDifferentValuesWhenSetKernelExecutionTypeIsCalledThenCorre
 
 TEST(KernelTest, givenKernelLocalIdGenerationByRuntimeFalseWhenGettingStartOffsetThenOffsetToSkipPerThreadDataLoadIsAdded) {
     auto device = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(defaultHwInfo.get()));
+    auto rootDeviceIndex = device->getRootDeviceIndex();
 
     MockKernelWithInternals mockKernel(*device);
     SPatchThreadPayload threadPayload = {};
@@ -3070,13 +3071,14 @@ TEST(KernelTest, givenKernelLocalIdGenerationByRuntimeFalseWhenGettingStartOffse
     auto allocationOffset = mockKernel.kernelInfo.getGraphicsAllocation()->getGpuAddressToPatch();
 
     mockKernel.mockKernel->setStartOffset(128);
-    auto offset = mockKernel.mockKernel->getKernelStartOffset(false, true, false);
+    auto offset = mockKernel.mockKernel->getKernelStartOffset(false, true, false, rootDeviceIndex);
     EXPECT_EQ(allocationOffset + 256u, offset);
     device->getMemoryManager()->freeGraphicsMemory(mockKernel.kernelInfo.getGraphicsAllocation());
 }
 
 TEST(KernelTest, givenKernelLocalIdGenerationByRuntimeTrueAndLocalIdsUsedWhenGettingStartOffsetThenOffsetToSkipPerThreadDataLoadIsNotAdded) {
     auto device = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(defaultHwInfo.get()));
+    auto rootDeviceIndex = device->getRootDeviceIndex();
 
     MockKernelWithInternals mockKernel(*device);
     SPatchThreadPayload threadPayload = {};
@@ -3088,13 +3090,14 @@ TEST(KernelTest, givenKernelLocalIdGenerationByRuntimeTrueAndLocalIdsUsedWhenGet
     auto allocationOffset = mockKernel.kernelInfo.getGraphicsAllocation()->getGpuAddressToPatch();
 
     mockKernel.mockKernel->setStartOffset(128);
-    auto offset = mockKernel.mockKernel->getKernelStartOffset(true, true, false);
+    auto offset = mockKernel.mockKernel->getKernelStartOffset(true, true, false, rootDeviceIndex);
     EXPECT_EQ(allocationOffset + 128u, offset);
     device->getMemoryManager()->freeGraphicsMemory(mockKernel.kernelInfo.getGraphicsAllocation());
 }
 
 TEST(KernelTest, givenKernelLocalIdGenerationByRuntimeFalseAndLocalIdsNotUsedWhenGettingStartOffsetThenOffsetToSkipPerThreadDataLoadIsNotAdded) {
     auto device = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(defaultHwInfo.get()));
+    auto rootDeviceIndex = device->getRootDeviceIndex();
 
     MockKernelWithInternals mockKernel(*device);
     SPatchThreadPayload threadPayload = {};
@@ -3106,7 +3109,7 @@ TEST(KernelTest, givenKernelLocalIdGenerationByRuntimeFalseAndLocalIdsNotUsedWhe
     auto allocationOffset = mockKernel.kernelInfo.getGraphicsAllocation()->getGpuAddressToPatch();
 
     mockKernel.mockKernel->setStartOffset(128);
-    auto offset = mockKernel.mockKernel->getKernelStartOffset(false, false, false);
+    auto offset = mockKernel.mockKernel->getKernelStartOffset(false, false, false, rootDeviceIndex);
     EXPECT_EQ(allocationOffset + 128u, offset);
     device->getMemoryManager()->freeGraphicsMemory(mockKernel.kernelInfo.getGraphicsAllocation());
 }
diff --git a/opencl/test/unit_test/mem_obj/buffer_set_arg_tests.cpp b/opencl/test/unit_test/mem_obj/buffer_set_arg_tests.cpp
index 39b01c90eb..b5b4ca0a0a 100644
--- a/opencl/test/unit_test/mem_obj/buffer_set_arg_tests.cpp
+++ b/opencl/test/unit_test/mem_obj/buffer_set_arg_tests.cpp
@@ -281,7 +281,7 @@ TEST_F(BufferSetArgTest, WhenSettingKernelArgThenAddressToPatchIsSetCorrectlyAnd
     EXPECT_EQ(reinterpret_cast<void *>(buffer->getGraphicsAllocation(pClDevice->getRootDeviceIndex())->getGpuAddressToPatch()), *pKernelArg);
 
     std::vector<Surface *> surfaces;
-    pKernel->getResidency(surfaces);
+    pKernel->getResidency(surfaces, rootDeviceIndex);
     EXPECT_EQ(1u, surfaces.size());
 
     for (auto &surface : surfaces) {
@@ -311,7 +311,7 @@ TEST_F(BufferSetArgTest, GivenSvmPointerWhenSettingKernelArgThenAddressToPatchIs
     EXPECT_EQ(ptrSVM, *pKernelArg);
 
     std::vector<Surface *> surfaces;
-    pKernel->getResidency(surfaces);
+    pKernel->getResidency(surfaces, rootDeviceIndex);
     EXPECT_EQ(1u, surfaces.size());
     for (auto &surface : surfaces) {
         delete surface;
diff --git a/opencl/test/unit_test/mem_obj/image_set_arg_tests.cpp b/opencl/test/unit_test/mem_obj/image_set_arg_tests.cpp
index 72f980b23d..f4ec5e95a3 100644
--- a/opencl/test/unit_test/mem_obj/image_set_arg_tests.cpp
+++ b/opencl/test/unit_test/mem_obj/image_set_arg_tests.cpp
@@ -129,7 +129,7 @@ HWTEST_F(ImageSetArgTest, WhenSettingKernelArgImageThenSurfaceBaseAddressIsSetCo
     EXPECT_EQ(srcAllocation->getGpuAddress(), surfaceAddress);
 
     std::vector<Surface *> surfaces;
-    pKernel->getResidency(surfaces);
+    pKernel->getResidency(surfaces, rootDeviceIndex);
     EXPECT_EQ(0u, surfaces.size());
 }
 
@@ -339,7 +339,7 @@ HWTEST_F(ImageSetArgTest, givenOffsetedBufferWhenSetKernelArgImageIscalledThenFu
     EXPECT_EQ(srcAllocation->getGpuAddress(), surfaceAddress);
 
     std::vector<Surface *> surfaces;
-    pKernel->getResidency(surfaces);
+    pKernel->getResidency(surfaces, rootDeviceIndex);
     EXPECT_EQ(0u, surfaces.size());
 }
 
@@ -382,7 +382,7 @@ HWTEST_F(ImageSetArgTest, WhenSettingKernelArgThenPropertiesAreSetCorrectly) {
     EXPECT_EQ(0u, surfaceState->getCoherencyType());
 
     std::vector<Surface *> surfaces;
-    pKernel->getResidency(surfaces);
+    pKernel->getResidency(surfaces, rootDeviceIndex);
     EXPECT_EQ(1u, surfaces.size());
 
     for (auto &surface : surfaces) {
@@ -454,7 +454,7 @@ HWTEST_F(ImageSetArgTest, Given2dArrayWhenSettingKernelArgThenPropertiesAreSetCo
     EXPECT_EQ(RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT_ALPHA, surfaceState->getShaderChannelSelectAlpha());
 
     std::vector<Surface *> surfaces;
-    pKernel->getResidency(surfaces);
+    pKernel->getResidency(surfaces, rootDeviceIndex);
     EXPECT_EQ(1u, surfaces.size());
     for (auto &surface : surfaces) {
         delete surface;
@@ -502,7 +502,7 @@ HWTEST_F(ImageSetArgTest, Given1dArrayWhenSettingKernelArgThenPropertiesAreSetCo
     EXPECT_EQ(RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT_ALPHA, surfaceState->getShaderChannelSelectAlpha());
 
     std::vector<Surface *> surfaces;
-    pKernel->getResidency(surfaces);
+    pKernel->getResidency(surfaces, rootDeviceIndex);
     EXPECT_EQ(1u, surfaces.size());
     for (auto &surface : surfaces) {
         delete surface;
@@ -846,7 +846,7 @@ HWTEST_F(ImageSetArgTest, GivenImageWithClLuminanceFormatWhenSettingKernelArgThe
     EXPECT_EQ(RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT_ALPHA, surfaceState->getShaderChannelSelectAlpha());
 
     std::vector<Surface *> surfaces;
-    pKernel->getResidency(surfaces);
+    pKernel->getResidency(surfaces, rootDeviceIndex);
     EXPECT_EQ(1u, surfaces.size());
     for (auto &surface : surfaces) {
         delete surface;
@@ -866,7 +866,7 @@ HWTEST_F(ImageSetArgTest, WhenSettingArgThenImageIsReturned) {
     EXPECT_EQ(memObj, pKernel->getKernelArg(0));
 
     std::vector<Surface *> surfaces;
-    pKernel->getResidency(surfaces);
+    pKernel->getResidency(surfaces, rootDeviceIndex);
     EXPECT_EQ(1u, surfaces.size());
 
     for (auto &surface : surfaces) {
@@ -990,7 +990,7 @@ HWTEST_F(ImageMediaBlockSetArgTest, WhenSettingKernelArgImageThenPropertiesAreCo
     EXPECT_EQ(imageMocs, surfaceState->getMemoryObjectControlState());
 
     std::vector<Surface *> surfaces;
-    pKernel->getResidency(surfaces);
+    pKernel->getResidency(surfaces, rootDeviceIndex);
     EXPECT_EQ(1u, surfaces.size());
 
     for (auto &surface : surfaces) {
diff --git a/opencl/test/unit_test/mocks/mock_kernel.cpp b/opencl/test/unit_test/mocks/mock_kernel.cpp
index 91b3e2b4db..1185e04e26 100644
--- a/opencl/test/unit_test/mocks/mock_kernel.cpp
+++ b/opencl/test/unit_test/mocks/mock_kernel.cpp
@@ -49,9 +49,9 @@ void MockKernel::makeResident(CommandStreamReceiver &commandStreamReceiver) {
     Kernel::makeResident(commandStreamReceiver);
 }
 
-void MockKernel::getResidency(std::vector<Surface *> &dst) {
+void MockKernel::getResidency(std::vector<Surface *> &dst, uint32_t rootDeviceIndex) {
     getResidencyCalls++;
-    Kernel::getResidency(dst);
+    Kernel::getResidency(dst, rootDeviceIndex);
 }
 bool MockKernel::requiresCacheFlushCommand(const CommandQueue &commandQueue) const {
     if (DebugManager.flags.EnableCacheFlushAfterWalker.get() != -1) {
diff --git a/opencl/test/unit_test/mocks/mock_kernel.h b/opencl/test/unit_test/mocks/mock_kernel.h
index d21e4dbef8..300fecd3ee 100644
--- a/opencl/test/unit_test/mocks/mock_kernel.h
+++ b/opencl/test/unit_test/mocks/mock_kernel.h
@@ -223,7 +223,7 @@ class MockKernel : public Kernel {
     void setUsingSharedArgs(bool usingSharedArgValue) { this->usingSharedObjArgs = usingSharedArgValue; }
 
     void makeResident(CommandStreamReceiver &commandStreamReceiver) override;
-    void getResidency(std::vector<Surface *> &dst) override;
+    void getResidency(std::vector<Surface *> &dst, uint32_t rootDeviceIndex) override;
     void takeOwnership() const override {
         Kernel::takeOwnership();
         takeOwnershipCalls++;
diff --git a/opencl/test/unit_test/program/program_tests.cpp b/opencl/test/unit_test/program/program_tests.cpp
index 41cb6020e3..fa673ad383 100644
--- a/opencl/test/unit_test/program/program_tests.cpp
+++ b/opencl/test/unit_test/program/program_tests.cpp
@@ -1355,7 +1355,7 @@ HWTEST_F(PatchTokenTests, givenKernelRequiringConstantAllocationWhenMakeResident
     EXPECT_EQ(0u, pCommandStreamReceiver->residency.size());
 
     std::vector<Surface *> surfaces;
-    pKernel->getResidency(surfaces);
+    pKernel->getResidency(surfaces, rootDeviceIndex);
     EXPECT_EQ(2u, surfaces.size());
 
     for (Surface *surface : surfaces) {
diff --git a/opencl/test/unit_test/sampler/sampler_set_arg_tests.cpp b/opencl/test/unit_test/sampler/sampler_set_arg_tests.cpp
index 4c44fa434a..f613d09dac 100644
--- a/opencl/test/unit_test/sampler/sampler_set_arg_tests.cpp
+++ b/opencl/test/unit_test/sampler/sampler_set_arg_tests.cpp
@@ -133,7 +133,7 @@ HWTEST_F(SamplerSetArgTest, WhenSettingKernelArgSamplerThenSamplerStatesAreCorre
     EXPECT_EQ(SAMPLER_STATE::MIP_MODE_FILTER_NEAREST, samplerState->getMipModeFilter());
 
     std::vector<Surface *> surfaces;
-    pKernel->getResidency(surfaces);
+    pKernel->getResidency(surfaces, rootDeviceIndex);
     EXPECT_EQ(0u, surfaces.size());
 }