diff --git a/runtime/command_queue/enqueue_common.h b/runtime/command_queue/enqueue_common.h
index 8bb49475e1..ccc71a77ed 100644
--- a/runtime/command_queue/enqueue_common.h
+++ b/runtime/command_queue/enqueue_common.h
@@ -489,10 +489,16 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
     }
 
     auto mediaSamplerRequired = false;
+    Kernel *kernel = nullptr;
     for (auto &dispatchInfo : multiDispatchInfo) {
-        dispatchInfo.getKernel()->makeResident(commandStreamReceiver);
-        requiresCoherency |= dispatchInfo.getKernel()->requiresCoherency();
-        mediaSamplerRequired |= dispatchInfo.getKernel()->isVmeKernel();
+        if (kernel != dispatchInfo.getKernel()) {
+            kernel = dispatchInfo.getKernel();
+        } else {
+            continue;
+        }
+        kernel->makeResident(commandStreamReceiver);
+        requiresCoherency |= kernel->requiresCoherency();
+        mediaSamplerRequired |= kernel->isVmeKernel();
     }
 
     if (mediaSamplerRequired) {
@@ -617,11 +623,17 @@ void CommandQueueHw<GfxFamily>::enqueueBlocked(
     } else {
         //store task data in event
         std::vector<Surface *> allSurfaces;
+        Kernel *kernel = nullptr;
         for (auto &dispatchInfo : multiDispatchInfo) {
-            dispatchInfo.getKernel()->getResidency(allSurfaces);
-            for (auto &surface : CreateRange(surfaces, surfaceCount)) {
-                allSurfaces.push_back(surface->duplicate());
+            if (kernel != dispatchInfo.getKernel()) {
+                kernel = dispatchInfo.getKernel();
+            } else {
+                continue;
             }
+            kernel->getResidency(allSurfaces);
+        }
+        for (auto &surface : CreateRange(surfaces, surfaceCount)) {
+            allSurfaces.push_back(surface->duplicate());
         }
 
         auto kernelOperation = std::unique_ptr<KernelOperation>(blockedCommandsData); // marking ownership
diff --git a/runtime/kernel/kernel.h b/runtime/kernel/kernel.h
index 114dbf776a..175ed00635 100644
--- a/runtime/kernel/kernel.h
+++ b/runtime/kernel/kernel.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017, Intel Corporation
+ * Copyright (c) 2017 - 2018, Intel Corporation
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -274,9 +274,9 @@ class Kernel : public BaseObject<_cl_kernel> {
     bool isVmeKernel() { return kernelInfo.isVmeWorkload; };
 
     //residency for kernel surfaces
-    void makeResident(CommandStreamReceiver &commandStreamReceiver);
+    MOCKABLE_VIRTUAL void makeResident(CommandStreamReceiver &commandStreamReceiver);
     void updateWithCompletionStamp(CommandStreamReceiver &commandStreamReceiver, CompletionStamp *completionStamp);
-    void getResidency(std::vector<Surface *> &dst);
+    MOCKABLE_VIRTUAL void getResidency(std::vector<Surface *> &dst);
     bool requiresCoherency();
     void resetSharedObjectsPatchAddresses();
     bool isUsingSharedObjArgs() { return usingSharedObjArgs; }
diff --git a/unit_tests/command_queue/command_queue_hw_tests.cpp b/unit_tests/command_queue/command_queue_hw_tests.cpp
index 83dc00840f..7ea5282b3d 100644
--- a/unit_tests/command_queue/command_queue_hw_tests.cpp
+++ b/unit_tests/command_queue/command_queue_hw_tests.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017, Intel Corporation
+ * Copyright (c) 2017 - 2018, Intel Corporation
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -30,6 +30,7 @@
 #include "runtime/memory_manager/memory_manager.h"
 #include "runtime/memory_manager/surface.h"
 #include "unit_tests/command_queue/command_queue_fixture.h"
+#include "unit_tests/fixtures/buffer_fixture.h"
 #include "unit_tests/fixtures/context_fixture.h"
 #include "unit_tests/fixtures/device_fixture.h"
 #include "unit_tests/fixtures/memory_management_fixture.h"
@@ -990,3 +991,85 @@ HWTEST_F(OOQueueHwTest, givenBlockedOutOfOrderCmdQueueAndAsynchronouslyCompleted
     EXPECT_EQ(virtualEventTaskLevel + 1, cmdQHw->taskLevel);
     EXPECT_EQ(virtualEventTaskLevel + 1, mockCSR->lastTaskLevelToFlushTask);
 }
+
+HWTEST_F(CommandQueueHwTest, givenWalkerSplitEnqueueNDRangeWhenNoBlockedThenKernelMakeResidentCalledOnce) {
+    MockKernelWithInternals mockKernelWithInternals(*pDevice);
+    auto mockKernel = mockKernelWithInternals.mockKernel;
+    mockKernelWithInternals.mockProgram->setAllowNonUniform(true);
+    auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
+    csr.storeMakeResidentAllocations = true;
+
+    // gws is deliberately not a multiple of lws so that, per the test name,
+    // the enqueue is split into more than one walker.
+    size_t offset = 0;
+    size_t gws = 63;
+    size_t lws = 16;
+
+    cl_int status = pCmdQ->enqueueKernel(mockKernel, 1, &offset, &gws, &lws, 0, nullptr, nullptr);
+    EXPECT_EQ(CL_SUCCESS, status);
+    EXPECT_EQ(1u, mockKernel->makeResidentCalls);
+
+    // Every allocation recorded by the CSR must have been made resident exactly once.
+    for (const auto &allocationAndCount : csr.makeResidentAllocations) {
+        EXPECT_EQ(1u, allocationAndCount.second);
+    }
+}
+
+HWTEST_F(CommandQueueHwTest, givenWalkerSplitEnqueueNDRangeWhenBlockedThenKernelGetResidencyCalledOnce) {
+    UserEvent userEvent(context);
+    MockKernelWithInternals mockKernelWithInternals(*pDevice);
+    auto mockKernel = mockKernelWithInternals.mockKernel;
+    mockKernelWithInternals.mockProgram->setAllowNonUniform(true);
+    auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
+    csr.storeMakeResidentAllocations = true;
+
+    // gws is not a multiple of lws so that, per the test name, the walker gets split.
+    size_t offset = 0;
+    size_t gws = 63;
+    size_t lws = 16;
+
+    // The enqueue waits on a user event that has not been completed yet.
+    cl_event blockedEvent = &userEvent;
+    cl_int status = pCmdQ->enqueueKernel(mockKernel, 1, &offset, &gws, &lws, 1, &blockedEvent, nullptr);
+    EXPECT_EQ(CL_SUCCESS, status);
+    EXPECT_EQ(1u, mockKernel->getResidencyCalls);
+
+    // Complete the user event before inspecting what the CSR recorded.
+    userEvent.setStatus(CL_COMPLETE);
+
+    // Every allocation recorded by the CSR must have been made resident exactly once.
+    for (const auto &allocationAndCount : csr.makeResidentAllocations) {
+        EXPECT_EQ(1u, allocationAndCount.second);
+    }
+}
+
+HWTEST_F(CommandQueueHwTest, givenKernelSplitEnqueueReadBufferWhenBlockedThenEnqueueSurfacesMakeResidentIsCalledOnce) {
+    UserEvent userEvent(context);
+    auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
+    csr.storeMakeResidentAllocations = true;
+
+    BufferDefaults::context = context;
+    std::unique_ptr<Buffer> buffer(BufferHelper<>::create());
+    GraphicsAllocation *bufferAllocation = buffer->getGraphicsAllocation();
+
+    // Destination pointer sits one byte below a cache-line boundary inside the
+    // stack array, i.e. it is deliberately misaligned (see "KernelSplit" in the test name).
+    char hostMemory[3 * MemoryConstants::cacheLineSize];
+    char *hostPtr = alignUp(&hostMemory[MemoryConstants::cacheLineSize], MemoryConstants::cacheLineSize) - 1;
+
+    // The read waits on a user event that has not been completed yet.
+    cl_event blockedEvent = &userEvent;
+
+    cl_int status = pCmdQ->enqueueReadBuffer(buffer.get(), CL_FALSE, 0, MemoryConstants::cacheLineSize, hostPtr, 1, &blockedEvent, nullptr);
+    EXPECT_EQ(CL_SUCCESS, status);
+
+    // Complete the user event before inspecting what the CSR recorded.
+    userEvent.setStatus(CL_COMPLETE);
+
+    for (const auto &allocationAndCount : csr.makeResidentAllocations) {
+        // The buffer allocation is expected three times: once for each kernel produced by
+        // the split plus once as the surface passed with the enqueueReadBuffer call itself.
+        const uint32_t expected = (allocationAndCount.first == bufferAllocation) ? 3u : 1u;
+        EXPECT_EQ(expected, allocationAndCount.second);
+    }
+}
diff --git a/unit_tests/libult/ult_command_stream_receiver.h b/unit_tests/libult/ult_command_stream_receiver.h
index e76ec7b9c7..691610c6ad 100644
--- a/unit_tests/libult/ult_command_stream_receiver.h
+++ b/unit_tests/libult/ult_command_stream_receiver.h
@@ -43,14 +43,14 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw<GfxFamily> {
     using BaseClass::CommandStreamReceiver::dispatchMode;
     using BaseClass::CommandStreamReceiver::flushStamp;
     using BaseClass::CommandStreamReceiver::isPreambleSent;
+    using BaseClass::CommandStreamReceiver::lastMediaSamplerConfig;
+    using BaseClass::CommandStreamReceiver::lastPreemptionMode;
     using BaseClass::CommandStreamReceiver::lastSentCoherencyRequest;
     using BaseClass::CommandStreamReceiver::lastSentL3Config;
     using BaseClass::CommandStreamReceiver::lastSentThreadAribtrationPolicy;
+    using BaseClass::CommandStreamReceiver::lastVmeSubslicesConfig;
     using BaseClass::CommandStreamReceiver::latestFlushedTaskCount;
     using BaseClass::CommandStreamReceiver::latestSentStatelessMocsConfig;
-    using BaseClass::CommandStreamReceiver::lastMediaSamplerConfig;
-    using BaseClass::CommandStreamReceiver::lastPreemptionMode;
-    using BaseClass::CommandStreamReceiver::lastVmeSubslicesConfig;
     using BaseClass::CommandStreamReceiver::taskCount;
     using BaseClass::CommandStreamReceiver::taskLevel;
 
@@ -65,6 +65,7 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw<GfxFamily> {
         tempTagLocation = new GraphicsAllocation(nullptr, 0);
         this->tagAllocation = tempTagLocation;
         this->tagAddress = reinterpret_cast<uint32_t *>(tempTagLocation->getUnderlyingBuffer());
+        this->storeMakeResidentAllocations = false;
     }
 
     virtual MemoryManager *createMemoryManager(bool enable64kbPages) override {
@@ -82,6 +83,23 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw<GfxFamily> {
     using SamplerCacheFlushState = CommandStreamReceiver::SamplerCacheFlushState;
     SamplerCacheFlushState peekSamplerCacheFlushRequired() const { return this->samplerCacheFlushRequired; }
 
+    // Test hook: optionally counts how many times each allocation is made
+    // resident, then forwards to the base class implementation.
+    void makeResident(GraphicsAllocation &gfxAllocation) override {
+        if (storeMakeResidentAllocations) {
+            // std::map::operator[] value-initializes a missing counter to 0,
+            // so a single lookup covers both the first and any repeated call
+            // for the same allocation.
+            ++makeResidentAllocations[&gfxAllocation];
+        }
+        // Always delegate, so residency handling is the same whether or not
+        // counting is enabled.
+        BaseClass::makeResident(gfxAllocation);
+    }
+
+    std::map<GraphicsAllocation *, uint32_t> makeResidentAllocations;
+    bool storeMakeResidentAllocations;
+
   protected:
     using BaseClass::CommandStreamReceiver::memoryManager;
     using BaseClass::CommandStreamReceiver::tagAddress;
diff --git a/unit_tests/mocks/mock_kernel.cpp b/unit_tests/mocks/mock_kernel.cpp
index 9efd2650e2..198c927bc9 100644
--- a/unit_tests/mocks/mock_kernel.cpp
+++ b/unit_tests/mocks/mock_kernel.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017, Intel Corporation
+ * Copyright (c) 2017 - 2018, Intel Corporation
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -53,4 +53,14 @@ bool MockKernel::isPatched() const {
     return true;
 }
 
+void MockKernel::makeResident(CommandStreamReceiver &commandStreamReceiver) {
+    ++makeResidentCalls; // call counter that tests read back
+    Kernel::makeResident(commandStreamReceiver);
+}
+
+void MockKernel::getResidency(std::vector<Surface *> &dst) {
+    ++getResidencyCalls; // call counter that tests read back
+    Kernel::getResidency(dst);
+}
+
 } // namespace OCLRT
diff --git a/unit_tests/mocks/mock_kernel.h b/unit_tests/mocks/mock_kernel.h
index 709b3639af..085eba0546 100644
--- a/unit_tests/mocks/mock_kernel.h
+++ b/unit_tests/mocks/mock_kernel.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017, Intel Corporation
+ * Copyright (c) 2017 - 2018, Intel Corporation
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -182,6 +182,12 @@ class MockKernel : public Kernel {
     using Kernel::kernelArgHandlers;
 
     void setUsingSharedArgs(bool usingSharedArgValue) { this->usingSharedObjArgs = usingSharedArgValue; }
+
+    void makeResident(CommandStreamReceiver &commandStreamReceiver) override;
+    void getResidency(std::vector<Surface *> &dst) override;
+
+    uint32_t makeResidentCalls = 0;
+    uint32_t getResidencyCalls = 0;
 };
 
 //class below have enough internals to service Enqueue operation.
diff --git a/unit_tests/mocks/mock_program.h b/unit_tests/mocks/mock_program.h
index 919d7f6605..e0a8d6b013 100644
--- a/unit_tests/mocks/mock_program.h
+++ b/unit_tests/mocks/mock_program.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017, Intel Corporation
+ * Copyright (c) 2017 - 2018, Intel Corporation
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -99,6 +99,9 @@ class MockProgram : public Program {
     void SetLLVMBinarySize(size_t bsz) { llvmBinarySize = bsz; }
 
     uint64_t getHash();
+    void setAllowNonUniform(bool allow) {
+        this->allowNonUniform = allow; // lets tests set the allowNonUniform flag directly
+    }
 
     bool contextSet = false;
 };