diff --git a/Jenkinsfile b/Jenkinsfile
index 4a9b01ba98..6ba6916a11 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -1,5 +1,5 @@
 #!groovy
 dependenciesRevision='4423584cd5bfda14bb324142159b5affb5461d26-1165'
 strategy='EQUAL'
-allowedCD=272
+allowedCD=273
 allowedF=4
diff --git a/runtime/api/api.cpp b/runtime/api/api.cpp
index 4684bf193d..5e88048171 100644
--- a/runtime/api/api.cpp
+++ b/runtime/api/api.cpp
@@ -1797,7 +1797,7 @@ cl_int CL_API_CALL clSetUserEventStatus(cl_event event,
         return retVal;
     }
 
-    auto commandStreamReceiverOwnership = userEvent->getContext()->getDevice(0)->getEngine(0).commandStreamReceiver->obtainUniqueOwnership();
+    auto commandStreamReceiverOwnership = userEvent->getContext()->getDevice(0)->getDefaultEngine().commandStreamReceiver->obtainUniqueOwnership();
     userEvent->setStatus(executionStatus);
     return retVal;
 }
diff --git a/runtime/device/device.cpp b/runtime/device/device.cpp
index c1b083b56e..aec3e0b247 100644
--- a/runtime/device/device.cpp
+++ b/runtime/device/device.cpp
@@ -106,70 +106,90 @@ Device::~Device() {
 }
 
 bool Device::createDeviceImpl(const HardwareInfo *pHwInfo, Device &outDevice) {
-    uint32_t deviceCsrIndex = 0;
     auto executionEnvironment = outDevice.executionEnvironment;
     executionEnvironment->initGmm(pHwInfo);
-    if (!executionEnvironment->initializeCommandStreamReceiver(pHwInfo, outDevice.getDeviceIndex(), deviceCsrIndex)) {
-        return false;
-    }
-    executionEnvironment->initializeMemoryManager(outDevice.getEnabled64kbPages(), outDevice.getEnableLocalMemory(),
-                                                  outDevice.getDeviceIndex(), deviceCsrIndex);
 
-    auto osContext = executionEnvironment->memoryManager->createAndRegisterOsContext({getChosenEngineType(*pHwInfo), 0});
-    auto commandStreamReceiver = executionEnvironment->commandStreamReceivers[outDevice.getDeviceIndex()][deviceCsrIndex].get();
-    commandStreamReceiver->setOsContext(*osContext);
-    if (!commandStreamReceiver->initializeTagAllocation()) {
+    if (!createEngines(pHwInfo, outDevice)) { // CSR, memory manager and OS context setup now lives in createEngines()
         return false;
     }
 
-    outDevice.engines[0] = {commandStreamReceiver, osContext};
+    executionEnvironment->memoryManager->setDefaultEngineIndex(outDevice.defaultEngineIndex); // index selected inside createEngines()
 
-    auto pDevice = &outDevice;
-    if (!pDevice->osTime) {
-        pDevice->osTime = OSTime::create(commandStreamReceiver->getOSInterface());
+    auto osInterface = executionEnvironment->osInterface.get(); // taken from the execution environment, no longer from a CSR
+
+    if (!outDevice.osTime) { // keep an osTime that was already set on the device
+        outDevice.osTime = OSTime::create(osInterface);
     }
-    pDevice->driverInfo.reset(DriverInfo::create(commandStreamReceiver->getOSInterface()));
+    outDevice.driverInfo.reset(DriverInfo::create(osInterface));
 
-    pDevice->initializeCaps();
+    outDevice.initializeCaps();
 
-    if (pDevice->osTime->getOSInterface()) {
+    if (outDevice.osTime->getOSInterface()) { // performance counters are only created when osTime has an OS interface
         if (pHwInfo->capabilityTable.instrumentationEnabled) {
-            pDevice->performanceCounters = createPerformanceCountersFunc(pDevice->osTime.get());
-            pDevice->performanceCounters->initialize(pHwInfo);
+            outDevice.performanceCounters = createPerformanceCountersFunc(outDevice.osTime.get());
+            outDevice.performanceCounters->initialize(pHwInfo);
         }
     }
 
     uint32_t deviceHandle = 0;
-    if (commandStreamReceiver->getOSInterface()) {
-        deviceHandle = commandStreamReceiver->getOSInterface()->getDeviceHandle();
+    if (osInterface) { // deviceHandle stays 0 when there is no OS interface
+        deviceHandle = osInterface->getDeviceHandle();
     }
 
-    if (pDevice->deviceInfo.sourceLevelDebuggerActive) {
-        pDevice->executionEnvironment->sourceLevelDebugger->notifyNewDevice(deviceHandle);
+    if (outDevice.deviceInfo.sourceLevelDebuggerActive) {
+        outDevice.executionEnvironment->sourceLevelDebugger->notifyNewDevice(deviceHandle);
     }
 
-    outDevice.executionEnvironment->memoryManager->setForce32BitAllocations(pDevice->getDeviceInfo().force32BitAddressess);
-    outDevice.executionEnvironment->memoryManager->setDefaultEngineIndex(deviceCsrIndex);
+    outDevice.executionEnvironment->memoryManager->setForce32BitAllocations(outDevice.getDeviceInfo().force32BitAddressess);
 
-    if (pDevice->preemptionMode == PreemptionMode::MidThread || pDevice->isSourceLevelDebuggerActive()) {
+    if (outDevice.preemptionMode == PreemptionMode::MidThread || outDevice.isSourceLevelDebuggerActive()) {
         size_t requiredSize = pHwInfo->capabilityTable.requiredPreemptionSurfaceSize;
         size_t alignment = 256 * MemoryConstants::kiloByte;
-        bool uncacheable = pDevice->getWaTable()->waCSRUncachable;
-        pDevice->preemptionAllocation = outDevice.executionEnvironment->memoryManager->allocateGraphicsMemory(requiredSize, alignment, false, uncacheable);
-        if (!pDevice->preemptionAllocation) {
+        bool uncacheable = outDevice.getWaTable()->waCSRUncachable;
+        outDevice.preemptionAllocation = outDevice.executionEnvironment->memoryManager->allocateGraphicsMemory(requiredSize, alignment, false, uncacheable);
+        if (!outDevice.preemptionAllocation) { // allocation failure aborts device creation
             return false;
         }
-        commandStreamReceiver->setPreemptionCsrAllocation(pDevice->preemptionAllocation);
     }
 
-    if (DebugManager.flags.EnableExperimentalCommandBuffer.get() > 0) {
-        commandStreamReceiver->setExperimentalCmdBuffer(std::unique_ptr<ExperimentalCommandBuffer>(
-            new ExperimentalCommandBuffer(commandStreamReceiver, pDevice->getDeviceInfo().profilingTimerResolution)));
+    for (auto engine : outDevice.engines) { // per-engine CSR setup; EngineControl is copied by value here
+        auto csr = engine.commandStreamReceiver;
+        csr->setPreemptionCsrAllocation(outDevice.preemptionAllocation); // runs even when the branch above allocated nothing
+        if (DebugManager.flags.EnableExperimentalCommandBuffer.get() > 0) {
+            csr->setExperimentalCmdBuffer(std::make_unique<ExperimentalCommandBuffer>(csr, outDevice.getDeviceInfo().profilingTimerResolution));
+        }
     }
 
     return true;
 }
 
+bool Device::createEngines(const HardwareInfo *pHwInfo, Device &outDevice) { // one engine per gpgpuEngineInstances entry; false on first failure
+    auto executionEnvironment = outDevice.executionEnvironment;
+    EngineType defaultEngineType = getChosenEngineType(*pHwInfo); // used below to pick defaultEngineIndex
+
+    for (uint32_t deviceCsrIndex = 0; deviceCsrIndex < gpgpuEngineInstances.size(); deviceCsrIndex++) {
+        if (!executionEnvironment->initializeCommandStreamReceiver(pHwInfo, outDevice.getDeviceIndex(), deviceCsrIndex)) {
+            return false;
+        }
+        executionEnvironment->initializeMemoryManager(outDevice.getEnabled64kbPages(), outDevice.getEnableLocalMemory(), // called once per engine index
+                                                      outDevice.getDeviceIndex(), deviceCsrIndex);
+
+        auto osContext = executionEnvironment->memoryManager->createAndRegisterOsContext(gpgpuEngineInstances[deviceCsrIndex]);
+        auto commandStreamReceiver = executionEnvironment->commandStreamReceivers[outDevice.getDeviceIndex()][deviceCsrIndex].get(); // non-owning pointer
+        commandStreamReceiver->setOsContext(*osContext);
+        if (!commandStreamReceiver->initializeTagAllocation()) {
+            return false;
+        }
+
+        if (gpgpuEngineInstances[deviceCsrIndex].type == defaultEngineType && gpgpuEngineInstances[deviceCsrIndex].id == 0) { // instance 0 of the chosen type
+            outDevice.defaultEngineIndex = deviceCsrIndex; // NOTE(review): left at its prior value if no entry matches - confirm that is intended
+        }
+
+        outDevice.engines[deviceCsrIndex] = {commandStreamReceiver, osContext}; // engines[] uses the same index as the CSR table
+    }
+    return true;
+}
+
 const HardwareInfo *Device::getDeviceInitHwInfo(const HardwareInfo *pHwInfoIn) {
     return pHwInfoIn ? pHwInfoIn : platformDevices[0];
 }
diff --git a/runtime/device/device.h b/runtime/device/device.h
index 87becc8631..96a417dfa1 100644
--- a/runtime/device/device.h
+++ b/runtime/device/device.h
@@ -73,8 +73,6 @@ class Device : public BaseObject<_cl_device_id> {
     EngineControl &getEngine(uint32_t engineId);
     EngineControl &getDefaultEngine();
 
-    volatile uint32_t *getTagAddress() const;
-
     const char *getProductAbbrev() const;
     const std::string getFamilyNameWithType() const;
 
@@ -136,6 +134,7 @@ class Device : public BaseObject<_cl_device_id> {
     }
 
     static bool createDeviceImpl(const HardwareInfo *pHwInfo, Device &outDevice);
+    static bool createEngines(const HardwareInfo *pHwInfo, Device &outDevice);
     static const HardwareInfo *getDeviceInitHwInfo(const HardwareInfo *pHwInfoIn);
     MOCKABLE_VIRTUAL void initializeCaps();
     void setupFp64Flags();
@@ -177,7 +176,7 @@ inline EngineControl &Device::getEngine(uint32_t engineId) {
 }
 
 inline EngineControl &Device::getDefaultEngine() {
-    return engines[defaultEngineIndex];
+    return getEngine(defaultEngineIndex); // goes through getEngine() instead of indexing engines[] directly
 }
 
 inline MemoryManager *Device::getMemoryManager() const {
diff --git a/runtime/event/user_event.cpp b/runtime/event/user_event.cpp
index d441706996..62f02fa603 100644
--- a/runtime/event/user_event.cpp
+++ b/runtime/event/user_event.cpp
@@ -35,7 +35,7 @@ uint32_t UserEvent::getTaskLevel() {
     uint32_t taskLevel = 0;
     if (ctx != nullptr) {
         Device *pDevice = ctx->getDevice(0);
-        auto csr = pDevice->getEngine(0).commandStreamReceiver;
+        auto csr = pDevice->getDefaultEngine().commandStreamReceiver;
         taskLevel = csr->peekTaskLevel();
     }
     return taskLevel;
diff --git a/runtime/memory_manager/deferrable_allocation_deletion.cpp b/runtime/memory_manager/deferrable_allocation_deletion.cpp
index 11d39fcf8c..5179dfd891 100644
--- a/runtime/memory_manager/deferrable_allocation_deletion.cpp
+++ b/runtime/memory_manager/deferrable_allocation_deletion.cpp
@@ -19,13 +19,11 @@ void DeferrableAllocationDeletion::apply() {
 
         for (auto &deviceCsrs : memoryManager.getCommandStreamReceivers()) {
             for (auto &csr : deviceCsrs) {
-                if (csr) {
-                    auto contextId = csr->getOsContext().getContextId();
-                    if (graphicsAllocation.isUsedByContext(contextId)) {
-                        auto currentContextTaskCount = *csr->getTagAddress();
-                        if (graphicsAllocation.getTaskCount(contextId) <= currentContextTaskCount) {
-                            graphicsAllocation.resetTaskCount(contextId);
-                        }
+                auto contextId = csr->getOsContext().getContextId();
+                if (graphicsAllocation.isUsedByContext(contextId)) {
+                    auto currentContextTaskCount = *csr->getTagAddress();
+                    if (graphicsAllocation.getTaskCount(contextId) <= currentContextTaskCount) {
+                        graphicsAllocation.resetTaskCount(contextId);
                     }
                 }
             }
diff --git a/runtime/memory_manager/memory_manager.cpp b/runtime/memory_manager/memory_manager.cpp
index 90246a5b1c..3e80cdab6d 100644
--- a/runtime/memory_manager/memory_manager.cpp
+++ b/runtime/memory_manager/memory_manager.cpp
@@ -137,7 +137,7 @@ void MemoryManager::freeGraphicsMemory(GraphicsAllocation *gfxAllocation) {
 //if not in use destroy in place
 //if in use pass to temporary allocation list that is cleaned on blocking calls
 void MemoryManager::checkGpuUsageAndDestroyGraphicsAllocations(GraphicsAllocation *gfxAllocation) {
-    if (!gfxAllocation->isUsed() || gfxAllocation->getTaskCount(0u) <= *getCommandStreamReceivers()[0][defaultEngineIndex]->getTagAddress()) {
+    if (!gfxAllocation->isUsed() || gfxAllocation->getTaskCount(defaultEngineIndex) <= *getCommandStreamReceivers()[0][defaultEngineIndex]->getTagAddress()) {
         freeGraphicsMemory(gfxAllocation);
     } else {
         getCommandStreamReceivers()[0][defaultEngineIndex]->getInternalAllocationStorage()->storeAllocation(std::unique_ptr<GraphicsAllocation>(gfxAllocation), TEMPORARY_ALLOCATION);
diff --git a/runtime/platform/platform.cpp b/runtime/platform/platform.cpp
index d1b7b39f32..f77d16674e 100644
--- a/runtime/platform/platform.cpp
+++ b/runtime/platform/platform.cpp
@@ -174,7 +174,7 @@ bool Platform::initialize() {
         initSipKernel(sipType, *devices[0]);
     }
 
-    CommandStreamReceiverType csrType = this->devices[0]->getEngine(0).commandStreamReceiver->getType();
+    CommandStreamReceiverType csrType = this->devices[0]->getDefaultEngine().commandStreamReceiver->getType();
     if (csrType != CommandStreamReceiverType::CSR_HW) {
         executionEnvironment->initAubCenter(&hwInfo[0], this->devices[0]->getEnableLocalMemory(), "aubfile");
     }
diff --git a/runtime/sharings/gl/gl_sync_event.cpp b/runtime/sharings/gl/gl_sync_event.cpp
index 4fb18a78d1..d1a735f248 100644
--- a/runtime/sharings/gl/gl_sync_event.cpp
+++ b/runtime/sharings/gl/gl_sync_event.cpp
@@ -53,7 +53,7 @@ void GlSyncEvent::updateExecutionStatus() {
 }
 
 uint32_t GlSyncEvent::getTaskLevel() {
-    auto csr = ctx->getDevice(0)->getEngine(0).commandStreamReceiver;
+    auto csr = ctx->getDevice(0)->getDefaultEngine().commandStreamReceiver; // default engine of the context's first device, not engine 0
     return csr->peekTaskLevel();
 }
 } // namespace OCLRT
diff --git a/unit_tests/aub_tests/fixtures/aub_fixture.h b/unit_tests/aub_tests/fixtures/aub_fixture.h
index 38837872d4..045e4ea9b2 100644
--- a/unit_tests/aub_tests/fixtures/aub_fixture.h
+++ b/unit_tests/aub_tests/fixtures/aub_fixture.h
@@ -44,8 +44,9 @@ class AUBFixture : public CommandQueueHwFixture {
         }
 
         executionEnvironment->commandStreamReceivers.resize(deviceIndex + 1);
-        executionEnvironment->commandStreamReceivers[deviceIndex][0].reset(this->csr);
+
         device.reset(MockDevice::create<MockDevice>(&hwInfo, executionEnvironment, deviceIndex));
+        device->resetCommandStreamReceiver(this->csr);
 
         CommandQueueHwFixture::SetUp(AUBFixture::device.get(), cl_command_queue_properties(0));
     }
diff --git a/unit_tests/command_queue/command_queue_tests.cpp b/unit_tests/command_queue/command_queue_tests.cpp
index 80da4ad97a..4b11a0611a 100644
--- a/unit_tests/command_queue/command_queue_tests.cpp
+++ b/unit_tests/command_queue/command_queue_tests.cpp
@@ -195,6 +195,14 @@ TEST(CommandQueue, GivenOOQwhenUpdateFromCompletionStampWithTrueIsCalledThenTask
     EXPECT_EQ(cs.flushStamp, cmdQ.flushStamp->peekStamp());
 }
 
+TEST(CommandQueue, givenDeviceWhenCreatingCommandQueueThenPickCsrFromDefaultEngine) {
+    std::unique_ptr<MockDevice> mockDevice(MockDevice::createWithNewExecutionEnvironment<MockDevice>(platformDevices[0]));
+    CommandQueue cmdQ(nullptr, mockDevice.get(), 0); // queue built with a null context on the mock device
+
+    auto defaultCsr = mockDevice->getDefaultEngine().commandStreamReceiver;
+    EXPECT_EQ(defaultCsr, &cmdQ.getCommandStreamReceiver()); // the queue must expose the default engine's CSR
+}
+
 TEST(CommandQueue, givenCmdQueueBlockedByReadyVirtualEventWhenUnblockingThenUpdateFlushTaskFromEvent) {
     std::unique_ptr<MockDevice> mockDevice(MockDevice::createWithNewExecutionEnvironment<MockDevice>(nullptr));
     auto context = new MockContext;
diff --git a/unit_tests/command_queue/enqueue_handler_tests.cpp b/unit_tests/command_queue/enqueue_handler_tests.cpp
index 7ba5460247..8332a79c79 100644
--- a/unit_tests/command_queue/enqueue_handler_tests.cpp
+++ b/unit_tests/command_queue/enqueue_handler_tests.cpp
@@ -342,8 +342,8 @@ HWTEST_F(EnqueueHandlerTestBasic, givenEnqueueHandlerWhenCommandIsBlokingThenCom
     auto executionEnvironment = new ExecutionEnvironment;
     auto mockCsr = new MockCsrBase<FamilyType>(tag, *executionEnvironment);
     executionEnvironment->commandStreamReceivers.resize(1);
-    executionEnvironment->commandStreamReceivers[0][0].reset(mockCsr);
     std::unique_ptr<MockDevice> pDevice(MockDevice::createWithExecutionEnvironment<MockDevice>(nullptr, executionEnvironment, 0u));
+    pDevice->resetCommandStreamReceiver(mockCsr);
     auto context = std::make_unique<MockContext>(pDevice.get());
     MockKernelWithInternals kernelInternals(*pDevice, context.get());
     Kernel *kernel = kernelInternals.mockKernel;
diff --git a/unit_tests/device/device_tests.cpp b/unit_tests/device/device_tests.cpp
index 520737f305..138148d0cd 100644
--- a/unit_tests/device/device_tests.cpp
+++ b/unit_tests/device/device_tests.cpp
@@ -44,9 +44,11 @@ TEST_F(DeviceTest, getSupportedClVersion) {
 }
 
 TEST_F(DeviceTest, getTagAddress) {
-    auto tagAddress = pDevice->getDefaultEngine().commandStreamReceiver->getTagAddress();
-    ASSERT_NE(nullptr, const_cast<uint32_t *>(tagAddress));
-    EXPECT_EQ(initialHardwareTag, *tagAddress);
+    for (uint32_t i = 0; i < static_cast<uint32_t>(gpgpuEngineInstances.size()); i++) { // check every engine, not only the default one
+        auto tagAddress = pDevice->getEngine(i).commandStreamReceiver->getTagAddress();
+        ASSERT_NE(nullptr, const_cast<uint32_t *>(tagAddress)); // ASSERT so the dereference below is never reached with a null tag
+        EXPECT_EQ(initialHardwareTag, *tagAddress);
+    }
 }
 
 TEST_F(DeviceTest, WhenGetOSTimeThenNotNull) {
@@ -84,26 +86,13 @@ TEST_F(DeviceTest, retainAndRelease) {
 TEST_F(DeviceTest, getEngineTypeDefault) {
     auto pTestDevice = std::unique_ptr<Device>(createWithUsDeviceId(0));
 
-    EngineType actualEngineType = pDevice->getEngine(0).osContext->getEngineType().type;
+    EngineType actualEngineType = pDevice->getDefaultEngine().osContext->getEngineType().type; // read from the default engine instead of index 0
     EngineType defaultEngineType = pDevice->getHardwareInfo().capabilityTable.defaultEngineType;
 
-    EXPECT_EQ(&pDevice->getEngine(0).commandStreamReceiver->getOsContext(), pDevice->getEngine(0).osContext);
+    EXPECT_EQ(&pDevice->getDefaultEngine().commandStreamReceiver->getOsContext(), pDevice->getDefaultEngine().osContext); // CSR and EngineControl share one OsContext
     EXPECT_EQ(defaultEngineType, actualEngineType);
 }
 
-TEST_F(DeviceTest, givenDebugVariableOverrideEngineTypeWhenDeviceIsCreatedThenUseDebugNotDefaul) {
-    EngineType expectedEngine = EngineType::ENGINE_VCS;
-    DebugManagerStateRestore dbgRestorer;
-    DebugManager.flags.NodeOrdinal.set(static_cast<int32_t>(expectedEngine));
-    auto pTestDevice = std::unique_ptr<Device>(createWithUsDeviceId(0));
-
-    EngineType actualEngineType = pTestDevice->getEngine(0).osContext->getEngineType().type;
-    EngineType defaultEngineType = pDevice->getHardwareInfo().capabilityTable.defaultEngineType;
-
-    EXPECT_NE(defaultEngineType, actualEngineType);
-    EXPECT_EQ(expectedEngine, actualEngineType);
-}
-
 TEST(DeviceCleanup, givenDeviceWhenItIsDestroyedThenFlushBatchedSubmissionsIsCalled) {
     auto mockDevice = std::unique_ptr<MockDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(nullptr));
     MockCommandStreamReceiver *csr = new MockCommandStreamReceiver(*mockDevice->getExecutionEnvironment());
@@ -161,18 +150,22 @@ TEST(DeviceCreation, givenDefaultHwCsrInDebugVarsWhenDeviceIsCreatedThenIsSimula
 TEST(DeviceCreation, givenDeviceWhenItIsCreatedThenOsContextIsRegistredInMemoryManager) {
     auto device = std::unique_ptr<Device>(MockDevice::createWithNewExecutionEnvironment<Device>(nullptr));
     auto memoryManager = device->getMemoryManager();
-    EXPECT_EQ(1u, memoryManager->getOsContextCount());
+    EXPECT_EQ(gpgpuEngineInstances.size(), memoryManager->getOsContextCount()); // one registered OS context per engine instance
 }
 
 TEST(DeviceCreation, givenMultiDeviceWhenTheyAreCreatedThenEachOsContextHasUniqueId) {
     ExecutionEnvironment executionEnvironment;
     executionEnvironment.incRefInternal();
+    const size_t numDevices = 2; // device1 and device2 below
+
     auto device1 = std::unique_ptr<Device>(Device::create<Device>(nullptr, &executionEnvironment, 0u));
     auto device2 = std::unique_ptr<Device>(Device::create<Device>(nullptr, &executionEnvironment, 1u));
 
-    EXPECT_EQ(0u, device1->getEngine(0).osContext->getContextId());
-    EXPECT_EQ(1u, device2->getEngine(0).osContext->getContextId());
-    EXPECT_EQ(2u, executionEnvironment.memoryManager->getOsContextCount());
+    for (uint32_t i = 0; i < static_cast<uint32_t>(gpgpuEngineInstances.size()); i++) {
+        EXPECT_EQ(i, device1->getEngine(i).osContext->getContextId()); // first device: ids start at 0
+        EXPECT_EQ(i + static_cast<uint32_t>(gpgpuEngineInstances.size()), device2->getEngine(i).osContext->getContextId()); // second device: ids continue after the first
+    }
+    EXPECT_EQ(gpgpuEngineInstances.size() * numDevices, executionEnvironment.memoryManager->getOsContextCount()); // total = engines x devices
 }
 
 TEST(DeviceCreation, givenMultiDeviceWhenTheyAreCreatedThenEachDeviceHasSeperateDeviceIndex) {
@@ -188,25 +181,33 @@ TEST(DeviceCreation, givenMultiDeviceWhenTheyAreCreatedThenEachDeviceHasSeperate
 TEST(DeviceCreation, givenMultiDeviceWhenTheyAreCreatedThenEachDeviceHasSeperateCommandStreamReceiver) {
     ExecutionEnvironment executionEnvironment;
     executionEnvironment.incRefInternal();
-    auto device = std::unique_ptr<MockDevice>(Device::create<MockDevice>(nullptr, &executionEnvironment, 0u));
+    const size_t numDevices = 2; // device1 and device2 below
+    auto device1 = std::unique_ptr<MockDevice>(Device::create<MockDevice>(nullptr, &executionEnvironment, 0u));
     auto device2 = std::unique_ptr<MockDevice>(Device::create<MockDevice>(nullptr, &executionEnvironment, 1u));
 
-    EXPECT_EQ(2u, executionEnvironment.commandStreamReceivers.size());
+    EXPECT_EQ(numDevices, executionEnvironment.commandStreamReceivers.size()); // one CSR row per device
     EXPECT_EQ(gpgpuEngineInstances.size(), executionEnvironment.commandStreamReceivers[0].size());
     EXPECT_EQ(gpgpuEngineInstances.size(), executionEnvironment.commandStreamReceivers[1].size());
-    EXPECT_NE(nullptr, executionEnvironment.commandStreamReceivers[0][0]);
-    EXPECT_NE(nullptr, executionEnvironment.commandStreamReceivers[1][0]);
-    EXPECT_EQ(device->getEngine(0).commandStreamReceiver, executionEnvironment.commandStreamReceivers[0][0].get());
-    EXPECT_EQ(device2->getEngine(0).commandStreamReceiver, executionEnvironment.commandStreamReceivers[1][0].get());
 
-    for (uint32_t i = 1; i < gpgpuEngineInstances.size(); i++) {
-        EXPECT_EQ(nullptr, executionEnvironment.commandStreamReceivers[0][i]);
-        EXPECT_EQ(nullptr, executionEnvironment.commandStreamReceivers[1][i]);
-        EXPECT_EQ(nullptr, device->getEngine(i).commandStreamReceiver);
-        EXPECT_EQ(nullptr, device2->getEngine(i).commandStreamReceiver);
+    for (uint32_t i = 0; i < static_cast<uint32_t>(gpgpuEngineInstances.size()); i++) { // every slot must now hold a CSR
+        EXPECT_NE(nullptr, executionEnvironment.commandStreamReceivers[0][i]);
+        EXPECT_NE(nullptr, executionEnvironment.commandStreamReceivers[1][i]);
+        EXPECT_EQ(executionEnvironment.commandStreamReceivers[0][i].get(), device1->getEngine(i).commandStreamReceiver); // engine i uses CSR slot i
+        EXPECT_EQ(executionEnvironment.commandStreamReceivers[1][i].get(), device2->getEngine(i).commandStreamReceiver);
     }
 }
 
+TEST(DeviceCreation, givenDeviceWhenAskingForDefaultEngineThenReturnValidValue) {
+    ExecutionEnvironment executionEnvironment;
+    executionEnvironment.incRefInternal();
+    auto device = std::unique_ptr<MockDevice>(Device::create<MockDevice>(platformDevices[0], &executionEnvironment, 0));
+
+    auto &defaultEngine = device->getDefaultEngine().osContext->getEngineType(); // engine instance descriptor (type + id) of the default engine
+
+    EXPECT_EQ(platformDevices[0]->capabilityTable.defaultEngineType, defaultEngine.type); // type comes from the capability table
+    EXPECT_EQ(0, defaultEngine.id); // default engine is instance 0 of that type
+}
+
 TEST(DeviceCreation, givenFtrSimulationModeFlagTrueWhenNoOtherSimulationFlagsArePresentThenIsSimulationReturnsTrue) {
     FeatureTable skuTable = *platformDevices[0]->pSkuTable;
     skuTable.ftrSimulationMode = true;
diff --git a/unit_tests/event/user_events_tests.cpp b/unit_tests/event/user_events_tests.cpp
index 12c2b78f84..c6a74e09bb 100644
--- a/unit_tests/event/user_events_tests.cpp
+++ b/unit_tests/event/user_events_tests.cpp
@@ -1057,7 +1057,7 @@ TEST_F(EventTests, givenUserEventWhenSetStatusIsDoneThenDeviceMutextisAcquired)
     struct mockedEvent : public UserEvent {
         using UserEvent::UserEvent;
         bool setStatus(cl_int status) override {
-            auto commandStreamReceiverOwnership = ctx->getDevice(0)->getEngine(0).commandStreamReceiver->obtainUniqueOwnership();
+            auto commandStreamReceiverOwnership = ctx->getDevice(0)->getDefaultEngine().commandStreamReceiver->obtainUniqueOwnership(); // same default-engine CSR that clSetUserEventStatus takes ownership of
             mutexProperlyAcquired = commandStreamReceiverOwnership.owns_lock();
             return true;
         }
diff --git a/unit_tests/fixtures/ult_command_stream_receiver_fixture.h b/unit_tests/fixtures/ult_command_stream_receiver_fixture.h
index f1758e33ed..1ccbe1e224 100644
--- a/unit_tests/fixtures/ult_command_stream_receiver_fixture.h
+++ b/unit_tests/fixtures/ult_command_stream_receiver_fixture.h
@@ -55,7 +55,7 @@ struct UltCommandStreamReceiverTest
         graphicsAllocation = new MockGraphicsAllocation(sshBuffer, sizeStream);
         ssh.replaceGraphicsAllocation(graphicsAllocation);
 
-        pDevice->getCommandStreamReceiver().setOsContext(*pDevice->getEngine(0).osContext);
+        pDevice->getCommandStreamReceiver().setOsContext(*pDevice->getDefaultEngine().osContext);
     }
 
     void TearDown() override {
diff --git a/unit_tests/mem_obj/mem_obj_destruction_tests.cpp b/unit_tests/mem_obj/mem_obj_destruction_tests.cpp
index 98e54f2599..24bb7c2d83 100644
--- a/unit_tests/mem_obj/mem_obj_destruction_tests.cpp
+++ b/unit_tests/mem_obj/mem_obj_destruction_tests.cpp
@@ -7,6 +7,7 @@
 
 #include "runtime/mem_obj/mem_obj.h"
 #include "runtime/memory_manager/allocations_list.h"
+#include "runtime/os_interface/os_context.h"
 #include "unit_tests/mocks/mock_context.h"
 #include "unit_tests/mocks/mock_device.h"
 #include "unit_tests/mocks/mock_memory_manager.h"
@@ -28,17 +29,20 @@ void CL_CALLBACK emptyDestructorCallback(cl_mem memObj, void *userData) {
 class MemObjDestructionTest : public ::testing::TestWithParam<bool> {
   public:
     void SetUp() override {
-        context.reset(new MockContext());
-        memoryManager = new MockMemoryManager(*context->getDevice(0)->getExecutionEnvironment());
-        device = static_cast<MockDevice *>(context->getDevice(0));
-        device->injectMemoryManager(memoryManager);
-        context->setMemoryManager(memoryManager);
+        executionEnvironment = std::make_unique<ExecutionEnvironment>(); // the fixture now owns the environment
+        executionEnvironment->incRefInternal();
+        memoryManager = new MockMemoryManager(*executionEnvironment);
+        executionEnvironment->memoryManager.reset(memoryManager); // mock is installed before the device is created
+        device.reset(MockDevice::create<MockDevice>(*platformDevices, executionEnvironment.get(), 0));
+        context.reset(new MockContext(device.get())); // context is built on the fixture's device
+
         allocation = memoryManager->allocateGraphicsMemory(size);
         memObj = new MemObj(context.get(), CL_MEM_OBJECT_BUFFER,
                             CL_MEM_READ_WRITE,
                             size,
                             nullptr, nullptr, allocation, true, false, false);
         *device->getDefaultEngine().commandStreamReceiver->getTagAddress() = 0;
+        contextId = device->getDefaultEngine().osContext->getContextId(); // id of the default engine's OS context, used by makeMemObjUsed()
     }
 
     void TearDown() override {
@@ -46,7 +50,7 @@ class MemObjDestructionTest : public ::testing::TestWithParam<bool> {
     }
 
     void makeMemObjUsed() {
-        memObj->getGraphicsAllocation()->updateTaskCount(taskCountReady, 0u);
+        memObj->getGraphicsAllocation()->updateTaskCount(taskCountReady, contextId); // contextId is captured in SetUp(), no longer hard-coded 0
     }
 
     void makeMemObjNotReady() {
@@ -60,7 +64,9 @@ class MemObjDestructionTest : public ::testing::TestWithParam<bool> {
     }
 
     constexpr static uint32_t taskCountReady = 3u;
-    MockDevice *device;
+    std::unique_ptr<ExecutionEnvironment> executionEnvironment;
+    std::unique_ptr<MockDevice> device;
+    uint32_t contextId = 0;
     MockMemoryManager *memoryManager;
     std::unique_ptr<MockContext> context;
     GraphicsAllocation *allocation;
diff --git a/unit_tests/memory_manager/deferrable_allocation_deletion_tests.cpp b/unit_tests/memory_manager/deferrable_allocation_deletion_tests.cpp
index 76dff560f1..f25d87a65b 100644
--- a/unit_tests/memory_manager/deferrable_allocation_deletion_tests.cpp
+++ b/unit_tests/memory_manager/deferrable_allocation_deletion_tests.cpp
@@ -51,7 +51,7 @@ struct DeferrableAllocationDeletionTest : ::testing::Test {
 };
 
 TEST_F(DeferrableAllocationDeletionTest, givenDeferrableAllocationWhenApplyThenWaitForEachTaskCount) {
-    EXPECT_EQ(1u, memoryManager->getOsContextCount());
+    EXPECT_EQ(gpgpuEngineInstances.size(), memoryManager->getOsContextCount());
     auto allocation = memoryManager->allocateGraphicsMemory(MemoryConstants::pageSize);
     allocation->updateTaskCount(1u, device1ContextId);
     *hwTag = 0u;
@@ -74,7 +74,7 @@ TEST_F(DeferrableAllocationDeletionTest, givenDeferrableAllocationWhenApplyThenW
 TEST_F(DeferrableAllocationDeletionTest, givenAllocationUsedByTwoOsContextsWhenApplyDeletionThenWaitForBothContexts) {
     std::unique_ptr<Device> device2(Device::create<Device>(nullptr, device1->getExecutionEnvironment(), 1u));
     auto device2ContextId = device2->getDefaultEngine().osContext->getContextId();
-    EXPECT_EQ(2u, memoryManager->getOsContextCount());
+    EXPECT_EQ(gpgpuEngineInstances.size() * 2, memoryManager->getOsContextCount());
     auto allocation = memoryManager->allocateGraphicsMemory(MemoryConstants::pageSize);
     *hwTag = 0u;
     *device2->getDefaultEngine().commandStreamReceiver->getTagAddress() = 1u;
@@ -91,7 +91,7 @@ TEST_F(DeferrableAllocationDeletionTest, givenAllocationUsedByTwoOsContextsWhenA
     *hwTag = 1u;
 }
 TEST_F(DeferrableAllocationDeletionTest, givenNotUsedAllocationWhenApplyDeletionThenDontWait) {
-    EXPECT_EQ(1u, memoryManager->getOsContextCount());
+    EXPECT_EQ(gpgpuEngineInstances.size(), memoryManager->getOsContextCount());
     auto allocation = memoryManager->allocateGraphicsMemory(MemoryConstants::pageSize);
     EXPECT_FALSE(allocation->isUsed());
     EXPECT_EQ(0u, memoryManager->freeGraphicsMemoryCalled);
diff --git a/unit_tests/mocks/mock_device.cpp b/unit_tests/mocks/mock_device.cpp
index f08554be34..3884b6d163 100644
--- a/unit_tests/mocks/mock_device.cpp
+++ b/unit_tests/mocks/mock_device.cpp
@@ -17,7 +17,7 @@ MockDevice::MockDevice(const HardwareInfo &hwInfo)
     : MockDevice(hwInfo, new ExecutionEnvironment, 0u) {
     CommandStreamReceiver *commandStreamReceiver = createCommandStream(&hwInfo, *this->executionEnvironment);
     executionEnvironment->commandStreamReceivers.resize(getDeviceIndex() + 1);
-    executionEnvironment->commandStreamReceivers[getDeviceIndex()][0].reset(commandStreamReceiver);
+    executionEnvironment->commandStreamReceivers[getDeviceIndex()][defaultEngineIndex].reset(commandStreamReceiver);
     this->executionEnvironment->memoryManager = std::move(this->mockMemoryManager);
     this->engines[defaultEngineIndex] = {commandStreamReceiver, nullptr};
 }
diff --git a/unit_tests/os_interface/linux/drm_command_stream_tests.cpp b/unit_tests/os_interface/linux/drm_command_stream_tests.cpp
index 6c710e40eb..62e28800cd 100644
--- a/unit_tests/os_interface/linux/drm_command_stream_tests.cpp
+++ b/unit_tests/os_interface/linux/drm_command_stream_tests.cpp
@@ -901,11 +901,12 @@ TEST_F(DrmCommandStreamBatchingTests, givenCSRWhenFlushIsCalledThenProperFlagsAr
     csr->flush(batchBuffer, csr->getResidencyAllocations());
 
     //preemption allocation + Sip Kernel
-    int ioctlExtraCnt = (PreemptionHelper::getDefaultPreemptionMode(*platformDevices[0]) == PreemptionMode::MidThread) ? 2 : 0;
+    int ioctlPreemptionCnt = (PreemptionHelper::getDefaultPreemptionMode(*platformDevices[0]) == PreemptionMode::MidThread) ? 2 : 0;
+    int ioctlTagAllocCnt = gpgpuEngineInstances.size();
 
     auto engineFlag = csr->getOsContext().get()->getEngineFlag();
 
-    EXPECT_EQ(6 + ioctlExtraCnt, this->mock->ioctl_cnt.total);
+    EXPECT_EQ(5 + ioctlPreemptionCnt + ioctlTagAllocCnt, this->mock->ioctl_cnt.total);
     uint64_t flags = engineFlag | I915_EXEC_NO_RELOC;
     EXPECT_EQ(flags, this->mock->execBuffer.flags);
 
@@ -944,7 +945,8 @@ TEST_F(DrmCommandStreamBatchingTests, givenCsrWhenDispatchPolicyIsSetToBatchingT
     size_t csrSurfaceCount = (device->getPreemptionMode() == PreemptionMode::MidThread) ? 2 : 0;
 
     //preemption allocation + sipKernel
-    int ioctlExtraCnt = (PreemptionHelper::getDefaultPreemptionMode(*platformDevices[0]) == PreemptionMode::MidThread) ? 2 : 0;
+    int ioctlPreemptionCnt = (PreemptionHelper::getDefaultPreemptionMode(*platformDevices[0]) == PreemptionMode::MidThread) ? 2 : 0;
+    int ioctlTagAllocCnt = gpgpuEngineInstances.size();
 
     auto recordedCmdBuffer = cmdBuffers.peekHead();
     EXPECT_EQ(3u + csrSurfaceCount, recordedCmdBuffer->surfaces.size());
@@ -961,7 +963,7 @@ TEST_F(DrmCommandStreamBatchingTests, givenCsrWhenDispatchPolicyIsSetToBatchingT
 
     EXPECT_EQ(tCsr->commandStream.getGraphicsAllocation(), recordedCmdBuffer->batchBuffer.commandBufferAllocation);
 
-    EXPECT_EQ(6 + ioctlExtraCnt, this->mock->ioctl_cnt.total);
+    EXPECT_EQ(5 + ioctlPreemptionCnt + ioctlTagAllocCnt, this->mock->ioctl_cnt.total);
 
     EXPECT_EQ(0u, this->mock->execBuffer.flags);
 
@@ -1010,7 +1012,8 @@ TEST_F(DrmCommandStreamBatchingTests, givenRecordedCommandBufferWhenItIsSubmitte
     size_t csrSurfaceCount = (device->getPreemptionMode() == PreemptionMode::MidThread) ? 2 : 0;
 
     //preemption allocation +sip Kernel
-    int ioctlExtraCnt = (PreemptionHelper::getDefaultPreemptionMode(*platformDevices[0]) == PreemptionMode::MidThread) ? 2 : 0;
+    int ioctlPreemptionCnt = (PreemptionHelper::getDefaultPreemptionMode(*platformDevices[0]) == PreemptionMode::MidThread) ? 2 : 0;
+    int ioctlTagAllocCnt = static_cast<int>(gpgpuEngineInstances.size()); // explicit narrowing of the container size to int
 
     //validate that submited command buffer has what we want
     EXPECT_EQ(3u + csrSurfaceCount, this->mock->execBuffer.buffer_count);
@@ -1032,7 +1035,7 @@ TEST_F(DrmCommandStreamBatchingTests, givenRecordedCommandBufferWhenItIsSubmitte
         EXPECT_TRUE(handleFound);
     }
 
-    EXPECT_EQ(7 + ioctlExtraCnt, this->mock->ioctl_cnt.total);
+    EXPECT_EQ(6 + ioctlPreemptionCnt + ioctlTagAllocCnt, this->mock->ioctl_cnt.total);
 
     mm->freeGraphicsMemory(dummyAllocation);
     mm->freeGraphicsMemory(commandBuffer);
diff --git a/unit_tests/os_interface/windows/device_command_stream_tests.cpp b/unit_tests/os_interface/windows/device_command_stream_tests.cpp
index d9b18ba3eb..a1e3d97056 100644
--- a/unit_tests/os_interface/windows/device_command_stream_tests.cpp
+++ b/unit_tests/os_interface/windows/device_command_stream_tests.cpp
@@ -126,15 +126,12 @@ class WddmCommandStreamWithMockGdiFixture {
         wddm->gdi.reset(gdi);
         ASSERT_NE(wddm, nullptr);
         DebugManager.flags.CsrDispatchMode.set(static_cast<uint32_t>(DispatchMode::ImmediateDispatch));
-        executionEnvironment->commandStreamReceivers.resize(1);
-        executionEnvironment->commandStreamReceivers[0][0] =
-            std::make_unique<MockWddmCsr<DEFAULT_TEST_FAMILY_NAME>>(*platformDevices[0],
-                                                                    *executionEnvironment);
-        this->csr = static_cast<MockWddmCsr<DEFAULT_TEST_FAMILY_NAME> *>(executionEnvironment->commandStreamReceivers[0][0].get());
+        this->csr = new MockWddmCsr<DEFAULT_TEST_FAMILY_NAME>(*platformDevices[0], *executionEnvironment);
         memoryManager = csr->createMemoryManager(false, false);
         ASSERT_NE(nullptr, memoryManager);
         executionEnvironment->memoryManager.reset(memoryManager);
         device = std::unique_ptr<MockDevice>(Device::create<MockDevice>(platformDevices[0], executionEnvironment, 0u));
         ASSERT_NE(nullptr, device);
+        device->resetCommandStreamReceiver(this->csr); // dereference device only after the null check above
         this->csr->overrideRecorededCommandBuffer(*device);
         if (device->getPreemptionMode() == PreemptionMode::MidThread) {
@@ -884,11 +881,11 @@ HWTEST_F(WddmCsrCompressionTests, givenEnabledCompressionWhenFlushingThenInitTra
         auto mockWddmCsr = new MockWddmCsr<FamilyType>(hwInfo[0], *executionEnvironment);
         mockWddmCsr->createPageTableManager();
         mockWddmCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch);
-        executionEnvironment->commandStreamReceivers.resize(1);
-        executionEnvironment->commandStreamReceivers[0][0].reset(mockWddmCsr);
+        executionEnvironment->memoryManager.reset(mockWddmCsr->createMemoryManager(false, false));
 
         auto mockMngr = reinterpret_cast<MockGmmPageTableMngr *>(myMockWddm->getPageTableManager());
         std::unique_ptr<MockDevice> device(Device::create<MockDevice>(hwInfo, executionEnvironment, 0u));
+        device->resetCommandStreamReceiver(mockWddmCsr);
 
         auto memoryManager = executionEnvironment->memoryManager.get();
 
@@ -925,11 +922,11 @@ HWTEST_F(WddmCsrCompressionTests, givenDisabledCompressionWhenFlushingThenDontIn
     myMockWddm = static_cast<WddmMock *>(executionEnvironment->osInterface->get()->getWddm());
 
     auto mockWddmCsr = new MockWddmCsr<FamilyType>(hwInfo[0], *executionEnvironment);
-    executionEnvironment->commandStreamReceivers.resize(1);
-    executionEnvironment->commandStreamReceivers[0][0].reset(mockWddmCsr);
     mockWddmCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch);
+    executionEnvironment->memoryManager.reset(mockWddmCsr->createMemoryManager(false, false));
 
     std::unique_ptr<MockDevice> device(Device::create<MockDevice>(hwInfo, executionEnvironment, 0u));
+    device->resetCommandStreamReceiver(mockWddmCsr);
 
     auto memoryManager = executionEnvironment->memoryManager.get();
 
diff --git a/unit_tests/preemption/preemption_tests.cpp b/unit_tests/preemption/preemption_tests.cpp
index 52b4637da4..9fd926878b 100644
--- a/unit_tests/preemption/preemption_tests.cpp
+++ b/unit_tests/preemption/preemption_tests.cpp
@@ -462,6 +462,28 @@ HWTEST_F(MidThreadPreemptionTests, createCsrSurfaceNoWa) {
     const_cast<HardwareInfo *>(platformDevices[0])->pWaTable = waTable;
 }
 
+HWTEST_F(MidThreadPreemptionTests, givenMidThreadPreemptionWhenFailingOnCsrSurfaceAllocationThenFailToCreateDevice) {
+    class FailingMemoryManager : public OsAgnosticMemoryManager { // memory manager whose allocateGraphicsMemory starts returning nullptr after a fixed number of calls
+      public:
+        FailingMemoryManager(ExecutionEnvironment &executionEnvironment) : OsAgnosticMemoryManager(false, false, executionEnvironment) {}
+
+        GraphicsAllocation *allocateGraphicsMemory(size_t size, size_t alignment, bool forcePin, bool uncacheable) override {
+            if (++allocateGraphicsMemoryCount > gpgpuEngineInstances.size()) { // the first gpgpuEngineInstances.size() calls succeed (presumably the per-engine tag allocations - confirm); every later call fails
+                return nullptr;
+            }
+            return OsAgnosticMemoryManager::allocateGraphicsMemory(size, alignment, forcePin, uncacheable);
+        }
+
+        uint32_t allocateGraphicsMemoryCount = 0; // number of allocateGraphicsMemory calls made so far
+    };
+    ExecutionEnvironment executionEnvironment;
+    executionEnvironment.incRefInternal();
+    executionEnvironment.memoryManager = std::make_unique<FailingMemoryManager>(executionEnvironment); // install the failing manager before the device is created
+
+    std::unique_ptr<MockDevice> mockDevice(MockDevice::create<MockDevice>(platformDevices[0], &executionEnvironment, 0));
+    EXPECT_EQ(nullptr, mockDevice.get()); // device creation is expected to fail and yield a null device
+}
+
 HWTEST_F(MidThreadPreemptionTests, createCsrSurfaceWa) {
     const WorkaroundTable *waTable = platformDevices[0]->pWaTable;
     WorkaroundTable tmpWaTable;
diff --git a/unit_tests/program/program_data_tests.cpp b/unit_tests/program/program_data_tests.cpp
index cb551df458..e1ddb244d0 100644
--- a/unit_tests/program/program_data_tests.cpp
+++ b/unit_tests/program/program_data_tests.cpp
@@ -175,7 +175,7 @@ TEST_F(ProgramDataTest, givenConstantAllocationThatIsInUseByGpuWhenProgramIsBein
 
     buildAndDecodeProgramPatchList();
 
-    auto &csr = *pPlatform->getDevice(0)->getEngine(0).commandStreamReceiver;
+    auto &csr = *pPlatform->getDevice(0)->getDefaultEngine().commandStreamReceiver;
     auto tagAddress = csr.getTagAddress();
     auto constantSurface = pProgram->getConstantSurface();
     constantSurface->updateTaskCount(*tagAddress + 1, 0);
@@ -192,7 +192,7 @@ TEST_F(ProgramDataTest, givenGlobalAllocationThatIsInUseByGpuWhenProgramIsBeingD
 
     buildAndDecodeProgramPatchList();
 
-    auto &csr = *pPlatform->getDevice(0)->getEngine(0).commandStreamReceiver;
+    auto &csr = *pPlatform->getDevice(0)->getDefaultEngine().commandStreamReceiver;
     auto tagAddress = csr.getTagAddress();
     auto globalSurface = pProgram->getGlobalSurface();
     globalSurface->updateTaskCount(*tagAddress + 1, 0);
diff --git a/unit_tests/sharings/gl/gl_arb_sync_event_tests.cpp b/unit_tests/sharings/gl/gl_arb_sync_event_tests.cpp
index 9d6fe1828e..0236164ff4 100644
--- a/unit_tests/sharings/gl/gl_arb_sync_event_tests.cpp
+++ b/unit_tests/sharings/gl/gl_arb_sync_event_tests.cpp
@@ -65,10 +65,9 @@ struct GlArbSyncEventTest : public ::testing::Test {
     void SetUp() override {
         executionEnvironment = new ExecutionEnvironment;
         auto mockCsr = new MockCommandStreamReceiver(*executionEnvironment);
-        executionEnvironment->commandStreamReceivers.resize(1);
-        executionEnvironment->commandStreamReceivers[0][0].reset(mockCsr);
         executionEnvironment->memoryManager = std::make_unique<OsAgnosticMemoryManager>(false, false, *executionEnvironment);
         device.reset(MockDevice::create<MockDevice>(nullptr, executionEnvironment, 0u));
+        device->resetCommandStreamReceiver(mockCsr);
         ctx.reset(new MockContext);
         cmdQ.reset(new MockCommandQueue(ctx.get(), device.get(), nullptr));
         sharing = new GlSharingFunctionsMock();