Allow Device creating multiple CSRs [7/n]

Create and initialize all supported Engines.

Change-Id: If0adf1a06b5005ef2698cebc6f1aaa6eacf562ec
Signed-off-by: Dunajski, Bartosz <bartosz.dunajski@intel.com>
2025-12-25 13:33:02 +08:00 · 2018-11-28 09:02:55 +01:00
parent 54269d9791
commit 1f7448425d
23 changed files with 170 additions and 116 deletions
--- a/2
+++ b/2
@@ -1,5 +1,5 @@
 #!groovy
 dependenciesRevision='4423584cd5bfda14bb324142159b5affb5461d26-1165'
 strategy='EQUAL'
-allowedCD=272
+allowedCD=273
 allowedF=4
--- a/runtime/api/api.cpp
+++ b/runtime/api/api.cpp
@@ -1797,7 +1797,7 @@ cl_int CL_API_CALL clSetUserEventStatus(cl_event event,
        return retVal;
    }

-    auto commandStreamReceiverOwnership = userEvent->getContext()->getDevice(0)->getEngine(0).commandStreamReceiver->obtainUniqueOwnership();
+    auto commandStreamReceiverOwnership = userEvent->getContext()->getDevice(0)->getDefaultEngine().commandStreamReceiver->obtainUniqueOwnership();
    userEvent->setStatus(executionStatus);
    return retVal;
 }
--- a/runtime/device/device.cpp
+++ b/runtime/device/device.cpp
@@ -106,70 +106,90 @@ Device::~Device() {
 }

 bool Device::createDeviceImpl(const HardwareInfo *pHwInfo, Device &outDevice) {
-    uint32_t deviceCsrIndex = 0;
    auto executionEnvironment = outDevice.executionEnvironment;
    executionEnvironment->initGmm(pHwInfo);
-    if (!executionEnvironment->initializeCommandStreamReceiver(pHwInfo, outDevice.getDeviceIndex(), deviceCsrIndex)) {
-        return false;
-    }
-    executionEnvironment->initializeMemoryManager(outDevice.getEnabled64kbPages(), outDevice.getEnableLocalMemory(),
-                                                  outDevice.getDeviceIndex(), deviceCsrIndex);

-    auto osContext = executionEnvironment->memoryManager->createAndRegisterOsContext({getChosenEngineType(*pHwInfo), 0});
-    auto commandStreamReceiver = executionEnvironment->commandStreamReceivers[outDevice.getDeviceIndex()][deviceCsrIndex].get();
-    commandStreamReceiver->setOsContext(*osContext);
-    if (!commandStreamReceiver->initializeTagAllocation()) {
+    if (!createEngines(pHwInfo, outDevice)) {
        return false;
    }

-    outDevice.engines[0] = {commandStreamReceiver, osContext};
+    executionEnvironment->memoryManager->setDefaultEngineIndex(outDevice.defaultEngineIndex);

-    auto pDevice = &outDevice;
-    if (!pDevice->osTime) {
-        pDevice->osTime = OSTime::create(commandStreamReceiver->getOSInterface());
+    auto osInterface = executionEnvironment->osInterface.get();
+
+    if (!outDevice.osTime) {
+        outDevice.osTime = OSTime::create(osInterface);
    }
-    pDevice->driverInfo.reset(DriverInfo::create(commandStreamReceiver->getOSInterface()));
+    outDevice.driverInfo.reset(DriverInfo::create(osInterface));

-    pDevice->initializeCaps();
+    outDevice.initializeCaps();

-    if (pDevice->osTime->getOSInterface()) {
+    if (outDevice.osTime->getOSInterface()) {
        if (pHwInfo->capabilityTable.instrumentationEnabled) {
-            pDevice->performanceCounters = createPerformanceCountersFunc(pDevice->osTime.get());
-            pDevice->performanceCounters->initialize(pHwInfo);
+            outDevice.performanceCounters = createPerformanceCountersFunc(outDevice.osTime.get());
+            outDevice.performanceCounters->initialize(pHwInfo);
        }
    }

    uint32_t deviceHandle = 0;
-    if (commandStreamReceiver->getOSInterface()) {
-        deviceHandle = commandStreamReceiver->getOSInterface()->getDeviceHandle();
+    if (osInterface) {
+        deviceHandle = osInterface->getDeviceHandle();
    }

-    if (pDevice->deviceInfo.sourceLevelDebuggerActive) {
-        pDevice->executionEnvironment->sourceLevelDebugger->notifyNewDevice(deviceHandle);
+    if (outDevice.deviceInfo.sourceLevelDebuggerActive) {
+        outDevice.executionEnvironment->sourceLevelDebugger->notifyNewDevice(deviceHandle);
    }

-    outDevice.executionEnvironment->memoryManager->setForce32BitAllocations(pDevice->getDeviceInfo().force32BitAddressess);
-    outDevice.executionEnvironment->memoryManager->setDefaultEngineIndex(deviceCsrIndex);
+    outDevice.executionEnvironment->memoryManager->setForce32BitAllocations(outDevice.getDeviceInfo().force32BitAddressess);

-    if (pDevice->preemptionMode == PreemptionMode::MidThread || pDevice->isSourceLevelDebuggerActive()) {
+    if (outDevice.preemptionMode == PreemptionMode::MidThread || outDevice.isSourceLevelDebuggerActive()) {
        size_t requiredSize = pHwInfo->capabilityTable.requiredPreemptionSurfaceSize;
        size_t alignment = 256 * MemoryConstants::kiloByte;
-        bool uncacheable = pDevice->getWaTable()->waCSRUncachable;
-        pDevice->preemptionAllocation = outDevice.executionEnvironment->memoryManager->allocateGraphicsMemory(requiredSize, alignment, false, uncacheable);
-        if (!pDevice->preemptionAllocation) {
+        bool uncacheable = outDevice.getWaTable()->waCSRUncachable;
+        outDevice.preemptionAllocation = outDevice.executionEnvironment->memoryManager->allocateGraphicsMemory(requiredSize, alignment, false, uncacheable);
+        if (!outDevice.preemptionAllocation) {
            return false;
        }
-        commandStreamReceiver->setPreemptionCsrAllocation(pDevice->preemptionAllocation);
    }

-    if (DebugManager.flags.EnableExperimentalCommandBuffer.get() > 0) {
-        commandStreamReceiver->setExperimentalCmdBuffer(std::unique_ptr<ExperimentalCommandBuffer>(
-            new ExperimentalCommandBuffer(commandStreamReceiver, pDevice->getDeviceInfo().profilingTimerResolution)));
+    for (auto engine : outDevice.engines) {
+        auto csr = engine.commandStreamReceiver;
+        csr->setPreemptionCsrAllocation(outDevice.preemptionAllocation);
+        if (DebugManager.flags.EnableExperimentalCommandBuffer.get() > 0) {
+            csr->setExperimentalCmdBuffer(std::make_unique<ExperimentalCommandBuffer>(csr, outDevice.getDeviceInfo().profilingTimerResolution));
+        }
    }

    return true;
 }

+bool Device::createEngines(const HardwareInfo *pHwInfo, Device &outDevice) {
+    auto executionEnvironment = outDevice.executionEnvironment;
+    EngineType defaultEngineType = getChosenEngineType(*pHwInfo);
+
+    for (uint32_t deviceCsrIndex = 0; deviceCsrIndex < gpgpuEngineInstances.size(); deviceCsrIndex++) {
+        if (!executionEnvironment->initializeCommandStreamReceiver(pHwInfo, outDevice.getDeviceIndex(), deviceCsrIndex)) {
+            return false;
+        }
+        executionEnvironment->initializeMemoryManager(outDevice.getEnabled64kbPages(), outDevice.getEnableLocalMemory(),
+                                                      outDevice.getDeviceIndex(), deviceCsrIndex);
+
+        auto osContext = executionEnvironment->memoryManager->createAndRegisterOsContext(gpgpuEngineInstances[deviceCsrIndex]);
+        auto commandStreamReceiver = executionEnvironment->commandStreamReceivers[outDevice.getDeviceIndex()][deviceCsrIndex].get();
+        commandStreamReceiver->setOsContext(*osContext);
+        if (!commandStreamReceiver->initializeTagAllocation()) {
+            return false;
+        }
+
+        if (gpgpuEngineInstances[deviceCsrIndex].type == defaultEngineType && gpgpuEngineInstances[deviceCsrIndex].id == 0) {
+            outDevice.defaultEngineIndex = deviceCsrIndex;
+        }
+
+        outDevice.engines[deviceCsrIndex] = {commandStreamReceiver, osContext};
+    }
+    return true;
+}
+
 const HardwareInfo *Device::getDeviceInitHwInfo(const HardwareInfo *pHwInfoIn) {
    return pHwInfoIn ? pHwInfoIn : platformDevices[0];
 }
--- a/runtime/device/device.h
+++ b/runtime/device/device.h
@@ -73,8 +73,6 @@ class Device : public BaseObject<_cl_device_id> {
    EngineControl &getEngine(uint32_t engineId);
    EngineControl &getDefaultEngine();

-    volatile uint32_t *getTagAddress() const;
-
    const char *getProductAbbrev() const;
    const std::string getFamilyNameWithType() const;

@@ -136,6 +134,7 @@ class Device : public BaseObject<_cl_device_id> {
    }

    static bool createDeviceImpl(const HardwareInfo *pHwInfo, Device &outDevice);
+    static bool createEngines(const HardwareInfo *pHwInfo, Device &outDevice);
    static const HardwareInfo *getDeviceInitHwInfo(const HardwareInfo *pHwInfoIn);
    MOCKABLE_VIRTUAL void initializeCaps();
    void setupFp64Flags();
@@ -177,7 +176,7 @@ inline EngineControl &Device::getEngine(uint32_t engineId) {
 }

 inline EngineControl &Device::getDefaultEngine() {
-    return engines[defaultEngineIndex];
+    return getEngine(defaultEngineIndex);
 }

 inline MemoryManager *Device::getMemoryManager() const {
--- a/runtime/event/user_event.cpp
+++ b/runtime/event/user_event.cpp
@@ -35,7 +35,7 @@ uint32_t UserEvent::getTaskLevel() {
    uint32_t taskLevel = 0;
    if (ctx != nullptr) {
        Device *pDevice = ctx->getDevice(0);
-        auto csr = pDevice->getEngine(0).commandStreamReceiver;
+        auto csr = pDevice->getDefaultEngine().commandStreamReceiver;
        taskLevel = csr->peekTaskLevel();
    }
    return taskLevel;
--- a/runtime/memory_manager/deferrable_allocation_deletion.cpp
+++ b/runtime/memory_manager/deferrable_allocation_deletion.cpp
@@ -19,13 +19,11 @@ void DeferrableAllocationDeletion::apply() {

        for (auto &deviceCsrs : memoryManager.getCommandStreamReceivers()) {
            for (auto &csr : deviceCsrs) {
-                if (csr) {
-                    auto contextId = csr->getOsContext().getContextId();
-                    if (graphicsAllocation.isUsedByContext(contextId)) {
-                        auto currentContextTaskCount = *csr->getTagAddress();
-                        if (graphicsAllocation.getTaskCount(contextId) <= currentContextTaskCount) {
-                            graphicsAllocation.resetTaskCount(contextId);
-                        }
+                auto contextId = csr->getOsContext().getContextId();
+                if (graphicsAllocation.isUsedByContext(contextId)) {
+                    auto currentContextTaskCount = *csr->getTagAddress();
+                    if (graphicsAllocation.getTaskCount(contextId) <= currentContextTaskCount) {
+                        graphicsAllocation.resetTaskCount(contextId);
                    }
                }
            }
--- a/runtime/memory_manager/memory_manager.cpp
+++ b/runtime/memory_manager/memory_manager.cpp
@@ -137,7 +137,7 @@ void MemoryManager::freeGraphicsMemory(GraphicsAllocation *gfxAllocation) {
 //if not in use destroy in place
 //if in use pass to temporary allocation list that is cleaned on blocking calls
 void MemoryManager::checkGpuUsageAndDestroyGraphicsAllocations(GraphicsAllocation *gfxAllocation) {
-    if (!gfxAllocation->isUsed() || gfxAllocation->getTaskCount(0u) <= *getCommandStreamReceivers()[0][defaultEngineIndex]->getTagAddress()) {
+    if (!gfxAllocation->isUsed() || gfxAllocation->getTaskCount(defaultEngineIndex) <= *getCommandStreamReceivers()[0][defaultEngineIndex]->getTagAddress()) {
        freeGraphicsMemory(gfxAllocation);
    } else {
        getCommandStreamReceivers()[0][defaultEngineIndex]->getInternalAllocationStorage()->storeAllocation(std::unique_ptr<GraphicsAllocation>(gfxAllocation), TEMPORARY_ALLOCATION);
--- a/runtime/platform/platform.cpp
+++ b/runtime/platform/platform.cpp
@@ -174,7 +174,7 @@ bool Platform::initialize() {
        initSipKernel(sipType, *devices[0]);
    }

-    CommandStreamReceiverType csrType = this->devices[0]->getEngine(0).commandStreamReceiver->getType();
+    CommandStreamReceiverType csrType = this->devices[0]->getDefaultEngine().commandStreamReceiver->getType();
    if (csrType != CommandStreamReceiverType::CSR_HW) {
        executionEnvironment->initAubCenter(&hwInfo[0], this->devices[0]->getEnableLocalMemory(), "aubfile");
    }
--- a/runtime/sharings/gl/gl_sync_event.cpp
+++ b/runtime/sharings/gl/gl_sync_event.cpp
@@ -53,7 +53,7 @@ void GlSyncEvent::updateExecutionStatus() {
 }

 uint32_t GlSyncEvent::getTaskLevel() {
-    auto csr = ctx->getDevice(0)->getEngine(0).commandStreamReceiver;
+    auto csr = ctx->getDevice(0)->getDefaultEngine().commandStreamReceiver;
    return csr->peekTaskLevel();
 }
 } // namespace OCLRT
--- a/unit_tests/aub_tests/fixtures/aub_fixture.h
+++ b/unit_tests/aub_tests/fixtures/aub_fixture.h
@@ -44,8 +44,9 @@ class AUBFixture : public CommandQueueHwFixture {
        }

        executionEnvironment->commandStreamReceivers.resize(deviceIndex + 1);
-        executionEnvironment->commandStreamReceivers[deviceIndex][0].reset(this->csr);
+
        device.reset(MockDevice::create<MockDevice>(&hwInfo, executionEnvironment, deviceIndex));
+        device->resetCommandStreamReceiver(this->csr);

        CommandQueueHwFixture::SetUp(AUBFixture::device.get(), cl_command_queue_properties(0));
    }
--- a/unit_tests/command_queue/command_queue_tests.cpp
+++ b/unit_tests/command_queue/command_queue_tests.cpp
@@ -195,6 +195,14 @@ TEST(CommandQueue, GivenOOQwhenUpdateFromCompletionStampWithTrueIsCalledThenTask
    EXPECT_EQ(cs.flushStamp, cmdQ.flushStamp->peekStamp());
 }

+TEST(CommandQueue, givenDeviceWhenCreatingCommandQueueThenPickCsrFromDefaultEngine) {
+    std::unique_ptr<MockDevice> mockDevice(MockDevice::createWithNewExecutionEnvironment<MockDevice>(platformDevices[0]));
+    CommandQueue cmdQ(nullptr, mockDevice.get(), 0);
+
+    auto defaultCsr = mockDevice->getDefaultEngine().commandStreamReceiver;
+    EXPECT_EQ(defaultCsr, &cmdQ.getCommandStreamReceiver());
+}
+
 TEST(CommandQueue, givenCmdQueueBlockedByReadyVirtualEventWhenUnblockingThenUpdateFlushTaskFromEvent) {
    std::unique_ptr<MockDevice> mockDevice(MockDevice::createWithNewExecutionEnvironment<MockDevice>(nullptr));
    auto context = new MockContext;
--- a/unit_tests/command_queue/enqueue_handler_tests.cpp
+++ b/unit_tests/command_queue/enqueue_handler_tests.cpp
@@ -342,8 +342,8 @@ HWTEST_F(EnqueueHandlerTestBasic, givenEnqueueHandlerWhenCommandIsBlokingThenCom
    auto executionEnvironment = new ExecutionEnvironment;
    auto mockCsr = new MockCsrBase<FamilyType>(tag, *executionEnvironment);
    executionEnvironment->commandStreamReceivers.resize(1);
-    executionEnvironment->commandStreamReceivers[0][0].reset(mockCsr);
    std::unique_ptr<MockDevice> pDevice(MockDevice::createWithExecutionEnvironment<MockDevice>(nullptr, executionEnvironment, 0u));
+    pDevice->resetCommandStreamReceiver(mockCsr);
    auto context = std::make_unique<MockContext>(pDevice.get());
    MockKernelWithInternals kernelInternals(*pDevice, context.get());
    Kernel *kernel = kernelInternals.mockKernel;
--- a/unit_tests/device/device_tests.cpp
+++ b/unit_tests/device/device_tests.cpp
@@ -44,9 +44,11 @@ TEST_F(DeviceTest, getSupportedClVersion) {
 }

 TEST_F(DeviceTest, getTagAddress) {
-    auto tagAddress = pDevice->getDefaultEngine().commandStreamReceiver->getTagAddress();
-    ASSERT_NE(nullptr, const_cast<uint32_t *>(tagAddress));
-    EXPECT_EQ(initialHardwareTag, *tagAddress);
+    for (uint32_t i = 0; i < static_cast<uint32_t>(gpgpuEngineInstances.size()); i++) {
+        auto tagAddress = pDevice->getEngine(i).commandStreamReceiver->getTagAddress();
+        ASSERT_NE(nullptr, const_cast<uint32_t *>(tagAddress));
+        EXPECT_EQ(initialHardwareTag, *tagAddress);
+    }
 }

 TEST_F(DeviceTest, WhenGetOSTimeThenNotNull) {
@@ -84,26 +86,13 @@ TEST_F(DeviceTest, retainAndRelease) {
 TEST_F(DeviceTest, getEngineTypeDefault) {
    auto pTestDevice = std::unique_ptr<Device>(createWithUsDeviceId(0));

-    EngineType actualEngineType = pDevice->getEngine(0).osContext->getEngineType().type;
+    EngineType actualEngineType = pDevice->getDefaultEngine().osContext->getEngineType().type;
    EngineType defaultEngineType = pDevice->getHardwareInfo().capabilityTable.defaultEngineType;

-    EXPECT_EQ(&pDevice->getEngine(0).commandStreamReceiver->getOsContext(), pDevice->getEngine(0).osContext);
+    EXPECT_EQ(&pDevice->getDefaultEngine().commandStreamReceiver->getOsContext(), pDevice->getDefaultEngine().osContext);
    EXPECT_EQ(defaultEngineType, actualEngineType);
 }

-TEST_F(DeviceTest, givenDebugVariableOverrideEngineTypeWhenDeviceIsCreatedThenUseDebugNotDefaul) {
-    EngineType expectedEngine = EngineType::ENGINE_VCS;
-    DebugManagerStateRestore dbgRestorer;
-    DebugManager.flags.NodeOrdinal.set(static_cast<int32_t>(expectedEngine));
-    auto pTestDevice = std::unique_ptr<Device>(createWithUsDeviceId(0));
-
-    EngineType actualEngineType = pTestDevice->getEngine(0).osContext->getEngineType().type;
-    EngineType defaultEngineType = pDevice->getHardwareInfo().capabilityTable.defaultEngineType;
-
-    EXPECT_NE(defaultEngineType, actualEngineType);
-    EXPECT_EQ(expectedEngine, actualEngineType);
-}
-
 TEST(DeviceCleanup, givenDeviceWhenItIsDestroyedThenFlushBatchedSubmissionsIsCalled) {
    auto mockDevice = std::unique_ptr<MockDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(nullptr));
    MockCommandStreamReceiver *csr = new MockCommandStreamReceiver(*mockDevice->getExecutionEnvironment());
@@ -161,18 +150,22 @@ TEST(DeviceCreation, givenDefaultHwCsrInDebugVarsWhenDeviceIsCreatedThenIsSimula
 TEST(DeviceCreation, givenDeviceWhenItIsCreatedThenOsContextIsRegistredInMemoryManager) {
    auto device = std::unique_ptr<Device>(MockDevice::createWithNewExecutionEnvironment<Device>(nullptr));
    auto memoryManager = device->getMemoryManager();
-    EXPECT_EQ(1u, memoryManager->getOsContextCount());
+    EXPECT_EQ(gpgpuEngineInstances.size(), memoryManager->getOsContextCount());
 }

 TEST(DeviceCreation, givenMultiDeviceWhenTheyAreCreatedThenEachOsContextHasUniqueId) {
    ExecutionEnvironment executionEnvironment;
    executionEnvironment.incRefInternal();
+    const size_t numDevices = 2;
+
    auto device1 = std::unique_ptr<Device>(Device::create<Device>(nullptr, &executionEnvironment, 0u));
    auto device2 = std::unique_ptr<Device>(Device::create<Device>(nullptr, &executionEnvironment, 1u));

-    EXPECT_EQ(0u, device1->getEngine(0).osContext->getContextId());
-    EXPECT_EQ(1u, device2->getEngine(0).osContext->getContextId());
-    EXPECT_EQ(2u, executionEnvironment.memoryManager->getOsContextCount());
+    for (uint32_t i = 0; i < static_cast<uint32_t>(gpgpuEngineInstances.size()); i++) {
+        EXPECT_EQ(i, device1->getEngine(i).osContext->getContextId());
+        EXPECT_EQ(i + static_cast<uint32_t>(gpgpuEngineInstances.size()), device2->getEngine(i).osContext->getContextId());
+    }
+    EXPECT_EQ(gpgpuEngineInstances.size() * numDevices, executionEnvironment.memoryManager->getOsContextCount());
 }

 TEST(DeviceCreation, givenMultiDeviceWhenTheyAreCreatedThenEachDeviceHasSeperateDeviceIndex) {
@@ -188,25 +181,33 @@ TEST(DeviceCreation, givenMultiDeviceWhenTheyAreCreatedThenEachDeviceHasSeperate
 TEST(DeviceCreation, givenMultiDeviceWhenTheyAreCreatedThenEachDeviceHasSeperateCommandStreamReceiver) {
    ExecutionEnvironment executionEnvironment;
    executionEnvironment.incRefInternal();
-    auto device = std::unique_ptr<MockDevice>(Device::create<MockDevice>(nullptr, &executionEnvironment, 0u));
+    const size_t numDevices = 2;
+    auto device1 = std::unique_ptr<MockDevice>(Device::create<MockDevice>(nullptr, &executionEnvironment, 0u));
    auto device2 = std::unique_ptr<MockDevice>(Device::create<MockDevice>(nullptr, &executionEnvironment, 1u));

-    EXPECT_EQ(2u, executionEnvironment.commandStreamReceivers.size());
+    EXPECT_EQ(numDevices, executionEnvironment.commandStreamReceivers.size());
    EXPECT_EQ(gpgpuEngineInstances.size(), executionEnvironment.commandStreamReceivers[0].size());
    EXPECT_EQ(gpgpuEngineInstances.size(), executionEnvironment.commandStreamReceivers[1].size());
-    EXPECT_NE(nullptr, executionEnvironment.commandStreamReceivers[0][0]);
-    EXPECT_NE(nullptr, executionEnvironment.commandStreamReceivers[1][0]);
-    EXPECT_EQ(device->getEngine(0).commandStreamReceiver, executionEnvironment.commandStreamReceivers[0][0].get());
-    EXPECT_EQ(device2->getEngine(0).commandStreamReceiver, executionEnvironment.commandStreamReceivers[1][0].get());

-    for (uint32_t i = 1; i < gpgpuEngineInstances.size(); i++) {
-        EXPECT_EQ(nullptr, executionEnvironment.commandStreamReceivers[0][i]);
-        EXPECT_EQ(nullptr, executionEnvironment.commandStreamReceivers[1][i]);
-        EXPECT_EQ(nullptr, device->getEngine(i).commandStreamReceiver);
-        EXPECT_EQ(nullptr, device2->getEngine(i).commandStreamReceiver);
+    for (uint32_t i = 0; i < static_cast<uint32_t>(gpgpuEngineInstances.size()); i++) {
+        EXPECT_NE(nullptr, executionEnvironment.commandStreamReceivers[0][i]);
+        EXPECT_NE(nullptr, executionEnvironment.commandStreamReceivers[1][i]);
+        EXPECT_EQ(executionEnvironment.commandStreamReceivers[0][i].get(), device1->getEngine(i).commandStreamReceiver);
+        EXPECT_EQ(executionEnvironment.commandStreamReceivers[1][i].get(), device2->getEngine(i).commandStreamReceiver);
    }
 }

+TEST(DeviceCreation, givenDeviceWhenAskingForDefaultEngineThenReturnValidValue) {
+    ExecutionEnvironment executionEnvironment;
+    executionEnvironment.incRefInternal();
+    auto device = std::unique_ptr<MockDevice>(Device::create<MockDevice>(platformDevices[0], &executionEnvironment, 0));
+
+    auto &defaultEngine = device->getDefaultEngine().osContext->getEngineType();
+
+    EXPECT_EQ(platformDevices[0]->capabilityTable.defaultEngineType, defaultEngine.type);
+    EXPECT_EQ(0, defaultEngine.id);
+}
+
 TEST(DeviceCreation, givenFtrSimulationModeFlagTrueWhenNoOtherSimulationFlagsArePresentThenIsSimulationReturnsTrue) {
    FeatureTable skuTable = *platformDevices[0]->pSkuTable;
    skuTable.ftrSimulationMode = true;
--- a/unit_tests/event/user_events_tests.cpp
+++ b/unit_tests/event/user_events_tests.cpp
@@ -1057,7 +1057,7 @@ TEST_F(EventTests, givenUserEventWhenSetStatusIsDoneThenDeviceMutextisAcquired)
    struct mockedEvent : public UserEvent {
        using UserEvent::UserEvent;
        bool setStatus(cl_int status) override {
-            auto commandStreamReceiverOwnership = ctx->getDevice(0)->getEngine(0).commandStreamReceiver->obtainUniqueOwnership();
+            auto commandStreamReceiverOwnership = ctx->getDevice(0)->getDefaultEngine().commandStreamReceiver->obtainUniqueOwnership();
            mutexProperlyAcquired = commandStreamReceiverOwnership.owns_lock();
            return true;
        }
--- a/unit_tests/fixtures/ult_command_stream_receiver_fixture.h
+++ b/unit_tests/fixtures/ult_command_stream_receiver_fixture.h
@@ -55,7 +55,7 @@ struct UltCommandStreamReceiverTest
        graphicsAllocation = new MockGraphicsAllocation(sshBuffer, sizeStream);
        ssh.replaceGraphicsAllocation(graphicsAllocation);

-        pDevice->getCommandStreamReceiver().setOsContext(*pDevice->getEngine(0).osContext);
+        pDevice->getCommandStreamReceiver().setOsContext(*pDevice->getDefaultEngine().osContext);
    }

    void TearDown() override {
--- a/unit_tests/mem_obj/mem_obj_destruction_tests.cpp
+++ b/unit_tests/mem_obj/mem_obj_destruction_tests.cpp
@@ -7,6 +7,7 @@

 #include "runtime/mem_obj/mem_obj.h"
 #include "runtime/memory_manager/allocations_list.h"
+#include "runtime/os_interface/os_context.h"
 #include "unit_tests/mocks/mock_context.h"
 #include "unit_tests/mocks/mock_device.h"
 #include "unit_tests/mocks/mock_memory_manager.h"
@@ -28,17 +29,20 @@ void CL_CALLBACK emptyDestructorCallback(cl_mem memObj, void *userData) {
 class MemObjDestructionTest : public ::testing::TestWithParam<bool> {
  public:
    void SetUp() override {
-        context.reset(new MockContext());
-        memoryManager = new MockMemoryManager(*context->getDevice(0)->getExecutionEnvironment());
-        device = static_cast<MockDevice *>(context->getDevice(0));
-        device->injectMemoryManager(memoryManager);
-        context->setMemoryManager(memoryManager);
+        executionEnvironment = std::make_unique<ExecutionEnvironment>();
+        executionEnvironment->incRefInternal();
+        memoryManager = new MockMemoryManager(*executionEnvironment);
+        executionEnvironment->memoryManager.reset(memoryManager);
+        device.reset(MockDevice::create<MockDevice>(*platformDevices, executionEnvironment.get(), 0));
+        context.reset(new MockContext(device.get()));
+
        allocation = memoryManager->allocateGraphicsMemory(size);
        memObj = new MemObj(context.get(), CL_MEM_OBJECT_BUFFER,
                            CL_MEM_READ_WRITE,
                            size,
                            nullptr, nullptr, allocation, true, false, false);
        *device->getDefaultEngine().commandStreamReceiver->getTagAddress() = 0;
+        contextId = device->getDefaultEngine().osContext->getContextId();
    }

    void TearDown() override {
@@ -46,7 +50,7 @@ class MemObjDestructionTest : public ::testing::TestWithParam<bool> {
    }

    void makeMemObjUsed() {
-        memObj->getGraphicsAllocation()->updateTaskCount(taskCountReady, 0u);
+        memObj->getGraphicsAllocation()->updateTaskCount(taskCountReady, contextId);
    }

    void makeMemObjNotReady() {
@@ -60,7 +64,9 @@ class MemObjDestructionTest : public ::testing::TestWithParam<bool> {
    }

    constexpr static uint32_t taskCountReady = 3u;
-    MockDevice *device;
+    std::unique_ptr<ExecutionEnvironment> executionEnvironment;
+    std::unique_ptr<MockDevice> device;
+    uint32_t contextId = 0;
    MockMemoryManager *memoryManager;
    std::unique_ptr<MockContext> context;
    GraphicsAllocation *allocation;
--- a/unit_tests/memory_manager/deferrable_allocation_deletion_tests.cpp
+++ b/unit_tests/memory_manager/deferrable_allocation_deletion_tests.cpp
@@ -51,7 +51,7 @@ struct DeferrableAllocationDeletionTest : ::testing::Test {
 };

 TEST_F(DeferrableAllocationDeletionTest, givenDeferrableAllocationWhenApplyThenWaitForEachTaskCount) {
-    EXPECT_EQ(1u, memoryManager->getOsContextCount());
+    EXPECT_EQ(gpgpuEngineInstances.size(), memoryManager->getOsContextCount());
    auto allocation = memoryManager->allocateGraphicsMemory(MemoryConstants::pageSize);
    allocation->updateTaskCount(1u, device1ContextId);
    *hwTag = 0u;
@@ -74,7 +74,7 @@ TEST_F(DeferrableAllocationDeletionTest, givenDeferrableAllocationWhenApplyThenW
 TEST_F(DeferrableAllocationDeletionTest, givenAllocationUsedByTwoOsContextsWhenApplyDeletionThenWaitForBothContexts) {
    std::unique_ptr<Device> device2(Device::create<Device>(nullptr, device1->getExecutionEnvironment(), 1u));
    auto device2ContextId = device2->getDefaultEngine().osContext->getContextId();
-    EXPECT_EQ(2u, memoryManager->getOsContextCount());
+    EXPECT_EQ(gpgpuEngineInstances.size() * 2, memoryManager->getOsContextCount());
    auto allocation = memoryManager->allocateGraphicsMemory(MemoryConstants::pageSize);
    *hwTag = 0u;
    *device2->getDefaultEngine().commandStreamReceiver->getTagAddress() = 1u;
@@ -91,7 +91,7 @@ TEST_F(DeferrableAllocationDeletionTest, givenAllocationUsedByTwoOsContextsWhenA
    *hwTag = 1u;
 }
 TEST_F(DeferrableAllocationDeletionTest, givenNotUsedAllocationWhenApplyDeletionThenDontWait) {
-    EXPECT_EQ(1u, memoryManager->getOsContextCount());
+    EXPECT_EQ(gpgpuEngineInstances.size(), memoryManager->getOsContextCount());
    auto allocation = memoryManager->allocateGraphicsMemory(MemoryConstants::pageSize);
    EXPECT_FALSE(allocation->isUsed());
    EXPECT_EQ(0u, memoryManager->freeGraphicsMemoryCalled);
--- a/unit_tests/mocks/mock_device.cpp
+++ b/unit_tests/mocks/mock_device.cpp
@@ -17,7 +17,7 @@ MockDevice::MockDevice(const HardwareInfo &hwInfo)
    : MockDevice(hwInfo, new ExecutionEnvironment, 0u) {
    CommandStreamReceiver *commandStreamReceiver = createCommandStream(&hwInfo, *this->executionEnvironment);
    executionEnvironment->commandStreamReceivers.resize(getDeviceIndex() + 1);
-    executionEnvironment->commandStreamReceivers[getDeviceIndex()][0].reset(commandStreamReceiver);
+    executionEnvironment->commandStreamReceivers[getDeviceIndex()][defaultEngineIndex].reset(commandStreamReceiver);
    this->executionEnvironment->memoryManager = std::move(this->mockMemoryManager);
    this->engines[defaultEngineIndex] = {commandStreamReceiver, nullptr};
 }
--- a/unit_tests/os_interface/linux/drm_command_stream_tests.cpp
+++ b/unit_tests/os_interface/linux/drm_command_stream_tests.cpp
@@ -901,11 +901,12 @@ TEST_F(DrmCommandStreamBatchingTests, givenCSRWhenFlushIsCalledThenProperFlagsAr
    csr->flush(batchBuffer, csr->getResidencyAllocations());

    //preemption allocation + Sip Kernel
-    int ioctlExtraCnt = (PreemptionHelper::getDefaultPreemptionMode(*platformDevices[0]) == PreemptionMode::MidThread) ? 2 : 0;
+    int ioctlPreemptionCnt = (PreemptionHelper::getDefaultPreemptionMode(*platformDevices[0]) == PreemptionMode::MidThread) ? 2 : 0;
+    int ioctlTagAllocCnt = gpgpuEngineInstances.size();

    auto engineFlag = csr->getOsContext().get()->getEngineFlag();

-    EXPECT_EQ(6 + ioctlExtraCnt, this->mock->ioctl_cnt.total);
+    EXPECT_EQ(5 + ioctlPreemptionCnt + ioctlTagAllocCnt, this->mock->ioctl_cnt.total);
    uint64_t flags = engineFlag | I915_EXEC_NO_RELOC;
    EXPECT_EQ(flags, this->mock->execBuffer.flags);

@@ -944,7 +945,8 @@ TEST_F(DrmCommandStreamBatchingTests, givenCsrWhenDispatchPolicyIsSetToBatchingT
    size_t csrSurfaceCount = (device->getPreemptionMode() == PreemptionMode::MidThread) ? 2 : 0;

    //preemption allocation + sipKernel
-    int ioctlExtraCnt = (PreemptionHelper::getDefaultPreemptionMode(*platformDevices[0]) == PreemptionMode::MidThread) ? 2 : 0;
+    int ioctlPreemptionCnt = (PreemptionHelper::getDefaultPreemptionMode(*platformDevices[0]) == PreemptionMode::MidThread) ? 2 : 0;
+    int ioctlTagAllocCnt = gpgpuEngineInstances.size();

    auto recordedCmdBuffer = cmdBuffers.peekHead();
    EXPECT_EQ(3u + csrSurfaceCount, recordedCmdBuffer->surfaces.size());
@@ -961,7 +963,7 @@ TEST_F(DrmCommandStreamBatchingTests, givenCsrWhenDispatchPolicyIsSetToBatchingT

    EXPECT_EQ(tCsr->commandStream.getGraphicsAllocation(), recordedCmdBuffer->batchBuffer.commandBufferAllocation);

-    EXPECT_EQ(6 + ioctlExtraCnt, this->mock->ioctl_cnt.total);
+    EXPECT_EQ(5 + ioctlPreemptionCnt + ioctlTagAllocCnt, this->mock->ioctl_cnt.total);

    EXPECT_EQ(0u, this->mock->execBuffer.flags);

@@ -1010,7 +1012,8 @@ TEST_F(DrmCommandStreamBatchingTests, givenRecordedCommandBufferWhenItIsSubmitte
    size_t csrSurfaceCount = (device->getPreemptionMode() == PreemptionMode::MidThread) ? 2 : 0;

    //preemption allocation +sip Kernel
-    int ioctlExtraCnt = (PreemptionHelper::getDefaultPreemptionMode(*platformDevices[0]) == PreemptionMode::MidThread) ? 2 : 0;
+    int ioctlPreemptionCnt = (PreemptionHelper::getDefaultPreemptionMode(*platformDevices[0]) == PreemptionMode::MidThread) ? 2 : 0;
+    int ioctlTagAllocCnt = gpgpuEngineInstances.size();

    //validate that submited command buffer has what we want
    EXPECT_EQ(3u + csrSurfaceCount, this->mock->execBuffer.buffer_count);
@@ -1032,7 +1035,7 @@ TEST_F(DrmCommandStreamBatchingTests, givenRecordedCommandBufferWhenItIsSubmitte
        EXPECT_TRUE(handleFound);
    }

-    EXPECT_EQ(7 + ioctlExtraCnt, this->mock->ioctl_cnt.total);
+    EXPECT_EQ(6 + ioctlPreemptionCnt + ioctlTagAllocCnt, this->mock->ioctl_cnt.total);

    mm->freeGraphicsMemory(dummyAllocation);
    mm->freeGraphicsMemory(commandBuffer);
--- a/unit_tests/os_interface/windows/device_command_stream_tests.cpp
+++ b/unit_tests/os_interface/windows/device_command_stream_tests.cpp
@@ -126,15 +126,12 @@ class WddmCommandStreamWithMockGdiFixture {
        wddm->gdi.reset(gdi);
        ASSERT_NE(wddm, nullptr);
        DebugManager.flags.CsrDispatchMode.set(static_cast<uint32_t>(DispatchMode::ImmediateDispatch));
-        executionEnvironment->commandStreamReceivers.resize(1);
-        executionEnvironment->commandStreamReceivers[0][0] =
-            std::make_unique<MockWddmCsr<DEFAULT_TEST_FAMILY_NAME>>(*platformDevices[0],
-                                                                    *executionEnvironment);
-        this->csr = static_cast<MockWddmCsr<DEFAULT_TEST_FAMILY_NAME> *>(executionEnvironment->commandStreamReceivers[0][0].get());
+        this->csr = new MockWddmCsr<DEFAULT_TEST_FAMILY_NAME>(*platformDevices[0], *executionEnvironment);
        memoryManager = csr->createMemoryManager(false, false);
        ASSERT_NE(nullptr, memoryManager);
        executionEnvironment->memoryManager.reset(memoryManager);
        device = std::unique_ptr<MockDevice>(Device::create<MockDevice>(platformDevices[0], executionEnvironment, 0u));
+        device->resetCommandStreamReceiver(this->csr);
        ASSERT_NE(nullptr, device);
        this->csr->overrideRecorededCommandBuffer(*device);
        if (device->getPreemptionMode() == PreemptionMode::MidThread) {
@@ -884,11 +881,11 @@ HWTEST_F(WddmCsrCompressionTests, givenEnabledCompressionWhenFlushingThenInitTra
        auto mockWddmCsr = new MockWddmCsr<FamilyType>(hwInfo[0], *executionEnvironment);
        mockWddmCsr->createPageTableManager();
        mockWddmCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch);
-        executionEnvironment->commandStreamReceivers.resize(1);
-        executionEnvironment->commandStreamReceivers[0][0].reset(mockWddmCsr);
+        executionEnvironment->memoryManager.reset(mockWddmCsr->createMemoryManager(false, false));

        auto mockMngr = reinterpret_cast<MockGmmPageTableMngr *>(myMockWddm->getPageTableManager());
        std::unique_ptr<MockDevice> device(Device::create<MockDevice>(hwInfo, executionEnvironment, 0u));
+        device->resetCommandStreamReceiver(mockWddmCsr);

        auto memoryManager = executionEnvironment->memoryManager.get();

@@ -925,11 +922,11 @@ HWTEST_F(WddmCsrCompressionTests, givenDisabledCompressionWhenFlushingThenDontIn
    myMockWddm = static_cast<WddmMock *>(executionEnvironment->osInterface->get()->getWddm());

    auto mockWddmCsr = new MockWddmCsr<FamilyType>(hwInfo[0], *executionEnvironment);
-    executionEnvironment->commandStreamReceivers.resize(1);
-    executionEnvironment->commandStreamReceivers[0][0].reset(mockWddmCsr);
    mockWddmCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch);
+    executionEnvironment->memoryManager.reset(mockWddmCsr->createMemoryManager(false, false));

    std::unique_ptr<MockDevice> device(Device::create<MockDevice>(hwInfo, executionEnvironment, 0u));
+    device->resetCommandStreamReceiver(mockWddmCsr);

    auto memoryManager = executionEnvironment->memoryManager.get();

--- a/unit_tests/preemption/preemption_tests.cpp
+++ b/unit_tests/preemption/preemption_tests.cpp
@@ -462,6 +462,28 @@ HWTEST_F(MidThreadPreemptionTests, createCsrSurfaceNoWa) {
    const_cast<HardwareInfo *>(platformDevices[0])->pWaTable = waTable;
 }

+HWTEST_F(MidThreadPreemptionTests, givenMidThreadPreemptionWhenFailingOnCsrSurfaceAllocationThenFailToCreateDevice) { // device creation must return nullptr once the memory manager starts refusing allocations
+    class FailingMemoryManager : public OsAgnosticMemoryManager { // test double: allocateGraphicsMemory succeeds a fixed number of times, then fails
+      public:
+        FailingMemoryManager(ExecutionEnvironment &executionEnvironment) : OsAgnosticMemoryManager(false, false, executionEnvironment) {}
+
+        GraphicsAllocation *allocateGraphicsMemory(size_t size, size_t alignment, bool forcePin, bool uncacheable) override {
+            if (++allocateGraphicsMemoryCount > gpgpuEngineInstances.size()) { // only the first gpgpuEngineInstances.size() calls pass; presumably one per engine (tag allocations) - TODO confirm against device creation
+                return nullptr; // every later request fails; per the test name this is meant to hit the CSR surface allocation
+            }
+            return OsAgnosticMemoryManager::allocateGraphicsMemory(size, alignment, forcePin, uncacheable);
+        }
+
+        uint32_t allocateGraphicsMemoryCount = 0; // number of allocateGraphicsMemory calls observed so far
+    };
+    ExecutionEnvironment executionEnvironment;
+    executionEnvironment.incRefInternal(); // NOTE(review): presumably keeps the stack-allocated environment from being released via ref counting - confirm
+    executionEnvironment.memoryManager = std::make_unique<FailingMemoryManager>(executionEnvironment); // install the failing manager before the device is created
+
+    std::unique_ptr<MockDevice> mockDevice(MockDevice::create<MockDevice>(platformDevices[0], &executionEnvironment, 0));
+    EXPECT_EQ(nullptr, mockDevice.get()); // creation is expected to report the failure by returning nullptr
+}
+
 HWTEST_F(MidThreadPreemptionTests, createCsrSurfaceWa) {
    const WorkaroundTable *waTable = platformDevices[0]->pWaTable;
    WorkaroundTable tmpWaTable;
--- a/unit_tests/program/program_data_tests.cpp
+++ b/unit_tests/program/program_data_tests.cpp
@@ -175,7 +175,7 @@ TEST_F(ProgramDataTest, givenConstantAllocationThatIsInUseByGpuWhenProgramIsBein

    buildAndDecodeProgramPatchList();

-    auto &csr = *pPlatform->getDevice(0)->getEngine(0).commandStreamReceiver;
+    auto &csr = *pPlatform->getDevice(0)->getDefaultEngine().commandStreamReceiver;
    auto tagAddress = csr.getTagAddress();
    auto constantSurface = pProgram->getConstantSurface();
    constantSurface->updateTaskCount(*tagAddress + 1, 0);
@@ -192,7 +192,7 @@ TEST_F(ProgramDataTest, givenGlobalAllocationThatIsInUseByGpuWhenProgramIsBeingD

    buildAndDecodeProgramPatchList();

-    auto &csr = *pPlatform->getDevice(0)->getEngine(0).commandStreamReceiver;
+    auto &csr = *pPlatform->getDevice(0)->getDefaultEngine().commandStreamReceiver;
    auto tagAddress = csr.getTagAddress();
    auto globalSurface = pProgram->getGlobalSurface();
    globalSurface->updateTaskCount(*tagAddress + 1, 0);
--- a/unit_tests/sharings/gl/gl_arb_sync_event_tests.cpp
+++ b/unit_tests/sharings/gl/gl_arb_sync_event_tests.cpp
@@ -65,10 +65,9 @@ struct GlArbSyncEventTest : public ::testing::Test {
    void SetUp() override {
        executionEnvironment = new ExecutionEnvironment;
        auto mockCsr = new MockCommandStreamReceiver(*executionEnvironment);
-        executionEnvironment->commandStreamReceivers.resize(1);
-        executionEnvironment->commandStreamReceivers[0][0].reset(mockCsr);
        executionEnvironment->memoryManager = std::make_unique<OsAgnosticMemoryManager>(false, false, *executionEnvironment);
        device.reset(MockDevice::create<MockDevice>(nullptr, executionEnvironment, 0u));
+        device->resetCommandStreamReceiver(mockCsr);
        ctx.reset(new MockContext);
        cmdQ.reset(new MockCommandQueue(ctx.get(), device.get(), nullptr));
        sharing = new GlSharingFunctionsMock();