From c4759884d8901ff20d5a15d024b203c47b61aff3 Mon Sep 17 00:00:00 2001 From: Mateusz Jablonski Date: Fri, 13 Jan 2023 10:36:23 +0000 Subject: [PATCH] fix: defer initialization of cross root device tag allocations additional tag allocations are not needed before creating OCL contexts with multiple root devices Related-To: NEO-7634 Signed-off-by: Mateusz Jablonski --- opencl/source/context/context.cpp | 3 + .../test/unit_test/context/context_tests.cpp | 55 +++++++++++++ .../command_stream_receiver.cpp | 34 +++++--- .../command_stream/command_stream_receiver.h | 1 + .../command_stream_receiver_tests.cpp | 79 +++++++++++-------- .../linux/drm_memory_manager_tests.cpp | 12 +-- 6 files changed, 128 insertions(+), 56 deletions(-) diff --git a/opencl/source/context/context.cpp b/opencl/source/context/context.cpp index 0f089fe97f..356803ea28 100644 --- a/opencl/source/context/context.cpp +++ b/opencl/source/context/context.cpp @@ -245,6 +245,9 @@ bool Context::createImpl(const cl_context_properties *properties, if (pDevice->getRootDeviceIndex() == rootDeviceIndex) { deviceBitfield |= pDevice->getDeviceBitfield(); } + for (auto &engine : pDevice->getDevice().getAllEngines()) { + engine.commandStreamReceiver->ensureTagAllocationForRootDeviceIndex(rootDeviceIndex); + } } deviceBitfields.insert({rootDeviceIndex, deviceBitfield}); } diff --git a/opencl/test/unit_test/context/context_tests.cpp b/opencl/test/unit_test/context/context_tests.cpp index 515e81d935..bc3b78cb2d 100644 --- a/opencl/test/unit_test/context/context_tests.cpp +++ b/opencl/test/unit_test/context/context_tests.cpp @@ -14,6 +14,7 @@ #include "shared/test/common/mocks/mock_deferred_deleter.h" #include "shared/test/common/mocks/mock_device.h" #include "shared/test/common/mocks/mock_memory_manager.h" +#include "shared/test/common/mocks/ult_device_factory.h" #include "shared/test/common/test_macros/hw_test.h" #include "opencl/source/command_queue/command_queue.h" @@ -419,6 +420,60 @@ TEST(MultiDeviceContextTest, givenContextWithTwoDifferentSubDevicesFromDifferent context->release(); } +TEST(MultiDeviceContextTest, givenMultipleRootDevicesWhenCreatingMultiRootDeviceContextCrossDeviceTagAllocationsAreCreated) { + DebugManagerStateRestore restorer; + + UltClDeviceFactory deviceFactory{3, 0}; + cl_int retVal; + + for (auto &csr : deviceFactory.pUltDeviceFactory->rootDevices[0]->commandStreamReceivers) { + auto tagsMultiAllocation = csr->getTagsMultiAllocation(); + EXPECT_NE(nullptr, tagsMultiAllocation->getGraphicsAllocation(0)); + EXPECT_EQ(nullptr, tagsMultiAllocation->getGraphicsAllocation(1)); + EXPECT_EQ(nullptr, tagsMultiAllocation->getGraphicsAllocation(2)); + } + + for (auto &csr : deviceFactory.pUltDeviceFactory->rootDevices[1]->commandStreamReceivers) { + auto tagsMultiAllocation = csr->getTagsMultiAllocation(); + EXPECT_EQ(nullptr, tagsMultiAllocation->getGraphicsAllocation(0)); + EXPECT_NE(nullptr, tagsMultiAllocation->getGraphicsAllocation(1)); + EXPECT_EQ(nullptr, tagsMultiAllocation->getGraphicsAllocation(2)); + } + + for (auto &csr : deviceFactory.pUltDeviceFactory->rootDevices[2]->commandStreamReceivers) { + auto tagsMultiAllocation = csr->getTagsMultiAllocation(); + EXPECT_EQ(nullptr, tagsMultiAllocation->getGraphicsAllocation(0)); + EXPECT_EQ(nullptr, tagsMultiAllocation->getGraphicsAllocation(1)); + EXPECT_NE(nullptr, tagsMultiAllocation->getGraphicsAllocation(2)); + } + cl_device_id devices[]{deviceFactory.rootDevices[0], deviceFactory.rootDevices[2]}; + ClDeviceVector deviceVector(devices, 2); + + auto context = Context::create(0, deviceVector, nullptr, nullptr, retVal); + EXPECT_NE(nullptr, context); + EXPECT_EQ(CL_SUCCESS, retVal); + for (auto &csr : deviceFactory.pUltDeviceFactory->rootDevices[0]->commandStreamReceivers) { + auto tagsMultiAllocation = csr->getTagsMultiAllocation(); + EXPECT_NE(nullptr, tagsMultiAllocation->getGraphicsAllocation(0)); + EXPECT_EQ(nullptr, tagsMultiAllocation->getGraphicsAllocation(1)); + EXPECT_NE(nullptr, tagsMultiAllocation->getGraphicsAllocation(2)); + } + for (auto &csr : deviceFactory.pUltDeviceFactory->rootDevices[1]->commandStreamReceivers) { + auto tagsMultiAllocation = csr->getTagsMultiAllocation(); + EXPECT_EQ(nullptr, tagsMultiAllocation->getGraphicsAllocation(0)); + EXPECT_NE(nullptr, tagsMultiAllocation->getGraphicsAllocation(1)); + EXPECT_EQ(nullptr, tagsMultiAllocation->getGraphicsAllocation(2)); + } + for (auto &csr : deviceFactory.pUltDeviceFactory->rootDevices[2]->commandStreamReceivers) { + auto tagsMultiAllocation = csr->getTagsMultiAllocation(); + EXPECT_NE(nullptr, tagsMultiAllocation->getGraphicsAllocation(0)); + EXPECT_EQ(nullptr, tagsMultiAllocation->getGraphicsAllocation(1)); + EXPECT_NE(nullptr, tagsMultiAllocation->getGraphicsAllocation(2)); + } + + context->release(); +} + TEST(Context, WhenSettingContextDestructorCallbackThenCallOrderIsPreserved) { struct UserDataType { cl_context expectedContext; diff --git a/shared/source/command_stream/command_stream_receiver.cpp b/shared/source/command_stream/command_stream_receiver.cpp index 90c1df5244..8d931ed71d 100644 --- a/shared/source/command_stream/command_stream_receiver.cpp +++ b/shared/source/command_stream/command_stream_receiver.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2022 Intel Corporation + * Copyright (C) 2018-2023 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -461,25 +461,33 @@ void CommandStreamReceiver::setTagAllocation(GraphicsAllocation *allocation) { MultiGraphicsAllocation &CommandStreamReceiver::createTagsMultiAllocation() { RootDeviceIndicesContainer rootDeviceIndices; - if (ApiSpecificConfig::getApiType() == ApiSpecificConfig::L0) { - rootDeviceIndices.push_back(rootDeviceIndex); - } else { - for (auto index = 0u; index < this->executionEnvironment.rootDeviceEnvironments.size(); index++) { - if (this->executionEnvironment.rootDeviceEnvironments[index]->getHardwareInfo()->platform.eProductFamily == - this->executionEnvironment.rootDeviceEnvironments[this->rootDeviceIndex]->getHardwareInfo()->platform.eProductFamily) { - rootDeviceIndices.push_back(index); - } - } - } + rootDeviceIndices.push_back(rootDeviceIndex); - auto maxRootDeviceIndex = *std::max_element(rootDeviceIndices.begin(), rootDeviceIndices.end(), std::less()); + auto maxRootDeviceIndex = static_cast(this->executionEnvironment.rootDeviceEnvironments.size() - 1); auto allocations = new MultiGraphicsAllocation(maxRootDeviceIndex); - AllocationProperties unifiedMemoryProperties{rootDeviceIndices.at(0), MemoryConstants::pageSize, AllocationType::TAG_BUFFER, systemMemoryBitfield}; + AllocationProperties unifiedMemoryProperties{rootDeviceIndex, MemoryConstants::pageSize, AllocationType::TAG_BUFFER, systemMemoryBitfield}; this->getMemoryManager()->createMultiGraphicsAllocationInSystemMemoryPool(rootDeviceIndices, unifiedMemoryProperties, *allocations); return *allocations; } +bool CommandStreamReceiver::ensureTagAllocationForRootDeviceIndex(uint32_t rootDeviceIndex) { + UNRECOVERABLE_IF(!tagsMultiAllocation); + if (rootDeviceIndex >= tagsMultiAllocation->getGraphicsAllocations().size()) { + return false; + } + if (tagsMultiAllocation->getGraphicsAllocation(rootDeviceIndex)) { + return true; + } + AllocationProperties allocationProperties{rootDeviceIndex, MemoryConstants::pageSize, AllocationType::TAG_BUFFER, systemMemoryBitfield}; + allocationProperties.flags.allocateMemory = false; + auto graphicsAllocation = this->getMemoryManager()->createGraphicsAllocationFromExistingStorage(allocationProperties, tagAllocation->getUnderlyingBuffer(), *tagsMultiAllocation); + if (!graphicsAllocation) { + return false; + } + tagsMultiAllocation->addAllocation(graphicsAllocation); + return true; +} FlushStamp CommandStreamReceiver::obtainCurrentFlushStamp() const { return flushStamp->peekStamp(); diff --git a/shared/source/command_stream/command_stream_receiver.h b/shared/source/command_stream/command_stream_receiver.h index b5aa2b9bd1..edebd23625 100644 --- a/shared/source/command_stream/command_stream_receiver.h +++ b/shared/source/command_stream/command_stream_receiver.h @@ -385,6 +385,7 @@ class CommandStreamReceiver { } bool isTbxMode() const; + bool ensureTagAllocationForRootDeviceIndex(uint32_t rootDeviceIndex); protected: void cleanupResources(); diff --git a/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp b/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp index 2b5d0f0e21..aa67f4f2dd 100644 --- a/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp +++ b/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp @@ -1188,35 +1188,41 @@ HWTEST_F(CommandStreamReceiverTest, givenUltCommandStreamReceiverWhenAddAubComme EXPECT_TRUE(csr.addAubCommentCalled); } -TEST(CommandStreamReceiverSimpleTest, givenCommandStreamReceiverWhenInitializeTagAllocationForOpenCLThenMultiTagAllocationIsBeingAllocated) { - VariableBackup backup(&apiTypeForUlts, ApiSpecificConfig::OCL); +TEST(CommandStreamReceiverSimpleTest, givenCommandStreamReceiverWhenInitializeTagAllocationsThenSingleTagAllocationIsBeingAllocated) { uint32_t numRootDevices = 10u; UltDeviceFactory deviceFactory{numRootDevices, 0}; - EXPECT_NE(nullptr, deviceFactory.rootDevices[0]->commandStreamReceivers[0]->getTagAllocation()); - EXPECT_EQ(AllocationType::TAG_BUFFER, deviceFactory.rootDevices[0]->commandStreamReceivers[0]->getTagAllocation()->getAllocationType()); - EXPECT_TRUE(deviceFactory.rootDevices[0]->commandStreamReceivers[0]->getTagAddress() != nullptr); - EXPECT_EQ(*deviceFactory.rootDevices[0]->commandStreamReceivers[0]->getTagAddress(), initialHardwareTag); - auto tagsMultiAllocation = deviceFactory.rootDevices[0]->commandStreamReceivers[0]->getTagsMultiAllocation(); - auto graphicsAllocation0 = tagsMultiAllocation->getGraphicsAllocation(0); - EXPECT_EQ(tagsMultiAllocation->getGraphicsAllocations().size(), numRootDevices); - for (auto graphicsAllocation : tagsMultiAllocation->getGraphicsAllocations()) { - if (graphicsAllocation != graphicsAllocation0) { - EXPECT_EQ(graphicsAllocation->getUnderlyingBuffer(), graphicsAllocation0->getUnderlyingBuffer()); + for (auto rootDeviceIndex = 0u; rootDeviceIndex < numRootDevices; rootDeviceIndex++) { + auto tagAllocation = deviceFactory.rootDevices[rootDeviceIndex]->commandStreamReceivers[0]->getTagAllocation(); + EXPECT_NE(nullptr, tagAllocation); + EXPECT_EQ(AllocationType::TAG_BUFFER, deviceFactory.rootDevices[rootDeviceIndex]->commandStreamReceivers[0]->getTagAllocation()->getAllocationType()); + EXPECT_TRUE(deviceFactory.rootDevices[rootDeviceIndex]->commandStreamReceivers[0]->getTagAddress() != nullptr); + EXPECT_EQ(*deviceFactory.rootDevices[rootDeviceIndex]->commandStreamReceivers[0]->getTagAddress(), initialHardwareTag); + auto tagsMultiAllocation = deviceFactory.rootDevices[rootDeviceIndex]->commandStreamReceivers[0]->getTagsMultiAllocation(); + EXPECT_EQ(tagsMultiAllocation->getGraphicsAllocations().size(), numRootDevices); + + for (auto i = 0u; i < numRootDevices; i++) { + auto allocation = tagsMultiAllocation->getGraphicsAllocation(i); + if (rootDeviceIndex == i) { + EXPECT_EQ(allocation, tagAllocation); + } else { + EXPECT_EQ(nullptr, allocation); + } } } } - -TEST(CommandStreamReceiverSimpleTest, givenCommandStreamReceiverWhenInitializeTagAllocationForLevelZeroThenSingleTagAllocationIsBeingAllocated) { - VariableBackup backup(&apiTypeForUlts, ApiSpecificConfig::L0); - uint32_t numRootDevices = 10u; +TEST(CommandStreamReceiverSimpleTest, givenCommandStreamReceiverWhenEnsureTagAllocationIsCalledForIncorrectRootDeviceIndexThenFailureIsReturned) { + uint32_t numRootDevices = 1u; UltDeviceFactory deviceFactory{numRootDevices, 0}; - EXPECT_NE(nullptr, deviceFactory.rootDevices[0]->commandStreamReceivers[0]->getTagAllocation()); - EXPECT_EQ(AllocationType::TAG_BUFFER, deviceFactory.rootDevices[0]->commandStreamReceivers[0]->getTagAllocation()->getAllocationType()); - EXPECT_TRUE(deviceFactory.rootDevices[0]->commandStreamReceivers[0]->getTagAddress() != nullptr); - EXPECT_EQ(*deviceFactory.rootDevices[0]->commandStreamReceivers[0]->getTagAddress(), initialHardwareTag); - auto tagsMultiAllocation = deviceFactory.rootDevices[0]->commandStreamReceivers[0]->getTagsMultiAllocation(); - EXPECT_EQ(tagsMultiAllocation->getGraphicsAllocations().size(), 1u); + + EXPECT_FALSE(deviceFactory.rootDevices[0]->commandStreamReceivers[0]->ensureTagAllocationForRootDeviceIndex(1)); +} + +TEST(CommandStreamReceiverSimpleTest, givenCommandStreamReceiverWhenEnsureTagAllocationIsCalledForRootDeviceIndexWhichHasTagAllocationThenReturnEarlySucess) { + uint32_t numRootDevices = 1u; + UltDeviceFactory deviceFactory{numRootDevices, 0}; + + EXPECT_TRUE(deviceFactory.rootDevices[0]->commandStreamReceivers[0]->ensureTagAllocationForRootDeviceIndex(0)); } TEST(CommandStreamReceiverSimpleTest, givenCommandStreamReceiverWhenItIsDestroyedThenItDestroysTagAllocation) { @@ -1264,7 +1270,7 @@ TEST(CommandStreamReceiverSimpleTest, givenCommandStreamReceiverWhenInitializeTa } } -TEST(CommandStreamReceiverSimpleTest, givenCommandStreamReceiverWhenInitializeTagAllocationIsCalledInMultiRootDeviceEnvironmentThenTagAllocationIsBeingAllocated) { +TEST(CommandStreamReceiverSimpleTest, givenCommandStreamReceiverWhenEnsureTagAllocationForRootDeviceIndexIsCalledThenProperAllocationIsBeingAllocated) { MockExecutionEnvironment executionEnvironment(defaultHwInfo.get(), true, 10u); DeviceBitfield devices(0b1111); auto csr = std::make_unique(executionEnvironment, 0, devices); @@ -1289,22 +1295,29 @@ TEST(CommandStreamReceiverSimpleTest, givenCommandStreamReceiverWhenInitializeTa for (auto graphicsAllocation : tagsMultiAllocation->getGraphicsAllocations()) { if (graphicsAllocation != graphicsAllocation0) { - EXPECT_EQ(graphicsAllocation->getUnderlyingBuffer(), graphicsAllocation0->getUnderlyingBuffer()); + EXPECT_EQ(nullptr, graphicsAllocation); } } + + EXPECT_TRUE(csr->ensureTagAllocationForRootDeviceIndex(1)); + auto graphicsAllocation = tagsMultiAllocation->getGraphicsAllocation(1); + EXPECT_NE(nullptr, graphicsAllocation); + EXPECT_EQ(graphicsAllocation->getUnderlyingBuffer(), graphicsAllocation0->getUnderlyingBuffer()); } -TEST(CommandStreamReceiverSimpleTest, givenNullHardwareDebugModeWhenInitializeTagAllocationIsCalledThenTagAllocationIsBeingAllocatedAndinitialValueIsMinusOne) { - DebugManagerStateRestore dbgRestore; - DebugManager.flags.EnableNullHardware.set(true); - MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); - auto csr = std::make_unique(executionEnvironment, 0, 1); - executionEnvironment.memoryManager.reset(new OsAgnosticMemoryManager(executionEnvironment)); +TEST(CommandStreamReceiverSimpleTest, givenMemoryAllocationFailureWhenEnsuringTagAllocationThenFailureIsReturned) { + MockExecutionEnvironment executionEnvironment(defaultHwInfo.get(), true, 2u); + DeviceBitfield devices(0b11); + auto csr = std::make_unique(executionEnvironment, 0, devices); + EXPECT_EQ(nullptr, csr->getTagAllocation()); + executionEnvironment.memoryManager.reset(new OsAgnosticMemoryManager(executionEnvironment)); + csr->initializeTagAllocation(); EXPECT_NE(nullptr, csr->getTagAllocation()); - EXPECT_EQ(csr->getTagAllocation()->getUnderlyingBuffer(), csr->getTagAddress()); - EXPECT_EQ(*csr->getTagAddress(), static_cast(-1)); + executionEnvironment.memoryManager.reset(new FailMemoryManager(executionEnvironment)); + + EXPECT_FALSE(csr->ensureTagAllocationForRootDeviceIndex(1)); } TEST(CommandStreamReceiverSimpleTest, givenVariousDataSetsWhenVerifyingMemoryThenCorrectValueIsReturned) { @@ -2552,4 +2565,4 @@ HWTEST_F(CommandStreamReceiverHwTest, givenNullPtrAsMultiRootDeviceSyncNodeWhenF cmdIterator = find(++cmdIterator, hwParser.cmdList.end()); } EXPECT_FALSE(nodeAddressFound); -} \ No newline at end of file +} diff --git a/shared/test/unit_test/os_interface/linux/drm_memory_manager_tests.cpp b/shared/test/unit_test/os_interface/linux/drm_memory_manager_tests.cpp index 082336bd05..0bc10ddd19 100644 --- a/shared/test/unit_test/os_interface/linux/drm_memory_manager_tests.cpp +++ b/shared/test/unit_test/os_interface/linux/drm_memory_manager_tests.cpp @@ -1406,11 +1406,7 @@ TEST_F(DrmMemoryManagerTest, GivenMisalignedHostPtrAndMultiplePagesSizeWhenAsked auto hostPtrManager = static_cast(memoryManager->getHostPtrManager()); - if (memoryManager->isLimitedRange(rootDeviceIndex)) { - ASSERT_EQ(6u, hostPtrManager->getFragmentCount()); - } else { - ASSERT_EQ(3u, hostPtrManager->getFragmentCount()); - } + ASSERT_EQ(3u, hostPtrManager->getFragmentCount()); auto reqs = MockHostPtrManager::getAllocationRequirements(rootDeviceIndex, ptr, size); @@ -1422,11 +1418,7 @@ TEST_F(DrmMemoryManagerTest, GivenMisalignedHostPtrAndMultiplePagesSizeWhenAsked } memoryManager->freeGraphicsMemory(graphicsAllocation); - if (memoryManager->isLimitedRange(rootDeviceIndex)) { - EXPECT_EQ(3u, hostPtrManager->getFragmentCount()); - } else { - EXPECT_EQ(0u, hostPtrManager->getFragmentCount()); - } + EXPECT_EQ(0u, hostPtrManager->getFragmentCount()); } TEST_F(DrmMemoryManagerTest, givenMemoryManagerWhenAskedFor32BitAllocationThen32BitDrmAllocationIsBeingReturned) {