From 74dcb5a9e366b3c21e2c8095e93deca72ee10222 Mon Sep 17 00:00:00 2001 From: Bartosz Dunajski Date: Tue, 26 Mar 2024 10:02:52 +0000 Subject: [PATCH] feature: initial support for sync dispatch token allocation Related-To: NEO-8171 Signed-off-by: Bartosz Dunajski --- level_zero/core/source/device/device.cpp | 20 +++++++++++++ level_zero/core/source/device/device.h | 3 ++ level_zero/core/source/device/device_imp.cpp | 2 ++ .../core/test/unit_tests/mocks/mock_device.h | 1 + .../sources/cmdlist/test_in_order_cmdlist.cpp | 22 ++++++++++++++ shared/source/helpers/app_resource_helper.cpp | 4 ++- .../source/memory_manager/allocation_type.h | 3 +- .../source/memory_manager/memory_manager.cpp | 2 ++ shared/source/utilities/logger.cpp | 2 ++ .../unit_test/helpers/app_resource_tests.cpp | 5 ++-- ...nager_allocate_in_preferred_pool_tests.cpp | 29 +++++++++++-------- .../windows/wddm_memory_manager_tests.cpp | 3 +- 12 files changed, 79 insertions(+), 17 deletions(-) diff --git a/level_zero/core/source/device/device.cpp b/level_zero/core/source/device/device.cpp index c5efb5238e..b76f4b26ee 100644 --- a/level_zero/core/source/device/device.cpp +++ b/level_zero/core/source/device/device.cpp @@ -56,7 +56,27 @@ uint32_t Device::getNextSyncDispatchQueueId() { UNRECOVERABLE_IF(newValue == (std::numeric_limits::max() - 1)); + ensureSyncDispatchTokenAllocation(); + return newValue; } +void Device::ensureSyncDispatchTokenAllocation() { + if (!syncDispatchTokenAllocation) { + std::unique_lock lock(syncDispatchTokenMutex); + + if (!syncDispatchTokenAllocation) { + + const NEO::AllocationProperties allocationProperties(getRootDeviceIndex(), true, MemoryConstants::pageSize, + NEO::AllocationType::syncDispatchToken, + true, false, getNEODevice()->getDeviceBitfield()); + + syncDispatchTokenAllocation = getNEODevice()->getMemoryManager()->allocateGraphicsMemoryWithProperties(allocationProperties); + UNRECOVERABLE_IF(syncDispatchTokenAllocation == nullptr); + + memset(syncDispatchTokenAllocation->getUnderlyingBuffer(), 0, syncDispatchTokenAllocation->getUnderlyingBufferSize()); + } + } +} + } // namespace L0 \ No newline at end of file diff --git a/level_zero/core/source/device/device.h b/level_zero/core/source/device/device.h index 0ed1f6f5aa..80dca715dd 100644 --- a/level_zero/core/source/device/device.h +++ b/level_zero/core/source/device/device.h @@ -152,12 +152,15 @@ struct Device : _ze_device_handle_t { NEO::TagAllocatorBase *getDeviceInOrderCounterAllocator(); NEO::TagAllocatorBase *getHostInOrderCounterAllocator(); uint32_t getNextSyncDispatchQueueId(); + void ensureSyncDispatchTokenAllocation(); protected: NEO::Device *neoDevice = nullptr; std::unique_ptr deviceInOrderCounterAllocator; std::unique_ptr hostInOrderCounterAllocator; + NEO::GraphicsAllocation *syncDispatchTokenAllocation = nullptr; std::mutex inOrderAllocatorMutex; + std::mutex syncDispatchTokenMutex; std::atomic syncDispatchQueueIdAllocator = 0; bool implicitScalingCapable = false; }; diff --git a/level_zero/core/source/device/device_imp.cpp b/level_zero/core/source/device/device_imp.cpp index 02ed3ff273..5da1620847 100644 --- a/level_zero/core/source/device/device_imp.cpp +++ b/level_zero/core/source/device/device_imp.cpp @@ -1438,6 +1438,8 @@ void DeviceImp::releaseResources() { UNRECOVERABLE_IF(neoDevice == nullptr); + getNEODevice()->getMemoryManager()->freeGraphicsMemory(syncDispatchTokenAllocation); + this->bcsSplit.releaseResources(); if (neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[neoDevice->getRootDeviceIndex()]->debugger.get()) { diff --git a/level_zero/core/test/unit_tests/mocks/mock_device.h b/level_zero/core/test/unit_tests/mocks/mock_device.h index 1dde11e587..03168bab76 100644 --- a/level_zero/core/test/unit_tests/mocks/mock_device.h +++ b/level_zero/core/test/unit_tests/mocks/mock_device.h @@ -109,6 +109,7 @@ struct MockDeviceImp : public L0::DeviceImp { using Base::implicitScalingCapable; using Base::neoDevice; using Base::subDeviceCopyEngineGroups; + using Base::syncDispatchTokenAllocation; MockDeviceImp(NEO::Device *device, NEO::ExecutionEnvironment *execEnv) { device->incRefInternal(); diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_in_order_cmdlist.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_in_order_cmdlist.cpp index 59bdf1cf14..3c129bbcf4 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_in_order_cmdlist.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_in_order_cmdlist.cpp @@ -5936,5 +5936,27 @@ HWTEST2_F(MultiTileSynchronizedDispatchTests, givenMultiTileSyncDispatchQueueWhe EXPECT_EQ(3u, immCmdList1->syncDispatchQueueId); EXPECT_EQ(NEO::SynchronizedDispatchMode::full, immCmdList1->synchronizedDispatchMode); } + +HWTEST2_F(MultiTileSynchronizedDispatchTests, givenSyncDispatchEnabledWhenAllocatingQueueIdThenEnsureTokenAllocation, IsAtLeastSkl) { + NEO::debugManager.flags.ForceSynchronizedDispatchMode.set(1); + + auto mockDevice = static_cast(device); + + EXPECT_EQ(nullptr, mockDevice->syncDispatchTokenAllocation); + + auto immCmdList = createMultiTileImmCmdList(); + EXPECT_EQ(NEO::SynchronizedDispatchMode::full, immCmdList->synchronizedDispatchMode); + + auto syncAllocation = mockDevice->syncDispatchTokenAllocation; + EXPECT_NE(nullptr, syncAllocation); + + EXPECT_EQ(syncAllocation->getAllocationType(), NEO::AllocationType::syncDispatchToken); + + immCmdList = createMultiTileImmCmdList(); + EXPECT_EQ(NEO::SynchronizedDispatchMode::full, immCmdList->synchronizedDispatchMode); + + EXPECT_EQ(mockDevice->syncDispatchTokenAllocation, syncAllocation); +} + } // namespace ult } // namespace L0 diff --git a/shared/source/helpers/app_resource_helper.cpp b/shared/source/helpers/app_resource_helper.cpp index 6dfd3e7b15..98d2e8544a 100644 --- a/shared/source/helpers/app_resource_helper.cpp +++ b/shared/source/helpers/app_resource_helper.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2021-2023 Intel Corporation + * Copyright (C) 2021-2024 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -116,6 +116,8 @@ const char *AppResourceHelper::getResourceTagStr(AllocationType type) { return "TSKLIST"; case AllocationType::assertBuffer: return "ASSRTBUF"; + case AllocationType::syncDispatchToken: + return "SYNCTOK"; default: return "NOTFOUND"; } diff --git a/shared/source/memory_manager/allocation_type.h b/shared/source/memory_manager/allocation_type.h index de7f9724a9..111faba870 100644 --- a/shared/source/memory_manager/allocation_type.h +++ b/shared/source/memory_manager/allocation_type.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2022-2023 Intel Corporation + * Copyright (C) 2022-2024 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -58,6 +58,7 @@ enum class AllocationType { swTagBuffer, deferredTasksList, assertBuffer, + syncDispatchToken, count }; diff --git a/shared/source/memory_manager/memory_manager.cpp b/shared/source/memory_manager/memory_manager.cpp index e07dfa701a..7c51194d3d 100644 --- a/shared/source/memory_manager/memory_manager.cpp +++ b/shared/source/memory_manager/memory_manager.cpp @@ -416,6 +416,7 @@ bool MemoryManager::getAllocationData(AllocationData &allocationData, const Allo case AllocationType::svmZeroCopy: case AllocationType::gpuTimestampDeviceBuffer: case AllocationType::preemption: + case AllocationType::syncDispatchToken: allow64KbPages = true; default: break; @@ -501,6 +502,7 @@ bool MemoryManager::getAllocationData(AllocationData &allocationData, const Allo case AllocationType::debugModuleArea: case AllocationType::gpuTimestampDeviceBuffer: case AllocationType::semaphoreBuffer: + case AllocationType::syncDispatchToken: allocationData.flags.resource48Bit = true; break; default: diff --git a/shared/source/utilities/logger.cpp b/shared/source/utilities/logger.cpp index fc31a15399..5068038226 100644 --- a/shared/source/utilities/logger.cpp +++ b/shared/source/utilities/logger.cpp @@ -246,6 +246,8 @@ const char *getAllocationTypeString(GraphicsAllocation const *graphicsAllocation return "DEFERRED_TASKS_LIST"; case AllocationType::assertBuffer: return "ASSERT_BUFFER"; + case AllocationType::syncDispatchToken: + return "SYNC_DISPATCH_TOKEN"; default: return "ILLEGAL_VALUE"; } diff --git a/shared/test/unit_test/helpers/app_resource_tests.cpp b/shared/test/unit_test/helpers/app_resource_tests.cpp index 8501738ac2..4adc7a6d79 100644 --- a/shared/test/unit_test/helpers/app_resource_tests.cpp +++ b/shared/test/unit_test/helpers/app_resource_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2021-2023 Intel Corporation + * Copyright (C) 2021-2024 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -101,7 +101,8 @@ AllocationTypeTagTestCase allocationTypeTagValues[static_cast(AllocationTyp {AllocationType::gpuTimestampDeviceBuffer, "GPUTSDBF"}, {AllocationType::swTagBuffer, "SWTAGBF"}, {AllocationType::deferredTasksList, "TSKLIST"}, - {AllocationType::assertBuffer, "ASSRTBUF"}}; + {AllocationType::assertBuffer, "ASSRTBUF"}, + {AllocationType::syncDispatchToken, "SYNCTOK"}}; class AllocationTypeTagString : public ::testing::TestWithParam {}; TEST_P(AllocationTypeTagString, givenGraphicsAllocationTypeWhenCopyTagToStorageInfoThenCorrectTagIsReturned) { diff --git a/shared/test/unit_test/memory_manager/memory_manager_allocate_in_preferred_pool_tests.cpp b/shared/test/unit_test/memory_manager/memory_manager_allocate_in_preferred_pool_tests.cpp index d982b42901..b26b6a1a60 100644 --- a/shared/test/unit_test/memory_manager/memory_manager_allocate_in_preferred_pool_tests.cpp +++ b/shared/test/unit_test/memory_manager/memory_manager_allocate_in_preferred_pool_tests.cpp @@ -515,6 +515,15 @@ TEST(MemoryManagerTest, givenPreemptionTypeWhenGetAllocationDataIsCalledThenSyst EXPECT_FALSE(allocData.flags.useSystemMemory); } +TEST(MemoryManagerTest, givenSyncTokenTypeWhenGetAllocationDataIsCalledThenSystemMemoryIsNotRequestedAndAllow64kPages) { + AllocationData allocData; + MockMemoryManager mockMemoryManager; + AllocationProperties properties{mockRootDeviceIndex, 1, AllocationType::syncDispatchToken, mockDeviceBitfield}; + mockMemoryManager.getAllocationData(allocData, properties, nullptr, mockMemoryManager.createStorageInfoFromProperties(properties)); + EXPECT_FALSE(allocData.flags.useSystemMemory); + EXPECT_TRUE(allocData.flags.allow64kbPages); +} + TEST(MemoryManagerTest, givenPreemptionTypeWhenGetAllocationDataIsCalledThen64kbPagesAllowed) { AllocationData allocData; MockMemoryManager mockMemoryManager; @@ -523,20 +532,15 @@ TEST(MemoryManagerTest, givenPreemptionTypeWhenGetAllocationDataIsCalledThen64kb EXPECT_TRUE(allocData.flags.allow64kbPages); } -TEST(MemoryManagerTest, givenPreemptionTypeWhenGetAllocationDataIsCalledThen48BitResourceIsTrue) { - AllocationData allocData; +TEST(MemoryManagerTest, givenAllocationTypeWhenGetAllocationDataIsCalledThen48BitResourceIsTrue) { MockMemoryManager mockMemoryManager; - AllocationProperties properties{mockRootDeviceIndex, 1, AllocationType::preemption, mockDeviceBitfield}; - mockMemoryManager.getAllocationData(allocData, properties, nullptr, mockMemoryManager.createStorageInfoFromProperties(properties)); - EXPECT_TRUE(allocData.flags.resource48Bit); -} -TEST(MemoryManagerTest, givenDeferredTasksListTypeWhenGetAllocationDataIsCalledThen48BitResourceIsTrue) { - AllocationData allocData; - MockMemoryManager mockMemoryManager; - AllocationProperties properties{mockRootDeviceIndex, 1, AllocationType::deferredTasksList, mockDeviceBitfield}; - mockMemoryManager.getAllocationData(allocData, properties, nullptr, mockMemoryManager.createStorageInfoFromProperties(properties)); - EXPECT_TRUE(allocData.flags.resource48Bit); + for (auto &type : {AllocationType::preemption, AllocationType::deferredTasksList, AllocationType::syncDispatchToken}) { + AllocationData allocData; + AllocationProperties properties{mockRootDeviceIndex, 1, type, mockDeviceBitfield}; + mockMemoryManager.getAllocationData(allocData, properties, nullptr, mockMemoryManager.createStorageInfoFromProperties(properties)); + EXPECT_TRUE(allocData.flags.resource48Bit); + } } TEST(MemoryManagerTest, givenMCSTypeWhenGetAllocationDataIsCalledThenSystemMemoryIsRequested) { @@ -1090,6 +1094,7 @@ static const AllocationType allocationHaveToBeForcedTo48Bit[] = { AllocationType::timestampPacketTagBuffer, AllocationType::semaphoreBuffer, AllocationType::deferredTasksList, + AllocationType::syncDispatchToken, }; static const AllocationType allocationsOptionallyForcedTo48Bit[] = { diff --git a/shared/test/unit_test/os_interface/windows/wddm_memory_manager_tests.cpp b/shared/test/unit_test/os_interface/windows/wddm_memory_manager_tests.cpp index 1579834e3b..d50455be89 100644 --- a/shared/test/unit_test/os_interface/windows/wddm_memory_manager_tests.cpp +++ b/shared/test/unit_test/os_interface/windows/wddm_memory_manager_tests.cpp @@ -576,7 +576,8 @@ TEST_F(WddmMemoryManagerTests, givenTypeWhenCallIsStatelessAccessRequiredThenPro AllocationType::gpuTimestampDeviceBuffer, AllocationType::swTagBuffer, AllocationType::deferredTasksList, - AllocationType::assertBuffer}) { + AllocationType::assertBuffer, + AllocationType::syncDispatchToken}) { EXPECT_FALSE(wddmMemoryManager->isStatelessAccessRequired(type)); } }