From 0346a5679fe877537435dad4e542be3ab211a3f7 Mon Sep 17 00:00:00 2001 From: Bartosz Dunajski Date: Mon, 6 Dec 2021 11:38:24 +0000 Subject: [PATCH] Fix returned local memory size Signed-off-by: Bartosz Dunajski --- .../test/unit_test/aub/aub_helper_tests.cpp | 12 ++++----- .../unit_test/device/sub_device_tests.cpp | 26 ++++++++++++++++++- ..._manager_allocate_in_device_pool_tests.cpp | 7 ++++- shared/source/aub/aub_center.cpp | 2 +- shared/source/aub/aub_helper.cpp | 2 +- shared/source/aub/aub_helper.h | 2 +- ...ver_simulated_common_hw_xehp_and_later.inl | 2 +- .../command_stream_receiver_simulated_hw.h | 2 +- shared/source/device/device.h | 2 +- shared/source/device/sub_device.cpp | 5 ---- shared/source/device/sub_device.h | 1 - .../os_agnostic_memory_manager.cpp | 3 ++- 12 files changed, 45 insertions(+), 21 deletions(-) diff --git a/opencl/test/unit_test/aub/aub_helper_tests.cpp b/opencl/test/unit_test/aub/aub_helper_tests.cpp index bcd27105ba..eb2189fe0b 100644 --- a/opencl/test/unit_test/aub/aub_helper_tests.cpp +++ b/opencl/test/unit_test/aub/aub_helper_tests.cpp @@ -83,13 +83,13 @@ TEST(AubHelper, WhenHBMSizePerTileInGigabytesIsSetThenGetMemBankSizeReturnsCorre sysInfo.MultiTileArchInfo.IsValid = true; sysInfo.MultiTileArchInfo.TileCount = 1; - EXPECT_EQ(8 * MemoryConstants::gigaByte, AubHelper::getMemBankSize(&hwInfo)); + EXPECT_EQ(8 * MemoryConstants::gigaByte, AubHelper::getPerTileLocalMemorySize(&hwInfo)); sysInfo.MultiTileArchInfo.TileCount = 2; - EXPECT_EQ(8 * MemoryConstants::gigaByte, AubHelper::getMemBankSize(&hwInfo)); + EXPECT_EQ(8 * MemoryConstants::gigaByte, AubHelper::getPerTileLocalMemorySize(&hwInfo)); sysInfo.MultiTileArchInfo.TileCount = 4; - EXPECT_EQ(8 * MemoryConstants::gigaByte, AubHelper::getMemBankSize(&hwInfo)); + EXPECT_EQ(8 * MemoryConstants::gigaByte, AubHelper::getPerTileLocalMemorySize(&hwInfo)); } TEST(AubHelper, WhenHBMSizePerTileInGigabytesIsNotSetThenGetMemBankSizeReturnsCorrectValue) { @@ -98,13 +98,13 @@ TEST(AubHelper, WhenHBMSizePerTileInGigabytesIsNotSetThenGetMemBankSizeReturnsCo sysInfo.MultiTileArchInfo.IsValid = true; sysInfo.MultiTileArchInfo.TileCount = 1; - EXPECT_EQ(32 * MemoryConstants::gigaByte, AubHelper::getMemBankSize(&hwInfo)); + EXPECT_EQ(32 * MemoryConstants::gigaByte, AubHelper::getPerTileLocalMemorySize(&hwInfo)); sysInfo.MultiTileArchInfo.TileCount = 2; - EXPECT_EQ(16 * MemoryConstants::gigaByte, AubHelper::getMemBankSize(&hwInfo)); + EXPECT_EQ(16 * MemoryConstants::gigaByte, AubHelper::getPerTileLocalMemorySize(&hwInfo)); sysInfo.MultiTileArchInfo.TileCount = 4; - EXPECT_EQ(8 * MemoryConstants::gigaByte, AubHelper::getMemBankSize(&hwInfo)); + EXPECT_EQ(8 * MemoryConstants::gigaByte, AubHelper::getPerTileLocalMemorySize(&hwInfo)); } using AubHelperHwTest = Test; diff --git a/opencl/test/unit_test/device/sub_device_tests.cpp b/opencl/test/unit_test/device/sub_device_tests.cpp index 91ca198151..4b0557c13d 100644 --- a/opencl/test/unit_test/device/sub_device_tests.cpp +++ b/opencl/test/unit_test/device/sub_device_tests.cpp @@ -259,7 +259,12 @@ TEST(RootDevicesTest, givenRootDeviceWithSubdevicesWhenCreateEnginesThenDeviceCr EXPECT_EQ(1u, device.engines.size()); } -TEST(SubDevicesTest, givenRootDeviceWithSubDevicesWhenGettingGlobalMemorySizeThenSubDevicesReturnReducedAmountOfGlobalMemAllocSize) { +TEST(SubDevicesTest, givenRootDeviceWithSubDevicesAndLocalMemoryWhenGettingGlobalMemorySizeThenSubDevicesReturnReducedAmountOfGlobalMemAllocSize) { + DebugManagerStateRestore restorer; + DebugManager.flags.EnableLocalMemory.set(1); + DebugManager.flags.CreateMultipleSubDevices.set(2); + DebugManager.flags.HBMSizePerTileInGigabytes.set(1); + const uint32_t numSubDevices = 2u; UltDeviceFactory deviceFactory{1, numSubDevices}; @@ -275,6 +280,25 @@ TEST(SubDevicesTest, givenRootDeviceWithSubDevicesWhenGettingGlobalMemorySizeThe } } +TEST(SubDevicesTest, givenRootDeviceWithSubDevicesWithoutLocalMemoryWhenGettingGlobalMemorySizeThenSubDevicesReturnReducedAmountOfGlobalMemAllocSize) { + DebugManagerStateRestore restorer; + DebugManager.flags.EnableLocalMemory.set(0); + DebugManager.flags.CreateMultipleSubDevices.set(2); + + const uint32_t numSubDevices = 2u; + UltDeviceFactory deviceFactory{1, numSubDevices}; + + auto rootDevice = deviceFactory.rootDevices[0]; + + auto totalGlobalMemorySize = rootDevice->getGlobalMemorySize(static_cast(rootDevice->getDeviceBitfield().to_ulong())); + + for (const auto &subDevice : deviceFactory.subDevices) { + auto mockSubDevice = static_cast(subDevice); + auto subDeviceBitfield = static_cast(mockSubDevice->getDeviceBitfield().to_ulong()); + EXPECT_EQ(totalGlobalMemorySize, mockSubDevice->getGlobalMemorySize(subDeviceBitfield)); + } +} + TEST(SubDevicesTest, whenCreatingEngineInstancedSubDeviceThenSetCorrectSubdeviceIndex) { class MyRootDevice : public RootDevice { public: diff --git a/opencl/test/unit_test/memory_manager/memory_manager_allocate_in_device_pool_tests.cpp b/opencl/test/unit_test/memory_manager/memory_manager_allocate_in_device_pool_tests.cpp index bfe309b452..a08693f27f 100644 --- a/opencl/test/unit_test/memory_manager/memory_manager_allocate_in_device_pool_tests.cpp +++ b/opencl/test/unit_test/memory_manager/memory_manager_allocate_in_device_pool_tests.cpp @@ -486,7 +486,12 @@ TEST(MemoryManagerTest, givenOsAgnosticMemoryManagerWhenGetLocalMemoryIsCalledTh MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); MockMemoryManager memoryManager(false, false, executionEnvironment); - EXPECT_EQ(AubHelper::getMemBankSize(executionEnvironment.rootDeviceEnvironments[0]->getHardwareInfo()), memoryManager.getLocalMemorySize(0u, 0xF)); + auto hwInfo = executionEnvironment.rootDeviceEnvironments[0]->getHardwareInfo(); + + auto subDevicesCount = HwHelper::getSubDevicesCount(hwInfo); + uint32_t deviceMask = static_cast(maxNBitValue(subDevicesCount)); + + EXPECT_EQ(AubHelper::getPerTileLocalMemorySize(hwInfo) * subDevicesCount, memoryManager.getLocalMemorySize(0u, deviceMask)); } HWTEST_F(MemoryManagerTests, givenEnabledLocalMemoryWhenAllocatingKernelIsaThenLocalMemoryPoolIsUsed) { diff --git a/shared/source/aub/aub_center.cpp b/shared/source/aub/aub_center.cpp index bb524b1be3..1426fd3174 100644 --- a/shared/source/aub/aub_center.cpp +++ b/shared/source/aub/aub_center.cpp @@ -22,7 +22,7 @@ extern aub_stream::AubManager *createAubManager(uint32_t productFamily, uint32_t AubCenter::AubCenter(const HardwareInfo *pHwInfo, const GmmHelper &gmmHelper, bool localMemoryEnabled, const std::string &aubFileName, CommandStreamReceiverType csrType) { if (DebugManager.flags.UseAubStream.get()) { auto devicesCount = HwHelper::getSubDevicesCount(pHwInfo); - auto memoryBankSize = AubHelper::getMemBankSize(pHwInfo); + auto memoryBankSize = AubHelper::getPerTileLocalMemorySize(pHwInfo); CommandStreamReceiverType type = csrType; if (DebugManager.flags.SetCommandStreamReceiver.get() >= CommandStreamReceiverType::CSR_HW) { type = static_cast(DebugManager.flags.SetCommandStreamReceiver.get()); diff --git a/shared/source/aub/aub_helper.cpp b/shared/source/aub/aub_helper.cpp index 491031d1d9..7b19ac31d6 100644 --- a/shared/source/aub/aub_helper.cpp +++ b/shared/source/aub/aub_helper.cpp @@ -42,7 +42,7 @@ uint32_t AubHelper::getMemType(uint32_t addressSpace) { return mem_types::MEM_TYPE_SYSTEM; } -uint64_t AubHelper::getMemBankSize(const HardwareInfo *pHwInfo) { +uint64_t AubHelper::getPerTileLocalMemorySize(const HardwareInfo *pHwInfo) { if (DebugManager.flags.HBMSizePerTileInGigabytes.get() > 0) { return DebugManager.flags.HBMSizePerTileInGigabytes.get() * MemoryConstants::gigaByte; } diff --git a/shared/source/aub/aub_helper.h b/shared/source/aub/aub_helper.h index 987078dff0..0e2899f5d3 100644 --- a/shared/source/aub/aub_helper.h +++ b/shared/source/aub/aub_helper.h @@ -43,7 +43,7 @@ class AubHelper : public NonCopyableOrMovableClass { static int getMemTrace(uint64_t pdEntryBits); static uint64_t getPTEntryBits(uint64_t pdEntryBits); static uint32_t getMemType(uint32_t addressSpace); - static uint64_t getMemBankSize(const HardwareInfo *pHwInfo); + static uint64_t getPerTileLocalMemorySize(const HardwareInfo *pHwInfo); static MMIOList getAdditionalMmioList(); static void setTbxConfiguration(); diff --git a/shared/source/command_stream/command_stream_receiver_simulated_common_hw_xehp_and_later.inl b/shared/source/command_stream/command_stream_receiver_simulated_common_hw_xehp_and_later.inl index a9ab640427..dce2f9b734 100644 --- a/shared/source/command_stream/command_stream_receiver_simulated_common_hw_xehp_and_later.inl +++ b/shared/source/command_stream/command_stream_receiver_simulated_common_hw_xehp_and_later.inl @@ -34,7 +34,7 @@ void CommandStreamReceiverSimulatedCommonHw::initGlobalMMIO() { {0x0000490c, 0x0001}}; //XEHP_TILE_ADDR_RANGE const uint32_t numberOfTiles = 4; - const uint32_t localMemorySizeGB = static_cast(AubHelper::getMemBankSize(&this->peekHwInfo()) / MemoryConstants::gigaByte); + const uint32_t localMemorySizeGB = static_cast(AubHelper::getPerTileLocalMemorySize(&this->peekHwInfo()) / MemoryConstants::gigaByte); uint32_t localMemoryBaseAddressInGB = 0x0; diff --git a/shared/source/command_stream/command_stream_receiver_simulated_hw.h b/shared/source/command_stream/command_stream_receiver_simulated_hw.h index e3cc83e9f8..0d7343aa13 100644 --- a/shared/source/command_stream/command_stream_receiver_simulated_hw.h +++ b/shared/source/command_stream/command_stream_receiver_simulated_hw.h @@ -91,7 +91,7 @@ class CommandStreamReceiverSimulatedHw : public CommandStreamReceiverSimulatedCo return AubMemDump::AddressSpaceValues::TraceNonlocal; } PhysicalAddressAllocator *createPhysicalAddressAllocator(const HardwareInfo *hwInfo) { - const auto bankSize = AubHelper::getMemBankSize(hwInfo); + const auto bankSize = AubHelper::getPerTileLocalMemorySize(hwInfo); const auto devicesCount = HwHelper::getSubDevicesCount(hwInfo); return new PhysicalAddressAllocatorHw(bankSize, devicesCount); } diff --git a/shared/source/device/device.h b/shared/source/device/device.h index 86975eec22..b2494ad550 100644 --- a/shared/source/device/device.h +++ b/shared/source/device/device.h @@ -123,7 +123,7 @@ class Device : public ReferenceTrackedObject { GraphicsAllocation *getRTMemoryBackedBuffer() { return rtMemoryBackedBuffer; } void initializeRayTracing(); - virtual uint64_t getGlobalMemorySize(uint32_t deviceBitfield) const; + uint64_t getGlobalMemorySize(uint32_t deviceBitfield) const; const std::vector getSubDevices() const { return subdevices; } protected: diff --git a/shared/source/device/sub_device.cpp b/shared/source/device/sub_device.cpp index ad1ba6fd2e..abf3f1032d 100644 --- a/shared/source/device/sub_device.cpp +++ b/shared/source/device/sub_device.cpp @@ -45,9 +45,4 @@ Device *SubDevice::getRootDevice() const { return &rootDevice; } -uint64_t SubDevice::getGlobalMemorySize(uint32_t deviceBitfield) const { - auto globalMemorySize = Device::getGlobalMemorySize(static_cast(maxNBitValue(rootDevice.getNumSubDevices()))); - return globalMemorySize / std::max(rootDevice.getNumGenericSubDevices(), 1u); -} - } // namespace NEO diff --git a/shared/source/device/sub_device.h b/shared/source/device/sub_device.h index ba7f3f26ce..7fad321d1f 100644 --- a/shared/source/device/sub_device.h +++ b/shared/source/device/sub_device.h @@ -25,7 +25,6 @@ class SubDevice : public Device { bool isSubDevice() const override { return true; } protected: - uint64_t getGlobalMemorySize(uint32_t deviceBitfield) const override; bool genericSubDevicesAllowed() override { return false; }; RootDevice &rootDevice; diff --git a/shared/source/memory_manager/os_agnostic_memory_manager.cpp b/shared/source/memory_manager/os_agnostic_memory_manager.cpp index 7fddc7546b..2b08165f71 100644 --- a/shared/source/memory_manager/os_agnostic_memory_manager.cpp +++ b/shared/source/memory_manager/os_agnostic_memory_manager.cpp @@ -519,7 +519,8 @@ GraphicsAllocation *OsAgnosticMemoryManager::allocateGraphicsMemoryInDevicePool( } uint64_t OsAgnosticMemoryManager::getLocalMemorySize(uint32_t rootDeviceIndex, uint32_t deviceBitfield) { - return AubHelper::getMemBankSize(executionEnvironment.rootDeviceEnvironments[rootDeviceIndex]->getHardwareInfo()); + DeviceBitfield bitfield = deviceBitfield; + return (AubHelper::getPerTileLocalMemorySize(executionEnvironment.rootDeviceEnvironments[rootDeviceIndex]->getHardwareInfo()) * bitfield.count()); } double OsAgnosticMemoryManager::getPercentOfGlobalMemoryAvailable(uint32_t rootDeviceIndex) {