diff --git a/opencl/source/api/api.cpp b/opencl/source/api/api.cpp index 744579c9f5..437d9b10f9 100644 --- a/opencl/source/api/api.cpp +++ b/opencl/source/api/api.cpp @@ -3521,6 +3521,7 @@ void *clHostMemAllocINTEL( cl_mem_flags flags = 0; cl_mem_flags_intel flagsIntel = 0; cl_mem_alloc_flags_intel allocflags = 0; + unifiedMemoryProperties.subdeviceBitfield = neoContext->getDeviceBitfieldForAllocation(); if (!MemoryPropertiesHelper::parseMemoryProperties(properties, unifiedMemoryProperties.allocationFlags, flags, flagsIntel, allocflags, MemoryPropertiesHelper::ObjType::UNKNOWN, *neoContext)) { @@ -3559,6 +3560,7 @@ void *clDeviceMemAllocINTEL( cl_mem_flags flags = 0; cl_mem_flags_intel flagsIntel = 0; cl_mem_alloc_flags_intel allocflags = 0; + unifiedMemoryProperties.subdeviceBitfield = neoDevice->getDeviceBitfield(); if (!MemoryPropertiesHelper::parseMemoryProperties(properties, unifiedMemoryProperties.allocationFlags, flags, flagsIntel, allocflags, MemoryPropertiesHelper::ObjType::UNKNOWN, *neoContext)) { @@ -3573,7 +3575,6 @@ void *clDeviceMemAllocINTEL( } unifiedMemoryProperties.device = device; - unifiedMemoryProperties.subdeviceBitfield = neoDevice->getDefaultEngine().osContext->getDeviceBitfield(); return neoContext->getSVMAllocsManager()->createUnifiedMemoryAllocation(neoDevice->getRootDeviceIndex(), size, unifiedMemoryProperties); } @@ -3626,6 +3627,7 @@ void *clSharedMemAllocINTEL( if (!ptr) { err.set(CL_OUT_OF_RESOURCES); } + return ptr; } diff --git a/opencl/source/memory_manager/os_agnostic_memory_manager.cpp b/opencl/source/memory_manager/os_agnostic_memory_manager.cpp index 40780f32cd..bfdb77eecb 100644 --- a/opencl/source/memory_manager/os_agnostic_memory_manager.cpp +++ b/opencl/source/memory_manager/os_agnostic_memory_manager.cpp @@ -145,6 +145,7 @@ GraphicsAllocation *OsAgnosticMemoryManager::allocate32BitGraphicsMemoryImpl(con memoryAllocation->set32BitAllocation(true); memoryAllocation->setGpuBaseAddress(GmmHelper::canonize(gfxPartition->getHeapBase(heap))); memoryAllocation->sizeToFree = allocationSize; + memoryAllocation->storageInfo = allocationData.storageInfo; } counter++; return memoryAllocation; diff --git a/opencl/test/unit_test/memory_manager/local_memory_usage_tests.cpp b/opencl/test/unit_test/memory_manager/local_memory_usage_tests.cpp index 59177e5929..391a6df69f 100644 --- a/opencl/test/unit_test/memory_manager/local_memory_usage_tests.cpp +++ b/opencl/test/unit_test/memory_manager/local_memory_usage_tests.cpp @@ -6,6 +6,7 @@ */ #include "shared/source/helpers/basic_math.h" +#include "shared/source/helpers/constants.h" #include "shared/source/memory_manager/local_memory_usage.h" #include "shared/test/unit_test/helpers/debug_manager_state_restore.h" @@ -19,7 +20,12 @@ struct MockLocalMemoryUsageBankSelector : public LocalMemoryUsageBankSelector { using LocalMemoryUsageBankSelector::LocalMemoryUsageBankSelector; using LocalMemoryUsageBankSelector::reserveOnBank; using LocalMemoryUsageBankSelector::updateUsageInfo; + DeviceBitfield bitfield; std::atomic *getMemorySizes() { return memorySizes.get(); } + + MockLocalMemoryUsageBankSelector(uint32_t banksCount) : LocalMemoryUsageBankSelector(banksCount) { + bitfield = maxNBitValue(banksCount); + } }; TEST(localMemoryUsageTest, givenLocalMemoryUsageBankSelectorWhenItsCreatedAllValuesAreZero) { @@ -34,7 +40,7 @@ TEST(localMemoryUsageTest, givenLocalMemoryUsageBankSelectorWhenMemoryIsReserved MockLocalMemoryUsageBankSelector selector(4u); uint64_t allocationSize = 1024u; - auto bankIndex = selector.getLeastOccupiedBank(); + auto bankIndex = selector.getLeastOccupiedBank(selector.bitfield); selector.reserveOnBank(bankIndex, allocationSize); EXPECT_EQ(allocationSize, selector.getOccupiedMemorySizeForBank(bankIndex)); @@ -44,11 +50,11 @@ TEST(localMemoryUsageTest, givenLocalMemoryUsageBankSelectorWhenMemoryIsReleased MockLocalMemoryUsageBankSelector selector(1u); uint64_t allocationSize = 1024u; - auto bankIndex = selector.getLeastOccupiedBank(); + auto bankIndex = selector.getLeastOccupiedBank(selector.bitfield); EXPECT_EQ(0u, bankIndex); selector.reserveOnBank(bankIndex, allocationSize); - bankIndex = selector.getLeastOccupiedBank(); + bankIndex = selector.getLeastOccupiedBank(selector.bitfield); EXPECT_EQ(0u, bankIndex); selector.reserveOnBank(bankIndex, allocationSize); @@ -60,12 +66,12 @@ TEST(localMemoryUsageTest, givenLocalMemoryUsageBankSelectorWhenMemoryIsReleased TEST(localMemoryUsageTest, givenOverrideLeastOccupiedBankDebugFlagWhenGetLeastOccupiedBankIsCalledThenForcedBankIndexIsReturned) { DebugManagerStateRestore dbgRestore; MockLocalMemoryUsageBankSelector selector(1u); - auto bankIndex = selector.getLeastOccupiedBank(); + auto bankIndex = selector.getLeastOccupiedBank(selector.bitfield); EXPECT_EQ(0u, bankIndex); uint32_t forcedBankIndex = 64u; DebugManager.flags.OverrideLeastOccupiedBank.set(static_cast(forcedBankIndex)); - bankIndex = selector.getLeastOccupiedBank(); + bankIndex = selector.getLeastOccupiedBank(selector.bitfield); EXPECT_EQ(forcedBankIndex, bankIndex); } @@ -74,15 +80,15 @@ TEST(localMemoryUsageTest, givenLocalMemoryUsageBankSelectorWhenMemoryAllocatedS uint64_t allocationSize = 1024u; - auto bankIndex = selector.getLeastOccupiedBank(); + auto bankIndex = selector.getLeastOccupiedBank(selector.bitfield); selector.reserveOnBank(bankIndex, allocationSize); - bankIndex = selector.getLeastOccupiedBank(); + bankIndex = selector.getLeastOccupiedBank(selector.bitfield); selector.reserveOnBank(bankIndex, allocationSize); - bankIndex = selector.getLeastOccupiedBank(); + bankIndex = selector.getLeastOccupiedBank(selector.bitfield); selector.reserveOnBank(bankIndex, allocationSize); - bankIndex = selector.getLeastOccupiedBank(); + bankIndex = selector.getLeastOccupiedBank(selector.bitfield); selector.reserveOnBank(bankIndex, allocationSize); - bankIndex = selector.getLeastOccupiedBank(); + bankIndex = selector.getLeastOccupiedBank(selector.bitfield); selector.reserveOnBank(bankIndex, allocationSize); for (uint32_t i = 0; i < selector.banksCount; i++) { @@ -148,4 +154,12 @@ TEST(localMemoryUsageTest, givenLocalMemoryUsageBankSelectorWhenThereAreMoreThan EXPECT_EQ(0u, selector.getOccupiedMemorySizeForBank(32)); } +TEST(localMemoryUsageTest, givenBitfieldWhenGettingLeastOccupiedBankThenReturnTheProperOne) { + MockLocalMemoryUsageBankSelector selector(2u); + DeviceBitfield bitfield(0b10); + auto bank = selector.getLeastOccupiedBank(bitfield); + + EXPECT_EQ(bank, 1u); +} + } // namespace NEO diff --git a/shared/source/command_container/cmdcontainer.cpp b/shared/source/command_container/cmdcontainer.cpp index 2e4700bb57..78694ba93d 100644 --- a/shared/source/command_container/cmdcontainer.cpp +++ b/shared/source/command_container/cmdcontainer.cpp @@ -56,7 +56,7 @@ bool CommandContainer::initialize(Device *device) { GraphicsAllocation::AllocationType::INTERNAL_HOST_MEMORY, (device->getNumAvailableDevices() > 1u) /* multiOsContextCapable */, false, - {}}; + device->getDeviceBitfield()}; auto cmdBufferAllocation = device->getMemoryManager()->allocateGraphicsMemoryWithProperties(properties); UNRECOVERABLE_IF(!cmdBufferAllocation); @@ -188,7 +188,7 @@ void CommandContainer::allocateNextCommandBuffer() { GraphicsAllocation::AllocationType::INTERNAL_HOST_MEMORY, (device->getNumAvailableDevices() > 1u) /* multiOsContextCapable */, false, - {}}; + device->getDeviceBitfield()}; auto cmdBufferAllocation = device->getMemoryManager()->allocateGraphicsMemoryWithProperties(properties); UNRECOVERABLE_IF(!cmdBufferAllocation); diff --git a/shared/source/helpers/heap_helper.cpp b/shared/source/helpers/heap_helper.cpp index c9f51970e6..a5812cf377 100644 --- a/shared/source/helpers/heap_helper.cpp +++ b/shared/source/helpers/heap_helper.cpp @@ -24,7 +24,7 @@ GraphicsAllocation *HeapHelper::getHeapAllocation(uint32_t heapType, size_t heap if (allocation) { return allocation.release(); } - NEO::AllocationProperties properties{rootDeviceIndex, true, heapSize, allocationType, isMultiOsContextCapable, false, {}}; + NEO::AllocationProperties properties{rootDeviceIndex, true, heapSize, allocationType, isMultiOsContextCapable, false, storageForReuse->getDeviceBitfield()}; properties.alignment = alignment; return this->memManager->allocateGraphicsMemoryWithProperties(properties); diff --git a/shared/source/memory_manager/internal_allocation_storage.cpp b/shared/source/memory_manager/internal_allocation_storage.cpp index 8e2471ab83..1752a6ec4c 100644 --- a/shared/source/memory_manager/internal_allocation_storage.cpp +++ b/shared/source/memory_manager/internal_allocation_storage.cpp @@ -117,4 +117,8 @@ GraphicsAllocation *AllocationsList::detachAllocationImpl(GraphicsAllocation *, return nullptr; } +DeviceBitfield InternalAllocationStorage::getDeviceBitfield() const { + return commandStreamReceiver.getOsContext().getDeviceBitfield(); +} + } // namespace NEO diff --git a/shared/source/memory_manager/internal_allocation_storage.h b/shared/source/memory_manager/internal_allocation_storage.h index 30ee9fae5f..da2a25399c 100644 --- a/shared/source/memory_manager/internal_allocation_storage.h +++ b/shared/source/memory_manager/internal_allocation_storage.h @@ -6,10 +6,10 @@ */ #pragma once +#include "shared/source/helpers/common_types.h" #include "shared/source/memory_manager/allocations_list.h" namespace NEO { -class CommandStreamReceiver; class InternalAllocationStorage { public: @@ -22,6 +22,7 @@ class InternalAllocationStorage { std::unique_ptr obtainTemporaryAllocationWithPtr(size_t requiredSize, const void *requiredPtr, GraphicsAllocation::AllocationType allocationType); AllocationsList &getTemporaryAllocations() { return temporaryAllocations; } AllocationsList &getAllocationsForReuse() { return allocationsForReuse; } + DeviceBitfield getDeviceBitfield() const; protected: void freeAllocationsList(uint32_t waitTaskCount, AllocationsList &allocationsList); diff --git a/shared/source/memory_manager/local_memory_usage.cpp b/shared/source/memory_manager/local_memory_usage.cpp index b0527c15b7..c5c595588b 100644 --- a/shared/source/memory_manager/local_memory_usage.cpp +++ b/shared/source/memory_manager/local_memory_usage.cpp @@ -8,6 +8,7 @@ #include "shared/source/memory_manager/local_memory_usage.h" #include "shared/source/debug_settings/debug_settings_manager.h" +#include "shared/source/helpers/common_types.h" #include #include @@ -24,13 +25,24 @@ LocalMemoryUsageBankSelector::LocalMemoryUsageBankSelector(uint32_t banksCount) } } -uint32_t LocalMemoryUsageBankSelector::getLeastOccupiedBank() { +uint32_t LocalMemoryUsageBankSelector::getLeastOccupiedBank(DeviceBitfield deviceBitfield) { if (DebugManager.flags.OverrideLeastOccupiedBank.get() != -1) { return static_cast(DebugManager.flags.OverrideLeastOccupiedBank.get()); } + uint32_t leastOccupiedBank = 0u; + uint64_t smallestViableMemorySize = std::numeric_limits::max(); - auto leastOccupiedBankIterator = std::min_element(memorySizes.get(), memorySizes.get() + banksCount); - return static_cast(std::distance(memorySizes.get(), leastOccupiedBankIterator)); + UNRECOVERABLE_IF(deviceBitfield.count() == 0); + for (uint32_t i = 0u; i < banksCount; i++) { + if (deviceBitfield.test(i)) { + if (memorySizes[i] < smallestViableMemorySize) { + leastOccupiedBank = i; + smallestViableMemorySize = memorySizes[i]; + } + } + } + + return leastOccupiedBank; } void LocalMemoryUsageBankSelector::freeOnBank(uint32_t bankIndex, uint64_t allocationSize) { diff --git a/shared/source/memory_manager/local_memory_usage.h b/shared/source/memory_manager/local_memory_usage.h index 6e1b779169..1f1ed6e2a2 100644 --- a/shared/source/memory_manager/local_memory_usage.h +++ b/shared/source/memory_manager/local_memory_usage.h @@ -7,6 +7,7 @@ #pragma once +#include "shared/source/helpers/common_types.h" #include "shared/source/helpers/debug_helpers.h" #include "shared/source/helpers/non_copyable_or_moveable.h" @@ -18,7 +19,7 @@ class LocalMemoryUsageBankSelector : public NonCopyableOrMovableClass { public: LocalMemoryUsageBankSelector() = delete; LocalMemoryUsageBankSelector(uint32_t banksCount); - uint32_t getLeastOccupiedBank(); + uint32_t getLeastOccupiedBank(DeviceBitfield deviceBitfield); void reserveOnBanks(uint32_t memoryBanks, uint64_t allocationSize) { updateUsageInfo(memoryBanks, allocationSize, true); }