mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-24 20:39:56 +08:00
Choose valid bank from memory bank selector
Related-To: NEO-4645
Change-Id: I8d1f63ba24ead2e77ba6381e4770068bf2eb1725
Signed-off-by: Andrzej Swierczynski <andrzej.swierczynski@intel.com>
This commit is contained in:
committed by
sys_ocldev
parent
ef4cc0e685
commit
a5e4edb327
@@ -3521,6 +3521,7 @@ void *clHostMemAllocINTEL(
|
|||||||
cl_mem_flags flags = 0;
|
cl_mem_flags flags = 0;
|
||||||
cl_mem_flags_intel flagsIntel = 0;
|
cl_mem_flags_intel flagsIntel = 0;
|
||||||
cl_mem_alloc_flags_intel allocflags = 0;
|
cl_mem_alloc_flags_intel allocflags = 0;
|
||||||
|
unifiedMemoryProperties.subdeviceBitfield = neoContext->getDeviceBitfieldForAllocation();
|
||||||
if (!MemoryPropertiesHelper::parseMemoryProperties(properties, unifiedMemoryProperties.allocationFlags, flags, flagsIntel,
|
if (!MemoryPropertiesHelper::parseMemoryProperties(properties, unifiedMemoryProperties.allocationFlags, flags, flagsIntel,
|
||||||
allocflags, MemoryPropertiesHelper::ObjType::UNKNOWN,
|
allocflags, MemoryPropertiesHelper::ObjType::UNKNOWN,
|
||||||
*neoContext)) {
|
*neoContext)) {
|
||||||
@@ -3559,6 +3560,7 @@ void *clDeviceMemAllocINTEL(
|
|||||||
cl_mem_flags flags = 0;
|
cl_mem_flags flags = 0;
|
||||||
cl_mem_flags_intel flagsIntel = 0;
|
cl_mem_flags_intel flagsIntel = 0;
|
||||||
cl_mem_alloc_flags_intel allocflags = 0;
|
cl_mem_alloc_flags_intel allocflags = 0;
|
||||||
|
unifiedMemoryProperties.subdeviceBitfield = neoDevice->getDeviceBitfield();
|
||||||
if (!MemoryPropertiesHelper::parseMemoryProperties(properties, unifiedMemoryProperties.allocationFlags, flags, flagsIntel,
|
if (!MemoryPropertiesHelper::parseMemoryProperties(properties, unifiedMemoryProperties.allocationFlags, flags, flagsIntel,
|
||||||
allocflags, MemoryPropertiesHelper::ObjType::UNKNOWN,
|
allocflags, MemoryPropertiesHelper::ObjType::UNKNOWN,
|
||||||
*neoContext)) {
|
*neoContext)) {
|
||||||
@@ -3573,7 +3575,6 @@ void *clDeviceMemAllocINTEL(
|
|||||||
}
|
}
|
||||||
|
|
||||||
unifiedMemoryProperties.device = device;
|
unifiedMemoryProperties.device = device;
|
||||||
unifiedMemoryProperties.subdeviceBitfield = neoDevice->getDefaultEngine().osContext->getDeviceBitfield();
|
|
||||||
|
|
||||||
return neoContext->getSVMAllocsManager()->createUnifiedMemoryAllocation(neoDevice->getRootDeviceIndex(), size, unifiedMemoryProperties);
|
return neoContext->getSVMAllocsManager()->createUnifiedMemoryAllocation(neoDevice->getRootDeviceIndex(), size, unifiedMemoryProperties);
|
||||||
}
|
}
|
||||||
@@ -3626,6 +3627,7 @@ void *clSharedMemAllocINTEL(
|
|||||||
if (!ptr) {
|
if (!ptr) {
|
||||||
err.set(CL_OUT_OF_RESOURCES);
|
err.set(CL_OUT_OF_RESOURCES);
|
||||||
}
|
}
|
||||||
|
|
||||||
return ptr;
|
return ptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -145,6 +145,7 @@ GraphicsAllocation *OsAgnosticMemoryManager::allocate32BitGraphicsMemoryImpl(con
|
|||||||
memoryAllocation->set32BitAllocation(true);
|
memoryAllocation->set32BitAllocation(true);
|
||||||
memoryAllocation->setGpuBaseAddress(GmmHelper::canonize(gfxPartition->getHeapBase(heap)));
|
memoryAllocation->setGpuBaseAddress(GmmHelper::canonize(gfxPartition->getHeapBase(heap)));
|
||||||
memoryAllocation->sizeToFree = allocationSize;
|
memoryAllocation->sizeToFree = allocationSize;
|
||||||
|
memoryAllocation->storageInfo = allocationData.storageInfo;
|
||||||
}
|
}
|
||||||
counter++;
|
counter++;
|
||||||
return memoryAllocation;
|
return memoryAllocation;
|
||||||
|
|||||||
@@ -6,6 +6,7 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
#include "shared/source/helpers/basic_math.h"
|
#include "shared/source/helpers/basic_math.h"
|
||||||
|
#include "shared/source/helpers/constants.h"
|
||||||
#include "shared/source/memory_manager/local_memory_usage.h"
|
#include "shared/source/memory_manager/local_memory_usage.h"
|
||||||
#include "shared/test/unit_test/helpers/debug_manager_state_restore.h"
|
#include "shared/test/unit_test/helpers/debug_manager_state_restore.h"
|
||||||
|
|
||||||
@@ -19,7 +20,12 @@ struct MockLocalMemoryUsageBankSelector : public LocalMemoryUsageBankSelector {
|
|||||||
using LocalMemoryUsageBankSelector::LocalMemoryUsageBankSelector;
|
using LocalMemoryUsageBankSelector::LocalMemoryUsageBankSelector;
|
||||||
using LocalMemoryUsageBankSelector::reserveOnBank;
|
using LocalMemoryUsageBankSelector::reserveOnBank;
|
||||||
using LocalMemoryUsageBankSelector::updateUsageInfo;
|
using LocalMemoryUsageBankSelector::updateUsageInfo;
|
||||||
|
DeviceBitfield bitfield;
|
||||||
std::atomic<uint64_t> *getMemorySizes() { return memorySizes.get(); }
|
std::atomic<uint64_t> *getMemorySizes() { return memorySizes.get(); }
|
||||||
|
|
||||||
|
MockLocalMemoryUsageBankSelector(uint32_t banksCount) : LocalMemoryUsageBankSelector(banksCount) {
|
||||||
|
bitfield = maxNBitValue(banksCount);
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
TEST(localMemoryUsageTest, givenLocalMemoryUsageBankSelectorWhenItsCreatedAllValuesAreZero) {
|
TEST(localMemoryUsageTest, givenLocalMemoryUsageBankSelectorWhenItsCreatedAllValuesAreZero) {
|
||||||
@@ -34,7 +40,7 @@ TEST(localMemoryUsageTest, givenLocalMemoryUsageBankSelectorWhenMemoryIsReserved
|
|||||||
MockLocalMemoryUsageBankSelector selector(4u);
|
MockLocalMemoryUsageBankSelector selector(4u);
|
||||||
|
|
||||||
uint64_t allocationSize = 1024u;
|
uint64_t allocationSize = 1024u;
|
||||||
auto bankIndex = selector.getLeastOccupiedBank();
|
auto bankIndex = selector.getLeastOccupiedBank(selector.bitfield);
|
||||||
selector.reserveOnBank(bankIndex, allocationSize);
|
selector.reserveOnBank(bankIndex, allocationSize);
|
||||||
|
|
||||||
EXPECT_EQ(allocationSize, selector.getOccupiedMemorySizeForBank(bankIndex));
|
EXPECT_EQ(allocationSize, selector.getOccupiedMemorySizeForBank(bankIndex));
|
||||||
@@ -44,11 +50,11 @@ TEST(localMemoryUsageTest, givenLocalMemoryUsageBankSelectorWhenMemoryIsReleased
|
|||||||
MockLocalMemoryUsageBankSelector selector(1u);
|
MockLocalMemoryUsageBankSelector selector(1u);
|
||||||
|
|
||||||
uint64_t allocationSize = 1024u;
|
uint64_t allocationSize = 1024u;
|
||||||
auto bankIndex = selector.getLeastOccupiedBank();
|
auto bankIndex = selector.getLeastOccupiedBank(selector.bitfield);
|
||||||
EXPECT_EQ(0u, bankIndex);
|
EXPECT_EQ(0u, bankIndex);
|
||||||
selector.reserveOnBank(bankIndex, allocationSize);
|
selector.reserveOnBank(bankIndex, allocationSize);
|
||||||
|
|
||||||
bankIndex = selector.getLeastOccupiedBank();
|
bankIndex = selector.getLeastOccupiedBank(selector.bitfield);
|
||||||
EXPECT_EQ(0u, bankIndex);
|
EXPECT_EQ(0u, bankIndex);
|
||||||
selector.reserveOnBank(bankIndex, allocationSize);
|
selector.reserveOnBank(bankIndex, allocationSize);
|
||||||
|
|
||||||
@@ -60,12 +66,12 @@ TEST(localMemoryUsageTest, givenLocalMemoryUsageBankSelectorWhenMemoryIsReleased
|
|||||||
TEST(localMemoryUsageTest, givenOverrideLeastOccupiedBankDebugFlagWhenGetLeastOccupiedBankIsCalledThenForcedBankIndexIsReturned) {
|
TEST(localMemoryUsageTest, givenOverrideLeastOccupiedBankDebugFlagWhenGetLeastOccupiedBankIsCalledThenForcedBankIndexIsReturned) {
|
||||||
DebugManagerStateRestore dbgRestore;
|
DebugManagerStateRestore dbgRestore;
|
||||||
MockLocalMemoryUsageBankSelector selector(1u);
|
MockLocalMemoryUsageBankSelector selector(1u);
|
||||||
auto bankIndex = selector.getLeastOccupiedBank();
|
auto bankIndex = selector.getLeastOccupiedBank(selector.bitfield);
|
||||||
EXPECT_EQ(0u, bankIndex);
|
EXPECT_EQ(0u, bankIndex);
|
||||||
|
|
||||||
uint32_t forcedBankIndex = 64u;
|
uint32_t forcedBankIndex = 64u;
|
||||||
DebugManager.flags.OverrideLeastOccupiedBank.set(static_cast<int32_t>(forcedBankIndex));
|
DebugManager.flags.OverrideLeastOccupiedBank.set(static_cast<int32_t>(forcedBankIndex));
|
||||||
bankIndex = selector.getLeastOccupiedBank();
|
bankIndex = selector.getLeastOccupiedBank(selector.bitfield);
|
||||||
EXPECT_EQ(forcedBankIndex, bankIndex);
|
EXPECT_EQ(forcedBankIndex, bankIndex);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -74,15 +80,15 @@ TEST(localMemoryUsageTest, givenLocalMemoryUsageBankSelectorWhenMemoryAllocatedS
|
|||||||
|
|
||||||
uint64_t allocationSize = 1024u;
|
uint64_t allocationSize = 1024u;
|
||||||
|
|
||||||
auto bankIndex = selector.getLeastOccupiedBank();
|
auto bankIndex = selector.getLeastOccupiedBank(selector.bitfield);
|
||||||
selector.reserveOnBank(bankIndex, allocationSize);
|
selector.reserveOnBank(bankIndex, allocationSize);
|
||||||
bankIndex = selector.getLeastOccupiedBank();
|
bankIndex = selector.getLeastOccupiedBank(selector.bitfield);
|
||||||
selector.reserveOnBank(bankIndex, allocationSize);
|
selector.reserveOnBank(bankIndex, allocationSize);
|
||||||
bankIndex = selector.getLeastOccupiedBank();
|
bankIndex = selector.getLeastOccupiedBank(selector.bitfield);
|
||||||
selector.reserveOnBank(bankIndex, allocationSize);
|
selector.reserveOnBank(bankIndex, allocationSize);
|
||||||
bankIndex = selector.getLeastOccupiedBank();
|
bankIndex = selector.getLeastOccupiedBank(selector.bitfield);
|
||||||
selector.reserveOnBank(bankIndex, allocationSize);
|
selector.reserveOnBank(bankIndex, allocationSize);
|
||||||
bankIndex = selector.getLeastOccupiedBank();
|
bankIndex = selector.getLeastOccupiedBank(selector.bitfield);
|
||||||
selector.reserveOnBank(bankIndex, allocationSize);
|
selector.reserveOnBank(bankIndex, allocationSize);
|
||||||
|
|
||||||
for (uint32_t i = 0; i < selector.banksCount; i++) {
|
for (uint32_t i = 0; i < selector.banksCount; i++) {
|
||||||
@@ -148,4 +154,12 @@ TEST(localMemoryUsageTest, givenLocalMemoryUsageBankSelectorWhenThereAreMoreThan
|
|||||||
EXPECT_EQ(0u, selector.getOccupiedMemorySizeForBank(32));
|
EXPECT_EQ(0u, selector.getOccupiedMemorySizeForBank(32));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
TEST(localMemoryUsageTest, givenBitfieldWhenGettingLeastOccupiedBankThenReturnTheProperOne) {
|
||||||
|
MockLocalMemoryUsageBankSelector selector(2u);
|
||||||
|
DeviceBitfield bitfield(0b10);
|
||||||
|
auto bank = selector.getLeastOccupiedBank(bitfield);
|
||||||
|
|
||||||
|
EXPECT_EQ(bank, 1u);
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace NEO
|
} // namespace NEO
|
||||||
|
|||||||
@@ -56,7 +56,7 @@ bool CommandContainer::initialize(Device *device) {
|
|||||||
GraphicsAllocation::AllocationType::INTERNAL_HOST_MEMORY,
|
GraphicsAllocation::AllocationType::INTERNAL_HOST_MEMORY,
|
||||||
(device->getNumAvailableDevices() > 1u) /* multiOsContextCapable */,
|
(device->getNumAvailableDevices() > 1u) /* multiOsContextCapable */,
|
||||||
false,
|
false,
|
||||||
{}};
|
device->getDeviceBitfield()};
|
||||||
|
|
||||||
auto cmdBufferAllocation = device->getMemoryManager()->allocateGraphicsMemoryWithProperties(properties);
|
auto cmdBufferAllocation = device->getMemoryManager()->allocateGraphicsMemoryWithProperties(properties);
|
||||||
UNRECOVERABLE_IF(!cmdBufferAllocation);
|
UNRECOVERABLE_IF(!cmdBufferAllocation);
|
||||||
@@ -188,7 +188,7 @@ void CommandContainer::allocateNextCommandBuffer() {
|
|||||||
GraphicsAllocation::AllocationType::INTERNAL_HOST_MEMORY,
|
GraphicsAllocation::AllocationType::INTERNAL_HOST_MEMORY,
|
||||||
(device->getNumAvailableDevices() > 1u) /* multiOsContextCapable */,
|
(device->getNumAvailableDevices() > 1u) /* multiOsContextCapable */,
|
||||||
false,
|
false,
|
||||||
{}};
|
device->getDeviceBitfield()};
|
||||||
|
|
||||||
auto cmdBufferAllocation = device->getMemoryManager()->allocateGraphicsMemoryWithProperties(properties);
|
auto cmdBufferAllocation = device->getMemoryManager()->allocateGraphicsMemoryWithProperties(properties);
|
||||||
UNRECOVERABLE_IF(!cmdBufferAllocation);
|
UNRECOVERABLE_IF(!cmdBufferAllocation);
|
||||||
|
|||||||
@@ -24,7 +24,7 @@ GraphicsAllocation *HeapHelper::getHeapAllocation(uint32_t heapType, size_t heap
|
|||||||
if (allocation) {
|
if (allocation) {
|
||||||
return allocation.release();
|
return allocation.release();
|
||||||
}
|
}
|
||||||
NEO::AllocationProperties properties{rootDeviceIndex, true, heapSize, allocationType, isMultiOsContextCapable, false, {}};
|
NEO::AllocationProperties properties{rootDeviceIndex, true, heapSize, allocationType, isMultiOsContextCapable, false, storageForReuse->getDeviceBitfield()};
|
||||||
properties.alignment = alignment;
|
properties.alignment = alignment;
|
||||||
|
|
||||||
return this->memManager->allocateGraphicsMemoryWithProperties(properties);
|
return this->memManager->allocateGraphicsMemoryWithProperties(properties);
|
||||||
|
|||||||
@@ -117,4 +117,8 @@ GraphicsAllocation *AllocationsList::detachAllocationImpl(GraphicsAllocation *,
|
|||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
DeviceBitfield InternalAllocationStorage::getDeviceBitfield() const {
|
||||||
|
return commandStreamReceiver.getOsContext().getDeviceBitfield();
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace NEO
|
} // namespace NEO
|
||||||
|
|||||||
@@ -6,10 +6,10 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
#include "shared/source/helpers/common_types.h"
|
||||||
#include "shared/source/memory_manager/allocations_list.h"
|
#include "shared/source/memory_manager/allocations_list.h"
|
||||||
|
|
||||||
namespace NEO {
|
namespace NEO {
|
||||||
class CommandStreamReceiver;
|
|
||||||
|
|
||||||
class InternalAllocationStorage {
|
class InternalAllocationStorage {
|
||||||
public:
|
public:
|
||||||
@@ -22,6 +22,7 @@ class InternalAllocationStorage {
|
|||||||
std::unique_ptr<GraphicsAllocation> obtainTemporaryAllocationWithPtr(size_t requiredSize, const void *requiredPtr, GraphicsAllocation::AllocationType allocationType);
|
std::unique_ptr<GraphicsAllocation> obtainTemporaryAllocationWithPtr(size_t requiredSize, const void *requiredPtr, GraphicsAllocation::AllocationType allocationType);
|
||||||
AllocationsList &getTemporaryAllocations() { return temporaryAllocations; }
|
AllocationsList &getTemporaryAllocations() { return temporaryAllocations; }
|
||||||
AllocationsList &getAllocationsForReuse() { return allocationsForReuse; }
|
AllocationsList &getAllocationsForReuse() { return allocationsForReuse; }
|
||||||
|
DeviceBitfield getDeviceBitfield() const;
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
void freeAllocationsList(uint32_t waitTaskCount, AllocationsList &allocationsList);
|
void freeAllocationsList(uint32_t waitTaskCount, AllocationsList &allocationsList);
|
||||||
|
|||||||
@@ -8,6 +8,7 @@
|
|||||||
#include "shared/source/memory_manager/local_memory_usage.h"
|
#include "shared/source/memory_manager/local_memory_usage.h"
|
||||||
|
|
||||||
#include "shared/source/debug_settings/debug_settings_manager.h"
|
#include "shared/source/debug_settings/debug_settings_manager.h"
|
||||||
|
#include "shared/source/helpers/common_types.h"
|
||||||
|
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <bitset>
|
#include <bitset>
|
||||||
@@ -24,13 +25,24 @@ LocalMemoryUsageBankSelector::LocalMemoryUsageBankSelector(uint32_t banksCount)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
uint32_t LocalMemoryUsageBankSelector::getLeastOccupiedBank() {
|
uint32_t LocalMemoryUsageBankSelector::getLeastOccupiedBank(DeviceBitfield deviceBitfield) {
|
||||||
if (DebugManager.flags.OverrideLeastOccupiedBank.get() != -1) {
|
if (DebugManager.flags.OverrideLeastOccupiedBank.get() != -1) {
|
||||||
return static_cast<uint32_t>(DebugManager.flags.OverrideLeastOccupiedBank.get());
|
return static_cast<uint32_t>(DebugManager.flags.OverrideLeastOccupiedBank.get());
|
||||||
}
|
}
|
||||||
|
uint32_t leastOccupiedBank = 0u;
|
||||||
|
uint64_t smallestViableMemorySize = std::numeric_limits<uint64_t>::max();
|
||||||
|
|
||||||
auto leastOccupiedBankIterator = std::min_element(memorySizes.get(), memorySizes.get() + banksCount);
|
UNRECOVERABLE_IF(deviceBitfield.count() == 0);
|
||||||
return static_cast<uint32_t>(std::distance(memorySizes.get(), leastOccupiedBankIterator));
|
for (uint32_t i = 0u; i < banksCount; i++) {
|
||||||
|
if (deviceBitfield.test(i)) {
|
||||||
|
if (memorySizes[i] < smallestViableMemorySize) {
|
||||||
|
leastOccupiedBank = i;
|
||||||
|
smallestViableMemorySize = memorySizes[i];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return leastOccupiedBank;
|
||||||
}
|
}
|
||||||
|
|
||||||
void LocalMemoryUsageBankSelector::freeOnBank(uint32_t bankIndex, uint64_t allocationSize) {
|
void LocalMemoryUsageBankSelector::freeOnBank(uint32_t bankIndex, uint64_t allocationSize) {
|
||||||
|
|||||||
@@ -7,6 +7,7 @@
|
|||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
|
#include "shared/source/helpers/common_types.h"
|
||||||
#include "shared/source/helpers/debug_helpers.h"
|
#include "shared/source/helpers/debug_helpers.h"
|
||||||
#include "shared/source/helpers/non_copyable_or_moveable.h"
|
#include "shared/source/helpers/non_copyable_or_moveable.h"
|
||||||
|
|
||||||
@@ -18,7 +19,7 @@ class LocalMemoryUsageBankSelector : public NonCopyableOrMovableClass {
|
|||||||
public:
|
public:
|
||||||
LocalMemoryUsageBankSelector() = delete;
|
LocalMemoryUsageBankSelector() = delete;
|
||||||
LocalMemoryUsageBankSelector(uint32_t banksCount);
|
LocalMemoryUsageBankSelector(uint32_t banksCount);
|
||||||
uint32_t getLeastOccupiedBank();
|
uint32_t getLeastOccupiedBank(DeviceBitfield deviceBitfield);
|
||||||
void reserveOnBanks(uint32_t memoryBanks, uint64_t allocationSize) {
|
void reserveOnBanks(uint32_t memoryBanks, uint64_t allocationSize) {
|
||||||
updateUsageInfo(memoryBanks, allocationSize, true);
|
updateUsageInfo(memoryBanks, allocationSize, true);
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user