Revert "performance: optimize svm allocation tracking"

This reverts commit e91ce78ec8.

Signed-off-by: Compute-Runtime-Validation <compute-runtime-validation@intel.com>
This commit is contained in:
Compute-Runtime-Validation 2023-11-16 10:10:03 +01:00 committed by Compute-Runtime-Automation
parent d358ed051f
commit 7f61217a44
5 changed files with 22 additions and 158 deletions

View File

@ -313,8 +313,7 @@ NEO::GraphicsAllocation *MemoryManagerOpenIpcMock::allocateGraphicsMemoryWithPro
}
NEO::GraphicsAllocation *MemoryManagerOpenIpcMock::allocateGraphicsMemoryWithProperties(const AllocationProperties &properties, const void *externalPtr) {
auto ptr = reinterpret_cast<void *>(sharedHandleAddress);
sharedHandleAddress += properties.size;
auto ptr = reinterpret_cast<void *>(sharedHandleAddress++);
auto gmmHelper = getGmmHelper(0);
auto canonizedGpuAddress = gmmHelper->canonize(castToUint64(ptr));
auto alloc = new IpcImplicitScalingMockGraphicsAllocation(properties.rootDeviceIndex,
@ -333,8 +332,7 @@ NEO::GraphicsAllocation *MemoryManagerOpenIpcMock::createGraphicsAllocationFromS
if (failOnCreateGraphicsAllocationFromSharedHandle) {
return nullptr;
}
auto ptr = reinterpret_cast<void *>(sharedHandleAddress);
sharedHandleAddress += properties.size;
auto ptr = reinterpret_cast<void *>(sharedHandleAddress++);
auto gmmHelper = getGmmHelper(0);
auto canonizedGpuAddress = gmmHelper->canonize(castToUint64(ptr));
auto alloc = new IpcImplicitScalingMockGraphicsAllocation(properties.rootDeviceIndex,
@ -352,8 +350,7 @@ NEO::GraphicsAllocation *MemoryManagerOpenIpcMock::createGraphicsAllocationFromM
if (failOnCreateGraphicsAllocationFromSharedHandle) {
return nullptr;
}
auto ptr = reinterpret_cast<void *>(sharedHandleAddress);
sharedHandleAddress += properties.size;
auto ptr = reinterpret_cast<void *>(sharedHandleAddress++);
auto gmmHelper = getGmmHelper(0);
auto canonizedGpuAddress = gmmHelper->canonize(castToUint64(ptr));
auto alloc = new IpcImplicitScalingMockGraphicsAllocation(properties.rootDeviceIndex,
@ -368,8 +365,7 @@ NEO::GraphicsAllocation *MemoryManagerOpenIpcMock::createGraphicsAllocationFromM
return alloc;
}
NEO::GraphicsAllocation *MemoryManagerOpenIpcMock::createGraphicsAllocationFromNTHandle(void *handle, uint32_t rootDeviceIndex, AllocationType allocType) {
auto ptr = reinterpret_cast<void *>(sharedHandleAddress);
sharedHandleAddress += 0x1000;
auto ptr = reinterpret_cast<void *>(sharedHandleAddress++);
auto gmmHelper = getGmmHelper(0);
auto canonizedGpuAddress = gmmHelper->canonize(castToUint64(ptr));
auto alloc = new IpcImplicitScalingMockGraphicsAllocation(0u,
@ -428,8 +424,7 @@ void MemoryOpenIpcHandleTest::TearDown() {
}
NEO::GraphicsAllocation *MemoryManagerIpcImplicitScalingMock::allocateGraphicsMemoryInPreferredPool(const AllocationProperties &properties, const void *hostPtr) {
auto ptr = reinterpret_cast<void *>(sharedHandleAddress);
sharedHandleAddress += properties.size;
auto ptr = reinterpret_cast<void *>(sharedHandleAddress++);
auto gmmHelper = getGmmHelper(0);
auto canonizedGpuAddress = gmmHelper->canonize(castToUint64(ptr));
auto alloc = new IpcImplicitScalingMockGraphicsAllocation(0u,
@ -445,8 +440,7 @@ NEO::GraphicsAllocation *MemoryManagerIpcImplicitScalingMock::allocateGraphicsMe
}
NEO::GraphicsAllocation *MemoryManagerIpcImplicitScalingMock::allocateGraphicsMemoryWithProperties(const AllocationProperties &properties) {
auto ptr = reinterpret_cast<void *>(sharedHandleAddress);
sharedHandleAddress += properties.size;
auto ptr = reinterpret_cast<void *>(sharedHandleAddress++);
auto gmmHelper = getGmmHelper(0);
auto canonizedGpuAddress = gmmHelper->canonize(castToUint64(ptr));
auto alloc = new IpcImplicitScalingMockGraphicsAllocation(0u,
@ -465,8 +459,7 @@ NEO::GraphicsAllocation *MemoryManagerIpcImplicitScalingMock::createGraphicsAllo
if (failOnCreateGraphicsAllocationFromSharedHandle) {
return nullptr;
}
auto ptr = reinterpret_cast<void *>(sharedHandleAddress);
sharedHandleAddress += properties.size;
auto ptr = reinterpret_cast<void *>(sharedHandleAddress++);
auto gmmHelper = getGmmHelper(0);
auto canonizedGpuAddress = gmmHelper->canonize(castToUint64(ptr));
auto alloc = new IpcImplicitScalingMockGraphicsAllocation(0u,
@ -486,8 +479,7 @@ NEO::GraphicsAllocation *MemoryManagerIpcImplicitScalingMock::createGraphicsAllo
return nullptr;
}
auto ptr = reinterpret_cast<void *>(sharedHandleAddress);
sharedHandleAddress += 0x1000;
auto ptr = reinterpret_cast<void *>(sharedHandleAddress++);
auto gmmHelper = getGmmHelper(0);
auto canonizedGpuAddress = gmmHelper->canonize(castToUint64(ptr));
auto alloc = new IpcImplicitScalingMockGraphicsAllocation(0u,
@ -505,8 +497,7 @@ NEO::GraphicsAllocation *MemoryManagerIpcImplicitScalingMock::createGraphicsAllo
if (failOnCreateGraphicsAllocationFromSharedHandle) {
return nullptr;
}
auto ptr = reinterpret_cast<void *>(sharedHandleAddress);
sharedHandleAddress += properties.size;
auto ptr = reinterpret_cast<void *>(sharedHandleAddress++);
auto gmmHelper = getGmmHelper(0);
auto canonizedGpuAddress = gmmHelper->canonize(castToUint64(ptr));
auto alloc = new IpcImplicitScalingMockGraphicsAllocation(0u,

View File

@ -1965,7 +1965,7 @@ std::unique_ptr<KernelObjsForAuxTranslation> Kernel::fillWithKernelObjsForAuxTra
}
if (getContext().getSVMAllocsManager()) {
for (auto &allocation : getContext().getSVMAllocsManager()->getSVMAllocs()->allocations) {
auto gfxAllocation = allocation.second->gpuAllocations.getDefaultGraphicsAllocation();
auto gfxAllocation = allocation.second.gpuAllocations.getDefaultGraphicsAllocation();
if (gfxAllocation->isCompressionEnabled()) {
kernelObjsForAuxTranslation->insert({KernelObjForAuxTranslation::Type::GFX_ALLOC, gfxAllocation});
auto &context = this->program->getContext();

View File

@ -42,25 +42,6 @@ void SVMAllocsManager::MapBasedAllocationTracker::remove(const SvmAllocationData
allocations.erase(iter);
}
// Stores a heap-allocated copy of allocationsPair, keyed by the GPU address of
// its default graphics allocation, and keeps `allocations` ordered by that key.
void SVMAllocsManager::SortedVectorBasedAllocationTracker::insert(const SvmAllocationData &allocationsPair) {
    auto gpuAddress = reinterpret_cast<void *>(allocationsPair.gpuAllocations.getDefaultGraphicsAllocation()->getGpuAddress());
    allocations.push_back(std::make_pair(gpuAddress, std::make_unique<SvmAllocationData>(allocationsPair)));

    // The new entry starts at the tail; sink it towards the front while its
    // key is strictly smaller than its left neighbour's, so an entry with an
    // equal key ends up after the ones already stored.
    size_t position = allocations.size() - 1;
    while (position > 0 && allocations[position].first < allocations[position - 1].first) {
        std::iter_swap(allocations.begin() + position, allocations.begin() + position - 1);
        --position;
    }
}
// Removes every tracked entry whose key equals the GPU address of the default
// graphics allocation of allocationsPair. The relative order of the remaining
// entries is preserved. Does nothing when no entry matches.
void SVMAllocsManager::SortedVectorBasedAllocationTracker::remove(const SvmAllocationData &allocationsPair) {
    // Read the key before compacting: allocationsPair may refer to the very
    // object owned by one of the entries that is about to be destroyed.
    const auto gpuAddress = reinterpret_cast<void *>(allocationsPair.gpuAllocations.getDefaultGraphicsAllocation()->getGpuAddress());
    auto removeIt = std::remove_if(allocations.begin(), allocations.end(), [gpuAddress](const auto &other) {
        return gpuAddress == other.first;
    });
    // Range erase: the single-iterator form is undefined behaviour when nothing
    // matched (removeIt == end()) and would leave moved-from entries behind
    // when more than one entry matched.
    allocations.erase(removeIt, allocations.end());
}
void SVMAllocsManager::SvmAllocationCache::insert(size_t size, void *ptr) {
std::lock_guard<std::mutex> lock(this->mtx);
allocations.emplace(std::lower_bound(allocations.begin(), allocations.end(), size), size, ptr);
@ -132,34 +113,6 @@ SvmAllocationData *SVMAllocsManager::MapBasedAllocationTracker::get(const void *
return nullptr;
}
// Binary-searches `allocations` (expected to be ordered by key) for the entry
// whose key equals ptr, or whose range [key, key + size) contains ptr.
// Returns the tracked data for that entry, or nullptr when ptr is null, the
// tracker is empty, or no entry covers ptr.
SvmAllocationData *SVMAllocsManager::SortedVectorBasedAllocationTracker::get(const void *ptr) {
    if (allocations.size() == 0 || !ptr) {
        return nullptr;
    }

    const auto address = reinterpret_cast<uintptr_t>(ptr);
    int low = 0;
    int high = static_cast<int>(allocations.size() - 1);
    while (low <= high) {
        const int middle = (low + high) / 2;
        const auto &entry = allocations[middle];

        if (entry.first == ptr) {
            return entry.second.get();
        }
        if (ptr < entry.first) {
            // ptr lies below this entry: keep searching the lower half.
            high = middle - 1;
        } else {
            // ptr lies above the entry's base: it is a hit only if it still
            // falls inside the entry's size.
            const auto base = reinterpret_cast<uintptr_t>(entry.first);
            if (address < base + entry.second->size) {
                return entry.second.get();
            }
            low = middle + 1;
        }
    }
    return nullptr;
}
void SVMAllocsManager::MapOperationsTracker::insert(SvmMapOperation mapOperation) {
operations.insert(std::make_pair(mapOperation.regionSvmPtr, mapOperation));
}
@ -184,16 +137,16 @@ void SVMAllocsManager::addInternalAllocationsToResidencyContainer(uint32_t rootD
uint32_t requestedTypesMask) {
std::shared_lock<std::shared_mutex> lock(mtx);
for (auto &allocation : this->svmAllocs.allocations) {
if (rootDeviceIndex >= allocation.second->gpuAllocations.getGraphicsAllocations().size()) {
if (rootDeviceIndex >= allocation.second.gpuAllocations.getGraphicsAllocations().size()) {
continue;
}
if (!(allocation.second->memoryType & requestedTypesMask) ||
(nullptr == allocation.second->gpuAllocations.getGraphicsAllocation(rootDeviceIndex))) {
if (!(allocation.second.memoryType & requestedTypesMask) ||
(nullptr == allocation.second.gpuAllocations.getGraphicsAllocation(rootDeviceIndex))) {
continue;
}
auto alloc = allocation.second->gpuAllocations.getGraphicsAllocation(rootDeviceIndex);
auto alloc = allocation.second.gpuAllocations.getGraphicsAllocation(rootDeviceIndex);
residencyContainer.push_back(alloc);
}
}
@ -201,8 +154,8 @@ void SVMAllocsManager::addInternalAllocationsToResidencyContainer(uint32_t rootD
void SVMAllocsManager::makeInternalAllocationsResident(CommandStreamReceiver &commandStreamReceiver, uint32_t requestedTypesMask) {
std::shared_lock<std::shared_mutex> lock(mtx);
for (auto &allocation : this->svmAllocs.allocations) {
if (allocation.second->memoryType & requestedTypesMask) {
auto gpuAllocation = allocation.second->gpuAllocations.getGraphicsAllocation(commandStreamReceiver.getRootDeviceIndex());
if (allocation.second.memoryType & requestedTypesMask) {
auto gpuAllocation = allocation.second.gpuAllocations.getGraphicsAllocation(commandStreamReceiver.getRootDeviceIndex());
if (gpuAllocation == nullptr) {
continue;
}
@ -712,7 +665,7 @@ void SVMAllocsManager::freeSvmAllocationWithDeviceStorage(SvmAllocationData *svm
bool SVMAllocsManager::hasHostAllocations() {
std::shared_lock<std::shared_mutex> lock(mtx);
for (auto &allocation : this->svmAllocs.allocations) {
if (allocation.second->memoryType == InternalMemoryType::HOST_UNIFIED_MEMORY) {
if (allocation.second.memoryType == InternalMemoryType::HOST_UNIFIED_MEMORY) {
return true;
}
}
@ -742,7 +695,7 @@ void SVMAllocsManager::makeIndirectAllocationsResident(CommandStreamReceiver &co
}
if (parseAllAllocations) {
for (auto &allocation : this->svmAllocs.allocations) {
auto gpuAllocation = allocation.second->gpuAllocations.getGraphicsAllocation(commandStreamReceiver.getRootDeviceIndex());
auto gpuAllocation = allocation.second.gpuAllocations.getGraphicsAllocation(commandStreamReceiver.getRootDeviceIndex());
if (gpuAllocation == nullptr) {
continue;
}
@ -859,7 +812,7 @@ void SVMAllocsManager::prefetchMemory(Device &device, CommandStreamReceiver &com
void SVMAllocsManager::prefetchSVMAllocs(Device &device, CommandStreamReceiver &commandStreamReceiver) {
std::shared_lock<std::shared_mutex> lock(mtx);
for (auto &allocation : this->svmAllocs.allocations) {
NEO::SvmAllocationData allocData = *allocation.second;
NEO::SvmAllocationData allocData = allocation.second;
this->prefetchMemory(device, commandStreamReceiver, allocData);
}
}

View File

@ -17,7 +17,6 @@
#include <atomic>
#include <cstdint>
#include <map>
#include <memory>
#include <mutex>
#include <shared_mutex>
#include <type_traits>
@ -84,19 +83,6 @@ struct SvmMapOperation {
class SVMAllocsManager {
public:
// Tracks SVM allocations in a vector of (pointer key, owned SvmAllocationData) pairs.
class SortedVectorBasedAllocationTracker {
friend class SVMAllocsManager;
public:
// Each entry pairs a pointer key with the uniquely-owned allocation data stored for it.
using SvmAllocationContainer = std::vector<std::pair<const void *, std::unique_ptr<SvmAllocationData>>>;
// Adds an entry for the given allocation data.
void insert(const SvmAllocationData &);
// Removes the entry that corresponds to the given allocation data.
void remove(const SvmAllocationData &);
// Looks up the allocation data associated with the given pointer.
SvmAllocationData *get(const void *);
// Number of entries currently tracked.
size_t getNumAllocs() const { return allocations.size(); };
// Entry storage; public, so callers can iterate over it directly.
SvmAllocationContainer allocations;
};
class MapBasedAllocationTracker {
friend class SVMAllocsManager;
@ -219,7 +205,7 @@ class SVMAllocsManager {
void removeSVMAlloc(const SvmAllocationData &svmData);
size_t getNumAllocs() const { return svmAllocs.getNumAllocs(); }
MOCKABLE_VIRTUAL size_t getNumDeferFreeAllocs() const { return svmDeferFreeAllocs.getNumAllocs(); }
SortedVectorBasedAllocationTracker *getSVMAllocs() { return &svmAllocs; }
MapBasedAllocationTracker *getSVMAllocs() { return &svmAllocs; }
MOCKABLE_VIRTUAL void insertSvmMapOperation(void *regionSvmPtr, size_t regionSize, void *baseSvmPtr, size_t offset, bool readOnlyMap);
void removeSvmMapOperation(const void *regionSvmPtr);
@ -254,7 +240,7 @@ class SVMAllocsManager {
void initUsmDeviceAllocationsCache();
void freeSVMData(SvmAllocationData *svmData);
SortedVectorBasedAllocationTracker svmAllocs;
MapBasedAllocationTracker svmAllocs;
MapOperationsTracker svmMapOperations;
MapBasedAllocationTracker svmDeferFreeAllocs;
MemoryManager *memoryManager;

View File

@ -17,72 +17,6 @@
using namespace NEO;
// Exercises insert, get and remove on SortedVectorBasedAllocationTracker and checks after
// each step that the entries stay ordered by key and that a pointer obtained from get()
// still reads the same data after later inserts and removals.
TEST(SortedVectorBasedAllocationTrackerTests, givenSortedVectorBasedAllocationTrackerWhenInsertRemoveAndGetThenStoreDataProperly) {
SvmAllocationData data(1u);
SVMAllocsManager::SortedVectorBasedAllocationTracker tracker;
// Ten mock allocations constructed with pointers at multiples 1..10 of pageSize64k.
MockGraphicsAllocation graphicsAllocations[] = {{reinterpret_cast<void *>(0x1 * MemoryConstants::pageSize64k), MemoryConstants::pageSize64k},
{reinterpret_cast<void *>(0x2 * MemoryConstants::pageSize64k), MemoryConstants::pageSize64k},
{reinterpret_cast<void *>(0x3 * MemoryConstants::pageSize64k), MemoryConstants::pageSize64k},
{reinterpret_cast<void *>(0x4 * MemoryConstants::pageSize64k), MemoryConstants::pageSize64k},
{reinterpret_cast<void *>(0x5 * MemoryConstants::pageSize64k), MemoryConstants::pageSize64k},
{reinterpret_cast<void *>(0x6 * MemoryConstants::pageSize64k), MemoryConstants::pageSize64k},
{reinterpret_cast<void *>(0x7 * MemoryConstants::pageSize64k), MemoryConstants::pageSize64k},
{reinterpret_cast<void *>(0x8 * MemoryConstants::pageSize64k), MemoryConstants::pageSize64k},
{reinterpret_cast<void *>(0x9 * MemoryConstants::pageSize64k), MemoryConstants::pageSize64k},
{reinterpret_cast<void *>(0xA * MemoryConstants::pageSize64k), MemoryConstants::pageSize64k}};
const auto graphicsAllocationsSize = sizeof(graphicsAllocations) / sizeof(MockGraphicsAllocation);
// Insert the upper half first, in descending index order, so the tracker receives keys out of order.
// The same `data` object is reused for every insert; `device` is set to the allocation's GPU
// address purely as a tag that the expectations below read back.
// NOTE(review): presumably the most recently added allocation becomes the default one used as the key - confirm.
for (uint32_t i = graphicsAllocationsSize - 1; i >= graphicsAllocationsSize / 2; --i) {
data.gpuAllocations.addAllocation(&graphicsAllocations[i]);
data.device = reinterpret_cast<Device *>(graphicsAllocations[i].getGpuAddress());
tracker.insert(data);
}
// Then insert the lower half in ascending index order.
for (uint32_t i = 0; i < graphicsAllocationsSize / 2; ++i) {
data.gpuAllocations.addAllocation(&graphicsAllocations[i]);
data.device = reinterpret_cast<Device *>(graphicsAllocations[i].getGpuAddress());
tracker.insert(data);
}
EXPECT_EQ(tracker.getNumAllocs(), graphicsAllocationsSize);
// Entry i is expected to carry key and tag (i + 1) * pageSize64k, i.e. ascending order.
for (uint64_t i = 0; i < graphicsAllocationsSize; ++i) {
EXPECT_EQ((i + 1) * MemoryConstants::pageSize64k, reinterpret_cast<uint64_t>(tracker.allocations[static_cast<uint32_t>(i)].first));
EXPECT_EQ((i + 1) * MemoryConstants::pageSize64k, reinterpret_cast<uint64_t>(tracker.allocations[static_cast<uint32_t>(i)].second->device));
}
// Look up by the exact GPU address of allocation 7; data1 is kept and re-checked after later mutations.
auto addr1 = reinterpret_cast<void *>(graphicsAllocations[7].getGpuAddress());
auto data1 = tracker.get(addr1);
EXPECT_EQ(data1->device, addr1);
// Insert an allocation constructed with pointer 0x0; it is expected to land at the front.
MockGraphicsAllocation graphicsAlloc{reinterpret_cast<void *>(0x0), MemoryConstants::pageSize64k};
data.gpuAllocations.addAllocation(&graphicsAlloc);
data.device = reinterpret_cast<Device *>(graphicsAlloc.getGpuAddress());
tracker.insert(data);
EXPECT_EQ(tracker.getNumAllocs(), graphicsAllocationsSize + 1);
// With the new front entry, entry i is expected to carry key and tag i * pageSize64k.
for (uint64_t i = 0; i < graphicsAllocationsSize + 1; ++i) {
EXPECT_EQ(i * MemoryConstants::pageSize64k, reinterpret_cast<uint64_t>(tracker.allocations[static_cast<uint32_t>(i)].first));
EXPECT_EQ(i * MemoryConstants::pageSize64k, reinterpret_cast<uint64_t>(tracker.allocations[static_cast<uint32_t>(i)].second->device));
}
// The pointer obtained before the insert must still read the same tag.
EXPECT_EQ(data1->device, addr1);
auto addr2 = reinterpret_cast<void *>(graphicsAllocations[1].getGpuAddress());
auto data2 = tracker.get(addr2);
EXPECT_EQ(data1->device, addr1);
EXPECT_EQ(data2->device, addr2);
// Remove the entry for allocation 1, passing the tracker-owned data returned by get().
tracker.remove(*data2);
EXPECT_EQ(tracker.getNumAllocs(), graphicsAllocationsSize);
// Keys 0 and 1 * pageSize64k remain at indices 0 and 1; the removed key 2 * pageSize64k
// leaves a gap, so from index 2 onwards entry i carries (i + 1) * pageSize64k.
for (uint64_t i = 0; i < graphicsAllocationsSize; ++i) {
if (i < 2) {
EXPECT_EQ(i * MemoryConstants::pageSize64k, reinterpret_cast<uint64_t>(tracker.allocations[static_cast<uint32_t>(i)].first));
EXPECT_EQ(i * MemoryConstants::pageSize64k, reinterpret_cast<uint64_t>(tracker.allocations[static_cast<uint32_t>(i)].second->device));
} else {
EXPECT_EQ((i + 1) * MemoryConstants::pageSize64k, reinterpret_cast<uint64_t>(tracker.allocations[static_cast<uint32_t>(i)].first));
EXPECT_EQ((i + 1) * MemoryConstants::pageSize64k, reinterpret_cast<uint64_t>(tracker.allocations[static_cast<uint32_t>(i)].second->device));
}
}
// data1 must also survive the removal of a different entry.
EXPECT_EQ(data1->device, addr1);
}
TEST(SvmDeviceAllocationCacheTest, givenAllocationCacheDefaultWhenCheckingIfEnabledThenItIsDisabled) {
std::unique_ptr<UltDeviceFactory> deviceFactory(new UltDeviceFactory(1, 1));
RootDeviceIndicesContainer rootDeviceIndices = {mockRootDeviceIndex};
@ -283,7 +217,7 @@ TEST(SvmDeviceAllocationCacheTest, givenAllocationsWithDifferentFlagsWhenAllocat
auto svmManager = std::make_unique<MockSVMAllocsManager>(rootDevice->getMemoryManager(), false);
ASSERT_TRUE(svmManager->usmDeviceAllocationsCacheEnabled);
constexpr auto allocationSizeBasis = MemoryConstants::kiloByte;
constexpr auto allocationSizeBasis = MemoryConstants::pageSize64k;
size_t defaultAllocSize = allocationSizeBasis << 0;
std::map<uint32_t, DeviceBitfield> subDeviceBitfields = {{0u, {01}}, {1u, {10}}};
SvmDeviceAllocationCacheTestDataType