performance: Use vector for private allocs to reuse

Related-To: HSD-18033105655, HSD-18033153203

Signed-off-by: Maciej Plewka <maciej.plewka@intel.com>
This commit is contained in:
Maciej Plewka 2023-09-04 09:42:02 +00:00 committed by Compute-Runtime-Automation
parent 91b26277a4
commit 3b3e17e738
9 changed files with 30 additions and 19 deletions

View File

@ -11,6 +11,7 @@
#include "shared/source/command_stream/preemption_mode.h"
#include "shared/source/command_stream/stream_properties.h"
#include "shared/source/helpers/cache_policy.h"
#include "shared/source/helpers/common_types.h"
#include "shared/source/helpers/definitions/command_encoder_args.h"
#include "shared/source/helpers/heap_base_address_model.h"
#include "shared/source/memory_manager/prefetch_manager.h"
@ -356,7 +357,7 @@ struct CommandList : _ze_command_list_handle_t {
MOCKABLE_VIRTUAL void synchronizeEventList(uint32_t numWaitEvents, ze_event_handle_t *waitEventList);
std::map<const void *, NEO::GraphicsAllocation *> hostPtrMap;
std::unordered_map<uint32_t, NEO::GraphicsAllocation *> ownedPrivateAllocations;
NEO::PrivateAllocsToReuseContainer ownedPrivateAllocations;
std::vector<NEO::GraphicsAllocation *> patternAllocations;
std::vector<Kernel *> printfKernelContainer;

View File

@ -309,7 +309,7 @@ struct CommandListCoreFamily : CommandListImp {
return (this->pipeControlMultiKernelEventSync && splitKernel) ||
compactL3FlushEvent(dcFlush);
}
MOCKABLE_VIRTUAL void allocateOrReuseKernelPrivateMemory(Kernel *kernel, uint32_t sizePerHwThread, std::unordered_map<uint32_t, NEO::GraphicsAllocation *> &privateAllocsToReuse);
MOCKABLE_VIRTUAL void allocateOrReuseKernelPrivateMemory(Kernel *kernel, uint32_t sizePerHwThread, NEO::PrivateAllocsToReuseContainer &privateAllocsToReuse);
virtual void allocateOrReuseKernelPrivateMemoryIfNeeded(Kernel *kernel, uint32_t sizePerHwThread);
CmdListEventOperation estimateEventPostSync(Event *event, uint32_t operations);
void dispatchPostSyncCopy(uint64_t gpuAddress, uint32_t value, bool workloadPartition);

View File

@ -3182,15 +3182,22 @@ void CommandListCoreFamily<gfxCoreFamily>::allocateOrReuseKernelPrivateMemoryIfN
}
// Selects a private-memory GraphicsAllocation for `kernel` and passes it to
// KernelImp::patchAndMoveToResidencyContainerPrivateSurface. An allocation
// already recorded in `privateAllocsToReuse` under `sizePerHwThread` is reused;
// otherwise a new one is obtained from
// KernelImp::allocatePrivateMemoryGraphicsAllocation and recorded under that key.
// NOTE(review): this span looks like a rendered diff with the +/- markers
// stripped, so replaced and replacement lines appear side by side — confirm
// against the real source file before treating it as compilable code.
template <GFXCORE_FAMILY gfxCoreFamily>
// Presumably the pre-change signature (container is a std::unordered_map).
void CommandListCoreFamily<gfxCoreFamily>::allocateOrReuseKernelPrivateMemory(Kernel *kernel, uint32_t sizePerHwThread, std::unordered_map<uint32_t, NEO::GraphicsAllocation *> &privateAllocsToReuse) {
// Presumably the post-change signature (container is NEO::PrivateAllocsToReuseContainer).
void CommandListCoreFamily<gfxCoreFamily>::allocateOrReuseKernelPrivateMemory(Kernel *kernel, uint32_t sizePerHwThread, NEO::PrivateAllocsToReuseContainer &privateAllocsToReuse) {
// The kernel is used through its KernelImp interface for allocation and patching.
L0::KernelImp *kernelImp = static_cast<KernelImp *>(kernel);
NEO::GraphicsAllocation *privateAlloc = nullptr;
// Presumably the pre-change lookup: index the container directly by size.
if (privateAllocsToReuse[sizePerHwThread] != nullptr) {
privateAlloc = privateAllocsToReuse[sizePerHwThread];
} else {
// Presumably the post-change lookup: scan the (size, allocation) pairs and
// stop at the first entry whose size equals sizePerHwThread.
bool allocToReuseFound = false;
for (auto &alloc : privateAllocsToReuse) {
if (sizePerHwThread == alloc.first) {
privateAlloc = alloc.second;
allocToReuseFound = true;
break;
}
}
// No entry with a matching size: allocate a new one and record it for later calls.
if (!allocToReuseFound) {
privateAlloc = kernelImp->allocatePrivateMemoryGraphicsAllocation();
// Presumably the pre-change insertion (keyed assignment).
privateAllocsToReuse[sizePerHwThread] = privateAlloc;
// Presumably the post-change insertion (append a (size, allocation) pair).
privateAllocsToReuse.push_back({sizePerHwThread, privateAlloc});
}
// Reused or freshly allocated, the chosen allocation is handed to the kernel here.
kernelImp->patchAndMoveToResidencyContainerPrivateSurface(privateAlloc);
}

View File

@ -534,7 +534,7 @@ class MockCommandListCoreFamily : public CommandListCoreFamily<gfxCoreFamily> {
false,
(L0::Kernel * kernel,
uint32_t sizePerHwThread,
std::unordered_map<uint32_t, NEO::GraphicsAllocation *> &privateAllocsToReuse),
NEO::PrivateAllocsToReuseContainer &privateAllocsToReuse),
(kernel, sizePerHwThread, privateAllocsToReuse));
ADDMETHOD_VOIDRETURN(allocateOrReuseKernelPrivateMemoryIfNeeded,
@ -631,7 +631,7 @@ class MockCommandListImmediateHw : public WhiteBox<::L0::CommandListCoreFamilyIm
false,
(L0::Kernel * kernel,
uint32_t sizePerHwThread,
std::unordered_map<uint32_t, NEO::GraphicsAllocation *> &privateAllocsToReuse),
NEO::PrivateAllocsToReuseContainer &privateAllocsToReuse),
(kernel, sizePerHwThread, privateAllocsToReuse));
ADDMETHOD_VOIDRETURN(allocateOrReuseKernelPrivateMemoryIfNeeded,

View File

@ -3193,11 +3193,11 @@ HWTEST2_F(CommandListMappedTimestampTest, givenEventIsAddedToMappedEventListWhen
// Test mock layered on top of `BaseMock`: its allocateOrReuseKernelPrivateMemory
// override stores the address of the container it was called with in
// `passedContainer`, then forwards the call unchanged to BaseMock.
// NOTE(review): this span looks like a rendered diff with the +/- markers
// stripped; the override signature and the member each appear twice
// (presumably old std::unordered_map form first, new
// PrivateAllocsToReuseContainer form second) — confirm against the real source.
template <GFXCORE_FAMILY gfxCoreFamily, typename BaseMock>
class MockCommandListCoreFamilyIfPrivateNeeded : public BaseMock {
public:
void allocateOrReuseKernelPrivateMemory(Kernel *kernel, uint32_t sizePerHwThread, std::unordered_map<uint32_t, GraphicsAllocation *> &privateAllocsToReuse) override {
void allocateOrReuseKernelPrivateMemory(Kernel *kernel, uint32_t sizePerHwThread, PrivateAllocsToReuseContainer &privateAllocsToReuse) override {
// Remember which container the caller supplied so a test can inspect it afterwards.
passedContainer = &privateAllocsToReuse;
BaseMock::allocateOrReuseKernelPrivateMemory(kernel, sizePerHwThread, privateAllocsToReuse);
}
// Address of the container from the most recent call; it has no initializer
// here, so it is only meaningful after the override has run.
std::unordered_map<uint32_t, GraphicsAllocation *> *passedContainer;
PrivateAllocsToReuseContainer *passedContainer;
};
HWTEST2_F(CommandListCreate, givenPrivatePerDispatchDisabledWhenAllocatingPrivateMemoryThenAllocateIsNotCalled, IsAtLeastSkl) {
@ -3281,8 +3281,8 @@ HWTEST2_F(CommandListCreate, givenCmdListWhenAllocateOrReuseCalledForSizeThatIsS
const_cast<uint32_t &>(mockKernel.kernelImmData->getDescriptor().kernelAttributes.perHwThreadPrivateMemorySize) = 0x1000;
mockKernel.module = &mockModule;
MockGraphicsAllocation mockGA(mockMem.get(), 2 * sizePerHwThread * this->neoDevice->getDeviceInfo().computeUnitsUsedForScratch);
std::unordered_map<uint32_t, GraphicsAllocation *> mapForReuse;
mapForReuse[sizePerHwThread] = &mockGA;
PrivateAllocsToReuseContainer mapForReuse;
mapForReuse.push_back({sizePerHwThread, &mockGA});
commandList->allocateOrReuseKernelPrivateMemory(&mockKernel, sizePerHwThread, mapForReuse);
EXPECT_EQ(mockKernel.residencyContainer[0], &mockGA);
}
@ -3298,8 +3298,8 @@ HWTEST2_F(CommandListCreate, givenNewSizeDifferentThanSizesInMapWhenAllocatingPr
const_cast<uint32_t &>(mockKernel.kernelImmData->getDescriptor().kernelAttributes.perHwThreadPrivateMemorySize) = 0x1000;
mockKernel.module = &mockModule;
MockGraphicsAllocation mockGA(mockMem.get(), sizePerHwThread * this->neoDevice->getDeviceInfo().computeUnitsUsedForScratch / 2);
std::unordered_map<uint32_t, GraphicsAllocation *> mapForReuse;
mapForReuse[sizePerHwThread] = &mockGA;
PrivateAllocsToReuseContainer mapForReuse;
mapForReuse.push_back({sizePerHwThread, &mockGA});
commandList->allocateOrReuseKernelPrivateMemory(&mockKernel, sizePerHwThread / 2, mapForReuse);
EXPECT_NE(mockKernel.residencyContainer[0], &mockGA);
neoDevice->getMemoryManager()->freeGraphicsMemory(mockKernel.residencyContainer[0]);

View File

@ -571,7 +571,7 @@ ResidencyContainer &CommandStreamReceiver::getResidencyAllocations() {
// Returns a non-const reference to this receiver's `evictionAllocations` member.
ResidencyContainer &CommandStreamReceiver::getEvictionAllocations() {
return this->evictionAllocations;
}
// Returns a non-const reference to this receiver's `ownedPrivateAllocations` member.
// NOTE(review): two signature lines appear here, presumably because this is a
// rendered diff with the +/- markers stripped (old std::unordered_map return
// type first, new PrivateAllocsToReuseContainer second) — confirm against the
// real source.
std::unordered_map<uint32_t, GraphicsAllocation *> &CommandStreamReceiver::getOwnedPrivateAllocations() {
PrivateAllocsToReuseContainer &CommandStreamReceiver::getOwnedPrivateAllocations() {
return this->ownedPrivateAllocations;
}

View File

@ -11,6 +11,7 @@
#include "shared/source/command_stream/stream_properties.h"
#include "shared/source/helpers/blit_properties_container.h"
#include "shared/source/helpers/cache_policy.h"
#include "shared/source/helpers/common_types.h"
#include "shared/source/helpers/completion_stamp.h"
#include "shared/source/helpers/options.h"
#include "shared/source/utilities/spinlock.h"
@ -118,7 +119,7 @@ class CommandStreamReceiver {
ResidencyContainer &getResidencyAllocations();
ResidencyContainer &getEvictionAllocations();
std::unordered_map<uint32_t, GraphicsAllocation *> &getOwnedPrivateAllocations();
PrivateAllocsToReuseContainer &getOwnedPrivateAllocations();
virtual GmmPageTableMngr *createPageTableManager() { return nullptr; }
bool needsPageTableManager() const;
@ -461,7 +462,7 @@ class CommandStreamReceiver {
ResidencyContainer residencyAllocations;
ResidencyContainer evictionAllocations;
std::unordered_map<uint32_t, GraphicsAllocation *> ownedPrivateAllocations;
PrivateAllocsToReuseContainer ownedPrivateAllocations;
MutexType ownershipMutex;
MutexType hostPtrSurfaceCreationMutex;

View File

@ -13,11 +13,13 @@
#include <vector>
namespace NEO {
class GraphicsAllocation;
struct EngineControl;
using EngineControlContainer = std::vector<EngineControl>;
using MultiDeviceEngineControlContainer = StackVec<EngineControlContainer, 6u>;
class Device;
using DeviceVector = std::vector<std::unique_ptr<Device>>;
using PrivateAllocsToReuseContainer = StackVec<std::pair<uint32_t, GraphicsAllocation *>, 8>;
enum class DebugPauseState : uint32_t {
disabled,

View File

@ -4491,7 +4491,7 @@ HWTEST_F(CommandStreamReceiverTest, givenCsrWhenCleanUpResourcesThenOwnedPrivate
auto mockGA = std::make_unique<MockGraphicsAllocation>();
auto mapForReuse = &csr.getOwnedPrivateAllocations();
mapForReuse->insert({0x100, mockGA.release()});
mapForReuse->push_back({0x100, mockGA.release()});
csr.cleanupResources();
EXPECT_EQ(mapForReuse->size(), 0u);
}