performance: Use vector for private allocs to reuse
Related-To: HSD-18033105655, HSD-18033153203
Signed-off-by: Maciej Plewka <maciej.plewka@intel.com>
This commit is contained in:
parent
91b26277a4
commit
3b3e17e738
|
@ -11,6 +11,7 @@
|
|||
#include "shared/source/command_stream/preemption_mode.h"
|
||||
#include "shared/source/command_stream/stream_properties.h"
|
||||
#include "shared/source/helpers/cache_policy.h"
|
||||
#include "shared/source/helpers/common_types.h"
|
||||
#include "shared/source/helpers/definitions/command_encoder_args.h"
|
||||
#include "shared/source/helpers/heap_base_address_model.h"
|
||||
#include "shared/source/memory_manager/prefetch_manager.h"
|
||||
|
@ -356,7 +357,7 @@ struct CommandList : _ze_command_list_handle_t {
|
|||
MOCKABLE_VIRTUAL void synchronizeEventList(uint32_t numWaitEvents, ze_event_handle_t *waitEventList);
|
||||
|
||||
std::map<const void *, NEO::GraphicsAllocation *> hostPtrMap;
|
||||
std::unordered_map<uint32_t, NEO::GraphicsAllocation *> ownedPrivateAllocations;
|
||||
NEO::PrivateAllocsToReuseContainer ownedPrivateAllocations;
|
||||
std::vector<NEO::GraphicsAllocation *> patternAllocations;
|
||||
std::vector<Kernel *> printfKernelContainer;
|
||||
|
||||
|
|
|
@ -309,7 +309,7 @@ struct CommandListCoreFamily : CommandListImp {
|
|||
return (this->pipeControlMultiKernelEventSync && splitKernel) ||
|
||||
compactL3FlushEvent(dcFlush);
|
||||
}
|
||||
MOCKABLE_VIRTUAL void allocateOrReuseKernelPrivateMemory(Kernel *kernel, uint32_t sizePerHwThread, std::unordered_map<uint32_t, NEO::GraphicsAllocation *> &privateAllocsToReuse);
|
||||
MOCKABLE_VIRTUAL void allocateOrReuseKernelPrivateMemory(Kernel *kernel, uint32_t sizePerHwThread, NEO::PrivateAllocsToReuseContainer &privateAllocsToReuse);
|
||||
virtual void allocateOrReuseKernelPrivateMemoryIfNeeded(Kernel *kernel, uint32_t sizePerHwThread);
|
||||
CmdListEventOperation estimateEventPostSync(Event *event, uint32_t operations);
|
||||
void dispatchPostSyncCopy(uint64_t gpuAddress, uint32_t value, bool workloadPartition);
|
||||
|
|
|
@ -3182,15 +3182,22 @@ void CommandListCoreFamily<gfxCoreFamily>::allocateOrReuseKernelPrivateMemoryIfN
|
|||
}
|
||||
|
||||
// Picks the private-memory allocation to use for `kernel` and passes it to
// kernelImp->patchAndMoveToResidencyContainerPrivateSurface().
//
// kernel               - cast to KernelImp to reach the allocation/patch helpers.
// sizePerHwThread      - key used to look up a reusable allocation.
// privateAllocsToReuse - caller-owned collection of allocations keyed by size;
//                        a newly created allocation is recorded in it.
//
// An entry whose key equals sizePerHwThread is reused; otherwise a new
// allocation comes from kernelImp->allocatePrivateMemoryGraphicsAllocation().
//
// NOTE(review): this span reads like a diff hunk whose +/- markers were lost.
// The unordered_map signature and the operator[] lines look like the removed
// version; the PrivateAllocsToReuseContainer signature, the linear search and
// push_back look like the added version. Confirm before treating the lines
// below as a single function body.
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
void CommandListCoreFamily<gfxCoreFamily>::allocateOrReuseKernelPrivateMemory(Kernel *kernel, uint32_t sizePerHwThread, std::unordered_map<uint32_t, NEO::GraphicsAllocation *> &privateAllocsToReuse) {
|
||||
void CommandListCoreFamily<gfxCoreFamily>::allocateOrReuseKernelPrivateMemory(Kernel *kernel, uint32_t sizePerHwThread, NEO::PrivateAllocsToReuseContainer &privateAllocsToReuse) {
|
||||
L0::KernelImp *kernelImp = static_cast<KernelImp *>(kernel);
|
||||
NEO::GraphicsAllocation *privateAlloc = nullptr;
|
||||

||||
// Map-based lookup: a non-null entry stored under sizePerHwThread is reused.
if (privateAllocsToReuse[sizePerHwThread] != nullptr) {
|
||||
privateAlloc = privateAllocsToReuse[sizePerHwThread];
|
||||
} else {
|
||||
bool allocToReuseFound = false;
|
||||

||||
// Sequential search over the (size, allocation) pairs; only an exact size
// match counts, and the first match ends the search.
for (auto &alloc : privateAllocsToReuse) {
|
||||
if (sizePerHwThread == alloc.first) {
|
||||
privateAlloc = alloc.second;
|
||||
allocToReuseFound = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
// No entry for this size: allocate one and remember it for later calls.
if (!allocToReuseFound) {
|
||||
privateAlloc = kernelImp->allocatePrivateMemoryGraphicsAllocation();
|
||||
privateAllocsToReuse[sizePerHwThread] = privateAlloc;
|
||||
privateAllocsToReuse.push_back({sizePerHwThread, privateAlloc});
|
||||
}
|
||||
kernelImp->patchAndMoveToResidencyContainerPrivateSurface(privateAlloc);
|
||||
}
|
||||
|
|
|
@ -534,7 +534,7 @@ class MockCommandListCoreFamily : public CommandListCoreFamily<gfxCoreFamily> {
|
|||
false,
|
||||
(L0::Kernel * kernel,
|
||||
uint32_t sizePerHwThread,
|
||||
std::unordered_map<uint32_t, NEO::GraphicsAllocation *> &privateAllocsToReuse),
|
||||
NEO::PrivateAllocsToReuseContainer &privateAllocsToReuse),
|
||||
(kernel, sizePerHwThread, privateAllocsToReuse));
|
||||
|
||||
ADDMETHOD_VOIDRETURN(allocateOrReuseKernelPrivateMemoryIfNeeded,
|
||||
|
@ -631,7 +631,7 @@ class MockCommandListImmediateHw : public WhiteBox<::L0::CommandListCoreFamilyIm
|
|||
false,
|
||||
(L0::Kernel * kernel,
|
||||
uint32_t sizePerHwThread,
|
||||
std::unordered_map<uint32_t, NEO::GraphicsAllocation *> &privateAllocsToReuse),
|
||||
NEO::PrivateAllocsToReuseContainer &privateAllocsToReuse),
|
||||
(kernel, sizePerHwThread, privateAllocsToReuse));
|
||||
|
||||
ADDMETHOD_VOIDRETURN(allocateOrReuseKernelPrivateMemoryIfNeeded,
|
||||
|
|
|
@ -3193,11 +3193,11 @@ HWTEST2_F(CommandListMappedTimestampTest, givenEventIsAddedToMappedEventListWhen
|
|||
// Test mock layered on top of BaseMock. Its allocateOrReuseKernelPrivateMemory
// override stores the address of the container it was called with in
// passedContainer and then forwards the call unchanged to BaseMock.
//
// NOTE(review): the override signature and the passedContainer member each
// appear twice (unordered_map form and PrivateAllocsToReuseContainer form);
// this looks like a diff hunk with its +/- markers stripped - confirm which
// pair is current.
template <GFXCORE_FAMILY gfxCoreFamily, typename BaseMock>
|
||||
class MockCommandListCoreFamilyIfPrivateNeeded : public BaseMock {
|
||||
public:
|
||||
void allocateOrReuseKernelPrivateMemory(Kernel *kernel, uint32_t sizePerHwThread, std::unordered_map<uint32_t, GraphicsAllocation *> &privateAllocsToReuse) override {
|
||||
void allocateOrReuseKernelPrivateMemory(Kernel *kernel, uint32_t sizePerHwThread, PrivateAllocsToReuseContainer &privateAllocsToReuse) override {
|
||||
// Record which container the caller passed before delegating.
passedContainer = &privateAllocsToReuse;
|
||||
BaseMock::allocateOrReuseKernelPrivateMemory(kernel, sizePerHwThread, privateAllocsToReuse);
|
||||
}
|
||||
// Address of the container seen by the most recent override call; it has no
// in-class initializer here.
std::unordered_map<uint32_t, GraphicsAllocation *> *passedContainer;
|
||||
PrivateAllocsToReuseContainer *passedContainer;
|
||||
};
|
||||
|
||||
HWTEST2_F(CommandListCreate, givenPrivatePerDispatchDisabledWhenAllocatingPrivateMemoryThenAllocateIsNotCalled, IsAtLeastSkl) {
|
||||
|
@ -3281,8 +3281,8 @@ HWTEST2_F(CommandListCreate, givenCmdListWhenAllocateOrReuseCalledForSizeThatIsS
|
|||
const_cast<uint32_t &>(mockKernel.kernelImmData->getDescriptor().kernelAttributes.perHwThreadPrivateMemorySize) = 0x1000;
|
||||
mockKernel.module = &mockModule;
|
||||
MockGraphicsAllocation mockGA(mockMem.get(), 2 * sizePerHwThread * this->neoDevice->getDeviceInfo().computeUnitsUsedForScratch);
|
||||
std::unordered_map<uint32_t, GraphicsAllocation *> mapForReuse;
|
||||
mapForReuse[sizePerHwThread] = &mockGA;
|
||||
PrivateAllocsToReuseContainer mapForReuse;
|
||||
mapForReuse.push_back({sizePerHwThread, &mockGA});
|
||||
commandList->allocateOrReuseKernelPrivateMemory(&mockKernel, sizePerHwThread, mapForReuse);
|
||||
EXPECT_EQ(mockKernel.residencyContainer[0], &mockGA);
|
||||
}
|
||||
|
@ -3298,8 +3298,8 @@ HWTEST2_F(CommandListCreate, givenNewSizeDifferentThanSizesInMapWhenAllocatingPr
|
|||
const_cast<uint32_t &>(mockKernel.kernelImmData->getDescriptor().kernelAttributes.perHwThreadPrivateMemorySize) = 0x1000;
|
||||
mockKernel.module = &mockModule;
|
||||
MockGraphicsAllocation mockGA(mockMem.get(), sizePerHwThread * this->neoDevice->getDeviceInfo().computeUnitsUsedForScratch / 2);
|
||||
std::unordered_map<uint32_t, GraphicsAllocation *> mapForReuse;
|
||||
mapForReuse[sizePerHwThread] = &mockGA;
|
||||
PrivateAllocsToReuseContainer mapForReuse;
|
||||
mapForReuse.push_back({sizePerHwThread, &mockGA});
|
||||
commandList->allocateOrReuseKernelPrivateMemory(&mockKernel, sizePerHwThread / 2, mapForReuse);
|
||||
EXPECT_NE(mockKernel.residencyContainer[0], &mockGA);
|
||||
neoDevice->getMemoryManager()->freeGraphicsMemory(mockKernel.residencyContainer[0]);
|
||||
|
|
|
@ -571,7 +571,7 @@ ResidencyContainer &CommandStreamReceiver::getResidencyAllocations() {
|
|||
// Returns a non-const reference to this receiver's evictionAllocations member.
ResidencyContainer &CommandStreamReceiver::getEvictionAllocations() {
|
||||
return this->evictionAllocations;
|
||||
}
|
||||
// Returns a non-const reference to this receiver's ownedPrivateAllocations
// member, so callers can read and modify the collection in place.
// NOTE(review): two signature lines are present (unordered_map return type and
// PrivateAllocsToReuseContainer return type); this looks like a diff hunk with
// its +/- markers stripped - confirm which one is current.
std::unordered_map<uint32_t, GraphicsAllocation *> &CommandStreamReceiver::getOwnedPrivateAllocations() {
|
||||
PrivateAllocsToReuseContainer &CommandStreamReceiver::getOwnedPrivateAllocations() {
|
||||
return this->ownedPrivateAllocations;
|
||||
}
|
||||
|
||||
|
|
|
@ -11,6 +11,7 @@
|
|||
#include "shared/source/command_stream/stream_properties.h"
|
||||
#include "shared/source/helpers/blit_properties_container.h"
|
||||
#include "shared/source/helpers/cache_policy.h"
|
||||
#include "shared/source/helpers/common_types.h"
|
||||
#include "shared/source/helpers/completion_stamp.h"
|
||||
#include "shared/source/helpers/options.h"
|
||||
#include "shared/source/utilities/spinlock.h"
|
||||
|
@ -118,7 +119,7 @@ class CommandStreamReceiver {
|
|||
|
||||
ResidencyContainer &getResidencyAllocations();
|
||||
ResidencyContainer &getEvictionAllocations();
|
||||
std::unordered_map<uint32_t, GraphicsAllocation *> &getOwnedPrivateAllocations();
|
||||
PrivateAllocsToReuseContainer &getOwnedPrivateAllocations();
|
||||
|
||||
virtual GmmPageTableMngr *createPageTableManager() { return nullptr; }
|
||||
bool needsPageTableManager() const;
|
||||
|
@ -461,7 +462,7 @@ class CommandStreamReceiver {
|
|||
|
||||
ResidencyContainer residencyAllocations;
|
||||
ResidencyContainer evictionAllocations;
|
||||
std::unordered_map<uint32_t, GraphicsAllocation *> ownedPrivateAllocations;
|
||||
PrivateAllocsToReuseContainer ownedPrivateAllocations;
|
||||
|
||||
MutexType ownershipMutex;
|
||||
MutexType hostPtrSurfaceCreationMutex;
|
||||
|
|
|
@ -13,11 +13,13 @@
|
|||
#include <vector>
|
||||
|
||||
namespace NEO {
|
||||
class GraphicsAllocation;
|
||||
struct EngineControl;
|
||||
using EngineControlContainer = std::vector<EngineControl>;
|
||||
using MultiDeviceEngineControlContainer = StackVec<EngineControlContainer, 6u>;
|
||||
class Device;
|
||||
using DeviceVector = std::vector<std::unique_ptr<Device>>;
|
||||
// Sequence of (uint32_t, GraphicsAllocation *) pairs. Elsewhere in this change
// the first element is filled with sizePerHwThread and the second with the
// private-memory allocation created for that size.
// NOTE(review): the second StackVec argument (8) is presumably its inline
// capacity - confirm against the StackVec definition.
using PrivateAllocsToReuseContainer = StackVec<std::pair<uint32_t, GraphicsAllocation *>, 8>;
|
||||
|
||||
enum class DebugPauseState : uint32_t {
|
||||
disabled,
|
||||
|
|
|
@ -4491,7 +4491,7 @@ HWTEST_F(CommandStreamReceiverTest, givenCsrWhenCleanUpResourcesThenOwnedPrivate
|
|||
auto mockGA = std::make_unique<MockGraphicsAllocation>();
|
||||
|
||||
auto mapForReuse = &csr.getOwnedPrivateAllocations();
|
||||
mapForReuse->insert({0x100, mockGA.release()});
|
||||
mapForReuse->push_back({0x100, mockGA.release()});
|
||||
csr.cleanupResources();
|
||||
EXPECT_EQ(mapForReuse->size(), 0u);
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue