Remove additional memory allocations for surfaces container

The constructor of CommandComputeKernel performed multiple heap allocations
because it neither used the std::vector copy constructor nor called the
reserve() member function before filling the container.

Furthermore, production code creates objects of this type in only one place,
and there it redundantly copies a local variable that could be moved instead.

This change:
- ensures that the constructor of CommandComputeKernel performs a single
allocation in the worst case; in the best case it does not allocate memory at
all, because the caller passes the input parameter with std::move
- steals the memory of the local variable at the place where the constructor
is used, removing the redundant copy and its memory allocations
- uses the reserve() method to reduce the number of allocations while this
local variable is being populated

Signed-off-by: Patryk Wrobel <patryk.wrobel@intel.com>
This commit is contained in:
Patryk Wrobel 2022-03-01 15:14:04 +00:00 committed by Compute-Runtime-Automation
parent 721c59d3d5
commit f193efec2f
3 changed files with 7 additions and 8 deletions

View File

@ -893,6 +893,8 @@ void CommandQueueHw<GfxFamily>::enqueueBlocked(
}
kernel->getResidency(allSurfaces);
}
allSurfaces.reserve(allSurfaces.size() + surfaceCount);
for (auto &surface : CreateRange(surfaces, surfaceCount)) {
allSurfaces.push_back(surface->duplicate());
}
@ -901,7 +903,7 @@ void CommandQueueHw<GfxFamily>::enqueueBlocked(
bool slmUsed = multiDispatchInfo.usesSlm();
command = std::make_unique<CommandComputeKernel>(*this,
blockedCommandsData,
allSurfaces,
std::move(allSurfaces),
shouldFlushDC(commandType, printfHandler.get()),
slmUsed,
commandType,

View File

@ -112,15 +112,12 @@ CompletionStamp &CommandMapUnmap::submit(uint32_t taskLevel, bool terminated) {
return completionStamp;
}
CommandComputeKernel::CommandComputeKernel(CommandQueue &commandQueue, std::unique_ptr<KernelOperation> &kernelOperation, std::vector<Surface *> &surfaces,
CommandComputeKernel::CommandComputeKernel(CommandQueue &commandQueue, std::unique_ptr<KernelOperation> &kernelOperation, std::vector<Surface *> surfaces,
bool flushDC, bool usesSLM, uint32_t commandType, std::unique_ptr<PrintfHandler> &&printfHandler,
PreemptionMode preemptionMode, Kernel *kernel, uint32_t kernelCount)
: Command(commandQueue, kernelOperation), flushDC(flushDC), slmUsed(usesSLM),
: Command(commandQueue, kernelOperation), surfaces(std::move(surfaces)), flushDC(flushDC), slmUsed(usesSLM),
commandType(commandType), printfHandler(std::move(printfHandler)), kernel(kernel),
kernelCount(kernelCount), preemptionMode(preemptionMode) {
for (auto surface : surfaces) {
this->surfaces.push_back(surface);
}
UNRECOVERABLE_IF(nullptr == this->kernel);
kernel->incRefInternal();
}

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2018-2021 Intel Corporation
* Copyright (C) 2018-2022 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -130,7 +130,7 @@ class CommandMapUnmap : public Command {
class CommandComputeKernel : public Command {
public:
CommandComputeKernel(CommandQueue &commandQueue, std::unique_ptr<KernelOperation> &kernelOperation, std::vector<Surface *> &surfaces,
CommandComputeKernel(CommandQueue &commandQueue, std::unique_ptr<KernelOperation> &kernelOperation, std::vector<Surface *> surfaces,
bool flushDC, bool usesSLM, uint32_t commandType, std::unique_ptr<PrintfHandler> &&printfHandler,
PreemptionMode preemptionMode, Kernel *kernel, uint32_t kernelCount);