Remove additional memory allocations for surfaces container

The constructor of CommandComputeKernel performed multiple heap allocations because it filled its surfaces member with element-by-element push_back calls instead of using the std::vector copy constructor or calling reserve(). Furthermore, production code creates objects of this type in only one place, and there the local variable was copied redundantly even though it could be moved.

This change:
- ensures that the constructor of CommandComputeKernel performs a single allocation in the worst case; in the best case it allocates no memory at all, because std::move is used on the input parameter
- moves the local variable at the constructor call site, handing over its memory instead of copying it, which removes the redundant copy and its memory allocations
- uses reserve() to reduce the number of allocations while this local variable is being built

Signed-off-by: Patryk Wrobel <patryk.wrobel@intel.com>
2022-03-01 15:14:04 +00:00 · 2022-03-01 15:14:04 +00:00 · f193efec2f
parent 721c59d3d5
commit f193efec2f
3 changed files with 7 additions and 8 deletions
--- a/opencl/source/command_queue/enqueue_common.h
+++ b/opencl/source/command_queue/enqueue_common.h
@@ -893,6 +893,8 @@ void CommandQueueHw<GfxFamily>::enqueueBlocked(
            }
            kernel->getResidency(allSurfaces);
        }
+
+        allSurfaces.reserve(allSurfaces.size() + surfaceCount);
        for (auto &surface : CreateRange(surfaces, surfaceCount)) {
            allSurfaces.push_back(surface->duplicate());
        }
@@ -901,7 +903,7 @@ void CommandQueueHw<GfxFamily>::enqueueBlocked(
        bool slmUsed = multiDispatchInfo.usesSlm();
        command = std::make_unique<CommandComputeKernel>(*this,
                                                         blockedCommandsData,
-                                                         allSurfaces,
+                                                         std::move(allSurfaces),
                                                         shouldFlushDC(commandType, printfHandler.get()),
                                                         slmUsed,
                                                         commandType,
--- a/opencl/source/helpers/task_information.cpp
+++ b/opencl/source/helpers/task_information.cpp
@@ -112,15 +112,12 @@ CompletionStamp &CommandMapUnmap::submit(uint32_t taskLevel, bool terminated) {
    return completionStamp;
 }

-CommandComputeKernel::CommandComputeKernel(CommandQueue &commandQueue, std::unique_ptr<KernelOperation> &kernelOperation, std::vector<Surface *> &surfaces,
+CommandComputeKernel::CommandComputeKernel(CommandQueue &commandQueue, std::unique_ptr<KernelOperation> &kernelOperation, std::vector<Surface *> surfaces,
                                           bool flushDC, bool usesSLM, uint32_t commandType, std::unique_ptr<PrintfHandler> &&printfHandler,
                                           PreemptionMode preemptionMode, Kernel *kernel, uint32_t kernelCount)
-    : Command(commandQueue, kernelOperation), flushDC(flushDC), slmUsed(usesSLM),
+    : Command(commandQueue, kernelOperation), surfaces(std::move(surfaces)), flushDC(flushDC), slmUsed(usesSLM),
      commandType(commandType), printfHandler(std::move(printfHandler)), kernel(kernel),
      kernelCount(kernelCount), preemptionMode(preemptionMode) {
-    for (auto surface : surfaces) {
-        this->surfaces.push_back(surface);
-    }
    UNRECOVERABLE_IF(nullptr == this->kernel);
    kernel->incRefInternal();
 }
--- a/opencl/source/helpers/task_information.h
+++ b/opencl/source/helpers/task_information.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2018-2021 Intel Corporation
+ * Copyright (C) 2018-2022 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
@@ -130,7 +130,7 @@ class CommandMapUnmap : public Command {

 class CommandComputeKernel : public Command {
  public:
-    CommandComputeKernel(CommandQueue &commandQueue, std::unique_ptr<KernelOperation> &kernelOperation, std::vector<Surface *> &surfaces,
+    CommandComputeKernel(CommandQueue &commandQueue, std::unique_ptr<KernelOperation> &kernelOperation, std::vector<Surface *> surfaces,
                         bool flushDC, bool usesSLM, uint32_t commandType, std::unique_ptr<PrintfHandler> &&printfHandler,
                         PreemptionMode preemptionMode, Kernel *kernel, uint32_t kernelCount);