Remove device enqueue part 7

- mainly remove BlockKernelManager and ReflectionSurfaceHelper

Related-To: NEO-6559
Signed-off-by: Katarzyna Cencelewska <katarzyna.cencelewska@intel.com>
2025-12-29 09:03:14 +08:00 · 2022-01-18 16:57:12 +00:00
parent 8ebef3769c
commit 97765cd39f
29 changed files with 15 additions and 2035 deletions
--- a/opencl/source/program/CMakeLists.txt
+++ b/opencl/source/program/CMakeLists.txt
@@ -1,5 +1,5 @@
 #
-# Copyright (C) 2018-2021 Intel Corporation
+# Copyright (C) 2018-2022 Intel Corporation
 #
 # SPDX-License-Identifier: MIT
 #
@@ -7,8 +7,6 @@
 set(RUNTIME_SRCS_PROGRAM
    ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
    ${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}additional_options.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/block_kernel_manager.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/block_kernel_manager.h
    ${CMAKE_CURRENT_SOURCE_DIR}/build.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/compile.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/create.cpp
--- a/opencl/source/program/block_kernel_manager.cpp
+++ b/opencl/source/program/block_kernel_manager.cpp
@@ -1,60 +0,0 @@
-/*
- * Copyright (C) 2018-2021 Intel Corporation
- *
- * SPDX-License-Identifier: MIT
- *
- */
-
-#include "opencl/source/program/block_kernel_manager.h"
-
-#include "shared/source/command_stream/command_stream_receiver.h"
-#include "shared/source/helpers/debug_helpers.h"
-#include "shared/source/program/kernel_info.h"
-
-namespace NEO {
-
-void BlockKernelManager::addBlockKernelInfo(KernelInfo *blockKernelInfo) {
-    blockKernelInfoArray.push_back(blockKernelInfo);
-    blockUsesPrintf = blockKernelInfo->kernelDescriptor.kernelAttributes.flags.usesPrintf;
-}
-
-const KernelInfo *BlockKernelManager::getBlockKernelInfo(size_t ordinal) {
-    DEBUG_BREAK_IF(ordinal >= blockKernelInfoArray.size());
-    return blockKernelInfoArray[ordinal];
-}
-
-BlockKernelManager::~BlockKernelManager() {
-    for (auto &i : blockKernelInfoArray)
-        delete i;
-}
-void BlockKernelManager::pushPrivateSurface(GraphicsAllocation *allocation, size_t ordinal) {
-    if (blockPrivateSurfaceArray.size() < blockKernelInfoArray.size()) {
-        blockPrivateSurfaceArray.resize(blockKernelInfoArray.size(), nullptr);
-    }
-
-    DEBUG_BREAK_IF(ordinal >= blockPrivateSurfaceArray.size());
-
-    blockPrivateSurfaceArray[ordinal] = allocation;
-}
-
-GraphicsAllocation *BlockKernelManager::getPrivateSurface(size_t ordinal) {
-    // Ff queried ordinal is out of bound return nullptr,
-    // this happens when no private surface was not pushed
-    if (ordinal < blockPrivateSurfaceArray.size())
-        return blockPrivateSurfaceArray[ordinal];
-    return nullptr;
-}
-void BlockKernelManager::makeInternalAllocationsResident(CommandStreamReceiver &commandStreamReceiver) {
-    auto blockCount = blockKernelInfoArray.size();
-    for (uint32_t surfaceIndex = 0; surfaceIndex < blockCount; surfaceIndex++) {
-        auto surface = getPrivateSurface(surfaceIndex);
-        if (surface) {
-            commandStreamReceiver.makeResident(*surface);
-        }
-        surface = blockKernelInfoArray[surfaceIndex]->getGraphicsAllocation();
-        if (surface) {
-            commandStreamReceiver.makeResident(*surface);
-        }
-    }
-}
-} // namespace NEO
--- a/opencl/source/program/block_kernel_manager.h
+++ b/opencl/source/program/block_kernel_manager.h
@@ -1,41 +0,0 @@
-/*
- * Copyright (C) 2018-2021 Intel Corporation
- *
- * SPDX-License-Identifier: MIT
- *
- */
-
-#pragma once
-#include "opencl/source/api/cl_types.h"
-
-#include <vector>
-
-namespace NEO {
-class GraphicsAllocation;
-class CommandStreamReceiver;
-struct KernelInfo;
-
-class BlockKernelManager {
-  public:
-    BlockKernelManager() = default;
-    virtual ~BlockKernelManager();
-    void addBlockKernelInfo(KernelInfo *);
-    const KernelInfo *getBlockKernelInfo(size_t ordinal);
-    size_t getCount() const {
-        return blockKernelInfoArray.size();
-    }
-    bool getIfBlockUsesPrintf() const {
-        return blockUsesPrintf;
-    }
-
-    void pushPrivateSurface(GraphicsAllocation *allocation, size_t ordinal);
-    GraphicsAllocation *getPrivateSurface(size_t ordinal);
-
-    void makeInternalAllocationsResident(CommandStreamReceiver &);
-
-  protected:
-    bool blockUsesPrintf = false;
-    std::vector<KernelInfo *> blockKernelInfoArray;
-    std::vector<GraphicsAllocation *> blockPrivateSurfaceArray;
-};
-} // namespace NEO
--- a/opencl/source/program/build.cpp
+++ b/opencl/source/program/build.cpp
@@ -198,10 +198,6 @@ cl_int Program::build(
                phaseReached[rootDeviceIndex] = BuildPhase::DebugDataNotification;
            }
        }
-
-        for (const auto &device : deviceVector) {
-            separateBlockKernels(device->getRootDeviceIndex());
-        }
    } while (false);

    if (retVal != CL_SUCCESS) {
--- a/opencl/source/program/link.cpp
+++ b/opencl/source/program/link.cpp
@@ -204,9 +204,6 @@ cl_int Program::link(
            break;
        }
        updateNonUniformFlag(&*inputProgramsInternal.begin(), inputProgramsInternal.size());
-        for (const auto &device : deviceVector) {
-            separateBlockKernels(device->getRootDeviceIndex());
-        }
    } while (false);

    if (retVal != CL_SUCCESS) {
--- a/opencl/source/program/program.cpp
+++ b/opencl/source/program/program.cpp
@@ -30,7 +30,6 @@
 #include "opencl/source/cl_device/cl_device.h"
 #include "opencl/source/context/context.h"
 #include "opencl/source/platform/platform.h"
-#include "opencl/source/program/block_kernel_manager.h"

 #include "compiler_options.h"

@@ -45,7 +44,6 @@ Program::Program(Context *context, bool isBuiltIn, const ClDeviceVector &clDevic
    if (this->context && !this->isBuiltIn) {
        this->context->incRefInternal();
    }
-    blockKernelManager = new BlockKernelManager();

    maxRootDeviceIndex = 0;

@@ -112,9 +110,6 @@ Program::~Program() {
        cleanCurrentKernelInfo(i);
    }

-    freeBlockResources();
-
-    delete blockKernelManager;
    for (const auto &buildInfo : buildInfos) {
        if (buildInfo.constantSurface) {
            if ((nullptr != context) && (nullptr != context->getSVMAllocsManager()) && (context->getSVMAllocsManager()->getSVMAlloc(reinterpret_cast<const void *>(buildInfo.constantSurface->getGpuAddress())))) {
@@ -303,83 +298,6 @@ const char *Program::getBuildLog(uint32_t rootDeviceIndex) const {
    return currentLog.c_str();
 }

-void Program::separateBlockKernels(uint32_t rootDeviceIndex) {
-    if ((0 == buildInfos[rootDeviceIndex].parentKernelInfoArray.size()) && (0 == buildInfos[rootDeviceIndex].subgroupKernelInfoArray.size())) {
-        return;
-    }
-
-    auto allKernelInfos(buildInfos[rootDeviceIndex].kernelInfoArray);
-    buildInfos[rootDeviceIndex].kernelInfoArray.clear();
-    for (auto &i : allKernelInfos) {
-        auto end = i->kernelDescriptor.kernelMetadata.kernelName.rfind("_dispatch_");
-        if (end != std::string::npos) {
-            bool baseKernelFound = false;
-            std::string baseKernelName(i->kernelDescriptor.kernelMetadata.kernelName, 0, end);
-            for (auto &j : buildInfos[rootDeviceIndex].parentKernelInfoArray) {
-                if (j->kernelDescriptor.kernelMetadata.kernelName.compare(baseKernelName) == 0) {
-                    baseKernelFound = true;
-                    break;
-                }
-            }
-            if (!baseKernelFound) {
-                for (auto &j : buildInfos[rootDeviceIndex].subgroupKernelInfoArray) {
-                    if (j->kernelDescriptor.kernelMetadata.kernelName.compare(baseKernelName) == 0) {
-                        baseKernelFound = true;
-                        break;
-                    }
-                }
-            }
-            if (baseKernelFound) {
-                //Parent or subgroup kernel found -> child kernel
-                blockKernelManager->addBlockKernelInfo(i);
-            } else {
-                buildInfos[rootDeviceIndex].kernelInfoArray.push_back(i);
-            }
-        } else {
-            //Regular kernel found
-            buildInfos[rootDeviceIndex].kernelInfoArray.push_back(i);
-        }
-    }
-    allKernelInfos.clear();
-}
-
-void Program::allocateBlockPrivateSurfaces(const ClDevice &clDevice) {
-    auto rootDeviceIndex = clDevice.getRootDeviceIndex();
-    size_t blockCount = blockKernelManager->getCount();
-
-    for (uint32_t i = 0; i < blockCount; i++) {
-        const KernelInfo *info = blockKernelManager->getBlockKernelInfo(i);
-
-        auto perHwThreadPrivateMemorySize = info->kernelDescriptor.kernelAttributes.perHwThreadPrivateMemorySize;
-        if (perHwThreadPrivateMemorySize > 0 && blockKernelManager->getPrivateSurface(i) == nullptr) {
-            auto privateSize = static_cast<size_t>(KernelHelper::getPrivateSurfaceSize(perHwThreadPrivateMemorySize, clDevice.getSharedDeviceInfo().computeUnitsUsedForScratch));
-
-            auto *privateSurface = this->executionEnvironment.memoryManager->allocateGraphicsMemoryWithProperties(
-                {rootDeviceIndex, privateSize, GraphicsAllocation::AllocationType::PRIVATE_SURFACE, clDevice.getDeviceBitfield()});
-            blockKernelManager->pushPrivateSurface(privateSurface, i);
-        }
-    }
-}
-
-void Program::freeBlockResources() {
-    size_t blockCount = blockKernelManager->getCount();
-
-    for (uint32_t i = 0; i < blockCount; i++) {
-
-        auto *privateSurface = blockKernelManager->getPrivateSurface(i);
-
-        if (privateSurface != nullptr) {
-            blockKernelManager->pushPrivateSurface(nullptr, i);
-            this->executionEnvironment.memoryManager->freeGraphicsMemory(privateSurface);
-        }
-        auto kernelInfo = blockKernelManager->getBlockKernelInfo(i);
-        DEBUG_BREAK_IF(!kernelInfo->kernelAllocation);
-        if (kernelInfo->kernelAllocation) {
-            this->executionEnvironment.memoryManager->freeGraphicsMemory(kernelInfo->kernelAllocation);
-        }
-    }
-}
-
 void Program::cleanCurrentKernelInfo(uint32_t rootDeviceIndex) {
    auto &buildInfo = buildInfos[rootDeviceIndex];
    for (auto &kernelInfo : buildInfo.kernelInfoArray) {
--- a/opencl/source/program/program.h
+++ b/opencl/source/program/program.h
@@ -30,7 +30,6 @@ namespace PatchTokenBinary {
 struct ProgramFromPatchtokens;
 }

-class BlockKernelManager;
 class BuiltinDispatchInfoBuilder;
 class ClDevice;
 class Context;
@@ -200,12 +199,6 @@ class Program : public BaseObject<_cl_program> {
        return buildInfos[rootDeviceIndex].exportedFunctionsSurface;
    }

-    BlockKernelManager *getBlockKernelManager() const {
-        return blockKernelManager;
-    }
-
-    void allocateBlockPrivateSurfaces(const ClDevice &clDevice);
-    void freeBlockResources();
    void cleanCurrentKernelInfo(uint32_t rootDeviceIndex);

    const std::string &getOptions() const { return options; }
@@ -294,8 +287,6 @@ class Program : public BaseObject<_cl_program> {

    MOCKABLE_VIRTUAL cl_int linkBinary(Device *pDevice, const void *constantsInitData, const void *variablesInitData, const ProgramInfo::GlobalSurfaceInfo &stringInfo);

-    void separateBlockKernels(uint32_t rootDeviceIndex);
-
    void updateNonUniformFlag();
    void updateNonUniformFlag(const Program **inputProgram, size_t numInputPrograms);

@@ -365,7 +356,6 @@ class Program : public BaseObject<_cl_program> {
    CIF::RAII::UPtr_t<CIF::Builtins::BufferSimple> specConstantsSizes;
    specConstValuesMap specConstantsValues;

-    BlockKernelManager *blockKernelManager = nullptr;
    ExecutionEnvironment &executionEnvironment;
    Context *context = nullptr;
    ClDeviceVector clDevices;