Remove device enqueue part 7

- mainly remove BlockKernelManager and ReflectionSurfaceHelper

Related-To: NEO-6559
Signed-off-by: Katarzyna Cencelewska <katarzyna.cencelewska@intel.com>
This commit is contained in:
Katarzyna Cencelewska
2022-01-18 16:57:12 +00:00
committed by Compute-Runtime-Automation
parent 8ebef3769c
commit 97765cd39f
29 changed files with 15 additions and 2035 deletions

View File

@@ -1,5 +1,5 @@
#
# Copyright (C) 2018-2021 Intel Corporation
# Copyright (C) 2018-2022 Intel Corporation
#
# SPDX-License-Identifier: MIT
#
@@ -7,8 +7,6 @@
set(RUNTIME_SRCS_PROGRAM
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}additional_options.cpp
${CMAKE_CURRENT_SOURCE_DIR}/block_kernel_manager.cpp
${CMAKE_CURRENT_SOURCE_DIR}/block_kernel_manager.h
${CMAKE_CURRENT_SOURCE_DIR}/build.cpp
${CMAKE_CURRENT_SOURCE_DIR}/compile.cpp
${CMAKE_CURRENT_SOURCE_DIR}/create.cpp

View File

@@ -1,60 +0,0 @@
/*
* Copyright (C) 2018-2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "opencl/source/program/block_kernel_manager.h"
#include "shared/source/command_stream/command_stream_receiver.h"
#include "shared/source/helpers/debug_helpers.h"
#include "shared/source/program/kernel_info.h"
namespace NEO {
// Appends a block kernel descriptor to the managed list and records whether
// any block kernel added so far uses printf.
// The stored pointer is deleted by ~BlockKernelManager().
void BlockKernelManager::addBlockKernelInfo(KernelInfo *blockKernelInfo) {
    blockKernelInfoArray.push_back(blockKernelInfo);
    // Accumulate instead of overwrite: a later block that does not use printf
    // must not clear the flag set by an earlier block that does.
    blockUsesPrintf = blockUsesPrintf || blockKernelInfo->kernelDescriptor.kernelAttributes.flags.usesPrintf;
}
// Returns the block kernel descriptor stored at position `ordinal`.
// An out-of-range ordinal only triggers DEBUG_BREAK_IF; the element access
// that follows is not otherwise guarded.
const KernelInfo *BlockKernelManager::getBlockKernelInfo(size_t ordinal) {
    DEBUG_BREAK_IF(!(ordinal < blockKernelInfoArray.size()));
    const KernelInfo *requestedInfo = blockKernelInfoArray[ordinal];
    return requestedInfo;
}
// Deletes every KernelInfo that was registered via addBlockKernelInfo().
BlockKernelManager::~BlockKernelManager() {
    for (KernelInfo *ownedInfo : blockKernelInfoArray) {
        delete ownedInfo;
    }
}
// Stores `allocation` as the private surface of the block at `ordinal`.
// The surface array is first grown (filled with nullptr) so that it has one
// slot per registered block kernel; it is never shrunk here.
void BlockKernelManager::pushPrivateSurface(GraphicsAllocation *allocation, size_t ordinal) {
    const auto requiredSlots = blockKernelInfoArray.size();
    if (requiredSlots > blockPrivateSurfaceArray.size()) {
        blockPrivateSurfaceArray.resize(requiredSlots, nullptr);
    }

    DEBUG_BREAK_IF(ordinal >= blockPrivateSurfaceArray.size());
    blockPrivateSurfaceArray[ordinal] = allocation;
}
// Returns the private surface stored for the block at `ordinal`.
// If the queried ordinal is out of bounds, nullptr is returned; this happens
// when no private surface has been pushed for that block yet.
GraphicsAllocation *BlockKernelManager::getPrivateSurface(size_t ordinal) {
    const bool inRange = ordinal < blockPrivateSurfaceArray.size();
    return inRange ? blockPrivateSurfaceArray[ordinal] : nullptr;
}
// For every registered block kernel, passes its private surface (if one was
// pushed) and then the allocation returned by getGraphicsAllocation() (if
// non-null) to commandStreamReceiver.makeResident().
void BlockKernelManager::makeInternalAllocationsResident(CommandStreamReceiver &commandStreamReceiver) {
    // Null allocations are skipped silently.
    const auto makeResidentIfPresent = [&commandStreamReceiver](GraphicsAllocation *allocation) {
        if (allocation) {
            commandStreamReceiver.makeResident(*allocation);
        }
    };

    const auto blockTotal = blockKernelInfoArray.size();
    for (uint32_t blockIndex = 0; blockIndex < blockTotal; blockIndex++) {
        makeResidentIfPresent(getPrivateSurface(blockIndex));
        makeResidentIfPresent(blockKernelInfoArray[blockIndex]->getGraphicsAllocation());
    }
}
} // namespace NEO

View File

@@ -1,41 +0,0 @@
/*
* Copyright (C) 2018-2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include "opencl/source/api/cl_types.h"
#include <vector>
namespace NEO {
class GraphicsAllocation;
class CommandStreamReceiver;
struct KernelInfo;

// Keeps the KernelInfo descriptors of block kernels together with an optional
// private-surface allocation per block, both addressed by ordinal.
class BlockKernelManager {
  public:
    BlockKernelManager() = default;
    virtual ~BlockKernelManager();

    // The destructor deletes the stored KernelInfo pointers, so an implicit
    // copy would lead to a double delete. Copying is therefore disabled.
    BlockKernelManager(const BlockKernelManager &) = delete;
    BlockKernelManager &operator=(const BlockKernelManager &) = delete;

    // Registers a block kernel descriptor; the pointer is deleted in the destructor.
    void addBlockKernelInfo(KernelInfo *);
    // Returns the descriptor stored at `ordinal`.
    const KernelInfo *getBlockKernelInfo(size_t ordinal);
    // Number of registered block kernel descriptors.
    size_t getCount() const {
        return blockKernelInfoArray.size();
    }
    // Current value of the printf-usage flag maintained by addBlockKernelInfo().
    bool getIfBlockUsesPrintf() const {
        return blockUsesPrintf;
    }
    // Stores `allocation` as the private surface for the block at `ordinal`.
    void pushPrivateSurface(GraphicsAllocation *allocation, size_t ordinal);
    // Returns the private surface at `ordinal`, or nullptr if none is stored.
    GraphicsAllocation *getPrivateSurface(size_t ordinal);
    // Makes the private surfaces and kernel allocations resident on the given receiver.
    void makeInternalAllocationsResident(CommandStreamReceiver &);

  protected:
    bool blockUsesPrintf = false;
    std::vector<KernelInfo *> blockKernelInfoArray;
    std::vector<GraphicsAllocation *> blockPrivateSurfaceArray;
};
} // namespace NEO

View File

@@ -198,10 +198,6 @@ cl_int Program::build(
phaseReached[rootDeviceIndex] = BuildPhase::DebugDataNotification;
}
}
for (const auto &device : deviceVector) {
separateBlockKernels(device->getRootDeviceIndex());
}
} while (false);
if (retVal != CL_SUCCESS) {

View File

@@ -204,9 +204,6 @@ cl_int Program::link(
break;
}
updateNonUniformFlag(&*inputProgramsInternal.begin(), inputProgramsInternal.size());
for (const auto &device : deviceVector) {
separateBlockKernels(device->getRootDeviceIndex());
}
} while (false);
if (retVal != CL_SUCCESS) {

View File

@@ -30,7 +30,6 @@
#include "opencl/source/cl_device/cl_device.h"
#include "opencl/source/context/context.h"
#include "opencl/source/platform/platform.h"
#include "opencl/source/program/block_kernel_manager.h"
#include "compiler_options.h"
@@ -45,7 +44,6 @@ Program::Program(Context *context, bool isBuiltIn, const ClDeviceVector &clDevic
if (this->context && !this->isBuiltIn) {
this->context->incRefInternal();
}
blockKernelManager = new BlockKernelManager();
maxRootDeviceIndex = 0;
@@ -112,9 +110,6 @@ Program::~Program() {
cleanCurrentKernelInfo(i);
}
freeBlockResources();
delete blockKernelManager;
for (const auto &buildInfo : buildInfos) {
if (buildInfo.constantSurface) {
if ((nullptr != context) && (nullptr != context->getSVMAllocsManager()) && (context->getSVMAllocsManager()->getSVMAlloc(reinterpret_cast<const void *>(buildInfo.constantSurface->getGpuAddress())))) {
@@ -303,83 +298,6 @@ const char *Program::getBuildLog(uint32_t rootDeviceIndex) const {
return currentLog.c_str();
}
void Program::separateBlockKernels(uint32_t rootDeviceIndex) {
if ((0 == buildInfos[rootDeviceIndex].parentKernelInfoArray.size()) && (0 == buildInfos[rootDeviceIndex].subgroupKernelInfoArray.size())) {
return;
}
auto allKernelInfos(buildInfos[rootDeviceIndex].kernelInfoArray);
buildInfos[rootDeviceIndex].kernelInfoArray.clear();
for (auto &i : allKernelInfos) {
auto end = i->kernelDescriptor.kernelMetadata.kernelName.rfind("_dispatch_");
if (end != std::string::npos) {
bool baseKernelFound = false;
std::string baseKernelName(i->kernelDescriptor.kernelMetadata.kernelName, 0, end);
for (auto &j : buildInfos[rootDeviceIndex].parentKernelInfoArray) {
if (j->kernelDescriptor.kernelMetadata.kernelName.compare(baseKernelName) == 0) {
baseKernelFound = true;
break;
}
}
if (!baseKernelFound) {
for (auto &j : buildInfos[rootDeviceIndex].subgroupKernelInfoArray) {
if (j->kernelDescriptor.kernelMetadata.kernelName.compare(baseKernelName) == 0) {
baseKernelFound = true;
break;
}
}
}
if (baseKernelFound) {
//Parent or subgroup kernel found -> child kernel
blockKernelManager->addBlockKernelInfo(i);
} else {
buildInfos[rootDeviceIndex].kernelInfoArray.push_back(i);
}
} else {
//Regular kernel found
buildInfos[rootDeviceIndex].kernelInfoArray.push_back(i);
}
}
allKernelInfos.clear();
}
void Program::allocateBlockPrivateSurfaces(const ClDevice &clDevice) {
auto rootDeviceIndex = clDevice.getRootDeviceIndex();
size_t blockCount = blockKernelManager->getCount();
for (uint32_t i = 0; i < blockCount; i++) {
const KernelInfo *info = blockKernelManager->getBlockKernelInfo(i);
auto perHwThreadPrivateMemorySize = info->kernelDescriptor.kernelAttributes.perHwThreadPrivateMemorySize;
if (perHwThreadPrivateMemorySize > 0 && blockKernelManager->getPrivateSurface(i) == nullptr) {
auto privateSize = static_cast<size_t>(KernelHelper::getPrivateSurfaceSize(perHwThreadPrivateMemorySize, clDevice.getSharedDeviceInfo().computeUnitsUsedForScratch));
auto *privateSurface = this->executionEnvironment.memoryManager->allocateGraphicsMemoryWithProperties(
{rootDeviceIndex, privateSize, GraphicsAllocation::AllocationType::PRIVATE_SURFACE, clDevice.getDeviceBitfield()});
blockKernelManager->pushPrivateSurface(privateSurface, i);
}
}
}
// Releases, for every block kernel, its private surface (if one is stored) and
// its kernel allocation (if set) through the memory manager.
void Program::freeBlockResources() {
    const size_t blockTotal = blockKernelManager->getCount();

    for (uint32_t blockIndex = 0; blockIndex < blockTotal; blockIndex++) {
        if (auto *privateSurface = blockKernelManager->getPrivateSurface(blockIndex)) {
            // Clear the stored slot before the allocation is freed.
            blockKernelManager->pushPrivateSurface(nullptr, blockIndex);
            this->executionEnvironment.memoryManager->freeGraphicsMemory(privateSurface);
        }

        auto blockInfo = blockKernelManager->getBlockKernelInfo(blockIndex);
        DEBUG_BREAK_IF(!blockInfo->kernelAllocation);
        if (blockInfo->kernelAllocation) {
            this->executionEnvironment.memoryManager->freeGraphicsMemory(blockInfo->kernelAllocation);
        }
    }
}
void Program::cleanCurrentKernelInfo(uint32_t rootDeviceIndex) {
auto &buildInfo = buildInfos[rootDeviceIndex];
for (auto &kernelInfo : buildInfo.kernelInfoArray) {

View File

@@ -30,7 +30,6 @@ namespace PatchTokenBinary {
struct ProgramFromPatchtokens;
}
class BlockKernelManager;
class BuiltinDispatchInfoBuilder;
class ClDevice;
class Context;
@@ -200,12 +199,6 @@ class Program : public BaseObject<_cl_program> {
return buildInfos[rootDeviceIndex].exportedFunctionsSurface;
}
BlockKernelManager *getBlockKernelManager() const {
return blockKernelManager;
}
void allocateBlockPrivateSurfaces(const ClDevice &clDevice);
void freeBlockResources();
void cleanCurrentKernelInfo(uint32_t rootDeviceIndex);
const std::string &getOptions() const { return options; }
@@ -294,8 +287,6 @@ class Program : public BaseObject<_cl_program> {
MOCKABLE_VIRTUAL cl_int linkBinary(Device *pDevice, const void *constantsInitData, const void *variablesInitData, const ProgramInfo::GlobalSurfaceInfo &stringInfo);
void separateBlockKernels(uint32_t rootDeviceIndex);
void updateNonUniformFlag();
void updateNonUniformFlag(const Program **inputProgram, size_t numInputPrograms);
@@ -365,7 +356,6 @@ class Program : public BaseObject<_cl_program> {
CIF::RAII::UPtr_t<CIF::Builtins::BufferSimple> specConstantsSizes;
specConstValuesMap specConstantsValues;
BlockKernelManager *blockKernelManager = nullptr;
ExecutionEnvironment &executionEnvironment;
Context *context = nullptr;
ClDeviceVector clDevices;