mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-29 09:03:14 +08:00
Remove device enqueue part 7
- mainly remove BlockKernelManager and ReflectionSurfaceHelper
Related-To: NEO-6559
Signed-off-by: Katarzyna Cencelewska <katarzyna.cencelewska@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
8ebef3769c
commit
97765cd39f
@@ -1,5 +1,5 @@
|
||||
#
|
||||
# Copyright (C) 2018-2021 Intel Corporation
|
||||
# Copyright (C) 2018-2022 Intel Corporation
|
||||
#
|
||||
# SPDX-License-Identifier: MIT
|
||||
#
|
||||
@@ -7,8 +7,6 @@
|
||||
set(RUNTIME_SRCS_PROGRAM
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
|
||||
${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}additional_options.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/block_kernel_manager.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/block_kernel_manager.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/build.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/compile.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/create.cpp
|
||||
|
||||
@@ -1,60 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2018-2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "opencl/source/program/block_kernel_manager.h"
|
||||
|
||||
#include "shared/source/command_stream/command_stream_receiver.h"
|
||||
#include "shared/source/helpers/debug_helpers.h"
|
||||
#include "shared/source/program/kernel_info.h"
|
||||
|
||||
namespace NEO {
|
||||
|
||||
// Registers a block kernel with the manager.
//
// blockKernelInfo is appended to blockKernelInfoArray; the pointers stored there
// are deleted by ~BlockKernelManager.
//
// blockUsesPrintf reports whether any registered block kernel uses printf, so the
// flag is accumulated across calls. Plain assignment would let a later kernel
// without printf erase the fact that an earlier kernel needs it.
void BlockKernelManager::addBlockKernelInfo(KernelInfo *blockKernelInfo) {
    blockKernelInfoArray.push_back(blockKernelInfo);
    blockUsesPrintf = blockUsesPrintf || blockKernelInfo->kernelDescriptor.kernelAttributes.flags.usesPrintf;
}
|
||||
|
||||
// Returns the block kernel registered at position `ordinal`.
// The ordinal is only validated through DEBUG_BREAK_IF; the element access
// itself is unchecked, so callers must pass ordinal < getCount().
const KernelInfo *BlockKernelManager::getBlockKernelInfo(size_t ordinal) {
    DEBUG_BREAK_IF(!(ordinal < blockKernelInfoArray.size()));
    const auto &registeredBlocks = blockKernelInfoArray;
    return registeredBlocks[ordinal];
}
|
||||
|
||||
// Deletes every KernelInfo pointer stored in blockKernelInfoArray.
BlockKernelManager::~BlockKernelManager() {
    for (auto *storedKernelInfo : blockKernelInfoArray) {
        delete storedKernelInfo;
    }
}
|
||||
// Stores `allocation` as the private surface of the block kernel at `ordinal`.
// The surface array is grown (filled with nullptr) to one slot per registered
// block kernel before the write. The ordinal is only validated through
// DEBUG_BREAK_IF; the write itself is unchecked.
void BlockKernelManager::pushPrivateSurface(GraphicsAllocation *allocation, size_t ordinal) {
    const auto kernelCount = blockKernelInfoArray.size();
    if (kernelCount > blockPrivateSurfaceArray.size()) {
        blockPrivateSurfaceArray.resize(kernelCount, nullptr);
    }

    DEBUG_BREAK_IF(!(ordinal < blockPrivateSurfaceArray.size()));

    blockPrivateSurfaceArray[ordinal] = allocation;
}
|
||||
|
||||
// Returns the private surface stored for the block kernel at `ordinal`.
GraphicsAllocation *BlockKernelManager::getPrivateSurface(size_t ordinal) {
    // If the queried ordinal is out of bounds, return nullptr;
    // this happens when no private surface has been pushed yet.
    const bool slotExists = ordinal < blockPrivateSurfaceArray.size();
    return slotExists ? blockPrivateSurfaceArray[ordinal] : nullptr;
}
|
||||
// Calls makeResident on commandStreamReceiver for the allocations of every
// registered block kernel: first its private surface, then the allocation
// returned by KernelInfo::getGraphicsAllocation(). Null allocations are skipped.
void BlockKernelManager::makeInternalAllocationsResident(CommandStreamReceiver &commandStreamReceiver) {
    const auto makeResidentIfPresent = [&commandStreamReceiver](GraphicsAllocation *allocation) {
        if (allocation != nullptr) {
            commandStreamReceiver.makeResident(*allocation);
        }
    };

    const auto kernelCount = blockKernelInfoArray.size();
    for (uint32_t blockIndex = 0; blockIndex < kernelCount; ++blockIndex) {
        makeResidentIfPresent(getPrivateSurface(blockIndex));
        makeResidentIfPresent(blockKernelInfoArray[blockIndex]->getGraphicsAllocation());
    }
}
|
||||
} // namespace NEO
|
||||
@@ -1,41 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2018-2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include "opencl/source/api/cl_types.h"
|
||||
|
||||
#include <vector>
|
||||
|
||||
namespace NEO {
class GraphicsAllocation;
class CommandStreamReceiver;
struct KernelInfo;

// Keeps the KernelInfo objects of block kernels together with one optional
// private-surface allocation per block kernel.
//
// The KernelInfo pointers handed to addBlockKernelInfo() are deleted by the
// destructor. The private-surface pointers are only stored here; this class
// does not free them.
class BlockKernelManager {
  public:
    BlockKernelManager() = default;
    virtual ~BlockKernelManager();

    // The destructor deletes the stored KernelInfo pointers, so a copied manager
    // would delete the same objects a second time. Copying is therefore disabled.
    BlockKernelManager(const BlockKernelManager &) = delete;
    BlockKernelManager &operator=(const BlockKernelManager &) = delete;

    // Appends a block kernel to the manager.
    void addBlockKernelInfo(KernelInfo *);

    // Returns the block kernel stored at `ordinal`.
    const KernelInfo *getBlockKernelInfo(size_t ordinal);

    // Number of block kernels registered so far.
    size_t getCount() const {
        return blockKernelInfoArray.size();
    }

    // Current value of the printf-usage flag maintained by addBlockKernelInfo().
    bool getIfBlockUsesPrintf() const {
        return blockUsesPrintf;
    }

    // Stores `allocation` as the private surface of the block kernel at `ordinal`.
    void pushPrivateSurface(GraphicsAllocation *allocation, size_t ordinal);

    // Returns the private surface stored at `ordinal`.
    GraphicsAllocation *getPrivateSurface(size_t ordinal);

    // Makes the allocations of all registered block kernels resident on the
    // given command stream receiver.
    void makeInternalAllocationsResident(CommandStreamReceiver &);

  protected:
    bool blockUsesPrintf = false;
    std::vector<KernelInfo *> blockKernelInfoArray;
    std::vector<GraphicsAllocation *> blockPrivateSurfaceArray;
};
} // namespace NEO
|
||||
@@ -198,10 +198,6 @@ cl_int Program::build(
|
||||
phaseReached[rootDeviceIndex] = BuildPhase::DebugDataNotification;
|
||||
}
|
||||
}
|
||||
|
||||
for (const auto &device : deviceVector) {
|
||||
separateBlockKernels(device->getRootDeviceIndex());
|
||||
}
|
||||
} while (false);
|
||||
|
||||
if (retVal != CL_SUCCESS) {
|
||||
|
||||
@@ -204,9 +204,6 @@ cl_int Program::link(
|
||||
break;
|
||||
}
|
||||
updateNonUniformFlag(&*inputProgramsInternal.begin(), inputProgramsInternal.size());
|
||||
for (const auto &device : deviceVector) {
|
||||
separateBlockKernels(device->getRootDeviceIndex());
|
||||
}
|
||||
} while (false);
|
||||
|
||||
if (retVal != CL_SUCCESS) {
|
||||
|
||||
@@ -30,7 +30,6 @@
|
||||
#include "opencl/source/cl_device/cl_device.h"
|
||||
#include "opencl/source/context/context.h"
|
||||
#include "opencl/source/platform/platform.h"
|
||||
#include "opencl/source/program/block_kernel_manager.h"
|
||||
|
||||
#include "compiler_options.h"
|
||||
|
||||
@@ -45,7 +44,6 @@ Program::Program(Context *context, bool isBuiltIn, const ClDeviceVector &clDevic
|
||||
if (this->context && !this->isBuiltIn) {
|
||||
this->context->incRefInternal();
|
||||
}
|
||||
blockKernelManager = new BlockKernelManager();
|
||||
|
||||
maxRootDeviceIndex = 0;
|
||||
|
||||
@@ -112,9 +110,6 @@ Program::~Program() {
|
||||
cleanCurrentKernelInfo(i);
|
||||
}
|
||||
|
||||
freeBlockResources();
|
||||
|
||||
delete blockKernelManager;
|
||||
for (const auto &buildInfo : buildInfos) {
|
||||
if (buildInfo.constantSurface) {
|
||||
if ((nullptr != context) && (nullptr != context->getSVMAllocsManager()) && (context->getSVMAllocsManager()->getSVMAlloc(reinterpret_cast<const void *>(buildInfo.constantSurface->getGpuAddress())))) {
|
||||
@@ -303,83 +298,6 @@ const char *Program::getBuildLog(uint32_t rootDeviceIndex) const {
|
||||
return currentLog.c_str();
|
||||
}
|
||||
|
||||
void Program::separateBlockKernels(uint32_t rootDeviceIndex) {
|
||||
if ((0 == buildInfos[rootDeviceIndex].parentKernelInfoArray.size()) && (0 == buildInfos[rootDeviceIndex].subgroupKernelInfoArray.size())) {
|
||||
return;
|
||||
}
|
||||
|
||||
auto allKernelInfos(buildInfos[rootDeviceIndex].kernelInfoArray);
|
||||
buildInfos[rootDeviceIndex].kernelInfoArray.clear();
|
||||
for (auto &i : allKernelInfos) {
|
||||
auto end = i->kernelDescriptor.kernelMetadata.kernelName.rfind("_dispatch_");
|
||||
if (end != std::string::npos) {
|
||||
bool baseKernelFound = false;
|
||||
std::string baseKernelName(i->kernelDescriptor.kernelMetadata.kernelName, 0, end);
|
||||
for (auto &j : buildInfos[rootDeviceIndex].parentKernelInfoArray) {
|
||||
if (j->kernelDescriptor.kernelMetadata.kernelName.compare(baseKernelName) == 0) {
|
||||
baseKernelFound = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!baseKernelFound) {
|
||||
for (auto &j : buildInfos[rootDeviceIndex].subgroupKernelInfoArray) {
|
||||
if (j->kernelDescriptor.kernelMetadata.kernelName.compare(baseKernelName) == 0) {
|
||||
baseKernelFound = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (baseKernelFound) {
|
||||
//Parent or subgroup kernel found -> child kernel
|
||||
blockKernelManager->addBlockKernelInfo(i);
|
||||
} else {
|
||||
buildInfos[rootDeviceIndex].kernelInfoArray.push_back(i);
|
||||
}
|
||||
} else {
|
||||
//Regular kernel found
|
||||
buildInfos[rootDeviceIndex].kernelInfoArray.push_back(i);
|
||||
}
|
||||
}
|
||||
allKernelInfos.clear();
|
||||
}
|
||||
|
||||
void Program::allocateBlockPrivateSurfaces(const ClDevice &clDevice) {
|
||||
auto rootDeviceIndex = clDevice.getRootDeviceIndex();
|
||||
size_t blockCount = blockKernelManager->getCount();
|
||||
|
||||
for (uint32_t i = 0; i < blockCount; i++) {
|
||||
const KernelInfo *info = blockKernelManager->getBlockKernelInfo(i);
|
||||
|
||||
auto perHwThreadPrivateMemorySize = info->kernelDescriptor.kernelAttributes.perHwThreadPrivateMemorySize;
|
||||
if (perHwThreadPrivateMemorySize > 0 && blockKernelManager->getPrivateSurface(i) == nullptr) {
|
||||
auto privateSize = static_cast<size_t>(KernelHelper::getPrivateSurfaceSize(perHwThreadPrivateMemorySize, clDevice.getSharedDeviceInfo().computeUnitsUsedForScratch));
|
||||
|
||||
auto *privateSurface = this->executionEnvironment.memoryManager->allocateGraphicsMemoryWithProperties(
|
||||
{rootDeviceIndex, privateSize, GraphicsAllocation::AllocationType::PRIVATE_SURFACE, clDevice.getDeviceBitfield()});
|
||||
blockKernelManager->pushPrivateSurface(privateSurface, i);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void Program::freeBlockResources() {
|
||||
size_t blockCount = blockKernelManager->getCount();
|
||||
|
||||
for (uint32_t i = 0; i < blockCount; i++) {
|
||||
|
||||
auto *privateSurface = blockKernelManager->getPrivateSurface(i);
|
||||
|
||||
if (privateSurface != nullptr) {
|
||||
blockKernelManager->pushPrivateSurface(nullptr, i);
|
||||
this->executionEnvironment.memoryManager->freeGraphicsMemory(privateSurface);
|
||||
}
|
||||
auto kernelInfo = blockKernelManager->getBlockKernelInfo(i);
|
||||
DEBUG_BREAK_IF(!kernelInfo->kernelAllocation);
|
||||
if (kernelInfo->kernelAllocation) {
|
||||
this->executionEnvironment.memoryManager->freeGraphicsMemory(kernelInfo->kernelAllocation);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void Program::cleanCurrentKernelInfo(uint32_t rootDeviceIndex) {
|
||||
auto &buildInfo = buildInfos[rootDeviceIndex];
|
||||
for (auto &kernelInfo : buildInfo.kernelInfoArray) {
|
||||
|
||||
@@ -30,7 +30,6 @@ namespace PatchTokenBinary {
|
||||
struct ProgramFromPatchtokens;
|
||||
}
|
||||
|
||||
class BlockKernelManager;
|
||||
class BuiltinDispatchInfoBuilder;
|
||||
class ClDevice;
|
||||
class Context;
|
||||
@@ -200,12 +199,6 @@ class Program : public BaseObject<_cl_program> {
|
||||
return buildInfos[rootDeviceIndex].exportedFunctionsSurface;
|
||||
}
|
||||
|
||||
// Gives callers access to the BlockKernelManager pointer stored in this Program.
BlockKernelManager *getBlockKernelManager() const { return blockKernelManager; }
|
||||
|
||||
void allocateBlockPrivateSurfaces(const ClDevice &clDevice);
|
||||
void freeBlockResources();
|
||||
void cleanCurrentKernelInfo(uint32_t rootDeviceIndex);
|
||||
|
||||
const std::string &getOptions() const { return options; }
|
||||
@@ -294,8 +287,6 @@ class Program : public BaseObject<_cl_program> {
|
||||
|
||||
MOCKABLE_VIRTUAL cl_int linkBinary(Device *pDevice, const void *constantsInitData, const void *variablesInitData, const ProgramInfo::GlobalSurfaceInfo &stringInfo);
|
||||
|
||||
void separateBlockKernels(uint32_t rootDeviceIndex);
|
||||
|
||||
void updateNonUniformFlag();
|
||||
void updateNonUniformFlag(const Program **inputProgram, size_t numInputPrograms);
|
||||
|
||||
@@ -365,7 +356,6 @@ class Program : public BaseObject<_cl_program> {
|
||||
CIF::RAII::UPtr_t<CIF::Builtins::BufferSimple> specConstantsSizes;
|
||||
specConstValuesMap specConstantsValues;
|
||||
|
||||
BlockKernelManager *blockKernelManager = nullptr;
|
||||
ExecutionEnvironment &executionEnvironment;
|
||||
Context *context = nullptr;
|
||||
ClDeviceVector clDevices;
|
||||
|
||||
Reference in New Issue
Block a user