mirror of
https://github.com/intel/compute-runtime.git
synced 2025-11-10 05:49:51 +08:00
Reorganization directory structure [1/n]
Change-Id: Id1a94577437a4826a32411869f516fec20314ec0
This commit is contained in:
18
opencl/source/scheduler/CMakeLists.txt
Normal file
18
opencl/source/scheduler/CMakeLists.txt
Normal file
@@ -0,0 +1,18 @@
|
||||
#
|
||||
# Copyright (C) 2017-2020 Intel Corporation
|
||||
#
|
||||
# SPDX-License-Identifier: MIT
|
||||
#
|
||||
|
||||
# Files in this directory that belong to the scheduler part of the runtime.
set(RUNTIME_SRCS_SCHEDULER
    ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
    ${CMAKE_CURRENT_SOURCE_DIR}/scheduler.cl
    ${CMAKE_CURRENT_SOURCE_DIR}/scheduler_kernel.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/scheduler_kernel.h
)

# Publish the list as a global property of the same name and attach the
# files to the static library target.
set_property(GLOBAL PROPERTY RUNTIME_SRCS_SCHEDULER ${RUNTIME_SRCS_SCHEDULER})
target_sources(${NEO_STATIC_LIB_NAME} PRIVATE ${RUNTIME_SRCS_SCHEDULER})

# The scheduler binary library is defined by scheduler_binary.cmake; pull it
# in only when the target has not been created yet.
if(NOT TARGET ${SCHEDULER_BINARY_LIB_NAME})
  include(scheduler_binary.cmake)
endif()
|
||||
3332
opencl/source/scheduler/scheduler.cl
Normal file
3332
opencl/source/scheduler/scheduler.cl
Normal file
File diff suppressed because it is too large
Load Diff
98
opencl/source/scheduler/scheduler_binary.cmake
Normal file
98
opencl/source/scheduler/scheduler_binary.cmake
Normal file
@@ -0,0 +1,98 @@
|
||||
#
|
||||
# Copyright (C) 2018-2020 Intel Corporation
|
||||
#
|
||||
# SPDX-License-Identifier: MIT
|
||||
#
|
||||
|
||||
# Umbrella target; the per-family scheduler targets are added to it as
# dependencies further down in this file.
add_custom_target(scheduler)
set_property(TARGET scheduler PROPERTY FOLDER "scheduler")

# Output location, kernel source name and include directory used below.
set(SCHEDULER_OUTDIR_WITH_ARCH "${TargetDir}/scheduler/${NEO_ARCH}")
set(SCHEDULER_KERNEL scheduler.cl)
set(SCHEDULER_INCLUDE_DIR ${TargetDir})

# Options forwarded to the kernel compiler through "-options".
# Every IGC include directory becomes its own -I switch.
if(DEFINED NEO__IGC_INCLUDE_DIR)
  list(APPEND __cloc__options__ "-I$<JOIN:${NEO__IGC_INCLUDE_DIR}, -I>")
endif()

# Debug configurations additionally compile the kernel with DEBUG defined.
if("${CMAKE_BUILD_TYPE}" STREQUAL "Debug")
  list(APPEND __cloc__options__ "-D DEBUG")
endif()
|
||||
|
||||
# compile_kernel(<target> <gen_type> <platform_type> <kernel>)
#
# Adds a build rule that runs ocloc on <kernel> for the default supported
# platform of the given gen/platform type and wraps it in the custom target
# <target>.
#
#   target        - name of the custom target to create
#   gen_type      - gen identifier (its lower-case form names the output folder)
#   platform_type - platform type used to select the ocloc -device argument
#   kernel        - kernel source file, resolved against CMAKE_CURRENT_SOURCE_DIR
#
# Sets SCHEDULER_CPP in the caller's scope to the path of the generated .cpp.
# Relies on get_family_name_with_type() providing family_name_with_type
# (NOTE(review): defined outside this file - confirm).
function(compile_kernel target gen_type platform_type kernel)
  get_family_name_with_type(${gen_type} ${platform_type})
  string(TOLOWER ${gen_type} gen_type_lower)

  set(OUTPUTDIR "${SCHEDULER_OUTDIR_WITH_ARCH}/${gen_type_lower}")
  list(APPEND __cloc__options__ "-I ../${gen_type_lower}")

  # get filename
  get_filename_component(BASENAME ${kernel} NAME_WE)

  set(OUTPUTPATH "${OUTPUTDIR}/${BASENAME}_${family_name_with_type}.bin")
  set(SCHEDULER_CPP "${OUTPUTDIR}/${BASENAME}_${family_name_with_type}.cpp")

  # On non-Windows hosts ocloc is started with LD_LIBRARY_PATH pointing either
  # at the IGC library directory or at the directory ocloc itself was built in.
  if(WIN32)
    set(cloc_cmd_prefix ocloc)
  else()
    if(DEFINED NEO__IGC_LIBRARY_PATH)
      set(cloc_cmd_prefix LD_LIBRARY_PATH=${NEO__IGC_LIBRARY_PATH} $<TARGET_FILE:ocloc>)
    else()
      set(cloc_cmd_prefix LD_LIBRARY_PATH=$<TARGET_FILE_DIR:ocloc> $<TARGET_FILE:ocloc>)
    endif()
  endif()

  list(APPEND __cloc__options__ "-cl-kernel-arg-info")
  list(APPEND __cloc__options__ "-cl-std=CL2.0")
  list(APPEND __cloc__options__ "-cl-intel-disable-a64WA")

  # The same ocloc run that produces the .bin also emits the .cpp wrapper
  # (-cpp_file). Declaring it as a byproduct tells the generator which rule
  # creates the file that is later compiled into the scheduler binary library.
  add_custom_command(
    OUTPUT ${OUTPUTPATH}
    BYPRODUCTS ${SCHEDULER_CPP}
    COMMAND ${cloc_cmd_prefix} -q -file ${kernel} -device ${DEFAULT_SUPPORTED_${gen_type}_${platform_type}_PLATFORM} -cl-intel-greater-than-4GB-buffer-required -${NEO_BITS} -out_dir ${OUTPUTDIR} -cpp_file -options "$<JOIN:${__cloc__options__}, >"
    WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
    DEPENDS ${kernel} ocloc copy_compiler_files
  )
  set(SCHEDULER_CPP ${SCHEDULER_CPP} PARENT_SCOPE)

  add_custom_target(${target} DEPENDS ${OUTPUTPATH})
  set_target_properties(${target} PROPERTIES FOLDER "scheduler/${gen_type_lower}")
endfunction()
|
||||
|
||||
# macro_for_each_gen()
#
# Callback used with apply_macro_for_each_gen(); expects GEN_TYPE and
# GEN_TYPE_LOWER to be set by the caller
# (NOTE(review): presumably invoked once per gen - confirm against
# apply_macro_for_each_gen).
#
# For every platform type of the current gen that has device enqueue support
# and a default 2.0 platform, and when COMPILE_BUILT_INS is on, it creates a
# scheduler_<family> target via compile_kernel(), hooks it into the
# "scheduler" umbrella target and appends to the caller-scope lists
# SCHEDULER_TARGETS and GENERATED_SCHEDULER_CPPS.
macro(macro_for_each_gen)
  # Files generated for this gen only. GENERATED_SCHEDULER_CPPS keeps growing
  # across gens, so using it for source_group() would move the files of
  # earlier gens into the folder of the gen processed last.
  set(SCHEDULER_CPPS_FOR_CURRENT_GEN "")

  foreach(PLATFORM_TYPE ${PLATFORM_TYPES})
    if(${GEN_TYPE}_HAS_${PLATFORM_TYPE} AND SUPPORT_DEVICE_ENQUEUE_${GEN_TYPE})
      get_family_name_with_type(${GEN_TYPE} ${PLATFORM_TYPE})
      set(PLATFORM_2_0_LOWER ${DEFAULT_SUPPORTED_2_0_${GEN_TYPE}_${PLATFORM_TYPE}_PLATFORM})
      if(COMPILE_BUILT_INS AND PLATFORM_2_0_LOWER)
        compile_kernel(scheduler_${family_name_with_type} ${GEN_TYPE} ${PLATFORM_TYPE} ${SCHEDULER_KERNEL})
        add_dependencies(scheduler scheduler_${family_name_with_type})
        list(APPEND SCHEDULER_TARGETS scheduler_${family_name_with_type})
        list(APPEND GENERATED_SCHEDULER_CPPS ${SCHEDULER_CPP})
        list(APPEND SCHEDULER_CPPS_FOR_CURRENT_GEN ${SCHEDULER_CPP})
      endif()
    endif()
  endforeach()

  if(SCHEDULER_CPPS_FOR_CURRENT_GEN)
    source_group("generated files\\${GEN_TYPE_LOWER}" FILES ${SCHEDULER_CPPS_FOR_CURRENT_GEN})
  endif()
  # A macro shares the caller's scope; do not leave the helper list behind.
  unset(SCHEDULER_CPPS_FOR_CURRENT_GEN)
endmacro()
|
||||
|
||||
# Run macro_for_each_gen for the supported gens; this fills SCHEDULER_TARGETS
# and GENERATED_SCHEDULER_CPPS.
apply_macro_for_each_gen("SUPPORTED")

# Object library holding the generated scheduler sources. CMakeLists.txt is
# listed so the target has a source even when nothing gets generated.
add_library(${SCHEDULER_BINARY_LIB_NAME} OBJECT EXCLUDE_FROM_ALL CMakeLists.txt)

if(COMPILE_BUILT_INS)
  # The .cpp files only exist after the per-family targets have run, so mark
  # them as generated and build those targets first.
  set_source_files_properties(${GENERATED_SCHEDULER_CPPS} PROPERTIES GENERATED TRUE)
  target_sources(${SCHEDULER_BINARY_LIB_NAME} PUBLIC ${GENERATED_SCHEDULER_CPPS})
  foreach(SCHEDULER_TARGET ${SCHEDULER_TARGETS})
    add_dependencies(${SCHEDULER_BINARY_LIB_NAME} ${SCHEDULER_TARGET})
  endforeach()
endif()

set_target_properties(${SCHEDULER_BINARY_LIB_NAME} PROPERTIES
                      LINKER_LANGUAGE CXX
                      POSITION_INDEPENDENT_CODE ON
                      FOLDER "scheduler"
)

add_dependencies(${SCHEDULER_BINARY_LIB_NAME} scheduler)

target_include_directories(${SCHEDULER_BINARY_LIB_NAME}
                           PRIVATE
                           ${ENGINE_NODE_DIR}
                           ${KHRONOS_HEADERS_DIR}
                           ${NEO__GMM_INCLUDE_DIR}
                           ${NEO__IGC_INCLUDE_DIR}
                           ${THIRD_PARTY_DIR}
)
|
||||
77
opencl/source/scheduler/scheduler_kernel.cpp
Normal file
77
opencl/source/scheduler/scheduler_kernel.cpp
Normal file
@@ -0,0 +1,77 @@
|
||||
/*
|
||||
* Copyright (C) 2017-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "scheduler/scheduler_kernel.h"
|
||||
|
||||
#include "core/device/device.h"
|
||||
#include "core/helpers/hw_helper.h"
|
||||
|
||||
#include "device/cl_device.h"
|
||||
|
||||
#include <cinttypes>
|
||||
|
||||
namespace NEO {
|
||||
|
||||
// Binds the scheduler surfaces as SVM kernel arguments. The eight mandatory
// allocations occupy argument slots 0-7 in declaration order; debugQueue, when
// provided, is bound to slot 8. The CPU addresses of the mandatory surfaces
// are then logged under the PrintEMDebugInformation flag.
void SchedulerKernel::setArgs(GraphicsAllocation *queue,
                              GraphicsAllocation *commandsStack,
                              GraphicsAllocation *eventsPool,
                              GraphicsAllocation *secondaryBatchBuffer,
                              GraphicsAllocation *dsh,
                              GraphicsAllocation *reflectionSurface,
                              GraphicsAllocation *queueStorageBuffer,
                              GraphicsAllocation *ssh,
                              GraphicsAllocation *debugQueue) {

    // Each surface is passed by its GPU address together with its allocation.
    auto bindSurface = [this](uint32_t slot, GraphicsAllocation *surface) {
        setArgSvmAlloc(slot, reinterpret_cast<void *>(surface->getGpuAddress()), surface);
    };

    GraphicsAllocation *const mandatorySurfaces[] = {
        queue,
        commandsStack,
        eventsPool,
        secondaryBatchBuffer,
        dsh,
        reflectionSurface,
        queueStorageBuffer,
        ssh,
    };

    uint32_t slot = 0u;
    for (auto *surface : mandatorySurfaces) {
        bindSurface(slot++, surface);
    }

    // slot is 8 here: the optional debug queue follows the mandatory surfaces.
    if (debugQueue != nullptr) {
        bindSurface(slot, debugQueue);
    }

    DBG_LOG(PrintEMDebugInformation,
            "Scheduler Surfaces: \nqueue=", queue->getUnderlyingBuffer(), " \nstack=", commandsStack->getUnderlyingBuffer(),
            " \nevents=", eventsPool->getUnderlyingBuffer(), " \nslb=", secondaryBatchBuffer->getUnderlyingBuffer(), "\ndsh=", dsh->getUnderlyingBuffer(),
            " \nkrs=", reflectionSurface->getUnderlyingBuffer(), " \nstorage=", queueStorageBuffer->getUnderlyingBuffer(), "\nssh=", ssh->getUnderlyingBuffer());
}
|
||||
void SchedulerKernel::computeGws() {
|
||||
auto &devInfo = device.getDeviceInfo();
|
||||
auto &hwInfo = device.getHardwareInfo();
|
||||
auto &helper = HwHelper::get(hwInfo.platform.eRenderCoreFamily);
|
||||
|
||||
size_t hWThreadsPerSubSlice = devInfo.maxComputUnits / hwInfo.gtSystemInfo.SubSliceCount;
|
||||
size_t wkgsPerSubSlice = hWThreadsPerSubSlice / PARALLEL_SCHEDULER_HWTHREADS_IN_HW_GROUP20;
|
||||
|
||||
wkgsPerSubSlice = std::min(wkgsPerSubSlice, helper.getMaxBarrierRegisterPerSlice());
|
||||
gws = wkgsPerSubSlice * hwInfo.gtSystemInfo.SubSliceCount * PARALLEL_SCHEDULER_HWTHREADS_IN_HW_GROUP20 * PARALLEL_SCHEDULER_COMPILATION_SIZE_20;
|
||||
|
||||
if (device.isSimulation()) {
|
||||
gws = PARALLEL_SCHEDULER_HWTHREADS_IN_HW_GROUP20 * PARALLEL_SCHEDULER_COMPILATION_SIZE_20;
|
||||
}
|
||||
if (DebugManager.flags.SchedulerGWS.get() != 0) {
|
||||
DEBUG_BREAK_IF(DebugManager.flags.SchedulerGWS.get() % (PARALLEL_SCHEDULER_HWTHREADS_IN_HW_GROUP20 * PARALLEL_SCHEDULER_COMPILATION_SIZE_20) != 0);
|
||||
gws = DebugManager.flags.SchedulerGWS.get();
|
||||
}
|
||||
|
||||
DBG_LOG(PrintEMDebugInformation, "Scheduler GWS: ", gws);
|
||||
printDebugString(DebugManager.flags.PrintDebugMessages.get(), stderr, "Scheduler GWS: %" PRIu64, static_cast<uint64_t>(gws));
|
||||
}
|
||||
// Loads the scheduler kernel binary for the given device from embedded
// storage. The resource name is the device's family name with type followed
// by "_0_scheduler.builtin_kernel.bin". Returns a BuiltinCode of Binary type
// targeting that device.
BuiltinCode SchedulerKernel::loadSchedulerKernel(Device *device) {
    const std::string resourceName = getFamilyNameWithType(device->getHardwareInfo()) + "_0_scheduler.builtin_kernel.bin";
    auto embeddedStorage = std::make_unique<EmbeddedStorage>("");

    BuiltinCode schedulerCode;
    schedulerCode.resource = embeddedStorage->load(resourceName);
    schedulerCode.type = BuiltinCode::ECodeType::Binary;
    schedulerCode.targetDevice = device;
    return schedulerCode;
}
|
||||
} // namespace NEO
|
||||
66
opencl/source/scheduler/scheduler_kernel.h
Normal file
66
opencl/source/scheduler/scheduler_kernel.h
Normal file
@@ -0,0 +1,66 @@
|
||||
/*
|
||||
* Copyright (C) 2017-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include "built_ins/built_ins.h"
|
||||
#include "kernel/kernel.h"
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
namespace NEO {
|
||||
|
||||
class SchedulerKernel : public Kernel {
|
||||
public:
|
||||
static constexpr const char *schedulerName = "SchedulerParallel20";
|
||||
friend Kernel;
|
||||
|
||||
~SchedulerKernel() override = default;
|
||||
|
||||
size_t getLws() {
|
||||
return PARALLEL_SCHEDULER_HWTHREADS_IN_HW_GROUP20 * PARALLEL_SCHEDULER_COMPILATION_SIZE_20;
|
||||
}
|
||||
|
||||
size_t getGws() {
|
||||
return gws;
|
||||
}
|
||||
|
||||
void setGws(size_t newGws) {
|
||||
gws = newGws;
|
||||
}
|
||||
|
||||
size_t getCurbeSize() {
|
||||
size_t crossTrheadDataSize = kernelInfo.patchInfo.dataParameterStream ? kernelInfo.patchInfo.dataParameterStream->DataParameterStreamSize : 0;
|
||||
size_t dshSize = kernelInfo.heapInfo.pKernelHeader ? kernelInfo.heapInfo.pKernelHeader->DynamicStateHeapSize : 0;
|
||||
|
||||
crossTrheadDataSize = alignUp(crossTrheadDataSize, 64);
|
||||
dshSize = alignUp(dshSize, 64);
|
||||
|
||||
return alignUp(SCHEDULER_DYNAMIC_PAYLOAD_SIZE, 64) + crossTrheadDataSize + dshSize;
|
||||
}
|
||||
|
||||
void setArgs(GraphicsAllocation *queue,
|
||||
GraphicsAllocation *commandsStack,
|
||||
GraphicsAllocation *eventsPool,
|
||||
GraphicsAllocation *secondaryBatchBuffer,
|
||||
GraphicsAllocation *dsh,
|
||||
GraphicsAllocation *reflectionSurface,
|
||||
GraphicsAllocation *queueStorageBuffer,
|
||||
GraphicsAllocation *ssh,
|
||||
GraphicsAllocation *debugQueue = nullptr);
|
||||
static BuiltinCode loadSchedulerKernel(Device *device);
|
||||
|
||||
protected:
|
||||
SchedulerKernel(Program *programArg, const KernelInfo &kernelInfoArg, const ClDevice &deviceArg) : Kernel(programArg, kernelInfoArg, deviceArg, true), gws(0) {
|
||||
computeGws();
|
||||
};
|
||||
|
||||
void computeGws();
|
||||
|
||||
size_t gws;
|
||||
};
|
||||
|
||||
} // namespace NEO
|
||||
Reference in New Issue
Block a user