mirror of
https://github.com/intel/compute-runtime.git
synced 2025-09-10 12:53:42 +08:00
Remove device enqueue part 5
-remove scheduler and builtin_kernels_simulation Related-To: NEO-6559 Signed-off-by: Katarzyna Cencelewska <katarzyna.cencelewska@intel.com>
This commit is contained in:

committed by
Compute-Runtime-Automation

parent
86161a8a4f
commit
d2818aaea2
@ -747,7 +747,6 @@ set(BUILTINS_BINARIES_BINDFUL_LIB_NAME "builtins_binaries_bindful")
|
||||
set(BUILTINS_BINARIES_BINDLESS_LIB_NAME "builtins_binaries_bindless")
|
||||
set(BUILTINS_SPIRV_LIB_NAME "builtins_spirv")
|
||||
set(BUILTINS_VME_LIB_NAME "builtins_vme")
|
||||
set(SCHEDULER_BINARY_LIB_NAME "scheduler_binary")
|
||||
|
||||
if(WIN32)
|
||||
set(NEO_EXTRA_LIBS Ws2_32)
|
||||
|
@ -1,5 +1,5 @@
|
||||
#
|
||||
# Copyright (C) 2021 Intel Corporation
|
||||
# Copyright (C) 2021-2022 Intel Corporation
|
||||
#
|
||||
# SPDX-License-Identifier: MIT
|
||||
#
|
||||
@ -21,9 +21,6 @@ macro(generate_runtime_lib LIB_NAME MOCKABLE GENERATE_EXEC)
|
||||
endif()
|
||||
target_compile_definitions(${BUILTINS_SOURCES_LIB_NAME} PUBLIC MOCKABLE_VIRTUAL=)
|
||||
target_compile_definitions(${BUILTINS_VME_LIB_NAME} PUBLIC MOCKABLE_VIRTUAL=)
|
||||
if(TARGET ${SCHEDULER_BINARY_LIB_NAME})
|
||||
target_compile_definitions(${SCHEDULER_BINARY_LIB_NAME} PUBLIC MOCKABLE_VIRTUAL=)
|
||||
endif()
|
||||
|
||||
if(${MOCKABLE})
|
||||
target_compile_definitions(${LIB_NAME} PUBLIC MOCKABLE_VIRTUAL=virtual)
|
||||
|
@ -1,5 +1,5 @@
|
||||
#
|
||||
# Copyright (C) 2018-2021 Intel Corporation
|
||||
# Copyright (C) 2018-2022 Intel Corporation
|
||||
#
|
||||
# SPDX-License-Identifier: MIT
|
||||
#
|
||||
@ -17,17 +17,11 @@ set(MSVC_DEF_ADDITIONAL_EXPORTS "")
|
||||
set(OPENCL_RUNTIME_PROJECTS_FOLDER "opencl runtime")
|
||||
set(OPENCL_BUILTINS_PROJECTS_FOLDER "built_ins")
|
||||
|
||||
if(NOT TARGET ${BIKSIM_LIB_NAME})
|
||||
add_subdirectory(builtin_kernels_simulation)
|
||||
endif()
|
||||
|
||||
hide_subdir(builtin_kernels_simulation)
|
||||
hide_subdir(dll)
|
||||
|
||||
add_library(${NEO_STATIC_LIB_NAME} STATIC EXCLUDE_FROM_ALL
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/enable_cores.cmake
|
||||
$<TARGET_OBJECTS:${BIKSIM_LIB_NAME}>
|
||||
)
|
||||
|
||||
add_subdirectories()
|
||||
@ -117,11 +111,6 @@ if(${GENERATE_EXECUTABLE})
|
||||
$<TARGET_OBJECTS:${BUILTINS_VME_LIB_NAME}>
|
||||
$<TARGET_OBJECTS:${BUILTINS_BINARIES_BINDFUL_LIB_NAME}>
|
||||
)
|
||||
if(TARGET ${SCHEDULER_BINARY_LIB_NAME})
|
||||
list(APPEND NEO_DYNAMIC_LIB__TARGET_OBJECTS
|
||||
$<TARGET_OBJECTS:${SCHEDULER_BINARY_LIB_NAME}>
|
||||
)
|
||||
endif()
|
||||
|
||||
if(DEFINED AUB_STREAM_PROJECT_NAME)
|
||||
list(APPEND NEO_DYNAMIC_LIB__TARGET_OBJECTS $<TARGET_OBJECTS:${AUB_STREAM_PROJECT_NAME}_all_hw>)
|
||||
@ -211,9 +200,7 @@ endif()
|
||||
create_project_source_tree(${NEO_STATIC_LIB_NAME})
|
||||
|
||||
if(UNIX AND NOT (TARGET clang-tidy))
|
||||
add_custom_target(clang-tidy
|
||||
DEPENDS scheduler
|
||||
)
|
||||
add_custom_target(clang-tidy)
|
||||
add_custom_command(
|
||||
TARGET clang-tidy
|
||||
POST_BUILD
|
||||
@ -222,4 +209,3 @@ if(UNIX AND NOT (TARGET clang-tidy))
|
||||
WORKING_DIRECTORY ${NEO_SOURCE_DIR}
|
||||
)
|
||||
endif()
|
||||
|
||||
|
@ -1,53 +0,0 @@
|
||||
#
|
||||
# Copyright (C) 2018-2022 Intel Corporation
|
||||
#
|
||||
# SPDX-License-Identifier: MIT
|
||||
#
|
||||
|
||||
set(BUILTIN_KERNELS_SIMULATION_SRCS
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/opencl_c.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/opencl_c.h
|
||||
)
|
||||
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
|
||||
|
||||
string(REPLACE "/WX" "" CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS})
|
||||
string(REGEX REPLACE "-Werror[^ \t\n]*" "" CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS})
|
||||
string(REPLACE "-Wsometimes-uninitialized" "" CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS})
|
||||
string(REPLACE "-Wsign-compare" "" CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS})
|
||||
string(REPLACE "-Wunused-variable" "" CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS})
|
||||
|
||||
if(${CMAKE_CXX_COMPILER_ID} STREQUAL "Clang")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-everything")
|
||||
endif()
|
||||
|
||||
if(COMPILER_SUPPORTS_CXX11)
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
|
||||
elseif(COMPILER_SUPPORTS_CXX0X)
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++0x")
|
||||
endif()
|
||||
|
||||
if(NOT MSVC)
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fpermissive -fPIC")
|
||||
endif()
|
||||
|
||||
macro(macro_for_each_core_type)
|
||||
foreach(BRANCH_DIR ${BRANCH_DIR_LIST})
|
||||
list(APPEND DEFAULT_CORE_PLATFORMS_DEFITIONS DEFAULT_${CORE_TYPE}_PLATFORM=${DEFAULT_SUPPORTED_${CORE_TYPE}_PLATFORM})
|
||||
endforeach()
|
||||
endmacro()
|
||||
|
||||
apply_macro_for_each_core_type("SUPPORTED")
|
||||
|
||||
add_library(${BIKSIM_LIB_NAME} OBJECT EXCLUDE_FROM_ALL ${BUILTIN_KERNELS_SIMULATION_SRCS})
|
||||
target_include_directories(${BIKSIM_LIB_NAME} BEFORE PRIVATE
|
||||
${ENGINE_NODE_DIR}
|
||||
${NEO__GMM_INCLUDE_DIR}
|
||||
${KHRONOS_HEADERS_DIR}
|
||||
${KHRONOS_GL_HEADERS_DIR}
|
||||
${NEO__IGC_INCLUDE_DIR}
|
||||
${THIRD_PARTY_DIR}
|
||||
)
|
||||
set_target_properties(${BIKSIM_LIB_NAME} PROPERTIES FOLDER "${OPENCL_RUNTIME_PROJECTS_FOLDER}/${OPENCL_BUILTINS_PROJECTS_FOLDER}")
|
||||
target_compile_definitions(${BIKSIM_LIB_NAME} PUBLIC ${SUPPORTED_CORE_FLAGS_DEFINITONS} ${DEFAULT_CORE_PLATFORMS_DEFITIONS} MOCKABLE_VIRTUAL=)
|
@ -1,136 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2018-2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "opencl_c.h"
|
||||
|
||||
#include "shared/source/helpers/string.h"
|
||||
|
||||
namespace BuiltinKernelsSimulation {
|
||||
|
||||
#define SCHEDULER_EMULATION 1
|
||||
|
||||
// globals
|
||||
std::mutex gMutex;
|
||||
unsigned int globalID[3];
|
||||
unsigned int localID[3];
|
||||
unsigned int localSize[3];
|
||||
|
||||
std::map<std::thread::id, uint32_t> threadIDToLocalIDmap;
|
||||
|
||||
SynchronizationBarrier *pGlobalBarrier = nullptr;
|
||||
|
||||
uint4 operator+(uint4 const &a, uint4 const &b) {
|
||||
uint4 c(0, 0, 0, 0);
|
||||
c.x = a.x + b.x;
|
||||
c.y = a.y + b.y;
|
||||
c.z = a.z + b.z;
|
||||
c.w = a.w + b.w;
|
||||
return c;
|
||||
}
|
||||
|
||||
int4 operator+(int4 const &a, int4 const &b) {
|
||||
int4 c(0, 0, 0, 0);
|
||||
c.x = a.x + b.x;
|
||||
c.y = a.y + b.y;
|
||||
c.z = a.z + b.z;
|
||||
c.w = a.w + b.w;
|
||||
return c;
|
||||
}
|
||||
|
||||
uint get_local_id(int dim) {
|
||||
uint LID = 0;
|
||||
|
||||
// use thread id
|
||||
if (threadIDToLocalIDmap.size() > 0) {
|
||||
std::thread::id id = std::this_thread::get_id();
|
||||
LID = threadIDToLocalIDmap[id] % 24;
|
||||
}
|
||||
// use id from loop iteration
|
||||
else {
|
||||
LID = localID[dim];
|
||||
}
|
||||
return LID;
|
||||
}
|
||||
|
||||
uint get_global_id(int dim) {
|
||||
uint GID = 0;
|
||||
|
||||
// use thread id
|
||||
if (threadIDToLocalIDmap.size() > 0) {
|
||||
std::thread::id id = std::this_thread::get_id();
|
||||
GID = threadIDToLocalIDmap[id];
|
||||
}
|
||||
// use id from loop iteration
|
||||
else {
|
||||
GID = globalID[dim];
|
||||
}
|
||||
return GID;
|
||||
}
|
||||
|
||||
uint get_local_size(int dim) {
|
||||
return localSize[dim];
|
||||
}
|
||||
|
||||
uint get_num_groups(int dim) {
|
||||
return NUM_OF_THREADS / 24;
|
||||
}
|
||||
|
||||
uint get_group_id(int dim) {
|
||||
return get_global_id(dim) / 24;
|
||||
}
|
||||
|
||||
void barrier(int x) {
|
||||
pGlobalBarrier->enter();
|
||||
|
||||
// int LID = get_local_id(0);
|
||||
volatile int BreakPointHere = 0;
|
||||
|
||||
// PUT BREAKPOINT HERE to stop after each barrier
|
||||
BreakPointHere++;
|
||||
}
|
||||
|
||||
uint4 read_imageui(image *im, int4 coord) {
|
||||
uint4 color = {0, 0, 0, 1};
|
||||
|
||||
uint offset = ((coord.z * im->height + coord.y) * im->width + coord.x) * im->bytesPerChannel * im->channels;
|
||||
|
||||
char *temp = &im->ptr[offset];
|
||||
char *colorDst = (char *)&color;
|
||||
|
||||
for (uint i = 0; i < im->channels; i++) {
|
||||
memcpy_s(colorDst, sizeof(uint4), temp, im->bytesPerChannel);
|
||||
temp += im->bytesPerChannel;
|
||||
colorDst += 4;
|
||||
}
|
||||
return color;
|
||||
}
|
||||
|
||||
uint4 write_imageui(image *im, uint4 coord, uint4 color) {
|
||||
uint offset = ((coord.z * im->height + coord.y) * im->width + coord.x) * im->bytesPerChannel * im->channels;
|
||||
|
||||
char *temp = &im->ptr[offset];
|
||||
char *colorSrc = (char *)&color;
|
||||
|
||||
size_t size = im->width * im->height * im->depth * im->bytesPerChannel * im->channels;
|
||||
|
||||
for (uint i = 0; i < im->channels; i++) {
|
||||
memcpy_s(temp, size - offset, colorSrc, im->bytesPerChannel);
|
||||
temp += im->bytesPerChannel;
|
||||
colorSrc += 4;
|
||||
}
|
||||
return *(uint4 *)temp; // NOLINT
|
||||
}
|
||||
|
||||
uchar convert_uchar_sat(uint c) {
|
||||
return (uchar)c;
|
||||
}
|
||||
|
||||
ushort convert_ushort_sat(uint c) {
|
||||
return (ushort)c;
|
||||
}
|
||||
|
||||
} // namespace BuiltinKernelsSimulation
|
@ -1,291 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2018-2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include "CL/cl.h"
|
||||
|
||||
#include <condition_variable>
|
||||
#include <cstdint>
|
||||
#include <map>
|
||||
#include <mutex>
|
||||
#include <string.h>
|
||||
#include <thread>
|
||||
|
||||
// OpenCL Types
|
||||
typedef uint32_t uint;
|
||||
typedef uint8_t uchar;
|
||||
typedef uint16_t ushort;
|
||||
typedef uint64_t ulong;
|
||||
|
||||
namespace BuiltinKernelsSimulation {
|
||||
|
||||
// number of threads in wkg
|
||||
#define NUM_OF_THREADS 24
|
||||
|
||||
#define CLK_GLOBAL_MEM_FENCE 1
|
||||
#define CLK_LOCAL_MEM_FENCE 2
|
||||
|
||||
class SynchronizationBarrier {
|
||||
public:
|
||||
SynchronizationBarrier(int count) : m_InitialCount(count) {
|
||||
m_Count = count;
|
||||
m_BarrierCounter = 0;
|
||||
}
|
||||
|
||||
~SynchronizationBarrier() {
|
||||
}
|
||||
|
||||
void enter() {
|
||||
std::unique_lock<std::mutex> lck(m_Mutex);
|
||||
|
||||
m_Count--;
|
||||
|
||||
unsigned int BarrierCount = m_BarrierCounter;
|
||||
|
||||
if (m_Count > 0) {
|
||||
while (BarrierCount == m_BarrierCounter) {
|
||||
m_AllHitBarrierCondition.wait(lck);
|
||||
}
|
||||
} else {
|
||||
m_Count = m_InitialCount;
|
||||
m_BarrierCounter++;
|
||||
m_AllHitBarrierCondition.notify_all();
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
std::mutex m_Mutex;
|
||||
std::condition_variable m_AllHitBarrierCondition;
|
||||
int m_Count;
|
||||
const int m_InitialCount;
|
||||
unsigned int m_BarrierCounter;
|
||||
};
|
||||
|
||||
// globals
|
||||
extern std::mutex gMutex;
|
||||
extern unsigned int globalID[3];
|
||||
extern unsigned int localID[3];
|
||||
extern unsigned int localSize[3];
|
||||
extern std::map<std::thread::id, uint32_t> threadIDToLocalIDmap;
|
||||
extern SynchronizationBarrier *pGlobalBarrier;
|
||||
|
||||
typedef struct taguint2 {
|
||||
taguint2(uint x, uint y) {
|
||||
this->x = x;
|
||||
this->y = y;
|
||||
}
|
||||
taguint2() {
|
||||
this->x = 0;
|
||||
this->y = 0;
|
||||
}
|
||||
uint x;
|
||||
uint y;
|
||||
} uint2;
|
||||
|
||||
typedef struct taguint3 {
|
||||
taguint3(uint x, uint y, uint z) {
|
||||
this->x = x;
|
||||
this->y = y;
|
||||
this->z = z;
|
||||
}
|
||||
taguint3() {
|
||||
this->x = 0;
|
||||
this->y = 0;
|
||||
this->z = 0;
|
||||
}
|
||||
uint x;
|
||||
uint y;
|
||||
uint z;
|
||||
} uint3;
|
||||
|
||||
typedef struct taguint4 {
|
||||
taguint4(uint x, uint y, uint z, uint w) {
|
||||
this->x = x;
|
||||
this->y = y;
|
||||
this->z = z;
|
||||
this->w = w;
|
||||
}
|
||||
uint x;
|
||||
uint y;
|
||||
uint z;
|
||||
uint w;
|
||||
} uint4;
|
||||
|
||||
typedef struct tagint2 {
|
||||
tagint2(int x, int y) {
|
||||
this->x = x;
|
||||
this->y = y;
|
||||
}
|
||||
int x;
|
||||
int y;
|
||||
} int2;
|
||||
|
||||
typedef struct tagint3 {
|
||||
tagint3(int x, int y, int z) {
|
||||
this->x = x;
|
||||
this->y = y;
|
||||
this->z = z;
|
||||
}
|
||||
int x;
|
||||
int y;
|
||||
int z;
|
||||
} int3;
|
||||
|
||||
typedef struct tagint4 {
|
||||
tagint4(int x, int y, int z, int w) {
|
||||
this->x = x;
|
||||
this->y = y;
|
||||
this->z = z;
|
||||
this->w = w;
|
||||
}
|
||||
int x;
|
||||
int y;
|
||||
int z;
|
||||
int w;
|
||||
} int4;
|
||||
|
||||
typedef struct tagushort2 {
|
||||
tagushort2(ushort x, ushort y) {
|
||||
this->x = x;
|
||||
this->y = y;
|
||||
}
|
||||
unsigned short x;
|
||||
unsigned short y;
|
||||
} ushort2;
|
||||
|
||||
typedef struct tagushort8 {
|
||||
unsigned short xxx[8];
|
||||
} ushort8;
|
||||
|
||||
typedef struct tagushort16 {
|
||||
unsigned short xxx[16];
|
||||
} ushort16;
|
||||
|
||||
uint4 operator+(uint4 const &a, uint4 const &b);
|
||||
int4 operator+(int4 const &a, int4 const &b);
|
||||
|
||||
typedef struct tagimage {
|
||||
char *ptr;
|
||||
uint width;
|
||||
uint height;
|
||||
uint depth;
|
||||
uint bytesPerChannel;
|
||||
uint channels;
|
||||
} image;
|
||||
|
||||
// images as pointer
|
||||
typedef image *image1d_t;
|
||||
typedef image *image2d_t;
|
||||
typedef image *image3d_t;
|
||||
|
||||
// OpenCL keywords
|
||||
#define __global
|
||||
#define __local
|
||||
#define __private
|
||||
#define __kernel
|
||||
#define __attribute__(...)
|
||||
#define __read_only
|
||||
#define __write_only
|
||||
#define queue_t void *
|
||||
|
||||
struct clk_event_t {
|
||||
clk_event_t() {
|
||||
value = 0;
|
||||
}
|
||||
clk_event_t(void *v) {
|
||||
value = static_cast<uint>(reinterpret_cast<uintptr_t>(v));
|
||||
}
|
||||
|
||||
explicit operator void *() const {
|
||||
return reinterpret_cast<void *>(static_cast<uintptr_t>(value));
|
||||
}
|
||||
|
||||
operator uint() {
|
||||
return (uint)value;
|
||||
}
|
||||
|
||||
void operator=(uint input) {
|
||||
value = input;
|
||||
}
|
||||
|
||||
uint value;
|
||||
};
|
||||
|
||||
// OpenCL builtins
|
||||
#define __builtin_astype(var, type) \
|
||||
( \
|
||||
(type)var)
|
||||
|
||||
#define select(a, b, c) (c ? b : a)
|
||||
|
||||
uint get_local_id(int dim);
|
||||
uint get_global_id(int dim);
|
||||
uint get_local_size(int dim);
|
||||
uint get_num_groups(int dim);
|
||||
uint get_group_id(int dim);
|
||||
void barrier(int x);
|
||||
uint4 read_imageui(image *im, int4 coord);
|
||||
uint4 write_imageui(image *im, uint4 coord, uint4 color);
|
||||
uchar convert_uchar_sat(uint c);
|
||||
ushort convert_ushort_sat(uint c);
|
||||
|
||||
#define EMULATION_ENTER_FUNCTION() \
|
||||
uint __LOCAL_ID__ = 0; \
|
||||
__LOCAL_ID__ = get_local_id(0);
|
||||
|
||||
template <class TYPE, class TYPE2>
|
||||
void atomic_xchg(TYPE *dest, TYPE2 val) {
|
||||
gMutex.lock();
|
||||
dest[0] = (TYPE)val;
|
||||
gMutex.unlock();
|
||||
}
|
||||
|
||||
template <class TYPE, class TYPE2>
|
||||
TYPE atomic_add(TYPE *first, TYPE2 second) {
|
||||
gMutex.lock();
|
||||
TYPE temp = first[0];
|
||||
first[0] = (TYPE)(temp + (TYPE)second);
|
||||
gMutex.unlock();
|
||||
return temp;
|
||||
}
|
||||
|
||||
template <class TYPE, class TYPE2>
|
||||
TYPE atomic_sub(TYPE *first, TYPE2 second) {
|
||||
gMutex.lock();
|
||||
TYPE temp = first[0];
|
||||
first[0] = temp - second;
|
||||
gMutex.unlock();
|
||||
return temp;
|
||||
}
|
||||
|
||||
template <class TYPE>
|
||||
TYPE atomic_inc(TYPE *first) {
|
||||
gMutex.lock();
|
||||
TYPE temp = first[0];
|
||||
first[0] = temp + 1;
|
||||
gMutex.unlock();
|
||||
return temp;
|
||||
}
|
||||
|
||||
template <class TYPE>
|
||||
TYPE atomic_dec(TYPE *first) {
|
||||
gMutex.lock();
|
||||
TYPE temp = first[0];
|
||||
first[0] = temp - 1;
|
||||
gMutex.unlock();
|
||||
return temp;
|
||||
}
|
||||
|
||||
template <class TYPE, class TYPE2>
|
||||
TYPE atomic_min(TYPE *first, TYPE2 second) {
|
||||
gMutex.lock();
|
||||
TYPE temp = first[0];
|
||||
first[0] = (TYPE)((TYPE)second < temp ? (TYPE)second : temp);
|
||||
gMutex.unlock();
|
||||
return temp;
|
||||
}
|
||||
} // namespace BuiltinKernelsSimulation
|
@ -1,21 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2018-2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "opencl/source/builtin_kernels_simulation/scheduler_simulation.h"
|
||||
|
||||
#include "opencl/source/builtin_kernels_simulation/opencl_c.h"
|
||||
|
||||
#include <thread>
|
||||
|
||||
using namespace NEO;
|
||||
|
||||
namespace BuiltinKernelsSimulation {
|
||||
|
||||
bool conditionReady = false;
|
||||
std::thread threads[NUM_OF_THREADS];
|
||||
|
||||
} // namespace BuiltinKernelsSimulation
|
@ -1,116 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "opencl/source/command_queue/gpgpu_walker.h"
|
||||
|
||||
namespace NEO {
|
||||
template <typename GfxFamily>
|
||||
void GpgpuWalkerHelper<GfxFamily>::dispatchScheduler(
|
||||
LinearStream &commandStream,
|
||||
DeviceQueueHw<GfxFamily> &devQueueHw,
|
||||
PreemptionMode preemptionMode,
|
||||
SchedulerKernel &scheduler,
|
||||
IndirectHeap *ssh,
|
||||
IndirectHeap *dsh,
|
||||
bool isCcsUsed) {
|
||||
|
||||
const auto &kernelInfo = scheduler.getKernelInfo();
|
||||
|
||||
using INTERFACE_DESCRIPTOR_DATA = typename GfxFamily::INTERFACE_DESCRIPTOR_DATA;
|
||||
using GPGPU_WALKER = typename GfxFamily::GPGPU_WALKER;
|
||||
using MI_BATCH_BUFFER_START = typename GfxFamily::MI_BATCH_BUFFER_START;
|
||||
|
||||
const auto &hwInfo = devQueueHw.getDevice().getHardwareInfo();
|
||||
NEO::PipeControlArgs args;
|
||||
MemorySynchronizationCommands<GfxFamily>::addPipeControl(commandStream, args);
|
||||
|
||||
uint32_t interfaceDescriptorIndex = devQueueHw.schedulerIDIndex;
|
||||
const size_t offsetInterfaceDescriptorTable = devQueueHw.colorCalcStateSize;
|
||||
const size_t offsetInterfaceDescriptor = offsetInterfaceDescriptorTable;
|
||||
const size_t totalInterfaceDescriptorTableSize = devQueueHw.interfaceDescriptorEntries * sizeof(INTERFACE_DESCRIPTOR_DATA);
|
||||
|
||||
// Program media interface descriptor load
|
||||
HardwareCommandsHelper<GfxFamily>::sendMediaInterfaceDescriptorLoad(
|
||||
commandStream,
|
||||
offsetInterfaceDescriptor,
|
||||
totalInterfaceDescriptorTableSize);
|
||||
|
||||
DEBUG_BREAK_IF(offsetInterfaceDescriptorTable % 64 != 0);
|
||||
|
||||
// Determine SIMD size
|
||||
uint32_t simd = kernelInfo.getMaxSimdSize();
|
||||
DEBUG_BREAK_IF(simd != PARALLEL_SCHEDULER_COMPILATION_SIZE_20);
|
||||
|
||||
// Patch our kernel constants
|
||||
scheduler.setGlobalWorkOffsetValues(0, 0, 0);
|
||||
scheduler.setGlobalWorkSizeValues(static_cast<uint32_t>(scheduler.getGws()), 1, 1);
|
||||
scheduler.setLocalWorkSizeValues(static_cast<uint32_t>(scheduler.getLws()), 1, 1);
|
||||
scheduler.setLocalWorkSize2Values(static_cast<uint32_t>(scheduler.getLws()), 1, 1);
|
||||
scheduler.setEnqueuedLocalWorkSizeValues(static_cast<uint32_t>(scheduler.getLws()), 1, 1);
|
||||
scheduler.setNumWorkGroupsValues(static_cast<uint32_t>(scheduler.getGws() / scheduler.getLws()), 0, 0);
|
||||
scheduler.setWorkDim(1);
|
||||
|
||||
// Send our indirect object data
|
||||
size_t localWorkSizes[3] = {scheduler.getLws(), 1, 1};
|
||||
|
||||
// Create indirectHeap for IOH that is located at the end of device enqueue DSH
|
||||
size_t curbeOffset = devQueueHw.setSchedulerCrossThreadData(scheduler);
|
||||
IndirectHeap indirectObjectHeap(dsh->getCpuBase(), dsh->getMaxAvailableSpace());
|
||||
indirectObjectHeap.getSpace(curbeOffset);
|
||||
IndirectHeap *ioh = &indirectObjectHeap;
|
||||
|
||||
// Program the walker. Invokes execution so all state should already be programmed
|
||||
auto pGpGpuWalkerCmd = commandStream.getSpaceForCmd<GPGPU_WALKER>();
|
||||
GPGPU_WALKER cmdWalker = GfxFamily::cmdInitGpgpuWalker;
|
||||
|
||||
bool inlineDataProgrammingRequired = HardwareCommandsHelper<GfxFamily>::inlineDataProgrammingRequired(scheduler);
|
||||
auto kernelUsesLocalIds = HardwareCommandsHelper<GfxFamily>::kernelUsesLocalIds(scheduler);
|
||||
|
||||
HardwareCommandsHelper<GfxFamily>::sendIndirectState(
|
||||
commandStream,
|
||||
*dsh,
|
||||
*ioh,
|
||||
*ssh,
|
||||
scheduler,
|
||||
scheduler.getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed),
|
||||
simd,
|
||||
localWorkSizes,
|
||||
offsetInterfaceDescriptorTable,
|
||||
interfaceDescriptorIndex,
|
||||
preemptionMode,
|
||||
&cmdWalker,
|
||||
nullptr,
|
||||
true,
|
||||
devQueueHw.getDevice());
|
||||
|
||||
// Implement enabling special WA DisableLSQCROPERFforOCL if needed
|
||||
GpgpuWalkerHelper<GfxFamily>::applyWADisableLSQCROPERFforOCL(&commandStream, scheduler, true);
|
||||
|
||||
size_t globalOffsets[3] = {0, 0, 0};
|
||||
size_t workGroups[3] = {(scheduler.getGws() / scheduler.getLws()), 1, 1};
|
||||
GpgpuWalkerHelper<GfxFamily>::setGpgpuWalkerThreadData(&cmdWalker, kernelInfo.kernelDescriptor, globalOffsets, globalOffsets, workGroups, localWorkSizes,
|
||||
simd, 1, true, inlineDataProgrammingRequired, 0u);
|
||||
*pGpGpuWalkerCmd = cmdWalker;
|
||||
|
||||
// Implement disabling special WA DisableLSQCROPERFforOCL if needed
|
||||
GpgpuWalkerHelper<GfxFamily>::applyWADisableLSQCROPERFforOCL(&commandStream, scheduler, false);
|
||||
|
||||
// Do not put BB_START only when returning in first Scheduler run
|
||||
if (devQueueHw.getSchedulerReturnInstance() != 1) {
|
||||
args.dcFlushEnable = MemorySynchronizationCommands<GfxFamily>::getDcFlushEnable(true, hwInfo);
|
||||
MemorySynchronizationCommands<GfxFamily>::addPipeControl(commandStream, args);
|
||||
|
||||
// Add BB Start Cmd to the SLB in the Primary Batch Buffer
|
||||
auto bbStart = commandStream.getSpaceForCmd<MI_BATCH_BUFFER_START>();
|
||||
MI_BATCH_BUFFER_START cmdBbStart = GfxFamily::cmdInitBatchBufferStart;
|
||||
cmdBbStart.setSecondLevelBatchBuffer(MI_BATCH_BUFFER_START::SECOND_LEVEL_BATCH_BUFFER_FIRST_LEVEL_BATCH);
|
||||
uint64_t slbAddress = devQueueHw.getSlbBuffer()->getGpuAddress();
|
||||
cmdBbStart.setBatchBufferStartAddress(slbAddress);
|
||||
*bbStart = cmdBbStart;
|
||||
}
|
||||
}
|
||||
} // namespace NEO
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2018-2021 Intel Corporation
|
||||
* Copyright (C) 2018-2022 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@ -27,7 +27,6 @@
|
||||
#include "opencl/source/helpers/surface_formats.h"
|
||||
#include "opencl/source/mem_obj/image.h"
|
||||
#include "opencl/source/platform/platform.h"
|
||||
#include "opencl/source/scheduler/scheduler_kernel.h"
|
||||
#include "opencl/source/sharings/sharing.h"
|
||||
#include "opencl/source/sharings/sharing_factory.h"
|
||||
|
||||
@ -44,7 +43,6 @@ Context::Context(
|
||||
contextCallback = funcNotify;
|
||||
userData = data;
|
||||
sharingFunctions.resize(SharingType::MAX_SHARING_VALUE);
|
||||
schedulerBuiltIn = std::make_unique<BuiltInKernel>();
|
||||
}
|
||||
|
||||
Context::~Context() {
|
||||
@ -69,10 +67,6 @@ Context::~Context() {
|
||||
for (auto &device : devices) {
|
||||
device->decRefInternal();
|
||||
}
|
||||
delete static_cast<SchedulerKernel *>(schedulerBuiltIn->pKernel);
|
||||
delete schedulerBuiltIn->pProgram;
|
||||
schedulerBuiltIn->pKernel = nullptr;
|
||||
schedulerBuiltIn->pProgram = nullptr;
|
||||
}
|
||||
|
||||
cl_int Context::setDestructorCallback(void(CL_CALLBACK *funcNotify)(cl_context, void *),
|
||||
@ -414,48 +408,6 @@ cl_int Context::getSupportedImageFormats(
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
SchedulerKernel &Context::getSchedulerKernel() {
|
||||
if (schedulerBuiltIn->pKernel) {
|
||||
return *static_cast<SchedulerKernel *>(schedulerBuiltIn->pKernel);
|
||||
}
|
||||
|
||||
auto initializeSchedulerProgramAndKernel = [&] {
|
||||
cl_int retVal = CL_SUCCESS;
|
||||
auto clDevice = getDevice(0);
|
||||
auto src = SchedulerKernel::loadSchedulerKernel(&clDevice->getDevice());
|
||||
|
||||
auto program = Program::createBuiltInFromGenBinary(this,
|
||||
devices,
|
||||
src.resource.data(),
|
||||
src.resource.size(),
|
||||
&retVal);
|
||||
DEBUG_BREAK_IF(retVal != CL_SUCCESS);
|
||||
DEBUG_BREAK_IF(!program);
|
||||
|
||||
retVal = program->processGenBinary(*clDevice);
|
||||
DEBUG_BREAK_IF(retVal != CL_SUCCESS);
|
||||
|
||||
schedulerBuiltIn->pProgram = program;
|
||||
|
||||
auto kernelInfo = schedulerBuiltIn->pProgram->getKernelInfo(SchedulerKernel::schedulerName, clDevice->getRootDeviceIndex());
|
||||
DEBUG_BREAK_IF(!kernelInfo);
|
||||
|
||||
schedulerBuiltIn->pKernel = Kernel::create<SchedulerKernel>(
|
||||
schedulerBuiltIn->pProgram,
|
||||
*kernelInfo,
|
||||
*clDevice,
|
||||
&retVal);
|
||||
|
||||
UNRECOVERABLE_IF(schedulerBuiltIn->pKernel->getScratchSize() != 0);
|
||||
|
||||
DEBUG_BREAK_IF(retVal != CL_SUCCESS);
|
||||
};
|
||||
std::call_once(schedulerBuiltIn->programIsInitialized, initializeSchedulerProgramAndKernel);
|
||||
|
||||
UNRECOVERABLE_IF(schedulerBuiltIn->pKernel == nullptr);
|
||||
return *static_cast<SchedulerKernel *>(schedulerBuiltIn->pKernel);
|
||||
}
|
||||
|
||||
bool Context::isDeviceAssociated(const ClDevice &clDevice) const {
|
||||
for (const auto &pDevice : devices) {
|
||||
if (pDevice == &clDevice) {
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2018-2021 Intel Corporation
|
||||
* Copyright (C) 2018-2022 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@ -35,7 +35,6 @@ class MemObj;
|
||||
class MemoryManager;
|
||||
class SharingFunctions;
|
||||
class SVMAllocsManager;
|
||||
class SchedulerKernel;
|
||||
class Program;
|
||||
class Platform;
|
||||
|
||||
@ -167,8 +166,6 @@ class Context : public BaseObject<_cl_context> {
|
||||
|
||||
ContextType peekContextType() const { return contextType; }
|
||||
|
||||
MOCKABLE_VIRTUAL SchedulerKernel &getSchedulerKernel();
|
||||
|
||||
bool isDeviceAssociated(const ClDevice &clDevice) const;
|
||||
ClDevice *getSubDeviceByIndex(uint32_t subDeviceIndex) const;
|
||||
|
||||
@ -212,7 +209,6 @@ class Context : public BaseObject<_cl_context> {
|
||||
std::vector<std::unique_ptr<SharingFunctions>> sharingFunctions;
|
||||
ClDeviceVector devices;
|
||||
ContextDestructorCallbacks destructorCallbacks;
|
||||
std::unique_ptr<BuiltInKernel> schedulerBuiltIn;
|
||||
|
||||
const cl_context_properties *properties = nullptr;
|
||||
size_t numProperties = 0u;
|
||||
|
@ -22,7 +22,6 @@ class Device;
|
||||
class Kernel;
|
||||
class Event;
|
||||
struct MultiDispatchInfo;
|
||||
class SchedulerKernel;
|
||||
class HwTimeStamps;
|
||||
class TagNodeBase;
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2021 Intel Corporation
|
||||
* Copyright (C) 2019-2022 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@ -172,30 +172,6 @@ IndirectHeap *DeviceQueueHw<GfxFamily>::getIndirectHeap(IndirectHeap::Type type)
|
||||
return heaps[type];
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
size_t DeviceQueueHw<GfxFamily>::setSchedulerCrossThreadData(SchedulerKernel &scheduler) {
|
||||
using INTERFACE_DESCRIPTOR_DATA = typename GfxFamily::INTERFACE_DESCRIPTOR_DATA;
|
||||
size_t offset = dshBuffer->getUnderlyingBufferSize() - scheduler.getCurbeSize() - 4096; // Page size padding
|
||||
|
||||
auto igilCmdQueue = reinterpret_cast<IGIL_CommandQueue *>(queueBuffer->getUnderlyingBuffer());
|
||||
igilCmdQueue->m_controls.m_SchedulerDSHOffset = (uint32_t)offset;
|
||||
igilCmdQueue->m_controls.m_SchedulerConstantBufferSize = (uint32_t)scheduler.getCurbeSize();
|
||||
|
||||
return offset;
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void DeviceQueueHw<GfxFamily>::dispatchScheduler(LinearStream &commandStream, SchedulerKernel &scheduler, PreemptionMode preemptionMode, IndirectHeap *ssh, IndirectHeap *dsh, bool isCcsUsed) {
|
||||
GpgpuWalkerHelper<GfxFamily>::dispatchScheduler(commandStream,
|
||||
*this,
|
||||
preemptionMode,
|
||||
scheduler,
|
||||
ssh,
|
||||
dsh,
|
||||
isCcsUsed);
|
||||
return;
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
size_t DeviceQueueHw<GfxFamily>::getCSPrefetchSize() {
|
||||
return 512;
|
||||
|
@ -23,7 +23,6 @@
|
||||
#include "opencl/source/helpers/dispatch_info.h"
|
||||
#include "opencl/source/kernel/kernel.h"
|
||||
#include "opencl/source/program/block_kernel_manager.h"
|
||||
#include "opencl/source/scheduler/scheduler_kernel.h"
|
||||
|
||||
#include <cstring>
|
||||
|
||||
@ -127,10 +126,6 @@ size_t HardwareCommandsHelper<GfxFamily>::getSshSizeForExecutionModel(const Kern
|
||||
maxBindingTableCount = std::max(maxBindingTableCount, static_cast<uint32_t>(pBlockInfo->kernelDescriptor.payloadMappings.bindingTable.numEntries));
|
||||
}
|
||||
|
||||
SchedulerKernel &scheduler = kernel.getContext().getSchedulerKernel();
|
||||
|
||||
totalSize += getSizeRequiredSSH(scheduler);
|
||||
|
||||
totalSize += maxBindingTableCount * sizeof(BINDING_TABLE_STATE) * DeviceQueue::interfaceDescriptorEntries;
|
||||
totalSize = alignUp(totalSize, BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE);
|
||||
|
||||
|
@ -1,18 +0,0 @@
|
||||
#
|
||||
# Copyright (C) 2018-2021 Intel Corporation
|
||||
#
|
||||
# SPDX-License-Identifier: MIT
|
||||
#
|
||||
|
||||
set(RUNTIME_SRCS_SCHEDULER
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/scheduler.cl
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/scheduler_kernel.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/scheduler_kernel.h
|
||||
)
|
||||
target_sources(${NEO_STATIC_LIB_NAME} PRIVATE ${RUNTIME_SRCS_SCHEDULER})
|
||||
set_property(GLOBAL PROPERTY RUNTIME_SRCS_SCHEDULER ${RUNTIME_SRCS_SCHEDULER})
|
||||
|
||||
if(NOT (TARGET ${SCHEDULER_BINARY_LIB_NAME}))
|
||||
include(scheduler_binary.cmake)
|
||||
endif()
|
File diff suppressed because it is too large
Load Diff
@ -1,98 +0,0 @@
|
||||
#
|
||||
# Copyright (C) 2018-2022 Intel Corporation
|
||||
#
|
||||
# SPDX-License-Identifier: MIT
|
||||
#
|
||||
|
||||
add_custom_target(scheduler)
|
||||
set(OPENCL_SCHEDULER_PROJECTS_FOLDER "scheduler")
|
||||
set(SCHEDULER_OUTDIR_WITH_ARCH "${TargetDir}/scheduler/${NEO_ARCH}")
|
||||
set_target_properties(scheduler PROPERTIES FOLDER "${OPENCL_RUNTIME_PROJECTS_FOLDER}/${OPENCL_SCHEDULER_PROJECTS_FOLDER}")
|
||||
|
||||
set(SCHEDULER_KERNEL scheduler.cl)
|
||||
if(DEFINED NEO__IGC_INCLUDE_DIR)
|
||||
list(APPEND __ocloc__options__ "-I$<JOIN:${NEO__IGC_INCLUDE_DIR}, -I>")
|
||||
endif()
|
||||
|
||||
if("${CMAKE_BUILD_TYPE}" STREQUAL "Debug")
|
||||
list(APPEND __ocloc__options__ "-D DEBUG")
|
||||
endif()
|
||||
|
||||
set(SCHEDULER_INCLUDE_DIR ${TargetDir})
|
||||
|
||||
function(compile_kernel target core_type platform_type kernel)
|
||||
get_family_name_with_type(${core_type} ${platform_type})
|
||||
string(TOLOWER ${core_type} core_type_lower)
|
||||
# get filename
|
||||
set(OUTPUTDIR "${SCHEDULER_OUTDIR_WITH_ARCH}/${core_type_lower}")
|
||||
list(APPEND __ocloc__options__ "-I ../${core_type_lower}")
|
||||
|
||||
get_filename_component(BASENAME ${kernel} NAME_WE)
|
||||
|
||||
set(OUTPUTPATH "${OUTPUTDIR}/${BASENAME}_${family_name_with_type}.bin")
|
||||
|
||||
set(SCHEDULER_CPP "${OUTPUTDIR}/${BASENAME}_${family_name_with_type}.cpp")
|
||||
|
||||
list(APPEND __ocloc__options__ "-cl-kernel-arg-info")
|
||||
list(APPEND __ocloc__options__ "-cl-std=CL2.0")
|
||||
list(APPEND __ocloc__options__ "-cl-intel-disable-a64WA")
|
||||
if(NOT NEO_DISABLE_BUILTINS_COMPILATION)
|
||||
add_custom_command(
|
||||
OUTPUT ${OUTPUTPATH}
|
||||
COMMAND ${ocloc_cmd_prefix} -q -file ${kernel} -device ${DEFAULT_SUPPORTED_${core_type}_${platform_type}_PLATFORM} -cl-intel-greater-than-4GB-buffer-required -${NEO_BITS} -out_dir ${OUTPUTDIR} -cpp_file -options "$<JOIN:${__ocloc__options__}, >" -internal_options "-cl-intel-no-spill"
|
||||
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
|
||||
DEPENDS ${kernel} ocloc copy_compiler_files
|
||||
)
|
||||
set(SCHEDULER_CPP ${SCHEDULER_CPP} PARENT_SCOPE)
|
||||
add_custom_target(${target} DEPENDS ${OUTPUTPATH})
|
||||
set_target_properties(${target} PROPERTIES FOLDER "${OPENCL_RUNTIME_PROJECTS_FOLDER}/${OPENCL_SCHEDULER_PROJECTS_FOLDER}/${core_type_lower}")
|
||||
else()
|
||||
set(_file_prebuilt "${NEO_SOURCE_DIR}/../neo_test_kernels/scheduler/${NEO_ARCH}/${core_type_lower}/${BASENAME}_${family_name_with_type}.bin")
|
||||
if(EXISTS ${_file_prebuilt})
|
||||
add_custom_command(
|
||||
OUTPUT ${OUTPUTPATH}
|
||||
COMMAND ${CMAKE_COMMAND} -E make_directory ${OUTPUTDIR}
|
||||
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${_file_prebuilt} ${OUTPUTDIR}
|
||||
)
|
||||
add_custom_target(${target} DEPENDS ${OUTPUTPATH})
|
||||
set_target_properties(${target} PROPERTIES FOLDER "${OPENCL_RUNTIME_PROJECTS_FOLDER}/${OPENCL_SCHEDULER_PROJECTS_FOLDER}/${core_type_lower}")
|
||||
endif()
|
||||
set(_file_prebuilt "${NEO_SOURCE_DIR}/../neo_test_kernels/scheduler/${NEO_ARCH}/${core_type_lower}/${BASENAME}_${family_name_with_type}.cpp")
|
||||
if(EXISTS ${_file_prebuilt})
|
||||
add_custom_command(
|
||||
OUTPUT ${SCHEDULER_CPP}
|
||||
COMMAND ${CMAKE_COMMAND} -E make_directory ${OUTPUTDIR}
|
||||
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${_file_prebuilt} ${OUTPUTDIR}
|
||||
)
|
||||
set(SCHEDULER_CPP ${SCHEDULER_CPP} PARENT_SCOPE)
|
||||
endif()
|
||||
endif()
|
||||
endfunction()
|
||||
|
||||
apply_macro_for_each_core_type("SUPPORTED")
|
||||
|
||||
add_library(${SCHEDULER_BINARY_LIB_NAME} OBJECT EXCLUDE_FROM_ALL CMakeLists.txt)
|
||||
|
||||
if(COMPILE_BUILT_INS)
|
||||
if(NOT "${GENERATED_SCHEDULER_CPPS}" STREQUAL "")
|
||||
target_sources(${SCHEDULER_BINARY_LIB_NAME} PUBLIC ${GENERATED_SCHEDULER_CPPS})
|
||||
set_source_files_properties(${GENERATED_SCHEDULER_CPPS} PROPERTIES GENERATED TRUE)
|
||||
endif()
|
||||
foreach(SCHEDULER_TARGET ${SCHEDULER_TARGETS})
|
||||
add_dependencies(${SCHEDULER_BINARY_LIB_NAME} ${SCHEDULER_TARGET})
|
||||
endforeach()
|
||||
endif()
|
||||
|
||||
set_target_properties(${SCHEDULER_BINARY_LIB_NAME} PROPERTIES LINKER_LANGUAGE CXX)
|
||||
set_target_properties(${SCHEDULER_BINARY_LIB_NAME} PROPERTIES POSITION_INDEPENDENT_CODE ON)
|
||||
set_target_properties(${SCHEDULER_BINARY_LIB_NAME} PROPERTIES FOLDER "${OPENCL_RUNTIME_PROJECTS_FOLDER}/${OPENCL_SCHEDULER_PROJECTS_FOLDER}")
|
||||
|
||||
add_dependencies(${SCHEDULER_BINARY_LIB_NAME} scheduler)
|
||||
|
||||
target_include_directories(${SCHEDULER_BINARY_LIB_NAME} PRIVATE
|
||||
${ENGINE_NODE_DIR}
|
||||
${KHRONOS_HEADERS_DIR}
|
||||
${NEO__GMM_INCLUDE_DIR}
|
||||
${NEO__IGC_INCLUDE_DIR}
|
||||
${THIRD_PARTY_DIR}
|
||||
)
|
@ -1,78 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2018-2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "opencl/source/scheduler/scheduler_kernel.h"
|
||||
|
||||
#include "shared/source/device/device.h"
|
||||
#include "shared/source/helpers/hw_helper.h"
|
||||
|
||||
#include "opencl/source/cl_device/cl_device.h"
|
||||
|
||||
#include <cinttypes>
|
||||
|
||||
namespace NEO {
|
||||
|
||||
void SchedulerKernel::setArgs(GraphicsAllocation *queue,
|
||||
GraphicsAllocation *commandsStack,
|
||||
GraphicsAllocation *eventsPool,
|
||||
GraphicsAllocation *secondaryBatchBuffer,
|
||||
GraphicsAllocation *dsh,
|
||||
GraphicsAllocation *reflectionSurface,
|
||||
GraphicsAllocation *queueStorageBuffer,
|
||||
GraphicsAllocation *ssh,
|
||||
GraphicsAllocation *debugQueue) {
|
||||
|
||||
setArgSvmAlloc(0, reinterpret_cast<void *>(queue->getGpuAddress()), queue);
|
||||
setArgSvmAlloc(1, reinterpret_cast<void *>(commandsStack->getGpuAddress()), commandsStack);
|
||||
setArgSvmAlloc(2, reinterpret_cast<void *>(eventsPool->getGpuAddress()), eventsPool);
|
||||
setArgSvmAlloc(3, reinterpret_cast<void *>(secondaryBatchBuffer->getGpuAddress()), secondaryBatchBuffer);
|
||||
setArgSvmAlloc(4, reinterpret_cast<void *>(dsh->getGpuAddress()), dsh);
|
||||
setArgSvmAlloc(5, reinterpret_cast<void *>(reflectionSurface->getGpuAddress()), reflectionSurface);
|
||||
setArgSvmAlloc(6, reinterpret_cast<void *>(queueStorageBuffer->getGpuAddress()), queueStorageBuffer);
|
||||
setArgSvmAlloc(7, reinterpret_cast<void *>(ssh->getGpuAddress()), ssh);
|
||||
if (debugQueue)
|
||||
setArgSvmAlloc(8, reinterpret_cast<void *>(debugQueue->getGpuAddress()), debugQueue);
|
||||
|
||||
DBG_LOG(PrintEMDebugInformation,
|
||||
"Scheduler Surfaces: \nqueue=", queue->getUnderlyingBuffer(), " \nstack=", commandsStack->getUnderlyingBuffer(),
|
||||
" \nevents=", eventsPool->getUnderlyingBuffer(), " \nslb=", secondaryBatchBuffer->getUnderlyingBuffer(), "\ndsh=", dsh->getUnderlyingBuffer(),
|
||||
" \nkrs=", reflectionSurface->getUnderlyingBuffer(), " \nstorage=", queueStorageBuffer->getUnderlyingBuffer(), "\nssh=", ssh->getUnderlyingBuffer());
|
||||
}
|
||||
void SchedulerKernel::computeGws() {
|
||||
auto pClDevice = program->getDevices()[0];
|
||||
auto &devInfo = pClDevice->getDeviceInfo();
|
||||
auto &hwInfo = pClDevice->getHardwareInfo();
|
||||
auto &helper = HwHelper::get(hwInfo.platform.eRenderCoreFamily);
|
||||
|
||||
size_t hWThreadsPerSubSlice = devInfo.maxComputUnits / hwInfo.gtSystemInfo.SubSliceCount;
|
||||
size_t wkgsPerSubSlice = hWThreadsPerSubSlice / PARALLEL_SCHEDULER_HWTHREADS_IN_HW_GROUP20;
|
||||
|
||||
wkgsPerSubSlice = std::min(wkgsPerSubSlice, helper.getMaxBarrierRegisterPerSlice());
|
||||
gws = wkgsPerSubSlice * hwInfo.gtSystemInfo.SubSliceCount * PARALLEL_SCHEDULER_HWTHREADS_IN_HW_GROUP20 * PARALLEL_SCHEDULER_COMPILATION_SIZE_20;
|
||||
|
||||
if (pClDevice->isSimulation()) {
|
||||
gws = PARALLEL_SCHEDULER_HWTHREADS_IN_HW_GROUP20 * PARALLEL_SCHEDULER_COMPILATION_SIZE_20;
|
||||
}
|
||||
if (DebugManager.flags.SchedulerGWS.get() != 0) {
|
||||
DEBUG_BREAK_IF(DebugManager.flags.SchedulerGWS.get() % (PARALLEL_SCHEDULER_HWTHREADS_IN_HW_GROUP20 * PARALLEL_SCHEDULER_COMPILATION_SIZE_20) != 0);
|
||||
gws = DebugManager.flags.SchedulerGWS.get();
|
||||
}
|
||||
|
||||
DBG_LOG(PrintEMDebugInformation, "Scheduler GWS: ", gws);
|
||||
PRINT_DEBUG_STRING(DebugManager.flags.PrintDebugMessages.get(), stderr, "Scheduler GWS: %" PRIu64, static_cast<uint64_t>(gws));
|
||||
}
|
||||
BuiltinCode SchedulerKernel::loadSchedulerKernel(Device *device) {
|
||||
std::string schedulerResourceName = getFamilyNameWithType(device->getHardwareInfo()) + "_0_scheduler.builtin_kernel.bin";
|
||||
|
||||
BuiltinCode ret;
|
||||
auto storage = std::make_unique<EmbeddedStorage>("");
|
||||
ret.resource = storage.get()->load(schedulerResourceName);
|
||||
ret.type = BuiltinCode::ECodeType::Binary;
|
||||
ret.targetDevice = device;
|
||||
return ret;
|
||||
}
|
||||
} // namespace NEO
|
@ -1,67 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2018-2022 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include "shared/source/built_ins/built_ins.h"
|
||||
|
||||
#include "opencl/source/kernel/kernel.h"
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
namespace NEO {
|
||||
|
||||
class SchedulerKernel : public Kernel {
|
||||
public:
|
||||
static constexpr const char *schedulerName = "SchedulerParallel20";
|
||||
friend Kernel;
|
||||
|
||||
~SchedulerKernel() override = default;
|
||||
|
||||
size_t getLws() {
|
||||
return PARALLEL_SCHEDULER_HWTHREADS_IN_HW_GROUP20 * PARALLEL_SCHEDULER_COMPILATION_SIZE_20;
|
||||
}
|
||||
|
||||
size_t getGws() {
|
||||
return gws;
|
||||
}
|
||||
|
||||
void setGws(size_t newGws) {
|
||||
gws = newGws;
|
||||
}
|
||||
|
||||
size_t getCurbeSize() {
|
||||
size_t crossThreadDataSize = kernelInfo.kernelDescriptor.kernelAttributes.crossThreadDataSize;
|
||||
size_t dshSize = kernelInfo.heapInfo.DynamicStateHeapSize;
|
||||
|
||||
crossThreadDataSize = alignUp(crossThreadDataSize, 64);
|
||||
dshSize = alignUp(dshSize, 64);
|
||||
|
||||
return alignUp(SCHEDULER_DYNAMIC_PAYLOAD_SIZE, 64) + crossThreadDataSize + dshSize;
|
||||
}
|
||||
|
||||
void setArgs(GraphicsAllocation *queue,
|
||||
GraphicsAllocation *commandsStack,
|
||||
GraphicsAllocation *eventsPool,
|
||||
GraphicsAllocation *secondaryBatchBuffer,
|
||||
GraphicsAllocation *dsh,
|
||||
GraphicsAllocation *reflectionSurface,
|
||||
GraphicsAllocation *queueStorageBuffer,
|
||||
GraphicsAllocation *ssh,
|
||||
GraphicsAllocation *debugQueue = nullptr);
|
||||
static BuiltinCode loadSchedulerKernel(Device *device);
|
||||
|
||||
protected:
|
||||
SchedulerKernel(Program *programArg, const KernelInfo &kernelInfoArg, ClDevice &clDeviceArg) : Kernel(programArg, kernelInfoArg, clDeviceArg) {
|
||||
computeGws();
|
||||
};
|
||||
|
||||
void computeGws();
|
||||
|
||||
size_t gws = 0u;
|
||||
};
|
||||
|
||||
} // namespace NEO
|
@ -1,5 +1,5 @@
|
||||
#
|
||||
# Copyright (C) 2018-2021 Intel Corporation
|
||||
# Copyright (C) 2018-2022 Intel Corporation
|
||||
#
|
||||
# SPDX-License-Identifier: MIT
|
||||
#
|
||||
@ -22,7 +22,6 @@ ADD_SUPPORTED_TEST_PRODUCT_FAMILIES_DEFINITION()
|
||||
link_libraries(${ASAN_LIBS} ${TSAN_LIBS})
|
||||
|
||||
add_custom_target(prepare_test_kernels_for_ocl)
|
||||
add_dependencies(prepare_test_kernels_for_ocl ${SCHEDULER_BINARY_LIB_NAME})
|
||||
add_dependencies(prepare_test_kernels_for_ocl ${BUILTINS_BINARIES_BINDFUL_LIB_NAME})
|
||||
add_custom_target(copy_test_files_per_product)
|
||||
add_custom_target(run_unit_tests ALL)
|
||||
@ -63,11 +62,6 @@ set(NEO_IGDRCL_TESTS__TARGET_OBJECTS
|
||||
$<TARGET_OBJECTS:neo_shared_mocks>
|
||||
$<TARGET_OBJECTS:neo_unit_tests_config>
|
||||
)
|
||||
if(TARGET ${SCHEDULER_BINARY_LIB_NAME})
|
||||
list(APPEND NEO_IGDRCL_TESTS__TARGET_OBJECTS
|
||||
$<TARGET_OBJECTS:${SCHEDULER_BINARY_LIB_NAME}>
|
||||
)
|
||||
endif()
|
||||
|
||||
add_executable(igdrcl_tests
|
||||
${NEO_IGDRCL_TESTS__TARGET_OBJECTS}
|
||||
|
@ -1,5 +1,5 @@
|
||||
#
|
||||
# Copyright (C) 2018-2021 Intel Corporation
|
||||
# Copyright (C) 2018-2022 Intel Corporation
|
||||
#
|
||||
# SPDX-License-Identifier: MIT
|
||||
#
|
||||
@ -20,11 +20,6 @@ list(APPEND IGDRCL_AUB_TESTS__TARGET_OBJECTS
|
||||
$<TARGET_OBJECTS:neo_shared_mocks>
|
||||
$<TARGET_OBJECTS:neo_aub_tests_config>
|
||||
)
|
||||
if(TARGET ${SCHEDULER_BINARY_LIB_NAME})
|
||||
list(APPEND IGDRCL_AUB_TESTS__TARGET_OBJECTS
|
||||
$<TARGET_OBJECTS:${SCHEDULER_BINARY_LIB_NAME}>
|
||||
)
|
||||
endif()
|
||||
|
||||
if(DEFINED AUB_STREAM_PROJECT_NAME)
|
||||
list(APPEND IGDRCL_AUB_TESTS__TARGET_OBJECTS $<TARGET_OBJECTS:${AUB_STREAM_PROJECT_NAME}_all_hw>)
|
||||
|
@ -1,5 +1,5 @@
|
||||
#
|
||||
# Copyright (C) 2018-2021 Intel Corporation
|
||||
# Copyright (C) 2018-2022 Intel Corporation
|
||||
#
|
||||
# SPDX-License-Identifier: MIT
|
||||
#
|
||||
@ -8,7 +8,6 @@ set(IGDRCL_SRCS_tests_built_in
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/built_ins_file_names.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/built_ins_file_names.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/built_in_kernels_tests.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/built_in_tests.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/built_in_tests_ocl.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}get_built_ins_file_names.cpp
|
||||
|
@ -1,121 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2018-2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "opencl/source/builtin_kernels_simulation/opencl_c.h"
|
||||
|
||||
#include "gtest/gtest.h"
|
||||
|
||||
namespace BuiltinKernelsSimulation {
|
||||
|
||||
__kernel void CopyImage3dToBuffer16Bytes(__read_only image3d_t input,
|
||||
__global uchar *dst,
|
||||
int4 srcOffset,
|
||||
int dstOffset,
|
||||
uint2 pitch) {
|
||||
const int x = get_global_id(0);
|
||||
const int y = get_global_id(1);
|
||||
const int z = get_global_id(2);
|
||||
|
||||
int4 srcCoord = {x, y, z, 0};
|
||||
srcCoord = srcCoord + srcOffset;
|
||||
uint DstOffset = dstOffset + (y * pitch.x) + (z * pitch.y);
|
||||
|
||||
const uint4 c = read_imageui(input, srcCoord);
|
||||
|
||||
if ((ulong)(dst + dstOffset) & 0x0000000f) {
|
||||
*((__global uchar *)(dst + DstOffset + x * 16 + 3)) = convert_uchar_sat((c.x >> 24) & 0xff);
|
||||
*((__global uchar *)(dst + DstOffset + x * 16 + 2)) = convert_uchar_sat((c.x >> 16) & 0xff);
|
||||
*((__global uchar *)(dst + DstOffset + x * 16 + 1)) = convert_uchar_sat((c.x >> 8) & 0xff);
|
||||
*((__global uchar *)(dst + DstOffset + x * 16)) = convert_uchar_sat(c.x & 0xff);
|
||||
*((__global uchar *)(dst + DstOffset + x * 16 + 7)) = convert_uchar_sat((c.y >> 24) & 0xff);
|
||||
*((__global uchar *)(dst + DstOffset + x * 16 + 6)) = convert_uchar_sat((c.y >> 16) & 0xff);
|
||||
*((__global uchar *)(dst + DstOffset + x * 16 + 5)) = convert_uchar_sat((c.y >> 8) & 0xff);
|
||||
*((__global uchar *)(dst + DstOffset + x * 16 + 4)) = convert_uchar_sat(c.y & 0xff);
|
||||
*((__global uchar *)(dst + DstOffset + x * 16 + 11)) = convert_uchar_sat((c.z >> 24) & 0xff);
|
||||
*((__global uchar *)(dst + DstOffset + x * 16 + 10)) = convert_uchar_sat((c.z >> 16) & 0xff);
|
||||
*((__global uchar *)(dst + DstOffset + x * 16 + 9)) = convert_uchar_sat((c.z >> 8) & 0xff);
|
||||
*((__global uchar *)(dst + DstOffset + x * 16 + 8)) = convert_uchar_sat(c.z & 0xff);
|
||||
*((__global uchar *)(dst + DstOffset + x * 16 + 15)) = convert_uchar_sat((c.w >> 24) & 0xff);
|
||||
*((__global uchar *)(dst + DstOffset + x * 16 + 14)) = convert_uchar_sat((c.w >> 16) & 0xff);
|
||||
*((__global uchar *)(dst + DstOffset + x * 16 + 13)) = convert_uchar_sat((c.w >> 8) & 0xff);
|
||||
*((__global uchar *)(dst + DstOffset + x * 16 + 12)) = convert_uchar_sat(c.w & 0xff);
|
||||
} else {
|
||||
*(__global uint4 *)(dst + DstOffset + x * 16) = c;
|
||||
}
|
||||
}
|
||||
|
||||
TEST(BuiltInKernelTests, WhenBuiltInCopiesImageThenDataIsWrittenIntoCorrectMemory) {
|
||||
|
||||
uint width = 3;
|
||||
uint height = 3;
|
||||
uint depth = 3;
|
||||
uint bytesPerChannel = 4;
|
||||
uint channels = 4;
|
||||
|
||||
uint bpp = bytesPerChannel * channels;
|
||||
|
||||
globalID[0] = 0;
|
||||
globalID[1] = 0;
|
||||
globalID[2] = 0;
|
||||
localID[0] = 0;
|
||||
localID[1] = 0;
|
||||
localID[2] = 0;
|
||||
localSize[0] = width;
|
||||
localSize[1] = height;
|
||||
localSize[2] = depth;
|
||||
|
||||
size_t size = width * height * depth * bytesPerChannel * channels;
|
||||
auto ptrSrc = std::make_unique<char[]>(64 + size + 64);
|
||||
auto ptrDst = std::make_unique<char[]>(64 + size + 64);
|
||||
auto ptrZero = std::make_unique<char[]>(64);
|
||||
|
||||
memset(ptrZero.get(), 0, 64);
|
||||
memset(ptrDst.get(), 0, 64 + size + 64);
|
||||
memset(ptrSrc.get(), 0, 64 + size + 64);
|
||||
|
||||
char *temp = ptrSrc.get() + 64;
|
||||
|
||||
for (uint i = 0; i < size; i++) {
|
||||
temp[i] = i;
|
||||
}
|
||||
|
||||
image im;
|
||||
im.ptr = ptrSrc.get() + 64;
|
||||
im.bytesPerChannel = bytesPerChannel;
|
||||
im.channels = channels;
|
||||
im.width = width;
|
||||
im.height = height;
|
||||
im.depth = depth;
|
||||
|
||||
uint2 Pitch(0, 0);
|
||||
Pitch.x = width * bpp;
|
||||
Pitch.y = width * height * bpp;
|
||||
|
||||
for (uint dimZ = 0; dimZ < depth; dimZ++) {
|
||||
globalID[1] = 0;
|
||||
for (uint dimY = 0; dimY < height; dimY++) {
|
||||
globalID[0] = 0;
|
||||
for (uint dimX = 0; dimX < width; dimX++) {
|
||||
|
||||
CopyImage3dToBuffer16Bytes(&im,
|
||||
(uchar *)ptrDst.get() + 64,
|
||||
{0, 0, 0, 0},
|
||||
0,
|
||||
Pitch);
|
||||
globalID[0]++;
|
||||
}
|
||||
globalID[1]++;
|
||||
}
|
||||
globalID[2]++;
|
||||
}
|
||||
|
||||
EXPECT_EQ(0, memcmp(im.ptr, ptrDst.get() + 64, size)) << "Data not copied properly!\n";
|
||||
|
||||
EXPECT_EQ(0, memcmp(ptrDst.get(), ptrZero.get(), 64)) << "Data written before passed ptr!\n";
|
||||
EXPECT_EQ(0, memcmp(ptrDst.get() + size + 64, ptrZero.get(), 64)) << "Data written after passed ptr!\n";
|
||||
}
|
||||
} // namespace BuiltinKernelsSimulation
|
@ -1029,29 +1029,6 @@ TEST_F(BuiltInTests, GivenUnknownBuiltInOpWhenGettingBuilderInfoThenExceptionThr
|
||||
EXPECT_TRUE(caughtException);
|
||||
}
|
||||
|
||||
HWCMDTEST_F(IGFX_GEN8_CORE, BuiltInTests, WhenGettingSchedulerKernelThenCorrectKernelReturned) {
|
||||
REQUIRE_OCL_21_OR_SKIP(defaultHwInfo);
|
||||
SchedulerKernel &schedulerKernel = pContext->getSchedulerKernel();
|
||||
std::string name = SchedulerKernel::schedulerName;
|
||||
EXPECT_EQ(name, schedulerKernel.getKernelInfo().kernelDescriptor.kernelMetadata.kernelName);
|
||||
}
|
||||
|
||||
HWCMDTEST_F(IGFX_GEN8_CORE, BuiltInTests, WhenGetttingSchedulerKernelForSecondTimeThenReuseKernel) {
|
||||
REQUIRE_OCL_21_OR_SKIP(defaultHwInfo);
|
||||
|
||||
SchedulerKernel &schedulerKernel = pContext->getSchedulerKernel();
|
||||
|
||||
Program *program = schedulerKernel.getProgram();
|
||||
EXPECT_NE(nullptr, program);
|
||||
|
||||
SchedulerKernel &schedulerKernelSecond = pContext->getSchedulerKernel();
|
||||
|
||||
Program *program2 = schedulerKernelSecond.getProgram();
|
||||
|
||||
EXPECT_EQ(&schedulerKernel, &schedulerKernelSecond);
|
||||
EXPECT_EQ(program, program2);
|
||||
}
|
||||
|
||||
TEST_F(BuiltInTests, GivenUnsupportedBuildTypeWhenBuildingDispatchInfoThenFalseIsReturned) {
|
||||
auto &builtIns = *pDevice->getBuiltIns();
|
||||
BuiltinDispatchInfoBuilder dispatchInfoBuilder{builtIns, *pClDevice};
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2020-2021 Intel Corporation
|
||||
* Copyright (C) 2020-2022 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@ -17,7 +17,6 @@
|
||||
#include "opencl/source/command_queue/enqueue_kernel.h"
|
||||
#include "opencl/source/command_queue/enqueue_marker.h"
|
||||
#include "opencl/source/helpers/dispatch_info.h"
|
||||
#include "opencl/source/scheduler/scheduler_kernel.h"
|
||||
#include "opencl/test/unit_test/fixtures/hello_world_fixture.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_command_queue.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_context.h"
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2018-2021 Intel Corporation
|
||||
* Copyright (C) 2018-2022 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@ -132,48 +132,6 @@ void MockContext::initializeWithDevices(const ClDeviceVector &devices, bool noSp
|
||||
setupContextType();
|
||||
}
|
||||
|
||||
SchedulerKernel &MockContext::getSchedulerKernel() {
|
||||
if (schedulerBuiltIn->pKernel) {
|
||||
return *static_cast<SchedulerKernel *>(schedulerBuiltIn->pKernel);
|
||||
}
|
||||
|
||||
auto initializeSchedulerProgramAndKernel = [&] {
|
||||
cl_int retVal = CL_SUCCESS;
|
||||
auto clDevice = getDevice(0);
|
||||
auto src = SchedulerKernel::loadSchedulerKernel(&clDevice->getDevice());
|
||||
|
||||
auto program = Program::createBuiltInFromGenBinary(this,
|
||||
devices,
|
||||
src.resource.data(),
|
||||
src.resource.size(),
|
||||
&retVal);
|
||||
DEBUG_BREAK_IF(retVal != CL_SUCCESS);
|
||||
DEBUG_BREAK_IF(!program);
|
||||
|
||||
retVal = program->processGenBinary(*clDevice);
|
||||
DEBUG_BREAK_IF(retVal != CL_SUCCESS);
|
||||
|
||||
schedulerBuiltIn->pProgram = program;
|
||||
|
||||
auto kernelInfo = schedulerBuiltIn->pProgram->getKernelInfo(SchedulerKernel::schedulerName, clDevice->getRootDeviceIndex());
|
||||
DEBUG_BREAK_IF(!kernelInfo);
|
||||
|
||||
schedulerBuiltIn->pKernel = Kernel::create<MockSchedulerKernel>(
|
||||
schedulerBuiltIn->pProgram,
|
||||
*kernelInfo,
|
||||
*clDevice,
|
||||
&retVal);
|
||||
|
||||
UNRECOVERABLE_IF(schedulerBuiltIn->pKernel->getScratchSize() != 0);
|
||||
|
||||
DEBUG_BREAK_IF(retVal != CL_SUCCESS);
|
||||
};
|
||||
std::call_once(schedulerBuiltIn->programIsInitialized, initializeSchedulerProgramAndKernel);
|
||||
|
||||
UNRECOVERABLE_IF(schedulerBuiltIn->pKernel == nullptr);
|
||||
return *static_cast<SchedulerKernel *>(schedulerBuiltIn->pKernel);
|
||||
}
|
||||
|
||||
MockDefaultContext::MockDefaultContext() : MockDefaultContext(false) {}
|
||||
|
||||
MockDefaultContext::MockDefaultContext(bool initSpecialQueues) : MockContext(nullptr, nullptr) {
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2018-2021 Intel Corporation
|
||||
* Copyright (C) 2018-2022 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@ -52,8 +52,6 @@ class MockContext : public Context {
|
||||
std::unique_ptr<AsyncEventsHandler> &getAsyncEventsHandlerUniquePtr();
|
||||
void initializeWithDevices(const ClDeviceVector &devices, bool noSpecialQueue);
|
||||
|
||||
SchedulerKernel &getSchedulerKernel() override;
|
||||
|
||||
private:
|
||||
ClDevice *pDevice = nullptr;
|
||||
};
|
||||
|
@ -19,7 +19,6 @@
|
||||
#include "opencl/source/kernel/multi_device_kernel.h"
|
||||
#include "opencl/source/platform/platform.h"
|
||||
#include "opencl/source/program/block_kernel_manager.h"
|
||||
#include "opencl/source/scheduler/scheduler_kernel.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_buffer.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_context.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_program.h"
|
||||
@ -548,11 +547,6 @@ class MockParentKernel : public Kernel {
|
||||
KernelInfo *mockKernelInfo = nullptr;
|
||||
};
|
||||
|
||||
class MockSchedulerKernel : public SchedulerKernel {
|
||||
public:
|
||||
MockSchedulerKernel(Program *programArg, const KernelInfo &kernelInfoArg, ClDevice &clDeviceArg) : SchedulerKernel(programArg, kernelInfoArg, clDeviceArg){};
|
||||
};
|
||||
|
||||
class MockDebugKernel : public MockKernel {
|
||||
public:
|
||||
MockDebugKernel(Program *program, const KernelInfo &kernelInfo, ClDevice &clDeviceArg) : MockKernel(program, kernelInfo, clDeviceArg) {
|
||||
|
@ -816,6 +816,7 @@ TEST_F(OfflineCompilerTests, WhenParsingBinToCharArrayThenCorrectResult) {
|
||||
|
||||
delete pOfflineCompiler;
|
||||
}
|
||||
|
||||
TEST_F(OfflineCompilerTests, GivenCppFileWhenBuildingThenBuildSucceeds) {
|
||||
std::vector<std::string> argv = {
|
||||
"ocloc",
|
||||
|
@ -1,11 +0,0 @@
|
||||
#
|
||||
# Copyright (C) 2018-2022 Intel Corporation
|
||||
#
|
||||
# SPDX-License-Identifier: MIT
|
||||
#
|
||||
|
||||
set(IGDRCL_SRCS_tests_scheduler
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/scheduler_kernel_tests.cpp
|
||||
)
|
||||
target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_scheduler})
|
@ -1,309 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2018-2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/test/common/helpers/debug_manager_state_restore.h"
|
||||
#include "shared/test/common/mocks/mock_device.h"
|
||||
#include "shared/test/common/mocks/mock_graphics_allocation.h"
|
||||
#include "shared/test/common/mocks/mock_ostime.h"
|
||||
#include "shared/test/common/test_macros/test.h"
|
||||
#include "shared/test/unit_test/utilities/base_object_utils.h"
|
||||
|
||||
#include "opencl/source/scheduler/scheduler_kernel.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_cl_device.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_context.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_program.h"
|
||||
|
||||
#include "gtest/gtest.h"
|
||||
|
||||
#include <cstdint>
|
||||
#include <memory>
|
||||
|
||||
using namespace NEO;
|
||||
using namespace iOpenCL;
|
||||
|
||||
class MockSchedulerKernel : public SchedulerKernel {
|
||||
public:
|
||||
MockSchedulerKernel(Program *program, const KernelInfo &info, ClDevice &clDeviceArg) : SchedulerKernel(program, info, clDeviceArg) {
|
||||
}
|
||||
|
||||
static MockSchedulerKernel *create(Program &program, KernelInfo *&info) {
|
||||
info = new KernelInfo;
|
||||
|
||||
info->kernelDescriptor.kernelAttributes.crossThreadDataSize = 8;
|
||||
info->kernelDescriptor.kernelAttributes.simdSize = 32;
|
||||
info->kernelDescriptor.kernelAttributes.flags.usesDeviceSideEnqueue = false;
|
||||
|
||||
ArgDescriptor bufferArg;
|
||||
auto &asPtr = bufferArg.as<ArgDescPointer>(true);
|
||||
|
||||
for (uint32_t i = 0; i < 9; i++) {
|
||||
asPtr.stateless = 0;
|
||||
asPtr.pointerSize = 0;
|
||||
asPtr.bufferOffset = 0;
|
||||
info->kernelDescriptor.payloadMappings.explicitArgs.push_back(std::move(bufferArg));
|
||||
}
|
||||
|
||||
MockSchedulerKernel *mock = Kernel::create<MockSchedulerKernel>(&program, *info, *program.getDevices()[0], nullptr);
|
||||
return mock;
|
||||
}
|
||||
};
|
||||
|
||||
TEST(SchedulerKernelTest, WhenSchedulerKernelIsCreatedThenLwsIs24) {
    auto device = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(nullptr));
    MockProgram program(toClDeviceVector(*device));
    KernelInfo info;
    MockSchedulerKernel kernel(&program, info, *device);

    size_t lws = kernel.getLws();
    EXPECT_EQ((size_t)24u, lws);
}

TEST(SchedulerKernelTest, WhenSchedulerKernelIsCreatedThenGwsIs24) {
    auto device = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(nullptr));
    MockProgram program(toClDeviceVector(*device));
    KernelInfo info;
    MockSchedulerKernel kernel(&program, info, *device);

    const size_t hwThreads = 3;
    const size_t simdSize = 8;

    size_t maxGws = defaultHwInfo->gtSystemInfo.EUCount * hwThreads * simdSize;

    size_t gws = kernel.getGws();
    EXPECT_GE(maxGws, gws);
    EXPECT_LT(hwThreads * simdSize, gws);
}

TEST(SchedulerKernelTest, WhenSettingGwsThenGetGwsReturnedSetValue) {
    auto device = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(nullptr));
    MockProgram program(toClDeviceVector(*device));
    KernelInfo info;
    MockSchedulerKernel kernel(&program, info, *device);

    kernel.setGws(24);

    size_t gws = kernel.getGws();

    EXPECT_EQ(24u, gws);
}

TEST(SchedulerKernelTest, WhenSchedulerKernelIsCreatedThenCurbeSizeIsCorrect) {
    auto device = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(nullptr));
    MockProgram program(toClDeviceVector(*device));
    KernelInfo info;
    uint32_t crossThreadDataSize = 32;
    uint32_t dshSize = 48;

    info.kernelDescriptor.kernelAttributes.crossThreadDataSize = crossThreadDataSize;
    info.heapInfo.DynamicStateHeapSize = dshSize;

    MockSchedulerKernel kernel(&program, info, *device);

    uint32_t expectedCurbeSize = alignUp(crossThreadDataSize, 64) + alignUp(dshSize, 64) + alignUp(SCHEDULER_DYNAMIC_PAYLOAD_SIZE, 64);
    EXPECT_GE((size_t)expectedCurbeSize, kernel.getCurbeSize());
}
TEST(SchedulerKernelTest, WhenSettingArgsForSchedulerKernelThenAllocationsAreCorrect) {
    auto device = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(nullptr));
    auto context = clUniquePtr(new MockContext(device.get()));
    auto program = clUniquePtr(new MockProgram(context.get(), false, toClDeviceVector(*device)));
    std::unique_ptr<KernelInfo> info(nullptr);
    KernelInfo *infoPtr = nullptr;
    std::unique_ptr<MockSchedulerKernel> scheduler = std::unique_ptr<MockSchedulerKernel>(MockSchedulerKernel::create(*program, infoPtr));
    info.reset(infoPtr);
    std::unique_ptr<MockGraphicsAllocation> allocs[9];

    for (uint32_t i = 0; i < 9; i++) {
        allocs[i] = std::unique_ptr<MockGraphicsAllocation>(new MockGraphicsAllocation((void *)0x1234, 10));
    }

    scheduler->setArgs(allocs[0].get(),
                       allocs[1].get(),
                       allocs[2].get(),
                       allocs[3].get(),
                       allocs[4].get(),
                       allocs[5].get(),
                       allocs[6].get(),
                       allocs[7].get(),
                       allocs[8].get());

    for (uint32_t i = 0; i < 9; i++) {
        EXPECT_EQ(allocs[i].get(), scheduler->getKernelArg(i));
    }
}

TEST(SchedulerKernelTest, GivenNullDebugQueueWhenSettingArgsForSchedulerKernelThenAllocationsAreCorrect) {
    auto device = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(nullptr));
    auto context = clUniquePtr(new MockContext(device.get()));
    auto program = clUniquePtr(new MockProgram(context.get(), false, toClDeviceVector(*device)));

    std::unique_ptr<KernelInfo> info(nullptr);
    KernelInfo *infoPtr = nullptr;
    std::unique_ptr<MockSchedulerKernel> scheduler = std::unique_ptr<MockSchedulerKernel>(MockSchedulerKernel::create(*program, infoPtr));
    info.reset(infoPtr);
    std::unique_ptr<MockGraphicsAllocation> allocs[9];

    for (uint32_t i = 0; i < 9; i++) {
        allocs[i] = std::unique_ptr<MockGraphicsAllocation>(new MockGraphicsAllocation((void *)0x1234, 10));
    }

    scheduler->setArgs(allocs[0].get(),
                       allocs[1].get(),
                       allocs[2].get(),
                       allocs[3].get(),
                       allocs[4].get(),
                       allocs[5].get(),
                       allocs[6].get(),
                       allocs[7].get());

    for (uint32_t i = 0; i < 8; i++) {
        EXPECT_EQ(allocs[i].get(), scheduler->getKernelArg(i));
    }
    EXPECT_EQ(nullptr, scheduler->getKernelArg(8));
}
TEST(SchedulerKernelTest, givenGraphicsAllocationWithDifferentCpuAndGpuAddressesWhenCallSetArgsThenGpuAddressesAreTaken) {
    auto device = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(nullptr));
    auto context = clUniquePtr(new MockContext(device.get()));
    auto program = clUniquePtr(new MockProgram(context.get(), false, toClDeviceVector(*device)));

    std::unique_ptr<KernelInfo> info(nullptr);
    KernelInfo *infoPtr = nullptr;
    auto scheduler = std::unique_ptr<MockSchedulerKernel>(MockSchedulerKernel::create(*program, infoPtr));
    info.reset(infoPtr);
    std::unique_ptr<MockGraphicsAllocation> allocs[9];

    for (uint32_t i = 0; i < 9; i++) {
        allocs[i] = std::make_unique<MockGraphicsAllocation>(reinterpret_cast<void *>(0x1234), 0x4321, 10);
    }

    scheduler->setArgs(allocs[0].get(),
                       allocs[1].get(),
                       allocs[2].get(),
                       allocs[3].get(),
                       allocs[4].get(),
                       allocs[5].get(),
                       allocs[6].get(),
                       allocs[7].get(),
                       allocs[8].get());

    for (uint32_t i = 0; i < 9; i++) {
        auto argAddr = reinterpret_cast<uint64_t>(scheduler->getKernelArgInfo(i).value);
        EXPECT_EQ(allocs[i]->getGpuAddress(), argAddr);
    }
}
TEST(SchedulerKernelTest, GivenForceDispatchSchedulerWhenCreatingKernelReflectionThenKernelReflectSurfaceIsNotNull) {
    DebugManagerStateRestore dbgRestorer;

    DebugManager.flags.ForceDispatchScheduler.set(true);
    auto device = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(nullptr));
    auto context = clUniquePtr(new MockContext(device.get()));
    auto program = clUniquePtr(new MockProgram(context.get(), false, toClDeviceVector(*device)));

    std::unique_ptr<KernelInfo> info(nullptr);
    KernelInfo *infoPtr = nullptr;
    std::unique_ptr<MockSchedulerKernel> scheduler = std::unique_ptr<MockSchedulerKernel>(MockSchedulerKernel::create(*program, infoPtr));
    info.reset(infoPtr);

    scheduler->createReflectionSurface();

    EXPECT_NE(nullptr, scheduler->getKernelReflectionSurface());
}

TEST(SchedulerKernelTest, GivenForceDispatchSchedulerWhenCreatingKernelReflectionTwiceThenTheSameAllocationIsUsed) {
    DebugManagerStateRestore dbgRestorer;

    DebugManager.flags.ForceDispatchScheduler.set(true);
    auto device = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(nullptr));
    auto context = clUniquePtr(new MockContext(device.get()));
    auto program = clUniquePtr(new MockProgram(context.get(), false, toClDeviceVector(*device)));

    std::unique_ptr<KernelInfo> info(nullptr);
    KernelInfo *infoPtr = nullptr;
    std::unique_ptr<MockSchedulerKernel> scheduler = std::unique_ptr<MockSchedulerKernel>(MockSchedulerKernel::create(*program, infoPtr));
    info.reset(infoPtr);

    scheduler->createReflectionSurface();

    auto *allocation = scheduler->getKernelReflectionSurface();
    scheduler->createReflectionSurface();
    auto *allocation2 = scheduler->getKernelReflectionSurface();

    EXPECT_EQ(allocation, allocation2);
}

TEST(SchedulerKernelTest, GivenNoForceDispatchSchedulerWhenCreatingKernelReflectionThenKernelReflectionSurfaceIsNotCreated) {
    DebugManagerStateRestore dbgRestorer;

    DebugManager.flags.ForceDispatchScheduler.set(false);
    auto device = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(nullptr));
    auto context = clUniquePtr(new MockContext(device.get()));
    auto program = clUniquePtr(new MockProgram(context.get(), false, toClDeviceVector(*device)));

    std::unique_ptr<KernelInfo> info(nullptr);
    KernelInfo *infoPtr = nullptr;
    std::unique_ptr<MockSchedulerKernel> scheduler = std::unique_ptr<MockSchedulerKernel>(MockSchedulerKernel::create(*program, infoPtr));
    info.reset(infoPtr);

    scheduler->createReflectionSurface();

    EXPECT_EQ(nullptr, scheduler->getKernelReflectionSurface());
}
TEST(SchedulerKernelTest, GivenNullKernelInfoWhenGettingCurbeSizeThenSizeIsCorrect) {
    auto device = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(nullptr));
    MockProgram program(toClDeviceVector(*device));
    KernelInfo info;

    MockSchedulerKernel kernel(&program, info, *device);

    uint32_t expectedCurbeSize = alignUp(SCHEDULER_DYNAMIC_PAYLOAD_SIZE, 64);
    EXPECT_GE((size_t)expectedCurbeSize, kernel.getCurbeSize());
}

TEST(SchedulerKernelTest, givenForcedSchedulerGwsByDebugVariableWhenSchedulerKernelIsCreatedThenGwsIsSetToForcedValue) {
    DebugManagerStateRestore dbgRestorer;
    DebugManager.flags.SchedulerGWS.set(48);

    auto device = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(nullptr));
    MockProgram program(toClDeviceVector(*device));
    KernelInfo info;
    MockSchedulerKernel kernel(&program, info, *device);

    size_t gws = kernel.getGws();
    EXPECT_EQ(static_cast<size_t>(48u), gws);
}
TEST(SchedulerKernelTest, givenSimulationModeWhenSchedulerKernelIsCreatedThenGwsIsSetToOneWorkgroup) {
    HardwareInfo hwInfo = *defaultHwInfo;
    hwInfo.featureTable.flags.ftrSimulationMode = true;

    auto device = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(&hwInfo));
    MockProgram program(toClDeviceVector(*device));

    KernelInfo info;
    MockSchedulerKernel kernel(&program, info, *device);
    size_t gws = kernel.getGws();
    EXPECT_EQ(static_cast<size_t>(24u), gws);
}

TEST(SchedulerKernelTest, givenForcedSchedulerGwsByDebugVariableAndSimulationModeWhenSchedulerKernelIsCreatedThenGwsIsSetToForcedValue) {
    DebugManagerStateRestore dbgRestorer;
    DebugManager.flags.SchedulerGWS.set(48);

    HardwareInfo hwInfo = *defaultHwInfo;
    hwInfo.featureTable.flags.ftrSimulationMode = true;

    auto device = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(&hwInfo));
    MockProgram program(toClDeviceVector(*device));

    KernelInfo info;
    MockSchedulerKernel kernel(&program, info, *device);
    size_t gws = kernel.getGws();
    EXPECT_EQ(static_cast<size_t>(48u), gws);
}