Remove device enqueue part 5

-remove scheduler and builtin_kernels_simulation

Related-To: NEO-6559
Signed-off-by: Katarzyna Cencelewska <katarzyna.cencelewska@intel.com>
This commit is contained in:
Katarzyna Cencelewska
2022-01-13 11:27:41 +00:00
committed by Compute-Runtime-Automation
parent 86161a8a4f
commit d2818aaea2
30 changed files with 13 additions and 4849 deletions

View File

@ -747,7 +747,6 @@ set(BUILTINS_BINARIES_BINDFUL_LIB_NAME "builtins_binaries_bindful")
set(BUILTINS_BINARIES_BINDLESS_LIB_NAME "builtins_binaries_bindless")
set(BUILTINS_SPIRV_LIB_NAME "builtins_spirv")
set(BUILTINS_VME_LIB_NAME "builtins_vme")
set(SCHEDULER_BINARY_LIB_NAME "scheduler_binary")
if(WIN32)
set(NEO_EXTRA_LIBS Ws2_32)

View File

@ -1,5 +1,5 @@
#
# Copyright (C) 2021 Intel Corporation
# Copyright (C) 2021-2022 Intel Corporation
#
# SPDX-License-Identifier: MIT
#
@ -21,9 +21,6 @@ macro(generate_runtime_lib LIB_NAME MOCKABLE GENERATE_EXEC)
endif()
target_compile_definitions(${BUILTINS_SOURCES_LIB_NAME} PUBLIC MOCKABLE_VIRTUAL=)
target_compile_definitions(${BUILTINS_VME_LIB_NAME} PUBLIC MOCKABLE_VIRTUAL=)
if(TARGET ${SCHEDULER_BINARY_LIB_NAME})
target_compile_definitions(${SCHEDULER_BINARY_LIB_NAME} PUBLIC MOCKABLE_VIRTUAL=)
endif()
if(${MOCKABLE})
target_compile_definitions(${LIB_NAME} PUBLIC MOCKABLE_VIRTUAL=virtual)

View File

@ -1,5 +1,5 @@
#
# Copyright (C) 2018-2021 Intel Corporation
# Copyright (C) 2018-2022 Intel Corporation
#
# SPDX-License-Identifier: MIT
#
@ -17,17 +17,11 @@ set(MSVC_DEF_ADDITIONAL_EXPORTS "")
set(OPENCL_RUNTIME_PROJECTS_FOLDER "opencl runtime")
set(OPENCL_BUILTINS_PROJECTS_FOLDER "built_ins")
if(NOT TARGET ${BIKSIM_LIB_NAME})
add_subdirectory(builtin_kernels_simulation)
endif()
hide_subdir(builtin_kernels_simulation)
hide_subdir(dll)
add_library(${NEO_STATIC_LIB_NAME} STATIC EXCLUDE_FROM_ALL
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
${CMAKE_CURRENT_SOURCE_DIR}/enable_cores.cmake
$<TARGET_OBJECTS:${BIKSIM_LIB_NAME}>
)
add_subdirectories()
@ -117,11 +111,6 @@ if(${GENERATE_EXECUTABLE})
$<TARGET_OBJECTS:${BUILTINS_VME_LIB_NAME}>
$<TARGET_OBJECTS:${BUILTINS_BINARIES_BINDFUL_LIB_NAME}>
)
if(TARGET ${SCHEDULER_BINARY_LIB_NAME})
list(APPEND NEO_DYNAMIC_LIB__TARGET_OBJECTS
$<TARGET_OBJECTS:${SCHEDULER_BINARY_LIB_NAME}>
)
endif()
if(DEFINED AUB_STREAM_PROJECT_NAME)
list(APPEND NEO_DYNAMIC_LIB__TARGET_OBJECTS $<TARGET_OBJECTS:${AUB_STREAM_PROJECT_NAME}_all_hw>)
@ -211,9 +200,7 @@ endif()
create_project_source_tree(${NEO_STATIC_LIB_NAME})
if(UNIX AND NOT (TARGET clang-tidy))
add_custom_target(clang-tidy
DEPENDS scheduler
)
add_custom_target(clang-tidy)
add_custom_command(
TARGET clang-tidy
POST_BUILD
@ -222,4 +209,3 @@ if(UNIX AND NOT (TARGET clang-tidy))
WORKING_DIRECTORY ${NEO_SOURCE_DIR}
)
endif()

View File

@ -1,53 +0,0 @@
#
# Copyright (C) 2018-2022 Intel Corporation
#
# SPDX-License-Identifier: MIT
#
# Sources making up the builtin-kernels simulation object library.
set(BUILTIN_KERNELS_SIMULATION_SRCS
  ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
  ${CMAKE_CURRENT_SOURCE_DIR}/opencl_c.cpp
  ${CMAKE_CURRENT_SOURCE_DIR}/opencl_c.h
)
# The simulation sources deliberately trip several warnings, so strip
# warnings-as-errors and selected -W flags from the inherited flags.
# NOTE: the input expansions are quoted - an unquoted empty ${CMAKE_CXX_FLAGS}
# makes string(REPLACE ...) fail with "requires at least four arguments".
string(REPLACE "/WX" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
string(REGEX REPLACE "-Werror[^ \t\n]*" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
string(REPLACE "-Wsometimes-uninitialized" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
string(REPLACE "-Wsign-compare" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
string(REPLACE "-Wunused-variable" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
# Quoted comparison: the unquoted form re-dereferences when the expansion
# happens to name another variable (see policy CMP0054).
if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang")
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-everything")
endif()
if(COMPILER_SUPPORTS_CXX11)
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
elseif(COMPILER_SUPPORTS_CXX0X)
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++0x")
endif()
if(NOT MSVC)
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fpermissive -fPIC")
endif()
# Collects DEFAULT_<CORE_TYPE>_PLATFORM=<platform> definitions for every
# supported core type; driven by apply_macro_for_each_core_type below, which
# supplies CORE_TYPE and BRANCH_DIR_LIST.
macro(macro_for_each_core_type)
  foreach(BRANCH_DIR ${BRANCH_DIR_LIST})
    list(APPEND DEFAULT_CORE_PLATFORMS_DEFITIONS DEFAULT_${CORE_TYPE}_PLATFORM=${DEFAULT_SUPPORTED_${CORE_TYPE}_PLATFORM})
  endforeach()
endmacro()
apply_macro_for_each_core_type("SUPPORTED")
add_library(${BIKSIM_LIB_NAME} OBJECT EXCLUDE_FROM_ALL ${BUILTIN_KERNELS_SIMULATION_SRCS})
target_include_directories(${BIKSIM_LIB_NAME} BEFORE PRIVATE
  ${ENGINE_NODE_DIR}
  ${NEO__GMM_INCLUDE_DIR}
  ${KHRONOS_HEADERS_DIR}
  ${KHRONOS_GL_HEADERS_DIR}
  ${NEO__IGC_INCLUDE_DIR}
  ${THIRD_PARTY_DIR}
)
set_target_properties(${BIKSIM_LIB_NAME} PROPERTIES FOLDER "${OPENCL_RUNTIME_PROJECTS_FOLDER}/${OPENCL_BUILTINS_PROJECTS_FOLDER}")
target_compile_definitions(${BIKSIM_LIB_NAME} PUBLIC ${SUPPORTED_CORE_FLAGS_DEFINITONS} ${DEFAULT_CORE_PLATFORMS_DEFITIONS} MOCKABLE_VIRTUAL=)

View File

@ -1,136 +0,0 @@
/*
* Copyright (C) 2018-2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "opencl_c.h"
#include "shared/source/helpers/string.h"
namespace BuiltinKernelsSimulation {
#define SCHEDULER_EMULATION 1
// globals
// Serializes the emulated atomic_* builtins declared in opencl_c.h.
std::mutex gMutex;
// Per-dimension work-item IDs and local size used by the loop-based
// simulation mode (read by the get_*_id()/get_local_size() helpers below).
unsigned int globalID[3];
unsigned int localID[3];
unsigned int localSize[3];
// Threaded simulation mode: maps a host thread to its flat work-item ID
// (used directly as the global ID; modulo 24 as the local ID).
std::map<std::thread::id, uint32_t> threadIDToLocalIDmap;
// Work-group barrier used by barrier(); presumably installed by the
// simulation driver before kernels run - TODO confirm.
SynchronizationBarrier *pGlobalBarrier = nullptr;
// Component-wise sum of two uint4 vectors (host-side stand-in for the
// OpenCL vector operator).
uint4 operator+(uint4 const &a, uint4 const &b) {
    return uint4(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w);
}
// Component-wise sum of two int4 vectors (host-side stand-in for the
// OpenCL vector operator).
int4 operator+(int4 const &a, int4 const &b) {
    return int4(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w);
}
// Local work-item ID for the simulated NDRange.
// Threaded mode (thread->ID map populated): the calling thread's registered
// ID folded into a 24-wide work-group; 'dim' is ignored in this mode.
// Loop mode: the driver-maintained localID counter for 'dim'.
uint get_local_id(int dim) {
    if (threadIDToLocalIDmap.empty()) {
        // id from loop iteration
        return localID[dim];
    }
    const std::thread::id self = std::this_thread::get_id();
    return threadIDToLocalIDmap[self] % 24; // 24 work-items per group
}
// Global work-item ID for the simulated NDRange.
// Threaded mode: the calling thread's registered ID ('dim' is ignored).
// Loop mode: the driver-maintained globalID counter for 'dim'.
uint get_global_id(int dim) {
    if (threadIDToLocalIDmap.empty()) {
        // id from loop iteration
        return globalID[dim];
    }
    return threadIDToLocalIDmap[std::this_thread::get_id()];
}
// Local work-group size for 'dim', as maintained in the localSize global.
uint get_local_size(int dim) {
return localSize[dim];
}
// Number of simulated work-groups: total simulation threads divided by the
// fixed 24 work-items per group. 'dim' is ignored (1D simulation only).
uint get_num_groups(int dim) {
return NUM_OF_THREADS / 24;
}
// Work-group ID, derived from the global ID with the fixed 24-wide groups.
uint get_group_id(int dim) {
return get_global_id(dim) / 24;
}
// Emulated work-group barrier. The fence-flags argument (CLK_*_MEM_FENCE)
// is accepted for source compatibility but otherwise unused.
void barrier(int x) {
pGlobalBarrier->enter();
// int LID = get_local_id(0);
// volatile keeps the debugger anchor below from being optimized away.
volatile int BreakPointHere = 0;
// PUT BREAKPOINT HERE to stop after each barrier
BreakPointHere++;
}
// Emulated read_imageui: copies 'channels' values of 'bytesPerChannel' bytes
// each from the texel at 'coord' into successive 32-bit components of the
// result. Components beyond the channel count keep their initializer
// (alpha defaults to 1).
// NOTE(review): each channel's bytes land in the low bytes of a component,
// which assumes a little-endian host - TODO confirm.
uint4 read_imageui(image *im, int4 coord) {
uint4 color = {0, 0, 0, 1};
// Byte offset of the addressed texel (slice -> row -> column).
uint offset = ((coord.z * im->height + coord.y) * im->width + coord.x) * im->bytesPerChannel * im->channels;
char *temp = &im->ptr[offset];
char *colorDst = (char *)&color;
for (uint i = 0; i < im->channels; i++) {
memcpy_s(colorDst, sizeof(uint4), temp, im->bytesPerChannel);
temp += im->bytesPerChannel;
colorDst += 4; // advance one 32-bit component
}
return color;
}
// Emulated write_imageui: stores the low 'bytesPerChannel' bytes of each
// 32-bit component of 'color' into the texel at 'coord'.
// NOTE(review): the return value reinterprets memory just past the last
// channel written (hence the NOLINT). Looks intentional for the simulator,
// but callers should not rely on it - verify before reuse.
uint4 write_imageui(image *im, uint4 coord, uint4 color) {
// Byte offset of the addressed texel (slice -> row -> column).
uint offset = ((coord.z * im->height + coord.y) * im->width + coord.x) * im->bytesPerChannel * im->channels;
char *temp = &im->ptr[offset];
char *colorSrc = (char *)&color;
// Total image byte size; bounds the destination for memcpy_s.
size_t size = im->width * im->height * im->depth * im->bytesPerChannel * im->channels;
for (uint i = 0; i < im->channels; i++) {
memcpy_s(temp, size - offset, colorSrc, im->bytesPerChannel);
temp += im->bytesPerChannel;
colorSrc += 4;
}
return *(uint4 *)temp; // NOLINT
}
// Saturating uint -> uchar conversion matching the OpenCL convert_uchar_sat
// builtin this simulation stands in for: values above 255 clamp to 255.
// Fixes the previous plain cast, which wrapped modulo 256 (e.g. 300 -> 44)
// and diverged from the builtin's specified semantics.
uchar convert_uchar_sat(uint c) {
    return (uchar)(c > 0xFFu ? 0xFFu : c);
}
// Saturating uint -> ushort conversion matching the OpenCL convert_ushort_sat
// builtin: values above 65535 clamp to 65535 instead of wrapping as the
// previous plain cast did.
ushort convert_ushort_sat(uint c) {
    return (ushort)(c > 0xFFFFu ? 0xFFFFu : c);
}
} // namespace BuiltinKernelsSimulation

View File

@ -1,291 +0,0 @@
/*
* Copyright (C) 2018-2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include "CL/cl.h"
#include <condition_variable>
#include <cstdint>
#include <map>
#include <mutex>
#include <string.h>
#include <thread>
// OpenCL Types
// Host-side aliases for the OpenCL scalar types used by simulated kernels.
typedef uint32_t uint;
typedef uint8_t uchar;
typedef uint16_t ushort;
typedef uint64_t ulong;
namespace BuiltinKernelsSimulation {
// Number of simulation threads per work-group (fixed at 24).
#define NUM_OF_THREADS 24
// Emulated cl_mem_fence_flags values accepted by barrier().
#define CLK_GLOBAL_MEM_FENCE 1
#define CLK_LOCAL_MEM_FENCE 2
// Reusable "generation counter" barrier for the simulation threads: the last
// thread to arrive re-arms the barrier, bumps the generation and wakes all
// waiters; earlier arrivals wait until the generation changes, which also
// makes the wait robust against spurious wakeups.
class SynchronizationBarrier {
public:
// 'count' is the number of threads that must call enter() per generation.
SynchronizationBarrier(int count) : m_InitialCount(count) {
m_Count = count;
m_BarrierCounter = 0;
}
~SynchronizationBarrier() {
}
// Blocks until all 'count' threads have entered the current generation.
void enter() {
std::unique_lock<std::mutex> lck(m_Mutex);
m_Count--;
// Snapshot the generation we are waiting to leave.
unsigned int BarrierCount = m_BarrierCounter;
if (m_Count > 0) {
// Re-checked loop guards against spurious wakeups.
while (BarrierCount == m_BarrierCounter) {
m_AllHitBarrierCondition.wait(lck);
}
} else {
// Last thread in: re-arm for the next generation and release waiters.
m_Count = m_InitialCount;
m_BarrierCounter++;
m_AllHitBarrierCondition.notify_all();
}
}
private:
std::mutex m_Mutex;
std::condition_variable m_AllHitBarrierCondition;
int m_Count;                   // threads still to arrive this generation
const int m_InitialCount;      // barrier width
unsigned int m_BarrierCounter; // generation counter
};
// globals
extern std::mutex gMutex;
extern unsigned int globalID[3];
extern unsigned int localID[3];
extern unsigned int localSize[3];
extern std::map<std::thread::id, uint32_t> threadIDToLocalIDmap;
extern SynchronizationBarrier *pGlobalBarrier;
// Host-side stand-ins for the OpenCL vector types used by simulated kernels.
typedef struct taguint2 {
taguint2(uint x, uint y) {
this->x = x;
this->y = y;
}
taguint2() {
this->x = 0;
this->y = 0;
}
uint x;
uint y;
} uint2;
typedef struct taguint3 {
taguint3(uint x, uint y, uint z) {
this->x = x;
this->y = y;
this->z = z;
}
taguint3() {
this->x = 0;
this->y = 0;
this->z = 0;
}
uint x;
uint y;
uint z;
} uint3;
// NOTE: uint4 has no default constructor - all four components must be given.
typedef struct taguint4 {
taguint4(uint x, uint y, uint z, uint w) {
this->x = x;
this->y = y;
this->z = z;
this->w = w;
}
uint x;
uint y;
uint z;
uint w;
} uint4;
// The signed variants below likewise provide no default constructor.
typedef struct tagint2 {
tagint2(int x, int y) {
this->x = x;
this->y = y;
}
int x;
int y;
} int2;
typedef struct tagint3 {
tagint3(int x, int y, int z) {
this->x = x;
this->y = y;
this->z = z;
}
int x;
int y;
int z;
} int3;
typedef struct tagint4 {
tagint4(int x, int y, int z, int w) {
this->x = x;
this->y = y;
this->z = z;
this->w = w;
}
int x;
int y;
int z;
int w;
} int4;
typedef struct tagushort2 {
tagushort2(ushort x, ushort y) {
this->x = x;
this->y = y;
}
unsigned short x;
unsigned short y;
} ushort2;
// ushort8/ushort16 are plain storage blobs with no element accessors.
typedef struct tagushort8 {
unsigned short xxx[8];
} ushort8;
typedef struct tagushort16 {
unsigned short xxx[16];
} ushort16;
// Component-wise vector addition; definitions live in opencl_c.cpp.
uint4 operator+(uint4 const &a, uint4 const &b);
int4 operator+(int4 const &a, int4 const &b);
// Host-side image descriptor backing the emulated image1d/2d/3d_t handles:
// raw pixel storage plus dimensions and per-channel layout.
typedef struct tagimage {
char *ptr;
uint width;
uint height;
uint depth;
uint bytesPerChannel;
uint channels;
} image;
// images as pointer
typedef image *image1d_t;
typedef image *image2d_t;
typedef image *image3d_t;
// OpenCL keywords
// Address-space and kernel qualifiers compile away on the host.
#define __global
#define __local
#define __private
#define __kernel
#define __attribute__(...)
#define __read_only
#define __write_only
#define queue_t void *
// Emulated device-side event handle: a 32-bit value convertible to and from
// both void* and uint.
struct clk_event_t {
clk_event_t() {
value = 0;
}
clk_event_t(void *v) {
value = static_cast<uint>(reinterpret_cast<uintptr_t>(v));
}
explicit operator void *() const {
return reinterpret_cast<void *>(static_cast<uintptr_t>(value));
}
operator uint() {
return (uint)value;
}
void operator=(uint input) {
value = input;
}
uint value;
};
// OpenCL builtins
// Approximates the bit-reinterpreting builtin with a plain C cast.
#define __builtin_astype(var, type) \
( \
(type)var)
// Scalar select: returns b when c is true, a otherwise.
#define select(a, b, c) (c ? b : a)
// Work-item queries backed by the simulation globals (see opencl_c.cpp).
uint get_local_id(int dim);
uint get_global_id(int dim);
uint get_local_size(int dim);
uint get_num_groups(int dim);
uint get_group_id(int dim);
void barrier(int x);
uint4 read_imageui(image *im, int4 coord);
uint4 write_imageui(image *im, uint4 coord, uint4 color);
uchar convert_uchar_sat(uint c);
ushort convert_ushort_sat(uint c);
// Function prologue for simulated kernels; materializes a local-ID variable
// presumably referenced by the translated kernel bodies - TODO confirm.
#define EMULATION_ENTER_FUNCTION() \
uint __LOCAL_ID__ = 0; \
__LOCAL_ID__ = get_local_id(0);
// Emulated OpenCL atomics: every operation serializes on the global gMutex.
// All except atomic_xchg return the previous value.
// NOTE(review): unlike the OpenCL builtin, this atomic_xchg returns void.
template <class TYPE, class TYPE2>
void atomic_xchg(TYPE *dest, TYPE2 val) {
gMutex.lock();
dest[0] = (TYPE)val;
gMutex.unlock();
}
template <class TYPE, class TYPE2>
TYPE atomic_add(TYPE *first, TYPE2 second) {
gMutex.lock();
TYPE temp = first[0];
first[0] = (TYPE)(temp + (TYPE)second);
gMutex.unlock();
return temp;
}
template <class TYPE, class TYPE2>
TYPE atomic_sub(TYPE *first, TYPE2 second) {
gMutex.lock();
TYPE temp = first[0];
first[0] = temp - second;
gMutex.unlock();
return temp;
}
template <class TYPE>
TYPE atomic_inc(TYPE *first) {
gMutex.lock();
TYPE temp = first[0];
first[0] = temp + 1;
gMutex.unlock();
return temp;
}
template <class TYPE>
TYPE atomic_dec(TYPE *first) {
gMutex.lock();
TYPE temp = first[0];
first[0] = temp - 1;
gMutex.unlock();
return temp;
}
template <class TYPE, class TYPE2>
TYPE atomic_min(TYPE *first, TYPE2 second) {
gMutex.lock();
TYPE temp = first[0];
first[0] = (TYPE)((TYPE)second < temp ? (TYPE)second : temp);
gMutex.unlock();
return temp;
}
} // namespace BuiltinKernelsSimulation

View File

@ -1,21 +0,0 @@
/*
* Copyright (C) 2018-2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "opencl/source/builtin_kernels_simulation/scheduler_simulation.h"
#include "opencl/source/builtin_kernels_simulation/opencl_c.h"
#include <thread>
using namespace NEO;
namespace BuiltinKernelsSimulation {
// Flag shared with the simulation driver; presumably signals worker threads
// that shared state is ready - TODO confirm against scheduler_simulation.h.
bool conditionReady = false;
// One host thread per simulated hardware thread in the work-group.
std::thread threads[NUM_OF_THREADS];
} // namespace BuiltinKernelsSimulation

View File

@ -1,116 +0,0 @@
/*
* Copyright (C) 2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "opencl/source/command_queue/gpgpu_walker.h"
namespace NEO {
// Emits the command-stream packets that launch the device-enqueue scheduler
// kernel: pipe control, media interface descriptor load, the scheduler's
// indirect state and GPGPU_WALKER, and - except on the first scheduler run -
// a chained MI_BATCH_BUFFER_START back into the SLB.
template <typename GfxFamily>
void GpgpuWalkerHelper<GfxFamily>::dispatchScheduler(
LinearStream &commandStream,
DeviceQueueHw<GfxFamily> &devQueueHw,
PreemptionMode preemptionMode,
SchedulerKernel &scheduler,
IndirectHeap *ssh,
IndirectHeap *dsh,
bool isCcsUsed) {
const auto &kernelInfo = scheduler.getKernelInfo();
using INTERFACE_DESCRIPTOR_DATA = typename GfxFamily::INTERFACE_DESCRIPTOR_DATA;
using GPGPU_WALKER = typename GfxFamily::GPGPU_WALKER;
using MI_BATCH_BUFFER_START = typename GfxFamily::MI_BATCH_BUFFER_START;
const auto &hwInfo = devQueueHw.getDevice().getHardwareInfo();
NEO::PipeControlArgs args;
MemorySynchronizationCommands<GfxFamily>::addPipeControl(commandStream, args);
// The scheduler owns a dedicated interface descriptor slot in the queue's
// descriptor table, placed after the color calc state.
uint32_t interfaceDescriptorIndex = devQueueHw.schedulerIDIndex;
const size_t offsetInterfaceDescriptorTable = devQueueHw.colorCalcStateSize;
const size_t offsetInterfaceDescriptor = offsetInterfaceDescriptorTable;
const size_t totalInterfaceDescriptorTableSize = devQueueHw.interfaceDescriptorEntries * sizeof(INTERFACE_DESCRIPTOR_DATA);
// Program media interface descriptor load
HardwareCommandsHelper<GfxFamily>::sendMediaInterfaceDescriptorLoad(
commandStream,
offsetInterfaceDescriptor,
totalInterfaceDescriptorTableSize);
DEBUG_BREAK_IF(offsetInterfaceDescriptorTable % 64 != 0);
// Determine SIMD size
uint32_t simd = kernelInfo.getMaxSimdSize();
DEBUG_BREAK_IF(simd != PARALLEL_SCHEDULER_COMPILATION_SIZE_20);
// Patch our kernel constants (1D dispatch: gws/lws in dimension 0 only).
scheduler.setGlobalWorkOffsetValues(0, 0, 0);
scheduler.setGlobalWorkSizeValues(static_cast<uint32_t>(scheduler.getGws()), 1, 1);
scheduler.setLocalWorkSizeValues(static_cast<uint32_t>(scheduler.getLws()), 1, 1);
scheduler.setLocalWorkSize2Values(static_cast<uint32_t>(scheduler.getLws()), 1, 1);
scheduler.setEnqueuedLocalWorkSizeValues(static_cast<uint32_t>(scheduler.getLws()), 1, 1);
scheduler.setNumWorkGroupsValues(static_cast<uint32_t>(scheduler.getGws() / scheduler.getLws()), 0, 0);
scheduler.setWorkDim(1);
// Send our indirect object data
size_t localWorkSizes[3] = {scheduler.getLws(), 1, 1};
// Create indirectHeap for IOH that is located at the end of device enqueue DSH
size_t curbeOffset = devQueueHw.setSchedulerCrossThreadData(scheduler);
IndirectHeap indirectObjectHeap(dsh->getCpuBase(), dsh->getMaxAvailableSpace());
indirectObjectHeap.getSpace(curbeOffset);
IndirectHeap *ioh = &indirectObjectHeap;
// Program the walker. Invokes execution so all state should already be programmed
auto pGpGpuWalkerCmd = commandStream.getSpaceForCmd<GPGPU_WALKER>();
GPGPU_WALKER cmdWalker = GfxFamily::cmdInitGpgpuWalker;
bool inlineDataProgrammingRequired = HardwareCommandsHelper<GfxFamily>::inlineDataProgrammingRequired(scheduler);
auto kernelUsesLocalIds = HardwareCommandsHelper<GfxFamily>::kernelUsesLocalIds(scheduler);
HardwareCommandsHelper<GfxFamily>::sendIndirectState(
commandStream,
*dsh,
*ioh,
*ssh,
scheduler,
scheduler.getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed),
simd,
localWorkSizes,
offsetInterfaceDescriptorTable,
interfaceDescriptorIndex,
preemptionMode,
&cmdWalker,
nullptr,
true,
devQueueHw.getDevice());
// Implement enabling special WA DisableLSQCROPERFforOCL if needed
GpgpuWalkerHelper<GfxFamily>::applyWADisableLSQCROPERFforOCL(&commandStream, scheduler, true);
size_t globalOffsets[3] = {0, 0, 0};
size_t workGroups[3] = {(scheduler.getGws() / scheduler.getLws()), 1, 1};
GpgpuWalkerHelper<GfxFamily>::setGpgpuWalkerThreadData(&cmdWalker, kernelInfo.kernelDescriptor, globalOffsets, globalOffsets, workGroups, localWorkSizes,
simd, 1, true, inlineDataProgrammingRequired, 0u);
// Commit the fully-populated walker into the reserved command-stream slot.
*pGpGpuWalkerCmd = cmdWalker;
// Implement disabling special WA DisableLSQCROPERFforOCL if needed
GpgpuWalkerHelper<GfxFamily>::applyWADisableLSQCROPERFforOCL(&commandStream, scheduler, false);
// Do not put BB_START only when returning in first Scheduler run
if (devQueueHw.getSchedulerReturnInstance() != 1) {
args.dcFlushEnable = MemorySynchronizationCommands<GfxFamily>::getDcFlushEnable(true, hwInfo);
MemorySynchronizationCommands<GfxFamily>::addPipeControl(commandStream, args);
// Add BB Start Cmd to the SLB in the Primary Batch Buffer
auto bbStart = commandStream.getSpaceForCmd<MI_BATCH_BUFFER_START>();
MI_BATCH_BUFFER_START cmdBbStart = GfxFamily::cmdInitBatchBufferStart;
cmdBbStart.setSecondLevelBatchBuffer(MI_BATCH_BUFFER_START::SECOND_LEVEL_BATCH_BUFFER_FIRST_LEVEL_BATCH);
uint64_t slbAddress = devQueueHw.getSlbBuffer()->getGpuAddress();
cmdBbStart.setBatchBufferStartAddress(slbAddress);
*bbStart = cmdBbStart;
}
}
} // namespace NEO

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2018-2021 Intel Corporation
* Copyright (C) 2018-2022 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -27,7 +27,6 @@
#include "opencl/source/helpers/surface_formats.h"
#include "opencl/source/mem_obj/image.h"
#include "opencl/source/platform/platform.h"
#include "opencl/source/scheduler/scheduler_kernel.h"
#include "opencl/source/sharings/sharing.h"
#include "opencl/source/sharings/sharing_factory.h"
@ -44,7 +43,6 @@ Context::Context(
contextCallback = funcNotify;
userData = data;
sharingFunctions.resize(SharingType::MAX_SHARING_VALUE);
schedulerBuiltIn = std::make_unique<BuiltInKernel>();
}
Context::~Context() {
@ -69,10 +67,6 @@ Context::~Context() {
for (auto &device : devices) {
device->decRefInternal();
}
delete static_cast<SchedulerKernel *>(schedulerBuiltIn->pKernel);
delete schedulerBuiltIn->pProgram;
schedulerBuiltIn->pKernel = nullptr;
schedulerBuiltIn->pProgram = nullptr;
}
cl_int Context::setDestructorCallback(void(CL_CALLBACK *funcNotify)(cl_context, void *),
@ -414,48 +408,6 @@ cl_int Context::getSupportedImageFormats(
return CL_SUCCESS;
}
// Lazily builds (once per context) and returns the device-enqueue scheduler
// kernel: the program is created from the embedded scheduler binary for
// device 0 and the kernel extracted from it by its well-known name.
// NOTE(review): the unsynchronized fast-path read of pKernel relies on
// std::call_once publication for visibility - verify on weakly-ordered
// targets.
SchedulerKernel &Context::getSchedulerKernel() {
if (schedulerBuiltIn->pKernel) {
return *static_cast<SchedulerKernel *>(schedulerBuiltIn->pKernel);
}
// Executed at most once, guarded by programIsInitialized below.
auto initializeSchedulerProgramAndKernel = [&] {
cl_int retVal = CL_SUCCESS;
auto clDevice = getDevice(0);
auto src = SchedulerKernel::loadSchedulerKernel(&clDevice->getDevice());
auto program = Program::createBuiltInFromGenBinary(this,
devices,
src.resource.data(),
src.resource.size(),
&retVal);
DEBUG_BREAK_IF(retVal != CL_SUCCESS);
DEBUG_BREAK_IF(!program);
retVal = program->processGenBinary(*clDevice);
DEBUG_BREAK_IF(retVal != CL_SUCCESS);
schedulerBuiltIn->pProgram = program;
auto kernelInfo = schedulerBuiltIn->pProgram->getKernelInfo(SchedulerKernel::schedulerName, clDevice->getRootDeviceIndex());
DEBUG_BREAK_IF(!kernelInfo);
schedulerBuiltIn->pKernel = Kernel::create<SchedulerKernel>(
schedulerBuiltIn->pProgram,
*kernelInfo,
*clDevice,
&retVal);
// The scheduler kernel must not require scratch space.
UNRECOVERABLE_IF(schedulerBuiltIn->pKernel->getScratchSize() != 0);
DEBUG_BREAK_IF(retVal != CL_SUCCESS);
};
std::call_once(schedulerBuiltIn->programIsInitialized, initializeSchedulerProgramAndKernel);
UNRECOVERABLE_IF(schedulerBuiltIn->pKernel == nullptr);
return *static_cast<SchedulerKernel *>(schedulerBuiltIn->pKernel);
}
bool Context::isDeviceAssociated(const ClDevice &clDevice) const {
for (const auto &pDevice : devices) {
if (pDevice == &clDevice) {

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2018-2021 Intel Corporation
* Copyright (C) 2018-2022 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -35,7 +35,6 @@ class MemObj;
class MemoryManager;
class SharingFunctions;
class SVMAllocsManager;
class SchedulerKernel;
class Program;
class Platform;
@ -167,8 +166,6 @@ class Context : public BaseObject<_cl_context> {
ContextType peekContextType() const { return contextType; }
MOCKABLE_VIRTUAL SchedulerKernel &getSchedulerKernel();
bool isDeviceAssociated(const ClDevice &clDevice) const;
ClDevice *getSubDeviceByIndex(uint32_t subDeviceIndex) const;
@ -212,7 +209,6 @@ class Context : public BaseObject<_cl_context> {
std::vector<std::unique_ptr<SharingFunctions>> sharingFunctions;
ClDeviceVector devices;
ContextDestructorCallbacks destructorCallbacks;
std::unique_ptr<BuiltInKernel> schedulerBuiltIn;
const cl_context_properties *properties = nullptr;
size_t numProperties = 0u;

View File

@ -22,7 +22,6 @@ class Device;
class Kernel;
class Event;
struct MultiDispatchInfo;
class SchedulerKernel;
class HwTimeStamps;
class TagNodeBase;

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2019-2021 Intel Corporation
* Copyright (C) 2019-2022 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -172,30 +172,6 @@ IndirectHeap *DeviceQueueHw<GfxFamily>::getIndirectHeap(IndirectHeap::Type type)
return heaps[type];
}
// Reserves space for the scheduler's curbe at the tail of the device-enqueue
// DSH (keeping one 4KB page of padding after it) and publishes the offset
// and size into the IGIL queue header, presumably for the device-side
// runtime to consume - TODO confirm.
template <typename GfxFamily>
size_t DeviceQueueHw<GfxFamily>::setSchedulerCrossThreadData(SchedulerKernel &scheduler) {
// NOTE(review): this alias is unused in this function.
using INTERFACE_DESCRIPTOR_DATA = typename GfxFamily::INTERFACE_DESCRIPTOR_DATA;
size_t offset = dshBuffer->getUnderlyingBufferSize() - scheduler.getCurbeSize() - 4096; // Page size padding
auto igilCmdQueue = reinterpret_cast<IGIL_CommandQueue *>(queueBuffer->getUnderlyingBuffer());
igilCmdQueue->m_controls.m_SchedulerDSHOffset = (uint32_t)offset;
igilCmdQueue->m_controls.m_SchedulerConstantBufferSize = (uint32_t)scheduler.getCurbeSize();
return offset;
}
// Thin forwarder: delegates to the walker helper, which emits the scheduler
// kernel dispatch into the command stream on behalf of this device queue.
template <typename GfxFamily>
void DeviceQueueHw<GfxFamily>::dispatchScheduler(LinearStream &commandStream, SchedulerKernel &scheduler, PreemptionMode preemptionMode, IndirectHeap *ssh, IndirectHeap *dsh, bool isCcsUsed) {
    GpgpuWalkerHelper<GfxFamily>::dispatchScheduler(commandStream, *this, preemptionMode, scheduler, ssh, dsh, isCcsUsed);
}
template <typename GfxFamily>
size_t DeviceQueueHw<GfxFamily>::getCSPrefetchSize() {
return 512;

View File

@ -23,7 +23,6 @@
#include "opencl/source/helpers/dispatch_info.h"
#include "opencl/source/kernel/kernel.h"
#include "opencl/source/program/block_kernel_manager.h"
#include "opencl/source/scheduler/scheduler_kernel.h"
#include <cstring>
@ -127,10 +126,6 @@ size_t HardwareCommandsHelper<GfxFamily>::getSshSizeForExecutionModel(const Kern
maxBindingTableCount = std::max(maxBindingTableCount, static_cast<uint32_t>(pBlockInfo->kernelDescriptor.payloadMappings.bindingTable.numEntries));
}
SchedulerKernel &scheduler = kernel.getContext().getSchedulerKernel();
totalSize += getSizeRequiredSSH(scheduler);
totalSize += maxBindingTableCount * sizeof(BINDING_TABLE_STATE) * DeviceQueue::interfaceDescriptorEntries;
totalSize = alignUp(totalSize, BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE);

View File

@ -1,18 +0,0 @@
#
# Copyright (C) 2018-2021 Intel Corporation
#
# SPDX-License-Identifier: MIT
#
# Scheduler kernel sources compiled into the static runtime library.
set(RUNTIME_SRCS_SCHEDULER
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
${CMAKE_CURRENT_SOURCE_DIR}/scheduler.cl
${CMAKE_CURRENT_SOURCE_DIR}/scheduler_kernel.cpp
${CMAKE_CURRENT_SOURCE_DIR}/scheduler_kernel.h
)
target_sources(${NEO_STATIC_LIB_NAME} PRIVATE ${RUNTIME_SRCS_SCHEDULER})
# Published as a global property; presumably consumed by other directories -
# TODO confirm before removing.
set_property(GLOBAL PROPERTY RUNTIME_SRCS_SCHEDULER ${RUNTIME_SRCS_SCHEDULER})
# Set up the prebuilt scheduler binary target only once per configure.
if(NOT (TARGET ${SCHEDULER_BINARY_LIB_NAME}))
include(scheduler_binary.cmake)
endif()

File diff suppressed because it is too large Load Diff

View File

@ -1,98 +0,0 @@
#
# Copyright (C) 2018-2022 Intel Corporation
#
# SPDX-License-Identifier: MIT
#
# Umbrella target that all per-platform scheduler kernel compilations hang off.
add_custom_target(scheduler)
set(OPENCL_SCHEDULER_PROJECTS_FOLDER "scheduler")
set(SCHEDULER_OUTDIR_WITH_ARCH "${TargetDir}/scheduler/${NEO_ARCH}")
set_target_properties(scheduler PROPERTIES FOLDER "${OPENCL_RUNTIME_PROJECTS_FOLDER}/${OPENCL_SCHEDULER_PROJECTS_FOLDER}")
set(SCHEDULER_KERNEL scheduler.cl)
# Extra ocloc options: IGC include paths and a DEBUG define for debug builds.
if(DEFINED NEO__IGC_INCLUDE_DIR)
list(APPEND __ocloc__options__ "-I$<JOIN:${NEO__IGC_INCLUDE_DIR}, -I>")
endif()
if("${CMAKE_BUILD_TYPE}" STREQUAL "Debug")
list(APPEND __ocloc__options__ "-D DEBUG")
endif()
set(SCHEDULER_INCLUDE_DIR ${TargetDir})
# compile_kernel(target core_type platform_type kernel)
# Defines a custom target that compiles the scheduler .cl with ocloc for one
# core/platform (or copies prebuilt artifacts when builtin compilation is
# disabled). Exports SCHEDULER_CPP (generated .cpp path) to the caller.
function(compile_kernel target core_type platform_type kernel)
get_family_name_with_type(${core_type} ${platform_type})
string(TOLOWER ${core_type} core_type_lower)
# get filename
set(OUTPUTDIR "${SCHEDULER_OUTDIR_WITH_ARCH}/${core_type_lower}")
list(APPEND __ocloc__options__ "-I ../${core_type_lower}")
get_filename_component(BASENAME ${kernel} NAME_WE)
set(OUTPUTPATH "${OUTPUTDIR}/${BASENAME}_${family_name_with_type}.bin")
set(SCHEDULER_CPP "${OUTPUTDIR}/${BASENAME}_${family_name_with_type}.cpp")
list(APPEND __ocloc__options__ "-cl-kernel-arg-info")
list(APPEND __ocloc__options__ "-cl-std=CL2.0")
list(APPEND __ocloc__options__ "-cl-intel-disable-a64WA")
if(NOT NEO_DISABLE_BUILTINS_COMPILATION)
# Compile with ocloc; -cpp_file also emits the embeddable .cpp source.
add_custom_command(
OUTPUT ${OUTPUTPATH}
COMMAND ${ocloc_cmd_prefix} -q -file ${kernel} -device ${DEFAULT_SUPPORTED_${core_type}_${platform_type}_PLATFORM} -cl-intel-greater-than-4GB-buffer-required -${NEO_BITS} -out_dir ${OUTPUTDIR} -cpp_file -options "$<JOIN:${__ocloc__options__}, >" -internal_options "-cl-intel-no-spill"
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
DEPENDS ${kernel} ocloc copy_compiler_files
)
set(SCHEDULER_CPP ${SCHEDULER_CPP} PARENT_SCOPE)
add_custom_target(${target} DEPENDS ${OUTPUTPATH})
set_target_properties(${target} PROPERTIES FOLDER "${OPENCL_RUNTIME_PROJECTS_FOLDER}/${OPENCL_SCHEDULER_PROJECTS_FOLDER}/${core_type_lower}")
else()
# Prebuilt path: copy the .bin/.cpp from the neo_test_kernels checkout.
set(_file_prebuilt "${NEO_SOURCE_DIR}/../neo_test_kernels/scheduler/${NEO_ARCH}/${core_type_lower}/${BASENAME}_${family_name_with_type}.bin")
if(EXISTS ${_file_prebuilt})
add_custom_command(
OUTPUT ${OUTPUTPATH}
COMMAND ${CMAKE_COMMAND} -E make_directory ${OUTPUTDIR}
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${_file_prebuilt} ${OUTPUTDIR}
)
add_custom_target(${target} DEPENDS ${OUTPUTPATH})
set_target_properties(${target} PROPERTIES FOLDER "${OPENCL_RUNTIME_PROJECTS_FOLDER}/${OPENCL_SCHEDULER_PROJECTS_FOLDER}/${core_type_lower}")
endif()
set(_file_prebuilt "${NEO_SOURCE_DIR}/../neo_test_kernels/scheduler/${NEO_ARCH}/${core_type_lower}/${BASENAME}_${family_name_with_type}.cpp")
if(EXISTS ${_file_prebuilt})
add_custom_command(
OUTPUT ${SCHEDULER_CPP}
COMMAND ${CMAKE_COMMAND} -E make_directory ${OUTPUTDIR}
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${_file_prebuilt} ${OUTPUTDIR}
)
set(SCHEDULER_CPP ${SCHEDULER_CPP} PARENT_SCOPE)
endif()
endif()
endfunction()
apply_macro_for_each_core_type("SUPPORTED")
# Object library wrapping the generated scheduler .cpp files.
add_library(${SCHEDULER_BINARY_LIB_NAME} OBJECT EXCLUDE_FROM_ALL CMakeLists.txt)
if(COMPILE_BUILT_INS)
# NOTE(review): GENERATED_SCHEDULER_CPPS / SCHEDULER_TARGETS appear to be
# populated by the per-core macro invoked above - confirm.
if(NOT "${GENERATED_SCHEDULER_CPPS}" STREQUAL "")
target_sources(${SCHEDULER_BINARY_LIB_NAME} PUBLIC ${GENERATED_SCHEDULER_CPPS})
set_source_files_properties(${GENERATED_SCHEDULER_CPPS} PROPERTIES GENERATED TRUE)
endif()
foreach(SCHEDULER_TARGET ${SCHEDULER_TARGETS})
add_dependencies(${SCHEDULER_BINARY_LIB_NAME} ${SCHEDULER_TARGET})
endforeach()
endif()
set_target_properties(${SCHEDULER_BINARY_LIB_NAME} PROPERTIES LINKER_LANGUAGE CXX)
set_target_properties(${SCHEDULER_BINARY_LIB_NAME} PROPERTIES POSITION_INDEPENDENT_CODE ON)
set_target_properties(${SCHEDULER_BINARY_LIB_NAME} PROPERTIES FOLDER "${OPENCL_RUNTIME_PROJECTS_FOLDER}/${OPENCL_SCHEDULER_PROJECTS_FOLDER}")
add_dependencies(${SCHEDULER_BINARY_LIB_NAME} scheduler)
target_include_directories(${SCHEDULER_BINARY_LIB_NAME} PRIVATE
${ENGINE_NODE_DIR}
${KHRONOS_HEADERS_DIR}
${NEO__GMM_INCLUDE_DIR}
${NEO__IGC_INCLUDE_DIR}
${THIRD_PARTY_DIR}
)

View File

@ -1,78 +0,0 @@
/*
* Copyright (C) 2018-2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "opencl/source/scheduler/scheduler_kernel.h"
#include "shared/source/device/device.h"
#include "shared/source/helpers/hw_helper.h"
#include "opencl/source/cl_device/cl_device.h"
#include <cinttypes>
namespace NEO {
// Binds the scheduler kernel's fixed argument list; every argument is an
// SVM-style pointer to one of the device-enqueue runtime surfaces.
// Argument indices: 0=queue, 1=command stack, 2=event pool, 3=secondary
// (SLB) batch buffer, 4=DSH, 5=reflection surface, 6=queue storage, 7=SSH,
// 8=optional debug queue (bound only when non-null).
void SchedulerKernel::setArgs(GraphicsAllocation *queue,
GraphicsAllocation *commandsStack,
GraphicsAllocation *eventsPool,
GraphicsAllocation *secondaryBatchBuffer,
GraphicsAllocation *dsh,
GraphicsAllocation *reflectionSurface,
GraphicsAllocation *queueStorageBuffer,
GraphicsAllocation *ssh,
GraphicsAllocation *debugQueue) {
setArgSvmAlloc(0, reinterpret_cast<void *>(queue->getGpuAddress()), queue);
setArgSvmAlloc(1, reinterpret_cast<void *>(commandsStack->getGpuAddress()), commandsStack);
setArgSvmAlloc(2, reinterpret_cast<void *>(eventsPool->getGpuAddress()), eventsPool);
setArgSvmAlloc(3, reinterpret_cast<void *>(secondaryBatchBuffer->getGpuAddress()), secondaryBatchBuffer);
setArgSvmAlloc(4, reinterpret_cast<void *>(dsh->getGpuAddress()), dsh);
setArgSvmAlloc(5, reinterpret_cast<void *>(reflectionSurface->getGpuAddress()), reflectionSurface);
setArgSvmAlloc(6, reinterpret_cast<void *>(queueStorageBuffer->getGpuAddress()), queueStorageBuffer);
setArgSvmAlloc(7, reinterpret_cast<void *>(ssh->getGpuAddress()), ssh);
if (debugQueue)
setArgSvmAlloc(8, reinterpret_cast<void *>(debugQueue->getGpuAddress()), debugQueue);
DBG_LOG(PrintEMDebugInformation,
"Scheduler Surfaces: \nqueue=", queue->getUnderlyingBuffer(), " \nstack=", commandsStack->getUnderlyingBuffer(),
" \nevents=", eventsPool->getUnderlyingBuffer(), " \nslb=", secondaryBatchBuffer->getUnderlyingBuffer(), "\ndsh=", dsh->getUnderlyingBuffer(),
" \nkrs=", reflectionSurface->getUnderlyingBuffer(), " \nstorage=", queueStorageBuffer->getUnderlyingBuffer(), "\nssh=", ssh->getUnderlyingBuffer());
}
// Derives the scheduler's global work size from device topology: HW threads
// per subslice are grouped into scheduler work-groups, the group count is
// capped by the per-slice barrier register count, then scaled by subslice
// count and the scheduler's compilation SIMD width.
void SchedulerKernel::computeGws() {
auto pClDevice = program->getDevices()[0];
auto &devInfo = pClDevice->getDeviceInfo();
auto &hwInfo = pClDevice->getHardwareInfo();
auto &helper = HwHelper::get(hwInfo.platform.eRenderCoreFamily);
size_t hWThreadsPerSubSlice = devInfo.maxComputUnits / hwInfo.gtSystemInfo.SubSliceCount;
size_t wkgsPerSubSlice = hWThreadsPerSubSlice / PARALLEL_SCHEDULER_HWTHREADS_IN_HW_GROUP20;
// Cap by the per-slice barrier register count.
wkgsPerSubSlice = std::min(wkgsPerSubSlice, helper.getMaxBarrierRegisterPerSlice());
gws = wkgsPerSubSlice * hwInfo.gtSystemInfo.SubSliceCount * PARALLEL_SCHEDULER_HWTHREADS_IN_HW_GROUP20 * PARALLEL_SCHEDULER_COMPILATION_SIZE_20;
// Simulation devices run a single work-group's worth of items.
if (pClDevice->isSimulation()) {
gws = PARALLEL_SCHEDULER_HWTHREADS_IN_HW_GROUP20 * PARALLEL_SCHEDULER_COMPILATION_SIZE_20;
}
// Debug override; must remain a multiple of one work-group's item count.
if (DebugManager.flags.SchedulerGWS.get() != 0) {
DEBUG_BREAK_IF(DebugManager.flags.SchedulerGWS.get() % (PARALLEL_SCHEDULER_HWTHREADS_IN_HW_GROUP20 * PARALLEL_SCHEDULER_COMPILATION_SIZE_20) != 0);
gws = DebugManager.flags.SchedulerGWS.get();
}
DBG_LOG(PrintEMDebugInformation, "Scheduler GWS: ", gws);
PRINT_DEBUG_STRING(DebugManager.flags.PrintDebugMessages.get(), stderr, "Scheduler GWS: %" PRIu64, static_cast<uint64_t>(gws));
}
// Loads the prebuilt, embedded scheduler kernel binary matching the device's
// family and returns it wrapped as a Binary-type BuiltinCode.
BuiltinCode SchedulerKernel::loadSchedulerKernel(Device *device) {
    const std::string resourceName = getFamilyNameWithType(device->getHardwareInfo()) + "_0_scheduler.builtin_kernel.bin";
    EmbeddedStorage storage("");
    BuiltinCode code;
    code.resource = storage.load(resourceName);
    code.type = BuiltinCode::ECodeType::Binary;
    code.targetDevice = device;
    return code;
}
} // namespace NEO

View File

@ -1,67 +0,0 @@
/*
* Copyright (C) 2018-2022 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include "shared/source/built_ins/built_ins.h"
#include "opencl/source/kernel/kernel.h"
#include <algorithm>
namespace NEO {
// Built-in kernel that runs the device-side enqueue scheduler
// ("SchedulerParallel20"), loaded from an embedded prebuilt binary rather
// than compiled from source (see loadSchedulerKernel).
class SchedulerKernel : public Kernel {
public:
static constexpr const char *schedulerName = "SchedulerParallel20";
friend Kernel;
~SchedulerKernel() override = default;
// Fixed local size: HW threads per group times the compilation SIMD width.
size_t getLws() {
return PARALLEL_SCHEDULER_HWTHREADS_IN_HW_GROUP20 * PARALLEL_SCHEDULER_COMPILATION_SIZE_20;
}
size_t getGws() {
return gws;
}
void setGws(size_t newGws) {
gws = newGws;
}
// Total curbe footprint: static scheduler payload plus the kernel's
// cross-thread data and DSH, each rounded up to 64 bytes.
size_t getCurbeSize() {
size_t crossThreadDataSize = kernelInfo.kernelDescriptor.kernelAttributes.crossThreadDataSize;
size_t dshSize = kernelInfo.heapInfo.DynamicStateHeapSize;
crossThreadDataSize = alignUp(crossThreadDataSize, 64);
dshSize = alignUp(dshSize, 64);
return alignUp(SCHEDULER_DYNAMIC_PAYLOAD_SIZE, 64) + crossThreadDataSize + dshSize;
}
// Binds the scheduler's fixed surface arguments; see scheduler_kernel.cpp
// for the index -> surface mapping.
void setArgs(GraphicsAllocation *queue,
GraphicsAllocation *commandsStack,
GraphicsAllocation *eventsPool,
GraphicsAllocation *secondaryBatchBuffer,
GraphicsAllocation *dsh,
GraphicsAllocation *reflectionSurface,
GraphicsAllocation *queueStorageBuffer,
GraphicsAllocation *ssh,
GraphicsAllocation *debugQueue = nullptr);
// Loads the family-specific prebuilt scheduler binary for 'device'.
static BuiltinCode loadSchedulerKernel(Device *device);
protected:
// Construction immediately sizes the dispatch from the device topology.
SchedulerKernel(Program *programArg, const KernelInfo &kernelInfoArg, ClDevice &clDeviceArg) : Kernel(programArg, kernelInfoArg, clDeviceArg) {
computeGws();
};
void computeGws();
// Global work size; computed in computeGws(), overridable via setGws().
size_t gws = 0u;
};
} // namespace NEO

View File

@ -1,5 +1,5 @@
#
# Copyright (C) 2018-2021 Intel Corporation
# Copyright (C) 2018-2022 Intel Corporation
#
# SPDX-License-Identifier: MIT
#
@ -22,7 +22,6 @@ ADD_SUPPORTED_TEST_PRODUCT_FAMILIES_DEFINITION()
link_libraries(${ASAN_LIBS} ${TSAN_LIBS})
add_custom_target(prepare_test_kernels_for_ocl)
add_dependencies(prepare_test_kernels_for_ocl ${SCHEDULER_BINARY_LIB_NAME})
add_dependencies(prepare_test_kernels_for_ocl ${BUILTINS_BINARIES_BINDFUL_LIB_NAME})
add_custom_target(copy_test_files_per_product)
add_custom_target(run_unit_tests ALL)
@ -63,11 +62,6 @@ set(NEO_IGDRCL_TESTS__TARGET_OBJECTS
$<TARGET_OBJECTS:neo_shared_mocks>
$<TARGET_OBJECTS:neo_unit_tests_config>
)
if(TARGET ${SCHEDULER_BINARY_LIB_NAME})
list(APPEND NEO_IGDRCL_TESTS__TARGET_OBJECTS
$<TARGET_OBJECTS:${SCHEDULER_BINARY_LIB_NAME}>
)
endif()
add_executable(igdrcl_tests
${NEO_IGDRCL_TESTS__TARGET_OBJECTS}

View File

@ -1,5 +1,5 @@
#
# Copyright (C) 2018-2021 Intel Corporation
# Copyright (C) 2018-2022 Intel Corporation
#
# SPDX-License-Identifier: MIT
#
@ -20,11 +20,6 @@ list(APPEND IGDRCL_AUB_TESTS__TARGET_OBJECTS
$<TARGET_OBJECTS:neo_shared_mocks>
$<TARGET_OBJECTS:neo_aub_tests_config>
)
if(TARGET ${SCHEDULER_BINARY_LIB_NAME})
list(APPEND IGDRCL_AUB_TESTS__TARGET_OBJECTS
$<TARGET_OBJECTS:${SCHEDULER_BINARY_LIB_NAME}>
)
endif()
if(DEFINED AUB_STREAM_PROJECT_NAME)
list(APPEND IGDRCL_AUB_TESTS__TARGET_OBJECTS $<TARGET_OBJECTS:${AUB_STREAM_PROJECT_NAME}_all_hw>)

View File

@ -1,5 +1,5 @@
#
# Copyright (C) 2018-2021 Intel Corporation
# Copyright (C) 2018-2022 Intel Corporation
#
# SPDX-License-Identifier: MIT
#
@ -8,7 +8,6 @@ set(IGDRCL_SRCS_tests_built_in
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
${CMAKE_CURRENT_SOURCE_DIR}/built_ins_file_names.h
${CMAKE_CURRENT_SOURCE_DIR}/built_ins_file_names.cpp
${CMAKE_CURRENT_SOURCE_DIR}/built_in_kernels_tests.cpp
${CMAKE_CURRENT_SOURCE_DIR}/built_in_tests.cpp
${CMAKE_CURRENT_SOURCE_DIR}/built_in_tests_ocl.cpp
${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}get_built_ins_file_names.cpp

View File

@ -1,121 +0,0 @@
/*
* Copyright (C) 2018-2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "opencl/source/builtin_kernels_simulation/opencl_c.h"
#include "gtest/gtest.h"
namespace BuiltinKernelsSimulation {
// Host-side simulation of the CopyImage3dToBuffer16Bytes built-in: copies one
// 16-byte texel (uint4) per work item from a 3D image into a linear buffer.
__kernel void CopyImage3dToBuffer16Bytes(__read_only image3d_t input,
__global uchar *dst,
int4 srcOffset,
int dstOffset,
uint2 pitch) {
const int x = get_global_id(0);
const int y = get_global_id(1);
const int z = get_global_id(2);
int4 srcCoord = {x, y, z, 0};
srcCoord = srcCoord + srcOffset;
// pitch.x = row pitch, pitch.y = slice pitch (in bytes).
uint DstOffset = dstOffset + (y * pitch.x) + (z * pitch.y);
const uint4 c = read_imageui(input, srcCoord);
// Unaligned destination: store the four channels byte-by-byte in
// little-endian order (LSB at offset 0 of each 4-byte lane).
// NOTE(review): the alignment test uses dstOffset while the stores use the
// pitched DstOffset — looks inconsistent; confirm against the real kernel.
if ((ulong)(dst + dstOffset) & 0x0000000f) {
*((__global uchar *)(dst + DstOffset + x * 16 + 3)) = convert_uchar_sat((c.x >> 24) & 0xff);
*((__global uchar *)(dst + DstOffset + x * 16 + 2)) = convert_uchar_sat((c.x >> 16) & 0xff);
*((__global uchar *)(dst + DstOffset + x * 16 + 1)) = convert_uchar_sat((c.x >> 8) & 0xff);
*((__global uchar *)(dst + DstOffset + x * 16)) = convert_uchar_sat(c.x & 0xff);
*((__global uchar *)(dst + DstOffset + x * 16 + 7)) = convert_uchar_sat((c.y >> 24) & 0xff);
*((__global uchar *)(dst + DstOffset + x * 16 + 6)) = convert_uchar_sat((c.y >> 16) & 0xff);
*((__global uchar *)(dst + DstOffset + x * 16 + 5)) = convert_uchar_sat((c.y >> 8) & 0xff);
*((__global uchar *)(dst + DstOffset + x * 16 + 4)) = convert_uchar_sat(c.y & 0xff);
*((__global uchar *)(dst + DstOffset + x * 16 + 11)) = convert_uchar_sat((c.z >> 24) & 0xff);
*((__global uchar *)(dst + DstOffset + x * 16 + 10)) = convert_uchar_sat((c.z >> 16) & 0xff);
*((__global uchar *)(dst + DstOffset + x * 16 + 9)) = convert_uchar_sat((c.z >> 8) & 0xff);
*((__global uchar *)(dst + DstOffset + x * 16 + 8)) = convert_uchar_sat(c.z & 0xff);
*((__global uchar *)(dst + DstOffset + x * 16 + 15)) = convert_uchar_sat((c.w >> 24) & 0xff);
*((__global uchar *)(dst + DstOffset + x * 16 + 14)) = convert_uchar_sat((c.w >> 16) & 0xff);
*((__global uchar *)(dst + DstOffset + x * 16 + 13)) = convert_uchar_sat((c.w >> 8) & 0xff);
*((__global uchar *)(dst + DstOffset + x * 16 + 12)) = convert_uchar_sat(c.w & 0xff);
} else {
// Aligned destination: single vector store of the whole texel.
*(__global uint4 *)(dst + DstOffset + x * 16) = c;
}
}
// Drives the simulated CopyImage3dToBuffer16Bytes kernel over a 3x3x3 RGBA32
// image by manually iterating the ND-range (globalID/localID/localSize are
// simulation globals from opencl_c.h) and verifies the copy plus 64-byte
// guard regions on both sides of the destination.
TEST(BuiltInKernelTests, WhenBuiltInCopiesImageThenDataIsWrittenIntoCorrectMemory) {
uint width = 3;
uint height = 3;
uint depth = 3;
uint bytesPerChannel = 4;
uint channels = 4;
uint bpp = bytesPerChannel * channels;
globalID[0] = 0;
globalID[1] = 0;
globalID[2] = 0;
localID[0] = 0;
localID[1] = 0;
localID[2] = 0;
localSize[0] = width;
localSize[1] = height;
localSize[2] = depth;
// Source/destination get 64-byte guard zones before and after the payload.
size_t size = width * height * depth * bytesPerChannel * channels;
auto ptrSrc = std::make_unique<char[]>(64 + size + 64);
auto ptrDst = std::make_unique<char[]>(64 + size + 64);
auto ptrZero = std::make_unique<char[]>(64);
memset(ptrZero.get(), 0, 64);
memset(ptrDst.get(), 0, 64 + size + 64);
memset(ptrSrc.get(), 0, 64 + size + 64);
// Fill the source payload with a recognizable incrementing byte pattern.
char *temp = ptrSrc.get() + 64;
for (uint i = 0; i < size; i++) {
temp[i] = i;
}
image im;
im.ptr = ptrSrc.get() + 64;
im.bytesPerChannel = bytesPerChannel;
im.channels = channels;
im.width = width;
im.height = height;
im.depth = depth;
uint2 Pitch(0, 0);
Pitch.x = width * bpp;
Pitch.y = width * height * bpp;
// Emulate the 3D dispatch: one kernel invocation per work item.
for (uint dimZ = 0; dimZ < depth; dimZ++) {
globalID[1] = 0;
for (uint dimY = 0; dimY < height; dimY++) {
globalID[0] = 0;
for (uint dimX = 0; dimX < width; dimX++) {
CopyImage3dToBuffer16Bytes(&im,
(uchar *)ptrDst.get() + 64,
{0, 0, 0, 0},
0,
Pitch);
globalID[0]++;
}
globalID[1]++;
}
globalID[2]++;
}
EXPECT_EQ(0, memcmp(im.ptr, ptrDst.get() + 64, size)) << "Data not copied properly!\n";
EXPECT_EQ(0, memcmp(ptrDst.get(), ptrZero.get(), 64)) << "Data written before passed ptr!\n";
EXPECT_EQ(0, memcmp(ptrDst.get() + size + 64, ptrZero.get(), 64)) << "Data written after passed ptr!\n";
}
} // namespace BuiltinKernelsSimulation

View File

@ -1029,29 +1029,6 @@ TEST_F(BuiltInTests, GivenUnknownBuiltInOpWhenGettingBuilderInfoThenExceptionThr
EXPECT_TRUE(caughtException);
}
// The context's scheduler kernel must carry the canonical scheduler name
// ("SchedulerParallel20"). Requires an OCL 2.1 capable device.
HWCMDTEST_F(IGFX_GEN8_CORE, BuiltInTests, WhenGettingSchedulerKernelThenCorrectKernelReturned) {
REQUIRE_OCL_21_OR_SKIP(defaultHwInfo);
SchedulerKernel &schedulerKernel = pContext->getSchedulerKernel();
std::string name = SchedulerKernel::schedulerName;
EXPECT_EQ(name, schedulerKernel.getKernelInfo().kernelDescriptor.kernelMetadata.kernelName);
}
// The scheduler kernel is built lazily once per context: a second request must
// return the same kernel and program instances (no rebuild).
// Fix: test name previously misspelled "Getttting".
HWCMDTEST_F(IGFX_GEN8_CORE, BuiltInTests, WhenGettingSchedulerKernelForSecondTimeThenReuseKernel) {
    REQUIRE_OCL_21_OR_SKIP(defaultHwInfo);
    SchedulerKernel &schedulerKernel = pContext->getSchedulerKernel();
    Program *program = schedulerKernel.getProgram();
    EXPECT_NE(nullptr, program);
    SchedulerKernel &schedulerKernelSecond = pContext->getSchedulerKernel();
    Program *program2 = schedulerKernelSecond.getProgram();
    // Identity, not equality: the exact same objects must be handed back.
    EXPECT_EQ(&schedulerKernel, &schedulerKernelSecond);
    EXPECT_EQ(program, program2);
}
TEST_F(BuiltInTests, GivenUnsupportedBuildTypeWhenBuildingDispatchInfoThenFalseIsReturned) {
auto &builtIns = *pDevice->getBuiltIns();
BuiltinDispatchInfoBuilder dispatchInfoBuilder{builtIns, *pClDevice};

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2020-2021 Intel Corporation
* Copyright (C) 2020-2022 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -17,7 +17,6 @@
#include "opencl/source/command_queue/enqueue_kernel.h"
#include "opencl/source/command_queue/enqueue_marker.h"
#include "opencl/source/helpers/dispatch_info.h"
#include "opencl/source/scheduler/scheduler_kernel.h"
#include "opencl/test/unit_test/fixtures/hello_world_fixture.h"
#include "opencl/test/unit_test/mocks/mock_command_queue.h"
#include "opencl/test/unit_test/mocks/mock_context.h"

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2018-2021 Intel Corporation
* Copyright (C) 2018-2022 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -132,48 +132,6 @@ void MockContext::initializeWithDevices(const ClDeviceVector &devices, bool noSp
setupContextType();
}
// Lazily builds (once per context, via std::call_once) the scheduler kernel
// from the embedded gen binary and caches it in schedulerBuiltIn.
SchedulerKernel &MockContext::getSchedulerKernel() {
// Fast path: already initialized.
// NOTE(review): this unsynchronized read before call_once resembles
// double-checked locking; assumed safe for single-threaded test usage.
if (schedulerBuiltIn->pKernel) {
return *static_cast<SchedulerKernel *>(schedulerBuiltIn->pKernel);
}
auto initializeSchedulerProgramAndKernel = [&] {
cl_int retVal = CL_SUCCESS;
auto clDevice = getDevice(0);
// Load the family-specific scheduler binary and turn it into a Program.
auto src = SchedulerKernel::loadSchedulerKernel(&clDevice->getDevice());
auto program = Program::createBuiltInFromGenBinary(this,
devices,
src.resource.data(),
src.resource.size(),
&retVal);
DEBUG_BREAK_IF(retVal != CL_SUCCESS);
DEBUG_BREAK_IF(!program);
retVal = program->processGenBinary(*clDevice);
DEBUG_BREAK_IF(retVal != CL_SUCCESS);
schedulerBuiltIn->pProgram = program;
// Locate the scheduler entry point inside the program and create the kernel.
auto kernelInfo = schedulerBuiltIn->pProgram->getKernelInfo(SchedulerKernel::schedulerName, clDevice->getRootDeviceIndex());
DEBUG_BREAK_IF(!kernelInfo);
schedulerBuiltIn->pKernel = Kernel::create<MockSchedulerKernel>(
schedulerBuiltIn->pProgram,
*kernelInfo,
*clDevice,
&retVal);
// The scheduler must not require scratch space.
UNRECOVERABLE_IF(schedulerBuiltIn->pKernel->getScratchSize() != 0);
DEBUG_BREAK_IF(retVal != CL_SUCCESS);
};
std::call_once(schedulerBuiltIn->programIsInitialized, initializeSchedulerProgramAndKernel);
UNRECOVERABLE_IF(schedulerBuiltIn->pKernel == nullptr);
return *static_cast<SchedulerKernel *>(schedulerBuiltIn->pKernel);
}
MockDefaultContext::MockDefaultContext() : MockDefaultContext(false) {}
MockDefaultContext::MockDefaultContext(bool initSpecialQueues) : MockContext(nullptr, nullptr) {

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2018-2021 Intel Corporation
* Copyright (C) 2018-2022 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -52,8 +52,6 @@ class MockContext : public Context {
std::unique_ptr<AsyncEventsHandler> &getAsyncEventsHandlerUniquePtr();
void initializeWithDevices(const ClDeviceVector &devices, bool noSpecialQueue);
SchedulerKernel &getSchedulerKernel() override;
private:
ClDevice *pDevice = nullptr;
};

View File

@ -19,7 +19,6 @@
#include "opencl/source/kernel/multi_device_kernel.h"
#include "opencl/source/platform/platform.h"
#include "opencl/source/program/block_kernel_manager.h"
#include "opencl/source/scheduler/scheduler_kernel.h"
#include "opencl/test/unit_test/mocks/mock_buffer.h"
#include "opencl/test/unit_test/mocks/mock_context.h"
#include "opencl/test/unit_test/mocks/mock_program.h"
@ -548,11 +547,6 @@ class MockParentKernel : public Kernel {
KernelInfo *mockKernelInfo = nullptr;
};
// Minimal test double: exposes SchedulerKernel's protected constructor so
// tests can instantiate it directly.
class MockSchedulerKernel : public SchedulerKernel {
public:
MockSchedulerKernel(Program *programArg, const KernelInfo &kernelInfoArg, ClDevice &clDeviceArg) : SchedulerKernel(programArg, kernelInfoArg, clDeviceArg){};
};
class MockDebugKernel : public MockKernel {
public:
MockDebugKernel(Program *program, const KernelInfo &kernelInfo, ClDevice &clDeviceArg) : MockKernel(program, kernelInfo, clDeviceArg) {

View File

@ -816,6 +816,7 @@ TEST_F(OfflineCompilerTests, WhenParsingBinToCharArrayThenCorrectResult) {
delete pOfflineCompiler;
}
TEST_F(OfflineCompilerTests, GivenCppFileWhenBuildingThenBuildSucceeds) {
std::vector<std::string> argv = {
"ocloc",

View File

@ -1,11 +0,0 @@
#
# Copyright (C) 2018-2022 Intel Corporation
#
# SPDX-License-Identifier: MIT
#
# Scheduler unit-test sources; the list variable name follows the
# IGDRCL_SRCS_tests_<dir> convention used across the test tree.
set(IGDRCL_SRCS_tests_scheduler
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
${CMAKE_CURRENT_SOURCE_DIR}/scheduler_kernel_tests.cpp
)
# Attach the sources directly to the igdrcl_tests executable target.
target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_scheduler})

View File

@ -1,309 +0,0 @@
/*
* Copyright (C) 2018-2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/test/common/helpers/debug_manager_state_restore.h"
#include "shared/test/common/mocks/mock_device.h"
#include "shared/test/common/mocks/mock_graphics_allocation.h"
#include "shared/test/common/mocks/mock_ostime.h"
#include "shared/test/common/test_macros/test.h"
#include "shared/test/unit_test/utilities/base_object_utils.h"
#include "opencl/source/scheduler/scheduler_kernel.h"
#include "opencl/test/unit_test/mocks/mock_cl_device.h"
#include "opencl/test/unit_test/mocks/mock_context.h"
#include "opencl/test/unit_test/mocks/mock_program.h"
#include "gtest/gtest.h"
#include <cstdint>
#include <memory>
using namespace NEO;
using namespace iOpenCL;
// Test double for SchedulerKernel. The create() factory fabricates a minimal
// KernelInfo with nine stateless buffer args — matching the nine surfaces
// bound by SchedulerKernel::setArgs.
class MockSchedulerKernel : public SchedulerKernel {
  public:
    MockSchedulerKernel(Program *program, const KernelInfo &info, ClDevice &clDeviceArg) : SchedulerKernel(program, info, clDeviceArg) {
    }

    // Returns a new kernel; ownership of the heap-allocated KernelInfo is
    // handed to the caller through the out-parameter `info`.
    static MockSchedulerKernel *create(Program &program, KernelInfo *&info) {
        info = new KernelInfo;
        info->kernelDescriptor.kernelAttributes.crossThreadDataSize = 8;
        info->kernelDescriptor.kernelAttributes.simdSize = 32;
        info->kernelDescriptor.kernelAttributes.flags.usesDeviceSideEnqueue = false;
        for (uint32_t i = 0; i < 9; i++) {
            // Build a fresh descriptor per iteration. The previous code moved
            // a single ArgDescriptor out of scope on iteration 0 and then kept
            // writing through a reference into the moved-from object.
            ArgDescriptor bufferArg;
            auto &asPtr = bufferArg.as<ArgDescPointer>(true);
            asPtr.stateless = 0;
            asPtr.pointerSize = 0;
            asPtr.bufferOffset = 0;
            info->kernelDescriptor.payloadMappings.explicitArgs.push_back(std::move(bufferArg));
        }
        MockSchedulerKernel *mock = Kernel::create<MockSchedulerKernel>(&program, *info, *program.getDevices()[0], nullptr);
        return mock;
    }
};
// The scheduler's local work size is a fixed constant (24); it does not
// depend on the device the kernel was created for.
TEST(SchedulerKernelTest, WhenSchedulerKernelIsCreatedThenLwsIs24) {
    auto clDevice = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(nullptr));
    MockProgram mockProgram(toClDeviceVector(*clDevice));
    KernelInfo kernelInfo;
    MockSchedulerKernel schedulerKernel(&mockProgram, kernelInfo, *clDevice);
    EXPECT_EQ(static_cast<size_t>(24u), schedulerKernel.getLws());
}
// Checks the computed GWS is bounded: no larger than EUCount * 3 HW threads *
// SIMD8, and strictly larger than a single work group.
// NOTE(review): despite the name, this asserts a range, not the value 24.
TEST(SchedulerKernelTest, WhenSchedulerKernelIsCreatedThenGwsIs24) {
auto device = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(nullptr));
MockProgram program(toClDeviceVector(*device));
KernelInfo info;
MockSchedulerKernel kernel(&program, info, *device);
const size_t hwThreads = 3;
const size_t simdSize = 8;
size_t maxGws = defaultHwInfo->gtSystemInfo.EUCount * hwThreads * simdSize;
size_t gws = kernel.getGws();
EXPECT_GE(maxGws, gws);
EXPECT_LT(hwThreads * simdSize, gws);
}
// setGws() must override the value computed at construction; getGws() then
// reflects exactly what was set.
TEST(SchedulerKernelTest, WhenSettingGwsThenGetGwsReturnedSetValue) {
    auto clDevice = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(nullptr));
    MockProgram mockProgram(toClDeviceVector(*clDevice));
    KernelInfo kernelInfo;
    MockSchedulerKernel schedulerKernel(&mockProgram, kernelInfo, *clDevice);
    schedulerKernel.setGws(24);
    EXPECT_EQ(24u, schedulerKernel.getGws());
}
// Curbe size must not exceed the sum of the 64-byte-aligned cross-thread
// data, DSH, and dynamic payload sizes (mirrors getCurbeSize's formula).
TEST(SchedulerKernelTest, WhenSchedulerKernelIsCreatedThenCurbeSizeIsCorrect) {
auto device = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(nullptr));
MockProgram program(toClDeviceVector(*device));
KernelInfo info;
uint32_t crossThreadDataSize = 32;
uint32_t dshSize = 48;
info.kernelDescriptor.kernelAttributes.crossThreadDataSize = crossThreadDataSize;
info.heapInfo.DynamicStateHeapSize = dshSize;
MockSchedulerKernel kernel(&program, info, *device);
uint32_t expectedCurbeSize = alignUp(crossThreadDataSize, 64) + alignUp(dshSize, 64) + alignUp(SCHEDULER_DYNAMIC_PAYLOAD_SIZE, 64);
EXPECT_GE((size_t)expectedCurbeSize, kernel.getCurbeSize());
}
// Binding all nine scheduler surfaces via setArgs must surface each
// allocation at the corresponding positional kernel arg.
TEST(SchedulerKernelTest, WhenSettingArgsForSchedulerKernelThenAllocationsAreCorrect) {
auto device = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(nullptr));
auto context = clUniquePtr(new MockContext(device.get()));
auto program = clUniquePtr(new MockProgram(context.get(), false, toClDeviceVector(*device)));
std::unique_ptr<KernelInfo> info(nullptr);
KernelInfo *infoPtr = nullptr;
std::unique_ptr<MockSchedulerKernel> scheduler = std::unique_ptr<MockSchedulerKernel>(MockSchedulerKernel::create(*program, infoPtr));
// create() transferred ownership of the KernelInfo; adopt it here.
info.reset(infoPtr);
std::unique_ptr<MockGraphicsAllocation> allocs[9];
for (uint32_t i = 0; i < 9; i++) {
allocs[i] = std::unique_ptr<MockGraphicsAllocation>(new MockGraphicsAllocation((void *)0x1234, 10));
}
scheduler->setArgs(allocs[0].get(),
allocs[1].get(),
allocs[2].get(),
allocs[3].get(),
allocs[4].get(),
allocs[5].get(),
allocs[6].get(),
allocs[7].get(),
allocs[8].get());
for (uint32_t i = 0; i < 9; i++) {
EXPECT_EQ(allocs[i].get(), scheduler->getKernelArg(i));
}
}
// When the optional debug-queue argument is omitted (defaults to nullptr),
// args 0-7 are bound and arg 8 stays null.
TEST(SchedulerKernelTest, GivenNullDebugQueueWhenSettingArgsForSchedulerKernelThenAllocationsAreCorrect) {
auto device = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(nullptr));
auto context = clUniquePtr(new MockContext(device.get()));
auto program = clUniquePtr(new MockProgram(context.get(), false, toClDeviceVector(*device)));
std::unique_ptr<KernelInfo> info(nullptr);
KernelInfo *infoPtr = nullptr;
std::unique_ptr<MockSchedulerKernel> scheduler = std::unique_ptr<MockSchedulerKernel>(MockSchedulerKernel::create(*program, infoPtr));
info.reset(infoPtr);
std::unique_ptr<MockGraphicsAllocation> allocs[9];
for (uint32_t i = 0; i < 9; i++) {
allocs[i] = std::unique_ptr<MockGraphicsAllocation>(new MockGraphicsAllocation((void *)0x1234, 10));
}
// Only eight allocations passed; the ninth parameter takes its default.
scheduler->setArgs(allocs[0].get(),
allocs[1].get(),
allocs[2].get(),
allocs[3].get(),
allocs[4].get(),
allocs[5].get(),
allocs[6].get(),
allocs[7].get());
for (uint32_t i = 0; i < 8; i++) {
EXPECT_EQ(allocs[i].get(), scheduler->getKernelArg(i));
}
EXPECT_EQ(nullptr, scheduler->getKernelArg(8));
}
// setArgs must patch the GPU virtual address of each allocation into the
// kernel arg, not the CPU pointer (allocations here have distinct values for
// the two: CPU 0x1234 vs GPU 0x4321).
TEST(SchedulerKernelTest, givenGraphicsAllocationWithDifferentCpuAndGpuAddressesWhenCallSetArgsThenGpuAddressesAreTaken) {
auto device = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(nullptr));
auto context = clUniquePtr(new MockContext(device.get()));
auto program = clUniquePtr(new MockProgram(context.get(), false, toClDeviceVector(*device)));
std::unique_ptr<KernelInfo> info(nullptr);
KernelInfo *infoPtr = nullptr;
auto scheduler = std::unique_ptr<MockSchedulerKernel>(MockSchedulerKernel::create(*program, infoPtr));
info.reset(infoPtr);
std::unique_ptr<MockGraphicsAllocation> allocs[9];
for (uint32_t i = 0; i < 9; i++) {
allocs[i] = std::make_unique<MockGraphicsAllocation>(reinterpret_cast<void *>(0x1234), 0x4321, 10);
}
scheduler->setArgs(allocs[0].get(),
allocs[1].get(),
allocs[2].get(),
allocs[3].get(),
allocs[4].get(),
allocs[5].get(),
allocs[6].get(),
allocs[7].get(),
allocs[8].get());
for (uint32_t i = 0; i < 9; i++) {
auto argAddr = reinterpret_cast<uint64_t>(scheduler->getKernelArgInfo(i).value);
EXPECT_EQ(allocs[i]->getGpuAddress(), argAddr);
}
}
// With ForceDispatchScheduler enabled, creating the kernel reflection must
// allocate a reflection surface.
TEST(SchedulerKernelTest, GivenForceDispatchSchedulerWhenCreatingKernelReflectionThenKernelReflectSurfaceIsNotNull) {
DebugManagerStateRestore dbgRestorer;
DebugManager.flags.ForceDispatchScheduler.set(true);
auto device = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(nullptr));
auto context = clUniquePtr(new MockContext(device.get()));
auto program = clUniquePtr(new MockProgram(context.get(), false, toClDeviceVector(*device)));
std::unique_ptr<KernelInfo> info(nullptr);
KernelInfo *infoPtr = nullptr;
std::unique_ptr<MockSchedulerKernel> scheduler = std::unique_ptr<MockSchedulerKernel>(MockSchedulerKernel::create(*program, infoPtr));
info.reset(infoPtr);
scheduler->createReflectionSurface();
EXPECT_NE(nullptr, scheduler->getKernelReflectionSurface());
}
// Creating the reflection surface twice must be idempotent: the second call
// reuses the allocation from the first.
TEST(SchedulerKernelTest, GivenForceDispatchSchedulerWhenCreatingKernelReflectionTwiceThenTheSameAllocationIsUsed) {
DebugManagerStateRestore dbgRestorer;
DebugManager.flags.ForceDispatchScheduler.set(true);
auto device = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(nullptr));
auto context = clUniquePtr(new MockContext(device.get()));
auto program = clUniquePtr(new MockProgram(context.get(), false, toClDeviceVector(*device)));
std::unique_ptr<KernelInfo> info(nullptr);
KernelInfo *infoPtr = nullptr;
std::unique_ptr<MockSchedulerKernel> scheduler = std::unique_ptr<MockSchedulerKernel>(MockSchedulerKernel::create(*program, infoPtr));
info.reset(infoPtr);
scheduler->createReflectionSurface();
auto *allocation = scheduler->getKernelReflectionSurface();
scheduler->createReflectionSurface();
auto *allocation2 = scheduler->getKernelReflectionSurface();
EXPECT_EQ(allocation, allocation2);
}
// Counterpart to the positive case: with ForceDispatchScheduler disabled,
// createReflectionSurface must not allocate anything.
TEST(SchedulerKernelTest, GivenNoForceDispatchSchedulerWhenCreatingKernelReflectionThenKernelReflectionSurfaceIsNotCreated) {
DebugManagerStateRestore dbgRestorer;
DebugManager.flags.ForceDispatchScheduler.set(false);
auto device = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(nullptr));
auto context = clUniquePtr(new MockContext(device.get()));
auto program = clUniquePtr(new MockProgram(context.get(), false, toClDeviceVector(*device)));
std::unique_ptr<KernelInfo> info(nullptr);
KernelInfo *infoPtr = nullptr;
std::unique_ptr<MockSchedulerKernel> scheduler = std::unique_ptr<MockSchedulerKernel>(MockSchedulerKernel::create(*program, infoPtr));
info.reset(infoPtr);
scheduler->createReflectionSurface();
EXPECT_EQ(nullptr, scheduler->getKernelReflectionSurface());
}
// With an empty KernelInfo (zero cross-thread data and DSH), curbe size must
// not exceed the aligned dynamic payload size alone.
TEST(SchedulerKernelTest, GivenNullKernelInfoWhenGettingCurbeSizeThenSizeIsCorrect) {
auto device = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(nullptr));
MockProgram program(toClDeviceVector(*device));
KernelInfo info;
MockSchedulerKernel kernel(&program, info, *device);
uint32_t expectedCurbeSize = alignUp(SCHEDULER_DYNAMIC_PAYLOAD_SIZE, 64);
EXPECT_GE((size_t)expectedCurbeSize, kernel.getCurbeSize());
}
// The SchedulerGWS debug flag overrides the computed GWS at construction
// (48 is a multiple of the 24-item work-group size, as computeGws requires).
TEST(SchedulerKernelTest, givenForcedSchedulerGwsByDebugVariableWhenSchedulerKernelIsCreatedThenGwsIsSetToForcedValue) {
DebugManagerStateRestore dbgRestorer;
DebugManager.flags.SchedulerGWS.set(48);
auto device = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(nullptr));
MockProgram program(toClDeviceVector(*device));
KernelInfo info;
MockSchedulerKernel kernel(&program, info, *device);
size_t gws = kernel.getGws();
EXPECT_EQ(static_cast<size_t>(48u), gws);
}
// On a simulation-mode device the GWS collapses to a single work group (24),
// matching the isSimulation() branch in computeGws.
TEST(SchedulerKernelTest, givenSimulationModeWhenSchedulerKernelIsCreatedThenGwsIsSetToOneWorkgroup) {
HardwareInfo hwInfo = *defaultHwInfo;
hwInfo.featureTable.flags.ftrSimulationMode = true;
auto device = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(&hwInfo));
MockProgram program(toClDeviceVector(*device));
KernelInfo info;
MockSchedulerKernel kernel(&program, info, *device);
size_t gws = kernel.getGws();
EXPECT_EQ(static_cast<size_t>(24u), gws);
}
TEST(SchedulerKernelTest, givenForcedSchedulerGwsByDebugVariableAndSimulationModeWhenSchedulerKernelIsCreatedThenGwsIsSetToForcedValue) {
DebugManagerStateRestore dbgRestorer;
DebugManager.flags.SchedulerGWS.set(48);
HardwareInfo hwInfo = *defaultHwInfo;
hwInfo.featureTable.flags.ftrSimulationMode = true;
auto device = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(&hwInfo));
MockProgram program(toClDeviceVector(*device));
KernelInfo info;
MockSchedulerKernel kernel(&program, info, *device);
size_t gws = kernel.getGws();
EXPECT_EQ(static_cast<size_t>(48u), gws);
}