mirror of
https://github.com/intel/compute-runtime.git
synced 2025-09-15 13:01:45 +08:00
Initial commit
Change-Id: I4bf1707bd3dfeadf2c17b0a7daff372b1925ebbd
This commit is contained in:
74
runtime/builtin_kernels_simulation/CMakeLists.txt
Normal file
74
runtime/builtin_kernels_simulation/CMakeLists.txt
Normal file
@ -0,0 +1,74 @@
|
||||
# Copyright (c) 2017, Intel Corporation
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a
|
||||
# copy of this software and associated documentation files (the "Software"),
|
||||
# to deal in the Software without restriction, including without limitation
|
||||
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
# and/or sell copies of the Software, and to permit persons to whom the
|
||||
# Software is furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included
|
||||
# in all copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
# OTHER DEALINGS IN THE SOFTWARE.
|
||||
|
||||
set(BUILTIN_KERNELS_SIMULATION_SRCS
|
||||
"${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt"
|
||||
"${CMAKE_CURRENT_SOURCE_DIR}/opencl_c.cpp"
|
||||
"${CMAKE_CURRENT_SOURCE_DIR}/opencl_c.h"
|
||||
"${CMAKE_CURRENT_SOURCE_DIR}/scheduler_simulation.cpp"
|
||||
"${CMAKE_CURRENT_SOURCE_DIR}/scheduler_simulation.inl"
|
||||
"${CMAKE_CURRENT_SOURCE_DIR}/scheduler_simulation.h"
|
||||
)
|
||||
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
|
||||
|
||||
string(REPLACE "/WX" "" CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS})
|
||||
string(REGEX REPLACE "-Werror[^ \t\n]*" "" CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS})
|
||||
string(REPLACE "-Wsometimes-uninitialized" "" CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS})
|
||||
string(REPLACE "-Wsign-compare" "" CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS})
|
||||
string(REPLACE "-Wunused-variable" "" CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS})
|
||||
|
||||
if(${CMAKE_CXX_COMPILER_ID} STREQUAL "Clang")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-everything")
|
||||
endif()
|
||||
|
||||
if(COMPILER_SUPPORTS_CXX11)
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
|
||||
elseif(COMPILER_SUPPORTS_CXX0X)
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++0x")
|
||||
endif()
|
||||
|
||||
if(NOT MSVC)
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fpermissive -fPIC")
|
||||
endif()
|
||||
|
||||
ENABLE_WUD()
|
||||
|
||||
list (APPEND HEADER_INCLUDES ${IGDRCL_SOURCE_DIR}/runtime ${UMKM_SHAREDDATA_INCLUDE_PATHS})
|
||||
|
||||
|
||||
foreach(GEN_NUM RANGE ${MAX_GEN} 0 -1)
|
||||
GEN_CONTAINS_PLATFORMS("SUPPORTED" ${GEN_NUM} GENX_HAS_PLATFORMS)
|
||||
if(${GENX_HAS_PLATFORMS})
|
||||
list(APPEND DEFAULT_GEN_PLATFORMS_DEFITIONS DEFAULT_GEN${GEN_NUM}_PLATFORM=${DEFAULT_SUPPORTED_GEN${GEN_NUM}_PLATFORM})
|
||||
list (APPEND HEADER_INCLUDES ${IGDRCL_SOURCE_DIR}/runtime/gen${GEN_NUM})
|
||||
list (APPEND BUILTIN_KERNELS_SIMULATION_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/gen${GEN_NUM}/scheduler_simulation.cpp)
|
||||
endif()
|
||||
endforeach()
|
||||
|
||||
add_library(${BIKSIM_LIB_NAME} OBJECT ${BUILTIN_KERNELS_SIMULATION_SRCS})
|
||||
target_include_directories(${BIKSIM_LIB_NAME} BEFORE PRIVATE ${HEADER_INCLUDES})
|
||||
target_include_directories(${BIKSIM_LIB_NAME} PRIVATE
|
||||
${KHRONOS_HEADERS_DIR}
|
||||
${IGDRCL__IGC_INCLUDE_DIR}
|
||||
${THIRD_PARTY_DIR}
|
||||
)
|
||||
set_target_properties(${BIKSIM_LIB_NAME} PROPERTIES FOLDER "built_ins")
|
||||
target_compile_definitions(${BIKSIM_LIB_NAME} PUBLIC ${SUPPORTED_GEN_FLAGS_DEFINITONS} ${DEFAULT_GEN_PLATFORMS_DEFITIONS})
|
105
runtime/builtin_kernels_simulation/gen8/scheduler_simulation.cpp
Normal file
105
runtime/builtin_kernels_simulation/gen8/scheduler_simulation.cpp
Normal file
@ -0,0 +1,105 @@
|
||||
/*
|
||||
* Copyright (c) 2017, Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "CL/cl.h"
|
||||
#include "runtime/builtin_kernels_simulation/opencl_c.h"
|
||||
#include "runtime/builtin_kernels_simulation/scheduler_simulation.h"
|
||||
#include "runtime/builtin_kernels_simulation/scheduler_simulation.inl"
|
||||
#include "runtime/memory_manager/graphics_allocation.h"
|
||||
#include "runtime/gen8/hw_cmds.h"
|
||||
#include "runtime/execution_model/device_enqueue.h"
|
||||
|
||||
using namespace OCLRT;
|
||||
using namespace BuiltinKernelsSimulation;
|
||||
|
||||
namespace Gen8SchedulerSimulation {
|
||||
|
||||
#define SCHEDULER_EMULATION
|
||||
|
||||
uint GetNextPowerof2(uint number);
|
||||
|
||||
float __intel__getProfilingTimerResolution() {
|
||||
return static_cast<float>(DEFAULT_GEN8_PLATFORM::hwInfo.capabilityTable.defaultProfilingTimerResolution);
|
||||
}
|
||||
|
||||
#include "runtime/gen8/device_enqueue.h"
|
||||
#include "runtime/gen8/scheduler_definitions.h"
|
||||
#include "runtime/gen8/scheduler_igdrcl_built_in.inl"
|
||||
#include "runtime/scheduler/scheduler.cl"
|
||||
}
|
||||
|
||||
namespace BuiltinKernelsSimulation {
|
||||
|
||||
template <>
|
||||
void SchedulerSimulation<BDWFamily>::startScheduler(uint32_t index,
|
||||
GraphicsAllocation *queue,
|
||||
GraphicsAllocation *commandsStack,
|
||||
GraphicsAllocation *eventsPool,
|
||||
GraphicsAllocation *secondaryBatchBuffer,
|
||||
GraphicsAllocation *dsh,
|
||||
GraphicsAllocation *reflectionSurface,
|
||||
GraphicsAllocation *queueStorageBuffer,
|
||||
GraphicsAllocation *ssh,
|
||||
GraphicsAllocation *debugQueue) {
|
||||
|
||||
threadIDToLocalIDmap.insert(std::make_pair(std::this_thread::get_id(), index));
|
||||
|
||||
while (!conditionReady) {
|
||||
}
|
||||
|
||||
Gen8SchedulerSimulation::SchedulerParallel20((IGIL_CommandQueue *)queue->getUnderlyingBuffer(),
|
||||
(uint *)commandsStack->getUnderlyingBuffer(),
|
||||
(IGIL_EventPool *)eventsPool->getUnderlyingBuffer(),
|
||||
(uint *)secondaryBatchBuffer->getUnderlyingBuffer(),
|
||||
(char *)dsh->getUnderlyingBuffer(),
|
||||
(IGIL_KernelDataHeader *)reflectionSurface->getUnderlyingBuffer(),
|
||||
(uint *)queueStorageBuffer->getUnderlyingBuffer(),
|
||||
(char *)ssh->getUnderlyingBuffer(),
|
||||
debugQueue != nullptr ? (DebugDataBuffer *)debugQueue->getUnderlyingBuffer() : nullptr);
|
||||
}
|
||||
|
||||
template <>
|
||||
void SchedulerSimulation<BDWFamily>::patchGpGpuWalker(uint secondLevelBatchOffset,
|
||||
__global uint *secondaryBatchBuffer,
|
||||
uint interfaceDescriptorOffset,
|
||||
uint simdSize,
|
||||
uint totalLocalWorkSize,
|
||||
uint3 dimSize,
|
||||
uint3 startPoint,
|
||||
uint numberOfHwThreadsPerWg,
|
||||
uint indirectPayloadSize,
|
||||
uint ioHoffset) {
|
||||
Gen8SchedulerSimulation::patchGpGpuWalker(secondLevelBatchOffset,
|
||||
secondaryBatchBuffer,
|
||||
interfaceDescriptorOffset,
|
||||
simdSize,
|
||||
totalLocalWorkSize,
|
||||
dimSize,
|
||||
startPoint,
|
||||
numberOfHwThreadsPerWg,
|
||||
indirectPayloadSize,
|
||||
ioHoffset);
|
||||
}
|
||||
|
||||
template class SchedulerSimulation<BDWFamily>;
|
||||
|
||||
} // namespace BuiltinKernelsSimulation
|
104
runtime/builtin_kernels_simulation/gen9/scheduler_simulation.cpp
Normal file
104
runtime/builtin_kernels_simulation/gen9/scheduler_simulation.cpp
Normal file
@ -0,0 +1,104 @@
|
||||
/*
|
||||
* Copyright (c) 2017, Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "CL/cl.h"
|
||||
#include "runtime/builtin_kernels_simulation/opencl_c.h"
|
||||
#include "runtime/builtin_kernels_simulation/scheduler_simulation.h"
|
||||
#include "runtime/builtin_kernels_simulation/scheduler_simulation.inl"
|
||||
#include "runtime/memory_manager/graphics_allocation.h"
|
||||
#include "runtime/gen9/hw_cmds.h"
|
||||
#include "runtime/execution_model/device_enqueue.h"
|
||||
|
||||
using namespace OCLRT;
|
||||
using namespace BuiltinKernelsSimulation;
|
||||
|
||||
namespace OCLRT {
|
||||
struct SKLFamily;
|
||||
}
|
||||
|
||||
namespace Gen9SchedulerSimulation {
|
||||
|
||||
#define SCHEDULER_EMULATION
|
||||
|
||||
float __intel__getProfilingTimerResolution() {
|
||||
return static_cast<float>(DEFAULT_GEN9_PLATFORM::hwInfo.capabilityTable.defaultProfilingTimerResolution);
|
||||
}
|
||||
|
||||
#include "runtime/gen9/device_enqueue.h"
|
||||
#include "runtime/gen9/scheduler_definitions.h"
|
||||
#include "runtime/gen9/scheduler_igdrcl_built_in.inl"
|
||||
#include "runtime/scheduler/scheduler.cl"
|
||||
}
|
||||
|
||||
namespace BuiltinKernelsSimulation {
|
||||
|
||||
template <>
|
||||
void SchedulerSimulation<SKLFamily>::startScheduler(uint32_t index,
|
||||
GraphicsAllocation *queue,
|
||||
GraphicsAllocation *commandsStack,
|
||||
GraphicsAllocation *eventsPool,
|
||||
GraphicsAllocation *secondaryBatchBuffer,
|
||||
GraphicsAllocation *dsh,
|
||||
GraphicsAllocation *reflectionSurface,
|
||||
GraphicsAllocation *queueStorageBuffer,
|
||||
GraphicsAllocation *ssh,
|
||||
GraphicsAllocation *debugQueue) {
|
||||
|
||||
threadIDToLocalIDmap.insert(std::make_pair(std::this_thread::get_id(), index));
|
||||
|
||||
while (!conditionReady) {
|
||||
}
|
||||
|
||||
Gen9SchedulerSimulation::SchedulerParallel20((IGIL_CommandQueue *)queue->getUnderlyingBuffer(),
|
||||
(uint *)commandsStack->getUnderlyingBuffer(),
|
||||
(IGIL_EventPool *)eventsPool->getUnderlyingBuffer(),
|
||||
(uint *)secondaryBatchBuffer->getUnderlyingBuffer(),
|
||||
(char *)dsh->getUnderlyingBuffer(),
|
||||
(IGIL_KernelDataHeader *)reflectionSurface->getUnderlyingBuffer(),
|
||||
(uint *)queueStorageBuffer->getUnderlyingBuffer(),
|
||||
(char *)ssh->getUnderlyingBuffer(),
|
||||
debugQueue != nullptr ? (DebugDataBuffer *)debugQueue->getUnderlyingBuffer() : nullptr);
|
||||
}
|
||||
template <>
|
||||
void SchedulerSimulation<SKLFamily>::patchGpGpuWalker(uint secondLevelBatchOffset,
|
||||
__global uint *secondaryBatchBuffer,
|
||||
uint interfaceDescriptorOffset,
|
||||
uint simdSize,
|
||||
uint totalLocalWorkSize,
|
||||
uint3 dimSize,
|
||||
uint3 startPoint,
|
||||
uint numberOfHwThreadsPerWg,
|
||||
uint indirectPayloadSize,
|
||||
uint ioHoffset) {
|
||||
Gen9SchedulerSimulation::patchGpGpuWalker(secondLevelBatchOffset,
|
||||
secondaryBatchBuffer,
|
||||
interfaceDescriptorOffset,
|
||||
simdSize,
|
||||
totalLocalWorkSize,
|
||||
dimSize,
|
||||
startPoint,
|
||||
numberOfHwThreadsPerWg,
|
||||
indirectPayloadSize,
|
||||
ioHoffset);
|
||||
}
|
||||
template class SchedulerSimulation<SKLFamily>;
|
||||
} // namespace BuiltinKernelsSimulation
|
152
runtime/builtin_kernels_simulation/opencl_c.cpp
Normal file
152
runtime/builtin_kernels_simulation/opencl_c.cpp
Normal file
@ -0,0 +1,152 @@
|
||||
/*
|
||||
* Copyright (c) 2017, Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <cstdint>
|
||||
#include "runtime/helpers/string.h"
|
||||
#include "CL/cl.h"
|
||||
#include "opencl_c.h"
|
||||
|
||||
namespace BuiltinKernelsSimulation {
|
||||
|
||||
#define SCHEDULER_EMULATION 1
|
||||
|
||||
// globals
|
||||
std::mutex gMutex;
|
||||
unsigned int globalID[3];
|
||||
unsigned int localID[3];
|
||||
unsigned int localSize[3];
|
||||
|
||||
std::map<std::thread::id, uint32_t> threadIDToLocalIDmap;
|
||||
|
||||
SynchronizationBarrier *pGlobalBarrier = nullptr;
|
||||
|
||||
uint4 operator+(uint4 const &a, uint4 const &b) {
|
||||
uint4 c(0, 0, 0, 0);
|
||||
c.x = a.x + b.x;
|
||||
c.y = a.y + b.y;
|
||||
c.z = a.z + b.z;
|
||||
c.w = a.w + b.w;
|
||||
return c;
|
||||
}
|
||||
|
||||
int4 operator+(int4 const &a, int4 const &b) {
|
||||
int4 c(0, 0, 0, 0);
|
||||
c.x = a.x + b.x;
|
||||
c.y = a.y + b.y;
|
||||
c.z = a.z + b.z;
|
||||
c.w = a.w + b.w;
|
||||
return c;
|
||||
}
|
||||
|
||||
uint get_local_id(int dim) {
|
||||
uint LID = 0;
|
||||
|
||||
// use thread id
|
||||
if (threadIDToLocalIDmap.size() > 0) {
|
||||
std::thread::id id = std::this_thread::get_id();
|
||||
LID = threadIDToLocalIDmap[id] % 24;
|
||||
}
|
||||
// use id from loop iteration
|
||||
else {
|
||||
LID = localID[dim];
|
||||
}
|
||||
return LID;
|
||||
}
|
||||
|
||||
uint get_global_id(int dim) {
|
||||
uint GID = 0;
|
||||
|
||||
// use thread id
|
||||
if (threadIDToLocalIDmap.size() > 0) {
|
||||
std::thread::id id = std::this_thread::get_id();
|
||||
GID = threadIDToLocalIDmap[id];
|
||||
}
|
||||
// use id from loop iteration
|
||||
else {
|
||||
GID = globalID[dim];
|
||||
}
|
||||
return GID;
|
||||
}
|
||||
|
||||
uint get_local_size(int dim) {
|
||||
return localSize[dim];
|
||||
}
|
||||
|
||||
uint get_num_groups(int dim) {
|
||||
return NUM_OF_THREADS / 24;
|
||||
}
|
||||
|
||||
uint get_group_id(int dim) {
|
||||
return get_global_id(dim) / 24;
|
||||
}
|
||||
|
||||
void barrier(int x) {
|
||||
pGlobalBarrier->enter();
|
||||
|
||||
// int LID = get_local_id(0);
|
||||
volatile int BreakPointHere = 0;
|
||||
|
||||
// PUT BREAKPOINT HERE to stop after each barrier
|
||||
BreakPointHere++;
|
||||
}
|
||||
|
||||
uint4 read_imageui(image *im, int4 coord) {
|
||||
uint4 color = {0, 0, 0, 1};
|
||||
|
||||
uint offset = ((coord.z * im->height + coord.y) * im->width + coord.x) * im->bytesPerChannel * im->channels;
|
||||
|
||||
char *temp = &im->ptr[offset];
|
||||
char *colorDst = (char *)&color;
|
||||
|
||||
for (uint i = 0; i < im->channels; i++) {
|
||||
memcpy_s(colorDst, sizeof(uint4), temp, im->bytesPerChannel);
|
||||
temp += im->bytesPerChannel;
|
||||
colorDst += 4;
|
||||
}
|
||||
return color;
|
||||
}
|
||||
|
||||
uint4 write_imageui(image *im, uint4 coord, uint4 color) {
|
||||
uint offset = ((coord.z * im->height + coord.y) * im->width + coord.x) * im->bytesPerChannel * im->channels;
|
||||
|
||||
char *temp = &im->ptr[offset];
|
||||
char *colorSrc = (char *)&color;
|
||||
|
||||
size_t size = im->width * im->height * im->depth * im->bytesPerChannel * im->channels;
|
||||
|
||||
for (uint i = 0; i < im->channels; i++) {
|
||||
memcpy_s(temp, size - offset, colorSrc, im->bytesPerChannel);
|
||||
temp += im->bytesPerChannel;
|
||||
colorSrc += 4;
|
||||
}
|
||||
return *(uint4 *)temp;
|
||||
}
|
||||
|
||||
uchar convert_uchar_sat(uint c) {
|
||||
return (uchar)c;
|
||||
}
|
||||
|
||||
ushort convert_ushort_sat(uint c) {
|
||||
return (ushort)c;
|
||||
}
|
||||
|
||||
} // namespace BuiltinKernelsSimulation
|
304
runtime/builtin_kernels_simulation/opencl_c.h
Normal file
304
runtime/builtin_kernels_simulation/opencl_c.h
Normal file
@ -0,0 +1,304 @@
|
||||
/*
|
||||
* Copyright (c) 2017, Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include <mutex>
|
||||
#include <condition_variable>
|
||||
#include <map>
|
||||
#include <thread>
|
||||
#include <string.h>
|
||||
#include <cstdint>
|
||||
|
||||
// OpenCL Types
|
||||
typedef uint32_t uint;
|
||||
typedef uint8_t uchar;
|
||||
typedef uint16_t ushort;
|
||||
typedef uint64_t ulong;
|
||||
|
||||
namespace BuiltinKernelsSimulation {
|
||||
|
||||
// number of threads in wkg
|
||||
#define NUM_OF_THREADS 24
|
||||
|
||||
#define CLK_GLOBAL_MEM_FENCE 1
|
||||
#define CLK_LOCAL_MEM_FENCE 2
|
||||
|
||||
class SynchronizationBarrier {
|
||||
public:
|
||||
SynchronizationBarrier(int count) : m_InitialCount(count) {
|
||||
m_Count = count;
|
||||
m_BarrierCounter = 0;
|
||||
}
|
||||
|
||||
~SynchronizationBarrier() {
|
||||
}
|
||||
|
||||
void enter() {
|
||||
std::unique_lock<std::mutex> lck(m_Mutex);
|
||||
|
||||
m_Count--;
|
||||
|
||||
unsigned int BarrierCount = m_BarrierCounter;
|
||||
|
||||
if (m_Count > 0) {
|
||||
while (BarrierCount == m_BarrierCounter) {
|
||||
m_AllHitBarrierCondition.wait(lck);
|
||||
}
|
||||
} else {
|
||||
m_Count = m_InitialCount;
|
||||
m_BarrierCounter++;
|
||||
m_AllHitBarrierCondition.notify_all();
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
std::mutex m_Mutex;
|
||||
std::condition_variable m_AllHitBarrierCondition;
|
||||
int m_Count;
|
||||
const int m_InitialCount;
|
||||
unsigned int m_BarrierCounter;
|
||||
};
|
||||
|
||||
// globals
|
||||
extern std::mutex gMutex;
|
||||
extern unsigned int globalID[3];
|
||||
extern unsigned int localID[3];
|
||||
extern unsigned int localSize[3];
|
||||
extern std::map<std::thread::id, uint32_t> threadIDToLocalIDmap;
|
||||
extern SynchronizationBarrier *pGlobalBarrier;
|
||||
|
||||
typedef struct taguint2 {
|
||||
taguint2(uint x, uint y) {
|
||||
this->x = x;
|
||||
this->y = y;
|
||||
}
|
||||
taguint2() {
|
||||
this->x = 0;
|
||||
this->y = 0;
|
||||
}
|
||||
uint x;
|
||||
uint y;
|
||||
} uint2;
|
||||
|
||||
typedef struct taguint3 {
|
||||
taguint3(uint x, uint y, uint z) {
|
||||
this->x = x;
|
||||
this->y = y;
|
||||
this->z = z;
|
||||
}
|
||||
taguint3() {
|
||||
this->x = 0;
|
||||
this->y = 0;
|
||||
this->z = 0;
|
||||
}
|
||||
uint x;
|
||||
uint y;
|
||||
uint z;
|
||||
} uint3;
|
||||
|
||||
typedef struct taguint4 {
|
||||
taguint4(uint x, uint y, uint z, uint w) {
|
||||
this->x = x;
|
||||
this->y = y;
|
||||
this->z = z;
|
||||
this->w = w;
|
||||
}
|
||||
uint x;
|
||||
uint y;
|
||||
uint z;
|
||||
uint w;
|
||||
} uint4;
|
||||
|
||||
typedef struct tagint2 {
|
||||
tagint2(int x, int y) {
|
||||
this->x = x;
|
||||
this->y = y;
|
||||
}
|
||||
int x;
|
||||
int y;
|
||||
} int2;
|
||||
|
||||
typedef struct tagint3 {
|
||||
tagint3(int x, int y, int z) {
|
||||
this->x = x;
|
||||
this->y = y;
|
||||
this->z = z;
|
||||
}
|
||||
int x;
|
||||
int y;
|
||||
int z;
|
||||
} int3;
|
||||
|
||||
typedef struct tagint4 {
|
||||
tagint4(int x, int y, int z, int w) {
|
||||
this->x = x;
|
||||
this->y = y;
|
||||
this->z = z;
|
||||
this->w = w;
|
||||
}
|
||||
int x;
|
||||
int y;
|
||||
int z;
|
||||
int w;
|
||||
} int4;
|
||||
|
||||
typedef struct tagushort2 {
|
||||
tagushort2(ushort x, ushort y) {
|
||||
this->x = x;
|
||||
this->y = y;
|
||||
}
|
||||
unsigned short x;
|
||||
unsigned short y;
|
||||
} ushort2;
|
||||
|
||||
typedef struct tagushort8 {
|
||||
unsigned short xxx[8];
|
||||
} ushort8;
|
||||
|
||||
typedef struct tagushort16 {
|
||||
unsigned short xxx[16];
|
||||
} ushort16;
|
||||
|
||||
uint4 operator+(uint4 const &a, uint4 const &b);
|
||||
int4 operator+(int4 const &a, int4 const &b);
|
||||
|
||||
typedef struct tagimage {
|
||||
char *ptr;
|
||||
uint width;
|
||||
uint height;
|
||||
uint depth;
|
||||
uint bytesPerChannel;
|
||||
uint channels;
|
||||
} image;
|
||||
|
||||
// images as pointer
|
||||
typedef image *image1d_t;
|
||||
typedef image *image2d_t;
|
||||
typedef image *image3d_t;
|
||||
|
||||
// OpenCL keywords
|
||||
#define __global
|
||||
#define __local
|
||||
#define __private
|
||||
#define __kernel
|
||||
#define __attribute__(...)
|
||||
#define __read_only
|
||||
#define __write_only
|
||||
#define queue_t void *
|
||||
|
||||
struct clk_event_t {
|
||||
clk_event_t() {
|
||||
value = 0;
|
||||
}
|
||||
clk_event_t(void *v) {
|
||||
value = static_cast<uint>(reinterpret_cast<uintptr_t>(v));
|
||||
}
|
||||
|
||||
explicit operator void *() const {
|
||||
return reinterpret_cast<void *>(static_cast<uintptr_t>(value));
|
||||
}
|
||||
|
||||
operator uint() {
|
||||
return (uint)value;
|
||||
}
|
||||
|
||||
void operator=(uint input) {
|
||||
value = input;
|
||||
}
|
||||
|
||||
uint value;
|
||||
};
|
||||
|
||||
// OpenCL builtins
|
||||
#define __builtin_astype(var, type) \
|
||||
( \
|
||||
(type)var)
|
||||
|
||||
#define select(a, b, c) (c ? b : a)
|
||||
|
||||
uint get_local_id(int dim);
|
||||
uint get_global_id(int dim);
|
||||
uint get_local_size(int dim);
|
||||
uint get_num_groups(int dim);
|
||||
uint get_group_id(int dim);
|
||||
void barrier(int x);
|
||||
uint4 read_imageui(image *im, int4 coord);
|
||||
uint4 write_imageui(image *im, uint4 coord, uint4 color);
|
||||
uchar convert_uchar_sat(uint c);
|
||||
ushort convert_ushort_sat(uint c);
|
||||
|
||||
#define EMULATION_ENTER_FUNCTION() \
|
||||
uint __LOCAL_ID__ = 0; \
|
||||
__LOCAL_ID__ = get_local_id(0);
|
||||
|
||||
template <class TYPE, class TYPE2>
|
||||
void atomic_xchg(TYPE *dest, TYPE2 val) {
|
||||
gMutex.lock();
|
||||
dest[0] = (TYPE)val;
|
||||
gMutex.unlock();
|
||||
}
|
||||
|
||||
template <class TYPE, class TYPE2>
|
||||
TYPE atomic_add(TYPE *first, TYPE2 second) {
|
||||
gMutex.lock();
|
||||
TYPE temp = first[0];
|
||||
first[0] = (TYPE)(temp + (TYPE)second);
|
||||
gMutex.unlock();
|
||||
return temp;
|
||||
}
|
||||
|
||||
template <class TYPE, class TYPE2>
|
||||
TYPE atomic_sub(TYPE *first, TYPE2 second) {
|
||||
gMutex.lock();
|
||||
TYPE temp = first[0];
|
||||
first[0] = temp - second;
|
||||
gMutex.unlock();
|
||||
return temp;
|
||||
}
|
||||
|
||||
template <class TYPE>
|
||||
TYPE atomic_inc(TYPE *first) {
|
||||
gMutex.lock();
|
||||
TYPE temp = first[0];
|
||||
first[0] = temp + 1;
|
||||
gMutex.unlock();
|
||||
return temp;
|
||||
}
|
||||
|
||||
template <class TYPE>
|
||||
TYPE atomic_dec(TYPE *first) {
|
||||
gMutex.lock();
|
||||
TYPE temp = first[0];
|
||||
first[0] = temp - 1;
|
||||
gMutex.unlock();
|
||||
return temp;
|
||||
}
|
||||
|
||||
template <class TYPE, class TYPE2>
|
||||
TYPE atomic_min(TYPE *first, TYPE2 second) {
|
||||
gMutex.lock();
|
||||
TYPE temp = first[0];
|
||||
first[0] = (TYPE)((TYPE)second < temp ? (TYPE)second : temp);
|
||||
gMutex.unlock();
|
||||
return temp;
|
||||
}
|
||||
}
|
36
runtime/builtin_kernels_simulation/scheduler_simulation.cpp
Normal file
36
runtime/builtin_kernels_simulation/scheduler_simulation.cpp
Normal file
@ -0,0 +1,36 @@
|
||||
/*
|
||||
* Copyright (c) 2017, Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "runtime/builtin_kernels_simulation/scheduler_simulation.h"
|
||||
#include "runtime/builtin_kernels_simulation/opencl_c.h"
|
||||
|
||||
#include <thread>
|
||||
|
||||
using namespace std;
|
||||
using namespace OCLRT;
|
||||
|
||||
namespace BuiltinKernelsSimulation {
|
||||
|
||||
bool conditionReady = false;
|
||||
std::thread threads[NUM_OF_THREADS];
|
||||
|
||||
} // namespace BuiltinKernelsSimulation
|
92
runtime/builtin_kernels_simulation/scheduler_simulation.h
Normal file
92
runtime/builtin_kernels_simulation/scheduler_simulation.h
Normal file
@ -0,0 +1,92 @@
|
||||
/*
|
||||
* Copyright (c) 2017, Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
#pragma once
|
||||
#include <cstdint>
|
||||
#include <thread>
|
||||
|
||||
#include "runtime/builtin_kernels_simulation/opencl_c.h"
|
||||
namespace OCLRT {
|
||||
class GraphicsAllocation;
|
||||
}
|
||||
|
||||
namespace BuiltinKernelsSimulation {
|
||||
|
||||
extern bool conditionReady;
|
||||
extern std::thread threads[];
|
||||
|
||||
template <typename GfxFamily>
|
||||
class SchedulerSimulation {
|
||||
public:
|
||||
void runSchedulerSimulation(OCLRT::GraphicsAllocation *queue,
|
||||
OCLRT::GraphicsAllocation *commandsStack,
|
||||
OCLRT::GraphicsAllocation *eventsPool,
|
||||
OCLRT::GraphicsAllocation *secondaryBatchBuffer,
|
||||
OCLRT::GraphicsAllocation *dsh,
|
||||
OCLRT::GraphicsAllocation *reflectionSurface,
|
||||
OCLRT::GraphicsAllocation *queueStorageBuffer,
|
||||
OCLRT::GraphicsAllocation *ssh,
|
||||
OCLRT::GraphicsAllocation *debugQueue);
|
||||
|
||||
void cleanSchedulerSimulation();
|
||||
|
||||
static void startScheduler(uint32_t index,
|
||||
OCLRT::GraphicsAllocation *queue,
|
||||
OCLRT::GraphicsAllocation *commandsStack,
|
||||
OCLRT::GraphicsAllocation *eventsPool,
|
||||
OCLRT::GraphicsAllocation *secondaryBatchBuffer,
|
||||
OCLRT::GraphicsAllocation *dsh,
|
||||
OCLRT::GraphicsAllocation *reflectionSurface,
|
||||
OCLRT::GraphicsAllocation *queueStorageBuffer,
|
||||
OCLRT::GraphicsAllocation *ssh,
|
||||
OCLRT::GraphicsAllocation *debugQueue);
|
||||
|
||||
void initializeSchedulerSimulation(OCLRT::GraphicsAllocation *queue,
|
||||
OCLRT::GraphicsAllocation *commandsStack,
|
||||
OCLRT::GraphicsAllocation *eventsPool,
|
||||
OCLRT::GraphicsAllocation *secondaryBatchBuffer,
|
||||
OCLRT::GraphicsAllocation *dsh,
|
||||
OCLRT::GraphicsAllocation *reflectionSurface,
|
||||
OCLRT::GraphicsAllocation *queueStorageBuffer,
|
||||
OCLRT::GraphicsAllocation *ssh,
|
||||
OCLRT::GraphicsAllocation *debugQueue);
|
||||
|
||||
static void patchGpGpuWalker(uint secondLevelBatchOffset,
|
||||
__global uint *secondaryBatchBuffer,
|
||||
uint interfaceDescriptorOffset,
|
||||
uint simdSize,
|
||||
uint totalLocalWorkSize,
|
||||
uint3 dimSize,
|
||||
uint3 startPoint,
|
||||
uint numberOfHwThreadsPerWg,
|
||||
uint indirectPayloadSize,
|
||||
uint ioHoffset);
|
||||
static bool enabled;
|
||||
static bool simulationRun;
|
||||
};
|
||||
|
||||
template <typename GfxFamily>
|
||||
bool SchedulerSimulation<GfxFamily>::enabled = true;
|
||||
|
||||
template <typename GfxFamily>
|
||||
bool SchedulerSimulation<GfxFamily>::simulationRun = false;
|
||||
|
||||
} // namespace BuiltinKernelsSimulation
|
112
runtime/builtin_kernels_simulation/scheduler_simulation.inl
Normal file
112
runtime/builtin_kernels_simulation/scheduler_simulation.inl
Normal file
@ -0,0 +1,112 @@
|
||||
/*
|
||||
* Copyright (c) 2017, Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "runtime/memory_manager/graphics_allocation.h"
|
||||
#include "runtime/builtin_kernels_simulation/scheduler_simulation.h"
|
||||
|
||||
#include <cstdint>
|
||||
#include <mutex>
|
||||
#include <thread>
|
||||
|
||||
using namespace std;
|
||||
using namespace OCLRT;
|
||||
|
||||
namespace BuiltinKernelsSimulation {
|
||||
|
||||
template <typename GfxFamily>
|
||||
void SchedulerSimulation<GfxFamily>::cleanSchedulerSimulation() {
|
||||
threadIDToLocalIDmap.clear();
|
||||
delete pGlobalBarrier;
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void SchedulerSimulation<GfxFamily>::initializeSchedulerSimulation(GraphicsAllocation *queue,
|
||||
GraphicsAllocation *commandsStack,
|
||||
GraphicsAllocation *eventsPool,
|
||||
GraphicsAllocation *secondaryBatchBuffer,
|
||||
GraphicsAllocation *dsh,
|
||||
GraphicsAllocation *reflectionSurface,
|
||||
GraphicsAllocation *queueStorageBuffer,
|
||||
GraphicsAllocation *ssh,
|
||||
GraphicsAllocation *debugQueue) {
|
||||
|
||||
localSize[0] = NUM_OF_THREADS;
|
||||
localSize[1] = 1;
|
||||
localSize[2] = 1;
|
||||
|
||||
threadIDToLocalIDmap.clear();
|
||||
pGlobalBarrier = new SynchronizationBarrier(NUM_OF_THREADS);
|
||||
|
||||
// Spawn Thread ID == 0 on main thread
|
||||
for (uint32_t i = 1; i < NUM_OF_THREADS; i++) {
|
||||
threads[i] = std::thread(startScheduler, i, queue, commandsStack, eventsPool, secondaryBatchBuffer, dsh, reflectionSurface, queueStorageBuffer, ssh, debugQueue);
|
||||
}
|
||||
|
||||
conditionReady = true;
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void SchedulerSimulation<GfxFamily>::runSchedulerSimulation(GraphicsAllocation *queue,
|
||||
GraphicsAllocation *commandsStack,
|
||||
GraphicsAllocation *eventsPool,
|
||||
GraphicsAllocation *secondaryBatchBuffer,
|
||||
GraphicsAllocation *dsh,
|
||||
GraphicsAllocation *reflectionSurface,
|
||||
GraphicsAllocation *queueStorageBuffer,
|
||||
GraphicsAllocation *ssh,
|
||||
GraphicsAllocation *debugQueue) {
|
||||
simulationRun = true;
|
||||
if (enabled) {
|
||||
initializeSchedulerSimulation(queue,
|
||||
commandsStack,
|
||||
eventsPool,
|
||||
secondaryBatchBuffer,
|
||||
dsh,
|
||||
reflectionSurface,
|
||||
queueStorageBuffer,
|
||||
ssh,
|
||||
debugQueue);
|
||||
|
||||
// start main thread with LID == 0
|
||||
startScheduler(0,
|
||||
queue,
|
||||
commandsStack,
|
||||
eventsPool,
|
||||
secondaryBatchBuffer,
|
||||
dsh,
|
||||
reflectionSurface,
|
||||
queueStorageBuffer,
|
||||
ssh,
|
||||
debugQueue);
|
||||
|
||||
// Wait for all threads on main thread
|
||||
if (threadIDToLocalIDmap[std::this_thread::get_id()] == 0) {
|
||||
|
||||
for (uint32_t i = 1; i < NUM_OF_THREADS; i++)
|
||||
threads[i].join();
|
||||
|
||||
cleanSchedulerSimulation();
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace BuiltinKernelsSimulation
|
Reference in New Issue
Block a user