153 lines
4.0 KiB
C++
153 lines
4.0 KiB
C++
/*
 * Copyright (c) 2017 - 2018, Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */
|
|
|
|
#include <cstdint>
|
|
#include "CL/cl.h"
|
|
#include "opencl_c.h"
|
|
#include "runtime/helpers/string.h"
|
|
|
|
namespace BuiltinKernelsSimulation {
|
|
|
|
#define SCHEDULER_EMULATION 1
|
|
|
|
// globals
|
|
std::mutex gMutex;
|
|
unsigned int globalID[3];
|
|
unsigned int localID[3];
|
|
unsigned int localSize[3];
|
|
|
|
std::map<std::thread::id, uint32_t> threadIDToLocalIDmap;
|
|
|
|
SynchronizationBarrier *pGlobalBarrier = nullptr;
|
|
|
|
// Component-wise addition of two uint4 values: x, y, z and w are summed independently.
uint4 operator+(uint4 const &a, uint4 const &b) {
    uint4 sum(a);
    sum.x += b.x;
    sum.y += b.y;
    sum.z += b.z;
    sum.w += b.w;
    return sum;
}
|
|
|
|
// Component-wise addition of two int4 values: x, y, z and w are summed independently.
int4 operator+(int4 const &a, int4 const &b) {
    int4 sum(a);
    sum.x += b.x;
    sum.y += b.y;
    sum.z += b.z;
    sum.w += b.w;
    return sum;
}
|
|
|
|
// Returns the local id for the caller.
//
// When threadIDToLocalIDmap has entries, the id stored for the calling thread
// is reduced modulo 24 and `dim` is ignored; a thread without an entry gets 0.
// When the map is empty, localID[dim] is returned (no range check on `dim`).
uint get_local_id(int dim) {
    if (threadIDToLocalIDmap.empty()) {
        // use id from loop iteration
        return localID[dim];
    }

    // use thread id
    // find() instead of operator[]: operator[] would insert a zero entry for an
    // unregistered thread, i.e. modify the shared map from a read-only query.
    const auto entry = threadIDToLocalIDmap.find(std::this_thread::get_id());
    if (entry == threadIDToLocalIDmap.end()) {
        return 0;
    }
    return entry->second % 24;
}
|
|
|
|
// Returns the global id for the caller.
//
// When threadIDToLocalIDmap has entries, the id stored for the calling thread
// is returned and `dim` is ignored; a thread without an entry gets 0.
// When the map is empty, globalID[dim] is returned (no range check on `dim`).
uint get_global_id(int dim) {
    if (threadIDToLocalIDmap.empty()) {
        // use id from loop iteration
        return globalID[dim];
    }

    // use thread id
    // find() instead of operator[]: operator[] would insert a zero entry for an
    // unregistered thread, i.e. modify the shared map from a read-only query.
    const auto entry = threadIDToLocalIDmap.find(std::this_thread::get_id());
    if (entry == threadIDToLocalIDmap.end()) {
        return 0;
    }
    return entry->second;
}
|
|
|
|
// Returns the localSize entry for dimension `dim`; `dim` is not range-checked.
uint get_local_size(int dim) {
    const uint sizeInDim = localSize[dim];
    return sizeInDim;
}
|
|
|
|
// Returns NUM_OF_THREADS divided by 24, the same divisor get_group_id() applies
// to the global id. `dim` is not consulted.
uint get_num_groups(int dim) {
    const auto groupCount = NUM_OF_THREADS / 24;
    return groupCount;
}
|
|
|
|
// Returns the caller's group index: get_global_id(dim) divided by 24 (integer division).
uint get_group_id(int dim) {
    const uint globalId = get_global_id(dim);
    return globalId / 24;
}
|
|
|
|
// Calls enter() on pGlobalBarrier, then touches a volatile local so a debugger
// has a statement to stop on after the call. `x` is not used.
void barrier(int x) {
    pGlobalBarrier->enter();

    // int LID = get_local_id(0);
    volatile int breakpointAnchor = 0;

    // PUT BREAKPOINT HERE to stop after each barrier
    ++breakpointAnchor;
}
|
|
|
|
// Reads the texel at `coord` from `im` and returns it as a uint4.
//
// The result starts as {0, 0, 0, 1}. For each channel, bytesPerChannel bytes are
// copied from the image into the next 4-byte component of the result, so
// components beyond im->channels keep their initial value.
// The texel is located at ((z * height + y) * width + x) * bytesPerChannel * channels
// bytes from im->ptr; the coordinate is not range-checked.
uint4 read_imageui(image *im, int4 coord) {
    uint4 color = {0, 0, 0, 1};

    uint offset = ((coord.z * im->height + coord.y) * im->width + coord.x) * im->bytesPerChannel * im->channels;

    char *src = &im->ptr[offset];
    char *colorDst = reinterpret_cast<char *>(&color);

    // Each channel lands in its own 4-byte slot of the result.
    const size_t componentStride = 4;
    const size_t maxComponents = sizeof(uint4) / componentStride;

    // Stop at the number of slots the result has, so an image reporting more
    // channels cannot write past `color`.
    for (uint i = 0; i < im->channels && i < maxComponents; i++) {
        // Capacity left from the current write position, not the whole struct,
        // so the bound passed to memcpy_s matches the advanced pointer.
        const size_t remaining = sizeof(uint4) - i * componentStride;
        if (im->bytesPerChannel > remaining) {
            break;
        }
        memcpy_s(colorDst, remaining, src, im->bytesPerChannel);
        src += im->bytesPerChannel;
        colorDst += componentStride;
    }
    return color;
}
|
|
|
|
// Writes `color` into the texel at `coord` of `im` and returns `color`.
//
// For each channel, bytesPerChannel bytes are copied from the matching 4-byte
// component of `color` into the image. The texel is located at
// ((z * height + y) * width + x) * bytesPerChannel * channels bytes from im->ptr.
//
// The return value is the colour passed in. Dereferencing the write cursor after
// the loop would read the bytes that follow the written texel, which lie outside
// the image for its last texel.
uint4 write_imageui(image *im, uint4 coord, uint4 color) {
    uint offset = ((coord.z * im->height + coord.y) * im->width + coord.x) * im->bytesPerChannel * im->channels;

    size_t size = im->width * im->height * im->depth * im->bytesPerChannel * im->channels;

    char *dst = &im->ptr[offset];
    const char *colorSrc = reinterpret_cast<const char *>(&color);

    // Each channel is taken from its own 4-byte slot of `color`.
    const size_t componentStride = 4;
    const size_t maxComponents = sizeof(uint4) / componentStride;

    // Bytes between the write cursor and the end of the image; reduced after every
    // channel so the bound given to memcpy_s follows the cursor.
    // NOTE(review): wraps if offset exceeds size; coordinates are not validated here.
    size_t remaining = size - offset;

    // Stop at the number of slots `color` has, so an image reporting more
    // channels cannot read past it.
    for (uint i = 0; i < im->channels && i < maxComponents; i++) {
        if (remaining < im->bytesPerChannel) {
            break;
        }
        memcpy_s(dst, remaining, colorSrc, im->bytesPerChannel);
        remaining -= im->bytesPerChannel;
        dst += im->bytesPerChannel;
        colorSrc += componentStride;
    }
    return color;
}
|
|
|
|
// Converts `c` to uchar with saturation: values above the largest uchar value
// are clamped to that maximum instead of being truncated to their low bits.
uchar convert_uchar_sat(uint c) {
    // Largest value uchar can hold, obtained without needing an extra header.
    const uint maxValue = static_cast<uchar>(~0u);
    return static_cast<uchar>(c > maxValue ? maxValue : c);
}
|
|
|
|
// Converts `c` to ushort with saturation: values above the largest ushort value
// are clamped to that maximum instead of being truncated to their low bits.
ushort convert_ushort_sat(uint c) {
    // Largest value ushort can hold, obtained without needing an extra header.
    const uint maxValue = static_cast<ushort>(~0u);
    return static_cast<ushort>(c > maxValue ? maxValue : c);
}
|
|
|
|
} // namespace BuiltinKernelsSimulation
|