/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "CL/cl.h" #include #include #include #include #include #include // OpenCL Types typedef uint32_t uint; typedef uint8_t uchar; typedef uint16_t ushort; typedef uint64_t ulong; namespace BuiltinKernelsSimulation { // number of threads in wkg #define NUM_OF_THREADS 24 #define CLK_GLOBAL_MEM_FENCE 1 #define CLK_LOCAL_MEM_FENCE 2 class SynchronizationBarrier { public: SynchronizationBarrier(int count) : m_InitialCount(count) { m_Count = count; m_BarrierCounter = 0; } ~SynchronizationBarrier() { } void enter() { std::unique_lock lck(m_Mutex); m_Count--; unsigned int BarrierCount = m_BarrierCounter; if (m_Count > 0) { while (BarrierCount == m_BarrierCounter) { m_AllHitBarrierCondition.wait(lck); } } else { m_Count = m_InitialCount; m_BarrierCounter++; m_AllHitBarrierCondition.notify_all(); } } private: std::mutex m_Mutex; std::condition_variable m_AllHitBarrierCondition; int m_Count; const int m_InitialCount; unsigned int m_BarrierCounter; }; // globals extern std::mutex gMutex; extern unsigned int globalID[3]; extern unsigned int localID[3]; extern unsigned int localSize[3]; extern std::map threadIDToLocalIDmap; extern SynchronizationBarrier *pGlobalBarrier; typedef struct taguint2 { taguint2(uint x, uint y) { this->x = x; this->y = y; } taguint2() { this->x = 0; this->y = 0; } uint x; uint y; } uint2; typedef struct taguint3 { taguint3(uint x, uint y, uint z) { this->x = x; this->y = y; this->z = z; } taguint3() { this->x = 0; this->y = 0; this->z = 0; } uint x; uint y; uint z; } uint3; typedef struct taguint4 { taguint4(uint x, uint y, uint z, uint w) { this->x = x; this->y = y; this->z = z; this->w = w; } uint x; uint y; uint z; uint w; } uint4; typedef struct tagint2 { tagint2(int x, int y) { this->x = x; this->y = y; } int x; int y; } int2; typedef struct tagint3 { tagint3(int x, int y, int z) { this->x = x; this->y = y; this->z = z; } int x; int y; int z; } int3; typedef struct tagint4 { tagint4(int x, int y, int z, int w) { this->x = x; this->y = y; this->z = z; this->w = w; } int x; int y; int z; int w; } int4; typedef struct tagushort2 { tagushort2(ushort x, ushort y) { this->x = x; this->y = y; } unsigned short x; unsigned short y; } ushort2; typedef struct tagushort8 { unsigned short xxx[8]; } ushort8; typedef struct tagushort16 { unsigned short xxx[16]; } ushort16; uint4 operator+(uint4 const &a, uint4 const &b); int4 operator+(int4 const &a, int4 const &b); typedef struct tagimage { char *ptr; uint width; uint height; uint depth; uint bytesPerChannel; uint channels; } image; // images as pointer typedef image *image1d_t; typedef image *image2d_t; typedef image *image3d_t; // OpenCL keywords #define __global #define __local #define __private #define __kernel #define __attribute__(...) #define __read_only #define __write_only #define queue_t void * struct clk_event_t { clk_event_t() { value = 0; } clk_event_t(void *v) { value = static_cast(reinterpret_cast(v)); } explicit operator void *() const { return reinterpret_cast(static_cast(value)); } operator uint() { return (uint)value; } void operator=(uint input) { value = input; } uint value; }; // OpenCL builtins #define __builtin_astype(var, type) \ ( \ (type)var) #define select(a, b, c) (c ? b : a) uint get_local_id(int dim); uint get_global_id(int dim); uint get_local_size(int dim); uint get_num_groups(int dim); uint get_group_id(int dim); void barrier(int x); uint4 read_imageui(image *im, int4 coord); uint4 write_imageui(image *im, uint4 coord, uint4 color); uchar convert_uchar_sat(uint c); ushort convert_ushort_sat(uint c); #define EMULATION_ENTER_FUNCTION() \ uint __LOCAL_ID__ = 0; \ __LOCAL_ID__ = get_local_id(0); template void atomic_xchg(TYPE *dest, TYPE2 val) { gMutex.lock(); dest[0] = (TYPE)val; gMutex.unlock(); } template TYPE atomic_add(TYPE *first, TYPE2 second) { gMutex.lock(); TYPE temp = first[0]; first[0] = (TYPE)(temp + (TYPE)second); gMutex.unlock(); return temp; } template TYPE atomic_sub(TYPE *first, TYPE2 second) { gMutex.lock(); TYPE temp = first[0]; first[0] = temp - second; gMutex.unlock(); return temp; } template TYPE atomic_inc(TYPE *first) { gMutex.lock(); TYPE temp = first[0]; first[0] = temp + 1; gMutex.unlock(); return temp; } template TYPE atomic_dec(TYPE *first) { gMutex.lock(); TYPE temp = first[0]; first[0] = temp - 1; gMutex.unlock(); return temp; } template TYPE atomic_min(TYPE *first, TYPE2 second) { gMutex.lock(); TYPE temp = first[0]; first[0] = (TYPE)((TYPE)second < temp ? (TYPE)second : temp); gMutex.unlock(); return temp; } } // namespace BuiltinKernelsSimulation