Move built ins to share directory

Change-Id: I740a349a0f15229cd356fffe996932029bf0f98b
Signed-off-by: Maciej Plewka <maciej.plewka@intel.com>
This commit is contained in:
Maciej Plewka
2020-02-24 13:10:44 +01:00
committed by sys_ocldev
parent 0e85ccf084
commit 357fdc2e65
115 changed files with 608 additions and 429 deletions

View File

@@ -59,6 +59,8 @@ else()
list(APPEND CORE_SOURCES ${CORE_SRCS_GENX_ALL_LINUX})
endif()
message("${NEO_CORE_SRCS_BUILT_INS}")
append_sources_from_properties(CORE_SOURCES
NEO_CORE_COMMAND_CONTAINER
NEO_CORE_DEBUG_SETTINGS
@@ -69,6 +71,9 @@ append_sources_from_properties(CORE_SOURCES
NEO_CORE_HELPERS
NEO_CORE_INDIRECT_HEAP
NEO_CORE_SKU_INFO_BASE
NEO_CORE_SRCS_BUILT_INS
NEO_CORE_SRCS_BUILT_IN_KERNELS
NEO_CORE_SRCS_BUILT_INS_OPS
NEO_CORE_UTILITIES
NEO_UNIFIED_MEMORY
)

View File

@@ -1,12 +1,46 @@
#
# Copyright (C) 2020 Intel Corporation
# Copyright (C) 2017-2020 Intel Corporation
#
# SPDX-License-Identifier: MIT
#
set(NEO_CORE_BUILT_INS
set(NEO_CORE_SRCS_BUILT_INS
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
${CMAKE_CURRENT_SOURCE_DIR}/built_ins_storage.cpp
${CMAKE_CURRENT_SOURCE_DIR}/built_ins.cpp
${CMAKE_CURRENT_SOURCE_DIR}/built_ins.h
${CMAKE_CURRENT_SOURCE_DIR}/built_in_ops_base.h
${CMAKE_CURRENT_SOURCE_DIR}/sip.cpp
${CMAKE_CURRENT_SOURCE_DIR}/sip.h
${CMAKE_CURRENT_SOURCE_DIR}/sip_kernel_type.h
)
set_property(GLOBAL PROPERTY NEO_CORE_BUILT_INS ${NEO_CORE_BUILT_INS})
add_subdirectory(builtinops)
set_property(GLOBAL PROPERTY NEO_CORE_SRCS_BUILT_INS ${NEO_CORE_SRCS_BUILT_INS})
set(NEO_CORE_SRCS_BUILT_IN_KERNELS
${CMAKE_CURRENT_SOURCE_DIR}/kernels/aux_translation.builtin_kernel
${CMAKE_CURRENT_SOURCE_DIR}/kernels/copy_buffer_rect.builtin_kernel
${CMAKE_CURRENT_SOURCE_DIR}/kernels/copy_buffer_rect_stateless.builtin_kernel
${CMAKE_CURRENT_SOURCE_DIR}/kernels/copy_buffer_to_buffer.builtin_kernel
${CMAKE_CURRENT_SOURCE_DIR}/kernels/copy_buffer_to_buffer_stateless.builtin_kernel
${CMAKE_CURRENT_SOURCE_DIR}/kernels/copy_buffer_to_image3d.builtin_kernel
${CMAKE_CURRENT_SOURCE_DIR}/kernels/copy_buffer_to_image3d_stateless.builtin_kernel
${CMAKE_CURRENT_SOURCE_DIR}/kernels/copy_image3d_to_buffer.builtin_kernel
${CMAKE_CURRENT_SOURCE_DIR}/kernels/copy_image3d_to_buffer_stateless.builtin_kernel
${CMAKE_CURRENT_SOURCE_DIR}/kernels/copy_image_to_image1d.builtin_kernel
${CMAKE_CURRENT_SOURCE_DIR}/kernels/copy_image_to_image2d.builtin_kernel
${CMAKE_CURRENT_SOURCE_DIR}/kernels/copy_image_to_image3d.builtin_kernel
${CMAKE_CURRENT_SOURCE_DIR}/kernels/fill_buffer.builtin_kernel
${CMAKE_CURRENT_SOURCE_DIR}/kernels/fill_buffer_stateless.builtin_kernel
${CMAKE_CURRENT_SOURCE_DIR}/kernels/fill_image1d.builtin_kernel
${CMAKE_CURRENT_SOURCE_DIR}/kernels/fill_image2d.builtin_kernel
${CMAKE_CURRENT_SOURCE_DIR}/kernels/fill_image3d.builtin_kernel
)
set_property(GLOBAL PROPERTY NEO_CORE_SRCS_BUILT_IN_KERNELS ${NEO_CORE_SRCS_BUILT_IN_KERNELS})
if(NOT (TARGET ${BUILTINS_BINARIES_LIB_NAME}))
include(builtins_binary.cmake)
endif()

View File

@@ -0,0 +1,37 @@
/*
* Copyright (C) 2019-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include <cstdint>
namespace NEO {
namespace EBuiltInOps {
using Type = uint32_t;
constexpr Type AuxTranslation{0};
constexpr Type CopyBufferToBuffer{1};
constexpr Type CopyBufferToBufferStateless{2};
constexpr Type CopyBufferRect{3};
constexpr Type CopyBufferRectStateless{4};
constexpr Type FillBuffer{5};
constexpr Type FillBufferStateless{6};
constexpr Type CopyBufferToImage3d{7};
constexpr Type CopyBufferToImage3dStateless{8};
constexpr Type CopyImage3dToBuffer{9};
constexpr Type CopyImage3dToBufferStateless{10};
constexpr Type CopyImageToImage1d{11};
constexpr Type CopyImageToImage2d{12};
constexpr Type CopyImageToImage3d{13};
constexpr Type FillImage1d{14};
constexpr Type FillImage2d{15};
constexpr Type FillImage3d{16};
constexpr Type MaxBaseValue{16};
constexpr Type COUNT{64};
} // namespace EBuiltInOps
} // namespace NEO

View File

@@ -0,0 +1,69 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/built_ins/built_ins.h"
#include "shared/source/built_ins/sip.h"
#include "shared/source/compiler_interface/compiler_interface.h"
#include "shared/source/helpers/basic_math.h"
#include "shared/source/helpers/debug_helpers.h"
#include "opencl/source/helpers/built_ins_helper.h"
#include "opencl/source/helpers/convert_color.h"
#include "opencl/source/helpers/dispatch_info_builder.h"
#include "compiler_options.h"
#include <cstdint>
#include <sstream>
namespace NEO {
BuiltIns::BuiltIns() {
builtinsLib.reset(new BuiltinsLib());
}
BuiltIns::~BuiltIns() = default;
const SipKernel &BuiltIns::getSipKernel(SipKernelType type, Device &device) {
uint32_t kernelId = static_cast<uint32_t>(type);
UNRECOVERABLE_IF(kernelId >= static_cast<uint32_t>(SipKernelType::COUNT));
auto &sipBuiltIn = this->sipKernels[kernelId];
auto initializer = [&] {
int retVal = 0;
std::vector<char> sipBinary;
auto compilerInteface = device.getExecutionEnvironment()->getCompilerInterface();
UNRECOVERABLE_IF(compilerInteface == nullptr);
auto ret = compilerInteface->getSipKernelBinary(device, type, sipBinary);
UNRECOVERABLE_IF(ret != TranslationOutput::ErrorCode::Success);
UNRECOVERABLE_IF(sipBinary.size() == 0);
auto program = createProgramForSip(*device.getExecutionEnvironment(),
nullptr,
sipBinary,
sipBinary.size(),
&retVal,
&device);
DEBUG_BREAK_IF(retVal != 0);
UNRECOVERABLE_IF(program == nullptr);
program->setDevice(&device);
retVal = program->processGenBinary();
DEBUG_BREAK_IF(retVal != 0);
sipBuiltIn.first.reset(new SipKernel(type, program));
};
std::call_once(sipBuiltIn.second, initializer);
UNRECOVERABLE_IF(sipBuiltIn.first == nullptr);
return *sipBuiltIn.first;
}
} // namespace NEO

View File

@@ -0,0 +1,201 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include "shared/source/built_ins/sip_kernel_type.h"
#include "shared/source/helpers/debug_helpers.h"
#include "shared/source/helpers/non_copyable_or_moveable.h"
#include "shared/source/helpers/vec.h"
#include "built_in_ops.h"
#include "compiler_options.h"
#include <array>
#include <cstdint>
#include <fstream>
#include <memory>
#include <mutex>
#include <string>
#include <tuple>
#include <unordered_map>
#include <vector>
namespace NEO {
typedef std::vector<char> BuiltinResourceT;
class Context;
class Device;
class Kernel;
struct KernelInfo;
struct MultiDispatchInfo;
class Program;
class SchedulerKernel;
class SipKernel;
static constexpr ConstStringRef mediaKernelsBuildOptionsList[] = {
"-D cl_intel_device_side_advanced_vme_enable",
"-D cl_intel_device_side_avc_vme_enable",
"-D cl_intel_device_side_vme_enable",
"-D cl_intel_media_block_io",
CompilerOptions::fastRelaxedMath};
static constexpr CompilerOptions::ConstConcatenation<> mediaKernelsBuildOptions{mediaKernelsBuildOptionsList};
BuiltinResourceT createBuiltinResource(const char *ptr, size_t size);
BuiltinResourceT createBuiltinResource(const BuiltinResourceT &r);
std::string createBuiltinResourceName(EBuiltInOps::Type builtin, const std::string &extension,
const std::string &platformName = "", uint32_t deviceRevId = 0);
std::string joinPath(const std::string &lhs, const std::string &rhs);
const char *getBuiltinAsString(EBuiltInOps::Type builtin);
const char *getUnknownBuiltinAsString(EBuiltInOps::Type builtin);
const char *getAdditionalBuiltinAsString(EBuiltInOps::Type builtin);
class Storage {
public:
Storage(const std::string &rootPath)
: rootPath(rootPath) {
}
virtual ~Storage() = default;
BuiltinResourceT load(const std::string &resourceName);
protected:
virtual BuiltinResourceT loadImpl(const std::string &fullResourceName) = 0;
std::string rootPath;
};
class FileStorage : public Storage {
public:
FileStorage(const std::string &rootPath = "")
: Storage(rootPath) {
}
protected:
BuiltinResourceT loadImpl(const std::string &fullResourceName) override;
};
struct EmbeddedStorageRegistry {
static EmbeddedStorageRegistry &getInstance() {
static EmbeddedStorageRegistry gsr;
return gsr;
}
void store(const std::string &name, BuiltinResourceT &&resource) {
resources.emplace(name, BuiltinResourceT(std::move(resource)));
}
const BuiltinResourceT *get(const std::string &name) const;
private:
using ResourcesContainer = std::unordered_map<std::string, BuiltinResourceT>;
ResourcesContainer resources;
};
class EmbeddedStorage : public Storage {
public:
EmbeddedStorage(const std::string &rootPath)
: Storage(rootPath) {
}
protected:
BuiltinResourceT loadImpl(const std::string &fullResourceName) override;
};
struct BuiltinCode {
enum class ECodeType {
Any = 0, // for requesting "any" code available - priorities as below
Binary = 1, // ISA - highest priority
Intermediate = 2, // SPIR/LLVM - medium prioroty
Source = 3, // OCL C - lowest priority
COUNT,
INVALID
};
static const char *getExtension(ECodeType ct) {
switch (ct) {
default:
return "";
case ECodeType::Binary:
return ".bin";
case ECodeType::Intermediate:
return ".bc";
case ECodeType::Source:
return ".cl";
}
}
ECodeType type;
BuiltinResourceT resource;
Device *targetDevice;
};
class BuiltinsLib {
public:
BuiltinsLib();
BuiltinCode getBuiltinCode(EBuiltInOps::Type builtin, BuiltinCode::ECodeType requestedCodeType, Device &device);
static std::unique_ptr<Program> createProgramFromCode(const BuiltinCode &bc, Device &device);
protected:
BuiltinResourceT getBuiltinResource(EBuiltInOps::Type builtin, BuiltinCode::ECodeType requestedCodeType, Device &device);
using StoragesContainerT = std::vector<std::unique_ptr<Storage>>;
StoragesContainerT allStorages; // sorted by priority allStorages[0] will be checked before allStorages[1], etc.
std::mutex mutex;
};
struct BuiltInKernel {
const char *pSource = nullptr;
Program *pProgram = nullptr;
std::once_flag programIsInitialized; // guard for creating+building the program
Kernel *pKernel = nullptr;
BuiltInKernel() {
}
};
class BuiltinDispatchInfoBuilder;
class BuiltIns {
public:
std::pair<std::unique_ptr<BuiltinDispatchInfoBuilder>, std::once_flag> BuiltinOpsBuilders[static_cast<uint32_t>(EBuiltInOps::COUNT)];
BuiltIns();
virtual ~BuiltIns();
MOCKABLE_VIRTUAL const SipKernel &getSipKernel(SipKernelType type, Device &device);
BuiltinsLib &getBuiltinsLib() {
DEBUG_BREAK_IF(!builtinsLib.get());
return *builtinsLib;
}
void setCacheingEnableState(bool enableCacheing) {
this->enableCacheing = enableCacheing;
}
bool isCacheingEnabled() const {
return this->enableCacheing;
}
protected:
// sip builtins
std::pair<std::unique_ptr<SipKernel>, std::once_flag> sipKernels[static_cast<uint32_t>(SipKernelType::COUNT)];
std::unique_ptr<BuiltinsLib> builtinsLib;
using ProgramsContainerT = std::array<std::pair<std::unique_ptr<Program>, std::once_flag>, static_cast<size_t>(EBuiltInOps::COUNT)>;
ProgramsContainerT builtinPrograms;
bool enableCacheing = true;
};
template <EBuiltInOps::Type OpCode>
class BuiltInOp;
} // namespace NEO

View File

@@ -0,0 +1,221 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/built_ins/built_ins.h"
#include "shared/source/debug_settings/debug_settings_manager.h"
#include "shared/source/device/device.h"
#include "opencl/source/built_ins/builtins_dispatch_builder.h"
#include "os_inc.h"
#include <cstdint>
namespace NEO {
const char *getBuiltinAsString(EBuiltInOps::Type builtin) {
const char *builtinString = getAdditionalBuiltinAsString(builtin);
if (builtinString) {
return builtinString;
}
switch (builtin) {
default:
return getUnknownBuiltinAsString(builtin);
case EBuiltInOps::AuxTranslation:
return "aux_translation.builtin_kernel";
case EBuiltInOps::CopyBufferToBuffer:
return "copy_buffer_to_buffer.builtin_kernel";
case EBuiltInOps::CopyBufferToBufferStateless:
return "copy_buffer_to_buffer_stateless.builtin_kernel";
case EBuiltInOps::CopyBufferRect:
return "copy_buffer_rect.builtin_kernel";
case EBuiltInOps::CopyBufferRectStateless:
return "copy_buffer_rect_stateless.builtin_kernel";
case EBuiltInOps::FillBuffer:
return "fill_buffer.builtin_kernel";
case EBuiltInOps::FillBufferStateless:
return "fill_buffer_stateless.builtin_kernel";
case EBuiltInOps::CopyBufferToImage3d:
return "copy_buffer_to_image3d.builtin_kernel";
case EBuiltInOps::CopyBufferToImage3dStateless:
return "copy_buffer_to_image3d_stateless.builtin_kernel";
case EBuiltInOps::CopyImage3dToBuffer:
return "copy_image3d_to_buffer.builtin_kernel";
case EBuiltInOps::CopyImage3dToBufferStateless:
return "copy_image3d_to_buffer_stateless.builtin_kernel";
case EBuiltInOps::CopyImageToImage1d:
return "copy_image_to_image1d.builtin_kernel";
case EBuiltInOps::CopyImageToImage2d:
return "copy_image_to_image2d.builtin_kernel";
case EBuiltInOps::CopyImageToImage3d:
return "copy_image_to_image3d.builtin_kernel";
case EBuiltInOps::FillImage1d:
return "fill_image1d.builtin_kernel";
case EBuiltInOps::FillImage2d:
return "fill_image2d.builtin_kernel";
case EBuiltInOps::FillImage3d:
return "fill_image3d.builtin_kernel";
};
}
BuiltinResourceT createBuiltinResource(const char *ptr, size_t size) {
return BuiltinResourceT(ptr, ptr + size);
}
BuiltinResourceT createBuiltinResource(const BuiltinResourceT &r) {
return BuiltinResourceT(r);
}
std::string createBuiltinResourceName(EBuiltInOps::Type builtin, const std::string &extension,
const std::string &platformName, uint32_t deviceRevId) {
std::string ret;
if (platformName.size() > 0) {
ret = platformName;
ret += "_" + std::to_string(deviceRevId);
ret += "_";
}
ret += getBuiltinAsString(builtin);
if (extension.size() > 0) {
ret += extension;
}
return ret;
}
std::string joinPath(const std::string &lhs, const std::string &rhs) {
if (lhs.size() == 0) {
return rhs;
}
if (rhs.size() == 0) {
return lhs;
}
if (*lhs.rbegin() == PATH_SEPARATOR) {
return lhs + rhs;
}
return lhs + PATH_SEPARATOR + rhs;
}
std::string getDriverInstallationPath() {
return "";
}
BuiltinResourceT Storage::load(const std::string &resourceName) {
return loadImpl(joinPath(rootPath, resourceName));
}
BuiltinResourceT FileStorage::loadImpl(const std::string &fullResourceName) {
BuiltinResourceT ret;
std::ifstream f{fullResourceName, std::ios::in | std::ios::binary | std::ios::ate};
auto end = f.tellg();
f.seekg(0, std::ios::beg);
auto beg = f.tellg();
auto s = end - beg;
ret.resize(static_cast<size_t>(s));
f.read(ret.data(), s);
return ret;
}
const BuiltinResourceT *EmbeddedStorageRegistry::get(const std::string &name) const {
auto it = resources.find(name);
if (resources.end() == it) {
return nullptr;
}
return &it->second;
}
BuiltinResourceT EmbeddedStorage::loadImpl(const std::string &fullResourceName) {
auto *constResource = EmbeddedStorageRegistry::getInstance().get(fullResourceName);
if (constResource == nullptr) {
BuiltinResourceT ret;
return ret;
}
return createBuiltinResource(*constResource);
}
BuiltinsLib::BuiltinsLib() {
allStorages.push_back(std::unique_ptr<Storage>(new EmbeddedStorage("")));
allStorages.push_back(std::unique_ptr<Storage>(new FileStorage(getDriverInstallationPath())));
}
BuiltinCode BuiltinsLib::getBuiltinCode(EBuiltInOps::Type builtin, BuiltinCode::ECodeType requestedCodeType, Device &device) {
std::lock_guard<std::mutex> lockRaii{mutex};
BuiltinResourceT bc;
BuiltinCode::ECodeType usedCodetType = BuiltinCode::ECodeType::INVALID;
if (requestedCodeType == BuiltinCode::ECodeType::Any) {
uint32_t codeType = static_cast<uint32_t>(BuiltinCode::ECodeType::Binary);
if (DebugManager.flags.RebuildPrecompiledKernels.get()) {
codeType = static_cast<uint32_t>(BuiltinCode::ECodeType::Source);
}
for (uint32_t e = static_cast<uint32_t>(BuiltinCode::ECodeType::COUNT);
codeType != e; ++codeType) {
bc = getBuiltinResource(builtin, static_cast<BuiltinCode::ECodeType>(codeType), device);
if (bc.size() > 0) {
usedCodetType = static_cast<BuiltinCode::ECodeType>(codeType);
break;
}
}
} else {
bc = getBuiltinResource(builtin, requestedCodeType, device);
usedCodetType = requestedCodeType;
}
BuiltinCode ret;
std::swap(ret.resource, bc);
ret.type = usedCodetType;
ret.targetDevice = &device;
return ret;
}
std::unique_ptr<Program> BuiltinsLib::createProgramFromCode(const BuiltinCode &bc, Device &device) {
std::unique_ptr<Program> ret;
const char *data = bc.resource.data();
size_t dataLen = bc.resource.size();
cl_int err = 0;
switch (bc.type) {
default:
break;
case BuiltinCode::ECodeType::Source:
case BuiltinCode::ECodeType::Intermediate:
ret.reset(Program::create(data, nullptr, device, true, &err));
break;
case BuiltinCode::ECodeType::Binary:
ret.reset(Program::createFromGenBinary(*device.getExecutionEnvironment(), nullptr, data, dataLen, true, nullptr, &device));
break;
}
return ret;
}
BuiltinResourceT BuiltinsLib::getBuiltinResource(EBuiltInOps::Type builtin, BuiltinCode::ECodeType requestedCodeType, Device &device) {
BuiltinResourceT bc;
std::string resourceNameGeneric = createBuiltinResourceName(builtin, BuiltinCode::getExtension(requestedCodeType));
std::string resourceNameForPlatformType = createBuiltinResourceName(builtin, BuiltinCode::getExtension(requestedCodeType), getFamilyNameWithType(device.getHardwareInfo()));
std::string resourceNameForPlatformTypeAndStepping = createBuiltinResourceName(builtin, BuiltinCode::getExtension(requestedCodeType), getFamilyNameWithType(device.getHardwareInfo()),
device.getHardwareInfo().platform.usRevId);
for (auto &rn : {resourceNameForPlatformTypeAndStepping, resourceNameForPlatformType, resourceNameGeneric}) { // first look for dedicated version, only fallback to generic one
for (auto &s : allStorages) {
bc = s.get()->load(rn);
if (bc.size() != 0) {
return bc;
}
}
}
return bc;
}
} // namespace NEO

View File

@@ -0,0 +1,13 @@
#
# Copyright (C) 2020 Intel Corporation
#
# SPDX-License-Identifier: MIT
#
set(NEO_CORE_SRCS_BUILT_INS_OPS
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}/built_in_ops.h
)
add_subdirectories()
set_property(GLOBAL PROPERTY NEO_CORE_SRCS_BUILT_INS_OPS ${NEO_CORE_SRCS_BUILT_INS_OPS})

View File

@@ -0,0 +1,15 @@
/*
* Copyright (C) 2019-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include "shared/source/built_ins/built_in_ops_base.h"
namespace NEO {
namespace EBuiltInOps {
constexpr Type MaxCoreValue{MaxBaseValue};
}
} // namespace NEO

View File

@@ -0,0 +1,71 @@
#
# Copyright (C) 2018-2020 Intel Corporation
#
# SPDX-License-Identifier: MIT
#
add_library(${BUILTINS_BINARIES_LIB_NAME} OBJECT EXCLUDE_FROM_ALL builtins_binary.cmake)
# Add builtins sources
add_subdirectory(registry)
set(GENERATED_BUILTINS
"aux_translation"
"copy_buffer_rect"
"copy_buffer_to_buffer"
"copy_buffer_to_image3d"
"copy_image3d_to_buffer"
"copy_image_to_image1d"
"copy_image_to_image2d"
"copy_image_to_image3d"
"fill_buffer"
"fill_image1d"
"fill_image2d"
"fill_image3d"
)
set(GENERATED_BUILTINS_STATELESS
"copy_buffer_to_buffer_stateless"
"copy_buffer_rect_stateless"
"copy_buffer_to_image3d_stateless"
"copy_image3d_to_buffer_stateless"
"fill_buffer_stateless"
)
# Generate builtins cpps
if(COMPILE_BUILT_INS)
add_subdirectory(kernels)
endif()
macro(macro_for_each_gen)
foreach(PLATFORM_TYPE ${PLATFORM_TYPES})
get_family_name_with_type(${GEN_TYPE} ${PLATFORM_TYPE})
foreach(GENERATED_BUILTIN ${GENERATED_BUILTINS})
list(APPEND GENERATED_BUILTINS_CPPS ${BUILTINS_INCLUDE_DIR}/${RUNTIME_GENERATED_${GENERATED_BUILTIN}_${family_name_with_type}})
endforeach()
foreach(GENERATED_BUILTIN_STATELESS ${GENERATED_BUILTINS_STATELESS})
list(APPEND GENERATED_BUILTINS_CPPS ${BUILTINS_INCLUDE_DIR}/${RUNTIME_GENERATED_${GENERATED_BUILTIN_STATELESS}_${family_name_with_type}})
endforeach()
endforeach()
source_group("generated files\\${GEN_TYPE_LOWER}" FILES ${GENERATED_BUILTINS_CPPS})
endmacro()
apply_macro_for_each_gen("SUPPORTED")
if(COMPILE_BUILT_INS)
target_sources(${BUILTINS_BINARIES_LIB_NAME} PUBLIC ${GENERATED_BUILTINS_CPPS})
set_source_files_properties(${GENERATED_BUILTINS_CPPS} PROPERTIES GENERATED TRUE)
endif()
set_target_properties(${BUILTINS_BINARIES_LIB_NAME} PROPERTIES LINKER_LANGUAGE CXX)
set_target_properties(${BUILTINS_BINARIES_LIB_NAME} PROPERTIES POSITION_INDEPENDENT_CODE ON)
set_target_properties(${BUILTINS_BINARIES_LIB_NAME} PROPERTIES FOLDER "built_ins")
target_include_directories(${BUILTINS_BINARIES_LIB_NAME} PRIVATE
${ENGINE_NODE_DIR}
${KHRONOS_HEADERS_DIR}
${KHRONOS_GL_HEADERS_DIR}
${NEO__GMM_INCLUDE_DIR}
${NEO__IGC_INCLUDE_DIR}
${THIRD_PARTY_DIR}
)

View File

@@ -0,0 +1,106 @@
#
# Copyright (C) 2017-2020 Intel Corporation
#
# SPDX-License-Identifier: MIT
#
add_custom_target(builtins)
set_target_properties(builtins PROPERTIES FOLDER "built_ins")
set(BUILTINS_OUTDIR_WITH_ARCH "${TargetDir}/built_ins/${NEO_ARCH}")
add_dependencies(${BUILTINS_BINARIES_LIB_NAME} builtins)
add_subdirectories()
set(GENERATED_BUILTINS ${GENERATED_BUILTINS} PARENT_SCOPE)
set(GENERATED_BUILTINS_STATELESS ${GENERATED_BUILTINS_STATELESS} PARENT_SCOPE)
if("${NEO_ARCH}" STREQUAL "x32")
set(BUILTIN_OPTIONS "-cl-intel-greater-than-4GB-buffer-required")
else()
set(BUILTIN_OPTIONS "")
endif()
set(BUILTIN_OPTIONS_STATELESS
"-cl-intel-greater-than-4GB-buffer-required"
)
if("${CMAKE_BUILD_TYPE}" STREQUAL "Debug" )
list(APPEND __cloc__options__ "-D DEBUG")
endif()
set(BUILTINS_INCLUDE_DIR ${TargetDir} PARENT_SCOPE)
set(BUILTIN_CPP "")
function(get_bits_for_stateless gen_type platform_type)
# Force 32bits compiling on gen9lp for stateless builtins
if( (${GEN_TYPE} STREQUAL "GEN9" ) AND ( ${PLATFORM_TYPE} STREQUAL "LP"))
set(BITS "32" PARENT_SCOPE)
else()
set(BITS ${NEO_BITS} PARENT_SCOPE)
endif()
endfunction()
# Define function for compiling built-ins (with ocloc)
function(compile_builtin gen_type platform_type builtin bits builtin_options)
string(TOLOWER ${gen_type} gen_type_lower)
get_family_name_with_type(${gen_type} ${platform_type})
set(OUTPUTDIR "${BUILTINS_OUTDIR_WITH_ARCH}/${gen_type_lower}")
# get filename
set(FILENAME ${builtin})
# get name of the file w/o extension
get_filename_component(BASENAME ${builtin} NAME_WE)
set(OUTPUTPATH_BASE "${OUTPUTDIR}/${BASENAME}_${family_name_with_type}")
set(OUTPUT_FILES
${OUTPUTPATH_BASE}.spv
${OUTPUTPATH_BASE}.bin
${OUTPUTPATH_BASE}.cpp
${OUTPUTPATH_BASE}.gen
)
# function returns builtin cpp filename
unset(BUILTIN_CPP)
# set variable outside function
set(BUILTIN_CPP built_ins/${NEO_ARCH}/${gen_type_lower}/${BASENAME}_${family_name_with_type}.cpp PARENT_SCOPE)
if(WIN32)
set(cloc_cmd_prefix ocloc)
else()
if(DEFINED NEO__IGC_LIBRARY_PATH)
set(cloc_cmd_prefix LD_LIBRARY_PATH=${NEO__IGC_LIBRARY_PATH} $<TARGET_FILE:ocloc>)
else()
set(cloc_cmd_prefix LD_LIBRARY_PATH=$<TARGET_FILE_DIR:ocloc> $<TARGET_FILE:ocloc>)
endif()
endif()
list(APPEND __cloc__options__ "-cl-kernel-arg-info")
add_custom_command(
OUTPUT ${OUTPUT_FILES}
COMMAND ${cloc_cmd_prefix} -q -file ${FILENAME} -device ${DEFAULT_SUPPORTED_${gen_type}_${platform_type}_PLATFORM} ${builtin_options} -${bits} -out_dir ${OUTPUTDIR} -cpp_file -options "$<JOIN:${__cloc__options__}, >"
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
DEPENDS ${builtin} ocloc copy_compiler_files
)
endfunction()
macro(macro_for_each_gen)
foreach(PLATFORM_TYPE ${PLATFORM_TYPES})
if(${GEN_TYPE}_HAS_${PLATFORM_TYPE})
get_family_name_with_type(${GEN_TYPE} ${PLATFORM_TYPE})
string(TOLOWER ${PLATFORM_TYPE} PLATFORM_TYPE_LOWER)
unset(BUILTINS_COMMANDS)
foreach(GENERATED_BUILTIN ${GENERATED_BUILTINS})
compile_builtin(${GEN_TYPE} ${PLATFORM_TYPE} ${GENERATED_BUILTIN}.builtin_kernel ${NEO_BITS} "${BUILTIN_OPTIONS}")
list(APPEND BUILTINS_COMMANDS ${TargetDir}/${BUILTIN_CPP})
set(RUNTIME_GENERATED_${GENERATED_BUILTIN}_${family_name_with_type} ${BUILTIN_CPP} PARENT_SCOPE)
endforeach()
get_bits_for_stateless(${GEN_TYPE} ${PLATFORM_TYPE})
foreach(GENERATED_BUILTIN_STATELESS ${GENERATED_BUILTINS_STATELESS})
compile_builtin(${GEN_TYPE} ${PLATFORM_TYPE} ${GENERATED_BUILTIN_STATELESS}.builtin_kernel ${BITS} "${BUILTIN_OPTIONS_STATELESS}")
list(APPEND BUILTINS_COMMANDS ${TargetDir}/${BUILTIN_CPP})
set(RUNTIME_GENERATED_${GENERATED_BUILTIN_STATELESS}_${family_name_with_type} ${BUILTIN_CPP} PARENT_SCOPE)
endforeach()
set(target_name builtins_${family_name_with_type})
add_custom_target(${target_name} DEPENDS ${BUILTINS_COMMANDS})
add_dependencies(builtins ${target_name})
set_target_properties(${target_name} PROPERTIES FOLDER "opencl/source/built_ins/${family_name_with_type}")
endif()
endforeach()
endmacro()
apply_macro_for_each_gen("SUPPORTED")

View File

@@ -0,0 +1,14 @@
/*
* Copyright (C) 2018-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
R"===(
__kernel void fullCopy(__global const uint* src, __global uint* dst) {
unsigned int gid = get_global_id(0);
uint4 loaded = vload4(gid, src);
vstore4(loaded, gid, dst);
}
)==="

View File

@@ -0,0 +1,48 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
R"===(
//////////////////////////////////////////////////////////////////////////////
__kernel void CopyBufferRectBytes2d(
__global const char* src,
__global char* dst,
uint4 SrcOrigin,
uint4 DstOrigin,
uint2 SrcPitch,
uint2 DstPitch )
{
int x = get_global_id(0);
int y = get_global_id(1);
uint LSrcOffset = x + SrcOrigin.x + ( ( y + SrcOrigin.y ) * SrcPitch.x );
uint LDstOffset = x + DstOrigin.x + ( ( y + DstOrigin.y ) * DstPitch.x );
*( dst + LDstOffset ) = *( src + LSrcOffset );
}
//////////////////////////////////////////////////////////////////////////////
__kernel void CopyBufferRectBytes3d(
__global const char* src,
__global char* dst,
uint4 SrcOrigin,
uint4 DstOrigin,
uint2 SrcPitch,
uint2 DstPitch )
{
int x = get_global_id(0);
int y = get_global_id(1);
int z = get_global_id(2);
uint LSrcOffset = x + SrcOrigin.x + ( ( y + SrcOrigin.y ) * SrcPitch.x ) + ( ( z + SrcOrigin.z ) * SrcPitch.y );
uint LDstOffset = x + DstOrigin.x + ( ( y + DstOrigin.y ) * DstPitch.x ) + ( ( z + DstOrigin.z ) * DstPitch.y );
*( dst + LDstOffset ) = *( src + LSrcOffset );
}
)==="

View File

@@ -0,0 +1,48 @@
/*
* Copyright (C) 2019-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
R"===(
//////////////////////////////////////////////////////////////////////////////
__kernel void CopyBufferRectBytes2d(
__global const char* src,
__global char* dst,
ulong4 SrcOrigin,
ulong4 DstOrigin,
ulong2 SrcPitch,
ulong2 DstPitch )
{
size_t x = get_global_id(0);
size_t y = get_global_id(1);
size_t LSrcOffset = x + SrcOrigin.x + ( ( y + SrcOrigin.y ) * SrcPitch.x );
size_t LDstOffset = x + DstOrigin.x + ( ( y + DstOrigin.y ) * DstPitch.x );
*( dst + LDstOffset ) = *( src + LSrcOffset );
}
//////////////////////////////////////////////////////////////////////////////
__kernel void CopyBufferRectBytes3d(
__global const char* src,
__global char* dst,
ulong4 SrcOrigin,
ulong4 DstOrigin,
ulong2 SrcPitch,
ulong2 DstPitch )
{
size_t x = get_global_id(0);
size_t y = get_global_id(1);
size_t z = get_global_id(2);
size_t LSrcOffset = x + SrcOrigin.x + ( ( y + SrcOrigin.y ) * SrcPitch.x ) + ( ( z + SrcOrigin.z ) * SrcPitch.y );
size_t LDstOffset = x + DstOrigin.x + ( ( y + DstOrigin.y ) * DstPitch.x ) + ( ( z + DstOrigin.z ) * DstPitch.y );
*( dst + LDstOffset ) = *( src + LSrcOffset );
}
)==="

View File

@@ -0,0 +1,90 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
R"===(
__kernel void CopyBufferToBufferBytes(
const __global uchar* pSrc,
__global uchar* pDst,
uint srcOffsetInBytes,
uint dstOffsetInBytes,
uint bytesToRead )
{
pSrc += ( srcOffsetInBytes + get_global_id(0) );
pDst += ( dstOffsetInBytes + get_global_id(0) );
pDst[ 0 ] = pSrc[ 0 ];
}
__kernel void CopyBufferToBufferLeftLeftover(
const __global uchar* pSrc,
__global uchar* pDst,
uint srcOffsetInBytes,
uint dstOffsetInBytes)
{
unsigned int gid = get_global_id(0);
pDst[ gid + dstOffsetInBytes ] = pSrc[ gid + srcOffsetInBytes ];
}
__kernel void CopyBufferToBufferMiddle(
const __global uint* pSrc,
__global uint* pDst,
uint srcOffsetInBytes,
uint dstOffsetInBytes)
{
unsigned int gid = get_global_id(0);
pDst += dstOffsetInBytes >> 2;
pSrc += srcOffsetInBytes >> 2;
uint4 loaded = vload4(gid, pSrc);
vstore4(loaded, gid, pDst);
}
__kernel void CopyBufferToBufferRightLeftover(
const __global uchar* pSrc,
__global uchar* pDst,
uint srcOffsetInBytes,
uint dstOffsetInBytes)
{
unsigned int gid = get_global_id(0);
pDst[ gid + dstOffsetInBytes ] = pSrc[ gid + srcOffsetInBytes ];
}
__kernel void copyBufferToBufferBytesSingle(__global uchar *dst, const __global uchar *src) {
unsigned int gid = get_global_id(0);
dst[gid] = (uchar)(src[gid]);
}
__kernel void CopyBufferToBufferSideRegion(
__global uchar* pDst,
const __global uchar* pSrc,
unsigned int len,
uint dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment
uint srcSshOffset // Offset needed in case ptr has been adjusted for SSH alignment
)
{
unsigned int gid = get_global_id(0);
__global uchar* pDstWithOffset = (__global uchar*)((__global uchar*)pDst + dstSshOffset);
__global uchar* pSrcWithOffset = (__global uchar*)((__global uchar*)pSrc + srcSshOffset);
if (gid < len) {
pDstWithOffset[ gid ] = pSrcWithOffset[ gid ];
}
}
__kernel void CopyBufferToBufferMiddleRegion(
__global uint* pDst,
const __global uint* pSrc,
unsigned int elems,
uint dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment
uint srcSshOffset // Offset needed in case ptr has been adjusted for SSH alignment
)
{
unsigned int gid = get_global_id(0);
__global uint* pDstWithOffset = (__global uint*)((__global uchar*)pDst + dstSshOffset);
__global uint* pSrcWithOffset = (__global uint*)((__global uchar*)pSrc + srcSshOffset);
if (gid < elems) {
uint4 loaded = vload4(gid, pSrcWithOffset);
vstore4(loaded, gid, pDstWithOffset);
}
}
)==="

View File

@@ -0,0 +1,54 @@
/*
* Copyright (C) 2019-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
R"===(
__kernel void CopyBufferToBufferBytes(
const __global uchar* pSrc,
__global uchar* pDst,
ulong srcOffsetInBytes,
ulong dstOffsetInBytes,
ulong bytesToRead )
{
pSrc += ( srcOffsetInBytes + get_global_id(0) );
pDst += ( dstOffsetInBytes + get_global_id(0) );
pDst[ 0 ] = pSrc[ 0 ];
}
__kernel void CopyBufferToBufferLeftLeftover(
const __global uchar* pSrc,
__global uchar* pDst,
ulong srcOffsetInBytes,
ulong dstOffsetInBytes)
{
size_t gid = get_global_id(0);
pDst[ gid + dstOffsetInBytes ] = pSrc[ gid + srcOffsetInBytes ];
}
__kernel void CopyBufferToBufferMiddle(
const __global uint* pSrc,
__global uint* pDst,
ulong srcOffsetInBytes,
ulong dstOffsetInBytes)
{
size_t gid = get_global_id(0);
pDst += dstOffsetInBytes >> 2;
pSrc += srcOffsetInBytes >> 2;
uint4 loaded = vload4(gid, pSrc);
vstore4(loaded, gid, pDst);
}
__kernel void CopyBufferToBufferRightLeftover(
const __global uchar* pSrc,
__global uchar* pDst,
ulong srcOffsetInBytes,
ulong dstOffsetInBytes)
{
size_t gid = get_global_id(0);
pDst[ gid + dstOffsetInBytes ] = pSrc[ gid + srcOffsetInBytes ];
}
)==="

View File

@@ -0,0 +1,161 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
R"===(
#pragma OPENCL EXTENSION cl_khr_3d_image_writes : enable
__kernel void CopyBufferToImage3dBytes(__global uchar *src,
__write_only image3d_t output,
int srcOffset,
int4 dstOffset,
uint2 Pitch) {
const uint x = get_global_id(0);
const uint y = get_global_id(1);
const uint z = get_global_id(2);
int4 dstCoord = (int4)(x, y, z, 0) + dstOffset;
uint LOffset = srcOffset + (y * Pitch.x) + (z * Pitch.y);
write_imageui(output, dstCoord, (uint4)(*(src + LOffset + x), 0, 0, 1));
}
__kernel void CopyBufferToImage3d2Bytes(__global uchar *src,
__write_only image3d_t output,
int srcOffset,
int4 dstOffset,
uint2 Pitch) {
const uint x = get_global_id(0);
const uint y = get_global_id(1);
const uint z = get_global_id(2);
int4 dstCoord = (int4)(x, y, z, 0) + dstOffset;
uint LOffset = srcOffset + (y * Pitch.x) + (z * Pitch.y);
uint4 c = (uint4)(0, 0, 0, 1);
if(( ulong )(src + srcOffset) & 0x00000001){
ushort upper = *((__global uchar*)(src + LOffset + x * 2 + 1));
ushort lower = *((__global uchar*)(src + LOffset + x * 2));
ushort combined = (upper << 8) | lower;
c.x = (uint)combined;
}
else{
c.x = (uint)(*(__global ushort*)(src + LOffset + x * 2));
}
write_imageui(output, dstCoord, c);
}
__kernel void CopyBufferToImage3d4Bytes(__global uchar *src,
__write_only image3d_t output,
int srcOffset,
int4 dstOffset,
uint2 Pitch) {
const uint x = get_global_id(0);
const uint y = get_global_id(1);
const uint z = get_global_id(2);
int4 dstCoord = (int4)(x, y, z, 0) + dstOffset;
uint LOffset = srcOffset + (y * Pitch.x) + (z * Pitch.y);
uint4 c = (uint4)(0, 0, 0, 1);
if(( ulong )(src + srcOffset) & 0x00000003){
uint upper2 = *((__global uchar*)(src + LOffset + x * 4 + 3));
uint upper = *((__global uchar*)(src + LOffset + x * 4 + 2));
uint lower2 = *((__global uchar*)(src + LOffset + x * 4 + 1));
uint lower = *((__global uchar*)(src + LOffset + x * 4));
uint combined = (upper2 << 24) | (upper << 16) | (lower2 << 8) | lower;
c.x = combined;
}
else{
c.x = (*(__global uint*)(src + LOffset + x * 4));
}
write_imageui(output, dstCoord, c);
}
__kernel void CopyBufferToImage3d8Bytes(__global uchar *src,
__write_only image3d_t output,
int srcOffset,
int4 dstOffset,
uint2 Pitch) {
const uint x = get_global_id(0);
const uint y = get_global_id(1);
const uint z = get_global_id(2);
int4 dstCoord = (int4)(x, y, z, 0) + dstOffset;
uint LOffset = srcOffset + (y * Pitch.x) + (z * Pitch.y);
uint2 c = (uint2)(0, 0);//*((__global uint2*)(src + LOffset + x * 8));
if(( ulong )(src + srcOffset) & 0x00000007){
uint upper2 = *((__global uchar*)(src + LOffset + x * 8 + 3));
uint upper = *((__global uchar*)(src + LOffset + x * 8 + 2));
uint lower2 = *((__global uchar*)(src + LOffset + x * 8 + 1));
uint lower = *((__global uchar*)(src + LOffset + x * 8));
uint combined = (upper2 << 24) | (upper << 16) | (lower2 << 8) | lower;
c.x = combined;
upper2 = *((__global uchar*)(src + LOffset + x * 8 + 7));
upper = *((__global uchar*)(src + LOffset + x * 8 + 6));
lower2 = *((__global uchar*)(src + LOffset + x * 8 + 5));
lower = *((__global uchar*)(src + LOffset + x * 8 + 4));
combined = ((uint)upper2 << 24) | ((uint)upper << 16) | ((uint)lower2 << 8) | lower;
c.y = combined;
}
else{
c = *((__global uint2*)(src + LOffset + x * 8));
}
write_imageui(output, dstCoord, (uint4)(c.x, c.y, 0, 1));
}
__kernel void CopyBufferToImage3d16Bytes(__global uchar *src,
__write_only image3d_t output,
int srcOffset,
int4 dstOffset,
uint2 Pitch) {
const uint x = get_global_id(0);
const uint y = get_global_id(1);
const uint z = get_global_id(2);
int4 dstCoord = (int4)(x, y, z, 0) + dstOffset;
uint LOffset = srcOffset + (y * Pitch.x) + (z * Pitch.y);
uint4 c = (uint4)(0, 0, 0, 0);
if(( ulong )(src + srcOffset) & 0x0000000f){
uint upper2 = *((__global uchar*)(src + LOffset + x * 16 + 3));
uint upper = *((__global uchar*)(src + LOffset + x * 16 + 2));
uint lower2 = *((__global uchar*)(src + LOffset + x * 16 + 1));
uint lower = *((__global uchar*)(src + LOffset + x * 16));
uint combined = (upper2 << 24) | (upper << 16) | (lower2 << 8) | lower;
c.x = combined;
upper2 = *((__global uchar*)(src + LOffset + x * 16 + 7));
upper = *((__global uchar*)(src + LOffset + x * 16 + 6));
lower2 = *((__global uchar*)(src + LOffset + x * 16 + 5));
lower = *((__global uchar*)(src + LOffset + x * 16 + 4));
combined = (upper2 << 24) | (upper << 16) | (lower2 << 8) | lower;
c.y = combined;
upper2 = *((__global uchar*)(src + LOffset + x * 16 + 11));
upper = *((__global uchar*)(src + LOffset + x * 16 + 10));
lower2 = *((__global uchar*)(src + LOffset + x * 16 + 9));
lower = *((__global uchar*)(src + LOffset + x * 16 + 8));
combined = (upper2 << 24) | (upper << 16) | (lower2 << 8) | lower;
c.z = combined;
upper2 = *((__global uchar*)(src + LOffset + x * 16 + 15));
upper = *((__global uchar*)(src + LOffset + x * 16 + 14));
lower2 = *((__global uchar*)(src + LOffset + x * 16 + 13));
lower = *((__global uchar*)(src + LOffset + x * 16 + 12));
combined = (upper2 << 24) | (upper << 16) | (lower2 << 8) | lower;
c.w = combined;
}
else{
c = *((__global uint4 *)(src + LOffset + x * 16));
}
write_imageui(output, dstCoord, c);
}
)==="

View File

@@ -0,0 +1,161 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
R"===(
#pragma OPENCL EXTENSION cl_khr_3d_image_writes : enable
__kernel void CopyBufferToImage3dBytes(__global uchar *src,
__write_only image3d_t output,
ulong srcOffset,
int4 dstOffset,
ulong2 Pitch) {
const uint x = get_global_id(0);
const uint y = get_global_id(1);
const uint z = get_global_id(2);
int4 dstCoord = (int4)(x, y, z, 0) + dstOffset;
ulong LOffset = srcOffset + (y * Pitch.x) + (z * Pitch.y);
write_imageui(output, dstCoord, (uint4)(*(src + LOffset + x), 0, 0, 1));
}
__kernel void CopyBufferToImage3d2Bytes(__global uchar *src,
__write_only image3d_t output,
ulong srcOffset,
int4 dstOffset,
ulong2 Pitch) {
const uint x = get_global_id(0);
const uint y = get_global_id(1);
const uint z = get_global_id(2);
int4 dstCoord = (int4)(x, y, z, 0) + dstOffset;
ulong LOffset = srcOffset + (y * Pitch.x) + (z * Pitch.y);
uint4 c = (uint4)(0, 0, 0, 1);
if(( ulong )(src + srcOffset) & 0x00000001){
ushort upper = *((__global uchar*)(src + LOffset + x * 2 + 1));
ushort lower = *((__global uchar*)(src + LOffset + x * 2));
ushort combined = (upper << 8) | lower;
c.x = (uint)combined;
}
else{
c.x = (uint)(*(__global ushort*)(src + LOffset + x * 2));
}
write_imageui(output, dstCoord, c);
}
__kernel void CopyBufferToImage3d4Bytes(__global uchar *src,
__write_only image3d_t output,
ulong srcOffset,
int4 dstOffset,
ulong2 Pitch) {
const uint x = get_global_id(0);
const uint y = get_global_id(1);
const uint z = get_global_id(2);
int4 dstCoord = (int4)(x, y, z, 0) + dstOffset;
ulong LOffset = srcOffset + (y * Pitch.x) + (z * Pitch.y);
uint4 c = (uint4)(0, 0, 0, 1);
if(( ulong )(src + srcOffset) & 0x00000003){
uint upper2 = *((__global uchar*)(src + LOffset + x * 4 + 3));
uint upper = *((__global uchar*)(src + LOffset + x * 4 + 2));
uint lower2 = *((__global uchar*)(src + LOffset + x * 4 + 1));
uint lower = *((__global uchar*)(src + LOffset + x * 4));
uint combined = (upper2 << 24) | (upper << 16) | (lower2 << 8) | lower;
c.x = combined;
}
else{
c.x = (*(__global uint*)(src + LOffset + x * 4));
}
write_imageui(output, dstCoord, c);
}
__kernel void CopyBufferToImage3d8Bytes(__global uchar *src,
__write_only image3d_t output,
ulong srcOffset,
int4 dstOffset,
ulong2 Pitch) {
const uint x = get_global_id(0);
const uint y = get_global_id(1);
const uint z = get_global_id(2);
int4 dstCoord = (int4)(x, y, z, 0) + dstOffset;
ulong LOffset = srcOffset + (y * Pitch.x) + (z * Pitch.y);
uint2 c = (uint2)(0, 0);//*((__global uint2*)(src + LOffset + x * 8));
if(( ulong )(src + srcOffset) & 0x00000007){
uint upper2 = *((__global uchar*)(src + LOffset + x * 8 + 3));
uint upper = *((__global uchar*)(src + LOffset + x * 8 + 2));
uint lower2 = *((__global uchar*)(src + LOffset + x * 8 + 1));
uint lower = *((__global uchar*)(src + LOffset + x * 8));
uint combined = (upper2 << 24) | (upper << 16) | (lower2 << 8) | lower;
c.x = combined;
upper2 = *((__global uchar*)(src + LOffset + x * 8 + 7));
upper = *((__global uchar*)(src + LOffset + x * 8 + 6));
lower2 = *((__global uchar*)(src + LOffset + x * 8 + 5));
lower = *((__global uchar*)(src + LOffset + x * 8 + 4));
combined = ((uint)upper2 << 24) | ((uint)upper << 16) | ((uint)lower2 << 8) | lower;
c.y = combined;
}
else{
c = *((__global uint2*)(src + LOffset + x * 8));
}
write_imageui(output, dstCoord, (uint4)(c.x, c.y, 0, 1));
}
__kernel void CopyBufferToImage3d16Bytes(__global uchar *src,
__write_only image3d_t output,
ulong srcOffset,
int4 dstOffset,
ulong2 Pitch) {
const uint x = get_global_id(0);
const uint y = get_global_id(1);
const uint z = get_global_id(2);
int4 dstCoord = (int4)(x, y, z, 0) + dstOffset;
ulong LOffset = srcOffset + (y * Pitch.x) + (z * Pitch.y);
uint4 c = (uint4)(0, 0, 0, 0);
if(( ulong )(src + srcOffset) & 0x0000000f){
uint upper2 = *((__global uchar*)(src + LOffset + x * 16 + 3));
uint upper = *((__global uchar*)(src + LOffset + x * 16 + 2));
uint lower2 = *((__global uchar*)(src + LOffset + x * 16 + 1));
uint lower = *((__global uchar*)(src + LOffset + x * 16));
uint combined = (upper2 << 24) | (upper << 16) | (lower2 << 8) | lower;
c.x = combined;
upper2 = *((__global uchar*)(src + LOffset + x * 16 + 7));
upper = *((__global uchar*)(src + LOffset + x * 16 + 6));
lower2 = *((__global uchar*)(src + LOffset + x * 16 + 5));
lower = *((__global uchar*)(src + LOffset + x * 16 + 4));
combined = (upper2 << 24) | (upper << 16) | (lower2 << 8) | lower;
c.y = combined;
upper2 = *((__global uchar*)(src + LOffset + x * 16 + 11));
upper = *((__global uchar*)(src + LOffset + x * 16 + 10));
lower2 = *((__global uchar*)(src + LOffset + x * 16 + 9));
lower = *((__global uchar*)(src + LOffset + x * 16 + 8));
combined = (upper2 << 24) | (upper << 16) | (lower2 << 8) | lower;
c.z = combined;
upper2 = *((__global uchar*)(src + LOffset + x * 16 + 15));
upper = *((__global uchar*)(src + LOffset + x * 16 + 14));
lower2 = *((__global uchar*)(src + LOffset + x * 16 + 13));
lower = *((__global uchar*)(src + LOffset + x * 16 + 12));
combined = (upper2 << 24) | (upper << 16) | (lower2 << 8) | lower;
c.w = combined;
}
else{
c = *((__global uint4 *)(src + LOffset + x * 16));
}
write_imageui(output, dstCoord, c);
}
)==="

View File

@@ -0,0 +1,139 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
R"===(
__kernel void CopyImage3dToBufferBytes(__read_only image3d_t input,
__global uchar *dst,
int4 srcOffset,
int dstOffset,
uint2 Pitch) {
const uint x = get_global_id(0);
const uint y = get_global_id(1);
const uint z = get_global_id(2);
const int4 srcCoord = (int4)(x, y, z, 0) + srcOffset;
uint DstOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y);
uint4 c = read_imageui(input, srcCoord);
*(dst + DstOffset + x) = convert_uchar_sat(c.x);
}
__kernel void CopyImage3dToBuffer2Bytes(__read_only image3d_t input,
__global uchar *dst,
int4 srcOffset,
int dstOffset,
uint2 Pitch) {
const uint x = get_global_id(0);
const uint y = get_global_id(1);
const uint z = get_global_id(2);
const int4 srcCoord = (int4)(x, y, z, 0) + srcOffset;
uint DstOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y);
uint4 c = read_imageui(input, srcCoord);
if(( ulong )(dst + dstOffset) & 0x00000001){
*((__global uchar*)(dst + DstOffset + x * 2 + 1)) = convert_uchar_sat((c.x >> 8 ) & 0xff);
*((__global uchar*)(dst + DstOffset + x * 2)) = convert_uchar_sat(c.x & 0xff);
}
else{
*((__global ushort*)(dst + DstOffset + x * 2)) = convert_ushort_sat(c.x);
}
}
__kernel void CopyImage3dToBuffer4Bytes(__read_only image3d_t input,
__global uchar *dst,
int4 srcOffset,
int dstOffset,
uint2 Pitch) {
const uint x = get_global_id(0);
const uint y = get_global_id(1);
const uint z = get_global_id(2);
const int4 srcCoord = (int4)(x, y, z, 0) + srcOffset;
uint DstOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y);
uint4 c = read_imageui(input, srcCoord);
if(( ulong )(dst + dstOffset) & 0x00000003){
*((__global uchar*)(dst + DstOffset + x * 4 + 3)) = convert_uchar_sat((c.x >> 24 ) & 0xff);
*((__global uchar*)(dst + DstOffset + x * 4 + 2)) = convert_uchar_sat((c.x >> 16 ) & 0xff);
*((__global uchar*)(dst + DstOffset + x * 4 + 1)) = convert_uchar_sat((c.x >> 8 ) & 0xff);
*((__global uchar*)(dst + DstOffset + x * 4)) = convert_uchar_sat(c.x & 0xff);
}
else{
*((__global uint*)(dst + DstOffset + x * 4)) = c.x;
}
}
__kernel void CopyImage3dToBuffer8Bytes(__read_only image3d_t input,
__global uchar *dst,
int4 srcOffset,
int dstOffset,
uint2 Pitch) {
const uint x = get_global_id(0);
const uint y = get_global_id(1);
const uint z = get_global_id(2);
const int4 srcCoord = (int4)(x, y, z, 0) + srcOffset;
uint DstOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y);
uint4 c = read_imageui(input, srcCoord);
if(( ulong )(dst + dstOffset) & 0x00000007){
*((__global uchar*)(dst + DstOffset + x * 8 + 3)) = convert_uchar_sat((c.x >> 24 ) & 0xff);
*((__global uchar*)(dst + DstOffset + x * 8 + 2)) = convert_uchar_sat((c.x >> 16 ) & 0xff);
*((__global uchar*)(dst + DstOffset + x * 8 + 1)) = convert_uchar_sat((c.x >> 8 ) & 0xff);
*((__global uchar*)(dst + DstOffset + x * 8)) = convert_uchar_sat(c.x & 0xff);
*((__global uchar*)(dst + DstOffset + x * 8 + 7)) = convert_uchar_sat((c.y >> 24 ) & 0xff);
*((__global uchar*)(dst + DstOffset + x * 8 + 6)) = convert_uchar_sat((c.y >> 16 ) & 0xff);
*((__global uchar*)(dst + DstOffset + x * 8 + 5)) = convert_uchar_sat((c.y >> 8 ) & 0xff);
*((__global uchar*)(dst + DstOffset + x * 8 + 4)) = convert_uchar_sat(c.y & 0xff);
}
else{
uint2 d = (uint2)(c.x,c.y);
*((__global uint2*)(dst + DstOffset + x * 8)) = d;
}
}
__kernel void CopyImage3dToBuffer16Bytes(__read_only image3d_t input,
__global uchar *dst,
int4 srcOffset,
int dstOffset,
uint2 Pitch) {
const uint x = get_global_id(0);
const uint y = get_global_id(1);
const uint z = get_global_id(2);
const int4 srcCoord = (int4)(x, y, z, 0) + srcOffset;
uint DstOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y);
const uint4 c = read_imageui(input, srcCoord);
if(( ulong )(dst + dstOffset) & 0x0000000f){
*((__global uchar*)(dst + DstOffset + x * 16 + 3)) = convert_uchar_sat((c.x >> 24 ) & 0xff);
*((__global uchar*)(dst + DstOffset + x * 16 + 2)) = convert_uchar_sat((c.x >> 16 ) & 0xff);
*((__global uchar*)(dst + DstOffset + x * 16 + 1)) = convert_uchar_sat((c.x >> 8 ) & 0xff);
*((__global uchar*)(dst + DstOffset + x * 16)) = convert_uchar_sat(c.x & 0xff);
*((__global uchar*)(dst + DstOffset + x * 16 + 7)) = convert_uchar_sat((c.y >> 24 ) & 0xff);
*((__global uchar*)(dst + DstOffset + x * 16 + 6)) = convert_uchar_sat((c.y >> 16 ) & 0xff);
*((__global uchar*)(dst + DstOffset + x * 16 + 5)) = convert_uchar_sat((c.y >> 8 ) & 0xff);
*((__global uchar*)(dst + DstOffset + x * 16 + 4)) = convert_uchar_sat(c.y & 0xff);
*((__global uchar*)(dst + DstOffset + x * 16 + 11)) = convert_uchar_sat((c.z >> 24 ) & 0xff);
*((__global uchar*)(dst + DstOffset + x * 16 + 10)) = convert_uchar_sat((c.z >> 16 ) & 0xff);
*((__global uchar*)(dst + DstOffset + x * 16 + 9)) = convert_uchar_sat((c.z >> 8 ) & 0xff);
*((__global uchar*)(dst + DstOffset + x * 16 + 8)) = convert_uchar_sat(c.z & 0xff);
*((__global uchar*)(dst + DstOffset + x * 16 + 15)) = convert_uchar_sat((c.w >> 24 ) & 0xff);
*((__global uchar*)(dst + DstOffset + x * 16 + 14)) = convert_uchar_sat((c.w >> 16 ) & 0xff);
*((__global uchar*)(dst + DstOffset + x * 16 + 13)) = convert_uchar_sat((c.w >> 8 ) & 0xff);
*((__global uchar*)(dst + DstOffset + x * 16 + 12)) = convert_uchar_sat(c.w & 0xff);
}
else{
*(__global uint4*)(dst + DstOffset + x * 16) = c;
}
}
)==="

View File

@@ -0,0 +1,139 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
R"===(
__kernel void CopyImage3dToBufferBytes(__read_only image3d_t input,
__global uchar *dst,
int4 srcOffset,
ulong dstOffset,
ulong2 Pitch) {
const uint x = get_global_id(0);
const uint y = get_global_id(1);
const uint z = get_global_id(2);
const int4 srcCoord = (int4)(x, y, z, 0) + srcOffset;
ulong DstOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y);
uint4 c = read_imageui(input, srcCoord);
*(dst + DstOffset + x) = convert_uchar_sat(c.x);
}
__kernel void CopyImage3dToBuffer2Bytes(__read_only image3d_t input,
__global uchar *dst,
int4 srcOffset,
ulong dstOffset,
ulong2 Pitch) {
const uint x = get_global_id(0);
const uint y = get_global_id(1);
const uint z = get_global_id(2);
const int4 srcCoord = (int4)(x, y, z, 0) + srcOffset;
ulong DstOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y);
uint4 c = read_imageui(input, srcCoord);
if(( ulong )(dst + dstOffset) & 0x00000001){
*((__global uchar*)(dst + DstOffset + x * 2 + 1)) = convert_uchar_sat((c.x >> 8 ) & 0xff);
*((__global uchar*)(dst + DstOffset + x * 2)) = convert_uchar_sat(c.x & 0xff);
}
else{
*((__global ushort*)(dst + DstOffset + x * 2)) = convert_ushort_sat(c.x);
}
}
__kernel void CopyImage3dToBuffer4Bytes(__read_only image3d_t input,
__global uchar *dst,
int4 srcOffset,
ulong dstOffset,
ulong2 Pitch) {
const uint x = get_global_id(0);
const uint y = get_global_id(1);
const uint z = get_global_id(2);
const int4 srcCoord = (int4)(x, y, z, 0) + srcOffset;
ulong DstOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y);
uint4 c = read_imageui(input, srcCoord);
if(( ulong )(dst + dstOffset) & 0x00000003){
*((__global uchar*)(dst + DstOffset + x * 4 + 3)) = convert_uchar_sat((c.x >> 24 ) & 0xff);
*((__global uchar*)(dst + DstOffset + x * 4 + 2)) = convert_uchar_sat((c.x >> 16 ) & 0xff);
*((__global uchar*)(dst + DstOffset + x * 4 + 1)) = convert_uchar_sat((c.x >> 8 ) & 0xff);
*((__global uchar*)(dst + DstOffset + x * 4)) = convert_uchar_sat(c.x & 0xff);
}
else{
*((__global uint*)(dst + DstOffset + x * 4)) = c.x;
}
}
__kernel void CopyImage3dToBuffer8Bytes(__read_only image3d_t input,
__global uchar *dst,
int4 srcOffset,
ulong dstOffset,
ulong2 Pitch) {
const uint x = get_global_id(0);
const uint y = get_global_id(1);
const uint z = get_global_id(2);
const int4 srcCoord = (int4)(x, y, z, 0) + srcOffset;
ulong DstOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y);
uint4 c = read_imageui(input, srcCoord);
if(( ulong )(dst + dstOffset) & 0x00000007){
*((__global uchar*)(dst + DstOffset + x * 8 + 3)) = convert_uchar_sat((c.x >> 24 ) & 0xff);
*((__global uchar*)(dst + DstOffset + x * 8 + 2)) = convert_uchar_sat((c.x >> 16 ) & 0xff);
*((__global uchar*)(dst + DstOffset + x * 8 + 1)) = convert_uchar_sat((c.x >> 8 ) & 0xff);
*((__global uchar*)(dst + DstOffset + x * 8)) = convert_uchar_sat(c.x & 0xff);
*((__global uchar*)(dst + DstOffset + x * 8 + 7)) = convert_uchar_sat((c.y >> 24 ) & 0xff);
*((__global uchar*)(dst + DstOffset + x * 8 + 6)) = convert_uchar_sat((c.y >> 16 ) & 0xff);
*((__global uchar*)(dst + DstOffset + x * 8 + 5)) = convert_uchar_sat((c.y >> 8 ) & 0xff);
*((__global uchar*)(dst + DstOffset + x * 8 + 4)) = convert_uchar_sat(c.y & 0xff);
}
else{
uint2 d = (uint2)(c.x,c.y);
*((__global uint2*)(dst + DstOffset + x * 8)) = d;
}
}
__kernel void CopyImage3dToBuffer16Bytes(__read_only image3d_t input,
__global uchar *dst,
int4 srcOffset,
ulong dstOffset,
ulong2 Pitch) {
const uint x = get_global_id(0);
const uint y = get_global_id(1);
const uint z = get_global_id(2);
const int4 srcCoord = (int4)(x, y, z, 0) + srcOffset;
ulong DstOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y);
const uint4 c = read_imageui(input, srcCoord);
if(( ulong )(dst + dstOffset) & 0x0000000f){
*((__global uchar*)(dst + DstOffset + x * 16 + 3)) = convert_uchar_sat((c.x >> 24 ) & 0xff);
*((__global uchar*)(dst + DstOffset + x * 16 + 2)) = convert_uchar_sat((c.x >> 16 ) & 0xff);
*((__global uchar*)(dst + DstOffset + x * 16 + 1)) = convert_uchar_sat((c.x >> 8 ) & 0xff);
*((__global uchar*)(dst + DstOffset + x * 16)) = convert_uchar_sat(c.x & 0xff);
*((__global uchar*)(dst + DstOffset + x * 16 + 7)) = convert_uchar_sat((c.y >> 24 ) & 0xff);
*((__global uchar*)(dst + DstOffset + x * 16 + 6)) = convert_uchar_sat((c.y >> 16 ) & 0xff);
*((__global uchar*)(dst + DstOffset + x * 16 + 5)) = convert_uchar_sat((c.y >> 8 ) & 0xff);
*((__global uchar*)(dst + DstOffset + x * 16 + 4)) = convert_uchar_sat(c.y & 0xff);
*((__global uchar*)(dst + DstOffset + x * 16 + 11)) = convert_uchar_sat((c.z >> 24 ) & 0xff);
*((__global uchar*)(dst + DstOffset + x * 16 + 10)) = convert_uchar_sat((c.z >> 16 ) & 0xff);
*((__global uchar*)(dst + DstOffset + x * 16 + 9)) = convert_uchar_sat((c.z >> 8 ) & 0xff);
*((__global uchar*)(dst + DstOffset + x * 16 + 8)) = convert_uchar_sat(c.z & 0xff);
*((__global uchar*)(dst + DstOffset + x * 16 + 15)) = convert_uchar_sat((c.w >> 24 ) & 0xff);
*((__global uchar*)(dst + DstOffset + x * 16 + 14)) = convert_uchar_sat((c.w >> 16 ) & 0xff);
*((__global uchar*)(dst + DstOffset + x * 16 + 13)) = convert_uchar_sat((c.w >> 8 ) & 0xff);
*((__global uchar*)(dst + DstOffset + x * 16 + 12)) = convert_uchar_sat(c.w & 0xff);
}
else{
*(__global uint4*)(dst + DstOffset + x * 16) = c;
}
}
)==="

View File

@@ -0,0 +1,21 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
R"===(
__kernel void CopyImageToImage1d(
__read_only image1d_t input,
__write_only image1d_t output,
int4 srcOffset,
int4 dstOffset) {
const int x = get_global_id(0);
const int srcCoord = x + srcOffset.x;
const int dstCoord = x + dstOffset.x;
const uint4 c = read_imageui(input, srcCoord);
write_imageui(output, dstCoord, c);
}
)==="

View File

@@ -0,0 +1,22 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
R"===(
__kernel void CopyImageToImage2d(
__read_only image2d_t input,
__write_only image2d_t output,
int4 srcOffset,
int4 dstOffset) {
const int x = get_global_id(0);
const int y = get_global_id(1);
const int2 srcCoord = (int2)(x, y) + (int2)(srcOffset.x, srcOffset.y);
const int2 dstCoord = (int2)(x, y) + (int2)(dstOffset.x, dstOffset.y);
const uint4 c = read_imageui(input, srcCoord);
write_imageui(output, dstCoord, c);
}
)==="

View File

@@ -0,0 +1,25 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
R"===(
#pragma OPENCL EXTENSION cl_khr_3d_image_writes : enable
__kernel void CopyImageToImage3d(
__read_only image3d_t input,
__write_only image3d_t output,
int4 srcOffset,
int4 dstOffset) {
const int x = get_global_id(0);
const int y = get_global_id(1);
const int z = get_global_id(2);
const int4 srcCoord = (int4)(x, y, z, 0) + srcOffset;
const int4 dstCoord = (int4)(x, y, z, 0) + dstOffset;
const uint4 c = read_imageui(input, srcCoord);
write_imageui(output, dstCoord, c);
}
)==="

View File

@@ -0,0 +1,73 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
R"===(
// assumption is local work size = pattern size
__kernel void FillBufferBytes(
__global uchar* pDst,
uint dstOffsetInBytes,
const __global uchar* pPattern )
{
uint dstIndex = get_global_id(0) + dstOffsetInBytes;
uint srcIndex = get_local_id(0);
pDst[dstIndex] = pPattern[srcIndex];
}
__kernel void FillBufferLeftLeftover(
__global uchar* pDst,
uint dstOffsetInBytes,
const __global uchar* pPattern,
const uint patternSizeInEls )
{
uint gid = get_global_id(0);
pDst[ gid + dstOffsetInBytes ] = pPattern[ gid & (patternSizeInEls - 1) ];
}
__kernel void FillBufferMiddle(
__global uchar* pDst,
uint dstOffsetInBytes,
const __global uint* pPattern,
const uint patternSizeInEls )
{
uint gid = get_global_id(0);
((__global uint*)(pDst + dstOffsetInBytes))[gid] = pPattern[ gid & (patternSizeInEls - 1) ];
}
__kernel void FillBufferRightLeftover(
__global uchar* pDst,
uint dstOffsetInBytes,
const __global uchar* pPattern,
const uint patternSizeInEls )
{
uint gid = get_global_id(0);
pDst[ gid + dstOffsetInBytes ] = pPattern[ gid & (patternSizeInEls - 1) ];
}
__kernel void FillBufferImmediate(
__global uchar* ptr,
uint dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment
const uint value)
{
uint dstIndex = get_global_id(0);
__global uchar* pDst = (__global uchar*)ptr + dstSshOffset;
pDst[dstIndex] = value;
}
__kernel void FillBufferSSHOffset(
__global uchar* ptr,
uint dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment
const __global uchar* pPattern,
uint patternSshOffset // Offset needed in case pPattern has been adjusted for SSH alignment
)
{
uint dstIndex = get_global_id(0);
uint srcIndex = get_local_id(0);
__global uchar* pDst = (__global uchar*)ptr + dstSshOffset;
__global uchar* pSrc = (__global uchar*)pPattern + patternSshOffset;
pDst[dstIndex] = pSrc[srcIndex];
}
)==="

View File

@@ -0,0 +1,49 @@
/*
* Copyright (C) 2019-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
R"===(
// assumption is local work size = pattern size
__kernel void FillBufferBytes(
__global uchar* pDst,
ulong dstOffsetInBytes,
const __global uchar* pPattern )
{
size_t dstIndex = get_global_id(0) + dstOffsetInBytes;
size_t srcIndex = get_local_id(0);
pDst[dstIndex] = pPattern[srcIndex];
}
__kernel void FillBufferLeftLeftover(
__global uchar* pDst,
ulong dstOffsetInBytes,
const __global uchar* pPattern,
const ulong patternSizeInEls )
{
size_t gid = get_global_id(0);
pDst[ gid + dstOffsetInBytes ] = pPattern[ gid & (patternSizeInEls - 1) ];
}
__kernel void FillBufferMiddle(
__global uchar* pDst,
ulong dstOffsetInBytes,
const __global uint* pPattern,
const ulong patternSizeInEls )
{
size_t gid = get_global_id(0);
((__global uint*)(pDst + dstOffsetInBytes))[gid] = pPattern[ gid & (patternSizeInEls - 1) ];
}
__kernel void FillBufferRightLeftover(
__global uchar* pDst,
ulong dstOffsetInBytes,
const __global uchar* pPattern,
const ulong patternSizeInEls )
{
size_t gid = get_global_id(0);
pDst[ gid + dstOffsetInBytes ] = pPattern[ gid & (patternSizeInEls - 1) ];
}
)==="

View File

@@ -0,0 +1,18 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
R"===(
__kernel void FillImage1d(
__write_only image1d_t output,
uint4 color,
int4 dstOffset) {
const int x = get_global_id(0);
const int dstCoord = x + dstOffset.x;
write_imageui(output, dstCoord, color);
}
)==="

View File

@@ -0,0 +1,19 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
R"===(
__kernel void FillImage2d(
__write_only image2d_t output,
uint4 color,
int4 dstOffset) {
const int x = get_global_id(0);
const int y = get_global_id(1);
const int2 dstCoord = (int2)(x, y) + (int2)(dstOffset.x, dstOffset.y);
write_imageui(output, dstCoord, color);
}
)==="

View File

@@ -0,0 +1,22 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
R"===(
#pragma OPENCL EXTENSION cl_khr_3d_image_writes : enable
__kernel void FillImage3d(
__write_only image3d_t output,
uint4 color,
int4 dstOffset) {
const int x = get_global_id(0);
const int y = get_global_id(1);
const int z = get_global_id(2);
const int4 dstCoord = (int4)(x, y, z, 0) + dstOffset;
write_imageui(output, dstCoord, color);
}
)==="

View File

@@ -0,0 +1,25 @@
#
# Copyright (C) 2017-2020 Intel Corporation
#
# SPDX-License-Identifier: MIT
#
add_library(${BUILTINS_SOURCES_LIB_NAME} OBJECT EXCLUDE_FROM_ALL
CMakeLists.txt
built_ins_registry.h
register_copy_kernels_source.cpp
)
set_target_properties(${BUILTINS_SOURCES_LIB_NAME} PROPERTIES POSITION_INDEPENDENT_CODE ON)
set_target_properties(${BUILTINS_SOURCES_LIB_NAME} PROPERTIES FOLDER "built_ins")
target_include_directories(${BUILTINS_SOURCES_LIB_NAME} PRIVATE
${ENGINE_NODE_DIR}
${KHRONOS_HEADERS_DIR}
${KHRONOS_GL_HEADERS_DIR}
${NEO__GMM_INCLUDE_DIR}
${NEO__IGC_INCLUDE_DIR}
${THIRD_PARTY_DIR}
)
add_subdirectories()

View File

@@ -0,0 +1,28 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include "shared/source/built_ins/built_ins.h"
#include <string>
#include <unordered_map>
namespace NEO {
struct RegisterEmbeddedResource {
RegisterEmbeddedResource(const char *name, const char *resource, size_t resourceLength) {
auto &storageRegistry = EmbeddedStorageRegistry::getInstance();
storageRegistry.store(name, createBuiltinResource(resource, resourceLength));
}
RegisterEmbeddedResource(const char *name, std::string &&resource)
: RegisterEmbeddedResource(name, resource.data(), resource.size() + 1) {
}
};
} // namespace NEO

View File

@@ -0,0 +1,167 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/built_ins/registry/built_ins_registry.h"
#include <string>
namespace NEO {
static RegisterEmbeddedResource registerCopyBufferToBufferSrc(
createBuiltinResourceName(
EBuiltInOps::CopyBufferToBuffer,
BuiltinCode::getExtension(BuiltinCode::ECodeType::Source))
.c_str(),
std::string(
#include "shared/source/built_ins/kernels/copy_buffer_to_buffer.builtin_kernel"
));
static RegisterEmbeddedResource registerCopyBufferToBufferStatelessSrc(
createBuiltinResourceName(
EBuiltInOps::CopyBufferToBufferStateless,
BuiltinCode::getExtension(BuiltinCode::ECodeType::Source))
.c_str(),
std::string(
#include "shared/source/built_ins/kernels/copy_buffer_to_buffer_stateless.builtin_kernel"
));
static RegisterEmbeddedResource registerCopyBufferRectSrc(
createBuiltinResourceName(
EBuiltInOps::CopyBufferRect,
BuiltinCode::getExtension(BuiltinCode::ECodeType::Source))
.c_str(),
std::string(
#include "shared/source/built_ins/kernels/copy_buffer_rect.builtin_kernel"
));
static RegisterEmbeddedResource registerCopyBufferRectStatelessSrc(
createBuiltinResourceName(
EBuiltInOps::CopyBufferRectStateless,
BuiltinCode::getExtension(BuiltinCode::ECodeType::Source))
.c_str(),
std::string(
#include "shared/source/built_ins/kernels/copy_buffer_rect_stateless.builtin_kernel"
));
static RegisterEmbeddedResource registerFillBufferSrc(
createBuiltinResourceName(
EBuiltInOps::FillBuffer,
BuiltinCode::getExtension(BuiltinCode::ECodeType::Source))
.c_str(),
std::string(
#include "shared/source/built_ins/kernels/fill_buffer.builtin_kernel"
));
static RegisterEmbeddedResource registerFillBufferStatelessSrc(
createBuiltinResourceName(
EBuiltInOps::FillBufferStateless,
BuiltinCode::getExtension(BuiltinCode::ECodeType::Source))
.c_str(),
std::string(
#include "shared/source/built_ins/kernels/fill_buffer_stateless.builtin_kernel"
));
static RegisterEmbeddedResource registerCopyBufferToImage3dSrc(
createBuiltinResourceName(
EBuiltInOps::CopyBufferToImage3d,
BuiltinCode::getExtension(BuiltinCode::ECodeType::Source))
.c_str(),
std::string(
#include "shared/source/built_ins/kernels/copy_buffer_to_image3d.builtin_kernel"
));
static RegisterEmbeddedResource registerCopyBufferToImage3dStatelessSrc(
createBuiltinResourceName(
EBuiltInOps::CopyBufferToImage3dStateless,
BuiltinCode::getExtension(BuiltinCode::ECodeType::Source))
.c_str(),
std::string(
#include "shared/source/built_ins/kernels/copy_buffer_to_image3d_stateless.builtin_kernel"
));
static RegisterEmbeddedResource registerCopyImage3dToBufferSrc(
createBuiltinResourceName(
EBuiltInOps::CopyImage3dToBuffer,
BuiltinCode::getExtension(BuiltinCode::ECodeType::Source))
.c_str(),
std::string(
#include "shared/source/built_ins/kernels/copy_image3d_to_buffer.builtin_kernel"
));
static RegisterEmbeddedResource registerCopyImage3dToBufferStatelessSrc(
createBuiltinResourceName(
EBuiltInOps::CopyImage3dToBufferStateless,
BuiltinCode::getExtension(BuiltinCode::ECodeType::Source))
.c_str(),
std::string(
#include "shared/source/built_ins/kernels/copy_image3d_to_buffer_stateless.builtin_kernel"
));
static RegisterEmbeddedResource registerCopyImageToImage1dSrc(
createBuiltinResourceName(
EBuiltInOps::CopyImageToImage1d,
BuiltinCode::getExtension(BuiltinCode::ECodeType::Source))
.c_str(),
std::string(
#include "shared/source/built_ins/kernels/copy_image_to_image1d.builtin_kernel"
));
static RegisterEmbeddedResource registerCopyImageToImage2dSrc(
createBuiltinResourceName(
EBuiltInOps::CopyImageToImage2d,
BuiltinCode::getExtension(BuiltinCode::ECodeType::Source))
.c_str(),
std::string(
#include "shared/source/built_ins/kernels/copy_image_to_image2d.builtin_kernel"
));
static RegisterEmbeddedResource registerCopyImageToImage3dSrc(
createBuiltinResourceName(
EBuiltInOps::CopyImageToImage3d,
BuiltinCode::getExtension(BuiltinCode::ECodeType::Source))
.c_str(),
std::string(
#include "shared/source/built_ins/kernels/copy_image_to_image3d.builtin_kernel"
));
static RegisterEmbeddedResource registerFillImage1dSrc(
createBuiltinResourceName(
EBuiltInOps::FillImage1d,
BuiltinCode::getExtension(BuiltinCode::ECodeType::Source))
.c_str(),
std::string(
#include "shared/source/built_ins/kernels/fill_image1d.builtin_kernel"
));
static RegisterEmbeddedResource registerFillImage2dSrc(
createBuiltinResourceName(
EBuiltInOps::FillImage2d,
BuiltinCode::getExtension(BuiltinCode::ECodeType::Source))
.c_str(),
std::string(
#include "shared/source/built_ins/kernels/fill_image2d.builtin_kernel"
));
static RegisterEmbeddedResource registerFillImage3dSrc(
createBuiltinResourceName(
EBuiltInOps::FillImage3d,
BuiltinCode::getExtension(BuiltinCode::ECodeType::Source))
.c_str(),
std::string(
#include "shared/source/built_ins/kernels/fill_image3d.builtin_kernel"
));
static RegisterEmbeddedResource registerAuxTranslationSrc(
createBuiltinResourceName(
EBuiltInOps::AuxTranslation,
BuiltinCode::getExtension(BuiltinCode::ECodeType::Source))
.c_str(),
std::string(
#include "shared/source/built_ins/kernels/aux_translation.builtin_kernel"
));
} // namespace NEO

View File

@@ -0,0 +1,102 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/built_ins/sip.h"
#include "shared/source/built_ins/built_ins.h"
#include "shared/source/device/device.h"
#include "shared/source/execution_environment/execution_environment.h"
#include "shared/source/helpers/debug_helpers.h"
#include "shared/source/helpers/hw_helper.h"
#include "shared/source/helpers/ptr_math.h"
#include "shared/source/helpers/string.h"
#include "shared/source/memory_manager/graphics_allocation.h"
#include "opencl/source/program/kernel_info.h"
#include "opencl/source/program/program.h"
namespace NEO {
const size_t SipKernel::maxDbgSurfaceSize = 0x49c000; // proper value should be taken from compiler when it's ready
const char *getSipKernelCompilerInternalOptions(SipKernelType kernel) {
switch (kernel) {
default:
DEBUG_BREAK_IF(true);
return "";
case SipKernelType::Csr:
return "-cl-include-sip-csr";
case SipKernelType::DbgCsr:
return "-cl-include-sip-kernel-debug -cl-include-sip-csr -cl-set-bti:0";
case SipKernelType::DbgCsrLocal:
return "-cl-include-sip-kernel-local-debug -cl-include-sip-csr -cl-set-bti:0";
}
}
const char *getSipLlSrc(const Device &device) {
#define M_DUMMY_LL_SRC \
"define void @f() { \n" \
" ret void \n" \
"} \n" \
"!opencl.compiler.options = !{!0} \n" \
"!opencl.kernels = !{!1} \n" \
"!0 = !{} \n" \
"!1 = !{void()* @f, !2, !3, !4, !5, !6, !7} \n" \
"!2 = !{!\"kernel_arg_addr_space\"} \n" \
"!3 = !{!\"kernel_arg_access_qual\"} \n" \
"!4 = !{!\"kernel_arg_type\"} \n" \
"!5 = !{!\"kernel_arg_type_qual\"} \n" \
"!6 = !{!\"kernel_arg_base_type\"} \n" \
"!7 = !{!\"kernel_arg_name\"} \n"
constexpr const char *llDummySrc32 =
"target datalayout = \"e-p:32:32:32\" \n"
"target triple = \"spir\" \n" M_DUMMY_LL_SRC;
constexpr const char *llDummySrc64 =
"target datalayout = \"e-p:64:64:64\" \n"
"target triple = \"spir64\" \n" M_DUMMY_LL_SRC;
#undef M_DUMMY_LL_SRC
const uint32_t ptrSize = device.getDeviceInfo().force32BitAddressess ? 4 : sizeof(void *);
return (ptrSize == 8) ? llDummySrc64 : llDummySrc32;
}
SipKernel::SipKernel(SipKernelType type, Program *sipProgram)
: type(type) {
program = sipProgram;
}
SipKernel::~SipKernel() {
program->release();
}
GraphicsAllocation *SipKernel::getSipAllocation() const {
return program->getKernelInfo(size_t{0})->getGraphicsAllocation();
}
const char *SipKernel::getBinary() const {
auto kernelInfo = program->getKernelInfo(size_t{0});
return reinterpret_cast<const char *>(ptrOffset(kernelInfo->heapInfo.pKernelHeap, kernelInfo->systemKernelOffset));
}
size_t SipKernel::getBinarySize() const {
auto kernelInfo = program->getKernelInfo(size_t{0});
return kernelInfo->heapInfo.pKernelHeader->KernelHeapSize - kernelInfo->systemKernelOffset;
}
SipKernelType SipKernel::getSipKernelType(GFXCORE_FAMILY family, bool debuggingActive) {
auto &hwHelper = HwHelper::get(family);
return hwHelper.getSipKernelType(debuggingActive);
}
GraphicsAllocation *SipKernel::getSipKernelAllocation(Device &device) {
auto sipType = SipKernel::getSipKernelType(device.getHardwareInfo().platform.eRenderCoreFamily, device.isDebuggerActive());
return device.getExecutionEnvironment()->getBuiltIns()->getSipKernel(sipType, device).getSipAllocation();
}
} // namespace NEO

View File

@@ -0,0 +1,51 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include "shared/source/built_ins/sip_kernel_type.h"
#include "shared/source/helpers/hw_info.h"
#include <memory>
namespace NEO {
class Device;
class Program;
class GraphicsAllocation;
const char *getSipKernelCompilerInternalOptions(SipKernelType kernel);
const char *getSipLlSrc(const Device &device);
class SipKernel {
public:
SipKernel(SipKernelType type, Program *sipProgram);
SipKernel(const SipKernel &) = delete;
SipKernel &operator=(const SipKernel &) = delete;
SipKernel(SipKernel &&) = default;
SipKernel &operator=(SipKernel &&) = default;
virtual ~SipKernel();
const char *getBinary() const;
size_t getBinarySize() const;
SipKernelType getType() const {
return type;
}
static const size_t maxDbgSurfaceSize;
MOCKABLE_VIRTUAL GraphicsAllocation *getSipAllocation() const;
static SipKernelType getSipKernelType(GFXCORE_FAMILY family, bool debuggingActive);
static GraphicsAllocation *getSipKernelAllocation(Device &device);
protected:
SipKernelType type = SipKernelType::COUNT;
Program *program = nullptr;
};
} // namespace NEO

View File

@@ -7,6 +7,7 @@
#include "shared/source/command_stream/command_stream_receiver.h"
#include "shared/source/built_ins/built_ins.h"
#include "shared/source/command_stream/experimental_command_buffer.h"
#include "shared/source/command_stream/preemption.h"
#include "shared/source/command_stream/scratch_space_controller.h"
@@ -26,8 +27,6 @@
#include "shared/source/utilities/cpuintrinsics.h"
#include "shared/source/utilities/tag_allocator.h"
#include "opencl/source/built_ins/built_ins.h"
namespace NEO {
// Global table of CommandStreamReceiver factories for HW and tests

View File

@@ -7,10 +7,10 @@
#include "shared/source/command_stream/preemption.h"
#include "shared/source/built_ins/built_ins.h"
#include "shared/source/device/device.h"
#include "shared/source/helpers/string.h"
#include "opencl/source/built_ins/built_ins.h"
#include "opencl/source/helpers/dispatch_info.h"
#include "opencl/source/kernel/kernel.h"

View File

@@ -5,12 +5,12 @@
*
*/
#include "shared/source/built_ins/sip.h"
#include "shared/source/command_stream/preemption.h"
#include "shared/source/device/device.h"
#include "shared/source/helpers/hw_helper.h"
#include "shared/source/memory_manager/graphics_allocation.h"
#include "opencl/source/built_ins/sip.h"
#include "opencl/source/command_queue/gpgpu_walker.h"
namespace NEO {

View File

@@ -6,14 +6,13 @@
*/
#pragma once
#include "shared/source/built_ins/sip.h"
#include "shared/source/compiler_interface/compiler_cache.h"
#include "shared/source/helpers/string.h"
#include "shared/source/os_interface/os_library.h"
#include "shared/source/utilities/arrayref.h"
#include "shared/source/utilities/spinlock.h"
#include "opencl/source/built_ins/sip.h"
#include "cif/common/cif_main.h"
#include "ocl_igc_interface/code_type.h"
#include "ocl_igc_interface/fcl_ocl_device_ctx.h"

View File

@@ -7,6 +7,7 @@
#include "shared/source/execution_environment/execution_environment.h"
#include "shared/source/built_ins/built_ins.h"
#include "shared/source/compiler_interface/compiler_interface.h"
#include "shared/source/compiler_interface/default_cache_config.h"
#include "shared/source/debugger/debugger.h"
@@ -14,7 +15,6 @@
#include "shared/source/gmm_helper/gmm_helper.h"
#include "shared/source/helpers/hw_helper.h"
#include "opencl/source/built_ins/built_ins.h"
#include "opencl/source/memory_manager/os_agnostic_memory_manager.h"
namespace NEO {

View File

@@ -5,12 +5,11 @@
*
*/
#include "shared/source/built_ins/built_ins.h"
#include "shared/source/command_stream/csr_definitions.h"
#include "shared/source/command_stream/preemption.h"
#include "shared/source/command_stream/preemption.inl"
#include "opencl/source/built_ins/built_ins.h"
#include <cstring>
namespace NEO {

View File

@@ -6,11 +6,11 @@
*/
#pragma once
#include "shared/source/built_ins/sip.h"
#include "shared/source/command_stream/linear_stream.h"
#include "shared/source/helpers/aux_translation.h"
#include "shared/source/helpers/hw_cmds.h"
#include "opencl/source/built_ins/sip.h"
#include "opencl/source/gen_common/aub_mapper.h"
#include "opencl/source/mem_obj/buffer.h"