Move built ins to share directory

Change-Id: I740a349a0f15229cd356fffe996932029bf0f98b
Signed-off-by: Maciej Plewka <maciej.plewka@intel.com>
This commit is contained in:
Maciej Plewka
2020-02-24 13:10:44 +01:00
committed by sys_ocldev
parent 0e85ccf084
commit 357fdc2e65
115 changed files with 608 additions and 429 deletions

View File

@@ -7,6 +7,7 @@
#include "api.h"
#include "shared/source/built_ins/built_ins.h"
#include "shared/source/command_stream/command_stream_receiver.h"
#include "shared/source/debug_settings/debug_settings_manager.h"
#include "shared/source/execution_environment/root_device_environment.h"
@@ -22,7 +23,6 @@
#include "opencl/source/accelerators/intel_motion_estimation.h"
#include "opencl/source/api/additional_extensions.h"
#include "opencl/source/aub/aub_center.h"
#include "opencl/source/built_ins/built_ins.h"
#include "opencl/source/built_ins/vme_builtin.h"
#include "opencl/source/command_queue/command_queue.h"
#include "opencl/source/context/context.h"

View File

@@ -9,43 +9,18 @@ set(RUNTIME_SRCS_BUILT_INS
${CMAKE_CURRENT_SOURCE_DIR}/aux_translation_builtin.h
${CMAKE_CURRENT_SOURCE_DIR}/builtins_dispatch_builder.cpp
${CMAKE_CURRENT_SOURCE_DIR}/builtins_dispatch_builder.h
${CMAKE_CURRENT_SOURCE_DIR}/built_ins_storage.cpp
${CMAKE_CURRENT_SOURCE_DIR}/built_ins.cpp
${CMAKE_CURRENT_SOURCE_DIR}/built_ins.h
${CMAKE_CURRENT_SOURCE_DIR}/built_in_ops_base.h
${CMAKE_CURRENT_SOURCE_DIR}/built_in_ops_vme.h
${CMAKE_CURRENT_SOURCE_DIR}/built_ins.inl
${CMAKE_CURRENT_SOURCE_DIR}/sip.cpp
${CMAKE_CURRENT_SOURCE_DIR}/sip.h
${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}/unknown_built_in_name.cpp
${CMAKE_CURRENT_SOURCE_DIR}/vme_builtin.cpp
${CMAKE_CURRENT_SOURCE_DIR}/vme_builtin.h
${CMAKE_CURRENT_SOURCE_DIR}/vme_dispatch_builder.h
)
add_subdirectory(builtinops)
target_sources(${NEO_STATIC_LIB_NAME} PRIVATE ${RUNTIME_SRCS_BUILT_INS})
set_property(GLOBAL PROPERTY RUNTIME_SRCS_BUILT_INS ${RUNTIME_SRCS_BUILT_INS})
set(RUNTIME_SRCS_BUILT_IN_KERNELS
${CMAKE_CURRENT_SOURCE_DIR}/kernels/aux_translation.builtin_kernel
${CMAKE_CURRENT_SOURCE_DIR}/kernels/copy_buffer_rect.builtin_kernel
${CMAKE_CURRENT_SOURCE_DIR}/kernels/copy_buffer_rect_stateless.builtin_kernel
${CMAKE_CURRENT_SOURCE_DIR}/kernels/copy_buffer_to_buffer.builtin_kernel
${CMAKE_CURRENT_SOURCE_DIR}/kernels/copy_buffer_to_buffer_stateless.builtin_kernel
${CMAKE_CURRENT_SOURCE_DIR}/kernels/copy_buffer_to_image3d.builtin_kernel
${CMAKE_CURRENT_SOURCE_DIR}/kernels/copy_buffer_to_image3d_stateless.builtin_kernel
${CMAKE_CURRENT_SOURCE_DIR}/kernels/copy_image3d_to_buffer.builtin_kernel
${CMAKE_CURRENT_SOURCE_DIR}/kernels/copy_image3d_to_buffer_stateless.builtin_kernel
${CMAKE_CURRENT_SOURCE_DIR}/kernels/copy_image_to_image1d.builtin_kernel
${CMAKE_CURRENT_SOURCE_DIR}/kernels/copy_image_to_image2d.builtin_kernel
${CMAKE_CURRENT_SOURCE_DIR}/kernels/copy_image_to_image3d.builtin_kernel
${CMAKE_CURRENT_SOURCE_DIR}/kernels/fill_buffer.builtin_kernel
${CMAKE_CURRENT_SOURCE_DIR}/kernels/fill_buffer_stateless.builtin_kernel
${CMAKE_CURRENT_SOURCE_DIR}/kernels/fill_image1d.builtin_kernel
${CMAKE_CURRENT_SOURCE_DIR}/kernels/fill_image2d.builtin_kernel
${CMAKE_CURRENT_SOURCE_DIR}/kernels/fill_image3d.builtin_kernel
${CMAKE_CURRENT_SOURCE_DIR}/kernels/vme_block_advanced_motion_estimate_bidirectional_check_intel.builtin_kernel
${CMAKE_CURRENT_SOURCE_DIR}/kernels/vme_block_advanced_motion_estimate_check_intel.builtin_kernel
${CMAKE_CURRENT_SOURCE_DIR}/kernels/vme_block_motion_estimate_intel.builtin_kernel
@@ -53,6 +28,9 @@ set(RUNTIME_SRCS_BUILT_IN_KERNELS
target_sources(${NEO_STATIC_LIB_NAME} PRIVATE ${RUNTIME_SRCS_BUILT_IN_KERNELS})
if(NOT (TARGET ${BUILTINS_BINARIES_LIB_NAME}))
include(builtins_binary.cmake)
if(NOT (TARGET ${BUILTINS_VME_LIB_NAME}))
add_subdirectory(registry)
if(COMPILE_BUILT_INS)
add_subdirectory(kernels)
endif()
endif()

View File

@@ -6,9 +6,9 @@
*/
#pragma once
#include "shared/source/built_ins/built_ins.h"
#include "shared/source/helpers/hw_helper.h"
#include "opencl/source/built_ins/built_ins.h"
#include "opencl/source/built_ins/builtins_dispatch_builder.h"
#include "opencl/source/helpers/dispatch_info_builder.h"

View File

@@ -1,37 +0,0 @@
/*
* Copyright (C) 2019-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include <cstdint>
namespace NEO {
// Numeric identifiers of the base built-in operations.
// The values are plain uint32_t constants rather than an enum.
namespace EBuiltInOps {

using Type = uint32_t;

constexpr Type AuxTranslation = 0;
constexpr Type CopyBufferToBuffer = 1;
constexpr Type CopyBufferToBufferStateless = 2;
constexpr Type CopyBufferRect = 3;
constexpr Type CopyBufferRectStateless = 4;
constexpr Type FillBuffer = 5;
constexpr Type FillBufferStateless = 6;
constexpr Type CopyBufferToImage3d = 7;
constexpr Type CopyBufferToImage3dStateless = 8;
constexpr Type CopyImage3dToBuffer = 9;
constexpr Type CopyImage3dToBufferStateless = 10;
constexpr Type CopyImageToImage1d = 11;
constexpr Type CopyImageToImage2d = 12;
constexpr Type CopyImageToImage3d = 13;
constexpr Type FillImage1d = 14;
constexpr Type FillImage2d = 15;
constexpr Type FillImage3d = 16;

// Highest identifier defined in this header (same value as FillImage3d).
constexpr Type MaxBaseValue = 16;
// Upper bound used to size per-operation tables.
constexpr Type COUNT = 64;

} // namespace EBuiltInOps
} // namespace NEO

View File

@@ -1,94 +0,0 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "opencl/source/built_ins/built_ins.h"
#include "shared/source/compiler_interface/compiler_interface.h"
#include "shared/source/helpers/basic_math.h"
#include "shared/source/helpers/debug_helpers.h"
#include "opencl/source/built_ins/aux_translation_builtin.h"
#include "opencl/source/built_ins/sip.h"
#include "opencl/source/device/cl_device.h"
#include "opencl/source/helpers/built_ins_helper.h"
#include "opencl/source/helpers/convert_color.h"
#include "opencl/source/helpers/dispatch_info_builder.h"
#include "opencl/source/kernel/kernel.h"
#include "opencl/source/mem_obj/image.h"
#include "opencl/source/program/program.h"
#include "compiler_options.h"
#include <cstdint>
#include <sstream>
namespace NEO {
// Creates the built-ins container together with its BuiltinsLib instance.
BuiltIns::BuiltIns()
    : builtinsLib(new BuiltinsLib()) {
}

// Defined out of line so the owned types are complete where they are destroyed.
BuiltIns::~BuiltIns() = default;
// Returns the SIP kernel of the requested type, building it on first use.
//
// The kernel binary is obtained from the device's compiler interface, wrapped
// in a program and stored in the slot for `type`. Construction is guarded by
// the slot's std::once_flag, so the initializer runs at most once per type.
// Any failure to obtain the binary or create the program is unrecoverable.
const SipKernel &BuiltIns::getSipKernel(SipKernelType type, Device &device) {
    const auto sipIndex = static_cast<uint32_t>(type);
    UNRECOVERABLE_IF(sipIndex >= static_cast<uint32_t>(SipKernelType::COUNT));

    auto &sipSlot = this->sipKernels[sipIndex];

    auto buildSipKernel = [&] {
        cl_int errorCode = CL_SUCCESS;
        std::vector<char> binary;

        auto compilerInterface = device.getExecutionEnvironment()->getCompilerInterface();
        UNRECOVERABLE_IF(compilerInterface == nullptr);

        auto translationStatus = compilerInterface->getSipKernelBinary(device, type, binary);
        UNRECOVERABLE_IF(translationStatus != TranslationOutput::ErrorCode::Success);
        UNRECOVERABLE_IF(binary.size() == 0);

        auto sipProgram = createProgramForSip(*device.getExecutionEnvironment(),
                                              nullptr,
                                              binary,
                                              binary.size(),
                                              &errorCode,
                                              &device);
        DEBUG_BREAK_IF(errorCode != CL_SUCCESS);
        UNRECOVERABLE_IF(sipProgram == nullptr);

        sipProgram->setDevice(&device);
        errorCode = sipProgram->processGenBinary();
        DEBUG_BREAK_IF(errorCode != CL_SUCCESS);

        sipSlot.first.reset(new SipKernel(type, sipProgram));
    };
    std::call_once(sipSlot.second, buildSipKernel);

    UNRECOVERABLE_IF(sipSlot.first == nullptr);
    return *sipSlot.first;
}
// Constructs the wrapper and immediately takes ownership of the builder's kernels.
BuiltInOwnershipWrapper::BuiltInOwnershipWrapper(BuiltinDispatchInfoBuilder &inputBuilder, Context *context) {
    takeOwnership(inputBuilder, context);
}

// Detaches the context from every kernel used by the wrapped builder and
// releases its ownership. Does nothing when no builder was ever wrapped.
BuiltInOwnershipWrapper::~BuiltInOwnershipWrapper() {
    if (builder == nullptr) {
        return;
    }
    for (auto &usedKernel : builder->peekUsedKernels()) {
        usedKernel->setContext(nullptr);
        usedKernel->releaseOwnership();
    }
}

// Takes ownership of every kernel used by `inputBuilder` and attaches
// `context` to it. May be called only once per wrapper; a second call is
// unrecoverable.
void BuiltInOwnershipWrapper::takeOwnership(BuiltinDispatchInfoBuilder &inputBuilder, Context *context) {
    UNRECOVERABLE_IF(builder);
    builder = &inputBuilder;
    for (auto &usedKernel : builder->peekUsedKernels()) {
        usedKernel->takeOwnership();
        usedKernel->setContext(context);
    }
}
} // namespace NEO

View File

@@ -1,214 +0,0 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include "shared/source/built_ins/sip_kernel_type.h"
#include "shared/source/helpers/debug_helpers.h"
#include "shared/source/helpers/non_copyable_or_moveable.h"
#include "shared/source/helpers/vec.h"
#include "CL/cl.h"
#include "built_in_ops.h"
#include "compiler_options.h"
#include <array>
#include <cstdint>
#include <fstream>
#include <memory>
#include <mutex>
#include <string>
#include <tuple>
#include <unordered_map>
#include <vector>
namespace NEO {
typedef std::vector<char> BuiltinResourceT;
class Context;
class Device;
class Kernel;
struct KernelInfo;
struct MultiDispatchInfo;
class Program;
class SchedulerKernel;
class SipKernel;
// Individual compiler options for media kernels: four -D defines plus
// CompilerOptions::fastRelaxedMath.
static constexpr ConstStringRef mediaKernelsBuildOptionsList[] = {
"-D cl_intel_device_side_advanced_vme_enable",
"-D cl_intel_device_side_avc_vme_enable",
"-D cl_intel_device_side_vme_enable",
"-D cl_intel_media_block_io",
CompilerOptions::fastRelaxedMath};
// The options above combined into a single compile-time concatenation.
static constexpr CompilerOptions::ConstConcatenation<> mediaKernelsBuildOptions{mediaKernelsBuildOptionsList};
// Creates a resource holding a copy of the `size` bytes starting at `ptr`.
BuiltinResourceT createBuiltinResource(const char *ptr, size_t size);
// Creates a resource that is a copy of `r`.
BuiltinResourceT createBuiltinResource(const BuiltinResourceT &r);
// Builds the resource name "<platformName>_<deviceRevId>_<builtin name><extension>".
// The "<platformName>_<deviceRevId>_" prefix is added only when `platformName` is non-empty.
std::string createBuiltinResourceName(EBuiltInOps::Type builtin, const std::string &extension,
const std::string &platformName = "", uint32_t deviceRevId = 0);
// Joins two path fragments with PATH_SEPARATOR; an empty fragment yields the other one unchanged.
std::string joinPath(const std::string &lhs, const std::string &rhs);
// Returns the file name of a built-in. Consults getAdditionalBuiltinAsString first,
// then the base operations, and finally getUnknownBuiltinAsString.
const char *getBuiltinAsString(EBuiltInOps::Type builtin);
// Fallback used by getBuiltinAsString for identifiers it does not recognise.
const char *getUnknownBuiltinAsString(EBuiltInOps::Type builtin);
// Extension point queried first by getBuiltinAsString; a null result means "not handled here".
const char *getAdditionalBuiltinAsString(EBuiltInOps::Type builtin);
// Abstract source of built-in resources addressed by name relative to a root path.
class Storage {
public:
// `rootPath` is prepended to every resource name passed to load().
Storage(const std::string &rootPath)
: rootPath(rootPath) {
}
virtual ~Storage() = default;
// Joins rootPath with `resourceName` and forwards the result to loadImpl().
BuiltinResourceT load(const std::string &resourceName);
protected:
// Backend-specific lookup; receives the already joined resource name.
virtual BuiltinResourceT loadImpl(const std::string &fullResourceName) = 0;
std::string rootPath;
};
// Storage backend that reads resources from files.
class FileStorage : public Storage {
public:
// An empty `rootPath` makes resource names be used as given.
FileStorage(const std::string &rootPath = "")
: Storage(rootPath) {
}
protected:
// Reads the whole file named `fullResourceName` in binary mode.
BuiltinResourceT loadImpl(const std::string &fullResourceName) override;
};
struct EmbeddedStorageRegistry {
static EmbeddedStorageRegistry &getInstance() {
static EmbeddedStorageRegistry gsr;
return gsr;
}
void store(const std::string &name, BuiltinResourceT &&resource) {
resources.emplace(name, BuiltinResourceT(std::move(resource)));
}
const BuiltinResourceT *get(const std::string &name) const;
private:
using ResourcesContainer = std::unordered_map<std::string, BuiltinResourceT>;
ResourcesContainer resources;
};
// Storage backend that serves resources from EmbeddedStorageRegistry.
class EmbeddedStorage : public Storage {
public:
EmbeddedStorage(const std::string &rootPath)
: Storage(rootPath) {
}
protected:
// Returns a copy of the registered resource, or an empty resource when the name is unknown.
BuiltinResourceT loadImpl(const std::string &fullResourceName) override;
};
// A loaded built-in: its representation, its bytes and the device it targets.
struct BuiltinCode {
    // Representation of the built-in code. When "Any" is requested the
    // candidates are tried in ascending numeric order.
    enum class ECodeType {
        Any = 0,          // for requesting "any" code available - priorities as below
        Binary = 1,       // ISA - highest priority
        Intermediate = 2, // SPIR/LLVM - medium priority
        Source = 3,       // OCL C - lowest priority
        COUNT,
        INVALID
    };

    // Maps a code type to the file extension of its resource;
    // types without a dedicated extension yield an empty string.
    static const char *getExtension(ECodeType ct) {
        switch (ct) {
        case ECodeType::Binary:
            return ".bin";
        case ECodeType::Intermediate:
            return ".bc";
        case ECodeType::Source:
            return ".cl";
        default:
            return "";
        }
    }

    ECodeType type;
    BuiltinResourceT resource;
    Device *targetDevice;
};
// Looks up built-in code in a prioritised list of storages and turns it into programs.
class BuiltinsLib {
public:
// Registers an EmbeddedStorage followed by a FileStorage.
BuiltinsLib();
// Loads the code of `builtin` for `device`. With ECodeType::Any the available
// representations are tried in priority order. Serialised by `mutex`.
BuiltinCode getBuiltinCode(EBuiltInOps::Type builtin, BuiltinCode::ECodeType requestedCodeType, Device &device);
// Creates a Program from `bc`; the result is empty for code types other than
// Source, Intermediate and Binary.
static std::unique_ptr<Program> createProgramFromCode(const BuiltinCode &bc, Device &device);
protected:
// Tries the stepping-specific, platform-specific and generic resource names, in
// that order, against every storage; returns the first non-empty resource.
BuiltinResourceT getBuiltinResource(EBuiltInOps::Type builtin, BuiltinCode::ECodeType requestedCodeType, Device &device);
using StoragesContainerT = std::vector<std::unique_ptr<Storage>>;
StoragesContainerT allStorages; // sorted by priority allStorages[0] will be checked before allStorages[1], etc.
std::mutex mutex;
};
// Bookkeeping for a single built-in kernel. All pointers start out null.
// NOTE(review): ownership of the pointed-to objects is not visible here - confirm at the use sites.
struct BuiltInKernel {
const char *pSource = nullptr;
Program *pProgram = nullptr;
std::once_flag programIsInitialized; // guard for creating+building the program
Kernel *pKernel = nullptr;
BuiltInKernel() {
}
};
class BuiltinDispatchInfoBuilder;
// Container for built-in dispatch builders, SIP kernels, built-in programs and
// the BuiltinsLib used to load them.
class BuiltIns {
public:
// One builder slot per built-in operation, each paired with a once_flag.
std::pair<std::unique_ptr<BuiltinDispatchInfoBuilder>, std::once_flag> BuiltinOpsBuilders[static_cast<uint32_t>(EBuiltInOps::COUNT)];
BuiltIns();
virtual ~BuiltIns();
// Returns the SIP kernel of the given type, creating it on first use.
MOCKABLE_VIRTUAL const SipKernel &getSipKernel(SipKernelType type, Device &device);
BuiltinsLib &getBuiltinsLib() {
DEBUG_BREAK_IF(!builtinsLib.get());
return *builtinsLib;
}
void setCacheingEnableState(bool enableCacheing) {
this->enableCacheing = enableCacheing;
}
bool isCacheingEnabled() const {
return this->enableCacheing;
}
protected:
// sip builtins
std::pair<std::unique_ptr<SipKernel>, std::once_flag> sipKernels[static_cast<uint32_t>(SipKernelType::COUNT)];
std::unique_ptr<BuiltinsLib> builtinsLib;
// One program slot per built-in operation, each paired with a once_flag.
using ProgramsContainerT = std::array<std::pair<std::unique_ptr<Program>, std::once_flag>, static_cast<size_t>(EBuiltInOps::COUNT)>;
ProgramsContainerT builtinPrograms;
// Caching is enabled by default; this class only stores the flag.
bool enableCacheing = true;
};
// Scoped guard over the kernels used by a BuiltinDispatchInfoBuilder: taking
// ownership acquires each kernel and sets its context; the destructor clears
// the context and releases the kernels again.
class BuiltInOwnershipWrapper : public NonCopyableOrMovableClass {
public:
// Creates an empty wrapper; call takeOwnership() later to arm it.
BuiltInOwnershipWrapper() = default;
BuiltInOwnershipWrapper(BuiltinDispatchInfoBuilder &inputBuilder, Context *context);
~BuiltInOwnershipWrapper();
// May be called at most once per wrapper.
void takeOwnership(BuiltinDispatchInfoBuilder &inputBuilder, Context *context);
protected:
// Builder whose kernels are currently held; null while the wrapper is empty.
BuiltinDispatchInfoBuilder *builder = nullptr;
};
template <EBuiltInOps::Type OpCode>
class BuiltInOp;
} // namespace NEO

View File

@@ -1,221 +0,0 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/debug_settings/debug_settings_manager.h"
#include "shared/source/device/device.h"
#include "opencl/source/built_ins/built_ins.h"
#include "opencl/source/built_ins/builtins_dispatch_builder.h"
#include "os_inc.h"
#include <cstdint>
namespace NEO {
// Returns the kernel file name of a built-in operation.
//
// getAdditionalBuiltinAsString() gets the first chance to name the built-in.
// The base operations are resolved here, and everything else is delegated to
// getUnknownBuiltinAsString().
const char *getBuiltinAsString(EBuiltInOps::Type builtin) {
    if (const char *additionalName = getAdditionalBuiltinAsString(builtin)) {
        return additionalName;
    }

    switch (builtin) {
    case EBuiltInOps::AuxTranslation:
        return "aux_translation.builtin_kernel";
    case EBuiltInOps::CopyBufferToBuffer:
        return "copy_buffer_to_buffer.builtin_kernel";
    case EBuiltInOps::CopyBufferToBufferStateless:
        return "copy_buffer_to_buffer_stateless.builtin_kernel";
    case EBuiltInOps::CopyBufferRect:
        return "copy_buffer_rect.builtin_kernel";
    case EBuiltInOps::CopyBufferRectStateless:
        return "copy_buffer_rect_stateless.builtin_kernel";
    case EBuiltInOps::FillBuffer:
        return "fill_buffer.builtin_kernel";
    case EBuiltInOps::FillBufferStateless:
        return "fill_buffer_stateless.builtin_kernel";
    case EBuiltInOps::CopyBufferToImage3d:
        return "copy_buffer_to_image3d.builtin_kernel";
    case EBuiltInOps::CopyBufferToImage3dStateless:
        return "copy_buffer_to_image3d_stateless.builtin_kernel";
    case EBuiltInOps::CopyImage3dToBuffer:
        return "copy_image3d_to_buffer.builtin_kernel";
    case EBuiltInOps::CopyImage3dToBufferStateless:
        return "copy_image3d_to_buffer_stateless.builtin_kernel";
    case EBuiltInOps::CopyImageToImage1d:
        return "copy_image_to_image1d.builtin_kernel";
    case EBuiltInOps::CopyImageToImage2d:
        return "copy_image_to_image2d.builtin_kernel";
    case EBuiltInOps::CopyImageToImage3d:
        return "copy_image_to_image3d.builtin_kernel";
    case EBuiltInOps::FillImage1d:
        return "fill_image1d.builtin_kernel";
    case EBuiltInOps::FillImage2d:
        return "fill_image2d.builtin_kernel";
    case EBuiltInOps::FillImage3d:
        return "fill_image3d.builtin_kernel";
    default:
        break;
    }
    return getUnknownBuiltinAsString(builtin);
}
// Creates a resource holding a copy of the `size` bytes starting at `ptr`.
BuiltinResourceT createBuiltinResource(const char *ptr, size_t size) {
    const char *const first = ptr;
    const char *const last = ptr + size;
    return BuiltinResourceT(first, last);
}

// Creates a resource that is a copy of `r`.
BuiltinResourceT createBuiltinResource(const BuiltinResourceT &r) {
    BuiltinResourceT duplicate(r);
    return duplicate;
}
// Builds the name under which a built-in is stored:
// "<platformName>_<deviceRevId>_<builtin file name><extension>".
// The platform/revision prefix is emitted only for a non-empty `platformName`.
std::string createBuiltinResourceName(EBuiltInOps::Type builtin, const std::string &extension,
                                      const std::string &platformName, uint32_t deviceRevId) {
    std::string resourceName;
    if (!platformName.empty()) {
        resourceName.append(platformName);
        resourceName.append("_");
        resourceName.append(std::to_string(deviceRevId));
        resourceName.append("_");
    }

    resourceName.append(getBuiltinAsString(builtin));

    if (!extension.empty()) {
        resourceName.append(extension);
    }
    return resourceName;
}
// Joins two path fragments. An empty fragment yields the other one unchanged;
// PATH_SEPARATOR is inserted only when `lhs` does not already end with it.
std::string joinPath(const std::string &lhs, const std::string &rhs) {
    if (lhs.empty()) {
        return rhs;
    }
    if (rhs.empty()) {
        return lhs;
    }

    const bool endsWithSeparator = (lhs.back() == PATH_SEPARATOR);
    if (!endsWithSeparator) {
        return lhs + PATH_SEPARATOR + rhs;
    }
    return lhs + rhs;
}
// Returns the root directory used for file-based built-ins; always empty here.
std::string getDriverInstallationPath() {
    return std::string{};
}
// Resolves `resourceName` against this storage's root path and loads it
// through the backend-specific loadImpl().
BuiltinResourceT Storage::load(const std::string &resourceName) {
    const std::string fullResourceName = joinPath(rootPath, resourceName);
    return loadImpl(fullResourceName);
}
// Reads the whole file `fullResourceName` in binary mode.
//
// Returns the file contents, or an empty resource when the file cannot be
// opened, its size cannot be determined, it is empty, or it cannot be read in
// full. Callers treat an empty resource as "not found", so a failed or short
// read must never be reported as a zero-padded buffer.
BuiltinResourceT FileStorage::loadImpl(const std::string &fullResourceName) {
    BuiltinResourceT ret;

    // Open positioned at the end so the first tellg() reports the file size.
    std::ifstream f{fullResourceName, std::ios::in | std::ios::binary | std::ios::ate};
    if (!f.is_open()) {
        return ret;
    }

    const auto end = f.tellg();
    f.seekg(0, std::ios::beg);
    const auto beg = f.tellg();
    if (f.fail()) {
        return ret;
    }

    const std::streamoff size = end - beg;
    if (size <= 0) {
        return ret;
    }

    ret.resize(static_cast<size_t>(size));
    f.read(ret.data(), size);
    if (f.gcount() != size) {
        // Short read: drop the partial data instead of returning it as valid.
        ret.clear();
    }
    return ret;
}
// Returns a pointer to the resource registered under `name`,
// or nullptr when no such resource exists.
const BuiltinResourceT *EmbeddedStorageRegistry::get(const std::string &name) const {
    const auto found = resources.find(name);
    if (found != resources.end()) {
        return &found->second;
    }
    return nullptr;
}
// Returns a copy of the embedded resource registered under `fullResourceName`,
// or an empty resource when the registry has no such entry.
BuiltinResourceT EmbeddedStorage::loadImpl(const std::string &fullResourceName) {
    const BuiltinResourceT *registered = EmbeddedStorageRegistry::getInstance().get(fullResourceName);
    if (registered != nullptr) {
        return createBuiltinResource(*registered);
    }
    return BuiltinResourceT{};
}
// Registers the storages in priority order: embedded resources are consulted
// before files under the driver installation path.
BuiltinsLib::BuiltinsLib() {
    std::unique_ptr<Storage> embeddedStorage(new EmbeddedStorage(""));
    allStorages.push_back(std::move(embeddedStorage));

    std::unique_ptr<Storage> fileStorage(new FileStorage(getDriverInstallationPath()));
    allStorages.push_back(std::move(fileStorage));
}
// Loads the code of `builtin` for `device`; the call is serialised by `mutex`.
//
// For a specific `requestedCodeType` only that representation is looked up and
// the result reports that type even when nothing was found. For
// ECodeType::Any the representations are tried in ascending order, starting at
// Binary, or at Source when the RebuildPrecompiledKernels debug flag is set.
// The first non-empty resource wins; if none is found the type is INVALID.
BuiltinCode BuiltinsLib::getBuiltinCode(EBuiltInOps::Type builtin, BuiltinCode::ECodeType requestedCodeType, Device &device) {
    std::lock_guard<std::mutex> guard{mutex};

    BuiltinResourceT loadedResource;
    BuiltinCode::ECodeType resolvedType = BuiltinCode::ECodeType::INVALID;

    if (requestedCodeType != BuiltinCode::ECodeType::Any) {
        loadedResource = getBuiltinResource(builtin, requestedCodeType, device);
        resolvedType = requestedCodeType;
    } else {
        const BuiltinCode::ECodeType firstCandidate = DebugManager.flags.RebuildPrecompiledKernels.get()
                                                          ? BuiltinCode::ECodeType::Source
                                                          : BuiltinCode::ECodeType::Binary;
        const uint32_t pastLastCandidate = static_cast<uint32_t>(BuiltinCode::ECodeType::COUNT);

        for (uint32_t candidate = static_cast<uint32_t>(firstCandidate); candidate != pastLastCandidate; ++candidate) {
            const auto candidateType = static_cast<BuiltinCode::ECodeType>(candidate);
            loadedResource = getBuiltinResource(builtin, candidateType, device);
            if (loadedResource.size() > 0) {
                resolvedType = candidateType;
                break;
            }
        }
    }

    BuiltinCode result;
    std::swap(result.resource, loadedResource);
    result.type = resolvedType;
    result.targetDevice = &device;
    return result;
}
std::unique_ptr<Program> BuiltinsLib::createProgramFromCode(const BuiltinCode &bc, Device &device) {
std::unique_ptr<Program> ret;
const char *data = bc.resource.data();
size_t dataLen = bc.resource.size();
cl_int err = 0;
switch (bc.type) {
default:
break;
case BuiltinCode::ECodeType::Source:
case BuiltinCode::ECodeType::Intermediate:
ret.reset(Program::create(data, nullptr, device, true, &err));
break;
case BuiltinCode::ECodeType::Binary:
ret.reset(Program::createFromGenBinary(*device.getExecutionEnvironment(), nullptr, data, dataLen, true, nullptr, &device));
break;
}
return ret;
}
// Finds the resource of `builtin` in the requested representation.
//
// Three names are tried, most specific first: platform + stepping, platform
// only, then the generic name. Each name is tried against every storage in
// priority order. The first non-empty resource is returned; an empty resource
// means nothing was found.
BuiltinResourceT BuiltinsLib::getBuiltinResource(EBuiltInOps::Type builtin, BuiltinCode::ECodeType requestedCodeType, Device &device) {
    const std::string genericName = createBuiltinResourceName(builtin, BuiltinCode::getExtension(requestedCodeType));
    const std::string platformName = createBuiltinResourceName(builtin, BuiltinCode::getExtension(requestedCodeType), getFamilyNameWithType(device.getHardwareInfo()));
    const std::string platformAndSteppingName = createBuiltinResourceName(builtin, BuiltinCode::getExtension(requestedCodeType), getFamilyNameWithType(device.getHardwareInfo()),
                                                                          device.getHardwareInfo().platform.usRevId);

    // first look for dedicated version, only fallback to generic one
    const std::string *const lookupOrder[] = {&platformAndSteppingName, &platformName, &genericName};

    BuiltinResourceT resource;
    for (const std::string *candidateName : lookupOrder) {
        for (auto &storage : allStorages) {
            resource = storage->load(*candidateName);
            if (resource.size() != 0) {
                return resource;
            }
        }
    }
    return resource;
}
} // namespace NEO

View File

@@ -1,14 +0,0 @@
#
# Copyright (C) 2020 Intel Corporation
#
# SPDX-License-Identifier: MIT
#
# Files of the built-in ops directory that belong to the static runtime library.
# BRANCH_DIR_SUFFIX selects the branch-specific location of built_in_ops.h.
set(RUNTIME_SRCS_BUILT_INS_OPS
  ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
  ${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}/built_in_ops.h
)

# Attach the list defined above. The previous code expanded
# RUNTIME_SRCS_EMBARGO_BUILT_INS_OPS, which is never set in this file, so no
# sources were actually added to the target.
target_sources(${NEO_STATIC_LIB_NAME} PRIVATE ${RUNTIME_SRCS_BUILT_INS_OPS})
add_subdirectories()

View File

@@ -1,15 +0,0 @@
/*
* Copyright (C) 2019-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include "opencl/source/built_ins/built_in_ops_base.h"
namespace NEO {
namespace EBuiltInOps {

// Highest built-in identifier of this variant; identical to the base maximum.
constexpr Type MaxCoreValue = MaxBaseValue;

} // namespace EBuiltInOps
} // namespace NEO

View File

@@ -1,71 +0,0 @@
#
# Copyright (C) 2018-2020 Intel Corporation
#
# SPDX-License-Identifier: MIT
#
# Object library that collects the generated built-in sources. It is excluded
# from the default build and initially lists only this script as a source.
add_library(${BUILTINS_BINARIES_LIB_NAME} OBJECT EXCLUDE_FROM_ALL builtins_binary.cmake)
# Add builtins sources
add_subdirectory(registry)
# Base names of the built-in kernels with generated sources.
set(GENERATED_BUILTINS
"aux_translation"
"copy_buffer_rect"
"copy_buffer_to_buffer"
"copy_buffer_to_image3d"
"copy_image3d_to_buffer"
"copy_image_to_image1d"
"copy_image_to_image2d"
"copy_image_to_image3d"
"fill_buffer"
"fill_image1d"
"fill_image2d"
"fill_image3d"
)
# Base names of the stateless built-in kernel variants.
set(GENERATED_BUILTINS_STATELESS
"copy_buffer_to_buffer_stateless"
"copy_buffer_rect_stateless"
"copy_buffer_to_image3d_stateless"
"copy_image3d_to_buffer_stateless"
"fill_buffer_stateless"
)
# Generate builtins cpps
# The kernels directory is processed only when COMPILE_BUILT_INS is enabled.
if(COMPILE_BUILT_INS)
add_subdirectory(kernels)
endif()
# Per-gen callback for apply_macro_for_each_gen().
#
# Reads GEN_TYPE, GEN_TYPE_LOWER and PLATFORM_TYPES from the calling scope,
# plus family_name_with_type as set by get_family_name_with_type(). For every
# platform type of the gen it appends the generated source of each regular and
# stateless built-in to GENERATED_BUILTINS_CPPS, which keeps accumulating
# across gens and is consumed after the iteration.
#
# The IDE source group receives only the files of the current gen. Passing the
# accumulated list would re-list earlier gens' files under every later gen, and
# the last source_group() that lists a file explicitly is the one that applies.
macro(macro_for_each_gen)
  set(GENERATED_BUILTINS_CPPS_CURRENT_GEN "")
  foreach(PLATFORM_TYPE ${PLATFORM_TYPES})
    get_family_name_with_type(${GEN_TYPE} ${PLATFORM_TYPE})
    foreach(GENERATED_BUILTIN ${GENERATED_BUILTINS})
      list(APPEND GENERATED_BUILTINS_CPPS_CURRENT_GEN ${BUILTINS_INCLUDE_DIR}/${RUNTIME_GENERATED_${GENERATED_BUILTIN}_${family_name_with_type}})
    endforeach()
    foreach(GENERATED_BUILTIN_STATELESS ${GENERATED_BUILTINS_STATELESS})
      list(APPEND GENERATED_BUILTINS_CPPS_CURRENT_GEN ${BUILTINS_INCLUDE_DIR}/${RUNTIME_GENERATED_${GENERATED_BUILTIN_STATELESS}_${family_name_with_type}})
    endforeach()
  endforeach()
  list(APPEND GENERATED_BUILTINS_CPPS ${GENERATED_BUILTINS_CPPS_CURRENT_GEN})
  source_group("generated files\\${GEN_TYPE_LOWER}" FILES ${GENERATED_BUILTINS_CPPS_CURRENT_GEN})
endmacro()
# Collect the generated built-in sources of every supported gen.
apply_macro_for_each_gen("SUPPORTED")

# The generated sources exist only when built-ins are compiled as part of the build.
if(COMPILE_BUILT_INS)
  target_sources(${BUILTINS_BINARIES_LIB_NAME}
    PUBLIC
      ${GENERATED_BUILTINS_CPPS}
  )
  set_source_files_properties(${GENERATED_BUILTINS_CPPS}
    PROPERTIES
      GENERATED TRUE
  )
endif()

# Properties of the built-ins object library.
set_target_properties(${BUILTINS_BINARIES_LIB_NAME}
  PROPERTIES
    LINKER_LANGUAGE CXX
    POSITION_INDEPENDENT_CODE ON
    FOLDER "built_ins"
)

target_include_directories(${BUILTINS_BINARIES_LIB_NAME}
  PRIVATE
    ${ENGINE_NODE_DIR}
    ${KHRONOS_HEADERS_DIR}
    ${KHRONOS_GL_HEADERS_DIR}
    ${NEO__GMM_INCLUDE_DIR}
    ${NEO__IGC_INCLUDE_DIR}
    ${THIRD_PARTY_DIR}
)

View File

@@ -7,14 +7,14 @@
#include "opencl/source/built_ins/builtins_dispatch_builder.h"
#include "shared/source/built_ins/built_ins.h"
#include "shared/source/built_ins/sip.h"
#include "shared/source/compiler_interface/compiler_interface.h"
#include "shared/source/helpers/basic_math.h"
#include "shared/source/helpers/debug_helpers.h"
#include "opencl/source/built_ins/aux_translation_builtin.h"
#include "opencl/source/built_ins/built_ins.h"
#include "opencl/source/built_ins/built_ins.inl"
#include "opencl/source/built_ins/sip.h"
#include "opencl/source/built_ins/vme_dispatch_builder.h"
#include "opencl/source/device/cl_device.h"
#include "opencl/source/helpers/built_ins_helper.h"
@@ -761,4 +761,25 @@ BuiltinDispatchInfoBuilder &BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuil
}
return *operationBuilder.first;
}
// Constructs the wrapper and immediately takes ownership of the builder's kernels.
BuiltInOwnershipWrapper::BuiltInOwnershipWrapper(BuiltinDispatchInfoBuilder &inputBuilder, Context *context) {
    takeOwnership(inputBuilder, context);
}

// Detaches the context from every kernel used by the wrapped builder and
// releases its ownership. Does nothing when no builder was ever wrapped.
BuiltInOwnershipWrapper::~BuiltInOwnershipWrapper() {
    if (builder == nullptr) {
        return;
    }
    for (auto &usedKernel : builder->peekUsedKernels()) {
        usedKernel->setContext(nullptr);
        usedKernel->releaseOwnership();
    }
}

// Takes ownership of every kernel used by `inputBuilder` and attaches
// `context` to it. May be called only once per wrapper; a second call is
// unrecoverable.
void BuiltInOwnershipWrapper::takeOwnership(BuiltinDispatchInfoBuilder &inputBuilder, Context *context) {
    UNRECOVERABLE_IF(builder);
    builder = &inputBuilder;
    for (auto &usedKernel : builder->peekUsedKernels()) {
        usedKernel->takeOwnership();
        usedKernel->setContext(context);
    }
}
} // namespace NEO

View File

@@ -6,9 +6,9 @@
*/
#pragma once
#include "shared/source/built_ins/built_ins.h"
#include "shared/source/helpers/vec.h"
#include "opencl/source/built_ins/built_ins.h"
#include "opencl/source/kernel/kernel.h"
#include "CL/cl.h"
@@ -109,4 +109,16 @@ class BuiltInDispatchBuilderOp {
std::unique_ptr<BuiltinDispatchInfoBuilder> newBuilder);
};
// Scoped guard over the kernels used by a BuiltinDispatchInfoBuilder: taking
// ownership acquires each kernel and sets its context; the destructor clears
// the context and releases the kernels again.
class BuiltInOwnershipWrapper : public NonCopyableOrMovableClass {
public:
// Creates an empty wrapper; call takeOwnership() later to arm it.
BuiltInOwnershipWrapper() = default;
BuiltInOwnershipWrapper(BuiltinDispatchInfoBuilder &inputBuilder, Context *context);
~BuiltInOwnershipWrapper();
// May be called at most once per wrapper.
void takeOwnership(BuiltinDispatchInfoBuilder &inputBuilder, Context *context);
protected:
// Builder whose kernels are currently held; null while the wrapper is empty.
BuiltinDispatchInfoBuilder *builder = nullptr;
};
} // namespace NEO

View File

@@ -4,10 +4,10 @@
# SPDX-License-Identifier: MIT
#
add_custom_target(builtins)
set_target_properties(builtins PROPERTIES FOLDER "built_ins")
add_custom_target(builtins_vme_sources)
set_target_properties(builtins_vme_sources PROPERTIES FOLDER "built_ins")
set(BUILTINS_OUTDIR_WITH_ARCH "${TargetDir}/built_ins/${NEO_ARCH}")
add_dependencies(${BUILTINS_BINARIES_LIB_NAME} builtins)
add_dependencies(${BUILTINS_BINARIES_LIB_NAME} builtins_vme_sources)
add_subdirectories()
set(GENERATED_BUILTINS ${GENERATED_BUILTINS} PARENT_SCOPE)
set(GENERATED_BUILTINS_STATELESS ${GENERATED_BUILTINS_STATELESS} PARENT_SCOPE)
@@ -95,7 +95,7 @@ macro(macro_for_each_gen)
list(APPEND BUILTINS_COMMANDS ${TargetDir}/${BUILTIN_CPP})
set(RUNTIME_GENERATED_${GENERATED_BUILTIN_STATELESS}_${family_name_with_type} ${BUILTIN_CPP} PARENT_SCOPE)
endforeach()
set(target_name builtins_${family_name_with_type})
set(target_name builtins_${family_name_with_type}_vme)
add_custom_target(${target_name} DEPENDS ${BUILTINS_COMMANDS})
add_dependencies(builtins ${target_name})
set_target_properties(${target_name} PROPERTIES FOLDER "opencl/source/built_ins/${family_name_with_type}")

View File

@@ -1,14 +0,0 @@
/*
* Copyright (C) 2018-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
// fullCopy: each work-item loads one uint4 from `src` at vector index
// get_global_id(0) and stores it to `dst` at the same vector index.
R"===(
__kernel void fullCopy(__global const uint* src, __global uint* dst) {
unsigned int gid = get_global_id(0);
uint4 loaded = vload4(gid, src);
vstore4(loaded, gid, dst);
}
)==="

View File

@@ -1,48 +0,0 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
// Rectangular buffer-to-buffer copies, one byte per work-item.
//   CopyBufferRectBytes2d - offset = x + Origin.x + (y + Origin.y) * Pitch.x
//   CopyBufferRectBytes3d - additionally adds (z + Origin.z) * Pitch.y
// x, y and z are the global ids of dimensions 0, 1 and 2. Source and
// destination offsets are computed independently from their own origin and
// pitch arguments, using 32-bit unsigned arithmetic.
R"===(
//////////////////////////////////////////////////////////////////////////////
__kernel void CopyBufferRectBytes2d(
__global const char* src,
__global char* dst,
uint4 SrcOrigin,
uint4 DstOrigin,
uint2 SrcPitch,
uint2 DstPitch )
{
int x = get_global_id(0);
int y = get_global_id(1);
uint LSrcOffset = x + SrcOrigin.x + ( ( y + SrcOrigin.y ) * SrcPitch.x );
uint LDstOffset = x + DstOrigin.x + ( ( y + DstOrigin.y ) * DstPitch.x );
*( dst + LDstOffset ) = *( src + LSrcOffset );
}
//////////////////////////////////////////////////////////////////////////////
__kernel void CopyBufferRectBytes3d(
__global const char* src,
__global char* dst,
uint4 SrcOrigin,
uint4 DstOrigin,
uint2 SrcPitch,
uint2 DstPitch )
{
int x = get_global_id(0);
int y = get_global_id(1);
int z = get_global_id(2);
uint LSrcOffset = x + SrcOrigin.x + ( ( y + SrcOrigin.y ) * SrcPitch.x ) + ( ( z + SrcOrigin.z ) * SrcPitch.y );
uint LDstOffset = x + DstOrigin.x + ( ( y + DstOrigin.y ) * DstPitch.x ) + ( ( z + DstOrigin.z ) * DstPitch.y );
*( dst + LDstOffset ) = *( src + LSrcOffset );
}
)==="

View File

@@ -1,48 +0,0 @@
/*
* Copyright (C) 2019-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
// Rectangular buffer-to-buffer copies, one byte per work-item, with ulong
// origin/pitch arguments and size_t offsets.
//   CopyBufferRectBytes2d - offset = x + Origin.x + (y + Origin.y) * Pitch.x
//   CopyBufferRectBytes3d - additionally adds (z + Origin.z) * Pitch.y
// x, y and z are the global ids of dimensions 0, 1 and 2.
R"===(
//////////////////////////////////////////////////////////////////////////////
__kernel void CopyBufferRectBytes2d(
__global const char* src,
__global char* dst,
ulong4 SrcOrigin,
ulong4 DstOrigin,
ulong2 SrcPitch,
ulong2 DstPitch )
{
size_t x = get_global_id(0);
size_t y = get_global_id(1);
size_t LSrcOffset = x + SrcOrigin.x + ( ( y + SrcOrigin.y ) * SrcPitch.x );
size_t LDstOffset = x + DstOrigin.x + ( ( y + DstOrigin.y ) * DstPitch.x );
*( dst + LDstOffset ) = *( src + LSrcOffset );
}
//////////////////////////////////////////////////////////////////////////////
__kernel void CopyBufferRectBytes3d(
__global const char* src,
__global char* dst,
ulong4 SrcOrigin,
ulong4 DstOrigin,
ulong2 SrcPitch,
ulong2 DstPitch )
{
size_t x = get_global_id(0);
size_t y = get_global_id(1);
size_t z = get_global_id(2);
size_t LSrcOffset = x + SrcOrigin.x + ( ( y + SrcOrigin.y ) * SrcPitch.x ) + ( ( z + SrcOrigin.z ) * SrcPitch.y );
size_t LDstOffset = x + DstOrigin.x + ( ( y + DstOrigin.y ) * DstPitch.x ) + ( ( z + DstOrigin.z ) * DstPitch.y );
*( dst + LDstOffset ) = *( src + LSrcOffset );
}
)==="

View File

@@ -1,54 +0,0 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
// Linear buffer-to-buffer copy kernels with 32-bit offsets.
//   CopyBufferToBufferBytes         - one byte per work-item; `bytesToRead`
//                                     is not read by the kernel body.
//   CopyBufferToBufferLeftLeftover  - one byte per work-item.
//   CopyBufferToBufferMiddle        - one uint4 (16 bytes) per work-item; the
//                                     byte offsets are converted to uint
//                                     element offsets with ">> 2".
//   CopyBufferToBufferRightLeftover - one byte per work-item, same body as
//                                     the left-leftover kernel.
R"===(
__kernel void CopyBufferToBufferBytes(
const __global uchar* pSrc,
__global uchar* pDst,
uint srcOffsetInBytes,
uint dstOffsetInBytes,
uint bytesToRead )
{
pSrc += ( srcOffsetInBytes + get_global_id(0) );
pDst += ( dstOffsetInBytes + get_global_id(0) );
pDst[ 0 ] = pSrc[ 0 ];
}
__kernel void CopyBufferToBufferLeftLeftover(
const __global uchar* pSrc,
__global uchar* pDst,
uint srcOffsetInBytes,
uint dstOffsetInBytes)
{
unsigned int gid = get_global_id(0);
pDst[ gid + dstOffsetInBytes ] = pSrc[ gid + srcOffsetInBytes ];
}
__kernel void CopyBufferToBufferMiddle(
const __global uint* pSrc,
__global uint* pDst,
uint srcOffsetInBytes,
uint dstOffsetInBytes)
{
unsigned int gid = get_global_id(0);
pDst += dstOffsetInBytes >> 2;
pSrc += srcOffsetInBytes >> 2;
uint4 loaded = vload4(gid, pSrc);
vstore4(loaded, gid, pDst);
}
__kernel void CopyBufferToBufferRightLeftover(
const __global uchar* pSrc,
__global uchar* pDst,
uint srcOffsetInBytes,
uint dstOffsetInBytes)
{
unsigned int gid = get_global_id(0);
pDst[ gid + dstOffsetInBytes ] = pSrc[ gid + srcOffsetInBytes ];
}
)==="

View File

@@ -1,54 +0,0 @@
/*
* Copyright (C) 2019-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
// Linear buffer-to-buffer copy kernels with ulong offsets and size_t indices.
//   CopyBufferToBufferBytes         - one byte per work-item; `bytesToRead`
//                                     is not read by the kernel body.
//   CopyBufferToBufferLeftLeftover  - one byte per work-item.
//   CopyBufferToBufferMiddle        - one uint4 (16 bytes) per work-item; the
//                                     byte offsets are converted to uint
//                                     element offsets with ">> 2".
//   CopyBufferToBufferRightLeftover - one byte per work-item, same body as
//                                     the left-leftover kernel.
R"===(
__kernel void CopyBufferToBufferBytes(
const __global uchar* pSrc,
__global uchar* pDst,
ulong srcOffsetInBytes,
ulong dstOffsetInBytes,
ulong bytesToRead )
{
pSrc += ( srcOffsetInBytes + get_global_id(0) );
pDst += ( dstOffsetInBytes + get_global_id(0) );
pDst[ 0 ] = pSrc[ 0 ];
}
__kernel void CopyBufferToBufferLeftLeftover(
const __global uchar* pSrc,
__global uchar* pDst,
ulong srcOffsetInBytes,
ulong dstOffsetInBytes)
{
size_t gid = get_global_id(0);
pDst[ gid + dstOffsetInBytes ] = pSrc[ gid + srcOffsetInBytes ];
}
__kernel void CopyBufferToBufferMiddle(
const __global uint* pSrc,
__global uint* pDst,
ulong srcOffsetInBytes,
ulong dstOffsetInBytes)
{
size_t gid = get_global_id(0);
pDst += dstOffsetInBytes >> 2;
pSrc += srcOffsetInBytes >> 2;
uint4 loaded = vload4(gid, pSrc);
vstore4(loaded, gid, pDst);
}
__kernel void CopyBufferToBufferRightLeftover(
const __global uchar* pSrc,
__global uchar* pDst,
ulong srcOffsetInBytes,
ulong dstOffsetInBytes)
{
size_t gid = get_global_id(0);
pDst[ gid + dstOffsetInBytes ] = pSrc[ gid + srcOffsetInBytes ];
}
)==="

View File

@@ -1,161 +0,0 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
R"===(
#pragma OPENCL EXTENSION cl_khr_3d_image_writes : enable
__kernel void CopyBufferToImage3dBytes(__global uchar *src,
__write_only image3d_t output,
int srcOffset,
int4 dstOffset,
uint2 Pitch) {
const uint x = get_global_id(0);
const uint y = get_global_id(1);
const uint z = get_global_id(2);
int4 dstCoord = (int4)(x, y, z, 0) + dstOffset;
uint LOffset = srcOffset + (y * Pitch.x) + (z * Pitch.y);
write_imageui(output, dstCoord, (uint4)(*(src + LOffset + x), 0, 0, 1));
}
// Copies one 2-byte texel per work-item from a linear buffer into a 3D image.
//   srcOffset - byte offset of the first texel inside src
//   dstOffset - added to the work-item id (x, y, z, 0) to form the image coordinate
//   Pitch     - .x is multiplied by the row index, .y by the slice index (bytes)
// The value lands in channel x; y/z are 0 and w is 1.
// A single ushort load is used only when the texel's own address is 2-byte
// aligned; otherwise the value is assembled from bytes, low byte first.
// The test is done on the final address (not just src + srcOffset) because a
// row/slice pitch that is not a multiple of 2 can misalign individual texels.
__kernel void CopyBufferToImage3d2Bytes(__global uchar *src,
                                        __write_only image3d_t output,
                                        int srcOffset,
                                        int4 dstOffset,
                                        uint2 Pitch) {
    const uint x = get_global_id(0);
    const uint y = get_global_id(1);
    const uint z = get_global_id(2);

    const int4 dstCoord = (int4)(x, y, z, 0) + dstOffset;
    const uint LOffset = srcOffset + (y * Pitch.x) + (z * Pitch.y);
    __global uchar *texel = src + LOffset + x * 2;

    uint4 c = (uint4)(0, 0, 0, 1);
    if ((ulong)texel & 0x00000001) {
        c.x = ((uint)texel[1] << 8) | texel[0];
    } else {
        c.x = (uint)(*(__global ushort *)texel);
    }
    write_imageui(output, dstCoord, c);
}
// Copies one 4-byte texel per work-item from a linear buffer into a 3D image.
//   srcOffset - byte offset of the first texel inside src
//   dstOffset - added to the work-item id (x, y, z, 0) to form the image coordinate
//   Pitch     - .x is multiplied by the row index, .y by the slice index (bytes)
// The value lands in channel x; y/z are 0 and w is 1.
// A single uint load is used only when the texel's own address is 4-byte
// aligned; otherwise the value is assembled from bytes, low byte first.
// The test is done on the final address (not just src + srcOffset) because a
// row/slice pitch that is not a multiple of 4 can misalign individual texels.
__kernel void CopyBufferToImage3d4Bytes(__global uchar *src,
                                        __write_only image3d_t output,
                                        int srcOffset,
                                        int4 dstOffset,
                                        uint2 Pitch) {
    const uint x = get_global_id(0);
    const uint y = get_global_id(1);
    const uint z = get_global_id(2);

    const int4 dstCoord = (int4)(x, y, z, 0) + dstOffset;
    const uint LOffset = srcOffset + (y * Pitch.x) + (z * Pitch.y);
    __global uchar *texel = src + LOffset + x * 4;

    uint4 c = (uint4)(0, 0, 0, 1);
    if ((ulong)texel & 0x00000003) {
        c.x = ((uint)texel[3] << 24) | ((uint)texel[2] << 16) | ((uint)texel[1] << 8) | texel[0];
    } else {
        c.x = *((__global uint *)texel);
    }
    write_imageui(output, dstCoord, c);
}
// Copies one 8-byte texel per work-item from a linear buffer into a 3D image.
//   srcOffset - byte offset of the first texel inside src
//   dstOffset - added to the work-item id (x, y, z, 0) to form the image coordinate
//   Pitch     - .x is multiplied by the row index, .y by the slice index (bytes)
// Bytes 0-3 go to channel x and bytes 4-7 to channel y; z is 0 and w is 1.
// A single uint2 load is used only when the texel's own address is 8-byte
// aligned; otherwise both words are assembled from bytes, low byte first.
// The test is done on the final address (not just src + srcOffset) because a
// row/slice pitch that is not a multiple of 8 can misalign individual texels.
__kernel void CopyBufferToImage3d8Bytes(__global uchar *src,
                                        __write_only image3d_t output,
                                        int srcOffset,
                                        int4 dstOffset,
                                        uint2 Pitch) {
    const uint x = get_global_id(0);
    const uint y = get_global_id(1);
    const uint z = get_global_id(2);

    const int4 dstCoord = (int4)(x, y, z, 0) + dstOffset;
    const uint LOffset = srcOffset + (y * Pitch.x) + (z * Pitch.y);
    __global uchar *texel = src + LOffset + x * 8;

    uint2 c = (uint2)(0, 0);
    if ((ulong)texel & 0x00000007) {
        c.x = ((uint)texel[3] << 24) | ((uint)texel[2] << 16) | ((uint)texel[1] << 8) | texel[0];
        c.y = ((uint)texel[7] << 24) | ((uint)texel[6] << 16) | ((uint)texel[5] << 8) | texel[4];
    } else {
        c = *((__global uint2 *)texel);
    }
    write_imageui(output, dstCoord, (uint4)(c.x, c.y, 0, 1));
}
// Copies one 16-byte texel per work-item from a linear buffer into a 3D image.
//   srcOffset - byte offset of the first texel inside src
//   dstOffset - added to the work-item id (x, y, z, 0) to form the image coordinate
//   Pitch     - .x is multiplied by the row index, .y by the slice index (bytes)
// Bytes 0-3, 4-7, 8-11 and 12-15 go to channels x, y, z and w respectively.
// A single uint4 load is used only when the texel's own address is 16-byte
// aligned; otherwise every word is assembled from bytes, low byte first.
// The test is done on the final address (not just src + srcOffset) because a
// row/slice pitch that is not a multiple of 16 can misalign individual texels.
__kernel void CopyBufferToImage3d16Bytes(__global uchar *src,
                                         __write_only image3d_t output,
                                         int srcOffset,
                                         int4 dstOffset,
                                         uint2 Pitch) {
    const uint x = get_global_id(0);
    const uint y = get_global_id(1);
    const uint z = get_global_id(2);

    const int4 dstCoord = (int4)(x, y, z, 0) + dstOffset;
    const uint LOffset = srcOffset + (y * Pitch.x) + (z * Pitch.y);
    __global uchar *texel = src + LOffset + x * 16;

    uint4 c = (uint4)(0, 0, 0, 0);
    if ((ulong)texel & 0x0000000f) {
        c.x = ((uint)texel[3] << 24) | ((uint)texel[2] << 16) | ((uint)texel[1] << 8) | texel[0];
        c.y = ((uint)texel[7] << 24) | ((uint)texel[6] << 16) | ((uint)texel[5] << 8) | texel[4];
        c.z = ((uint)texel[11] << 24) | ((uint)texel[10] << 16) | ((uint)texel[9] << 8) | texel[8];
        c.w = ((uint)texel[15] << 24) | ((uint)texel[14] << 16) | ((uint)texel[13] << 8) | texel[12];
    } else {
        c = *((__global uint4 *)texel);
    }
    write_imageui(output, dstCoord, c);
}
)==="

View File

@@ -1,161 +0,0 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
R"===(
#pragma OPENCL EXTENSION cl_khr_3d_image_writes : enable
// Stateless variant: copies one 1-byte texel per work-item from a linear buffer
// into a 3D image, with the source offset computed in 64-bit (ulong) arithmetic.
//   srcOffset - byte offset of the first texel inside src
//   dstOffset - added to the work-item id (x, y, z, 0) to form the image coordinate
//   Pitch     - .x is multiplied by the row index, .y by the slice index (bytes)
// The byte is written to channel x; channels y/z are 0 and w is 1.
__kernel void CopyBufferToImage3dBytes(__global uchar *src,
                                       __write_only image3d_t output,
                                       ulong srcOffset,
                                       int4 dstOffset,
                                       ulong2 Pitch) {
    const uint x = get_global_id(0);
    const uint y = get_global_id(1);
    const uint z = get_global_id(2);

    const ulong rowStart = srcOffset + (y * Pitch.x) + (z * Pitch.y);
    const uint texel = *(src + rowStart + x);

    const int4 target = (int4)(x, y, z, 0) + dstOffset;
    write_imageui(output, target, (uint4)(texel, 0, 0, 1));
}
// Stateless variant (64-bit offsets): copies one 2-byte texel per work-item
// from a linear buffer into a 3D image.
//   srcOffset - byte offset of the first texel inside src
//   dstOffset - added to the work-item id (x, y, z, 0) to form the image coordinate
//   Pitch     - .x is multiplied by the row index, .y by the slice index (bytes)
// The value lands in channel x; y/z are 0 and w is 1.
// A single ushort load is used only when the texel's own address is 2-byte
// aligned; otherwise the value is assembled from bytes, low byte first.
// The test is done on the final address (not just src + srcOffset) because a
// row/slice pitch that is not a multiple of 2 can misalign individual texels.
__kernel void CopyBufferToImage3d2Bytes(__global uchar *src,
                                        __write_only image3d_t output,
                                        ulong srcOffset,
                                        int4 dstOffset,
                                        ulong2 Pitch) {
    const uint x = get_global_id(0);
    const uint y = get_global_id(1);
    const uint z = get_global_id(2);

    const int4 dstCoord = (int4)(x, y, z, 0) + dstOffset;
    const ulong LOffset = srcOffset + (y * Pitch.x) + (z * Pitch.y);
    __global uchar *texel = src + LOffset + x * 2;

    uint4 c = (uint4)(0, 0, 0, 1);
    if ((ulong)texel & 0x00000001) {
        c.x = ((uint)texel[1] << 8) | texel[0];
    } else {
        c.x = (uint)(*(__global ushort *)texel);
    }
    write_imageui(output, dstCoord, c);
}
// Stateless variant (64-bit offsets): copies one 4-byte texel per work-item
// from a linear buffer into a 3D image.
//   srcOffset - byte offset of the first texel inside src
//   dstOffset - added to the work-item id (x, y, z, 0) to form the image coordinate
//   Pitch     - .x is multiplied by the row index, .y by the slice index (bytes)
// The value lands in channel x; y/z are 0 and w is 1.
// A single uint load is used only when the texel's own address is 4-byte
// aligned; otherwise the value is assembled from bytes, low byte first.
// The test is done on the final address (not just src + srcOffset) because a
// row/slice pitch that is not a multiple of 4 can misalign individual texels.
__kernel void CopyBufferToImage3d4Bytes(__global uchar *src,
                                        __write_only image3d_t output,
                                        ulong srcOffset,
                                        int4 dstOffset,
                                        ulong2 Pitch) {
    const uint x = get_global_id(0);
    const uint y = get_global_id(1);
    const uint z = get_global_id(2);

    const int4 dstCoord = (int4)(x, y, z, 0) + dstOffset;
    const ulong LOffset = srcOffset + (y * Pitch.x) + (z * Pitch.y);
    __global uchar *texel = src + LOffset + x * 4;

    uint4 c = (uint4)(0, 0, 0, 1);
    if ((ulong)texel & 0x00000003) {
        c.x = ((uint)texel[3] << 24) | ((uint)texel[2] << 16) | ((uint)texel[1] << 8) | texel[0];
    } else {
        c.x = *((__global uint *)texel);
    }
    write_imageui(output, dstCoord, c);
}
// Stateless variant (64-bit offsets): copies one 8-byte texel per work-item
// from a linear buffer into a 3D image.
//   srcOffset - byte offset of the first texel inside src
//   dstOffset - added to the work-item id (x, y, z, 0) to form the image coordinate
//   Pitch     - .x is multiplied by the row index, .y by the slice index (bytes)
// Bytes 0-3 go to channel x and bytes 4-7 to channel y; z is 0 and w is 1.
// A single uint2 load is used only when the texel's own address is 8-byte
// aligned; otherwise both words are assembled from bytes, low byte first.
// The test is done on the final address (not just src + srcOffset) because a
// row/slice pitch that is not a multiple of 8 can misalign individual texels.
__kernel void CopyBufferToImage3d8Bytes(__global uchar *src,
                                        __write_only image3d_t output,
                                        ulong srcOffset,
                                        int4 dstOffset,
                                        ulong2 Pitch) {
    const uint x = get_global_id(0);
    const uint y = get_global_id(1);
    const uint z = get_global_id(2);

    const int4 dstCoord = (int4)(x, y, z, 0) + dstOffset;
    const ulong LOffset = srcOffset + (y * Pitch.x) + (z * Pitch.y);
    __global uchar *texel = src + LOffset + x * 8;

    uint2 c = (uint2)(0, 0);
    if ((ulong)texel & 0x00000007) {
        c.x = ((uint)texel[3] << 24) | ((uint)texel[2] << 16) | ((uint)texel[1] << 8) | texel[0];
        c.y = ((uint)texel[7] << 24) | ((uint)texel[6] << 16) | ((uint)texel[5] << 8) | texel[4];
    } else {
        c = *((__global uint2 *)texel);
    }
    write_imageui(output, dstCoord, (uint4)(c.x, c.y, 0, 1));
}
// Stateless variant (64-bit offsets): copies one 16-byte texel per work-item
// from a linear buffer into a 3D image.
//   srcOffset - byte offset of the first texel inside src
//   dstOffset - added to the work-item id (x, y, z, 0) to form the image coordinate
//   Pitch     - .x is multiplied by the row index, .y by the slice index (bytes)
// Bytes 0-3, 4-7, 8-11 and 12-15 go to channels x, y, z and w respectively.
// A single uint4 load is used only when the texel's own address is 16-byte
// aligned; otherwise every word is assembled from bytes, low byte first.
// The test is done on the final address (not just src + srcOffset) because a
// row/slice pitch that is not a multiple of 16 can misalign individual texels.
__kernel void CopyBufferToImage3d16Bytes(__global uchar *src,
                                         __write_only image3d_t output,
                                         ulong srcOffset,
                                         int4 dstOffset,
                                         ulong2 Pitch) {
    const uint x = get_global_id(0);
    const uint y = get_global_id(1);
    const uint z = get_global_id(2);

    const int4 dstCoord = (int4)(x, y, z, 0) + dstOffset;
    const ulong LOffset = srcOffset + (y * Pitch.x) + (z * Pitch.y);
    __global uchar *texel = src + LOffset + x * 16;

    uint4 c = (uint4)(0, 0, 0, 0);
    if ((ulong)texel & 0x0000000f) {
        c.x = ((uint)texel[3] << 24) | ((uint)texel[2] << 16) | ((uint)texel[1] << 8) | texel[0];
        c.y = ((uint)texel[7] << 24) | ((uint)texel[6] << 16) | ((uint)texel[5] << 8) | texel[4];
        c.z = ((uint)texel[11] << 24) | ((uint)texel[10] << 16) | ((uint)texel[9] << 8) | texel[8];
        c.w = ((uint)texel[15] << 24) | ((uint)texel[14] << 16) | ((uint)texel[13] << 8) | texel[12];
    } else {
        c = *((__global uint4 *)texel);
    }
    write_imageui(output, dstCoord, c);
}
)==="

View File

@@ -1,139 +0,0 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
R"===(
// Copies one 1-byte texel per work-item from a 3D image into a linear buffer.
//   srcOffset - added to the work-item id (x, y, z, 0) to form the image coordinate
//   dstOffset - byte offset of the first texel inside dst
//   Pitch     - .x is multiplied by the row index, .y by the slice index (bytes)
// Channel x of the texel is saturated to uchar before being stored.
// The destination offset is computed in 32-bit (uint) arithmetic.
__kernel void CopyImage3dToBufferBytes(__read_only image3d_t input,
                                       __global uchar *dst,
                                       int4 srcOffset,
                                       int dstOffset,
                                       uint2 Pitch) {
    const uint x = get_global_id(0);
    const uint y = get_global_id(1);
    const uint z = get_global_id(2);

    const int4 from = (int4)(x, y, z, 0) + srcOffset;
    const uint4 texel = read_imageui(input, from);

    const uint rowStart = dstOffset + (y * Pitch.x) + (z * Pitch.y);
    __global uchar *out = dst + rowStart + x;
    *out = convert_uchar_sat(texel.x);
}
// Copies one 2-byte texel per work-item from a 3D image into a linear buffer.
//   srcOffset - added to the work-item id (x, y, z, 0) to form the image coordinate
//   dstOffset - byte offset of the first texel inside dst
//   Pitch     - .x is multiplied by the row index, .y by the slice index (bytes)
// Channel x is saturated to ushort once, so the aligned and the byte-wise
// store paths always write the same value.
// A single ushort store is used only when the destination address itself is
// 2-byte aligned; otherwise the value is stored byte by byte, low byte first.
// The test is done on the final address (not just dst + dstOffset) because a
// row/slice pitch that is not a multiple of 2 can misalign individual texels.
__kernel void CopyImage3dToBuffer2Bytes(__read_only image3d_t input,
                                        __global uchar *dst,
                                        int4 srcOffset,
                                        int dstOffset,
                                        uint2 Pitch) {
    const uint x = get_global_id(0);
    const uint y = get_global_id(1);
    const uint z = get_global_id(2);

    const int4 srcCoord = (int4)(x, y, z, 0) + srcOffset;
    const uint DstOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y);
    __global uchar *out = dst + DstOffset + x * 2;

    const uint4 c = read_imageui(input, srcCoord);
    const ushort value = convert_ushort_sat(c.x);

    if ((ulong)out & 0x00000001) {
        out[1] = (uchar)(value >> 8);
        out[0] = (uchar)(value & 0xff);
    } else {
        *((__global ushort *)out) = value;
    }
}
// Copies one 4-byte texel per work-item from a 3D image into a linear buffer.
//   srcOffset - added to the work-item id (x, y, z, 0) to form the image coordinate
//   dstOffset - byte offset of the first texel inside dst
//   Pitch     - .x is multiplied by the row index, .y by the slice index (bytes)
// Channel x is stored. A single uint store is used only when the destination
// address itself is 4-byte aligned; otherwise it is stored byte by byte, low
// byte first. The test is done on the final address (not just dst + dstOffset)
// because a row/slice pitch that is not a multiple of 4 can misalign texels.
__kernel void CopyImage3dToBuffer4Bytes(__read_only image3d_t input,
                                        __global uchar *dst,
                                        int4 srcOffset,
                                        int dstOffset,
                                        uint2 Pitch) {
    const uint x = get_global_id(0);
    const uint y = get_global_id(1);
    const uint z = get_global_id(2);

    const int4 srcCoord = (int4)(x, y, z, 0) + srcOffset;
    const uint DstOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y);
    __global uchar *out = dst + DstOffset + x * 4;

    const uint4 c = read_imageui(input, srcCoord);

    if ((ulong)out & 0x00000003) {
        // Masking with 0xff already limits each value to the uchar range.
        for (int b = 0; b < 4; ++b) {
            out[b] = (uchar)((c.x >> (8 * b)) & 0xff);
        }
    } else {
        *((__global uint *)out) = c.x;
    }
}
// Copies one 8-byte texel per work-item from a 3D image into a linear buffer.
//   srcOffset - added to the work-item id (x, y, z, 0) to form the image coordinate
//   dstOffset - byte offset of the first texel inside dst
//   Pitch     - .x is multiplied by the row index, .y by the slice index (bytes)
// Channel x fills bytes 0-3 and channel y bytes 4-7. A single uint2 store is
// used only when the destination address itself is 8-byte aligned; otherwise
// the words are stored byte by byte, low byte first. The test is done on the
// final address (not just dst + dstOffset) because a row/slice pitch that is
// not a multiple of 8 can misalign individual texels.
__kernel void CopyImage3dToBuffer8Bytes(__read_only image3d_t input,
                                        __global uchar *dst,
                                        int4 srcOffset,
                                        int dstOffset,
                                        uint2 Pitch) {
    const uint x = get_global_id(0);
    const uint y = get_global_id(1);
    const uint z = get_global_id(2);

    const int4 srcCoord = (int4)(x, y, z, 0) + srcOffset;
    const uint DstOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y);
    __global uchar *out = dst + DstOffset + x * 8;

    const uint4 c = read_imageui(input, srcCoord);

    if ((ulong)out & 0x00000007) {
        // Masking with 0xff already limits each value to the uchar range.
        for (int b = 0; b < 4; ++b) {
            out[b] = (uchar)((c.x >> (8 * b)) & 0xff);
            out[4 + b] = (uchar)((c.y >> (8 * b)) & 0xff);
        }
    } else {
        *((__global uint2 *)out) = (uint2)(c.x, c.y);
    }
}
// Copies one 16-byte texel per work-item from a 3D image into a linear buffer.
//   srcOffset - added to the work-item id (x, y, z, 0) to form the image coordinate
//   dstOffset - byte offset of the first texel inside dst
//   Pitch     - .x is multiplied by the row index, .y by the slice index (bytes)
// Channels x, y, z, w fill bytes 0-3, 4-7, 8-11 and 12-15. A single uint4 store
// is used only when the destination address itself is 16-byte aligned;
// otherwise the words are stored byte by byte, low byte first. The test is done
// on the final address (not just dst + dstOffset) because a row/slice pitch
// that is not a multiple of 16 can misalign individual texels.
__kernel void CopyImage3dToBuffer16Bytes(__read_only image3d_t input,
                                         __global uchar *dst,
                                         int4 srcOffset,
                                         int dstOffset,
                                         uint2 Pitch) {
    const uint x = get_global_id(0);
    const uint y = get_global_id(1);
    const uint z = get_global_id(2);

    const int4 srcCoord = (int4)(x, y, z, 0) + srcOffset;
    const uint DstOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y);
    __global uchar *out = dst + DstOffset + x * 16;

    const uint4 c = read_imageui(input, srcCoord);

    if ((ulong)out & 0x0000000f) {
        // Masking with 0xff already limits each value to the uchar range.
        for (int b = 0; b < 4; ++b) {
            out[b] = (uchar)((c.x >> (8 * b)) & 0xff);
            out[4 + b] = (uchar)((c.y >> (8 * b)) & 0xff);
            out[8 + b] = (uchar)((c.z >> (8 * b)) & 0xff);
            out[12 + b] = (uchar)((c.w >> (8 * b)) & 0xff);
        }
    } else {
        *((__global uint4 *)out) = c;
    }
}
)==="

View File

@@ -1,139 +0,0 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
R"===(
// Stateless variant: copies one 1-byte texel per work-item from a 3D image into
// a linear buffer, with the destination offset computed in 64-bit arithmetic.
//   srcOffset - added to the work-item id (x, y, z, 0) to form the image coordinate
//   dstOffset - byte offset of the first texel inside dst
//   Pitch     - .x is multiplied by the row index, .y by the slice index (bytes)
// Channel x of the texel is saturated to uchar before being stored.
__kernel void CopyImage3dToBufferBytes(__read_only image3d_t input,
                                       __global uchar *dst,
                                       int4 srcOffset,
                                       ulong dstOffset,
                                       ulong2 Pitch) {
    const uint x = get_global_id(0);
    const uint y = get_global_id(1);
    const uint z = get_global_id(2);

    const int4 from = (int4)(x, y, z, 0) + srcOffset;
    const uint4 texel = read_imageui(input, from);

    const ulong rowStart = dstOffset + (y * Pitch.x) + (z * Pitch.y);
    __global uchar *out = dst + rowStart + x;
    *out = convert_uchar_sat(texel.x);
}
// Stateless variant (64-bit offsets): copies one 2-byte texel per work-item
// from a 3D image into a linear buffer.
//   srcOffset - added to the work-item id (x, y, z, 0) to form the image coordinate
//   dstOffset - byte offset of the first texel inside dst
//   Pitch     - .x is multiplied by the row index, .y by the slice index (bytes)
// Channel x is saturated to ushort once, so the aligned and the byte-wise
// store paths always write the same value.
// A single ushort store is used only when the destination address itself is
// 2-byte aligned; otherwise the value is stored byte by byte, low byte first.
// The test is done on the final address (not just dst + dstOffset) because a
// row/slice pitch that is not a multiple of 2 can misalign individual texels.
__kernel void CopyImage3dToBuffer2Bytes(__read_only image3d_t input,
                                        __global uchar *dst,
                                        int4 srcOffset,
                                        ulong dstOffset,
                                        ulong2 Pitch) {
    const uint x = get_global_id(0);
    const uint y = get_global_id(1);
    const uint z = get_global_id(2);

    const int4 srcCoord = (int4)(x, y, z, 0) + srcOffset;
    const ulong DstOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y);
    __global uchar *out = dst + DstOffset + x * 2;

    const uint4 c = read_imageui(input, srcCoord);
    const ushort value = convert_ushort_sat(c.x);

    if ((ulong)out & 0x00000001) {
        out[1] = (uchar)(value >> 8);
        out[0] = (uchar)(value & 0xff);
    } else {
        *((__global ushort *)out) = value;
    }
}
// Stateless variant (64-bit offsets): copies one 4-byte texel per work-item
// from a 3D image into a linear buffer.
//   srcOffset - added to the work-item id (x, y, z, 0) to form the image coordinate
//   dstOffset - byte offset of the first texel inside dst
//   Pitch     - .x is multiplied by the row index, .y by the slice index (bytes)
// Channel x is stored. A single uint store is used only when the destination
// address itself is 4-byte aligned; otherwise it is stored byte by byte, low
// byte first. The test is done on the final address (not just dst + dstOffset)
// because a row/slice pitch that is not a multiple of 4 can misalign texels.
__kernel void CopyImage3dToBuffer4Bytes(__read_only image3d_t input,
                                        __global uchar *dst,
                                        int4 srcOffset,
                                        ulong dstOffset,
                                        ulong2 Pitch) {
    const uint x = get_global_id(0);
    const uint y = get_global_id(1);
    const uint z = get_global_id(2);

    const int4 srcCoord = (int4)(x, y, z, 0) + srcOffset;
    const ulong DstOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y);
    __global uchar *out = dst + DstOffset + x * 4;

    const uint4 c = read_imageui(input, srcCoord);

    if ((ulong)out & 0x00000003) {
        // Masking with 0xff already limits each value to the uchar range.
        for (int b = 0; b < 4; ++b) {
            out[b] = (uchar)((c.x >> (8 * b)) & 0xff);
        }
    } else {
        *((__global uint *)out) = c.x;
    }
}
// Stateless variant (64-bit offsets): copies one 8-byte texel per work-item
// from a 3D image into a linear buffer.
//   srcOffset - added to the work-item id (x, y, z, 0) to form the image coordinate
//   dstOffset - byte offset of the first texel inside dst
//   Pitch     - .x is multiplied by the row index, .y by the slice index (bytes)
// Channel x fills bytes 0-3 and channel y bytes 4-7. A single uint2 store is
// used only when the destination address itself is 8-byte aligned; otherwise
// the words are stored byte by byte, low byte first. The test is done on the
// final address (not just dst + dstOffset) because a row/slice pitch that is
// not a multiple of 8 can misalign individual texels.
__kernel void CopyImage3dToBuffer8Bytes(__read_only image3d_t input,
                                        __global uchar *dst,
                                        int4 srcOffset,
                                        ulong dstOffset,
                                        ulong2 Pitch) {
    const uint x = get_global_id(0);
    const uint y = get_global_id(1);
    const uint z = get_global_id(2);

    const int4 srcCoord = (int4)(x, y, z, 0) + srcOffset;
    const ulong DstOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y);
    __global uchar *out = dst + DstOffset + x * 8;

    const uint4 c = read_imageui(input, srcCoord);

    if ((ulong)out & 0x00000007) {
        // Masking with 0xff already limits each value to the uchar range.
        for (int b = 0; b < 4; ++b) {
            out[b] = (uchar)((c.x >> (8 * b)) & 0xff);
            out[4 + b] = (uchar)((c.y >> (8 * b)) & 0xff);
        }
    } else {
        *((__global uint2 *)out) = (uint2)(c.x, c.y);
    }
}
// Stateless variant (64-bit offsets): copies one 16-byte texel per work-item
// from a 3D image into a linear buffer.
//   srcOffset - added to the work-item id (x, y, z, 0) to form the image coordinate
//   dstOffset - byte offset of the first texel inside dst
//   Pitch     - .x is multiplied by the row index, .y by the slice index (bytes)
// Channels x, y, z, w fill bytes 0-3, 4-7, 8-11 and 12-15. A single uint4 store
// is used only when the destination address itself is 16-byte aligned;
// otherwise the words are stored byte by byte, low byte first. The test is done
// on the final address (not just dst + dstOffset) because a row/slice pitch
// that is not a multiple of 16 can misalign individual texels.
__kernel void CopyImage3dToBuffer16Bytes(__read_only image3d_t input,
                                         __global uchar *dst,
                                         int4 srcOffset,
                                         ulong dstOffset,
                                         ulong2 Pitch) {
    const uint x = get_global_id(0);
    const uint y = get_global_id(1);
    const uint z = get_global_id(2);

    const int4 srcCoord = (int4)(x, y, z, 0) + srcOffset;
    const ulong DstOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y);
    __global uchar *out = dst + DstOffset + x * 16;

    const uint4 c = read_imageui(input, srcCoord);

    if ((ulong)out & 0x0000000f) {
        // Masking with 0xff already limits each value to the uchar range.
        for (int b = 0; b < 4; ++b) {
            out[b] = (uchar)((c.x >> (8 * b)) & 0xff);
            out[4 + b] = (uchar)((c.y >> (8 * b)) & 0xff);
            out[8 + b] = (uchar)((c.z >> (8 * b)) & 0xff);
            out[12 + b] = (uchar)((c.w >> (8 * b)) & 0xff);
        }
    } else {
        *((__global uint4 *)out) = c;
    }
}
)==="

View File

@@ -1,21 +0,0 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
R"===(
// Copies one texel per work-item between two 1D images.
// Only the .x component of srcOffset/dstOffset is used; each is added to the
// work-item id to obtain the read and write coordinates.
__kernel void CopyImageToImage1d(
    __read_only image1d_t input,
    __write_only image1d_t output,
    int4 srcOffset,
    int4 dstOffset) {
    const int item = get_global_id(0);
    const uint4 texel = read_imageui(input, item + srcOffset.x);
    write_imageui(output, item + dstOffset.x, texel);
}
)==="

View File

@@ -1,22 +0,0 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
R"===(
// Copies one texel per work-item between two 2D images.
// Only the .x/.y components of srcOffset/dstOffset are used; they are added to
// the 2D work-item id to obtain the read and write coordinates.
__kernel void CopyImageToImage2d(
    __read_only image2d_t input,
    __write_only image2d_t output,
    int4 srcOffset,
    int4 dstOffset) {
    const int x = get_global_id(0);
    const int y = get_global_id(1);
    const int2 item = (int2)(x, y);

    const int2 from = item + srcOffset.xy;
    const int2 to = item + dstOffset.xy;

    const uint4 texel = read_imageui(input, from);
    write_imageui(output, to, texel);
}
)==="

View File

@@ -1,25 +0,0 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
R"===(
#pragma OPENCL EXTENSION cl_khr_3d_image_writes : enable
// Copies one texel per work-item between two 3D images.
// srcOffset/dstOffset are added to the work-item id (x, y, z, 0) to obtain the
// read and write coordinates.
__kernel void CopyImageToImage3d(
    __read_only image3d_t input,
    __write_only image3d_t output,
    int4 srcOffset,
    int4 dstOffset) {
    const int x = get_global_id(0);
    const int y = get_global_id(1);
    const int z = get_global_id(2);
    const int4 item = (int4)(x, y, z, 0);

    const int4 from = item + srcOffset;
    const int4 to = item + dstOffset;

    const uint4 texel = read_imageui(input, from);
    write_imageui(output, to, texel);
}
)==="

View File

@@ -1,49 +0,0 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
R"===(
// Pattern fill, one byte per work-item.
// The pattern byte is selected by the local id, so the local work size is
// assumed to be equal to the pattern size.
// Indices are computed in 32-bit (uint) arithmetic.
__kernel void FillBufferBytes(
    __global uchar* pDst,
    uint dstOffsetInBytes,
    const __global uchar* pPattern )
{
    const uint patternByte = get_local_id(0);
    const uint target = get_global_id(0) + dstOffsetInBytes;
    pDst[target] = pPattern[patternByte];
}
// Pattern fill, one byte per work-item.
// The pattern index is gid & (patternSizeInEls - 1); this equals
// gid % patternSizeInEls only when patternSizeInEls is a power of two
// (presumably guaranteed by the caller - confirm).
// Indices are computed in 32-bit (uint) arithmetic.
__kernel void FillBufferLeftLeftover(
    __global uchar* pDst,
    uint dstOffsetInBytes,
    const __global uchar* pPattern,
    const uint patternSizeInEls )
{
    const uint gid = get_global_id(0);
    const uint patternIndex = gid & (patternSizeInEls - 1);
    const uint target = gid + dstOffsetInBytes;
    pDst[ target ] = pPattern[ patternIndex ];
}
// Pattern fill, one uint per work-item.
// The destination is viewed as an array of uint starting at
// pDst + dstOffsetInBytes; the pattern is indexed in uint elements with
// gid & (patternSizeInEls - 1), which equals gid % patternSizeInEls only when
// patternSizeInEls is a power of two (presumably guaranteed by the caller - confirm).
__kernel void FillBufferMiddle(
    __global uchar* pDst,
    uint dstOffsetInBytes,
    const __global uint* pPattern,
    const uint patternSizeInEls )
{
    const uint gid = get_global_id(0);
    __global uint* dstWords = (__global uint*)(pDst + dstOffsetInBytes);
    const uint patternIndex = gid & (patternSizeInEls - 1);
    dstWords[gid] = pPattern[ patternIndex ];
}
// Pattern fill, one byte per work-item; same body as the "left leftover"
// kernel, kept as a separately named entry point.
// The pattern index is gid & (patternSizeInEls - 1), which equals
// gid % patternSizeInEls only when patternSizeInEls is a power of two
// (presumably guaranteed by the caller - confirm).
__kernel void FillBufferRightLeftover(
    __global uchar* pDst,
    uint dstOffsetInBytes,
    const __global uchar* pPattern,
    const uint patternSizeInEls )
{
    const uint gid = get_global_id(0);
    const uchar value = pPattern[ gid & (patternSizeInEls - 1) ];
    const uint target = gid + dstOffsetInBytes;
    pDst[ target ] = value;
}
)==="

View File

@@ -1,49 +0,0 @@
/*
* Copyright (C) 2019-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
R"===(
// Stateless pattern fill (64-bit offsets), one byte per work-item.
// The pattern byte is selected by the local id, so the local work size is
// assumed to be equal to the pattern size.
__kernel void FillBufferBytes(
    __global uchar* pDst,
    ulong dstOffsetInBytes,
    const __global uchar* pPattern )
{
    const size_t patternByte = get_local_id(0);
    const size_t target = get_global_id(0) + dstOffsetInBytes;
    pDst[target] = pPattern[patternByte];
}
// Stateless pattern fill (64-bit offsets), one byte per work-item.
// The pattern index is gid & (patternSizeInEls - 1); this equals
// gid % patternSizeInEls only when patternSizeInEls is a power of two
// (presumably guaranteed by the caller - confirm).
__kernel void FillBufferLeftLeftover(
    __global uchar* pDst,
    ulong dstOffsetInBytes,
    const __global uchar* pPattern,
    const ulong patternSizeInEls )
{
    const size_t gid = get_global_id(0);
    const uchar value = pPattern[ gid & (patternSizeInEls - 1) ];
    pDst[ gid + dstOffsetInBytes ] = value;
}
// Stateless pattern fill (64-bit offsets), one uint per work-item.
// The destination is viewed as an array of uint starting at
// pDst + dstOffsetInBytes; the pattern is indexed in uint elements with
// gid & (patternSizeInEls - 1), which equals gid % patternSizeInEls only when
// patternSizeInEls is a power of two (presumably guaranteed by the caller - confirm).
__kernel void FillBufferMiddle(
    __global uchar* pDst,
    ulong dstOffsetInBytes,
    const __global uint* pPattern,
    const ulong patternSizeInEls )
{
    const size_t gid = get_global_id(0);
    __global uint* dstWords = (__global uint*)(pDst + dstOffsetInBytes);
    const uint value = pPattern[ gid & (patternSizeInEls - 1) ];
    dstWords[gid] = value;
}
// Stateless pattern fill (64-bit offsets), one byte per work-item; same body as
// the "left leftover" kernel, kept as a separately named entry point.
// The pattern index is gid & (patternSizeInEls - 1), which equals
// gid % patternSizeInEls only when patternSizeInEls is a power of two
// (presumably guaranteed by the caller - confirm).
__kernel void FillBufferRightLeftover(
    __global uchar* pDst,
    ulong dstOffsetInBytes,
    const __global uchar* pPattern,
    const ulong patternSizeInEls )
{
    const size_t gid = get_global_id(0);
    const uchar value = pPattern[ gid & (patternSizeInEls - 1) ];
    pDst[ gid + dstOffsetInBytes ] = value;
}
)==="

View File

@@ -1,18 +0,0 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
R"===(
// Writes `color` to one texel of a 1D image per work-item.
// Only dstOffset.x is used; it is added to the work-item id.
__kernel void FillImage1d(
    __write_only image1d_t output,
    uint4 color,
    int4 dstOffset) {
    const int item = get_global_id(0);
    write_imageui(output, item + dstOffset.x, color);
}
)==="

View File

@@ -1,19 +0,0 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
R"===(
// Writes `color` to one texel of a 2D image per work-item.
// Only dstOffset.x/.y are used; they are added to the 2D work-item id.
__kernel void FillImage2d(
    __write_only image2d_t output,
    uint4 color,
    int4 dstOffset) {
    const int x = get_global_id(0);
    const int y = get_global_id(1);
    const int2 target = (int2)(x, y) + dstOffset.xy;
    write_imageui(output, target, color);
}
)==="

View File

@@ -1,22 +0,0 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
R"===(
#pragma OPENCL EXTENSION cl_khr_3d_image_writes : enable
// Writes `color` to one texel of a 3D image per work-item.
// dstOffset is added to the work-item id (x, y, z, 0) to form the coordinate.
__kernel void FillImage3d(
    __write_only image3d_t output,
    uint4 color,
    int4 dstOffset) {
    const int x = get_global_id(0);
    const int y = get_global_id(1);
    const int z = get_global_id(2);
    const int4 item = (int4)(x, y, z, 0);
    write_imageui(output, item + dstOffset, color);
}
)==="

View File

@@ -4,26 +4,6 @@
# SPDX-License-Identifier: MIT
#
# Object library with the built-in kernel source registration code.
# EXCLUDE_FROM_ALL: it is only built when another target depends on it.
add_library(${BUILTINS_SOURCES_LIB_NAME} OBJECT EXCLUDE_FROM_ALL
  CMakeLists.txt
  built_ins_registry.h
  register_copy_kernels_source.cpp
)

# Both properties are set in a single call: position-independent code and the
# IDE folder the target is grouped under.
set_target_properties(${BUILTINS_SOURCES_LIB_NAME} PROPERTIES
  POSITION_INDEPENDENT_CODE ON
  FOLDER "built_ins"
)

# Include paths needed only to compile this target (not propagated to consumers).
target_include_directories(${BUILTINS_SOURCES_LIB_NAME} PRIVATE
  ${ENGINE_NODE_DIR}
  ${KHRONOS_HEADERS_DIR}
  ${KHRONOS_GL_HEADERS_DIR}
  ${NEO__GMM_INCLUDE_DIR}
  ${NEO__IGC_INCLUDE_DIR}
  ${THIRD_PARTY_DIR}
)

# Project-defined helper (not a CMake built-in); defined outside this file.
add_subdirectories()
add_library(${BUILTINS_VME_LIB_NAME} OBJECT EXCLUDE_FROM_ALL
CMakeLists.txt
register_ext_vme_source.cpp

View File

@@ -1,28 +0,0 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include "opencl/source/built_ins/built_ins.h"
#include <string>
#include <unordered_map>
namespace NEO {

// Helper whose constructors store a named resource in the
// EmbeddedStorageRegistry singleton. Constructing an instance is the
// registration; the object itself carries no state.
struct RegisterEmbeddedResource {
    // Stores `resourceLength` bytes starting at `resource` under `name`.
    RegisterEmbeddedResource(const char *name, const char *resource, size_t resourceLength) {
        auto &registry = EmbeddedStorageRegistry::getInstance();
        registry.store(name, createBuiltinResource(resource, resourceLength));
    }

    // Convenience overload for string resources. The length passed on is
    // size() + 1, i.e. it includes the terminating null character.
    RegisterEmbeddedResource(const char *name, std::string &&resource)
        : RegisterEmbeddedResource(name, resource.data(), resource.size() + 1) {
    }
};

} // namespace NEO

View File

@@ -1,167 +0,0 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "opencl/source/built_ins/registry/built_ins_registry.h"
#include <string>
namespace NEO {
// Each static object below registers the source of one built-in kernel group.
// The resource name is built by createBuiltinResourceName() from the
// EBuiltInOps value and the extension returned for BuiltinCode::ECodeType::Source.
// The resource content is the std::string constructed from whatever the
// #include'd .builtin_kernel file expands to (expected to be a raw string
// literal). The #include must stay on its own line inside the std::string(...)
// call, so these registrations cannot be wrapped in a function-like macro.

// Buffer-to-buffer copies (stateful and stateless variants).
static RegisterEmbeddedResource registerCopyBufferToBufferSrc(
createBuiltinResourceName(
EBuiltInOps::CopyBufferToBuffer,
BuiltinCode::getExtension(BuiltinCode::ECodeType::Source))
.c_str(),
std::string(
#include "opencl/source/built_ins/kernels/copy_buffer_to_buffer.builtin_kernel"
));
static RegisterEmbeddedResource registerCopyBufferToBufferStatelessSrc(
createBuiltinResourceName(
EBuiltInOps::CopyBufferToBufferStateless,
BuiltinCode::getExtension(BuiltinCode::ECodeType::Source))
.c_str(),
std::string(
#include "opencl/source/built_ins/kernels/copy_buffer_to_buffer_stateless.builtin_kernel"
));
// Rectangular buffer copies.
static RegisterEmbeddedResource registerCopyBufferRectSrc(
createBuiltinResourceName(
EBuiltInOps::CopyBufferRect,
BuiltinCode::getExtension(BuiltinCode::ECodeType::Source))
.c_str(),
std::string(
#include "opencl/source/built_ins/kernels/copy_buffer_rect.builtin_kernel"
));
static RegisterEmbeddedResource registerCopyBufferRectStatelessSrc(
createBuiltinResourceName(
EBuiltInOps::CopyBufferRectStateless,
BuiltinCode::getExtension(BuiltinCode::ECodeType::Source))
.c_str(),
std::string(
#include "opencl/source/built_ins/kernels/copy_buffer_rect_stateless.builtin_kernel"
));
// Buffer fills.
static RegisterEmbeddedResource registerFillBufferSrc(
createBuiltinResourceName(
EBuiltInOps::FillBuffer,
BuiltinCode::getExtension(BuiltinCode::ECodeType::Source))
.c_str(),
std::string(
#include "opencl/source/built_ins/kernels/fill_buffer.builtin_kernel"
));
static RegisterEmbeddedResource registerFillBufferStatelessSrc(
createBuiltinResourceName(
EBuiltInOps::FillBufferStateless,
BuiltinCode::getExtension(BuiltinCode::ECodeType::Source))
.c_str(),
std::string(
#include "opencl/source/built_ins/kernels/fill_buffer_stateless.builtin_kernel"
));
// Buffer <-> 3D image copies.
static RegisterEmbeddedResource registerCopyBufferToImage3dSrc(
createBuiltinResourceName(
EBuiltInOps::CopyBufferToImage3d,
BuiltinCode::getExtension(BuiltinCode::ECodeType::Source))
.c_str(),
std::string(
#include "opencl/source/built_ins/kernels/copy_buffer_to_image3d.builtin_kernel"
));
static RegisterEmbeddedResource registerCopyBufferToImage3dStatelessSrc(
createBuiltinResourceName(
EBuiltInOps::CopyBufferToImage3dStateless,
BuiltinCode::getExtension(BuiltinCode::ECodeType::Source))
.c_str(),
std::string(
#include "opencl/source/built_ins/kernels/copy_buffer_to_image3d_stateless.builtin_kernel"
));
static RegisterEmbeddedResource registerCopyImage3dToBufferSrc(
createBuiltinResourceName(
EBuiltInOps::CopyImage3dToBuffer,
BuiltinCode::getExtension(BuiltinCode::ECodeType::Source))
.c_str(),
std::string(
#include "opencl/source/built_ins/kernels/copy_image3d_to_buffer.builtin_kernel"
));
static RegisterEmbeddedResource registerCopyImage3dToBufferStatelessSrc(
createBuiltinResourceName(
EBuiltInOps::CopyImage3dToBufferStateless,
BuiltinCode::getExtension(BuiltinCode::ECodeType::Source))
.c_str(),
std::string(
#include "opencl/source/built_ins/kernels/copy_image3d_to_buffer_stateless.builtin_kernel"
));
// Image-to-image copies, one registration per image dimensionality.
static RegisterEmbeddedResource registerCopyImageToImage1dSrc(
createBuiltinResourceName(
EBuiltInOps::CopyImageToImage1d,
BuiltinCode::getExtension(BuiltinCode::ECodeType::Source))
.c_str(),
std::string(
#include "opencl/source/built_ins/kernels/copy_image_to_image1d.builtin_kernel"
));
static RegisterEmbeddedResource registerCopyImageToImage2dSrc(
createBuiltinResourceName(
EBuiltInOps::CopyImageToImage2d,
BuiltinCode::getExtension(BuiltinCode::ECodeType::Source))
.c_str(),
std::string(
#include "opencl/source/built_ins/kernels/copy_image_to_image2d.builtin_kernel"
));
static RegisterEmbeddedResource registerCopyImageToImage3dSrc(
createBuiltinResourceName(
EBuiltInOps::CopyImageToImage3d,
BuiltinCode::getExtension(BuiltinCode::ECodeType::Source))
.c_str(),
std::string(
#include "opencl/source/built_ins/kernels/copy_image_to_image3d.builtin_kernel"
));
// Image fills, one registration per image dimensionality.
static RegisterEmbeddedResource registerFillImage1dSrc(
createBuiltinResourceName(
EBuiltInOps::FillImage1d,
BuiltinCode::getExtension(BuiltinCode::ECodeType::Source))
.c_str(),
std::string(
#include "opencl/source/built_ins/kernels/fill_image1d.builtin_kernel"
));
static RegisterEmbeddedResource registerFillImage2dSrc(
createBuiltinResourceName(
EBuiltInOps::FillImage2d,
BuiltinCode::getExtension(BuiltinCode::ECodeType::Source))
.c_str(),
std::string(
#include "opencl/source/built_ins/kernels/fill_image2d.builtin_kernel"
));
static RegisterEmbeddedResource registerFillImage3dSrc(
createBuiltinResourceName(
EBuiltInOps::FillImage3d,
BuiltinCode::getExtension(BuiltinCode::ECodeType::Source))
.c_str(),
std::string(
#include "opencl/source/built_ins/kernels/fill_image3d.builtin_kernel"
));
// Aux translation kernel.
static RegisterEmbeddedResource registerAuxTranslationSrc(
createBuiltinResourceName(
EBuiltInOps::AuxTranslation,
BuiltinCode::getExtension(BuiltinCode::ECodeType::Source))
.c_str(),
std::string(
#include "opencl/source/built_ins/kernels/aux_translation.builtin_kernel"
));
} // namespace NEO

View File

@@ -5,8 +5,9 @@
*
*/
#include "shared/source/built_ins/registry/built_ins_registry.h"
#include "opencl/source/built_ins/built_in_ops_vme.h"
#include "opencl/source/built_ins/registry/built_ins_registry.h"
#include <string>

View File

@@ -1,102 +0,0 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "opencl/source/built_ins/sip.h"
#include "shared/source/device/device.h"
#include "shared/source/execution_environment/execution_environment.h"
#include "shared/source/helpers/debug_helpers.h"
#include "shared/source/helpers/hw_helper.h"
#include "shared/source/helpers/ptr_math.h"
#include "shared/source/helpers/string.h"
#include "shared/source/memory_manager/graphics_allocation.h"
#include "opencl/source/built_ins/built_ins.h"
#include "opencl/source/program/kernel_info.h"
#include "opencl/source/program/program.h"
namespace NEO {
// Out-of-class definition of the debug-surface size limit declared in SipKernel.
// The value is hard-coded for now (see the trailing note).
const size_t SipKernel::maxDbgSurfaceSize = 0x49c000; // proper value should be taken from compiler when it's ready
// Returns the internal compiler option string that corresponds to the requested SIP
// kernel type. A type without a dedicated case triggers DEBUG_BREAK_IF and yields an
// empty option string. The returned pointer refers to a string literal.
const char *getSipKernelCompilerInternalOptions(SipKernelType kernel) {
    const char *internalOptions = "";
    switch (kernel) {
    case SipKernelType::Csr:
        internalOptions = "-cl-include-sip-csr";
        break;
    case SipKernelType::DbgCsr:
        internalOptions = "-cl-include-sip-kernel-debug -cl-include-sip-csr -cl-set-bti:0";
        break;
    case SipKernelType::DbgCsrLocal:
        internalOptions = "-cl-include-sip-kernel-local-debug -cl-include-sip-csr -cl-set-bti:0";
        break;
    default:
        // Unknown SIP type: flag it in debug builds, fall through with no options.
        DEBUG_BREAK_IF(true);
        break;
    }
    return internalOptions;
}
const char *getSipLlSrc(const Device &device) {
#define M_DUMMY_LL_SRC \
"define void @f() { \n" \
" ret void \n" \
"} \n" \
"!opencl.compiler.options = !{!0} \n" \
"!opencl.kernels = !{!1} \n" \
"!0 = !{} \n" \
"!1 = !{void()* @f, !2, !3, !4, !5, !6, !7} \n" \
"!2 = !{!\"kernel_arg_addr_space\"} \n" \
"!3 = !{!\"kernel_arg_access_qual\"} \n" \
"!4 = !{!\"kernel_arg_type\"} \n" \
"!5 = !{!\"kernel_arg_type_qual\"} \n" \
"!6 = !{!\"kernel_arg_base_type\"} \n" \
"!7 = !{!\"kernel_arg_name\"} \n"
constexpr const char *llDummySrc32 =
"target datalayout = \"e-p:32:32:32\" \n"
"target triple = \"spir\" \n" M_DUMMY_LL_SRC;
constexpr const char *llDummySrc64 =
"target datalayout = \"e-p:64:64:64\" \n"
"target triple = \"spir64\" \n" M_DUMMY_LL_SRC;
#undef M_DUMMY_LL_SRC
const uint32_t ptrSize = device.getDeviceInfo().force32BitAddressess ? 4 : sizeof(void *);
return (ptrSize == 8) ? llDummySrc64 : llDummySrc32;
}
// Stores the SIP kernel type and the program pointer exactly as given; no validation
// is performed here. The destructor later calls release() on the stored program.
SipKernel::SipKernel(SipKernelType type, Program *sipProgram)
    : type(type), program(sipProgram) {
}
// Calls release() on the program held by this SIP kernel.
// The constructor stores whatever pointer it is handed and the member defaults to
// nullptr, so an instance may legitimately hold no program; skip the call in that case
// instead of dereferencing a null pointer during destruction.
SipKernel::~SipKernel() {
    if (program != nullptr) {
        program->release();
    }
}
// Returns the graphics allocation attached to the program's first kernel info
// (index 0).
GraphicsAllocation *SipKernel::getSipAllocation() const {
    auto firstKernelInfo = program->getKernelInfo(size_t{0});
    return firstKernelInfo->getGraphicsAllocation();
}
// Returns a pointer into the first kernel's heap, advanced by that kernel's
// systemKernelOffset, viewed as raw bytes.
const char *SipKernel::getBinary() const {
    auto sipInfo = program->getKernelInfo(size_t{0});
    auto sipStart = ptrOffset(sipInfo->heapInfo.pKernelHeap, sipInfo->systemKernelOffset);
    return reinterpret_cast<const char *>(sipStart);
}
// Returns the first kernel's KernelHeapSize reduced by its systemKernelOffset, i.e. the
// number of heap bytes that follow the position returned by getBinary().
size_t SipKernel::getBinarySize() const {
    auto sipInfo = program->getKernelInfo(size_t{0});
    const auto &heap = sipInfo->heapInfo;
    return heap.pKernelHeader->KernelHeapSize - sipInfo->systemKernelOffset;
}
// Asks the hardware helper registered for the given core family which SIP kernel type
// applies, taking the debugging state into account.
SipKernelType SipKernel::getSipKernelType(GFXCORE_FAMILY family, bool debuggingActive) {
    return HwHelper::get(family).getSipKernelType(debuggingActive);
}
// Resolves the SIP kernel type for the device (from its render core family and whether a
// debugger is active), fetches the matching SIP kernel from the built-ins owned by the
// device's execution environment, and returns that kernel's graphics allocation.
GraphicsAllocation *SipKernel::getSipKernelAllocation(Device &device) {
    const auto coreFamily = device.getHardwareInfo().platform.eRenderCoreFamily;
    const auto sipType = SipKernel::getSipKernelType(coreFamily, device.isDebuggerActive());

    auto builtIns = device.getExecutionEnvironment()->getBuiltIns();
    const auto &sipKernel = builtIns->getSipKernel(sipType, device);
    return sipKernel.getSipAllocation();
}
} // namespace NEO

View File

@@ -1,51 +0,0 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include "shared/source/built_ins/sip_kernel_type.h"
#include "shared/source/helpers/hw_info.h"
#include <memory>
namespace NEO {
class Device;
class Program;
class GraphicsAllocation;
// Returns the internal compiler option string associated with the given SIP kernel type.
const char *getSipKernelCompilerInternalOptions(SipKernelType kernel);
// Returns the LLVM-IR source text used for the SIP program on the given device.
const char *getSipLlSrc(const Device &device);
// Pairs a SIP kernel type with the Program that holds it and exposes the kernel binary
// and its graphics allocation.
// Copying is disabled; moving is defaulted.
// NOTE(review): the defaulted move operations copy `program` without clearing it in the
// source object, while ~SipKernel() calls program->release(). A moved-from instance
// would therefore release the same program a second time, and move-assignment drops the
// previous program without releasing it -- confirm whether moves are actually used.
class SipKernel {
public:
// Stores the type and the program pointer; the destructor calls release() on the program.
SipKernel(SipKernelType type, Program *sipProgram);
SipKernel(const SipKernel &) = delete;
SipKernel &operator=(const SipKernel &) = delete;
SipKernel(SipKernel &&) = default;
SipKernel &operator=(SipKernel &&) = default;
virtual ~SipKernel();
// Pointer to the SIP binary inside the program's first kernel heap.
const char *getBinary() const;
// Size of the data returned by getBinary().
size_t getBinarySize() const;
// Type this object was constructed with.
SipKernelType getType() const {
return type;
}
// Size limit for the debug surface; defined in the implementation file.
static const size_t maxDbgSurfaceSize;
// Graphics allocation of the program's first kernel.
// MOCKABLE_VIRTUAL presumably makes this overridable in test builds -- verify macro definition.
MOCKABLE_VIRTUAL GraphicsAllocation *getSipAllocation() const;
// Selects the SIP kernel type for a core family and debugging state.
static SipKernelType getSipKernelType(GFXCORE_FAMILY family, bool debuggingActive);
// Convenience lookup: graphics allocation of the SIP kernel that applies to `device`.
static GraphicsAllocation *getSipKernelAllocation(Device &device);
protected:
// COUNT is the initial value before the constructor assigns the real type.
SipKernelType type = SipKernelType::COUNT;
// Program holding the SIP kernel; release() is called on it in the destructor.
Program *program = nullptr;
};
} // namespace NEO

View File

@@ -5,7 +5,8 @@
*
*/
#include "opencl/source/built_ins/built_ins.h"
#include "shared/source/built_ins/built_ins.h"
#include "opencl/source/built_ins/builtins_dispatch_builder.h"
namespace NEO {

View File

@@ -7,10 +7,10 @@
#include "opencl/source/built_ins/vme_builtin.h"
#include "shared/source/built_ins/built_ins.h"
#include "shared/source/device/device.h"
#include "opencl/source/built_ins/built_in_ops_vme.h"
#include "opencl/source/built_ins/built_ins.h"
#include "opencl/source/built_ins/builtins_dispatch_builder.h"
#include "opencl/source/built_ins/populate_built_ins.inl"
#include "opencl/source/built_ins/vme_dispatch_builder.h"

View File

@@ -7,10 +7,11 @@
#pragma once
#include "shared/source/built_ins/built_ins.h"
#include "opencl/source/accelerators/intel_accelerator.h"
#include "opencl/source/accelerators/intel_motion_estimation.h"
#include "opencl/source/built_ins/built_in_ops_vme.h"
#include "opencl/source/built_ins/built_ins.h"
#include "opencl/source/built_ins/builtins_dispatch_builder.h"
#include "opencl/source/helpers/dispatch_info_builder.h"
#include "opencl/source/mem_obj/buffer.h"

View File

@@ -6,6 +6,7 @@
*/
#pragma once
#include "shared/source/built_ins/built_ins.h"
#include "shared/source/command_stream/command_stream_receiver.h"
#include "shared/source/helpers/array_count.h"
#include "shared/source/helpers/engine_node_helper.h"
@@ -17,7 +18,6 @@
#include "shared/source/utilities/range.h"
#include "shared/source/utilities/tag_allocator.h"
#include "opencl/source/built_ins/built_ins.h"
#include "opencl/source/built_ins/builtins_dispatch_builder.h"
#include "opencl/source/builtin_kernels_simulation/scheduler_simulation.h"
#include "opencl/source/command_queue/command_queue_hw.h"

View File

@@ -6,9 +6,9 @@
*/
#pragma once
#include "shared/source/built_ins/built_ins.h"
#include "shared/source/command_stream/command_stream_receiver.h"
#include "opencl/source/built_ins/built_ins.h"
#include "opencl/source/command_queue/command_queue_hw.h"
#include "opencl/source/command_queue/enqueue_common.h"
#include "opencl/source/helpers/hardware_commands_helper.h"

View File

@@ -6,9 +6,9 @@
*/
#pragma once
#include "shared/source/built_ins/built_ins.h"
#include "shared/source/command_stream/command_stream_receiver.h"
#include "opencl/source/built_ins/built_ins.h"
#include "opencl/source/command_queue/command_queue_hw.h"
#include "opencl/source/helpers/hardware_commands_helper.h"
#include "opencl/source/mem_obj/buffer.h"

View File

@@ -6,9 +6,9 @@
*/
#pragma once
#include "shared/source/built_ins/built_ins.h"
#include "shared/source/command_stream/command_stream_receiver.h"
#include "opencl/source/built_ins/built_ins.h"
#include "opencl/source/command_queue/command_queue_hw.h"
#include "opencl/source/helpers/hardware_commands_helper.h"
#include "opencl/source/helpers/mipmap.h"

View File

@@ -6,10 +6,10 @@
*/
#pragma once
#include "shared/source/built_ins/built_ins.h"
#include "shared/source/command_stream/command_stream_receiver.h"
#include "shared/source/helpers/basic_math.h"
#include "opencl/source/built_ins/built_ins.h"
#include "opencl/source/command_queue/command_queue_hw.h"
#include "opencl/source/helpers/hardware_commands_helper.h"
#include "opencl/source/helpers/mipmap.h"

View File

@@ -6,9 +6,9 @@
*/
#pragma once
#include "shared/source/built_ins/built_ins.h"
#include "shared/source/command_stream/command_stream_receiver.h"
#include "opencl/source/built_ins/built_ins.h"
#include "opencl/source/command_queue/command_queue_hw.h"
#include "opencl/source/helpers/hardware_commands_helper.h"
#include "opencl/source/helpers/mipmap.h"

View File

@@ -6,11 +6,11 @@
*/
#pragma once
#include "shared/source/built_ins/built_ins.h"
#include "shared/source/command_stream/command_stream_receiver.h"
#include "shared/source/memory_manager/internal_allocation_storage.h"
#include "shared/source/memory_manager/memory_manager.h"
#include "opencl/source/built_ins/built_ins.h"
#include "opencl/source/command_queue/command_queue_hw.h"
#include "opencl/source/helpers/hardware_commands_helper.h"
#include "opencl/source/mem_obj/buffer.h"

View File

@@ -6,10 +6,10 @@
*/
#pragma once
#include "shared/source/built_ins/built_ins.h"
#include "shared/source/command_stream/command_stream_receiver.h"
#include "shared/source/helpers/basic_math.h"
#include "opencl/source/built_ins/built_ins.h"
#include "opencl/source/command_queue/command_queue_hw.h"
#include "opencl/source/helpers/hardware_commands_helper.h"
#include "opencl/source/mem_obj/image.h"

View File

@@ -6,11 +6,11 @@
*/
#pragma once
#include "shared/source/built_ins/built_ins.h"
#include "shared/source/command_stream/command_stream_receiver.h"
#include "shared/source/helpers/cache_policy.h"
#include "shared/source/memory_manager/unified_memory_manager.h"
#include "opencl/source/built_ins/built_ins.h"
#include "opencl/source/command_queue/command_queue_hw.h"
#include "opencl/source/command_queue/enqueue_common.h"
#include "opencl/source/helpers/hardware_commands_helper.h"

View File

@@ -6,9 +6,9 @@
*/
#pragma once
#include "shared/source/built_ins/built_ins.h"
#include "shared/source/command_stream/command_stream_receiver.h"
#include "opencl/source/built_ins/built_ins.h"
#include "opencl/source/command_queue/command_queue_hw.h"
#include "opencl/source/command_queue/enqueue_common.h"
#include "opencl/source/helpers/hardware_commands_helper.h"

View File

@@ -6,12 +6,12 @@
*/
#pragma once
#include "shared/source/built_ins/built_ins.h"
#include "shared/source/command_stream/command_stream_receiver.h"
#include "shared/source/helpers/basic_math.h"
#include "shared/source/helpers/cache_policy.h"
#include "shared/source/memory_manager/graphics_allocation.h"
#include "opencl/source/built_ins/built_ins.h"
#include "opencl/source/command_queue/command_queue_hw.h"
#include "opencl/source/context/context.h"
#include "opencl/source/event/event.h"

View File

@@ -6,10 +6,10 @@
*/
#pragma once
#include "shared/source/built_ins/built_ins.h"
#include "shared/source/memory_manager/surface.h"
#include "shared/source/memory_manager/unified_memory_manager.h"
#include "opencl/source/built_ins/built_ins.h"
#include "opencl/source/command_queue/command_queue_hw.h"
#include "opencl/source/command_queue/enqueue_common.h"
#include "opencl/source/event/event.h"

View File

@@ -6,11 +6,11 @@
*/
#pragma once
#include "shared/source/built_ins/built_ins.h"
#include "shared/source/command_stream/command_stream_receiver.h"
#include "shared/source/helpers/string.h"
#include "shared/source/memory_manager/unified_memory_manager.h"
#include "opencl/source/built_ins/built_ins.h"
#include "opencl/source/command_queue/command_queue_hw.h"
#include "opencl/source/helpers/hardware_commands_helper.h"
#include "opencl/source/mem_obj/buffer.h"

View File

@@ -6,9 +6,9 @@
*/
#pragma once
#include "shared/source/built_ins/built_ins.h"
#include "shared/source/command_stream/command_stream_receiver.h"
#include "opencl/source/built_ins/built_ins.h"
#include "opencl/source/command_queue/command_queue_hw.h"
#include "opencl/source/helpers/hardware_commands_helper.h"
#include "opencl/source/mem_obj/buffer.h"

View File

@@ -6,11 +6,11 @@
*/
#pragma once
#include "shared/source/built_ins/built_ins.h"
#include "shared/source/command_stream/command_stream_receiver.h"
#include "shared/source/helpers/basic_math.h"
#include "shared/source/memory_manager/graphics_allocation.h"
#include "opencl/source/built_ins/built_ins.h"
#include "opencl/source/command_queue/command_queue_hw.h"
#include "opencl/source/helpers/hardware_commands_helper.h"
#include "opencl/source/helpers/mipmap.h"

View File

@@ -7,6 +7,7 @@
#pragma once
#include "shared/source/built_ins/built_ins.h"
#include "shared/source/command_stream/linear_stream.h"
#include "shared/source/command_stream/preemption.h"
#include "shared/source/helpers/register_offsets.h"
@@ -15,7 +16,6 @@
#include "shared/source/indirect_heap/indirect_heap.h"
#include "shared/source/utilities/tag_allocator.h"
#include "opencl/source/built_ins/built_ins.h"
#include "opencl/source/command_queue/command_queue.h"
#include "opencl/source/context/context.h"
#include "opencl/source/device_queue/device_queue_hw.h"

View File

@@ -7,6 +7,7 @@
#include "opencl/source/context/context.h"
#include "shared/source/built_ins/built_ins.h"
#include "shared/source/command_stream/command_stream_receiver.h"
#include "shared/source/compiler_interface/compiler_interface.h"
#include "shared/source/debug_settings/debug_settings_manager.h"
@@ -17,7 +18,6 @@
#include "shared/source/memory_manager/memory_manager.h"
#include "shared/source/memory_manager/unified_memory_manager.h"
#include "opencl/source/built_ins/built_ins.h"
#include "opencl/source/command_queue/command_queue.h"
#include "opencl/source/device/cl_device.h"
#include "opencl/source/device_queue/device_queue.h"

View File

@@ -7,10 +7,10 @@
#include "shared/source/debugger/debugger.h"
#include "shared/source/built_ins/sip.h"
#include "shared/source/built_ins/sip_kernel_type.h"
#include "shared/source/helpers/hw_info.h"
#include "opencl/source/built_ins/sip.h"
#include "opencl/source/source_level_debugger/source_level_debugger.h"
namespace NEO {

View File

@@ -6,10 +6,9 @@
*/
#pragma once
#include "shared/source/built_ins/built_ins.h"
#include "shared/source/execution_environment/execution_environment.h"
#include "opencl/source/built_ins/built_ins.h"
namespace NEO {
class Device;
@@ -18,6 +17,6 @@ Program *createProgramForSip(ExecutionEnvironment &executionEnvironment,
Context *context,
std::vector<char> &binary,
size_t size,
cl_int *errcodeRet,
int *errcodeRet,
Device *device);
} // namespace NEO

View File

@@ -6,9 +6,9 @@
*/
#pragma once
#include "shared/source/built_ins/built_ins.h"
#include "shared/source/indirect_heap/indirect_heap.h"
#include "opencl/source/built_ins/built_ins.h"
#include "opencl/source/helpers/per_thread_data.h"
#include "opencl/source/kernel/kernel.h"

View File

@@ -7,6 +7,7 @@
#include "opencl/source/kernel/kernel.h"
#include "shared/source/built_ins/built_ins.h"
#include "shared/source/command_stream/command_stream_receiver.h"
#include "shared/source/debug_settings/debug_settings_manager.h"
#include "shared/source/gmm_helper/gmm_helper.h"
@@ -22,7 +23,6 @@
#include "opencl/source/accelerators/intel_accelerator.h"
#include "opencl/source/accelerators/intel_motion_estimation.h"
#include "opencl/source/built_ins/built_ins.h"
#include "opencl/source/built_ins/builtins_dispatch_builder.h"
#include "opencl/source/command_queue/command_queue.h"
#include "opencl/source/command_queue/gpgpu_walker.h"

View File

@@ -6,7 +6,8 @@
*/
#pragma once
#include "opencl/source/built_ins/built_ins.h"
#include "shared/source/built_ins/built_ins.h"
#include "opencl/source/kernel/kernel.h"
#include <algorithm>

View File

@@ -161,7 +161,7 @@ target_include_directories(igdrcl_tests PRIVATE
target_link_libraries(igdrcl_tests gmock-gtest ${IGDRCL_EXTRA_LIBS})
set(BUILT_IN_KERNEL_DIR "${NEO_SOURCE_DIR}/opencl/source/built_ins")
set(BUILT_IN_KERNEL_DIR "${NEO_SOURCE_DIR}/shared/source/built_ins")
function(neo_copy_test_files target product)
set(outputdir "${TargetDir}/${product}")

View File

@@ -5,11 +5,11 @@
*
*/
#include "shared/source/built_ins/built_ins.h"
#include "shared/source/compiler_interface/compiler_interface.h"
#include "shared/source/device/device.h"
#include "opencl/source/built_ins/built_in_ops_vme.h"
#include "opencl/source/built_ins/built_ins.h"
#include "opencl/source/built_ins/vme_builtin.h"
#include "opencl/source/context/context.h"
#include "opencl/source/device/cl_device.h"

View File

@@ -5,7 +5,8 @@
*
*/
#include "opencl/source/built_ins/built_ins.h"
#include "shared/source/built_ins/built_ins.h"
#include "opencl/source/mem_obj/image.h"
#include "opencl/source/sampler/sampler.h"
#include "opencl/test/unit_test/aub_tests/fixtures/aub_parent_kernel_fixture.h"

View File

@@ -5,7 +5,8 @@
*
*/
#include "opencl/source/built_ins/built_ins.h"
#include "shared/source/built_ins/built_ins.h"
#include "opencl/source/mem_obj/image.h"
#include "opencl/source/sampler/sampler.h"
#include "opencl/test/unit_test/aub_tests/fixtures/aub_parent_kernel_fixture.h"

View File

@@ -5,7 +5,8 @@
*
*/
#include "opencl/source/built_ins/built_ins.h"
#include "shared/source/built_ins/built_ins.h"
#include "opencl/source/mem_obj/image.h"
#include "opencl/source/sampler/sampler.h"
#include "opencl/test/unit_test/aub_tests/fixtures/aub_parent_kernel_fixture.h"

View File

@@ -5,7 +5,8 @@
*
*/
#include "opencl/source/built_ins/built_ins.h"
#include "shared/source/built_ins/built_ins.h"
#include "opencl/source/mem_obj/image.h"
#include "opencl/source/sampler/sampler.h"
#include "opencl/test/unit_test/aub_tests/fixtures/aub_parent_kernel_fixture.h"

View File

@@ -5,6 +5,7 @@
*
*/
#include "shared/source/built_ins/built_ins.h"
#include "shared/source/debug_settings/debug_settings_manager.h"
#include "shared/source/gmm_helper/gmm.h"
#include "shared/source/gmm_helper/gmm_helper.h"
@@ -15,7 +16,6 @@
#include "shared/test/unit_test/utilities/base_object_utils.h"
#include "opencl/source/built_ins/aux_translation_builtin.h"
#include "opencl/source/built_ins/built_ins.h"
#include "opencl/source/built_ins/builtins_dispatch_builder.h"
#include "opencl/source/built_ins/vme_builtin.h"
#include "opencl/source/built_ins/vme_dispatch_builder.h"

View File

@@ -5,8 +5,9 @@
*
*/
#include "opencl/source/built_ins/built_ins.h"
#include "opencl/source/built_ins/sip.h"
#include "shared/source/built_ins/built_ins.h"
#include "shared/source/built_ins/sip.h"
#include "opencl/test/unit_test/global_environment.h"
#include "opencl/test/unit_test/helpers/test_files.h"
#include "opencl/test/unit_test/mocks/mock_device.h"

View File

@@ -5,9 +5,9 @@
*
*/
#include "shared/source/built_ins/built_ins.h"
#include "shared/source/memory_manager/memory_constants.h"
#include "opencl/source/built_ins/built_ins.h"
#include "opencl/source/built_ins/builtins_dispatch_builder.h"
#include "opencl/source/helpers/dispatch_info.h"
#include "opencl/test/unit_test/command_queue/enqueue_copy_buffer_rect_fixture.h"

View File

@@ -5,9 +5,9 @@
*
*/
#include "shared/source/built_ins/built_ins.h"
#include "shared/source/helpers/ptr_math.h"
#include "opencl/source/built_ins/built_ins.h"
#include "opencl/source/built_ins/builtins_dispatch_builder.h"
#include "opencl/source/command_queue/command_queue_hw.h"
#include "opencl/source/helpers/dispatch_info.h"

View File

@@ -5,6 +5,7 @@
*
*/
#include "shared/source/built_ins/built_ins.h"
#include "shared/source/command_stream/command_stream_receiver.h"
#include "shared/source/helpers/aligned_memory.h"
#include "shared/source/helpers/ptr_math.h"
@@ -12,7 +13,6 @@
#include "shared/source/memory_manager/memory_manager.h"
#include "shared/source/os_interface/os_context.h"
#include "opencl/source/built_ins/built_ins.h"
#include "opencl/source/built_ins/builtins_dispatch_builder.h"
#include "opencl/source/command_queue/command_queue.h"
#include "opencl/source/helpers/dispatch_info.h"

View File

@@ -5,9 +5,9 @@
*
*/
#include "shared/source/built_ins/built_ins.h"
#include "shared/source/memory_manager/memory_constants.h"
#include "opencl/source/built_ins/built_ins.h"
#include "opencl/source/built_ins/builtins_dispatch_builder.h"
#include "opencl/source/event/event.h"
#include "opencl/source/helpers/dispatch_info.h"

View File

@@ -5,12 +5,12 @@
*
*/
#include "shared/source/built_ins/built_ins.h"
#include "shared/source/gmm_helper/gmm_helper.h"
#include "shared/source/helpers/cache_policy.h"
#include "shared/source/memory_manager/allocations_list.h"
#include "shared/test/unit_test/helpers/debug_manager_state_restore.h"
#include "opencl/source/built_ins/built_ins.h"
#include "opencl/source/built_ins/builtins_dispatch_builder.h"
#include "opencl/source/helpers/dispatch_info.h"
#include "opencl/test/unit_test/command_queue/enqueue_fixture.h"

View File

@@ -5,7 +5,8 @@
*
*/
#include "opencl/source/built_ins/built_ins.h"
#include "shared/source/built_ins/built_ins.h"
#include "opencl/source/built_ins/builtins_dispatch_builder.h"
#include "opencl/source/event/event.h"
#include "opencl/source/helpers/dispatch_info.h"

View File

@@ -5,10 +5,10 @@
*
*/
#include "shared/source/built_ins/built_ins.h"
#include "shared/source/memory_manager/allocations_list.h"
#include "shared/test/unit_test/helpers/debug_manager_state_restore.h"
#include "opencl/source/built_ins/built_ins.h"
#include "opencl/source/built_ins/builtins_dispatch_builder.h"
#include "opencl/source/helpers/dispatch_info.h"
#include "opencl/test/unit_test/command_queue/buffer_operations_fixture.h"

View File

@@ -5,7 +5,8 @@
*
*/
#include "opencl/source/built_ins/built_ins.h"
#include "shared/source/built_ins/built_ins.h"
#include "opencl/source/built_ins/builtins_dispatch_builder.h"
#include "opencl/source/command_queue/command_queue_hw.h"
#include "opencl/source/command_queue/enqueue_copy_image.h"

View File

@@ -5,7 +5,8 @@
*
*/
#include "opencl/source/built_ins/built_ins.h"
#include "shared/source/built_ins/built_ins.h"
#include "opencl/source/command_queue/command_queue_hw.h"
#include "opencl/source/command_queue/enqueue_barrier.h"
#include "opencl/source/command_queue/enqueue_marker.h"

View File

@@ -5,11 +5,11 @@
*
*/
#include "shared/source/built_ins/built_ins.h"
#include "shared/source/helpers/preamble.h"
#include "shared/source/memory_manager/graphics_allocation.h"
#include "shared/source/memory_manager/memory_constants.h"
#include "opencl/source/built_ins/built_ins.h"
#include "opencl/source/sharings/gl/gl_buffer.h"
#include "opencl/test/unit_test/command_queue/enqueue_fixture.h"
#include "opencl/test/unit_test/fixtures/hello_world_fixture.h"

View File

@@ -5,6 +5,7 @@
*
*/
#include "shared/source/built_ins/built_ins.h"
#include "shared/source/command_stream/command_stream_receiver.h"
#include "shared/source/command_stream/linear_stream.h"
#include "shared/source/command_stream/preemption.h"
@@ -20,7 +21,6 @@
#include "shared/test/unit_test/helpers/debug_manager_state_restore.h"
#include "shared/test/unit_test/helpers/ult_hw_helper.h"
#include "opencl/source/built_ins/built_ins.h"
#include "opencl/source/command_queue/command_queue_hw.h"
#include "opencl/source/command_queue/gpgpu_walker.h"
#include "opencl/source/event/user_event.h"

View File

@@ -5,6 +5,7 @@
*
*/
#include "shared/source/built_ins/built_ins.h"
#include "shared/source/command_stream/command_stream_receiver.h"
#include "shared/source/command_stream/linear_stream.h"
#include "shared/source/command_stream/preemption.h"
@@ -23,7 +24,6 @@
#include "shared/test/unit_test/helpers/debug_manager_state_restore.h"
#include "shared/test/unit_test/utilities/base_object_utils.h"
#include "opencl/source/built_ins/built_ins.h"
#include "opencl/source/command_queue/command_queue_hw.h"
#include "opencl/source/command_queue/gpgpu_walker.h"
#include "opencl/source/event/user_event.h"

View File

@@ -5,6 +5,7 @@
*
*/
#include "shared/source/built_ins/built_ins.h"
#include "shared/source/command_stream/preemption.h"
#include "shared/source/compiler_interface/compiler_interface.h"
#include "shared/source/device/device.h"
@@ -16,7 +17,6 @@
#include "shared/test/unit_test/utilities/destructor_counted.h"
#include "opencl/source/aub/aub_center.h"
#include "opencl/source/built_ins/built_ins.h"
#include "opencl/source/memory_manager/os_agnostic_memory_manager.h"
#include "opencl/source/platform/platform.h"
#include "opencl/source/source_level_debugger/source_level_debugger.h"

View File

@@ -5,9 +5,9 @@
*
*/
#include "shared/source/built_ins/built_ins.h"
#include "shared/test/unit_test/helpers/debug_manager_state_restore.h"
#include "opencl/source/built_ins/built_ins.h"
#include "opencl/source/command_queue/enqueue_kernel.h"
#include "opencl/source/device_queue/device_queue.h"
#include "opencl/source/scheduler/scheduler_kernel.h"

View File

@@ -7,9 +7,9 @@
#include "opencl/test/unit_test/fixtures/built_in_fixture.h"
#include "shared/source/built_ins/built_ins.h"
#include "shared/source/device/device.h"
#include "opencl/source/built_ins/built_ins.h"
#include "opencl/test/unit_test/global_environment.h"
#include "opencl/test/unit_test/helpers/kernel_binary_helper.h"
#include "opencl/test/unit_test/helpers/test_files.h"

View File

@@ -5,7 +5,8 @@
*
*/
#include "opencl/source/built_ins/built_ins.h"
#include "shared/source/built_ins/built_ins.h"
#include "opencl/source/command_queue/enqueue_kernel.h"
#include "opencl/source/command_queue/gpgpu_walker.h"
#include "opencl/source/device_queue/device_queue.h"

View File

@@ -5,10 +5,10 @@
*
*/
#include "shared/source/built_ins/built_ins.h"
#include "shared/source/command_stream/command_stream_receiver.h"
#include "shared/source/command_stream/linear_stream.h"
#include "opencl/source/built_ins/built_ins.h"
#include "opencl/source/command_queue/command_queue_hw.h"
#include "opencl/source/event/user_event.h"
#include "opencl/test/unit_test/fixtures/device_fixture.h"

View File

@@ -5,8 +5,9 @@
*
*/
#include "opencl/source/built_ins/built_ins.h"
#include "opencl/source/built_ins/sip.h"
#include "shared/source/built_ins/built_ins.h"
#include "shared/source/built_ins/sip.h"
#include "opencl/test/unit_test/global_environment.h"
#include "opencl/test/unit_test/helpers/test_files.h"
#include "opencl/test/unit_test/mocks/mock_device.h"

View File

@@ -5,7 +5,8 @@
*
*/
#include "opencl/source/built_ins/built_ins.h"
#include "shared/source/built_ins/built_ins.h"
#include "opencl/source/helpers/hardware_commands_helper.h"
#include "opencl/source/kernel/kernel.h"
#include "opencl/test/unit_test/fixtures/built_in_fixture.h"

View File

@@ -7,4 +7,4 @@
#include "opencl/test/unit_test/helpers/kernel_binary_helper.h"
const std::string KernelBinaryHelper::BUILT_INS("7030307152995455603");
const std::string KernelBinaryHelper::BUILT_INS("7206969092167061981");

View File

@@ -5,13 +5,13 @@
*
*/
#include "shared/source/built_ins/built_ins.h"
#include "shared/source/compiler_interface/compiler_interface.h"
#include "shared/source/helpers/aligned_memory.h"
#include "shared/source/image/image_surface_state.h"
#include "shared/source/os_interface/os_context.h"
#include "shared/test/unit_test/helpers/debug_manager_state_restore.h"
#include "opencl/source/built_ins/built_ins.h"
#include "opencl/source/helpers/mipmap.h"
#include "opencl/source/mem_obj/image.h"
#include "opencl/source/mem_obj/mem_obj_helper.h"

View File

@@ -6,7 +6,8 @@
*/
#pragma once
#include "opencl/source/built_ins/built_ins.h"
#include "shared/source/built_ins/built_ins.h"
#include "opencl/source/helpers/dispatch_info.h"
using namespace NEO;

View File

@@ -7,9 +7,10 @@
#pragma once
#include "opencl/source/built_ins/built_ins.h"
#include "shared/source/built_ins/built_ins.h"
#include "shared/source/built_ins/sip.h"
#include "opencl/source/built_ins/builtins_dispatch_builder.h"
#include "opencl/source/built_ins/sip.h"
#include "opencl/source/program/program.h"
#include <memory>

View File

@@ -7,11 +7,11 @@
#include "opencl/test/unit_test/mocks/mock_context.h"
#include "shared/source/built_ins/built_ins.h"
#include "shared/source/compiler_interface/compiler_interface.h"
#include "shared/source/memory_manager/deferred_deleter.h"
#include "shared/source/memory_manager/unified_memory_manager.h"
#include "opencl/source/built_ins/built_ins.h"
#include "opencl/source/command_queue/command_queue.h"
#include "opencl/source/memory_manager/os_agnostic_memory_manager.h"
#include "opencl/source/sharings/sharing.h"

View File

@@ -7,7 +7,8 @@
#pragma once
#include "opencl/source/built_ins/sip.h"
#include "shared/source/built_ins/sip.h"
#include "opencl/test/unit_test/mocks/mock_execution_environment.h"
#include <memory>

View File

@@ -349,7 +349,7 @@ TEST_F(OfflineCompilerTests, GoodParseBinToCharArray) {
familyNameWithType + "[10] = {\n"
" 0x40032302, 0x90800756, 0x05340301, 0x66097860, 0x101010ff, 0x40032302, 0x90800756, 0x05340301, \n"
" 0x66097860, 0xff000000};\n\n"
"#include \"opencl/source/built_ins/registry/built_ins_registry.h\"\n\n"
"#include \"shared/source/built_ins/registry/built_ins_registry.h\"\n\n"
"namespace NEO {\n"
"static RegisterEmbeddedResource registerSchedulerBin(\n"
" \"" +

View File

@@ -6,7 +6,8 @@
*/
#pragma once
#include "opencl/source/built_ins/built_ins.h"
#include "shared/source/built_ins/built_ins.h"
#include "opencl/test/unit_test/fixtures/context_fixture.h"
#include "opencl/test/unit_test/fixtures/device_fixture.h"
#include "opencl/test/unit_test/fixtures/program_fixture.h"

View File

@@ -5,40 +5,37 @@
*
*/
__kernel void fullCopy(__global const uint* src, __global uint* dst) {
__kernel void fullCopy(__global const uint *src, __global uint *dst) {
unsigned int gid = get_global_id(0);
uint4 loaded = vload4(gid, src);
vstore4(loaded, gid, dst);
}
__kernel void CopyBufferToBufferBytes(
const __global uchar* pSrc,
__global uchar* pDst,
const __global uchar *pSrc,
__global uchar *pDst,
uint srcOffsetInBytes,
uint dstOffsetInBytes,
uint bytesToRead )
{
pSrc += ( srcOffsetInBytes + get_global_id(0) );
pDst += ( dstOffsetInBytes + get_global_id(0) );
pDst[ 0 ] = pSrc[ 0 ];
uint bytesToRead) {
pSrc += (srcOffsetInBytes + get_global_id(0));
pDst += (dstOffsetInBytes + get_global_id(0));
pDst[0] = pSrc[0];
}
__kernel void CopyBufferToBufferLeftLeftover(
const __global uchar* pSrc,
__global uchar* pDst,
const __global uchar *pSrc,
__global uchar *pDst,
uint srcOffsetInBytes,
uint dstOffsetInBytes)
{
uint dstOffsetInBytes) {
unsigned int gid = get_global_id(0);
pDst[ gid + dstOffsetInBytes ] = pSrc[ gid + srcOffsetInBytes ];
pDst[gid + dstOffsetInBytes] = pSrc[gid + srcOffsetInBytes];
}
__kernel void CopyBufferToBufferMiddle(
const __global uint* pSrc,
__global uint* pDst,
const __global uint *pSrc,
__global uint *pDst,
uint srcOffsetInBytes,
uint dstOffsetInBytes)
{
uint dstOffsetInBytes) {
unsigned int gid = get_global_id(0);
pDst += dstOffsetInBytes >> 2;
pSrc += srcOffsetInBytes >> 2;
@@ -47,55 +44,49 @@ __kernel void CopyBufferToBufferMiddle(
}
__kernel void CopyBufferToBufferRightLeftover(
const __global uchar* pSrc,
__global uchar* pDst,
const __global uchar *pSrc,
__global uchar *pDst,
uint srcOffsetInBytes,
uint dstOffsetInBytes)
{
uint dstOffsetInBytes) {
unsigned int gid = get_global_id(0);
pDst[ gid + dstOffsetInBytes ] = pSrc[ gid + srcOffsetInBytes ];
pDst[gid + dstOffsetInBytes] = pSrc[gid + srcOffsetInBytes];
}
// assumption is local work size = pattern size
__kernel void FillBufferBytes(
__global uchar* pDst,
__global uchar *pDst,
uint dstOffsetInBytes,
const __global uchar* pPattern )
{
const __global uchar *pPattern) {
uint dstIndex = get_global_id(0) + dstOffsetInBytes;
uint srcIndex = get_local_id(0);
pDst[dstIndex] = pPattern[srcIndex];
}
__kernel void FillBufferLeftLeftover(
__global uchar* pDst,
__global uchar *pDst,
uint dstOffsetInBytes,
const __global uchar* pPattern,
const uint patternSizeInEls )
{
const __global uchar *pPattern,
const uint patternSizeInEls) {
uint gid = get_global_id(0);
pDst[ gid + dstOffsetInBytes ] = pPattern[ gid & (patternSizeInEls - 1) ];
pDst[gid + dstOffsetInBytes] = pPattern[gid & (patternSizeInEls - 1)];
}
// Dword-granular pattern fill: each work-item writes one 32-bit uint.
//   pDst             - destination buffer, declared as bytes
//   dstOffsetInBytes - byte offset applied to pDst before it is viewed as uint*
//   pPattern         - pattern data, read as 32-bit elements
//   patternSizeInEls - pattern length in uint elements
// The destination is `pDst + dstOffsetInBytes` reinterpreted as a uint array and
// indexed by the global id, so work-item N writes bytes [4N, 4N+3] past the offset.
// The pattern index uses `gid & (patternSizeInEls - 1)`, which is a modulo only
// when patternSizeInEls is a power of two.
// NOTE(review): presumably the host guarantees a power-of-two element count and
// a 4-byte-aligned `pDst + dstOffsetInBytes` for the 32-bit store - verify.
// NOTE(review): several lines below appear twice (previous and reformatted
// spelling of the same line); the two spellings are token-equivalent.
__kernel void FillBufferMiddle(
__global uchar* pDst,
__global uchar *pDst,
uint dstOffsetInBytes,
const __global uint* pPattern,
const uint patternSizeInEls )
{
const __global uint *pPattern,
const uint patternSizeInEls) {
uint gid = get_global_id(0);
// Apply the byte offset first, then index in uint units.
((__global uint*)(pDst + dstOffsetInBytes))[gid] = pPattern[ gid & (patternSizeInEls - 1) ];
((__global uint *)(pDst + dstOffsetInBytes))[gid] = pPattern[gid & (patternSizeInEls - 1)];
}
// Byte-granular pattern fill: each work-item writes one uchar of the pattern.
// The visible body is the same as FillBufferLeftLeftover's.
//   pDst             - destination buffer
//   dstOffsetInBytes - byte offset added to the global id on the destination side
//   pPattern         - pattern data, read as bytes
//   patternSizeInEls - pattern length in elements (bytes here, since pPattern is uchar)
// The pattern index is `gid & (patternSizeInEls - 1)`, a modulo only when
// patternSizeInEls is a power of two.
// NOTE(review): presumably the host only passes power-of-two pattern sizes - verify.
// The pattern phase restarts at byte 0 for gid == 0, independent of dstOffsetInBytes.
// NOTE(review): if this fills the tail after a dword-wise middle section, the
// host must start it on a pattern boundary for the fill to stay in phase - confirm.
// NOTE(review): several lines below appear twice (previous and reformatted
// spelling of the same line); the two spellings are token-equivalent.
__kernel void FillBufferRightLeftover(
__global uchar* pDst,
__global uchar *pDst,
uint dstOffsetInBytes,
const __global uchar* pPattern,
const uint patternSizeInEls )
{
const __global uchar *pPattern,
const uint patternSizeInEls) {
uint gid = get_global_id(0);
// Wrap the work-item index into the pattern with a bit mask (see header note).
pDst[ gid + dstOffsetInBytes ] = pPattern[ gid & (patternSizeInEls - 1) ];
pDst[gid + dstOffsetInBytes] = pPattern[gid & (patternSizeInEls - 1)];
}
__kernel void FillImage1d(
@@ -179,42 +170,40 @@ __kernel void CopyImageToImage3d(
//////////////////////////////////////////////////////////////////////////////
// Copies a 2D rectangle between two linear buffers, one byte per work-item.
//   src       - source buffer, read only
//   dst       - destination buffer
//   SrcOrigin - origin of the rectangle in the source; only .x and .y are read here
//   DstOrigin - origin of the rectangle in the destination; only .x and .y are read here
//   SrcPitch  - .x is the multiplier applied to the row index (bytes per source row); .y is unused here
//   DstPitch  - .x is the multiplier applied to the row index (bytes per destination row); .y is unused here
// Work-item (x, y) is taken from global ids 0 and 1. There is no bounds check;
// the enqueued global size defines the rectangle extent.
// Offsets are computed in 32-bit uint arithmetic.
// NOTE(review): several lines below appear twice (previous and reformatted
// spelling of the same line); the two spellings are token-equivalent.
__kernel void CopyBufferRectBytes2d(
__global const char* src,
__global char* dst,
__global const char *src,
__global char *dst,
uint4 SrcOrigin,
uint4 DstOrigin,
uint2 SrcPitch,
uint2 DstPitch )
uint2 DstPitch)
{
int x = get_global_id(0);
int y = get_global_id(1);
// Linear byte offset = column + origin.x + (row + origin.y) * row pitch.
uint LSrcOffset = x + SrcOrigin.x + ( ( y + SrcOrigin.y ) * SrcPitch.x );
uint LDstOffset = x + DstOrigin.x + ( ( y + DstOrigin.y ) * DstPitch.x );
*( dst + LDstOffset ) = *( src + LSrcOffset );
uint LSrcOffset = x + SrcOrigin.x + ((y + SrcOrigin.y) * SrcPitch.x);
uint LDstOffset = x + DstOrigin.x + ((y + DstOrigin.y) * DstPitch.x);
*(dst + LDstOffset) = *(src + LSrcOffset);
}
//////////////////////////////////////////////////////////////////////////////
// Copies a 3D box between two linear buffers, one byte per work-item.
//   src       - source buffer, read only
//   dst       - destination buffer
//   SrcOrigin - origin of the box in the source; .x, .y and .z are read, .w is not
//   DstOrigin - origin of the box in the destination; .x, .y and .z are read, .w is not
//   SrcPitch  - .x multiplies the row index, .y multiplies the slice index (source side)
//   DstPitch  - .x multiplies the row index, .y multiplies the slice index (destination side)
// Work-item (x, y, z) is taken from global ids 0, 1 and 2. There is no bounds
// check; the enqueued global size defines the box extent.
// Offsets are computed in 32-bit uint arithmetic.
// NOTE(review): several lines below appear twice (previous and reformatted
// spelling of the same line); the two spellings are token-equivalent.
__kernel void CopyBufferRectBytes3d(
__global const char* src,
__global char* dst,
uint4 SrcOrigin,
uint4 DstOrigin,
uint2 SrcPitch,
uint2 DstPitch )
{
int x = get_global_id(0);
int y = get_global_id(1);
int z = get_global_id(2);
uint LSrcOffset = x + SrcOrigin.x + ( ( y + SrcOrigin.y ) * SrcPitch.x ) + ( ( z + SrcOrigin.z ) * SrcPitch.y );
uint LDstOffset = x + DstOrigin.x + ( ( y + DstOrigin.y ) * DstPitch.x ) + ( ( z + DstOrigin.z ) * DstPitch.y );
*( dst + LDstOffset ) = *( src + LSrcOffset );
__global const char *src,
__global char *dst,
uint4 SrcOrigin,
uint4 DstOrigin,
uint2 SrcPitch,
uint2 DstPitch)
{
int x = get_global_id(0);
int y = get_global_id(1);
int z = get_global_id(2);
// Linear byte offset = column + origin.x
//                    + (row   + origin.y) * row pitch
//                    + (slice + origin.z) * slice pitch.
uint LSrcOffset = x + SrcOrigin.x + ((y + SrcOrigin.y) * SrcPitch.x) + ((z + SrcOrigin.z) * SrcPitch.y);
uint LDstOffset = x + DstOrigin.x + ((y + DstOrigin.y) * DstPitch.x) + ((z + DstOrigin.z) * DstPitch.y);
*(dst + LDstOffset) = *(src + LSrcOffset);
}
#pragma OPENCL EXTENSION cl_khr_3d_image_writes : enable
@@ -248,14 +237,13 @@ __kernel void CopyBufferToImage3d2Bytes(__global uchar *src,
uint4 c = (uint4)(0, 0, 0, 1);
if(( ulong )(src + srcOffset) & 0x00000001){
ushort upper = *((__global uchar*)(src + LOffset + x * 2 + 1));
ushort lower = *((__global uchar*)(src + LOffset + x * 2));
if ((ulong)(src + srcOffset) & 0x00000001) {
ushort upper = *((__global uchar *)(src + LOffset + x * 2 + 1));
ushort lower = *((__global uchar *)(src + LOffset + x * 2));
ushort combined = (upper << 8) | lower;
c.x = (uint)combined;
}
else{
c.x = (uint)(*(__global ushort*)(src + LOffset + x * 2));
} else {
c.x = (uint)(*(__global ushort *)(src + LOffset + x * 2));
}
write_imageui(output, dstCoord, c);
}
@@ -274,16 +262,15 @@ __kernel void CopyBufferToImage3d4Bytes(__global uchar *src,
uint4 c = (uint4)(0, 0, 0, 1);
if(( ulong )(src + srcOffset) & 0x00000003){
uint upper2 = *((__global uchar*)(src + LOffset + x * 4 + 3));
uint upper = *((__global uchar*)(src + LOffset + x * 4 + 2));
uint lower2 = *((__global uchar*)(src + LOffset + x * 4 + 1));
uint lower = *((__global uchar*)(src + LOffset + x * 4));
if ((ulong)(src + srcOffset) & 0x00000003) {
uint upper2 = *((__global uchar *)(src + LOffset + x * 4 + 3));
uint upper = *((__global uchar *)(src + LOffset + x * 4 + 2));
uint lower2 = *((__global uchar *)(src + LOffset + x * 4 + 1));
uint lower = *((__global uchar *)(src + LOffset + x * 4));
uint combined = (upper2 << 24) | (upper << 16) | (lower2 << 8) | lower;
c.x = combined;
}
else{
c.x = (*(__global uint*)(src + LOffset + x * 4));
} else {
c.x = (*(__global uint *)(src + LOffset + x * 4));
}
write_imageui(output, dstCoord, c);
}
@@ -300,24 +287,23 @@ __kernel void CopyBufferToImage3d8Bytes(__global uchar *src,
int4 dstCoord = (int4)(x, y, z, 0) + dstOffset;
uint LOffset = srcOffset + (y * Pitch.x) + (z * Pitch.y);
uint2 c = (uint2)(0, 0);//*((__global uint2*)(src + LOffset + x * 8));
uint2 c = (uint2)(0, 0); //*((__global uint2*)(src + LOffset + x * 8));
if(( ulong )(src + srcOffset) & 0x00000007){
uint upper2 = *((__global uchar*)(src + LOffset + x * 8 + 3));
uint upper = *((__global uchar*)(src + LOffset + x * 8 + 2));
uint lower2 = *((__global uchar*)(src + LOffset + x * 8 + 1));
uint lower = *((__global uchar*)(src + LOffset + x * 8));
if ((ulong)(src + srcOffset) & 0x00000007) {
uint upper2 = *((__global uchar *)(src + LOffset + x * 8 + 3));
uint upper = *((__global uchar *)(src + LOffset + x * 8 + 2));
uint lower2 = *((__global uchar *)(src + LOffset + x * 8 + 1));
uint lower = *((__global uchar *)(src + LOffset + x * 8));
uint combined = (upper2 << 24) | (upper << 16) | (lower2 << 8) | lower;
c.x = combined;
upper2 = *((__global uchar*)(src + LOffset + x * 8 + 7));
upper = *((__global uchar*)(src + LOffset + x * 8 + 6));
lower2 = *((__global uchar*)(src + LOffset + x * 8 + 5));
lower = *((__global uchar*)(src + LOffset + x * 8 + 4));
upper2 = *((__global uchar *)(src + LOffset + x * 8 + 7));
upper = *((__global uchar *)(src + LOffset + x * 8 + 6));
lower2 = *((__global uchar *)(src + LOffset + x * 8 + 5));
lower = *((__global uchar *)(src + LOffset + x * 8 + 4));
combined = ((uint)upper2 << 24) | ((uint)upper << 16) | ((uint)lower2 << 8) | lower;
c.y = combined;
}
else{
c = *((__global uint2*)(src + LOffset + x * 8));
} else {
c = *((__global uint2 *)(src + LOffset + x * 8));
}
write_imageui(output, dstCoord, (uint4)(c.x, c.y, 0, 1));
@@ -337,33 +323,32 @@ __kernel void CopyBufferToImage3d16Bytes(__global uchar *src,
uint4 c = (uint4)(0, 0, 0, 0);
if(( ulong )(src + srcOffset) & 0x0000000f){
uint upper2 = *((__global uchar*)(src + LOffset + x * 16 + 3));
uint upper = *((__global uchar*)(src + LOffset + x * 16 + 2));
uint lower2 = *((__global uchar*)(src + LOffset + x * 16 + 1));
uint lower = *((__global uchar*)(src + LOffset + x * 16));
if ((ulong)(src + srcOffset) & 0x0000000f) {
uint upper2 = *((__global uchar *)(src + LOffset + x * 16 + 3));
uint upper = *((__global uchar *)(src + LOffset + x * 16 + 2));
uint lower2 = *((__global uchar *)(src + LOffset + x * 16 + 1));
uint lower = *((__global uchar *)(src + LOffset + x * 16));
uint combined = (upper2 << 24) | (upper << 16) | (lower2 << 8) | lower;
c.x = combined;
upper2 = *((__global uchar*)(src + LOffset + x * 16 + 7));
upper = *((__global uchar*)(src + LOffset + x * 16 + 6));
lower2 = *((__global uchar*)(src + LOffset + x * 16 + 5));
lower = *((__global uchar*)(src + LOffset + x * 16 + 4));
upper2 = *((__global uchar *)(src + LOffset + x * 16 + 7));
upper = *((__global uchar *)(src + LOffset + x * 16 + 6));
lower2 = *((__global uchar *)(src + LOffset + x * 16 + 5));
lower = *((__global uchar *)(src + LOffset + x * 16 + 4));
combined = (upper2 << 24) | (upper << 16) | (lower2 << 8) | lower;
c.y = combined;
upper2 = *((__global uchar*)(src + LOffset + x * 16 + 11));
upper = *((__global uchar*)(src + LOffset + x * 16 + 10));
lower2 = *((__global uchar*)(src + LOffset + x * 16 + 9));
lower = *((__global uchar*)(src + LOffset + x * 16 + 8));
upper2 = *((__global uchar *)(src + LOffset + x * 16 + 11));
upper = *((__global uchar *)(src + LOffset + x * 16 + 10));
lower2 = *((__global uchar *)(src + LOffset + x * 16 + 9));
lower = *((__global uchar *)(src + LOffset + x * 16 + 8));
combined = (upper2 << 24) | (upper << 16) | (lower2 << 8) | lower;
c.z = combined;
upper2 = *((__global uchar*)(src + LOffset + x * 16 + 15));
upper = *((__global uchar*)(src + LOffset + x * 16 + 14));
lower2 = *((__global uchar*)(src + LOffset + x * 16 + 13));
lower = *((__global uchar*)(src + LOffset + x * 16 + 12));
upper2 = *((__global uchar *)(src + LOffset + x * 16 + 15));
upper = *((__global uchar *)(src + LOffset + x * 16 + 14));
lower2 = *((__global uchar *)(src + LOffset + x * 16 + 13));
lower = *((__global uchar *)(src + LOffset + x * 16 + 12));
combined = (upper2 << 24) | (upper << 16) | (lower2 << 8) | lower;
c.w = combined;
}
else{
} else {
c = *((__global uint4 *)(src + LOffset + x * 16));
}
@@ -397,15 +382,14 @@ __kernel void CopyImage3dToBuffer2Bytes(__read_only image3d_t input,
const int4 srcCoord = (int4)(x, y, z, 0) + srcOffset;
uint DstOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y);
uint4 c = read_imageui(input, srcCoord);
if(( ulong )(dst + dstOffset) & 0x00000001){
*((__global uchar*)(dst + DstOffset + x * 2 + 1)) = convert_uchar_sat((c.x >> 8 ) & 0xff);
*((__global uchar*)(dst + DstOffset + x * 2)) = convert_uchar_sat(c.x & 0xff);
}
else{
*((__global ushort*)(dst + DstOffset + x * 2)) = convert_ushort_sat(c.x);
if ((ulong)(dst + dstOffset) & 0x00000001) {
*((__global uchar *)(dst + DstOffset + x * 2 + 1)) = convert_uchar_sat((c.x >> 8) & 0xff);
*((__global uchar *)(dst + DstOffset + x * 2)) = convert_uchar_sat(c.x & 0xff);
} else {
*((__global ushort *)(dst + DstOffset + x * 2)) = convert_ushort_sat(c.x);
}
}
@@ -423,14 +407,13 @@ __kernel void CopyImage3dToBuffer4Bytes(__read_only image3d_t input,
uint4 c = read_imageui(input, srcCoord);
if(( ulong )(dst + dstOffset) & 0x00000003){
*((__global uchar*)(dst + DstOffset + x * 4 + 3)) = convert_uchar_sat((c.x >> 24 ) & 0xff);
*((__global uchar*)(dst + DstOffset + x * 4 + 2)) = convert_uchar_sat((c.x >> 16 ) & 0xff);
*((__global uchar*)(dst + DstOffset + x * 4 + 1)) = convert_uchar_sat((c.x >> 8 ) & 0xff);
*((__global uchar*)(dst + DstOffset + x * 4)) = convert_uchar_sat(c.x & 0xff);
}
else{
*((__global uint*)(dst + DstOffset + x * 4)) = c.x;
if ((ulong)(dst + dstOffset) & 0x00000003) {
*((__global uchar *)(dst + DstOffset + x * 4 + 3)) = convert_uchar_sat((c.x >> 24) & 0xff);
*((__global uchar *)(dst + DstOffset + x * 4 + 2)) = convert_uchar_sat((c.x >> 16) & 0xff);
*((__global uchar *)(dst + DstOffset + x * 4 + 1)) = convert_uchar_sat((c.x >> 8) & 0xff);
*((__global uchar *)(dst + DstOffset + x * 4)) = convert_uchar_sat(c.x & 0xff);
} else {
*((__global uint *)(dst + DstOffset + x * 4)) = c.x;
}
}
@@ -448,19 +431,18 @@ __kernel void CopyImage3dToBuffer8Bytes(__read_only image3d_t input,
uint4 c = read_imageui(input, srcCoord);
if(( ulong )(dst + dstOffset) & 0x00000007){
*((__global uchar*)(dst + DstOffset + x * 8 + 3)) = convert_uchar_sat((c.x >> 24 ) & 0xff);
*((__global uchar*)(dst + DstOffset + x * 8 + 2)) = convert_uchar_sat((c.x >> 16 ) & 0xff);
*((__global uchar*)(dst + DstOffset + x * 8 + 1)) = convert_uchar_sat((c.x >> 8 ) & 0xff);
*((__global uchar*)(dst + DstOffset + x * 8)) = convert_uchar_sat(c.x & 0xff);
*((__global uchar*)(dst + DstOffset + x * 8 + 7)) = convert_uchar_sat((c.y >> 24 ) & 0xff);
*((__global uchar*)(dst + DstOffset + x * 8 + 6)) = convert_uchar_sat((c.y >> 16 ) & 0xff);
*((__global uchar*)(dst + DstOffset + x * 8 + 5)) = convert_uchar_sat((c.y >> 8 ) & 0xff);
*((__global uchar*)(dst + DstOffset + x * 8 + 4)) = convert_uchar_sat(c.y & 0xff);
}
else{
uint2 d = (uint2)(c.x,c.y);
*((__global uint2*)(dst + DstOffset + x * 8)) = d;
if ((ulong)(dst + dstOffset) & 0x00000007) {
*((__global uchar *)(dst + DstOffset + x * 8 + 3)) = convert_uchar_sat((c.x >> 24) & 0xff);
*((__global uchar *)(dst + DstOffset + x * 8 + 2)) = convert_uchar_sat((c.x >> 16) & 0xff);
*((__global uchar *)(dst + DstOffset + x * 8 + 1)) = convert_uchar_sat((c.x >> 8) & 0xff);
*((__global uchar *)(dst + DstOffset + x * 8)) = convert_uchar_sat(c.x & 0xff);
*((__global uchar *)(dst + DstOffset + x * 8 + 7)) = convert_uchar_sat((c.y >> 24) & 0xff);
*((__global uchar *)(dst + DstOffset + x * 8 + 6)) = convert_uchar_sat((c.y >> 16) & 0xff);
*((__global uchar *)(dst + DstOffset + x * 8 + 5)) = convert_uchar_sat((c.y >> 8) & 0xff);
*((__global uchar *)(dst + DstOffset + x * 8 + 4)) = convert_uchar_sat(c.y & 0xff);
} else {
uint2 d = (uint2)(c.x, c.y);
*((__global uint2 *)(dst + DstOffset + x * 8)) = d;
}
}
@@ -478,25 +460,24 @@ __kernel void CopyImage3dToBuffer16Bytes(__read_only image3d_t input,
const uint4 c = read_imageui(input, srcCoord);
if(( ulong )(dst + dstOffset) & 0x0000000f){
*((__global uchar*)(dst + DstOffset + x * 16 + 3)) = convert_uchar_sat((c.x >> 24 ) & 0xff);
*((__global uchar*)(dst + DstOffset + x * 16 + 2)) = convert_uchar_sat((c.x >> 16 ) & 0xff);
*((__global uchar*)(dst + DstOffset + x * 16 + 1)) = convert_uchar_sat((c.x >> 8 ) & 0xff);
*((__global uchar*)(dst + DstOffset + x * 16)) = convert_uchar_sat(c.x & 0xff);
*((__global uchar*)(dst + DstOffset + x * 16 + 7)) = convert_uchar_sat((c.y >> 24 ) & 0xff);
*((__global uchar*)(dst + DstOffset + x * 16 + 6)) = convert_uchar_sat((c.y >> 16 ) & 0xff);
*((__global uchar*)(dst + DstOffset + x * 16 + 5)) = convert_uchar_sat((c.y >> 8 ) & 0xff);
*((__global uchar*)(dst + DstOffset + x * 16 + 4)) = convert_uchar_sat(c.y & 0xff);
*((__global uchar*)(dst + DstOffset + x * 16 + 11)) = convert_uchar_sat((c.z >> 24 ) & 0xff);
*((__global uchar*)(dst + DstOffset + x * 16 + 10)) = convert_uchar_sat((c.z >> 16 ) & 0xff);
*((__global uchar*)(dst + DstOffset + x * 16 + 9)) = convert_uchar_sat((c.z >> 8 ) & 0xff);
*((__global uchar*)(dst + DstOffset + x * 16 + 8)) = convert_uchar_sat(c.z & 0xff);
*((__global uchar*)(dst + DstOffset + x * 16 + 15)) = convert_uchar_sat((c.w >> 24 ) & 0xff);
*((__global uchar*)(dst + DstOffset + x * 16 + 14)) = convert_uchar_sat((c.w >> 16 ) & 0xff);
*((__global uchar*)(dst + DstOffset + x * 16 + 13)) = convert_uchar_sat((c.w >> 8 ) & 0xff);
*((__global uchar*)(dst + DstOffset + x * 16 + 12)) = convert_uchar_sat(c.w & 0xff);
}
else{
*(__global uint4*)(dst + DstOffset + x * 16) = c;
if ((ulong)(dst + dstOffset) & 0x0000000f) {
*((__global uchar *)(dst + DstOffset + x * 16 + 3)) = convert_uchar_sat((c.x >> 24) & 0xff);
*((__global uchar *)(dst + DstOffset + x * 16 + 2)) = convert_uchar_sat((c.x >> 16) & 0xff);
*((__global uchar *)(dst + DstOffset + x * 16 + 1)) = convert_uchar_sat((c.x >> 8) & 0xff);
*((__global uchar *)(dst + DstOffset + x * 16)) = convert_uchar_sat(c.x & 0xff);
*((__global uchar *)(dst + DstOffset + x * 16 + 7)) = convert_uchar_sat((c.y >> 24) & 0xff);
*((__global uchar *)(dst + DstOffset + x * 16 + 6)) = convert_uchar_sat((c.y >> 16) & 0xff);
*((__global uchar *)(dst + DstOffset + x * 16 + 5)) = convert_uchar_sat((c.y >> 8) & 0xff);
*((__global uchar *)(dst + DstOffset + x * 16 + 4)) = convert_uchar_sat(c.y & 0xff);
*((__global uchar *)(dst + DstOffset + x * 16 + 11)) = convert_uchar_sat((c.z >> 24) & 0xff);
*((__global uchar *)(dst + DstOffset + x * 16 + 10)) = convert_uchar_sat((c.z >> 16) & 0xff);
*((__global uchar *)(dst + DstOffset + x * 16 + 9)) = convert_uchar_sat((c.z >> 8) & 0xff);
*((__global uchar *)(dst + DstOffset + x * 16 + 8)) = convert_uchar_sat(c.z & 0xff);
*((__global uchar *)(dst + DstOffset + x * 16 + 15)) = convert_uchar_sat((c.w >> 24) & 0xff);
*((__global uchar *)(dst + DstOffset + x * 16 + 14)) = convert_uchar_sat((c.w >> 16) & 0xff);
*((__global uchar *)(dst + DstOffset + x * 16 + 13)) = convert_uchar_sat((c.w >> 8) & 0xff);
*((__global uchar *)(dst + DstOffset + x * 16 + 12)) = convert_uchar_sat(c.w & 0xff);
} else {
*(__global uint4 *)(dst + DstOffset + x * 16) = c;
}
}