mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-05 18:06:32 +08:00
Move built ins to share directory
Change-Id: I740a349a0f15229cd356fffe996932029bf0f98b Signed-off-by: Maciej Plewka <maciej.plewka@intel.com>
This commit is contained in:
committed by
sys_ocldev
parent
0e85ccf084
commit
357fdc2e65
@@ -7,6 +7,7 @@
|
||||
|
||||
#include "api.h"
|
||||
|
||||
#include "shared/source/built_ins/built_ins.h"
|
||||
#include "shared/source/command_stream/command_stream_receiver.h"
|
||||
#include "shared/source/debug_settings/debug_settings_manager.h"
|
||||
#include "shared/source/execution_environment/root_device_environment.h"
|
||||
@@ -22,7 +23,6 @@
|
||||
#include "opencl/source/accelerators/intel_motion_estimation.h"
|
||||
#include "opencl/source/api/additional_extensions.h"
|
||||
#include "opencl/source/aub/aub_center.h"
|
||||
#include "opencl/source/built_ins/built_ins.h"
|
||||
#include "opencl/source/built_ins/vme_builtin.h"
|
||||
#include "opencl/source/command_queue/command_queue.h"
|
||||
#include "opencl/source/context/context.h"
|
||||
|
||||
@@ -9,43 +9,18 @@ set(RUNTIME_SRCS_BUILT_INS
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/aux_translation_builtin.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/builtins_dispatch_builder.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/builtins_dispatch_builder.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/built_ins_storage.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/built_ins.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/built_ins.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/built_in_ops_base.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/built_in_ops_vme.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/built_ins.inl
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/sip.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/sip.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}/unknown_built_in_name.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/vme_builtin.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/vme_builtin.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/vme_dispatch_builder.h
|
||||
)
|
||||
|
||||
add_subdirectory(builtinops)
|
||||
|
||||
target_sources(${NEO_STATIC_LIB_NAME} PRIVATE ${RUNTIME_SRCS_BUILT_INS})
|
||||
set_property(GLOBAL PROPERTY RUNTIME_SRCS_BUILT_INS ${RUNTIME_SRCS_BUILT_INS})
|
||||
|
||||
set(RUNTIME_SRCS_BUILT_IN_KERNELS
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/kernels/aux_translation.builtin_kernel
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/kernels/copy_buffer_rect.builtin_kernel
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/kernels/copy_buffer_rect_stateless.builtin_kernel
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/kernels/copy_buffer_to_buffer.builtin_kernel
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/kernels/copy_buffer_to_buffer_stateless.builtin_kernel
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/kernels/copy_buffer_to_image3d.builtin_kernel
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/kernels/copy_buffer_to_image3d_stateless.builtin_kernel
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/kernels/copy_image3d_to_buffer.builtin_kernel
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/kernels/copy_image3d_to_buffer_stateless.builtin_kernel
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/kernels/copy_image_to_image1d.builtin_kernel
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/kernels/copy_image_to_image2d.builtin_kernel
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/kernels/copy_image_to_image3d.builtin_kernel
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/kernels/fill_buffer.builtin_kernel
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/kernels/fill_buffer_stateless.builtin_kernel
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/kernels/fill_image1d.builtin_kernel
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/kernels/fill_image2d.builtin_kernel
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/kernels/fill_image3d.builtin_kernel
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/kernels/vme_block_advanced_motion_estimate_bidirectional_check_intel.builtin_kernel
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/kernels/vme_block_advanced_motion_estimate_check_intel.builtin_kernel
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/kernels/vme_block_motion_estimate_intel.builtin_kernel
|
||||
@@ -53,6 +28,9 @@ set(RUNTIME_SRCS_BUILT_IN_KERNELS
|
||||
|
||||
target_sources(${NEO_STATIC_LIB_NAME} PRIVATE ${RUNTIME_SRCS_BUILT_IN_KERNELS})
|
||||
|
||||
if(NOT (TARGET ${BUILTINS_BINARIES_LIB_NAME}))
|
||||
include(builtins_binary.cmake)
|
||||
if(NOT (TARGET ${BUILTINS_VME_LIB_NAME}))
|
||||
add_subdirectory(registry)
|
||||
if(COMPILE_BUILT_INS)
|
||||
add_subdirectory(kernels)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
@@ -6,9 +6,9 @@
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include "shared/source/built_ins/built_ins.h"
|
||||
#include "shared/source/helpers/hw_helper.h"
|
||||
|
||||
#include "opencl/source/built_ins/built_ins.h"
|
||||
#include "opencl/source/built_ins/builtins_dispatch_builder.h"
|
||||
#include "opencl/source/helpers/dispatch_info_builder.h"
|
||||
|
||||
|
||||
@@ -1,37 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include <cstdint>
|
||||
|
||||
namespace NEO {
|
||||
namespace EBuiltInOps {
|
||||
|
||||
using Type = uint32_t;
|
||||
|
||||
constexpr Type AuxTranslation{0};
|
||||
constexpr Type CopyBufferToBuffer{1};
|
||||
constexpr Type CopyBufferToBufferStateless{2};
|
||||
constexpr Type CopyBufferRect{3};
|
||||
constexpr Type CopyBufferRectStateless{4};
|
||||
constexpr Type FillBuffer{5};
|
||||
constexpr Type FillBufferStateless{6};
|
||||
constexpr Type CopyBufferToImage3d{7};
|
||||
constexpr Type CopyBufferToImage3dStateless{8};
|
||||
constexpr Type CopyImage3dToBuffer{9};
|
||||
constexpr Type CopyImage3dToBufferStateless{10};
|
||||
constexpr Type CopyImageToImage1d{11};
|
||||
constexpr Type CopyImageToImage2d{12};
|
||||
constexpr Type CopyImageToImage3d{13};
|
||||
constexpr Type FillImage1d{14};
|
||||
constexpr Type FillImage2d{15};
|
||||
constexpr Type FillImage3d{16};
|
||||
|
||||
constexpr Type MaxBaseValue{16};
|
||||
constexpr Type COUNT{64};
|
||||
} // namespace EBuiltInOps
|
||||
} // namespace NEO
|
||||
@@ -1,94 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2017-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "opencl/source/built_ins/built_ins.h"
|
||||
|
||||
#include "shared/source/compiler_interface/compiler_interface.h"
|
||||
#include "shared/source/helpers/basic_math.h"
|
||||
#include "shared/source/helpers/debug_helpers.h"
|
||||
|
||||
#include "opencl/source/built_ins/aux_translation_builtin.h"
|
||||
#include "opencl/source/built_ins/sip.h"
|
||||
#include "opencl/source/device/cl_device.h"
|
||||
#include "opencl/source/helpers/built_ins_helper.h"
|
||||
#include "opencl/source/helpers/convert_color.h"
|
||||
#include "opencl/source/helpers/dispatch_info_builder.h"
|
||||
#include "opencl/source/kernel/kernel.h"
|
||||
#include "opencl/source/mem_obj/image.h"
|
||||
#include "opencl/source/program/program.h"
|
||||
|
||||
#include "compiler_options.h"
|
||||
|
||||
#include <cstdint>
|
||||
#include <sstream>
|
||||
|
||||
namespace NEO {
|
||||
|
||||
BuiltIns::BuiltIns() {
|
||||
builtinsLib.reset(new BuiltinsLib());
|
||||
}
|
||||
|
||||
BuiltIns::~BuiltIns() = default;
|
||||
|
||||
const SipKernel &BuiltIns::getSipKernel(SipKernelType type, Device &device) {
|
||||
uint32_t kernelId = static_cast<uint32_t>(type);
|
||||
UNRECOVERABLE_IF(kernelId >= static_cast<uint32_t>(SipKernelType::COUNT));
|
||||
auto &sipBuiltIn = this->sipKernels[kernelId];
|
||||
|
||||
auto initializer = [&] {
|
||||
cl_int retVal = CL_SUCCESS;
|
||||
|
||||
std::vector<char> sipBinary;
|
||||
auto compilerInteface = device.getExecutionEnvironment()->getCompilerInterface();
|
||||
UNRECOVERABLE_IF(compilerInteface == nullptr);
|
||||
|
||||
auto ret = compilerInteface->getSipKernelBinary(device, type, sipBinary);
|
||||
|
||||
UNRECOVERABLE_IF(ret != TranslationOutput::ErrorCode::Success);
|
||||
UNRECOVERABLE_IF(sipBinary.size() == 0);
|
||||
auto program = createProgramForSip(*device.getExecutionEnvironment(),
|
||||
nullptr,
|
||||
sipBinary,
|
||||
sipBinary.size(),
|
||||
&retVal,
|
||||
&device);
|
||||
DEBUG_BREAK_IF(retVal != CL_SUCCESS);
|
||||
UNRECOVERABLE_IF(program == nullptr);
|
||||
|
||||
program->setDevice(&device);
|
||||
|
||||
retVal = program->processGenBinary();
|
||||
DEBUG_BREAK_IF(retVal != CL_SUCCESS);
|
||||
|
||||
sipBuiltIn.first.reset(new SipKernel(type, program));
|
||||
};
|
||||
std::call_once(sipBuiltIn.second, initializer);
|
||||
UNRECOVERABLE_IF(sipBuiltIn.first == nullptr);
|
||||
return *sipBuiltIn.first;
|
||||
}
|
||||
|
||||
BuiltInOwnershipWrapper::BuiltInOwnershipWrapper(BuiltinDispatchInfoBuilder &inputBuilder, Context *context) {
|
||||
takeOwnership(inputBuilder, context);
|
||||
}
|
||||
BuiltInOwnershipWrapper::~BuiltInOwnershipWrapper() {
|
||||
if (builder) {
|
||||
for (auto &kernel : builder->peekUsedKernels()) {
|
||||
kernel->setContext(nullptr);
|
||||
kernel->releaseOwnership();
|
||||
}
|
||||
}
|
||||
}
|
||||
void BuiltInOwnershipWrapper::takeOwnership(BuiltinDispatchInfoBuilder &inputBuilder, Context *context) {
|
||||
UNRECOVERABLE_IF(builder);
|
||||
builder = &inputBuilder;
|
||||
for (auto &kernel : builder->peekUsedKernels()) {
|
||||
kernel->takeOwnership();
|
||||
kernel->setContext(context);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace NEO
|
||||
@@ -1,214 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2017-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include "shared/source/built_ins/sip_kernel_type.h"
|
||||
#include "shared/source/helpers/debug_helpers.h"
|
||||
#include "shared/source/helpers/non_copyable_or_moveable.h"
|
||||
#include "shared/source/helpers/vec.h"
|
||||
|
||||
#include "CL/cl.h"
|
||||
#include "built_in_ops.h"
|
||||
#include "compiler_options.h"
|
||||
|
||||
#include <array>
|
||||
#include <cstdint>
|
||||
#include <fstream>
|
||||
#include <memory>
|
||||
#include <mutex>
|
||||
#include <string>
|
||||
#include <tuple>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
|
||||
namespace NEO {
|
||||
typedef std::vector<char> BuiltinResourceT;
|
||||
|
||||
class Context;
|
||||
class Device;
|
||||
class Kernel;
|
||||
struct KernelInfo;
|
||||
struct MultiDispatchInfo;
|
||||
class Program;
|
||||
class SchedulerKernel;
|
||||
class SipKernel;
|
||||
|
||||
static constexpr ConstStringRef mediaKernelsBuildOptionsList[] = {
|
||||
"-D cl_intel_device_side_advanced_vme_enable",
|
||||
"-D cl_intel_device_side_avc_vme_enable",
|
||||
"-D cl_intel_device_side_vme_enable",
|
||||
"-D cl_intel_media_block_io",
|
||||
CompilerOptions::fastRelaxedMath};
|
||||
|
||||
static constexpr CompilerOptions::ConstConcatenation<> mediaKernelsBuildOptions{mediaKernelsBuildOptionsList};
|
||||
|
||||
BuiltinResourceT createBuiltinResource(const char *ptr, size_t size);
|
||||
BuiltinResourceT createBuiltinResource(const BuiltinResourceT &r);
|
||||
std::string createBuiltinResourceName(EBuiltInOps::Type builtin, const std::string &extension,
|
||||
const std::string &platformName = "", uint32_t deviceRevId = 0);
|
||||
std::string joinPath(const std::string &lhs, const std::string &rhs);
|
||||
const char *getBuiltinAsString(EBuiltInOps::Type builtin);
|
||||
const char *getUnknownBuiltinAsString(EBuiltInOps::Type builtin);
|
||||
const char *getAdditionalBuiltinAsString(EBuiltInOps::Type builtin);
|
||||
|
||||
class Storage {
|
||||
public:
|
||||
Storage(const std::string &rootPath)
|
||||
: rootPath(rootPath) {
|
||||
}
|
||||
|
||||
virtual ~Storage() = default;
|
||||
|
||||
BuiltinResourceT load(const std::string &resourceName);
|
||||
|
||||
protected:
|
||||
virtual BuiltinResourceT loadImpl(const std::string &fullResourceName) = 0;
|
||||
|
||||
std::string rootPath;
|
||||
};
|
||||
|
||||
class FileStorage : public Storage {
|
||||
public:
|
||||
FileStorage(const std::string &rootPath = "")
|
||||
: Storage(rootPath) {
|
||||
}
|
||||
|
||||
protected:
|
||||
BuiltinResourceT loadImpl(const std::string &fullResourceName) override;
|
||||
};
|
||||
|
||||
struct EmbeddedStorageRegistry {
|
||||
static EmbeddedStorageRegistry &getInstance() {
|
||||
static EmbeddedStorageRegistry gsr;
|
||||
return gsr;
|
||||
}
|
||||
|
||||
void store(const std::string &name, BuiltinResourceT &&resource) {
|
||||
resources.emplace(name, BuiltinResourceT(std::move(resource)));
|
||||
}
|
||||
|
||||
const BuiltinResourceT *get(const std::string &name) const;
|
||||
|
||||
private:
|
||||
using ResourcesContainer = std::unordered_map<std::string, BuiltinResourceT>;
|
||||
ResourcesContainer resources;
|
||||
};
|
||||
|
||||
class EmbeddedStorage : public Storage {
|
||||
public:
|
||||
EmbeddedStorage(const std::string &rootPath)
|
||||
: Storage(rootPath) {
|
||||
}
|
||||
|
||||
protected:
|
||||
BuiltinResourceT loadImpl(const std::string &fullResourceName) override;
|
||||
};
|
||||
|
||||
struct BuiltinCode {
|
||||
enum class ECodeType {
|
||||
Any = 0, // for requesting "any" code available - priorities as below
|
||||
Binary = 1, // ISA - highest priority
|
||||
Intermediate = 2, // SPIR/LLVM - medium prioroty
|
||||
Source = 3, // OCL C - lowest priority
|
||||
COUNT,
|
||||
INVALID
|
||||
};
|
||||
|
||||
static const char *getExtension(ECodeType ct) {
|
||||
switch (ct) {
|
||||
default:
|
||||
return "";
|
||||
case ECodeType::Binary:
|
||||
return ".bin";
|
||||
case ECodeType::Intermediate:
|
||||
return ".bc";
|
||||
case ECodeType::Source:
|
||||
return ".cl";
|
||||
}
|
||||
}
|
||||
|
||||
ECodeType type;
|
||||
BuiltinResourceT resource;
|
||||
Device *targetDevice;
|
||||
};
|
||||
|
||||
class BuiltinsLib {
|
||||
public:
|
||||
BuiltinsLib();
|
||||
BuiltinCode getBuiltinCode(EBuiltInOps::Type builtin, BuiltinCode::ECodeType requestedCodeType, Device &device);
|
||||
|
||||
static std::unique_ptr<Program> createProgramFromCode(const BuiltinCode &bc, Device &device);
|
||||
|
||||
protected:
|
||||
BuiltinResourceT getBuiltinResource(EBuiltInOps::Type builtin, BuiltinCode::ECodeType requestedCodeType, Device &device);
|
||||
|
||||
using StoragesContainerT = std::vector<std::unique_ptr<Storage>>;
|
||||
StoragesContainerT allStorages; // sorted by priority allStorages[0] will be checked before allStorages[1], etc.
|
||||
|
||||
std::mutex mutex;
|
||||
};
|
||||
|
||||
struct BuiltInKernel {
|
||||
const char *pSource = nullptr;
|
||||
Program *pProgram = nullptr;
|
||||
std::once_flag programIsInitialized; // guard for creating+building the program
|
||||
Kernel *pKernel = nullptr;
|
||||
|
||||
BuiltInKernel() {
|
||||
}
|
||||
};
|
||||
|
||||
class BuiltinDispatchInfoBuilder;
|
||||
|
||||
class BuiltIns {
|
||||
public:
|
||||
std::pair<std::unique_ptr<BuiltinDispatchInfoBuilder>, std::once_flag> BuiltinOpsBuilders[static_cast<uint32_t>(EBuiltInOps::COUNT)];
|
||||
BuiltIns();
|
||||
virtual ~BuiltIns();
|
||||
|
||||
MOCKABLE_VIRTUAL const SipKernel &getSipKernel(SipKernelType type, Device &device);
|
||||
|
||||
BuiltinsLib &getBuiltinsLib() {
|
||||
DEBUG_BREAK_IF(!builtinsLib.get());
|
||||
return *builtinsLib;
|
||||
}
|
||||
|
||||
void setCacheingEnableState(bool enableCacheing) {
|
||||
this->enableCacheing = enableCacheing;
|
||||
}
|
||||
|
||||
bool isCacheingEnabled() const {
|
||||
return this->enableCacheing;
|
||||
}
|
||||
|
||||
protected:
|
||||
// sip builtins
|
||||
std::pair<std::unique_ptr<SipKernel>, std::once_flag> sipKernels[static_cast<uint32_t>(SipKernelType::COUNT)];
|
||||
|
||||
std::unique_ptr<BuiltinsLib> builtinsLib;
|
||||
|
||||
using ProgramsContainerT = std::array<std::pair<std::unique_ptr<Program>, std::once_flag>, static_cast<size_t>(EBuiltInOps::COUNT)>;
|
||||
ProgramsContainerT builtinPrograms;
|
||||
bool enableCacheing = true;
|
||||
};
|
||||
|
||||
class BuiltInOwnershipWrapper : public NonCopyableOrMovableClass {
|
||||
public:
|
||||
BuiltInOwnershipWrapper() = default;
|
||||
BuiltInOwnershipWrapper(BuiltinDispatchInfoBuilder &inputBuilder, Context *context);
|
||||
~BuiltInOwnershipWrapper();
|
||||
|
||||
void takeOwnership(BuiltinDispatchInfoBuilder &inputBuilder, Context *context);
|
||||
|
||||
protected:
|
||||
BuiltinDispatchInfoBuilder *builder = nullptr;
|
||||
};
|
||||
|
||||
template <EBuiltInOps::Type OpCode>
|
||||
class BuiltInOp;
|
||||
|
||||
} // namespace NEO
|
||||
@@ -1,221 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2017-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/debug_settings/debug_settings_manager.h"
|
||||
#include "shared/source/device/device.h"
|
||||
|
||||
#include "opencl/source/built_ins/built_ins.h"
|
||||
#include "opencl/source/built_ins/builtins_dispatch_builder.h"
|
||||
|
||||
#include "os_inc.h"
|
||||
|
||||
#include <cstdint>
|
||||
|
||||
namespace NEO {
|
||||
|
||||
const char *getBuiltinAsString(EBuiltInOps::Type builtin) {
|
||||
const char *builtinString = getAdditionalBuiltinAsString(builtin);
|
||||
if (builtinString) {
|
||||
return builtinString;
|
||||
}
|
||||
switch (builtin) {
|
||||
default:
|
||||
return getUnknownBuiltinAsString(builtin);
|
||||
case EBuiltInOps::AuxTranslation:
|
||||
return "aux_translation.builtin_kernel";
|
||||
case EBuiltInOps::CopyBufferToBuffer:
|
||||
return "copy_buffer_to_buffer.builtin_kernel";
|
||||
case EBuiltInOps::CopyBufferToBufferStateless:
|
||||
return "copy_buffer_to_buffer_stateless.builtin_kernel";
|
||||
case EBuiltInOps::CopyBufferRect:
|
||||
return "copy_buffer_rect.builtin_kernel";
|
||||
case EBuiltInOps::CopyBufferRectStateless:
|
||||
return "copy_buffer_rect_stateless.builtin_kernel";
|
||||
case EBuiltInOps::FillBuffer:
|
||||
return "fill_buffer.builtin_kernel";
|
||||
case EBuiltInOps::FillBufferStateless:
|
||||
return "fill_buffer_stateless.builtin_kernel";
|
||||
case EBuiltInOps::CopyBufferToImage3d:
|
||||
return "copy_buffer_to_image3d.builtin_kernel";
|
||||
case EBuiltInOps::CopyBufferToImage3dStateless:
|
||||
return "copy_buffer_to_image3d_stateless.builtin_kernel";
|
||||
case EBuiltInOps::CopyImage3dToBuffer:
|
||||
return "copy_image3d_to_buffer.builtin_kernel";
|
||||
case EBuiltInOps::CopyImage3dToBufferStateless:
|
||||
return "copy_image3d_to_buffer_stateless.builtin_kernel";
|
||||
case EBuiltInOps::CopyImageToImage1d:
|
||||
return "copy_image_to_image1d.builtin_kernel";
|
||||
case EBuiltInOps::CopyImageToImage2d:
|
||||
return "copy_image_to_image2d.builtin_kernel";
|
||||
case EBuiltInOps::CopyImageToImage3d:
|
||||
return "copy_image_to_image3d.builtin_kernel";
|
||||
case EBuiltInOps::FillImage1d:
|
||||
return "fill_image1d.builtin_kernel";
|
||||
case EBuiltInOps::FillImage2d:
|
||||
return "fill_image2d.builtin_kernel";
|
||||
case EBuiltInOps::FillImage3d:
|
||||
return "fill_image3d.builtin_kernel";
|
||||
};
|
||||
}
|
||||
|
||||
BuiltinResourceT createBuiltinResource(const char *ptr, size_t size) {
|
||||
return BuiltinResourceT(ptr, ptr + size);
|
||||
}
|
||||
|
||||
BuiltinResourceT createBuiltinResource(const BuiltinResourceT &r) {
|
||||
return BuiltinResourceT(r);
|
||||
}
|
||||
|
||||
std::string createBuiltinResourceName(EBuiltInOps::Type builtin, const std::string &extension,
|
||||
const std::string &platformName, uint32_t deviceRevId) {
|
||||
std::string ret;
|
||||
if (platformName.size() > 0) {
|
||||
ret = platformName;
|
||||
ret += "_" + std::to_string(deviceRevId);
|
||||
ret += "_";
|
||||
}
|
||||
|
||||
ret += getBuiltinAsString(builtin);
|
||||
|
||||
if (extension.size() > 0) {
|
||||
ret += extension;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
std::string joinPath(const std::string &lhs, const std::string &rhs) {
|
||||
if (lhs.size() == 0) {
|
||||
return rhs;
|
||||
}
|
||||
|
||||
if (rhs.size() == 0) {
|
||||
return lhs;
|
||||
}
|
||||
|
||||
if (*lhs.rbegin() == PATH_SEPARATOR) {
|
||||
return lhs + rhs;
|
||||
}
|
||||
|
||||
return lhs + PATH_SEPARATOR + rhs;
|
||||
}
|
||||
|
||||
std::string getDriverInstallationPath() {
|
||||
return "";
|
||||
}
|
||||
|
||||
BuiltinResourceT Storage::load(const std::string &resourceName) {
|
||||
return loadImpl(joinPath(rootPath, resourceName));
|
||||
}
|
||||
|
||||
BuiltinResourceT FileStorage::loadImpl(const std::string &fullResourceName) {
|
||||
BuiltinResourceT ret;
|
||||
|
||||
std::ifstream f{fullResourceName, std::ios::in | std::ios::binary | std::ios::ate};
|
||||
auto end = f.tellg();
|
||||
f.seekg(0, std::ios::beg);
|
||||
auto beg = f.tellg();
|
||||
auto s = end - beg;
|
||||
ret.resize(static_cast<size_t>(s));
|
||||
f.read(ret.data(), s);
|
||||
return ret;
|
||||
}
|
||||
|
||||
const BuiltinResourceT *EmbeddedStorageRegistry::get(const std::string &name) const {
|
||||
auto it = resources.find(name);
|
||||
if (resources.end() == it) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
return &it->second;
|
||||
}
|
||||
|
||||
BuiltinResourceT EmbeddedStorage::loadImpl(const std::string &fullResourceName) {
|
||||
auto *constResource = EmbeddedStorageRegistry::getInstance().get(fullResourceName);
|
||||
if (constResource == nullptr) {
|
||||
BuiltinResourceT ret;
|
||||
return ret;
|
||||
}
|
||||
|
||||
return createBuiltinResource(*constResource);
|
||||
}
|
||||
|
||||
BuiltinsLib::BuiltinsLib() {
|
||||
allStorages.push_back(std::unique_ptr<Storage>(new EmbeddedStorage("")));
|
||||
allStorages.push_back(std::unique_ptr<Storage>(new FileStorage(getDriverInstallationPath())));
|
||||
}
|
||||
|
||||
BuiltinCode BuiltinsLib::getBuiltinCode(EBuiltInOps::Type builtin, BuiltinCode::ECodeType requestedCodeType, Device &device) {
|
||||
std::lock_guard<std::mutex> lockRaii{mutex};
|
||||
|
||||
BuiltinResourceT bc;
|
||||
BuiltinCode::ECodeType usedCodetType = BuiltinCode::ECodeType::INVALID;
|
||||
|
||||
if (requestedCodeType == BuiltinCode::ECodeType::Any) {
|
||||
uint32_t codeType = static_cast<uint32_t>(BuiltinCode::ECodeType::Binary);
|
||||
if (DebugManager.flags.RebuildPrecompiledKernels.get()) {
|
||||
codeType = static_cast<uint32_t>(BuiltinCode::ECodeType::Source);
|
||||
}
|
||||
for (uint32_t e = static_cast<uint32_t>(BuiltinCode::ECodeType::COUNT);
|
||||
codeType != e; ++codeType) {
|
||||
bc = getBuiltinResource(builtin, static_cast<BuiltinCode::ECodeType>(codeType), device);
|
||||
if (bc.size() > 0) {
|
||||
usedCodetType = static_cast<BuiltinCode::ECodeType>(codeType);
|
||||
break;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
bc = getBuiltinResource(builtin, requestedCodeType, device);
|
||||
usedCodetType = requestedCodeType;
|
||||
}
|
||||
|
||||
BuiltinCode ret;
|
||||
std::swap(ret.resource, bc);
|
||||
ret.type = usedCodetType;
|
||||
ret.targetDevice = &device;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
std::unique_ptr<Program> BuiltinsLib::createProgramFromCode(const BuiltinCode &bc, Device &device) {
|
||||
std::unique_ptr<Program> ret;
|
||||
const char *data = bc.resource.data();
|
||||
size_t dataLen = bc.resource.size();
|
||||
cl_int err = 0;
|
||||
switch (bc.type) {
|
||||
default:
|
||||
break;
|
||||
case BuiltinCode::ECodeType::Source:
|
||||
case BuiltinCode::ECodeType::Intermediate:
|
||||
ret.reset(Program::create(data, nullptr, device, true, &err));
|
||||
break;
|
||||
case BuiltinCode::ECodeType::Binary:
|
||||
ret.reset(Program::createFromGenBinary(*device.getExecutionEnvironment(), nullptr, data, dataLen, true, nullptr, &device));
|
||||
break;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
BuiltinResourceT BuiltinsLib::getBuiltinResource(EBuiltInOps::Type builtin, BuiltinCode::ECodeType requestedCodeType, Device &device) {
|
||||
BuiltinResourceT bc;
|
||||
std::string resourceNameGeneric = createBuiltinResourceName(builtin, BuiltinCode::getExtension(requestedCodeType));
|
||||
std::string resourceNameForPlatformType = createBuiltinResourceName(builtin, BuiltinCode::getExtension(requestedCodeType), getFamilyNameWithType(device.getHardwareInfo()));
|
||||
std::string resourceNameForPlatformTypeAndStepping = createBuiltinResourceName(builtin, BuiltinCode::getExtension(requestedCodeType), getFamilyNameWithType(device.getHardwareInfo()),
|
||||
device.getHardwareInfo().platform.usRevId);
|
||||
|
||||
for (auto &rn : {resourceNameForPlatformTypeAndStepping, resourceNameForPlatformType, resourceNameGeneric}) { // first look for dedicated version, only fallback to generic one
|
||||
for (auto &s : allStorages) {
|
||||
bc = s.get()->load(rn);
|
||||
if (bc.size() != 0) {
|
||||
return bc;
|
||||
}
|
||||
}
|
||||
}
|
||||
return bc;
|
||||
}
|
||||
|
||||
} // namespace NEO
|
||||
@@ -1,14 +0,0 @@
|
||||
#
|
||||
# Copyright (C) 2020 Intel Corporation
|
||||
#
|
||||
# SPDX-License-Identifier: MIT
|
||||
#
|
||||
|
||||
set(RUNTIME_SRCS_BUILT_INS_OPS
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
|
||||
${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}/built_in_ops.h
|
||||
)
|
||||
|
||||
target_sources(${NEO_STATIC_LIB_NAME} PRIVATE ${RUNTIME_SRCS_EMBARGO_BUILT_INS_OPS})
|
||||
|
||||
add_subdirectories()
|
||||
@@ -1,15 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include "opencl/source/built_ins/built_in_ops_base.h"
|
||||
|
||||
namespace NEO {
|
||||
namespace EBuiltInOps {
|
||||
constexpr Type MaxCoreValue{MaxBaseValue};
|
||||
}
|
||||
} // namespace NEO
|
||||
@@ -1,71 +0,0 @@
|
||||
#
|
||||
# Copyright (C) 2018-2020 Intel Corporation
|
||||
#
|
||||
# SPDX-License-Identifier: MIT
|
||||
#
|
||||
|
||||
add_library(${BUILTINS_BINARIES_LIB_NAME} OBJECT EXCLUDE_FROM_ALL builtins_binary.cmake)
|
||||
|
||||
# Add builtins sources
|
||||
add_subdirectory(registry)
|
||||
|
||||
set(GENERATED_BUILTINS
|
||||
"aux_translation"
|
||||
"copy_buffer_rect"
|
||||
"copy_buffer_to_buffer"
|
||||
"copy_buffer_to_image3d"
|
||||
"copy_image3d_to_buffer"
|
||||
"copy_image_to_image1d"
|
||||
"copy_image_to_image2d"
|
||||
"copy_image_to_image3d"
|
||||
"fill_buffer"
|
||||
"fill_image1d"
|
||||
"fill_image2d"
|
||||
"fill_image3d"
|
||||
)
|
||||
|
||||
set(GENERATED_BUILTINS_STATELESS
|
||||
"copy_buffer_to_buffer_stateless"
|
||||
"copy_buffer_rect_stateless"
|
||||
"copy_buffer_to_image3d_stateless"
|
||||
"copy_image3d_to_buffer_stateless"
|
||||
"fill_buffer_stateless"
|
||||
)
|
||||
|
||||
# Generate builtins cpps
|
||||
if(COMPILE_BUILT_INS)
|
||||
add_subdirectory(kernels)
|
||||
endif()
|
||||
|
||||
macro(macro_for_each_gen)
|
||||
foreach(PLATFORM_TYPE ${PLATFORM_TYPES})
|
||||
get_family_name_with_type(${GEN_TYPE} ${PLATFORM_TYPE})
|
||||
foreach(GENERATED_BUILTIN ${GENERATED_BUILTINS})
|
||||
list(APPEND GENERATED_BUILTINS_CPPS ${BUILTINS_INCLUDE_DIR}/${RUNTIME_GENERATED_${GENERATED_BUILTIN}_${family_name_with_type}})
|
||||
endforeach()
|
||||
foreach(GENERATED_BUILTIN_STATELESS ${GENERATED_BUILTINS_STATELESS})
|
||||
list(APPEND GENERATED_BUILTINS_CPPS ${BUILTINS_INCLUDE_DIR}/${RUNTIME_GENERATED_${GENERATED_BUILTIN_STATELESS}_${family_name_with_type}})
|
||||
endforeach()
|
||||
endforeach()
|
||||
source_group("generated files\\${GEN_TYPE_LOWER}" FILES ${GENERATED_BUILTINS_CPPS})
|
||||
endmacro()
|
||||
|
||||
apply_macro_for_each_gen("SUPPORTED")
|
||||
|
||||
if(COMPILE_BUILT_INS)
|
||||
target_sources(${BUILTINS_BINARIES_LIB_NAME} PUBLIC ${GENERATED_BUILTINS_CPPS})
|
||||
set_source_files_properties(${GENERATED_BUILTINS_CPPS} PROPERTIES GENERATED TRUE)
|
||||
endif()
|
||||
|
||||
set_target_properties(${BUILTINS_BINARIES_LIB_NAME} PROPERTIES LINKER_LANGUAGE CXX)
|
||||
set_target_properties(${BUILTINS_BINARIES_LIB_NAME} PROPERTIES POSITION_INDEPENDENT_CODE ON)
|
||||
set_target_properties(${BUILTINS_BINARIES_LIB_NAME} PROPERTIES FOLDER "built_ins")
|
||||
|
||||
target_include_directories(${BUILTINS_BINARIES_LIB_NAME} PRIVATE
|
||||
${ENGINE_NODE_DIR}
|
||||
${KHRONOS_HEADERS_DIR}
|
||||
${KHRONOS_GL_HEADERS_DIR}
|
||||
${NEO__GMM_INCLUDE_DIR}
|
||||
${NEO__IGC_INCLUDE_DIR}
|
||||
${THIRD_PARTY_DIR}
|
||||
)
|
||||
@@ -7,14 +7,14 @@
|
||||
|
||||
#include "opencl/source/built_ins/builtins_dispatch_builder.h"
|
||||
|
||||
#include "shared/source/built_ins/built_ins.h"
|
||||
#include "shared/source/built_ins/sip.h"
|
||||
#include "shared/source/compiler_interface/compiler_interface.h"
|
||||
#include "shared/source/helpers/basic_math.h"
|
||||
#include "shared/source/helpers/debug_helpers.h"
|
||||
|
||||
#include "opencl/source/built_ins/aux_translation_builtin.h"
|
||||
#include "opencl/source/built_ins/built_ins.h"
|
||||
#include "opencl/source/built_ins/built_ins.inl"
|
||||
#include "opencl/source/built_ins/sip.h"
|
||||
#include "opencl/source/built_ins/vme_dispatch_builder.h"
|
||||
#include "opencl/source/device/cl_device.h"
|
||||
#include "opencl/source/helpers/built_ins_helper.h"
|
||||
@@ -761,4 +761,25 @@ BuiltinDispatchInfoBuilder &BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuil
|
||||
}
|
||||
return *operationBuilder.first;
|
||||
}
|
||||
|
||||
BuiltInOwnershipWrapper::BuiltInOwnershipWrapper(BuiltinDispatchInfoBuilder &inputBuilder, Context *context) {
|
||||
takeOwnership(inputBuilder, context);
|
||||
}
|
||||
BuiltInOwnershipWrapper::~BuiltInOwnershipWrapper() {
|
||||
if (builder) {
|
||||
for (auto &kernel : builder->peekUsedKernels()) {
|
||||
kernel->setContext(nullptr);
|
||||
kernel->releaseOwnership();
|
||||
}
|
||||
}
|
||||
}
|
||||
void BuiltInOwnershipWrapper::takeOwnership(BuiltinDispatchInfoBuilder &inputBuilder, Context *context) {
|
||||
UNRECOVERABLE_IF(builder);
|
||||
builder = &inputBuilder;
|
||||
for (auto &kernel : builder->peekUsedKernels()) {
|
||||
kernel->takeOwnership();
|
||||
kernel->setContext(context);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace NEO
|
||||
@@ -6,9 +6,9 @@
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include "shared/source/built_ins/built_ins.h"
|
||||
#include "shared/source/helpers/vec.h"
|
||||
|
||||
#include "opencl/source/built_ins/built_ins.h"
|
||||
#include "opencl/source/kernel/kernel.h"
|
||||
|
||||
#include "CL/cl.h"
|
||||
@@ -109,4 +109,16 @@ class BuiltInDispatchBuilderOp {
|
||||
std::unique_ptr<BuiltinDispatchInfoBuilder> newBuilder);
|
||||
};
|
||||
|
||||
class BuiltInOwnershipWrapper : public NonCopyableOrMovableClass {
|
||||
public:
|
||||
BuiltInOwnershipWrapper() = default;
|
||||
BuiltInOwnershipWrapper(BuiltinDispatchInfoBuilder &inputBuilder, Context *context);
|
||||
~BuiltInOwnershipWrapper();
|
||||
|
||||
void takeOwnership(BuiltinDispatchInfoBuilder &inputBuilder, Context *context);
|
||||
|
||||
protected:
|
||||
BuiltinDispatchInfoBuilder *builder = nullptr;
|
||||
};
|
||||
|
||||
} // namespace NEO
|
||||
|
||||
@@ -4,10 +4,10 @@
|
||||
# SPDX-License-Identifier: MIT
|
||||
#
|
||||
|
||||
add_custom_target(builtins)
|
||||
set_target_properties(builtins PROPERTIES FOLDER "built_ins")
|
||||
add_custom_target(builtins_vme_sources)
|
||||
set_target_properties(builtins_vme_sources PROPERTIES FOLDER "built_ins")
|
||||
set(BUILTINS_OUTDIR_WITH_ARCH "${TargetDir}/built_ins/${NEO_ARCH}")
|
||||
add_dependencies(${BUILTINS_BINARIES_LIB_NAME} builtins)
|
||||
add_dependencies(${BUILTINS_BINARIES_LIB_NAME} builtins_vme_sources)
|
||||
add_subdirectories()
|
||||
set(GENERATED_BUILTINS ${GENERATED_BUILTINS} PARENT_SCOPE)
|
||||
set(GENERATED_BUILTINS_STATELESS ${GENERATED_BUILTINS_STATELESS} PARENT_SCOPE)
|
||||
@@ -95,7 +95,7 @@ macro(macro_for_each_gen)
|
||||
list(APPEND BUILTINS_COMMANDS ${TargetDir}/${BUILTIN_CPP})
|
||||
set(RUNTIME_GENERATED_${GENERATED_BUILTIN_STATELESS}_${family_name_with_type} ${BUILTIN_CPP} PARENT_SCOPE)
|
||||
endforeach()
|
||||
set(target_name builtins_${family_name_with_type})
|
||||
set(target_name builtins_${family_name_with_type}_vme)
|
||||
add_custom_target(${target_name} DEPENDS ${BUILTINS_COMMANDS})
|
||||
add_dependencies(builtins ${target_name})
|
||||
set_target_properties(${target_name} PROPERTIES FOLDER "opencl/source/built_ins/${family_name_with_type}")
|
||||
|
||||
@@ -1,14 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2018-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
R"===(
|
||||
__kernel void fullCopy(__global const uint* src, __global uint* dst) {
|
||||
unsigned int gid = get_global_id(0);
|
||||
uint4 loaded = vload4(gid, src);
|
||||
vstore4(loaded, gid, dst);
|
||||
}
|
||||
)==="
|
||||
@@ -1,48 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2017-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
R"===(
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
__kernel void CopyBufferRectBytes2d(
|
||||
__global const char* src,
|
||||
__global char* dst,
|
||||
uint4 SrcOrigin,
|
||||
uint4 DstOrigin,
|
||||
uint2 SrcPitch,
|
||||
uint2 DstPitch )
|
||||
|
||||
{
|
||||
int x = get_global_id(0);
|
||||
int y = get_global_id(1);
|
||||
|
||||
uint LSrcOffset = x + SrcOrigin.x + ( ( y + SrcOrigin.y ) * SrcPitch.x );
|
||||
uint LDstOffset = x + DstOrigin.x + ( ( y + DstOrigin.y ) * DstPitch.x );
|
||||
|
||||
*( dst + LDstOffset ) = *( src + LSrcOffset );
|
||||
|
||||
}
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
__kernel void CopyBufferRectBytes3d(
|
||||
__global const char* src,
|
||||
__global char* dst,
|
||||
uint4 SrcOrigin,
|
||||
uint4 DstOrigin,
|
||||
uint2 SrcPitch,
|
||||
uint2 DstPitch )
|
||||
|
||||
{
|
||||
int x = get_global_id(0);
|
||||
int y = get_global_id(1);
|
||||
int z = get_global_id(2);
|
||||
|
||||
uint LSrcOffset = x + SrcOrigin.x + ( ( y + SrcOrigin.y ) * SrcPitch.x ) + ( ( z + SrcOrigin.z ) * SrcPitch.y );
|
||||
uint LDstOffset = x + DstOrigin.x + ( ( y + DstOrigin.y ) * DstPitch.x ) + ( ( z + DstOrigin.z ) * DstPitch.y );
|
||||
|
||||
*( dst + LDstOffset ) = *( src + LSrcOffset );
|
||||
|
||||
}
|
||||
)==="
|
||||
@@ -1,48 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
R"===(
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
__kernel void CopyBufferRectBytes2d(
|
||||
__global const char* src,
|
||||
__global char* dst,
|
||||
ulong4 SrcOrigin,
|
||||
ulong4 DstOrigin,
|
||||
ulong2 SrcPitch,
|
||||
ulong2 DstPitch )
|
||||
|
||||
{
|
||||
size_t x = get_global_id(0);
|
||||
size_t y = get_global_id(1);
|
||||
|
||||
size_t LSrcOffset = x + SrcOrigin.x + ( ( y + SrcOrigin.y ) * SrcPitch.x );
|
||||
size_t LDstOffset = x + DstOrigin.x + ( ( y + DstOrigin.y ) * DstPitch.x );
|
||||
|
||||
*( dst + LDstOffset ) = *( src + LSrcOffset );
|
||||
|
||||
}
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
__kernel void CopyBufferRectBytes3d(
|
||||
__global const char* src,
|
||||
__global char* dst,
|
||||
ulong4 SrcOrigin,
|
||||
ulong4 DstOrigin,
|
||||
ulong2 SrcPitch,
|
||||
ulong2 DstPitch )
|
||||
|
||||
{
|
||||
size_t x = get_global_id(0);
|
||||
size_t y = get_global_id(1);
|
||||
size_t z = get_global_id(2);
|
||||
|
||||
size_t LSrcOffset = x + SrcOrigin.x + ( ( y + SrcOrigin.y ) * SrcPitch.x ) + ( ( z + SrcOrigin.z ) * SrcPitch.y );
|
||||
size_t LDstOffset = x + DstOrigin.x + ( ( y + DstOrigin.y ) * DstPitch.x ) + ( ( z + DstOrigin.z ) * DstPitch.y );
|
||||
|
||||
*( dst + LDstOffset ) = *( src + LSrcOffset );
|
||||
|
||||
}
|
||||
)==="
|
||||
@@ -1,54 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2017-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
R"===(
|
||||
__kernel void CopyBufferToBufferBytes(
|
||||
const __global uchar* pSrc,
|
||||
__global uchar* pDst,
|
||||
uint srcOffsetInBytes,
|
||||
uint dstOffsetInBytes,
|
||||
uint bytesToRead )
|
||||
{
|
||||
pSrc += ( srcOffsetInBytes + get_global_id(0) );
|
||||
pDst += ( dstOffsetInBytes + get_global_id(0) );
|
||||
pDst[ 0 ] = pSrc[ 0 ];
|
||||
}
|
||||
|
||||
__kernel void CopyBufferToBufferLeftLeftover(
|
||||
const __global uchar* pSrc,
|
||||
__global uchar* pDst,
|
||||
uint srcOffsetInBytes,
|
||||
uint dstOffsetInBytes)
|
||||
{
|
||||
unsigned int gid = get_global_id(0);
|
||||
pDst[ gid + dstOffsetInBytes ] = pSrc[ gid + srcOffsetInBytes ];
|
||||
}
|
||||
|
||||
__kernel void CopyBufferToBufferMiddle(
|
||||
const __global uint* pSrc,
|
||||
__global uint* pDst,
|
||||
uint srcOffsetInBytes,
|
||||
uint dstOffsetInBytes)
|
||||
{
|
||||
unsigned int gid = get_global_id(0);
|
||||
pDst += dstOffsetInBytes >> 2;
|
||||
pSrc += srcOffsetInBytes >> 2;
|
||||
uint4 loaded = vload4(gid, pSrc);
|
||||
vstore4(loaded, gid, pDst);
|
||||
}
|
||||
|
||||
__kernel void CopyBufferToBufferRightLeftover(
|
||||
const __global uchar* pSrc,
|
||||
__global uchar* pDst,
|
||||
uint srcOffsetInBytes,
|
||||
uint dstOffsetInBytes)
|
||||
{
|
||||
unsigned int gid = get_global_id(0);
|
||||
pDst[ gid + dstOffsetInBytes ] = pSrc[ gid + srcOffsetInBytes ];
|
||||
}
|
||||
|
||||
)==="
|
||||
@@ -1,54 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
R"===(
|
||||
__kernel void CopyBufferToBufferBytes(
|
||||
const __global uchar* pSrc,
|
||||
__global uchar* pDst,
|
||||
ulong srcOffsetInBytes,
|
||||
ulong dstOffsetInBytes,
|
||||
ulong bytesToRead )
|
||||
{
|
||||
pSrc += ( srcOffsetInBytes + get_global_id(0) );
|
||||
pDst += ( dstOffsetInBytes + get_global_id(0) );
|
||||
pDst[ 0 ] = pSrc[ 0 ];
|
||||
}
|
||||
|
||||
__kernel void CopyBufferToBufferLeftLeftover(
|
||||
const __global uchar* pSrc,
|
||||
__global uchar* pDst,
|
||||
ulong srcOffsetInBytes,
|
||||
ulong dstOffsetInBytes)
|
||||
{
|
||||
size_t gid = get_global_id(0);
|
||||
pDst[ gid + dstOffsetInBytes ] = pSrc[ gid + srcOffsetInBytes ];
|
||||
}
|
||||
|
||||
__kernel void CopyBufferToBufferMiddle(
|
||||
const __global uint* pSrc,
|
||||
__global uint* pDst,
|
||||
ulong srcOffsetInBytes,
|
||||
ulong dstOffsetInBytes)
|
||||
{
|
||||
size_t gid = get_global_id(0);
|
||||
pDst += dstOffsetInBytes >> 2;
|
||||
pSrc += srcOffsetInBytes >> 2;
|
||||
uint4 loaded = vload4(gid, pSrc);
|
||||
vstore4(loaded, gid, pDst);
|
||||
}
|
||||
|
||||
__kernel void CopyBufferToBufferRightLeftover(
|
||||
const __global uchar* pSrc,
|
||||
__global uchar* pDst,
|
||||
ulong srcOffsetInBytes,
|
||||
ulong dstOffsetInBytes)
|
||||
{
|
||||
size_t gid = get_global_id(0);
|
||||
pDst[ gid + dstOffsetInBytes ] = pSrc[ gid + srcOffsetInBytes ];
|
||||
}
|
||||
|
||||
)==="
|
||||
@@ -1,161 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2017-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
R"===(
|
||||
#pragma OPENCL EXTENSION cl_khr_3d_image_writes : enable
|
||||
|
||||
__kernel void CopyBufferToImage3dBytes(__global uchar *src,
|
||||
__write_only image3d_t output,
|
||||
int srcOffset,
|
||||
int4 dstOffset,
|
||||
uint2 Pitch) {
|
||||
const uint x = get_global_id(0);
|
||||
const uint y = get_global_id(1);
|
||||
const uint z = get_global_id(2);
|
||||
|
||||
int4 dstCoord = (int4)(x, y, z, 0) + dstOffset;
|
||||
uint LOffset = srcOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
|
||||
write_imageui(output, dstCoord, (uint4)(*(src + LOffset + x), 0, 0, 1));
|
||||
}
|
||||
|
||||
__kernel void CopyBufferToImage3d2Bytes(__global uchar *src,
|
||||
__write_only image3d_t output,
|
||||
int srcOffset,
|
||||
int4 dstOffset,
|
||||
uint2 Pitch) {
|
||||
const uint x = get_global_id(0);
|
||||
const uint y = get_global_id(1);
|
||||
const uint z = get_global_id(2);
|
||||
|
||||
int4 dstCoord = (int4)(x, y, z, 0) + dstOffset;
|
||||
uint LOffset = srcOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
|
||||
uint4 c = (uint4)(0, 0, 0, 1);
|
||||
|
||||
if(( ulong )(src + srcOffset) & 0x00000001){
|
||||
ushort upper = *((__global uchar*)(src + LOffset + x * 2 + 1));
|
||||
ushort lower = *((__global uchar*)(src + LOffset + x * 2));
|
||||
ushort combined = (upper << 8) | lower;
|
||||
c.x = (uint)combined;
|
||||
}
|
||||
else{
|
||||
c.x = (uint)(*(__global ushort*)(src + LOffset + x * 2));
|
||||
}
|
||||
write_imageui(output, dstCoord, c);
|
||||
}
|
||||
|
||||
__kernel void CopyBufferToImage3d4Bytes(__global uchar *src,
|
||||
__write_only image3d_t output,
|
||||
int srcOffset,
|
||||
int4 dstOffset,
|
||||
uint2 Pitch) {
|
||||
const uint x = get_global_id(0);
|
||||
const uint y = get_global_id(1);
|
||||
const uint z = get_global_id(2);
|
||||
|
||||
int4 dstCoord = (int4)(x, y, z, 0) + dstOffset;
|
||||
uint LOffset = srcOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
|
||||
uint4 c = (uint4)(0, 0, 0, 1);
|
||||
|
||||
if(( ulong )(src + srcOffset) & 0x00000003){
|
||||
uint upper2 = *((__global uchar*)(src + LOffset + x * 4 + 3));
|
||||
uint upper = *((__global uchar*)(src + LOffset + x * 4 + 2));
|
||||
uint lower2 = *((__global uchar*)(src + LOffset + x * 4 + 1));
|
||||
uint lower = *((__global uchar*)(src + LOffset + x * 4));
|
||||
uint combined = (upper2 << 24) | (upper << 16) | (lower2 << 8) | lower;
|
||||
c.x = combined;
|
||||
}
|
||||
else{
|
||||
c.x = (*(__global uint*)(src + LOffset + x * 4));
|
||||
}
|
||||
write_imageui(output, dstCoord, c);
|
||||
}
|
||||
|
||||
__kernel void CopyBufferToImage3d8Bytes(__global uchar *src,
|
||||
__write_only image3d_t output,
|
||||
int srcOffset,
|
||||
int4 dstOffset,
|
||||
uint2 Pitch) {
|
||||
const uint x = get_global_id(0);
|
||||
const uint y = get_global_id(1);
|
||||
const uint z = get_global_id(2);
|
||||
|
||||
int4 dstCoord = (int4)(x, y, z, 0) + dstOffset;
|
||||
uint LOffset = srcOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
|
||||
uint2 c = (uint2)(0, 0);//*((__global uint2*)(src + LOffset + x * 8));
|
||||
|
||||
if(( ulong )(src + srcOffset) & 0x00000007){
|
||||
uint upper2 = *((__global uchar*)(src + LOffset + x * 8 + 3));
|
||||
uint upper = *((__global uchar*)(src + LOffset + x * 8 + 2));
|
||||
uint lower2 = *((__global uchar*)(src + LOffset + x * 8 + 1));
|
||||
uint lower = *((__global uchar*)(src + LOffset + x * 8));
|
||||
uint combined = (upper2 << 24) | (upper << 16) | (lower2 << 8) | lower;
|
||||
c.x = combined;
|
||||
upper2 = *((__global uchar*)(src + LOffset + x * 8 + 7));
|
||||
upper = *((__global uchar*)(src + LOffset + x * 8 + 6));
|
||||
lower2 = *((__global uchar*)(src + LOffset + x * 8 + 5));
|
||||
lower = *((__global uchar*)(src + LOffset + x * 8 + 4));
|
||||
combined = ((uint)upper2 << 24) | ((uint)upper << 16) | ((uint)lower2 << 8) | lower;
|
||||
c.y = combined;
|
||||
}
|
||||
else{
|
||||
c = *((__global uint2*)(src + LOffset + x * 8));
|
||||
}
|
||||
|
||||
write_imageui(output, dstCoord, (uint4)(c.x, c.y, 0, 1));
|
||||
}
|
||||
|
||||
__kernel void CopyBufferToImage3d16Bytes(__global uchar *src,
|
||||
__write_only image3d_t output,
|
||||
int srcOffset,
|
||||
int4 dstOffset,
|
||||
uint2 Pitch) {
|
||||
const uint x = get_global_id(0);
|
||||
const uint y = get_global_id(1);
|
||||
const uint z = get_global_id(2);
|
||||
|
||||
int4 dstCoord = (int4)(x, y, z, 0) + dstOffset;
|
||||
uint LOffset = srcOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
|
||||
uint4 c = (uint4)(0, 0, 0, 0);
|
||||
|
||||
if(( ulong )(src + srcOffset) & 0x0000000f){
|
||||
uint upper2 = *((__global uchar*)(src + LOffset + x * 16 + 3));
|
||||
uint upper = *((__global uchar*)(src + LOffset + x * 16 + 2));
|
||||
uint lower2 = *((__global uchar*)(src + LOffset + x * 16 + 1));
|
||||
uint lower = *((__global uchar*)(src + LOffset + x * 16));
|
||||
uint combined = (upper2 << 24) | (upper << 16) | (lower2 << 8) | lower;
|
||||
c.x = combined;
|
||||
upper2 = *((__global uchar*)(src + LOffset + x * 16 + 7));
|
||||
upper = *((__global uchar*)(src + LOffset + x * 16 + 6));
|
||||
lower2 = *((__global uchar*)(src + LOffset + x * 16 + 5));
|
||||
lower = *((__global uchar*)(src + LOffset + x * 16 + 4));
|
||||
combined = (upper2 << 24) | (upper << 16) | (lower2 << 8) | lower;
|
||||
c.y = combined;
|
||||
upper2 = *((__global uchar*)(src + LOffset + x * 16 + 11));
|
||||
upper = *((__global uchar*)(src + LOffset + x * 16 + 10));
|
||||
lower2 = *((__global uchar*)(src + LOffset + x * 16 + 9));
|
||||
lower = *((__global uchar*)(src + LOffset + x * 16 + 8));
|
||||
combined = (upper2 << 24) | (upper << 16) | (lower2 << 8) | lower;
|
||||
c.z = combined;
|
||||
upper2 = *((__global uchar*)(src + LOffset + x * 16 + 15));
|
||||
upper = *((__global uchar*)(src + LOffset + x * 16 + 14));
|
||||
lower2 = *((__global uchar*)(src + LOffset + x * 16 + 13));
|
||||
lower = *((__global uchar*)(src + LOffset + x * 16 + 12));
|
||||
combined = (upper2 << 24) | (upper << 16) | (lower2 << 8) | lower;
|
||||
c.w = combined;
|
||||
}
|
||||
else{
|
||||
c = *((__global uint4 *)(src + LOffset + x * 16));
|
||||
}
|
||||
|
||||
write_imageui(output, dstCoord, c);
|
||||
}
|
||||
)==="
|
||||
@@ -1,161 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2017-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
R"===(
|
||||
#pragma OPENCL EXTENSION cl_khr_3d_image_writes : enable
|
||||
|
||||
__kernel void CopyBufferToImage3dBytes(__global uchar *src,
|
||||
__write_only image3d_t output,
|
||||
ulong srcOffset,
|
||||
int4 dstOffset,
|
||||
ulong2 Pitch) {
|
||||
const uint x = get_global_id(0);
|
||||
const uint y = get_global_id(1);
|
||||
const uint z = get_global_id(2);
|
||||
|
||||
int4 dstCoord = (int4)(x, y, z, 0) + dstOffset;
|
||||
ulong LOffset = srcOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
|
||||
write_imageui(output, dstCoord, (uint4)(*(src + LOffset + x), 0, 0, 1));
|
||||
}
|
||||
|
||||
__kernel void CopyBufferToImage3d2Bytes(__global uchar *src,
|
||||
__write_only image3d_t output,
|
||||
ulong srcOffset,
|
||||
int4 dstOffset,
|
||||
ulong2 Pitch) {
|
||||
const uint x = get_global_id(0);
|
||||
const uint y = get_global_id(1);
|
||||
const uint z = get_global_id(2);
|
||||
|
||||
int4 dstCoord = (int4)(x, y, z, 0) + dstOffset;
|
||||
ulong LOffset = srcOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
|
||||
uint4 c = (uint4)(0, 0, 0, 1);
|
||||
|
||||
if(( ulong )(src + srcOffset) & 0x00000001){
|
||||
ushort upper = *((__global uchar*)(src + LOffset + x * 2 + 1));
|
||||
ushort lower = *((__global uchar*)(src + LOffset + x * 2));
|
||||
ushort combined = (upper << 8) | lower;
|
||||
c.x = (uint)combined;
|
||||
}
|
||||
else{
|
||||
c.x = (uint)(*(__global ushort*)(src + LOffset + x * 2));
|
||||
}
|
||||
write_imageui(output, dstCoord, c);
|
||||
}
|
||||
|
||||
__kernel void CopyBufferToImage3d4Bytes(__global uchar *src,
|
||||
__write_only image3d_t output,
|
||||
ulong srcOffset,
|
||||
int4 dstOffset,
|
||||
ulong2 Pitch) {
|
||||
const uint x = get_global_id(0);
|
||||
const uint y = get_global_id(1);
|
||||
const uint z = get_global_id(2);
|
||||
|
||||
int4 dstCoord = (int4)(x, y, z, 0) + dstOffset;
|
||||
ulong LOffset = srcOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
|
||||
uint4 c = (uint4)(0, 0, 0, 1);
|
||||
|
||||
if(( ulong )(src + srcOffset) & 0x00000003){
|
||||
uint upper2 = *((__global uchar*)(src + LOffset + x * 4 + 3));
|
||||
uint upper = *((__global uchar*)(src + LOffset + x * 4 + 2));
|
||||
uint lower2 = *((__global uchar*)(src + LOffset + x * 4 + 1));
|
||||
uint lower = *((__global uchar*)(src + LOffset + x * 4));
|
||||
uint combined = (upper2 << 24) | (upper << 16) | (lower2 << 8) | lower;
|
||||
c.x = combined;
|
||||
}
|
||||
else{
|
||||
c.x = (*(__global uint*)(src + LOffset + x * 4));
|
||||
}
|
||||
write_imageui(output, dstCoord, c);
|
||||
}
|
||||
|
||||
__kernel void CopyBufferToImage3d8Bytes(__global uchar *src,
|
||||
__write_only image3d_t output,
|
||||
ulong srcOffset,
|
||||
int4 dstOffset,
|
||||
ulong2 Pitch) {
|
||||
const uint x = get_global_id(0);
|
||||
const uint y = get_global_id(1);
|
||||
const uint z = get_global_id(2);
|
||||
|
||||
int4 dstCoord = (int4)(x, y, z, 0) + dstOffset;
|
||||
ulong LOffset = srcOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
|
||||
uint2 c = (uint2)(0, 0);//*((__global uint2*)(src + LOffset + x * 8));
|
||||
|
||||
if(( ulong )(src + srcOffset) & 0x00000007){
|
||||
uint upper2 = *((__global uchar*)(src + LOffset + x * 8 + 3));
|
||||
uint upper = *((__global uchar*)(src + LOffset + x * 8 + 2));
|
||||
uint lower2 = *((__global uchar*)(src + LOffset + x * 8 + 1));
|
||||
uint lower = *((__global uchar*)(src + LOffset + x * 8));
|
||||
uint combined = (upper2 << 24) | (upper << 16) | (lower2 << 8) | lower;
|
||||
c.x = combined;
|
||||
upper2 = *((__global uchar*)(src + LOffset + x * 8 + 7));
|
||||
upper = *((__global uchar*)(src + LOffset + x * 8 + 6));
|
||||
lower2 = *((__global uchar*)(src + LOffset + x * 8 + 5));
|
||||
lower = *((__global uchar*)(src + LOffset + x * 8 + 4));
|
||||
combined = ((uint)upper2 << 24) | ((uint)upper << 16) | ((uint)lower2 << 8) | lower;
|
||||
c.y = combined;
|
||||
}
|
||||
else{
|
||||
c = *((__global uint2*)(src + LOffset + x * 8));
|
||||
}
|
||||
|
||||
write_imageui(output, dstCoord, (uint4)(c.x, c.y, 0, 1));
|
||||
}
|
||||
|
||||
__kernel void CopyBufferToImage3d16Bytes(__global uchar *src,
|
||||
__write_only image3d_t output,
|
||||
ulong srcOffset,
|
||||
int4 dstOffset,
|
||||
ulong2 Pitch) {
|
||||
const uint x = get_global_id(0);
|
||||
const uint y = get_global_id(1);
|
||||
const uint z = get_global_id(2);
|
||||
|
||||
int4 dstCoord = (int4)(x, y, z, 0) + dstOffset;
|
||||
ulong LOffset = srcOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
|
||||
uint4 c = (uint4)(0, 0, 0, 0);
|
||||
|
||||
if(( ulong )(src + srcOffset) & 0x0000000f){
|
||||
uint upper2 = *((__global uchar*)(src + LOffset + x * 16 + 3));
|
||||
uint upper = *((__global uchar*)(src + LOffset + x * 16 + 2));
|
||||
uint lower2 = *((__global uchar*)(src + LOffset + x * 16 + 1));
|
||||
uint lower = *((__global uchar*)(src + LOffset + x * 16));
|
||||
uint combined = (upper2 << 24) | (upper << 16) | (lower2 << 8) | lower;
|
||||
c.x = combined;
|
||||
upper2 = *((__global uchar*)(src + LOffset + x * 16 + 7));
|
||||
upper = *((__global uchar*)(src + LOffset + x * 16 + 6));
|
||||
lower2 = *((__global uchar*)(src + LOffset + x * 16 + 5));
|
||||
lower = *((__global uchar*)(src + LOffset + x * 16 + 4));
|
||||
combined = (upper2 << 24) | (upper << 16) | (lower2 << 8) | lower;
|
||||
c.y = combined;
|
||||
upper2 = *((__global uchar*)(src + LOffset + x * 16 + 11));
|
||||
upper = *((__global uchar*)(src + LOffset + x * 16 + 10));
|
||||
lower2 = *((__global uchar*)(src + LOffset + x * 16 + 9));
|
||||
lower = *((__global uchar*)(src + LOffset + x * 16 + 8));
|
||||
combined = (upper2 << 24) | (upper << 16) | (lower2 << 8) | lower;
|
||||
c.z = combined;
|
||||
upper2 = *((__global uchar*)(src + LOffset + x * 16 + 15));
|
||||
upper = *((__global uchar*)(src + LOffset + x * 16 + 14));
|
||||
lower2 = *((__global uchar*)(src + LOffset + x * 16 + 13));
|
||||
lower = *((__global uchar*)(src + LOffset + x * 16 + 12));
|
||||
combined = (upper2 << 24) | (upper << 16) | (lower2 << 8) | lower;
|
||||
c.w = combined;
|
||||
}
|
||||
else{
|
||||
c = *((__global uint4 *)(src + LOffset + x * 16));
|
||||
}
|
||||
|
||||
write_imageui(output, dstCoord, c);
|
||||
}
|
||||
)==="
|
||||
@@ -1,139 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2017-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
R"===(
|
||||
__kernel void CopyImage3dToBufferBytes(__read_only image3d_t input,
|
||||
__global uchar *dst,
|
||||
int4 srcOffset,
|
||||
int dstOffset,
|
||||
uint2 Pitch) {
|
||||
const uint x = get_global_id(0);
|
||||
const uint y = get_global_id(1);
|
||||
const uint z = get_global_id(2);
|
||||
|
||||
const int4 srcCoord = (int4)(x, y, z, 0) + srcOffset;
|
||||
uint DstOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
|
||||
uint4 c = read_imageui(input, srcCoord);
|
||||
*(dst + DstOffset + x) = convert_uchar_sat(c.x);
|
||||
}
|
||||
|
||||
__kernel void CopyImage3dToBuffer2Bytes(__read_only image3d_t input,
|
||||
__global uchar *dst,
|
||||
int4 srcOffset,
|
||||
int dstOffset,
|
||||
uint2 Pitch) {
|
||||
const uint x = get_global_id(0);
|
||||
const uint y = get_global_id(1);
|
||||
const uint z = get_global_id(2);
|
||||
|
||||
const int4 srcCoord = (int4)(x, y, z, 0) + srcOffset;
|
||||
uint DstOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
|
||||
uint4 c = read_imageui(input, srcCoord);
|
||||
|
||||
if(( ulong )(dst + dstOffset) & 0x00000001){
|
||||
*((__global uchar*)(dst + DstOffset + x * 2 + 1)) = convert_uchar_sat((c.x >> 8 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 2)) = convert_uchar_sat(c.x & 0xff);
|
||||
}
|
||||
else{
|
||||
*((__global ushort*)(dst + DstOffset + x * 2)) = convert_ushort_sat(c.x);
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void CopyImage3dToBuffer4Bytes(__read_only image3d_t input,
|
||||
__global uchar *dst,
|
||||
int4 srcOffset,
|
||||
int dstOffset,
|
||||
uint2 Pitch) {
|
||||
const uint x = get_global_id(0);
|
||||
const uint y = get_global_id(1);
|
||||
const uint z = get_global_id(2);
|
||||
|
||||
const int4 srcCoord = (int4)(x, y, z, 0) + srcOffset;
|
||||
uint DstOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
|
||||
uint4 c = read_imageui(input, srcCoord);
|
||||
|
||||
if(( ulong )(dst + dstOffset) & 0x00000003){
|
||||
*((__global uchar*)(dst + DstOffset + x * 4 + 3)) = convert_uchar_sat((c.x >> 24 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 4 + 2)) = convert_uchar_sat((c.x >> 16 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 4 + 1)) = convert_uchar_sat((c.x >> 8 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 4)) = convert_uchar_sat(c.x & 0xff);
|
||||
}
|
||||
else{
|
||||
*((__global uint*)(dst + DstOffset + x * 4)) = c.x;
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void CopyImage3dToBuffer8Bytes(__read_only image3d_t input,
|
||||
__global uchar *dst,
|
||||
int4 srcOffset,
|
||||
int dstOffset,
|
||||
uint2 Pitch) {
|
||||
const uint x = get_global_id(0);
|
||||
const uint y = get_global_id(1);
|
||||
const uint z = get_global_id(2);
|
||||
|
||||
const int4 srcCoord = (int4)(x, y, z, 0) + srcOffset;
|
||||
uint DstOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
|
||||
uint4 c = read_imageui(input, srcCoord);
|
||||
|
||||
if(( ulong )(dst + dstOffset) & 0x00000007){
|
||||
*((__global uchar*)(dst + DstOffset + x * 8 + 3)) = convert_uchar_sat((c.x >> 24 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 8 + 2)) = convert_uchar_sat((c.x >> 16 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 8 + 1)) = convert_uchar_sat((c.x >> 8 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 8)) = convert_uchar_sat(c.x & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 8 + 7)) = convert_uchar_sat((c.y >> 24 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 8 + 6)) = convert_uchar_sat((c.y >> 16 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 8 + 5)) = convert_uchar_sat((c.y >> 8 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 8 + 4)) = convert_uchar_sat(c.y & 0xff);
|
||||
}
|
||||
else{
|
||||
uint2 d = (uint2)(c.x,c.y);
|
||||
*((__global uint2*)(dst + DstOffset + x * 8)) = d;
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void CopyImage3dToBuffer16Bytes(__read_only image3d_t input,
|
||||
__global uchar *dst,
|
||||
int4 srcOffset,
|
||||
int dstOffset,
|
||||
uint2 Pitch) {
|
||||
const uint x = get_global_id(0);
|
||||
const uint y = get_global_id(1);
|
||||
const uint z = get_global_id(2);
|
||||
|
||||
const int4 srcCoord = (int4)(x, y, z, 0) + srcOffset;
|
||||
uint DstOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
|
||||
const uint4 c = read_imageui(input, srcCoord);
|
||||
|
||||
if(( ulong )(dst + dstOffset) & 0x0000000f){
|
||||
*((__global uchar*)(dst + DstOffset + x * 16 + 3)) = convert_uchar_sat((c.x >> 24 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 16 + 2)) = convert_uchar_sat((c.x >> 16 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 16 + 1)) = convert_uchar_sat((c.x >> 8 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 16)) = convert_uchar_sat(c.x & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 16 + 7)) = convert_uchar_sat((c.y >> 24 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 16 + 6)) = convert_uchar_sat((c.y >> 16 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 16 + 5)) = convert_uchar_sat((c.y >> 8 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 16 + 4)) = convert_uchar_sat(c.y & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 16 + 11)) = convert_uchar_sat((c.z >> 24 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 16 + 10)) = convert_uchar_sat((c.z >> 16 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 16 + 9)) = convert_uchar_sat((c.z >> 8 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 16 + 8)) = convert_uchar_sat(c.z & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 16 + 15)) = convert_uchar_sat((c.w >> 24 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 16 + 14)) = convert_uchar_sat((c.w >> 16 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 16 + 13)) = convert_uchar_sat((c.w >> 8 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 16 + 12)) = convert_uchar_sat(c.w & 0xff);
|
||||
}
|
||||
else{
|
||||
*(__global uint4*)(dst + DstOffset + x * 16) = c;
|
||||
}
|
||||
}
|
||||
)==="
|
||||
@@ -1,139 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2017-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
R"===(
|
||||
__kernel void CopyImage3dToBufferBytes(__read_only image3d_t input,
|
||||
__global uchar *dst,
|
||||
int4 srcOffset,
|
||||
ulong dstOffset,
|
||||
ulong2 Pitch) {
|
||||
const uint x = get_global_id(0);
|
||||
const uint y = get_global_id(1);
|
||||
const uint z = get_global_id(2);
|
||||
|
||||
const int4 srcCoord = (int4)(x, y, z, 0) + srcOffset;
|
||||
ulong DstOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
|
||||
uint4 c = read_imageui(input, srcCoord);
|
||||
*(dst + DstOffset + x) = convert_uchar_sat(c.x);
|
||||
}
|
||||
|
||||
__kernel void CopyImage3dToBuffer2Bytes(__read_only image3d_t input,
|
||||
__global uchar *dst,
|
||||
int4 srcOffset,
|
||||
ulong dstOffset,
|
||||
ulong2 Pitch) {
|
||||
const uint x = get_global_id(0);
|
||||
const uint y = get_global_id(1);
|
||||
const uint z = get_global_id(2);
|
||||
|
||||
const int4 srcCoord = (int4)(x, y, z, 0) + srcOffset;
|
||||
ulong DstOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
|
||||
uint4 c = read_imageui(input, srcCoord);
|
||||
|
||||
if(( ulong )(dst + dstOffset) & 0x00000001){
|
||||
*((__global uchar*)(dst + DstOffset + x * 2 + 1)) = convert_uchar_sat((c.x >> 8 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 2)) = convert_uchar_sat(c.x & 0xff);
|
||||
}
|
||||
else{
|
||||
*((__global ushort*)(dst + DstOffset + x * 2)) = convert_ushort_sat(c.x);
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void CopyImage3dToBuffer4Bytes(__read_only image3d_t input,
|
||||
__global uchar *dst,
|
||||
int4 srcOffset,
|
||||
ulong dstOffset,
|
||||
ulong2 Pitch) {
|
||||
const uint x = get_global_id(0);
|
||||
const uint y = get_global_id(1);
|
||||
const uint z = get_global_id(2);
|
||||
|
||||
const int4 srcCoord = (int4)(x, y, z, 0) + srcOffset;
|
||||
ulong DstOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
|
||||
uint4 c = read_imageui(input, srcCoord);
|
||||
|
||||
if(( ulong )(dst + dstOffset) & 0x00000003){
|
||||
*((__global uchar*)(dst + DstOffset + x * 4 + 3)) = convert_uchar_sat((c.x >> 24 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 4 + 2)) = convert_uchar_sat((c.x >> 16 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 4 + 1)) = convert_uchar_sat((c.x >> 8 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 4)) = convert_uchar_sat(c.x & 0xff);
|
||||
}
|
||||
else{
|
||||
*((__global uint*)(dst + DstOffset + x * 4)) = c.x;
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void CopyImage3dToBuffer8Bytes(__read_only image3d_t input,
|
||||
__global uchar *dst,
|
||||
int4 srcOffset,
|
||||
ulong dstOffset,
|
||||
ulong2 Pitch) {
|
||||
const uint x = get_global_id(0);
|
||||
const uint y = get_global_id(1);
|
||||
const uint z = get_global_id(2);
|
||||
|
||||
const int4 srcCoord = (int4)(x, y, z, 0) + srcOffset;
|
||||
ulong DstOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
|
||||
uint4 c = read_imageui(input, srcCoord);
|
||||
|
||||
if(( ulong )(dst + dstOffset) & 0x00000007){
|
||||
*((__global uchar*)(dst + DstOffset + x * 8 + 3)) = convert_uchar_sat((c.x >> 24 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 8 + 2)) = convert_uchar_sat((c.x >> 16 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 8 + 1)) = convert_uchar_sat((c.x >> 8 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 8)) = convert_uchar_sat(c.x & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 8 + 7)) = convert_uchar_sat((c.y >> 24 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 8 + 6)) = convert_uchar_sat((c.y >> 16 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 8 + 5)) = convert_uchar_sat((c.y >> 8 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 8 + 4)) = convert_uchar_sat(c.y & 0xff);
|
||||
}
|
||||
else{
|
||||
uint2 d = (uint2)(c.x,c.y);
|
||||
*((__global uint2*)(dst + DstOffset + x * 8)) = d;
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void CopyImage3dToBuffer16Bytes(__read_only image3d_t input,
|
||||
__global uchar *dst,
|
||||
int4 srcOffset,
|
||||
ulong dstOffset,
|
||||
ulong2 Pitch) {
|
||||
const uint x = get_global_id(0);
|
||||
const uint y = get_global_id(1);
|
||||
const uint z = get_global_id(2);
|
||||
|
||||
const int4 srcCoord = (int4)(x, y, z, 0) + srcOffset;
|
||||
ulong DstOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
|
||||
const uint4 c = read_imageui(input, srcCoord);
|
||||
|
||||
if(( ulong )(dst + dstOffset) & 0x0000000f){
|
||||
*((__global uchar*)(dst + DstOffset + x * 16 + 3)) = convert_uchar_sat((c.x >> 24 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 16 + 2)) = convert_uchar_sat((c.x >> 16 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 16 + 1)) = convert_uchar_sat((c.x >> 8 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 16)) = convert_uchar_sat(c.x & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 16 + 7)) = convert_uchar_sat((c.y >> 24 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 16 + 6)) = convert_uchar_sat((c.y >> 16 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 16 + 5)) = convert_uchar_sat((c.y >> 8 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 16 + 4)) = convert_uchar_sat(c.y & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 16 + 11)) = convert_uchar_sat((c.z >> 24 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 16 + 10)) = convert_uchar_sat((c.z >> 16 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 16 + 9)) = convert_uchar_sat((c.z >> 8 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 16 + 8)) = convert_uchar_sat(c.z & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 16 + 15)) = convert_uchar_sat((c.w >> 24 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 16 + 14)) = convert_uchar_sat((c.w >> 16 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 16 + 13)) = convert_uchar_sat((c.w >> 8 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 16 + 12)) = convert_uchar_sat(c.w & 0xff);
|
||||
}
|
||||
else{
|
||||
*(__global uint4*)(dst + DstOffset + x * 16) = c;
|
||||
}
|
||||
}
|
||||
)==="
|
||||
@@ -1,21 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2017-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
R"===(
|
||||
__kernel void CopyImageToImage1d(
|
||||
__read_only image1d_t input,
|
||||
__write_only image1d_t output,
|
||||
int4 srcOffset,
|
||||
int4 dstOffset) {
|
||||
const int x = get_global_id(0);
|
||||
|
||||
const int srcCoord = x + srcOffset.x;
|
||||
const int dstCoord = x + dstOffset.x;
|
||||
const uint4 c = read_imageui(input, srcCoord);
|
||||
write_imageui(output, dstCoord, c);
|
||||
}
|
||||
)==="
|
||||
@@ -1,22 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2017-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
R"===(
|
||||
__kernel void CopyImageToImage2d(
|
||||
__read_only image2d_t input,
|
||||
__write_only image2d_t output,
|
||||
int4 srcOffset,
|
||||
int4 dstOffset) {
|
||||
const int x = get_global_id(0);
|
||||
const int y = get_global_id(1);
|
||||
|
||||
const int2 srcCoord = (int2)(x, y) + (int2)(srcOffset.x, srcOffset.y);
|
||||
const int2 dstCoord = (int2)(x, y) + (int2)(dstOffset.x, dstOffset.y);
|
||||
const uint4 c = read_imageui(input, srcCoord);
|
||||
write_imageui(output, dstCoord, c);
|
||||
}
|
||||
)==="
|
||||
@@ -1,25 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2017-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
R"===(
|
||||
#pragma OPENCL EXTENSION cl_khr_3d_image_writes : enable
|
||||
|
||||
__kernel void CopyImageToImage3d(
|
||||
__read_only image3d_t input,
|
||||
__write_only image3d_t output,
|
||||
int4 srcOffset,
|
||||
int4 dstOffset) {
|
||||
const int x = get_global_id(0);
|
||||
const int y = get_global_id(1);
|
||||
const int z = get_global_id(2);
|
||||
|
||||
const int4 srcCoord = (int4)(x, y, z, 0) + srcOffset;
|
||||
const int4 dstCoord = (int4)(x, y, z, 0) + dstOffset;
|
||||
const uint4 c = read_imageui(input, srcCoord);
|
||||
write_imageui(output, dstCoord, c);
|
||||
}
|
||||
)==="
|
||||
@@ -1,49 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2017-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
R"===(
|
||||
// assumption is local work size = pattern size
|
||||
__kernel void FillBufferBytes(
|
||||
__global uchar* pDst,
|
||||
uint dstOffsetInBytes,
|
||||
const __global uchar* pPattern )
|
||||
{
|
||||
uint dstIndex = get_global_id(0) + dstOffsetInBytes;
|
||||
uint srcIndex = get_local_id(0);
|
||||
pDst[dstIndex] = pPattern[srcIndex];
|
||||
}
|
||||
|
||||
__kernel void FillBufferLeftLeftover(
|
||||
__global uchar* pDst,
|
||||
uint dstOffsetInBytes,
|
||||
const __global uchar* pPattern,
|
||||
const uint patternSizeInEls )
|
||||
{
|
||||
uint gid = get_global_id(0);
|
||||
pDst[ gid + dstOffsetInBytes ] = pPattern[ gid & (patternSizeInEls - 1) ];
|
||||
}
|
||||
|
||||
__kernel void FillBufferMiddle(
|
||||
__global uchar* pDst,
|
||||
uint dstOffsetInBytes,
|
||||
const __global uint* pPattern,
|
||||
const uint patternSizeInEls )
|
||||
{
|
||||
uint gid = get_global_id(0);
|
||||
((__global uint*)(pDst + dstOffsetInBytes))[gid] = pPattern[ gid & (patternSizeInEls - 1) ];
|
||||
}
|
||||
|
||||
__kernel void FillBufferRightLeftover(
|
||||
__global uchar* pDst,
|
||||
uint dstOffsetInBytes,
|
||||
const __global uchar* pPattern,
|
||||
const uint patternSizeInEls )
|
||||
{
|
||||
uint gid = get_global_id(0);
|
||||
pDst[ gid + dstOffsetInBytes ] = pPattern[ gid & (patternSizeInEls - 1) ];
|
||||
}
|
||||
)==="
|
||||
@@ -1,49 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
R"===(
|
||||
// assumption is local work size = pattern size
|
||||
__kernel void FillBufferBytes(
|
||||
__global uchar* pDst,
|
||||
ulong dstOffsetInBytes,
|
||||
const __global uchar* pPattern )
|
||||
{
|
||||
size_t dstIndex = get_global_id(0) + dstOffsetInBytes;
|
||||
size_t srcIndex = get_local_id(0);
|
||||
pDst[dstIndex] = pPattern[srcIndex];
|
||||
}
|
||||
|
||||
__kernel void FillBufferLeftLeftover(
|
||||
__global uchar* pDst,
|
||||
ulong dstOffsetInBytes,
|
||||
const __global uchar* pPattern,
|
||||
const ulong patternSizeInEls )
|
||||
{
|
||||
size_t gid = get_global_id(0);
|
||||
pDst[ gid + dstOffsetInBytes ] = pPattern[ gid & (patternSizeInEls - 1) ];
|
||||
}
|
||||
|
||||
__kernel void FillBufferMiddle(
|
||||
__global uchar* pDst,
|
||||
ulong dstOffsetInBytes,
|
||||
const __global uint* pPattern,
|
||||
const ulong patternSizeInEls )
|
||||
{
|
||||
size_t gid = get_global_id(0);
|
||||
((__global uint*)(pDst + dstOffsetInBytes))[gid] = pPattern[ gid & (patternSizeInEls - 1) ];
|
||||
}
|
||||
|
||||
__kernel void FillBufferRightLeftover(
|
||||
__global uchar* pDst,
|
||||
ulong dstOffsetInBytes,
|
||||
const __global uchar* pPattern,
|
||||
const ulong patternSizeInEls )
|
||||
{
|
||||
size_t gid = get_global_id(0);
|
||||
pDst[ gid + dstOffsetInBytes ] = pPattern[ gid & (patternSizeInEls - 1) ];
|
||||
}
|
||||
)==="
|
||||
@@ -1,18 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2017-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
R"===(
|
||||
__kernel void FillImage1d(
|
||||
__write_only image1d_t output,
|
||||
uint4 color,
|
||||
int4 dstOffset) {
|
||||
const int x = get_global_id(0);
|
||||
|
||||
const int dstCoord = x + dstOffset.x;
|
||||
write_imageui(output, dstCoord, color);
|
||||
}
|
||||
)==="
|
||||
@@ -1,19 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2017-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
R"===(
|
||||
__kernel void FillImage2d(
|
||||
__write_only image2d_t output,
|
||||
uint4 color,
|
||||
int4 dstOffset) {
|
||||
const int x = get_global_id(0);
|
||||
const int y = get_global_id(1);
|
||||
|
||||
const int2 dstCoord = (int2)(x, y) + (int2)(dstOffset.x, dstOffset.y);
|
||||
write_imageui(output, dstCoord, color);
|
||||
}
|
||||
)==="
|
||||
@@ -1,22 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2017-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
R"===(
|
||||
#pragma OPENCL EXTENSION cl_khr_3d_image_writes : enable
|
||||
|
||||
__kernel void FillImage3d(
|
||||
__write_only image3d_t output,
|
||||
uint4 color,
|
||||
int4 dstOffset) {
|
||||
const int x = get_global_id(0);
|
||||
const int y = get_global_id(1);
|
||||
const int z = get_global_id(2);
|
||||
|
||||
const int4 dstCoord = (int4)(x, y, z, 0) + dstOffset;
|
||||
write_imageui(output, dstCoord, color);
|
||||
}
|
||||
)==="
|
||||
@@ -4,26 +4,6 @@
|
||||
# SPDX-License-Identifier: MIT
|
||||
#
|
||||
|
||||
add_library(${BUILTINS_SOURCES_LIB_NAME} OBJECT EXCLUDE_FROM_ALL
|
||||
CMakeLists.txt
|
||||
built_ins_registry.h
|
||||
register_copy_kernels_source.cpp
|
||||
)
|
||||
|
||||
set_target_properties(${BUILTINS_SOURCES_LIB_NAME} PROPERTIES POSITION_INDEPENDENT_CODE ON)
|
||||
set_target_properties(${BUILTINS_SOURCES_LIB_NAME} PROPERTIES FOLDER "built_ins")
|
||||
|
||||
target_include_directories(${BUILTINS_SOURCES_LIB_NAME} PRIVATE
|
||||
${ENGINE_NODE_DIR}
|
||||
${KHRONOS_HEADERS_DIR}
|
||||
${KHRONOS_GL_HEADERS_DIR}
|
||||
${NEO__GMM_INCLUDE_DIR}
|
||||
${NEO__IGC_INCLUDE_DIR}
|
||||
${THIRD_PARTY_DIR}
|
||||
)
|
||||
|
||||
add_subdirectories()
|
||||
|
||||
add_library(${BUILTINS_VME_LIB_NAME} OBJECT EXCLUDE_FROM_ALL
|
||||
CMakeLists.txt
|
||||
register_ext_vme_source.cpp
|
||||
|
||||
@@ -1,28 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2017-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "opencl/source/built_ins/built_ins.h"
|
||||
|
||||
#include <string>
|
||||
#include <unordered_map>
|
||||
|
||||
namespace NEO {
|
||||
|
||||
struct RegisterEmbeddedResource {
|
||||
RegisterEmbeddedResource(const char *name, const char *resource, size_t resourceLength) {
|
||||
auto &storageRegistry = EmbeddedStorageRegistry::getInstance();
|
||||
storageRegistry.store(name, createBuiltinResource(resource, resourceLength));
|
||||
}
|
||||
|
||||
RegisterEmbeddedResource(const char *name, std::string &&resource)
|
||||
: RegisterEmbeddedResource(name, resource.data(), resource.size() + 1) {
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace NEO
|
||||
@@ -1,167 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2017-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "opencl/source/built_ins/registry/built_ins_registry.h"
|
||||
|
||||
#include <string>
|
||||
|
||||
namespace NEO {
|
||||
|
||||
static RegisterEmbeddedResource registerCopyBufferToBufferSrc(
|
||||
createBuiltinResourceName(
|
||||
EBuiltInOps::CopyBufferToBuffer,
|
||||
BuiltinCode::getExtension(BuiltinCode::ECodeType::Source))
|
||||
.c_str(),
|
||||
std::string(
|
||||
#include "opencl/source/built_ins/kernels/copy_buffer_to_buffer.builtin_kernel"
|
||||
));
|
||||
|
||||
static RegisterEmbeddedResource registerCopyBufferToBufferStatelessSrc(
|
||||
createBuiltinResourceName(
|
||||
EBuiltInOps::CopyBufferToBufferStateless,
|
||||
BuiltinCode::getExtension(BuiltinCode::ECodeType::Source))
|
||||
.c_str(),
|
||||
std::string(
|
||||
#include "opencl/source/built_ins/kernels/copy_buffer_to_buffer_stateless.builtin_kernel"
|
||||
));
|
||||
|
||||
static RegisterEmbeddedResource registerCopyBufferRectSrc(
|
||||
createBuiltinResourceName(
|
||||
EBuiltInOps::CopyBufferRect,
|
||||
BuiltinCode::getExtension(BuiltinCode::ECodeType::Source))
|
||||
.c_str(),
|
||||
std::string(
|
||||
#include "opencl/source/built_ins/kernels/copy_buffer_rect.builtin_kernel"
|
||||
));
|
||||
|
||||
static RegisterEmbeddedResource registerCopyBufferRectStatelessSrc(
|
||||
createBuiltinResourceName(
|
||||
EBuiltInOps::CopyBufferRectStateless,
|
||||
BuiltinCode::getExtension(BuiltinCode::ECodeType::Source))
|
||||
.c_str(),
|
||||
std::string(
|
||||
#include "opencl/source/built_ins/kernels/copy_buffer_rect_stateless.builtin_kernel"
|
||||
));
|
||||
|
||||
static RegisterEmbeddedResource registerFillBufferSrc(
|
||||
createBuiltinResourceName(
|
||||
EBuiltInOps::FillBuffer,
|
||||
BuiltinCode::getExtension(BuiltinCode::ECodeType::Source))
|
||||
.c_str(),
|
||||
std::string(
|
||||
#include "opencl/source/built_ins/kernels/fill_buffer.builtin_kernel"
|
||||
));
|
||||
|
||||
static RegisterEmbeddedResource registerFillBufferStatelessSrc(
|
||||
createBuiltinResourceName(
|
||||
EBuiltInOps::FillBufferStateless,
|
||||
BuiltinCode::getExtension(BuiltinCode::ECodeType::Source))
|
||||
.c_str(),
|
||||
std::string(
|
||||
#include "opencl/source/built_ins/kernels/fill_buffer_stateless.builtin_kernel"
|
||||
));
|
||||
|
||||
static RegisterEmbeddedResource registerCopyBufferToImage3dSrc(
|
||||
createBuiltinResourceName(
|
||||
EBuiltInOps::CopyBufferToImage3d,
|
||||
BuiltinCode::getExtension(BuiltinCode::ECodeType::Source))
|
||||
.c_str(),
|
||||
std::string(
|
||||
#include "opencl/source/built_ins/kernels/copy_buffer_to_image3d.builtin_kernel"
|
||||
));
|
||||
|
||||
static RegisterEmbeddedResource registerCopyBufferToImage3dStatelessSrc(
|
||||
createBuiltinResourceName(
|
||||
EBuiltInOps::CopyBufferToImage3dStateless,
|
||||
BuiltinCode::getExtension(BuiltinCode::ECodeType::Source))
|
||||
.c_str(),
|
||||
std::string(
|
||||
#include "opencl/source/built_ins/kernels/copy_buffer_to_image3d_stateless.builtin_kernel"
|
||||
));
|
||||
|
||||
static RegisterEmbeddedResource registerCopyImage3dToBufferSrc(
|
||||
createBuiltinResourceName(
|
||||
EBuiltInOps::CopyImage3dToBuffer,
|
||||
BuiltinCode::getExtension(BuiltinCode::ECodeType::Source))
|
||||
.c_str(),
|
||||
std::string(
|
||||
#include "opencl/source/built_ins/kernels/copy_image3d_to_buffer.builtin_kernel"
|
||||
));
|
||||
|
||||
static RegisterEmbeddedResource registerCopyImage3dToBufferStatelessSrc(
|
||||
createBuiltinResourceName(
|
||||
EBuiltInOps::CopyImage3dToBufferStateless,
|
||||
BuiltinCode::getExtension(BuiltinCode::ECodeType::Source))
|
||||
.c_str(),
|
||||
std::string(
|
||||
#include "opencl/source/built_ins/kernels/copy_image3d_to_buffer_stateless.builtin_kernel"
|
||||
));
|
||||
|
||||
static RegisterEmbeddedResource registerCopyImageToImage1dSrc(
|
||||
createBuiltinResourceName(
|
||||
EBuiltInOps::CopyImageToImage1d,
|
||||
BuiltinCode::getExtension(BuiltinCode::ECodeType::Source))
|
||||
.c_str(),
|
||||
std::string(
|
||||
#include "opencl/source/built_ins/kernels/copy_image_to_image1d.builtin_kernel"
|
||||
));
|
||||
|
||||
static RegisterEmbeddedResource registerCopyImageToImage2dSrc(
|
||||
createBuiltinResourceName(
|
||||
EBuiltInOps::CopyImageToImage2d,
|
||||
BuiltinCode::getExtension(BuiltinCode::ECodeType::Source))
|
||||
.c_str(),
|
||||
std::string(
|
||||
#include "opencl/source/built_ins/kernels/copy_image_to_image2d.builtin_kernel"
|
||||
));
|
||||
|
||||
static RegisterEmbeddedResource registerCopyImageToImage3dSrc(
|
||||
createBuiltinResourceName(
|
||||
EBuiltInOps::CopyImageToImage3d,
|
||||
BuiltinCode::getExtension(BuiltinCode::ECodeType::Source))
|
||||
.c_str(),
|
||||
std::string(
|
||||
#include "opencl/source/built_ins/kernels/copy_image_to_image3d.builtin_kernel"
|
||||
));
|
||||
|
||||
static RegisterEmbeddedResource registerFillImage1dSrc(
|
||||
createBuiltinResourceName(
|
||||
EBuiltInOps::FillImage1d,
|
||||
BuiltinCode::getExtension(BuiltinCode::ECodeType::Source))
|
||||
.c_str(),
|
||||
std::string(
|
||||
#include "opencl/source/built_ins/kernels/fill_image1d.builtin_kernel"
|
||||
));
|
||||
|
||||
static RegisterEmbeddedResource registerFillImage2dSrc(
|
||||
createBuiltinResourceName(
|
||||
EBuiltInOps::FillImage2d,
|
||||
BuiltinCode::getExtension(BuiltinCode::ECodeType::Source))
|
||||
.c_str(),
|
||||
std::string(
|
||||
#include "opencl/source/built_ins/kernels/fill_image2d.builtin_kernel"
|
||||
));
|
||||
|
||||
static RegisterEmbeddedResource registerFillImage3dSrc(
|
||||
createBuiltinResourceName(
|
||||
EBuiltInOps::FillImage3d,
|
||||
BuiltinCode::getExtension(BuiltinCode::ECodeType::Source))
|
||||
.c_str(),
|
||||
std::string(
|
||||
#include "opencl/source/built_ins/kernels/fill_image3d.builtin_kernel"
|
||||
));
|
||||
|
||||
static RegisterEmbeddedResource registerAuxTranslationSrc(
|
||||
createBuiltinResourceName(
|
||||
EBuiltInOps::AuxTranslation,
|
||||
BuiltinCode::getExtension(BuiltinCode::ECodeType::Source))
|
||||
.c_str(),
|
||||
std::string(
|
||||
#include "opencl/source/built_ins/kernels/aux_translation.builtin_kernel"
|
||||
));
|
||||
|
||||
} // namespace NEO
|
||||
@@ -5,8 +5,9 @@
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/built_ins/registry/built_ins_registry.h"
|
||||
|
||||
#include "opencl/source/built_ins/built_in_ops_vme.h"
|
||||
#include "opencl/source/built_ins/registry/built_ins_registry.h"
|
||||
|
||||
#include <string>
|
||||
|
||||
|
||||
@@ -1,102 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2017-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "opencl/source/built_ins/sip.h"
|
||||
|
||||
#include "shared/source/device/device.h"
|
||||
#include "shared/source/execution_environment/execution_environment.h"
|
||||
#include "shared/source/helpers/debug_helpers.h"
|
||||
#include "shared/source/helpers/hw_helper.h"
|
||||
#include "shared/source/helpers/ptr_math.h"
|
||||
#include "shared/source/helpers/string.h"
|
||||
#include "shared/source/memory_manager/graphics_allocation.h"
|
||||
|
||||
#include "opencl/source/built_ins/built_ins.h"
|
||||
#include "opencl/source/program/kernel_info.h"
|
||||
#include "opencl/source/program/program.h"
|
||||
|
||||
namespace NEO {
|
||||
|
||||
const size_t SipKernel::maxDbgSurfaceSize = 0x49c000; // proper value should be taken from compiler when it's ready
|
||||
|
||||
const char *getSipKernelCompilerInternalOptions(SipKernelType kernel) {
|
||||
switch (kernel) {
|
||||
default:
|
||||
DEBUG_BREAK_IF(true);
|
||||
return "";
|
||||
case SipKernelType::Csr:
|
||||
return "-cl-include-sip-csr";
|
||||
case SipKernelType::DbgCsr:
|
||||
return "-cl-include-sip-kernel-debug -cl-include-sip-csr -cl-set-bti:0";
|
||||
case SipKernelType::DbgCsrLocal:
|
||||
return "-cl-include-sip-kernel-local-debug -cl-include-sip-csr -cl-set-bti:0";
|
||||
}
|
||||
}
|
||||
|
||||
const char *getSipLlSrc(const Device &device) {
|
||||
#define M_DUMMY_LL_SRC \
|
||||
"define void @f() { \n" \
|
||||
" ret void \n" \
|
||||
"} \n" \
|
||||
"!opencl.compiler.options = !{!0} \n" \
|
||||
"!opencl.kernels = !{!1} \n" \
|
||||
"!0 = !{} \n" \
|
||||
"!1 = !{void()* @f, !2, !3, !4, !5, !6, !7} \n" \
|
||||
"!2 = !{!\"kernel_arg_addr_space\"} \n" \
|
||||
"!3 = !{!\"kernel_arg_access_qual\"} \n" \
|
||||
"!4 = !{!\"kernel_arg_type\"} \n" \
|
||||
"!5 = !{!\"kernel_arg_type_qual\"} \n" \
|
||||
"!6 = !{!\"kernel_arg_base_type\"} \n" \
|
||||
"!7 = !{!\"kernel_arg_name\"} \n"
|
||||
|
||||
constexpr const char *llDummySrc32 =
|
||||
"target datalayout = \"e-p:32:32:32\" \n"
|
||||
"target triple = \"spir\" \n" M_DUMMY_LL_SRC;
|
||||
|
||||
constexpr const char *llDummySrc64 =
|
||||
"target datalayout = \"e-p:64:64:64\" \n"
|
||||
"target triple = \"spir64\" \n" M_DUMMY_LL_SRC;
|
||||
|
||||
#undef M_DUMMY_LL_SRC
|
||||
|
||||
const uint32_t ptrSize = device.getDeviceInfo().force32BitAddressess ? 4 : sizeof(void *);
|
||||
|
||||
return (ptrSize == 8) ? llDummySrc64 : llDummySrc32;
|
||||
}
|
||||
|
||||
SipKernel::SipKernel(SipKernelType type, Program *sipProgram)
|
||||
: type(type) {
|
||||
program = sipProgram;
|
||||
}
|
||||
SipKernel::~SipKernel() {
|
||||
program->release();
|
||||
}
|
||||
|
||||
GraphicsAllocation *SipKernel::getSipAllocation() const {
|
||||
return program->getKernelInfo(size_t{0})->getGraphicsAllocation();
|
||||
}
|
||||
|
||||
const char *SipKernel::getBinary() const {
|
||||
auto kernelInfo = program->getKernelInfo(size_t{0});
|
||||
return reinterpret_cast<const char *>(ptrOffset(kernelInfo->heapInfo.pKernelHeap, kernelInfo->systemKernelOffset));
|
||||
}
|
||||
size_t SipKernel::getBinarySize() const {
|
||||
auto kernelInfo = program->getKernelInfo(size_t{0});
|
||||
return kernelInfo->heapInfo.pKernelHeader->KernelHeapSize - kernelInfo->systemKernelOffset;
|
||||
}
|
||||
|
||||
SipKernelType SipKernel::getSipKernelType(GFXCORE_FAMILY family, bool debuggingActive) {
|
||||
auto &hwHelper = HwHelper::get(family);
|
||||
return hwHelper.getSipKernelType(debuggingActive);
|
||||
}
|
||||
|
||||
GraphicsAllocation *SipKernel::getSipKernelAllocation(Device &device) {
|
||||
auto sipType = SipKernel::getSipKernelType(device.getHardwareInfo().platform.eRenderCoreFamily, device.isDebuggerActive());
|
||||
return device.getExecutionEnvironment()->getBuiltIns()->getSipKernel(sipType, device).getSipAllocation();
|
||||
}
|
||||
|
||||
} // namespace NEO
|
||||
@@ -1,51 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2017-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include "shared/source/built_ins/sip_kernel_type.h"
|
||||
#include "shared/source/helpers/hw_info.h"
|
||||
|
||||
#include <memory>
|
||||
|
||||
namespace NEO {
|
||||
|
||||
class Device;
|
||||
class Program;
|
||||
class GraphicsAllocation;
|
||||
|
||||
const char *getSipKernelCompilerInternalOptions(SipKernelType kernel);
|
||||
|
||||
const char *getSipLlSrc(const Device &device);
|
||||
|
||||
class SipKernel {
|
||||
public:
|
||||
SipKernel(SipKernelType type, Program *sipProgram);
|
||||
SipKernel(const SipKernel &) = delete;
|
||||
SipKernel &operator=(const SipKernel &) = delete;
|
||||
SipKernel(SipKernel &&) = default;
|
||||
SipKernel &operator=(SipKernel &&) = default;
|
||||
virtual ~SipKernel();
|
||||
|
||||
const char *getBinary() const;
|
||||
|
||||
size_t getBinarySize() const;
|
||||
|
||||
SipKernelType getType() const {
|
||||
return type;
|
||||
}
|
||||
|
||||
static const size_t maxDbgSurfaceSize;
|
||||
|
||||
MOCKABLE_VIRTUAL GraphicsAllocation *getSipAllocation() const;
|
||||
static SipKernelType getSipKernelType(GFXCORE_FAMILY family, bool debuggingActive);
|
||||
static GraphicsAllocation *getSipKernelAllocation(Device &device);
|
||||
|
||||
protected:
|
||||
SipKernelType type = SipKernelType::COUNT;
|
||||
Program *program = nullptr;
|
||||
};
|
||||
} // namespace NEO
|
||||
@@ -5,7 +5,8 @@
|
||||
*
|
||||
*/
|
||||
|
||||
#include "opencl/source/built_ins/built_ins.h"
|
||||
#include "shared/source/built_ins/built_ins.h"
|
||||
|
||||
#include "opencl/source/built_ins/builtins_dispatch_builder.h"
|
||||
|
||||
namespace NEO {
|
||||
|
||||
@@ -7,10 +7,10 @@
|
||||
|
||||
#include "opencl/source/built_ins/vme_builtin.h"
|
||||
|
||||
#include "shared/source/built_ins/built_ins.h"
|
||||
#include "shared/source/device/device.h"
|
||||
|
||||
#include "opencl/source/built_ins/built_in_ops_vme.h"
|
||||
#include "opencl/source/built_ins/built_ins.h"
|
||||
#include "opencl/source/built_ins/builtins_dispatch_builder.h"
|
||||
#include "opencl/source/built_ins/populate_built_ins.inl"
|
||||
#include "opencl/source/built_ins/vme_dispatch_builder.h"
|
||||
|
||||
@@ -7,10 +7,11 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "shared/source/built_ins/built_ins.h"
|
||||
|
||||
#include "opencl/source/accelerators/intel_accelerator.h"
|
||||
#include "opencl/source/accelerators/intel_motion_estimation.h"
|
||||
#include "opencl/source/built_ins/built_in_ops_vme.h"
|
||||
#include "opencl/source/built_ins/built_ins.h"
|
||||
#include "opencl/source/built_ins/builtins_dispatch_builder.h"
|
||||
#include "opencl/source/helpers/dispatch_info_builder.h"
|
||||
#include "opencl/source/mem_obj/buffer.h"
|
||||
|
||||
@@ -6,6 +6,7 @@
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include "shared/source/built_ins/built_ins.h"
|
||||
#include "shared/source/command_stream/command_stream_receiver.h"
|
||||
#include "shared/source/helpers/array_count.h"
|
||||
#include "shared/source/helpers/engine_node_helper.h"
|
||||
@@ -17,7 +18,6 @@
|
||||
#include "shared/source/utilities/range.h"
|
||||
#include "shared/source/utilities/tag_allocator.h"
|
||||
|
||||
#include "opencl/source/built_ins/built_ins.h"
|
||||
#include "opencl/source/built_ins/builtins_dispatch_builder.h"
|
||||
#include "opencl/source/builtin_kernels_simulation/scheduler_simulation.h"
|
||||
#include "opencl/source/command_queue/command_queue_hw.h"
|
||||
|
||||
@@ -6,9 +6,9 @@
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include "shared/source/built_ins/built_ins.h"
|
||||
#include "shared/source/command_stream/command_stream_receiver.h"
|
||||
|
||||
#include "opencl/source/built_ins/built_ins.h"
|
||||
#include "opencl/source/command_queue/command_queue_hw.h"
|
||||
#include "opencl/source/command_queue/enqueue_common.h"
|
||||
#include "opencl/source/helpers/hardware_commands_helper.h"
|
||||
|
||||
@@ -6,9 +6,9 @@
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include "shared/source/built_ins/built_ins.h"
|
||||
#include "shared/source/command_stream/command_stream_receiver.h"
|
||||
|
||||
#include "opencl/source/built_ins/built_ins.h"
|
||||
#include "opencl/source/command_queue/command_queue_hw.h"
|
||||
#include "opencl/source/helpers/hardware_commands_helper.h"
|
||||
#include "opencl/source/mem_obj/buffer.h"
|
||||
|
||||
@@ -6,9 +6,9 @@
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include "shared/source/built_ins/built_ins.h"
|
||||
#include "shared/source/command_stream/command_stream_receiver.h"
|
||||
|
||||
#include "opencl/source/built_ins/built_ins.h"
|
||||
#include "opencl/source/command_queue/command_queue_hw.h"
|
||||
#include "opencl/source/helpers/hardware_commands_helper.h"
|
||||
#include "opencl/source/helpers/mipmap.h"
|
||||
|
||||
@@ -6,10 +6,10 @@
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include "shared/source/built_ins/built_ins.h"
|
||||
#include "shared/source/command_stream/command_stream_receiver.h"
|
||||
#include "shared/source/helpers/basic_math.h"
|
||||
|
||||
#include "opencl/source/built_ins/built_ins.h"
|
||||
#include "opencl/source/command_queue/command_queue_hw.h"
|
||||
#include "opencl/source/helpers/hardware_commands_helper.h"
|
||||
#include "opencl/source/helpers/mipmap.h"
|
||||
|
||||
@@ -6,9 +6,9 @@
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include "shared/source/built_ins/built_ins.h"
|
||||
#include "shared/source/command_stream/command_stream_receiver.h"
|
||||
|
||||
#include "opencl/source/built_ins/built_ins.h"
|
||||
#include "opencl/source/command_queue/command_queue_hw.h"
|
||||
#include "opencl/source/helpers/hardware_commands_helper.h"
|
||||
#include "opencl/source/helpers/mipmap.h"
|
||||
|
||||
@@ -6,11 +6,11 @@
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include "shared/source/built_ins/built_ins.h"
|
||||
#include "shared/source/command_stream/command_stream_receiver.h"
|
||||
#include "shared/source/memory_manager/internal_allocation_storage.h"
|
||||
#include "shared/source/memory_manager/memory_manager.h"
|
||||
|
||||
#include "opencl/source/built_ins/built_ins.h"
|
||||
#include "opencl/source/command_queue/command_queue_hw.h"
|
||||
#include "opencl/source/helpers/hardware_commands_helper.h"
|
||||
#include "opencl/source/mem_obj/buffer.h"
|
||||
|
||||
@@ -6,10 +6,10 @@
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include "shared/source/built_ins/built_ins.h"
|
||||
#include "shared/source/command_stream/command_stream_receiver.h"
|
||||
#include "shared/source/helpers/basic_math.h"
|
||||
|
||||
#include "opencl/source/built_ins/built_ins.h"
|
||||
#include "opencl/source/command_queue/command_queue_hw.h"
|
||||
#include "opencl/source/helpers/hardware_commands_helper.h"
|
||||
#include "opencl/source/mem_obj/image.h"
|
||||
|
||||
@@ -6,11 +6,11 @@
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include "shared/source/built_ins/built_ins.h"
|
||||
#include "shared/source/command_stream/command_stream_receiver.h"
|
||||
#include "shared/source/helpers/cache_policy.h"
|
||||
#include "shared/source/memory_manager/unified_memory_manager.h"
|
||||
|
||||
#include "opencl/source/built_ins/built_ins.h"
|
||||
#include "opencl/source/command_queue/command_queue_hw.h"
|
||||
#include "opencl/source/command_queue/enqueue_common.h"
|
||||
#include "opencl/source/helpers/hardware_commands_helper.h"
|
||||
|
||||
@@ -6,9 +6,9 @@
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include "shared/source/built_ins/built_ins.h"
|
||||
#include "shared/source/command_stream/command_stream_receiver.h"
|
||||
|
||||
#include "opencl/source/built_ins/built_ins.h"
|
||||
#include "opencl/source/command_queue/command_queue_hw.h"
|
||||
#include "opencl/source/command_queue/enqueue_common.h"
|
||||
#include "opencl/source/helpers/hardware_commands_helper.h"
|
||||
|
||||
@@ -6,12 +6,12 @@
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include "shared/source/built_ins/built_ins.h"
|
||||
#include "shared/source/command_stream/command_stream_receiver.h"
|
||||
#include "shared/source/helpers/basic_math.h"
|
||||
#include "shared/source/helpers/cache_policy.h"
|
||||
#include "shared/source/memory_manager/graphics_allocation.h"
|
||||
|
||||
#include "opencl/source/built_ins/built_ins.h"
|
||||
#include "opencl/source/command_queue/command_queue_hw.h"
|
||||
#include "opencl/source/context/context.h"
|
||||
#include "opencl/source/event/event.h"
|
||||
|
||||
@@ -6,10 +6,10 @@
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include "shared/source/built_ins/built_ins.h"
|
||||
#include "shared/source/memory_manager/surface.h"
|
||||
#include "shared/source/memory_manager/unified_memory_manager.h"
|
||||
|
||||
#include "opencl/source/built_ins/built_ins.h"
|
||||
#include "opencl/source/command_queue/command_queue_hw.h"
|
||||
#include "opencl/source/command_queue/enqueue_common.h"
|
||||
#include "opencl/source/event/event.h"
|
||||
|
||||
@@ -6,11 +6,11 @@
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include "shared/source/built_ins/built_ins.h"
|
||||
#include "shared/source/command_stream/command_stream_receiver.h"
|
||||
#include "shared/source/helpers/string.h"
|
||||
#include "shared/source/memory_manager/unified_memory_manager.h"
|
||||
|
||||
#include "opencl/source/built_ins/built_ins.h"
|
||||
#include "opencl/source/command_queue/command_queue_hw.h"
|
||||
#include "opencl/source/helpers/hardware_commands_helper.h"
|
||||
#include "opencl/source/mem_obj/buffer.h"
|
||||
|
||||
@@ -6,9 +6,9 @@
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include "shared/source/built_ins/built_ins.h"
|
||||
#include "shared/source/command_stream/command_stream_receiver.h"
|
||||
|
||||
#include "opencl/source/built_ins/built_ins.h"
|
||||
#include "opencl/source/command_queue/command_queue_hw.h"
|
||||
#include "opencl/source/helpers/hardware_commands_helper.h"
|
||||
#include "opencl/source/mem_obj/buffer.h"
|
||||
|
||||
@@ -6,11 +6,11 @@
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include "shared/source/built_ins/built_ins.h"
|
||||
#include "shared/source/command_stream/command_stream_receiver.h"
|
||||
#include "shared/source/helpers/basic_math.h"
|
||||
#include "shared/source/memory_manager/graphics_allocation.h"
|
||||
|
||||
#include "opencl/source/built_ins/built_ins.h"
|
||||
#include "opencl/source/command_queue/command_queue_hw.h"
|
||||
#include "opencl/source/helpers/hardware_commands_helper.h"
|
||||
#include "opencl/source/helpers/mipmap.h"
|
||||
|
||||
@@ -7,6 +7,7 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "shared/source/built_ins/built_ins.h"
|
||||
#include "shared/source/command_stream/linear_stream.h"
|
||||
#include "shared/source/command_stream/preemption.h"
|
||||
#include "shared/source/helpers/register_offsets.h"
|
||||
@@ -15,7 +16,6 @@
|
||||
#include "shared/source/indirect_heap/indirect_heap.h"
|
||||
#include "shared/source/utilities/tag_allocator.h"
|
||||
|
||||
#include "opencl/source/built_ins/built_ins.h"
|
||||
#include "opencl/source/command_queue/command_queue.h"
|
||||
#include "opencl/source/context/context.h"
|
||||
#include "opencl/source/device_queue/device_queue_hw.h"
|
||||
|
||||
@@ -7,6 +7,7 @@
|
||||
|
||||
#include "opencl/source/context/context.h"
|
||||
|
||||
#include "shared/source/built_ins/built_ins.h"
|
||||
#include "shared/source/command_stream/command_stream_receiver.h"
|
||||
#include "shared/source/compiler_interface/compiler_interface.h"
|
||||
#include "shared/source/debug_settings/debug_settings_manager.h"
|
||||
@@ -17,7 +18,6 @@
|
||||
#include "shared/source/memory_manager/memory_manager.h"
|
||||
#include "shared/source/memory_manager/unified_memory_manager.h"
|
||||
|
||||
#include "opencl/source/built_ins/built_ins.h"
|
||||
#include "opencl/source/command_queue/command_queue.h"
|
||||
#include "opencl/source/device/cl_device.h"
|
||||
#include "opencl/source/device_queue/device_queue.h"
|
||||
|
||||
@@ -7,10 +7,10 @@
|
||||
|
||||
#include "shared/source/debugger/debugger.h"
|
||||
|
||||
#include "shared/source/built_ins/sip.h"
|
||||
#include "shared/source/built_ins/sip_kernel_type.h"
|
||||
#include "shared/source/helpers/hw_info.h"
|
||||
|
||||
#include "opencl/source/built_ins/sip.h"
|
||||
#include "opencl/source/source_level_debugger/source_level_debugger.h"
|
||||
|
||||
namespace NEO {
|
||||
|
||||
@@ -6,10 +6,9 @@
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include "shared/source/built_ins/built_ins.h"
|
||||
#include "shared/source/execution_environment/execution_environment.h"
|
||||
|
||||
#include "opencl/source/built_ins/built_ins.h"
|
||||
|
||||
namespace NEO {
|
||||
class Device;
|
||||
|
||||
@@ -18,6 +17,6 @@ Program *createProgramForSip(ExecutionEnvironment &executionEnvironment,
|
||||
Context *context,
|
||||
std::vector<char> &binary,
|
||||
size_t size,
|
||||
cl_int *errcodeRet,
|
||||
int *errcodeRet,
|
||||
Device *device);
|
||||
} // namespace NEO
|
||||
|
||||
@@ -6,9 +6,9 @@
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include "shared/source/built_ins/built_ins.h"
|
||||
#include "shared/source/indirect_heap/indirect_heap.h"
|
||||
|
||||
#include "opencl/source/built_ins/built_ins.h"
|
||||
#include "opencl/source/helpers/per_thread_data.h"
|
||||
#include "opencl/source/kernel/kernel.h"
|
||||
|
||||
|
||||
@@ -7,6 +7,7 @@
|
||||
|
||||
#include "opencl/source/kernel/kernel.h"
|
||||
|
||||
#include "shared/source/built_ins/built_ins.h"
|
||||
#include "shared/source/command_stream/command_stream_receiver.h"
|
||||
#include "shared/source/debug_settings/debug_settings_manager.h"
|
||||
#include "shared/source/gmm_helper/gmm_helper.h"
|
||||
@@ -22,7 +23,6 @@
|
||||
|
||||
#include "opencl/source/accelerators/intel_accelerator.h"
|
||||
#include "opencl/source/accelerators/intel_motion_estimation.h"
|
||||
#include "opencl/source/built_ins/built_ins.h"
|
||||
#include "opencl/source/built_ins/builtins_dispatch_builder.h"
|
||||
#include "opencl/source/command_queue/command_queue.h"
|
||||
#include "opencl/source/command_queue/gpgpu_walker.h"
|
||||
|
||||
@@ -6,7 +6,8 @@
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include "opencl/source/built_ins/built_ins.h"
|
||||
#include "shared/source/built_ins/built_ins.h"
|
||||
|
||||
#include "opencl/source/kernel/kernel.h"
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
@@ -161,7 +161,7 @@ target_include_directories(igdrcl_tests PRIVATE
|
||||
|
||||
target_link_libraries(igdrcl_tests gmock-gtest ${IGDRCL_EXTRA_LIBS})
|
||||
|
||||
set(BUILT_IN_KERNEL_DIR "${NEO_SOURCE_DIR}/opencl/source/built_ins")
|
||||
set(BUILT_IN_KERNEL_DIR "${NEO_SOURCE_DIR}/shared/source/built_ins")
|
||||
|
||||
function(neo_copy_test_files target product)
|
||||
set(outputdir "${TargetDir}/${product}")
|
||||
|
||||
@@ -5,11 +5,11 @@
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/built_ins/built_ins.h"
|
||||
#include "shared/source/compiler_interface/compiler_interface.h"
|
||||
#include "shared/source/device/device.h"
|
||||
|
||||
#include "opencl/source/built_ins/built_in_ops_vme.h"
|
||||
#include "opencl/source/built_ins/built_ins.h"
|
||||
#include "opencl/source/built_ins/vme_builtin.h"
|
||||
#include "opencl/source/context/context.h"
|
||||
#include "opencl/source/device/cl_device.h"
|
||||
|
||||
@@ -5,7 +5,8 @@
|
||||
*
|
||||
*/
|
||||
|
||||
#include "opencl/source/built_ins/built_ins.h"
|
||||
#include "shared/source/built_ins/built_ins.h"
|
||||
|
||||
#include "opencl/source/mem_obj/image.h"
|
||||
#include "opencl/source/sampler/sampler.h"
|
||||
#include "opencl/test/unit_test/aub_tests/fixtures/aub_parent_kernel_fixture.h"
|
||||
|
||||
@@ -5,7 +5,8 @@
|
||||
*
|
||||
*/
|
||||
|
||||
#include "opencl/source/built_ins/built_ins.h"
|
||||
#include "shared/source/built_ins/built_ins.h"
|
||||
|
||||
#include "opencl/source/mem_obj/image.h"
|
||||
#include "opencl/source/sampler/sampler.h"
|
||||
#include "opencl/test/unit_test/aub_tests/fixtures/aub_parent_kernel_fixture.h"
|
||||
|
||||
@@ -5,7 +5,8 @@
|
||||
*
|
||||
*/
|
||||
|
||||
#include "opencl/source/built_ins/built_ins.h"
|
||||
#include "shared/source/built_ins/built_ins.h"
|
||||
|
||||
#include "opencl/source/mem_obj/image.h"
|
||||
#include "opencl/source/sampler/sampler.h"
|
||||
#include "opencl/test/unit_test/aub_tests/fixtures/aub_parent_kernel_fixture.h"
|
||||
|
||||
@@ -5,7 +5,8 @@
|
||||
*
|
||||
*/
|
||||
|
||||
#include "opencl/source/built_ins/built_ins.h"
|
||||
#include "shared/source/built_ins/built_ins.h"
|
||||
|
||||
#include "opencl/source/mem_obj/image.h"
|
||||
#include "opencl/source/sampler/sampler.h"
|
||||
#include "opencl/test/unit_test/aub_tests/fixtures/aub_parent_kernel_fixture.h"
|
||||
|
||||
@@ -5,6 +5,7 @@
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/built_ins/built_ins.h"
|
||||
#include "shared/source/debug_settings/debug_settings_manager.h"
|
||||
#include "shared/source/gmm_helper/gmm.h"
|
||||
#include "shared/source/gmm_helper/gmm_helper.h"
|
||||
@@ -15,7 +16,6 @@
|
||||
#include "shared/test/unit_test/utilities/base_object_utils.h"
|
||||
|
||||
#include "opencl/source/built_ins/aux_translation_builtin.h"
|
||||
#include "opencl/source/built_ins/built_ins.h"
|
||||
#include "opencl/source/built_ins/builtins_dispatch_builder.h"
|
||||
#include "opencl/source/built_ins/vme_builtin.h"
|
||||
#include "opencl/source/built_ins/vme_dispatch_builder.h"
|
||||
|
||||
@@ -5,8 +5,9 @@
|
||||
*
|
||||
*/
|
||||
|
||||
#include "opencl/source/built_ins/built_ins.h"
|
||||
#include "opencl/source/built_ins/sip.h"
|
||||
#include "shared/source/built_ins/built_ins.h"
|
||||
#include "shared/source/built_ins/sip.h"
|
||||
|
||||
#include "opencl/test/unit_test/global_environment.h"
|
||||
#include "opencl/test/unit_test/helpers/test_files.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_device.h"
|
||||
|
||||
@@ -5,9 +5,9 @@
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/built_ins/built_ins.h"
|
||||
#include "shared/source/memory_manager/memory_constants.h"
|
||||
|
||||
#include "opencl/source/built_ins/built_ins.h"
|
||||
#include "opencl/source/built_ins/builtins_dispatch_builder.h"
|
||||
#include "opencl/source/helpers/dispatch_info.h"
|
||||
#include "opencl/test/unit_test/command_queue/enqueue_copy_buffer_rect_fixture.h"
|
||||
|
||||
@@ -5,9 +5,9 @@
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/built_ins/built_ins.h"
|
||||
#include "shared/source/helpers/ptr_math.h"
|
||||
|
||||
#include "opencl/source/built_ins/built_ins.h"
|
||||
#include "opencl/source/built_ins/builtins_dispatch_builder.h"
|
||||
#include "opencl/source/command_queue/command_queue_hw.h"
|
||||
#include "opencl/source/helpers/dispatch_info.h"
|
||||
|
||||
@@ -5,6 +5,7 @@
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/built_ins/built_ins.h"
|
||||
#include "shared/source/command_stream/command_stream_receiver.h"
|
||||
#include "shared/source/helpers/aligned_memory.h"
|
||||
#include "shared/source/helpers/ptr_math.h"
|
||||
@@ -12,7 +13,6 @@
|
||||
#include "shared/source/memory_manager/memory_manager.h"
|
||||
#include "shared/source/os_interface/os_context.h"
|
||||
|
||||
#include "opencl/source/built_ins/built_ins.h"
|
||||
#include "opencl/source/built_ins/builtins_dispatch_builder.h"
|
||||
#include "opencl/source/command_queue/command_queue.h"
|
||||
#include "opencl/source/helpers/dispatch_info.h"
|
||||
|
||||
@@ -5,9 +5,9 @@
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/built_ins/built_ins.h"
|
||||
#include "shared/source/memory_manager/memory_constants.h"
|
||||
|
||||
#include "opencl/source/built_ins/built_ins.h"
|
||||
#include "opencl/source/built_ins/builtins_dispatch_builder.h"
|
||||
#include "opencl/source/event/event.h"
|
||||
#include "opencl/source/helpers/dispatch_info.h"
|
||||
|
||||
@@ -5,12 +5,12 @@
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/built_ins/built_ins.h"
|
||||
#include "shared/source/gmm_helper/gmm_helper.h"
|
||||
#include "shared/source/helpers/cache_policy.h"
|
||||
#include "shared/source/memory_manager/allocations_list.h"
|
||||
#include "shared/test/unit_test/helpers/debug_manager_state_restore.h"
|
||||
|
||||
#include "opencl/source/built_ins/built_ins.h"
|
||||
#include "opencl/source/built_ins/builtins_dispatch_builder.h"
|
||||
#include "opencl/source/helpers/dispatch_info.h"
|
||||
#include "opencl/test/unit_test/command_queue/enqueue_fixture.h"
|
||||
|
||||
@@ -5,7 +5,8 @@
|
||||
*
|
||||
*/
|
||||
|
||||
#include "opencl/source/built_ins/built_ins.h"
|
||||
#include "shared/source/built_ins/built_ins.h"
|
||||
|
||||
#include "opencl/source/built_ins/builtins_dispatch_builder.h"
|
||||
#include "opencl/source/event/event.h"
|
||||
#include "opencl/source/helpers/dispatch_info.h"
|
||||
|
||||
@@ -5,10 +5,10 @@
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/built_ins/built_ins.h"
|
||||
#include "shared/source/memory_manager/allocations_list.h"
|
||||
#include "shared/test/unit_test/helpers/debug_manager_state_restore.h"
|
||||
|
||||
#include "opencl/source/built_ins/built_ins.h"
|
||||
#include "opencl/source/built_ins/builtins_dispatch_builder.h"
|
||||
#include "opencl/source/helpers/dispatch_info.h"
|
||||
#include "opencl/test/unit_test/command_queue/buffer_operations_fixture.h"
|
||||
|
||||
@@ -5,7 +5,8 @@
|
||||
*
|
||||
*/
|
||||
|
||||
#include "opencl/source/built_ins/built_ins.h"
|
||||
#include "shared/source/built_ins/built_ins.h"
|
||||
|
||||
#include "opencl/source/built_ins/builtins_dispatch_builder.h"
|
||||
#include "opencl/source/command_queue/command_queue_hw.h"
|
||||
#include "opencl/source/command_queue/enqueue_copy_image.h"
|
||||
|
||||
@@ -5,7 +5,8 @@
|
||||
*
|
||||
*/
|
||||
|
||||
#include "opencl/source/built_ins/built_ins.h"
|
||||
#include "shared/source/built_ins/built_ins.h"
|
||||
|
||||
#include "opencl/source/command_queue/command_queue_hw.h"
|
||||
#include "opencl/source/command_queue/enqueue_barrier.h"
|
||||
#include "opencl/source/command_queue/enqueue_marker.h"
|
||||
|
||||
@@ -5,11 +5,11 @@
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/built_ins/built_ins.h"
|
||||
#include "shared/source/helpers/preamble.h"
|
||||
#include "shared/source/memory_manager/graphics_allocation.h"
|
||||
#include "shared/source/memory_manager/memory_constants.h"
|
||||
|
||||
#include "opencl/source/built_ins/built_ins.h"
|
||||
#include "opencl/source/sharings/gl/gl_buffer.h"
|
||||
#include "opencl/test/unit_test/command_queue/enqueue_fixture.h"
|
||||
#include "opencl/test/unit_test/fixtures/hello_world_fixture.h"
|
||||
|
||||
@@ -5,6 +5,7 @@
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/built_ins/built_ins.h"
|
||||
#include "shared/source/command_stream/command_stream_receiver.h"
|
||||
#include "shared/source/command_stream/linear_stream.h"
|
||||
#include "shared/source/command_stream/preemption.h"
|
||||
@@ -20,7 +21,6 @@
|
||||
#include "shared/test/unit_test/helpers/debug_manager_state_restore.h"
|
||||
#include "shared/test/unit_test/helpers/ult_hw_helper.h"
|
||||
|
||||
#include "opencl/source/built_ins/built_ins.h"
|
||||
#include "opencl/source/command_queue/command_queue_hw.h"
|
||||
#include "opencl/source/command_queue/gpgpu_walker.h"
|
||||
#include "opencl/source/event/user_event.h"
|
||||
|
||||
@@ -5,6 +5,7 @@
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/built_ins/built_ins.h"
|
||||
#include "shared/source/command_stream/command_stream_receiver.h"
|
||||
#include "shared/source/command_stream/linear_stream.h"
|
||||
#include "shared/source/command_stream/preemption.h"
|
||||
@@ -23,7 +24,6 @@
|
||||
#include "shared/test/unit_test/helpers/debug_manager_state_restore.h"
|
||||
#include "shared/test/unit_test/utilities/base_object_utils.h"
|
||||
|
||||
#include "opencl/source/built_ins/built_ins.h"
|
||||
#include "opencl/source/command_queue/command_queue_hw.h"
|
||||
#include "opencl/source/command_queue/gpgpu_walker.h"
|
||||
#include "opencl/source/event/user_event.h"
|
||||
|
||||
@@ -5,6 +5,7 @@
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/built_ins/built_ins.h"
|
||||
#include "shared/source/command_stream/preemption.h"
|
||||
#include "shared/source/compiler_interface/compiler_interface.h"
|
||||
#include "shared/source/device/device.h"
|
||||
@@ -16,7 +17,6 @@
|
||||
#include "shared/test/unit_test/utilities/destructor_counted.h"
|
||||
|
||||
#include "opencl/source/aub/aub_center.h"
|
||||
#include "opencl/source/built_ins/built_ins.h"
|
||||
#include "opencl/source/memory_manager/os_agnostic_memory_manager.h"
|
||||
#include "opencl/source/platform/platform.h"
|
||||
#include "opencl/source/source_level_debugger/source_level_debugger.h"
|
||||
|
||||
@@ -5,9 +5,9 @@
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/built_ins/built_ins.h"
|
||||
#include "shared/test/unit_test/helpers/debug_manager_state_restore.h"
|
||||
|
||||
#include "opencl/source/built_ins/built_ins.h"
|
||||
#include "opencl/source/command_queue/enqueue_kernel.h"
|
||||
#include "opencl/source/device_queue/device_queue.h"
|
||||
#include "opencl/source/scheduler/scheduler_kernel.h"
|
||||
|
||||
@@ -7,9 +7,9 @@
|
||||
|
||||
#include "opencl/test/unit_test/fixtures/built_in_fixture.h"
|
||||
|
||||
#include "shared/source/built_ins/built_ins.h"
|
||||
#include "shared/source/device/device.h"
|
||||
|
||||
#include "opencl/source/built_ins/built_ins.h"
|
||||
#include "opencl/test/unit_test/global_environment.h"
|
||||
#include "opencl/test/unit_test/helpers/kernel_binary_helper.h"
|
||||
#include "opencl/test/unit_test/helpers/test_files.h"
|
||||
|
||||
@@ -5,7 +5,8 @@
|
||||
*
|
||||
*/
|
||||
|
||||
#include "opencl/source/built_ins/built_ins.h"
|
||||
#include "shared/source/built_ins/built_ins.h"
|
||||
|
||||
#include "opencl/source/command_queue/enqueue_kernel.h"
|
||||
#include "opencl/source/command_queue/gpgpu_walker.h"
|
||||
#include "opencl/source/device_queue/device_queue.h"
|
||||
|
||||
@@ -5,10 +5,10 @@
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/built_ins/built_ins.h"
|
||||
#include "shared/source/command_stream/command_stream_receiver.h"
|
||||
#include "shared/source/command_stream/linear_stream.h"
|
||||
|
||||
#include "opencl/source/built_ins/built_ins.h"
|
||||
#include "opencl/source/command_queue/command_queue_hw.h"
|
||||
#include "opencl/source/event/user_event.h"
|
||||
#include "opencl/test/unit_test/fixtures/device_fixture.h"
|
||||
|
||||
@@ -5,8 +5,9 @@
|
||||
*
|
||||
*/
|
||||
|
||||
#include "opencl/source/built_ins/built_ins.h"
|
||||
#include "opencl/source/built_ins/sip.h"
|
||||
#include "shared/source/built_ins/built_ins.h"
|
||||
#include "shared/source/built_ins/sip.h"
|
||||
|
||||
#include "opencl/test/unit_test/global_environment.h"
|
||||
#include "opencl/test/unit_test/helpers/test_files.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_device.h"
|
||||
|
||||
@@ -5,7 +5,8 @@
|
||||
*
|
||||
*/
|
||||
|
||||
#include "opencl/source/built_ins/built_ins.h"
|
||||
#include "shared/source/built_ins/built_ins.h"
|
||||
|
||||
#include "opencl/source/helpers/hardware_commands_helper.h"
|
||||
#include "opencl/source/kernel/kernel.h"
|
||||
#include "opencl/test/unit_test/fixtures/built_in_fixture.h"
|
||||
|
||||
@@ -7,4 +7,4 @@
|
||||
|
||||
#include "opencl/test/unit_test/helpers/kernel_binary_helper.h"
|
||||
|
||||
const std::string KernelBinaryHelper::BUILT_INS("7030307152995455603");
|
||||
const std::string KernelBinaryHelper::BUILT_INS("7206969092167061981");
|
||||
@@ -5,13 +5,13 @@
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/built_ins/built_ins.h"
|
||||
#include "shared/source/compiler_interface/compiler_interface.h"
|
||||
#include "shared/source/helpers/aligned_memory.h"
|
||||
#include "shared/source/image/image_surface_state.h"
|
||||
#include "shared/source/os_interface/os_context.h"
|
||||
#include "shared/test/unit_test/helpers/debug_manager_state_restore.h"
|
||||
|
||||
#include "opencl/source/built_ins/built_ins.h"
|
||||
#include "opencl/source/helpers/mipmap.h"
|
||||
#include "opencl/source/mem_obj/image.h"
|
||||
#include "opencl/source/mem_obj/mem_obj_helper.h"
|
||||
|
||||
@@ -6,7 +6,8 @@
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include "opencl/source/built_ins/built_ins.h"
|
||||
#include "shared/source/built_ins/built_ins.h"
|
||||
|
||||
#include "opencl/source/helpers/dispatch_info.h"
|
||||
|
||||
using namespace NEO;
|
||||
|
||||
@@ -7,9 +7,10 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "opencl/source/built_ins/built_ins.h"
|
||||
#include "shared/source/built_ins/built_ins.h"
|
||||
#include "shared/source/built_ins/sip.h"
|
||||
|
||||
#include "opencl/source/built_ins/builtins_dispatch_builder.h"
|
||||
#include "opencl/source/built_ins/sip.h"
|
||||
#include "opencl/source/program/program.h"
|
||||
|
||||
#include <memory>
|
||||
|
||||
@@ -7,11 +7,11 @@
|
||||
|
||||
#include "opencl/test/unit_test/mocks/mock_context.h"
|
||||
|
||||
#include "shared/source/built_ins/built_ins.h"
|
||||
#include "shared/source/compiler_interface/compiler_interface.h"
|
||||
#include "shared/source/memory_manager/deferred_deleter.h"
|
||||
#include "shared/source/memory_manager/unified_memory_manager.h"
|
||||
|
||||
#include "opencl/source/built_ins/built_ins.h"
|
||||
#include "opencl/source/command_queue/command_queue.h"
|
||||
#include "opencl/source/memory_manager/os_agnostic_memory_manager.h"
|
||||
#include "opencl/source/sharings/sharing.h"
|
||||
|
||||
@@ -7,7 +7,8 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "opencl/source/built_ins/sip.h"
|
||||
#include "shared/source/built_ins/sip.h"
|
||||
|
||||
#include "opencl/test/unit_test/mocks/mock_execution_environment.h"
|
||||
|
||||
#include <memory>
|
||||
|
||||
@@ -349,7 +349,7 @@ TEST_F(OfflineCompilerTests, GoodParseBinToCharArray) {
|
||||
familyNameWithType + "[10] = {\n"
|
||||
" 0x40032302, 0x90800756, 0x05340301, 0x66097860, 0x101010ff, 0x40032302, 0x90800756, 0x05340301, \n"
|
||||
" 0x66097860, 0xff000000};\n\n"
|
||||
"#include \"opencl/source/built_ins/registry/built_ins_registry.h\"\n\n"
|
||||
"#include \"shared/source/built_ins/registry/built_ins_registry.h\"\n\n"
|
||||
"namespace NEO {\n"
|
||||
"static RegisterEmbeddedResource registerSchedulerBin(\n"
|
||||
" \"" +
|
||||
|
||||
@@ -6,7 +6,8 @@
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include "opencl/source/built_ins/built_ins.h"
|
||||
#include "shared/source/built_ins/built_ins.h"
|
||||
|
||||
#include "opencl/test/unit_test/fixtures/context_fixture.h"
|
||||
#include "opencl/test/unit_test/fixtures/device_fixture.h"
|
||||
#include "opencl/test/unit_test/fixtures/program_fixture.h"
|
||||
|
||||
@@ -5,40 +5,37 @@
|
||||
*
|
||||
*/
|
||||
|
||||
__kernel void fullCopy(__global const uint* src, __global uint* dst) {
|
||||
__kernel void fullCopy(__global const uint *src, __global uint *dst) {
|
||||
unsigned int gid = get_global_id(0);
|
||||
uint4 loaded = vload4(gid, src);
|
||||
vstore4(loaded, gid, dst);
|
||||
}
|
||||
|
||||
__kernel void CopyBufferToBufferBytes(
|
||||
const __global uchar* pSrc,
|
||||
__global uchar* pDst,
|
||||
const __global uchar *pSrc,
|
||||
__global uchar *pDst,
|
||||
uint srcOffsetInBytes,
|
||||
uint dstOffsetInBytes,
|
||||
uint bytesToRead )
|
||||
{
|
||||
pSrc += ( srcOffsetInBytes + get_global_id(0) );
|
||||
pDst += ( dstOffsetInBytes + get_global_id(0) );
|
||||
pDst[ 0 ] = pSrc[ 0 ];
|
||||
uint bytesToRead) {
|
||||
pSrc += (srcOffsetInBytes + get_global_id(0));
|
||||
pDst += (dstOffsetInBytes + get_global_id(0));
|
||||
pDst[0] = pSrc[0];
|
||||
}
|
||||
|
||||
__kernel void CopyBufferToBufferLeftLeftover(
|
||||
const __global uchar* pSrc,
|
||||
__global uchar* pDst,
|
||||
const __global uchar *pSrc,
|
||||
__global uchar *pDst,
|
||||
uint srcOffsetInBytes,
|
||||
uint dstOffsetInBytes)
|
||||
{
|
||||
uint dstOffsetInBytes) {
|
||||
unsigned int gid = get_global_id(0);
|
||||
pDst[ gid + dstOffsetInBytes ] = pSrc[ gid + srcOffsetInBytes ];
|
||||
pDst[gid + dstOffsetInBytes] = pSrc[gid + srcOffsetInBytes];
|
||||
}
|
||||
|
||||
__kernel void CopyBufferToBufferMiddle(
|
||||
const __global uint* pSrc,
|
||||
__global uint* pDst,
|
||||
const __global uint *pSrc,
|
||||
__global uint *pDst,
|
||||
uint srcOffsetInBytes,
|
||||
uint dstOffsetInBytes)
|
||||
{
|
||||
uint dstOffsetInBytes) {
|
||||
unsigned int gid = get_global_id(0);
|
||||
pDst += dstOffsetInBytes >> 2;
|
||||
pSrc += srcOffsetInBytes >> 2;
|
||||
@@ -47,55 +44,49 @@ __kernel void CopyBufferToBufferMiddle(
|
||||
}
|
||||
|
||||
__kernel void CopyBufferToBufferRightLeftover(
|
||||
const __global uchar* pSrc,
|
||||
__global uchar* pDst,
|
||||
const __global uchar *pSrc,
|
||||
__global uchar *pDst,
|
||||
uint srcOffsetInBytes,
|
||||
uint dstOffsetInBytes)
|
||||
{
|
||||
uint dstOffsetInBytes) {
|
||||
unsigned int gid = get_global_id(0);
|
||||
pDst[ gid + dstOffsetInBytes ] = pSrc[ gid + srcOffsetInBytes ];
|
||||
pDst[gid + dstOffsetInBytes] = pSrc[gid + srcOffsetInBytes];
|
||||
}
|
||||
|
||||
|
||||
// assumption is local work size = pattern size
|
||||
__kernel void FillBufferBytes(
|
||||
__global uchar* pDst,
|
||||
__global uchar *pDst,
|
||||
uint dstOffsetInBytes,
|
||||
const __global uchar* pPattern )
|
||||
{
|
||||
const __global uchar *pPattern) {
|
||||
uint dstIndex = get_global_id(0) + dstOffsetInBytes;
|
||||
uint srcIndex = get_local_id(0);
|
||||
pDst[dstIndex] = pPattern[srcIndex];
|
||||
}
|
||||
|
||||
__kernel void FillBufferLeftLeftover(
|
||||
__global uchar* pDst,
|
||||
__global uchar *pDst,
|
||||
uint dstOffsetInBytes,
|
||||
const __global uchar* pPattern,
|
||||
const uint patternSizeInEls )
|
||||
{
|
||||
const __global uchar *pPattern,
|
||||
const uint patternSizeInEls) {
|
||||
uint gid = get_global_id(0);
|
||||
pDst[ gid + dstOffsetInBytes ] = pPattern[ gid & (patternSizeInEls - 1) ];
|
||||
pDst[gid + dstOffsetInBytes] = pPattern[gid & (patternSizeInEls - 1)];
|
||||
}
|
||||
|
||||
__kernel void FillBufferMiddle(
|
||||
__global uchar* pDst,
|
||||
__global uchar *pDst,
|
||||
uint dstOffsetInBytes,
|
||||
const __global uint* pPattern,
|
||||
const uint patternSizeInEls )
|
||||
{
|
||||
const __global uint *pPattern,
|
||||
const uint patternSizeInEls) {
|
||||
uint gid = get_global_id(0);
|
||||
((__global uint*)(pDst + dstOffsetInBytes))[gid] = pPattern[ gid & (patternSizeInEls - 1) ];
|
||||
((__global uint *)(pDst + dstOffsetInBytes))[gid] = pPattern[gid & (patternSizeInEls - 1)];
|
||||
}
|
||||
|
||||
__kernel void FillBufferRightLeftover(
|
||||
__global uchar* pDst,
|
||||
__global uchar *pDst,
|
||||
uint dstOffsetInBytes,
|
||||
const __global uchar* pPattern,
|
||||
const uint patternSizeInEls )
|
||||
{
|
||||
const __global uchar *pPattern,
|
||||
const uint patternSizeInEls) {
|
||||
uint gid = get_global_id(0);
|
||||
pDst[ gid + dstOffsetInBytes ] = pPattern[ gid & (patternSizeInEls - 1) ];
|
||||
pDst[gid + dstOffsetInBytes] = pPattern[gid & (patternSizeInEls - 1)];
|
||||
}
|
||||
|
||||
__kernel void FillImage1d(
|
||||
@@ -179,42 +170,40 @@ __kernel void CopyImageToImage3d(
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
__kernel void CopyBufferRectBytes2d(
|
||||
__global const char* src,
|
||||
__global char* dst,
|
||||
__global const char *src,
|
||||
__global char *dst,
|
||||
uint4 SrcOrigin,
|
||||
uint4 DstOrigin,
|
||||
uint2 SrcPitch,
|
||||
uint2 DstPitch )
|
||||
uint2 DstPitch)
|
||||
|
||||
{
|
||||
int x = get_global_id(0);
|
||||
int y = get_global_id(1);
|
||||
|
||||
uint LSrcOffset = x + SrcOrigin.x + ( ( y + SrcOrigin.y ) * SrcPitch.x );
|
||||
uint LDstOffset = x + DstOrigin.x + ( ( y + DstOrigin.y ) * DstPitch.x );
|
||||
|
||||
*( dst + LDstOffset ) = *( src + LSrcOffset );
|
||||
uint LSrcOffset = x + SrcOrigin.x + ((y + SrcOrigin.y) * SrcPitch.x);
|
||||
uint LDstOffset = x + DstOrigin.x + ((y + DstOrigin.y) * DstPitch.x);
|
||||
|
||||
*(dst + LDstOffset) = *(src + LSrcOffset);
|
||||
}
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
__kernel void CopyBufferRectBytes3d(
|
||||
__global const char* src,
|
||||
__global char* dst,
|
||||
uint4 SrcOrigin,
|
||||
uint4 DstOrigin,
|
||||
uint2 SrcPitch,
|
||||
uint2 DstPitch )
|
||||
|
||||
{
|
||||
int x = get_global_id(0);
|
||||
int y = get_global_id(1);
|
||||
int z = get_global_id(2);
|
||||
|
||||
uint LSrcOffset = x + SrcOrigin.x + ( ( y + SrcOrigin.y ) * SrcPitch.x ) + ( ( z + SrcOrigin.z ) * SrcPitch.y );
|
||||
uint LDstOffset = x + DstOrigin.x + ( ( y + DstOrigin.y ) * DstPitch.x ) + ( ( z + DstOrigin.z ) * DstPitch.y );
|
||||
|
||||
*( dst + LDstOffset ) = *( src + LSrcOffset );
|
||||
|
||||
__global const char *src,
|
||||
__global char *dst,
|
||||
uint4 SrcOrigin,
|
||||
uint4 DstOrigin,
|
||||
uint2 SrcPitch,
|
||||
uint2 DstPitch)
|
||||
|
||||
{
|
||||
int x = get_global_id(0);
|
||||
int y = get_global_id(1);
|
||||
int z = get_global_id(2);
|
||||
|
||||
uint LSrcOffset = x + SrcOrigin.x + ((y + SrcOrigin.y) * SrcPitch.x) + ((z + SrcOrigin.z) * SrcPitch.y);
|
||||
uint LDstOffset = x + DstOrigin.x + ((y + DstOrigin.y) * DstPitch.x) + ((z + DstOrigin.z) * DstPitch.y);
|
||||
|
||||
*(dst + LDstOffset) = *(src + LSrcOffset);
|
||||
}
|
||||
|
||||
#pragma OPENCL EXTENSION cl_khr_3d_image_writes : enable
|
||||
@@ -248,14 +237,13 @@ __kernel void CopyBufferToImage3d2Bytes(__global uchar *src,
|
||||
|
||||
uint4 c = (uint4)(0, 0, 0, 1);
|
||||
|
||||
if(( ulong )(src + srcOffset) & 0x00000001){
|
||||
ushort upper = *((__global uchar*)(src + LOffset + x * 2 + 1));
|
||||
ushort lower = *((__global uchar*)(src + LOffset + x * 2));
|
||||
if ((ulong)(src + srcOffset) & 0x00000001) {
|
||||
ushort upper = *((__global uchar *)(src + LOffset + x * 2 + 1));
|
||||
ushort lower = *((__global uchar *)(src + LOffset + x * 2));
|
||||
ushort combined = (upper << 8) | lower;
|
||||
c.x = (uint)combined;
|
||||
}
|
||||
else{
|
||||
c.x = (uint)(*(__global ushort*)(src + LOffset + x * 2));
|
||||
} else {
|
||||
c.x = (uint)(*(__global ushort *)(src + LOffset + x * 2));
|
||||
}
|
||||
write_imageui(output, dstCoord, c);
|
||||
}
|
||||
@@ -274,16 +262,15 @@ __kernel void CopyBufferToImage3d4Bytes(__global uchar *src,
|
||||
|
||||
uint4 c = (uint4)(0, 0, 0, 1);
|
||||
|
||||
if(( ulong )(src + srcOffset) & 0x00000003){
|
||||
uint upper2 = *((__global uchar*)(src + LOffset + x * 4 + 3));
|
||||
uint upper = *((__global uchar*)(src + LOffset + x * 4 + 2));
|
||||
uint lower2 = *((__global uchar*)(src + LOffset + x * 4 + 1));
|
||||
uint lower = *((__global uchar*)(src + LOffset + x * 4));
|
||||
if ((ulong)(src + srcOffset) & 0x00000003) {
|
||||
uint upper2 = *((__global uchar *)(src + LOffset + x * 4 + 3));
|
||||
uint upper = *((__global uchar *)(src + LOffset + x * 4 + 2));
|
||||
uint lower2 = *((__global uchar *)(src + LOffset + x * 4 + 1));
|
||||
uint lower = *((__global uchar *)(src + LOffset + x * 4));
|
||||
uint combined = (upper2 << 24) | (upper << 16) | (lower2 << 8) | lower;
|
||||
c.x = combined;
|
||||
}
|
||||
else{
|
||||
c.x = (*(__global uint*)(src + LOffset + x * 4));
|
||||
} else {
|
||||
c.x = (*(__global uint *)(src + LOffset + x * 4));
|
||||
}
|
||||
write_imageui(output, dstCoord, c);
|
||||
}
|
||||
@@ -300,24 +287,23 @@ __kernel void CopyBufferToImage3d8Bytes(__global uchar *src,
|
||||
int4 dstCoord = (int4)(x, y, z, 0) + dstOffset;
|
||||
uint LOffset = srcOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
|
||||
uint2 c = (uint2)(0, 0);//*((__global uint2*)(src + LOffset + x * 8));
|
||||
uint2 c = (uint2)(0, 0); //*((__global uint2*)(src + LOffset + x * 8));
|
||||
|
||||
if(( ulong )(src + srcOffset) & 0x00000007){
|
||||
uint upper2 = *((__global uchar*)(src + LOffset + x * 8 + 3));
|
||||
uint upper = *((__global uchar*)(src + LOffset + x * 8 + 2));
|
||||
uint lower2 = *((__global uchar*)(src + LOffset + x * 8 + 1));
|
||||
uint lower = *((__global uchar*)(src + LOffset + x * 8));
|
||||
if ((ulong)(src + srcOffset) & 0x00000007) {
|
||||
uint upper2 = *((__global uchar *)(src + LOffset + x * 8 + 3));
|
||||
uint upper = *((__global uchar *)(src + LOffset + x * 8 + 2));
|
||||
uint lower2 = *((__global uchar *)(src + LOffset + x * 8 + 1));
|
||||
uint lower = *((__global uchar *)(src + LOffset + x * 8));
|
||||
uint combined = (upper2 << 24) | (upper << 16) | (lower2 << 8) | lower;
|
||||
c.x = combined;
|
||||
upper2 = *((__global uchar*)(src + LOffset + x * 8 + 7));
|
||||
upper = *((__global uchar*)(src + LOffset + x * 8 + 6));
|
||||
lower2 = *((__global uchar*)(src + LOffset + x * 8 + 5));
|
||||
lower = *((__global uchar*)(src + LOffset + x * 8 + 4));
|
||||
upper2 = *((__global uchar *)(src + LOffset + x * 8 + 7));
|
||||
upper = *((__global uchar *)(src + LOffset + x * 8 + 6));
|
||||
lower2 = *((__global uchar *)(src + LOffset + x * 8 + 5));
|
||||
lower = *((__global uchar *)(src + LOffset + x * 8 + 4));
|
||||
combined = ((uint)upper2 << 24) | ((uint)upper << 16) | ((uint)lower2 << 8) | lower;
|
||||
c.y = combined;
|
||||
}
|
||||
else{
|
||||
c = *((__global uint2*)(src + LOffset + x * 8));
|
||||
} else {
|
||||
c = *((__global uint2 *)(src + LOffset + x * 8));
|
||||
}
|
||||
|
||||
write_imageui(output, dstCoord, (uint4)(c.x, c.y, 0, 1));
|
||||
@@ -337,33 +323,32 @@ __kernel void CopyBufferToImage3d16Bytes(__global uchar *src,
|
||||
|
||||
uint4 c = (uint4)(0, 0, 0, 0);
|
||||
|
||||
if(( ulong )(src + srcOffset) & 0x0000000f){
|
||||
uint upper2 = *((__global uchar*)(src + LOffset + x * 16 + 3));
|
||||
uint upper = *((__global uchar*)(src + LOffset + x * 16 + 2));
|
||||
uint lower2 = *((__global uchar*)(src + LOffset + x * 16 + 1));
|
||||
uint lower = *((__global uchar*)(src + LOffset + x * 16));
|
||||
if ((ulong)(src + srcOffset) & 0x0000000f) {
|
||||
uint upper2 = *((__global uchar *)(src + LOffset + x * 16 + 3));
|
||||
uint upper = *((__global uchar *)(src + LOffset + x * 16 + 2));
|
||||
uint lower2 = *((__global uchar *)(src + LOffset + x * 16 + 1));
|
||||
uint lower = *((__global uchar *)(src + LOffset + x * 16));
|
||||
uint combined = (upper2 << 24) | (upper << 16) | (lower2 << 8) | lower;
|
||||
c.x = combined;
|
||||
upper2 = *((__global uchar*)(src + LOffset + x * 16 + 7));
|
||||
upper = *((__global uchar*)(src + LOffset + x * 16 + 6));
|
||||
lower2 = *((__global uchar*)(src + LOffset + x * 16 + 5));
|
||||
lower = *((__global uchar*)(src + LOffset + x * 16 + 4));
|
||||
upper2 = *((__global uchar *)(src + LOffset + x * 16 + 7));
|
||||
upper = *((__global uchar *)(src + LOffset + x * 16 + 6));
|
||||
lower2 = *((__global uchar *)(src + LOffset + x * 16 + 5));
|
||||
lower = *((__global uchar *)(src + LOffset + x * 16 + 4));
|
||||
combined = (upper2 << 24) | (upper << 16) | (lower2 << 8) | lower;
|
||||
c.y = combined;
|
||||
upper2 = *((__global uchar*)(src + LOffset + x * 16 + 11));
|
||||
upper = *((__global uchar*)(src + LOffset + x * 16 + 10));
|
||||
lower2 = *((__global uchar*)(src + LOffset + x * 16 + 9));
|
||||
lower = *((__global uchar*)(src + LOffset + x * 16 + 8));
|
||||
upper2 = *((__global uchar *)(src + LOffset + x * 16 + 11));
|
||||
upper = *((__global uchar *)(src + LOffset + x * 16 + 10));
|
||||
lower2 = *((__global uchar *)(src + LOffset + x * 16 + 9));
|
||||
lower = *((__global uchar *)(src + LOffset + x * 16 + 8));
|
||||
combined = (upper2 << 24) | (upper << 16) | (lower2 << 8) | lower;
|
||||
c.z = combined;
|
||||
upper2 = *((__global uchar*)(src + LOffset + x * 16 + 15));
|
||||
upper = *((__global uchar*)(src + LOffset + x * 16 + 14));
|
||||
lower2 = *((__global uchar*)(src + LOffset + x * 16 + 13));
|
||||
lower = *((__global uchar*)(src + LOffset + x * 16 + 12));
|
||||
upper2 = *((__global uchar *)(src + LOffset + x * 16 + 15));
|
||||
upper = *((__global uchar *)(src + LOffset + x * 16 + 14));
|
||||
lower2 = *((__global uchar *)(src + LOffset + x * 16 + 13));
|
||||
lower = *((__global uchar *)(src + LOffset + x * 16 + 12));
|
||||
combined = (upper2 << 24) | (upper << 16) | (lower2 << 8) | lower;
|
||||
c.w = combined;
|
||||
}
|
||||
else{
|
||||
} else {
|
||||
c = *((__global uint4 *)(src + LOffset + x * 16));
|
||||
}
|
||||
|
||||
@@ -397,15 +382,14 @@ __kernel void CopyImage3dToBuffer2Bytes(__read_only image3d_t input,
|
||||
|
||||
const int4 srcCoord = (int4)(x, y, z, 0) + srcOffset;
|
||||
uint DstOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
|
||||
|
||||
uint4 c = read_imageui(input, srcCoord);
|
||||
|
||||
if(( ulong )(dst + dstOffset) & 0x00000001){
|
||||
*((__global uchar*)(dst + DstOffset + x * 2 + 1)) = convert_uchar_sat((c.x >> 8 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 2)) = convert_uchar_sat(c.x & 0xff);
|
||||
}
|
||||
else{
|
||||
*((__global ushort*)(dst + DstOffset + x * 2)) = convert_ushort_sat(c.x);
|
||||
if ((ulong)(dst + dstOffset) & 0x00000001) {
|
||||
*((__global uchar *)(dst + DstOffset + x * 2 + 1)) = convert_uchar_sat((c.x >> 8) & 0xff);
|
||||
*((__global uchar *)(dst + DstOffset + x * 2)) = convert_uchar_sat(c.x & 0xff);
|
||||
} else {
|
||||
*((__global ushort *)(dst + DstOffset + x * 2)) = convert_ushort_sat(c.x);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -423,14 +407,13 @@ __kernel void CopyImage3dToBuffer4Bytes(__read_only image3d_t input,
|
||||
|
||||
uint4 c = read_imageui(input, srcCoord);
|
||||
|
||||
if(( ulong )(dst + dstOffset) & 0x00000003){
|
||||
*((__global uchar*)(dst + DstOffset + x * 4 + 3)) = convert_uchar_sat((c.x >> 24 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 4 + 2)) = convert_uchar_sat((c.x >> 16 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 4 + 1)) = convert_uchar_sat((c.x >> 8 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 4)) = convert_uchar_sat(c.x & 0xff);
|
||||
}
|
||||
else{
|
||||
*((__global uint*)(dst + DstOffset + x * 4)) = c.x;
|
||||
if ((ulong)(dst + dstOffset) & 0x00000003) {
|
||||
*((__global uchar *)(dst + DstOffset + x * 4 + 3)) = convert_uchar_sat((c.x >> 24) & 0xff);
|
||||
*((__global uchar *)(dst + DstOffset + x * 4 + 2)) = convert_uchar_sat((c.x >> 16) & 0xff);
|
||||
*((__global uchar *)(dst + DstOffset + x * 4 + 1)) = convert_uchar_sat((c.x >> 8) & 0xff);
|
||||
*((__global uchar *)(dst + DstOffset + x * 4)) = convert_uchar_sat(c.x & 0xff);
|
||||
} else {
|
||||
*((__global uint *)(dst + DstOffset + x * 4)) = c.x;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -448,19 +431,18 @@ __kernel void CopyImage3dToBuffer8Bytes(__read_only image3d_t input,
|
||||
|
||||
uint4 c = read_imageui(input, srcCoord);
|
||||
|
||||
if(( ulong )(dst + dstOffset) & 0x00000007){
|
||||
*((__global uchar*)(dst + DstOffset + x * 8 + 3)) = convert_uchar_sat((c.x >> 24 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 8 + 2)) = convert_uchar_sat((c.x >> 16 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 8 + 1)) = convert_uchar_sat((c.x >> 8 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 8)) = convert_uchar_sat(c.x & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 8 + 7)) = convert_uchar_sat((c.y >> 24 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 8 + 6)) = convert_uchar_sat((c.y >> 16 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 8 + 5)) = convert_uchar_sat((c.y >> 8 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 8 + 4)) = convert_uchar_sat(c.y & 0xff);
|
||||
}
|
||||
else{
|
||||
uint2 d = (uint2)(c.x,c.y);
|
||||
*((__global uint2*)(dst + DstOffset + x * 8)) = d;
|
||||
if ((ulong)(dst + dstOffset) & 0x00000007) {
|
||||
*((__global uchar *)(dst + DstOffset + x * 8 + 3)) = convert_uchar_sat((c.x >> 24) & 0xff);
|
||||
*((__global uchar *)(dst + DstOffset + x * 8 + 2)) = convert_uchar_sat((c.x >> 16) & 0xff);
|
||||
*((__global uchar *)(dst + DstOffset + x * 8 + 1)) = convert_uchar_sat((c.x >> 8) & 0xff);
|
||||
*((__global uchar *)(dst + DstOffset + x * 8)) = convert_uchar_sat(c.x & 0xff);
|
||||
*((__global uchar *)(dst + DstOffset + x * 8 + 7)) = convert_uchar_sat((c.y >> 24) & 0xff);
|
||||
*((__global uchar *)(dst + DstOffset + x * 8 + 6)) = convert_uchar_sat((c.y >> 16) & 0xff);
|
||||
*((__global uchar *)(dst + DstOffset + x * 8 + 5)) = convert_uchar_sat((c.y >> 8) & 0xff);
|
||||
*((__global uchar *)(dst + DstOffset + x * 8 + 4)) = convert_uchar_sat(c.y & 0xff);
|
||||
} else {
|
||||
uint2 d = (uint2)(c.x, c.y);
|
||||
*((__global uint2 *)(dst + DstOffset + x * 8)) = d;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -478,25 +460,24 @@ __kernel void CopyImage3dToBuffer16Bytes(__read_only image3d_t input,
|
||||
|
||||
const uint4 c = read_imageui(input, srcCoord);
|
||||
|
||||
if(( ulong )(dst + dstOffset) & 0x0000000f){
|
||||
*((__global uchar*)(dst + DstOffset + x * 16 + 3)) = convert_uchar_sat((c.x >> 24 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 16 + 2)) = convert_uchar_sat((c.x >> 16 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 16 + 1)) = convert_uchar_sat((c.x >> 8 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 16)) = convert_uchar_sat(c.x & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 16 + 7)) = convert_uchar_sat((c.y >> 24 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 16 + 6)) = convert_uchar_sat((c.y >> 16 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 16 + 5)) = convert_uchar_sat((c.y >> 8 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 16 + 4)) = convert_uchar_sat(c.y & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 16 + 11)) = convert_uchar_sat((c.z >> 24 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 16 + 10)) = convert_uchar_sat((c.z >> 16 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 16 + 9)) = convert_uchar_sat((c.z >> 8 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 16 + 8)) = convert_uchar_sat(c.z & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 16 + 15)) = convert_uchar_sat((c.w >> 24 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 16 + 14)) = convert_uchar_sat((c.w >> 16 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 16 + 13)) = convert_uchar_sat((c.w >> 8 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 16 + 12)) = convert_uchar_sat(c.w & 0xff);
|
||||
}
|
||||
else{
|
||||
*(__global uint4*)(dst + DstOffset + x * 16) = c;
|
||||
if ((ulong)(dst + dstOffset) & 0x0000000f) {
|
||||
*((__global uchar *)(dst + DstOffset + x * 16 + 3)) = convert_uchar_sat((c.x >> 24) & 0xff);
|
||||
*((__global uchar *)(dst + DstOffset + x * 16 + 2)) = convert_uchar_sat((c.x >> 16) & 0xff);
|
||||
*((__global uchar *)(dst + DstOffset + x * 16 + 1)) = convert_uchar_sat((c.x >> 8) & 0xff);
|
||||
*((__global uchar *)(dst + DstOffset + x * 16)) = convert_uchar_sat(c.x & 0xff);
|
||||
*((__global uchar *)(dst + DstOffset + x * 16 + 7)) = convert_uchar_sat((c.y >> 24) & 0xff);
|
||||
*((__global uchar *)(dst + DstOffset + x * 16 + 6)) = convert_uchar_sat((c.y >> 16) & 0xff);
|
||||
*((__global uchar *)(dst + DstOffset + x * 16 + 5)) = convert_uchar_sat((c.y >> 8) & 0xff);
|
||||
*((__global uchar *)(dst + DstOffset + x * 16 + 4)) = convert_uchar_sat(c.y & 0xff);
|
||||
*((__global uchar *)(dst + DstOffset + x * 16 + 11)) = convert_uchar_sat((c.z >> 24) & 0xff);
|
||||
*((__global uchar *)(dst + DstOffset + x * 16 + 10)) = convert_uchar_sat((c.z >> 16) & 0xff);
|
||||
*((__global uchar *)(dst + DstOffset + x * 16 + 9)) = convert_uchar_sat((c.z >> 8) & 0xff);
|
||||
*((__global uchar *)(dst + DstOffset + x * 16 + 8)) = convert_uchar_sat(c.z & 0xff);
|
||||
*((__global uchar *)(dst + DstOffset + x * 16 + 15)) = convert_uchar_sat((c.w >> 24) & 0xff);
|
||||
*((__global uchar *)(dst + DstOffset + x * 16 + 14)) = convert_uchar_sat((c.w >> 16) & 0xff);
|
||||
*((__global uchar *)(dst + DstOffset + x * 16 + 13)) = convert_uchar_sat((c.w >> 8) & 0xff);
|
||||
*((__global uchar *)(dst + DstOffset + x * 16 + 12)) = convert_uchar_sat(c.w & 0xff);
|
||||
} else {
|
||||
*(__global uint4 *)(dst + DstOffset + x * 16) = c;
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user