mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-21 01:04:57 +08:00
Move built ins to share directory
Change-Id: I740a349a0f15229cd356fffe996932029bf0f98b Signed-off-by: Maciej Plewka <maciej.plewka@intel.com>
This commit is contained in:
committed by
sys_ocldev
parent
0e85ccf084
commit
357fdc2e65
@@ -59,6 +59,8 @@ else()
|
||||
list(APPEND CORE_SOURCES ${CORE_SRCS_GENX_ALL_LINUX})
|
||||
endif()
|
||||
|
||||
message("${NEO_CORE_SRCS_BUILT_INS}")
|
||||
|
||||
append_sources_from_properties(CORE_SOURCES
|
||||
NEO_CORE_COMMAND_CONTAINER
|
||||
NEO_CORE_DEBUG_SETTINGS
|
||||
@@ -69,6 +71,9 @@ append_sources_from_properties(CORE_SOURCES
|
||||
NEO_CORE_HELPERS
|
||||
NEO_CORE_INDIRECT_HEAP
|
||||
NEO_CORE_SKU_INFO_BASE
|
||||
NEO_CORE_SRCS_BUILT_INS
|
||||
NEO_CORE_SRCS_BUILT_IN_KERNELS
|
||||
NEO_CORE_SRCS_BUILT_INS_OPS
|
||||
NEO_CORE_UTILITIES
|
||||
NEO_UNIFIED_MEMORY
|
||||
)
|
||||
|
||||
@@ -1,12 +1,46 @@
|
||||
#
|
||||
# Copyright (C) 2020 Intel Corporation
|
||||
# Copyright (C) 2017-2020 Intel Corporation
|
||||
#
|
||||
# SPDX-License-Identifier: MIT
|
||||
#
|
||||
|
||||
set(NEO_CORE_BUILT_INS
|
||||
set(NEO_CORE_SRCS_BUILT_INS
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/built_ins_storage.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/built_ins.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/built_ins.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/built_in_ops_base.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/sip.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/sip.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/sip_kernel_type.h
|
||||
)
|
||||
|
||||
set_property(GLOBAL PROPERTY NEO_CORE_BUILT_INS ${NEO_CORE_BUILT_INS})
|
||||
add_subdirectory(builtinops)
|
||||
|
||||
set_property(GLOBAL PROPERTY NEO_CORE_SRCS_BUILT_INS ${NEO_CORE_SRCS_BUILT_INS})
|
||||
|
||||
set(NEO_CORE_SRCS_BUILT_IN_KERNELS
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/kernels/aux_translation.builtin_kernel
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/kernels/copy_buffer_rect.builtin_kernel
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/kernels/copy_buffer_rect_stateless.builtin_kernel
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/kernels/copy_buffer_to_buffer.builtin_kernel
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/kernels/copy_buffer_to_buffer_stateless.builtin_kernel
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/kernels/copy_buffer_to_image3d.builtin_kernel
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/kernels/copy_buffer_to_image3d_stateless.builtin_kernel
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/kernels/copy_image3d_to_buffer.builtin_kernel
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/kernels/copy_image3d_to_buffer_stateless.builtin_kernel
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/kernels/copy_image_to_image1d.builtin_kernel
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/kernels/copy_image_to_image2d.builtin_kernel
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/kernels/copy_image_to_image3d.builtin_kernel
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/kernels/fill_buffer.builtin_kernel
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/kernels/fill_buffer_stateless.builtin_kernel
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/kernels/fill_image1d.builtin_kernel
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/kernels/fill_image2d.builtin_kernel
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/kernels/fill_image3d.builtin_kernel
|
||||
)
|
||||
|
||||
set_property(GLOBAL PROPERTY NEO_CORE_SRCS_BUILT_IN_KERNELS ${NEO_CORE_SRCS_BUILT_IN_KERNELS})
|
||||
|
||||
if(NOT (TARGET ${BUILTINS_BINARIES_LIB_NAME}))
|
||||
include(builtins_binary.cmake)
|
||||
endif()
|
||||
|
||||
37
shared/source/built_ins/built_in_ops_base.h
Normal file
37
shared/source/built_ins/built_in_ops_base.h
Normal file
@@ -0,0 +1,37 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include <cstdint>
|
||||
|
||||
namespace NEO {
|
||||
namespace EBuiltInOps {
|
||||
|
||||
using Type = uint32_t;
|
||||
|
||||
constexpr Type AuxTranslation{0};
|
||||
constexpr Type CopyBufferToBuffer{1};
|
||||
constexpr Type CopyBufferToBufferStateless{2};
|
||||
constexpr Type CopyBufferRect{3};
|
||||
constexpr Type CopyBufferRectStateless{4};
|
||||
constexpr Type FillBuffer{5};
|
||||
constexpr Type FillBufferStateless{6};
|
||||
constexpr Type CopyBufferToImage3d{7};
|
||||
constexpr Type CopyBufferToImage3dStateless{8};
|
||||
constexpr Type CopyImage3dToBuffer{9};
|
||||
constexpr Type CopyImage3dToBufferStateless{10};
|
||||
constexpr Type CopyImageToImage1d{11};
|
||||
constexpr Type CopyImageToImage2d{12};
|
||||
constexpr Type CopyImageToImage3d{13};
|
||||
constexpr Type FillImage1d{14};
|
||||
constexpr Type FillImage2d{15};
|
||||
constexpr Type FillImage3d{16};
|
||||
|
||||
constexpr Type MaxBaseValue{16};
|
||||
constexpr Type COUNT{64};
|
||||
} // namespace EBuiltInOps
|
||||
} // namespace NEO
|
||||
69
shared/source/built_ins/built_ins.cpp
Normal file
69
shared/source/built_ins/built_ins.cpp
Normal file
@@ -0,0 +1,69 @@
|
||||
/*
|
||||
* Copyright (C) 2017-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/built_ins/built_ins.h"
|
||||
|
||||
#include "shared/source/built_ins/sip.h"
|
||||
#include "shared/source/compiler_interface/compiler_interface.h"
|
||||
#include "shared/source/helpers/basic_math.h"
|
||||
#include "shared/source/helpers/debug_helpers.h"
|
||||
|
||||
#include "opencl/source/helpers/built_ins_helper.h"
|
||||
#include "opencl/source/helpers/convert_color.h"
|
||||
#include "opencl/source/helpers/dispatch_info_builder.h"
|
||||
|
||||
#include "compiler_options.h"
|
||||
|
||||
#include <cstdint>
|
||||
#include <sstream>
|
||||
|
||||
namespace NEO {
|
||||
|
||||
BuiltIns::BuiltIns() {
|
||||
builtinsLib.reset(new BuiltinsLib());
|
||||
}
|
||||
|
||||
BuiltIns::~BuiltIns() = default;
|
||||
|
||||
const SipKernel &BuiltIns::getSipKernel(SipKernelType type, Device &device) {
|
||||
uint32_t kernelId = static_cast<uint32_t>(type);
|
||||
UNRECOVERABLE_IF(kernelId >= static_cast<uint32_t>(SipKernelType::COUNT));
|
||||
auto &sipBuiltIn = this->sipKernels[kernelId];
|
||||
|
||||
auto initializer = [&] {
|
||||
int retVal = 0;
|
||||
|
||||
std::vector<char> sipBinary;
|
||||
auto compilerInteface = device.getExecutionEnvironment()->getCompilerInterface();
|
||||
UNRECOVERABLE_IF(compilerInteface == nullptr);
|
||||
|
||||
auto ret = compilerInteface->getSipKernelBinary(device, type, sipBinary);
|
||||
|
||||
UNRECOVERABLE_IF(ret != TranslationOutput::ErrorCode::Success);
|
||||
UNRECOVERABLE_IF(sipBinary.size() == 0);
|
||||
auto program = createProgramForSip(*device.getExecutionEnvironment(),
|
||||
nullptr,
|
||||
sipBinary,
|
||||
sipBinary.size(),
|
||||
&retVal,
|
||||
&device);
|
||||
DEBUG_BREAK_IF(retVal != 0);
|
||||
UNRECOVERABLE_IF(program == nullptr);
|
||||
|
||||
program->setDevice(&device);
|
||||
|
||||
retVal = program->processGenBinary();
|
||||
DEBUG_BREAK_IF(retVal != 0);
|
||||
|
||||
sipBuiltIn.first.reset(new SipKernel(type, program));
|
||||
};
|
||||
std::call_once(sipBuiltIn.second, initializer);
|
||||
UNRECOVERABLE_IF(sipBuiltIn.first == nullptr);
|
||||
return *sipBuiltIn.first;
|
||||
}
|
||||
|
||||
} // namespace NEO
|
||||
201
shared/source/built_ins/built_ins.h
Normal file
201
shared/source/built_ins/built_ins.h
Normal file
@@ -0,0 +1,201 @@
|
||||
/*
|
||||
* Copyright (C) 2017-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include "shared/source/built_ins/sip_kernel_type.h"
|
||||
#include "shared/source/helpers/debug_helpers.h"
|
||||
#include "shared/source/helpers/non_copyable_or_moveable.h"
|
||||
#include "shared/source/helpers/vec.h"
|
||||
|
||||
#include "built_in_ops.h"
|
||||
#include "compiler_options.h"
|
||||
|
||||
#include <array>
|
||||
#include <cstdint>
|
||||
#include <fstream>
|
||||
#include <memory>
|
||||
#include <mutex>
|
||||
#include <string>
|
||||
#include <tuple>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
|
||||
namespace NEO {
|
||||
typedef std::vector<char> BuiltinResourceT;
|
||||
|
||||
class Context;
|
||||
class Device;
|
||||
class Kernel;
|
||||
struct KernelInfo;
|
||||
struct MultiDispatchInfo;
|
||||
class Program;
|
||||
class SchedulerKernel;
|
||||
class SipKernel;
|
||||
|
||||
static constexpr ConstStringRef mediaKernelsBuildOptionsList[] = {
|
||||
"-D cl_intel_device_side_advanced_vme_enable",
|
||||
"-D cl_intel_device_side_avc_vme_enable",
|
||||
"-D cl_intel_device_side_vme_enable",
|
||||
"-D cl_intel_media_block_io",
|
||||
CompilerOptions::fastRelaxedMath};
|
||||
|
||||
static constexpr CompilerOptions::ConstConcatenation<> mediaKernelsBuildOptions{mediaKernelsBuildOptionsList};
|
||||
|
||||
BuiltinResourceT createBuiltinResource(const char *ptr, size_t size);
|
||||
BuiltinResourceT createBuiltinResource(const BuiltinResourceT &r);
|
||||
std::string createBuiltinResourceName(EBuiltInOps::Type builtin, const std::string &extension,
|
||||
const std::string &platformName = "", uint32_t deviceRevId = 0);
|
||||
std::string joinPath(const std::string &lhs, const std::string &rhs);
|
||||
const char *getBuiltinAsString(EBuiltInOps::Type builtin);
|
||||
const char *getUnknownBuiltinAsString(EBuiltInOps::Type builtin);
|
||||
const char *getAdditionalBuiltinAsString(EBuiltInOps::Type builtin);
|
||||
|
||||
class Storage {
|
||||
public:
|
||||
Storage(const std::string &rootPath)
|
||||
: rootPath(rootPath) {
|
||||
}
|
||||
|
||||
virtual ~Storage() = default;
|
||||
|
||||
BuiltinResourceT load(const std::string &resourceName);
|
||||
|
||||
protected:
|
||||
virtual BuiltinResourceT loadImpl(const std::string &fullResourceName) = 0;
|
||||
|
||||
std::string rootPath;
|
||||
};
|
||||
|
||||
class FileStorage : public Storage {
|
||||
public:
|
||||
FileStorage(const std::string &rootPath = "")
|
||||
: Storage(rootPath) {
|
||||
}
|
||||
|
||||
protected:
|
||||
BuiltinResourceT loadImpl(const std::string &fullResourceName) override;
|
||||
};
|
||||
|
||||
struct EmbeddedStorageRegistry {
|
||||
static EmbeddedStorageRegistry &getInstance() {
|
||||
static EmbeddedStorageRegistry gsr;
|
||||
return gsr;
|
||||
}
|
||||
|
||||
void store(const std::string &name, BuiltinResourceT &&resource) {
|
||||
resources.emplace(name, BuiltinResourceT(std::move(resource)));
|
||||
}
|
||||
|
||||
const BuiltinResourceT *get(const std::string &name) const;
|
||||
|
||||
private:
|
||||
using ResourcesContainer = std::unordered_map<std::string, BuiltinResourceT>;
|
||||
ResourcesContainer resources;
|
||||
};
|
||||
|
||||
class EmbeddedStorage : public Storage {
|
||||
public:
|
||||
EmbeddedStorage(const std::string &rootPath)
|
||||
: Storage(rootPath) {
|
||||
}
|
||||
|
||||
protected:
|
||||
BuiltinResourceT loadImpl(const std::string &fullResourceName) override;
|
||||
};
|
||||
|
||||
struct BuiltinCode {
|
||||
enum class ECodeType {
|
||||
Any = 0, // for requesting "any" code available - priorities as below
|
||||
Binary = 1, // ISA - highest priority
|
||||
Intermediate = 2, // SPIR/LLVM - medium prioroty
|
||||
Source = 3, // OCL C - lowest priority
|
||||
COUNT,
|
||||
INVALID
|
||||
};
|
||||
|
||||
static const char *getExtension(ECodeType ct) {
|
||||
switch (ct) {
|
||||
default:
|
||||
return "";
|
||||
case ECodeType::Binary:
|
||||
return ".bin";
|
||||
case ECodeType::Intermediate:
|
||||
return ".bc";
|
||||
case ECodeType::Source:
|
||||
return ".cl";
|
||||
}
|
||||
}
|
||||
|
||||
ECodeType type;
|
||||
BuiltinResourceT resource;
|
||||
Device *targetDevice;
|
||||
};
|
||||
|
||||
class BuiltinsLib {
|
||||
public:
|
||||
BuiltinsLib();
|
||||
BuiltinCode getBuiltinCode(EBuiltInOps::Type builtin, BuiltinCode::ECodeType requestedCodeType, Device &device);
|
||||
|
||||
static std::unique_ptr<Program> createProgramFromCode(const BuiltinCode &bc, Device &device);
|
||||
|
||||
protected:
|
||||
BuiltinResourceT getBuiltinResource(EBuiltInOps::Type builtin, BuiltinCode::ECodeType requestedCodeType, Device &device);
|
||||
|
||||
using StoragesContainerT = std::vector<std::unique_ptr<Storage>>;
|
||||
StoragesContainerT allStorages; // sorted by priority allStorages[0] will be checked before allStorages[1], etc.
|
||||
|
||||
std::mutex mutex;
|
||||
};
|
||||
|
||||
struct BuiltInKernel {
|
||||
const char *pSource = nullptr;
|
||||
Program *pProgram = nullptr;
|
||||
std::once_flag programIsInitialized; // guard for creating+building the program
|
||||
Kernel *pKernel = nullptr;
|
||||
|
||||
BuiltInKernel() {
|
||||
}
|
||||
};
|
||||
|
||||
class BuiltinDispatchInfoBuilder;
|
||||
|
||||
class BuiltIns {
|
||||
public:
|
||||
std::pair<std::unique_ptr<BuiltinDispatchInfoBuilder>, std::once_flag> BuiltinOpsBuilders[static_cast<uint32_t>(EBuiltInOps::COUNT)];
|
||||
BuiltIns();
|
||||
virtual ~BuiltIns();
|
||||
|
||||
MOCKABLE_VIRTUAL const SipKernel &getSipKernel(SipKernelType type, Device &device);
|
||||
|
||||
BuiltinsLib &getBuiltinsLib() {
|
||||
DEBUG_BREAK_IF(!builtinsLib.get());
|
||||
return *builtinsLib;
|
||||
}
|
||||
|
||||
void setCacheingEnableState(bool enableCacheing) {
|
||||
this->enableCacheing = enableCacheing;
|
||||
}
|
||||
|
||||
bool isCacheingEnabled() const {
|
||||
return this->enableCacheing;
|
||||
}
|
||||
|
||||
protected:
|
||||
// sip builtins
|
||||
std::pair<std::unique_ptr<SipKernel>, std::once_flag> sipKernels[static_cast<uint32_t>(SipKernelType::COUNT)];
|
||||
|
||||
std::unique_ptr<BuiltinsLib> builtinsLib;
|
||||
|
||||
using ProgramsContainerT = std::array<std::pair<std::unique_ptr<Program>, std::once_flag>, static_cast<size_t>(EBuiltInOps::COUNT)>;
|
||||
ProgramsContainerT builtinPrograms;
|
||||
bool enableCacheing = true;
|
||||
};
|
||||
|
||||
template <EBuiltInOps::Type OpCode>
|
||||
class BuiltInOp;
|
||||
|
||||
} // namespace NEO
|
||||
221
shared/source/built_ins/built_ins_storage.cpp
Normal file
221
shared/source/built_ins/built_ins_storage.cpp
Normal file
@@ -0,0 +1,221 @@
|
||||
/*
|
||||
* Copyright (C) 2017-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/built_ins/built_ins.h"
|
||||
#include "shared/source/debug_settings/debug_settings_manager.h"
|
||||
#include "shared/source/device/device.h"
|
||||
|
||||
#include "opencl/source/built_ins/builtins_dispatch_builder.h"
|
||||
|
||||
#include "os_inc.h"
|
||||
|
||||
#include <cstdint>
|
||||
|
||||
namespace NEO {
|
||||
|
||||
const char *getBuiltinAsString(EBuiltInOps::Type builtin) {
|
||||
const char *builtinString = getAdditionalBuiltinAsString(builtin);
|
||||
if (builtinString) {
|
||||
return builtinString;
|
||||
}
|
||||
switch (builtin) {
|
||||
default:
|
||||
return getUnknownBuiltinAsString(builtin);
|
||||
case EBuiltInOps::AuxTranslation:
|
||||
return "aux_translation.builtin_kernel";
|
||||
case EBuiltInOps::CopyBufferToBuffer:
|
||||
return "copy_buffer_to_buffer.builtin_kernel";
|
||||
case EBuiltInOps::CopyBufferToBufferStateless:
|
||||
return "copy_buffer_to_buffer_stateless.builtin_kernel";
|
||||
case EBuiltInOps::CopyBufferRect:
|
||||
return "copy_buffer_rect.builtin_kernel";
|
||||
case EBuiltInOps::CopyBufferRectStateless:
|
||||
return "copy_buffer_rect_stateless.builtin_kernel";
|
||||
case EBuiltInOps::FillBuffer:
|
||||
return "fill_buffer.builtin_kernel";
|
||||
case EBuiltInOps::FillBufferStateless:
|
||||
return "fill_buffer_stateless.builtin_kernel";
|
||||
case EBuiltInOps::CopyBufferToImage3d:
|
||||
return "copy_buffer_to_image3d.builtin_kernel";
|
||||
case EBuiltInOps::CopyBufferToImage3dStateless:
|
||||
return "copy_buffer_to_image3d_stateless.builtin_kernel";
|
||||
case EBuiltInOps::CopyImage3dToBuffer:
|
||||
return "copy_image3d_to_buffer.builtin_kernel";
|
||||
case EBuiltInOps::CopyImage3dToBufferStateless:
|
||||
return "copy_image3d_to_buffer_stateless.builtin_kernel";
|
||||
case EBuiltInOps::CopyImageToImage1d:
|
||||
return "copy_image_to_image1d.builtin_kernel";
|
||||
case EBuiltInOps::CopyImageToImage2d:
|
||||
return "copy_image_to_image2d.builtin_kernel";
|
||||
case EBuiltInOps::CopyImageToImage3d:
|
||||
return "copy_image_to_image3d.builtin_kernel";
|
||||
case EBuiltInOps::FillImage1d:
|
||||
return "fill_image1d.builtin_kernel";
|
||||
case EBuiltInOps::FillImage2d:
|
||||
return "fill_image2d.builtin_kernel";
|
||||
case EBuiltInOps::FillImage3d:
|
||||
return "fill_image3d.builtin_kernel";
|
||||
};
|
||||
}
|
||||
|
||||
BuiltinResourceT createBuiltinResource(const char *ptr, size_t size) {
|
||||
return BuiltinResourceT(ptr, ptr + size);
|
||||
}
|
||||
|
||||
BuiltinResourceT createBuiltinResource(const BuiltinResourceT &r) {
|
||||
return BuiltinResourceT(r);
|
||||
}
|
||||
|
||||
std::string createBuiltinResourceName(EBuiltInOps::Type builtin, const std::string &extension,
|
||||
const std::string &platformName, uint32_t deviceRevId) {
|
||||
std::string ret;
|
||||
if (platformName.size() > 0) {
|
||||
ret = platformName;
|
||||
ret += "_" + std::to_string(deviceRevId);
|
||||
ret += "_";
|
||||
}
|
||||
|
||||
ret += getBuiltinAsString(builtin);
|
||||
|
||||
if (extension.size() > 0) {
|
||||
ret += extension;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
std::string joinPath(const std::string &lhs, const std::string &rhs) {
|
||||
if (lhs.size() == 0) {
|
||||
return rhs;
|
||||
}
|
||||
|
||||
if (rhs.size() == 0) {
|
||||
return lhs;
|
||||
}
|
||||
|
||||
if (*lhs.rbegin() == PATH_SEPARATOR) {
|
||||
return lhs + rhs;
|
||||
}
|
||||
|
||||
return lhs + PATH_SEPARATOR + rhs;
|
||||
}
|
||||
|
||||
std::string getDriverInstallationPath() {
|
||||
return "";
|
||||
}
|
||||
|
||||
BuiltinResourceT Storage::load(const std::string &resourceName) {
|
||||
return loadImpl(joinPath(rootPath, resourceName));
|
||||
}
|
||||
|
||||
BuiltinResourceT FileStorage::loadImpl(const std::string &fullResourceName) {
|
||||
BuiltinResourceT ret;
|
||||
|
||||
std::ifstream f{fullResourceName, std::ios::in | std::ios::binary | std::ios::ate};
|
||||
auto end = f.tellg();
|
||||
f.seekg(0, std::ios::beg);
|
||||
auto beg = f.tellg();
|
||||
auto s = end - beg;
|
||||
ret.resize(static_cast<size_t>(s));
|
||||
f.read(ret.data(), s);
|
||||
return ret;
|
||||
}
|
||||
|
||||
const BuiltinResourceT *EmbeddedStorageRegistry::get(const std::string &name) const {
|
||||
auto it = resources.find(name);
|
||||
if (resources.end() == it) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
return &it->second;
|
||||
}
|
||||
|
||||
BuiltinResourceT EmbeddedStorage::loadImpl(const std::string &fullResourceName) {
|
||||
auto *constResource = EmbeddedStorageRegistry::getInstance().get(fullResourceName);
|
||||
if (constResource == nullptr) {
|
||||
BuiltinResourceT ret;
|
||||
return ret;
|
||||
}
|
||||
|
||||
return createBuiltinResource(*constResource);
|
||||
}
|
||||
|
||||
BuiltinsLib::BuiltinsLib() {
|
||||
allStorages.push_back(std::unique_ptr<Storage>(new EmbeddedStorage("")));
|
||||
allStorages.push_back(std::unique_ptr<Storage>(new FileStorage(getDriverInstallationPath())));
|
||||
}
|
||||
|
||||
BuiltinCode BuiltinsLib::getBuiltinCode(EBuiltInOps::Type builtin, BuiltinCode::ECodeType requestedCodeType, Device &device) {
|
||||
std::lock_guard<std::mutex> lockRaii{mutex};
|
||||
|
||||
BuiltinResourceT bc;
|
||||
BuiltinCode::ECodeType usedCodetType = BuiltinCode::ECodeType::INVALID;
|
||||
|
||||
if (requestedCodeType == BuiltinCode::ECodeType::Any) {
|
||||
uint32_t codeType = static_cast<uint32_t>(BuiltinCode::ECodeType::Binary);
|
||||
if (DebugManager.flags.RebuildPrecompiledKernels.get()) {
|
||||
codeType = static_cast<uint32_t>(BuiltinCode::ECodeType::Source);
|
||||
}
|
||||
for (uint32_t e = static_cast<uint32_t>(BuiltinCode::ECodeType::COUNT);
|
||||
codeType != e; ++codeType) {
|
||||
bc = getBuiltinResource(builtin, static_cast<BuiltinCode::ECodeType>(codeType), device);
|
||||
if (bc.size() > 0) {
|
||||
usedCodetType = static_cast<BuiltinCode::ECodeType>(codeType);
|
||||
break;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
bc = getBuiltinResource(builtin, requestedCodeType, device);
|
||||
usedCodetType = requestedCodeType;
|
||||
}
|
||||
|
||||
BuiltinCode ret;
|
||||
std::swap(ret.resource, bc);
|
||||
ret.type = usedCodetType;
|
||||
ret.targetDevice = &device;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
std::unique_ptr<Program> BuiltinsLib::createProgramFromCode(const BuiltinCode &bc, Device &device) {
|
||||
std::unique_ptr<Program> ret;
|
||||
const char *data = bc.resource.data();
|
||||
size_t dataLen = bc.resource.size();
|
||||
cl_int err = 0;
|
||||
switch (bc.type) {
|
||||
default:
|
||||
break;
|
||||
case BuiltinCode::ECodeType::Source:
|
||||
case BuiltinCode::ECodeType::Intermediate:
|
||||
ret.reset(Program::create(data, nullptr, device, true, &err));
|
||||
break;
|
||||
case BuiltinCode::ECodeType::Binary:
|
||||
ret.reset(Program::createFromGenBinary(*device.getExecutionEnvironment(), nullptr, data, dataLen, true, nullptr, &device));
|
||||
break;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
BuiltinResourceT BuiltinsLib::getBuiltinResource(EBuiltInOps::Type builtin, BuiltinCode::ECodeType requestedCodeType, Device &device) {
|
||||
BuiltinResourceT bc;
|
||||
std::string resourceNameGeneric = createBuiltinResourceName(builtin, BuiltinCode::getExtension(requestedCodeType));
|
||||
std::string resourceNameForPlatformType = createBuiltinResourceName(builtin, BuiltinCode::getExtension(requestedCodeType), getFamilyNameWithType(device.getHardwareInfo()));
|
||||
std::string resourceNameForPlatformTypeAndStepping = createBuiltinResourceName(builtin, BuiltinCode::getExtension(requestedCodeType), getFamilyNameWithType(device.getHardwareInfo()),
|
||||
device.getHardwareInfo().platform.usRevId);
|
||||
|
||||
for (auto &rn : {resourceNameForPlatformTypeAndStepping, resourceNameForPlatformType, resourceNameGeneric}) { // first look for dedicated version, only fallback to generic one
|
||||
for (auto &s : allStorages) {
|
||||
bc = s.get()->load(rn);
|
||||
if (bc.size() != 0) {
|
||||
return bc;
|
||||
}
|
||||
}
|
||||
}
|
||||
return bc;
|
||||
}
|
||||
|
||||
} // namespace NEO
|
||||
13
shared/source/built_ins/builtinops/CMakeLists.txt
Normal file
13
shared/source/built_ins/builtinops/CMakeLists.txt
Normal file
@@ -0,0 +1,13 @@
|
||||
#
|
||||
# Copyright (C) 2020 Intel Corporation
|
||||
#
|
||||
# SPDX-License-Identifier: MIT
|
||||
#
|
||||
|
||||
set(NEO_CORE_SRCS_BUILT_INS_OPS
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
|
||||
${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}/built_in_ops.h
|
||||
)
|
||||
add_subdirectories()
|
||||
|
||||
set_property(GLOBAL PROPERTY NEO_CORE_SRCS_BUILT_INS_OPS ${NEO_CORE_SRCS_BUILT_INS_OPS})
|
||||
15
shared/source/built_ins/builtinops/built_in_ops.h
Normal file
15
shared/source/built_ins/builtinops/built_in_ops.h
Normal file
@@ -0,0 +1,15 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include "shared/source/built_ins/built_in_ops_base.h"
|
||||
|
||||
namespace NEO {
|
||||
namespace EBuiltInOps {
|
||||
constexpr Type MaxCoreValue{MaxBaseValue};
|
||||
}
|
||||
} // namespace NEO
|
||||
71
shared/source/built_ins/builtins_binary.cmake
Normal file
71
shared/source/built_ins/builtins_binary.cmake
Normal file
@@ -0,0 +1,71 @@
|
||||
#
|
||||
# Copyright (C) 2018-2020 Intel Corporation
|
||||
#
|
||||
# SPDX-License-Identifier: MIT
|
||||
#
|
||||
|
||||
add_library(${BUILTINS_BINARIES_LIB_NAME} OBJECT EXCLUDE_FROM_ALL builtins_binary.cmake)
|
||||
|
||||
# Add builtins sources
|
||||
add_subdirectory(registry)
|
||||
|
||||
set(GENERATED_BUILTINS
|
||||
"aux_translation"
|
||||
"copy_buffer_rect"
|
||||
"copy_buffer_to_buffer"
|
||||
"copy_buffer_to_image3d"
|
||||
"copy_image3d_to_buffer"
|
||||
"copy_image_to_image1d"
|
||||
"copy_image_to_image2d"
|
||||
"copy_image_to_image3d"
|
||||
"fill_buffer"
|
||||
"fill_image1d"
|
||||
"fill_image2d"
|
||||
"fill_image3d"
|
||||
)
|
||||
|
||||
set(GENERATED_BUILTINS_STATELESS
|
||||
"copy_buffer_to_buffer_stateless"
|
||||
"copy_buffer_rect_stateless"
|
||||
"copy_buffer_to_image3d_stateless"
|
||||
"copy_image3d_to_buffer_stateless"
|
||||
"fill_buffer_stateless"
|
||||
)
|
||||
|
||||
# Generate builtins cpps
|
||||
if(COMPILE_BUILT_INS)
|
||||
add_subdirectory(kernels)
|
||||
endif()
|
||||
|
||||
macro(macro_for_each_gen)
|
||||
foreach(PLATFORM_TYPE ${PLATFORM_TYPES})
|
||||
get_family_name_with_type(${GEN_TYPE} ${PLATFORM_TYPE})
|
||||
foreach(GENERATED_BUILTIN ${GENERATED_BUILTINS})
|
||||
list(APPEND GENERATED_BUILTINS_CPPS ${BUILTINS_INCLUDE_DIR}/${RUNTIME_GENERATED_${GENERATED_BUILTIN}_${family_name_with_type}})
|
||||
endforeach()
|
||||
foreach(GENERATED_BUILTIN_STATELESS ${GENERATED_BUILTINS_STATELESS})
|
||||
list(APPEND GENERATED_BUILTINS_CPPS ${BUILTINS_INCLUDE_DIR}/${RUNTIME_GENERATED_${GENERATED_BUILTIN_STATELESS}_${family_name_with_type}})
|
||||
endforeach()
|
||||
endforeach()
|
||||
source_group("generated files\\${GEN_TYPE_LOWER}" FILES ${GENERATED_BUILTINS_CPPS})
|
||||
endmacro()
|
||||
|
||||
apply_macro_for_each_gen("SUPPORTED")
|
||||
|
||||
if(COMPILE_BUILT_INS)
|
||||
target_sources(${BUILTINS_BINARIES_LIB_NAME} PUBLIC ${GENERATED_BUILTINS_CPPS})
|
||||
set_source_files_properties(${GENERATED_BUILTINS_CPPS} PROPERTIES GENERATED TRUE)
|
||||
endif()
|
||||
|
||||
set_target_properties(${BUILTINS_BINARIES_LIB_NAME} PROPERTIES LINKER_LANGUAGE CXX)
|
||||
set_target_properties(${BUILTINS_BINARIES_LIB_NAME} PROPERTIES POSITION_INDEPENDENT_CODE ON)
|
||||
set_target_properties(${BUILTINS_BINARIES_LIB_NAME} PROPERTIES FOLDER "built_ins")
|
||||
|
||||
target_include_directories(${BUILTINS_BINARIES_LIB_NAME} PRIVATE
|
||||
${ENGINE_NODE_DIR}
|
||||
${KHRONOS_HEADERS_DIR}
|
||||
${KHRONOS_GL_HEADERS_DIR}
|
||||
${NEO__GMM_INCLUDE_DIR}
|
||||
${NEO__IGC_INCLUDE_DIR}
|
||||
${THIRD_PARTY_DIR}
|
||||
)
|
||||
106
shared/source/built_ins/kernels/CMakeLists.txt
Normal file
106
shared/source/built_ins/kernels/CMakeLists.txt
Normal file
@@ -0,0 +1,106 @@
|
||||
#
|
||||
# Copyright (C) 2017-2020 Intel Corporation
|
||||
#
|
||||
# SPDX-License-Identifier: MIT
|
||||
#
|
||||
|
||||
add_custom_target(builtins)
|
||||
set_target_properties(builtins PROPERTIES FOLDER "built_ins")
|
||||
set(BUILTINS_OUTDIR_WITH_ARCH "${TargetDir}/built_ins/${NEO_ARCH}")
|
||||
add_dependencies(${BUILTINS_BINARIES_LIB_NAME} builtins)
|
||||
add_subdirectories()
|
||||
set(GENERATED_BUILTINS ${GENERATED_BUILTINS} PARENT_SCOPE)
|
||||
set(GENERATED_BUILTINS_STATELESS ${GENERATED_BUILTINS_STATELESS} PARENT_SCOPE)
|
||||
|
||||
if("${NEO_ARCH}" STREQUAL "x32")
|
||||
set(BUILTIN_OPTIONS "-cl-intel-greater-than-4GB-buffer-required")
|
||||
else()
|
||||
set(BUILTIN_OPTIONS "")
|
||||
endif()
|
||||
|
||||
set(BUILTIN_OPTIONS_STATELESS
|
||||
"-cl-intel-greater-than-4GB-buffer-required"
|
||||
)
|
||||
|
||||
if("${CMAKE_BUILD_TYPE}" STREQUAL "Debug" )
|
||||
list(APPEND __cloc__options__ "-D DEBUG")
|
||||
endif()
|
||||
|
||||
set(BUILTINS_INCLUDE_DIR ${TargetDir} PARENT_SCOPE)
|
||||
set(BUILTIN_CPP "")
|
||||
|
||||
function(get_bits_for_stateless gen_type platform_type)
|
||||
# Force 32bits compiling on gen9lp for stateless builtins
|
||||
if( (${GEN_TYPE} STREQUAL "GEN9" ) AND ( ${PLATFORM_TYPE} STREQUAL "LP"))
|
||||
set(BITS "32" PARENT_SCOPE)
|
||||
else()
|
||||
set(BITS ${NEO_BITS} PARENT_SCOPE)
|
||||
endif()
|
||||
endfunction()
|
||||
|
||||
# Define function for compiling built-ins (with ocloc)
|
||||
function(compile_builtin gen_type platform_type builtin bits builtin_options)
|
||||
string(TOLOWER ${gen_type} gen_type_lower)
|
||||
get_family_name_with_type(${gen_type} ${platform_type})
|
||||
set(OUTPUTDIR "${BUILTINS_OUTDIR_WITH_ARCH}/${gen_type_lower}")
|
||||
# get filename
|
||||
set(FILENAME ${builtin})
|
||||
# get name of the file w/o extension
|
||||
get_filename_component(BASENAME ${builtin} NAME_WE)
|
||||
|
||||
set(OUTPUTPATH_BASE "${OUTPUTDIR}/${BASENAME}_${family_name_with_type}")
|
||||
set(OUTPUT_FILES
|
||||
${OUTPUTPATH_BASE}.spv
|
||||
${OUTPUTPATH_BASE}.bin
|
||||
${OUTPUTPATH_BASE}.cpp
|
||||
${OUTPUTPATH_BASE}.gen
|
||||
)
|
||||
|
||||
# function returns builtin cpp filename
|
||||
unset(BUILTIN_CPP)
|
||||
# set variable outside function
|
||||
set(BUILTIN_CPP built_ins/${NEO_ARCH}/${gen_type_lower}/${BASENAME}_${family_name_with_type}.cpp PARENT_SCOPE)
|
||||
if(WIN32)
|
||||
set(cloc_cmd_prefix ocloc)
|
||||
else()
|
||||
if(DEFINED NEO__IGC_LIBRARY_PATH)
|
||||
set(cloc_cmd_prefix LD_LIBRARY_PATH=${NEO__IGC_LIBRARY_PATH} $<TARGET_FILE:ocloc>)
|
||||
else()
|
||||
set(cloc_cmd_prefix LD_LIBRARY_PATH=$<TARGET_FILE_DIR:ocloc> $<TARGET_FILE:ocloc>)
|
||||
endif()
|
||||
endif()
|
||||
list(APPEND __cloc__options__ "-cl-kernel-arg-info")
|
||||
add_custom_command(
|
||||
OUTPUT ${OUTPUT_FILES}
|
||||
COMMAND ${cloc_cmd_prefix} -q -file ${FILENAME} -device ${DEFAULT_SUPPORTED_${gen_type}_${platform_type}_PLATFORM} ${builtin_options} -${bits} -out_dir ${OUTPUTDIR} -cpp_file -options "$<JOIN:${__cloc__options__}, >"
|
||||
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
|
||||
DEPENDS ${builtin} ocloc copy_compiler_files
|
||||
)
|
||||
endfunction()
|
||||
|
||||
macro(macro_for_each_gen)
|
||||
foreach(PLATFORM_TYPE ${PLATFORM_TYPES})
|
||||
if(${GEN_TYPE}_HAS_${PLATFORM_TYPE})
|
||||
get_family_name_with_type(${GEN_TYPE} ${PLATFORM_TYPE})
|
||||
string(TOLOWER ${PLATFORM_TYPE} PLATFORM_TYPE_LOWER)
|
||||
unset(BUILTINS_COMMANDS)
|
||||
foreach(GENERATED_BUILTIN ${GENERATED_BUILTINS})
|
||||
compile_builtin(${GEN_TYPE} ${PLATFORM_TYPE} ${GENERATED_BUILTIN}.builtin_kernel ${NEO_BITS} "${BUILTIN_OPTIONS}")
|
||||
list(APPEND BUILTINS_COMMANDS ${TargetDir}/${BUILTIN_CPP})
|
||||
set(RUNTIME_GENERATED_${GENERATED_BUILTIN}_${family_name_with_type} ${BUILTIN_CPP} PARENT_SCOPE)
|
||||
endforeach()
|
||||
get_bits_for_stateless(${GEN_TYPE} ${PLATFORM_TYPE})
|
||||
foreach(GENERATED_BUILTIN_STATELESS ${GENERATED_BUILTINS_STATELESS})
|
||||
compile_builtin(${GEN_TYPE} ${PLATFORM_TYPE} ${GENERATED_BUILTIN_STATELESS}.builtin_kernel ${BITS} "${BUILTIN_OPTIONS_STATELESS}")
|
||||
list(APPEND BUILTINS_COMMANDS ${TargetDir}/${BUILTIN_CPP})
|
||||
set(RUNTIME_GENERATED_${GENERATED_BUILTIN_STATELESS}_${family_name_with_type} ${BUILTIN_CPP} PARENT_SCOPE)
|
||||
endforeach()
|
||||
set(target_name builtins_${family_name_with_type})
|
||||
add_custom_target(${target_name} DEPENDS ${BUILTINS_COMMANDS})
|
||||
add_dependencies(builtins ${target_name})
|
||||
set_target_properties(${target_name} PROPERTIES FOLDER "opencl/source/built_ins/${family_name_with_type}")
|
||||
endif()
|
||||
endforeach()
|
||||
endmacro()
|
||||
|
||||
apply_macro_for_each_gen("SUPPORTED")
|
||||
@@ -0,0 +1,14 @@
|
||||
/*
|
||||
* Copyright (C) 2018-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
R"===(
|
||||
__kernel void fullCopy(__global const uint* src, __global uint* dst) {
|
||||
unsigned int gid = get_global_id(0);
|
||||
uint4 loaded = vload4(gid, src);
|
||||
vstore4(loaded, gid, dst);
|
||||
}
|
||||
)==="
|
||||
@@ -0,0 +1,48 @@
|
||||
/*
|
||||
* Copyright (C) 2017-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
R"===(
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
__kernel void CopyBufferRectBytes2d(
|
||||
__global const char* src,
|
||||
__global char* dst,
|
||||
uint4 SrcOrigin,
|
||||
uint4 DstOrigin,
|
||||
uint2 SrcPitch,
|
||||
uint2 DstPitch )
|
||||
|
||||
{
|
||||
int x = get_global_id(0);
|
||||
int y = get_global_id(1);
|
||||
|
||||
uint LSrcOffset = x + SrcOrigin.x + ( ( y + SrcOrigin.y ) * SrcPitch.x );
|
||||
uint LDstOffset = x + DstOrigin.x + ( ( y + DstOrigin.y ) * DstPitch.x );
|
||||
|
||||
*( dst + LDstOffset ) = *( src + LSrcOffset );
|
||||
|
||||
}
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
__kernel void CopyBufferRectBytes3d(
|
||||
__global const char* src,
|
||||
__global char* dst,
|
||||
uint4 SrcOrigin,
|
||||
uint4 DstOrigin,
|
||||
uint2 SrcPitch,
|
||||
uint2 DstPitch )
|
||||
|
||||
{
|
||||
int x = get_global_id(0);
|
||||
int y = get_global_id(1);
|
||||
int z = get_global_id(2);
|
||||
|
||||
uint LSrcOffset = x + SrcOrigin.x + ( ( y + SrcOrigin.y ) * SrcPitch.x ) + ( ( z + SrcOrigin.z ) * SrcPitch.y );
|
||||
uint LDstOffset = x + DstOrigin.x + ( ( y + DstOrigin.y ) * DstPitch.x ) + ( ( z + DstOrigin.z ) * DstPitch.y );
|
||||
|
||||
*( dst + LDstOffset ) = *( src + LSrcOffset );
|
||||
|
||||
}
|
||||
)==="
|
||||
@@ -0,0 +1,48 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
R"===(
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
__kernel void CopyBufferRectBytes2d(
|
||||
__global const char* src,
|
||||
__global char* dst,
|
||||
ulong4 SrcOrigin,
|
||||
ulong4 DstOrigin,
|
||||
ulong2 SrcPitch,
|
||||
ulong2 DstPitch )
|
||||
|
||||
{
|
||||
size_t x = get_global_id(0);
|
||||
size_t y = get_global_id(1);
|
||||
|
||||
size_t LSrcOffset = x + SrcOrigin.x + ( ( y + SrcOrigin.y ) * SrcPitch.x );
|
||||
size_t LDstOffset = x + DstOrigin.x + ( ( y + DstOrigin.y ) * DstPitch.x );
|
||||
|
||||
*( dst + LDstOffset ) = *( src + LSrcOffset );
|
||||
|
||||
}
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
__kernel void CopyBufferRectBytes3d(
|
||||
__global const char* src,
|
||||
__global char* dst,
|
||||
ulong4 SrcOrigin,
|
||||
ulong4 DstOrigin,
|
||||
ulong2 SrcPitch,
|
||||
ulong2 DstPitch )
|
||||
|
||||
{
|
||||
size_t x = get_global_id(0);
|
||||
size_t y = get_global_id(1);
|
||||
size_t z = get_global_id(2);
|
||||
|
||||
size_t LSrcOffset = x + SrcOrigin.x + ( ( y + SrcOrigin.y ) * SrcPitch.x ) + ( ( z + SrcOrigin.z ) * SrcPitch.y );
|
||||
size_t LDstOffset = x + DstOrigin.x + ( ( y + DstOrigin.y ) * DstPitch.x ) + ( ( z + DstOrigin.z ) * DstPitch.y );
|
||||
|
||||
*( dst + LDstOffset ) = *( src + LSrcOffset );
|
||||
|
||||
}
|
||||
)==="
|
||||
@@ -0,0 +1,90 @@
|
||||
/*
|
||||
* Copyright (C) 2017-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
R"===(
|
||||
__kernel void CopyBufferToBufferBytes(
|
||||
const __global uchar* pSrc,
|
||||
__global uchar* pDst,
|
||||
uint srcOffsetInBytes,
|
||||
uint dstOffsetInBytes,
|
||||
uint bytesToRead )
|
||||
{
|
||||
pSrc += ( srcOffsetInBytes + get_global_id(0) );
|
||||
pDst += ( dstOffsetInBytes + get_global_id(0) );
|
||||
pDst[ 0 ] = pSrc[ 0 ];
|
||||
}
|
||||
|
||||
__kernel void CopyBufferToBufferLeftLeftover(
|
||||
const __global uchar* pSrc,
|
||||
__global uchar* pDst,
|
||||
uint srcOffsetInBytes,
|
||||
uint dstOffsetInBytes)
|
||||
{
|
||||
unsigned int gid = get_global_id(0);
|
||||
pDst[ gid + dstOffsetInBytes ] = pSrc[ gid + srcOffsetInBytes ];
|
||||
}
|
||||
|
||||
__kernel void CopyBufferToBufferMiddle(
|
||||
const __global uint* pSrc,
|
||||
__global uint* pDst,
|
||||
uint srcOffsetInBytes,
|
||||
uint dstOffsetInBytes)
|
||||
{
|
||||
unsigned int gid = get_global_id(0);
|
||||
pDst += dstOffsetInBytes >> 2;
|
||||
pSrc += srcOffsetInBytes >> 2;
|
||||
uint4 loaded = vload4(gid, pSrc);
|
||||
vstore4(loaded, gid, pDst);
|
||||
}
|
||||
|
||||
__kernel void CopyBufferToBufferRightLeftover(
|
||||
const __global uchar* pSrc,
|
||||
__global uchar* pDst,
|
||||
uint srcOffsetInBytes,
|
||||
uint dstOffsetInBytes)
|
||||
{
|
||||
unsigned int gid = get_global_id(0);
|
||||
pDst[ gid + dstOffsetInBytes ] = pSrc[ gid + srcOffsetInBytes ];
|
||||
}
|
||||
|
||||
__kernel void copyBufferToBufferBytesSingle(__global uchar *dst, const __global uchar *src) {
|
||||
unsigned int gid = get_global_id(0);
|
||||
dst[gid] = (uchar)(src[gid]);
|
||||
}
|
||||
__kernel void CopyBufferToBufferSideRegion(
|
||||
__global uchar* pDst,
|
||||
const __global uchar* pSrc,
|
||||
unsigned int len,
|
||||
uint dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment
|
||||
uint srcSshOffset // Offset needed in case ptr has been adjusted for SSH alignment
|
||||
)
|
||||
{
|
||||
unsigned int gid = get_global_id(0);
|
||||
__global uchar* pDstWithOffset = (__global uchar*)((__global uchar*)pDst + dstSshOffset);
|
||||
__global uchar* pSrcWithOffset = (__global uchar*)((__global uchar*)pSrc + srcSshOffset);
|
||||
if (gid < len) {
|
||||
pDstWithOffset[ gid ] = pSrcWithOffset[ gid ];
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void CopyBufferToBufferMiddleRegion(
|
||||
__global uint* pDst,
|
||||
const __global uint* pSrc,
|
||||
unsigned int elems,
|
||||
uint dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment
|
||||
uint srcSshOffset // Offset needed in case ptr has been adjusted for SSH alignment
|
||||
)
|
||||
{
|
||||
unsigned int gid = get_global_id(0);
|
||||
__global uint* pDstWithOffset = (__global uint*)((__global uchar*)pDst + dstSshOffset);
|
||||
__global uint* pSrcWithOffset = (__global uint*)((__global uchar*)pSrc + srcSshOffset);
|
||||
if (gid < elems) {
|
||||
uint4 loaded = vload4(gid, pSrcWithOffset);
|
||||
vstore4(loaded, gid, pDstWithOffset);
|
||||
}
|
||||
}
|
||||
)==="
|
||||
@@ -0,0 +1,54 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
R"===(
|
||||
__kernel void CopyBufferToBufferBytes(
|
||||
const __global uchar* pSrc,
|
||||
__global uchar* pDst,
|
||||
ulong srcOffsetInBytes,
|
||||
ulong dstOffsetInBytes,
|
||||
ulong bytesToRead )
|
||||
{
|
||||
pSrc += ( srcOffsetInBytes + get_global_id(0) );
|
||||
pDst += ( dstOffsetInBytes + get_global_id(0) );
|
||||
pDst[ 0 ] = pSrc[ 0 ];
|
||||
}
|
||||
|
||||
__kernel void CopyBufferToBufferLeftLeftover(
|
||||
const __global uchar* pSrc,
|
||||
__global uchar* pDst,
|
||||
ulong srcOffsetInBytes,
|
||||
ulong dstOffsetInBytes)
|
||||
{
|
||||
size_t gid = get_global_id(0);
|
||||
pDst[ gid + dstOffsetInBytes ] = pSrc[ gid + srcOffsetInBytes ];
|
||||
}
|
||||
|
||||
__kernel void CopyBufferToBufferMiddle(
|
||||
const __global uint* pSrc,
|
||||
__global uint* pDst,
|
||||
ulong srcOffsetInBytes,
|
||||
ulong dstOffsetInBytes)
|
||||
{
|
||||
size_t gid = get_global_id(0);
|
||||
pDst += dstOffsetInBytes >> 2;
|
||||
pSrc += srcOffsetInBytes >> 2;
|
||||
uint4 loaded = vload4(gid, pSrc);
|
||||
vstore4(loaded, gid, pDst);
|
||||
}
|
||||
|
||||
__kernel void CopyBufferToBufferRightLeftover(
|
||||
const __global uchar* pSrc,
|
||||
__global uchar* pDst,
|
||||
ulong srcOffsetInBytes,
|
||||
ulong dstOffsetInBytes)
|
||||
{
|
||||
size_t gid = get_global_id(0);
|
||||
pDst[ gid + dstOffsetInBytes ] = pSrc[ gid + srcOffsetInBytes ];
|
||||
}
|
||||
|
||||
)==="
|
||||
@@ -0,0 +1,161 @@
|
||||
/*
|
||||
* Copyright (C) 2017-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
R"===(
|
||||
#pragma OPENCL EXTENSION cl_khr_3d_image_writes : enable
|
||||
|
||||
__kernel void CopyBufferToImage3dBytes(__global uchar *src,
|
||||
__write_only image3d_t output,
|
||||
int srcOffset,
|
||||
int4 dstOffset,
|
||||
uint2 Pitch) {
|
||||
const uint x = get_global_id(0);
|
||||
const uint y = get_global_id(1);
|
||||
const uint z = get_global_id(2);
|
||||
|
||||
int4 dstCoord = (int4)(x, y, z, 0) + dstOffset;
|
||||
uint LOffset = srcOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
|
||||
write_imageui(output, dstCoord, (uint4)(*(src + LOffset + x), 0, 0, 1));
|
||||
}
|
||||
|
||||
__kernel void CopyBufferToImage3d2Bytes(__global uchar *src,
|
||||
__write_only image3d_t output,
|
||||
int srcOffset,
|
||||
int4 dstOffset,
|
||||
uint2 Pitch) {
|
||||
const uint x = get_global_id(0);
|
||||
const uint y = get_global_id(1);
|
||||
const uint z = get_global_id(2);
|
||||
|
||||
int4 dstCoord = (int4)(x, y, z, 0) + dstOffset;
|
||||
uint LOffset = srcOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
|
||||
uint4 c = (uint4)(0, 0, 0, 1);
|
||||
|
||||
if(( ulong )(src + srcOffset) & 0x00000001){
|
||||
ushort upper = *((__global uchar*)(src + LOffset + x * 2 + 1));
|
||||
ushort lower = *((__global uchar*)(src + LOffset + x * 2));
|
||||
ushort combined = (upper << 8) | lower;
|
||||
c.x = (uint)combined;
|
||||
}
|
||||
else{
|
||||
c.x = (uint)(*(__global ushort*)(src + LOffset + x * 2));
|
||||
}
|
||||
write_imageui(output, dstCoord, c);
|
||||
}
|
||||
|
||||
__kernel void CopyBufferToImage3d4Bytes(__global uchar *src,
|
||||
__write_only image3d_t output,
|
||||
int srcOffset,
|
||||
int4 dstOffset,
|
||||
uint2 Pitch) {
|
||||
const uint x = get_global_id(0);
|
||||
const uint y = get_global_id(1);
|
||||
const uint z = get_global_id(2);
|
||||
|
||||
int4 dstCoord = (int4)(x, y, z, 0) + dstOffset;
|
||||
uint LOffset = srcOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
|
||||
uint4 c = (uint4)(0, 0, 0, 1);
|
||||
|
||||
if(( ulong )(src + srcOffset) & 0x00000003){
|
||||
uint upper2 = *((__global uchar*)(src + LOffset + x * 4 + 3));
|
||||
uint upper = *((__global uchar*)(src + LOffset + x * 4 + 2));
|
||||
uint lower2 = *((__global uchar*)(src + LOffset + x * 4 + 1));
|
||||
uint lower = *((__global uchar*)(src + LOffset + x * 4));
|
||||
uint combined = (upper2 << 24) | (upper << 16) | (lower2 << 8) | lower;
|
||||
c.x = combined;
|
||||
}
|
||||
else{
|
||||
c.x = (*(__global uint*)(src + LOffset + x * 4));
|
||||
}
|
||||
write_imageui(output, dstCoord, c);
|
||||
}
|
||||
|
||||
__kernel void CopyBufferToImage3d8Bytes(__global uchar *src,
|
||||
__write_only image3d_t output,
|
||||
int srcOffset,
|
||||
int4 dstOffset,
|
||||
uint2 Pitch) {
|
||||
const uint x = get_global_id(0);
|
||||
const uint y = get_global_id(1);
|
||||
const uint z = get_global_id(2);
|
||||
|
||||
int4 dstCoord = (int4)(x, y, z, 0) + dstOffset;
|
||||
uint LOffset = srcOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
|
||||
uint2 c = (uint2)(0, 0);//*((__global uint2*)(src + LOffset + x * 8));
|
||||
|
||||
if(( ulong )(src + srcOffset) & 0x00000007){
|
||||
uint upper2 = *((__global uchar*)(src + LOffset + x * 8 + 3));
|
||||
uint upper = *((__global uchar*)(src + LOffset + x * 8 + 2));
|
||||
uint lower2 = *((__global uchar*)(src + LOffset + x * 8 + 1));
|
||||
uint lower = *((__global uchar*)(src + LOffset + x * 8));
|
||||
uint combined = (upper2 << 24) | (upper << 16) | (lower2 << 8) | lower;
|
||||
c.x = combined;
|
||||
upper2 = *((__global uchar*)(src + LOffset + x * 8 + 7));
|
||||
upper = *((__global uchar*)(src + LOffset + x * 8 + 6));
|
||||
lower2 = *((__global uchar*)(src + LOffset + x * 8 + 5));
|
||||
lower = *((__global uchar*)(src + LOffset + x * 8 + 4));
|
||||
combined = ((uint)upper2 << 24) | ((uint)upper << 16) | ((uint)lower2 << 8) | lower;
|
||||
c.y = combined;
|
||||
}
|
||||
else{
|
||||
c = *((__global uint2*)(src + LOffset + x * 8));
|
||||
}
|
||||
|
||||
write_imageui(output, dstCoord, (uint4)(c.x, c.y, 0, 1));
|
||||
}
|
||||
|
||||
__kernel void CopyBufferToImage3d16Bytes(__global uchar *src,
|
||||
__write_only image3d_t output,
|
||||
int srcOffset,
|
||||
int4 dstOffset,
|
||||
uint2 Pitch) {
|
||||
const uint x = get_global_id(0);
|
||||
const uint y = get_global_id(1);
|
||||
const uint z = get_global_id(2);
|
||||
|
||||
int4 dstCoord = (int4)(x, y, z, 0) + dstOffset;
|
||||
uint LOffset = srcOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
|
||||
uint4 c = (uint4)(0, 0, 0, 0);
|
||||
|
||||
if(( ulong )(src + srcOffset) & 0x0000000f){
|
||||
uint upper2 = *((__global uchar*)(src + LOffset + x * 16 + 3));
|
||||
uint upper = *((__global uchar*)(src + LOffset + x * 16 + 2));
|
||||
uint lower2 = *((__global uchar*)(src + LOffset + x * 16 + 1));
|
||||
uint lower = *((__global uchar*)(src + LOffset + x * 16));
|
||||
uint combined = (upper2 << 24) | (upper << 16) | (lower2 << 8) | lower;
|
||||
c.x = combined;
|
||||
upper2 = *((__global uchar*)(src + LOffset + x * 16 + 7));
|
||||
upper = *((__global uchar*)(src + LOffset + x * 16 + 6));
|
||||
lower2 = *((__global uchar*)(src + LOffset + x * 16 + 5));
|
||||
lower = *((__global uchar*)(src + LOffset + x * 16 + 4));
|
||||
combined = (upper2 << 24) | (upper << 16) | (lower2 << 8) | lower;
|
||||
c.y = combined;
|
||||
upper2 = *((__global uchar*)(src + LOffset + x * 16 + 11));
|
||||
upper = *((__global uchar*)(src + LOffset + x * 16 + 10));
|
||||
lower2 = *((__global uchar*)(src + LOffset + x * 16 + 9));
|
||||
lower = *((__global uchar*)(src + LOffset + x * 16 + 8));
|
||||
combined = (upper2 << 24) | (upper << 16) | (lower2 << 8) | lower;
|
||||
c.z = combined;
|
||||
upper2 = *((__global uchar*)(src + LOffset + x * 16 + 15));
|
||||
upper = *((__global uchar*)(src + LOffset + x * 16 + 14));
|
||||
lower2 = *((__global uchar*)(src + LOffset + x * 16 + 13));
|
||||
lower = *((__global uchar*)(src + LOffset + x * 16 + 12));
|
||||
combined = (upper2 << 24) | (upper << 16) | (lower2 << 8) | lower;
|
||||
c.w = combined;
|
||||
}
|
||||
else{
|
||||
c = *((__global uint4 *)(src + LOffset + x * 16));
|
||||
}
|
||||
|
||||
write_imageui(output, dstCoord, c);
|
||||
}
|
||||
)==="
|
||||
@@ -0,0 +1,161 @@
|
||||
/*
|
||||
* Copyright (C) 2017-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
R"===(
|
||||
#pragma OPENCL EXTENSION cl_khr_3d_image_writes : enable
|
||||
|
||||
__kernel void CopyBufferToImage3dBytes(__global uchar *src,
|
||||
__write_only image3d_t output,
|
||||
ulong srcOffset,
|
||||
int4 dstOffset,
|
||||
ulong2 Pitch) {
|
||||
const uint x = get_global_id(0);
|
||||
const uint y = get_global_id(1);
|
||||
const uint z = get_global_id(2);
|
||||
|
||||
int4 dstCoord = (int4)(x, y, z, 0) + dstOffset;
|
||||
ulong LOffset = srcOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
|
||||
write_imageui(output, dstCoord, (uint4)(*(src + LOffset + x), 0, 0, 1));
|
||||
}
|
||||
|
||||
__kernel void CopyBufferToImage3d2Bytes(__global uchar *src,
|
||||
__write_only image3d_t output,
|
||||
ulong srcOffset,
|
||||
int4 dstOffset,
|
||||
ulong2 Pitch) {
|
||||
const uint x = get_global_id(0);
|
||||
const uint y = get_global_id(1);
|
||||
const uint z = get_global_id(2);
|
||||
|
||||
int4 dstCoord = (int4)(x, y, z, 0) + dstOffset;
|
||||
ulong LOffset = srcOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
|
||||
uint4 c = (uint4)(0, 0, 0, 1);
|
||||
|
||||
if(( ulong )(src + srcOffset) & 0x00000001){
|
||||
ushort upper = *((__global uchar*)(src + LOffset + x * 2 + 1));
|
||||
ushort lower = *((__global uchar*)(src + LOffset + x * 2));
|
||||
ushort combined = (upper << 8) | lower;
|
||||
c.x = (uint)combined;
|
||||
}
|
||||
else{
|
||||
c.x = (uint)(*(__global ushort*)(src + LOffset + x * 2));
|
||||
}
|
||||
write_imageui(output, dstCoord, c);
|
||||
}
|
||||
|
||||
__kernel void CopyBufferToImage3d4Bytes(__global uchar *src,
|
||||
__write_only image3d_t output,
|
||||
ulong srcOffset,
|
||||
int4 dstOffset,
|
||||
ulong2 Pitch) {
|
||||
const uint x = get_global_id(0);
|
||||
const uint y = get_global_id(1);
|
||||
const uint z = get_global_id(2);
|
||||
|
||||
int4 dstCoord = (int4)(x, y, z, 0) + dstOffset;
|
||||
ulong LOffset = srcOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
|
||||
uint4 c = (uint4)(0, 0, 0, 1);
|
||||
|
||||
if(( ulong )(src + srcOffset) & 0x00000003){
|
||||
uint upper2 = *((__global uchar*)(src + LOffset + x * 4 + 3));
|
||||
uint upper = *((__global uchar*)(src + LOffset + x * 4 + 2));
|
||||
uint lower2 = *((__global uchar*)(src + LOffset + x * 4 + 1));
|
||||
uint lower = *((__global uchar*)(src + LOffset + x * 4));
|
||||
uint combined = (upper2 << 24) | (upper << 16) | (lower2 << 8) | lower;
|
||||
c.x = combined;
|
||||
}
|
||||
else{
|
||||
c.x = (*(__global uint*)(src + LOffset + x * 4));
|
||||
}
|
||||
write_imageui(output, dstCoord, c);
|
||||
}
|
||||
|
||||
__kernel void CopyBufferToImage3d8Bytes(__global uchar *src,
|
||||
__write_only image3d_t output,
|
||||
ulong srcOffset,
|
||||
int4 dstOffset,
|
||||
ulong2 Pitch) {
|
||||
const uint x = get_global_id(0);
|
||||
const uint y = get_global_id(1);
|
||||
const uint z = get_global_id(2);
|
||||
|
||||
int4 dstCoord = (int4)(x, y, z, 0) + dstOffset;
|
||||
ulong LOffset = srcOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
|
||||
uint2 c = (uint2)(0, 0);//*((__global uint2*)(src + LOffset + x * 8));
|
||||
|
||||
if(( ulong )(src + srcOffset) & 0x00000007){
|
||||
uint upper2 = *((__global uchar*)(src + LOffset + x * 8 + 3));
|
||||
uint upper = *((__global uchar*)(src + LOffset + x * 8 + 2));
|
||||
uint lower2 = *((__global uchar*)(src + LOffset + x * 8 + 1));
|
||||
uint lower = *((__global uchar*)(src + LOffset + x * 8));
|
||||
uint combined = (upper2 << 24) | (upper << 16) | (lower2 << 8) | lower;
|
||||
c.x = combined;
|
||||
upper2 = *((__global uchar*)(src + LOffset + x * 8 + 7));
|
||||
upper = *((__global uchar*)(src + LOffset + x * 8 + 6));
|
||||
lower2 = *((__global uchar*)(src + LOffset + x * 8 + 5));
|
||||
lower = *((__global uchar*)(src + LOffset + x * 8 + 4));
|
||||
combined = ((uint)upper2 << 24) | ((uint)upper << 16) | ((uint)lower2 << 8) | lower;
|
||||
c.y = combined;
|
||||
}
|
||||
else{
|
||||
c = *((__global uint2*)(src + LOffset + x * 8));
|
||||
}
|
||||
|
||||
write_imageui(output, dstCoord, (uint4)(c.x, c.y, 0, 1));
|
||||
}
|
||||
|
||||
__kernel void CopyBufferToImage3d16Bytes(__global uchar *src,
|
||||
__write_only image3d_t output,
|
||||
ulong srcOffset,
|
||||
int4 dstOffset,
|
||||
ulong2 Pitch) {
|
||||
const uint x = get_global_id(0);
|
||||
const uint y = get_global_id(1);
|
||||
const uint z = get_global_id(2);
|
||||
|
||||
int4 dstCoord = (int4)(x, y, z, 0) + dstOffset;
|
||||
ulong LOffset = srcOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
|
||||
uint4 c = (uint4)(0, 0, 0, 0);
|
||||
|
||||
if(( ulong )(src + srcOffset) & 0x0000000f){
|
||||
uint upper2 = *((__global uchar*)(src + LOffset + x * 16 + 3));
|
||||
uint upper = *((__global uchar*)(src + LOffset + x * 16 + 2));
|
||||
uint lower2 = *((__global uchar*)(src + LOffset + x * 16 + 1));
|
||||
uint lower = *((__global uchar*)(src + LOffset + x * 16));
|
||||
uint combined = (upper2 << 24) | (upper << 16) | (lower2 << 8) | lower;
|
||||
c.x = combined;
|
||||
upper2 = *((__global uchar*)(src + LOffset + x * 16 + 7));
|
||||
upper = *((__global uchar*)(src + LOffset + x * 16 + 6));
|
||||
lower2 = *((__global uchar*)(src + LOffset + x * 16 + 5));
|
||||
lower = *((__global uchar*)(src + LOffset + x * 16 + 4));
|
||||
combined = (upper2 << 24) | (upper << 16) | (lower2 << 8) | lower;
|
||||
c.y = combined;
|
||||
upper2 = *((__global uchar*)(src + LOffset + x * 16 + 11));
|
||||
upper = *((__global uchar*)(src + LOffset + x * 16 + 10));
|
||||
lower2 = *((__global uchar*)(src + LOffset + x * 16 + 9));
|
||||
lower = *((__global uchar*)(src + LOffset + x * 16 + 8));
|
||||
combined = (upper2 << 24) | (upper << 16) | (lower2 << 8) | lower;
|
||||
c.z = combined;
|
||||
upper2 = *((__global uchar*)(src + LOffset + x * 16 + 15));
|
||||
upper = *((__global uchar*)(src + LOffset + x * 16 + 14));
|
||||
lower2 = *((__global uchar*)(src + LOffset + x * 16 + 13));
|
||||
lower = *((__global uchar*)(src + LOffset + x * 16 + 12));
|
||||
combined = (upper2 << 24) | (upper << 16) | (lower2 << 8) | lower;
|
||||
c.w = combined;
|
||||
}
|
||||
else{
|
||||
c = *((__global uint4 *)(src + LOffset + x * 16));
|
||||
}
|
||||
|
||||
write_imageui(output, dstCoord, c);
|
||||
}
|
||||
)==="
|
||||
@@ -0,0 +1,139 @@
|
||||
/*
|
||||
* Copyright (C) 2017-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
R"===(
|
||||
__kernel void CopyImage3dToBufferBytes(__read_only image3d_t input,
|
||||
__global uchar *dst,
|
||||
int4 srcOffset,
|
||||
int dstOffset,
|
||||
uint2 Pitch) {
|
||||
const uint x = get_global_id(0);
|
||||
const uint y = get_global_id(1);
|
||||
const uint z = get_global_id(2);
|
||||
|
||||
const int4 srcCoord = (int4)(x, y, z, 0) + srcOffset;
|
||||
uint DstOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
|
||||
uint4 c = read_imageui(input, srcCoord);
|
||||
*(dst + DstOffset + x) = convert_uchar_sat(c.x);
|
||||
}
|
||||
|
||||
__kernel void CopyImage3dToBuffer2Bytes(__read_only image3d_t input,
|
||||
__global uchar *dst,
|
||||
int4 srcOffset,
|
||||
int dstOffset,
|
||||
uint2 Pitch) {
|
||||
const uint x = get_global_id(0);
|
||||
const uint y = get_global_id(1);
|
||||
const uint z = get_global_id(2);
|
||||
|
||||
const int4 srcCoord = (int4)(x, y, z, 0) + srcOffset;
|
||||
uint DstOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
|
||||
uint4 c = read_imageui(input, srcCoord);
|
||||
|
||||
if(( ulong )(dst + dstOffset) & 0x00000001){
|
||||
*((__global uchar*)(dst + DstOffset + x * 2 + 1)) = convert_uchar_sat((c.x >> 8 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 2)) = convert_uchar_sat(c.x & 0xff);
|
||||
}
|
||||
else{
|
||||
*((__global ushort*)(dst + DstOffset + x * 2)) = convert_ushort_sat(c.x);
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void CopyImage3dToBuffer4Bytes(__read_only image3d_t input,
|
||||
__global uchar *dst,
|
||||
int4 srcOffset,
|
||||
int dstOffset,
|
||||
uint2 Pitch) {
|
||||
const uint x = get_global_id(0);
|
||||
const uint y = get_global_id(1);
|
||||
const uint z = get_global_id(2);
|
||||
|
||||
const int4 srcCoord = (int4)(x, y, z, 0) + srcOffset;
|
||||
uint DstOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
|
||||
uint4 c = read_imageui(input, srcCoord);
|
||||
|
||||
if(( ulong )(dst + dstOffset) & 0x00000003){
|
||||
*((__global uchar*)(dst + DstOffset + x * 4 + 3)) = convert_uchar_sat((c.x >> 24 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 4 + 2)) = convert_uchar_sat((c.x >> 16 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 4 + 1)) = convert_uchar_sat((c.x >> 8 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 4)) = convert_uchar_sat(c.x & 0xff);
|
||||
}
|
||||
else{
|
||||
*((__global uint*)(dst + DstOffset + x * 4)) = c.x;
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void CopyImage3dToBuffer8Bytes(__read_only image3d_t input,
|
||||
__global uchar *dst,
|
||||
int4 srcOffset,
|
||||
int dstOffset,
|
||||
uint2 Pitch) {
|
||||
const uint x = get_global_id(0);
|
||||
const uint y = get_global_id(1);
|
||||
const uint z = get_global_id(2);
|
||||
|
||||
const int4 srcCoord = (int4)(x, y, z, 0) + srcOffset;
|
||||
uint DstOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
|
||||
uint4 c = read_imageui(input, srcCoord);
|
||||
|
||||
if(( ulong )(dst + dstOffset) & 0x00000007){
|
||||
*((__global uchar*)(dst + DstOffset + x * 8 + 3)) = convert_uchar_sat((c.x >> 24 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 8 + 2)) = convert_uchar_sat((c.x >> 16 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 8 + 1)) = convert_uchar_sat((c.x >> 8 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 8)) = convert_uchar_sat(c.x & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 8 + 7)) = convert_uchar_sat((c.y >> 24 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 8 + 6)) = convert_uchar_sat((c.y >> 16 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 8 + 5)) = convert_uchar_sat((c.y >> 8 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 8 + 4)) = convert_uchar_sat(c.y & 0xff);
|
||||
}
|
||||
else{
|
||||
uint2 d = (uint2)(c.x,c.y);
|
||||
*((__global uint2*)(dst + DstOffset + x * 8)) = d;
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void CopyImage3dToBuffer16Bytes(__read_only image3d_t input,
|
||||
__global uchar *dst,
|
||||
int4 srcOffset,
|
||||
int dstOffset,
|
||||
uint2 Pitch) {
|
||||
const uint x = get_global_id(0);
|
||||
const uint y = get_global_id(1);
|
||||
const uint z = get_global_id(2);
|
||||
|
||||
const int4 srcCoord = (int4)(x, y, z, 0) + srcOffset;
|
||||
uint DstOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
|
||||
const uint4 c = read_imageui(input, srcCoord);
|
||||
|
||||
if(( ulong )(dst + dstOffset) & 0x0000000f){
|
||||
*((__global uchar*)(dst + DstOffset + x * 16 + 3)) = convert_uchar_sat((c.x >> 24 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 16 + 2)) = convert_uchar_sat((c.x >> 16 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 16 + 1)) = convert_uchar_sat((c.x >> 8 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 16)) = convert_uchar_sat(c.x & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 16 + 7)) = convert_uchar_sat((c.y >> 24 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 16 + 6)) = convert_uchar_sat((c.y >> 16 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 16 + 5)) = convert_uchar_sat((c.y >> 8 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 16 + 4)) = convert_uchar_sat(c.y & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 16 + 11)) = convert_uchar_sat((c.z >> 24 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 16 + 10)) = convert_uchar_sat((c.z >> 16 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 16 + 9)) = convert_uchar_sat((c.z >> 8 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 16 + 8)) = convert_uchar_sat(c.z & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 16 + 15)) = convert_uchar_sat((c.w >> 24 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 16 + 14)) = convert_uchar_sat((c.w >> 16 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 16 + 13)) = convert_uchar_sat((c.w >> 8 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 16 + 12)) = convert_uchar_sat(c.w & 0xff);
|
||||
}
|
||||
else{
|
||||
*(__global uint4*)(dst + DstOffset + x * 16) = c;
|
||||
}
|
||||
}
|
||||
)==="
|
||||
@@ -0,0 +1,139 @@
|
||||
/*
|
||||
* Copyright (C) 2017-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
R"===(
|
||||
__kernel void CopyImage3dToBufferBytes(__read_only image3d_t input,
|
||||
__global uchar *dst,
|
||||
int4 srcOffset,
|
||||
ulong dstOffset,
|
||||
ulong2 Pitch) {
|
||||
const uint x = get_global_id(0);
|
||||
const uint y = get_global_id(1);
|
||||
const uint z = get_global_id(2);
|
||||
|
||||
const int4 srcCoord = (int4)(x, y, z, 0) + srcOffset;
|
||||
ulong DstOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
|
||||
uint4 c = read_imageui(input, srcCoord);
|
||||
*(dst + DstOffset + x) = convert_uchar_sat(c.x);
|
||||
}
|
||||
|
||||
__kernel void CopyImage3dToBuffer2Bytes(__read_only image3d_t input,
|
||||
__global uchar *dst,
|
||||
int4 srcOffset,
|
||||
ulong dstOffset,
|
||||
ulong2 Pitch) {
|
||||
const uint x = get_global_id(0);
|
||||
const uint y = get_global_id(1);
|
||||
const uint z = get_global_id(2);
|
||||
|
||||
const int4 srcCoord = (int4)(x, y, z, 0) + srcOffset;
|
||||
ulong DstOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
|
||||
uint4 c = read_imageui(input, srcCoord);
|
||||
|
||||
if(( ulong )(dst + dstOffset) & 0x00000001){
|
||||
*((__global uchar*)(dst + DstOffset + x * 2 + 1)) = convert_uchar_sat((c.x >> 8 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 2)) = convert_uchar_sat(c.x & 0xff);
|
||||
}
|
||||
else{
|
||||
*((__global ushort*)(dst + DstOffset + x * 2)) = convert_ushort_sat(c.x);
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void CopyImage3dToBuffer4Bytes(__read_only image3d_t input,
|
||||
__global uchar *dst,
|
||||
int4 srcOffset,
|
||||
ulong dstOffset,
|
||||
ulong2 Pitch) {
|
||||
const uint x = get_global_id(0);
|
||||
const uint y = get_global_id(1);
|
||||
const uint z = get_global_id(2);
|
||||
|
||||
const int4 srcCoord = (int4)(x, y, z, 0) + srcOffset;
|
||||
ulong DstOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
|
||||
uint4 c = read_imageui(input, srcCoord);
|
||||
|
||||
if(( ulong )(dst + dstOffset) & 0x00000003){
|
||||
*((__global uchar*)(dst + DstOffset + x * 4 + 3)) = convert_uchar_sat((c.x >> 24 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 4 + 2)) = convert_uchar_sat((c.x >> 16 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 4 + 1)) = convert_uchar_sat((c.x >> 8 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 4)) = convert_uchar_sat(c.x & 0xff);
|
||||
}
|
||||
else{
|
||||
*((__global uint*)(dst + DstOffset + x * 4)) = c.x;
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void CopyImage3dToBuffer8Bytes(__read_only image3d_t input,
|
||||
__global uchar *dst,
|
||||
int4 srcOffset,
|
||||
ulong dstOffset,
|
||||
ulong2 Pitch) {
|
||||
const uint x = get_global_id(0);
|
||||
const uint y = get_global_id(1);
|
||||
const uint z = get_global_id(2);
|
||||
|
||||
const int4 srcCoord = (int4)(x, y, z, 0) + srcOffset;
|
||||
ulong DstOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
|
||||
uint4 c = read_imageui(input, srcCoord);
|
||||
|
||||
if(( ulong )(dst + dstOffset) & 0x00000007){
|
||||
*((__global uchar*)(dst + DstOffset + x * 8 + 3)) = convert_uchar_sat((c.x >> 24 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 8 + 2)) = convert_uchar_sat((c.x >> 16 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 8 + 1)) = convert_uchar_sat((c.x >> 8 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 8)) = convert_uchar_sat(c.x & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 8 + 7)) = convert_uchar_sat((c.y >> 24 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 8 + 6)) = convert_uchar_sat((c.y >> 16 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 8 + 5)) = convert_uchar_sat((c.y >> 8 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 8 + 4)) = convert_uchar_sat(c.y & 0xff);
|
||||
}
|
||||
else{
|
||||
uint2 d = (uint2)(c.x,c.y);
|
||||
*((__global uint2*)(dst + DstOffset + x * 8)) = d;
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void CopyImage3dToBuffer16Bytes(__read_only image3d_t input,
|
||||
__global uchar *dst,
|
||||
int4 srcOffset,
|
||||
ulong dstOffset,
|
||||
ulong2 Pitch) {
|
||||
const uint x = get_global_id(0);
|
||||
const uint y = get_global_id(1);
|
||||
const uint z = get_global_id(2);
|
||||
|
||||
const int4 srcCoord = (int4)(x, y, z, 0) + srcOffset;
|
||||
ulong DstOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
|
||||
const uint4 c = read_imageui(input, srcCoord);
|
||||
|
||||
if(( ulong )(dst + dstOffset) & 0x0000000f){
|
||||
*((__global uchar*)(dst + DstOffset + x * 16 + 3)) = convert_uchar_sat((c.x >> 24 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 16 + 2)) = convert_uchar_sat((c.x >> 16 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 16 + 1)) = convert_uchar_sat((c.x >> 8 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 16)) = convert_uchar_sat(c.x & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 16 + 7)) = convert_uchar_sat((c.y >> 24 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 16 + 6)) = convert_uchar_sat((c.y >> 16 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 16 + 5)) = convert_uchar_sat((c.y >> 8 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 16 + 4)) = convert_uchar_sat(c.y & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 16 + 11)) = convert_uchar_sat((c.z >> 24 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 16 + 10)) = convert_uchar_sat((c.z >> 16 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 16 + 9)) = convert_uchar_sat((c.z >> 8 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 16 + 8)) = convert_uchar_sat(c.z & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 16 + 15)) = convert_uchar_sat((c.w >> 24 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 16 + 14)) = convert_uchar_sat((c.w >> 16 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 16 + 13)) = convert_uchar_sat((c.w >> 8 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 16 + 12)) = convert_uchar_sat(c.w & 0xff);
|
||||
}
|
||||
else{
|
||||
*(__global uint4*)(dst + DstOffset + x * 16) = c;
|
||||
}
|
||||
}
|
||||
)==="
|
||||
@@ -0,0 +1,21 @@
|
||||
/*
|
||||
* Copyright (C) 2017-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
R"===(
|
||||
__kernel void CopyImageToImage1d(
|
||||
__read_only image1d_t input,
|
||||
__write_only image1d_t output,
|
||||
int4 srcOffset,
|
||||
int4 dstOffset) {
|
||||
const int x = get_global_id(0);
|
||||
|
||||
const int srcCoord = x + srcOffset.x;
|
||||
const int dstCoord = x + dstOffset.x;
|
||||
const uint4 c = read_imageui(input, srcCoord);
|
||||
write_imageui(output, dstCoord, c);
|
||||
}
|
||||
)==="
|
||||
@@ -0,0 +1,22 @@
|
||||
/*
|
||||
* Copyright (C) 2017-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
R"===(
|
||||
__kernel void CopyImageToImage2d(
|
||||
__read_only image2d_t input,
|
||||
__write_only image2d_t output,
|
||||
int4 srcOffset,
|
||||
int4 dstOffset) {
|
||||
const int x = get_global_id(0);
|
||||
const int y = get_global_id(1);
|
||||
|
||||
const int2 srcCoord = (int2)(x, y) + (int2)(srcOffset.x, srcOffset.y);
|
||||
const int2 dstCoord = (int2)(x, y) + (int2)(dstOffset.x, dstOffset.y);
|
||||
const uint4 c = read_imageui(input, srcCoord);
|
||||
write_imageui(output, dstCoord, c);
|
||||
}
|
||||
)==="
|
||||
@@ -0,0 +1,25 @@
|
||||
/*
|
||||
* Copyright (C) 2017-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
R"===(
|
||||
#pragma OPENCL EXTENSION cl_khr_3d_image_writes : enable
|
||||
|
||||
__kernel void CopyImageToImage3d(
|
||||
__read_only image3d_t input,
|
||||
__write_only image3d_t output,
|
||||
int4 srcOffset,
|
||||
int4 dstOffset) {
|
||||
const int x = get_global_id(0);
|
||||
const int y = get_global_id(1);
|
||||
const int z = get_global_id(2);
|
||||
|
||||
const int4 srcCoord = (int4)(x, y, z, 0) + srcOffset;
|
||||
const int4 dstCoord = (int4)(x, y, z, 0) + dstOffset;
|
||||
const uint4 c = read_imageui(input, srcCoord);
|
||||
write_imageui(output, dstCoord, c);
|
||||
}
|
||||
)==="
|
||||
73
shared/source/built_ins/kernels/fill_buffer.builtin_kernel
Normal file
73
shared/source/built_ins/kernels/fill_buffer.builtin_kernel
Normal file
@@ -0,0 +1,73 @@
|
||||
/*
|
||||
* Copyright (C) 2017-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
R"===(
|
||||
// assumption is local work size = pattern size
|
||||
__kernel void FillBufferBytes(
|
||||
__global uchar* pDst,
|
||||
uint dstOffsetInBytes,
|
||||
const __global uchar* pPattern )
|
||||
{
|
||||
uint dstIndex = get_global_id(0) + dstOffsetInBytes;
|
||||
uint srcIndex = get_local_id(0);
|
||||
pDst[dstIndex] = pPattern[srcIndex];
|
||||
}
|
||||
|
||||
__kernel void FillBufferLeftLeftover(
|
||||
__global uchar* pDst,
|
||||
uint dstOffsetInBytes,
|
||||
const __global uchar* pPattern,
|
||||
const uint patternSizeInEls )
|
||||
{
|
||||
uint gid = get_global_id(0);
|
||||
pDst[ gid + dstOffsetInBytes ] = pPattern[ gid & (patternSizeInEls - 1) ];
|
||||
}
|
||||
|
||||
__kernel void FillBufferMiddle(
|
||||
__global uchar* pDst,
|
||||
uint dstOffsetInBytes,
|
||||
const __global uint* pPattern,
|
||||
const uint patternSizeInEls )
|
||||
{
|
||||
uint gid = get_global_id(0);
|
||||
((__global uint*)(pDst + dstOffsetInBytes))[gid] = pPattern[ gid & (patternSizeInEls - 1) ];
|
||||
}
|
||||
|
||||
__kernel void FillBufferRightLeftover(
|
||||
__global uchar* pDst,
|
||||
uint dstOffsetInBytes,
|
||||
const __global uchar* pPattern,
|
||||
const uint patternSizeInEls )
|
||||
{
|
||||
uint gid = get_global_id(0);
|
||||
pDst[ gid + dstOffsetInBytes ] = pPattern[ gid & (patternSizeInEls - 1) ];
|
||||
}
|
||||
|
||||
__kernel void FillBufferImmediate(
|
||||
__global uchar* ptr,
|
||||
uint dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment
|
||||
const uint value)
|
||||
{
|
||||
uint dstIndex = get_global_id(0);
|
||||
__global uchar* pDst = (__global uchar*)ptr + dstSshOffset;
|
||||
pDst[dstIndex] = value;
|
||||
}
|
||||
|
||||
__kernel void FillBufferSSHOffset(
|
||||
__global uchar* ptr,
|
||||
uint dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment
|
||||
const __global uchar* pPattern,
|
||||
uint patternSshOffset // Offset needed in case pPattern has been adjusted for SSH alignment
|
||||
)
|
||||
{
|
||||
uint dstIndex = get_global_id(0);
|
||||
uint srcIndex = get_local_id(0);
|
||||
__global uchar* pDst = (__global uchar*)ptr + dstSshOffset;
|
||||
__global uchar* pSrc = (__global uchar*)pPattern + patternSshOffset;
|
||||
pDst[dstIndex] = pSrc[srcIndex];
|
||||
}
|
||||
)==="
|
||||
@@ -0,0 +1,49 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
R"===(
|
||||
// assumption is local work size = pattern size
|
||||
__kernel void FillBufferBytes(
|
||||
__global uchar* pDst,
|
||||
ulong dstOffsetInBytes,
|
||||
const __global uchar* pPattern )
|
||||
{
|
||||
size_t dstIndex = get_global_id(0) + dstOffsetInBytes;
|
||||
size_t srcIndex = get_local_id(0);
|
||||
pDst[dstIndex] = pPattern[srcIndex];
|
||||
}
|
||||
|
||||
__kernel void FillBufferLeftLeftover(
|
||||
__global uchar* pDst,
|
||||
ulong dstOffsetInBytes,
|
||||
const __global uchar* pPattern,
|
||||
const ulong patternSizeInEls )
|
||||
{
|
||||
size_t gid = get_global_id(0);
|
||||
pDst[ gid + dstOffsetInBytes ] = pPattern[ gid & (patternSizeInEls - 1) ];
|
||||
}
|
||||
|
||||
__kernel void FillBufferMiddle(
|
||||
__global uchar* pDst,
|
||||
ulong dstOffsetInBytes,
|
||||
const __global uint* pPattern,
|
||||
const ulong patternSizeInEls )
|
||||
{
|
||||
size_t gid = get_global_id(0);
|
||||
((__global uint*)(pDst + dstOffsetInBytes))[gid] = pPattern[ gid & (patternSizeInEls - 1) ];
|
||||
}
|
||||
|
||||
__kernel void FillBufferRightLeftover(
|
||||
__global uchar* pDst,
|
||||
ulong dstOffsetInBytes,
|
||||
const __global uchar* pPattern,
|
||||
const ulong patternSizeInEls )
|
||||
{
|
||||
size_t gid = get_global_id(0);
|
||||
pDst[ gid + dstOffsetInBytes ] = pPattern[ gid & (patternSizeInEls - 1) ];
|
||||
}
|
||||
)==="
|
||||
18
shared/source/built_ins/kernels/fill_image1d.builtin_kernel
Normal file
18
shared/source/built_ins/kernels/fill_image1d.builtin_kernel
Normal file
@@ -0,0 +1,18 @@
|
||||
/*
|
||||
* Copyright (C) 2017-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
R"===(
|
||||
__kernel void FillImage1d(
|
||||
__write_only image1d_t output,
|
||||
uint4 color,
|
||||
int4 dstOffset) {
|
||||
const int x = get_global_id(0);
|
||||
|
||||
const int dstCoord = x + dstOffset.x;
|
||||
write_imageui(output, dstCoord, color);
|
||||
}
|
||||
)==="
|
||||
19
shared/source/built_ins/kernels/fill_image2d.builtin_kernel
Normal file
19
shared/source/built_ins/kernels/fill_image2d.builtin_kernel
Normal file
@@ -0,0 +1,19 @@
|
||||
/*
|
||||
* Copyright (C) 2017-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
R"===(
|
||||
__kernel void FillImage2d(
|
||||
__write_only image2d_t output,
|
||||
uint4 color,
|
||||
int4 dstOffset) {
|
||||
const int x = get_global_id(0);
|
||||
const int y = get_global_id(1);
|
||||
|
||||
const int2 dstCoord = (int2)(x, y) + (int2)(dstOffset.x, dstOffset.y);
|
||||
write_imageui(output, dstCoord, color);
|
||||
}
|
||||
)==="
|
||||
22
shared/source/built_ins/kernels/fill_image3d.builtin_kernel
Normal file
22
shared/source/built_ins/kernels/fill_image3d.builtin_kernel
Normal file
@@ -0,0 +1,22 @@
|
||||
/*
|
||||
* Copyright (C) 2017-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
R"===(
|
||||
#pragma OPENCL EXTENSION cl_khr_3d_image_writes : enable
|
||||
|
||||
__kernel void FillImage3d(
|
||||
__write_only image3d_t output,
|
||||
uint4 color,
|
||||
int4 dstOffset) {
|
||||
const int x = get_global_id(0);
|
||||
const int y = get_global_id(1);
|
||||
const int z = get_global_id(2);
|
||||
|
||||
const int4 dstCoord = (int4)(x, y, z, 0) + dstOffset;
|
||||
write_imageui(output, dstCoord, color);
|
||||
}
|
||||
)==="
|
||||
25
shared/source/built_ins/registry/CMakeLists.txt
Normal file
25
shared/source/built_ins/registry/CMakeLists.txt
Normal file
@@ -0,0 +1,25 @@
|
||||
#
|
||||
# Copyright (C) 2017-2020 Intel Corporation
|
||||
#
|
||||
# SPDX-License-Identifier: MIT
|
||||
#
|
||||
|
||||
add_library(${BUILTINS_SOURCES_LIB_NAME} OBJECT EXCLUDE_FROM_ALL
|
||||
CMakeLists.txt
|
||||
built_ins_registry.h
|
||||
register_copy_kernels_source.cpp
|
||||
)
|
||||
|
||||
set_target_properties(${BUILTINS_SOURCES_LIB_NAME} PROPERTIES POSITION_INDEPENDENT_CODE ON)
|
||||
set_target_properties(${BUILTINS_SOURCES_LIB_NAME} PROPERTIES FOLDER "built_ins")
|
||||
|
||||
target_include_directories(${BUILTINS_SOURCES_LIB_NAME} PRIVATE
|
||||
${ENGINE_NODE_DIR}
|
||||
${KHRONOS_HEADERS_DIR}
|
||||
${KHRONOS_GL_HEADERS_DIR}
|
||||
${NEO__GMM_INCLUDE_DIR}
|
||||
${NEO__IGC_INCLUDE_DIR}
|
||||
${THIRD_PARTY_DIR}
|
||||
)
|
||||
|
||||
add_subdirectories()
|
||||
28
shared/source/built_ins/registry/built_ins_registry.h
Normal file
28
shared/source/built_ins/registry/built_ins_registry.h
Normal file
@@ -0,0 +1,28 @@
|
||||
/*
|
||||
* Copyright (C) 2017-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "shared/source/built_ins/built_ins.h"
|
||||
|
||||
#include <string>
|
||||
#include <unordered_map>
|
||||
|
||||
namespace NEO {
|
||||
|
||||
struct RegisterEmbeddedResource {
|
||||
RegisterEmbeddedResource(const char *name, const char *resource, size_t resourceLength) {
|
||||
auto &storageRegistry = EmbeddedStorageRegistry::getInstance();
|
||||
storageRegistry.store(name, createBuiltinResource(resource, resourceLength));
|
||||
}
|
||||
|
||||
RegisterEmbeddedResource(const char *name, std::string &&resource)
|
||||
: RegisterEmbeddedResource(name, resource.data(), resource.size() + 1) {
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace NEO
|
||||
@@ -0,0 +1,167 @@
|
||||
/*
|
||||
* Copyright (C) 2017-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/built_ins/registry/built_ins_registry.h"
|
||||
|
||||
#include <string>
|
||||
|
||||
namespace NEO {
|
||||
|
||||
static RegisterEmbeddedResource registerCopyBufferToBufferSrc(
|
||||
createBuiltinResourceName(
|
||||
EBuiltInOps::CopyBufferToBuffer,
|
||||
BuiltinCode::getExtension(BuiltinCode::ECodeType::Source))
|
||||
.c_str(),
|
||||
std::string(
|
||||
#include "shared/source/built_ins/kernels/copy_buffer_to_buffer.builtin_kernel"
|
||||
));
|
||||
|
||||
static RegisterEmbeddedResource registerCopyBufferToBufferStatelessSrc(
|
||||
createBuiltinResourceName(
|
||||
EBuiltInOps::CopyBufferToBufferStateless,
|
||||
BuiltinCode::getExtension(BuiltinCode::ECodeType::Source))
|
||||
.c_str(),
|
||||
std::string(
|
||||
#include "shared/source/built_ins/kernels/copy_buffer_to_buffer_stateless.builtin_kernel"
|
||||
));
|
||||
|
||||
static RegisterEmbeddedResource registerCopyBufferRectSrc(
|
||||
createBuiltinResourceName(
|
||||
EBuiltInOps::CopyBufferRect,
|
||||
BuiltinCode::getExtension(BuiltinCode::ECodeType::Source))
|
||||
.c_str(),
|
||||
std::string(
|
||||
#include "shared/source/built_ins/kernels/copy_buffer_rect.builtin_kernel"
|
||||
));
|
||||
|
||||
static RegisterEmbeddedResource registerCopyBufferRectStatelessSrc(
|
||||
createBuiltinResourceName(
|
||||
EBuiltInOps::CopyBufferRectStateless,
|
||||
BuiltinCode::getExtension(BuiltinCode::ECodeType::Source))
|
||||
.c_str(),
|
||||
std::string(
|
||||
#include "shared/source/built_ins/kernels/copy_buffer_rect_stateless.builtin_kernel"
|
||||
));
|
||||
|
||||
static RegisterEmbeddedResource registerFillBufferSrc(
|
||||
createBuiltinResourceName(
|
||||
EBuiltInOps::FillBuffer,
|
||||
BuiltinCode::getExtension(BuiltinCode::ECodeType::Source))
|
||||
.c_str(),
|
||||
std::string(
|
||||
#include "shared/source/built_ins/kernels/fill_buffer.builtin_kernel"
|
||||
));
|
||||
|
||||
static RegisterEmbeddedResource registerFillBufferStatelessSrc(
|
||||
createBuiltinResourceName(
|
||||
EBuiltInOps::FillBufferStateless,
|
||||
BuiltinCode::getExtension(BuiltinCode::ECodeType::Source))
|
||||
.c_str(),
|
||||
std::string(
|
||||
#include "shared/source/built_ins/kernels/fill_buffer_stateless.builtin_kernel"
|
||||
));
|
||||
|
||||
static RegisterEmbeddedResource registerCopyBufferToImage3dSrc(
|
||||
createBuiltinResourceName(
|
||||
EBuiltInOps::CopyBufferToImage3d,
|
||||
BuiltinCode::getExtension(BuiltinCode::ECodeType::Source))
|
||||
.c_str(),
|
||||
std::string(
|
||||
#include "shared/source/built_ins/kernels/copy_buffer_to_image3d.builtin_kernel"
|
||||
));
|
||||
|
||||
static RegisterEmbeddedResource registerCopyBufferToImage3dStatelessSrc(
|
||||
createBuiltinResourceName(
|
||||
EBuiltInOps::CopyBufferToImage3dStateless,
|
||||
BuiltinCode::getExtension(BuiltinCode::ECodeType::Source))
|
||||
.c_str(),
|
||||
std::string(
|
||||
#include "shared/source/built_ins/kernels/copy_buffer_to_image3d_stateless.builtin_kernel"
|
||||
));
|
||||
|
||||
static RegisterEmbeddedResource registerCopyImage3dToBufferSrc(
|
||||
createBuiltinResourceName(
|
||||
EBuiltInOps::CopyImage3dToBuffer,
|
||||
BuiltinCode::getExtension(BuiltinCode::ECodeType::Source))
|
||||
.c_str(),
|
||||
std::string(
|
||||
#include "shared/source/built_ins/kernels/copy_image3d_to_buffer.builtin_kernel"
|
||||
));
|
||||
|
||||
static RegisterEmbeddedResource registerCopyImage3dToBufferStatelessSrc(
|
||||
createBuiltinResourceName(
|
||||
EBuiltInOps::CopyImage3dToBufferStateless,
|
||||
BuiltinCode::getExtension(BuiltinCode::ECodeType::Source))
|
||||
.c_str(),
|
||||
std::string(
|
||||
#include "shared/source/built_ins/kernels/copy_image3d_to_buffer_stateless.builtin_kernel"
|
||||
));
|
||||
|
||||
static RegisterEmbeddedResource registerCopyImageToImage1dSrc(
|
||||
createBuiltinResourceName(
|
||||
EBuiltInOps::CopyImageToImage1d,
|
||||
BuiltinCode::getExtension(BuiltinCode::ECodeType::Source))
|
||||
.c_str(),
|
||||
std::string(
|
||||
#include "shared/source/built_ins/kernels/copy_image_to_image1d.builtin_kernel"
|
||||
));
|
||||
|
||||
static RegisterEmbeddedResource registerCopyImageToImage2dSrc(
|
||||
createBuiltinResourceName(
|
||||
EBuiltInOps::CopyImageToImage2d,
|
||||
BuiltinCode::getExtension(BuiltinCode::ECodeType::Source))
|
||||
.c_str(),
|
||||
std::string(
|
||||
#include "shared/source/built_ins/kernels/copy_image_to_image2d.builtin_kernel"
|
||||
));
|
||||
|
||||
static RegisterEmbeddedResource registerCopyImageToImage3dSrc(
|
||||
createBuiltinResourceName(
|
||||
EBuiltInOps::CopyImageToImage3d,
|
||||
BuiltinCode::getExtension(BuiltinCode::ECodeType::Source))
|
||||
.c_str(),
|
||||
std::string(
|
||||
#include "shared/source/built_ins/kernels/copy_image_to_image3d.builtin_kernel"
|
||||
));
|
||||
|
||||
static RegisterEmbeddedResource registerFillImage1dSrc(
|
||||
createBuiltinResourceName(
|
||||
EBuiltInOps::FillImage1d,
|
||||
BuiltinCode::getExtension(BuiltinCode::ECodeType::Source))
|
||||
.c_str(),
|
||||
std::string(
|
||||
#include "shared/source/built_ins/kernels/fill_image1d.builtin_kernel"
|
||||
));
|
||||
|
||||
static RegisterEmbeddedResource registerFillImage2dSrc(
|
||||
createBuiltinResourceName(
|
||||
EBuiltInOps::FillImage2d,
|
||||
BuiltinCode::getExtension(BuiltinCode::ECodeType::Source))
|
||||
.c_str(),
|
||||
std::string(
|
||||
#include "shared/source/built_ins/kernels/fill_image2d.builtin_kernel"
|
||||
));
|
||||
|
||||
static RegisterEmbeddedResource registerFillImage3dSrc(
|
||||
createBuiltinResourceName(
|
||||
EBuiltInOps::FillImage3d,
|
||||
BuiltinCode::getExtension(BuiltinCode::ECodeType::Source))
|
||||
.c_str(),
|
||||
std::string(
|
||||
#include "shared/source/built_ins/kernels/fill_image3d.builtin_kernel"
|
||||
));
|
||||
|
||||
static RegisterEmbeddedResource registerAuxTranslationSrc(
|
||||
createBuiltinResourceName(
|
||||
EBuiltInOps::AuxTranslation,
|
||||
BuiltinCode::getExtension(BuiltinCode::ECodeType::Source))
|
||||
.c_str(),
|
||||
std::string(
|
||||
#include "shared/source/built_ins/kernels/aux_translation.builtin_kernel"
|
||||
));
|
||||
|
||||
} // namespace NEO
|
||||
102
shared/source/built_ins/sip.cpp
Normal file
102
shared/source/built_ins/sip.cpp
Normal file
@@ -0,0 +1,102 @@
|
||||
/*
|
||||
* Copyright (C) 2017-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/built_ins/sip.h"
|
||||
|
||||
#include "shared/source/built_ins/built_ins.h"
|
||||
#include "shared/source/device/device.h"
|
||||
#include "shared/source/execution_environment/execution_environment.h"
|
||||
#include "shared/source/helpers/debug_helpers.h"
|
||||
#include "shared/source/helpers/hw_helper.h"
|
||||
#include "shared/source/helpers/ptr_math.h"
|
||||
#include "shared/source/helpers/string.h"
|
||||
#include "shared/source/memory_manager/graphics_allocation.h"
|
||||
|
||||
#include "opencl/source/program/kernel_info.h"
|
||||
#include "opencl/source/program/program.h"
|
||||
|
||||
namespace NEO {
|
||||
|
||||
const size_t SipKernel::maxDbgSurfaceSize = 0x49c000; // proper value should be taken from compiler when it's ready
|
||||
|
||||
const char *getSipKernelCompilerInternalOptions(SipKernelType kernel) {
|
||||
switch (kernel) {
|
||||
default:
|
||||
DEBUG_BREAK_IF(true);
|
||||
return "";
|
||||
case SipKernelType::Csr:
|
||||
return "-cl-include-sip-csr";
|
||||
case SipKernelType::DbgCsr:
|
||||
return "-cl-include-sip-kernel-debug -cl-include-sip-csr -cl-set-bti:0";
|
||||
case SipKernelType::DbgCsrLocal:
|
||||
return "-cl-include-sip-kernel-local-debug -cl-include-sip-csr -cl-set-bti:0";
|
||||
}
|
||||
}
|
||||
|
||||
const char *getSipLlSrc(const Device &device) {
|
||||
#define M_DUMMY_LL_SRC \
|
||||
"define void @f() { \n" \
|
||||
" ret void \n" \
|
||||
"} \n" \
|
||||
"!opencl.compiler.options = !{!0} \n" \
|
||||
"!opencl.kernels = !{!1} \n" \
|
||||
"!0 = !{} \n" \
|
||||
"!1 = !{void()* @f, !2, !3, !4, !5, !6, !7} \n" \
|
||||
"!2 = !{!\"kernel_arg_addr_space\"} \n" \
|
||||
"!3 = !{!\"kernel_arg_access_qual\"} \n" \
|
||||
"!4 = !{!\"kernel_arg_type\"} \n" \
|
||||
"!5 = !{!\"kernel_arg_type_qual\"} \n" \
|
||||
"!6 = !{!\"kernel_arg_base_type\"} \n" \
|
||||
"!7 = !{!\"kernel_arg_name\"} \n"
|
||||
|
||||
constexpr const char *llDummySrc32 =
|
||||
"target datalayout = \"e-p:32:32:32\" \n"
|
||||
"target triple = \"spir\" \n" M_DUMMY_LL_SRC;
|
||||
|
||||
constexpr const char *llDummySrc64 =
|
||||
"target datalayout = \"e-p:64:64:64\" \n"
|
||||
"target triple = \"spir64\" \n" M_DUMMY_LL_SRC;
|
||||
|
||||
#undef M_DUMMY_LL_SRC
|
||||
|
||||
const uint32_t ptrSize = device.getDeviceInfo().force32BitAddressess ? 4 : sizeof(void *);
|
||||
|
||||
return (ptrSize == 8) ? llDummySrc64 : llDummySrc32;
|
||||
}
|
||||
|
||||
SipKernel::SipKernel(SipKernelType type, Program *sipProgram)
|
||||
: type(type) {
|
||||
program = sipProgram;
|
||||
}
|
||||
SipKernel::~SipKernel() {
|
||||
program->release();
|
||||
}
|
||||
|
||||
GraphicsAllocation *SipKernel::getSipAllocation() const {
|
||||
return program->getKernelInfo(size_t{0})->getGraphicsAllocation();
|
||||
}
|
||||
|
||||
const char *SipKernel::getBinary() const {
|
||||
auto kernelInfo = program->getKernelInfo(size_t{0});
|
||||
return reinterpret_cast<const char *>(ptrOffset(kernelInfo->heapInfo.pKernelHeap, kernelInfo->systemKernelOffset));
|
||||
}
|
||||
size_t SipKernel::getBinarySize() const {
|
||||
auto kernelInfo = program->getKernelInfo(size_t{0});
|
||||
return kernelInfo->heapInfo.pKernelHeader->KernelHeapSize - kernelInfo->systemKernelOffset;
|
||||
}
|
||||
|
||||
SipKernelType SipKernel::getSipKernelType(GFXCORE_FAMILY family, bool debuggingActive) {
|
||||
auto &hwHelper = HwHelper::get(family);
|
||||
return hwHelper.getSipKernelType(debuggingActive);
|
||||
}
|
||||
|
||||
GraphicsAllocation *SipKernel::getSipKernelAllocation(Device &device) {
|
||||
auto sipType = SipKernel::getSipKernelType(device.getHardwareInfo().platform.eRenderCoreFamily, device.isDebuggerActive());
|
||||
return device.getExecutionEnvironment()->getBuiltIns()->getSipKernel(sipType, device).getSipAllocation();
|
||||
}
|
||||
|
||||
} // namespace NEO
|
||||
51
shared/source/built_ins/sip.h
Normal file
51
shared/source/built_ins/sip.h
Normal file
@@ -0,0 +1,51 @@
|
||||
/*
|
||||
* Copyright (C) 2017-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include "shared/source/built_ins/sip_kernel_type.h"
|
||||
#include "shared/source/helpers/hw_info.h"
|
||||
|
||||
#include <memory>
|
||||
|
||||
namespace NEO {
|
||||
|
||||
class Device;
|
||||
class Program;
|
||||
class GraphicsAllocation;
|
||||
|
||||
const char *getSipKernelCompilerInternalOptions(SipKernelType kernel);
|
||||
|
||||
const char *getSipLlSrc(const Device &device);
|
||||
|
||||
class SipKernel {
|
||||
public:
|
||||
SipKernel(SipKernelType type, Program *sipProgram);
|
||||
SipKernel(const SipKernel &) = delete;
|
||||
SipKernel &operator=(const SipKernel &) = delete;
|
||||
SipKernel(SipKernel &&) = default;
|
||||
SipKernel &operator=(SipKernel &&) = default;
|
||||
virtual ~SipKernel();
|
||||
|
||||
const char *getBinary() const;
|
||||
|
||||
size_t getBinarySize() const;
|
||||
|
||||
SipKernelType getType() const {
|
||||
return type;
|
||||
}
|
||||
|
||||
static const size_t maxDbgSurfaceSize;
|
||||
|
||||
MOCKABLE_VIRTUAL GraphicsAllocation *getSipAllocation() const;
|
||||
static SipKernelType getSipKernelType(GFXCORE_FAMILY family, bool debuggingActive);
|
||||
static GraphicsAllocation *getSipKernelAllocation(Device &device);
|
||||
|
||||
protected:
|
||||
SipKernelType type = SipKernelType::COUNT;
|
||||
Program *program = nullptr;
|
||||
};
|
||||
} // namespace NEO
|
||||
@@ -7,6 +7,7 @@
|
||||
|
||||
#include "shared/source/command_stream/command_stream_receiver.h"
|
||||
|
||||
#include "shared/source/built_ins/built_ins.h"
|
||||
#include "shared/source/command_stream/experimental_command_buffer.h"
|
||||
#include "shared/source/command_stream/preemption.h"
|
||||
#include "shared/source/command_stream/scratch_space_controller.h"
|
||||
@@ -26,8 +27,6 @@
|
||||
#include "shared/source/utilities/cpuintrinsics.h"
|
||||
#include "shared/source/utilities/tag_allocator.h"
|
||||
|
||||
#include "opencl/source/built_ins/built_ins.h"
|
||||
|
||||
namespace NEO {
|
||||
|
||||
// Global table of CommandStreamReceiver factories for HW and tests
|
||||
|
||||
@@ -7,10 +7,10 @@
|
||||
|
||||
#include "shared/source/command_stream/preemption.h"
|
||||
|
||||
#include "shared/source/built_ins/built_ins.h"
|
||||
#include "shared/source/device/device.h"
|
||||
#include "shared/source/helpers/string.h"
|
||||
|
||||
#include "opencl/source/built_ins/built_ins.h"
|
||||
#include "opencl/source/helpers/dispatch_info.h"
|
||||
#include "opencl/source/kernel/kernel.h"
|
||||
|
||||
|
||||
@@ -5,12 +5,12 @@
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/built_ins/sip.h"
|
||||
#include "shared/source/command_stream/preemption.h"
|
||||
#include "shared/source/device/device.h"
|
||||
#include "shared/source/helpers/hw_helper.h"
|
||||
#include "shared/source/memory_manager/graphics_allocation.h"
|
||||
|
||||
#include "opencl/source/built_ins/sip.h"
|
||||
#include "opencl/source/command_queue/gpgpu_walker.h"
|
||||
|
||||
namespace NEO {
|
||||
|
||||
@@ -6,14 +6,13 @@
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include "shared/source/built_ins/sip.h"
|
||||
#include "shared/source/compiler_interface/compiler_cache.h"
|
||||
#include "shared/source/helpers/string.h"
|
||||
#include "shared/source/os_interface/os_library.h"
|
||||
#include "shared/source/utilities/arrayref.h"
|
||||
#include "shared/source/utilities/spinlock.h"
|
||||
|
||||
#include "opencl/source/built_ins/sip.h"
|
||||
|
||||
#include "cif/common/cif_main.h"
|
||||
#include "ocl_igc_interface/code_type.h"
|
||||
#include "ocl_igc_interface/fcl_ocl_device_ctx.h"
|
||||
|
||||
@@ -7,6 +7,7 @@
|
||||
|
||||
#include "shared/source/execution_environment/execution_environment.h"
|
||||
|
||||
#include "shared/source/built_ins/built_ins.h"
|
||||
#include "shared/source/compiler_interface/compiler_interface.h"
|
||||
#include "shared/source/compiler_interface/default_cache_config.h"
|
||||
#include "shared/source/debugger/debugger.h"
|
||||
@@ -14,7 +15,6 @@
|
||||
#include "shared/source/gmm_helper/gmm_helper.h"
|
||||
#include "shared/source/helpers/hw_helper.h"
|
||||
|
||||
#include "opencl/source/built_ins/built_ins.h"
|
||||
#include "opencl/source/memory_manager/os_agnostic_memory_manager.h"
|
||||
|
||||
namespace NEO {
|
||||
|
||||
@@ -5,12 +5,11 @@
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/built_ins/built_ins.h"
|
||||
#include "shared/source/command_stream/csr_definitions.h"
|
||||
#include "shared/source/command_stream/preemption.h"
|
||||
#include "shared/source/command_stream/preemption.inl"
|
||||
|
||||
#include "opencl/source/built_ins/built_ins.h"
|
||||
|
||||
#include <cstring>
|
||||
|
||||
namespace NEO {
|
||||
|
||||
@@ -6,11 +6,11 @@
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include "shared/source/built_ins/sip.h"
|
||||
#include "shared/source/command_stream/linear_stream.h"
|
||||
#include "shared/source/helpers/aux_translation.h"
|
||||
#include "shared/source/helpers/hw_cmds.h"
|
||||
|
||||
#include "opencl/source/built_ins/sip.h"
|
||||
#include "opencl/source/gen_common/aub_mapper.h"
|
||||
#include "opencl/source/mem_obj/buffer.h"
|
||||
|
||||
|
||||
Reference in New Issue
Block a user