Initial commit

Change-Id: I4bf1707bd3dfeadf2c17b0a7daff372b1925ebbd
This commit is contained in:
Brandon Fliflet
2017-12-21 00:45:38 +01:00
commit 7e9ad41290
1350 changed files with 233156 additions and 0 deletions

242
runtime/device/device.cpp Normal file
View File

@@ -0,0 +1,242 @@
/*
* Copyright (c) 2017, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#include "runtime/command_stream/command_stream_receiver.h"
#include "runtime/command_stream/device_command_stream.h"
#include "hw_cmds.h"
#include "runtime/device/device.h"
#include "runtime/device/device_vector.h"
#include "runtime/helpers/debug_helpers.h"
#include "runtime/helpers/options.h"
#include "runtime/memory_manager/memory_manager.h"
#include "runtime/os_interface/os_time.h"
#include "runtime/device/driver_info.h"
#include <cstring>
#include <map>
namespace OCLRT {
// Factory hook that createDeviceImpl() calls to build the device's
// PerformanceCounters object; defaults to PerformanceCounters::create.
// NOTE(review): presumably left reassignable so tests can inject a mock - confirm.
decltype(&PerformanceCounters::create) Device::createPerformanceCountersFunc = PerformanceCounters::create;
// Fills the vector with the Device object behind each of the numDevices API
// handles in devices, keeping the caller's order. Whatever castToObject
// returns for a handle is stored as-is.
DeviceVector::DeviceVector(const cl_device_id *devices,
                           cl_uint numDevices) {
    cl_uint index = 0;
    while (index < numDevices) {
        auto device = castToObject<Device>(devices[index]);
        this->push_back(device);
        ++index;
    }
}
void DeviceVector::toDeviceIDs(std::vector<cl_device_id> &devIDs) {
int i = 0;
devIDs.resize(this->size());
for (auto &it : *this) {
devIDs[i] = it;
i++;
}
}
// Factory for the command stream receiver used by createDeviceImpl().
// Only declared here; the definition is not part of this file.
CommandStreamReceiver *createCommandStream(const HardwareInfo *pHwInfo);
// Global table of hardware prefixes, indexed by product family
// (read by Device::getProductAbbrev). Only the first slot is initialized here.
const char *hardwarePrefix[IGFX_MAX_PRODUCT] = {
nullptr,
};
// Global table of family names, sized by the number of core families.
// Only the first slot is initialized here.
const char *familyName[IGFX_MAX_CORE] = {
nullptr,
};
// Global table of per-core-family "enabled" flags (all false by default).
// NOTE(review): presumably set by per-family registration code elsewhere - confirm.
bool familyEnabled[IGFX_MAX_CORE] = {
false,
};
// Constructs an uninitialized device bound to hwInfo. All owned pointers start
// out null, deviceInfo is zero-filled, and the extension string gets capacity
// reserved up front. The preemption mode is the hardware default unless the
// ForcePreemptionMode debug flag is non-zero, in which case the flag wins.
Device::Device(const HardwareInfo &hwInfo,
               bool isRootDevice)
    : memoryManager(nullptr), enabledClVersion(0u), hwInfo(hwInfo), isRoot(isRootDevice),
      commandStreamReceiver(nullptr), tagAddress(nullptr), tagAllocation(nullptr),
      osTime(nullptr), slmWindowStartAddress(nullptr) {
    memset(&deviceInfo, 0, sizeof(deviceInfo));
    deviceExtensions.reserve(1000);

    const auto forcedMode = DebugManager.flags.ForcePreemptionMode.get();
    if (forcedMode == 0) {
        preemptionMode = hwInfo.capabilityTable.defaultPreemptionMode;
    } else {
        preemptionMode = static_cast<PreemptionMode>(forcedMode);
    }
}
// Tears the device down step by step. The order below matters: the tag
// allocation is handed back to the memory manager first, performance counters
// are shut down, the command stream receiver is deleted, and the memory
// manager is asked to wait for deletions before it is itself deleted.
Device::~Device() {
// A device is expected to own a memory manager by the time it is destroyed
// (debug-only check; release builds fall through to the guarded code below).
DEBUG_BREAK_IF(nullptr == memoryManager);
if (memoryManager) {
memoryManager->freeGraphicsMemory(tagAllocation);
}
tagAllocation = nullptr;
if (performanceCounters) {
performanceCounters->shutdown();
}
delete commandStreamReceiver;
commandStreamReceiver = nullptr;
if (memoryManager) {
memoryManager->waitForDeletions();
}
delete memoryManager;
memoryManager = nullptr;
// Buffer allocated on demand by prepareSLMWindow(); still nullptr if never requested.
alignedFree(this->slmWindowStartAddress);
}
bool Device::createDeviceImpl(const HardwareInfo *pHwInfo,
bool isRootDevice, Device &outDevice) {
CommandStreamReceiver *commandStreamReceiver = createCommandStream(pHwInfo);
if (!commandStreamReceiver) {
return false;
}
outDevice.commandStreamReceiver = commandStreamReceiver;
if (!outDevice.memoryManager) {
outDevice.memoryManager = commandStreamReceiver->createMemoryManager(outDevice.deviceInfo.enabled64kbPages);
} else {
commandStreamReceiver->setMemoryManager(outDevice.memoryManager);
}
DEBUG_BREAK_IF(nullptr == outDevice.memoryManager);
outDevice.memoryManager->csr = commandStreamReceiver;
auto pTagAllocation = outDevice.memoryManager->allocateGraphicsMemory(
sizeof(uint32_t), sizeof(uint32_t));
if (!pTagAllocation) {
return false;
}
auto pTagMemory = reinterpret_cast<uint32_t *>(pTagAllocation->getUnderlyingBuffer());
// Initialize HW tag to a known value
*pTagMemory = DebugManager.flags.EnableNullHardware.get() ? -1 : initialHardwareTag;
commandStreamReceiver->setTagAllocation(pTagAllocation);
auto pDevice = &outDevice;
if (!pDevice->osTime) {
pDevice->osTime = OSTime::create(commandStreamReceiver->getOSInterface());
}
pDevice->driverInfo.reset(DriverInfo::create(commandStreamReceiver->getOSInterface()));
pDevice->memoryManager = outDevice.memoryManager;
pDevice->tagAddress = pTagMemory;
pDevice->initializeCaps();
pDevice->tagAllocation = pTagAllocation;
if (pDevice->osTime->getOSInterface()) {
if (pHwInfo->capabilityTable.instrumentationEnabled) {
pDevice->performanceCounters = createPerformanceCountersFunc(pDevice->osTime.get());
pDevice->performanceCounters->initialize(pHwInfo);
}
}
outDevice.memoryManager->setForce32BitAllocations(pDevice->getDeviceInfo().force32BitAddressess);
outDevice.memoryManager->device = pDevice;
if (pDevice->preemptionMode == PreemptionMode::MidThread) {
size_t requiredSize = pHwInfo->pSysInfo->CsrSizeInMb * MemoryConstants::megaByte;
size_t alignment = 256 * MemoryConstants::kiloByte;
auto preemptionAllocation = outDevice.memoryManager->allocateGraphicsMemory(requiredSize, alignment);
if (!preemptionAllocation) {
return false;
}
commandStreamReceiver->setPreemptionCsrAllocation(preemptionAllocation);
}
return true;
}
// Resolves the hardware info used to create a device: the caller's pointer
// when one is given, otherwise the first entry of platformDevices.
const HardwareInfo *Device::getDeviceInitHwInfo(const HardwareInfo *pHwInfoIn) {
    if (pHwInfoIn) {
        return pHwInfoIn;
    }
    return platformDevices[0];
}
// Returns the hardware description this device was constructed with.
const HardwareInfo &Device::getHardwareInfo() const {
    return this->hwInfo;
}
// Returns the workaround table referenced by the device's hardware info.
const WorkaroundTable *Device::getWaTable() const {
    return this->hwInfo.pWaTable;
}
// Read-only view of the capability table filled in by initializeCaps().
const DeviceInfo &Device::getDeviceInfo() const {
    return this->deviceInfo;
}
// Writable pointer to the device's capability table.
DeviceInfo *Device::getMutableDeviceInfo() {
    DeviceInfo *info = &this->deviceInfo;
    return info;
}
// Returns the SLM window buffer, allocating it on first use.
void *Device::getSLMWindowStartAddress() {
    this->prepareSLMWindow();
    void *windowStart = this->slmWindowStartAddress;
    return windowStart;
}
// Allocates the SLM window buffer if it has not been allocated yet; a buffer
// that already exists is left untouched. The buffer is freed in the destructor.
void Device::prepareSLMWindow() {
    if (this->slmWindowStartAddress != nullptr) {
        return;
    }
    this->slmWindowStartAddress = alignedMalloc(MemoryConstants::slmWindowSize, MemoryConstants::slmWindowAlignment);
}
// Looks up this device's entry in the global hardwarePrefix table using the
// platform's product family as the index.
const char *Device::getProductAbbrev() const {
    const auto productFamily = hwInfo.pPlatform->eProductFamily;
    return hardwarePrefix[productFamily];
}
double Device::getProfilingTimerResolution() {
return osTime->getDynamicDeviceTimerResolution(hwInfo);
}
// Returns the OpenCL version recorded in the hardware capability table
// (as opposed to getEnabledClVersion(), which a debug flag can override).
unsigned int Device::getSupportedClVersion() const {
    const auto &capabilities = hwInfo.capabilityTable;
    return capabilities.clVersionSupport;
}
/* We hide the retain and release function of BaseObject. */
// Increments the reference count for sub-devices only; a root device is a
// no-op here.
void Device::retain() {
    DEBUG_BREAK_IF(!isValid());
    /* According to CL spec, root devices are always available with
       1 reference. Only subdevices need reference. */
    if (isRoot) {
        return;
    }
    BaseObject<_cl_device_id>::retain();
}
// Drops one reference for sub-devices and forwards whatever BaseObject
// reports. For a root device the count is untouched and the returned handle
// is built with its second argument set to false.
unique_ptr_if_unused<Device> Device::release() {
    DEBUG_BREAK_IF(!isValid());
    /* According to CL spec, root devices are always available with
       1 reference. Only subdevices need reference. */
    if (isRoot) {
        return unique_ptr_if_unused<Device>(this, false);
    }
    return BaseObject<_cl_device_id>::release();
}
// Passes the platform's device ID to the capability table's isSimulation
// check and returns its answer.
bool Device::isSimulation() {
    const auto deviceId = hwInfo.pPlatform->usDeviceID;
    return hwInfo.capabilityTable.isSimulation(deviceId);
}
double Device::getPlatformHostTimerResolution() const {
if (osTime.get())
return osTime->getHostTimerResolution();
return 0.0;
}
// Returns the render core family recorded in the platform description.
GFXCORE_FAMILY Device::getRenderCoreFamily() const {
    const HardwareInfo &info = this->getHardwareInfo();
    return info.pPlatform->eRenderCoreFamily;
}
} // namespace OCLRT

182
runtime/device/device.h Normal file
View File

@@ -0,0 +1,182 @@
/*
* Copyright (c) 2017, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#pragma once
#include "runtime/api/cl_types.h"
#include "runtime/device/device_info_map.h"
#include "runtime/helpers/base_object.h"
#include "runtime/helpers/hw_info.h"
#include "runtime/os_interface/performance_counters.h"
#include <vector>
namespace OCLRT {
class CommandStreamReceiver;
class GraphicsAllocation;
class MemoryManager;
class OSTime;
class DriverInfo;
struct HardwareInfo;
// Maps the API handle type _cl_device_id to its runtime implementation class.
// The elaborated "class Device" also introduces the Device name, which is not
// declared before this point in the header.
template <>
struct OpenCLObjectMapper<_cl_device_id> {
typedef class Device DerivedType;
};
// Runtime object behind a cl_device_id handle. A Device owns its command
// stream receiver and memory manager (both deleted in the destructor), the
// hardware tag allocation, the OS timer, driver info, optional performance
// counters, and the capability table (DeviceInfo) reported through
// getDeviceInfo().
//
// Instances are created through the static create<T>() factory; the
// constructors are protected and copying is disabled.
class Device : public BaseObject<_cl_device_id> {
protected:
// Raw owning pointer; deleted in ~Device(). Declared first, so it is also
// the first member initialized by the constructor.
MemoryManager *memoryManager;
public:
// NOTE(review): presumably the magic value BaseObject uses to validate handles - confirm.
static const cl_ulong objectMagic = 0x8055832341AC8D08LL;
// Factory: allocates a T from the resolved hardware info (pHwInfo, or the
// default chosen by getDeviceInitHwInfo when pHwInfo is null) and runs
// createDeviceImpl on it. Returns nullptr, after deleting the object, when
// initialization fails; otherwise the caller receives the new device.
// NOTE(review): isRootDevice is passed to createDeviceImpl but not to T's
// constructor, so the constructor's own default applies (true for Device) -
// confirm this is intended.
template <typename T>
static T *create(const HardwareInfo *pHwInfo,
bool isRootDevice = true) {
pHwInfo = getDeviceInitHwInfo(pHwInfo);
T *device = new T(*pHwInfo);
if (false == createDeviceImpl(pHwInfo, isRootDevice, *device)) {
delete device;
return nullptr;
}
return device;
}
// Devices are not copyable.
Device &operator=(const Device &) = delete;
Device(const Device &) = delete;
~Device() override;
// API entry points
cl_int getDeviceInfo(cl_device_info paramName,
size_t paramValueSize,
void *paramValue,
size_t *paramValueSizeRet);
bool getDeviceAndHostTimer(uint64_t *deviceTimestamp, uint64_t *hostTimestamp) const;
bool getHostTimer(uint64_t *hostTimestamp) const;
// Helper functions
const HardwareInfo &getHardwareInfo() const;
const DeviceInfo &getDeviceInfo() const;
DeviceInfo *getMutableDeviceInfo();
MOCKABLE_VIRTUAL const WorkaroundTable *getWaTable() const;
// Returns the SLM window buffer, allocating it on first use.
void *getSLMWindowStartAddress();
void prepareSLMWindow();
// Overrides the force32BitAddressess entry of the capability table.
void setForce32BitAddressing(bool value) {
deviceInfo.force32BitAddressess = value;
}
// Dereferences commandStreamReceiver without a null check.
CommandStreamReceiver &getCommandStreamReceiver();
volatile uint32_t *getTagAddress() const;
const char *getProductAbbrev() const;
// This helper template is meant to simplify getDeviceInfo
template <cl_device_info Param>
void getCap(const void *&src,
size_t &size,
size_t &retSize);
// String counterpart of getCap: size/retSize include the terminating NUL.
template <cl_device_info Param>
void getStr(const void *&src,
size_t &size,
size_t &retSize);
MemoryManager *getMemoryManager() const;
/* We hide the retain and release function of BaseObject. */
void retain() override;
unique_ptr_if_unused<Device> release() override;
bool isRootDevice() const { return isRoot; }
OSTime *getOSTime() const { return osTime.get(); };
double getProfilingTimerResolution();
// Plain (non-atomic) counter of programs; see programCount below.
void increaseProgramCount() { programCount++; }
uint64_t getProgramCount() { return programCount; }
// OpenCL version actually exposed (set by initializeCaps, may be forced by a
// debug flag), versus the version the hardware table declares.
unsigned int getEnabledClVersion() const { return enabledClVersion; };
unsigned int getSupportedClVersion() const;
double getPlatformHostTimerResolution() const;
bool isSimulation();
void checkPriorityHints();
GFXCORE_FAMILY getRenderCoreFamily() const;
// May return nullptr: counters are only created when instrumentation is enabled.
PerformanceCounters *getPerformanceCounters() { return performanceCounters.get(); }
// Replaceable factory used by createDeviceImpl to build performanceCounters.
static decltype(&PerformanceCounters::create) createPerformanceCountersFunc;
PreemptionMode getPreemptionMode() { return preemptionMode; }
MOCKABLE_VIRTUAL const WhitelistedRegisters &getWhitelistedRegisters() { return hwInfo.capabilityTable.whitelistedRegisters; }
std::vector<unsigned int> simultaneousInterops;
// Backing storage for deviceInfo.deviceExtensions (which caches c_str()).
std::string deviceExtensions;
bool getEnabled64kbPages();
protected:
Device() = delete;
Device(const HardwareInfo &hwInfo,
bool isRootDevice = true);
// Second-phase initialization used by create(); returns false on failure.
static bool createDeviceImpl(const HardwareInfo *pHwInfo,
bool isRootDevice, Device &outDevice);
static const HardwareInfo *getDeviceInitHwInfo(const HardwareInfo *pHwInfoIn);
void initializeCaps();
void appendOSExtensions(std::string &deviceExtensions);
// NOTE: member order below is also the constructor's initialization order.
unsigned int enabledClVersion;
// Reference, not a copy: the HardwareInfo must outlive the device.
const HardwareInfo &hwInfo;
DeviceInfo deviceInfo;
const bool isRoot;
// Raw owning pointer; deleted in ~Device().
CommandStreamReceiver *commandStreamReceiver;
// Points into tagAllocation's underlying buffer.
volatile uint32_t *tagAddress;
// Freed through the memory manager in ~Device().
GraphicsAllocation *tagAllocation;
std::unique_ptr<OSTime> osTime;
std::unique_ptr<DriverInfo> driverInfo;
std::unique_ptr<PerformanceCounters> performanceCounters;
uint64_t programCount = 0u;
// Allocated on demand by prepareSLMWindow(); released with alignedFree.
void *slmWindowStartAddress;
// Backing storage for deviceInfo.builtInKernels.
std::string exposedBuiltinKernels = "";
PreemptionMode preemptionMode;
};
template <cl_device_info Param>
inline void Device::getCap(const void *&src,
size_t &size,
size_t &retSize) {
src = &DeviceInfoTable::Map<Param>::getValue(deviceInfo);
retSize = size = DeviceInfoTable::Map<Param>::size;
}
// Returns the device's command stream receiver. The pointer is dereferenced
// unconditionally, so it must already have been set by createDeviceImpl.
inline CommandStreamReceiver &Device::getCommandStreamReceiver() {
    CommandStreamReceiver *csr = this->commandStreamReceiver;
    return *csr;
}
inline volatile uint32_t *Device::getTagAddress() const {
return tagAddress;
}
// Returns the memory manager owned by this device (may be nullptr before
// initialization).
inline MemoryManager *Device::getMemoryManager() const {
    return this->memoryManager;
}
} // namespace OCLRT

View File

@@ -0,0 +1,378 @@
/*
* Copyright (c) 2017, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#include "runtime/command_stream/command_stream_receiver.h"
#include <algorithm>
#include "runtime/device/device.h"
#include "runtime/helpers/basic_math.h"
#include "hw_info.h"
#include "runtime/helpers/options.h"
#include "runtime/memory_manager/memory_manager.h"
#include "runtime/sharings/sharing_factory.h"
#include "runtime/device/driver_info.h"
#include "CL/cl_ext_intel.h"
#include "runtime/os_interface/os_interface.h"
namespace OCLRT {
// Family-name table, defined in another translation unit.
extern const char *familyName[];
// File-level backing storage for the string pointers stored in DeviceInfo by
// Device::initializeCaps(). These buffers are shared by every Device in the
// process: name and driverVersion are rewritten on each initializeCaps() call.
static std::string name(128, '\0');
static std::string vendor = "Intel(R) Corporation";
static std::string profile = "FULL_PROFILE";
static std::string spirVersions = "1.2 ";
// Reported as the IL version when OpenCL 2.1 is enabled.
static const char *spirvVersion = "SPIR-V_1.0 ";
static std::string driverVersion = NEO_DRIVER_VERSION;
// Extensions advertised unconditionally; every entry, including the last,
// ends with a space so further extensions can simply be appended.
static const std::string deviceExtensionsList = "cl_khr_3d_image_writes "
"cl_khr_byte_addressable_store "
"cl_khr_fp16 "
"cl_khr_depth_images "
"cl_khr_global_int32_base_atomics "
"cl_khr_global_int32_extended_atomics "
"cl_khr_icd "
"cl_khr_image2d_from_buffer "
"cl_khr_local_int32_base_atomics "
"cl_khr_local_int32_extended_atomics "
"cl_intel_subgroups "
"cl_intel_required_subgroup_size "
"cl_intel_subgroups_short "
"cl_khr_spir "
"cl_intel_accelerator "
"cl_intel_media_block_io "
"cl_intel_driver_diagnostics "
"cl_intel_device_side_avc_motion_estimation ";
// Not static: this pointer has external linkage and is not const itself.
const char *builtInKernels = ""; // the "always available" (extension-independent) builtin kernels
// Advertises cl_khr_priority_hints when preemption is available, i.e. when the
// ForcePreemptionMode debug flag is above Disabled or the device's preemption
// mode is ThreadGroup or higher. Compiles to a no-op unless
// SUPPORT_PRIORITY_HINTS is defined.
void Device::checkPriorityHints() {
#ifdef SUPPORT_PRIORITY_HINTS
    if (DebugManager.flags.ForcePreemptionMode.get() > PreemptionMode::Disabled ||
        (preemptionMode >= PreemptionMode::ThreadGroup)) {
        deviceExtensions += "cl_khr_priority_hints ";
        // deviceInfo caches deviceExtensions.c_str(); appending may reallocate
        // the string, so refresh the cached pointer to keep it valid.
        deviceInfo.deviceExtensions = deviceExtensions.c_str();
        deviceInfo.priorityHintsSupported = true;
    }
#endif
}
// Decides whether 64KB pages are used. An Enable64kbpages debug flag other
// than -1 forces the answer (non-zero means enabled); otherwise both the OS
// interface and the hardware capability table must allow it.
bool Device::getEnabled64kbPages() {
    const auto forcedSetting = DebugManager.flags.Enable64kbpages.get();
    if (forcedSetting != -1) {
        // force debug settings
        return forcedSetting != 0;
    }
    // assign value according to os and hw configuration
    return OSInterface::osEnabled64kbPages && hwInfo.capabilityTable.ftr64KBpages;
}
// Fills the device's capability table (deviceInfo), the extension string and
// the exposed built-in kernel list from the hardware info, the debug flags and
// the driver info.
//
// Preconditions visible in this function: the memory manager and the OS timer
// must already exist (both are dereferenced), and hwInfo's platform, system
// info and SKU tables must be valid.
//
// String fields of deviceInfo point either at file-level statics or at this
// device's deviceExtensions / exposedBuiltinKernels members, so they stay
// valid only as long as those strings are not modified afterwards.
void Device::initializeCaps() {
    deviceExtensions.clear();
    deviceExtensions.append(deviceExtensionsList);

    auto addressing32bitAllowed = is32BitOsAllocatorAvailable;
    if (is32bit) {
        addressing32bitAllowed = false;
    }

    // Add our graphics family name to the device name
    std::string tempName = "Intel(R) ";
    tempName += familyName[hwInfo.pPlatform->eRenderCoreFamily];
    tempName += " HD Graphics NEO";

    DEBUG_BREAK_IF(tempName.size() > name.size());
    name = tempName;
    driverVersion = NEO_DRIVER_VERSION;

    // Driver info, when present, may replace the default name and version.
    if (driverInfo) {
        name.assign(driverInfo.get()->getDeviceName(tempName).c_str());
        driverVersion.assign(driverInfo.get()->getVersion(driverVersion).c_str());
    }

    deviceInfo.name = name.c_str();
    deviceInfo.driverVersion = driverVersion.c_str();
    deviceInfo.vendor = vendor.c_str();
    deviceInfo.profile = profile.c_str();
    deviceInfo.ilVersion = "";

    enabledClVersion = hwInfo.capabilityTable.clVersionSupport;
    if (DebugManager.flags.ForceOCLVersion.get() != 0) {
        enabledClVersion = DebugManager.flags.ForceOCLVersion.get();
    }

    switch (enabledClVersion) {
    case 21:
        deviceInfo.clVersion = "OpenCL 2.1 NEO ";
        deviceInfo.clCVersion = "OpenCL C 2.1 ";
        deviceInfo.ilVersion = spirvVersion;
        addressing32bitAllowed = false;
        break;
    case 20:
        deviceInfo.clVersion = "OpenCL 2.0 NEO ";
        deviceInfo.clCVersion = "OpenCL C 2.0 ";
        addressing32bitAllowed = false;
        break;
    case 12:
    default:
        deviceInfo.clVersion = "OpenCL 1.2 NEO ";
        deviceInfo.clCVersion = "OpenCL C 1.2 ";
        break;
    }
    deviceInfo.platformLP = (hwInfo.capabilityTable.clVersionSupport == 12);
    deviceInfo.cpuCopyAllowed = true;
    deviceInfo.spirVersions = spirVersions.c_str();

    if (enabledClVersion >= 21) {
        deviceInfo.independentForwardProgress = true;
        deviceExtensions += "cl_khr_subgroups ";
        deviceExtensions += "cl_khr_il_program ";
    } else {
        deviceInfo.independentForwardProgress = false;
    }

    if (hwInfo.capabilityTable.ftrSupportsFP64) {
        deviceExtensions += "cl_khr_fp64 ";
    }

    if (DebugManager.flags.EnableNV12.get()) {
        deviceExtensions += "cl_intel_planar_yuv ";
        deviceInfo.nv12Extension = true;
    }
    if (DebugManager.flags.EnablePackedYuv.get()) {
        deviceExtensions += "cl_intel_packed_yuv ";
        deviceInfo.packedYuvExtension = true;
    }
    if (DebugManager.flags.EnableIntelVme.get()) {
        deviceExtensions += "cl_intel_motion_estimation ";
        deviceInfo.vmeExtension = true;
    }
    if (DebugManager.flags.EnableIntelAdvancedVme.get()) {
        deviceExtensions += "cl_intel_advanced_motion_estimation ";
    }

    deviceExtensions += sharingFactory.getExtensions();
    simultaneousInterops = {0};
    appendOSExtensions(deviceExtensions);

    deviceInfo.deviceExtensions = deviceExtensions.c_str();

    // Built-in kernels depend on which VME extensions ended up in the list.
    exposedBuiltinKernels = builtInKernels;
    if (deviceExtensions.find("cl_intel_motion_estimation") != std::string::npos) {
        exposedBuiltinKernels.append("block_motion_estimate_intel;");
    }
    if (deviceExtensions.find("cl_intel_advanced_motion_estimation") != std::string::npos) {
        auto advVmeKernels = "block_advanced_motion_estimate_check_intel;block_advanced_motion_estimate_bidirectional_check_intel;";
        exposedBuiltinKernels.append(advVmeKernels);
    }
    deviceInfo.builtInKernels = exposedBuiltinKernels.c_str();

    deviceInfo.deviceType = CL_DEVICE_TYPE_GPU;
    deviceInfo.vendorId = 0x8086;
    deviceInfo.endianLittle = 1;
    deviceInfo.hostUnifiedMemory = CL_TRUE;
    deviceInfo.deviceAvailable = CL_TRUE;
    deviceInfo.compilerAvailable = CL_TRUE;
    deviceInfo.preferredVectorWidthChar = 16;
    deviceInfo.preferredVectorWidthShort = 8;
    deviceInfo.preferredVectorWidthInt = 4;
    deviceInfo.preferredVectorWidthLong = 1;
    deviceInfo.preferredVectorWidthFloat = 1;
    deviceInfo.preferredVectorWidthDouble = 1;
    deviceInfo.preferredVectorWidthHalf = 8;
    deviceInfo.nativeVectorWidthChar = 16;
    deviceInfo.nativeVectorWidthShort = 8;
    deviceInfo.nativeVectorWidthInt = 4;
    deviceInfo.nativeVectorWidthLong = 1;
    deviceInfo.nativeVectorWidthFloat = 1;
    deviceInfo.nativeVectorWidthDouble = 1;
    deviceInfo.nativeVectorWidthHalf = 8;
    deviceInfo.maxReadImageArgs = 128;
    deviceInfo.maxWriteImageArgs = 128;
    deviceInfo.maxReadWriteImageArgs = 0;
    deviceInfo.maxParameterSize = 1024;
    deviceInfo.executionCapabilities = CL_EXEC_KERNEL;

    deviceInfo.addressBits = 64;

    //copy system info to prevent misaligned reads
    const auto systemInfo = *hwInfo.pSysInfo;

    deviceInfo.globalMemCachelineSize = 64;
    deviceInfo.globalMemCacheSize = systemInfo.L3BankCount * 128 * KB;

    // Global memory: the smaller of shared system memory and the application
    // address range, scaled to 80%.
    deviceInfo.globalMemSize = (cl_ulong)getMemoryManager()->getSystemSharedMemory();
    deviceInfo.globalMemSize = std::min(deviceInfo.globalMemSize, (cl_ulong)(getMemoryManager()->getMaxApplicationAddress() + 1));
    deviceInfo.globalMemSize = (cl_ulong)((double)deviceInfo.globalMemSize * 0.8);

    if (DebugManager.flags.Force32bitAddressing.get() || addressing32bitAllowed || is32bit) {
        deviceInfo.globalMemSize = std::min(deviceInfo.globalMemSize, (uint64_t)(4 * GB * 0.8));
        deviceInfo.addressBits = 32;
        deviceInfo.force32BitAddressess = is64bit;
    }

    deviceInfo.globalMemSize = alignDown(deviceInfo.globalMemSize, MemoryConstants::pageSize);

    deviceInfo.globalMemCacheType = CL_READ_WRITE_CACHE;
    deviceInfo.profilingTimerResolution = static_cast<size_t>(getProfilingTimerResolution());
    deviceInfo.memBaseAddressAlign = 1024;
    deviceInfo.minDataTypeAlignSize = 128;

    deviceInfo.maxOnDeviceEvents = 1024;
    deviceInfo.maxOnDeviceQueues = 1;
    deviceInfo.queueOnDeviceMaxSize = 64 * MB;
    deviceInfo.queueOnDevicePreferredSize = 128 * KB;
    deviceInfo.queueOnDeviceProperties = CL_QUEUE_PROFILING_ENABLE | CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE;

    deviceInfo.preferredInteropUserSync = 1u;

    // OpenCL 1.2 requires 128MB minimum
    auto maxMemAllocSize = std::max((uint64_t)(deviceInfo.globalMemSize / 2), (uint64_t)(128 * MB));

    // With stateful messages we have an allocation cap of 4GB.
    // The reason to subtract 8KB is that the driver may pad the buffer with
    // additional pages for over-fetching.
    deviceInfo.maxMemAllocSize = std::min((uint64_t)((4 * GB) - (8 * KB)), maxMemAllocSize);

    deviceInfo.maxConstantBufferSize = deviceInfo.maxMemAllocSize;

    static const int maxPixelSize = 16;
    deviceInfo.imageMaxBufferSize = static_cast<size_t>(deviceInfo.maxMemAllocSize / maxPixelSize);

    deviceInfo.maxWorkItemDimensions = 3;
    deviceInfo.maxComputUnits = systemInfo.EUCount;
    deviceInfo.maxConstantArgs = 8;
    deviceInfo.maxNumEUsPerSubSlice = 0;
    deviceInfo.numThreadsPerEU = 0;

    auto simdSizeUsed = DebugManager.flags.UseMaxSimdSizeToDeduceMaxWorkgroupSize.get() ? 32 : 8;
    // Upper bound for the work-group size; also used by the sanity check below.
    const auto maxWkgSize = DebugManager.flags.UseMaxSimdSizeToDeduceMaxWorkgroupSize.get() ? 1024u : 256u;

    if (systemInfo.EUCount > 0) {
        if (systemInfo.EuCountPerPoolMin == 0 || hwInfo.pSkuTable->ftrPooledEuEnabled == 0) {
            // Guard against a zero sub-slice count: dividing by it would crash.
            // In that case all EUs are treated as a single sub-slice.
            DEBUG_BREAK_IF(systemInfo.SubSliceCount == 0);
            deviceInfo.maxNumEUsPerSubSlice = (systemInfo.SubSliceCount > 0)
                                                  ? (systemInfo.EUCount / systemInfo.SubSliceCount)
                                                  : systemInfo.EUCount;
        } else {
            deviceInfo.maxNumEUsPerSubSlice = systemInfo.EuCountPerPoolMin;
        }
        deviceInfo.numThreadsPerEU = systemInfo.ThreadCount / systemInfo.EUCount;

        auto maxWS = deviceInfo.maxNumEUsPerSubSlice * deviceInfo.numThreadsPerEU * simdSizeUsed;

        maxWS = Math::prevPowerOfTwo(uint32_t(maxWS));
        deviceInfo.maxWorkGroupSize = std::min(uint32_t(maxWS), maxWkgSize);
    } else {
        //default value if systemInfo not provided
        deviceInfo.maxWorkGroupSize = 128;
    }
    // The cap is 256 by default and 1024 when the max-SIMD debug flag is set.
    DEBUG_BREAK_IF(deviceInfo.maxWorkGroupSize > maxWkgSize);

    // calculate a maximum number of subgroups in a workgroup (for the required SIMD size)
    deviceInfo.maxNumOfSubGroups = static_cast<uint32_t>(deviceInfo.maxWorkGroupSize / simdSizeUsed);

    deviceInfo.maxWorkItemSizes[0] = deviceInfo.maxWorkGroupSize;
    deviceInfo.maxWorkItemSizes[1] = deviceInfo.maxWorkGroupSize;
    deviceInfo.maxWorkItemSizes[2] = deviceInfo.maxWorkGroupSize;
    deviceInfo.maxSamplers = 16;

    deviceInfo.singleFpConfig = CL_FP_ROUND_TO_NEAREST |
                                CL_FP_ROUND_TO_ZERO |
                                CL_FP_ROUND_TO_INF |
                                CL_FP_INF_NAN |
                                CL_FP_FMA |
                                CL_FP_DENORM;
    deviceInfo.singleFpConfig |= static_cast<cl_device_fp_config>(
        hwInfo.capabilityTable.ftrSupports64BitMath
            ? CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT
            : 0);

    deviceInfo.halfFpConfig = CL_FP_ROUND_TO_NEAREST |
                              CL_FP_ROUND_TO_ZERO |
                              CL_FP_ROUND_TO_INF |
                              CL_FP_INF_NAN |
                              CL_FP_DENORM |
                              CL_FP_FMA;

    deviceInfo.doubleFpConfig = hwInfo.capabilityTable.ftrSupportsFP64
                                    ? CL_FP_ROUND_TO_NEAREST |
                                          CL_FP_ROUND_TO_ZERO |
                                          CL_FP_ROUND_TO_INF |
                                          CL_FP_INF_NAN |
                                          CL_FP_DENORM |
                                          CL_FP_FMA
                                    : 0;

    printDebugString(DebugManager.flags.PrintDebugMessages.get(), stderr, "hwInfo: {%d, %d}: (%d, %d, %d)\n",
                     systemInfo.EUCount,
                     systemInfo.ThreadCount,
                     systemInfo.MaxEuPerSubSlice,
                     systemInfo.MaxSlicesSupported,
                     systemInfo.MaxSubSlicesSupported);

    if (systemInfo.EUCount > 0) {
        deviceInfo.computeUnitsUsedForScratch = systemInfo.MaxSubSlicesSupported * systemInfo.MaxEuPerSubSlice * systemInfo.ThreadCount / systemInfo.EUCount;
    }

    printDebugString(DebugManager.flags.PrintDebugMessages.get(), stderr, "computeUnitsUsedForScratch: %d\n", deviceInfo.computeUnitsUsedForScratch);

    deviceInfo.localMemType = CL_LOCAL;
    deviceInfo.localMemSize = 64 << 10;

    deviceInfo.imageSupport = CL_TRUE;
    deviceInfo.image2DMaxWidth = 16384;
    deviceInfo.image2DMaxHeight = 16384;
    deviceInfo.image3DMaxWidth = 16384;
    deviceInfo.image3DMaxHeight = 16384;
    deviceInfo.image3DMaxDepth = 2048;
    deviceInfo.imageMaxArraySize = 2048;

    // cl_khr_image2d_from_buffer
    deviceInfo.imagePitchAlignment = 4;
    deviceInfo.imageBaseAddressAlignment = 4;

    deviceInfo.maxPipeArgs = 16;
    deviceInfo.pipeMaxPacketSize = 1024;
    deviceInfo.pipeMaxActiveReservations = 1;
    deviceInfo.queueOnHostProperties = CL_QUEUE_PROFILING_ENABLE | CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE;

    deviceInfo.printfBufferSize = 4 * 1024 * 1024;
    deviceInfo.maxClockFrequency = hwInfo.capabilityTable.maxRenderFrequency;

    deviceInfo.maxSubGroups[0] = 8;
    deviceInfo.maxSubGroups[1] = 16;
    deviceInfo.maxSubGroups[2] = 32;

    deviceInfo.linkerAvailable = true;

    // SVM: coarse-grain buffers whenever SVM is supported; fine-grain buffers
    // and atomics additionally require coherency support.
    deviceInfo.svmCapabilities = hwInfo.capabilityTable.ftrSvm * CL_DEVICE_SVM_COARSE_GRAIN_BUFFER;
    deviceInfo.svmCapabilities |= static_cast<cl_device_svm_capabilities>(
        hwInfo.capabilityTable.ftrSvm * hwInfo.capabilityTable.ftrSupportsCoherency *
        (CL_DEVICE_SVM_FINE_GRAIN_BUFFER | CL_DEVICE_SVM_ATOMICS));

    deviceInfo.preemptionSupported = false;
    deviceInfo.priorityHintsSupported = false;

    deviceInfo.maxGlobalVariableSize = 64 * 1024;
    deviceInfo.globalVariablePreferredTotalSize = (size_t)deviceInfo.maxMemAllocSize;

    deviceInfo.planarYuvMaxWidth = 16384;
    deviceInfo.planarYuvMaxHeight = 16380;

    deviceInfo.vmeAvcSupportsPreemption = hwInfo.capabilityTable.ftrSupportsVmeAvcPreemption;
    deviceInfo.vmeAvcSupportsTextureSampler = hwInfo.capabilityTable.ftrSupportsVmeAvcTextureSampler;
    deviceInfo.vmeAvcVersion = CL_AVC_ME_VERSION_1_INTEL;
    deviceInfo.vmeVersion = CL_ME_VERSION_ADVANCED_VER_2_INTEL;
    deviceInfo.platformHostTimerResolution = getPlatformHostTimerResolution();

    deviceInfo.internalDriverVersion = CL_DEVICE_DRIVER_VERSION_INTEL_NEO1;

    deviceInfo.enabled64kbPages = getEnabled64kbPages();

#ifdef SUPPORT_PRIORITY_HINTS
    checkPriorityHints();
    // checkPriorityHints() may append to deviceExtensions, which can
    // reallocate the string; refresh the cached pointer so it stays valid
    // and includes the appended extension.
    deviceInfo.deviceExtensions = deviceExtensions.c_str();
#endif
}
} // namespace OCLRT

View File

@@ -0,0 +1,249 @@
/*
* Copyright (c) 2017, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#include "runtime/device/device.h"
#include "runtime/device/device_vector.h"
#include "runtime/device/device_info.h"
#include "runtime/device/device_info_map.h"
#include "runtime/helpers/get_info.h"
#include "runtime/platform/platform.h"
#include "runtime/os_interface/os_time.h"
using DeviceInfoTable::Map;
namespace OCLRT {
template <cl_device_info Param>
inline void Device::getStr(const void *&src,
size_t &size,
size_t &retSize) {
src = Map<Param>::getValue(deviceInfo);
retSize = size = strlen(Map<Param>::getValue(deviceInfo)) + 1;
}
// CL_DEVICE_MAX_WORK_ITEM_SIZES is an array, so it is returned whole rather
// than through the generic single-value mapping.
template <>
inline void Device::getCap<CL_DEVICE_MAX_WORK_ITEM_SIZES>(const void *&src,
                                                          size_t &size,
                                                          size_t &retSize) {
    const size_t arrayBytes = sizeof(deviceInfo.maxWorkItemSizes);
    src = deviceInfo.maxWorkItemSizes;
    size = arrayBytes;
    retSize = arrayBytes;
}
// CL_DEVICE_PARTITION_PROPERTIES: the query result is a single
// cl_device_partition_property with value 0. The function-local static gives
// src a location that stays valid after this call returns.
template <>
inline void Device::getCap<CL_DEVICE_PARTITION_PROPERTIES>(const void *&src,
                                                           size_t &size,
                                                           size_t &retSize) {
    static cl_device_partition_property property = 0;
    src = &property;
    // Report the size of the value itself, not of a pointer to it.
    retSize = size = sizeof(cl_device_partition_property);
}
// CL_DEVICE_PLATFORM: returns the platform handle obtained from platform().
template <>
inline void Device::getCap<CL_DEVICE_PLATFORM>(const void *&src,
                                               size_t &size,
                                               size_t &retSize) {
    // The caller receives a pointer to the handle, so the handle itself must
    // live in persistent storage; it is captured once, on first use.
    static cl_platform_id platformHandle = platform();
    src = &platformHandle;
    size = sizeof(cl_platform_id);
    retSize = size;
}
// CL_DEVICE_SUB_GROUP_SIZES_INTEL: returns the whole maxSubGroups array.
template <>
inline void Device::getCap<CL_DEVICE_SUB_GROUP_SIZES_INTEL>(const void *&src,
                                                            size_t &size,
                                                            size_t &retSize) {
    const size_t arrayBytes = sizeof(deviceInfo.maxSubGroups);
    src = deviceInfo.maxSubGroups;
    size = arrayBytes;
    retSize = arrayBytes;
}
// Looks up a device property by its cl_device_info selector and hands it to ::getInfo.
//   paramName         - selector of the property to read
//   paramValueSize    - forwarded to ::getInfo together with paramValue
//   paramValue        - destination forwarded to ::getInfo
//   paramValueSizeRet - optional; when non-null it receives retSize for the selector
// Returns whatever ::getInfo returns for the chosen source pointer and size.
// A selector with no matching case, or a guarded case whose condition is false, leaves
// src == nullptr and srcSize == retSize == 0 when ::getInfo is called.
cl_int Device::getDeviceInfo(cl_device_info paramName,
size_t paramValueSize,
void *paramValue,
size_t *paramValueSizeRet) {
cl_int retVal;
size_t srcSize = 0;
size_t retSize = 0;
// Scratch storage for values computed here; src may point at it until the function returns.
cl_uint param;
const void *src = nullptr;
// Selectors dispatched to getCap/getStr below are resolved at compile time per selector.
// clang-format off
switch (paramName) {
case CL_DEVICE_ADDRESS_BITS: getCap<CL_DEVICE_ADDRESS_BITS >(src, srcSize, retSize); break;
case CL_DEVICE_AVAILABLE: getCap<CL_DEVICE_AVAILABLE >(src, srcSize, retSize); break;
case CL_DEVICE_AVC_ME_VERSION_INTEL: getCap<CL_DEVICE_AVC_ME_VERSION_INTEL >(src, srcSize, retSize); break;
case CL_DEVICE_AVC_ME_SUPPORTS_TEXTURE_SAMPLER_USE_INTEL: getCap<CL_DEVICE_AVC_ME_SUPPORTS_TEXTURE_SAMPLER_USE_INTEL >(src, srcSize, retSize); break;
case CL_DEVICE_AVC_ME_SUPPORTS_PREEMPTION_INTEL: getCap<CL_DEVICE_AVC_ME_SUPPORTS_PREEMPTION_INTEL >(src, srcSize, retSize); break;
case CL_DEVICE_BUILT_IN_KERNELS: getStr<CL_DEVICE_BUILT_IN_KERNELS >(src, srcSize, retSize); break;
case CL_DEVICE_COMPILER_AVAILABLE: getCap<CL_DEVICE_COMPILER_AVAILABLE >(src, srcSize, retSize); break;
case CL_DEVICE_DOUBLE_FP_CONFIG: getCap<CL_DEVICE_DOUBLE_FP_CONFIG >(src, srcSize, retSize); break;
case CL_DEVICE_ENDIAN_LITTLE: getCap<CL_DEVICE_ENDIAN_LITTLE >(src, srcSize, retSize); break;
case CL_DEVICE_ERROR_CORRECTION_SUPPORT: getCap<CL_DEVICE_ERROR_CORRECTION_SUPPORT >(src, srcSize, retSize); break;
case CL_DEVICE_EXECUTION_CAPABILITIES: getCap<CL_DEVICE_EXECUTION_CAPABILITIES >(src, srcSize, retSize); break;
case CL_DEVICE_EXTENSIONS: getStr<CL_DEVICE_EXTENSIONS >(src, srcSize, retSize); break;
case CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE: getCap<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE >(src, srcSize, retSize); break;
case CL_DEVICE_GLOBAL_MEM_CACHE_SIZE: getCap<CL_DEVICE_GLOBAL_MEM_CACHE_SIZE >(src, srcSize, retSize); break;
case CL_DEVICE_GLOBAL_MEM_CACHE_TYPE: getCap<CL_DEVICE_GLOBAL_MEM_CACHE_TYPE >(src, srcSize, retSize); break;
case CL_DEVICE_GLOBAL_MEM_SIZE: getCap<CL_DEVICE_GLOBAL_MEM_SIZE >(src, srcSize, retSize); break;
case CL_DEVICE_GLOBAL_VARIABLE_PREFERRED_TOTAL_SIZE: getCap<CL_DEVICE_GLOBAL_VARIABLE_PREFERRED_TOTAL_SIZE >(src, srcSize, retSize); break;
case CL_DEVICE_HALF_FP_CONFIG: getCap<CL_DEVICE_HALF_FP_CONFIG >(src, srcSize, retSize); break;
case CL_DEVICE_HOST_UNIFIED_MEMORY: getCap<CL_DEVICE_HOST_UNIFIED_MEMORY >(src, srcSize, retSize); break;
case CL_DEVICE_IL_VERSION: getStr<CL_DEVICE_IL_VERSION >(src, srcSize, retSize); break;
case CL_DEVICE_IMAGE2D_MAX_HEIGHT: getCap<CL_DEVICE_IMAGE2D_MAX_HEIGHT >(src, srcSize, retSize); break;
case CL_DEVICE_IMAGE2D_MAX_WIDTH: getCap<CL_DEVICE_IMAGE2D_MAX_WIDTH >(src, srcSize, retSize); break;
case CL_DEVICE_IMAGE3D_MAX_DEPTH: getCap<CL_DEVICE_IMAGE3D_MAX_DEPTH >(src, srcSize, retSize); break;
case CL_DEVICE_IMAGE3D_MAX_HEIGHT: getCap<CL_DEVICE_IMAGE3D_MAX_HEIGHT >(src, srcSize, retSize); break;
case CL_DEVICE_IMAGE3D_MAX_WIDTH: getCap<CL_DEVICE_IMAGE3D_MAX_WIDTH >(src, srcSize, retSize); break;
case CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT: getCap<CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT >(src, srcSize, retSize); break;
case CL_DEVICE_IMAGE_MAX_ARRAY_SIZE: getCap<CL_DEVICE_IMAGE_MAX_ARRAY_SIZE >(src, srcSize, retSize); break;
case CL_DEVICE_IMAGE_MAX_BUFFER_SIZE: getCap<CL_DEVICE_IMAGE_MAX_BUFFER_SIZE >(src, srcSize, retSize); break;
case CL_DEVICE_IMAGE_PITCH_ALIGNMENT: getCap<CL_DEVICE_IMAGE_PITCH_ALIGNMENT >(src, srcSize, retSize); break;
case CL_DEVICE_IMAGE_SUPPORT: getCap<CL_DEVICE_IMAGE_SUPPORT >(src, srcSize, retSize); break;
case CL_DEVICE_LINKER_AVAILABLE: getCap<CL_DEVICE_LINKER_AVAILABLE >(src, srcSize, retSize); break;
case CL_DEVICE_LOCAL_MEM_SIZE: getCap<CL_DEVICE_LOCAL_MEM_SIZE >(src, srcSize, retSize); break;
case CL_DEVICE_LOCAL_MEM_TYPE: getCap<CL_DEVICE_LOCAL_MEM_TYPE >(src, srcSize, retSize); break;
case CL_DEVICE_MAX_CLOCK_FREQUENCY: getCap<CL_DEVICE_MAX_CLOCK_FREQUENCY >(src, srcSize, retSize); break;
case CL_DEVICE_MAX_COMPUTE_UNITS: getCap<CL_DEVICE_MAX_COMPUTE_UNITS >(src, srcSize, retSize); break;
case CL_DEVICE_MAX_CONSTANT_ARGS: getCap<CL_DEVICE_MAX_CONSTANT_ARGS >(src, srcSize, retSize); break;
case CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE: getCap<CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE >(src, srcSize, retSize); break;
case CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE: getCap<CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE >(src, srcSize, retSize); break;
case CL_DEVICE_MAX_MEM_ALLOC_SIZE: getCap<CL_DEVICE_MAX_MEM_ALLOC_SIZE >(src, srcSize, retSize); break;
case CL_DEVICE_MAX_NUM_SUB_GROUPS: getCap<CL_DEVICE_MAX_NUM_SUB_GROUPS >(src, srcSize, retSize); break;
case CL_DEVICE_MAX_ON_DEVICE_EVENTS: getCap<CL_DEVICE_MAX_ON_DEVICE_EVENTS >(src, srcSize, retSize); break;
case CL_DEVICE_MAX_ON_DEVICE_QUEUES: getCap<CL_DEVICE_MAX_ON_DEVICE_QUEUES >(src, srcSize, retSize); break;
case CL_DEVICE_MAX_PARAMETER_SIZE: getCap<CL_DEVICE_MAX_PARAMETER_SIZE >(src, srcSize, retSize); break;
case CL_DEVICE_MAX_PIPE_ARGS: getCap<CL_DEVICE_MAX_PIPE_ARGS >(src, srcSize, retSize); break;
case CL_DEVICE_MAX_READ_IMAGE_ARGS: getCap<CL_DEVICE_MAX_READ_IMAGE_ARGS >(src, srcSize, retSize); break;
case CL_DEVICE_MAX_READ_WRITE_IMAGE_ARGS: getCap<CL_DEVICE_MAX_READ_WRITE_IMAGE_ARGS >(src, srcSize, retSize); break;
case CL_DEVICE_MAX_SAMPLERS: getCap<CL_DEVICE_MAX_SAMPLERS >(src, srcSize, retSize); break;
case CL_DEVICE_MAX_WORK_GROUP_SIZE: getCap<CL_DEVICE_MAX_WORK_GROUP_SIZE >(src, srcSize, retSize); break;
case CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS: getCap<CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS >(src, srcSize, retSize); break;
case CL_DEVICE_MAX_WORK_ITEM_SIZES: getCap<CL_DEVICE_MAX_WORK_ITEM_SIZES >(src, srcSize, retSize); break;
case CL_DEVICE_MAX_WRITE_IMAGE_ARGS: getCap<CL_DEVICE_MAX_WRITE_IMAGE_ARGS >(src, srcSize, retSize); break;
case CL_DEVICE_MEM_BASE_ADDR_ALIGN: getCap<CL_DEVICE_MEM_BASE_ADDR_ALIGN >(src, srcSize, retSize); break;
case CL_DEVICE_ME_VERSION_INTEL: getCap<CL_DEVICE_ME_VERSION_INTEL >(src, srcSize, retSize); break;
case CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE: getCap<CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE >(src, srcSize, retSize); break;
case CL_DEVICE_NAME: getStr<CL_DEVICE_NAME >(src, srcSize, retSize); break;
case CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR: getCap<CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR >(src, srcSize, retSize); break;
case CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE: getCap<CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE >(src, srcSize, retSize); break;
case CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT: getCap<CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT >(src, srcSize, retSize); break;
case CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF: getCap<CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF >(src, srcSize, retSize); break;
case CL_DEVICE_NATIVE_VECTOR_WIDTH_INT: getCap<CL_DEVICE_NATIVE_VECTOR_WIDTH_INT >(src, srcSize, retSize); break;
case CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG: getCap<CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG >(src, srcSize, retSize); break;
case CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT: getCap<CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT >(src, srcSize, retSize); break;
case CL_DEVICE_OPENCL_C_VERSION: getStr<CL_DEVICE_OPENCL_C_VERSION >(src, srcSize, retSize); break;
case CL_DEVICE_PARENT_DEVICE: getCap<CL_DEVICE_PARENT_DEVICE >(src, srcSize, retSize); break;
case CL_DEVICE_PARTITION_AFFINITY_DOMAIN: getCap<CL_DEVICE_PARTITION_AFFINITY_DOMAIN >(src, srcSize, retSize); break;
case CL_DEVICE_PARTITION_MAX_SUB_DEVICES: getCap<CL_DEVICE_PARTITION_MAX_SUB_DEVICES >(src, srcSize, retSize); break;
case CL_DEVICE_PARTITION_PROPERTIES: getCap<CL_DEVICE_PARTITION_PROPERTIES >(src, srcSize, retSize); break;
case CL_DEVICE_PARTITION_TYPE: getCap<CL_DEVICE_PARTITION_TYPE >(src, srcSize, retSize); break;
case CL_DEVICE_PIPE_MAX_ACTIVE_RESERVATIONS: getCap<CL_DEVICE_PIPE_MAX_ACTIVE_RESERVATIONS >(src, srcSize, retSize); break;
case CL_DEVICE_PIPE_MAX_PACKET_SIZE: getCap<CL_DEVICE_PIPE_MAX_PACKET_SIZE >(src, srcSize, retSize); break;
case CL_DEVICE_PLATFORM: getCap<CL_DEVICE_PLATFORM >(src, srcSize, retSize); break;
case CL_DEVICE_PREFERRED_GLOBAL_ATOMIC_ALIGNMENT: getCap<CL_DEVICE_PREFERRED_GLOBAL_ATOMIC_ALIGNMENT >(src, srcSize, retSize); break;
case CL_DEVICE_PREFERRED_INTEROP_USER_SYNC: getCap<CL_DEVICE_PREFERRED_INTEROP_USER_SYNC >(src, srcSize, retSize); break;
case CL_DEVICE_PREFERRED_LOCAL_ATOMIC_ALIGNMENT: getCap<CL_DEVICE_PREFERRED_LOCAL_ATOMIC_ALIGNMENT >(src, srcSize, retSize); break;
case CL_DEVICE_PREFERRED_PLATFORM_ATOMIC_ALIGNMENT: getCap<CL_DEVICE_PREFERRED_PLATFORM_ATOMIC_ALIGNMENT >(src, srcSize, retSize); break;
case CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR: getCap<CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR >(src, srcSize, retSize); break;
case CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE: getCap<CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE >(src, srcSize, retSize); break;
case CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT: getCap<CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT >(src, srcSize, retSize); break;
case CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF: getCap<CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF >(src, srcSize, retSize); break;
case CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT: getCap<CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT >(src, srcSize, retSize); break;
case CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG: getCap<CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG >(src, srcSize, retSize); break;
case CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT: getCap<CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT >(src, srcSize, retSize); break;
case CL_DEVICE_PRINTF_BUFFER_SIZE: getCap<CL_DEVICE_PRINTF_BUFFER_SIZE >(src, srcSize, retSize); break;
case CL_DEVICE_PROFILE: getStr<CL_DEVICE_PROFILE >(src, srcSize, retSize); break;
case CL_DEVICE_PROFILING_TIMER_RESOLUTION: getCap<CL_DEVICE_PROFILING_TIMER_RESOLUTION >(src, srcSize, retSize); break;
case CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE: getCap<CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE >(src, srcSize, retSize); break;
case CL_DEVICE_QUEUE_ON_DEVICE_PREFERRED_SIZE: getCap<CL_DEVICE_QUEUE_ON_DEVICE_PREFERRED_SIZE >(src, srcSize, retSize); break;
case CL_DEVICE_QUEUE_ON_DEVICE_PROPERTIES: getCap<CL_DEVICE_QUEUE_ON_DEVICE_PROPERTIES >(src, srcSize, retSize); break;
case CL_DEVICE_QUEUE_ON_HOST_PROPERTIES: getCap<CL_DEVICE_QUEUE_ON_HOST_PROPERTIES >(src, srcSize, retSize); break;
case CL_DEVICE_SINGLE_FP_CONFIG: getCap<CL_DEVICE_SINGLE_FP_CONFIG >(src, srcSize, retSize); break;
case CL_DEVICE_SPIR_VERSIONS: getStr<CL_DEVICE_SPIR_VERSIONS >(src, srcSize, retSize); break;
case CL_DEVICE_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS: getCap<CL_DEVICE_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS >(src, srcSize, retSize); break;
case CL_DEVICE_SUB_GROUP_SIZES_INTEL: getCap<CL_DEVICE_SUB_GROUP_SIZES_INTEL >(src, srcSize, retSize); break;
case CL_DEVICE_SVM_CAPABILITIES: getCap<CL_DEVICE_SVM_CAPABILITIES >(src, srcSize, retSize); break;
case CL_DEVICE_TYPE: getCap<CL_DEVICE_TYPE >(src, srcSize, retSize); break;
case CL_DEVICE_VENDOR: getStr<CL_DEVICE_VENDOR >(src, srcSize, retSize); break;
case CL_DEVICE_VENDOR_ID: getCap<CL_DEVICE_VENDOR_ID >(src, srcSize, retSize); break;
case CL_DEVICE_VERSION: getStr<CL_DEVICE_VERSION >(src, srcSize, retSize); break;
case CL_DRIVER_VERSION: getStr<CL_DRIVER_VERSION >(src, srcSize, retSize); break;
case CL_DEVICE_DRIVER_VERSION_INTEL: getCap<CL_DEVICE_DRIVER_VERSION_INTEL >(src, srcSize, retSize); break;
// clang-format on
// Answered only when simultaneousInterops holds more than one entry; the reported value is 1.
case CL_DEVICE_NUM_SIMULTANEOUS_INTEROPS_INTEL:
if (simultaneousInterops.size() > 1u) {
srcSize = retSize = sizeof(cl_uint);
param = 1u;
src = &param;
}
break;
// Same guard as above; returns every element of simultaneousInterops as cl_uint values.
case CL_DEVICE_SIMULTANEOUS_INTEROPS_INTEL:
if (simultaneousInterops.size() > 1u) {
srcSize = retSize = sizeof(cl_uint) * simultaneousInterops.size();
src = &simultaneousInterops[0];
}
break;
// The reference count is read live from the object rather than from deviceInfo.
case CL_DEVICE_REFERENCE_COUNT: {
cl_int ref = this->getReference();
DEBUG_BREAK_IF(ref <= 0);
// Debug-only check: a root device is expected to report a count of exactly 1.
DEBUG_BREAK_IF(this->isRootDevice() && ref != 1);
param = static_cast<cl_uint>(ref);
src = &param;
retSize = srcSize = sizeof(param);
break;
}
// The planar YUV limits are answered only when deviceInfo.nv12Extension is set.
case CL_DEVICE_PLANAR_YUV_MAX_WIDTH_INTEL:
if (deviceInfo.nv12Extension)
getCap<CL_DEVICE_PLANAR_YUV_MAX_WIDTH_INTEL>(src, srcSize, retSize);
break;
case CL_DEVICE_PLANAR_YUV_MAX_HEIGHT_INTEL:
if (deviceInfo.nv12Extension)
getCap<CL_DEVICE_PLANAR_YUV_MAX_HEIGHT_INTEL>(src, srcSize, retSize);
break;
}
// NOTE(review): the handling of src == nullptr / srcSize == 0 lives in ::getInfo, which is
// not visible here - confirm it reports an error for unanswered selectors.
retVal = ::getInfo(paramValue, paramValueSize, src, srcSize);
// The required size is reported regardless of the status returned by ::getInfo.
if (paramValueSizeRet) {
*paramValueSizeRet = retSize;
}
return retVal;
}
// Queries the device (GPU) and host (CPU) timestamps through getOSTime().
//   deviceTimestamp - written only when getCpuGpuTime() succeeds; receives the GPU
//                     timestamp multiplied by the dynamic device timer resolution
//   hostTimestamp   - forwarded to getCpuTime(), which is always invoked
// Returns true only when both queries succeed. The outcome of the GPU query must not be
// discarded: reporting success after it failed would tell the caller that
// *deviceTimestamp is valid although it was never written.
bool Device::getDeviceAndHostTimer(uint64_t *deviceTimestamp, uint64_t *hostTimestamp) const {
    TimeStampData queueTimeStamp;
    const bool deviceTimeValid = getOSTime()->getCpuGpuTime(&queueTimeStamp);
    if (deviceTimeValid) {
        const uint64_t resolution = static_cast<uint64_t>(getOSTime()->getDynamicDeviceTimerResolution(this->hwInfo));
        *deviceTimestamp = queueTimeStamp.GPUTimeStamp * resolution;
    }

    // The host query is issued unconditionally so the host-side behaviour is unchanged
    // even when the GPU query fails.
    const bool hostTimeValid = getOSTime()->getCpuTime(hostTimestamp);
    return deviceTimeValid && hostTimeValid;
}
// Reads the host (CPU) timestamp through getOSTime().
//   hostTimestamp - forwarded unchanged to getCpuTime()
// Returns the result of that getCpuTime() call.
bool Device::getHostTimer(uint64_t *hostTimestamp) const {
    const bool hostTimeValid = getOSTime()->getCpuTime(hostTimestamp);
    return hostTimeValid;
}
} // namespace OCLRT

View File

@@ -0,0 +1,155 @@
/*
* Copyright (c) 2017, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#pragma once
#include "config.h"
#include "CL/cl.h"
#include "CL/cl_ext.h"
#include <cstdint>
// clang-format off
// Plain aggregate of per-device properties. Most members back one cl_device_info query
// through a DeviceInfoTable::Map specialization, which binds the member by
// pointer-to-member and by its exact declared type; renaming a member or changing its
// type therefore requires updating the matching Map entry as well.
struct DeviceInfo {
cl_device_type deviceType;
cl_uint vendorId;
// Spelling (sic) is referenced by the CL_DEVICE_MAX_COMPUTE_UNITS mapping.
cl_uint maxComputUnits;
cl_uint maxWorkItemDimensions;
// Returned as a whole array for CL_DEVICE_MAX_WORK_ITEM_SIZES.
size_t maxWorkItemSizes[3];
size_t maxWorkGroupSize;
cl_uint maxNumOfSubGroups;
size_t maxNumEUsPerSubSlice;
// Returned as a whole array for CL_DEVICE_SUB_GROUP_SIZES_INTEL.
size_t maxSubGroups[3];
cl_bool independentForwardProgress;
cl_uint preferredVectorWidthChar;
cl_uint preferredVectorWidthShort;
cl_uint preferredVectorWidthInt;
cl_uint preferredVectorWidthLong;
cl_uint preferredVectorWidthFloat;
cl_uint preferredVectorWidthDouble;
cl_uint preferredVectorWidthHalf;
cl_uint nativeVectorWidthChar;
cl_uint nativeVectorWidthShort;
cl_uint nativeVectorWidthInt;
cl_uint nativeVectorWidthLong;
cl_uint nativeVectorWidthFloat;
cl_uint nativeVectorWidthDouble;
cl_uint nativeVectorWidthHalf;
cl_uint numThreadsPerEU;
cl_uint maxClockFrequency;
cl_uint addressBits;
cl_ulong maxMemAllocSize;
cl_bool imageSupport;
cl_uint maxReadImageArgs;
cl_uint maxWriteImageArgs;
cl_uint maxReadWriteImageArgs;
size_t imageMaxBufferSize;
size_t image2DMaxWidth;
size_t image2DMaxHeight;
size_t image3DMaxWidth;
size_t image3DMaxHeight;
size_t image3DMaxDepth;
size_t imageMaxArraySize;
size_t maxBufferSize;
size_t maxArraySize;
cl_uint maxSamplers;
cl_uint imagePitchAlignment;
cl_uint imageBaseAddressAlignment;
cl_uint maxPipeArgs;
cl_uint pipeMaxActiveReservations;
cl_uint pipeMaxPacketSize;
size_t maxParameterSize;
cl_uint memBaseAddressAlign;
cl_uint minDataTypeAlignSize;
cl_device_fp_config singleFpConfig;
cl_device_fp_config halfFpConfig;
cl_device_fp_config doubleFpConfig;
cl_device_mem_cache_type globalMemCacheType;
cl_uint globalMemCachelineSize;
cl_ulong globalMemCacheSize;
cl_ulong globalMemSize;
cl_ulong maxConstantBufferSize;
cl_uint maxConstantArgs;
size_t maxGlobalVariableSize;
size_t globalVariablePreferredTotalSize;
cl_device_local_mem_type localMemType;
cl_ulong localMemSize;
cl_bool errorCorrectionSupport;
size_t profilingTimerResolution;
cl_bool endianLittle;
cl_bool deviceAvailable;
cl_bool compilerAvailable;
cl_bool linkerAvailable;
cl_device_exec_capabilities executionCapabilities;
cl_command_queue_properties queueOnHostProperties;
cl_command_queue_properties queueOnDeviceProperties;
cl_uint queueOnDevicePreferredSize;
cl_uint queueOnDeviceMaxSize;
cl_uint maxOnDeviceQueues;
cl_uint maxOnDeviceEvents;
// String members are stored as raw pointers; this struct does not manage their storage.
const char *builtInKernels;
cl_platform_id platform;
const char *name;
const char *vendor;
const char *driverVersion;
const char *profile;
// Mapped to CL_DEVICE_VERSION.
const char *clVersion;
// Mapped to CL_DEVICE_OPENCL_C_VERSION.
const char *clCVersion;
const char *spirVersions;
const char *deviceExtensions;
size_t printfBufferSize;
cl_bool preferredInteropUserSync;
cl_device_id parentDevice;
cl_uint partitionMaxSubDevices;
cl_device_partition_property partitionProperties;
cl_device_affinity_domain partitionAffinityDomain;
cl_device_partition_property *partitionType;
cl_uint referenceCount;
cl_device_svm_capabilities svmCapabilities;
cl_uint preferredPlatformAtomicAlignment;
cl_uint preferredGlobalAtomicAlignment;
cl_uint preferredLocalAtomicAlignment;
cl_bool hostUnifiedMemory;
const char *ilVersion;
uint32_t computeUnitsUsedForScratch;
// Spelling (sic); NOTE(review): users of this member are outside this header - check
// them before renaming.
bool force32BitAddressess;
bool preemptionSupported;
bool priorityHintsSupported;
double platformHostTimerResolution;
// Mapped to CL_DEVICE_PLANAR_YUV_MAX_WIDTH_INTEL / CL_DEVICE_PLANAR_YUV_MAX_HEIGHT_INTEL.
size_t planarYuvMaxWidth;
size_t planarYuvMaxHeight;
cl_bool vmeAvcSupportsPreemption;
cl_bool vmeAvcSupportsTextureSampler;
// Mapped to CL_DEVICE_AVC_ME_VERSION_INTEL.
cl_uint vmeAvcVersion;
// Mapped to CL_DEVICE_ME_VERSION_INTEL.
cl_uint vmeVersion;
/* Extensions supported */
bool nv12Extension;
bool vmeExtension;
bool platformLP;
bool cpuCopyAllowed;
bool packedYuvExtension;
// Mapped to CL_DEVICE_DRIVER_VERSION_INTEL.
cl_uint internalDriverVersion;
bool enabled64kbPages;
};
// clang-format on

View File

@@ -0,0 +1,152 @@
/*
* Copyright (c) 2017, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#pragma once
#include "runtime/device/device_info.h"
#include <CL/cl.h>
#include <CL/cl_ext.h>
#include "CL/cl_ext_intel.h"
#include "public/cl_ext_private.h"
namespace DeviceInfoTable {
template <cl_device_info Param, typename _Type, _Type DeviceInfo::*val>
struct MapBase {
enum { param = Param };
typedef _Type Type;
enum { size = sizeof(Type) };
static const Type &getValue(const DeviceInfo &deviceInfo) {
return deviceInfo.*val;
}
};
// Primary template: intentionally empty. A selector without an explicit specialization
// has no param/Type/size/getValue members, so using Map<Param> for it fails to compile.
template <cl_device_info Param>
struct Map {};
//////////////////////////////////////////////////////
// DeviceInfo mapping table
// Map<Param>::param - e.g. CL_DEVICE_ADDRESS_BITS
// Map<Param>::Type - e.g. cl_uint
// Map<Param>::size - e.g. sizeof(cl_uint)
// Map<Param>::getValue - e.g. returns deviceInfo.addressBits
//////////////////////////////////////////////////////
// clang-format off
// One explicit specialization per supported selector, in the form
//   Map<SELECTOR> : MapBase<SELECTOR, member type, &DeviceInfo::member>.
// The member type is the pointee type of the pointer-to-member template argument, so it
// has to be the exact declared type of that DeviceInfo member.
// NOTE(review): the unsigned int / uint32_t / uint64_t spellings used here presumably
// alias the cl_* typedefs used in DeviceInfo - confirm against the CL headers when porting.
template<> struct Map<CL_DEVICE_ADDRESS_BITS > : public MapBase<CL_DEVICE_ADDRESS_BITS, unsigned int, &DeviceInfo::addressBits> {};
template<> struct Map<CL_DEVICE_AVAILABLE > : public MapBase<CL_DEVICE_AVAILABLE, uint32_t, &DeviceInfo::deviceAvailable> {};
template<> struct Map<CL_DEVICE_AVC_ME_VERSION_INTEL > : public MapBase<CL_DEVICE_AVC_ME_VERSION_INTEL, uint32_t, &DeviceInfo::vmeAvcVersion> {};
template<> struct Map<CL_DEVICE_AVC_ME_SUPPORTS_TEXTURE_SAMPLER_USE_INTEL > : public MapBase<CL_DEVICE_AVC_ME_SUPPORTS_TEXTURE_SAMPLER_USE_INTEL, uint32_t, &DeviceInfo::vmeAvcSupportsTextureSampler> {};
template<> struct Map<CL_DEVICE_AVC_ME_SUPPORTS_PREEMPTION_INTEL > : public MapBase<CL_DEVICE_AVC_ME_SUPPORTS_PREEMPTION_INTEL, uint32_t, &DeviceInfo::vmeAvcSupportsPreemption> {};
template<> struct Map<CL_DEVICE_BUILT_IN_KERNELS > : public MapBase<CL_DEVICE_BUILT_IN_KERNELS, const char *, &DeviceInfo::builtInKernels> {};
template<> struct Map<CL_DEVICE_COMPILER_AVAILABLE > : public MapBase<CL_DEVICE_COMPILER_AVAILABLE, uint32_t, &DeviceInfo::compilerAvailable> {};
template<> struct Map<CL_DEVICE_DOUBLE_FP_CONFIG > : public MapBase<CL_DEVICE_DOUBLE_FP_CONFIG, uint64_t, &DeviceInfo::doubleFpConfig> {};
template<> struct Map<CL_DEVICE_ENDIAN_LITTLE > : public MapBase<CL_DEVICE_ENDIAN_LITTLE, uint32_t, &DeviceInfo::endianLittle> {};
template<> struct Map<CL_DEVICE_ERROR_CORRECTION_SUPPORT > : public MapBase<CL_DEVICE_ERROR_CORRECTION_SUPPORT, uint32_t, &DeviceInfo::errorCorrectionSupport> {};
template<> struct Map<CL_DEVICE_EXECUTION_CAPABILITIES > : public MapBase<CL_DEVICE_EXECUTION_CAPABILITIES, uint64_t, &DeviceInfo::executionCapabilities> {};
template<> struct Map<CL_DEVICE_EXTENSIONS > : public MapBase<CL_DEVICE_EXTENSIONS, const char *, &DeviceInfo::deviceExtensions> {};
template<> struct Map<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE > : public MapBase<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE, unsigned int, &DeviceInfo::globalMemCachelineSize> {};
template<> struct Map<CL_DEVICE_GLOBAL_MEM_CACHE_SIZE > : public MapBase<CL_DEVICE_GLOBAL_MEM_CACHE_SIZE, uint64_t, &DeviceInfo::globalMemCacheSize> {};
template<> struct Map<CL_DEVICE_GLOBAL_MEM_CACHE_TYPE > : public MapBase<CL_DEVICE_GLOBAL_MEM_CACHE_TYPE, uint32_t, &DeviceInfo::globalMemCacheType> {};
template<> struct Map<CL_DEVICE_GLOBAL_MEM_SIZE > : public MapBase<CL_DEVICE_GLOBAL_MEM_SIZE, uint64_t, &DeviceInfo::globalMemSize> {};
template<> struct Map<CL_DEVICE_GLOBAL_VARIABLE_PREFERRED_TOTAL_SIZE > : public MapBase<CL_DEVICE_GLOBAL_VARIABLE_PREFERRED_TOTAL_SIZE, size_t, &DeviceInfo::globalVariablePreferredTotalSize> {};
template<> struct Map<CL_DEVICE_HALF_FP_CONFIG > : public MapBase<CL_DEVICE_HALF_FP_CONFIG, uint64_t, &DeviceInfo::halfFpConfig> {};
template<> struct Map<CL_DEVICE_HOST_UNIFIED_MEMORY > : public MapBase<CL_DEVICE_HOST_UNIFIED_MEMORY, uint32_t, &DeviceInfo::hostUnifiedMemory> {};
template<> struct Map<CL_DEVICE_IL_VERSION > : public MapBase<CL_DEVICE_IL_VERSION, const char *, &DeviceInfo::ilVersion> {};
template<> struct Map<CL_DEVICE_IMAGE2D_MAX_HEIGHT > : public MapBase<CL_DEVICE_IMAGE2D_MAX_HEIGHT, size_t, &DeviceInfo::image2DMaxHeight> {};
template<> struct Map<CL_DEVICE_IMAGE2D_MAX_WIDTH > : public MapBase<CL_DEVICE_IMAGE2D_MAX_WIDTH, size_t, &DeviceInfo::image2DMaxWidth> {};
template<> struct Map<CL_DEVICE_IMAGE3D_MAX_DEPTH > : public MapBase<CL_DEVICE_IMAGE3D_MAX_DEPTH, size_t, &DeviceInfo::image3DMaxDepth> {};
template<> struct Map<CL_DEVICE_IMAGE3D_MAX_HEIGHT > : public MapBase<CL_DEVICE_IMAGE3D_MAX_HEIGHT, size_t, &DeviceInfo::image3DMaxHeight> {};
template<> struct Map<CL_DEVICE_IMAGE3D_MAX_WIDTH > : public MapBase<CL_DEVICE_IMAGE3D_MAX_WIDTH, size_t, &DeviceInfo::image3DMaxWidth> {};
template<> struct Map<CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT > : public MapBase<CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT, uint32_t, &DeviceInfo::imageBaseAddressAlignment> {};
template<> struct Map<CL_DEVICE_IMAGE_MAX_ARRAY_SIZE > : public MapBase<CL_DEVICE_IMAGE_MAX_ARRAY_SIZE, size_t, &DeviceInfo::imageMaxArraySize> {};
template<> struct Map<CL_DEVICE_IMAGE_MAX_BUFFER_SIZE > : public MapBase<CL_DEVICE_IMAGE_MAX_BUFFER_SIZE, size_t, &DeviceInfo::imageMaxBufferSize> {};
template<> struct Map<CL_DEVICE_IMAGE_PITCH_ALIGNMENT > : public MapBase<CL_DEVICE_IMAGE_PITCH_ALIGNMENT, uint32_t, &DeviceInfo::imagePitchAlignment> {};
template<> struct Map<CL_DEVICE_IMAGE_SUPPORT > : public MapBase<CL_DEVICE_IMAGE_SUPPORT, uint32_t, &DeviceInfo::imageSupport> {};
template<> struct Map<CL_DEVICE_LINKER_AVAILABLE > : public MapBase<CL_DEVICE_LINKER_AVAILABLE, uint32_t, &DeviceInfo::linkerAvailable> {};
template<> struct Map<CL_DEVICE_LOCAL_MEM_SIZE > : public MapBase<CL_DEVICE_LOCAL_MEM_SIZE, uint64_t, &DeviceInfo::localMemSize> {};
template<> struct Map<CL_DEVICE_LOCAL_MEM_TYPE > : public MapBase<CL_DEVICE_LOCAL_MEM_TYPE, uint32_t, &DeviceInfo::localMemType> {};
template<> struct Map<CL_DEVICE_MAX_CLOCK_FREQUENCY > : public MapBase<CL_DEVICE_MAX_CLOCK_FREQUENCY, uint32_t, &DeviceInfo::maxClockFrequency> {};
template<> struct Map<CL_DEVICE_MAX_COMPUTE_UNITS > : public MapBase<CL_DEVICE_MAX_COMPUTE_UNITS, uint32_t, &DeviceInfo::maxComputUnits> {};
template<> struct Map<CL_DEVICE_MAX_CONSTANT_ARGS > : public MapBase<CL_DEVICE_MAX_CONSTANT_ARGS, uint32_t, &DeviceInfo::maxConstantArgs> {};
template<> struct Map<CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE > : public MapBase<CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, uint64_t, &DeviceInfo::maxConstantBufferSize> {};
template<> struct Map<CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE > : public MapBase<CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE, size_t, &DeviceInfo::maxGlobalVariableSize> {};
template<> struct Map<CL_DEVICE_MAX_MEM_ALLOC_SIZE > : public MapBase<CL_DEVICE_MAX_MEM_ALLOC_SIZE, uint64_t, &DeviceInfo::maxMemAllocSize> {};
template<> struct Map<CL_DEVICE_MAX_NUM_SUB_GROUPS > : public MapBase<CL_DEVICE_MAX_NUM_SUB_GROUPS, uint32_t, &DeviceInfo::maxNumOfSubGroups> {};
template<> struct Map<CL_DEVICE_MAX_ON_DEVICE_EVENTS > : public MapBase<CL_DEVICE_MAX_ON_DEVICE_EVENTS, uint32_t, &DeviceInfo::maxOnDeviceEvents> {};
template<> struct Map<CL_DEVICE_MAX_ON_DEVICE_QUEUES > : public MapBase<CL_DEVICE_MAX_ON_DEVICE_QUEUES, uint32_t, &DeviceInfo::maxOnDeviceQueues> {};
template<> struct Map<CL_DEVICE_MAX_PARAMETER_SIZE > : public MapBase<CL_DEVICE_MAX_PARAMETER_SIZE, size_t, &DeviceInfo::maxParameterSize> {};
template<> struct Map<CL_DEVICE_MAX_PIPE_ARGS > : public MapBase<CL_DEVICE_MAX_PIPE_ARGS, uint32_t, &DeviceInfo::maxPipeArgs> {};
template<> struct Map<CL_DEVICE_MAX_READ_IMAGE_ARGS > : public MapBase<CL_DEVICE_MAX_READ_IMAGE_ARGS, uint32_t, &DeviceInfo::maxReadImageArgs> {};
template<> struct Map<CL_DEVICE_MAX_READ_WRITE_IMAGE_ARGS > : public MapBase<CL_DEVICE_MAX_READ_WRITE_IMAGE_ARGS, uint32_t, &DeviceInfo::maxReadWriteImageArgs> {};
template<> struct Map<CL_DEVICE_MAX_SAMPLERS > : public MapBase<CL_DEVICE_MAX_SAMPLERS, uint32_t, &DeviceInfo::maxSamplers> {};
template<> struct Map<CL_DEVICE_MAX_WORK_GROUP_SIZE > : public MapBase<CL_DEVICE_MAX_WORK_GROUP_SIZE, size_t, &DeviceInfo::maxWorkGroupSize> {};
template<> struct Map<CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS > : public MapBase<CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, uint32_t, &DeviceInfo::maxWorkItemDimensions> {};
template<> struct Map<CL_DEVICE_MAX_WRITE_IMAGE_ARGS > : public MapBase<CL_DEVICE_MAX_WRITE_IMAGE_ARGS, uint32_t, &DeviceInfo::maxWriteImageArgs> {};
template<> struct Map<CL_DEVICE_MEM_BASE_ADDR_ALIGN > : public MapBase<CL_DEVICE_MEM_BASE_ADDR_ALIGN, uint32_t, &DeviceInfo::memBaseAddressAlign> {};
template<> struct Map<CL_DEVICE_ME_VERSION_INTEL > : public MapBase<CL_DEVICE_ME_VERSION_INTEL, uint32_t, &DeviceInfo::vmeVersion> {};
template<> struct Map<CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE > : public MapBase<CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE, uint32_t, &DeviceInfo::minDataTypeAlignSize> {};
template<> struct Map<CL_DEVICE_NAME > : public MapBase<CL_DEVICE_NAME, const char *, &DeviceInfo::name> {};
template<> struct Map<CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR > : public MapBase<CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR, uint32_t, &DeviceInfo::nativeVectorWidthChar> {};
template<> struct Map<CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE > : public MapBase<CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE, uint32_t, &DeviceInfo::nativeVectorWidthDouble> {};
template<> struct Map<CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT > : public MapBase<CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT, uint32_t, &DeviceInfo::nativeVectorWidthFloat> {};
template<> struct Map<CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF > : public MapBase<CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF, uint32_t, &DeviceInfo::nativeVectorWidthHalf> {};
template<> struct Map<CL_DEVICE_NATIVE_VECTOR_WIDTH_INT > : public MapBase<CL_DEVICE_NATIVE_VECTOR_WIDTH_INT, uint32_t, &DeviceInfo::nativeVectorWidthInt> {};
template<> struct Map<CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG > : public MapBase<CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG, uint32_t, &DeviceInfo::nativeVectorWidthLong> {};
template<> struct Map<CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT > : public MapBase<CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT, uint32_t, &DeviceInfo::nativeVectorWidthShort> {};
template<> struct Map<CL_DEVICE_OPENCL_C_VERSION > : public MapBase<CL_DEVICE_OPENCL_C_VERSION, const char *, &DeviceInfo::clCVersion> {};
template<> struct Map<CL_DEVICE_PARENT_DEVICE > : public MapBase<CL_DEVICE_PARENT_DEVICE, cl_device_id, &DeviceInfo::parentDevice> {};
template<> struct Map<CL_DEVICE_PARTITION_AFFINITY_DOMAIN > : public MapBase<CL_DEVICE_PARTITION_AFFINITY_DOMAIN, uint64_t, &DeviceInfo::partitionAffinityDomain> {};
template<> struct Map<CL_DEVICE_PARTITION_MAX_SUB_DEVICES > : public MapBase<CL_DEVICE_PARTITION_MAX_SUB_DEVICES, uint32_t, &DeviceInfo::partitionMaxSubDevices> {};
template<> struct Map<CL_DEVICE_PARTITION_TYPE > : public MapBase<CL_DEVICE_PARTITION_TYPE, cl_device_partition_property*, &DeviceInfo::partitionType> {};
template<> struct Map<CL_DEVICE_PIPE_MAX_ACTIVE_RESERVATIONS > : public MapBase<CL_DEVICE_PIPE_MAX_ACTIVE_RESERVATIONS, uint32_t, &DeviceInfo::pipeMaxActiveReservations> {};
template<> struct Map<CL_DEVICE_PIPE_MAX_PACKET_SIZE > : public MapBase<CL_DEVICE_PIPE_MAX_PACKET_SIZE, uint32_t, &DeviceInfo::pipeMaxPacketSize> {};
template<> struct Map<CL_DEVICE_PLANAR_YUV_MAX_HEIGHT_INTEL > : public MapBase<CL_DEVICE_PLANAR_YUV_MAX_HEIGHT_INTEL, size_t, &DeviceInfo::planarYuvMaxHeight> {};
template<> struct Map<CL_DEVICE_PLANAR_YUV_MAX_WIDTH_INTEL > : public MapBase<CL_DEVICE_PLANAR_YUV_MAX_WIDTH_INTEL, size_t, &DeviceInfo::planarYuvMaxWidth> {};
template<> struct Map<CL_DEVICE_PREFERRED_GLOBAL_ATOMIC_ALIGNMENT > : public MapBase<CL_DEVICE_PREFERRED_GLOBAL_ATOMIC_ALIGNMENT, uint32_t, &DeviceInfo::preferredGlobalAtomicAlignment> {};
template<> struct Map<CL_DEVICE_PREFERRED_INTEROP_USER_SYNC > : public MapBase<CL_DEVICE_PREFERRED_INTEROP_USER_SYNC, uint32_t, &DeviceInfo::preferredInteropUserSync> {};
template<> struct Map<CL_DEVICE_PREFERRED_LOCAL_ATOMIC_ALIGNMENT > : public MapBase<CL_DEVICE_PREFERRED_LOCAL_ATOMIC_ALIGNMENT, uint32_t, &DeviceInfo::preferredLocalAtomicAlignment> {};
template<> struct Map<CL_DEVICE_PREFERRED_PLATFORM_ATOMIC_ALIGNMENT > : public MapBase<CL_DEVICE_PREFERRED_PLATFORM_ATOMIC_ALIGNMENT, uint32_t, &DeviceInfo::preferredPlatformAtomicAlignment> {};
template<> struct Map<CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR > : public MapBase<CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR, uint32_t, &DeviceInfo::preferredVectorWidthChar> {};
template<> struct Map<CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE > : public MapBase<CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE, uint32_t, &DeviceInfo::preferredVectorWidthDouble> {};
template<> struct Map<CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT > : public MapBase<CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT, uint32_t, &DeviceInfo::preferredVectorWidthFloat> {};
template<> struct Map<CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF > : public MapBase<CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF, uint32_t, &DeviceInfo::preferredVectorWidthHalf> {};
template<> struct Map<CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT > : public MapBase<CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT, uint32_t, &DeviceInfo::preferredVectorWidthInt> {};
template<> struct Map<CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG > : public MapBase<CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG, uint32_t, &DeviceInfo::preferredVectorWidthLong> {};
template<> struct Map<CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT > : public MapBase<CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT, uint32_t, &DeviceInfo::preferredVectorWidthShort> {};
template<> struct Map<CL_DEVICE_PRINTF_BUFFER_SIZE > : public MapBase<CL_DEVICE_PRINTF_BUFFER_SIZE, size_t, &DeviceInfo::printfBufferSize> {};
template<> struct Map<CL_DEVICE_PROFILE > : public MapBase<CL_DEVICE_PROFILE, const char *, &DeviceInfo::profile> {};
template<> struct Map<CL_DEVICE_PROFILING_TIMER_RESOLUTION > : public MapBase<CL_DEVICE_PROFILING_TIMER_RESOLUTION, size_t, &DeviceInfo::profilingTimerResolution> {};
template<> struct Map<CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE > : public MapBase<CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE, uint32_t, &DeviceInfo::queueOnDeviceMaxSize> {};
template<> struct Map<CL_DEVICE_QUEUE_ON_DEVICE_PREFERRED_SIZE > : public MapBase<CL_DEVICE_QUEUE_ON_DEVICE_PREFERRED_SIZE, uint32_t, &DeviceInfo::queueOnDevicePreferredSize> {};
template<> struct Map<CL_DEVICE_QUEUE_ON_DEVICE_PROPERTIES > : public MapBase<CL_DEVICE_QUEUE_ON_DEVICE_PROPERTIES, uint64_t, &DeviceInfo::queueOnDeviceProperties> {};
template<> struct Map<CL_DEVICE_QUEUE_ON_HOST_PROPERTIES > : public MapBase<CL_DEVICE_QUEUE_ON_HOST_PROPERTIES, uint64_t, &DeviceInfo::queueOnHostProperties> {};
template<> struct Map<CL_DEVICE_SINGLE_FP_CONFIG > : public MapBase<CL_DEVICE_SINGLE_FP_CONFIG, uint64_t, &DeviceInfo::singleFpConfig> {};
template<> struct Map<CL_DEVICE_SPIR_VERSIONS > : public MapBase<CL_DEVICE_SPIR_VERSIONS, const char *, &DeviceInfo::spirVersions> {};
template<> struct Map<CL_DEVICE_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS > : public MapBase<CL_DEVICE_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS, uint32_t, &DeviceInfo::independentForwardProgress> {};
template<> struct Map<CL_DEVICE_SVM_CAPABILITIES > : public MapBase<CL_DEVICE_SVM_CAPABILITIES, uint64_t, &DeviceInfo::svmCapabilities> {};
template<> struct Map<CL_DEVICE_TYPE > : public MapBase<CL_DEVICE_TYPE, uint64_t, &DeviceInfo::deviceType> {};
template<> struct Map<CL_DEVICE_VENDOR_ID > : public MapBase<CL_DEVICE_VENDOR_ID, uint32_t, &DeviceInfo::vendorId> {};
template<> struct Map<CL_DEVICE_VENDOR > : public MapBase<CL_DEVICE_VENDOR, const char *, &DeviceInfo::vendor> {};
template<> struct Map<CL_DEVICE_VERSION > : public MapBase<CL_DEVICE_VERSION, const char *, &DeviceInfo::clVersion> {};
template<> struct Map<CL_DRIVER_VERSION > : public MapBase<CL_DRIVER_VERSION, const char *, &DeviceInfo::driverVersion> {};
template<> struct Map<CL_DEVICE_DRIVER_VERSION_INTEL > : public MapBase<CL_DEVICE_DRIVER_VERSION_INTEL, uint32_t, &DeviceInfo::internalDriverVersion> {};
// clang-format on
}

View File

@@ -0,0 +1,39 @@
/*
* Copyright (c) 2017, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#pragma once
#include "runtime/api/cl_types.h"
#include <vector>
namespace OCLRT {
class Device;

// A std::vector<Device *> extended with a constructor taking an array of
// cl_device_id handles and a helper that writes cl_device_id handles out.
//
// Elements are plain Device * values; nothing in this declaration manages the
// lifetime of the pointed-to devices. std::vector has no virtual destructor,
// so a DeviceVector must not be deleted through a std::vector pointer.
class DeviceVector : public std::vector<Device *> {
  public:
    DeviceVector() = default;
    DeviceVector(const DeviceVector &) = default;
    DeviceVector &operator=(const DeviceVector &) = default;

    // The user-declared copy operations above suppress the implicitly
    // generated move operations, which would turn every move of a
    // DeviceVector into a full copy of the underlying vector. Declare them
    // explicitly so moves transfer the storage instead.
    DeviceVector(DeviceVector &&) = default;
    DeviceVector &operator=(DeviceVector &&) = default;

    // Constructs the vector from `numDevices` handles starting at `devices`.
    // Defined out of line.
    // NOTE(review): presumably each handle is converted to its Device * and
    // appended -- confirm against the definition.
    DeviceVector(const cl_device_id *devices,
                 cl_uint numDevices);

    // Writes device handles into `devIDs`. Defined out of line.
    // NOTE(review): presumably one cl_device_id per contained Device, and it
    // is not visible here whether `devIDs` is cleared first -- confirm.
    void toDeviceIDs(std::vector<cl_device_id> &devIDs);
};
} // namespace OCLRT

View File

@@ -0,0 +1,41 @@
/*
* Copyright (c) 2017, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#pragma once
#include <string>
namespace OCLRT {

class OSInterface;

// Polymorphic provider of driver-related strings. The base implementation
// has no information of its own: each query simply hands back the fallback
// value supplied by the caller, and subclasses may override that.
class DriverInfo {
  public:
    // Factory taking an OSInterface pointer; declared here, defined elsewhere.
    // NOTE(review): ownership of the returned pointer is not expressed in the
    // signature -- presumably the caller owns it; confirm at the call sites.
    static DriverInfo *create(OSInterface *osInterface);

    virtual ~DriverInfo() = default;

    // Returns the device name; the base class returns `defaultName` unchanged.
    virtual std::string getDeviceName(std::string defaultName) {
        return defaultName;
    }

    // Returns the driver version; the base class returns `defaultVersion` unchanged.
    virtual std::string getVersion(std::string defaultVersion) {
        return defaultVersion;
    }
};

} // namespace OCLRT