Initial commit

Change-Id: I4bf1707bd3dfeadf2c17b0a7daff372b1925ebbd
This commit is contained in:
Brandon Fliflet
2017-12-21 00:45:38 +01:00
commit 7e9ad41290
1350 changed files with 233156 additions and 0 deletions

View File

@@ -0,0 +1,51 @@
/*
* Copyright (c) 2017, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#pragma once
#include "runtime/utilities/heap_allocator.h"
#include <stdint.h>
#include <memory>
namespace OCLRT {
// Largest address value that fits in 32 bits (0xffffffff).
const uintptr_t max32BitAddress = 0xffffffff;
// Declared here, defined in another translation unit.
// NOTE(review): presumably tells whether an OS-backed 32-bit allocator can be
// used on this platform - confirm against the definition.
extern bool is32BitOsAllocatorAvailable;
// Allocator interface offering allocate(), free() and a base-address query.
// NOTE(review): the class name and the max32BitAddress constant declared next
// to it suggest allocations come from a 32-bit address range - confirm in the
// implementation file, which is not visible from this header.
class Allocator32bit {
protected:
// Platform-specific state; only forward-declared here, defined elsewhere.
class OsInternals;
public:
// Constructs an allocator from an explicit base/size pair.
Allocator32bit(uint64_t base, uint64_t size);
// Constructs an allocator from externally created OS internals.
// NOTE(review): osInternals is stored in a unique_ptr member, so this
// constructor presumably takes ownership - confirm in the implementation.
Allocator32bit(Allocator32bit::OsInternals *osInternals);
Allocator32bit();
~Allocator32bit();
// Returns a pointer to the allocation. `size` is taken by non-const
// reference, so the callee is able to modify the caller's value.
void *allocate(size_t &size);
uintptr_t getBase();
// Releases `size` bytes at `ptr`; returns an int status code whose meaning
// is defined by the implementation.
int free(void *ptr, size_t size);
protected:
// Both helpers are owned by this object through unique_ptr.
std::unique_ptr<OsInternals> osInternals;
std::unique_ptr<HeapAllocator> heapAllocator;
uint64_t base = 0;
uint64_t size = 0;
};
} // namespace OCLRT

View File

@@ -0,0 +1,79 @@
/*
* Copyright (c) 2017, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
/*DEBUG FLAGS*/
// Every entry has the form
//   DECLARE_DEBUG_VARIABLE(type, name, default value, description).
// The including file defines DECLARE_DEBUG_VARIABLE before each inclusion, so
// this list can be expanded in several different ways.
DECLARE_DEBUG_VARIABLE(bool, EnableDebugBreak, false, "Enable DEBUG_BREAKs")
DECLARE_DEBUG_VARIABLE(bool, FlushAllCaches, false, "pipe controls between enqueues flush all possible caches")
DECLARE_DEBUG_VARIABLE(bool, MakeEachEnqueueBlocking, false, "equivalent of finish after each enqueue")
DECLARE_DEBUG_VARIABLE(bool, DoCpuCopyOnReadBuffer, false, "triggers CPU copy path for Read Buffer calls, only supported for some basic use cases ( no events, not blocked calls )")
DECLARE_DEBUG_VARIABLE(bool, DoCpuCopyOnWriteBuffer, false, "triggers CPU copy path for Write Buffer calls, only supported for some basic use cases ( no events, not blocked calls )")
DECLARE_DEBUG_VARIABLE(bool, DisableResourceRecycling, false, "when set to true disables resource recycling optimization")
DECLARE_DEBUG_VARIABLE(int32_t, InitializeMemoryInDebug, 0x10, "Memory initialization in debug")
// NOTE(review): the description below is identical to the one used for
// PrintEMDebugInformation - confirm it really describes this variable.
DECLARE_DEBUG_VARIABLE(int32_t, SchedulerSimulationReturnInstance, 0, "prints execution model related debug information")
DECLARE_DEBUG_VARIABLE(bool, ForceDispatchScheduler, false, "dispatches scheduler kernel instead of kernel enqueued")
DECLARE_DEBUG_VARIABLE(bool, TrackParentEvents, false, "events track their parents")
/*LOGGING FLAGS*/
// Switches for logging and dumping output. Entry format:
//   DECLARE_DEBUG_VARIABLE(type, name, default value, description).
DECLARE_DEBUG_VARIABLE(bool, PrintDebugMessages, false, "when enabled, some debug messages will be propagated to console")
DECLARE_DEBUG_VARIABLE(bool, DumpKernels, false, "Enables dumping kernels' program source code to text files and program from binary to bin file")
DECLARE_DEBUG_VARIABLE(bool, DumpKernelArgs, false, "Enables dumping kernels args to binary files")
DECLARE_DEBUG_VARIABLE(bool, LogApiCalls, false, "Enables logging api function calls, inputs and outputs to file")
DECLARE_DEBUG_VARIABLE(bool, LogPatchTokens, false, "Enables logging patch tokens, inputs and outputs to file")
DECLARE_DEBUG_VARIABLE(bool, LogTaskCounts, false, "Enables logging taskCounts and taskLevels to file")
DECLARE_DEBUG_VARIABLE(bool, LogAlignedAllocations, false, "Logs alignedMalloc and alignedFree allocations")
DECLARE_DEBUG_VARIABLE(bool, LogMemoryObject, false, "Logs memory object ptrs, sizes and operations")
// The next two bool entries spell their default as 0 rather than false.
DECLARE_DEBUG_VARIABLE(bool, ResidencyDebugEnable, 0, "enables debug messages and checks for Residency Model")
DECLARE_DEBUG_VARIABLE(bool, EventsDebugEnable, 0, "enables debug messages for events, virtual events, blocked enqueues, events trees etc.")
DECLARE_DEBUG_VARIABLE(bool, PrintEMDebugInformation, false, "prints execution model related debug information")
/*PERFORMANCE FLAGS*/
// Entry format: DECLARE_DEBUG_VARIABLE(type, name, default value, description).
DECLARE_DEBUG_VARIABLE(bool, EnableNullHardware, false, "works on Windows only, sets the Null Hardware flag that makes all Command buffers completed while GPU does nothing")
DECLARE_DEBUG_VARIABLE(bool, ForceLinearImages, false, "Force linear images. Default is Y-tiled.")
DECLARE_DEBUG_VARIABLE(bool, ForceSLML3Config, false, "Forces L3Config with SLM for all kernels")
DECLARE_DEBUG_VARIABLE(bool, Force32bitAddressing, false, "Forces 32 bit addresses to be used in 64 bit dll")
DECLARE_DEBUG_VARIABLE(bool, DisableStatelessToStatefulOptimization, false, "Disables stateless to stateful optimization for buffers")
// This bool entry spells its default as 0 rather than false.
DECLARE_DEBUG_VARIABLE(bool, DisableConcurrentBlockExecution, 0, "disables concurrent block kernel execution")
DECLARE_DEBUG_VARIABLE(bool, UseNewHeapAllocator, true, "Custom 4GB heap allocator is used")
/*SIMULATION FLAGS*/
// Entry format: DECLARE_DEBUG_VARIABLE(type, name, default value, description).
// This group contains the only std::string-typed variables in the list.
DECLARE_DEBUG_VARIABLE(int32_t, SetCommandStreamReceiver, 0, "Set command stream receiver")
DECLARE_DEBUG_VARIABLE(std::string, TbxServer, "127.0.0.1", "TCP-IP address of TBX server")
DECLARE_DEBUG_VARIABLE(int32_t, TbxPort, 4321, "TCP-IP port of TBX server")
DECLARE_DEBUG_VARIABLE(std::string, ProductFamilyOverride, "unk", "Specify product for use in AUB/TBX")
/*FEATURE FLAGS*/
// Entry format: DECLARE_DEBUG_VARIABLE(type, name, default value, description).
// Several int32_t entries use -1 as their default; their descriptions spell
// out what -1 and the other values mean.
DECLARE_DEBUG_VARIABLE(bool, EnableNV12, true, "Enables NV12 extension")
DECLARE_DEBUG_VARIABLE(bool, EnablePackedYuv, true, "Enables cl_packed_yuv extension")
DECLARE_DEBUG_VARIABLE(bool, EnableIntelVme, true, "Enables cl_intel_motion_estimation extension")
DECLARE_DEBUG_VARIABLE(bool, EnableIntelAdvancedVme, true, "Enables cl_intel_advanced_motion_estimation extension")
DECLARE_DEBUG_VARIABLE(bool, EnableStatelessToStatefulBufferOffsetOpt, false, "Temporary debug variable to help in enabling buffer-offset improvement of the stateless to stateful optimization")
DECLARE_DEBUG_VARIABLE(bool, EnableDeferredDeleter, true, "Enables async deleter")
DECLARE_DEBUG_VARIABLE(bool, EnableAsyncDestroyAllocations, true, "Enables async destroying graphics allocations in mem obj destructor")
DECLARE_DEBUG_VARIABLE(bool, EnableAsyncEventsHandler, true, "Enables async events handler")
DECLARE_DEBUG_VARIABLE(bool, EnableForcePin, true, "Enables early pinning for memory object")
DECLARE_DEBUG_VARIABLE(int32_t, Enable64kbpages, -1, "-1: default behaviour, 0 Disables, 1 Enables support for 64KB pages for driver allocated fine grain svm buffers")
DECLARE_DEBUG_VARIABLE(bool, EnableComputeWorkSizeND, false, "Enables diffrent algorithm to compute locla work size")
DECLARE_DEBUG_VARIABLE(bool, EnableComputeWorkSizeSquared, true, "Enables algorithm to compute the most squared work gropu as passible")
DECLARE_DEBUG_VARIABLE(int32_t, OverrideEnableKmdNotify, -1, "-1: dont override, 0: disable, 1: enable")
DECLARE_DEBUG_VARIABLE(int32_t, OverrideKmdNotifyDelayMs, -1, "-1: dont override, 0: infinite timeout, >0: timeout in ms")
DECLARE_DEBUG_VARIABLE(bool, EnableVaLibCalls, true, "Enable cl-va sharing lib calls")
DECLARE_DEBUG_VARIABLE(int32_t, CsrDispatchMode, 0, "Chooses DispatchMode for Csr")
/*DRIVER TOGGLES*/
// Entry format: DECLARE_DEBUG_VARIABLE(type, name, default value, description).
DECLARE_DEBUG_VARIABLE(int32_t, ForceOCLVersion, 0, "Force specific OpenCL API version")
// The description itself requires the values to stay in sync with the
// PreemptionMode enum, which is declared outside this file.
DECLARE_DEBUG_VARIABLE(int32_t, ForcePreemptionMode, 0, "Keep this variable in sync with PreemptionMode enum. 0 - dont force, 1 - disable, 2 - midBatch, 3 - threadGroup, 4 - midThread")
DECLARE_DEBUG_VARIABLE(int32_t, NodeOrdinal, -1, "-1: default do not override, 0: ENGINE_RCS")
DECLARE_DEBUG_VARIABLE(bool, UseMaxSimdSizeToDeduceMaxWorkgroupSize, false, "With this flag on, max workgroup size is deduced using SIMD32 instead of SIMD8, this causes the max wkg size to be 4 times bigger")

View File

@@ -0,0 +1,233 @@
/*
* Copyright (c) 2017, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#include "debug_settings_manager.h"
#include "runtime/event/event.h"
#include "runtime/kernel/kernel.h"
#include "runtime/mem_obj/mem_obj.h"
#include "runtime/helpers/ptr_math.h"
#include "runtime/helpers/dispatch_info.h"
#include "runtime/helpers/string.h"
#include "runtime/utilities/debug_settings_reader.h"
#include "CL/cl.h"
#include <cstdio>
#include <sstream>
namespace OCLRT {
// The single global settings manager; its functionality level is chosen at
// compile time through globalDebugFunctionalityLevel.
DebugSettingsManager<globalDebugFunctionalityLevel> DebugManager;
// Constructs the manager: sets the default log file name, then - only when
// registryReadAvailable() is true for this DebugLevel - creates a
// SettingsReader and loads every debug variable through it. Finally removes
// the file currently named by logFileName.
template <DebugFunctionalityLevel DebugLevel>
DebugSettingsManager<DebugLevel>::DebugSettingsManager() {
logFileName = "igdrcl.log";
if (registryReadAvailable()) {
readerImpl = SettingsReader::create();
// Redefine the X-macro so that each entry of DebugVariables.def expands to a
// block that reads the setting named after the variable (passing the current
// value as the fallback) and stores the result back into `flags`.
#undef DECLARE_DEBUG_VARIABLE
#define DECLARE_DEBUG_VARIABLE(dataType, variableName, defaultValue, description) \
{ \
dataType tempData = readerImpl->getSetting(#variableName, flags.variableName.get()); \
flags.variableName.set(tempData); \
}
#include "DebugVariables.def"
}
// Delete the log file; the result of std::remove is intentionally not checked.
std::remove(logFileName.c_str());
}
// Opens `filename` with the requested open mode and writes `length` bytes
// starting at `str`. If the stream cannot be opened nothing is written and no
// error is reported.
template <DebugFunctionalityLevel DebugLevel>
void DebugSettingsManager<DebugLevel>::writeToFile(std::string filename, const char *str, size_t length, std::ios_base::openmode mode) {
    std::ofstream stream(filename, mode);
    if (!stream.is_open()) {
        return;
    }
    stream.write(str, length);
    stream.close();
}
// Destroys the settings reader, if one was created by the constructor.
template <DebugFunctionalityLevel DebugLevel>
DebugSettingsManager<DebugLevel>::~DebugSettingsManager() {
    // Deleting a null pointer has no effect, so no explicit null test is needed.
    delete readerImpl;
}
// Writes the kernel source `src` to "<name>.txt" (truncating any existing
// file) when kernel dumping is available for this DebugLevel and the
// DumpKernels flag is set. The size and text are also passed to DBG_LOG under
// the LogApiCalls predicate.
template <DebugFunctionalityLevel DebugLevel>
void DebugSettingsManager<DebugLevel>::dumpKernel(const std::string &name, const std::string &src) {
    const bool dumpRequested = debugKernelDumpingAvailable() && flags.DumpKernels.get();
    if (!dumpRequested) {
        return;
    }

    DBG_LOG(LogApiCalls, "Kernel size", src.size(), src.c_str());
    writeToFile(name + ".txt", src.c_str(), src.size(), std::ios::trunc);
}
// Appends one line to the log file describing entry to, or exit from, the API
// function named `function`. Exit lines include `errorCode`; entry lines do
// not. Does nothing unless debug logging is available for this DebugLevel and
// the LogApiCalls flag is set. The whole operation runs under `mtx`.
template <DebugFunctionalityLevel DebugLevel>
void DebugSettingsManager<DebugLevel>::logApiCall(const char *function, bool enter, int32_t errorCode) {
    if (!debugLoggingAvailable() || !flags.LogApiCalls.get()) {
        return;
    }

    std::unique_lock<std::mutex> guard(mtx);

    std::stringstream line;
    line << "ThreadID: " << std::this_thread::get_id() << " ";
    if (enter) {
        line << "Function Enter: ";
    } else {
        line << "Function Leave (" << errorCode << "): ";
    }
    line << function << std::endl;

    const std::string text = line.str();
    writeToFile(logFileName, text.c_str(), text.size(), std::ios::app);
}
// Returns input[index], or 0 when debug logging is unavailable for this
// DebugLevel or `input` is null. `index` is not range-checked.
template <DebugFunctionalityLevel DebugLevel>
size_t DebugSettingsManager<DebugLevel>::getInput(const size_t *input, int32_t index) {
    if (!debugLoggingAvailable() || input == nullptr) {
        return 0;
    }
    return input[index];
}
// Builds a string listing `numOfEvents` events read from `input`, which is
// reinterpreted as an array of cl_event handles. Each event is printed both as
// the cl_event handle and as the same pointer cast to Event *. Returns an
// empty string when debug logging is unavailable or `input` is null.
template <DebugFunctionalityLevel DebugLevel>
const std::string DebugSettingsManager<DebugLevel>::getEvents(const uintptr_t *input, uint32_t numOfEvents) {
    if (false == debugLoggingAvailable()) {
        return "";
    }

    std::stringstream out;
    if (input != nullptr) {
        auto events = (cl_event *)input;
        for (uint32_t idx = 0; idx < numOfEvents; ++idx) {
            cl_event handle = events[idx];
            out << "cl_event " << handle << ", Event " << (Event *)handle << ", ";
        }
    }
    return out.str();
}
// Writes the first program binary (binaries[0], lengths[0] bytes) to
// "programBinary.bin" when kernel dumping is available and the DumpKernels
// flag is set. Only index 0 is ever dumped; `numDevices` is not used.
// Null arrays, a zero length or a null first binary make this a no-op.
template <DebugFunctionalityLevel DebugLevel>
void DebugSettingsManager<DebugLevel>::dumpBinaryProgram(int32_t numDevices, const size_t *lengths, const unsigned char **binaries) {
    if (!debugKernelDumpingAvailable() || !flags.DumpKernels.get()) {
        return;
    }
    if (lengths == nullptr || binaries == nullptr) {
        return;
    }
    if (lengths[0] == 0 || binaries[0] == nullptr) {
        return;
    }

    std::unique_lock<std::mutex> guard(mtx);
    auto payload = reinterpret_cast<const char *>(binaries[0]);
    writeToFile("programBinary.bin", payload, lengths[0], std::ios::trunc | std::ios::binary);
}
// Dumps each argument of `kernel` that has data to its own binary file named
//   <kernel name>_arg_<index>_<type>_size_<bytes>_flags_<flags>.bin
// Does nothing unless kernel-argument dumping is available for this DebugLevel,
// the DumpKernelArgs flag is set and `kernel` is non-null.
//
// Per-argument classification (first match wins):
//   - local address qualifier            -> "local"     (nothing written)
//   - type string contains "*"           -> "buffer"    (mem object contents)
//   - type string contains "image"       -> "image"     (mem object contents)
//   - type string contains "sampler"     -> "sampler"   (nothing written)
//   - anything else                      -> "immediate" (bytes gathered from
//                                           the cross-thread data)
// The whole dump runs under `mtx`.
template <DebugFunctionalityLevel DebugLevel>
void DebugSettingsManager<DebugLevel>::dumpKernelArgs(const Kernel *kernel) {
    if (false == kernelArgDumpingAvailable()) {
        return;
    }
    if (flags.DumpKernelArgs.get() && kernel != nullptr) {
        std::unique_lock<std::mutex> theLock(mtx);

        for (unsigned int i = 0; i < kernel->getKernelInfo().kernelArgInfo.size(); i++) {
            std::string type;
            const char *ptr = nullptr;
            size_t size = 0;
            // Named memFlags so it does not shadow the `flags` member.
            uint64_t memFlags = 0;
            std::unique_ptr<char[]> argVal = nullptr;

            auto &argInfo = kernel->getKernelInfo().kernelArgInfo[i];
            const bool isPointerArg = argInfo.typeStr.find("*") != std::string::npos;
            const bool isImageArg = !isPointerArg && argInfo.typeStr.find("image") != std::string::npos;

            if (argInfo.addressQualifier == CL_KERNEL_ARG_ADDRESS_LOCAL) {
                type = "local";
            } else if (isPointerArg || isImageArg) {
                // Buffers and images are handled identically: the argument is
                // a cl_mem whose CPU-visible contents are dumped.
                type = isPointerArg ? "buffer" : "image";
                auto clMem = (const cl_mem)kernel->getKernelArg(i);
                auto memObj = castToObject<MemObj>(clMem);
                if (memObj != nullptr) {
                    ptr = static_cast<char *>(memObj->getCpuAddress());
                    size = memObj->getSize();
                    memFlags = memObj->getFlags();
                }
            } else if (argInfo.typeStr.find("sampler") != std::string::npos) {
                type = "sampler";
            } else {
                type = "immediate";
                auto crossThreadData = kernel->getCrossThreadData();
                auto crossThreadDataSize = kernel->getCrossThreadDataSize();
                // Value-initialize the staging buffer: patches are copied to
                // their own sourceOffset, so any gap between them would
                // otherwise be dumped as indeterminate bytes.
                argVal = std::unique_ptr<char[]>(new char[crossThreadDataSize]());

                // NOTE(review): sourceOffset + size is not checked against
                // crossThreadDataSize here - confirm the patch info guarantees it.
                size_t totalArgSize = 0;
                for (const auto &kernelArgPatchInfo : argInfo.kernelArgPatchInfoVector) {
                    auto pSource = ptrOffset(crossThreadData, kernelArgPatchInfo.crossthreadOffset);
                    auto pDestination = ptrOffset(argVal.get(), kernelArgPatchInfo.sourceOffset);
                    memcpy_s(pDestination, kernelArgPatchInfo.size, pSource, kernelArgPatchInfo.size);
                    totalArgSize += kernelArgPatchInfo.size;
                }
                size = totalArgSize;
                ptr = argVal.get();
            }

            // Arguments without data (local, sampler, missing mem object,
            // zero-sized) produce no file.
            if (ptr && size) {
                std::string fileName = kernel->getKernelInfo().name + "_arg_" + std::to_string(i) + "_" + type + "_size_" + std::to_string(size) + "_flags_" + std::to_string(memFlags) + ".bin";
                writeToFile(fileName, ptr, size, std::ios::trunc | std::ios::binary);
            }
        }
    }
}
// Dumps the arguments of every kernel referenced by `multiDispatchInfo` by
// delegating to dumpKernelArgs(const Kernel *) once per dispatch. No-op when
// argument dumping is unavailable, the DumpKernelArgs flag is clear or the
// pointer is null.
template <DebugFunctionalityLevel DebugLevel>
void DebugSettingsManager<DebugLevel>::dumpKernelArgs(const MultiDispatchInfo *multiDispatchInfo) {
    const bool active = kernelArgDumpingAvailable() && flags.DumpKernelArgs.get() && (multiDispatchInfo != nullptr);
    if (active) {
        for (auto &dispatchInfo : *multiDispatchInfo) {
            dumpKernelArgs(dispatchInfo.getKernel());
        }
    }
}
// Explicit instantiations for every DebugFunctionalityLevel, so the member
// definitions in this translation unit are emitted for all three levels.
template class DebugSettingsManager<DebugFunctionalityLevel::None>;
template class DebugSettingsManager<DebugFunctionalityLevel::Full>;
template class DebugSettingsManager<DebugFunctionalityLevel::RegKeys>;
}; // namespace OCLRT

View File

@@ -0,0 +1,296 @@
/*
* Copyright (c) 2017, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#pragma once
#include <sstream>
#include <stdint.h>
#include <string>
#include <fstream>
#include <condition_variable>
#include <mutex>
#include <thread>
// Compile-time selector for how much debug functionality
// DebugSettingsManager provides.
enum class DebugFunctionalityLevel {
None, // Debug functionality disabled
Full, // Debug functionality fully enabled
RegKeys // Only registry key reads enabled
};
// Level used by the global DebugManager, chosen from the build configuration:
// _DEBUG -> Full, _RELEASE_INTERNAL -> RegKeys, anything else -> None.
#if defined(_DEBUG)
constexpr DebugFunctionalityLevel globalDebugFunctionalityLevel = DebugFunctionalityLevel::Full;
#elif defined(_RELEASE_INTERNAL)
constexpr DebugFunctionalityLevel globalDebugFunctionalityLevel = DebugFunctionalityLevel::RegKeys;
#else
constexpr DebugFunctionalityLevel globalDebugFunctionalityLevel = DebugFunctionalityLevel::None;
#endif
namespace OCLRT {
// Forwards `args` unchanged to fprintf when `showDebugLogs` is true and does
// nothing otherwise. `args` must therefore form a valid fprintf argument list:
// a FILE *, a format string, then the values the format consumes.
template <typename... Args>
void printDebugString(bool showDebugLogs, Args &&... args) {
    if (!showDebugLogs) {
        return;
    }
    fprintf(std::forward<Args>(args)...);
}
// NO_SANITIZE: on clang expands to an attribute that disables the "undefined"
// sanitizer for the annotated function; on every other compiler it expands to
// nothing.
#if defined(__clang__)
#define NO_SANITIZE __attribute__((no_sanitize("undefined")))
#else
#define NO_SANITIZE
#endif
class Kernel;
struct MultiDispatchInfo;
class SettingsReader;
// clang-format off
// X-macro expansion: for every entry of DebugVariables.def generate a struct
// named DebugVar<variableName> that holds one value of the entry's type. The
// value starts as the entry's default (converted with a C-style cast) and is
// accessed through get()/set(). The description argument is not used by this
// expansion.
#define DECLARE_DEBUG_VARIABLE(dataType, variableName, defaultValue, description) \
struct DebugVar##variableName \
{ \
DebugVar##variableName() { \
value = (dataType)defaultValue; \
} \
dataType get() const { \
return value; \
} \
void set(dataType data) { \
value = data; \
} \
private: \
dataType value; \
};
#include "DebugVariables.def"
#undef DECLARE_DEBUG_VARIABLE
// clang-format on
// Holds every debug variable declared in DebugVariables.def and provides
// file-based logging and dumping helpers. The DebugLevel template argument
// decides at compile time which helpers do anything: every helper first checks
// one of the static constexpr *Available() predicates and returns early (or
// returns an empty value) when the feature is not available for that level.
//
// Logging helpers serialize on `mtx` and append to the file named by
// `logFileName`. Instances are not copyable.
template <DebugFunctionalityLevel DebugLevel>
class DebugSettingsManager {
  public:
    // One DebugVar<name> member per entry of DebugVariables.def.
    struct DebugVariables {
#define DECLARE_DEBUG_VARIABLE(dataType, variableName, defaultValue, description) \
    DebugVar##variableName variableName;
#include "DebugVariables.def"
#undef DECLARE_DEBUG_VARIABLE
    };

    DebugSettingsManager();
    ~DebugSettingsManager();

    DebugSettingsManager(const DebugSettingsManager &) = delete;
    DebugSettingsManager &operator=(const DebugSettingsManager &) = delete;

    // Feature predicates; all are compile-time constants for a given DebugLevel.
    static constexpr bool debugLoggingAvailable() {
        return DebugLevel == DebugFunctionalityLevel::Full;
    }

    static constexpr bool debugKernelDumpingAvailable() {
        return DebugLevel == DebugFunctionalityLevel::Full;
    }

    static constexpr bool kernelArgDumpingAvailable() {
        return DebugLevel == DebugFunctionalityLevel::Full;
    }

    static constexpr bool registryReadAvailable() {
        return (DebugLevel == DebugFunctionalityLevel::Full) || (DebugLevel == DebugFunctionalityLevel::RegKeys);
    }

    static constexpr bool disabled() {
        return DebugLevel == DebugFunctionalityLevel::None;
    }

    void dumpKernel(const std::string &name, const std::string &src);
    void logApiCall(const char *function, bool enter, int32_t errorCode);
    size_t getInput(const size_t *input, int32_t index);
    const std::string getEvents(const uintptr_t *input, uint32_t numOfEvents);
    MOCKABLE_VIRTUAL void writeToFile(std::string filename, const char *str, size_t length, std::ios_base::openmode mode);
    void dumpBinaryProgram(int32_t numDevices, const size_t *lengths, const unsigned char **binaries);
    void dumpKernelArgs(const Kernel *kernel);
    void dumpKernelArgs(const MultiDispatchInfo *multiDispatchInfo);

    // Formats the first `workDim` entries of `input` as
    // "<localWorkSize|globalWorkSize>[i]: \t<value>\n" lines. Returns an empty
    // string when logging is unavailable or `input` is null.
    const std::string getSizes(const uintptr_t *input, uint32_t workDim, bool local) {
        if (false == debugLoggingAvailable()) {
            return "";
        }

        std::stringstream os;
        std::string workSize;
        if (local) {
            workSize = "localWorkSize";
        } else {
            workSize = "globalWorkSize";
        }

        for (uint32_t i = 0; i < workDim; i++) {
            if (input != nullptr) {
                os << workSize << "[" << i << "]: \t" << input[i] << "\n";
            }
        }
        return os.str();
    }

    // Renders the value behind `paramValue` as a decimal number, interpreting
    // it according to `paramSize` (1, 4 or 8 bytes). Any other size, a null
    // pointer, or unavailable logging yields an empty string.
    const std::string deviceInfoPointerToString(const void *paramValue, size_t paramSize) {
        if (false == debugLoggingAvailable()) {
            return "";
        }

        std::stringstream os;
        if (paramValue) {
            // Named casts keep the pointee const instead of silently dropping it.
            switch (paramSize) {
            case sizeof(uint32_t):
                os << *static_cast<const uint32_t *>(paramValue);
                break;
            case sizeof(uint64_t):
                os << *static_cast<const uint64_t *>(paramValue);
                break;
            case sizeof(uint8_t):
                // Widen so the byte is printed as a number, not a character.
                os << static_cast<uint32_t>(*static_cast<const uint8_t *>(paramValue));
                break;
            default:
                break;
            }
        }
        return os.str();
    }

    // Appends a framed block of "name: value" lines to the log file when
    // logging is available and the LogApiCalls flag is set.
    // Expects pairs of args (even number of args)
    template <typename... Types>
    void logInputs(Types &&... params) {
        if (debugLoggingAvailable()) {
            if (this->flags.LogApiCalls.get()) {
                std::unique_lock<std::mutex> theLock(mtx);
                std::thread::id thisThread = std::this_thread::get_id();

                std::stringstream ss;
                ss << "------------------------------\n";
                printInputs(ss, "ThreadID", thisThread, params...);
                ss << "------------------------------" << std::endl;

                // str() returns a copy of the buffer; take it once instead of
                // once for the pointer and again for the length.
                const std::string text = ss.str();
                writeToFile(logFileName, text.c_str(), text.length(), std::ios::app);
            }
        }
    }

    // Invokes `callable` only when logging is available and `predicate` is
    // true, so whatever the callable evaluates is skipped otherwise.
    template <typename FT>
    void logLazyEvaluateArgs(bool predicate, FT &&callable) {
        if (debugLoggingAvailable()) {
            if (predicate) {
                callable();
            }
        }
    }

    // Appends one space-separated line, prefixed with the calling thread id,
    // to the log file when logging is available and `enableLog` is true.
    template <typename... Types>
    void log(bool enableLog, Types... params) {
        if (debugLoggingAvailable()) {
            if (enableLog) {
                std::unique_lock<std::mutex> theLock(mtx);

                std::thread::id thisThread = std::this_thread::get_id();

                std::stringstream ss;
                print(ss, "ThreadID", thisThread, params...);

                // Single copy of the buffer, matching logInputs.
                const std::string text = ss.str();
                writeToFile(logFileName, text.c_str(), text.length(), std::ios::app);
            }
        }
    }

    DebugVariables flags;
    void *injectFcn = nullptr;

    // The returned pointer refers to the internal string and is only valid
    // until the log file name is changed or the manager is destroyed.
    const char *getLogFileName() {
        return logFileName.c_str();
    }

    void setLogFileName(std::string filename) {
        logFileName = filename;
    }

  protected:
    SettingsReader *readerImpl = nullptr;
    std::mutex mtx;
    std::string logFileName;

    // Required for variadic template with 0 args passed
    void printInputs(std::stringstream &ss) {}

    // Prints inputs in format: InputName: InputValue \newline
    // An odd number of remaining arguments means `first` is a name (followed
    // by ": "); an even number means it is a value (followed by a newline).
    template <typename T1, typename... Types>
    void printInputs(std::stringstream &ss, T1 first, Types... params) {
        if (debugLoggingAvailable()) {
            const size_t argsLeft = sizeof...(params);

            ss << "\t" << first;
            if (argsLeft % 2) {
                ss << ": ";
            } else {
                ss << std::endl;
            }
            printInputs(ss, params...);
        }
    }

    // Required for variadic template with 0 args passed
    void print(std::stringstream &ss) {}

    // Streams every argument followed by a space; the last argument is also
    // followed by a newline.
    template <typename T1, typename... Types>
    void print(std::stringstream &ss, T1 first, Types... params) {
        if (debugLoggingAvailable()) {
            const size_t argsLeft = sizeof...(params);

            ss << first << " ";
            if (argsLeft == 0) {
                ss << std::endl;
            }
            print(ss, params...);
        }
    }
};
extern DebugSettingsManager<globalDebugFunctionalityLevel> DebugManager;
// Scope guard for API entry points: when `Enabled` is true the constructor
// logs the function entry through DebugManager and the destructor logs the
// function exit, reporting *errorCode if a pointer was supplied and 0
// otherwise. When `Enabled` is false neither call is made.
template <bool Enabled>
class DebugSettingsApiEnterWrapper {
  public:
    DebugSettingsApiEnterWrapper(const char *funcName, const int *errorCode)
        : funcName(funcName), errorCode(errorCode) {
        if (!Enabled) {
            return;
        }
        DebugManager.logApiCall(funcName, true, 0);
    }

    ~DebugSettingsApiEnterWrapper() {
        if (!Enabled) {
            return;
        }
        // Read the error code at scope exit so the value set by the API body
        // is the one reported.
        int reportedCode = 0;
        if (errorCode != nullptr) {
            reportedCode = *errorCode;
        }
        DebugManager.logApiCall(funcName, false, reportedCode);
    }

    const char *funcName;
    const int *errorCode;
};
}; // namespace OCLRT
// From here on DECLARE_DEBUG_VARIABLE expands to nothing, until an including
// file #undefs and redefines it.
#define DECLARE_DEBUG_VARIABLE(dataType, variableName, defaultValue, description)
// Wraps DBG_MANAGER.LOG_FUNCTION(args...) in a lambda handed to
// logLazyEvaluateArgs, which calls it only when debug logging is available and
// the flag named PREDICATE is set; the argument expressions are therefore not
// evaluated otherwise. The lambda captures by reference.
#define DBG_LOG_LAZY_EVALUATE_ARGS(DBG_MANAGER, PREDICATE, LOG_FUNCTION, ...) \
DBG_MANAGER.logLazyEvaluateArgs(DBG_MANAGER.flags.PREDICATE.get(), [&] { DBG_MANAGER.LOG_FUNCTION(__VA_ARGS__); })
// Logs the given values through OCLRT::DebugManager.log when the flag named
// PREDICATE is set.
#define DBG_LOG(PREDICATE, ...) \
DBG_LOG_LAZY_EVALUATE_ARGS(OCLRT::DebugManager, PREDICATE, log, OCLRT::DebugManager.flags.PREDICATE.get(), __VA_ARGS__)
// Logs name/value pairs through OCLRT::DebugManager.logInputs when the
// LogApiCalls flag is set.
#define DBG_LOG_INPUTS(...) \
DBG_LOG_LAZY_EVALUATE_ARGS(OCLRT::DebugManager, LogApiCalls, logInputs, __VA_ARGS__)

View File

@@ -0,0 +1,40 @@
/*
* Copyright (c) 2017, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#pragma once
#include <cstdlib>
namespace OCLRT {
struct HardwareInfo;
// Static-only interface for obtaining and releasing device hardware info.
// All members are static; the definitions live outside this header.
class DeviceFactory {
public:
// Outputs a HardwareInfo pointer through `pHWInfos` and a device count
// through `numDevices`; returns a bool status.
// NOTE(review): presumably true means success - confirm in the implementation.
static bool getDevices(HardwareInfo **pHWInfos, size_t &numDevices);
static void releaseDevices();
protected:
static size_t numDevices;
static HardwareInfo *hwInfos;
// Untyped pointer; its concrete meaning is defined by the implementation.
static void *internal;
};
}

View File

@@ -0,0 +1,61 @@
/*
* Copyright (c) 2017, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#include "runtime/api/dispatch.h"
#include "runtime/api/api.h"
#include "runtime/helpers/get_info.h"
#include "runtime/context/context.h"
#include "runtime/mem_obj/mem_obj.h"
#include "runtime/mem_obj/image.h"
// OS-specific part of the mem-object info query. When built with LIBVA, a
// CL_MEM_VA_API_MEDIA_SURFACE_INTEL query is delegated to the sharing handler,
// which is handed *srcParamSize and *srcParam. Every other paramName (and every
// paramName in builds without LIBVA) leaves both outputs untouched.
void OCLRT::MemObj::getOsSpecificMemObjectInfo(const cl_mem_info &paramName, size_t *srcParamSize, void **srcParam) {
switch (paramName) {
#ifdef LIBVA
case CL_MEM_VA_API_MEDIA_SURFACE_INTEL:
peekSharingHandler()->getMemObjectInfo(*srcParamSize, *srcParam);
break;
#endif
default:
break;
}
}
// OS-specific part of the image info query. When built with LIBVA, a
// CL_IMAGE_VA_API_PLANE_INTEL query reports the mediaPlaneType member:
// *srcParamSize becomes sizeof(cl_uint) and *srcParam points at the member
// itself (not a copy). Every other paramName leaves both outputs untouched.
void OCLRT::Image::getOsSpecificImageInfo(const cl_image_info &paramName, size_t *srcParamSize, void **srcParam) {
switch (paramName) {
#ifdef LIBVA
case CL_IMAGE_VA_API_PLANE_INTEL:
*srcParamSize = sizeof(cl_uint);
*srcParam = &mediaPlaneType;
break;
#endif
default:
break;
}
}
// OS-specific context property hook. This implementation recognizes no
// property: both arguments are ignored and CL_INVALID_PROPERTY is always
// returned.
cl_int OCLRT::Context::createContextOsProperties(cl_context_properties &propertyType, cl_context_properties &propertyValue) {
return CL_INVALID_PROPERTY;
}
// OS-specific context info hook. This implementation provides no information:
// it always returns nullptr and does not write to srcParamSize.
void *OCLRT::Context::getOsContextInfo(cl_context_info &paramName, size_t *srcParamSize) {
return nullptr;
}

View File

@@ -0,0 +1,39 @@
/*
* Copyright (c) 2017, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#pragma once
namespace OCLRT {
// Empty tag types naming the D3D versions; they carry no members here.
namespace D3DTypesHelper {
struct D3D9 {
};
struct D3D10 {
};
struct D3D11 {
};
} // namespace D3DTypesHelper
// Empty class template parameterized on one of the tag types above. In this
// header it declares no members, so it only allows code that names
// D3DSharingFunctions<T> to compile.
template <typename D3D>
class D3DSharingFunctions {
};
} // namespace OCLRT

View File

@@ -0,0 +1,57 @@
/*
* Copyright (c) 2017, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#include "runtime/utilities/linux/debug_env_reader.h"
namespace OCLRT {
// Returns a newly allocated EnvironmentVariableReader. The object is created
// with `new`, so the caller is responsible for deleting it.
SettingsReader *SettingsReader::createOsReader() {
return new EnvironmentVariableReader;
}
// Boolean setting lookup: delegates to the int32_t overload (with the default
// converted to 0/1) and reports true for any non-zero result.
bool EnvironmentVariableReader::getSetting(const char *settingName, bool defaultValue) {
    const int32_t numericDefault = static_cast<int32_t>(defaultValue);
    const int32_t numericValue = getSetting(settingName, numericDefault);
    return numericValue != 0;
}
// Integer setting lookup: returns `defaultValue` when the environment variable
// `settingName` is not set, otherwise the result of atoi() on its text (so a
// value that does not start with a number yields 0, not the default).
int32_t EnvironmentVariableReader::getSetting(const char *settingName, int32_t defaultValue) {
    const char *envValue = getenv(settingName);
    if (envValue == nullptr) {
        return defaultValue;
    }
    return atoi(envValue);
}
// String setting lookup: returns the text of the environment variable
// `settingName` when it is set, otherwise a copy of `value`.
std::string EnvironmentVariableReader::getSetting(const char *settingName, const std::string &value) {
    const char *envValue = getenv(settingName);
    if (envValue != nullptr) {
        return std::string(envValue);
    }
    return value;
}
}

View File

@@ -0,0 +1,33 @@
/*
* Copyright (c) 2017, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#include "runtime/command_stream/device_command_stream.h"
#include "hw_cmds.h"
#include "drm_command_stream.h"
namespace OCLRT {
// Creates the command stream receiver for this OS: a DrmCommandStreamReceiver
// built with no explicit Drm (nullptr) and the gem-close worker inactive.
template <typename GfxFamily>
CommandStreamReceiver *DeviceCommandStreamReceiver<GfxFamily>::create(const HardwareInfo &hwInfo) {
    const auto workerMode = gemCloseWorkerMode::gemCloseWorkerInactive;
    return new DrmCommandStreamReceiver<GfxFamily>(hwInfo, nullptr, workerMode);
}
}

View File

@@ -0,0 +1,101 @@
/*
* Copyright (c) 2017, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#include "runtime/helpers/hw_info.h"
#include "runtime/helpers/options.h"
#include "runtime/os_interface/device_factory.h"
#include "runtime/os_interface/linux/drm_neo.h"
#include "runtime/os_interface/linux/hw_info_config.h"
#include "runtime/os_interface/linux/os_interface.h"
#include "drm/i915_drm.h"
#include "runtime/gmm_helper/gmm_helper.h"
#include "runtime/device/device.h"
#include <vector>
#include <cstring>
namespace OCLRT {
// Definitions of DeviceFactory's static state. numDevices and hwInfos are
// filled in by getDevices() and reset by releaseDevices().
size_t DeviceFactory::numDevices = 0;
HardwareInfo *DeviceFactory::hwInfos = nullptr;
void *DeviceFactory::internal = nullptr;
// Opens the DRM device, configures a HardwareInfo for it and publishes the
// result both through the out-parameters and DeviceFactory's static members.
//
// pHWInfos   - receives a new[]-allocated HardwareInfo array (also stored in
//              DeviceFactory::hwInfos and released by releaseDevices()).
// numDevices - receives the number of entries in that array.
// Returns false when no DRM device could be created or configureHwInfo
// fails; otherwise returns the result of Gmm::initContext.
bool DeviceFactory::getDevices(HardwareInfo **pHWInfos, size_t &numDevices) {
std::vector<HardwareInfo> tHwInfos;
int retVal;
unsigned int devNum = 0;
Drm *drm = nullptr;
std::unique_ptr<OSInterface> osInterface = std::unique_ptr<OSInterface>(new OSInterface());
// The unconditional break at the end of the body means this loop processes
// at most one device (devNum ends up as 0 or 1).
while ((drm = Drm::create(devNum)) != nullptr) {
const HardwareInfo *pCurrDevice = platformDevices[devNum];
HardwareInfo tmpHwInfo;
memset(&tmpHwInfo, 0, sizeof(tmpHwInfo));
osInterface.get()->get()->setDrm(drm);
HwInfoConfig *hwConfig = HwInfoConfig::get(pCurrDevice->pPlatform->eProductFamily);
retVal = hwConfig->configureHwInfo(pCurrDevice, &tmpHwInfo, osInterface.get());
if (retVal != 0) {
// NOTE(review): the drm obtained from Drm::create above is not closed on
// this path - confirm whether Drm::closeDevice should be called here.
return false;
}
tHwInfos.push_back(tmpHwInfo);
devNum++;
break;
}
if (devNum < 1)
return false;
// Copy the collected entries into a plain array handed out to the caller.
HardwareInfo *ptr = new HardwareInfo[devNum];
for (size_t i = 0; i < tHwInfos.size(); i++)
ptr[i] = tHwInfos[i];
numDevices = devNum;
*pHWInfos = ptr;
DeviceFactory::numDevices = devNum;
DeviceFactory::hwInfos = ptr;
// Gmm is initialised from the first (index 0) HardwareInfo entry.
return Gmm::initContext(hwInfos->pPlatform, hwInfos->pSkuTable,
hwInfos->pWaTable, hwInfos->pSysInfo);
}
// Tears down what getDevices() set up: closes every DRM device, deletes the
// per-device tables referenced by each HardwareInfo, frees the array itself
// and finally resets the static bookkeeping.
void DeviceFactory::releaseDevices() {
    const bool haveDevices = DeviceFactory::numDevices > 0;
    if (haveDevices) {
        unsigned int index = 0;
        while (index < DeviceFactory::numDevices) {
            Drm::closeDevice(index);
            delete hwInfos[index].pSysInfo;
            delete hwInfos[index].pSkuTable;
            delete hwInfos[index].pWaTable;
            delete hwInfos[index].pPlatform;
            ++index;
        }
        delete[] hwInfos;
    }

    DeviceFactory::hwInfos = nullptr;
    DeviceFactory::numDevices = 0;
}
void Device::appendOSExtensions(std::string &deviceExtensions) {
    // Nothing is appended to deviceExtensions by this implementation.
}
} // namespace OCLRT

View File

@@ -0,0 +1,30 @@
/*
* Copyright (c) 2017, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#include "runtime/device/driver_info.h"
namespace OCLRT {
// Returns a new, default-constructed DriverInfo; osInterface is not used by
// this implementation.
DriverInfo *DriverInfo::create(OSInterface *osInterface) {
    DriverInfo *info = new DriverInfo();
    return info;
}
} // namespace OCLRT

View File

@@ -0,0 +1,80 @@
/*
* Copyright (c) 2017, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#pragma once
#include "runtime/os_interface/32bit_memory.h"
#include "runtime/helpers/aligned_memory.h"
#include "runtime/helpers/ptr_math.h"
#include <sys/mman.h>
namespace OCLRT {
// mmap-based allocator that tries to hand out memory below max32BitAddress.
// It keeps no state of its own; the range cursors and the mmap/munmap entry
// points live in the OsInternals object it references.
// NOTE(review): as visible here the class body is closed by a bare `}` with
// no trailing `;` and the enclosing namespace is never closed - confirm
// whether this header is actually compiled anywhere.
class Allocator32bit::OsInternals::Drm32BitAllocator {
protected:
Allocator32bit::OsInternals &outer;
public:
Drm32BitAllocator(Allocator32bit::OsInternals &outer) : outer(outer) {
}
// Maps `size` bytes of anonymous private memory.
// Attempt order: (1) MAP_32BIT with no hint, (2) hint at upperRangeAddress,
// (3) hint at lowerRangeAddress. Returns nullptr when the last attempt
// yields an address at or above max32BitAddress (MAP_FAILED included).
void *allocate(size_t size) {
auto ptr = outer.mmapFunction(nullptr, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_32BIT, -1, 0);
// In case we failed, retry with address provided as a hint
if (ptr == MAP_FAILED) {
ptr = outer.mmapFunction((void *)outer.upperRangeAddress, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
// Reject the mapping if its page-aligned end is not below max32BitAddress.
if (((uintptr_t)ptr + alignUp(size, 4096)) >= max32BitAddress || ptr == MAP_FAILED) {
// Note: munmapFunction is invoked even when ptr is MAP_FAILED.
outer.munmapFunction(ptr, size);
// Try to use lower range
ptr = outer.mmapFunction((void *)outer.lowerRangeAddress, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
if ((uintptr_t)ptr >= max32BitAddress) {
outer.munmapFunction(ptr, size);
return nullptr;
}
// Advance the lower-range cursor past the new mapping.
outer.lowerRangeAddress = (uintptr_t)ptr + alignUp(size, 4096);
return ptr;
}
// Advance the upper-range cursor past the new mapping.
outer.upperRangeAddress = (uintptr_t)ptr + alignUp(size, 4096);
}
return ptr;
}
// Unmaps [ptr, ptr + size). If the block ends exactly at one of the range
// cursors, that cursor is moved back by the page-aligned size first.
// Returns 0 for nullptr/MAP_FAILED, otherwise the munmapFunction result.
int free(void *ptr, uint64_t size) {
if ((ptr == MAP_FAILED) || (ptr == nullptr))
return 0;
auto alignedSize = alignUp(size, 4096);
auto offsetedPtr = (uintptr_t)ptrOffset(ptr, alignedSize);
if (offsetedPtr == outer.upperRangeAddress) {
outer.upperRangeAddress -= alignedSize;
} else if (offsetedPtr == outer.lowerRangeAddress) {
outer.lowerRangeAddress -= alignedSize;
}
return outer.munmapFunction(ptr, size);
}
~Drm32BitAllocator() = default;
}

View File

@@ -0,0 +1,176 @@
/*
* Copyright (c) 2017, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#include <memory>
#include "runtime/os_interface/32bit_memory.h"
#include "runtime/helpers/aligned_memory.h"
#include "runtime/helpers/ptr_math.h"
#include "runtime/helpers/basic_math.h"
#include "runtime/os_interface/debug_settings_manager.h"
#include <sys/mman.h>
using namespace OCLRT;
// Initial values of OsInternals::upperRangeAddress and lowerRangeAddress,
// which Drm32BitAllocator passes to mmap as address hints.
constexpr uintptr_t maxMmap32BitAddress = 0x80000000;
constexpr uintptr_t lowerRangeStart = 0x10000000;
// Linux-side state of Allocator32bit: the mmap/munmap entry points, the two
// hint cursors used by Drm32BitAllocator, and the base/size of the single
// large heap mapping used when the heap allocator path is selected.
class Allocator32bit::OsInternals {
public:
// Next hint addresses for the upper and lower fallback ranges.
uintptr_t upperRangeAddress = maxMmap32BitAddress;
uintptr_t lowerRangeAddress = lowerRangeStart;
// Indirection over the system calls (presumably so tests can substitute
// them - confirm).
decltype(&mmap) mmapFunction = mmap;
decltype(&munmap) munmapFunction = munmap;
// Base and size of the heap mapping; unmapped in ~Allocator32bit when set.
void *heapBasePtr = (void *)0;
size_t heapSize = 0;
// mmap-based allocator that tries to hand out memory below max32BitAddress.
class Drm32BitAllocator {
protected:
Allocator32bit::OsInternals &outer;
public:
Drm32BitAllocator(Allocator32bit::OsInternals &outer) : outer(outer) {
}
// Maps `size` bytes of anonymous private memory.
// Attempt order: (1) MAP_32BIT with no hint, (2) hint at upperRangeAddress,
// (3) hint at lowerRangeAddress. Returns nullptr when the last attempt
// yields an address at or above max32BitAddress (MAP_FAILED included).
void *allocate(size_t size) {
auto ptr = outer.mmapFunction(nullptr, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_32BIT, -1, 0);
// In case we failed, retry with address provided as a hint
if (ptr == MAP_FAILED) {
ptr = outer.mmapFunction((void *)outer.upperRangeAddress, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
// Reject the mapping if its page-aligned end is not below max32BitAddress.
if (((uintptr_t)ptr + alignUp(size, 4096)) >= max32BitAddress || ptr == MAP_FAILED) {
// Note: munmapFunction is invoked even when ptr is MAP_FAILED.
outer.munmapFunction(ptr, size);
// Try to use lower range
ptr = outer.mmapFunction((void *)outer.lowerRangeAddress, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
if ((uintptr_t)ptr >= max32BitAddress) {
outer.munmapFunction(ptr, size);
return nullptr;
}
// Advance the lower-range cursor past the new mapping.
outer.lowerRangeAddress = (uintptr_t)ptr + alignUp(size, 4096);
return ptr;
}
// Advance the upper-range cursor past the new mapping.
outer.upperRangeAddress = (uintptr_t)ptr + alignUp(size, 4096);
}
return ptr;
}
// Unmaps [ptr, ptr + size). If the block ends exactly at one of the range
// cursors, that cursor is moved back by the page-aligned size first.
// No null/MAP_FAILED check here; Allocator32bit::free filters those out
// before delegating. Returns the munmapFunction result.
int free(void *ptr, uint64_t size) {
auto alignedSize = alignUp(size, 4096);
auto offsetedPtr = (uintptr_t)ptrOffset(ptr, alignedSize);
if (offsetedPtr == outer.upperRangeAddress) {
outer.upperRangeAddress -= alignedSize;
} else if (offsetedPtr == outer.lowerRangeAddress) {
outer.lowerRangeAddress -= alignedSize;
}
return outer.munmapFunction(ptr, size);
}
~Drm32BitAllocator() = default;
};
// Raw owning pointer: created in the Allocator32bit constructor and deleted
// in ~Allocator32bit.
Drm32BitAllocator *drmAllocator = nullptr;
};
// Definition of the flag declared extern in 32bit_memory.h; set to true in
// this translation unit.
bool OCLRT::is32BitOsAllocatorAvailable = true;
// Wraps an already known address range [base, base + size): records the
// range and creates the HeapAllocator that manages it. osInternals is left
// empty on this path.
Allocator32bit::Allocator32bit(uint64_t base, uint64_t size) {
    this->base = base;
    this->size = size;
    heapAllocator.reset(new HeapAllocator((void *)base, size));
}
// Default construction delegates to the OsInternals* constructor with a
// freshly allocated OsInternals.
OCLRT::Allocator32bit::Allocator32bit()
    : Allocator32bit(new OsInternals) {
}
// Builds the allocator around the supplied OsInternals, which is stored in
// (and therefore owned by) the osInternals unique_ptr.
//
// With the UseNewHeapAllocator debug flag set, one large anonymous
// MAP_NORESERVE mapping is created and managed by a HeapAllocator; if the
// first mmap fails, a second attempt is made with three quarters of the
// size. When both attempts fail the allocator is left with a null base and
// zero size. Without the flag, a Drm32BitAllocator is created instead.
OCLRT::Allocator32bit::Allocator32bit(Allocator32bit::OsInternals *osInternalsIn) : osInternals(osInternalsIn) {
    if (DebugManager.flags.UseNewHeapAllocator.get()) {
        size_t sizeToMap = alignUp(4 * GB - 8096, 4096);
        void *ptr = this->osInternals->mmapFunction(nullptr, sizeToMap, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0);

        if (ptr == MAP_FAILED) {
            // The retry has to request the reduced size. Asking for sizeToMap
            // again would just repeat the failed call and, on success, record
            // a heap size smaller than the real mapping so that the munmap in
            // the destructor would leave a quarter of it mapped.
            const size_t sizeToMapRetry = sizeToMap - (sizeToMap / 4);
            ptr = this->osInternals->mmapFunction(nullptr, sizeToMapRetry, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0);

            DebugManager.log(DebugManager.flags.PrintDebugMessages.get(), __FUNCTION__, " Allocator RETRY ptr == ", ptr);

            if (ptr != MAP_FAILED) {
                sizeToMap = sizeToMapRetry;
            }
        }

        DebugManager.log(DebugManager.flags.PrintDebugMessages.get(), __FUNCTION__, "Allocator ptr == \n", ptr);

        // Normalise total failure to an empty heap.
        if (ptr == MAP_FAILED) {
            ptr = nullptr;
            sizeToMap = 0;
        }

        osInternals->heapBasePtr = (void *)ptr;
        osInternals->heapSize = sizeToMap;
        base = (uint64_t)ptr;
        size = sizeToMap;

        heapAllocator = std::unique_ptr<HeapAllocator>(new HeapAllocator(ptr, sizeToMap));
    } else {
        this->osInternals->drmAllocator = new Allocator32bit::OsInternals::Drm32BitAllocator(*this->osInternals);
    }
}
// Releases what the OsInternals* constructor created: unmaps the heap
// mapping (if any) and deletes the Drm32BitAllocator (if any). Does nothing
// when the allocator was built without OsInternals.
OCLRT::Allocator32bit::~Allocator32bit() {
    OsInternals *internals = this->osInternals.get();
    if (internals == nullptr) {
        return;
    }

    if (internals->heapBasePtr != nullptr) {
        internals->munmapFunction(internals->heapBasePtr, internals->heapSize);
    }

    if (internals->drmAllocator != nullptr) {
        delete internals->drmAllocator;
    }
}
// Allocates `size` bytes from whichever backend the UseNewHeapAllocator
// debug flag selects: the HeapAllocator when set, the Drm32BitAllocator
// otherwise. `size` is passed to the backend by reference/value as declared.
void *OCLRT::Allocator32bit::allocate(size_t &size) {
    if (DebugManager.flags.UseNewHeapAllocator.get()) {
        void *fromHeap = this->heapAllocator->allocate(size);
        return fromHeap;
    }

    void *fromDrm = this->osInternals->drmAllocator->allocate(size);
    return fromDrm;
}
// Returns a block to the active backend. nullptr and MAP_FAILED are ignored
// (result 0). The heap path always reports 0; the Drm32BitAllocator path
// reports that allocator's result.
int Allocator32bit::free(void *ptr, size_t size) {
    const bool nothingToFree = (ptr == MAP_FAILED) || (ptr == nullptr);
    if (nothingToFree) {
        return 0;
    }

    if (!DebugManager.flags.UseNewHeapAllocator.get()) {
        return this->osInternals->drmAllocator->free(ptr, size);
    }

    this->heapAllocator->free(ptr, size);
    return 0;
}
uintptr_t Allocator32bit::getBase() {
return (uintptr_t)base;
}

View File

@@ -0,0 +1,52 @@
/*
* Copyright (c) 2017, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#pragma once
#include "runtime/memory_manager/graphics_allocation.h"
namespace OCLRT {
class BufferObject;

// OS-specific handle wrapper: holds a pointer to a BufferObject, null until
// one is assigned.
struct OsHandle {
    BufferObject *bo{nullptr};
};
// GraphicsAllocation that additionally remembers the BufferObject backing it.
class DrmAllocation : public GraphicsAllocation {
  public:
    // CPU pointer + size.
    DrmAllocation(BufferObject *bo, void *ptrIn, size_t sizeIn)
        : GraphicsAllocation(ptrIn, sizeIn), bo(bo) {}

    // CPU pointer + size + shared handle.
    DrmAllocation(BufferObject *bo, void *ptrIn, size_t sizeIn, osHandle sharedHandle)
        : GraphicsAllocation(ptrIn, sizeIn, sharedHandle), bo(bo) {}

    // CPU pointer + GPU address + size (the third base argument is fixed at 0).
    DrmAllocation(BufferObject *bo, void *ptrIn, uint64_t gpuAddress, size_t sizeIn)
        : GraphicsAllocation(ptrIn, gpuAddress, 0, sizeIn), bo(bo) {}

    // When the allocation has fragments, the buffer object of the first
    // fragment is returned; otherwise the one given at construction.
    BufferObject *getBO() const {
        if (!fragmentsStorage.fragmentCount) {
            return this->bo;
        }
        return fragmentsStorage.fragmentStorageData[0].osHandleStorage->bo;
    }

  protected:
    BufferObject *bo;
};
}

View File

@@ -0,0 +1,223 @@
/*
* Copyright (c) 2017, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#include "runtime/helpers/aligned_memory.h"
#include "runtime/helpers/debug_helpers.h"
#include "runtime/os_interface/linux/drm_buffer_object.h"
#include "runtime/os_interface/linux/drm_memory_manager.h"
#include "runtime/os_interface/linux/drm_neo.h"
#include "runtime/os_interface/linux/os_time.h"
#include <sys/syscall.h>
#include <sys/mman.h>
#include <sys/ioctl.h>
#include <sys/types.h>
#include <errno.h>
#include <fcntl.h>
#include <string.h>
#include <unistd.h>
#include <stdarg.h>
#include "drm/i915_drm.h"
#include <map>
namespace OCLRT {
// Creates a buffer object for an existing gem handle with one reference,
// no softpin, no tiling, and zeroed size/address/offset.
BufferObject::BufferObject(Drm *drm, int handle, bool isAllocated)
    : drm(drm),
      refCount(1),
      execObjectsStorage(nullptr),
      handle(handle),
      isSoftpin(false),
      isReused(false),
      tiling_mode(I915_TILING_NONE),
      stride(0),
      offset64(0),
      size(0),
      address(nullptr),
      isAllocated(isAllocated) {
}
uint32_t BufferObject::getRefCount() const {
return this->refCount.load();
}
bool BufferObject::softPin(uint64_t offset) {
this->isSoftpin = true;
this->offset64 = offset;
return true;
};
bool BufferObject::close() {
struct drm_gem_close close;
memset(&close, 0, sizeof(close));
close.handle = this->handle;
close.pad = 0;
int ret = this->drm->ioctl(DRM_IOCTL_GEM_CLOSE, &close);
if (ret != 0) {
int err = errno;
printDebugString(DebugManager.flags.PrintDebugMessages.get(), stderr, "ioctl(GEM_CLOSE) failed with %d. errno=%d(%s)\n", ret, err, strerror(err));
UNRECOVERABLE_IF(true);
return false;
}
this->handle = -1;
return true;
}
int BufferObject::wait(int64_t timeoutNs) {
struct drm_i915_gem_wait wait;
wait.bo_handle = this->handle;
wait.flags = 0;
wait.timeout_ns = -1;
int ret = this->drm->ioctl(DRM_IOCTL_I915_GEM_WAIT, &wait);
if (ret != 0) {
int err = errno;
printDebugString(DebugManager.flags.PrintDebugMessages.get(), stderr, "ioctl(I915_GEM_WAIT) failed with %d. errno=%d(%s)\n", ret, err, strerror(err));
}
UNRECOVERABLE_IF(ret != 0);
return ret;
}
bool BufferObject::setTiling(uint32_t mode, uint32_t stride) {
if (this->tiling_mode == mode) {
return true;
}
drm_i915_gem_set_tiling set_tiling;
memset(&set_tiling, 0, sizeof(set_tiling));
set_tiling.handle = this->handle;
set_tiling.tiling_mode = mode;
set_tiling.stride = stride;
int ret = this->drm->ioctl(DRM_IOCTL_I915_GEM_SET_TILING, &set_tiling);
if (ret != 0) {
return false;
}
this->tiling_mode = set_tiling.tiling_mode;
this->stride = set_tiling.stride;
return set_tiling.tiling_mode == mode;
};
// Fills one execbuffer2 object entry for this buffer object: no relocations,
// the softpin offset plus EXEC_OBJECT_PINNED when softpinned, and (x86_64
// only) EXEC_OBJECT_SUPPORTS_48B_ADDRESS when the address has any
// MemoryConstants::zoneHigh bit set.
void BufferObject::fillExecObject(drm_i915_gem_exec_object2 &execObject) {
    execObject.handle = this->handle;
    execObject.relocation_count = 0; //No relocations, we are SoftPinning
    execObject.relocs_ptr = 0ul;
    execObject.alignment = 0;

    if (this->isSoftpin) {
        execObject.offset = this->offset64;
        execObject.flags = EXEC_OBJECT_PINNED;
    } else {
        execObject.offset = 0;
        execObject.flags = 0;
    }
#ifdef __x86_64__
    if (reinterpret_cast<uint64_t>(this->address) & MemoryConstants::zoneHigh) {
        execObject.flags |= EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
    }
#endif
    execObject.rsvd1 = this->drm->lowPriorityContextId;
    execObject.rsvd2 = 0;
}
void BufferObject::processRelocs(int &idx) {
for (size_t i = 0; i < this->residency.size(); i++) {
residency[i]->fillExecObject(execObjectsStorage[idx]);
idx++;
}
}
// Submits this buffer object as a batch buffer via
// DRM_IOCTL_I915_GEM_EXECBUFFER2.
//
// used              - batch length in bytes; rounded up to a multiple of 8.
// startOffset       - offset of the first command inside the batch buffer.
// flags             - execbuffer flags passed through to the kernel.
// requiresCoherency - when false and the coherency-disable patch is active,
//                     I915_PRIVATE_EXEC_FORCE_NON_COHERENT is added.
// lowPriority       - selects drm->lowPriorityContextId instead of context 0.
//
// execObjectsStorage must already point at storage with room for
// residency.size() + 1 entries. Returns the ioctl result; a non-zero result
// is printed (when debug messages are enabled) and hits UNRECOVERABLE_IF.
int BufferObject::exec(uint32_t used, size_t startOffset, unsigned int flags, bool requiresCoherency, bool lowPriority) {
drm_i915_gem_execbuffer2 execbuf;
int idx = 0;
// Resident objects come first; this object is appended as the last entry.
processRelocs(idx);
this->fillExecObject(execObjectsStorage[idx]);
idx++;
memset(&execbuf, 0, sizeof(execbuf));
execbuf.buffers_ptr = reinterpret_cast<uintptr_t>(execObjectsStorage);
execbuf.buffer_count = idx;
execbuf.batch_start_offset = static_cast<uint32_t>(startOffset);
execbuf.batch_len = alignUp(used, 8);
execbuf.cliprects_ptr = reinterpret_cast<uintptr_t>(nullptr);
execbuf.num_cliprects = 0;
execbuf.flags = flags;
if (drm->peekCoherencyDisablePatchActive() && !requiresCoherency) {
execbuf.flags = execbuf.flags | I915_PRIVATE_EXEC_FORCE_NON_COHERENT;
}
// rsvd1 carries the context id, masked with I915_EXEC_CONTEXT_ID_MASK.
if (lowPriority) {
execbuf.rsvd1 = this->drm->lowPriorityContextId & I915_EXEC_CONTEXT_ID_MASK;
} else {
execbuf.rsvd1 = 0 & I915_EXEC_CONTEXT_ID_MASK;
}
execbuf.rsvd2 = 0;
int ret = this->drm->ioctl(DRM_IOCTL_I915_GEM_EXECBUFFER2, &execbuf);
if (ret != 0) {
int err = errno;
printDebugString(DebugManager.flags.PrintDebugMessages.get(), stderr, "ioctl(I915_GEM_EXECBUFFER2) failed with %d. errno=%d(%s)\n", ret, err, strerror(err));
UNRECOVERABLE_IF(true);
}
return ret;
}
// Submits a two-object execbuffer containing boToPin followed by this
// object, with this object acting as the batch buffer.
// Two dwords are first written through this->address, so it must be a
// CPU-writable pointer at this point. Returns the ioctl result; a failure
// is only printed (when debug messages are enabled), not treated as fatal.
int BufferObject::pin(BufferObject *boToPin) {
drm_i915_gem_execbuffer2 execbuf;
drm_i915_gem_exec_object2 execObject[2];
// NOTE(review): 0x05000000 is presumably MI_BATCH_BUFFER_END followed by a
// no-op dword - confirm against the hardware command definitions.
reinterpret_cast<uint32_t *>(this->address)[0] = 0x05000000;
reinterpret_cast<uint32_t *>(this->address)[1] = 0x00000000;
boToPin->fillExecObject(execObject[0]);
this->fillExecObject(execObject[1]);
memset(&execbuf, 0, sizeof(execbuf));
execbuf.buffers_ptr = reinterpret_cast<uintptr_t>(execObject);
execbuf.buffer_count = 2;
execbuf.batch_start_offset = static_cast<uint32_t>(0);
// sizeof(uint32_t) rounded up to 8 covers both dwords written above.
execbuf.batch_len = alignUp(static_cast<uint32_t>(sizeof(uint32_t)), 8);
execbuf.cliprects_ptr = reinterpret_cast<uintptr_t>(nullptr);
execbuf.num_cliprects = 0;
execbuf.flags = 0x0;
if (drm->peekCoherencyDisablePatchActive()) {
execbuf.flags = execbuf.flags | I915_PRIVATE_EXEC_FORCE_NON_COHERENT;
}
execbuf.rsvd1 = 0 & I915_EXEC_CONTEXT_ID_MASK;
execbuf.rsvd2 = 0;
int ret = this->drm->ioctl(DRM_IOCTL_I915_GEM_EXECBUFFER2, &execbuf);
if (ret != 0) {
int err = errno;
printDebugString(DebugManager.flags.PrintDebugMessages.get(), stderr, "ioctl(I915_GEM_EXECBUFFER2) failed with %d. errno=%d(%s)\n", ret, err, strerror(err));
}
return ret;
}
} // namespace OCLRT

View File

@@ -0,0 +1,117 @@
/*
* Copyright (c) 2017, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#pragma once
#include <sys/ioctl.h>
#include <errno.h>
#include <stdint.h>
#include <cstdlib>
#include <atomic>
#include <set>
#include <vector>
struct drm_i915_gem_exec_object2;
struct drm_i915_gem_relocation_entry;
namespace OCLRT {
class DrmMemoryManager;
class Drm;
// Tag stored on a BufferObject (see setAllocationType/peekAllocationType);
// presumably records which allocator produced the object's backing storage
// - confirm against DrmMemoryManager. New objects start as UNKNOWN_ALLOCATOR.
enum StorageAllocatorType {
MMAP_ALLOCATOR,
BIT32_ALLOCATOR,
MALLOC_ALLOCATOR,
EXTERNAL_ALLOCATOR,
UNKNOWN_ALLOCATOR
};
// Wrapper around an i915 gem object handle. Offers the ioctl-backed
// operations (exec, pin, wait, close, setTiling) plus the bookkeeping the
// memory manager and command stream receiver attach to the object.
// The constructor is protected; DrmMemoryManager is a friend.
class BufferObject {
friend DrmMemoryManager;
using ResidencyVector = std::vector<BufferObject *>;
public:
~BufferObject(){};
// Records a fixed GPU offset that is reported with EXEC_OBJECT_PINNED.
bool softPin(uint64_t offset);
bool setTiling(uint32_t mode, uint32_t stride);
// Submits a minimal batch (this object) that references boToPin.
int pin(BufferObject *boToPin);
// Submits this object as a batch buffer together with the residency list.
int exec(uint32_t used, size_t startOffset, unsigned int flags, bool requiresCoherency = false, bool lowPriority = false);
int wait(int64_t timeoutNs);
bool close();
// Atomically increments the reference count.
inline void reference() {
this->refCount++;
}
uint32_t getRefCount() const;
// Plain accessors for the stored state.
bool peekIsAllocated() { return isAllocated; }
size_t peekSize() { return size; }
int peekHandle() { return handle; }
void *peekAddress() { return address; }
void setAddress(void *address) { this->address = address; }
void setUnmapSize(uint64_t unmapSize) { this->unmapSize = unmapSize; }
uint64_t peekUnmapSize() { return unmapSize; }
// Exchanges the contents of this object's residency list with *residencyVect.
void swapResidencyVector(ResidencyVector *residencyVect) {
std::swap(this->residency, *residencyVect);
}
// Sets the (non-owned) array that exec() fills with exec object entries.
void setExecObjectsStorage(drm_i915_gem_exec_object2 *storage) {
execObjectsStorage = storage;
}
ResidencyVector *getResidency() { return &residency; }
StorageAllocatorType peekAllocationType() { return storageAllocatorType; }
void setAllocationType(StorageAllocatorType allocatorType) { this->storageAllocatorType = allocatorType; }
protected:
BufferObject(Drm *drm, int handle, bool isAllocated);
Drm *drm;
std::atomic<uint32_t> refCount;
// Objects listed ahead of this one when it is executed as a batch buffer.
ResidencyVector residency;
drm_i915_gem_exec_object2 *execObjectsStorage;
int handle; // i915 gem object handle
bool isSoftpin;
bool isReused;
//Tiling
uint32_t tiling_mode;
uint32_t stride;
// Fills a single exec object entry describing this buffer object.
void fillExecObject(drm_i915_gem_exec_object2 &execObject);
// Fills entries for every object in `residency`, advancing idx.
void processRelocs(int &idx);
uint64_t offset64; // last-seen GPU offset
size_t size;
void *address; // GPU side virtual address
bool isAllocated = false;
uint64_t unmapSize = 0;
StorageAllocatorType storageAllocatorType = UNKNOWN_ALLOCATOR;
};
}

View File

@@ -0,0 +1,77 @@
/*
* Copyright (c) 2017, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#pragma once
#include "runtime/command_stream/device_command_stream.h"
#include "runtime/os_interface/linux/drm_gem_close_worker.h"
#include <vector>
extern "C" {
#include "drm/i915_drm.h"
}
namespace OCLRT {
class BufferObject;
class Drm;
class DrmMemoryManager;
// Command stream receiver that submits work through DRM buffer objects.
// It collects BufferObjects into `residency` and hands them, together with
// `execObjectsStorage`, to the batch buffer's BufferObject in flush().
template <typename GfxFamily>
class DrmCommandStreamReceiver : public DeviceCommandStreamReceiver<GfxFamily> {
protected:
typedef DeviceCommandStreamReceiver<GfxFamily> BaseClass;
// Bring dependent-base members into scope for unqualified use.
using CommandStreamReceiverHw<GfxFamily>::CommandStreamReceiver::memoryManager;
using BaseClass::getScratchPatchAddress;
using BaseClass::hwInfo;
using BaseClass::makeNonResident;
using BaseClass::makeResident;
using BaseClass::mediaVfeStateDirty;
using BaseClass::requiredScratchSize;
public:
// When drm is null default implementation is used. In this case DrmCommandStreamReceiver is responsible to free drm.
// When drm is passed, DCSR will not free it at destruction
DrmCommandStreamReceiver(const HardwareInfo &hwInfoIn, Drm *drm, gemCloseWorkerMode mode);
// Executes the batch buffer; returns the gem handle of its buffer object
// as the flush stamp (0 when the allocation has no buffer object).
FlushStamp flush(BatchBuffer &batchBuffer, EngineType engineOrdinal, ResidencyContainer *allocationsForResidency) override;
void makeResident(GraphicsAllocation &gfxAllocation) override;
void processResidency(ResidencyContainer *allocationsForResidency) override;
void makeNonResident(GraphicsAllocation &gfxAllocation) override;
bool waitForFlushStamp(FlushStamp &flushStampToWait) override;
void overrideMediaVFEStateDirty(bool dirty) override;
DrmMemoryManager *getMemoryManager();
MemoryManager *createMemoryManager(bool enable64kbPages) override;
gemCloseWorkerMode peekGemCloseWorkerOperationMode() {
return this->gemCloseWorkerOperationMode;
}
protected:
void makeResident(BufferObject *bo);
void programVFEState(LinearStream &csr, DispatchFlags &dispatchFlags) override;
// Buffer objects accumulated for the next flush.
std::vector<BufferObject *> residency;
// Scratch array of exec object entries lent to the batch buffer object.
std::vector<drm_i915_gem_exec_object2> execObjectsStorage;
Drm *drm;
gemCloseWorkerMode gemCloseWorkerOperationMode;
bool mediaVfeStateLowPriorityDirty = true;
};
} // namespace OCLRT

View File

@@ -0,0 +1,190 @@
/*
* Copyright (c) 2017, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#include "runtime/command_stream/linear_stream.h"
#include "hw_cmds.h"
#include "runtime/helpers/aligned_memory.h"
#include "runtime/helpers/preamble.h"
#include "runtime/mem_obj/buffer.h"
#include "runtime/os_interface/linux/drm_buffer_object.h"
#include "runtime/os_interface/linux/drm_command_stream.h"
#include "runtime/os_interface/linux/drm_memory_manager.h"
#include "runtime/os_interface/linux/drm_neo.h"
#include "runtime/os_interface/linux/os_interface.h"
#include <cstdlib>
#include <cstring>
namespace OCLRT {
// Uses the supplied Drm, or Drm::get(0) when none is given, pre-reserves
// room for 512 residency/exec-object entries and installs a fresh
// OSInterface that points at the chosen Drm.
template <typename GfxFamily>
DrmCommandStreamReceiver<GfxFamily>::DrmCommandStreamReceiver(const HardwareInfo &hwInfoIn,
                                                              Drm *drm, gemCloseWorkerMode mode)
    : BaseClass(hwInfoIn), gemCloseWorkerOperationMode(mode) {
    if (drm) {
        this->drm = drm;
    } else {
        this->drm = Drm::get(0);
    }

    residency.reserve(512);
    execObjectsStorage.reserve(512);

    CommandStreamReceiver::osInterface = std::unique_ptr<OSInterface>(new OSInterface());
    CommandStreamReceiver::osInterface.get()->get()->setDrm(this->drm);
}
// Submits the command buffer described by batchBuffer through its buffer object.
// Returns the handle of that buffer object as the flush stamp, or 0 when the
// command buffer allocation has no buffer object (nothing is submitted then).
// allocationsForResidency may be null; processResidency() then falls back to the
// memory manager's residency list.
template <typename GfxFamily>
FlushStamp DrmCommandStreamReceiver<GfxFamily>::flush(BatchBuffer &batchBuffer, EngineType engineOrdinal, ResidencyContainer *allocationsForResidency) {
// Only the render engine is expected here; this is checked in debug builds only.
DEBUG_BREAK_IF(engineOrdinal != EngineType::ENGINE_RCS);
DrmAllocation *alloc = static_cast<DrmAllocation *>(batchBuffer.commandBufferAllocation);
DEBUG_BREAK_IF(!alloc);
// Start offset passed to exec: the low bits of the buffer address (below
// allocationAlignment) plus the requested start offset.
// NOTE(review): the mask assumes allocationAlignment is a power of two - confirm.
size_t alignedStart = (reinterpret_cast<uintptr_t>(batchBuffer.commandBufferAllocation->getUnderlyingBuffer()) & (MemoryConstants::allocationAlignment - 1)) + batchBuffer.startOffset;
BufferObject *bb = alloc->getBO();
FlushStamp flushStamp = 0;
if (bb) {
flushStamp = bb->peekHandle();
// Collect the buffer objects of every allocation that has to be resident.
this->processResidency(allocationsForResidency);
// Residency hold all allocation except command buffer, hence + 1
auto requiredSize = this->residency.size() + 1;
if (requiredSize > this->execObjectsStorage.size()) {
this->execObjectsStorage.resize(requiredSize);
}
// Hand the residency list over to the command buffer BO (swap), give it the
// scratch storage for exec objects, then re-reserve our own (swapped-in) vector.
bb->swapResidencyVector(&this->residency);
bb->setExecObjectsStorage(this->execObjectsStorage.data());
this->residency.reserve(512);
// The submitted length is rounded up to a multiple of 8 bytes.
bb->exec(static_cast<uint32_t>(alignUp(batchBuffer.usedSize - batchBuffer.startOffset, 8)),
alignedStart, I915_EXEC_RENDER | I915_EXEC_NO_RELOC,
batchBuffer.requiresCoherency,
batchBuffer.low_priority);
if (this->gemCloseWorkerOperationMode == gemCloseWorkerMode::gemCloseWorkerConsumingCommandBuffers) {
// Consume all space in CS to force new allocation
batchBuffer.stream->replaceBuffer(nullptr, 0);
batchBuffer.stream->replaceGraphicsAllocation(nullptr);
// Push for asynchronous cleanup
getMemoryManager()->push(alloc);
} else {
// The command buffer is kept, so only its residency list is dropped.
bb->getResidency()->clear();
}
}
return flushStamp;
}
// Marks gfxAllocation as resident via the base class implementation.
// Allocations whose underlying buffer size is zero are ignored.
template <typename GfxFamily>
void DrmCommandStreamReceiver<GfxFamily>::makeResident(GraphicsAllocation &gfxAllocation) {
    if (gfxAllocation.getUnderlyingBufferSize() != 0) {
        CommandStreamReceiver::makeResident(gfxAllocation);
    }
}
// Appends bo to the residency list used by the next flush. A null bo is ignored.
template <typename GfxFamily>
void DrmCommandStreamReceiver<GfxFamily>::makeResident(BufferObject *bo) {
    if (!bo) {
        return;
    }

    // When command buffers are consumed by the close worker, every listed
    // buffer object carries an extra reference.
    const bool workerConsumesCommandBuffers =
        this->gemCloseWorkerOperationMode == gemCloseWorkerMode::gemCloseWorkerConsumingCommandBuffers;
    if (workerConsumesCommandBuffers) {
        bo->reference();
    }

    residency.push_back(bo);
}
// Adds the buffer objects backing every allocation that must be resident to
// the residency list of this command stream receiver.
// inputAllocationsForResidency may be null, in which case the memory manager's
// own residency list is used.
template <typename GfxFamily>
void DrmCommandStreamReceiver<GfxFamily>::processResidency(ResidencyContainer *inputAllocationsForResidency) {
    auto &allocationsForResidency = inputAllocationsForResidency ? *inputAllocationsForResidency : getMemoryManager()->getResidencyAllocations();
    for (size_t allocationId = 0; allocationId < allocationsForResidency.size(); allocationId++) {
        DrmAllocation *drmAlloc = static_cast<DrmAllocation *>(allocationsForResidency[allocationId]);
        auto &fragments = drmAlloc->fragmentsStorage;
        if (fragments.fragmentCount) {
            // Host-pointer allocation split into fragments: each fragment has its
            // own buffer object and is added only once (tracked by `resident`).
            for (unsigned int fragmentId = 0; fragmentId < fragments.fragmentCount; fragmentId++) {
                auto &fragment = fragments.fragmentStorageData[fragmentId];
                if (!fragment.residency->resident) {
                    makeResident(fragment.osHandleStorage->bo);
                    fragment.residency->resident = true;
                }
            }
        } else {
            // Regular allocation backed by a single buffer object.
            makeResident(drmAlloc->getBO());
        }
    }
}
// Marks gfxAllocation as not resident.
// If the allocation was resident, the pending residency list is dropped as a
// whole and the per-fragment resident flags of the allocation are cleared.
template <typename GfxFamily>
void DrmCommandStreamReceiver<GfxFamily>::makeNonResident(GraphicsAllocation &gfxAllocation) {
    // Vector is moved to command buffer inside flush.
    // If flush wasn't called we need to make all objects non-resident.
    // If makeNonResident is called before flush, vector will be cleared.
    if (gfxAllocation.residencyTaskCount != ObjectNotResident) {
        if (!this->residency.empty()) {
            const bool workerConsumesCommandBuffers =
                this->gemCloseWorkerOperationMode == gemCloseWorkerMode::gemCloseWorkerConsumingCommandBuffers;
            if (workerConsumesCommandBuffers) {
                // Give back the references taken in makeResident(BufferObject *).
                for (auto pendingBo : this->residency) {
                    getMemoryManager()->unreference(pendingBo);
                }
            }
            this->residency.clear();
        }

        auto &fragments = gfxAllocation.fragmentsStorage;
        for (auto fragmentId = 0u; fragmentId < fragments.fragmentCount; fragmentId++) {
            fragments.fragmentStorageData[fragmentId].residency->resident = false;
        }
    }
    gfxAllocation.residencyTaskCount = ObjectNotResident;
}
// Returns the memory manager of the base class viewed as a DrmMemoryManager.
template <typename GfxFamily>
DrmMemoryManager *DrmCommandStreamReceiver<GfxFamily>::getMemoryManager() {
    auto *baseMemoryManager = CommandStreamReceiver::getMemoryManager();
    return static_cast<DrmMemoryManager *>(baseMemoryManager);
}
// Creates the DrmMemoryManager for this receiver, stores it in memoryManager
// and returns it. enable64kbPages is not used by this implementation.
template <typename GfxFamily>
MemoryManager *DrmCommandStreamReceiver<GfxFamily>::createMemoryManager(bool enable64kbPages) {
    // Force-pinning is controlled by a debug flag.
    const bool forcePinAllowed = DebugManager.flags.EnableForcePin.get();
    memoryManager = new DrmMemoryManager(this->drm, this->gemCloseWorkerOperationMode, forcePinAllowed);
    return memoryManager;
}
// Issues a GEM wait on the buffer object whose handle is stored in flushStamp.
// The ioctl result is not inspected; the function always returns true.
template <typename GfxFamily>
bool DrmCommandStreamReceiver<GfxFamily>::waitForFlushStamp(FlushStamp &flushStamp) {
    drm_i915_gem_wait waitArgs = {};
    waitArgs.timeout_ns = -1; // negative timeout: no time limit is requested
    waitArgs.bo_handle = flushStamp;
    this->drm->ioctl(DRM_IOCTL_I915_GEM_WAIT, &waitArgs);
    return true;
}
// Sets the media VFE dirty flag of both the normal and the low-priority context.
template <typename GfxFamily>
inline void DrmCommandStreamReceiver<GfxFamily>::overrideMediaVFEStateDirty(bool dirty) {
    this->mediaVfeStateLowPriorityDirty = dirty;
    this->mediaVfeStateDirty = dirty;
}
// Programs the VFE state into csr if the dirty flag of the context selected by
// dispatchFlags.low_priority is set, then clears that flag.
template <typename GfxFamily>
inline void DrmCommandStreamReceiver<GfxFamily>::programVFEState(LinearStream &csr, DispatchFlags &dispatchFlags) {
    // Low-priority and normal submissions track their dirty state separately.
    bool &dirtyFlag = dispatchFlags.low_priority ? mediaVfeStateLowPriorityDirty : mediaVfeStateDirty;
    if (!dirtyFlag) {
        return;
    }
    PreambleHelper<GfxFamily>::programVFEState(&csr, hwInfo, requiredScratchSize, getScratchPatchAddress());
    dirtyFlag = false;
}
} // namespace OCLRT

View File

@@ -0,0 +1,39 @@
/*
* Copyright (c) 2017, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#pragma once
#include "runtime/helpers/engine_node.h"
#include "runtime/helpers/hw_info.h"
#include "drm/i915_drm.h"
#include <cstdint>
namespace OCLRT {
// Per-hardware-family helper for engine selection on DRM.
template <typename gfxFamily>
class DrmEngineMapper {
public:
// NOTE(review): presumably writes the i915 exec flag for engineType into `flag`
// and returns whether the engine is supported - the definition is not visible
// here, confirm against the implementation.
static bool engineNodeMap(EngineType engineType, unsigned int &flag);
};
} // namespace OCLRT

View File

@@ -0,0 +1,123 @@
/*
* Copyright (c) 2017, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#include <atomic>
#include <iostream>
#include <queue>
#include <stdio.h>
#include "runtime/helpers/aligned_memory.h"
#include "drm_buffer_object.h"
#include "drm_command_stream.h"
#include "drm_gem_close_worker.h"
#include "drm_memory_manager.h"
namespace OCLRT {
// Creates the worker in the active state and starts its thread.
DrmGemCloseWorker::DrmGemCloseWorker(DrmMemoryManager &memoryManager)
    : active(true),
      thread(nullptr),
      workCount(0),
      memoryManager(memoryManager),
      workerDone(false) {
    // Started from the body, i.e. after all members have been constructed.
    thread = new std::thread(&DrmGemCloseWorker::worker, this);
}
// Waits for the worker thread to finish, joins it and releases the thread
// object. Does nothing when the thread has already been closed.
void DrmGemCloseWorker::closeThread() {
    if (!thread) {
        return;
    }

    // The worker may be asleep on the condition variable, so keep waking it
    // until it reports completion. Yield between attempts so this loop does not
    // monopolise a core while the worker is still draining its queue.
    while (!workerDone.load()) {
        condition.notify_all();
        std::this_thread::yield();
    }

    thread->join();
    delete thread;
    thread = nullptr;
}
// Requests the worker loop to stop, then waits for the thread and releases it.
DrmGemCloseWorker::~DrmGemCloseWorker() {
    this->active = false;
    this->closeThread();
}
// Queues an allocation for asynchronous close and wakes one waiting worker.
void DrmGemCloseWorker::push(DrmAllocation *bo) {
    {
        // The queue and the pending counter are updated under the worker mutex.
        std::lock_guard<std::mutex> guard(closeWorkerMutex);
        workCount++;
        queue.push(bo);
    }
    // Notify after the mutex has been released.
    condition.notify_one();
}
void DrmGemCloseWorker::close(bool blocking) {
active = false;
condition.notify_all();
if (blocking) {
closeThread();
}
}
// True when no pushed allocation is still waiting to be closed.
bool DrmGemCloseWorker::isEmpty() {
    const auto pending = workCount.load();
    return pending == 0;
}
// Closes one queued allocation: waits on its buffer object, drops the
// reference held on it, decrements the pending counter and deletes alloc.
inline void DrmGemCloseWorker::close(DrmAllocation *alloc) {
    auto *bufferObject = alloc->getBO();
    bufferObject->wait(-1);
    memoryManager.unreference(bufferObject);
    --workCount;
    delete alloc;
}
// Body of the worker thread. Closes queued allocations in batches until
// `active` is cleared, then drains whatever is still queued and sets workerDone.
void DrmGemCloseWorker::worker() {
DrmAllocation *workItem = nullptr;
std::queue<DrmAllocation *> localQueue;
// The lock object is created holding the mutex and released right away; it is
// re-acquired at the top of every loop iteration.
std::unique_lock<std::mutex> lock(closeWorkerMutex);
lock.unlock();
while (active) {
lock.lock();
workItem = nullptr;
// Sleep until there is work or a stop was requested.
while (queue.empty() && active) {
condition.wait(lock);
}
// Take the whole pending batch so the items are closed without holding the mutex.
if (!queue.empty()) {
localQueue.swap(queue);
}
lock.unlock();
while (!localQueue.empty()) {
workItem = localQueue.front();
localQueue.pop();
close(workItem);
}
}
// Stop requested: close anything that is still queued, this time while
// holding the mutex.
lock.lock();
while (!queue.empty()) {
workItem = queue.front();
queue.pop();
close(workItem);
}
lock.unlock();
// closeThread() polls this flag before joining the thread.
workerDone.store(true);
}
}

View File

@@ -0,0 +1,73 @@
/*
* Copyright (c) 2017, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#pragma once
#include <atomic>
#include <condition_variable>
#include <mutex>
#include <thread>
#include <map>
#include <set>
#include <queue>
#include <cstdint>
namespace OCLRT {
class DrmMemoryManager;
class DrmAllocation;
// Selects whether and how the asynchronous GEM close worker is used.
enum gemCloseWorkerMode {
// Flushed command buffers are handed to the worker for asynchronous cleanup.
gemCloseWorkerConsumingCommandBuffers,
// The memory manager does not create a worker.
gemCloseWorkerInactive,
// NOTE(review): no use of this mode is visible in this file set beyond the
// worker being created - confirm its intended meaning.
gemCloseWorkerConsumingResources
};
// Background worker that closes DRM allocations asynchronously.
// Allocations are queued with push(); a dedicated thread waits on each
// allocation's buffer object, releases it through the memory manager and
// deletes the allocation.
class DrmGemCloseWorker {
  public:
    // Starts the worker thread.
    DrmGemCloseWorker(DrmMemoryManager &memoryManager);
    // Stops the worker and joins its thread.
    ~DrmGemCloseWorker();
    DrmGemCloseWorker(const DrmGemCloseWorker &) = delete;
    DrmGemCloseWorker &operator=(const DrmGemCloseWorker &) = delete;

    // Queues an allocation for asynchronous close.
    void push(DrmAllocation *allocation);
    // Requests the worker to stop; when blocking is true also joins the thread.
    void close(bool blocking);
    // True when every pushed allocation has been processed.
    bool isEmpty();

  private:
    // Closes a single queued allocation (runs on the worker thread).
    void close(DrmAllocation *workItem);
    // Wakes the worker until it reports completion, then joins and frees the thread.
    void closeThread();
    // Thread entry point.
    void worker();

    // Cleared to ask the worker loop to stop. It is written by the owning
    // thread and read by the worker thread, partly outside the mutex, so it
    // has to be atomic to avoid a data race.
    std::atomic<bool> active;
    std::thread *thread;
    // Pending allocations; guarded by closeWorkerMutex.
    std::queue<DrmAllocation *> queue;
    // Number of pushed allocations that have not been closed yet.
    std::atomic<uint32_t> workCount;
    DrmMemoryManager &memoryManager;
    std::mutex closeWorkerMutex;
    std::condition_variable condition;
    // Set by the worker thread right before it returns.
    std::atomic<bool> workerDone;
};
}

View File

@@ -0,0 +1,490 @@
/*
* Copyright (c) 2017, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#include "runtime/device/device.h"
#include "runtime/helpers/aligned_memory.h"
#include "runtime/helpers/ptr_math.h"
#include "runtime/helpers/options.h"
#include "runtime/os_interface/32bit_memory.h"
#include "runtime/os_interface/linux/drm_allocation.h"
#include "runtime/os_interface/linux/drm_buffer_object.h"
#include "runtime/os_interface/linux/drm_memory_manager.h"
#include "runtime/helpers/surface_formats.h"
#include <cstring>
#include <iostream>
extern "C" {
#include "drm/i915_drm.h"
#include "drm/drm.h"
}
#include "runtime/gmm_helper/gmm_helper.h"
#include "runtime/gmm_helper/resource_info.h"
namespace OCLRT {
// Sets up the DRM memory manager: enables virtual padding, creates the 32-bit
// allocator, starts the GEM close worker unless mode is inactive and, when
// forcePinAllowed is set, prepares the one-page buffer object used for pinning.
DrmMemoryManager::DrmMemoryManager(Drm *drm, gemCloseWorkerMode mode, bool forcePinAllowed) : MemoryManager(false), drm(drm), pinBB(nullptr) {
    MemoryManager::virtualPaddingAvailable = true;
    allocator32Bit.reset(new Allocator32bit);

    if (mode != gemCloseWorkerMode::gemCloseWorkerInactive) {
        gemCloseWorker.reset(new DrmGemCloseWorker(*this));
    }

    if (!forcePinAllowed) {
        return;
    }

    // Back the pin buffer object with a single page of host memory.
    auto pinMemory = ::alignedMalloc(MemoryConstants::pageSize, MemoryConstants::pageSize);
    DEBUG_BREAK_IF(pinMemory == nullptr);
    pinBB = allocUserptr(reinterpret_cast<uintptr_t>(pinMemory), MemoryConstants::pageSize, 0, true);
    if (pinBB) {
        // The buffer object now owns the host memory.
        pinBB->isAllocated = true;
    } else {
        ::alignedFree(pinMemory);
    }
}
// Runs the common cleanup, asks the close worker (if any) to stop without
// blocking, and releases the pin buffer object.
DrmMemoryManager::~DrmMemoryManager() {
    applyCommonCleanup();

    if (gemCloseWorker != nullptr) {
        gemCloseWorker->close(false);
    }

    if (pinBB != nullptr) {
        unreference(pinBB);
        pinBB = nullptr;
    }
}
// Forwards alloc to the GEM close worker for asynchronous cleanup.
// NOTE(review): gemCloseWorker is only created when the mode is not inactive;
// this call dereferences it unconditionally - confirm callers respect that.
void DrmMemoryManager::push(DrmAllocation *alloc) {
    this->gemCloseWorker->push(alloc);
}
// Removes bo from the list of shared (reusable) buffer objects.
void DrmMemoryManager::eraseSharedBufferObject(OCLRT::BufferObject *bo) {
    std::lock_guard<decltype(mtx)> lock(mtx);
    auto it = std::find(sharingBufferObjects.begin(), sharingBufferObjects.end(), bo);
    //If an object isReused = true, it must be in the vector
    DEBUG_BREAK_IF(it == sharingBufferObjects.end());
    // Erasing end() is undefined behaviour, so a missing entry is skipped in
    // builds where the debug break above is compiled out.
    if (it != sharingBufferObjects.end()) {
        sharingBufferObjects.erase(it);
    }
}
// Flags bo as reused and records it in the list of shared buffer objects.
void DrmMemoryManager::pushSharedBufferObject(OCLRT::BufferObject *bo) {
    std::lock_guard<decltype(mtx)> guard(mtx);
    bo->isReused = true;
    sharingBufferObjects.emplace_back(bo);
}
// Drops one reference from bo and destroys it when that was the last one.
// Returns the reference count observed before the decrement; for a null bo the
// value -1 converted to uint32_t is returned.
// With synchronousDestroy the call first spins until at most one reference is left.
uint32_t DrmMemoryManager::unreference(OCLRT::BufferObject *bo, bool synchronousDestroy) {
if (!bo)
return -1;
if (synchronousDestroy) {
// Busy-wait until every other holder has released its reference.
while (bo->refCount > 1)
;
}
// fetch_sub returns the value held before the decrement.
uint32_t r = bo->refCount.fetch_sub(1);
if (r == 1) {
// Last reference: release the buffer objects listed in this BO's residency.
for (auto it : *bo->getResidency()) {
unreference(it);
}
// Read everything needed for freeing the backing storage before bo is deleted.
auto unmapSize = bo->peekUnmapSize();
auto address = bo->isAllocated || unmapSize > 0 ? bo->address : nullptr;
auto allocatorType = bo->peekAllocationType();
if (bo->isReused) {
eraseSharedBufferObject(bo);
}
bo->close();
delete bo;
if (address) {
if (unmapSize) {
// A non-zero unmap size means the range came from mmap or the 32-bit allocator.
if (allocatorType == MMAP_ALLOCATOR) {
munmapFunction(address, unmapSize);
} else {
allocator32Bit->free(address, unmapSize);
}
} else {
// Otherwise the memory was obtained with alignedMalloc.
::alignedFree(address);
}
}
}
return r;
}
// Wraps the host memory range [address, address + size) in a userptr buffer
// object that is soft-pinned at the same address.
// Returns nullptr when the ioctl fails. The softpin argument is not consulted.
OCLRT::BufferObject *DrmMemoryManager::allocUserptr(uintptr_t address, size_t size, uint64_t flags, bool softpin) {
    struct drm_i915_gem_userptr request;
    memset(&request, 0, sizeof(request));
    request.user_ptr = address;
    request.user_size = size;
    request.flags = static_cast<uint32_t>(flags);

    if (this->drm->ioctl(DRM_IOCTL_I915_GEM_USERPTR, &request) != 0) {
        return nullptr;
    }

    auto bufferObject = new (std::nothrow) BufferObject(this->drm, request.handle, false);
    if (bufferObject == nullptr) {
        // Running out of memory here is treated as unrecoverable.
        UNRECOVERABLE_IF(true);
        return nullptr;
    }

    bufferObject->size = size;
    bufferObject->address = reinterpret_cast<void *>(address);
    bufferObject->softPin(address);
    return bufferObject;
}
// Creates an allocation for a host pointer whose memory is described by
// handleStorage. The allocation has no buffer object of its own.
DrmAllocation *DrmMemoryManager::createGraphicsAllocation(OsHandleStorage &handleStorage, size_t hostPtrSize, const void *hostPtr) {
    auto *hostPtrAllocation = new DrmAllocation(nullptr, const_cast<void *>(hostPtr), hostPtrSize);
    hostPtrAllocation->fragmentsStorage = handleStorage;
    return hostPtrAllocation;
}
// Allocates host memory of at least `size` bytes with at least `alignment`
// alignment and wraps it in a userptr buffer object.
// Returns nullptr when the host allocation or the buffer object creation fails.
DrmAllocation *DrmMemoryManager::allocateGraphicsMemory(size_t size, size_t alignment, bool forcePin) {
    const size_t minAlignment = MemoryConstants::allocationAlignment;
    const size_t requestedAlignment = std::max(alignment, minAlignment);
    size_t cAlignment = alignUp(requestedAlignment, minAlignment);
    // When size == 0 allocate allocationAlignment
    // It's needed to prevent overlapping pages with user pointers
    size_t cSize = std::max(alignUp(size, minAlignment), minAlignment);

    auto hostMemory = ::alignedMalloc(cSize, cAlignment);
    if (hostMemory == nullptr) {
        return nullptr;
    }

    BufferObject *bo = allocUserptr(reinterpret_cast<uintptr_t>(hostMemory), cSize, 0, true);
    if (bo == nullptr) {
        ::alignedFree(hostMemory);
        return nullptr;
    }
    // The buffer object owns the host memory from here on.
    bo->isAllocated = true;

    // Pin large allocations up front when pinning is available and requested.
    const bool shouldPin = pinBB != nullptr && forcePin && size >= this->pinThreshold;
    if (shouldPin) {
        pinBB->pin(bo);
    }
    return new DrmAllocation(bo, hostMemory, cSize);
}
// Creates an allocation for caller-provided memory through the base class and
// optionally pins its buffer object. Returns whatever the base class returned.
DrmAllocation *DrmMemoryManager::allocateGraphicsMemory(size_t size, const void *ptr, bool forcePin) {
    auto *allocation = static_cast<DrmAllocation *>(MemoryManager::allocateGraphicsMemory(size, const_cast<void *>(ptr)));
    if (allocation == nullptr) {
        return allocation;
    }

    // Pinning applies only to requests of at least pinThreshold bytes.
    if (pinBB != nullptr && forcePin && size >= this->pinThreshold) {
        pinBB->pin(allocation->getBO());
    }
    return allocation;
}
// 64KB-page allocations are not provided by this memory manager: always returns nullptr.
DrmAllocation *DrmMemoryManager::allocateGraphicsMemory64kb(size_t size, size_t alignment, bool forcePin) {
return nullptr;
}
// Allocates storage for an image.
// Images that may not be tiled use a regular host-memory allocation. Tiled
// images get a GEM object that is soft-pinned into a reserved address range
// and configured for Y-tiling with the image row pitch.
// On success the returned allocation stores gmm; nullptr is returned on failure.
GraphicsAllocation *DrmMemoryManager::allocateGraphicsMemoryForImage(ImageInfo &imgInfo, Gmm *gmm) {
    if (!Gmm::allowTiling(*imgInfo.imgDesc)) {
        auto alloc = allocateGraphicsMemory(imgInfo.size, MemoryConstants::preferredAlignment);
        if (alloc) {
            alloc->gmm = gmm;
        }
        return alloc;
    }

    // Reserve an address range; it is never accessed by the CPU (PROT_NONE).
    auto gpuRange = mmapFunction(nullptr, imgInfo.size, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0);
    DEBUG_BREAK_IF(gpuRange == MAP_FAILED);
    if (gpuRange == MAP_FAILED) {
        return nullptr;
    }

    drm_i915_gem_create create = {0, 0, 0};
    create.size = imgInfo.size;
    auto ret = this->drm->ioctl(DRM_IOCTL_I915_GEM_CREATE, &create);
    DEBUG_BREAK_IF(ret != 0);
    if (ret != 0) {
        // No GEM object was created: give the reserved range back.
        munmapFunction(gpuRange, imgInfo.size);
        return nullptr;
    }

    auto bo = new (std::nothrow) BufferObject(this->drm, create.handle, true);
    if (!bo) {
        // Release the reserved range instead of leaking it.
        // NOTE(review): the GEM handle created above is not closed on this path.
        munmapFunction(gpuRange, imgInfo.size);
        return nullptr;
    }
    bo->size = imgInfo.size;
    bo->address = reinterpret_cast<void *>(gpuRange);
    bo->softPin(reinterpret_cast<uint64_t>(gpuRange));

    auto ret2 = bo->setTiling(I915_TILING_Y, static_cast<uint32_t>(imgInfo.rowPitch));
    DEBUG_BREAK_IF(ret2 != true);
    ((void)(ret2));

    // The range is unmapped when the buffer object is destroyed.
    bo->setUnmapSize(imgInfo.size);
    auto allocation = new DrmAllocation(bo, gpuRange, imgInfo.size);
    bo->setAllocationType(MMAP_ALLOCATOR);
    allocation->gmm = gmm;
    return allocation;
}
// Creates an allocation whose GPU address comes from the 32-bit allocator.
// With a non-null ptr the caller's memory is wrapped; otherwise the memory
// returned by the 32-bit allocator itself backs the allocation.
// Returns nullptr on failure, except for the fallback described below.
DrmAllocation *DrmMemoryManager::allocate32BitGraphicsMemory(size_t size, void *ptr) {
if (ptr) {
uintptr_t inputPtr = (uintptr_t)ptr;
auto allocationSize = alignSizeWholePage((void *)ptr, size);
// allocate() takes the size by reference, so track the value it may adjust separately.
auto realAllocationSize = allocationSize;
auto gpuVirtualAddress = allocator32Bit->allocate(realAllocationSize);
if (!gpuVirtualAddress) {
return nullptr;
}
// The userptr object has to start on a page boundary; remember how far the
// caller's pointer lies into that page.
auto alignedUserPointer = (uintptr_t)alignDown(ptr, MemoryConstants::pageSize);
auto inputPointerOffset = inputPtr - alignedUserPointer;
BufferObject *bo = allocUserptr(alignedUserPointer, allocationSize, 0, true);
if (!bo) {
allocator32Bit->free(gpuVirtualAddress, realAllocationSize);
return nullptr;
}
// The host memory belongs to the caller; only the 32-bit range is released
// (through the unmap size) when the buffer object is destroyed.
bo->isAllocated = false;
bo->setUnmapSize(realAllocationSize);
// Re-pin the buffer object at the address obtained from the 32-bit allocator.
bo->address = gpuVirtualAddress;
uintptr_t offset = (uintptr_t)bo->address;
bo->softPin((uint64_t)offset);
// The GPU address of the allocation keeps the caller's offset within the page.
auto drmAllocation = new DrmAllocation(bo, (void *)ptr, (uint64_t)ptrOffset(gpuVirtualAddress, inputPointerOffset), allocationSize);
drmAllocation->is32BitAllocation = true;
drmAllocation->gpuBaseAddress = allocator32Bit->getBase();
return drmAllocation;
}
size_t alignedAllocationSize = alignUp(size, MemoryConstants::pageSize);
// Same as above: allocate() may adjust the size it is given.
auto allocationSize = alignedAllocationSize;
auto res = allocator32Bit->allocate(allocationSize);
if (!res) {
// Fallback: while the device reports a program count of zero, 32-bit
// addressing is switched off and the request is retried through
// createGraphicsAllocationWithRequiredBitness.
if (device && device->getProgramCount() == 0) {
this->force32bitAllocations = false;
device->setForce32BitAddressing(false);
return (DrmAllocation *)createGraphicsAllocationWithRequiredBitness(size, ptr);
}
return nullptr;
}
BufferObject *bo = allocUserptr(reinterpret_cast<uintptr_t>(res), alignedAllocationSize, 0, true);
if (!bo) {
allocator32Bit->free(res, allocationSize);
return nullptr;
}
bo->isAllocated = true;
bo->setUnmapSize(allocationSize);
auto drmAllocation = new DrmAllocation(bo, res, alignedAllocationSize);
drmAllocation->is32BitAllocation = true;
drmAllocation->gpuBaseAddress = allocator32Bit->getBase();
return drmAllocation;
}
// Looks up a shared buffer object by its handle. The first match gets an extra
// reference and is returned; nullptr is returned when there is no match.
BufferObject *DrmMemoryManager::findAndReferenceSharedBufferObject(int boHandle) {
    std::lock_guard<decltype(mtx)> guard(mtx);
    for (const auto &candidate : sharingBufferObjects) {
        if (candidate->handle != static_cast<int>(boHandle)) {
            continue;
        }
        candidate->reference();
        return candidate;
    }
    return nullptr;
}
// Creates a buffer object for an imported handle and soft-pins it into a newly
// reserved address range. The range comes from the 32-bit allocator when
// specific bitness is required and 32-bit allocations are forced, and from
// mmap otherwise.
// Returns nullptr when the range or the buffer object cannot be allocated.
BufferObject *DrmMemoryManager::createSharedBufferObject(int boHandle, size_t size, bool requireSpecificBitness) {
    void *gpuRange = nullptr;
    StorageAllocatorType storageType = UNKNOWN_ALLOCATOR;

    if (requireSpecificBitness && this->force32bitAllocations) {
        // allocate() receives the size by reference and may adjust it.
        gpuRange = this->allocator32Bit->allocate(size);
        storageType = BIT32_ALLOCATOR;
        if (gpuRange == nullptr) {
            return nullptr;
        }
    } else {
        gpuRange = mmapFunction(nullptr, size, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0);
        storageType = MMAP_ALLOCATOR;
        DEBUG_BREAK_IF(gpuRange == MAP_FAILED);
        if (gpuRange == MAP_FAILED) {
            return nullptr;
        }
    }

    auto bo = new (std::nothrow) BufferObject(this->drm, boHandle, true);
    if (!bo) {
        // Return the reserved range to where it came from instead of leaking it.
        if (storageType == MMAP_ALLOCATOR) {
            munmapFunction(gpuRange, size);
        } else {
            this->allocator32Bit->free(gpuRange, size);
        }
        return nullptr;
    }

    bo->size = size;
    bo->address = reinterpret_cast<void *>(gpuRange);
    bo->softPin(reinterpret_cast<uint64_t>(gpuRange));
    // The range is released with the matching allocator when the BO is destroyed.
    bo->setUnmapSize(size);
    bo->setAllocationType(storageType);
    return bo;
}
// Imports a buffer shared through a prime file descriptor.
// handle is the file descriptor. With reuseBO an already imported buffer
// object with the same GEM handle is referenced instead of creating a new one.
// Returns nullptr when the import ioctl fails or the buffer object cannot be created.
GraphicsAllocation *DrmMemoryManager::createGraphicsAllocationFromSharedHandle(osHandle handle, bool requireSpecificBitness, bool reuseBO) {
    drm_prime_handle openFd = {0, 0, 0};
    openFd.fd = handle;
    auto ret = this->drm->ioctl(DRM_IOCTL_PRIME_FD_TO_HANDLE, &openFd);
    DEBUG_BREAK_IF(ret != 0);
    if (ret != 0) {
        // Without a valid GEM handle there is nothing to wrap.
        return nullptr;
    }
    auto boHandle = openFd.handle;

    BufferObject *bo = nullptr;
    if (reuseBO) {
        bo = findAndReferenceSharedBufferObject(boHandle);
    }

    if (bo == nullptr) {
        // The size of the shared buffer is taken from the end offset of the descriptor.
        size_t size = lseekFunction(handle, 0, SEEK_END);
        bo = createSharedBufferObject(boHandle, size, requireSpecificBitness);
        if (!bo) {
            return nullptr;
        }
        if (reuseBO) {
            pushSharedBufferObject(bo);
        }
    }

    auto drmAllocation = new DrmAllocation(bo, bo->address, bo->size, handle);
    if (requireSpecificBitness && this->force32bitAllocations) {
        drmAllocation->is32BitAllocation = true;
        drmAllocation->gpuBaseAddress = allocator32Bit->getBase();
    }
    return drmAllocation;
}
// Creates an allocation that exposes the memory of inputGraphicsAllocation at
// the start of a newly reserved address range of sizeWithPadding bytes.
// Returns nullptr when the range cannot be reserved or the buffer object
// cannot be created.
GraphicsAllocation *DrmMemoryManager::createPaddedAllocation(GraphicsAllocation *inputGraphicsAllocation, size_t sizeWithPadding) {
    void *gpuRange = mmapFunction(nullptr, sizeWithPadding, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0);
    if (gpuRange == MAP_FAILED) {
        return nullptr;
    }

    auto srcPtr = inputGraphicsAllocation->getUnderlyingBuffer();
    auto srcSize = inputGraphicsAllocation->getUnderlyingBufferSize();
    // The userptr object covers the page-aligned source memory.
    auto alignedSrcSize = alignUp(srcSize, MemoryConstants::pageSize);
    auto alignedPtr = (uintptr_t)alignDown(srcPtr, MemoryConstants::pageSize);
    auto offset = (uintptr_t)srcPtr - alignedPtr;

    BufferObject *bo = allocUserptr(alignedPtr, alignedSrcSize, 0, true);
    if (!bo) {
        // Give the reserved range back instead of leaking it.
        munmapFunction(gpuRange, sizeWithPadding);
        return nullptr;
    }

    bo->setAddress(gpuRange);
    bo->softPin(reinterpret_cast<uint64_t>(gpuRange));
    // The whole padded range is unmapped when the buffer object is destroyed.
    bo->setUnmapSize(sizeWithPadding);
    bo->setAllocationType(MMAP_ALLOCATOR);
    return new DrmAllocation(bo, (void *)srcPtr, (uint64_t)ptrOffset(gpuRange, offset), sizeWithPadding);
}
// Releases gfxAllocation together with its gmm and its backing storage.
// A null allocation is ignored.
void DrmMemoryManager::freeGraphicsMemoryImpl(GraphicsAllocation *gfxAllocation) {
    auto *drmAllocation = static_cast<DrmAllocation *>(gfxAllocation);
    if (drmAllocation == nullptr) {
        return;
    }

    // Deleting a null gmm is a no-op.
    delete drmAllocation->gmm;

    // Host-pointer allocations are backed by fragments instead of a buffer object.
    if (gfxAllocation->fragmentsStorage.fragmentCount) {
        cleanGraphicsMemoryCreatedFromHostPtr(gfxAllocation);
        delete gfxAllocation;
        return;
    }

    // Fetch the buffer object before the allocation that refers to it goes away.
    BufferObject *bufferObject = drmAllocation->getBO();

    // Imported allocations hold a shared handle that has to be closed.
    if (gfxAllocation->peekSharedHandle() != Sharing::nonSharedResource) {
        closeFunction(gfxAllocation->peekSharedHandle());
    }

    delete gfxAllocation;

    // Wait on the buffer object before dropping the reference to it.
    bufferObject->wait(-1);
    unreference(bufferObject);
}
// Returns the smaller of the physical host memory size and the aperture size
// reported by the kernel driver.
uint64_t DrmMemoryManager::getSystemSharedMemory() {
    uint64_t hostMemorySize = MemoryConstants::pageSize * static_cast<uint64_t>(sysconf(_SC_PHYS_PAGES));

    // Zero-initialised so a failing ioctl yields 0 instead of an indeterminate value.
    drm_i915_gem_get_aperture getAperture = {};
    auto ret = drm->ioctl(DRM_IOCTL_I915_GEM_GET_APERTURE, &getAperture);
    DEBUG_BREAK_IF(ret != 0);
    ((void)(ret));

    uint64_t gpuMemorySize = getAperture.aper_size;
    return std::min(hostMemorySize, gpuMemorySize);
}
// Returns the highest application address: the 32-bit limit, raised to the
// 64-bit limit when is64bit is set.
uint64_t DrmMemoryManager::getMaxApplicationAddress() {
    // Extra range that is added on top of the 32-bit limit when is64bit is set.
    const auto extraFor64Bit = MemoryConstants::max64BitAppAddress - MemoryConstants::max32BitAppAddress;
    return MemoryConstants::max32BitAppAddress + static_cast<uint64_t>(is64bit) * extraFor64Bit;
}
// Creates a userptr buffer object for every fragment of handleStorage that
// has a size but no OS handle yet, and registers it with the host pointer manager.
// Returns false as soon as one buffer object cannot be created; that fragment
// is then flagged with freeTheFragment.
bool DrmMemoryManager::populateOsHandles(OsHandleStorage &handleStorage) {
    for (unsigned int fragmentId = 0; fragmentId < max_fragments_count; fragmentId++) {
        auto &fragment = handleStorage.fragmentStorageData[fragmentId];

        // If there is no fragment it means it already exists.
        if (fragment.osHandleStorage || !fragment.fragmentSize) {
            continue;
        }

        fragment.osHandleStorage = new OsHandle();
        fragment.residency = new ResidencyData();
        fragment.osHandleStorage->bo = allocUserptr((uintptr_t)fragment.cpuPtr,
                                                    fragment.fragmentSize,
                                                    0,
                                                    true);
        if (!fragment.osHandleStorage->bo) {
            fragment.freeTheFragment = true;
            return false;
        }
        hostPtrManager.storeFragment(fragment);
    }
    return true;
}
// Releases the OS handle and residency data of every fragment in handleStorage
// that is flagged with freeTheFragment.
void DrmMemoryManager::cleanOsHandles(OsHandleStorage &handleStorage) {
    for (unsigned int fragmentId = 0; fragmentId < max_fragments_count; fragmentId++) {
        auto &fragment = handleStorage.fragmentStorageData[fragmentId];
        if (!fragment.freeTheFragment) {
            continue;
        }

        if (fragment.osHandleStorage->bo) {
            BufferObject *bufferObject = fragment.osHandleStorage->bo;
            bufferObject->wait(-1);
            // Synchronous destroy: waits for other holders and is expected to
            // drop the last reference.
            auto refCount = unreference(bufferObject, true);
            DEBUG_BREAK_IF(refCount != 1u);
            ((void)(refCount));
        }
        delete fragment.osHandleStorage;
        delete fragment.residency;
    }
}
// Returns the buffer object used for pinning; null when force-pinning was not
// enabled or its creation failed in the constructor.
BufferObject *DrmMemoryManager::getPinBB() const {
return pinBB;
}
} // namespace OCLRT

View File

@@ -0,0 +1,94 @@
/*
* Copyright (c) 2017, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#pragma once
#include "drm_gem_close_worker.h"
#include "runtime/memory_manager/memory_manager.h"
#include "runtime/os_interface/linux/drm_allocation.h"
#include "runtime/os_interface/linux/drm_neo.h"
#include <map>
#include <sys/mman.h>
namespace OCLRT {
class BufferObject;
class Drm;
// MemoryManager implementation for Linux that backs graphics allocations with
// DRM/i915 buffer objects.
class DrmMemoryManager : public MemoryManager {
public:
using MemoryManager::createGraphicsAllocationFromSharedHandle;
// mode selects whether a GEM close worker is created; forcePinAllowed enables
// creation of the buffer object used for pinning.
DrmMemoryManager(Drm *drm, gemCloseWorkerMode mode, bool forcePinAllowed);
~DrmMemoryManager() override;
// Buffer object used for pinning, may be null.
BufferObject *getPinBB() const;
void freeGraphicsMemoryImpl(GraphicsAllocation *gfxAllocation) override;
// Convenience overload: same as the three-argument version with forcePin = false.
DrmAllocation *allocateGraphicsMemory(size_t size, size_t alignment) override {
return allocateGraphicsMemory(size, alignment, false);
}
DrmAllocation *allocateGraphicsMemory(size_t size, size_t alignment, bool forcePin) override;
DrmAllocation *allocateGraphicsMemory64kb(size_t size, size_t alignment, bool forcePin) override;
// Convenience overload: same as the three-argument version with forcePin = false.
DrmAllocation *allocateGraphicsMemory(size_t size, const void *ptr) override {
return allocateGraphicsMemory(size, ptr, false);
}
DrmAllocation *allocateGraphicsMemory(size_t size, const void *ptr, bool forcePin) override;
GraphicsAllocation *allocateGraphicsMemoryForImage(ImageInfo &imgInfo, Gmm *gmm) override;
DrmAllocation *allocate32BitGraphicsMemory(size_t size, void *ptr) override;
GraphicsAllocation *createGraphicsAllocationFromSharedHandle(osHandle handle, bool requireSpecificBitness, bool reuseBO) override;
GraphicsAllocation *createPaddedAllocation(GraphicsAllocation *inputGraphicsAllocation, size_t sizeWithPadding) override;
// NT handles, resource locking and unlocking are no-ops in this implementation.
GraphicsAllocation *createGraphicsAllocationFromNTHandle(void *handle) override { return nullptr; }
void *lockResource(GraphicsAllocation *graphicsAllocation) override { return nullptr; };
void unlockResource(GraphicsAllocation *graphicsAllocation) override{};
uint64_t getSystemSharedMemory() override;
uint64_t getMaxApplicationAddress() override;
bool populateOsHandles(OsHandleStorage &handleStorage) override;
void cleanOsHandles(OsHandleStorage &handleStorage) override;
// drm/i915 ioctl wrappers
// Drops one reference from bo and destroys it when it was the last one;
// returns the reference count seen before the decrement.
uint32_t unreference(BufferObject *bo, bool synchronousDestroy = false);
// CloseWorker delegate
void push(DrmAllocation *alloc);
DrmAllocation *createGraphicsAllocation(OsHandleStorage &handleStorage, size_t hostPtrSize, const void *hostPtr) override;
protected:
// Bookkeeping of buffer objects imported from shared handles (guarded by mtx).
BufferObject *findAndReferenceSharedBufferObject(int boHandle);
BufferObject *createSharedBufferObject(int boHandle, size_t size, bool requireSpecificBitness);
void eraseSharedBufferObject(BufferObject *bo);
void pushSharedBufferObject(BufferObject *bo);
// Wraps host memory in a userptr buffer object; returns nullptr on failure.
BufferObject *allocUserptr(uintptr_t address, size_t size, uint64_t flags, bool softpin);
Drm *drm;
BufferObject *pinBB;
// Minimum request size (8 MB) for which force-pinning is applied.
size_t pinThreshold = 8 * 1024 * 1024;
// Only created when the mode passed to the constructor is not inactive.
std::unique_ptr<DrmGemCloseWorker> gemCloseWorker;
// System calls are reached through these pointers.
// NOTE(review): presumably so that tests can substitute them - confirm.
decltype(&lseek) lseekFunction = lseek;
decltype(&mmap) mmapFunction = mmap;
decltype(&munmap) munmapFunction = munmap;
decltype(&close) closeFunction = close;
std::vector<BufferObject *> sharingBufferObjects;
std::recursive_mutex mtx;
};
} // namespace OCLRT

View File

@@ -0,0 +1,285 @@
/*
* Copyright (c) 2017, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#include "drm_neo.h"
#include "runtime/os_interface/os_inc.h"
#include "runtime/utilities/directory.h"
#include "drm/i915_drm.h"
#include <cstdio>
#include <cstring>
#include <fstream>
namespace OCLRT {
// Path fragments appended to a PCI device directory under sysfs.
// getMaxGpuFrequency() appends sysFsDefaultGpuPath followed by maxGpuFrequencyFile;
// getSysFsPciPath() appends configFileName to locate the PCI config header.
const char *Drm::sysFsDefaultGpuPath = "/drm/card0";
const char *Drm::maxGpuFrequencyFile = "/gt_max_freq_mhz";
const char *Drm::configFileName = "/config";
// Issues `request` on this device's file descriptor, re-issuing the call for as
// long as it fails with EINTR or EAGAIN. Returns the result of the last attempt.
int Drm::ioctl(unsigned long request, void *arg) {
    int result = 0;
    SYSTEM_ENTER();
    for (;;) {
        result = ::ioctl(fd, request, arg);
        const bool shouldRetry = (result == -1) && (errno == EINTR || errno == EAGAIN);
        if (!shouldRetry) {
            break;
        }
    }
    SYSTEM_LEAVE(request);
    return result;
}
// Queries I915_PARAM_CHIPSET_ID into `devId`.
// Returns the ioctl result, or 0 (leaving `devId` untouched) when the parameter
// is not defined by the i915 header this file was built against.
int Drm::getDeviceID(int &devId) {
#if defined(I915_PARAM_CHIPSET_ID)
    drm_i915_getparam_t query;
    query.value = &devId;
    query.param = I915_PARAM_CHIPSET_ID;
    return ioctl(DRM_IOCTL_I915_GETPARAM, &query);
#else
    return 0;
#endif
}
// Queries I915_PARAM_REVISION into `revId`.
// Returns the ioctl result, or 0 (leaving `revId` untouched) when the parameter
// is not defined by the i915 header this file was built against.
int Drm::getDeviceRevID(int &revId) {
#if defined(I915_PARAM_REVISION)
    drm_i915_getparam_t query;
    query.value = &revId;
    query.param = I915_PARAM_REVISION;
    return ioctl(DRM_IOCTL_I915_GETPARAM, &query);
#else
    return 0;
#endif
}
int Drm::getExecSoftPin(int &execSoftPin) {
int ret = 0;
drm_i915_getparam_t gp;
gp.param = I915_PARAM_HAS_EXEC_SOFTPIN;
gp.value = &execSoftPin;
ret = ioctl(DRM_IOCTL_I915_GETPARAM, &gp);
return ret;
}
int Drm::enableTurboBoost() {
int ret = 0;
struct drm_i915_gem_context_param contextParam;
memset(&contextParam, 0, sizeof(contextParam));
contextParam.param = I915_CONTEXT_PRIVATE_PARAM_BOOST;
contextParam.value = 1;
ret = ioctl(DRM_IOCTL_I915_GEM_CONTEXT_SETPARAM, &contextParam);
return ret;
}
// Queries I915_PARAM_HAS_POOLED_EU into `enabled`.
// Returns the ioctl result, or 0 (leaving `enabled` untouched) when the parameter
// is not defined by the i915 header this file was built against.
int Drm::getEnabledPooledEu(int &enabled) {
#if defined(I915_PARAM_HAS_POOLED_EU)
    // The request is declared inside the guarded region so that builds without
    // the parameter do not carry an unused local.
    drm_i915_getparam_t gp;
    gp.value = &enabled;
    gp.param = I915_PARAM_HAS_POOLED_EU;
    return ioctl(DRM_IOCTL_I915_GETPARAM, &gp);
#else
    (void)enabled; // same convention as getEuTotal()/getSubsliceTotal()
    return 0;
#endif
}
int Drm::getMaxGpuFrequency(int &maxGpuFrequency) {
maxGpuFrequency = 0;
int deviceID = 0;
int ret = getDeviceID(deviceID);
if (ret != 0) {
return ret;
}
std::string clockSysFsPath = getSysFsPciPath(deviceID);
if (clockSysFsPath.size() == 0) {
return 0;
}
clockSysFsPath += sysFsDefaultGpuPath;
clockSysFsPath += maxGpuFrequencyFile;
std::ifstream ifs(clockSysFsPath.c_str(), std::ifstream::in);
if (ifs.fail()) {
return 0;
}
ifs >> maxGpuFrequency;
ifs.close();
return 0;
}
void Drm::obtainCoherencyDisablePatchActive() {
drm_i915_getparam_t GPUParams;
int value = 0;
GPUParams.param = I915_PRIVATE_PARAM_HAS_EXEC_FORCE_NON_COHERENT;
GPUParams.value = &value;
auto retVal = ioctl(DRM_IOCTL_I915_GETPARAM, &GPUParams);
if (retVal == 0) {
coherencyDisablePatchActive = value != 0 ? 1 : 0;
}
}
std::string Drm::getSysFsPciPath(int deviceID) {
std::string nullPath;
std::string sysFsPciDirectory = Os::sysFsPciPath;
std::vector<std::string> files = Directory::getFiles(sysFsPciDirectory);
for (std::vector<std::string>::iterator file = files.begin(); file != files.end(); ++file) {
PCIConfig config;
memset(&config, 0, sizeof(PCIConfig));
std::string configPath = *file + configFileName;
std::string sysfsPath = *file;
std::ifstream configFile(configPath, std::ifstream::binary);
if (configFile.is_open()) {
configFile.read(reinterpret_cast<char *>(&config), sizeof(config));
if (!configFile.good() || (config.DeviceID != deviceID)) {
configFile.close();
continue;
}
return sysfsPath;
}
}
return nullPath;
}
bool Drm::is48BitAddressRangeSupported() {
drm_i915_getparam_t gp;
int value = 0;
gp.value = &value;
gp.param = I915_PARAM_HAS_ALIASING_PPGTT;
int ret = ioctl(DRM_IOCTL_I915_GETPARAM, &gp);
if (ret == 0 && *gp.value == 3)
return true;
return false;
}
// Returns true when the I915_PARAM_HAS_PREEMPTION query succeeds with value 1
// AND a context can be created and switched to low priority.
// Always false when the parameter is not defined at build time.
bool Drm::hasPreemption() {
#if defined(I915_PARAM_HAS_PREEMPTION)
    int preemptionValue = 0;
    drm_i915_getparam_t query;
    query.param = I915_PARAM_HAS_PREEMPTION;
    query.value = &preemptionValue;

    const int status = ioctl(DRM_IOCTL_I915_GETPARAM, &query);
    if (status != 0 || preemptionValue != 1) {
        return false;
    }
    // setLowPriority() is only attempted once the context exists.
    return contextCreate() && setLowPriority();
#else
    return false;
#endif
}
// Sets I915_CONTEXT_PARAM_PRIORITY of the context identified by
// lowPriorityContextId to -1023. Returns true when the ioctl succeeds;
// always false when preemption support is not defined at build time.
bool Drm::setLowPriority() {
#if defined(I915_PARAM_HAS_PREEMPTION)
    struct drm_i915_gem_context_param priorityRequest = {0};
    priorityRequest.ctx_id = lowPriorityContextId;
    priorityRequest.param = I915_CONTEXT_PARAM_PRIORITY;
    priorityRequest.value = -1023;
    return ioctl(DRM_IOCTL_I915_GEM_CONTEXT_SETPARAM, &priorityRequest) == 0;
#else
    return false;
#endif
}
// Creates a GEM context and stores its id in lowPriorityContextId.
// Returns true on success; on failure the stored id is left unchanged.
// Always false when preemption support is not defined at build time.
bool Drm::contextCreate() {
#if defined(I915_PARAM_HAS_PREEMPTION)
    struct drm_i915_gem_context_create createRequest;
    memset(&createRequest, 0, sizeof(createRequest));

    if (ioctl(DRM_IOCTL_I915_GEM_CONTEXT_CREATE, &createRequest) != 0) {
        return false;
    }
    lowPriorityContextId = createRequest.ctx_id;
    return true;
#else
    return false;
#endif
}
// Destroys the GEM context identified by lowPriorityContextId.
// The ioctl result is intentionally ignored (best-effort cleanup).
// No-op when preemption support is not defined at build time.
void Drm::contextDestroy() {
#if defined(I915_PARAM_HAS_PREEMPTION)
    struct drm_i915_gem_context_destroy destroy;
    // Zero the whole request first: besides ctx_id the struct carries a padding
    // field, and leaving it indeterminate can make the kernel reject the call.
    // contextCreate() zeroes its request the same way.
    memset(&destroy, 0, sizeof(destroy));
    destroy.ctx_id = lowPriorityContextId;
    ioctl(DRM_IOCTL_I915_GEM_CONTEXT_DESTROY, &destroy);
#endif
}
// Queries the EU count into `euTotal`, using I915_PARAM_EU_TOTAL when it is
// defined and I915_PARAM_EU_COUNT otherwise. Returns the ioctl result, or 0
// (leaving `euTotal` untouched) when neither parameter is defined.
int Drm::getEuTotal(int &euTotal) {
#if defined(I915_PARAM_EU_TOTAL) || defined(I915_PARAM_EU_COUNT)
#if defined(I915_PARAM_EU_TOTAL)
    const int euParam = I915_PARAM_EU_TOTAL;
#else
    const int euParam = I915_PARAM_EU_COUNT;
#endif
    drm_i915_getparam_t query;
    memset(&query, 0, sizeof(query));
    query.param = euParam;
    query.value = &euTotal;
    return ioctl(DRM_IOCTL_I915_GETPARAM, &query);
#else
    (void)euTotal;
    return 0;
#endif
}
// Queries I915_PARAM_SUBSLICE_TOTAL into `subsliceTotal`.
// Returns the ioctl result, or 0 (leaving the output untouched) when the
// parameter is not defined at build time.
int Drm::getSubsliceTotal(int &subsliceTotal) {
#if !defined(I915_PARAM_SUBSLICE_TOTAL)
    (void)subsliceTotal;
    return 0;
#else
    drm_i915_getparam_t query;
    memset(&query, 0, sizeof(query));
    query.param = I915_PARAM_SUBSLICE_TOTAL;
    query.value = &subsliceTotal;
    return ioctl(DRM_IOCTL_I915_GETPARAM, &query);
#endif
}
// Queries I915_PARAM_MIN_EU_IN_POOL into `minEUinPool`.
// Returns the ioctl result, or 0 (leaving the output untouched) when the
// parameter is not defined at build time.
int Drm::getMinEuInPool(int &minEUinPool) {
#if !defined(I915_PARAM_MIN_EU_IN_POOL)
    (void)minEUinPool;
    return 0;
#else
    drm_i915_getparam_t query;
    memset(&query, 0, sizeof(query));
    query.param = I915_PARAM_MIN_EU_IN_POOL;
    query.value = &minEUinPool;
    return ioctl(DRM_IOCTL_I915_GETPARAM, &query);
#endif
}
} // namespace OCLRT

View File

@@ -0,0 +1,122 @@
/*
* Copyright (c) 2017, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#pragma once
#include "igfxfmid.h"
#include "runtime/utilities/api_intercept.h"
#include <sys/ioctl.h>
#include <fcntl.h>
#include <unistd.h>
#include <cerrno>
#include <string>
namespace OCLRT {
// Private (non-upstream) i915 identifiers used by this runtime.
// GETPARAM id queried by Drm::obtainCoherencyDisablePatchActive().
#define I915_PRIVATE_PARAM_HAS_EXEC_FORCE_NON_COHERENT (-1)
// NOTE(review): `1 << 31` shifts into the sign bit of a signed int; presumably an
// exec flag bit -- confirm the intended type at the use sites before widening it.
#define I915_PRIVATE_EXEC_FORCE_NON_COHERENT (1 << 31)
// Context parameter id set to 1 by Drm::enableTurboBoost().
#define I915_CONTEXT_PRIVATE_PARAM_BOOST 0x80000000
class DeviceFactory;
// Thin wrapper around an i915 DRM file descriptor. The query helpers return the
// raw ioctl result (0 on success) and write their answer through a reference
// parameter; the bool helpers fold the result into success/failure.
class Drm {
friend DeviceFactory;
public:
// Id of the context created by contextCreate(); used by setLowPriority() and
// contextDestroy(). Initialised to 0 by the constructor.
uint32_t lowPriorityContextId;
static Drm *get(int32_t deviceOrdinal);
// Forwards to ::ioctl on `fd`, retrying while it fails with EINTR or EAGAIN.
virtual int ioctl(unsigned long request, void *arg);
int getDeviceID(int &devId);
int getDeviceRevID(int &revId);
int getExecSoftPin(int &execSoftPin);
int enableTurboBoost();
int getEuTotal(int &euTotal);
int getSubsliceTotal(int &subsliceTotal);
// Resets the output to 0, then reads the frequency from sysfs when available.
int getMaxGpuFrequency(int &maxGpuFrequency);
int getEnabledPooledEu(int &enabled);
int getMinEuInPool(int &minEUinPool);
bool is48BitAddressRangeSupported();
// Also creates the low-priority context as a side effect when the kernel
// reports preemption support.
MOCKABLE_VIRTUAL bool hasPreemption();
bool setLowPriority();
// Returns the flag cached by the last obtainCoherencyDisablePatchActive() call.
bool peekCoherencyDisablePatchActive() { return coherencyDisablePatchActive; }
virtual void obtainCoherencyDisablePatchActive();
int getFileDescriptor() const { return fd; }
bool contextCreate();
void contextDestroy();
void setGtType(GTTYPE eGtType) { this->eGtType = eGtType; }
GTTYPE getGtType() const { return this->eGtType; }
protected:
int fd;
int deviceId;
int revisionId;
GTTYPE eGtType;
bool coherencyDisablePatchActive = false;
// Construction and destruction are protected; instances are obtained through
// the static get()/create() functions.
Drm(int fd) : lowPriorityContextId(0), fd(fd), deviceId(0), revisionId(0), eGtType(GTTYPE_UNDEFINED) {}
virtual ~Drm();
static bool isi915Version(int fd);
static int getDeviceFd(const int devType);
static int openDevice();
static Drm *create(int32_t deviceOrdinal);
static void closeDevice(int32_t deviceOrdinal);
// Returns the sysfs PCI directory whose config header matches `deviceID`,
// or an empty string when none does.
std::string getSysFsPciPath(int deviceID);
// Byte-packed image of the header read from a device's sysfs "config" file by
// getSysFsPciPath(); only DeviceID is inspected there.
#pragma pack(1)
struct PCIConfig {
uint16_t VendorID;
uint16_t DeviceID;
uint16_t Command;
uint16_t Status;
uint8_t Revision;
uint8_t ProgIF;
uint8_t Subclass;
uint8_t ClassCode;
uint8_t cacheLineSize;
uint8_t LatencyTimer;
uint8_t HeaderType;
uint8_t BIST;
uint32_t BAR0[6];
uint32_t CardbusCISPointer;
uint16_t SubsystemVendorID;
uint16_t SubsystemDeviceID;
uint32_t ROM;
uint8_t Capabilities;
uint8_t Reserved[7];
uint8_t InterruptLine;
uint8_t InterruptPIN;
uint8_t MinGrant;
uint8_t MaxLatency;
};
#pragma pack()
// Path fragments appended to a sysfs PCI device directory.
static const char *sysFsDefaultGpuPath;
static const char *maxGpuFrequencyFile;
static const char *configFileName;
};
} // namespace OCLRT

View File

@@ -0,0 +1,41 @@
/*
* Copyright (c) 2017, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#include "runtime/os_interface/linux/drm_neo.h"
#include "runtime/helpers/options.h"
namespace OCLRT {

Drm::~Drm() = default;

// Stub: this build variant never hands out a device.
Drm *Drm::get(int32_t /*deviceOrdinal*/) {
    return nullptr;
}

// Stub: this build variant never creates a device.
Drm *Drm::create(int32_t /*deviceOrdinal*/) {
    return nullptr;
}

// Stub: nothing to close.
void Drm::closeDevice(int32_t /*deviceOrdinal*/) {
}

} // namespace OCLRT

View File

@@ -0,0 +1,64 @@
/*
* Copyright (c) 2017, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#pragma once
#include "runtime/os_interface/linux/drm_neo.h"
#include "runtime/os_interface/linux/os_time.h"
#include "drm/i915_drm.h"
#include <cstdio>
namespace OCLRT {
// Drm variant that only lets GETPARAM reach the kernel. Register reads are
// answered locally with a synthetic timestamp, and every other request is
// reported as successful without being issued.
class DrmNullDevice : public Drm {
    friend Drm;
    friend DeviceFactory;

  public:
    int ioctl(unsigned long request, void *arg) override {
        if (request == DRM_IOCTL_I915_GETPARAM) {
            return Drm::ioctl(request, arg);
        }
        if (request != DRM_IOCTL_I915_REG_READ) {
            return 0;
        }

        auto *readRequest = static_cast<struct drm_i915_reg_read *>(arg);

        // Only the 36-bit timestamp read is served: advance the fake clock by
        // 1000 ticks and report its low 36 bits.
        if (readRequest->offset == (TIMESTAMP_LOW_REG | 1)) {
            gpuTimestamp += 1000;
            readRequest->val = gpuTimestamp & 0x0000000FFFFFFFFF;
            return 0;
        }

        // Plain low/high timestamp register reads are rejected.
        const bool isPlainTimestampRead =
            readRequest->offset == TIMESTAMP_LOW_REG || readRequest->offset == TIMESTAMP_HIGH_REG;
        return isPlainTimestampRead ? -1 : 0;
    }

    void obtainCoherencyDisablePatchActive() override { coherencyDisablePatchActive = true; }

  protected:
    DrmNullDevice(int fd) : Drm(fd), gpuTimestamp(0) {}

    uint64_t gpuTimestamp;
};
}

View File

@@ -0,0 +1,194 @@
/*
* Copyright (c) 2017, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#include "runtime/command_stream/preemption.h"
#include "runtime/helpers/hw_info.h"
#include "runtime/helpers/hw_helper.h"
#include "runtime/os_interface/linux/drm_neo.h"
#include "runtime/os_interface/linux/hw_info_config.h"
#include "runtime/os_interface/linux/os_interface.h"
#include "runtime/utilities/cpu_info.h"
#include <cstring>
namespace OCLRT {
// Per-product table of HwInfoConfig objects, indexed by product family.
// Starts out all-null.
HwInfoConfig *hwInfoConfigFactory[IGFX_MAX_PRODUCT] = {};
// Extracts the inclusive bit field [highBit:lowBit] from `value`, shifted down so
// that bit `lowBit` becomes bit 0.
//
// The mask is built in unsigned arithmetic and the full-width case is handled
// explicitly: shifting a signed 1 by 31 or 32 positions is undefined, which
// previously made fields such as [31:0] or [30:0] unreliable.
// A `lowBit` outside the 32-bit word selects nothing and yields 0.
uint32_t bitExact(uint32_t value, uint32_t highBit, uint32_t lowBit) {
    if (lowBit > 31u) {
        return 0u;
    }
    const uint32_t width = highBit - lowBit + 1u;
    const uint32_t mask = (width >= 32u) ? 0xFFFFFFFFu : ((1u << width) - 1u);
    return (value >> lowBit) & mask;
}
// Walks CPUID leaf 4 sub-leaves until one reports cache type 0 and stores the
// size of every level-3 cache it meets (in KB) into the system info's
// LLCCacheSizeInKb; the last level-3 entry wins. Always returns 0.
int configureCacheInfo(HardwareInfo *hwInfo) {
    GT_SYSTEM_INFO *sysInfo = const_cast<GT_SYSTEM_INFO *>(hwInfo->pSysInfo);
    const CpuInfo &cpu = CpuInfo::getInstance();

    for (uint32_t subleaf = 0;; ++subleaf) {
        uint32_t regs[4];
        cpu.cpuidex(regs, 4, subleaf);
        const uint32_t eax = regs[0];
        const uint32_t ebx = regs[1];
        const uint32_t ecx = regs[2];

        // Type 0 terminates the enumeration.
        if (bitExact(eax, 4, 0) == 0) {
            break;
        }
        if (bitExact(eax, 7, 5) != 3) {
            continue;
        }

        // Each field is stored minus one, hence the +1 adjustments.
        const uint32_t lineSize = bitExact(ebx, 11, 0) + 1;
        const uint32_t partitionCount = bitExact(ebx, 21, 12) + 1;
        const uint32_t wayCount = bitExact(ebx, 31, 22) + 1;
        const uint64_t setCount = (uint64_t)ecx + 1;
        const uint64_t sizeInKb = setCount * wayCount * partitionCount * lineSize / 1024;

        sysInfo->LLCCacheSizeInKb = sizeInKb;
    }
    return 0;
}
// Builds `outHwInfo` from the template `inHwInfo` plus values queried from the DRM
// device behind `osIface`.
//
// The platform, system-info, feature and workaround tables are deep-copied into
// fresh heap objects. On success ownership of those copies passes to `outHwInfo`
// (they are released from the local smart pointers) and 0 is returned. On any
// failure `outHwInfo` is zeroed, the copies are freed, and a non-zero code is
// returned: the failing query's result, or -1 when the device id is 0 or the GT
// type is undefined.
int HwInfoConfig::configureHwInfo(const HardwareInfo *inHwInfo, HardwareInfo *outHwInfo, OSInterface *osIface) {
    int ret = 0;
    Drm *drm = osIface->get()->getDrm();

    // Private, mutable copies of the input tables; owned locally until success.
    auto pPlatform = std::unique_ptr<PLATFORM>(new PLATFORM);
    *pPlatform = *(inHwInfo->pPlatform);
    auto pSysInfo = std::unique_ptr<GT_SYSTEM_INFO>(new GT_SYSTEM_INFO);
    *(pSysInfo) = *(inHwInfo->pSysInfo);
    auto pSkuTable = std::unique_ptr<FeatureTable>(new FeatureTable);
    *pSkuTable = *(inHwInfo->pSkuTable);
    auto pWaTable = std::unique_ptr<WorkaroundTable>(new WorkaroundTable);
    *pWaTable = *(inHwInfo->pWaTable);

    outHwInfo->pPlatform = const_cast<const PLATFORM *>(pPlatform.get());
    outHwInfo->pSysInfo = const_cast<const GT_SYSTEM_INFO *>(pSysInfo.get());
    outHwInfo->pSkuTable = const_cast<const FeatureTable *>(pSkuTable.get());
    outHwInfo->pWaTable = const_cast<const WorkaroundTable *>(pWaTable.get());
    outHwInfo->capabilityTable = inHwInfo->capabilityTable;

    int val = 0;
    ret = drm->getDeviceID(val);
    if (ret != 0 || val == 0) {
        memset(outHwInfo, 0, sizeof(HardwareInfo));
        // A successful query that yields id 0 is still treated as a failure.
        return (ret == 0) ? -1 : ret;
    }
    pPlatform->usDeviceID = static_cast<unsigned short>(val);

    ret = drm->getDeviceRevID(val);
    if (ret != 0) {
        memset(outHwInfo, 0, sizeof(HardwareInfo));
        return ret;
    }
    pPlatform->usRevId = static_cast<unsigned short>(val);

    // Zero-initialised: the getter may report success without writing its output
    // (e.g. when the matching i915 parameter is unavailable at build time).
    int euCount = 0;
    ret = drm->getEuTotal(euCount);
    if (ret != 0) {
        memset(outHwInfo, 0, sizeof(HardwareInfo));
        return ret;
    }
    pSysInfo->EUCount = static_cast<uint32_t>(euCount);
    pSysInfo->ThreadCount = this->threadsPerEu * pSysInfo->EUCount;

    // Zero-initialised for the same reason as euCount.
    int subSliceCount = 0;
    ret = drm->getSubsliceTotal(subSliceCount);
    if (ret != 0) {
        memset(outHwInfo, 0, sizeof(HardwareInfo));
        return ret;
    }
    pSysInfo->SubSliceCount = static_cast<uint32_t>(subSliceCount);

    drm->obtainCoherencyDisablePatchActive();
    pSkuTable->ftrSVM = drm->is48BitAddressRangeSupported();

    // Best effort: the frequency stays 0 when it cannot be read.
    int maxGpuFreq = 0;
    drm->getMaxGpuFrequency(maxGpuFreq);

    GTTYPE gtType = drm->getGtType();
    if (gtType == GTTYPE_UNDEFINED) {
        memset(outHwInfo, 0, sizeof(HardwareInfo));
        return -1;
    }
    pPlatform->eGTType = gtType;
    // Exactly one of the GT feature flags is set, matching the GT type.
    pSkuTable->ftrGTA = (gtType == GTTYPE_GTA) ? 1 : 0;
    pSkuTable->ftrGTC = (gtType == GTTYPE_GTC) ? 1 : 0;
    pSkuTable->ftrGTX = (gtType == GTTYPE_GTX) ? 1 : 0;
    pSkuTable->ftrGT1 = (gtType == GTTYPE_GT1) ? 1 : 0;
    pSkuTable->ftrGT1_5 = (gtType == GTTYPE_GT1_5) ? 1 : 0;
    pSkuTable->ftrGT2 = (gtType == GTTYPE_GT2) ? 1 : 0;
    pSkuTable->ftrGT2_5 = (gtType == GTTYPE_GT2_5) ? 1 : 0;
    pSkuTable->ftrGT3 = (gtType == GTTYPE_GT3) ? 1 : 0;
    pSkuTable->ftrGT4 = (gtType == GTTYPE_GT4) ? 1 : 0;

    // Product-specific adjustments.
    ret = configureHardwareCustom(outHwInfo, osIface);
    if (ret != 0) {
        memset(outHwInfo, 0, sizeof(HardwareInfo));
        return ret;
    }

    configureCacheInfo(outHwInfo);
    pSkuTable->ftrEDram = (pSysInfo->EdramSizeInKb != 0) ? 1 : 0;

    outHwInfo->capabilityTable.maxRenderFrequency = maxGpuFreq;
    outHwInfo->capabilityTable.ftrSvm = pSkuTable->ftrSVM;

    // Coherency is advertised only when both the platform and the kernel agree.
    bool platformCoherency = false;
    HwHelper &hwHelper = HwHelper::get(pPlatform->eRenderCoreFamily);
    hwHelper.setCapabilityCoherencyFlag(const_cast<const HardwareInfo *>(outHwInfo), platformCoherency);
    outHwInfo->capabilityTable.ftrSupportsCoherency = (platformCoherency && drm->peekCoherencyDisablePatchActive());

    outHwInfo->capabilityTable.instrumentationEnabled = false;

    bool preemption = drm->hasPreemption();
    preemption = hwHelper.setupPreemptionRegisters(outHwInfo, preemption);
    PreemptionHelper::adjustDefaultPreemptionMode(outHwInfo->capabilityTable,
                                                  static_cast<bool>(outHwInfo->pSkuTable->ftrGpGpuMidThreadLevelPreempt) && preemption,
                                                  static_cast<bool>(outHwInfo->pSkuTable->ftrGpGpuThreadGroupLevelPreempt) && preemption,
                                                  static_cast<bool>(outHwInfo->pSkuTable->ftrGpGpuMidBatchPreempt) && preemption);

    // Debug overrides take effect only when set to a non-negative value.
    outHwInfo->capabilityTable.enableKmdNotify = DebugManager.flags.OverrideEnableKmdNotify.get() >= 0
                                                     ? !!DebugManager.flags.OverrideEnableKmdNotify.get()
                                                     : outHwInfo->capabilityTable.enableKmdNotify;
    outHwInfo->capabilityTable.delayKmdNotifyMs = DebugManager.flags.OverrideKmdNotifyDelayMs.get() >= 0
                                                      ? static_cast<int64_t>(DebugManager.flags.OverrideKmdNotifyDelayMs.get())
                                                      : outHwInfo->capabilityTable.delayKmdNotifyMs;

    // Success: outHwInfo now owns the copies.
    pPlatform.release();
    pSkuTable.release();
    pWaTable.release();
    pSysInfo.release();
    return 0;
}
} // namespace OCLRT

View File

@@ -0,0 +1,72 @@
/*
* Copyright (c) 2017, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#pragma once
#include "igfxfmid.h"
#include "runtime/helpers/hw_info.h"
namespace OCLRT {
struct HardwareInfo;
class OSInterface;
class HwInfoConfig;
extern HwInfoConfig *hwInfoConfigFactory[IGFX_MAX_PRODUCT];
// Base class for per-product hardware-info configuration on Linux.
class HwInfoConfig {
public:
// Looks up the configuration registered for `product` in hwInfoConfigFactory.
// No bounds check is performed on `product`.
static HwInfoConfig *get(PRODUCT_FAMILY product) {
return hwInfoConfigFactory[product];
}
// Fills `outHwInfo` from `inHwInfo` plus values queried through `osIface`;
// returns 0 on success.
int configureHwInfo(const HardwareInfo *inHwInfo, HardwareInfo *outHwInfo, OSInterface *osIface);
// Product-specific hook invoked from configureHwInfo().
virtual int configureHardwareCustom(HardwareInfo *hwInfo, OSInterface *osIface) = 0;
// Multiplied by the EU count in configureHwInfo() to obtain the thread count.
uint32_t threadsPerEu;
};
// Per-product HwInfoConfig implementation. One instance exists per product and
// is reachable only through get().
template <PRODUCT_FAMILY gfxProduct>
class HwInfoConfigHw : public HwInfoConfig {
  public:
    int configureHardwareCustom(HardwareInfo *hwInfo, OSInterface *osIface) override;

    // Returns the single instance for this product, created on first use.
    static HwInfoConfig *get() {
        static HwInfoConfigHw singleton;
        return &singleton;
    }

  protected:
    HwInfoConfigHw() {}
};
// Constructing an object of this type registers the product's HwInfoConfig in
// hwInfoConfigFactory and records the product's threads-per-EU value on it.
template <PRODUCT_FAMILY gfxProduct>
struct LinuxEnableGfxProductHw {
    using GfxProduct = typename HwMapper<gfxProduct>::GfxProduct;

    LinuxEnableGfxProductHw() {
        HwInfoConfig *productConfig = HwInfoConfigHw<gfxProduct>::get();
        hwInfoConfigFactory[gfxProduct] = productConfig;
        productConfig->threadsPerEu = GfxProduct::threadsPerEu;
    }
};
} // namespace OCLRT

View File

@@ -0,0 +1,30 @@
/*
* Copyright (c) 2017, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#include "runtime/os_interface/linux/linux_inc.h"
#include "runtime/utilities/perf_profiler.h"
namespace Os {
const char *fileSeparator = "/";
}
namespace OCLRT {
thread_local OCLRT::PerfProfiler *gPerfProfiler = nullptr;
}

View File

@@ -0,0 +1,40 @@
/*
* Copyright (c) 2017, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#pragma once
// For now we need to keep this file clean of OS specific #includes.
// Only issues to address portability should be covered here.
#if defined(__linux__)
#include <cstdint>
#define PATH_SEPARATOR '/'
#define __cdecl
namespace OCLRT {
class PerfProfiler;

// i915 gem object handle
using FlushStamp = int;

// Defined once per process in the Linux implementation file; one value per thread.
extern thread_local PerfProfiler *gPerfProfiler;
} // namespace OCLRT
#endif

View File

@@ -0,0 +1,34 @@
/*
* Copyright (c) 2017, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#include "runtime/os_interface/os_library.h"
// File names and paths used by the runtime on Linux.
namespace Os {
#if defined(__linux__)
// Compiler library names
const char *frontEndDllName = "libigdfcl.so";
const char *igcDllName = "libigdccl.so";
const char *libvaDllName = "libva.so.1";
#endif //__linux__
// Root directory scanned by Drm::getSysFsPciPath() for PCI devices.
const char *sysFsPciPath = "/sys/bus/pci/devices/";
const char *tbxLibName = "libtbxAccess.so";
}

View File

@@ -0,0 +1,45 @@
/*
* Copyright (c) 2017, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#include "os_interface.h"
namespace OCLRT {

bool OSInterface::osEnabled64kbPages = false;

// Allocates the private implementation object; it is freed in the destructor.
OSInterface::OSInterface() : osInterfaceImpl(new OSInterfaceImpl()) {
}

OSInterface::~OSInterface() {
    delete osInterfaceImpl;
}

// This implementation always reports context id 0.
uint32_t OSInterface::getHwContextId() const {
    return 0;
}

// Reports the class-wide osEnabled64kbPages flag.
bool OSInterface::are64kbPagesEnabled() {
    return osEnabled64kbPages;
}

} // namespace OCLRT

View File

@@ -0,0 +1,44 @@
/*
* Copyright (c) 2017, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#pragma once
#include "runtime/os_interface/os_interface.h"
#include "drm_neo.h"
namespace OCLRT {
class Drm;
// Linux-side state behind OSInterface: a non-owning pointer to the Drm object.
class OSInterface::OSInterfaceImpl {
  public:
    OSInterfaceImpl() : drm(nullptr) {}

    // Stores the pointer as-is; it is never deleted by this class.
    void setDrm(Drm *drm) { this->drm = drm; }

    // Returns the stored pointer, or nullptr when setDrm() has not been called.
    Drm *getDrm() const { return drm; }

  protected:
    Drm *drm;
};
} // namespace OCLRT

View File

@@ -0,0 +1,69 @@
/*
* Copyright (c) 2017, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#if defined(__linux__)
#include "runtime/helpers/debug_helpers.h"
#include "runtime/os_interface/os_library.h"
#include "os_library.h"
#include <dlfcn.h>
namespace OCLRT {
// Creates a Linux::OsLibrary for `name`. Returns nullptr when the allocation
// fails or the library could not be loaded; otherwise the caller receives the
// newly allocated object.
OsLibrary *OsLibrary::load(const std::string &name) {
    Linux::OsLibrary *library = new (std::nothrow) Linux::OsLibrary(name);
    if (library != nullptr && !library->isLoaded()) {
        delete library;
        library = nullptr;
    }
    return library;
}
namespace Linux {

// Opens `name` with lazy binding; an empty name is passed to dlopen as a null
// path. A failed open leaves the handle null (see isLoaded()).
OsLibrary::OsLibrary(const std::string &name) {
    const char *path = name.empty() ? nullptr : name.c_str();
    this->handle = dlopen(path, RTLD_LAZY);
}

// Closes the handle if one was obtained.
OsLibrary::~OsLibrary() {
    if (this->handle == nullptr) {
        return;
    }
    dlclose(this->handle);
    this->handle = nullptr;
}

bool OsLibrary::isLoaded() {
    return nullptr != this->handle;
}

// Looks `procName` up through dlsym on the stored handle.
void *OsLibrary::getProcAddress(const std::string &procName) {
    DEBUG_BREAK_IF(this->handle == nullptr);
    const char *symbol = procName.c_str();
    return dlsym(this->handle, symbol);
}

} // namespace Linux
}
#endif

View File

@@ -0,0 +1,41 @@
/*
* Copyright (c) 2017, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#pragma once
#include "runtime/os_interface/os_library.h"
namespace OCLRT {
namespace Linux {
class OsLibrary : public OCLRT::OsLibrary {
private:
void *handle;
public:
OsLibrary(const std::string &name);
~OsLibrary() override;
bool isLoaded() override;
void *getProcAddress(const std::string &procName) override;
};
}
}

View File

@@ -0,0 +1,166 @@
/*
* Copyright (c) 2017, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#include <time.h>
#include "runtime/os_interface/linux/drm_neo.h"
#include "drm/i915_drm.h"
#include "runtime/os_interface/linux/os_interface.h"
#include "runtime/os_interface/linux/os_time.h"
namespace OCLRT {
// Wires up the host clock functions, picks the Drm object (from the OS interface when
// one is given, otherwise Drm::get(0)) and then detects the GPU timestamp read method.
OSTimeLinux::OSTimeLinux(OSInterface *osInterface) {
    getTimeFunc = &clock_gettime;
    resolutionFunc = &clock_getres;
    this->osInterface = osInterface;
    if (osInterface == nullptr) {
        pDrm = Drm::get(0);
    } else {
        pDrm = osInterface->get()->getDrm();
    }
    timestampTypeDetect();
}
// Nothing to release: the destructor body is empty.
OSTimeLinux::~OSTimeLinux() = default;
void OSTimeLinux::timestampTypeDetect() {
struct drm_i915_reg_read reg;
int err;
if (pDrm == nullptr)
return;
reg.offset = (TIMESTAMP_LOW_REG | 1);
err = pDrm->ioctl(DRM_IOCTL_I915_REG_READ, &reg);
if (err) {
reg.offset = TIMESTAMP_HIGH_REG;
err = pDrm->ioctl(DRM_IOCTL_I915_REG_READ, &reg);
if (err) {
getGpuTime = &OSTimeLinux::getGpuTime32;
timestampSizeInBits = OCLRT_NUM_TIMESTAMP_BITS_FALLBACK;
} else {
getGpuTime = &OSTimeLinux::getGpuTimeSplitted;
timestampSizeInBits = OCLRT_NUM_TIMESTAMP_BITS;
}
} else {
getGpuTime = &OSTimeLinux::getGpuTime36;
timestampSizeInBits = OCLRT_NUM_TIMESTAMP_BITS;
}
}
// Reads CLOCK_MONOTONIC_RAW through getTimeFunc and stores it in *timestamp as nanoseconds.
// Returns false, leaving *timestamp untouched, when the clock call reports an error.
bool OSTimeLinux::getCpuTime(uint64_t *timestamp) {
    struct timespec now;
    const bool succeeded = (getTimeFunc(CLOCK_MONOTONIC_RAW, &now) == 0);
    if (succeeded) {
        *timestamp = static_cast<uint64_t>(now.tv_sec) * NSEC_PER_SEC + now.tv_nsec;
    }
    return succeeded;
}
bool OSTimeLinux::getGpuTime32(uint64_t *timestamp) {
struct drm_i915_reg_read reg;
reg.offset = TIMESTAMP_LOW_REG;
if (pDrm->ioctl(DRM_IOCTL_I915_REG_READ, &reg)) {
return false;
}
*timestamp = reg.val >> 32;
return true;
}
bool OSTimeLinux::getGpuTime36(uint64_t *timestamp) {
struct drm_i915_reg_read reg;
reg.offset = TIMESTAMP_LOW_REG | 1;
if (pDrm->ioctl(DRM_IOCTL_I915_REG_READ, &reg)) {
return false;
}
*timestamp = reg.val;
return true;
}
// Builds the GPU timestamp from two separate register reads (high and low part).
// Returns false if any of the register read ioctls reported an error; otherwise stores
// low | (high << 32) in *timestamp and returns true.
bool OSTimeLinux::getGpuTimeSplitted(uint64_t *timestamp) {
struct drm_i915_reg_read reg_hi, reg_lo;
uint64_t tmp_hi;
// err accumulates the ioctl return values; loop bounds the number of retries.
int err = 0, loop = 3;
reg_hi.offset = TIMESTAMP_HIGH_REG;
reg_lo.offset = TIMESTAMP_LOW_REG;
err += pDrm->ioctl(DRM_IOCTL_I915_REG_READ, &reg_hi);
// Read low, then high again; retry while the high part changed between the two high
// reads (so low and high may not belong together), at most three rounds in total.
do {
tmp_hi = reg_hi.val;
err += pDrm->ioctl(DRM_IOCTL_I915_REG_READ, &reg_lo);
err += pDrm->ioctl(DRM_IOCTL_I915_REG_READ, &reg_hi);
} while (err == 0 && reg_hi.val != tmp_hi && --loop);
if (err) {
return false;
}
// When the retries are exhausted the most recently read pair is used as is.
*timestamp = reg_lo.val | (reg_hi.val << 32);
return true;
}
// Fills pGpuCpuTime with a GPU timestamp (via the reader selected by
// timestampTypeDetect) followed by the CPU time in nanoseconds.
// Returns false when no Drm object is available or when either read fails.
bool OSTimeLinux::getCpuGpuTime(TimeStampData *pGpuCpuTime) {
    // timestampTypeDetect() assigns getGpuTime only when pDrm is non-null, and every
    // GPU reader dereferences pDrm, so without a Drm object there is nothing to call.
    if (pDrm == nullptr) {
        return false;
    }
    if (!(this->*getGpuTime)(&pGpuCpuTime->GPUTimeStamp)) {
        return false;
    }
    return getCpuTime(&pGpuCpuTime->CPUTimeinNS);
}
// Factory for the Linux build: returns an OSTimeLinux bound to `osInterface`.
std::unique_ptr<OSTime> OSTime::create(OSInterface *osInterface) {
    std::unique_ptr<OSTime> osTime(new OSTimeLinux(osInterface));
    return osTime;
}
double OSTimeLinux::getHostTimerResolution() const {
struct timespec ts;
if (resolutionFunc(CLOCK_MONOTONIC_RAW, &ts)) {
return 0;
}
return ts.tv_nsec + ts.tv_sec * NSEC_PER_SEC;
}
// On Linux this simply forwards to the static OSTime::getDeviceTimerResolution.
double OSTimeLinux::getDynamicDeviceTimerResolution(HardwareInfo const &hwInfo) const {
    const double resolution = OSTime::getDeviceTimerResolution(hwInfo);
    return resolution;
}
// Returns the current CPU time divided by the host timer resolution (both in
// nanoseconds). Yields 0 when the time cannot be read or the resolution truncates to 0.
uint64_t OSTimeLinux::getCpuRawTimestamp() {
    uint64_t nowNs = 0;
    if (!getCpuTime(&nowNs)) {
        return 0;
    }
    // The resolution is a double; it is truncated to an integer tick length.
    const uint64_t tickNs = static_cast<uint64_t>(getHostTimerResolution());
    return (tickNs == 0) ? 0 : nowNs / tickNs;
}
} // namespace OCLRT

View File

@@ -0,0 +1,58 @@
/*
* Copyright (c) 2017, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#pragma once
#include "runtime/os_interface/os_time.h"
// Timestamp width stored in timestampSizeInBits when the 36-bit or the split
// high/low register read is selected.
#define OCLRT_NUM_TIMESTAMP_BITS (36)
// Timestamp width stored when only the 32-bit fallback reader is selected.
#define OCLRT_NUM_TIMESTAMP_BITS_FALLBACK (32)
// Register offsets passed to DRM_IOCTL_I915_REG_READ for the high and low timestamp parts.
#define TIMESTAMP_HIGH_REG 0x0235C
#define TIMESTAMP_LOW_REG 0x02358
namespace OCLRT {
// Linux implementation of OSTime. CPU time comes from the clock functions held in
// getTimeFunc/resolutionFunc; GPU time is read through the Drm object using the
// reader selected by timestampTypeDetect().
//
// All data members carry default initializers so that an instance never holds
// indeterminate pointers, e.g. when no Drm object is available and
// timestampTypeDetect() returns without selecting a reader.
class OSTimeLinux : public OSTime {
  public:
    OSTimeLinux(OSInterface *osInterface);
    ~OSTimeLinux() override;
    // Stores the CPU time in nanoseconds in *timeStamp; false on failure.
    bool getCpuTime(uint64_t *timeStamp) override;
    // Fills both the GPU and the CPU timestamp; false if either read fails.
    bool getCpuGpuTime(TimeStampData *pGpuCpuTime) override;
    typedef bool (OSTimeLinux::*TimestampFunction)(uint64_t *);
    // Probes the register read ioctl and selects getGpuTime / timestampSizeInBits.
    void timestampTypeDetect();
    // Selected GPU time reader; stays nullptr until timestampTypeDetect() picks one.
    TimestampFunction getGpuTime = nullptr;
    bool getGpuTime32(uint64_t *timestamp);
    bool getGpuTime36(uint64_t *timestamp);
    bool getGpuTimeSplitted(uint64_t *timestamp);
    double getHostTimerResolution() const override;
    double getDynamicDeviceTimerResolution(HardwareInfo const &hwInfo) const override;
    uint64_t getCpuRawTimestamp() override;

  protected:
    typedef int (*resolutionFunc_t)(clockid_t, struct timespec *);
    typedef int (*getTimeFunc_t)(clockid_t, struct timespec *);
    // Drm object used for the register read ioctls; may be nullptr.
    Drm *pDrm = nullptr;
    // Width of the GPU timestamp chosen by timestampTypeDetect(); 0 until detected.
    unsigned timestampSizeInBits = 0;
    // Host clock entry points; the constructor points them at clock_getres/clock_gettime.
    resolutionFunc_t resolutionFunc = nullptr;
    getTimeFunc_t getTimeFunc = nullptr;
};
} // namespace OCLRT

View File

@@ -0,0 +1,87 @@
/*
* Copyright (c) 2017, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#include "performance_counters_linux.h"
namespace OCLRT {
// Factory for the Linux build: returns a PerformanceCountersLinux bound to `osTime`.
std::unique_ptr<PerformanceCounters> PerformanceCounters::create(OSTime *osTime) {
    std::unique_ptr<PerformanceCounters> counters(new PerformanceCountersLinux(osTime));
    return counters;
}
// Starts with no metrics library loaded and no perfmon entry point resolved.
PerformanceCountersLinux::PerformanceCountersLinux(OSTime *osTime)
    : PerformanceCounters(osTime),
      mdLibHandle(nullptr),
      perfmonLoadConfigFunc(nullptr) {
}
// Stops auto sampling if it is still running, then closes the metrics library handle.
PerformanceCountersLinux::~PerformanceCountersLinux() {
    if (pAutoSamplingInterface != nullptr) {
        autoSamplingStopFunc(&pAutoSamplingInterface);
        available = false;
        pAutoSamplingInterface = nullptr;
    }
    if (mdLibHandle != nullptr) {
        dlcloseFunc(mdLibHandle);
        mdLibHandle = nullptr;
    }
}
// Runs the common initialization, tries to load libmd.so and resolve
// drm_intel_perfmon_load_config from it, then passes the product family and SKU table
// to setPlatformInfoFunc.
void PerformanceCountersLinux::initialize(const HardwareInfo *hwInfo) {
    PerformanceCounters::initialize(hwInfo);

    mdLibHandle = dlopenFunc("libmd.so", RTLD_LAZY | RTLD_LOCAL);
    if (mdLibHandle != nullptr) {
        void *symbol = dlsymFunc(mdLibHandle, "drm_intel_perfmon_load_config");
        perfmonLoadConfigFunc = reinterpret_cast<perfmonLoadConfig_t>(symbol);
    }

    setPlatformInfoFunc(hwInfo->pPlatform->eProductFamily, (void *)(hwInfo->pSkuTable));
}
// Enables the counters only when the metrics library was loaded and the perfmon
// entry point was resolved; otherwise does nothing.
void PerformanceCountersLinux::enableImpl() {
    if (mdLibHandle == nullptr || perfmonLoadConfigFunc == nullptr) {
        return;
    }
    PerformanceCounters::enableImpl();
}
// Verifies the configuration through the base class and then loads it via perfmon.
// Returns false when the perfmon entry point is missing or either step fails.
bool PerformanceCountersLinux::verifyPmRegsCfg(InstrPmRegsCfg *pCfg, uint32_t *pLastPmRegsCfgHandle, bool *pLastPmRegsCfgPending) {
    if (perfmonLoadConfigFunc == nullptr) {
        return false;
    }
    if (!PerformanceCounters::verifyPmRegsCfg(pCfg, pLastPmRegsCfgHandle, pLastPmRegsCfgPending)) {
        return false;
    }
    return getPerfmonConfig(pCfg);
}
// Calls the perfmon load-config entry point with copies of the OA and GP handles from
// pCfg. Succeeds when the call returns 0 and every non-zero handle in pCfg came back
// unchanged.
bool PerformanceCountersLinux::getPerfmonConfig(InstrPmRegsCfg *pCfg) {
    unsigned int loadedOaHandle = pCfg->oaCounters.handle;
    unsigned int loadedGpHandle = pCfg->gpCounters.handle;
    int drmFd = osInterface->get()->getDrm()->getFileDescriptor();

    if (perfmonLoadConfigFunc(drmFd, nullptr, &loadedOaHandle, &loadedGpHandle) != 0) {
        return false;
    }

    const bool oaMismatch = pCfg->oaCounters.handle != 0 && loadedOaHandle != pCfg->oaCounters.handle;
    const bool gpMismatch = pCfg->gpCounters.handle != 0 && loadedGpHandle != pCfg->gpCounters.handle;
    return !oaMismatch && !gpMismatch;
}
} // namespace OCLRT

View File

@@ -0,0 +1,55 @@
/*
* Copyright (c) 2017, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#pragma once
#include "os_interface.h"
#include "runtime/os_interface/performance_counters.h"
#include <dlfcn.h>
// Opaque forward declaration; this header only uses it as a pointer parameter type
// in perfmonLoadConfig_t.
typedef struct _drm_intel_context drm_intel_context;
namespace OCLRT {
// Linux flavour of PerformanceCounters. It additionally loads a metrics library at
// run time and uses its perfmon load-config entry point when verifying configurations.
class PerformanceCountersLinux : virtual public PerformanceCounters {
  public:
    PerformanceCountersLinux(OSTime *osTime);
    ~PerformanceCountersLinux() override;

    void initialize(const HardwareInfo *hwInfo) override;
    void enableImpl() override;

  protected:
    virtual bool getPerfmonConfig(InstrPmRegsCfg *pCfg);
    bool verifyPmRegsCfg(InstrPmRegsCfg *pCfg, uint32_t *pLastPmRegsCfgHandle, bool *pLastPmRegsCfgPending) override;

    // Signatures of the dynamically resolved / replaceable entry points.
    using perfmonLoadConfig_t = int (*)(int fd, drm_intel_context *ctx, uint32_t *oaCfgId, uint32_t *gpCfgId);
    using dlopenFunc_t = void *(*)(const char *, int);
    using dlsymFunc_t = void *(*)(void *, const char *);

    // Handle of the metrics library; nullptr while it is not loaded.
    void *mdLibHandle;
    // Resolved perfmon load-config function; nullptr when unavailable.
    perfmonLoadConfig_t perfmonLoadConfigFunc;

    // Function pointers defaulting to the real implementations.
    dlopenFunc_t dlopenFunc = dlopen;
    dlsymFunc_t dlsymFunc = dlsym;
    decltype(&dlclose) dlcloseFunc = dlclose;
    decltype(&instrSetPlatformInfo) setPlatformInfoFunc = instrSetPlatformInfo;
};
} // namespace OCLRT

View File

@@ -0,0 +1,56 @@
/*
* Copyright (c) 2017, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#include "runtime/os_interface/print.h"
#include <iostream>
#include <cstdio>
// Writes `str` to standard output and flushes the stream immediately.
void printToSTDOUT(const char *str) {
    std::fprintf(stdout, "%s", str);
    std::fflush(stdout);
}
// Formats a single `value` into `output` (at most `outputSize` bytes including the
// terminator) using the printf-style `format`.
// Returns the number of characters the full result needs, excluding the terminator,
// which may exceed outputSize - 1 when the output was truncated. If snprintf reports
// an error (negative result) 0 is returned, so that the error code is never
// reinterpreted as a huge unsigned length.
template <class T>
size_t simple_sprintf(char *output, size_t outputSize, const char *format, T value) {
    const int written = snprintf(output, outputSize, format, value);
    return written < 0 ? 0 : static_cast<size_t>(written);
}
// String overload: formats the C string `value` into `output` (at most `outputSize`
// bytes including the terminator) using the printf-style `format`.
// Returns the number of characters the full result needs, excluding the terminator;
// a negative snprintf result (error) is reported as 0 instead of wrapping to a huge
// unsigned length.
size_t simple_sprintf(char *output, size_t outputSize, const char *format, const char *value) {
    const int written = snprintf(output, outputSize, format, value);
    return written < 0 ? 0 : static_cast<size_t>(written);
}
// Pointer overload: formats the pointer `value` into `output` (at most `outputSize`
// bytes including the terminator) using the printf-style `format`.
// Returns the number of characters the full result needs, excluding the terminator;
// a negative snprintf result (error) is reported as 0 instead of wrapping to a huge
// unsigned length.
size_t simple_sprintf(char *output, size_t outputSize, const char *format, void *value) {
    const int written = snprintf(output, outputSize, format, value);
    return written < 0 ? 0 : static_cast<size_t>(written);
}
// Explicit instantiations of the generic simple_sprintf for the supported value types.
// Floating point
template size_t simple_sprintf<float>(char *output, size_t outputSize, const char *format, float value);
template size_t simple_sprintf<double>(char *output, size_t outputSize, const char *format, double value);
// Characters and signed integers
template size_t simple_sprintf<char>(char *output, size_t outputSize, const char *format, char value);
template size_t simple_sprintf<int8_t>(char *output, size_t outputSize, const char *format, int8_t value);
template size_t simple_sprintf<int16_t>(char *output, size_t outputSize, const char *format, int16_t value);
template size_t simple_sprintf<int32_t>(char *output, size_t outputSize, const char *format, int32_t value);
template size_t simple_sprintf<int64_t>(char *output, size_t outputSize, const char *format, int64_t value);
// Unsigned integers
template size_t simple_sprintf<uint8_t>(char *output, size_t outputSize, const char *format, uint8_t value);
template size_t simple_sprintf<uint16_t>(char *output, size_t outputSize, const char *format, uint16_t value);
template size_t simple_sprintf<uint32_t>(char *output, size_t outputSize, const char *format, uint32_t value);
template size_t simple_sprintf<uint64_t>(char *output, size_t outputSize, const char *format, uint64_t value);

View File

@@ -0,0 +1,39 @@
/*
* Copyright (c) 2017, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#pragma once
#include "runtime/os_interface/linux/linux_inc.h"
#include "runtime/os_interface/windows/windows_inc.h"
#include "runtime/os_interface/os_library.h"
// OS-dependent string constants; the definitions live in the per-OS sources.
namespace Os {

// Names of the compiler libraries.
extern const char *frontEndDllName;
extern const char *igcDllName;
extern const char *testDllName;

// Directory separator of the host OS.
extern const char *fileSeparator;

// PCI path.
extern const char *sysFsPciPath;

} // namespace Os

View File

@@ -0,0 +1,46 @@
/*
* Copyright (c) 2017, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#pragma once
namespace OCLRT {
// Non-copyable front for the OS-specific implementation object (OSInterfaceImpl),
// which is declared here and defined per OS.
class OSInterface {
  public:
    class OSInterfaceImpl;

    OSInterface();
    ~OSInterface();

    // Copying is disabled.
    OSInterface(const OSInterface &) = delete;
    OSInterface &operator=(const OSInterface &) = delete;

    // Gives access to the OS-specific implementation object.
    OSInterfaceImpl *get() const {
        return osInterfaceImpl;
    }

    unsigned int getHwContextId() const;

    static bool osEnabled64kbPages;
    static bool are64kbPagesEnabled();

  protected:
    OSInterfaceImpl *osInterfaceImpl;
};
} // namespace OCLRT

View File

@@ -0,0 +1,40 @@
/*
* Copyright (c) 2017, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#pragma once
#include <string>
namespace OCLRT {
// Abstract handle to a dynamically loaded library. Instances are obtained through
// load(); the constructor is only reachable from derived classes.
class OsLibrary {
  public:
    virtual ~OsLibrary() = default;

    // Loads the library called `name`; implemented per OS.
    static OsLibrary *load(const std::string &name);

    // Returns the address of the symbol `procName`.
    virtual void *getProcAddress(const std::string &procName) = 0;
    // Tells whether the library is loaded.
    virtual bool isLoaded() = 0;

  protected:
    OsLibrary() = default;
};
}

View File

@@ -0,0 +1,31 @@
/*
* Copyright (c) 2017, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#include "runtime/helpers/hw_info.h"
#include "runtime/os_interface/os_time.h"
namespace OCLRT {
// Returns the default profiling timer resolution from the capability table of hwInfo.
double OSTime::getDeviceTimerResolution(HardwareInfo const &hwInfo) {
    const double resolution = hwInfo.capabilityTable.defaultProfilingTimerResolution;
    return resolution;
}
} // namespace OCLRT

View File

@@ -0,0 +1,58 @@
/*
* Copyright (c) 2017, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#pragma once
#include <memory>
// Number of nanoseconds in one second (unsigned 64-bit literal).
#define NSEC_PER_SEC (1000000000ULL)
namespace OCLRT {
class OSInterface;
struct HardwareInfo;
// Pair of timestamps filled together by OSTime::getCpuGpuTime.
struct TimeStampData {
uint64_t GPUTimeStamp; // GPU timestamp as produced by the OS-specific reader (NOTE(review): the Linux readers store the register value as read; unit not established here)
uint64_t CPUTimeinNS; // CPU time in ns
};
// Abstract time source for host (CPU) and device (GPU) timestamps. The concrete
// implementation is chosen per OS by create().
class OSTime {
public:
// Creates the OS-specific implementation bound to `osInterface`.
static std::unique_ptr<OSTime> create(OSInterface *osInterface);
virtual ~OSTime() = default;
// Stores the current CPU time in *timeStamp; returns false on failure.
virtual bool getCpuTime(uint64_t *timeStamp) = 0;
// Fills both members of *pGpuCpuTime; returns false on failure.
virtual bool getCpuGpuTime(TimeStampData *pGpuCpuTime) = 0;
// Resolution of the host timer.
virtual double getHostTimerResolution() const = 0;
// Resolution of the device timer for the given hardware.
virtual double getDynamicDeviceTimerResolution(HardwareInfo const &hwInfo) const = 0;
// Current CPU timestamp expressed in host timer ticks.
virtual uint64_t getCpuRawTimestamp() = 0;
// Returns the OS interface this object was bound to (may be nullptr).
OSInterface *getOSInterface() const {
return osInterface;
}
// Default device timer resolution taken from the hardware info.
static double getDeviceTimerResolution(HardwareInfo const &hwInfo);
protected:
// Only derived classes may construct an OSTime.
OSTime() {}
OSInterface *osInterface = nullptr;
};
} // namespace OCLRT

View File

@@ -0,0 +1,184 @@
/*
* Copyright (c) 2017, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#include "CL/cl.h"
#include "runtime/helpers/debug_helpers.h"
#include "runtime/os_interface/performance_counters.h"
#include "runtime/os_interface/os_interface.h"
#include "runtime/os_interface/os_time.h"
namespace OCLRT {
// Binds the counters to `osTime` (expected to be non-null) and its OS interface, and
// resets all state: metrics disabled, no sampling interface, zero reference count.
PerformanceCounters::PerformanceCounters(OSTime *osTime) {
    this->osTime = osTime;
    DEBUG_BREAK_IF(osTime == nullptr);
    this->osInterface = osTime->getOSInterface();

    // Hardware description and escape callback data.
    gfxFamily = IGFX_UNKNOWN_CORE;
    cbData = {
        0,
    };
    useMIRPC = false;

    // Runtime state.
    hwMetricsEnabled = false;
    available = false;
    pAutoSamplingInterface = nullptr;
    refCounter = 0;
    reportId = 0;
    cpuRawTimestamp = 0;
}
// Takes one reference on the counters under the mutex; the first reference triggers
// enableImpl().
void PerformanceCounters::enable() {
    std::lock_guard<std::mutex> lock(mutex);
    const bool firstUser = (refCounter == 0);
    if (firstUser) {
        enableImpl();
    }
    refCounter++;
}
// Drops one reference under the mutex; releasing the last one triggers shutdownImpl().
// Calling it with no outstanding references is a no-op.
void PerformanceCounters::shutdown() {
    std::lock_guard<std::mutex> lock(mutex);
    if (refCounter == 0) {
        return;
    }
    if (refCounter == 1) {
        shutdownImpl();
    }
    refCounter--;
}
// Caches the render core family and whether MI_REPORT_PERF_COUNT may be used
// (it may not when the waDoNotUseMIReportPerfCount workaround is set).
void PerformanceCounters::initialize(const HardwareInfo *hwInfo) {
    gfxFamily = hwInfo->pPlatform->eRenderCoreFamily;
    useMIRPC = !(hwInfo->pWaTable->waDoNotUseMIReportPerfCount);
}
// Enables hardware metrics and, if that worked and sampling is not yet running,
// starts auto sampling. `available` becomes true once a sampling interface exists.
void PerformanceCounters::enableImpl() {
    hwMetricsEnabled = (hwMetricsEnableFunc(cbData, true) == 0);
    if (pAutoSamplingInterface || !hwMetricsEnabled) {
        return;
    }
    autoSamplingStartFunc(cbData, &pAutoSamplingInterface);
    if (pAutoSamplingInterface) {
        available = true;
    }
}
// Reverses enableImpl(): disables hardware metrics if they were enabled and stops
// auto sampling if it is running.
void PerformanceCounters::shutdownImpl() {
    if (hwMetricsEnabled) {
        hwMetricsEnableFunc(cbData, false);
        hwMetricsEnabled = false;
    }
    if (pAutoSamplingInterface != nullptr) {
        autoSamplingStopFunc(&pAutoSamplingInterface);
        available = false;
        pAutoSamplingInterface = nullptr;
    }
}
void PerformanceCounters::setCpuTimestamp() {
cpuRawTimestamp = osTime->getCpuRawTimestamp();
}
// Allocates and fills a register configuration for one of the supported configuration
// sets. Returns nullptr when hardware metrics are disabled, the set is not supported,
// or the query fails. On success the caller receives the allocated object.
InstrPmRegsCfg *PerformanceCounters::getPmRegsCfg(uint32_t configuration) {
    if (!hwMetricsEnabled) {
        return nullptr;
    }

    // Only these configuration sets are accepted.
    switch (configuration) {
    case GTDI_CONFIGURATION_SET_DYNAMIC:
    case GTDI_CONFIGURATION_SET_1:
    case GTDI_CONFIGURATION_SET_2:
    case GTDI_CONFIGURATION_SET_3:
        break;
    default:
        return nullptr;
    }

    InstrPmRegsCfg *config = new InstrPmRegsCfg();
    config->oaCounters.handle = INSTR_PM_REGS_CFG_INVALID;

    std::lock_guard<std::mutex> lock(mutex);
    if (getPmRegsCfgFunc(cbData, configuration, config, nullptr) != 0) {
        delete config;
        config = nullptr;
    }
    return config;
}
// Checks the configuration and, if the check passes, loads it. Returns true only when
// all pointers are non-null and both steps report success (0).
bool PerformanceCounters::verifyPmRegsCfg(InstrPmRegsCfg *pCfg, uint32_t *pLastPmRegsCfgHandle, bool *pLastPmRegsCfgPending) {
    const bool argumentsValid = pCfg != nullptr && pLastPmRegsCfgHandle != nullptr && pLastPmRegsCfgPending != nullptr;
    if (!argumentsValid) {
        return false;
    }
    if (checkPmRegsCfgFunc(pCfg, pLastPmRegsCfgHandle, pAutoSamplingInterface) != 0) {
        return false;
    }
    return loadPmRegsCfgFunc(cbData, pCfg, 1) == 0;
}
// Verifies the configuration and, on success, marks it as pending through
// *pLastPmRegsCfgPending. Returns the verification result.
bool PerformanceCounters::sendPmRegsCfgCommands(InstrPmRegsCfg *pCfg, uint32_t *pLastPmRegsCfgHandle, bool *pLastPmRegsCfgPending) {
    const bool verified = verifyPmRegsCfg(pCfg, pLastPmRegsCfgHandle, pLastPmRegsCfgPending);
    if (verified) {
        *pLastPmRegsCfgPending = true;
    }
    return verified;
}
bool PerformanceCounters::processEventReport(size_t inputParamSize, void *inputParam, size_t *outputParamSize, HwPerfCounter *pPrivateData, InstrPmRegsCfg *countersConfiguration, bool isEventComplete) {
size_t outputSize = querySize();
if (outputParamSize) {
*outputParamSize = outputSize;
}
if (inputParam == nullptr && inputParamSize == 0 && outputParamSize) {
return true;
}
if (inputParam == nullptr || isEventComplete == false) {
return false;
}
if (inputParamSize < outputSize) {
return false;
}
GTDI_QUERY *pClientData = static_cast<GTDI_QUERY *>(inputParam);
getPerfCountersQueryDataFunc(cbData, pClientData, &pPrivateData->HWPerfCounters,
cpuRawTimestamp, pAutoSamplingInterface, countersConfiguration, useMIRPC, true, nullptr);
return true;
}
// Sends `count` offset/value pairs. When the first offset is INSTR_READ_REGS_CFG_TAG
// the remaining pairs are sent as a read-registers configuration, otherwise all pairs
// are sent as a PM registers configuration.
// Returns CL_INVALID_VALUE for empty/null input, CL_SUCCESS when the chosen call
// returns 0, and CL_PROFILING_INFO_NOT_AVAILABLE otherwise (including a lone tag).
int PerformanceCounters::sendPerfConfiguration(uint32_t count, uint32_t *pOffsets, uint32_t *pValues) {
    if (count == 0 || pOffsets == nullptr || pValues == nullptr) {
        return CL_INVALID_VALUE;
    }

    std::lock_guard<std::mutex> lock(mutex);
    int status = -1;
    if (pOffsets[0] != INSTR_READ_REGS_CFG_TAG) {
        status = setPmRegsCfgFunc(cbData, count, pOffsets, pValues);
    } else if (count > 1) {
        // Skip the tag entry itself.
        status = sendReadRegsCfgFunc(cbData, count - 1, pOffsets + 1, pValues + 1);
    }
    return status == 0 ? CL_SUCCESS : CL_PROFILING_INFO_NOT_AVAILABLE;
}
// Size in bytes of the query structure handed back by processEventReport.
size_t PerformanceCounters::querySize() {
    const size_t size = sizeof(GTDI_QUERY);
    return size;
}
// Combines the hardware context id (shifted left by 12 bits) with the next report id.
uint32_t PerformanceCounters::getCurrentReportId() {
    const uint32_t contextBits = osInterface->getHwContextId() << 12;
    return contextBits | getReportId();
}
} // namespace OCLRT

View File

@@ -0,0 +1,91 @@
/*
* Copyright (c) 2017, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#pragma once
#include <memory>
#include <mutex>
#include "CL/cl.h"
#include "runtime/event/perf_counter.h"
#include "runtime/helpers/hw_info.h"
namespace OCLRT {
struct HardwareInfo;
class OSInterface;
class OSTime;
// Reference-counted front end for the instrumentation entry points (instr*). The
// entry points are held as function pointer members that default to the real
// functions. OS-specific subclasses are created through create().
class PerformanceCounters {
  public:
    static std::unique_ptr<PerformanceCounters> create(OSTime *osTime);
    virtual ~PerformanceCounters() = default;

    // Reference-counted enable/disable; both take the mutex.
    void enable();
    void shutdown();

    virtual void initialize(const HardwareInfo *hwInfo);

    InstrPmRegsCfg *getPmRegsCfg(uint32_t configuration);
    bool sendPmRegsCfgCommands(InstrPmRegsCfg *pCfg, uint32_t *pLastPmRegsCfgHandle, bool *pLastPmRegsCfgPending);
    void setCpuTimestamp();
    bool processEventReport(size_t pClientDataSize, void *pClientData, size_t *outputSize, HwPerfCounter *pPrivateData, InstrPmRegsCfg *countersConfiguration, bool isEventComplete);
    int sendPerfConfiguration(uint32_t count, uint32_t *pOffsets, uint32_t *pValues);
    uint32_t getCurrentReportId();

    // Current number of outstanding enable() references, read under the mutex.
    uint32_t getPerfCountersReferenceNumber() {
        std::lock_guard<std::mutex> lock(mutex);
        return refCounter;
    }

    // True while an auto sampling interface is active.
    bool isAvailable() {
        return available;
    }

  protected:
    PerformanceCounters(OSTime *osTime);

    virtual bool verifyPmRegsCfg(InstrPmRegsCfg *pCfg, uint32_t *pLastPmRegsCfgHandle, bool *pLastPmRegsCfgPending);
    size_t querySize();
    virtual void enableImpl();
    void shutdownImpl();

    // Advances the report id and returns its low 12 bits.
    MOCKABLE_VIRTUAL uint32_t getReportId() {
        ++reportId;
        return reportId & 0xFFF;
    }

    GFXCORE_FAMILY gfxFamily;
    InstrEscCbData cbData;
    OSInterface *osInterface;
    OSTime *osTime;
    bool hwMetricsEnabled;
    bool useMIRPC;
    void *pAutoSamplingInterface;
    uint64_t cpuRawTimestamp;
    std::mutex mutex;
    uint32_t refCounter;
    bool available;
    uint32_t reportId;

    // Instrumentation entry points, defaulting to the real functions.
    decltype(&instrAutoSamplingStart) autoSamplingStartFunc = instrAutoSamplingStart;
    decltype(&instrAutoSamplingStop) autoSamplingStopFunc = instrAutoSamplingStop;
    decltype(&instrCheckPmRegsCfg) checkPmRegsCfgFunc = instrCheckPmRegsCfg;
    decltype(&instrGetPerfCountersQueryData) getPerfCountersQueryDataFunc = instrGetPerfCountersQueryData;
    decltype(&instrEscGetPmRegsCfg) getPmRegsCfgFunc = instrEscGetPmRegsCfg;
    decltype(&instrEscHwMetricsEnable) hwMetricsEnableFunc = instrEscHwMetricsEnable;
    decltype(&instrEscLoadPmRegsCfg) loadPmRegsCfgFunc = instrEscLoadPmRegsCfg;
    decltype(&instrEscSetPmRegsCfg) setPmRegsCfgFunc = instrEscSetPmRegsCfg;
    decltype(&instrEscSendReadRegsCfg) sendReadRegsCfgFunc = instrEscSendReadRegsCfg;
};
} // namespace OCLRT

View File

@@ -0,0 +1,32 @@
/*
* Copyright (c) 2017, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#pragma once
#include <cstddef>
// Declarations only; the definitions are not part of this header.
// NOTE(review): by its name printToSTDOUT emits `str` on standard output - confirm against the definition.
void printToSTDOUT(const char *str);
// simple_sprintf takes an output buffer with its size, a format string and one value.
// A generic template is declared alongside non-template overloads for C strings and pointers.
// NOTE(review): presumably returns the number of characters written - confirm against the definition.
template <class T>
size_t simple_sprintf(char *output, size_t outputSize, const char *format, T value);
size_t simple_sprintf(char *output, size_t outputSize, const char *format, const char *value);
size_t simple_sprintf(char *output, size_t outputSize, const char *format, void *value);

View File

@@ -0,0 +1,436 @@
/*
* Copyright (c) 2017, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#include "runtime/api/dispatch.h"
#include "runtime/api/api.h"
#include "runtime/platform/platform.h"
#include "runtime/device/device.h"
#include "runtime/command_queue/command_queue.h"
#include "runtime/context/context.h"
#include "runtime/helpers/get_info.h"
#include "runtime/sharings/d3d/d3d_buffer.h"
#include "runtime/sharings/d3d/d3d_texture.h"
#include "runtime/sharings/d3d/d3d_surface.h"
#include "runtime/mem_obj/image.h"
using namespace OCLRT;
// Answers the Direct3D-specific cl_mem_info queries. For a recognised paramName
// *srcParamSize receives the value size and *srcParam a pointer taken from the
// sharing handler; for any other paramName both outputs are left untouched.
void OCLRT::MemObj::getOsSpecificMemObjectInfo(const cl_mem_info &paramName, size_t *srcParamSize, void **srcParam) {
    switch (paramName) {
    case CL_MEM_D3D10_RESOURCE_KHR: {
        *srcParamSize = sizeof(ID3D10Resource *);
        auto d3d10Sharing = static_cast<D3DSharing<D3DTypesHelper::D3D10> *>(peekSharingHandler());
        *srcParam = d3d10Sharing->getResourceHandler();
        break;
    }
    case CL_MEM_D3D11_RESOURCE_KHR: {
        *srcParamSize = sizeof(ID3D11Resource *);
        auto d3d11Sharing = static_cast<D3DSharing<D3DTypesHelper::D3D11> *>(peekSharingHandler());
        *srcParam = d3d11Sharing->getResourceHandler();
        break;
    }
    case CL_MEM_DX9_MEDIA_SURFACE_INFO_KHR: {
        *srcParamSize = sizeof(cl_dx9_surface_info_khr);
        auto surface = static_cast<D3DSurface *>(peekSharingHandler());
        *srcParam = &surface->getSurfaceInfo();
        break;
    }
    case CL_MEM_DX9_MEDIA_ADAPTER_TYPE_KHR: {
        *srcParamSize = sizeof(cl_dx9_media_adapter_type_khr);
        auto surface = static_cast<D3DSurface *>(peekSharingHandler());
        *srcParam = &surface->getAdapterType();
        break;
    }
    case CL_MEM_DX9_RESOURCE_INTEL: {
        *srcParamSize = sizeof(IDirect3DSurface9 *);
        auto surface = static_cast<D3DSurface *>(peekSharingHandler());
        *srcParam = &surface->getSurfaceInfo().resource;
        break;
    }
    case CL_MEM_DX9_SHARED_HANDLE_INTEL: {
        *srcParamSize = sizeof(HANDLE);
        auto surface = static_cast<D3DSurface *>(peekSharingHandler());
        *srcParam = &surface->getSurfaceInfo().shared_handle;
        break;
    }
    default:
        // Not a D3D-specific query: nothing to report.
        break;
    }
}
// Answers the Direct3D-specific image info queries. For a recognised paramName
// *srcParamSize and *srcParam are filled from the sharing handler; otherwise
// both outputs are left untouched.
void OCLRT::Image::getOsSpecificImageInfo(const cl_mem_info &paramName, size_t *srcParamSize, void **srcParam) {
    switch (paramName) {
    case CL_IMAGE_D3D10_SUBRESOURCE_KHR: {
        *srcParamSize = sizeof(unsigned int);
        auto d3d10Sharing = static_cast<D3DSharing<D3DTypesHelper::D3D10> *>(peekSharingHandler());
        *srcParam = &d3d10Sharing->getSubresource();
        break;
    }
    case CL_IMAGE_D3D11_SUBRESOURCE_KHR: {
        *srcParamSize = sizeof(unsigned int);
        auto d3d11Sharing = static_cast<D3DSharing<D3DTypesHelper::D3D11> *>(peekSharingHandler());
        *srcParam = &d3d11Sharing->getSubresource();
        break;
    }
    // Both DX9 plane queries are served from the same surface field.
    case CL_IMAGE_DX9_MEDIA_PLANE_KHR:
    case CL_IMAGE_DX9_PLANE_INTEL: {
        *srcParamSize = sizeof(cl_uint);
        auto surface = static_cast<D3DSurface *>(peekSharingHandler());
        *srcParam = &surface->getPlane();
        break;
    }
    default:
        break;
    }
}
// This implementation accepts no OS-specific context property: every
// (propertyType, propertyValue) pair is answered with CL_INVALID_PROPERTY.
cl_int OCLRT::Context::createContextOsProperties(cl_context_properties &propertyType, cl_context_properties &propertyValue) {
    const cl_int result = CL_INVALID_PROPERTY;
    return result;
}
// Serves the D3D10/D3D11 "prefer shared resources" context queries: stores
// sizeof(cl_bool) in *srcParamSize and returns the address of
// preferD3dSharedResources. Any other paramName yields nullptr and leaves
// *srcParamSize untouched.
void *OCLRT::Context::getOsContextInfo(cl_context_info &paramName, size_t *srcParamSize) {
    const bool isPreferSharedQuery = (paramName == CL_CONTEXT_D3D10_PREFER_SHARED_RESOURCES_KHR) ||
                                     (paramName == CL_CONTEXT_D3D11_PREFER_SHARED_RESOURCES_KHR);
    if (!isPreferSharedQuery) {
        return nullptr;
    }
    *srcParamSize = sizeof(cl_bool);
    return &preferD3dSharedResources;
}
// Reports device 0 of `platform` through `devices` and a count of 1 through
// `numDevices`. dx9DeviceSource, dx9Object, dx9DeviceSet and numEntries are not
// inspected. Returns CL_INVALID_PLATFORM when the handle cannot be resolved,
// CL_SUCCESS otherwise.
cl_int CL_API_CALL clGetDeviceIDsFromDX9INTEL(cl_platform_id platform, cl_dx9_device_source_intel dx9DeviceSource, void *dx9Object,
                                              cl_dx9_device_set_intel dx9DeviceSet, cl_uint numEntries, cl_device_id *devices, cl_uint *numDevices) {
    auto pPlatform = castToObject<Platform>(platform);
    // NOTE(review): relies on castToObject yielding nullptr for a null/invalid handle - confirm.
    if (pPlatform == nullptr) {
        return CL_INVALID_PLATFORM;
    }
    cl_device_id device = pPlatform->getDevice(0);
    GetInfoHelper::set(devices, device);
    GetInfoHelper::set(numDevices, 1u);
    return CL_SUCCESS;
}
// Creates a memory object from a Direct3D 9 surface. Only the read/write access
// flags are accepted (anything else reports CL_INVALID_VALUE) and a null
// `resource` reports CL_INVALID_DX9_RESOURCE_INTEL; both cases return nullptr.
// Otherwise creation is delegated to D3DSurface::create with adapter type 0.
cl_mem CL_API_CALL clCreateFromDX9MediaSurfaceINTEL(cl_context context, cl_mem_flags flags, IDirect3DSurface9 *resource,
                                                    HANDLE sharedHandle, UINT plane, cl_int *errcodeRet) {
    ErrorCodeHelper err(errcodeRet, CL_SUCCESS);

    const cl_mem_flags allowedFlags = CL_MEM_READ_WRITE | CL_MEM_WRITE_ONLY | CL_MEM_READ_ONLY;
    if (flags & ~allowedFlags) {
        err.set(CL_INVALID_VALUE);
        return nullptr;
    }
    if (resource == nullptr) {
        err.set(CL_INVALID_DX9_RESOURCE_INTEL);
        return nullptr;
    }

    cl_dx9_surface_info_khr surfaceInfo = {resource, sharedHandle};
    return D3DSurface::create(castToObject<Context>(context), &surfaceInfo, flags, 0, plane, errcodeRet);
}
// Forwards to CommandQueue::enqueueAcquireSharedObjects with command type
// CL_COMMAND_ACQUIRE_DX9_OBJECTS_INTEL. Returns CL_INVALID_COMMAND_QUEUE when
// the queue handle cannot be resolved.
cl_int CL_API_CALL clEnqueueAcquireDX9ObjectsINTEL(cl_command_queue commandQueue, cl_uint numObjects, const cl_mem *memObjects,
                                                   cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) {
    auto cmdQ = castToObject<CommandQueue>(commandQueue);
    // NOTE(review): relies on castToObject yielding nullptr for a null/invalid handle - confirm.
    if (cmdQ == nullptr) {
        return CL_INVALID_COMMAND_QUEUE;
    }
    return cmdQ->enqueueAcquireSharedObjects(numObjects, memObjects, numEventsInWaitList,
                                             eventWaitList, event, CL_COMMAND_ACQUIRE_DX9_OBJECTS_INTEL);
}
// Releases DX9 shared objects on `commandQueue`.
// Returns CL_INVALID_COMMAND_QUEUE for an unresolvable queue, CL_INVALID_VALUE
// for a null array with numObjects > 0, CL_INVALID_MEM_OBJECT for an entry that
// is not a usable shared object, otherwise the result of
// enqueueReleaseSharedObjects().
cl_int CL_API_CALL clEnqueueReleaseDX9ObjectsINTEL(cl_command_queue commandQueue, cl_uint numObjects, cl_mem *memObjects,
                                                   cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) {
    auto cmdQ = castToObject<CommandQueue>(commandQueue);
    // NOTE(review): relies on castToObject yielding nullptr for a null/invalid handle - confirm.
    if (cmdQ == nullptr) {
        return CL_INVALID_COMMAND_QUEUE;
    }
    if (numObjects > 0 && memObjects == nullptr) {
        return CL_INVALID_VALUE;
    }
    for (unsigned int object = 0; object < numObjects; object++) {
        auto memObject = castToObject<MemObj>(memObjects[object]);
        if (memObject == nullptr) {
            return CL_INVALID_MEM_OBJECT;
        }
        auto sharing = static_cast<D3DSharing<D3DTypesHelper::D3D9> *>(memObject->peekSharingHandler());
        if (sharing == nullptr) {
            return CL_INVALID_MEM_OBJECT;
        }
        if (!sharing->isSharedResource()) {
            // First non-shared resource: finish the queue once, then stop scanning.
            cmdQ->finish(true);
            break;
        }
    }
    auto retVal = cmdQ->enqueueReleaseSharedObjects(numObjects, memObjects, numEventsInWaitList,
                                                    eventWaitList, event, CL_COMMAND_RELEASE_DX9_OBJECTS_INTEL);
    // Without interop user sync the release is completed before returning.
    if (!cmdQ->getContext().getInteropUserSyncEnabled()) {
        cmdQ->finish(true);
    }
    return retVal;
}
// Reports device 0 of `platform` through `devices` and a count of 1 through
// `numDevices`. The media-adapter arguments and numEntries are not inspected.
// Returns CL_INVALID_PLATFORM when the handle cannot be resolved, CL_SUCCESS
// otherwise.
cl_int CL_API_CALL clGetDeviceIDsFromDX9MediaAdapterKHR(cl_platform_id platform, cl_uint numMediaAdapters,
                                                        cl_dx9_media_adapter_type_khr *mediaAdapterType, void *mediaAdapters,
                                                        cl_dx9_media_adapter_set_khr mediaAdapterSet, cl_uint numEntries,
                                                        cl_device_id *devices, cl_uint *numDevices) {
    auto pPlatform = castToObject<Platform>(platform);
    // NOTE(review): relies on castToObject yielding nullptr for a null/invalid handle - confirm.
    if (pPlatform == nullptr) {
        return CL_INVALID_PLATFORM;
    }
    cl_device_id device = pPlatform->getDevice(0);
    GetInfoHelper::set(devices, device);
    GetInfoHelper::set(numDevices, 1u);
    return CL_SUCCESS;
}
// Creates a memory object from a DX9 media surface: `surfaceInfo` is
// reinterpreted as cl_dx9_surface_info_khr and creation is delegated to
// D3DSurface::create.
cl_mem CL_API_CALL clCreateFromDX9MediaSurfaceKHR(cl_context context, cl_mem_flags flags, cl_dx9_media_adapter_type_khr adapterType,
                                                  void *surfaceInfo, cl_uint plane, cl_int *errcodeRet) {
    ErrorCodeHelper err(errcodeRet, CL_SUCCESS);
    auto typedSurfaceInfo = static_cast<cl_dx9_surface_info_khr *>(surfaceInfo);
    return D3DSurface::create(castToObject<Context>(context), typedSurfaceInfo, flags, adapterType, plane, errcodeRet);
}
// Forwards to CommandQueue::enqueueAcquireSharedObjects with command type
// CL_COMMAND_ACQUIRE_DX9_MEDIA_SURFACES_KHR. Returns CL_INVALID_COMMAND_QUEUE
// when the queue handle cannot be resolved.
cl_int CL_API_CALL clEnqueueAcquireDX9MediaSurfacesKHR(cl_command_queue commandQueue, cl_uint numObjects, const cl_mem *memObjects,
                                                       cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) {
    auto cmdQ = castToObject<CommandQueue>(commandQueue);
    // NOTE(review): relies on castToObject yielding nullptr for a null/invalid handle - confirm.
    if (cmdQ == nullptr) {
        return CL_INVALID_COMMAND_QUEUE;
    }
    return cmdQ->enqueueAcquireSharedObjects(numObjects, memObjects, numEventsInWaitList,
                                             eventWaitList, event, CL_COMMAND_ACQUIRE_DX9_MEDIA_SURFACES_KHR);
}
// Releases DX9 media surfaces on `commandQueue`.
// Returns CL_INVALID_COMMAND_QUEUE for an unresolvable queue, CL_INVALID_VALUE
// for a null array with numObjects > 0, CL_INVALID_MEM_OBJECT for an entry that
// is not a usable shared object, otherwise the result of
// enqueueReleaseSharedObjects().
cl_int CL_API_CALL clEnqueueReleaseDX9MediaSurfacesKHR(cl_command_queue commandQueue, cl_uint numObjects, const cl_mem *memObjects,
                                                       cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) {
    auto cmdQ = castToObject<CommandQueue>(commandQueue);
    // NOTE(review): relies on castToObject yielding nullptr for a null/invalid handle - confirm.
    if (cmdQ == nullptr) {
        return CL_INVALID_COMMAND_QUEUE;
    }
    if (numObjects > 0 && memObjects == nullptr) {
        return CL_INVALID_VALUE;
    }
    for (unsigned int object = 0; object < numObjects; object++) {
        auto memObject = castToObject<MemObj>(memObjects[object]);
        if (memObject == nullptr) {
            return CL_INVALID_MEM_OBJECT;
        }
        auto sharing = static_cast<D3DSharing<D3DTypesHelper::D3D9> *>(memObject->peekSharingHandler());
        if (sharing == nullptr) {
            return CL_INVALID_MEM_OBJECT;
        }
        if (!sharing->isSharedResource()) {
            // First non-shared resource: finish the queue once, then stop scanning.
            cmdQ->finish(true);
            break;
        }
    }
    auto retVal = cmdQ->enqueueReleaseSharedObjects(numObjects, memObjects, numEventsInWaitList,
                                                    eventWaitList, event, CL_COMMAND_RELEASE_DX9_MEDIA_SURFACES_KHR);
    // Without interop user sync the release is completed before returning.
    if (!cmdQ->getContext().getInteropUserSyncEnabled()) {
        cmdQ->finish(true);
    }
    return retVal;
}
// Reports the device matching a Direct3D 10 device or DXGI adapter.
// Returns:
//   CL_INVALID_PLATFORM  - `platform` cannot be resolved
//   CL_INVALID_VALUE     - unknown d3dDeviceSource or d3dDeviceSet
//   CL_DEVICE_NOT_FOUND  - the adapter's vendor id is not INTEL_VENDOR_ID
//   CL_SUCCESS           - device 0 reported, *numDevices set to 1
// On every path *numDevices is written through GetInfoHelper::set (0 on failure).
// numEntries is not inspected.
cl_int CL_API_CALL clGetDeviceIDsFromD3D10KHR(cl_platform_id platform, cl_d3d10_device_source_khr d3dDeviceSource,
                                              void *d3dObject, cl_d3d10_device_set_khr d3dDeviceSet, cl_uint numEntries,
                                              cl_device_id *devices, cl_uint *numDevices) {
    DXGI_ADAPTER_DESC dxgiDesc = {{0}};
    IDXGIAdapter *dxgiAdapter = nullptr;
    ID3D10Device *d3dDevice = nullptr;
    D3DSharingFunctions<D3DTypesHelper::D3D10> sharingFcns((ID3D10Device *)nullptr);
    cl_uint localNumDevices = 0;
    cl_int retCode = CL_SUCCESS;

    // When DebugManager.injectFcn is set it replaces the adapter-description query.
    if (DebugManager.injectFcn) {
        sharingFcns.getDxgiDescFcn = (D3DSharingFunctions<D3DTypesHelper::D3D10>::GetDxgiDescFcn)DebugManager.injectFcn;
    }

    auto pPlatform = castToObject<Platform>(platform);
    // NOTE(review): relies on castToObject yielding nullptr for a null/invalid handle - confirm.
    if (pPlatform == nullptr) {
        GetInfoHelper::set(numDevices, localNumDevices);
        return CL_INVALID_PLATFORM;
    }
    cl_device_id device = pPlatform->getDevice(0);

    switch (d3dDeviceSource) {
    case CL_D3D10_DEVICE_KHR:
        d3dDevice = static_cast<ID3D10Device *>(d3dObject);
        break;
    case CL_D3D10_DXGI_ADAPTER_KHR:
        dxgiAdapter = static_cast<IDXGIAdapter *>(d3dObject);
        break;
    default:
        GetInfoHelper::set(numDevices, localNumDevices);
        return CL_INVALID_VALUE;
    }

    sharingFcns.getDxgiDescFcn(&dxgiDesc, dxgiAdapter, d3dDevice);
    if (dxgiDesc.VendorId != INTEL_VENDOR_ID) {
        GetInfoHelper::set(numDevices, localNumDevices);
        return CL_DEVICE_NOT_FOUND;
    }

    switch (d3dDeviceSet) {
    case CL_PREFERRED_DEVICES_FOR_D3D10_KHR:
    case CL_ALL_DEVICES_FOR_D3D10_KHR:
        GetInfoHelper::set(devices, device);
        localNumDevices = 1;
        break;
    default:
        retCode = CL_INVALID_VALUE;
        break;
    }

    GetInfoHelper::set(numDevices, localNumDevices);
    return retCode;
}
// Creates a buffer from a Direct3D 10 buffer resource.
// Reports CL_INVALID_CONTEXT when `context` cannot be resolved and
// CL_INVALID_D3D10_RESOURCE_KHR when the resource is already tracked by the
// context's D3D10 sharing functions; both cases return nullptr.
cl_mem CL_API_CALL clCreateFromD3D10BufferKHR(cl_context context, cl_mem_flags flags, ID3D10Buffer *resource, cl_int *errcodeRet) {
    ErrorCodeHelper err(errcodeRet, CL_SUCCESS);
    auto ctx = castToObject<Context>(context);
    // NOTE(review): relies on castToObject yielding nullptr for a null/invalid handle - confirm.
    if (ctx == nullptr) {
        err.set(CL_INVALID_CONTEXT);
        return nullptr;
    }
    if (ctx->getSharing<D3DSharingFunctions<D3DTypesHelper::D3D10>>()->isTracked(resource, 0)) {
        err.set(CL_INVALID_D3D10_RESOURCE_KHR);
        return nullptr;
    }
    return D3DBuffer<D3DTypesHelper::D3D10>::create(ctx, resource, flags, errcodeRet);
}
// Creates an image from one subresource of a Direct3D 10 2D texture.
// Reports CL_INVALID_CONTEXT when `context` cannot be resolved and
// CL_INVALID_D3D10_RESOURCE_KHR when the (resource, subresource) pair is
// already tracked; both cases return nullptr.
cl_mem CL_API_CALL clCreateFromD3D10Texture2DKHR(cl_context context, cl_mem_flags flags, ID3D10Texture2D *resource,
                                                 UINT subresource, cl_int *errcodeRet) {
    ErrorCodeHelper err(errcodeRet, CL_SUCCESS);
    auto ctx = castToObject<Context>(context);
    // NOTE(review): relies on castToObject yielding nullptr for a null/invalid handle - confirm.
    if (ctx == nullptr) {
        err.set(CL_INVALID_CONTEXT);
        return nullptr;
    }
    if (ctx->getSharing<D3DSharingFunctions<D3DTypesHelper::D3D10>>()->isTracked(resource, subresource)) {
        err.set(CL_INVALID_D3D10_RESOURCE_KHR);
        return nullptr;
    }
    return D3DTexture<D3DTypesHelper::D3D10>::create2d(ctx, resource, flags, subresource, errcodeRet);
}
// Creates an image from one subresource of a Direct3D 10 3D texture.
// Reports CL_INVALID_CONTEXT when `context` cannot be resolved and
// CL_INVALID_D3D10_RESOURCE_KHR when the (resource, subresource) pair is
// already tracked; both cases return nullptr.
cl_mem CL_API_CALL clCreateFromD3D10Texture3DKHR(cl_context context, cl_mem_flags flags, ID3D10Texture3D *resource,
                                                 UINT subresource, cl_int *errcodeRet) {
    ErrorCodeHelper err(errcodeRet, CL_SUCCESS);
    auto ctx = castToObject<Context>(context);
    // NOTE(review): relies on castToObject yielding nullptr for a null/invalid handle - confirm.
    if (ctx == nullptr) {
        err.set(CL_INVALID_CONTEXT);
        return nullptr;
    }
    if (ctx->getSharing<D3DSharingFunctions<D3DTypesHelper::D3D10>>()->isTracked(resource, subresource)) {
        err.set(CL_INVALID_D3D10_RESOURCE_KHR);
        return nullptr;
    }
    return D3DTexture<D3DTypesHelper::D3D10>::create3d(ctx, resource, flags, subresource, errcodeRet);
}
// Acquires Direct3D 10 shared objects on `commandQueue`.
// Returns CL_INVALID_COMMAND_QUEUE for an unresolvable queue, CL_INVALID_VALUE
// for a null array with numObjects > 0, CL_INVALID_MEM_OBJECT for an
// unresolvable entry, CL_D3D10_RESOURCE_ALREADY_ACQUIRED_KHR when any object has
// acquireCount >= 1, otherwise the result of enqueueAcquireSharedObjects().
cl_int CL_API_CALL clEnqueueAcquireD3D10ObjectsKHR(cl_command_queue commandQueue, cl_uint numObjects, const cl_mem *memObjects,
                                                   cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) {
    auto cmdQ = castToObject<CommandQueue>(commandQueue);
    // NOTE(review): relies on castToObject yielding nullptr for a null/invalid handle - confirm.
    if (cmdQ == nullptr) {
        return CL_INVALID_COMMAND_QUEUE;
    }
    if (numObjects > 0 && memObjects == nullptr) {
        return CL_INVALID_VALUE;
    }
    for (unsigned int object = 0; object < numObjects; object++) {
        auto memObject = castToObject<MemObj>(memObjects[object]);
        if (memObject == nullptr) {
            return CL_INVALID_MEM_OBJECT;
        }
        if (memObject->acquireCount >= 1) {
            return CL_D3D10_RESOURCE_ALREADY_ACQUIRED_KHR;
        }
    }
    return cmdQ->enqueueAcquireSharedObjects(numObjects, memObjects, numEventsInWaitList,
                                             eventWaitList, event, CL_COMMAND_ACQUIRE_D3D10_OBJECTS_KHR);
}
// Releases Direct3D 10 shared objects on `commandQueue`.
// Returns CL_INVALID_COMMAND_QUEUE for an unresolvable queue, CL_INVALID_VALUE
// for a null array with numObjects > 0, CL_INVALID_MEM_OBJECT for an entry that
// is not a usable shared object, CL_D3D10_RESOURCE_NOT_ACQUIRED_KHR when an
// inspected object has acquireCount == 0, otherwise the result of
// enqueueReleaseSharedObjects().
cl_int CL_API_CALL clEnqueueReleaseD3D10ObjectsKHR(cl_command_queue commandQueue, cl_uint numObjects, const cl_mem *memObjects,
                                                   cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) {
    auto cmdQ = castToObject<CommandQueue>(commandQueue);
    // NOTE(review): relies on castToObject yielding nullptr for a null/invalid handle - confirm.
    if (cmdQ == nullptr) {
        return CL_INVALID_COMMAND_QUEUE;
    }
    if (numObjects > 0 && memObjects == nullptr) {
        return CL_INVALID_VALUE;
    }
    for (unsigned int object = 0; object < numObjects; object++) {
        auto memObject = castToObject<MemObj>(memObjects[object]);
        if (memObject == nullptr) {
            return CL_INVALID_MEM_OBJECT;
        }
        if (memObject->acquireCount == 0) {
            return CL_D3D10_RESOURCE_NOT_ACQUIRED_KHR;
        }
        auto sharing = static_cast<D3DSharing<D3DTypesHelper::D3D10> *>(memObject->peekSharingHandler());
        if (sharing == nullptr) {
            return CL_INVALID_MEM_OBJECT;
        }
        if (!sharing->isSharedResource()) {
            // First non-shared resource: finish the queue once, then stop scanning
            // (objects after this one are not inspected, as before).
            cmdQ->finish(true);
            break;
        }
    }
    auto retVal = cmdQ->enqueueReleaseSharedObjects(numObjects, memObjects, numEventsInWaitList,
                                                    eventWaitList, event, CL_COMMAND_RELEASE_D3D10_OBJECTS_KHR);
    // Without interop user sync the release is completed before returning.
    if (!cmdQ->getContext().getInteropUserSyncEnabled()) {
        cmdQ->finish(true);
    }
    return retVal;
}
// Reports the device matching a Direct3D 11 device or DXGI adapter.
// Returns:
//   CL_INVALID_PLATFORM  - `platform` cannot be resolved
//   CL_INVALID_VALUE     - unknown d3dDeviceSource or d3dDeviceSet
//   CL_DEVICE_NOT_FOUND  - the adapter's vendor id is not INTEL_VENDOR_ID
//   CL_SUCCESS           - device 0 reported, *numDevices set to 1
// On every path *numDevices is written through GetInfoHelper::set (0 on failure).
// numEntries is not inspected.
cl_int CL_API_CALL clGetDeviceIDsFromD3D11KHR(cl_platform_id platform, cl_d3d11_device_source_khr d3dDeviceSource,
                                              void *d3dObject, cl_d3d11_device_set_khr d3dDeviceSet,
                                              cl_uint numEntries, cl_device_id *devices, cl_uint *numDevices) {
    DXGI_ADAPTER_DESC dxgiDesc = {{0}};
    IDXGIAdapter *dxgiAdapter = nullptr;
    ID3D11Device *d3dDevice = nullptr;
    D3DSharingFunctions<D3DTypesHelper::D3D11> sharingFcns((ID3D11Device *)nullptr);
    cl_uint localNumDevices = 0;
    cl_int retCode = CL_SUCCESS;

    // When DebugManager.injectFcn is set it replaces the adapter-description query.
    if (DebugManager.injectFcn) {
        sharingFcns.getDxgiDescFcn = (D3DSharingFunctions<D3DTypesHelper::D3D11>::GetDxgiDescFcn)DebugManager.injectFcn;
    }

    auto pPlatform = castToObject<Platform>(platform);
    // NOTE(review): relies on castToObject yielding nullptr for a null/invalid handle - confirm.
    if (pPlatform == nullptr) {
        GetInfoHelper::set(numDevices, localNumDevices);
        return CL_INVALID_PLATFORM;
    }
    cl_device_id device = pPlatform->getDevice(0);

    switch (d3dDeviceSource) {
    case CL_D3D11_DEVICE_KHR:
        d3dDevice = static_cast<ID3D11Device *>(d3dObject);
        break;
    case CL_D3D11_DXGI_ADAPTER_KHR:
        dxgiAdapter = static_cast<IDXGIAdapter *>(d3dObject);
        break;
    default:
        GetInfoHelper::set(numDevices, localNumDevices);
        return CL_INVALID_VALUE;
    }

    sharingFcns.getDxgiDescFcn(&dxgiDesc, dxgiAdapter, d3dDevice);
    if (dxgiDesc.VendorId != INTEL_VENDOR_ID) {
        GetInfoHelper::set(numDevices, localNumDevices);
        return CL_DEVICE_NOT_FOUND;
    }

    switch (d3dDeviceSet) {
    case CL_PREFERRED_DEVICES_FOR_D3D11_KHR:
    case CL_ALL_DEVICES_FOR_D3D11_KHR:
        GetInfoHelper::set(devices, device);
        localNumDevices = 1;
        break;
    default:
        retCode = CL_INVALID_VALUE;
        break;
    }

    GetInfoHelper::set(numDevices, localNumDevices);
    return retCode;
}
// Creates a buffer from a Direct3D 11 buffer resource.
// Reports CL_INVALID_CONTEXT when `context` cannot be resolved and
// CL_INVALID_D3D11_RESOURCE_KHR when the resource is already tracked by the
// context's D3D11 sharing functions; both cases return nullptr.
cl_mem CL_API_CALL clCreateFromD3D11BufferKHR(cl_context context, cl_mem_flags flags, ID3D11Buffer *resource, cl_int *errcodeRet) {
    ErrorCodeHelper err(errcodeRet, CL_SUCCESS);
    auto ctx = castToObject<Context>(context);
    // NOTE(review): relies on castToObject yielding nullptr for a null/invalid handle - confirm.
    if (ctx == nullptr) {
        err.set(CL_INVALID_CONTEXT);
        return nullptr;
    }
    if (ctx->getSharing<D3DSharingFunctions<D3DTypesHelper::D3D11>>()->isTracked(resource, 0)) {
        err.set(CL_INVALID_D3D11_RESOURCE_KHR);
        return nullptr;
    }
    return D3DBuffer<D3DTypesHelper::D3D11>::create(ctx, resource, flags, errcodeRet);
}
// Creates an image from one subresource of a Direct3D 11 2D texture.
// Reports CL_INVALID_CONTEXT when `context` cannot be resolved and
// CL_INVALID_D3D11_RESOURCE_KHR when the (resource, subresource) pair is
// already tracked; both cases return nullptr.
cl_mem CL_API_CALL clCreateFromD3D11Texture2DKHR(cl_context context, cl_mem_flags flags, ID3D11Texture2D *resource,
                                                 UINT subresource, cl_int *errcodeRet) {
    ErrorCodeHelper err(errcodeRet, CL_SUCCESS);
    auto ctx = castToObject<Context>(context);
    // NOTE(review): relies on castToObject yielding nullptr for a null/invalid handle - confirm.
    if (ctx == nullptr) {
        err.set(CL_INVALID_CONTEXT);
        return nullptr;
    }
    if (ctx->getSharing<D3DSharingFunctions<D3DTypesHelper::D3D11>>()->isTracked(resource, subresource)) {
        err.set(CL_INVALID_D3D11_RESOURCE_KHR);
        return nullptr;
    }
    return D3DTexture<D3DTypesHelper::D3D11>::create2d(ctx, resource, flags, subresource, errcodeRet);
}
// Creates an image from one subresource of a Direct3D 11 3D texture.
// Reports CL_INVALID_CONTEXT when `context` cannot be resolved and
// CL_INVALID_D3D11_RESOURCE_KHR when the (resource, subresource) pair is
// already tracked; both cases return nullptr.
cl_mem CL_API_CALL clCreateFromD3D11Texture3DKHR(cl_context context, cl_mem_flags flags, ID3D11Texture3D *resource,
                                                 UINT subresource, cl_int *errcodeRet) {
    ErrorCodeHelper err(errcodeRet, CL_SUCCESS);
    auto ctx = castToObject<Context>(context);
    // NOTE(review): relies on castToObject yielding nullptr for a null/invalid handle - confirm.
    if (ctx == nullptr) {
        err.set(CL_INVALID_CONTEXT);
        return nullptr;
    }
    if (ctx->getSharing<D3DSharingFunctions<D3DTypesHelper::D3D11>>()->isTracked(resource, subresource)) {
        err.set(CL_INVALID_D3D11_RESOURCE_KHR);
        return nullptr;
    }
    return D3DTexture<D3DTypesHelper::D3D11>::create3d(ctx, resource, flags, subresource, errcodeRet);
}
// Acquires Direct3D 11 shared objects on `commandQueue`.
// Returns CL_INVALID_COMMAND_QUEUE for an unresolvable queue, CL_INVALID_VALUE
// for a null array with numObjects > 0, CL_INVALID_MEM_OBJECT for an
// unresolvable entry, CL_D3D11_RESOURCE_ALREADY_ACQUIRED_KHR when any object has
// acquireCount >= 1, otherwise the result of enqueueAcquireSharedObjects().
cl_int CL_API_CALL clEnqueueAcquireD3D11ObjectsKHR(cl_command_queue commandQueue, cl_uint numObjects, const cl_mem *memObjects,
                                                   cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) {
    auto cmdQ = castToObject<CommandQueue>(commandQueue);
    // NOTE(review): relies on castToObject yielding nullptr for a null/invalid handle - confirm.
    if (cmdQ == nullptr) {
        return CL_INVALID_COMMAND_QUEUE;
    }
    if (numObjects > 0 && memObjects == nullptr) {
        return CL_INVALID_VALUE;
    }
    for (unsigned int object = 0; object < numObjects; object++) {
        auto memObject = castToObject<MemObj>(memObjects[object]);
        if (memObject == nullptr) {
            return CL_INVALID_MEM_OBJECT;
        }
        if (memObject->acquireCount >= 1) {
            return CL_D3D11_RESOURCE_ALREADY_ACQUIRED_KHR;
        }
    }
    return cmdQ->enqueueAcquireSharedObjects(numObjects, memObjects, numEventsInWaitList,
                                             eventWaitList, event, CL_COMMAND_ACQUIRE_D3D11_OBJECTS_KHR);
}
// Releases Direct3D 11 shared objects on `commandQueue`.
// Returns CL_INVALID_COMMAND_QUEUE for an unresolvable queue, CL_INVALID_VALUE
// for a null array with numObjects > 0, CL_INVALID_MEM_OBJECT for an entry that
// is not a usable shared object, CL_D3D11_RESOURCE_NOT_ACQUIRED_KHR when an
// inspected object has acquireCount == 0, otherwise the result of
// enqueueReleaseSharedObjects().
cl_int CL_API_CALL clEnqueueReleaseD3D11ObjectsKHR(cl_command_queue commandQueue, cl_uint numObjects, const cl_mem *memObjects,
                                                   cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) {
    auto cmdQ = castToObject<CommandQueue>(commandQueue);
    // NOTE(review): relies on castToObject yielding nullptr for a null/invalid handle - confirm.
    if (cmdQ == nullptr) {
        return CL_INVALID_COMMAND_QUEUE;
    }
    if (numObjects > 0 && memObjects == nullptr) {
        return CL_INVALID_VALUE;
    }
    for (unsigned int object = 0; object < numObjects; object++) {
        auto memObject = castToObject<MemObj>(memObjects[object]);
        if (memObject == nullptr) {
            return CL_INVALID_MEM_OBJECT;
        }
        if (memObject->acquireCount == 0) {
            return CL_D3D11_RESOURCE_NOT_ACQUIRED_KHR;
        }
        auto sharing = static_cast<D3DSharing<D3DTypesHelper::D3D11> *>(memObject->peekSharingHandler());
        if (sharing == nullptr) {
            return CL_INVALID_MEM_OBJECT;
        }
        if (!sharing->isSharedResource()) {
            // First non-shared resource: finish the queue once, then stop scanning
            // (objects after this one are not inspected, as before).
            cmdQ->finish(true);
            break;
        }
    }
    auto retVal = cmdQ->enqueueReleaseSharedObjects(numObjects, memObjects, numEventsInWaitList,
                                                    eventWaitList, event, CL_COMMAND_RELEASE_D3D11_OBJECTS_KHR);
    // Without interop user sync the release is completed before returning.
    if (!cmdQ->getContext().getInteropUserSyncEnabled()) {
        cmdQ->finish(true);
    }
    return retVal;
}

View File

@@ -0,0 +1,227 @@
/*
* Copyright (c) 2017, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#include "runtime/context/context.h"
#include "runtime/context/context.inl"
#include "runtime/os_interface/windows/d3d_sharing_functions.h"
#include "runtime/sharings/sharing_factory.h"
using namespace OCLRT;
// Explicit instantiations of the sharing-function class template for the D3D10 and D3D11 type helpers.
template class D3DSharingFunctions<D3DTypesHelper::D3D10>;
template class D3DSharingFunctions<D3DTypesHelper::D3D11>;
// Definitions of the per-API static sharingId, taken from the matching SharingType value.
const uint32_t D3DSharingFunctions<D3DTypesHelper::D3D10>::sharingId = SharingType::D3D10_SHARING;
const uint32_t D3DSharingFunctions<D3DTypesHelper::D3D11>::sharingId = SharingType::D3D11_SHARING;
// Creates a query on d3dDevice from a value-initialized (all-zero) descriptor.
// The result of CreateQuery is not inspected.
template <typename D3D>
void D3DSharingFunctions<D3D>::createQuery(D3DQuery **query) {
    D3DQueryDesc zeroedDesc = {};
    this->d3dDevice->CreateQuery(&zeroedDesc, query);
}
// Stores in d3dDevice the device reported by `resource` via GetDevice.
template <typename D3D>
void D3DSharingFunctions<D3D>::updateDevice(D3DResource *resource) {
    resource->GetDevice(&this->d3dDevice);
}
// Fills the two descriptor fields this class sets for a staging buffer: the
// shared misc flag and the byte width. Other fields are left as passed in.
template <typename D3D>
void D3DSharingFunctions<D3D>::fillCreateBufferDesc(D3DBufferDesc &desc, unsigned int width) {
    desc.MiscFlags = D3DResourceFlags::MISC_SHARED;
    desc.ByteWidth = width;
}
// Builds the descriptor of a single-mip, single-slice shared 2D texture from
// `srcDesc`. Width and height are halved once per mip step, where the step
// count is subresource modulo the source mip-level count.
template <typename D3D>
void D3DSharingFunctions<D3D>::fillCreateTexture2dDesc(D3DTexture2dDesc &desc, D3DTexture2dDesc *srcDesc, cl_uint subresource) {
    desc.Width = srcDesc->Width;
    desc.Height = srcDesc->Height;
    desc.MipLevels = 1;
    desc.ArraySize = 1;
    desc.Format = srcDesc->Format;
    desc.MiscFlags = D3DResourceFlags::MISC_SHARED;
    desc.SampleDesc.Count = srcDesc->SampleDesc.Count;
    desc.SampleDesc.Quality = srcDesc->SampleDesc.Quality;

    // Read after the assignments above, exactly where the loop condition first
    // evaluated it; the loop body never touches MipLevels.
    const auto mipSteps = subresource % srcDesc->MipLevels;
    for (uint32_t step = 0u; step < mipSteps; ++step) {
        desc.Width /= 2;
        desc.Height /= 2;
    }
}
// Builds the descriptor of a single-mip shared 3D texture from `srcDesc`.
// Width, height and depth are halved once per mip step, where the step count
// is subresource modulo the source mip-level count.
template <typename D3D>
void D3DSharingFunctions<D3D>::fillCreateTexture3dDesc(D3DTexture3dDesc &desc, D3DTexture3dDesc *srcDesc, cl_uint subresource) {
    desc.Width = srcDesc->Width;
    desc.Height = srcDesc->Height;
    desc.Depth = srcDesc->Depth;
    desc.MipLevels = 1;
    desc.Format = srcDesc->Format;
    desc.MiscFlags = D3DResourceFlags::MISC_SHARED;

    // Read after the assignments above, exactly where the loop condition first
    // evaluated it; the loop body never touches MipLevels.
    const auto mipSteps = subresource % srcDesc->MipLevels;
    for (uint32_t step = 0u; step < mipSteps; ++step) {
        desc.Width /= 2;
        desc.Height /= 2;
        desc.Depth /= 2;
    }
}
// Creates a buffer of `width` bytes on d3dDevice using a descriptor prepared by
// fillCreateBufferDesc and no initial data. The CreateBuffer result is not inspected.
template <typename D3D>
void D3DSharingFunctions<D3D>::createBuffer(D3DBufferObj **buffer, unsigned int width) {
    D3DBufferDesc bufferDesc = {};
    this->fillCreateBufferDesc(bufferDesc, width);
    this->d3dDevice->CreateBuffer(&bufferDesc, nullptr, buffer);
}
// Creates a 2D texture on d3dDevice using a descriptor derived from `desc` by
// fillCreateTexture2dDesc and no initial data. The CreateTexture2D result is not inspected.
template <typename D3D>
void D3DSharingFunctions<D3D>::createTexture2d(D3DTexture2d **texture, D3DTexture2dDesc *desc, cl_uint subresource) {
    D3DTexture2dDesc textureDesc = {};
    this->fillCreateTexture2dDesc(textureDesc, desc, subresource);
    this->d3dDevice->CreateTexture2D(&textureDesc, nullptr, texture);
}
// Creates a 3D texture on d3dDevice using a descriptor derived from `desc` by
// fillCreateTexture3dDesc and no initial data. The CreateTexture3D result is not inspected.
template <typename D3D>
void D3DSharingFunctions<D3D>::createTexture3d(D3DTexture3d **texture, D3DTexture3dDesc *desc, cl_uint subresource) {
    D3DTexture3dDesc textureDesc = {};
    this->fillCreateTexture3dDesc(textureDesc, desc, subresource);
    this->d3dDevice->CreateTexture3D(&textureDesc, nullptr, texture);
}
// Copies the descriptor of `buffer` into *bufferDesc via GetDesc.
template <typename D3D>
void D3DSharingFunctions<D3D>::getBufferDesc(D3DBufferDesc *bufferDesc, D3DBufferObj *buffer) {
    (*buffer).GetDesc(bufferDesc);
}
// Copies the descriptor of the 2D `texture` into *textureDesc via GetDesc.
template <typename D3D>
void D3DSharingFunctions<D3D>::getTexture2dDesc(D3DTexture2dDesc *textureDesc, D3DTexture2d *texture) {
    (*texture).GetDesc(textureDesc);
}
// Copies the descriptor of the 3D `texture` into *textureDesc via GetDesc.
template <typename D3D>
void D3DSharingFunctions<D3D>::getTexture3dDesc(D3DTexture3dDesc *textureDesc, D3DTexture3d *texture) {
    (*texture).GetDesc(textureDesc);
}
// Retrieves the shared handle of `resource` through its IDXGIResource interface.
// If the interface is not available, *handle is left untouched instead of
// dereferencing a null interface pointer.
template <typename D3D>
void D3DSharingFunctions<D3D>::getSharedHandle(D3DResource *resource, void **handle) {
    IDXGIResource *dxgiResource = nullptr;
    resource->QueryInterface(__uuidof(IDXGIResource), reinterpret_cast<void **>(&dxgiResource));
    if (dxgiResource == nullptr) {
        return;
    }
    dxgiResource->GetSharedHandle(handle);
    dxgiResource->Release();
}
// Creates a read/write shared NT handle for `resource` through IDXGIResource1.
// If either interface query yields no interface, *handle is left untouched;
// every interface that was obtained is released before returning.
template <typename D3D>
void D3DSharingFunctions<D3D>::getSharedNTHandle(D3DResource *resource, void **handle) {
    IDXGIResource *dxgiResource = nullptr;
    resource->QueryInterface(__uuidof(IDXGIResource), reinterpret_cast<void **>(&dxgiResource));
    if (dxgiResource == nullptr) {
        return;
    }

    IDXGIResource1 *dxgiResource1 = nullptr;
    dxgiResource->QueryInterface(__uuidof(IDXGIResource1), reinterpret_cast<void **>(&dxgiResource1));
    if (dxgiResource1 != nullptr) {
        dxgiResource1->CreateSharedHandle(nullptr, DXGI_SHARED_RESOURCE_READ | DXGI_SHARED_RESOURCE_WRITE, nullptr, handle);
        dxgiResource1->Release();
    }
    dxgiResource->Release();
}
// Adds one reference to `resource`.
template <typename D3D>
void D3DSharingFunctions<D3D>::addRef(D3DResource *resource) {
    (*resource).AddRef();
}
// Drops one reference from `resource`. A null pointer is ignored, matching the
// D3D9 specialization of this method.
template <typename D3D>
void D3DSharingFunctions<D3D>::release(IUnknown *resource) {
    if (resource) {
        resource->Release();
    }
}
// Intentionally empty in the generic template; the D3D9 specialization provides a body.
template <typename D3D>
void D3DSharingFunctions<D3D>::lockRect(D3DTexture2d *resource, D3DLOCKED_RECT *lockedRect, uint32_t flags) {}
// Intentionally empty in the generic template; the D3D9 specialization provides a body.
template <typename D3D>
void D3DSharingFunctions<D3D>::unlockRect(D3DTexture2d *resource) {}
// Intentionally empty in the generic template; the D3D9 specialization provides a body.
template <typename D3D>
void D3DSharingFunctions<D3D>::updateSurface(D3DTexture2d *src, D3DTexture2d *dst) {}
// Intentionally empty in the generic template; the D3D9 specialization provides a body.
template <typename D3D>
void D3DSharingFunctions<D3D>::getRenderTargetData(D3DTexture2d *renderTarget, D3DTexture2d *dstSurface) {}
// D3D10: copies the whole source subresource (null source box) to destination
// offset (0, 0, 0) using the device itself.
template <>
void D3DSharingFunctions<D3DTypesHelper::D3D10>::copySubresourceRegion(D3DResource *dst, cl_uint dstSubresource,
                                                                       D3DResource *src, cl_uint srcSubresource) {
    d3dDevice->CopySubresourceRegion(dst, dstSubresource,
                                     0, 0, 0,
                                     src, srcSubresource,
                                     nullptr);
}
// D3D11: copies the whole source subresource (null source box) to destination
// offset (0, 0, 0) using the stored device context.
template <>
void D3DSharingFunctions<D3DTypesHelper::D3D11>::copySubresourceRegion(D3DResource *dst, cl_uint dstSubresource,
                                                                       D3DResource *src, cl_uint srcSubresource) {
    d3d11DeviceContext->CopySubresourceRegion(dst, dstSubresource,
                                              0, 0, 0,
                                              src, srcSubresource,
                                              nullptr);
}
// D3D10: ends `query`, flushes the device and polls until the query result is
// available. Polling continues only while GetData reports S_FALSE (data not
// ready yet); an error HRESULT ends the wait instead of spinning forever.
template <>
void D3DSharingFunctions<D3DTypesHelper::D3D10>::flushAndWait(D3DQuery *query) {
    query->End();
    d3dDevice->Flush();
    while (query->GetData(nullptr, 0, 0) == S_FALSE) {
        // busy-wait until the query completes
    }
}
// D3D11: ends `query` on the stored device context, flushes it and polls until
// the query result is available. Polling continues only while GetData reports
// S_FALSE (data not ready yet); an error HRESULT ends the wait instead of
// spinning forever.
template <>
void D3DSharingFunctions<D3DTypesHelper::D3D11>::flushAndWait(D3DQuery *query) {
    d3d11DeviceContext->End(query);
    d3d11DeviceContext->Flush();
    while (d3d11DeviceContext->GetData(query, nullptr, 0, 0) == S_FALSE) {
        // busy-wait until the query completes
    }
}
// D3D10: nothing to fetch; this specialization is a no-op.
template <>
void D3DSharingFunctions<D3DTypesHelper::D3D10>::getDeviceContext(D3DQuery *query) {}
// D3D11: stores the device's immediate context in d3d11DeviceContext.
// `query` is not used.
template <>
void D3DSharingFunctions<D3DTypesHelper::D3D11>::getDeviceContext(D3DQuery *query) {
    this->d3dDevice->GetImmediateContext(&this->d3d11DeviceContext);
}
// D3D10: nothing to release; this specialization is a no-op.
template <>
void D3DSharingFunctions<D3DTypesHelper::D3D10>::releaseDeviceContext(D3DQuery *query) {}
// D3D11: releases the stored device context and clears the member.
// `query` is not used.
template <>
void D3DSharingFunctions<D3DTypesHelper::D3D11>::releaseDeviceContext(D3DQuery *query) {
    this->d3d11DeviceContext->Release();
    this->d3d11DeviceContext = nullptr;
}
// Fills *dxgiDesc with the description of `adapter`, or, when no adapter is
// given, of the adapter that owns `device`. A reference is held on the adapter
// for the duration of the query and dropped afterwards.
// If no adapter can be obtained (null device, or the device exposes no
// IDXGIDevice / adapter), *dxgiDesc is left untouched.
template <typename D3D>
void D3DSharingFunctions<D3D>::getDxgiDesc(DXGI_ADAPTER_DESC *dxgiDesc, IDXGIAdapter *adapter, D3DDevice *device) {
    if (adapter) {
        adapter->AddRef();
    } else {
        if (!device) {
            return;
        }
        IDXGIDevice *dxgiDevice = nullptr;
        device->QueryInterface(__uuidof(IDXGIDevice), reinterpret_cast<void **>(&dxgiDevice));
        if (!dxgiDevice) {
            return;
        }
        // GetAdapter hands back a referenced adapter on success.
        dxgiDevice->GetAdapter(&adapter);
        dxgiDevice->Release();
        if (!adapter) {
            return;
        }
    }
    adapter->GetDesc(dxgiDesc);
    adapter->Release();
}
// Explicit instantiations of Context::getSharing for the D3D10 and D3D11 sharing-function types.
template D3DSharingFunctions<D3DTypesHelper::D3D10> *Context::getSharing<D3DSharingFunctions<D3DTypesHelper::D3D10>>();
template D3DSharingFunctions<D3DTypesHelper::D3D11> *Context::getSharing<D3DSharingFunctions<D3DTypesHelper::D3D11>>();

View File

@@ -0,0 +1,156 @@
/*
* Copyright (c) 2017, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#include "runtime/context/context.h"
#include "runtime/context/context.inl"
#include "runtime/os_interface/windows/d3d_sharing_functions.h"
#include "runtime/sharings/sharing_factory.h"
using namespace OCLRT;
// Explicit instantiation of the sharing-function class template for the D3D9 type helper.
template class D3DSharingFunctions<D3DTypesHelper::D3D9>;
// Definition of the D3D9 static sharingId, taken from the matching SharingType value.
const uint32_t D3DSharingFunctions<D3DTypesHelper::D3D9>::sharingId = SharingType::D3D9_SHARING;
// D3D9: creates an event query (D3DQUERYTYPE_EVENT) on d3dDevice.
// The CreateQuery result is not inspected.
template <>
void D3DSharingFunctions<D3DTypesHelper::D3D9>::createQuery(D3DQuery **query) {
    d3dDevice->CreateQuery(D3DQUERYTYPE_EVENT, query);
}
// D3D9: stores in d3dDevice the device reported by `resource` via GetDevice.
template <>
void D3DSharingFunctions<D3DTypesHelper::D3D9>::updateDevice(D3DResource *resource) {
    resource->GetDevice(&this->d3dDevice);
}
// D3D9: no-op; `desc` is left unmodified.
template <>
void D3DSharingFunctions<D3DTypesHelper::D3D9>::fillCreateBufferDesc(D3DBufferDesc &desc, unsigned int width) {}
// D3D9: no-op; `desc` is left unmodified.
template <>
void D3DSharingFunctions<D3DTypesHelper::D3D9>::fillCreateTexture2dDesc(D3DTexture2dDesc &desc, D3DTexture2dDesc *srcDesc, cl_uint subresource) {}
// D3D9: no-op; `desc` is left unmodified.
template <>
void D3DSharingFunctions<D3DTypesHelper::D3D9>::fillCreateTexture3dDesc(D3DTexture3dDesc &desc, D3DTexture3dDesc *srcDesc, cl_uint subresource) {}
// D3D9: no-op; *buffer is not written.
template <>
void D3DSharingFunctions<D3DTypesHelper::D3D9>::createBuffer(D3DBufferObj **buffer, unsigned int width) {}
// D3D9: creates an off-screen plain surface in the system-memory pool with the
// width, height and format of `desc`. `subresource` is not used and the
// creation result is not inspected.
template <>
void D3DSharingFunctions<D3DTypesHelper::D3D9>::createTexture2d(D3DTexture2d **texture, D3DTexture2dDesc *desc, cl_uint subresource) {
    d3dDevice->CreateOffscreenPlainSurface(desc->Width,
                                           desc->Height,
                                           desc->Format,
                                           D3DPOOL_SYSTEMMEM,
                                           texture,
                                           nullptr);
}
// D3D9: no-op; *texture is not written.
template <>
void D3DSharingFunctions<D3DTypesHelper::D3D9>::createTexture3d(D3DTexture3d **texture, D3DTexture3dDesc *desc, cl_uint subresource) {}
// D3D9: no-op; *bufferDesc is not written.
template <>
void D3DSharingFunctions<D3DTypesHelper::D3D9>::getBufferDesc(D3DBufferDesc *bufferDesc, D3DBufferObj *buffer) {}
// D3D9: copies the descriptor of `texture` into *textureDesc via GetDesc.
template <>
void D3DSharingFunctions<D3DTypesHelper::D3D9>::getTexture2dDesc(D3DTexture2dDesc *textureDesc, D3DTexture2d *texture) {
    (*texture).GetDesc(textureDesc);
}
// D3D9: no-op; *textureDesc is not written.
template <>
void D3DSharingFunctions<D3DTypesHelper::D3D9>::getTexture3dDesc(D3DTexture3dDesc *textureDesc, D3DTexture3d *texture) {}
// D3D9: no-op; *handle is not written.
template <>
void D3DSharingFunctions<D3DTypesHelper::D3D9>::getSharedHandle(D3DResource *resource, void **handle) {}
// D3D9: no-op; *handle is not written.
template <>
void D3DSharingFunctions<D3DTypesHelper::D3D9>::getSharedNTHandle(D3DResource *resource, void **handle) {}
// D3D9: adds one reference to `resource`.
template <>
void D3DSharingFunctions<D3DTypesHelper::D3D9>::addRef(D3DResource *resource) {
    (*resource).AddRef();
}
// D3D9: drops one reference from `resource`; a null pointer is ignored.
template <>
void D3DSharingFunctions<D3DTypesHelper::D3D9>::release(IUnknown *resource) {
    if (resource == nullptr) {
        return;
    }
    resource->Release();
}
// D3D9: locks the whole surface (null rectangle) with the given flags and
// reports the mapping through *lockedRect. The LockRect result is not inspected.
template <>
void D3DSharingFunctions<D3DTypesHelper::D3D9>::lockRect(D3DTexture2d *d3dresource, D3DLOCKED_RECT *lockedRect, uint32_t flags) {
    (*d3dresource).LockRect(lockedRect, nullptr, flags);
}
// D3D9: unlocks a surface previously locked with lockRect.
template <>
void D3DSharingFunctions<D3DTypesHelper::D3D9>::unlockRect(D3DTexture2d *d3dresource) {
    (*d3dresource).UnlockRect();
}
// D3D9: forwards to the device's GetRenderTargetData with `renderTarget` as the
// source and `dstSurface` as the destination. The result is not inspected.
template <>
void D3DSharingFunctions<D3DTypesHelper::D3D9>::getRenderTargetData(D3DTexture2d *renderTarget, D3DTexture2d *dstSurface) {
    this->d3dDevice->GetRenderTargetData(renderTarget, dstSurface);
}
// D3D9: no-op; nothing is copied.
template <>
void D3DSharingFunctions<D3DTypesHelper::D3D9>::copySubresourceRegion(D3DResource *dst, cl_uint dstSubresource,
                                                                      D3DResource *src, cl_uint srcSubresource) {}
// D3D9: forwards to the device's UpdateSurface for the whole surface (null
// source rectangle, null destination point). The result is not inspected.
template <>
void D3DSharingFunctions<D3DTypesHelper::D3D9>::updateSurface(D3DTexture2d *src, D3DTexture2d *dst) {
    this->d3dDevice->UpdateSurface(src, nullptr, dst, nullptr);
}
// D3D9: issues the end of `query` and polls, flushing on each poll, until the
// query result is available. Polling continues only while GetData reports
// S_FALSE (data not ready yet); an error such as a lost device ends the wait
// instead of spinning forever.
template <>
void D3DSharingFunctions<D3DTypesHelper::D3D9>::flushAndWait(D3DQuery *query) {
    query->Issue(D3DISSUE_END);
    while (query->GetData(nullptr, 0, D3DGETDATA_FLUSH) == S_FALSE) {
        // busy-wait until the query completes
    }
}
// D3D9 specialization of getDeviceContext: the body is empty; query is unused.
template <>
void D3DSharingFunctions<D3DTypesHelper::D3D9>::getDeviceContext(D3DQuery *query) {
}
// D3D9 specialization of releaseDeviceContext: the body is empty; query is unused.
template <>
void D3DSharingFunctions<D3DTypesHelper::D3D9>::releaseDeviceContext(D3DQuery *query) {
}
// D3D9 specialization: fills *dxgiDesc with the description of a DXGI adapter.
//
// If adapter is non-null it is used directly (a temporary reference is taken
// and dropped). Otherwise the adapter is obtained from device by querying it
// for IDXGIDevice.
//
// When no adapter can be obtained (null device, failed QueryInterface, or
// GetAdapter yielding no adapter) the function returns without touching
// *dxgiDesc instead of dereferencing a null interface pointer.
template <>
void D3DSharingFunctions<D3DTypesHelper::D3D9>::getDxgiDesc(DXGI_ADAPTER_DESC *dxgiDesc, IDXGIAdapter *adapter, D3DDevice *device) {
    if (adapter) {
        // Balance the Release() at the end of the function.
        adapter->AddRef();
    } else {
        if (!device) {
            return;
        }
        IDXGIDevice *dxgiDevice = nullptr;
        const HRESULT queryResult = device->QueryInterface(__uuidof(IDXGIDevice), reinterpret_cast<void **>(&dxgiDevice));
        if (queryResult != S_OK || !dxgiDevice) {
            return;
        }
        dxgiDevice->GetAdapter(&adapter);
        dxgiDevice->Release();
        if (!adapter) {
            return;
        }
    }
    adapter->GetDesc(dxgiDesc);
    adapter->Release();
}
// Explicit instantiation of Context::getSharing for the D3D9 sharing functions.
template D3DSharingFunctions<D3DTypesHelper::D3D9> *Context::getSharing<D3DSharingFunctions<D3DTypesHelper::D3D9>>();

View File

@@ -0,0 +1,161 @@
/*
* Copyright (c) 2017, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#pragma once
#include "runtime/api/dispatch.h"
#include "runtime/helpers/debug_helpers.h"
#include "runtime/sharings/sharing.h"
#include "DXGI1_2.h"
#include <vector>
namespace OCLRT {
namespace D3DTypesHelper {
// Type bundle mapping the generic names used by D3DSharingFunctions<D3D> onto Direct3D 9 types.
// Buffer and 3D-texture related names are mapped to empty placeholder types / void *.
struct D3D9 {
typedef IDirect3DDevice9 D3DDevice;
typedef IDirect3DQuery9 D3DQuery;
typedef D3DQUERYTYPE D3DQueryDesc;
typedef IDirect3DResource9 D3DResource;
// Empty placeholder type for the buffer descriptor.
typedef struct {
} D3DBufferDesc;
typedef void *D3DBufferObj;
typedef D3DSURFACE_DESC D3DTexture2dDesc;
// Empty placeholder type for the 3D texture descriptor.
typedef struct {
} D3DTexture3dDesc;
// "2D textures" are represented by surfaces in this bundle.
typedef IDirect3DSurface9 D3DTexture2d;
// Empty placeholder type for the 3D texture object.
typedef struct {
} D3DTexture3d;
};
// Type bundle mapping the generic names used by D3DSharingFunctions<D3D> onto Direct3D 10 types.
struct D3D10 {
    using D3DDevice = ID3D10Device;
    using D3DQuery = ID3D10Query;
    using D3DQueryDesc = D3D10_QUERY_DESC;
    using D3DResource = ID3D10Resource;
    using D3DBufferDesc = D3D10_BUFFER_DESC;
    using D3DBufferObj = ID3D10Buffer;
    using D3DTexture2dDesc = D3D10_TEXTURE2D_DESC;
    using D3DTexture3dDesc = D3D10_TEXTURE3D_DESC;
    using D3DTexture2d = ID3D10Texture2D;
    using D3DTexture3d = ID3D10Texture3D;
};
// Type bundle mapping the generic names used by D3DSharingFunctions<D3D> onto Direct3D 11 types.
struct D3D11 {
    using D3DDevice = ID3D11Device;
    using D3DQuery = ID3D11Query;
    using D3DQueryDesc = D3D11_QUERY_DESC;
    using D3DResource = ID3D11Resource;
    using D3DBufferDesc = D3D11_BUFFER_DESC;
    using D3DBufferObj = ID3D11Buffer;
    using D3DTexture2dDesc = D3D11_TEXTURE2D_DESC;
    using D3DTexture3dDesc = D3D11_TEXTURE3D_DESC;
    using D3DTexture2d = ID3D11Texture2D;
    using D3DTexture3d = ID3D11Texture3D;
};
} // namespace D3DTypesHelper
// Bit flags describing how a D3D resource was created.
// NOTE(review): the numeric values look like they mirror the corresponding
// D3D usage / misc-flag constants - confirm against the D3D headers.
enum D3DResourceFlags {
    USAGE_RENDERTARGET = 1 << 0,     // 1
    MISC_SHARED = 1 << 1,            // 2
    MISC_SHARED_KEYEDMUTEX = 1 << 8, // 256
    MISC_SHARED_NTHANDLE = 1 << 11   // 2048
};
template <typename D3D>
class D3DSharingFunctions : public SharingFunctions {
typedef typename D3D::D3DDevice D3DDevice;
typedef typename D3D::D3DQuery D3DQuery;
typedef typename D3D::D3DQueryDesc D3DQueryDesc;
typedef typename D3D::D3DResource D3DResource;
typedef typename D3D::D3DBufferDesc D3DBufferDesc;
typedef typename D3D::D3DBufferObj D3DBufferObj;
typedef typename D3D::D3DTexture2dDesc D3DTexture2dDesc;
typedef typename D3D::D3DTexture3dDesc D3DTexture3dDesc;
typedef typename D3D::D3DTexture2d D3DTexture2d;
typedef typename D3D::D3DTexture3d D3DTexture3d;
public:
typedef void (*GetDxgiDescFcn)(DXGI_ADAPTER_DESC *dxgiDesc, IDXGIAdapter *adapter, D3DDevice *device);
D3DSharingFunctions(D3DDevice *d3dDevice) : d3dDevice(d3dDevice) {
trackedResources.reserve(128);
getDxgiDescFcn = &this->getDxgiDesc;
};
uint32_t getId() const override {
return D3DSharingFunctions<D3D>::sharingId;
}
D3DSharingFunctions() = delete;
virtual ~D3DSharingFunctions(){};
static const uint32_t sharingId;
MOCKABLE_VIRTUAL void createQuery(D3DQuery **query);
MOCKABLE_VIRTUAL void createBuffer(D3DBufferObj **buffer, unsigned int width);
MOCKABLE_VIRTUAL void createTexture2d(D3DTexture2d **texture, D3DTexture2dDesc *desc, cl_uint subresource);
MOCKABLE_VIRTUAL void createTexture3d(D3DTexture3d **texture, D3DTexture3dDesc *desc, cl_uint subresource);
MOCKABLE_VIRTUAL void getBufferDesc(D3DBufferDesc *bufferDesc, D3DBufferObj *buffer);
MOCKABLE_VIRTUAL void getTexture2dDesc(D3DTexture2dDesc *textureDesc, D3DTexture2d *texture);
MOCKABLE_VIRTUAL void getTexture3dDesc(D3DTexture3dDesc *textureDesc, D3DTexture3d *texture);
MOCKABLE_VIRTUAL void getSharedHandle(D3DResource *resource, void **handle);
MOCKABLE_VIRTUAL void getSharedNTHandle(D3DResource *resource, void **handle);
MOCKABLE_VIRTUAL void addRef(D3DResource *resource);
MOCKABLE_VIRTUAL void release(IUnknown *resource);
MOCKABLE_VIRTUAL void copySubresourceRegion(D3DResource *dst, cl_uint dstSubresource,
D3DResource *src, cl_uint srcSubresource);
MOCKABLE_VIRTUAL void flushAndWait(D3DQuery *query);
MOCKABLE_VIRTUAL void getDeviceContext(D3DQuery *query);
MOCKABLE_VIRTUAL void releaseDeviceContext(D3DQuery *query);
MOCKABLE_VIRTUAL void lockRect(D3DTexture2d *d3dResource, D3DLOCKED_RECT *lockedRect, uint32_t flags);
MOCKABLE_VIRTUAL void unlockRect(D3DTexture2d *d3dResource);
MOCKABLE_VIRTUAL void getRenderTargetData(D3DTexture2d *renderTarget, D3DTexture2d *dstSurface);
MOCKABLE_VIRTUAL void updateSurface(D3DTexture2d *src, D3DTexture2d *dst);
MOCKABLE_VIRTUAL void updateDevice(D3DResource *resource);
GetDxgiDescFcn getDxgiDescFcn = nullptr;
bool isTracked(D3DResource *resource, cl_uint subresource) {
return std::find(trackedResources.begin(), trackedResources.end(), std::make_pair(resource, subresource)) != trackedResources.end();
}
void track(D3DResource *resource, cl_uint subresource) {
trackedResources.push_back(std::make_pair(resource, subresource));
}
void untrack(D3DResource *resource, cl_uint subresource) {
auto element = std::find(trackedResources.begin(), trackedResources.end(), std::make_pair(resource, subresource));
DEBUG_BREAK_IF(element == trackedResources.end());
trackedResources.erase(element);
}
void setDevice(D3DDevice *d3dDevice) { this->d3dDevice = d3dDevice; }
D3DDevice *getDevice() { return d3dDevice; }
void fillCreateBufferDesc(D3DBufferDesc &desc, unsigned int width);
void fillCreateTexture2dDesc(D3DTexture2dDesc &desc, D3DTexture2dDesc *srcDesc, cl_uint subresource);
void fillCreateTexture3dDesc(D3DTexture3dDesc &desc, D3DTexture3dDesc *srcDesc, cl_uint subresource);
protected:
D3DDevice *d3dDevice = nullptr;
ID3D11DeviceContext *d3d11DeviceContext = nullptr;
std::vector<std::pair<D3DResource *, cl_uint>> trackedResources;
static void getDxgiDesc(DXGI_ADAPTER_DESC *dxgiDesc, IDXGIAdapter *adapter, D3DDevice *device);
};
} // namespace OCLRT

View File

@@ -0,0 +1,122 @@
/*
* Copyright (c) 2017, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#include "runtime/os_interface/debug_settings_manager.h"
#include "runtime/utilities/debug_settings_reader.h"
#include <stdint.h>
#include "runtime/os_interface/windows/windows_wrapper.h"
#include "runtime/os_interface/windows/registry_reader.h"
namespace OCLRT {
// Windows implementation of the OS settings reader factory:
// returns a RegistryReader allocated with new.
SettingsReader *SettingsReader::createOsReader() {
return new RegistryReader;
}
// Boolean overload: reads the setting through the int32_t overload
// (with the default converted to 0/1) and reports whether the result is non-zero.
bool RegistryReader::getSetting(const char *settingName, bool defaultValue) {
    const int32_t defaultAsInt = static_cast<int32_t>(defaultValue);
    return getSetting(settingName, defaultAsInt) != 0;
}
int32_t RegistryReader::getSetting(const char *settingName, int32_t defaultValue) {
HKEY Key;
DWORD value = defaultValue;
DWORD success = ERROR_SUCCESS;
success = RegOpenKeyExA(HKEY_LOCAL_MACHINE,
igdrclRegKey.c_str(),
0,
KEY_READ,
&Key);
if (ERROR_SUCCESS == success) {
DWORD regType;
DWORD size = sizeof(ULONG);
success = RegQueryValueExA(Key,
settingName,
NULL,
&regType,
(LPBYTE)&value,
&size);
RegCloseKey(Key);
}
return value;
}
std::string RegistryReader::getSetting(const char *settingName, const std::string &value) {
HKEY Key;
DWORD success = ERROR_SUCCESS;
bool retFlag = false;
std::string keyValue = value;
success = RegOpenKeyExA(HKEY_LOCAL_MACHINE,
igdrclRegKey.c_str(),
0,
KEY_READ,
&Key);
if (ERROR_SUCCESS == success) {
DWORD regType = REG_NONE;
DWORD regSize = 0;
success = RegQueryValueExA(Key,
settingName,
NULL,
&regType,
NULL,
&regSize);
if (success == ERROR_SUCCESS && regType == REG_SZ) {
char *regData = new char[regSize];
success = RegQueryValueExA(Key,
settingName,
NULL,
&regType,
(LPBYTE)regData,
&regSize);
keyValue.assign(regData);
delete[] regData;
retFlag = true;
} else if (success == ERROR_SUCCESS && regType == REG_BINARY) {
std::unique_ptr<wchar_t[]> regData(new wchar_t[regSize]);
success = RegQueryValueExA(Key,
settingName,
NULL,
&regType,
(LPBYTE)regData.get(),
&regSize);
size_t charsConverted = 0;
std::unique_ptr<char[]> convertedData(new char[regSize]);
wcstombs_s(&charsConverted, convertedData.get(), regSize, regData.get(), regSize);
keyValue.assign(convertedData.get());
retFlag = true;
}
RegCloseKey(Key);
}
return keyValue;
}
}; // namespace OCLRT

View File

@@ -0,0 +1,64 @@
/*
* Copyright (c) 2017, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#include "runtime/os_interface/windows/wddm.h"
#include "runtime/helpers/aligned_memory.h"
#include "runtime/os_interface/windows/deferrable_deletion_win.h"
namespace OCLRT {
// Windows implementation of the DeferrableDeletion factory:
// forwards all arguments to the DeferrableDeletionImpl constructor and returns
// the object allocated with new.
template <typename... Args>
DeferrableDeletion *DeferrableDeletion::create(Args... args) {
return new DeferrableDeletionImpl(std::forward<Args>(args)...);
}
// Explicit instantiation of create() for the argument list accepted by the DeferrableDeletionImpl constructor.
template DeferrableDeletion *DeferrableDeletion::create(Wddm *wddm, D3DKMT_HANDLE *handles, uint32_t allocationCount, uint64_t lastFenceValue,
D3DKMT_HANDLE resourceHandle, void *cpuPtr, void *gpuPtr);
// Captures everything needed to destroy an allocation later.
// When handles is non-null, the first allocationCount handles are copied into a
// private array owned by this object; otherwise the handle array stays null.
// The remaining arguments are stored as given.
DeferrableDeletionImpl::DeferrableDeletionImpl(Wddm *wddm, D3DKMT_HANDLE *handles, uint32_t allocationCount, uint64_t lastFenceValue,
                                               D3DKMT_HANDLE resourceHandle, void *cpuPtr, void *gpuPtr)
    : wddm(wddm),
      handles(handles ? new D3DKMT_HANDLE[allocationCount] : nullptr),
      allocationCount(allocationCount),
      lastFenceValue(lastFenceValue),
      resourceHandle(resourceHandle),
      cpuPtr(cpuPtr),
      gpuPtr(gpuPtr) {
    if (handles == nullptr) {
        return;
    }
    for (uint32_t index = 0; index < allocationCount; ++index) {
        this->handles[index] = handles[index];
    }
}
// Performs the deferred destruction: destroys the allocations through wddm,
// frees the CPU memory with alignedFree and releases the GPU pointer.
// Both stored pointers are cleared afterwards.
void DeferrableDeletionImpl::apply() {
    const bool allocationsDestroyed = wddm->destroyAllocations(handles, allocationCount, lastFenceValue, resourceHandle);
    DEBUG_BREAK_IF(!allocationsDestroyed);

    // Host-side memory.
    ::alignedFree(cpuPtr);
    cpuPtr = nullptr;

    // Device-side address.
    wddm->releaseGpuPtr(gpuPtr);
    gpuPtr = nullptr;
}
// Frees the private copy of the handle array made by the constructor.
DeferrableDeletionImpl::~DeferrableDeletionImpl() {
    // delete[] on a null pointer is a no-op, so no explicit check is required.
    delete[] handles;
}
} // namespace OCLRT

View File

@@ -0,0 +1,46 @@
/*
* Copyright (c) 2017, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#pragma once
#include "runtime/memory_manager/deferrable_deletion.h"
namespace OCLRT {
class Wddm;
// Windows (WDDM) implementation of DeferrableDeletion.
// Stores the data describing an allocation so that it can be destroyed later
// by apply().
class DeferrableDeletionImpl : public DeferrableDeletion {
  public:
    DeferrableDeletionImpl(Wddm *wddm, D3DKMT_HANDLE *handles, uint32_t allocationCount, uint64_t lastFenceValue,
                           D3DKMT_HANDLE resourceHandle, void *cpuPtr, void *gpuPtr);

    // The object owns the `handles` array through a raw pointer that the
    // destructor delete[]s; a copy would free the same array twice.
    DeferrableDeletionImpl(const DeferrableDeletionImpl &) = delete;
    DeferrableDeletionImpl &operator=(const DeferrableDeletionImpl &) = delete;

    // Destroys the allocation described by the stored members.
    void apply() override;
    ~DeferrableDeletionImpl();

  protected:
    Wddm *wddm;
    D3DKMT_HANDLE *handles = nullptr; // owned array; nullptr when the constructor received no handles
    uint32_t allocationCount;
    uint64_t lastFenceValue;
    D3DKMT_HANDLE resourceHandle;
    void *cpuPtr;
    void *gpuPtr;
};
} // namespace OCLRT

View File

@@ -0,0 +1,38 @@
/*
* Copyright (c) 2017, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
// Need to suppress warning 4005 caused by hw_cmds.h and wddm.h order.
// Current order must be preserved due to two versions of igfxfmid.h
#pragma warning(push)
#pragma warning(disable : 4005)
#include "hw_cmds.h"
#include "runtime/command_stream/device_command_stream.h"
#include "runtime/os_interface/windows/wddm_device_command_stream.h"
#pragma warning(pop)
namespace OCLRT {
// Windows implementation of the device command stream receiver factory:
// returns a WddmCommandStreamReceiver for the given hardware info, allocated with new.
// nullptr is passed as the second constructor argument.
template <typename GfxFamily>
CommandStreamReceiver *DeviceCommandStreamReceiver<GfxFamily>::create(const HardwareInfo &hwInfo) {
return new WddmCommandStreamReceiver<GfxFamily>(hwInfo, nullptr);
}
}

View File

@@ -0,0 +1,133 @@
/*
* Copyright (c) 2017, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#ifdef _WIN32
#include "hw_info.h"
#include "runtime/command_stream/preemption.h"
#include "runtime/helpers/debug_helpers.h"
#include "runtime/helpers/hw_helper.h"
#include "runtime/os_interface/device_factory.h"
#include "runtime/os_interface/windows/wddm.h"
#include "runtime/device/device.h"
#include "runtime/os_interface/debug_settings_manager.h"
namespace OCLRT {
// Per-product table of hardware infos, defined elsewhere; indexed by product family in getDevices().
extern const HardwareInfo *hardwareInfoTable[IGFX_MAX_PRODUCT];
// Number of devices currently held by the factory (set by getDevices(), cleared by releaseDevices()).
size_t DeviceFactory::numDevices = 0;
// Array of hardware infos handed out by getDevices(); freed by releaseDevices().
HardwareInfo *DeviceFactory::hwInfos = nullptr;
// Type-erased pointer to the ADAPTER_INFO array allocated in getDevices(); freed by releaseDevices().
void *DeviceFactory::internal = nullptr;
// Enumerates the graphics adapter through Wddm and builds the HardwareInfo
// describing it.
//
// Only adapter index 0 is queried, so at most one device is reported.
// On success *pHWInfos points to a one-element HardwareInfo array, numDevices
// is set to 1, the result is also recorded in the DeviceFactory statics, and
// true is returned. The data is released later by releaseDevices().
// On failure numDevices is 0, *pHWInfos is left untouched, all temporary
// allocations (including the adapter info, which used to leak) are freed and
// false is returned.
bool DeviceFactory::getDevices(HardwareInfo **pHWInfos, size_t &numDevices) {
    bool success = false;
    HardwareInfo *tempHwInfos = new HardwareInfo[1];
    ADAPTER_INFO *adapterInfo = new ADAPTER_INFO[1];
    unsigned int devNum = 0;
    numDevices = 0;

    success = Wddm::enumAdapters(devNum, adapterInfo);
    if (!success) {
        delete[] tempHwInfos;
        delete[] adapterInfo;
        return success;
    }

    auto featureTable = new FeatureTable();
    auto waTable = new WorkaroundTable();
    tempHwInfos[devNum].pPlatform = new PLATFORM(adapterInfo->GfxPlatform);
    tempHwInfos[devNum].pSkuTable = featureTable;
    tempHwInfos[devNum].pWaTable = waTable;
    tempHwInfos[devNum].pSysInfo = new GT_SYSTEM_INFO(adapterInfo->SystemInfo);

    Wddm::setupFeatureTableFromAdapterInfo(featureTable, adapterInfo);
    Wddm::setupWorkaroundTableFromAdapterInfo(waTable, adapterInfo);

    // Start from the static capability table of the detected product family.
    auto productFamily = tempHwInfos[devNum].pPlatform->eProductFamily;
    DEBUG_BREAK_IF(hardwareInfoTable[productFamily] == nullptr);
    tempHwInfos[devNum].capabilityTable = hardwareInfoTable[productFamily]->capabilityTable;

    // Overwrite dynamic parameters
    tempHwInfos[devNum].capabilityTable.maxRenderFrequency = adapterInfo->MaxRenderFreq;
    tempHwInfos[devNum].capabilityTable.ftrSvm = adapterInfo->SkuTable.FtrSVM;

    HwHelper &hwHelper = HwHelper::get(adapterInfo->GfxPlatform.eRenderCoreFamily);
    hwHelper.setCapabilityCoherencyFlag(&tempHwInfos[devNum], tempHwInfos[devNum].capabilityTable.ftrSupportsCoherency);
    hwHelper.setupPreemptionRegisters(&tempHwInfos[devNum], !!adapterInfo->WaTable.WaEnablePreemptionGranularityControlByUMD);

    // Instrumentation
    tempHwInfos[devNum].capabilityTable.instrumentationEnabled = false; // Intentionally disable, after enabling use adapterInfo->Caps.InstrumentationIsEnabled

    PreemptionHelper::adjustDefaultPreemptionMode(tempHwInfos[devNum].capabilityTable,
                                                  static_cast<bool>(adapterInfo->SkuTable.FtrGpGpuMidThreadLevelPreempt),
                                                  static_cast<bool>(adapterInfo->SkuTable.FtrGpGpuThreadGroupLevelPreempt),
                                                  static_cast<bool>(adapterInfo->SkuTable.FtrGpGpuMidBatchPreempt));

    // Debug overrides: a negative flag value keeps the value from the capability table.
    tempHwInfos[devNum].capabilityTable.enableKmdNotify = DebugManager.flags.OverrideEnableKmdNotify.get() >= 0
                                                              ? !!DebugManager.flags.OverrideEnableKmdNotify.get()
                                                              : tempHwInfos[devNum].capabilityTable.enableKmdNotify;
    tempHwInfos[devNum].capabilityTable.delayKmdNotifyMs = DebugManager.flags.OverrideKmdNotifyDelayMs.get() >= 0
                                                               ? static_cast<int64_t>(DebugManager.flags.OverrideKmdNotifyDelayMs.get())
                                                               : tempHwInfos[devNum].capabilityTable.delayKmdNotifyMs;

    numDevices = 1;
    *pHWInfos = tempHwInfos;

    // Keep the adapter info alive; releaseDevices() frees it.
    internal = static_cast<void *>(adapterInfo);
    DeviceFactory::numDevices = 1;
    DeviceFactory::hwInfos = tempHwInfos;

    return success;
}
// Frees everything allocated by getDevices(): the per-device platform, SKU,
// workaround and system-info objects, the HardwareInfo array and the adapter
// info stored in `internal`. Afterwards the factory statics are reset,
// including `internal`, which previously kept pointing at freed memory.
// Calling this when no devices are held only resets the statics.
void DeviceFactory::releaseDevices() {
    if (DeviceFactory::numDevices > 0) {
        for (unsigned int i = 0; i < DeviceFactory::numDevices; ++i) {
            delete hwInfos[i].pPlatform;
            delete hwInfos[i].pSkuTable;
            delete hwInfos[i].pWaTable;
            delete hwInfos[i].pSysInfo;
        }
        delete[] hwInfos;

        ADAPTER_INFO *adapterInfo = static_cast<ADAPTER_INFO *>(internal);
        delete[] adapterInfo;
        internal = nullptr;
    }
    DeviceFactory::hwInfos = nullptr;
    DeviceFactory::numDevices = 0;
}
// Windows-specific device extensions: appends the simultaneous-sharing
// extension name to deviceExtensions and replaces simultaneousInterops with the
// zero-terminated list of context properties below.
void Device::appendOSExtensions(std::string &deviceExtensions) {
    deviceExtensions.append("cl_intel_simultaneous_sharing ");

    simultaneousInterops = {
        // OpenGL
        CL_GL_CONTEXT_KHR,
        CL_WGL_HDC_KHR,
        // D3D9 / D3D9Ex / DXVA
        CL_CONTEXT_ADAPTER_D3D9_KHR,
        CL_CONTEXT_D3D9_DEVICE_INTEL,
        CL_CONTEXT_ADAPTER_D3D9EX_KHR,
        CL_CONTEXT_D3D9EX_DEVICE_INTEL,
        CL_CONTEXT_ADAPTER_DXVA_KHR,
        CL_CONTEXT_DXVA_DEVICE_INTEL,
        // D3D10 / D3D11
        CL_CONTEXT_D3D10_DEVICE_KHR,
        CL_CONTEXT_D3D11_DEVICE_KHR,
        // terminator
        0};
}
} // namespace OCLRT
#endif

View File

@@ -0,0 +1,69 @@
/*
* Copyright (c) 2017, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#include "runtime/os_interface/windows/wddm.h"
#include "runtime/device/driver_info.h"
#include "runtime/os_interface/windows/driver_info.h"
#include "runtime/os_interface/windows/os_interface.h"
#include "runtime/os_interface/windows/registry_reader.h"
namespace OCLRT {
// Windows implementation of the DriverInfo factory.
// Returns nullptr when no OS interface is given. Otherwise builds a
// DriverInfoWindows whose registry reader is rooted at the adapter's device
// registry path (after trimRegistryKey has been applied to it).
DriverInfo *DriverInfo::create(OSInterface *osInterface) {
    if (!osInterface) {
        return nullptr;
    }

    auto wddm = osInterface->get()->getWddm();
    DEBUG_BREAK_IF(wddm == nullptr);
    DEBUG_BREAK_IF(wddm->getAdapterInfo() == nullptr);

    std::string registryPath(wddm->getAdapterInfo()->DeviceRegistryPath);

    auto driverInfo = new DriverInfoWindows();
    registryPath = driverInfo->trimRegistryKey(registryPath);
    driverInfo->setRegistryReader(new RegistryReader(registryPath));
    return driverInfo;
}
// Takes ownership of reader: it is stored in the registryReader unique_ptr,
// which destroys any previously held reader.
void DriverInfoWindows::setRegistryReader(SettingsReader *reader) {
registryReader.reset(reader);
}
// Removes the first occurrence of the "\REGISTRY\MACHINE\" prefix from path
// and returns the result. Paths that do not contain the prefix are returned
// unchanged.
//
// The search must run on `path`: searching the prefix inside itself always
// reports position 0 and would strip the first characters of every path.
std::string DriverInfoWindows::trimRegistryKey(std::string path) {
    std::string prefix("\\REGISTRY\\MACHINE\\");
    auto pos = path.find(prefix);
    if (pos != std::string::npos)
        path.erase(pos, prefix.length());

    return path;
}
// Returns the "HardwareInformation.AdapterString" setting from the registry
// reader, with defaultName passed as the fallback value.
// A registry reader must have been set beforehand.
std::string DriverInfoWindows::getDeviceName(std::string defaultName) {
    return registryReader->getSetting("HardwareInformation.AdapterString", defaultName);
}
// Returns the "DriverVersion" setting from the registry reader, with
// defaultVersion passed as the fallback value.
// A registry reader must have been set beforehand.
std::string DriverInfoWindows::getVersion(std::string defaultVersion) {
    return registryReader->getSetting("DriverVersion", defaultVersion);
}
} // namespace OCLRT

View File

@@ -0,0 +1,45 @@
/*
* Copyright (c) 2017, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#pragma once
#include "runtime/device/driver_info.h"
#include <memory>
#include <string>
namespace OCLRT {
class SettingsReader;
// Windows implementation of DriverInfo: answers driver queries by reading
// settings through a SettingsReader owned by this object.
class DriverInfoWindows : public DriverInfo {
public:
// Reads the "HardwareInformation.AdapterString" setting, passing defaultName as the fallback.
std::string getDeviceName(std::string defaultName);
// Reads the "DriverVersion" setting, passing defaultVersion as the fallback.
std::string getVersion(std::string defaultVersion);
// Takes ownership of reader, replacing any previously set reader.
void setRegistryReader(SettingsReader *reader);
// Strips the "\REGISTRY\MACHINE\" prefix from a registry key path.
std::string trimRegistryKey(std::string key);
protected:
// Reader used by getDeviceName() / getVersion(); must be set before they are called.
std::unique_ptr<SettingsReader> registryReader;
};
} // namespace OCLRT

View File

@@ -0,0 +1,98 @@
/*
* Copyright (c) 2017, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#include "gdi_interface.h"
namespace OCLRT {
// Loads the GDI library named by Os::gdiDllName and resolves the D3DKMT entry
// points. `initialized` ends up true only when the library was loaded and
// getAllProcAddresses() succeeded.
Gdi::Gdi() : gdiDll(Os::gdiDllName),
             initialized(false) {
    // Short-circuit: entry points are only looked up when the library is loaded.
    initialized = gdiDll.isLoaded() && getAllProcAddresses();
}
bool Gdi::getAllProcAddresses() {
openAdapterFromHdc = reinterpret_cast<PFND3DKMT_OPENADAPTERFROMHDC>(gdiDll.getProcAddress("D3DKMTOpenAdapterFromHdc"));
openAdapterFromLuid = reinterpret_cast<PFND3DKMT_OPENADAPTERFROMLUID>(gdiDll.getProcAddress("D3DKMTOpenAdapterFromLuid"));
createAllocation = reinterpret_cast<PFND3DKMT_CREATEALLOCATION>(gdiDll.getProcAddress("D3DKMTCreateAllocation"));
destroyAllocation = reinterpret_cast<PFND3DKMT_DESTROYALLOCATION>(gdiDll.getProcAddress("D3DKMTDestroyAllocation"));
destroyAllocation2 = reinterpret_cast<PFND3DKMT_DESTROYALLOCATION2>(gdiDll.getProcAddress("D3DKMTDestroyAllocation2"));
queryAdapterInfo = reinterpret_cast<PFND3DKMT_QUERYADAPTERINFO>(gdiDll.getProcAddress("D3DKMTQueryAdapterInfo"));
closeAdapter = reinterpret_cast<PFND3DKMT_CLOSEADAPTER>(gdiDll.getProcAddress("D3DKMTCloseAdapter"));
createDevice = reinterpret_cast<PFND3DKMT_CREATEDEVICE>(gdiDll.getProcAddress("D3DKMTCreateDevice"));
destroyDevice = reinterpret_cast<PFND3DKMT_DESTROYDEVICE>(gdiDll.getProcAddress("D3DKMTDestroyDevice"));
escape = reinterpret_cast<PFND3DKMT_ESCAPE>(gdiDll.getProcAddress("D3DKMTEscape"));
createContext = reinterpret_cast<PFND3DKMT_CREATECONTEXTVIRTUAL>(gdiDll.getProcAddress("D3DKMTCreateContextVirtual"));
destroyContext = reinterpret_cast<PFND3DKMT_DESTROYCONTEXT>(gdiDll.getProcAddress("D3DKMTDestroyContext"));
openResource = reinterpret_cast<PFND3DKMT_OPENRESOURCE>(gdiDll.getProcAddress("D3DKMTOpenResource"));
openResourceFromNtHandle = reinterpret_cast<PFND3DKMT_OPENRESOURCEFROMNTHANDLE>(gdiDll.getProcAddress("D3DKMTOpenResourceFromNtHandle"));
queryResourceInfo = reinterpret_cast<PFND3DKMT_QUERYRESOURCEINFO>(gdiDll.getProcAddress("D3DKMTQueryResourceInfo"));
queryResourceInfoFromNtHandle = reinterpret_cast<PFND3DKMT_QUERYRESOURCEINFOFROMNTHANDLE>(gdiDll.getProcAddress("D3DKMTQueryResourceInfoFromNtHandle"));
lock = reinterpret_cast<PFND3DKMT_LOCK>(gdiDll.getProcAddress("D3DKMTLock"));
unlock = reinterpret_cast<PFND3DKMT_UNLOCK>(gdiDll.getProcAddress("D3DKMTUnlock"));
render = reinterpret_cast<PFND3DKMT_RENDER>(gdiDll.getProcAddress("D3DKMTRender"));
createSynchronizationObject = reinterpret_cast<PFND3DKMT_CREATESYNCHRONIZATIONOBJECT>(gdiDll.getProcAddress("D3DKMTCreateSynchronizationObject"));
createSynchronizationObject2 = reinterpret_cast<PFND3DKMT_CREATESYNCHRONIZATIONOBJECT2>(gdiDll.getProcAddress("D3DKMTCreateSynchronizationObject2"));
destroySynchronizationObject = reinterpret_cast<PFND3DKMT_DESTROYSYNCHRONIZATIONOBJECT>(gdiDll.getProcAddress("D3DKMTDestroySynchronizationObject"));
signalSynchronizationObject = reinterpret_cast<PFND3DKMT_SIGNALSYNCHRONIZATIONOBJECT>(gdiDll.getProcAddress("D3DKMTSignalSynchronizationObject"));
waitForSynchronizationObject = reinterpret_cast<PFND3DKMT_WAITFORSYNCHRONIZATIONOBJECT>(gdiDll.getProcAddress("D3DKMTWaitForSynchronizationObject"));
waitForSynchronizationObjectFromCpu = reinterpret_cast<PFND3DKMT_WAITFORSYNCHRONIZATIONOBJECTFROMCPU>(gdiDll.getProcAddress("D3DKMTWaitForSynchronizationObjectFromCpu"));
signalSynchronizationObjectFromCpu = reinterpret_cast<PFND3DKMT_SIGNALSYNCHRONIZATIONOBJECTFROMCPU>(gdiDll.getProcAddress("D3DKMTSignalSynchronizationObjectFromCpu"));
waitForSynchronizationObjectFromGpu = reinterpret_cast<PFND3DKMT_WAITFORSYNCHRONIZATIONOBJECTFROMGPU>(gdiDll.getProcAddress("D3DKMTWaitForSynchronizationObjectFromGpu"));
signalSynchronizationObjectFromGpu = reinterpret_cast<PFND3DKMT_SIGNALSYNCHRONIZATIONOBJECTFROMGPU>(gdiDll.getProcAddress("D3DKMTSignalSynchronizationObjectFromGpu"));
createPagingQueue = reinterpret_cast<PFND3DKMT_CREATEPAGINGQUEUE>(gdiDll.getProcAddress("D3DKMTCreatePagingQueue"));
destroyPagingQueue = reinterpret_cast<PFND3DKMT_DESTROYPAGINGQUEUE>(gdiDll.getProcAddress("D3DKMTDestroyPagingQueue"));
lock2 = reinterpret_cast<PFND3DKMT_LOCK2>(gdiDll.getProcAddress("D3DKMTLock2"));
unlock2 = reinterpret_cast<PFND3DKMT_UNLOCK2>(gdiDll.getProcAddress("D3DKMTUnlock2"));
mapGpuVirtualAddress = reinterpret_cast<PFND3DKMT_MAPGPUVIRTUALADDRESS>(gdiDll.getProcAddress("D3DKMTMapGpuVirtualAddress"));
reserveGpuVirtualAddress = reinterpret_cast<PFND3DKMT_RESERVEGPUVIRTUALADDRESS>(gdiDll.getProcAddress("D3DKMTReserveGpuVirtualAddress"));
freeGpuVirtualAddress = reinterpret_cast<PFND3DKMT_FREEGPUVIRTUALADDRESS>(gdiDll.getProcAddress("D3DKMTFreeGpuVirtualAddress"));
updateGpuVirtualAddress = reinterpret_cast<PFND3DKMT_UPDATEGPUVIRTUALADDRESS>(gdiDll.getProcAddress("D3DKMTUpdateGpuVirtualAddress"));
submitCommand = reinterpret_cast<PFND3DKMT_SUBMITCOMMAND>(gdiDll.getProcAddress("D3DKMTSubmitCommand"));
makeResident = reinterpret_cast<PFND3DKMT_MAKERESIDENT>(gdiDll.getProcAddress("D3DKMTMakeResident"));
evict = reinterpret_cast<PFND3DKMT_EVICT>(gdiDll.getProcAddress("D3DKMTEvict"));
registerTrimNotification = reinterpret_cast<PFND3DKMT_REGISTERTRIMNOTIFICATION>(gdiDll.getProcAddress("D3DKMTRegisterTrimNotification"));
unregisterTrimNotification = reinterpret_cast<PFND3DKMT_UNREGISTERTRIMNOTIFICATION>(gdiDll.getProcAddress("D3DKMTUnregisterTrimNotification"));
// For debug purposes
getDeviceState = reinterpret_cast<PFND3DKMT_GETDEVICESTATE>(gdiDll.getProcAddress("D3DKMTGetDeviceState"));
// clang-format off
if (openAdapterFromHdc && openAdapterFromLuid && createAllocation && destroyAllocation
&& destroyAllocation2 && queryAdapterInfo && closeAdapter && createDevice
&& destroyDevice && escape && createContext && destroyContext
&& openResource && queryResourceInfo && lock && unlock && render
&& createSynchronizationObject && createSynchronizationObject2
&& destroySynchronizationObject && signalSynchronizationObject
&& waitForSynchronizationObject && waitForSynchronizationObjectFromCpu
&& signalSynchronizationObjectFromCpu && waitForSynchronizationObjectFromGpu
&& signalSynchronizationObjectFromGpu && createPagingQueue && destroyPagingQueue
&& lock2 && unlock2 && mapGpuVirtualAddress && reserveGpuVirtualAddress
&& freeGpuVirtualAddress && updateGpuVirtualAddress &&submitCommand
&& makeResident && evict && registerTrimNotification && unregisterTrimNotification){
return true;
}
// clang-format on
return false;
}
} // namespace OCLRT

View File

@@ -0,0 +1,98 @@
/*
* Copyright (c) 2017, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#pragma once
#include "runtime/helpers/options.h"
#include "runtime/os_interface/windows/windows_inc.h"
#include "runtime/os_interface/windows/os_library.h"
#include <d3d9types.h>
#include <d3dkmthk.h>
#include <string>
#include "runtime/os_interface/windows/thk_wrapper.h"
namespace OCLRT {
// Table of D3DKMT thunk entry points used by the Windows backend.
// Each ThkWrapper member wraps one D3DKMT* export; elsewhere in this file the
// members are assigned from gdiDll.getProcAddress("D3DKMT...").
class Gdi {
  public:
    Gdi();
    // Virtual: the class already has a virtual member (getAllProcAddresses), so
    // derived objects may be destroyed through a Gdi pointer.
    virtual ~Gdi() = default;

    ThkWrapper<OCL_RUNTIME_PROFILING, IN OUT D3DKMT_OPENADAPTERFROMHDC *> openAdapterFromHdc;
    ThkWrapper<OCL_RUNTIME_PROFILING, IN OUT D3DKMT_OPENADAPTERFROMLUID *> openAdapterFromLuid;
    ThkWrapper<OCL_RUNTIME_PROFILING, IN OUT D3DKMT_CREATEALLOCATION *> createAllocation;
    ThkWrapper<OCL_RUNTIME_PROFILING, IN CONST D3DKMT_DESTROYALLOCATION *> destroyAllocation;
    ThkWrapper<OCL_RUNTIME_PROFILING, IN CONST D3DKMT_DESTROYALLOCATION2 *> destroyAllocation2;
    ThkWrapper<OCL_RUNTIME_PROFILING, IN CONST D3DKMT_QUERYADAPTERINFO *> queryAdapterInfo;
    ThkWrapper<OCL_RUNTIME_PROFILING, IN CONST D3DKMT_CLOSEADAPTER *> closeAdapter;
    ThkWrapper<OCL_RUNTIME_PROFILING, IN OUT D3DKMT_CREATEDEVICE *> createDevice;
    ThkWrapper<OCL_RUNTIME_PROFILING, IN CONST D3DKMT_DESTROYDEVICE *> destroyDevice;
    ThkWrapper<OCL_RUNTIME_PROFILING, IN CONST D3DKMT_ESCAPE *> escape;
    ThkWrapper<OCL_RUNTIME_PROFILING, IN D3DKMT_CREATECONTEXTVIRTUAL *> createContext;
    ThkWrapper<OCL_RUNTIME_PROFILING, IN CONST D3DKMT_DESTROYCONTEXT *> destroyContext;
    ThkWrapper<OCL_RUNTIME_PROFILING, IN OUT D3DKMT_OPENRESOURCE *> openResource;
    ThkWrapper<OCL_RUNTIME_PROFILING, IN OUT D3DKMT_OPENRESOURCEFROMNTHANDLE *> openResourceFromNtHandle;
    ThkWrapper<OCL_RUNTIME_PROFILING, IN OUT D3DKMT_QUERYRESOURCEINFO *> queryResourceInfo;
    ThkWrapper<OCL_RUNTIME_PROFILING, IN OUT D3DKMT_QUERYRESOURCEINFOFROMNTHANDLE *> queryResourceInfoFromNtHandle;
    ThkWrapper<OCL_RUNTIME_PROFILING, IN OUT D3DKMT_LOCK *> lock;
    ThkWrapper<OCL_RUNTIME_PROFILING, IN CONST D3DKMT_UNLOCK *> unlock;
    ThkWrapper<OCL_RUNTIME_PROFILING, IN OUT D3DKMT_RENDER *> render;
    ThkWrapper<OCL_RUNTIME_PROFILING, IN OUT D3DKMT_CREATESYNCHRONIZATIONOBJECT *> createSynchronizationObject;
    ThkWrapper<OCL_RUNTIME_PROFILING, IN OUT D3DKMT_CREATESYNCHRONIZATIONOBJECT2 *> createSynchronizationObject2;
    ThkWrapper<OCL_RUNTIME_PROFILING, IN CONST D3DKMT_DESTROYSYNCHRONIZATIONOBJECT *> destroySynchronizationObject;
    ThkWrapper<OCL_RUNTIME_PROFILING, IN CONST D3DKMT_SIGNALSYNCHRONIZATIONOBJECT *> signalSynchronizationObject;
    ThkWrapper<OCL_RUNTIME_PROFILING, IN OUT D3DKMT_WAITFORSYNCHRONIZATIONOBJECT *> waitForSynchronizationObject;
    ThkWrapper<OCL_RUNTIME_PROFILING, IN CONST D3DKMT_WAITFORSYNCHRONIZATIONOBJECTFROMCPU *> waitForSynchronizationObjectFromCpu;
    ThkWrapper<OCL_RUNTIME_PROFILING, IN CONST D3DKMT_SIGNALSYNCHRONIZATIONOBJECTFROMCPU *> signalSynchronizationObjectFromCpu;
    ThkWrapper<OCL_RUNTIME_PROFILING, IN CONST D3DKMT_WAITFORSYNCHRONIZATIONOBJECTFROMGPU *> waitForSynchronizationObjectFromGpu;
    ThkWrapper<OCL_RUNTIME_PROFILING, IN CONST D3DKMT_SIGNALSYNCHRONIZATIONOBJECTFROMGPU *> signalSynchronizationObjectFromGpu;
    ThkWrapper<OCL_RUNTIME_PROFILING, IN OUT D3DKMT_CREATEPAGINGQUEUE *> createPagingQueue;
    ThkWrapper<OCL_RUNTIME_PROFILING, IN OUT D3DDDI_DESTROYPAGINGQUEUE *> destroyPagingQueue;
    ThkWrapper<OCL_RUNTIME_PROFILING, IN OUT D3DKMT_LOCK2 *> lock2;
    ThkWrapper<OCL_RUNTIME_PROFILING, IN CONST D3DKMT_UNLOCK2 *> unlock2;
    ThkWrapper<OCL_RUNTIME_PROFILING, IN OUT D3DDDI_MAPGPUVIRTUALADDRESS *> mapGpuVirtualAddress;
    ThkWrapper<OCL_RUNTIME_PROFILING, IN OUT D3DDDI_RESERVEGPUVIRTUALADDRESS *> reserveGpuVirtualAddress;
    ThkWrapper<OCL_RUNTIME_PROFILING, IN CONST D3DKMT_FREEGPUVIRTUALADDRESS *> freeGpuVirtualAddress;
    ThkWrapper<OCL_RUNTIME_PROFILING, IN CONST D3DKMT_UPDATEGPUVIRTUALADDRESS *> updateGpuVirtualAddress;
    ThkWrapper<OCL_RUNTIME_PROFILING, IN CONST D3DKMT_SUBMITCOMMAND *> submitCommand;
    ThkWrapper<OCL_RUNTIME_PROFILING, IN OUT D3DDDI_MAKERESIDENT *> makeResident;
    ThkWrapper<OCL_RUNTIME_PROFILING, IN D3DKMT_EVICT *> evict;
    ThkWrapper<OCL_RUNTIME_PROFILING, IN D3DKMT_REGISTERTRIMNOTIFICATION *> registerTrimNotification;
    ThkWrapper<OCL_RUNTIME_PROFILING, IN D3DKMT_UNREGISTERTRIMNOTIFICATION *> unregisterTrimNotification;

    // For debug purposes
    ThkWrapper<OCL_RUNTIME_PROFILING, IN OUT D3DKMT_GETDEVICESTATE *> getDeviceState;

    // Returns the value of the `initialized` flag.
    bool isInitialized() const {
        return initialized;
    }

  protected:
    // Overridable hook for resolving the entry points; returns success.
    virtual bool getAllProcAddresses();
    // Defaults to false so the flag is never read uninitialized.
    bool initialized = false;

  private:
    OCLRT::Windows::OsLibrary gdiDll;
    static const std::string gdiDllName;
    static const std::string gdiMockDllName;
};
} // namespace OCLRT

View File

@@ -0,0 +1,33 @@
/*
* Copyright (c) 2017, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
// DLL file names used on Windows. The two compiler-related libraries have
// bitness-specific names chosen at compile time via _WIN64; the GDI library
// name is the same for both.
// NOTE(review): these are non-const pointers with external linkage, presumably
// so other translation units can reference (or tests can override) them — confirm
// before changing their type.
namespace Os {
#if defined(_WIN64)
// Names used when building for 64-bit Windows.
const char *frontEndDllName = "igdfcl64.dll";
const char *igcDllName = "igc64.dll";
#else
// Names used when building for 32-bit Windows.
const char *frontEndDllName = "igdfcl32.dll";
const char *igcDllName = "igc32.dll";
#endif
const char *gdiDllName = "gdi32.dll";
}

View File

@@ -0,0 +1,76 @@
/*
* Copyright (c) 2017, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#include "runtime/os_interface/windows/wddm.h"
#include "os_interface.h"
namespace OCLRT {

// Initial value of the flag returned by are64kbPagesEnabled().
bool OSInterface::osEnabled64kbPages = true;

// Allocates the Windows-specific implementation object; released in the destructor.
OSInterface::OSInterface() {
    osInterfaceImpl = new OSInterfaceImpl();
}

OSInterface::~OSInterface() {
    delete osInterfaceImpl;
}

// Forwards to the implementation object.
uint32_t OSInterface::getHwContextId() const {
    return osInterfaceImpl->getHwContextId();
}

// Starts without an attached Wddm; one is supplied later through setWddm().
OSInterface::OSInterfaceImpl::OSInterfaceImpl() {
    wddm = nullptr;
}

// Returns the adapter handle of the attached Wddm, or 0 when no Wddm is
// attached (same convention as getHwContextId()).
D3DKMT_HANDLE OSInterface::OSInterfaceImpl::getAdapterHandle() const {
    if (wddm == nullptr) {
        return 0;
    }
    return wddm->getAdapter();
}

// Returns the device handle of the attached Wddm, or 0 when no Wddm is attached.
D3DKMT_HANDLE OSInterface::OSInterfaceImpl::getDeviceHandle() const {
    if (wddm == nullptr) {
        return 0;
    }
    return wddm->getDevice();
}

// Returns the escape entry point of the attached Wddm, or nullptr when no
// Wddm is attached.
PFND3DKMT_ESCAPE OSInterface::OSInterfaceImpl::getEscapeHandle() const {
    if (wddm == nullptr) {
        return nullptr;
    }
    return wddm->getEscapeHandle();
}

// Returns the hardware context id of the attached Wddm, or 0 when no Wddm is attached.
uint32_t OSInterface::OSInterfaceImpl::getHwContextId() const {
    if (wddm == nullptr) {
        return 0;
    }
    return wddm->getHwContextId();
}

bool OSInterface::are64kbPagesEnabled() {
    return osEnabled64kbPages;
}

// Returns the attached Wddm (may be nullptr).
Wddm *OSInterface::OSInterfaceImpl::getWddm() const {
    return wddm;
}

// Attaches a Wddm; the pointer is stored as-is and not deleted by this class here.
void OSInterface::OSInterfaceImpl::setWddm(Wddm *wddm) {
    this->wddm = wddm;
}
} // namespace OCLRT

View File

@@ -0,0 +1,45 @@
/*
* Copyright (c) 2017, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#pragma once
#include "runtime/os_interface/os_interface.h"
#include "profileapi.h"
#include "runtime/os_interface/windows/windows_wrapper.h"
#include <d3dkmthk.h>
namespace OCLRT {
class Wddm;
// Windows implementation object behind OSInterface. It holds a non-owning
// pointer to a Wddm instance and exposes a few values obtained from it.
class OSInterface::OSInterfaceImpl {
  public:
    OSInterfaceImpl();

    // Attach / query the Wddm instance.
    void setWddm(Wddm *wddm);
    Wddm *getWddm() const;

    // Values obtained from the attached Wddm.
    uint32_t getHwContextId() const;
    D3DKMT_HANDLE getAdapterHandle() const;
    D3DKMT_HANDLE getDeviceHandle() const;
    PFND3DKMT_ESCAPE getEscapeHandle() const;

  protected:
    Wddm *wddm;
};
} // namespace OCLRT

View File

@@ -0,0 +1,67 @@
/*
* Copyright (c) 2017, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#include "runtime/os_interface/os_library.h"
#include "os_library.h"
#include "DriverStore.h"
namespace OCLRT {
// Creates a Windows library wrapper for `name`. Returns the new object when
// the library handle was obtained, otherwise destroys it and returns nullptr.
// The caller owns the returned pointer.
OsLibrary *OsLibrary::load(const std::string &name) {
    auto *library = new Windows::OsLibrary(name);
    if (library->isLoaded()) {
        return library;
    }
    delete library;
    return nullptr;
}
namespace Windows {
OsLibrary::OsLibrary(const std::string &name) {
if (name.empty()) {
this->handle = GetModuleHandleA(nullptr);
} else {
this->handle = LoadDependency(name.c_str());
if (this->handle == nullptr) {
this->handle = ::LoadLibraryA(name.c_str());
}
}
}
OsLibrary::~OsLibrary() {
if ((this->handle != nullptr) && (this->handle != GetModuleHandleA(nullptr))) {
::FreeLibrary(this->handle);
this->handle = nullptr;
}
}
bool OsLibrary::isLoaded() {
return this->handle != nullptr;
}
void *OsLibrary::getProcAddress(const std::string &procName) {
return ::GetProcAddress(this->handle, procName.c_str());
}
}
}

View File

@@ -0,0 +1,44 @@
/*
* Copyright (c) 2017, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#pragma once
#include "runtime/os_interface/os_library.h"
#define UMDF_USING_NTSTATUS
#include "runtime/os_interface/windows/windows_wrapper.h"
namespace OCLRT {
namespace Windows {
class OsLibrary : public OCLRT::OsLibrary {
private:
HMODULE handle;
public:
OsLibrary(const std::string &name);
~OsLibrary();
bool isLoaded();
void *getProcAddress(const std::string &procName);
};
}
}

View File

@@ -0,0 +1,132 @@
/*
* Copyright (c) 2017, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#include <memory>
#include <ntstatus.h>
//For not redefining STATUS_* from ntstatus.h
#define WIN32_NO_STATUS
#include "runtime/os_interface/windows/wddm.h"
#include "runtime/os_interface/windows/os_interface.h"
#include "runtime/os_interface/windows/os_time.h"
#undef WIN32_NO_STATUS
namespace OCLRT {
// Issues the GPU/CPU timestamp escape through `wddm`, using `escapeInfo` as
// the private driver data buffer (input is written to it before the call).
// Returns true only when `wddm` is non-null and the escape reports
// STATUS_SUCCESS; the payload's own RetCode is not inspected here.
bool runEscape(Wddm *wddm, TimeStampDataHeader &escapeInfo) {
    if (wddm == nullptr) {
        return false;
    }

    GTDIGetGpuCpuTimestampsIn in = {GTDI_FNC_GET_GPU_CPU_TIMESTAMPS};

    escapeInfo.m_Header.EscapeCode = GFX_ESCAPE_IGPA_INSTRUMENTATION_CONTROL;
    // The header size is set to the size of the output structure.
    escapeInfo.m_Header.Size = static_cast<uint32_t>(sizeof(GTDIGetGpuCpuTimestampsOut));
    escapeInfo.m_Data.m_In = in;

    D3DKMT_ESCAPE escapeCommand = {0};
    escapeCommand.Flags.Value = 0;
    escapeCommand.hAdapter = (D3DKMT_HANDLE)0;
    escapeCommand.hContext = (D3DKMT_HANDLE)0; // escape is not context specific
    escapeCommand.hDevice = (D3DKMT_HANDLE)0;  // escape not device specific
    escapeCommand.pPrivateDriverData = &escapeInfo;
    escapeCommand.PrivateDriverDataSize = sizeof(escapeInfo);
    escapeCommand.Type = D3DKMT_ESCAPE_DRIVERPRIVATE;

    auto status = wddm->escape(escapeCommand);
    return status == STATUS_SUCCESS;
}
// Fills `pGpuCpuTime` with a CPU time in nanoseconds and a raw GPU timestamp
// obtained from the timestamp escape. Both fields are zeroed first and stay
// zero when the escape fails, in which case false is returned.
bool OSTimeWin::getCpuGpuTime(TimeStampData *pGpuCpuTime) {
    pGpuCpuTime->CPUTimeinNS = 0;
    pGpuCpuTime->GPUTimeStamp = 0;

    TimeStampDataHeader escapeInfo = {0};
    if (!runEscape(wddm, escapeInfo)) {
        return false;
    }

    const auto &timestamps = escapeInfo.m_Data.m_Out;
    // Convert CPU ticks to nanoseconds using the frequency reported by the escape.
    const double nsPerCpuTick = 1000000000.0 / timestamps.cpuPerfFreq;
    const double cpuNanoseconds = timestamps.cpuPerfTicks * nsPerCpuTick;
    pGpuCpuTime->CPUTimeinNS = (unsigned long long)cpuNanoseconds;
    pGpuCpuTime->GPUTimeStamp = (unsigned long long)timestamps.gpuPerfTicks;
    return true;
}
bool OSTimeWin::getCpuTime(uint64_t *timeStamp) {
uint64_t time;
uint64_t frequency;
QueryPerformanceCounter((LARGE_INTEGER *)&time);
QueryPerformanceFrequency((LARGE_INTEGER *)&frequency);
*timeStamp = time * NSEC_PER_SEC / frequency;
return true;
};
// Factory for the Windows OSTime implementation.
std::unique_ptr<OSTime> OSTime::create(OSInterface *osInterface) {
    std::unique_ptr<OSTime> osTime(new OSTimeWin(osInterface));
    return osTime;
}
// Stores the OS interface, caches the performance-counter frequency and, when
// an interface is supplied, picks up its Wddm (otherwise wddm stays nullptr).
OSTimeWin::OSTimeWin(OSInterface *osInterface) : wddm(nullptr) {
    this->osInterface = osInterface;
    QueryPerformanceFrequency(&frequency);
    if (osInterface != nullptr) {
        wddm = osInterface->get()->getWddm();
    }
}
// Returns the host timer resolution in nanoseconds per tick, derived from the
// cached performance-counter frequency; 0 when that frequency is zero.
double OSTimeWin::getHostTimerResolution() const {
    if (frequency.QuadPart == 0) {
        return 0;
    }
    return 1e9 / frequency.QuadPart;
}
// Returns nanoseconds per GPU timestamp tick, computed from the GPU frequency
// reported by the timestamp escape; 0 when the escape fails. `hwInfo` is not
// used by this implementation.
double OSTimeWin::getDynamicDeviceTimerResolution(HardwareInfo const &hwInfo) const {
    TimeStampDataHeader escapeInfo = {0};
    if (!runEscape(wddm, escapeInfo)) {
        return 0;
    }
    return 1000000000.0 / (double)escapeInfo.m_Data.m_Out.gpuPerfFreq;
}
uint64_t OSTimeWin::getCpuRawTimestamp() {
LARGE_INTEGER cpuRawTimestamp = {};
QueryPerformanceCounter(&cpuRawTimestamp);
return cpuRawTimestamp.QuadPart;
}
} // namespace OCLRT

View File

@@ -0,0 +1,88 @@
/*
* Copyright (c) 2017, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#pragma once
#include "runtime/os_interface/windows/windows_wrapper.h"
#include "gfxEscape.h"
#include "runtime/os_interface/os_time.h"
namespace OCLRT {
class Wddm;
// Windows implementation of OSTime, based on the performance counter and on
// a driver escape issued through Wddm for GPU timestamps.
class OSTimeWin : public OSTime {
  public:
    OSTimeWin(OSInterface *osInterface);
    bool getCpuTime(uint64_t *timeStamp) override;
    bool getCpuGpuTime(TimeStampData *pGpuCpuTime) override;
    double getHostTimerResolution() const override;
    double getDynamicDeviceTimerResolution(HardwareInfo const &hwInfo) const override;
    uint64_t getCpuRawTimestamp() override;

  protected:
    // Default values keep objects built through the protected default
    // constructor in a defined state: a null wddm makes the escape-based
    // queries fail cleanly, and a zero frequency makes
    // getHostTimerResolution() return 0.
    Wddm *wddm = nullptr;
    LARGE_INTEGER frequency = {};
    OSTimeWin() {}
};
// Function selector carried in the input part of the escape payload. Only the
// GPU/CPU timestamp query is declared here.
// NOTE(review): the numeric value presumably has to match the driver-side
// definition — confirm before changing.
typedef enum GTDI_ESCAPE_FUNCTION_ENUM {
GTDI_FNC_GET_GPU_CPU_TIMESTAMPS = 25
} GTDI_ESCAPE_FUNCTION;
// Input part of the escape payload: just the function selector.
typedef struct GTDIBaseInStruct {
GTDI_ESCAPE_FUNCTION Function;
} GTDIHeaderIn;
// The timestamp query's input type is an alias of the bare header.
typedef GTDIHeaderIn GTDIGetGpuCpuTimestampsIn;
// Result codes stored in GTDIGetGpuCpuTimestampsOut::RetCode.
// Values after GTDI_RET_OK are assigned implicitly, so the order of the
// enumerators determines their numeric values.
// NOTE(review): presumably mirrors a driver-side enum — confirm before reordering.
typedef enum GTDI_RETURN_CODE_ENUM {
GTDI_RET_OK = 0,
GTDI_RET_FAILED,
GTDI_RET_NOT_CONNECTED,
GTDI_RET_HW_METRICS_NOT_ENABLED,
GTDI_RET_CONTEXT_ID_MISMATCH,
GTDI_RET_NOT_SUPPORTED,
GTDI_RET_PENDING,
GTDI_RET_INVALID_CONFIGURATION,
GTDI_RET_CONCURRENT_API_ENABLED,
GTDI_RET_NO_INFORMATION, // for GTDI_FNC_GET_ERROR_INFO escape only
// ...
// NOTE(review): the all-ones sentinel presumably forces a 32-bit wide enum — confirm.
GTDI_RET_MAX = 0xFFFFFFFF
} GTDI_RETURN_CODE;
// Output part of the timestamp escape payload. OSTimeWin reads the tick counts
// and frequencies from here to compute CPU nanoseconds and the GPU timer
// resolution.
typedef struct GTDIGetGpuCpuTimestampsOutStruct {
GTDI_RETURN_CODE RetCode; // Result of the call
uint64_t gpuPerfTicks; // in GPU_timestamp_ticks
uint64_t cpuPerfTicks; // in CPU_timestamp_ticks
uint64_t gpuPerfFreq; // in GPU_timestamp_ticks/s
uint64_t cpuPerfFreq; // in CPU_timestamp_ticks/s
} GTDIGetGpuCpuTimestampsOut;
// Buffer handed to the escape call as private driver data: an escape header
// followed by a union that carries the request (m_In) before the call and from
// which the result (m_Out) is read afterwards.
struct TimeStampDataHeader {
GFX_ESCAPE_HEADER_T m_Header;
union {
GTDIGetGpuCpuTimestampsIn m_In;
GTDIGetGpuCpuTimestampsOut m_Out;
} m_Data;
};
} // namespace OCLRT

View File

@@ -0,0 +1,52 @@
/*
* Copyright (c) 2017, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#include "config.h"
#include "runtime/os_interface/windows/windows_wrapper.h"
#include "performance_counters_win.h"
#include "runtime/os_interface/windows/os_interface.h"
namespace OCLRT {
// Factory for the Windows PerformanceCounters implementation.
std::unique_ptr<PerformanceCounters> PerformanceCounters::create(OSTime *osTime) {
    std::unique_ptr<PerformanceCounters> counters(new PerformanceCountersWin(osTime));
    return counters;
}
// Fills the callback data with the adapter handle, device handle and escape
// entry point obtained from the OS interface. The handles are widened to
// pointer size before being stored as void pointers.
PerformanceCountersWin::PerformanceCountersWin(OSTime *osTime) : PerformanceCounters(osTime) {
    cbData.hAdapter = reinterpret_cast<void *>(static_cast<UINT_PTR>(osInterface->get()->getAdapterHandle()));
    cbData.hDevice = reinterpret_cast<void *>(static_cast<UINT_PTR>(osInterface->get()->getDeviceHandle()));
    cbData.pfnEscapeCb = osInterface->get()->getEscapeHandle();
}
// Stops auto-sampling if an auto-sampling interface is still held, then clears
// the interface pointer and the availability flag.
PerformanceCountersWin::~PerformanceCountersWin() {
    if (!pAutoSamplingInterface) {
        return;
    }
    autoSamplingStopFunc(&pAutoSamplingInterface);
    pAutoSamplingInterface = nullptr;
    available = false;
}
// Runs the base-class initialization, then invokes the two function-pointer
// members: setAvailableFunc with `true` and verifyEnableFunc with the callback
// data filled in by the constructor.
void PerformanceCountersWin::initialize(const HardwareInfo *hwInfo) {
PerformanceCounters::initialize(hwInfo);
// Both calls go through member function pointers rather than calling the
// instrumentation functions directly.
setAvailableFunc(true);
verifyEnableFunc(cbData);
}
} // namespace OCLRT

View File

@@ -0,0 +1,39 @@
/*
* Copyright (c) 2017, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#pragma once
#include "runtime/os_interface/performance_counters.h"
#include "runtime/os_interface/windows/os_interface.h"
namespace OCLRT {
// Windows implementation of PerformanceCounters.
class PerformanceCountersWin : public virtual PerformanceCounters {
  public:
    PerformanceCountersWin(OSTime *osTime);
    ~PerformanceCountersWin() override;

    void initialize(const HardwareInfo *hwInfo) override;

  protected:
    // Function pointers that default to the instrumentation entry points and
    // are invoked from initialize().
    // NOTE(review): presumably kept as members so they can be substituted
    // (e.g. in tests) — confirm.
    decltype(&instrSetAvailable) setAvailableFunc = &instrSetAvailable;
    decltype(&instrEscVerifyEnable) verifyEnableFunc = &instrEscVerifyEnable;
};
} // namespace OCLRT

View File

@@ -0,0 +1,97 @@
/*
* Copyright (c) 2017, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#include "runtime/os_interface/print.h"
#include <cstdint>
#include <cctype>
#include <iostream>
#include "runtime/os_interface/windows/windows_wrapper.h"
#include <io.h>
#include <fcntl.h>
void printToSTDOUT(const char *str) {
int bytesRead = 0;
int fd = 0;
HANDLE stdoutDuplicate = 0;
FILE *pFile = nullptr;
if ((DuplicateHandle(GetCurrentProcess(), GetStdHandle(STD_OUTPUT_HANDLE),
GetCurrentProcess(), &stdoutDuplicate, 0L, TRUE, DUPLICATE_SAME_ACCESS))) {
if ((fd = _open_osfhandle((DWORD_PTR)stdoutDuplicate, _O_TEXT)) &&
(pFile = _fdopen(fd, "w"))) {
fprintf_s(pFile, "%s", str);
fflush(pFile);
fclose(pFile);
}
}
}
// Formats a single `value` into `output` (capacity `outputSize`) according to
// `format`, which is expected to end with its conversion specifier.
//
// Two cases get special treatment:
//  - "...hhd"/"...hhi" and other "...hh?" conversions: the value is narrowed to
//    (signed or unsigned) char and passed as a 32-bit integer.
//  - "...F": formatted with 'f' instead and the whole output is upper-cased.
//
// Returns the result of sprintf_s converted to size_t.
template <class T>
size_t simple_sprintf(char *output, size_t outputSize, const char *format, T value) {
#if (_MSC_VER == 1800)
    _set_output_format(_TWO_DIGIT_EXPONENT);
#endif
    const size_t len = strlen(format);
    // Conversion character, or '\0' for an empty format (avoids reading format[-1]).
    const char conversion = (len > 0) ? format[len - 1] : '\0';

    if (len > 3 && format[len - 2] == 'h' && format[len - 3] == 'h') {
        if (conversion == 'i' || conversion == 'd') {
            int32_t fixedValue = (char)value;
            return sprintf_s(output, outputSize, format, fixedValue);
        } else {
            uint32_t fixedValue = (unsigned char)value;
            return sprintf_s(output, outputSize, format, fixedValue);
        }
    } else if (conversion == 'F') {
        char formatCopy[1024];
        strcpy_s(formatCopy, sizeof(formatCopy), format);
        formatCopy[len - 1] = 'f';
        // Keep the signed result: a negative (error) value must not be used as
        // a loop bound after conversion to size_t.
        const int written = sprintf_s(output, outputSize, formatCopy, value);
        for (int i = 0; i < written; i++) {
            // toupper requires a value representable as unsigned char.
            output[i] = static_cast<char>(std::toupper(static_cast<unsigned char>(output[i])));
        }
        return static_cast<size_t>(written);
    } else {
        return sprintf_s(output, outputSize, format, value);
    }
}
// String overload: forwards directly to sprintf_s with no format rewriting.
size_t simple_sprintf(char *output, size_t outputSize, const char *format, const char *value) {
    const int written = sprintf_s(output, outputSize, format, value);
    return static_cast<size_t>(written);
}
// Pointer overload: forwards directly to sprintf_s with no format rewriting.
size_t simple_sprintf(char *output, size_t outputSize, const char *format, void *value) {
    const int written = sprintf_s(output, outputSize, format, value);
    return static_cast<size_t>(written);
}
// Explicit instantiations of the generic simple_sprintf for the supported
// floating-point, character and fixed-width integer value types.
// NOTE(review): presumably needed because the template is defined in this
// source file and callers only see its declaration — confirm against print.h.
template size_t simple_sprintf<float>(char *output, size_t output_size, const char *format, float value);
template size_t simple_sprintf<double>(char *output, size_t output_size, const char *format, double value);
template size_t simple_sprintf<char>(char *output, size_t output_size, const char *format, char value);
template size_t simple_sprintf<int8_t>(char *output, size_t output_size, const char *format, int8_t value);
template size_t simple_sprintf<int16_t>(char *output, size_t output_size, const char *format, int16_t value);
template size_t simple_sprintf<int32_t>(char *output, size_t output_size, const char *format, int32_t value);
template size_t simple_sprintf<int64_t>(char *output, size_t output_size, const char *format, int64_t value);
template size_t simple_sprintf<uint8_t>(char *output, size_t output_size, const char *format, uint8_t value);
template size_t simple_sprintf<uint16_t>(char *output, size_t output_size, const char *format, uint16_t value);
template size_t simple_sprintf<uint32_t>(char *output, size_t output_size, const char *format, uint32_t value);
template size_t simple_sprintf<uint64_t>(char *output, size_t output_size, const char *format, uint64_t value);

View File

@@ -0,0 +1,44 @@
/*
* Copyright (c) 2017, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#pragma once
#include "runtime/utilities/debug_settings_reader.h"
#include <string>
#include <stdint.h>
namespace OCLRT {
// SettingsReader implementation that looks settings up under a registry key.
// The key defaults to the path stored in igdrclRegKey and can be replaced
// through the one-argument constructor.
class RegistryReader : public SettingsReader {
  public:
    RegistryReader() {}
    RegistryReader(std::string regKey) : igdrclRegKey(regKey) {}

    // Each overload takes the setting name and a fallback value of the
    // requested type.
    int32_t getSetting(const char *settingName, int32_t defaultValue) override;
    bool getSetting(const char *settingName, bool defaultValue) override;
    std::string getSetting(const char *settingName, const std::string &value) override;

  private:
    std::string igdrclRegKey = "Software\\Intel\\IGFX\\OCL";
};
} // namespace OCLRT

View File

@@ -0,0 +1,176 @@
/*
* Copyright (c) 2017, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#pragma once
#include "runtime/utilities/api_intercept.h"
namespace OCLRT {
// Numeric identifiers attached to timed system calls so that individual
// measurements can be told apart. Values are spelled out explicitly so that
// inserting a new entry never renumbers existing ones.
enum SystemCallsIds {
    // Sequential IDs 1..46.
    SYSTIMER_ID_OPENADAPTERFROMHDC = 1,
    SYSTIMER_ID_OPENADAPTERFROMLUID = 2,
    SYSTIMER_ID_CLOSEADAPTER = 3,
    SYSTIMER_ID_QUERYADAPTERINFO = 4,
    SYSTIMER_ID_ESCAPE = 5,
    SYSTIMER_ID_CREATEDEVICE = 6,
    SYSTIMER_ID_DESTROYDEVICE = 7,
    SYSTIMER_ID_CREATECONTEXT = 8,
    SYSTIMER_ID_DESTROYCONTEXT = 9,
    SYSTIMER_ID_CREATEALLOCATION = 10,

    SYSTIMER_ID_DESTROYALLOCATION = 11,
    SYSTIMER_ID_OPENRESOURCE = 12,
    SYSTIMER_ID_QUERYRESOURCEINFO = 13,
    SYSTIMER_ID_LOCK = 14,
    SYSTIMER_ID_UNLOCK = 15,
    SYSTIMER_ID_RENDER = 16,
    SYSTIMER_ID_CREATESYNCHRONIZATIONOBJECT = 17,
    SYSTIMER_ID_DESTROYSYNCHRONIZATIONOBJECT = 18,
    SYSTIMER_ID_SIGNALSYNCHRONIZATIONOBJECT = 19,
    SYSTIMER_ID_WAITFORSYNCHRONIZATIONOBJECT = 20,

    SYSTIMER_ID_CREATESYNCHRONIZATIONOBJECT2 = 21,
    SYSTIMER_ID_GETDEVICESTATE = 22,
    SYSTIMER_ID_MAKERESIDENT = 23,
    SYSTIMER_ID_EVICT = 24,
    SYSTIMER_ID_WAITFORSYNCHRONIZATIONOBJECTFROMCPU = 25,
    SYSTIMER_ID_SIGNALSYNCHRONIZATIONOBJECTFROMCPU = 26,
    SYSTIMER_ID_WAITFORSYNCHRONIZATIONOBJECTFROMGPU = 27,
    SYSTIMER_ID_SIGNALSYNCHRONIZATIONOBJECTFROMGPU = 28,
    SYSTIMER_ID_CREATEPAGINGQUEUE = 29,
    SYSTIMER_ID_D3DDDI_DESTROYPAGINGQUEUE = 30,

    SYSTIMER_ID_LOCK2 = 31,
    SYSTIMER_ID_UNLOCK2 = 32,
    SYSTIMER_ID_INVALIDATECACHE = 33,
    SYSTIMER_ID_D3DDDI_MAPGPUVIRTUALADDRESS = 34,
    SYSTIMER_ID_D3DDDI_RESERVEGPUVIRTUALADDRESS = 35,
    SYSTIMER_ID_FREEGPUVIRTUALADDRESS = 36,
    SYSTIMER_ID_UPDATEGPUVIRTUALADDRESS = 37,
    SYSTIMER_ID_CREATECONTEXTVIRTUAL = 38,
    SYSTIMER_ID_SUBMITCOMMAND = 39,
    SYSTIMER_ID_OPENSYNCOBJECTFROMNTHANDLE2 = 40,

    SYSTIMER_ID_OPENSYNCOBJECTNTHANDLEFROMNAME = 41,
    SYSTIMER_ID_DESTROYALLOCATION2 = 42,
    SYSTIMER_ID_REGISTERTRIMNOTIFICATION = 43,
    SYSTIMER_ID_UNREGISTERTRIMNOTIFICATION = 44,
    SYSTIMER_ID_QUERYRESOURCEINFOFROMNTHANDLE = 45,
    SYSTIMER_ID_OPENRESOURCEFROMNTHANDLE = 46,

    // Non-sequential IDs, spaced in steps of 100.
    SYSTIMER_ID_SLEEP_0 = 100,
    SYSTIMER_ID_WAIT_FOR_KMD = 200,
    SYSTIMER_ID_CMD_COMPLETE = 300,
    SYSTIMER_ID_OGL = 400,
};
// Wrapper around a single-argument function pointer of type
// NTSTATUS(APIENTRY *)(Param).
// When UseTimer is true, each call is bracketed by SYSTEM_ENTER / SYSTEM_LEAVE and
// tagged with the SystemCallsIds value registered for Param; when false, the call
// is forwarded to mFunc directly.
template <bool UseTimer, typename Param>
class ThkWrapper {
typedef NTSTATUS(APIENTRY *Func)(Param);
public:
// The wrapped function pointer; public and assigned through operator= below.
Func mFunc;
// Invokes the wrapped function and returns its NTSTATUS.
inline NTSTATUS operator()(Param param) const {
if (UseTimer) {
// SYSTEM_ENTER / SYSTEM_LEAVE are macros not defined in this header
// (presumably provided by api_intercept.h - TODO confirm).
SYSTEM_ENTER()
NTSTATUS Status;
Status = mFunc(param);
unsigned int ID = getId<Param>();
SYSTEM_LEAVE(ID);
return Status;
} else {
return mFunc(param);
}
}
// Stores func in mFunc; the result of that assignment is returned as T&.
template <class T>
inline T &operator=(T func) {
return mFunc = func;
}
// This operator overload is for implicit casting ThkWrapper struct to Function Pointer in GetPfn methods like GetEscapePfn() or for comparing against NULL function pointer
operator Func() const {
return mFunc;
}
private:
// Default template for GetID( ) for Thk function, causing compilation error !!
// Returns ID for specific ThkWrapper type
// NOTE(review): the unconditional static_assert(0, ...) in this primary template,
// the reuse of the class template parameter name Param, and the in-class explicit
// specializations generated by GET_ID below may not be accepted by every compiler -
// confirm before building this header with a different toolchain.
template <class Param>
unsigned int getId() const {
static_assert(0, "Template specialization for GetID is required for each new THKWrapper");
return 0;
}
// Template specializations of GetID(), required for every new Thk function
// GET_ID(TYPE, VALUE) expands to a getId<TYPE>() specialization returning VALUE.
// The macro is not #undef'ed within this class.
#define GET_ID(TYPE, VALUE) \
template <> \
unsigned int getId<TYPE>() const { \
return VALUE; \
}
// One entry per supported parameter type, mapping it to its SystemCallsIds value.
GET_ID(D3DKMT_OPENADAPTERFROMHDC *, SYSTIMER_ID_OPENADAPTERFROMHDC)
GET_ID(D3DKMT_OPENADAPTERFROMLUID *, SYSTIMER_ID_OPENADAPTERFROMLUID)
GET_ID(CONST D3DKMT_CLOSEADAPTER *, SYSTIMER_ID_CLOSEADAPTER)
GET_ID(CONST D3DKMT_QUERYADAPTERINFO *, SYSTIMER_ID_QUERYADAPTERINFO)
GET_ID(CONST D3DKMT_ESCAPE *, SYSTIMER_ID_ESCAPE)
GET_ID(D3DKMT_CREATEDEVICE *, SYSTIMER_ID_CREATEDEVICE)
GET_ID(CONST D3DKMT_DESTROYDEVICE *, SYSTIMER_ID_DESTROYDEVICE)
GET_ID(D3DKMT_CREATECONTEXT *, SYSTIMER_ID_CREATECONTEXT)
GET_ID(CONST D3DKMT_DESTROYCONTEXT *, SYSTIMER_ID_DESTROYCONTEXT)
GET_ID(D3DKMT_CREATEALLOCATION *, SYSTIMER_ID_CREATEALLOCATION)
GET_ID(CONST D3DKMT_DESTROYALLOCATION *, SYSTIMER_ID_DESTROYALLOCATION)
GET_ID(D3DKMT_OPENRESOURCE *, SYSTIMER_ID_OPENRESOURCE)
GET_ID(D3DKMT_QUERYRESOURCEINFO *, SYSTIMER_ID_QUERYRESOURCEINFO)
GET_ID(D3DKMT_LOCK *, SYSTIMER_ID_LOCK)
GET_ID(CONST D3DKMT_UNLOCK *, SYSTIMER_ID_UNLOCK)
GET_ID(D3DKMT_RENDER *, SYSTIMER_ID_RENDER)
GET_ID(D3DKMT_CREATESYNCHRONIZATIONOBJECT *, SYSTIMER_ID_CREATESYNCHRONIZATIONOBJECT)
GET_ID(CONST D3DKMT_DESTROYSYNCHRONIZATIONOBJECT *, SYSTIMER_ID_DESTROYSYNCHRONIZATIONOBJECT)
GET_ID(CONST D3DKMT_SIGNALSYNCHRONIZATIONOBJECT *, SYSTIMER_ID_SIGNALSYNCHRONIZATIONOBJECT)
GET_ID(D3DKMT_WAITFORSYNCHRONIZATIONOBJECT *, SYSTIMER_ID_WAITFORSYNCHRONIZATIONOBJECT)
GET_ID(D3DKMT_CREATESYNCHRONIZATIONOBJECT2 *, SYSTIMER_ID_CREATESYNCHRONIZATIONOBJECT2)
GET_ID(D3DKMT_GETDEVICESTATE *, SYSTIMER_ID_GETDEVICESTATE)
GET_ID(D3DDDI_MAKERESIDENT *, SYSTIMER_ID_MAKERESIDENT)
GET_ID(D3DKMT_EVICT *, SYSTIMER_ID_EVICT)
GET_ID(CONST D3DKMT_WAITFORSYNCHRONIZATIONOBJECTFROMCPU *, SYSTIMER_ID_WAITFORSYNCHRONIZATIONOBJECTFROMCPU)
GET_ID(CONST D3DKMT_SIGNALSYNCHRONIZATIONOBJECTFROMCPU *, SYSTIMER_ID_SIGNALSYNCHRONIZATIONOBJECTFROMCPU)
GET_ID(CONST D3DKMT_WAITFORSYNCHRONIZATIONOBJECTFROMGPU *, SYSTIMER_ID_WAITFORSYNCHRONIZATIONOBJECTFROMGPU)
GET_ID(CONST D3DKMT_SIGNALSYNCHRONIZATIONOBJECTFROMGPU *, SYSTIMER_ID_SIGNALSYNCHRONIZATIONOBJECTFROMGPU)
GET_ID(D3DKMT_CREATEPAGINGQUEUE *, SYSTIMER_ID_CREATEPAGINGQUEUE)
GET_ID(D3DDDI_DESTROYPAGINGQUEUE *, SYSTIMER_ID_D3DDDI_DESTROYPAGINGQUEUE)
GET_ID(D3DKMT_LOCK2 *, SYSTIMER_ID_LOCK2)
GET_ID(CONST D3DKMT_UNLOCK2 *, SYSTIMER_ID_UNLOCK2)
GET_ID(CONST D3DKMT_INVALIDATECACHE *, SYSTIMER_ID_INVALIDATECACHE)
GET_ID(D3DDDI_MAPGPUVIRTUALADDRESS *, SYSTIMER_ID_D3DDDI_MAPGPUVIRTUALADDRESS)
GET_ID(D3DDDI_RESERVEGPUVIRTUALADDRESS *, SYSTIMER_ID_D3DDDI_RESERVEGPUVIRTUALADDRESS)
GET_ID(CONST D3DKMT_FREEGPUVIRTUALADDRESS *, SYSTIMER_ID_FREEGPUVIRTUALADDRESS)
GET_ID(CONST D3DKMT_UPDATEGPUVIRTUALADDRESS *, SYSTIMER_ID_UPDATEGPUVIRTUALADDRESS)
GET_ID(D3DKMT_CREATECONTEXTVIRTUAL *, SYSTIMER_ID_CREATECONTEXTVIRTUAL)
GET_ID(CONST D3DKMT_SUBMITCOMMAND *, SYSTIMER_ID_SUBMITCOMMAND)
GET_ID(D3DKMT_OPENSYNCOBJECTFROMNTHANDLE2 *, SYSTIMER_ID_OPENSYNCOBJECTFROMNTHANDLE2)
GET_ID(D3DKMT_OPENSYNCOBJECTNTHANDLEFROMNAME *, SYSTIMER_ID_OPENSYNCOBJECTNTHANDLEFROMNAME)
GET_ID(CONST D3DKMT_DESTROYALLOCATION2 *, SYSTIMER_ID_DESTROYALLOCATION2)
GET_ID(D3DKMT_REGISTERTRIMNOTIFICATION *, SYSTIMER_ID_REGISTERTRIMNOTIFICATION)
GET_ID(D3DKMT_UNREGISTERTRIMNOTIFICATION *, SYSTIMER_ID_UNREGISTERTRIMNOTIFICATION)
GET_ID(D3DKMT_OPENRESOURCEFROMNTHANDLE *, SYSTIMER_ID_OPENRESOURCEFROMNTHANDLE)
GET_ID(D3DKMT_QUERYRESOURCEINFOFROMNTHANDLE *, SYSTIMER_ID_QUERYRESOURCEINFOFROMNTHANDLE)
};
} // namespace OCLRT

View File

@@ -0,0 +1,992 @@
/*
* Copyright (c) 2017, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#include "runtime/helpers/aligned_memory.h"
#include "runtime/helpers/options.h"
#include "runtime/os_interface/windows/gdi_interface.h"
#include "runtime/gmm_helper/gmm_helper.h"
#include "runtime/gmm_helper/gmm_memory.h"
#include "runtime/gmm_helper/resource_info.h"
#include "runtime/gmm_helper/page_table_mngr.h"
#include "runtime/os_interface/windows/wddm.h"
#include "runtime/os_interface/windows/wddm_allocation.h"
#include "runtime/os_interface/windows/registry_reader.h"
#include "runtime/helpers/debug_helpers.h"
#include "runtime/helpers/hw_info.h"
#include "runtime/helpers/wddm_helper.h"
#include "runtime/command_stream/linear_stream.h"
#include <dxgi.h>
#include <ntstatus.h>
#include "CL/cl.h"
namespace OCLRT {
// Providers of the OS entry points; only declared here, their definitions are not
// visible in this part of the file.
extern Wddm::CreateDXGIFactoryFcn getCreateDxgiFactory();
extern Wddm::GetSystemInfoFcn getGetSystemInfo();
class WddmMemoryManager;
// Definitions of Wddm's static function pointers, each initialized with the value
// returned by the corresponding provider above.
Wddm::CreateDXGIFactoryFcn Wddm::createDxgiFactory = getCreateDxgiFactory();
Wddm::GetSystemInfoFcn Wddm::getSystemInfo = getGetSystemInfo();
// Constructs a Wddm around a caller-supplied Gdi. gdiAllocated is left false, so
// this constructor does not mark the Gdi as owned. All handles start at zero and
// the ADAPTER_INFO storage is allocated with 64-byte alignment and zero-filled.
Wddm::Wddm(Gdi *gdi) : initialized(false),
                       gdiAllocated(false),
                       gdi(gdi),
                       adapter(0),
                       context(0),
                       device(0),
                       pagingQueue(0),
                       pagingQueueSyncObject(0),
                       pagingFenceAddress(nullptr),
                       currentPagingFenceValue(0),
                       hwContextId(0),
                       trimCallbackHandle(nullptr) {
    // Plain value members that are not covered by the initializer list.
    node = GPUNODE_3D;
    maximumApplicationAddress = 0;
    adapterLuid.LowPart = 0;
    adapterLuid.HighPart = 0;

    // Adapter description buffer, filled in later by queryAdapterInfo().
    auto adapterInfoStorage = alignedMalloc(sizeof(ADAPTER_INFO), 64);
    adapterInfo = reinterpret_cast<ADAPTER_INFO *>(adapterInfoStorage);
    memset(adapterInfo, 0, sizeof(ADAPTER_INFO));

    // Reader bound to the graphics scheduler registry key.
    registryReader.reset(new RegistryReader("System\\CurrentControlSet\\Control\\GraphicsDrivers\\Scheduler"));

    gmmMemory.reset(GmmMemory::create());
}
// Default constructor: delegates to Wddm(Gdi *) with a newly allocated Gdi and
// then flags that Gdi as owned by this object via gdiAllocated.
Wddm::Wddm() : Wddm(new Gdi()) {
    gdiAllocated = true;
}
// Tears the object down: page table manager, adapter info buffer, Gmm context
// (only if this object was initialized), then context, paging queue, device and
// adapter. The Gdi is deleted last, and only when this object allocated it.
Wddm::~Wddm() {
    resetPageTableManager(nullptr);
    alignedFree(adapterInfo);

    if (initialized) {
        Gmm::destroyContext();
    }

    // Release OS objects in the order: context -> paging queue -> device -> adapter.
    destroyContext(context);
    destroyPagingQueue();
    destroyDevice();
    closeAdapter();

    if (gdiAllocated) {
        delete gdi;
    }
}
// Fills *adapterInfo with the ADAPTER_INFO of the adapter found by a temporary
// Wddm instance.
//
// devNum      - adapter index; only index 0 is supported, anything else fails.
// adapterInfo - destination buffer; must not be null.
//
// Returns true only when the Gdi is initialized, the adapter could be opened and
// its info queried; *adapterInfo is written only in that case.
bool Wddm::enumAdapters(unsigned int devNum, ADAPTER_INFO *adapterInfo) {
    if (devNum > 0 || adapterInfo == nullptr) {
        return false;
    }

    Wddm *wddm = createWddm();
    DEBUG_BREAK_IF(wddm == nullptr);
    if (wddm == nullptr) {
        // Without this check a failed createWddm() would be dereferenced below
        // whenever DEBUG_BREAK_IF is compiled out.
        return false;
    }

    // Each step runs only if the previous one succeeded.
    const bool success = wddm->gdi->isInitialized() &&
                         wddm->openAdapter() &&
                         wddm->queryAdapterInfo();
    if (success) {
        memcpy_s(adapterInfo, sizeof(ADAPTER_INFO), wddm->adapterInfo, sizeof(ADAPTER_INFO));
    }

    delete wddm;
    return success;
}
// Copies the feature flags listed below from adapterInfo->SkuTable into *table.
// Each COPY_FTR line names the destination field and the source field.
void Wddm::setupFeatureTableFromAdapterInfo(FeatureTable *table, ADAPTER_INFO *adapterInfo) {
    const auto &skuTable = adapterInfo->SkuTable;
#define COPY_FTR(DST_FIELD, SRC_FIELD) table->DST_FIELD = skuTable.SRC_FIELD
    COPY_FTR(ftrDesktop, FtrDesktop);
    COPY_FTR(ftrChannelSwizzlingXOREnabled, FtrChannelSwizzlingXOREnabled);

    COPY_FTR(ftrGtBigDie, FtrGtBigDie);
    COPY_FTR(ftrGtMediumDie, FtrGtMediumDie);
    COPY_FTR(ftrGtSmallDie, FtrGtSmallDie);

    COPY_FTR(ftrGT1, FtrGT1);
    COPY_FTR(ftrGT1_5, FtrGT1_5);
    COPY_FTR(ftrGT2, FtrGT2);
    COPY_FTR(ftrGT2_5, FtrGT2_5);
    COPY_FTR(ftrGT3, FtrGT3);
    COPY_FTR(ftrGT4, FtrGT4);

    COPY_FTR(ftrIVBM0M1Platform, FtrIVBM0M1Platform);
    COPY_FTR(ftrSGTPVSKUStrapPresent, FtrSGTPVSKUStrapPresent);
    COPY_FTR(ftrGTA, FtrGTA);
    COPY_FTR(ftrGTC, FtrGTC);
    COPY_FTR(ftrGTX, FtrGTX);
    COPY_FTR(ftr5Slice, Ftr5Slice);

    COPY_FTR(ftrGpGpuMidBatchPreempt, FtrGpGpuMidBatchPreempt);
    COPY_FTR(ftrGpGpuThreadGroupLevelPreempt, FtrGpGpuThreadGroupLevelPreempt);
    COPY_FTR(ftrGpGpuMidThreadLevelPreempt, FtrGpGpuMidThreadLevelPreempt);

    COPY_FTR(ftrIoMmuPageFaulting, FtrIoMmuPageFaulting);
    COPY_FTR(ftrWddm2Svm, FtrWddm2Svm);
    COPY_FTR(ftrPooledEuEnabled, FtrPooledEuEnabled);
    COPY_FTR(ftrResourceStreamer, FtrResourceStreamer);

    COPY_FTR(ftrPPGTT, FtrPPGTT);
    COPY_FTR(ftrSVM, FtrSVM);
    COPY_FTR(ftrEDram, FtrEDram);
    COPY_FTR(ftrL3IACoherency, FtrL3IACoherency);
    COPY_FTR(ftrIA32eGfxPTEs, FtrIA32eGfxPTEs);

    COPY_FTR(ftr3dMidBatchPreempt, Ftr3dMidBatchPreempt);
    COPY_FTR(ftr3dObjectLevelPreempt, Ftr3dObjectLevelPreempt);
    COPY_FTR(ftrPerCtxtPreemptionGranularityControl, FtrPerCtxtPreemptionGranularityControl);

    COPY_FTR(ftrDisplayYTiling, FtrDisplayYTiling);
    COPY_FTR(ftrTranslationTable, FtrTranslationTable);
    COPY_FTR(ftrUserModeTranslationTable, FtrUserModeTranslationTable);

    COPY_FTR(ftrEnableGuC, FtrEnableGuC);

    COPY_FTR(ftrFbc, FtrFbc);
    COPY_FTR(ftrFbc2AddressTranslation, FtrFbc2AddressTranslation);
    COPY_FTR(ftrFbcBlitterTracking, FtrFbcBlitterTracking);
    COPY_FTR(ftrFbcCpuTracking, FtrFbcCpuTracking);

    COPY_FTR(ftrVcs2, FtrVcs2);
    COPY_FTR(ftrVEBOX, FtrVEBOX);
    COPY_FTR(ftrSingleVeboxSlice, FtrSingleVeboxSlice);
    COPY_FTR(ftrULT, FtrULT);
    COPY_FTR(ftrLCIA, FtrLCIA);
    COPY_FTR(ftrGttCacheInvalidation, FtrGttCacheInvalidation);
    COPY_FTR(ftrTileMappedResource, FtrTileMappedResource);
    COPY_FTR(ftrAstcHdr2D, FtrAstcHdr2D);
    COPY_FTR(ftrAstcLdr2D, FtrAstcLdr2D);

    COPY_FTR(ftrStandardMipTailFormat, FtrStandardMipTailFormat);
    COPY_FTR(ftrFrameBufferLLC, FtrFrameBufferLLC);
    COPY_FTR(ftrCrystalwell, FtrCrystalwell);
    COPY_FTR(ftrLLCBypass, FtrLLCBypass);
    COPY_FTR(ftrDisplayEngineS3d, FtrDisplayEngineS3d);
    COPY_FTR(ftrVERing, FtrVERing);
#undef COPY_FTR
}
// Copies the workaround flags listed below from adapterInfo->WaTable into *table.
// Each COPY_WA line names the destination field and the source field.
void Wddm::setupWorkaroundTableFromAdapterInfo(WorkaroundTable *table, ADAPTER_INFO *adapterInfo) {
    const auto &waSource = adapterInfo->WaTable;
#define COPY_WA(DST_FIELD, SRC_FIELD) table->DST_FIELD = waSource.SRC_FIELD
    COPY_WA(waDoNotUseMIReportPerfCount, WaDoNotUseMIReportPerfCount);

    COPY_WA(waEnablePreemptionGranularityControlByUMD, WaEnablePreemptionGranularityControlByUMD);
    COPY_WA(waSendMIFLUSHBeforeVFE, WaSendMIFLUSHBeforeVFE);
    COPY_WA(waReportPerfCountUseGlobalContextID, WaReportPerfCountUseGlobalContextID);
    COPY_WA(waDisableLSQCROPERFforOCL, WaDisableLSQCROPERFforOCL);
    COPY_WA(waMsaa8xTileYDepthPitchAlignment, WaMsaa8xTileYDepthPitchAlignment);
    COPY_WA(waLosslessCompressionSurfaceStride, WaLosslessCompressionSurfaceStride);
    COPY_WA(waFbcLinearSurfaceStride, WaFbcLinearSurfaceStride);
    COPY_WA(wa4kAlignUVOffsetNV12LinearSurface, Wa4kAlignUVOffsetNV12LinearSurface);
    COPY_WA(waEncryptedEdramOnlyPartials, WaEncryptedEdramOnlyPartials);
    COPY_WA(waDisableEdramForDisplayRT, WaDisableEdramForDisplayRT);
    COPY_WA(waForcePcBbFullCfgRestore, WaForcePcBbFullCfgRestore);
    COPY_WA(waCompressedResourceRequiresConstVA21, WaCompressedResourceRequiresConstVA21);
    COPY_WA(waDisablePerCtxtPreemptionGranularityControl, WaDisablePerCtxtPreemptionGranularityControl);
    COPY_WA(waLLCCachingUnsupported, WaLLCCachingUnsupported);
    COPY_WA(waUseVAlign16OnTileXYBpp816, WaUseVAlign16OnTileXYBpp816);
#undef COPY_WA
}
bool Wddm::queryAdapterInfo() {
NTSTATUS status = STATUS_UNSUCCESSFUL;
D3DKMT_QUERYADAPTERINFO QueryAdapterInfo = {0};
QueryAdapterInfo.hAdapter = adapter;
QueryAdapterInfo.Type = KMTQAITYPE_UMDRIVERPRIVATE;
QueryAdapterInfo.pPrivateDriverData = adapterInfo;
QueryAdapterInfo.PrivateDriverDataSize = sizeof(ADAPTER_INFO);
status = gdi->queryAdapterInfo(&QueryAdapterInfo);
DEBUG_BREAK_IF(status != STATUS_SUCCESS);
// translate
if (status == STATUS_SUCCESS) {
featureTable.reset(new FeatureTable());
Wddm::setupFeatureTableFromAdapterInfo(featureTable.get(), adapterInfo);
waTable.reset(new WorkaroundTable());
Wddm::setupWorkaroundTableFromAdapterInfo(waTable.get(), adapterInfo);
}
return status == STATUS_SUCCESS;
}
bool Wddm::createPagingQueue() {
NTSTATUS status = STATUS_UNSUCCESSFUL;
D3DKMT_CREATEPAGINGQUEUE CreatePagingQueue = {0};
CreatePagingQueue.hDevice = device;
CreatePagingQueue.Priority = D3DDDI_PAGINGQUEUE_PRIORITY_NORMAL;
status = gdi->createPagingQueue(&CreatePagingQueue);
if (status == STATUS_SUCCESS) {
pagingQueue = CreatePagingQueue.hPagingQueue;
pagingQueueSyncObject = CreatePagingQueue.hSyncObject;
pagingFenceAddress = reinterpret_cast<UINT64 *>(CreatePagingQueue.FenceValueCPUVirtualAddress);
}
return status == STATUS_SUCCESS;
}
bool Wddm::destroyPagingQueue() {
D3DDDI_DESTROYPAGINGQUEUE DestroyPagingQueue = {0};
if (pagingQueue) {
DestroyPagingQueue.hPagingQueue = pagingQueue;
NTSTATUS status = gdi->destroyPagingQueue(&DestroyPagingQueue);
DEBUG_BREAK_IF(status != STATUS_SUCCESS);
pagingQueue = 0;
}
return true;
}
bool Wddm::createDevice() {
NTSTATUS status = STATUS_UNSUCCESSFUL;
D3DKMT_CREATEDEVICE CreateDevice = {{0}};
if (adapter) {
CreateDevice.hAdapter = adapter;
CreateDevice.Flags.LegacyMode = FALSE;
if (DebugManager.flags.ForcePreemptionMode.get() != PreemptionMode::Disabled) {
CreateDevice.Flags.DisableGpuTimeout = readEnablePreemptionRegKey();
}
status = gdi->createDevice(&CreateDevice);
if (status == STATUS_SUCCESS) {
device = CreateDevice.hDevice;
}
}
return status == STATUS_SUCCESS;
}
bool Wddm::destroyDevice() {
NTSTATUS status = STATUS_UNSUCCESSFUL;
D3DKMT_DESTROYDEVICE DestroyDevice = {0};
if (device) {
DestroyDevice.hDevice = device;
status = gdi->destroyDevice(&DestroyDevice);
DEBUG_BREAK_IF(status != STATUS_SUCCESS);
device = 0;
}
return true;
}
bool Wddm::createMonitoredFence() {
NTSTATUS Status;
D3DKMT_CREATESYNCHRONIZATIONOBJECT2 CreateSynchronizationObject = {0};
DEBUG_BREAK_IF(!device);
CreateSynchronizationObject.hDevice = device;
CreateSynchronizationObject.Info.Type = D3DDDI_MONITORED_FENCE;
CreateSynchronizationObject.Info.MonitoredFence.InitialFenceValue = 0;
Status = gdi->createSynchronizationObject2(&CreateSynchronizationObject);
DEBUG_BREAK_IF(STATUS_SUCCESS != Status);
monitoredFence.currentFenceValue = 1;
monitoredFence.fenceHandle = CreateSynchronizationObject.hSyncObject;
monitoredFence.cpuAddress = reinterpret_cast<UINT64 *>(CreateSynchronizationObject.Info.MonitoredFence.FenceValueCPUVirtualAddress);
monitoredFence.lastSubmittedFence = 0;
monitoredFence.gpuAddress = CreateSynchronizationObject.Info.MonitoredFence.FenceValueGPUVirtualAddress;
return Status == STATUS_SUCCESS;
}
bool Wddm::closeAdapter() {
NTSTATUS status = STATUS_UNSUCCESSFUL;
D3DKMT_CLOSEADAPTER CloseAdapter = {0};
CloseAdapter.hAdapter = adapter;
status = gdi->closeAdapter(&CloseAdapter);
DEBUG_BREAK_IF(status != STATUS_SUCCESS);
adapter = 0;
return true;
}
// Finds the first DXGI adapter whose description contains "Intel" or "Citrix"
// and opens it by LUID through the GDI layer. On success the adapter handle and
// LUID are stored in `adapter` and `adapterLuid`.
//
// Returns false when the DXGI factory cannot be created, when no matching
// adapter is enumerated, or when opening the adapter does not report
// STATUS_SUCCESS.
bool Wddm::openAdapter() {
    IDXGIFactory1 *pFactory = nullptr;
    HRESULT hr = Wddm::createDxgiFactory(__uuidof(IDXGIFactory), (void **)(&pFactory));
    if ((hr != S_OK) || (pFactory == nullptr)) {
        return false;
    }

    DXGI_ADAPTER_DESC1 OpenAdapterDesc = {{0}};
    IDXGIAdapter1 *pAdapter = nullptr;
    DWORD iDevNum = 0;

    // Stop on any enumeration result other than S_OK (not only
    // DXGI_ERROR_NOT_FOUND) so that pAdapter is never dereferenced when null.
    while ((pFactory->EnumAdapters1(iDevNum++, &pAdapter) == S_OK) && (pAdapter != nullptr)) {
        hr = pAdapter->GetDesc1(&OpenAdapterDesc);
        if (hr == S_OK) {
            // Check for adapters that include either "Intel" or "Citrix" (which may
            // be virtualizing one of our adapters) in the description
            if ((wcsstr(OpenAdapterDesc.Description, L"Intel") != 0) ||
                (wcsstr(OpenAdapterDesc.Description, L"Citrix") != 0)) {
                break;
            }
        }
        // Release all the non-matching adapters
        pAdapter->Release();
        pAdapter = nullptr;
    }

    if (pAdapter == nullptr) {
        // No matching adapter: OpenAdapterDesc holds either zeros or the
        // description of an unrelated adapter, so do not open anything.
        pFactory->Release();
        return false;
    }

    D3DKMT_OPENADAPTERFROMLUID OpenAdapterData = {{0}};
    OpenAdapterData.AdapterLuid = OpenAdapterDesc.AdapterLuid;
    NTSTATUS status = gdi->openAdapterFromLuid(&OpenAdapterData);

    // The DXGI objects are only needed to look up the LUID.
    pAdapter->Release();
    pAdapter = nullptr;
    pFactory->Release();
    pFactory = nullptr;

    if (status != STATUS_SUCCESS) {
        return false;
    }

    adapter = OpenAdapterData.hAdapter;
    adapterLuid = OpenAdapterDesc.AdapterLuid;
    return true;
}
// Evicts the given allocations from the device.
//
// handleList   - allocation handles to evict.
// numOfHandles - number of entries in handleList.
// sizeToTrim   - receives NumBytesToTrim as left in the request after the call.
//
// Returns true when the call reported STATUS_SUCCESS.
bool Wddm::evict(D3DKMT_HANDLE *handleList, uint32_t numOfHandles, uint64_t &sizeToTrim) {
    D3DKMT_EVICT evictArgs = {0};
    evictArgs.hDevice = device;
    evictArgs.AllocationList = handleList;
    evictArgs.NumAllocations = numOfHandles;
    evictArgs.NumBytesToTrim = 0;

    const NTSTATUS status = gdi->evict(&evictArgs);

    sizeToTrim = evictArgs.NumBytesToTrim;
    return status == STATUS_SUCCESS;
}
// Makes the given allocations resident through the paging queue.
//
// handles             - allocation handles.
// count               - number of entries in handles.
// cantTrimFurther     - sets both the CantTrimFurther and MustSucceed flags; a
//                       failure with this set hits UNRECOVERABLE_IF.
// numberOfBytesToTrim - optional; on failure receives the request's NumBytesToTrim.
//
// Returns true for STATUS_SUCCESS and STATUS_PENDING. For STATUS_PENDING,
// currentPagingFenceValue is first raised to the returned paging fence value.
bool Wddm::makeResident(D3DKMT_HANDLE *handles, uint32_t count, bool cantTrimFurther, uint64_t *numberOfBytesToTrim) {
    // NOTE(review): PriorityList points at a single UINT while NumAllocations is
    // `count` - confirm the API does not expect one priority per allocation.
    UINT priority = 0;

    D3DDDI_MAKERESIDENT residencyArgs = {0};
    residencyArgs.hPagingQueue = pagingQueue;
    residencyArgs.AllocationList = handles;
    residencyArgs.NumAllocations = count;
    residencyArgs.PriorityList = &priority;
    residencyArgs.Flags.CantTrimFurther = cantTrimFurther ? 1 : 0;
    residencyArgs.Flags.MustSucceed = cantTrimFurther ? 1 : 0;

    const NTSTATUS status = gdi->makeResident(&residencyArgs);

    if (status == STATUS_SUCCESS) {
        return true;
    }
    if (status == STATUS_PENDING) {
        interlockedMax(currentPagingFenceValue, residencyArgs.PagingFenceValue);
        return true;
    }

    // Failure path: report how much would have to be trimmed, if asked to.
    DEBUG_BREAK_IF(true);
    if (numberOfBytesToTrim != nullptr) {
        *numberOfBytesToTrim = residencyArgs.NumBytesToTrim;
    }
    UNRECOVERABLE_IF(cantTrimFurther);
    return false;
}
// Evicts every fragment described by osHandles from the device.
//
// The fragment handles are gathered into a local list and NumBytesToTrim is set
// to the sum of the fragment sizes before the request is issued.
//
// Returns true when the eviction reported STATUS_SUCCESS.
bool Wddm::evict(OsHandleStorage &osHandles) {
    // The local handle list holds at most max_fragments_count entries.
    DEBUG_BREAK_IF(osHandles.fragmentCount > max_fragments_count);

    D3DKMT_HANDLE handles[max_fragments_count] = {0};
    auto sizeToTrim = 0uLL;
    for (uint32_t allocationId = 0; allocationId < osHandles.fragmentCount; allocationId++) {
        handles[allocationId] = osHandles.fragmentStorageData[allocationId].osHandleStorage->handle;
        sizeToTrim += osHandles.fragmentStorageData[allocationId].fragmentSize;
    }

    D3DKMT_EVICT Evict = {0};
    Evict.AllocationList = handles;
    Evict.hDevice = device;
    Evict.NumAllocations = osHandles.fragmentCount;
    Evict.NumBytesToTrim = sizeToTrim;

    // Previously the request was built but never submitted, so the function
    // reported success without evicting anything.
    NTSTATUS status = gdi->evict(&Evict);
    return status == STATUS_SUCCESS;
}
// Maps a WddmAllocation into GPU virtual address space. Forwards the allocation's
// gmm and handle to mapGpuVirtualAddressImpl, which writes the resulting address
// into allocation->gpuPtr.
bool Wddm::mapGpuVirtualAddress(WddmAllocation *allocation, void *cpuPtr, uint64_t size, bool allocation32bit, bool use64kbPages) {
    return mapGpuVirtualAddressImpl(allocation->gmm,
                                    allocation->handle,
                                    cpuPtr,
                                    size,
                                    allocation->gpuPtr,
                                    allocation32bit,
                                    use64kbPages);
}
// Maps a single fragment into GPU virtual address space. The fragment's CPU
// pointer and size are used as the mapping source; the resulting address is
// written into the fragment's osHandleStorage->gpuPtr.
bool Wddm::mapGpuVirtualAddress(AllocationStorageData *allocationStorageData, bool allocation32bit, bool use64kbPages) {
    auto *handleStorage = allocationStorageData->osHandleStorage;
    // The impl takes a mutable pointer, while the fragment stores a const one.
    void *fragmentCpuPtr = const_cast<void *>(allocationStorageData->cpuPtr);

    return mapGpuVirtualAddressImpl(handleStorage->gmm,
                                    handleStorage->handle,
                                    fragmentCpuPtr,
                                    allocationStorageData->fragmentSize,
                                    handleStorage->gpuPtr,
                                    allocation32bit, use64kbPages);
}
// Maps an allocation handle into GPU virtual address space.
//
// gmm             - optional; when render compressed, the aux table is updated
//                   after a successful mapping and that result is returned.
// handle          - allocation to map.
// cpuPtr          - requested base address for the default range; ignored for
//                   64KB and 32-bit mappings.
// size            - size in bytes, converted to pages for the request.
// gpuPtr          - receives the canonized virtual address from the request, even
//                   when the call fails.
// allocation32bit - place the mapping in GfxPartition.Heap32[0].
// use64kbPages    - place the mapping in GfxPartition.Standard64KB; takes
//                   precedence over allocation32bit.
//
// STATUS_PENDING is treated as success after raising currentPagingFenceValue to
// the returned paging fence value.
bool Wddm::mapGpuVirtualAddressImpl(Gmm *gmm, D3DKMT_HANDLE handle, void *cpuPtr, uint64_t size, D3DGPU_VIRTUAL_ADDRESS &gpuPtr, bool allocation32bit, bool use64kbPages) {
    D3DDDIGPUVIRTUALADDRESS_PROTECTION_TYPE protectionType = {{{0}}};
    protectionType.Write = TRUE;

    D3DDDI_MAPGPUVIRTUALADDRESS mapArgs = {0};
    mapArgs.hPagingQueue = pagingQueue;
    mapArgs.hAllocation = handle;
    mapArgs.Protection = protectionType;
    mapArgs.SizeInPages = size / MemoryConstants::pageSize;
    mapArgs.OffsetInPages = 0;

    const auto &partition = adapterInfo->GfxPartition;
    if (use64kbPages) {
        mapArgs.MinimumAddress = partition.Standard64KB.Base;
        mapArgs.MaximumAddress = partition.Standard64KB.Limit;
    } else {
        mapArgs.BaseAddress = reinterpret_cast<D3DGPU_VIRTUAL_ADDRESS>(cpuPtr);
        if (allocation32bit) {
            // 32-bit heap: the driver picks the address inside Heap32[0].
            mapArgs.MinimumAddress = partition.Heap32[0].Base;
            mapArgs.MaximumAddress = partition.Heap32[0].Limit;
            mapArgs.BaseAddress = 0;
        } else if (!cpuPtr) {
            // No CPU pointer to mirror: use the standard partition.
            mapArgs.MinimumAddress = partition.Standard.Base;
            mapArgs.MaximumAddress = partition.Standard.Limit;
        } else {
            // Map at the CPU pointer; the range limit depends on the pointer width.
            mapArgs.MinimumAddress = static_cast<D3DGPU_VIRTUAL_ADDRESS>(0x0);
            mapArgs.MaximumAddress = static_cast<D3DGPU_VIRTUAL_ADDRESS>((sizeof(size_t) == 8) ? 0x7ffffffffff : (D3DGPU_VIRTUAL_ADDRESS)0xffffffff);
        }
    }

    NTSTATUS status = gdi->mapGpuVirtualAddress(&mapArgs);
    gpuPtr = Gmm::canonize(mapArgs.VirtualAddress);

    if (status == STATUS_PENDING) {
        interlockedMax(currentPagingFenceValue, mapArgs.PagingFenceValue);
        status = STATUS_SUCCESS;
    }
    if (status != STATUS_SUCCESS) {
        DEBUG_BREAK_IF(true);
        return false;
    }

    if (gmm && gmm->isRenderCompressed) {
        GMM_DDI_UPDATEAUXTABLE ddiUpdateAuxTable = {};
        ddiUpdateAuxTable.BaseGpuVA = gpuPtr;
        ddiUpdateAuxTable.BaseResInfo = gmm->gmmResourceInfo->peekHandle();
        ddiUpdateAuxTable.DoNotWait = true;
        ddiUpdateAuxTable.Map = true;
        return updateAuxTable(ddiUpdateAuxTable);
    }

    return true;
}
// Releases a GPU virtual address range on the adapter.
//
// gpuPtr - base address of the range; reset to 0 whatever the outcome.
// size   - size of the range.
//
// Returns true when the call reported STATUS_SUCCESS.
bool Wddm::freeGpuVirtualAddres(D3DGPU_VIRTUAL_ADDRESS &gpuPtr, uint64_t size) {
    D3DKMT_FREEGPUVIRTUALADDRESS freeArgs = {0};
    freeArgs.hAdapter = adapter;
    freeArgs.BaseAddress = gpuPtr;
    freeArgs.Size = size;

    const NTSTATUS status = gdi->freeGpuVirtualAddress(&freeArgs);

    gpuPtr = static_cast<D3DGPU_VIRTUAL_ADDRESS>(0);
    return status == STATUS_SUCCESS;
}
// Creates a single kernel allocation for alloc and maps it into GPU virtual
// address space (never with 64KB pages).
//
// Returns false when alloc is null, its underlying buffer size is 0, the
// allocation cannot be created, or the mapping fails. alloc->handle is set as
// soon as the allocation itself was created.
bool Wddm::createAllocation(WddmAllocation *alloc) {
    if (alloc == nullptr) {
        return false;
    }
    const size_t size = alloc->getUnderlyingBufferSize();
    if (size == 0) {
        return false;
    }

    D3DDDI_ALLOCATIONINFO allocationInfo = {0};
    allocationInfo.pSystemMem = alloc->getAlignedCpuPtr();
    allocationInfo.pPrivateDriverData = alloc->gmm->gmmResourceInfo->peekHandle();
    allocationInfo.PrivateDriverDataSize = static_cast<unsigned int>(sizeof(GMM_RESOURCE_INFO));
    allocationInfo.Flags.Primary = 0;

    D3DKMT_CREATEALLOCATION createArgs = {0};
    createArgs.hGlobalShare = 0;
    createArgs.PrivateRuntimeDataSize = 0;
    createArgs.PrivateDriverDataSize = 0;
    createArgs.Flags.Reserved = 0;
    createArgs.NumAllocations = 1;
    createArgs.pPrivateRuntimeData = NULL;
    createArgs.pPrivateDriverData = NULL;
    createArgs.Flags.NonSecure = FALSE;
    createArgs.Flags.CreateShared = FALSE;
    createArgs.Flags.RestrictSharedAccess = FALSE;
    // A resource is created only when there is no CPU pointer backing the allocation.
    createArgs.Flags.CreateResource = alloc->getAlignedCpuPtr() == 0 ? TRUE : FALSE;
    createArgs.pAllocationInfo = &allocationInfo;
    createArgs.hDevice = device;

    const NTSTATUS status = gdi->createAllocation(&createArgs);
    if (status != STATUS_SUCCESS) {
        DEBUG_BREAK_IF(true);
        return false;
    }
    alloc->handle = allocationInfo.hAllocation;

    const bool mapped = mapGpuVirtualAddress(alloc, alloc->getAlignedCpuPtr(), size, alloc->is32BitAllocation, false);
    if (!mapped) {
        DEBUG_BREAK_IF(true);
        return false;
    }
    return true;
}
// Creates a single kernel allocation without backing system memory
// (pSystemMem is 0, CreateResource is TRUE) and stores its handle in
// alloc->handle. No GPU virtual address is mapped here.
//
// Returns false when alloc is null or the allocation cannot be created;
// true otherwise.
bool Wddm::createAllocation64k(WddmAllocation *alloc) {
    // Same null-argument contract as createAllocation().
    if (alloc == nullptr) {
        return false;
    }

    D3DDDI_ALLOCATIONINFO AllocationInfo = {0};
    AllocationInfo.pSystemMem = 0;
    AllocationInfo.pPrivateDriverData = alloc->gmm->gmmResourceInfo->peekHandle();
    AllocationInfo.PrivateDriverDataSize = static_cast<unsigned int>(sizeof(GMM_RESOURCE_INFO));
    AllocationInfo.Flags.Primary = 0;

    D3DKMT_CREATEALLOCATION CreateAllocation = {0};
    CreateAllocation.NumAllocations = 1;
    CreateAllocation.pPrivateRuntimeData = NULL;
    CreateAllocation.pPrivateDriverData = NULL;
    CreateAllocation.Flags.CreateResource = TRUE;
    CreateAllocation.pAllocationInfo = &AllocationInfo;
    CreateAllocation.hDevice = device;

    NTSTATUS status = gdi->createAllocation(&CreateAllocation);
    if (status != STATUS_SUCCESS) {
        DEBUG_BREAK_IF(true);
        // Report the failure instead of the former unconditional `return true`.
        return false;
    }

    alloc->handle = AllocationInfo.hAllocation;
    return true;
}
// Creates kernel allocations for every fragment in osHandles that has no handle
// yet and a non-zero size, then maps each of them into GPU virtual address
// space at its CPU pointer.
//
// Scanning stops at the first fragment without osHandleStorage. When no
// fragment needs an allocation the function returns true without calling the
// driver.
//
// Returns false when the allocations cannot be created or when any of the
// mappings fails. All new handles are stored in their fragments even if a
// mapping fails.
bool Wddm::createAllocationsAndMapGpuVa(OsHandleStorage &osHandles) {
    D3DDDI_ALLOCATIONINFO AllocationInfo[max_fragments_count] = {{0}};
    // fragmentIndices[n] is the fragment that AllocationInfo[n] was built from, so
    // the handles returned by the driver go back to exactly those fragments.
    unsigned int fragmentIndices[max_fragments_count] = {0};
    auto allocationCount = 0;

    for (unsigned int i = 0; i < max_fragments_count; i++) {
        auto &fragment = osHandles.fragmentStorageData[i];
        if (!fragment.osHandleStorage) {
            break;
        }
        if (fragment.osHandleStorage->handle == 0 && fragment.fragmentSize) {
            AllocationInfo[allocationCount].pPrivateDriverData = fragment.osHandleStorage->gmm->gmmResourceInfo->peekHandle();
            auto pSysMem = fragment.cpuPtr;
            auto PSysMemFromGmm = fragment.osHandleStorage->gmm->gmmResourceInfo->getSystemMemPointer(CL_TRUE);
            DEBUG_BREAK_IF(PSysMemFromGmm != pSysMem);
            AllocationInfo[allocationCount].pSystemMem = fragment.cpuPtr;
            AllocationInfo[allocationCount].PrivateDriverDataSize = static_cast<unsigned int>(sizeof(GMM_RESOURCE_INFO));
            fragmentIndices[allocationCount] = i;
            allocationCount++;
        }
    }
    if (allocationCount == 0) {
        return true;
    }

    D3DKMT_CREATEALLOCATION CreateAllocation = {0};
    CreateAllocation.hGlobalShare = 0;
    CreateAllocation.PrivateRuntimeDataSize = 0;
    CreateAllocation.PrivateDriverDataSize = 0;
    CreateAllocation.Flags.Reserved = 0;
    CreateAllocation.NumAllocations = allocationCount;
    CreateAllocation.pPrivateRuntimeData = NULL;
    CreateAllocation.pPrivateDriverData = NULL;
    CreateAllocation.Flags.NonSecure = FALSE;
    CreateAllocation.Flags.CreateShared = FALSE;
    CreateAllocation.Flags.RestrictSharedAccess = FALSE;
    CreateAllocation.Flags.CreateResource = FALSE;
    CreateAllocation.pAllocationInfo = AllocationInfo;
    CreateAllocation.hDevice = device;

    NTSTATUS status = gdi->createAllocation(&CreateAllocation);
    if (status != STATUS_SUCCESS) {
        DBG_LOG(PrintDebugMessages, __FUNCTION__, "status: ", status);
        DEBUG_BREAK_IF(true);
        return false;
    }

    // Hand every new handle to its fragment and map it. A failed mapping is
    // remembered rather than overwritten by the result of a later fragment.
    bool success = true;
    for (int i = 0; i < allocationCount; i++) {
        auto &fragment = osHandles.fragmentStorageData[fragmentIndices[i]];
        fragment.osHandleStorage->handle = AllocationInfo[i].hAllocation;
        if (!mapGpuVirtualAddress(&fragment, false, false)) {
            success = false;
        }
    }
    if (!success) {
        DBG_LOG(PrintDebugMessages, __FUNCTION__, "mapGpuVirtualAddress: ", success);
        DEBUG_BREAK_IF(true);
    }
    return success;
}
// Destroys a set of allocations (or a resource) on the device after waiting on
// the CPU for lastFenceValue.
//
// handles         - allocation handles to destroy.
// allocationCount - number of entries in handles.
// lastFenceValue  - fence value passed to waitFromCpu before destruction.
// resourceHandle  - resource to destroy; debug builds break when it is non-zero
//                   together with more than one allocation.
//
// Returns true when the call reported STATUS_SUCCESS.
bool Wddm::destroyAllocations(D3DKMT_HANDLE *handles, uint32_t allocationCount, uint64_t lastFenceValue, D3DKMT_HANDLE resourceHandle) {
    DEBUG_BREAK_IF(allocationCount > 1 && resourceHandle != 0);

    waitFromCpu(lastFenceValue);

    D3DKMT_DESTROYALLOCATION2 destroyArgs = {0};
    destroyArgs.hDevice = device;
    destroyArgs.hResource = resourceHandle;
    destroyArgs.phAllocationList = handles;
    destroyArgs.AllocationCount = allocationCount;
    destroyArgs.Flags.AssumeNotInUse = 1;

    const NTSTATUS status = gdi->destroyAllocation2(&destroyArgs);
    return status == STATUS_SUCCESS;
}
// Opens a resource shared through a global share handle and fills alloc with its
// first allocation handle, the resource handle and a Gmm created from the
// allocation's private driver data.
//
// handle - global share handle of the resource.
// alloc  - allocation object to populate; written only on success.
//
// Returns true when the resource was queried and opened successfully; false when
// either call fails or the resource reports no allocations.
bool Wddm::openSharedHandle(D3DKMT_HANDLE handle, WddmAllocation *alloc) {
    D3DKMT_QUERYRESOURCEINFO QueryResourceInfo = {0};
    QueryResourceInfo.hDevice = device;
    QueryResourceInfo.hGlobalShare = handle;
    auto status = gdi->queryResourceInfo(&QueryResourceInfo);
    DEBUG_BREAK_IF(status != STATUS_SUCCESS);
    if (status != STATUS_SUCCESS || QueryResourceInfo.NumAllocations == 0) {
        // The buffer sizes below come from the query; without a valid result
        // there is nothing to open and allocationInfo[0] would not exist.
        return false;
    }

    // Buffers sized according to what the query reported.
    std::unique_ptr<char[]> allocPrivateData(new char[QueryResourceInfo.TotalPrivateDriverDataSize]);
    std::unique_ptr<char[]> resPrivateData(new char[QueryResourceInfo.ResourcePrivateDriverDataSize]);
    std::unique_ptr<char[]> resPrivateRuntimeData(new char[QueryResourceInfo.PrivateRuntimeDataSize]);
    std::unique_ptr<D3DDDI_OPENALLOCATIONINFO[]> allocationInfo(new D3DDDI_OPENALLOCATIONINFO[QueryResourceInfo.NumAllocations]);

    D3DKMT_OPENRESOURCE OpenResource = {0};
    OpenResource.hDevice = device;
    OpenResource.hGlobalShare = handle;
    OpenResource.NumAllocations = QueryResourceInfo.NumAllocations;
    OpenResource.pOpenAllocationInfo = allocationInfo.get();
    OpenResource.pTotalPrivateDriverDataBuffer = allocPrivateData.get();
    OpenResource.TotalPrivateDriverDataBufferSize = QueryResourceInfo.TotalPrivateDriverDataSize;
    OpenResource.pResourcePrivateDriverData = resPrivateData.get();
    OpenResource.ResourcePrivateDriverDataSize = QueryResourceInfo.ResourcePrivateDriverDataSize;
    OpenResource.pPrivateRuntimeData = resPrivateRuntimeData.get();
    OpenResource.PrivateRuntimeDataSize = QueryResourceInfo.PrivateRuntimeDataSize;

    status = gdi->openResource(&OpenResource);
    DEBUG_BREAK_IF(status != STATUS_SUCCESS);
    if (status != STATUS_SUCCESS) {
        return false;
    }

    alloc->handle = allocationInfo[0].hAllocation;
    alloc->resourceHandle = OpenResource.hResource;
    alloc->gmm = Gmm::create((PGMM_RESOURCE_INFO)(allocationInfo[0].pPrivateDriverData));

    // The function used to end with `return STATUS_SUCCESS;`, which converts to
    // false in a bool function and made every call look like a failure.
    return true;
}
// Opens a resource that was shared through an NT handle and fills `alloc`
// with the first allocation's handle, the resource handle and a Gmm created
// from the allocation's private driver data.
//
// The resource is first queried to learn the required buffer sizes and the
// number of allocations, then opened with buffers of exactly those sizes.
//
// Returns true when both thunk calls succeed and at least one allocation was
// reported; returns false otherwise, in which case `alloc` is left untouched.
// (Previously the function returned STATUS_SUCCESS, which is 0 and therefore
// converted to `false` even on success.)
bool Wddm::openNTHandle(HANDLE handle, WddmAllocation *alloc) {
    D3DKMT_QUERYRESOURCEINFOFROMNTHANDLE queryResourceInfoFromNtHandle = {};
    queryResourceInfoFromNtHandle.hDevice = device;
    queryResourceInfoFromNtHandle.hNtHandle = handle;
    auto status = gdi->queryResourceInfoFromNtHandle(&queryResourceInfoFromNtHandle);
    DEBUG_BREAK_IF(status != STATUS_SUCCESS);
    // allocationInfo2[0] is read below, so an empty allocation list is a failure too.
    if (status != STATUS_SUCCESS || queryResourceInfoFromNtHandle.NumAllocations == 0) {
        return false;
    }

    // Scratch buffers sized from the query; they only need to live for the open call.
    std::unique_ptr<char[]> allocPrivateData(new char[queryResourceInfoFromNtHandle.TotalPrivateDriverDataSize]);
    std::unique_ptr<char[]> resPrivateData(new char[queryResourceInfoFromNtHandle.ResourcePrivateDriverDataSize]);
    std::unique_ptr<char[]> resPrivateRuntimeData(new char[queryResourceInfoFromNtHandle.PrivateRuntimeDataSize]);
    std::unique_ptr<D3DDDI_OPENALLOCATIONINFO2[]> allocationInfo2(new D3DDDI_OPENALLOCATIONINFO2[queryResourceInfoFromNtHandle.NumAllocations]);

    D3DKMT_OPENRESOURCEFROMNTHANDLE openResourceFromNtHandle = {};
    openResourceFromNtHandle.hDevice = device;
    openResourceFromNtHandle.hNtHandle = handle;
    openResourceFromNtHandle.NumAllocations = queryResourceInfoFromNtHandle.NumAllocations;
    openResourceFromNtHandle.pOpenAllocationInfo2 = allocationInfo2.get();
    openResourceFromNtHandle.pTotalPrivateDriverDataBuffer = allocPrivateData.get();
    openResourceFromNtHandle.TotalPrivateDriverDataBufferSize = queryResourceInfoFromNtHandle.TotalPrivateDriverDataSize;
    openResourceFromNtHandle.pResourcePrivateDriverData = resPrivateData.get();
    openResourceFromNtHandle.ResourcePrivateDriverDataSize = queryResourceInfoFromNtHandle.ResourcePrivateDriverDataSize;
    openResourceFromNtHandle.pPrivateRuntimeData = resPrivateRuntimeData.get();
    openResourceFromNtHandle.PrivateRuntimeDataSize = queryResourceInfoFromNtHandle.PrivateRuntimeDataSize;
    status = gdi->openResourceFromNtHandle(&openResourceFromNtHandle);
    DEBUG_BREAK_IF(status != STATUS_SUCCESS);
    if (status != STATUS_SUCCESS) {
        return false;
    }

    // Only the first allocation of the resource is consumed.
    alloc->handle = allocationInfo2[0].hAllocation;
    alloc->resourceHandle = openResourceFromNtHandle.hResource;
    alloc->gmm = Gmm::create((PGMM_RESOURCE_INFO)(allocationInfo2[0].pPrivateDriverData));
    return true;
}
// Locks the allocation through the lock2 thunk and returns the pointer the
// call stored in pData. A failing status only triggers a debug break; the
// pointer is returned regardless.
void *Wddm::lockResource(WddmAllocation *wddmAllocation) {
    D3DKMT_LOCK2 lockArgs = {};
    lockArgs.hDevice = this->device;
    lockArgs.hAllocation = wddmAllocation->handle;

    const NTSTATUS lockStatus = gdi->lock2(&lockArgs);
    DEBUG_BREAK_IF(lockStatus != STATUS_SUCCESS);

    return lockArgs.pData;
}
// Releases a lock on the allocation through the unlock2 thunk. A failing
// status only triggers a debug break; nothing is reported to the caller.
void Wddm::unlockResource(WddmAllocation *wddmAllocation) {
    D3DKMT_UNLOCK2 unlockArgs = {};
    unlockArgs.hDevice = this->device;
    unlockArgs.hAllocation = wddmAllocation->handle;

    const NTSTATUS unlockStatus = gdi->unlock2(&unlockArgs);
    DEBUG_BREAK_IF(unlockStatus != STATUS_SUCCESS);
}
// Creates a context on `device` for the engine selected by `node`.
// Returns the new context handle, or 0 when the thunk call fails.
D3DKMT_HANDLE Wddm::createContext() {
    // Driver-private creation data; the driver reports the hardware context id
    // through pHwContextId, which points at this object's hwContextId member.
    CREATECONTEXT_PVTDATA privateData = {{0}};
    privateData.IsProtectedProcess = FALSE;
    privateData.IsDwm = FALSE;
    privateData.ProcessID = GetCurrentProcessId();
    privateData.GpuVAContext = TRUE;
    privateData.pHwContextId = &hwContextId;
    privateData.IsMediaUsage = false;

    D3DKMT_CREATECONTEXTVIRTUAL createArgs = {0};
    createArgs.hDevice = device;
    createArgs.NodeOrdinal = node;
    createArgs.EngineAffinity = 0;
    createArgs.ClientHint = D3DKMT_CLIENTHINT_OPENGL;
    createArgs.Flags.NullRendering = (UINT)DebugManager.flags.EnableNullHardware.get();
    createArgs.pPrivateDriverData = &privateData;
    createArgs.PrivateDriverDataSize = sizeof(privateData);

    const NTSTATUS createStatus = gdi->createContext(&createArgs);
    if (createStatus != STATUS_SUCCESS) {
        return static_cast<D3DKMT_HANDLE>(0);
    }
    return createArgs.hContext;
}
// Destroys the given context. Returns true only when a non-zero handle was
// passed and the destroy thunk reported STATUS_SUCCESS; a zero handle is
// reported as failure without calling the thunk.
bool Wddm::destroyContext(D3DKMT_HANDLE context) {
    if (context == static_cast<D3DKMT_HANDLE>(0)) {
        return false;
    }

    D3DKMT_DESTROYCONTEXT destroyArgs = {0};
    destroyArgs.hContext = context;
    return gdi->destroyContext(&destroyArgs) == STATUS_SUCCESS;
}
// Submits a command buffer to the context owned by this object.
//
// commandBuffer - address of the commands; passed to the thunk as a GPU virtual address.
// size          - length of the commands, narrowed to UINT.
// commandHeader - CPU pointer to a COMMAND_BUFFER_HEADER; it is patched with the
//                 monitored fence address/value and handed over as private driver data.
//
// If the current paging fence value is ahead of the value at pagingFenceAddress,
// a GPU-side wait is queued first (waitOnGPU). On successful submission the
// monitored fence bookkeeping advances: lastSubmittedFence takes the value just
// submitted and currentFenceValue is incremented.
//
// Any failure (wait or submit) hits UNRECOVERABLE_IF, so the returned flag is
// only meaningful if that macro lets execution continue.
bool Wddm::submit(void *commandBuffer, size_t size, void *commandHeader) {
D3DKMT_SUBMITCOMMAND SubmitCommand = {0};
NTSTATUS status = STATUS_SUCCESS;
bool success = true;
SubmitCommand.Commands = reinterpret_cast<D3DGPU_VIRTUAL_ADDRESS>(commandBuffer);
SubmitCommand.CommandLength = static_cast<UINT>(size);
SubmitCommand.BroadcastContextCount = 1;
SubmitCommand.BroadcastContext[0] = context;
SubmitCommand.Flags.NullRendering = (UINT)DebugManager.flags.EnableNullHardware.get();
// Tell the driver which fence to signal, and with which value, for this submission.
COMMAND_BUFFER_HEADER *pHeader = reinterpret_cast<COMMAND_BUFFER_HEADER *>(commandHeader);
pHeader->MonitorFenceVA = monitoredFence.gpuAddress;
pHeader->MonitorFenceValue = monitoredFence.currentFenceValue;
// Note: Private data should be the CPU VA Address
SubmitCommand.pPrivateDriverData = commandHeader;
SubmitCommand.PrivateDriverDataSize = sizeof(COMMAND_BUFFER_HEADER);
// Paging work is still outstanding: make the GPU wait for it before these commands run.
if (currentPagingFenceValue > *pagingFenceAddress) {
success = waitOnGPU();
}
if (success) {
DBG_LOG(ResidencyDebugEnable, "Residency:", __FUNCTION__, "currentFenceValue =", monitoredFence.currentFenceValue);
status = gdi->submitCommand(&SubmitCommand);
if (STATUS_SUCCESS != status) {
success = false;
} else {
// Record the value that was just submitted, then move on to the next one.
monitoredFence.lastSubmittedFence = monitoredFence.currentFenceValue;
monitoredFence.currentFenceValue++;
}
}
UNRECOVERABLE_IF(!success);
// Debug-build device state check; its result is not used here.
getDeviceState();
return success;
}
// Debug-only sanity check: queries the device execution state and triggers a
// debug break when the device reports an out-of-memory execution error.
// Always returns true; in non-debug builds the function does nothing else.
bool Wddm::getDeviceState() {
#ifdef _DEBUG
    D3DKMT_GETDEVICESTATE deviceStateQuery;
    memset(&deviceStateQuery, 0, sizeof(deviceStateQuery));
    deviceStateQuery.StateType = D3DKMT_DEVICESTATE_EXECUTION;
    deviceStateQuery.hDevice = device;

    const NTSTATUS queryStatus = gdi->getDeviceState(&deviceStateQuery);
    const bool outOfMemory = (queryStatus == STATUS_SUCCESS) &&
                             (deviceStateQuery.ExecutionState == D3DKMT_DEVICEEXECUTION_ERROR_OUTOFMEMORY);
    if (outOfMemory) {
        DEBUG_BREAK_IF(true);
    }
#endif
    return true;
}
// Spins until the value at the monitored fence's CPU address reaches
// currentFenceValue - 1. Does nothing when the fence has no CPU address.
void Wddm::handleCompletion() {
    if (!monitoredFence.cpuAddress) {
        return;
    }

    auto *fenceTag = monitoredFence.cpuAddress;
    // Busy-wait; both the tag and currentFenceValue are re-read on every iteration.
    while (*fenceTag < monitoredFence.currentFenceValue - 1) {
    }
}
// Reads the "EnablePreemption" setting through registryReader, passing 1 as
// the default argument, and returns it as an unsigned value.
unsigned int Wddm::readEnablePreemptionRegKey() {
    const auto preemptionSetting = registryReader->getSetting("EnablePreemption", 1);
    return static_cast<unsigned int>(preemptionSetting);
}
bool Wddm::waitOnGPU() {
D3DKMT_WAITFORSYNCHRONIZATIONOBJECTFROMGPU WaitOnGPU = {0};
WaitOnGPU.hContext = context;
WaitOnGPU.ObjectCount = 1;
WaitOnGPU.ObjectHandleArray = &pagingQueueSyncObject;
uint64_t localPagingFenceValue = currentPagingFenceValue;
WaitOnGPU.MonitoredFenceValueArray = &localPagingFenceValue;
NTSTATUS status = gdi->waitForSynchronizationObjectFromGpu(&WaitOnGPU);
return status == STATUS_SUCCESS;
}
bool Wddm::waitFromCpu(uint64_t lastFenceValue) {
NTSTATUS status = STATUS_SUCCESS;
if (lastFenceValue > *monitoredFence.cpuAddress) {
D3DKMT_WAITFORSYNCHRONIZATIONOBJECTFROMCPU waitFromCpu = {0};
waitFromCpu.ObjectCount = 1;
waitFromCpu.ObjectHandleArray = &monitoredFence.fenceHandle;
waitFromCpu.FenceValueArray = &lastFenceValue;
waitFromCpu.hDevice = device;
waitFromCpu.hAsyncEvent = NULL;
status = gdi->waitForSynchronizationObjectFromCpu(&waitFromCpu);
DEBUG_BREAK_IF(status != STATUS_SUCCESS);
}
return status == STATUS_SUCCESS;
}
// Returns the SystemSharedMemory field of the cached adapter info.
uint64_t Wddm::getSystemSharedMemory() {
    const uint64_t systemSharedMemory = adapterInfo->SystemSharedMemory;
    return systemSharedMemory;
}
// Returns the cached maximumApplicationAddress, widened to 64 bits.
uint64_t Wddm::getMaxApplicationAddress() {
    const uint64_t maxAddress = maximumApplicationAddress;
    return maxAddress;
}
// Sends a driver escape: stamps this object's adapter handle into the
// caller's command structure, then forwards it to the escape thunk and
// returns the thunk's status.
NTSTATUS Wddm::escape(D3DKMT_ESCAPE &escapeCommand) {
    escapeCommand.hAdapter = adapter;
    const auto escapeStatus = gdi->escape(&escapeCommand);
    return escapeStatus;
}
// Returns the escape thunk entry point held by the Gdi wrapper.
PFND3DKMT_ESCAPE Wddm::getEscapeHandle() const {
    PFND3DKMT_ESCAPE escapeFunction = gdi->escape;
    return escapeFunction;
}
// Returns the Base of the first Heap32 partition from the adapter info,
// rounded up to a multiple of MemoryConstants::pageSize.
uint64_t Wddm::getHeap32Base() {
    const auto &heap32 = adapterInfo->GfxPartition.Heap32[0];
    return alignUp(heap32.Base, MemoryConstants::pageSize);
}
// Returns the Limit of the first Heap32 partition from the adapter info,
// rounded down to a multiple of MemoryConstants::pageSize.
// NOTE(review): despite the name, the value is derived from Limit only, with
// Base not subtracted - confirm this is what callers expect.
uint64_t Wddm::getHeap32Size() {
    const auto &heap32 = adapterInfo->GfxPartition.Heap32[0];
    return alignDown(heap32.Limit, MemoryConstants::pageSize);
}
// Registers `callback` as the trim notification handler for this device.
// `memoryManager` is stored as the opaque Context value of the registration.
// On success the handle produced by the thunk is kept in trimCallbackHandle;
// on failure the member is left unchanged.
void Wddm::registerTrimCallback(PFND3DKMT_TRIMNOTIFICATIONCALLBACK callback, WddmMemoryManager *memoryManager) {
    // Zero-initialise like every other thunk argument structure in this file,
    // so no field (including the output Handle) is handed over uninitialised.
    D3DKMT_REGISTERTRIMNOTIFICATION registerTrimNotification = {};
    registerTrimNotification.Callback = callback;
    registerTrimNotification.AdapterLuid = this->adapterLuid;
    registerTrimNotification.Context = memoryManager;
    registerTrimNotification.hDevice = this->device;

    NTSTATUS status = gdi->registerTrimNotification(&registerTrimNotification);
    if (status == STATUS_SUCCESS) {
        trimCallbackHandle = registerTrimNotification.Handle;
    }
}
// Releases a range with VirtualFree(MEM_RELEASE). A null pointer is ignored.
// A return value other than 1 only triggers a debug break.
void Wddm::releaseGpuPtr(void *gpuPtr) {
    if (!gpuPtr) {
        return;
    }

    auto freeResult = VirtualFree(gpuPtr, 0, MEM_RELEASE);
    DEBUG_BREAK_IF(freeResult != 1);
}
// One-time programming of the page table manager's TR-table and aux-table
// context registers into `stream` for the RCS engine. Does nothing when no
// page table manager is installed or when this has already been done.
void Wddm::initPageTableManagerRegisters(LinearStream &stream) {
    if (!pageTableManager.get() || pageTableManagerInitialized) {
        return;
    }

    pageTableManager->initContextTRTableRegister(&stream, GMM_ENGINE_TYPE::ENGINE_TYPE_RCS);
    pageTableManager->initContextAuxTableRegister(&stream, GMM_ENGINE_TYPE::ENGINE_TYPE_RCS);
    pageTableManagerInitialized = true;
}
// Forwards an aux-table update to the page table manager and reports whether
// it returned GMM_SUCCESS. The manager pointer is dereferenced unchecked, so
// a page table manager must have been installed before this is called.
bool Wddm::updateAuxTable(GMM_DDI_UPDATEAUXTABLE &ddiUpdateAuxTable) {
    const auto updateStatus = pageTableManager->updateAuxTable(&ddiUpdateAuxTable);
    return updateStatus == GMM_STATUS::GMM_SUCCESS;
}
// Replaces the owned page table manager with `newPageTableManager` (which may
// be null). Ownership of the passed pointer moves to this object's unique_ptr,
// and any previously held manager is deleted by the reset.
void Wddm::resetPageTableManager(GmmPageTableMngr *newPageTableManager) {
pageTableManager.reset(newPageTableManager);
}
} // namespace OCLRT

View File

@@ -0,0 +1,233 @@
/*
* Copyright (c) 2017, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#pragma once
#include "runtime/os_interface/windows/windows_wrapper.h"
#include "umKmInc/sharedata.h"
#include "runtime/helpers/debug_helpers.h"
#include <d3d9types.h>
#include <d3dkmthk.h>
#include "gfxEscape.h"
#include "runtime/memory_manager/host_ptr_defines.h"
#include "runtime/utilities/debug_settings_reader.h"
#include "runtime/gmm_helper/gmm_lib.h"
#include "runtime/gmm_helper/gmm_helper.h"
#include "runtime/gmm_helper/gmm_memory.h"
#include <memory>
#include <atomic>
namespace OCLRT {
class WddmAllocation;
class WddmMemoryManager;
class Gdi;
class Gmm;
class LinearStream;
class GmmPageTableMngr;
struct FeatureTable;
struct WorkaroundTable;
// Wrapper around the D3DKMT thunk interface (reached through Gdi) that holds
// the adapter, device, context and paging queue handles and offers
// allocation, residency, submission and synchronization entry points built on
// top of them. Instances are obtained through createWddm(); the constructors
// are protected.
class Wddm {
private:
// Bookkeeping for the fence used to track command buffer completion.
struct MonitoredFence {
D3DKMT_HANDLE fenceHandle;
D3DGPU_VIRTUAL_ADDRESS gpuAddress; // written into the command buffer header on submit
volatile uint64_t *cpuAddress; // CPU-visible location polled/compared by the wait helpers
volatile uint64_t currentFenceValue; // value attached to the next submission; incremented after a successful submit
uint64_t lastSubmittedFence; // value attached to the most recent successful submission
};
protected:
Wddm();
Wddm(Gdi *gdi);
public:
// Function pointer types for the OS entry points held in the static members
// createDxgiFactory / getSystemInfo below.
typedef HRESULT(WINAPI *CreateDXGIFactoryFcn)(REFIID riid, void **ppFactory);
typedef void(WINAPI *GetSystemInfoFcn)(SYSTEM_INFO *pSystemInfo);
virtual ~Wddm();
// Factory; see the platform-specific definition for how `gdi` is used.
static Wddm *createWddm(Gdi *gdi = nullptr);
static bool enumAdapters(unsigned int devNum, ADAPTER_INFO *adapterInfo);
static void setupFeatureTableFromAdapterInfo(FeatureTable *table, ADAPTER_INFO *adapterInfo);
static void setupWorkaroundTableFromAdapterInfo(WorkaroundTable *table, ADAPTER_INFO *adapterInfo);
MOCKABLE_VIRTUAL bool evict(D3DKMT_HANDLE *handleList, uint32_t numOfHandles, uint64_t &sizeToTrim);
MOCKABLE_VIRTUAL bool makeResident(D3DKMT_HANDLE *handles, uint32_t count, bool cantTrimFurther, uint64_t *numberOfBytesToTrim);
MOCKABLE_VIRTUAL bool evict(OsHandleStorage &osHandles);
bool mapGpuVirtualAddress(WddmAllocation *allocation, void *cpuPtr, uint64_t size, bool allocation32bit, bool use64kbPages);
bool mapGpuVirtualAddress(AllocationStorageData *allocationStorageData, bool allocation32bit, bool use64kbPages);
MOCKABLE_VIRTUAL D3DKMT_HANDLE createContext();
MOCKABLE_VIRTUAL bool freeGpuVirtualAddres(D3DGPU_VIRTUAL_ADDRESS &gpuPtr, uint64_t size);
MOCKABLE_VIRTUAL bool createAllocation(WddmAllocation *alloc);
MOCKABLE_VIRTUAL bool createAllocation64k(WddmAllocation *alloc);
bool createAllocationsAndMapGpuVa(OsHandleStorage &osHandles);
MOCKABLE_VIRTUAL bool destroyAllocations(D3DKMT_HANDLE *handles, uint32_t allocationCount, uint64_t lastFenceValue, D3DKMT_HANDLE resourceHandle);
bool openSharedHandle(D3DKMT_HANDLE handle, WddmAllocation *alloc);
bool openNTHandle(HANDLE handle, WddmAllocation *alloc);
MOCKABLE_VIRTUAL void *lockResource(WddmAllocation *wddmAllocation);
MOCKABLE_VIRTUAL void unlockResource(WddmAllocation *wddmAllocation);
MOCKABLE_VIRTUAL bool destroyContext(D3DKMT_HANDLE context);
MOCKABLE_VIRTUAL bool queryAdapterInfo();
MOCKABLE_VIRTUAL bool submit(void *commandBuffer, size_t size, void *commandHeader);
MOCKABLE_VIRTUAL bool waitOnGPU();
MOCKABLE_VIRTUAL bool waitFromCpu(uint64_t lastFenceValue);
NTSTATUS escape(D3DKMT_ESCAPE &escapeCommand);
void registerTrimCallback(PFND3DKMT_TRIMNOTIFICATIONCALLBACK callback, WddmMemoryManager *memoryManager);
MOCKABLE_VIRTUAL void releaseGpuPtr(void *gpuPtr);
template <typename GfxFamily>
bool configureDeviceAddressSpace();
// One-time bring-up. Runs only when a Gdi is present and initialized and this
// object has not been initialized yet. The steps execute in the order written
// below and the sequence stops at the first one that fails. `initialized` is
// set only after every step succeeded, and it is also the return value, so
// calling init() on an already initialized object returns true without
// redoing any work.
template <typename GfxFamily>
bool init() {
bool success = false;
if (gdi != nullptr && gdi->isInitialized() && !initialized) {
// do/while is used as a breakable block: each failure breaks out, and on
// full success `success` is true so the loop condition ends it.
do {
success = openAdapter();
if (!success)
break;
success = queryAdapterInfo();
if (!success)
break;
success = createDevice();
if (!success)
break;
success = createPagingQueue();
if (!success)
break;
success = Gmm::initContext(&adapterInfo->GfxPlatform,
featureTable.get(),
waTable.get(),
&adapterInfo->SystemInfo);
if (!success)
break;
success = configureDeviceAddressSpace<GfxFamily>();
if (!success)
break;
// createContext reports failure with a zero handle rather than a bool.
context = createContext();
if (context == static_cast<D3DKMT_HANDLE>(0))
break;
success = createMonitoredFence();
if (!success)
break;
initialized = true;
} while (!success);
}
return initialized;
}
bool isInitialized() {
return initialized;
}
// Accessors into the cached adapter info; a missing adapterInfo only
// triggers a debug break before the pointer is used.
GT_SYSTEM_INFO *getGtSysInfo() {
DEBUG_BREAK_IF(!adapterInfo);
return &adapterInfo->SystemInfo;
}
ADAPTER_INFO *getAdapterInfo() {
DEBUG_BREAK_IF(!adapterInfo);
return adapterInfo;
}
MonitoredFence &getMonitoredFence() { return monitoredFence; }
uint64_t getSystemSharedMemory();
uint64_t getMaxApplicationAddress();
D3DKMT_HANDLE getAdapter() const { return adapter; }
D3DKMT_HANDLE getDevice() const { return device; }
D3DKMT_HANDLE getPagingQueue() const { return pagingQueue; }
D3DKMT_HANDLE getPagingQueueSyncObject() const { return pagingQueueSyncObject; }
Gdi *getGdi() const { return gdi; }
PFND3DKMT_ESCAPE getEscapeHandle() const;
uint32_t getHwContextId() const {
return static_cast<uint32_t>(hwContextId);
}
uint64_t getHeap32Base();
uint64_t getHeap32Size();
// Public so callers can replace the settings source.
std::unique_ptr<SettingsReader> registryReader;
// Selects the engine node used by createContext(); must be set before init().
void setNode(GPUNODE_ORDINAL node) {
this->node = node;
}
void resetPageTableManager(GmmPageTableMngr *newPageTableManager);
void initPageTableManagerRegisters(LinearStream &stream);
bool updateAuxTable(GMM_DDI_UPDATEAUXTABLE &ddiUpdateAuxTable);
protected:
bool initialized; // set by init() once every bring-up step succeeded
bool gdiAllocated;
Gdi *gdi;
D3DKMT_HANDLE adapter;
D3DKMT_HANDLE context;
D3DKMT_HANDLE device;
D3DKMT_HANDLE pagingQueue;
D3DKMT_HANDLE pagingQueueSyncObject;
uint64_t *pagingFenceAddress; // compared against currentPagingFenceValue before each submit
std::atomic<std::uint64_t> currentPagingFenceValue;
MonitoredFence monitoredFence;
ADAPTER_INFO *adapterInfo;
std::unique_ptr<FeatureTable> featureTable;
std::unique_ptr<WorkaroundTable> waTable;
unsigned long hwContextId; // filled in by the driver through createContext()'s private data
LUID adapterLuid;
void *trimCallbackHandle; // set by registerTrimCallback() on success
uintptr_t maximumApplicationAddress; // set by configureDeviceAddressSpace()
GPUNODE_ORDINAL node;
std::unique_ptr<GmmMemory> gmmMemory;
MOCKABLE_VIRTUAL bool mapGpuVirtualAddressImpl(Gmm *gmm, D3DKMT_HANDLE handle, void *cpuPtr, uint64_t size, D3DGPU_VIRTUAL_ADDRESS &gpuPtr, bool allocation32bit, bool use64kbPages);
MOCKABLE_VIRTUAL bool openAdapter();
bool createDevice();
bool createPagingQueue();
bool destroyPagingQueue();
bool destroyDevice();
bool closeAdapter();
bool createMonitoredFence();
bool getDeviceState();
void handleCompletion();
unsigned int readEnablePreemptionRegKey();
static CreateDXGIFactoryFcn createDxgiFactory;
static GetSystemInfoFcn getSystemInfo;
std::unique_ptr<GmmPageTableMngr> pageTableManager;
bool pageTableManagerInitialized = false; // guards the one-time register setup in initPageTableManagerRegisters()
};
} // namespace OCLRT

View File

@@ -0,0 +1,37 @@
/*
* Copyright (c) 2017, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#include "runtime/os_interface/windows/gdi_interface.h"
#include "runtime/os_interface/windows/wddm.h"
namespace OCLRT {
template <typename GfxFamily>
bool Wddm::configureDeviceAddressSpace() {
SYSTEM_INFO sysInfo;
Wddm::getSystemInfo(&sysInfo);
maximumApplicationAddress = reinterpret_cast<uintptr_t>(sysInfo.lpMaximumApplicationAddress);
return gmmMemory->configureDeviceAddressSpace(adapter, device, gdi->escape,
maximumApplicationAddress + 1u,
0, 0, adapterInfo->SkuTable.FtrL3IACoherency, 0, 0);
}
} // namespace OCLRT

View File

@@ -0,0 +1,68 @@
/*
* Copyright (c) 2017, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#include "runtime/os_interface/32bit_memory.h"
#include "runtime/helpers/aligned_memory.h"
using namespace OCLRT;
// Initialized from is64bit: the flag is true exactly when is64bit is.
bool OCLRT::is32BitOsAllocatorAvailable = static_cast<bool>(is64bit);
// OS-specific state of Allocator32bit on this platform: the base pointer of
// the range obtained in the default constructor, which the destructor passes
// to alignedFree().
class Allocator32bit::OsInternals {
  public:
    // Defaults to nullptr so an OsInternals whose range was never assigned
    // does not hand an indeterminate pointer to the destructor's alignedFree().
    void *allocatedRange = nullptr;
};
// Builds an allocator over an externally provided range [base, base + size).
// No OsInternals is created, so the destructor will not free the range.
Allocator32bit::Allocator32bit(uint64_t base, uint64_t size)
    : heapAllocator(new HeapAllocator(reinterpret_cast<void *>(base), size)),
      base(base),
      size(size) {
}
// Builds an allocator over a freshly allocated range of 100 * 4096 bytes,
// aligned to 4096. The range is remembered in osInternals so the destructor
// can release it. The `size` member is not updated here.
OCLRT::Allocator32bit::Allocator32bit() {
    const size_t sizeToMap = 100 * 4096;

    void *range = alignedMalloc(sizeToMap, 4096);
    this->base = reinterpret_cast<uint64_t>(range);

    osInternals.reset(new OsInternals);
    osInternals->allocatedRange = reinterpret_cast<void *>(static_cast<uintptr_t>(this->base));

    heapAllocator.reset(new HeapAllocator(reinterpret_cast<void *>(this->base), sizeToMap));
}
// Frees the range recorded in osInternals, if this allocator has one.
OCLRT::Allocator32bit::~Allocator32bit() {
    if (!osInternals) {
        return;
    }
    alignedFree(osInternals->allocatedRange);
}
void *Allocator32bit::allocate(size_t &size) {
if (size >= 0xfffff000)
return nullptr;
return this->heapAllocator->allocate(size);
}
int Allocator32bit::free(void *ptr, size_t size) {
this->heapAllocator->free(ptr, size);
return 0;
}
// Returns the base address of the managed range, narrowed to uintptr_t.
uintptr_t Allocator32bit::getBase() {
    return static_cast<uintptr_t>(base);
}

View File

@@ -0,0 +1,108 @@
/*
* Copyright (c) 2017, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#pragma once
#define UMDF_USING_NTSTATUS
#include "runtime/memory_manager/graphics_allocation.h"
#include "runtime/os_interface/windows/windows_wrapper.h"
#include <d3dkmthk.h>
namespace OCLRT {
class Gmm;
// Per-fragment OS-side data: the allocation handle, the GPU virtual address
// and the Gmm object that belong together. Members have no initializers.
struct OsHandle {
D3DKMT_HANDLE handle;
D3DGPU_VIRTUAL_ADDRESS gpuPtr;
Gmm *gmm; // raw pointer; ownership is not expressed by this struct
};
// Sentinel (largest size_t value) stored in WddmAllocation::trimListPosition
// by the WddmAllocation constructors.
const size_t trimListUnusedPosition = static_cast<size_t>(-1);
class WddmAllocation : public GraphicsAllocation {
public:
// runtime assigned fields
bool cpuPtrAllocated; // flag indicating if cpuPtr is driver-allocated
// OS assigned fields
D3DKMT_HANDLE handle; // set by createAllocation
D3DKMT_HANDLE resourceHandle = 0u; // used by shared resources
D3DGPU_VIRTUAL_ADDRESS gpuPtr; // set by mapGpuVA
WddmAllocation(void *cpuPtrIn, size_t sizeIn, void *alignedCpuPtr, size_t alignedSize)
: GraphicsAllocation(cpuPtrIn, sizeIn),
cpuPtrAllocated(false),
handle(0),
gpuPtr(0),
alignedCpuPtr(alignedCpuPtr),
alignedSize(alignedSize) {
trimListPosition = trimListUnusedPosition;
}
WddmAllocation(void *cpuPtrIn, size_t sizeIn, osHandle sharedHandle) : GraphicsAllocation(cpuPtrIn, sizeIn, sharedHandle),
cpuPtrAllocated(false),
handle(0),
gpuPtr(0),
alignedCpuPtr(nullptr),
alignedSize(sizeIn) {
trimListPosition = trimListUnusedPosition;
}
WddmAllocation(void *alignedCpuPtr, size_t sizeIn)
: WddmAllocation(alignedCpuPtr, sizeIn, alignedCpuPtr, sizeIn) {
}
WddmAllocation() : WddmAllocation(nullptr, 0, nullptr, 0) {
}
void *getAlignedCpuPtr() const {
return this->alignedCpuPtr;
}
void setAlignedCpuPtr(void *ptr) {
this->alignedCpuPtr = ptr;
this->cpuPtr = ptr;
}
size_t getAlignedSize() const {
return this->alignedSize;
}
ResidencyData &getResidencyData() {
return residency;
}
void setTrimCandidateListPosition(size_t position) {
trimListPosition = position;
}
size_t getTrimCandidateListPosition() {
return trimListPosition;
}
protected:
void *alignedCpuPtr;
size_t alignedSize;
ResidencyData residency;
size_t trimListPosition;
};
} // namespace OCLRT

View File

@@ -0,0 +1,39 @@
/*
* Copyright (c) 2017, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#include "runtime/os_interface/windows/wddm.h"
#include <dxgi.h>
namespace OCLRT {
// Factory for Wddm objects. The caller owns the returned object.
//
// gdi - optional Gdi wrapper to use. When null (the default), the Wddm is
//       default-constructed; otherwise the supplied instance is handed to the
//       Wddm(Gdi *) constructor instead of being silently ignored.
// NOTE(review): ownership of a caller-supplied `gdi` is decided by the
// Wddm(Gdi *) constructor, which is defined elsewhere - confirm before
// relying on it.
Wddm *Wddm::createWddm(Gdi *gdi) {
    if (gdi == nullptr) {
        return new Wddm();
    }
    return new Wddm(gdi);
}
// Returns the address of the system CreateDXGIFactory function.
Wddm::CreateDXGIFactoryFcn getCreateDxgiFactory() {
    Wddm::CreateDXGIFactoryFcn factoryFunction = CreateDXGIFactory;
    return factoryFunction;
}
// Returns the address of the system GetSystemInfo function.
Wddm::GetSystemInfoFcn getGetSystemInfo() {
    Wddm::GetSystemInfoFcn systemInfoFunction = GetSystemInfo;
    return systemInfoFunction;
}
} // namespace OCLRT

View File

@@ -0,0 +1,62 @@
/*
* Copyright (c) 2017, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#pragma once
#include "runtime/command_stream/device_command_stream.h"
struct COMMAND_BUFFER_HEADER_REC;
namespace OCLRT {
class GmmPageTableMngr;
class GraphicsAllocation;
class WddmMemoryManager;
class Wddm;
// Command stream receiver that submits work through a Wddm object.
// Overrides the flush / residency / eviction / flush-stamp hooks of
// DeviceCommandStreamReceiver and creates the WddmMemoryManager used with it.
template <typename GfxFamily>
class WddmCommandStreamReceiver : public DeviceCommandStreamReceiver<GfxFamily> {
typedef DeviceCommandStreamReceiver<GfxFamily> BaseClass;
// Makes the inherited memoryManager member usable without `this->` in this template.
using CommandStreamReceiverHw<GfxFamily>::CommandStreamReceiver::memoryManager;
public:
// `wddm` may be null, in which case the constructor creates one.
WddmCommandStreamReceiver(const HardwareInfo &hwInfoIn, Wddm *wddm);
virtual ~WddmCommandStreamReceiver();
FlushStamp flush(BatchBuffer &batchBuffer, EngineType engineOrdinal, ResidencyContainer *allocationsForResidency) override;
void makeResident(GraphicsAllocation &gfxAllocation) override;
void processResidency(ResidencyContainer *allocationsForResidency) override;
void processEviction() override;
bool waitForFlushStamp(FlushStamp &flushStampToWait) override;
// Returns the base class's memory manager cast to WddmMemoryManager.
WddmMemoryManager *getMemoryManager();
MemoryManager *createMemoryManager(bool enable64kbPages);
// Non-owning access to the Wddm object in use.
Wddm *peekWddm() {
return wddm;
}
protected:
void initPageTableManagerRegisters(LinearStream &csr) override;
GmmPageTableMngr *createPageTableManager();
Wddm *wddm;
// Header passed to Wddm::submit() with every flush; allocated in the
// constructor and deleted in the destructor.
COMMAND_BUFFER_HEADER_REC *commandBufferHeader;
};
} // namespace OCLRT

View File

@@ -0,0 +1,197 @@
/*
* Copyright (c) 2017, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
// Need to suppress warning 4005 caused by hw_cmds.h and wddm.h order.
// Current order must be preserved due to two versions of igfxfmid.h
#pragma warning(push)
#pragma warning(disable : 4005)
#include "hw_cmds.h"
#include "runtime/command_stream/linear_stream.h"
#include "runtime/mem_obj/mem_obj.h"
#include "runtime/device/device.h"
#include "runtime/os_interface/windows/wddm.h"
#include "runtime/os_interface/windows/wddm_device_command_stream.h"
#include "runtime/helpers/ptr_math.h"
#include "runtime/helpers/translationtable_callbacks.h"
#include "runtime/gmm_helper/page_table_mngr.h"
#pragma warning(pop)
#undef max
#include "runtime/os_interface/windows/wddm_memory_manager.h"
#include "runtime/os_interface/windows/wddm_engine_mapper.h"
#include "runtime/os_interface/windows/os_interface.h"
#include "runtime/os_interface/windows/gdi_interface.h"
namespace OCLRT {
// Template header named CommandBufferHeader; the constructor below copies it
// into each receiver's own COMMAND_BUFFER_HEADER.
// The labels in the next line name the macro arguments after the identifier, in order.
// Initialize COMMAND_BUFFER_HEADER Type PatchList Streamer Perf Tag
DECLARE_COMMAND_BUFFER(CommandBufferHeader, UMD_OCL, FALSE, FALSE, PERFTAG_OCL);
// Sets up the receiver around `wddm` (creating one when null):
//  1. picks the engine node (NodeOrdinal debug flag, or the hardware
//     capability table when the flag is -1) and hands it to the Wddm object,
//  2. creates the OSInterface and attaches the Wddm object to it,
//  3. allocates this receiver's command buffer header from the
//     CommandBufferHeader template,
//  4. selects the dispatch mode (batched unless overridden by the
//     CsrDispatchMode debug flag),
//  5. initializes the Wddm object and, when compression is supported,
//     installs a page table manager.
// A failed Wddm init only triggers a debug break.
template <typename GfxFamily>
WddmCommandStreamReceiver<GfxFamily>::WddmCommandStreamReceiver(const HardwareInfo &hwInfoIn, Wddm *wddm)
    : BaseClass(hwInfoIn) {
    this->wddm = wddm;
    if (!this->wddm) {
        this->wddm = Wddm::createWddm();
    }

    // Engine selection must reach the Wddm object before init() creates the context.
    int32_t engineNode = DebugManager.flags.NodeOrdinal.get();
    if (engineNode == -1) {
        engineNode = hwInfoIn.capabilityTable.nodeOrdinal;
    }
    GPUNODE_ORDINAL gpuNode = GPUNODE_3D;
    UNRECOVERABLE_IF(!WddmEngineMapper<GfxFamily>::engineNodeMap(static_cast<EngineType>(engineNode), gpuNode));
    this->wddm->setNode(gpuNode);

    this->osInterface.reset(new OSInterface());
    this->osInterface->get()->setWddm(this->wddm);

    commandBufferHeader = new COMMAND_BUFFER_HEADER;
    *commandBufferHeader = CommandBufferHeader;

    this->dispatchMode = DispatchMode::BatchedDispatch;
    if (DebugManager.flags.CsrDispatchMode.get()) {
        this->dispatchMode = static_cast<DispatchMode>(DebugManager.flags.CsrDispatchMode.get());
    }

    const bool wddmInitialized = this->wddm->template init<GfxFamily>();
    DEBUG_BREAK_IF(!wddmInitialized);

    if (hwInfoIn.capabilityTable.ftrCompression) {
        this->wddm->resetPageTableManager(createPageTableManager());
    }
}
// Releases receiver resources first, then the command buffer header that the
// constructor allocated.
template <typename GfxFamily>
WddmCommandStreamReceiver<GfxFamily>::~WddmCommandStreamReceiver() {
    this->cleanupResources();
    // delete on a null pointer is a no-op, so no explicit check is needed.
    delete commandBufferHeader;
}
// Submits a batch buffer:
//  - makes the command buffer allocation resident (immediately in
//    ImmediateDispatch mode, otherwise by appending it to
//    `allocationsForResidency` and tagging it with the current task count),
//  - processes residency for the collected allocations,
//  - fills the command buffer header (preemption, coherency, slice/EU fields),
//  - hands the commands to Wddm::submit().
// Returns the monitored fence's lastSubmittedFence after the submit call.
// The boolean result of submit() is not inspected here.
template <typename GfxFamily>
FlushStamp WddmCommandStreamReceiver<GfxFamily>::flush(BatchBuffer &batchBuffer,
                                                       EngineType engineOrdinal, ResidencyContainer *allocationsForResidency) {
    DEBUG_BREAK_IF(engineOrdinal != EngineType::ENGINE_RCS);

    void *commandStreamAddress = ptrOffset(batchBuffer.commandBufferAllocation->getUnderlyingBuffer(), batchBuffer.startOffset);

    if (this->dispatchMode != DispatchMode::ImmediateDispatch) {
        allocationsForResidency->push_back(batchBuffer.commandBufferAllocation);
        batchBuffer.commandBufferAllocation->residencyTaskCount = this->taskCount;
    } else {
        makeResident(*batchBuffer.commandBufferAllocation);
    }
    this->processResidency(allocationsForResidency);

    COMMAND_BUFFER_HEADER *header = reinterpret_cast<COMMAND_BUFFER_HEADER *>(commandBufferHeader);
    const bool preemptionEnabled = memoryManager->device->getPreemptionMode() != PreemptionMode::Disabled;
    header->NeedsMidBatchPreEmptionSupport = preemptionEnabled ? 1u : 0u;
    header->RequiresCoherency = batchBuffer.requiresCoherency;
    header->UmdRequestedSliceState = 0;
    header->UmdRequestedSubsliceCount = 0;
    // EU count per subslice, from the adapter's system info.
    header->UmdRequestedEUCount = wddm->getGtSysInfo()->EUCount / wddm->getGtSysInfo()->SubSliceCount;

    wddm->submit(commandStreamAddress, batchBuffer.usedSize - batchBuffer.startOffset, commandBufferHeader);

    return wddm->getMonitoredFence().lastSubmittedFence;
}
// Logs the allocation (and either its handle or its fragment handles) when residency
// debugging is enabled, then forwards to CommandStreamReceiver::makeResident.
template <typename GfxFamily>
void WddmCommandStreamReceiver<GfxFamily>::makeResident(GraphicsAllocation &gfxAllocation) {
    auto *wddmAllocation = reinterpret_cast<WddmAllocation *>(&gfxAllocation);
    DBG_LOG(ResidencyDebugEnable, "Residency:", __FUNCTION__, "allocation =", wddmAllocation);

    if (gfxAllocation.fragmentsStorage.fragmentCount != 0) {
        // Fragmented allocation: log every fragment handle.
        for (uint32_t fragmentIndex = 0; fragmentIndex < wddmAllocation->fragmentsStorage.fragmentCount; fragmentIndex++) {
            DBG_LOG(ResidencyDebugEnable, "Residency:", __FUNCTION__, "fragment handle =", wddmAllocation->fragmentsStorage.fragmentStorageData[fragmentIndex].osHandleStorage->handle);
        }
    } else {
        // Single allocation: log its own handle.
        DBG_LOG(ResidencyDebugEnable, "Residency:", __FUNCTION__, "allocation handle =", wddmAllocation->handle);
    }

    CommandStreamReceiver::makeResident(gfxAllocation);
}
// Asks the WDDM memory manager to make the given allocations resident;
// a failure only triggers a debug break.
template <typename GfxFamily>
void WddmCommandStreamReceiver<GfxFamily>::processResidency(ResidencyContainer *allocationsForResidency) {
    bool madeResident = getMemoryManager()->makeResidentResidencyAllocations(allocationsForResidency);
    DEBUG_BREAK_IF(!madeResident);
}
// Hands the pending eviction allocations to the memory manager and then clears that list.
template <typename GfxFamily>
void WddmCommandStreamReceiver<GfxFamily>::processEviction() {
    auto *wddmMemoryManager = getMemoryManager();
    wddmMemoryManager->makeNonResidentEvictionAllocations();
    wddmMemoryManager->clearEvictionAllocations();
}
// Returns the base-class memory manager downcast to WddmMemoryManager.
template <typename GfxFamily>
WddmMemoryManager *WddmCommandStreamReceiver<GfxFamily>::getMemoryManager() {
    auto *baseMemoryManager = CommandStreamReceiver::getMemoryManager();
    return static_cast<WddmMemoryManager *>(baseMemoryManager);
}
// Creates a WddmMemoryManager bound to this receiver's Wddm instance, stores it in
// memoryManager and returns it.
template <typename GfxFamily>
MemoryManager *WddmCommandStreamReceiver<GfxFamily>::createMemoryManager(bool enable64kbPages) {
    memoryManager = new WddmMemoryManager(enable64kbPages, this->wddm);
    return memoryManager;
}
// Waits on the CPU for the given flush stamp by delegating to Wddm::waitFromCpu
// and returns that call's result.
template <typename GfxFamily>
bool WddmCommandStreamReceiver<GfxFamily>::waitForFlushStamp(FlushStamp &flushStampToWait) {
    const bool waitResult = wddm->waitFromCpu(flushStampToWait);
    return waitResult;
}
// Builds the GMM device and translation-table callback structures from the Wddm/Gdi
// objects and creates a page table manager for TRTT and AUXTT.
template <typename GfxFamily>
GmmPageTableMngr *WddmCommandStreamReceiver<GfxFamily>::createPageTableManager() {
    auto gdiFunctions = wddm->getGdi();

    // Device-level objects and GDI entry points used by GMM.
    GMM_DEVICE_CALLBACKS deviceCallbacks = {};
    // clang-format off
    deviceCallbacks.Adapter         = wddm->getAdapter();
    deviceCallbacks.hDevice         = wddm->getDevice();
    deviceCallbacks.PagingQueue     = wddm->getPagingQueue();
    deviceCallbacks.PagingFence     = wddm->getPagingQueueSyncObject();
    deviceCallbacks.pfnAllocate     = gdiFunctions->createAllocation;
    deviceCallbacks.pfnDeallocate   = gdiFunctions->destroyAllocation;
    deviceCallbacks.pfnMapGPUVA     = gdiFunctions->mapGpuVirtualAddress;
    deviceCallbacks.pfnMakeResident = gdiFunctions->makeResident;
    deviceCallbacks.pfnEvict        = gdiFunctions->evict;
    deviceCallbacks.pfnReserveGPUVA = gdiFunctions->reserveGpuVirtualAddress;
    deviceCallbacks.pfnUpdateGPUVA  = gdiFunctions->updateGpuVirtualAddress;
    deviceCallbacks.pfnWaitFromCpu  = gdiFunctions->waitForSynchronizationObjectFromCpu;
    deviceCallbacks.pfnLock         = gdiFunctions->lock2;
    deviceCallbacks.pfnUnLock       = gdiFunctions->unlock2;
    deviceCallbacks.pfnEscape       = gdiFunctions->escape;
    // clang-format on

    // Translation-table callback: family-specific L3 address writer.
    GMM_TRANSLATIONTABLE_CALLBACKS ttCallbacks = {};
    ttCallbacks.pfWriteL3Adr = TTCallbacks<GfxFamily>::writeL3Address;

    return GmmPageTableMngr::create(&deviceCallbacks, TT_TYPE::TRTT | TT_TYPE::AUXTT, &ttCallbacks);
}
// Forwards page table manager register initialization for the given stream to Wddm.
template <typename GfxFamily>
void WddmCommandStreamReceiver<GfxFamily>::initPageTableManagerRegisters(LinearStream &csr) {
    wddm->initPageTableManagerRegisters(csr);
}
} // namespace OCLRT

View File

@@ -0,0 +1,39 @@
/*
* Copyright (c) 2017, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#pragma once
#include "runtime/helpers/engine_node.h"
#include "runtime/helpers/hw_info.h"
#include "runtime/os_interface/windows/wddm.h"
#include <cstdint>
namespace OCLRT {
// Per-gfx-family helper that translates a runtime EngineType into a WDDM GPU node ordinal.
// Only the declaration lives here; the definition is provided elsewhere.
template <typename gfxFamily>
class WddmEngineMapper {
public:
// Writes the node ordinal for engineType into gpuNode.
// NOTE(review): presumably returns true when a mapping exists and false otherwise - confirm
// against the definition.
static bool engineNodeMap(EngineType engineType, GPUNODE_ORDINAL &gpuNode);
};
} // namespace OCLRT

View File

@@ -0,0 +1,741 @@
/*
* Copyright (c) 2017, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#include "runtime/helpers/aligned_memory.h"
#include "runtime/helpers/ptr_math.h"
#include "runtime/gmm_helper/gmm_helper.h"
#include "runtime/gmm_helper/resource_info.h"
#include "runtime/helpers/surface_formats.h"
#include "runtime/memory_manager/deferred_deleter.h"
#include "runtime/memory_manager/deferrable_deletion.h"
#include "runtime/os_interface/windows/wddm_memory_manager.h"
#include "runtime/os_interface/windows/wddm_allocation.h"
#include "runtime/os_interface/windows/wddm.h"
#include <algorithm>
#undef max
namespace OCLRT {
// Runs the common cleanup and then destroys the Wddm instance held by this manager.
WddmMemoryManager::~WddmMemoryManager() {
    applyCommonCleanup();
    delete wddm;
}
// Stores the Wddm instance, creates the 32-bit heap allocator from Wddm's heap range,
// registers the trim callback and, when enabled by the debug flag, creates the deferred deleter.
WddmMemoryManager::WddmMemoryManager(bool enable64kbPages, Wddm *wddm) : MemoryManager(enable64kbPages), residencyLock(false) {
    DEBUG_BREAK_IF(wddm == nullptr);
    this->wddm = wddm;

    allocator32Bit.reset(new Allocator32bit(wddm->getHeap32Base(), wddm->getHeap32Size()));

    // The manager itself is passed as the callback context (read back in trimCallback).
    wddm->registerTrimCallback(trimCallback, this);

    asyncDeleterEnabled = DebugManager.flags.EnableDeferredDeleter.get();
    if (asyncDeleterEnabled) {
        deferredDeleter = createDeferredDeleter();
    }
}
// Trim notification entry point: recovers the memory manager from the notification
// context and forwards the trim flags and byte count to trimResidency().
void APIENTRY WddmMemoryManager::trimCallback(_Inout_ D3DKMT_TRIMNOTIFICATION *trimNotification) {
    auto *memoryManager = static_cast<WddmMemoryManager *>(trimNotification->Context);
    DEBUG_BREAK_IF(memoryManager == nullptr);
    memoryManager->trimResidency(trimNotification->Flags, trimNotification->NumBytesToTrim);
}
// Allocates graphics memory for an image.
// When tiling is not allowed for the image descriptor, gmm is deleted and a regular
// allocation of imgInfo.size is returned. Otherwise an allocation carrying gmm is created
// through Wddm; returns nullptr if that creation fails.
GraphicsAllocation *WddmMemoryManager::allocateGraphicsMemoryForImage(ImageInfo &imgInfo, Gmm *gmm) {
    const bool tilingAllowed = Gmm::allowTiling(*imgInfo.imgDesc);
    if (!tilingAllowed) {
        delete gmm;
        return allocateGraphicsMemory(imgInfo.size, MemoryConstants::preferredAlignment);
    }

    WddmAllocation stagingAllocation(nullptr, imgInfo.size);
    stagingAllocation.gmm = gmm;
    const bool created = wddm->createAllocation(&stagingAllocation);
    stagingAllocation.setGpuAddress(stagingAllocation.gpuPtr);
    if (!created) {
        return nullptr;
    }
    return new WddmAllocation(stagingAllocation);
}
// Creates a 64KB-page allocation: the size is rounded up to 64KB, the allocation is created
// through Wddm, locked to obtain a CPU pointer and mapped to a GPU virtual address.
// Returns nullptr (after deleting the Gmm) when Wddm fails to create the allocation.
GraphicsAllocation *WddmMemoryManager::allocateGraphicsMemory64kb(size_t size, size_t alignment, bool forcePin) {
    const size_t sizeAligned = alignUp(size, MemoryConstants::pageSize64k);

    WddmAllocation stagingAllocation(nullptr, sizeAligned, nullptr, sizeAligned);
    Gmm *gmm = Gmm::create(nullptr, sizeAligned);
    stagingAllocation.gmm = gmm;

    if (!wddm->createAllocation64k(&stagingAllocation)) {
        delete gmm;
        return nullptr;
    }

    auto *wddmAllocation = new WddmAllocation(stagingAllocation);
    auto cpuPtr = lockResource(wddmAllocation);
    wddmAllocation->setLocked(true);
    wddmAllocation->setAlignedCpuPtr(cpuPtr);
    // 64kb map is not needed
    wddm->mapGpuVirtualAddress(wddmAllocation, cpuPtr, sizeAligned, false, false);
    wddmAllocation->setCpuPtrAndGpuAddress(cpuPtr, (uint64_t)wddmAllocation->gpuPtr);
    return wddmAllocation;
}
// Allocates page-aligned system memory and wraps it in a WDDM allocation.
// Size and alignment are rounded up to the page size (a zero value becomes one page).
// Returns nullptr when system memory cannot be allocated or Wddm fails to create the
// allocation; in the latter case the Gmm and the system memory are released.
GraphicsAllocation *WddmMemoryManager::allocateGraphicsMemory(size_t size, size_t alignment, bool forcePin) {
    const size_t pageSize = MemoryConstants::pageSize;
    size_t newAlignment = pageSize;
    if (alignment) {
        newAlignment = alignUp(alignment, MemoryConstants::pageSize);
    }
    size_t sizeAligned = pageSize;
    if (size) {
        sizeAligned = alignUp(size, MemoryConstants::pageSize);
    }

    void *pSysMem = allocateSystemMemory(sizeAligned, newAlignment);
    if (pSysMem == nullptr) {
        return nullptr;
    }

    WddmAllocation stagingAllocation(pSysMem, sizeAligned, pSysMem, sizeAligned);
    stagingAllocation.cpuPtrAllocated = true;
    Gmm *gmm = Gmm::create(pSysMem, sizeAligned);
    stagingAllocation.gmm = gmm;

    if (wddm->createAllocation(&stagingAllocation)) {
        return new WddmAllocation(stagingAllocation);
    }

    delete gmm;
    freeSystemMemory(pSysMem);
    return nullptr;
}
// Host-pointer variant: rejects a null pointer (debug break, returns nullptr) and otherwise
// defers to MemoryManager::allocateGraphicsMemory.
GraphicsAllocation *WddmMemoryManager::allocateGraphicsMemory(size_t size, const void *ptrArg) {
    if (ptrArg != nullptr) {
        void *hostPtr = const_cast<void *>(ptrArg);
        return MemoryManager::allocateGraphicsMemory(size, hostPtr);
    }
    DEBUG_BREAK_IF(true);
    return nullptr;
}
// Creates an allocation flagged as 32-bit.
// With a caller-supplied ptr the allocation wraps the aligned-down pointer and records the
// offset of ptr within it; without one, aligned system memory is allocated and owned by the
// allocation. Returns nullptr when system memory or the Wddm allocation cannot be created.
GraphicsAllocation *WddmMemoryManager::allocate32BitGraphicsMemory(size_t size, void *ptr) {
    const void *ptrAligned = nullptr;
    size_t sizeAligned = size;
    void *pSysMem = nullptr;
    size_t offset = 0;
    bool cpuPtrAllocated = false;

    if (ptr == nullptr) {
        // No host pointer: back the allocation with freshly allocated system memory.
        sizeAligned = alignUp(size, MemoryConstants::allocationAlignment);
        pSysMem = allocateSystemMemory(sizeAligned, MemoryConstants::allocationAlignment);
        if (pSysMem == nullptr) {
            return nullptr;
        }
        ptrAligned = pSysMem;
        cpuPtrAllocated = true;
    } else {
        // Host pointer: cover whole pages and remember where ptr sits inside them.
        ptrAligned = alignDown(ptr, MemoryConstants::allocationAlignment);
        sizeAligned = alignSizeWholePage(ptr, size);
        offset = ptrDiff(ptr, ptrAligned);
    }

    WddmAllocation stagingAllocation(const_cast<void *>(ptrAligned), sizeAligned, const_cast<void *>(ptrAligned), sizeAligned);
    stagingAllocation.cpuPtrAllocated = cpuPtrAllocated;
    stagingAllocation.is32BitAllocation = true;
    Gmm *gmm = Gmm::create(ptrAligned, sizeAligned);
    stagingAllocation.gmm = gmm;

    if (!wddm->createAllocation(&stagingAllocation)) {
        delete gmm;
        freeSystemMemory(pSysMem);
        return nullptr;
    }

    stagingAllocation.setGpuAddress(stagingAllocation.gpuPtr);
    stagingAllocation.allocationOffset = offset;

    GraphicsAllocation *graphicsAllocation = new WddmAllocation(stagingAllocation);
    graphicsAllocation->is32BitAllocation = true;
    graphicsAllocation->gpuBaseAddress = Gmm::canonize(allocator32Bit->getBase());
    return graphicsAllocation;
}
// Creates an allocation for an externally shared resource.
//
// handle                 - shared handle; reinterpreted as an NT handle when ntHandle is true.
// requireSpecificBitness - when set together with force32bitAllocations (on a non-32-bit
//                          build), the allocation is marked and mapped as a 32-bit allocation.
// ntHandle               - selects Wddm::openNTHandle over Wddm::openSharedHandle.
//
// Returns the new allocation, or nullptr when opening the handle produced no Gmm.
GraphicsAllocation *WddmMemoryManager::createAllocationFromHandle(osHandle handle, bool requireSpecificBitness, bool ntHandle) {
    WddmAllocation allocation(nullptr, 0, handle);
    bool is32BitAllocation = false;
    if (ntHandle) {
        wddm->openNTHandle((HANDLE)((UINT_PTR)handle), &allocation);
    } else {
        wddm->openSharedHandle(handle, &allocation);
    }

    // The size below is read through allocation.gmm, so a failed open must not reach it.
    // NOTE(review): assumes gmm is left null when the open call fails - confirm in Wddm.
    if (allocation.gmm == nullptr) {
        DEBUG_BREAK_IF(true);
        return nullptr;
    }

    // Shared objects are passed without size
    size_t size = allocation.gmm->gmmResourceInfo->getSizeAllocation();
    allocation.setSize(size);

    void *ptr = nullptr;
    if (is32bit) {
        // 32-bit build: reserve a CPU address range that is passed to the GPU VA mapping.
        ptr = (void *)VirtualAlloc(nullptr, size, MEM_RESERVE, PAGE_READWRITE);
    } else if (requireSpecificBitness && this->force32bitAllocations) {
        is32BitAllocation = true;
        allocation.is32BitAllocation = true;
        allocation.gpuBaseAddress = Gmm::canonize(allocator32Bit->getBase());
    }

    wddm->mapGpuVirtualAddress(&allocation, ptr, size, is32BitAllocation, false);
    allocation.setGpuAddress(allocation.gpuPtr);
    auto *wddmAllocation = new WddmAllocation(allocation);
    return wddmAllocation;
}
// Creates an allocation from a (non-NT) shared handle; the third argument is ignored.
GraphicsAllocation *WddmMemoryManager::createGraphicsAllocationFromSharedHandle(osHandle handle, bool requireSpecificBitness, bool /*isReused*/) {
    const bool openAsNtHandle = false;
    return createAllocationFromHandle(handle, requireSpecificBitness, openAsNtHandle);
}
// Creates an allocation from an NT handle; no specific bitness is requested.
GraphicsAllocation *WddmMemoryManager::createGraphicsAllocationFromNTHandle(void *handle) {
    const auto handleValue = (UINT_PTR)handle;
    return createAllocationFromHandle((osHandle)handleValue, false, true);
}
// Locks the allocation through Wddm and returns the pointer Wddm hands back.
void *WddmMemoryManager::lockResource(GraphicsAllocation *graphicsAllocation) {
    auto *wddmAllocation = static_cast<WddmAllocation *>(graphicsAllocation);
    return wddm->lockResource(wddmAllocation);
}
// Unlocks the allocation through Wddm.
void WddmMemoryManager::unlockResource(GraphicsAllocation *graphicsAllocation) {
    auto *wddmAllocation = static_cast<WddmAllocation *>(graphicsAllocation);
    wddm->unlockResource(wddmAllocation);
}
// Releases a WDDM allocation: detaches it from the trim candidate list, destroys its Gmm,
// destroys (or defers destruction of) the underlying OS resources and finally deletes the
// allocation object itself.
void WddmMemoryManager::freeGraphicsMemoryImpl(GraphicsAllocation *gfxAllocation) {
WddmAllocation *input = static_cast<WddmAllocation *>(gfxAllocation);
// An invalid allocation only triggers a debug break; processing continues regardless.
auto status = validateAllocation(input);
DEBUG_BREAK_IF(!status);
// The trim candidate list is modified under the residency lock.
acquireResidencyLock();
if (input->getTrimCandidateListPosition() != trimListUnusedPosition) {
removeFromTrimCandidateList(gfxAllocation, true);
}
releaseResidencyLock();
if (input->gmm) {
// Render-compressed resources have their aux table mapping removed before the Gmm is deleted.
if (input->gmm->isRenderCompressed) {
status = unmapAuxVA(input->gmm, input->gpuPtr);
DEBUG_BREAK_IF(!status);
}
delete input->gmm;
}
// Allocations built from host-pointer fragments (not shared, CPU pointer not owned) are
// cleaned up through cleanGraphicsMemoryCreatedFromHostPtr.
if (input->peekSharedHandle() == false &&
input->cpuPtrAllocated == false &&
input->fragmentsStorage.fragmentCount > 0) {
cleanGraphicsMemoryCreatedFromHostPtr(gfxAllocation);
} else {
D3DKMT_HANDLE *allocationHandles = nullptr;
uint32_t allocationCount = 0;
D3DKMT_HANDLE resourceHandle = 0;
void *cpuPtr = nullptr;
void *gpuPtr = nullptr;
if (input->peekSharedHandle()) {
// Shared resource: destroyed via its resource handle; the GPU pointer is passed on only on 32-bit builds.
resourceHandle = input->resourceHandle;
if (is32bit) {
gpuPtr = (void *)input->gpuPtr;
}
} else {
// Regular allocation: destroyed via its single allocation handle; an owned CPU buffer is passed on as well.
allocationHandles = &input->handle;
allocationCount = 1;
if (input->cpuPtrAllocated) {
cpuPtr = input->getAlignedCpuPtr();
}
}
// A still-locked resource is unlocked before destruction.
if (input->isLocked()) {
unlockResource(input);
input->setLocked(false);
}
status = tryDeferDeletions(allocationHandles, allocationCount, input->getResidencyData().lastFence, resourceHandle, cpuPtr, gpuPtr);
DEBUG_BREAK_IF(!status);
}
delete gfxAllocation;
}
// Destroys the given allocations/resource and releases the CPU and GPU pointers.
// With a deferred deleter present the work is queued and true is returned; otherwise the
// destruction happens immediately and the result of Wddm::destroyAllocations is returned.
bool WddmMemoryManager::tryDeferDeletions(D3DKMT_HANDLE *handles, uint32_t allocationCount, uint64_t lastFenceValue, D3DKMT_HANDLE resourceHandle, void *cpuPtr, void *gpuPtr) {
    if (deferredDeleter) {
        deferredDeleter->deferDeletion(DeferrableDeletion::create(wddm, handles, allocationCount, lastFenceValue, resourceHandle, cpuPtr, gpuPtr));
        return true;
    }

    const bool destroyed = wddm->destroyAllocations(handles, allocationCount, lastFenceValue, resourceHandle);
    ::alignedFree(cpuPtr);
    wddm->releaseGpuPtr(gpuPtr);
    return destroyed;
}
// An allocation is valid when it is non-null, has a non-zero GPU address and size, and
// owns either an allocation handle or at least one fragment.
bool WddmMemoryManager::validateAllocation(WddmAllocation *alloc) {
    if (alloc == nullptr) {
        return false;
    }
    const auto bufferSize = alloc->getUnderlyingBufferSize();
    const bool hasGpuAddress = alloc->getGpuAddress() != 0u;
    const bool hasSize = bufferSize != 0;
    const bool hasBackingHandle = alloc->handle != 0 || alloc->fragmentsStorage.fragmentCount != 0;
    return hasGpuAddress && hasSize && hasBackingHandle;
}
// Creates OS handle, residency data and Gmm for every fragment that has a CPU pointer but
// no OS handle yet, stores those fragments in the host pointer manager, and then asks Wddm
// to create and map the allocations. Always returns true.
bool WddmMemoryManager::populateOsHandles(OsHandleStorage &handleStorage) {
    for (unsigned int fragmentIndex = 0; fragmentIndex < max_fragments_count; fragmentIndex++) {
        auto &fragment = handleStorage.fragmentStorageData[fragmentIndex];
        // If no fragment is present it means it already exists.
        const bool needsOsHandle = !fragment.osHandleStorage && fragment.cpuPtr;
        if (!needsOsHandle) {
            continue;
        }
        fragment.osHandleStorage = new OsHandle();
        fragment.residency = new ResidencyData();
        fragment.osHandleStorage->gmm = Gmm::create(fragment.cpuPtr, fragment.fragmentSize);
        hostPtrManager.storeFragment(fragment);
    }
    wddm->createAllocationsAndMapGpuVa(handleStorage);
    return true;
}
// Destroys the OS allocations of every fragment marked freeTheFragment (deferred when a
// deferred deleter exists) and then deletes the fragments' Gmm, OS handle and residency data.
void WddmMemoryManager::cleanOsHandles(OsHandleStorage &handleStorage) {
    D3DKMT_HANDLE handles[max_fragments_count] = {0};
    uint32_t allocationCount = 0;
    uint64_t lastFenceValue = 0;

    // Collect the handles to destroy and the newest fence among them.
    for (unsigned int fragmentIndex = 0; fragmentIndex < max_fragments_count; fragmentIndex++) {
        auto &fragment = handleStorage.fragmentStorageData[fragmentIndex];
        if (!fragment.freeTheFragment) {
            continue;
        }
        handles[allocationCount] = fragment.osHandleStorage->handle;
        fragment.residency->resident = false;
        allocationCount++;
        lastFenceValue = std::max(fragment.residency->lastFence, lastFenceValue);
    }

    const bool success = tryDeferDeletions(handles, allocationCount, lastFenceValue, 0, nullptr, nullptr);

    // Release the per-fragment bookkeeping objects.
    for (unsigned int fragmentIndex = 0; fragmentIndex < max_fragments_count; fragmentIndex++) {
        auto &fragment = handleStorage.fragmentStorageData[fragmentIndex];
        if (!fragment.freeTheFragment) {
            continue;
        }
        if (success) {
            fragment.osHandleStorage->handle = 0;
        }
        delete fragment.osHandleStorage->gmm;
        delete fragment.osHandleStorage;
        delete fragment.residency;
    }
}
// Wraps a host pointer in a new WddmAllocation and attaches a copy of the given fragment storage.
GraphicsAllocation *WddmMemoryManager::createGraphicsAllocation(OsHandleStorage &handleStorage, size_t hostPtrSize, const void *hostPtr) {
    void *mutableHostPtr = const_cast<void *>(hostPtr);
    auto *wddmAllocation = new WddmAllocation(mutableHostPtr, hostPtrSize, mutableHostPtr, hostPtrSize);
    wddmAllocation->fragmentsStorage = handleStorage;
    return wddmAllocation;
}
// Returns the system shared memory value reported by Wddm.
uint64_t WddmMemoryManager::getSystemSharedMemory() {
    const uint64_t systemSharedMemory = wddm->getSystemSharedMemory();
    return systemSharedMemory;
}
// Returns the maximum application address reported by Wddm.
uint64_t WddmMemoryManager::getMaxApplicationAddress() {
    const uint64_t maxApplicationAddress = wddm->getMaxApplicationAddress();
    return maxApplicationAddress;
}
// Makes every allocation in the residency container resident.
//
// allocationsForResidency - container to process; when null, this->residencyAllocations is used.
//
// Handles of allocations/fragments not yet marked resident are collected and passed to
// Wddm::makeResident. While that call fails, residency is trimmed to the reported byte
// budget and the call is retried; once nothing more can be trimmed, one final attempt is
// made with the third argument set to true. On success all allocations and fragments are
// marked resident and stamped with the current fence value.
// Returns the result of the last Wddm::makeResident call (true if no call was needed).
// The whole operation runs under the residency lock.
bool WddmMemoryManager::makeResidentResidencyAllocations(ResidencyContainer *allocationsForResidency) {
    auto &residencyAllocations = allocationsForResidency ? *allocationsForResidency : this->residencyAllocations;
    size_t residencyCount = residencyAllocations.size();
    std::unique_ptr<D3DKMT_HANDLE[]> handlesForResidency(new D3DKMT_HANDLE[residencyCount * max_fragments_count]);
    uint32_t totalHandlesCount = 0;

    acquireResidencyLock();
    DBG_LOG(ResidencyDebugEnable, "Residency:", __FUNCTION__, "currentFenceValue =", wddm->getMonitoredFence().currentFenceValue);

    for (uint32_t i = 0; i < residencyCount; i++) {
        WddmAllocation *allocation = reinterpret_cast<WddmAllocation *>(residencyAllocations[i]);
        bool mainResidency = false;
        // Sized by max_fragments_count so it always matches the handle buffer above.
        bool fragmentResidency[max_fragments_count] = {};

        mainResidency = allocation->getResidencyData().resident;
        DBG_LOG(ResidencyDebugEnable, "Residency:", __FUNCTION__, "allocation =", allocation, mainResidency ? "resident" : "not resident");

        if (allocation->getTrimCandidateListPosition() != trimListUnusedPosition) {
            // About to be used again, so it must no longer be a trim candidate.
            DBG_LOG(ResidencyDebugEnable, "Residency:", __FUNCTION__, "allocation =", allocation, "on trimCandidateList");
            removeFromTrimCandidateList(allocation);
        } else {
            for (uint32_t allocationId = 0; allocationId < allocation->fragmentsStorage.fragmentCount; allocationId++) {
                fragmentResidency[allocationId] = allocation->fragmentsStorage.fragmentStorageData[allocationId].residency->resident;
                DBG_LOG(ResidencyDebugEnable, "Residency:", __FUNCTION__, "fragment handle =",
                        allocation->fragmentsStorage.fragmentStorageData[allocationId].osHandleStorage->handle,
                        fragmentResidency[allocationId] ? "resident" : "not resident");
            }
        }

        // Queue only the handles that are not marked resident yet.
        if (allocation->fragmentsStorage.fragmentCount == 0) {
            if (!mainResidency)
                handlesForResidency[totalHandlesCount++] = allocation->handle;
        } else {
            for (uint32_t allocationId = 0; allocationId < allocation->fragmentsStorage.fragmentCount; allocationId++) {
                if (!fragmentResidency[allocationId])
                    handlesForResidency[totalHandlesCount++] = allocation->fragmentsStorage.fragmentStorageData[allocationId].osHandleStorage->handle;
            }
        }
    }

    bool result = true;
    if (totalHandlesCount) {
        uint64_t bytesToTrim = 0;
        while ((result = wddm->makeResident(handlesForResidency.get(), totalHandlesCount, false, &bytesToTrim)) == false) {
            this->memoryBudgetExhausted = true;
            bool trimmingDone = trimResidencyToBudget(bytesToTrim);
            bool cantTrimFurther = !trimmingDone;
            if (cantTrimFurther) {
                // Nothing left to trim: make one last attempt with the third argument set to true.
                result = wddm->makeResident(handlesForResidency.get(), totalHandlesCount, true, &bytesToTrim);
                break;
            }
        }
    }

    if (result == true) {
        for (uint32_t i = 0; i < residencyCount; i++) {
            WddmAllocation *allocation = reinterpret_cast<WddmAllocation *>(residencyAllocations[i]);
            // Update fence value not to early destroy / evict allocation
            allocation->getResidencyData().lastFence = wddm->getMonitoredFence().currentFenceValue;
            allocation->getResidencyData().resident = true;

            for (uint32_t allocationId = 0; allocationId < allocation->fragmentsStorage.fragmentCount; allocationId++) {
                allocation->fragmentsStorage.fragmentStorageData[allocationId].residency->resident = true;
                // Update fence value not to remove the fragment referenced by different GA in trimming callback
                allocation->fragmentsStorage.fragmentStorageData[allocationId].residency->lastFence = wddm->getMonitoredFence().currentFenceValue;
            }
        }
    }

    releaseResidencyLock();
    return result;
}
// Under the residency lock, puts every allocation from evictionAllocations onto the
// trim candidate list.
void WddmMemoryManager::makeNonResidentEvictionAllocations() {
    acquireResidencyLock();

    const size_t evictionCount = evictionAllocations.size();
    for (size_t index = 0; index < evictionCount; ++index) {
        auto *wddmAllocation = reinterpret_cast<WddmAllocation *>(evictionAllocations[index]);
        addToTrimCandidateList(wddmAllocation);
    }

    releaseResidencyLock();
}
// Removes an allocation from the trim candidate list.
// The allocation's slot is set to nullptr rather than erased, so other entries keep their
// positions; trailing null slots are dropped when the removed entry was the last one.
// When compactList is true and the list is at most half full, it is compacted afterwards.
void WddmMemoryManager::removeFromTrimCandidateList(GraphicsAllocation *allocation, bool compactList) {
WddmAllocation *wddmAllocation = (WddmAllocation *)allocation;
size_t position = wddmAllocation->getTrimCandidateListPosition();
// NOTE(review): presumably guards against decrementing a zero counter (unsigned wrap-around) - confirm the counter's type.
DEBUG_BREAK_IF(!(trimCandidatesCount > (trimCandidatesCount - 1)));
DEBUG_BREAK_IF(trimCandidatesCount > trimCandidateList.size());
trimCandidatesCount--;
trimCandidateList[position] = nullptr;
checkTrimCandidateCount();
// Removing the last entry: shrink the list so that it does not end with null slots.
if (position == trimCandidateList.size() - 1) {
size_t erasePosition = position;
if (position == 0) {
trimCandidateList.resize(0);
} else {
// Walk back to the nearest non-null entry (or to index 0).
while (trimCandidateList[erasePosition] == nullptr && erasePosition > 0) {
erasePosition--;
}
size_t sizeRemaining = erasePosition + 1;
// Reached index 0 and it is null as well: the list is empty.
if (erasePosition == 0 && trimCandidateList[erasePosition] == nullptr) {
sizeRemaining = 0;
}
trimCandidateList.resize(sizeRemaining);
}
}
wddmAllocation->setTrimCandidateListPosition(trimListUnusedPosition);
if (compactList && checkTrimCandidateListCompaction()) {
compactTrimCandidateList();
}
checkTrimCandidateCount();
}
// Appends the allocation to the trim candidate list and records its position,
// unless it already has a position in the list.
void WddmMemoryManager::addToTrimCandidateList(GraphicsAllocation *allocation) {
    auto *wddmAllocation = (WddmAllocation *)allocation;
    const size_t newPosition = trimCandidateList.size();
    DEBUG_BREAK_IF(trimCandidatesCount > trimCandidateList.size());

    const bool alreadyListed = wddmAllocation->getTrimCandidateListPosition() != trimListUnusedPosition;
    if (!alreadyListed) {
        trimCandidatesCount++;
        trimCandidateList.push_back(allocation);
        wddmAllocation->setTrimCandidateListPosition(newPosition);
    }

    checkTrimCandidateCount();
}
// Compacts the trim candidate list in place: non-null entries are moved forward into the
// null slots (their relative order is kept), each moved allocation gets its new position
// recorded, and the list is shrunk once its last element has been moved.
void WddmMemoryManager::compactTrimCandidateList() {
size_t size = trimCandidateList.size();
size_t freePosition = 0;
// Nothing to do for an empty list or a list without null slots.
if (size == 0 || size == trimCandidatesCount) {
return;
}
// The last slot is expected to be non-null here.
DEBUG_BREAK_IF(!(trimCandidateList[size - 1] != nullptr));
uint32_t previousCount = trimCandidatesCount;
DEBUG_BREAK_IF(trimCandidatesCount > trimCandidateList.size());
// Find the first null slot.
while (freePosition < trimCandidatesCount && trimCandidateList[freePosition] != nullptr)
freePosition++;
for (uint32_t i = 1; i < size; i++) {
// Move a non-null entry located after the free slot into that slot.
if (trimCandidateList[i] != nullptr && freePosition < i) {
trimCandidateList[freePosition] = trimCandidateList[i];
trimCandidateList[i] = nullptr;
((WddmAllocation *)trimCandidateList[freePosition])->setTrimCandidateListPosition(freePosition);
freePosition++;
// Last element was moved, erase elements from freePosition
if (i == size - 1) {
trimCandidateList.resize(freePosition);
}
}
}
// Compaction must not change the number of candidates.
DEBUG_BREAK_IF(trimCandidatesCount > trimCandidateList.size());
DEBUG_BREAK_IF(trimCandidatesCount != previousCount);
checkTrimCandidateCount();
}
// Handles a trim request.
//
// flags - trim flags; PeriodicTrim, TrimToBudget and RestartPeriodicTrim are examined.
// bytes - number of bytes to trim, used only for TrimToBudget.
//
// PeriodicTrim: starting from the head of the trim candidate list, evicts allocations whose
// last fence is not newer than the fence recorded at the previous periodic trim, and stops
// at the first candidate that was used since then.
// TrimToBudget: delegates to trimResidencyToBudget(bytes).
// PeriodicTrim or RestartPeriodicTrim: records the fence value currently visible through
// the monitored fence's CPU address as the new periodic-trim reference.
void WddmMemoryManager::trimResidency(D3DDDI_TRIMRESIDENCYSET_FLAGS flags, uint64_t bytes) {
    if (flags.PeriodicTrim) {
        D3DKMT_HANDLE fragmentEvictHandles[max_fragments_count] = {0};
        uint64_t sizeToTrim = 0;

        acquireResidencyLock();

        WddmAllocation *wddmAllocation = nullptr;
        while ((wddmAllocation = getTrimCandidateHead()) != nullptr) {
            DBG_LOG(ResidencyDebugEnable, "Residency:", __FUNCTION__, "lastPeriodicTrimFenceValue = ", lastPeriodicTrimFenceValue);

            // allocation was not used from last periodic trim
            if ((wddmAllocation)->getResidencyData().lastFence <= lastPeriodicTrimFenceValue) {
                DBG_LOG(ResidencyDebugEnable, "Residency:", __FUNCTION__, "allocation: handle =", wddmAllocation->handle, "lastFence =", (wddmAllocation)->getResidencyData().lastFence);

                uint32_t fragmentsToEvict = 0;

                if (wddmAllocation->fragmentsStorage.fragmentCount == 0) {
                    DBG_LOG(ResidencyDebugEnable, "Residency:", __FUNCTION__, "Evict allocation: handle =", wddmAllocation->handle, "lastFence =", (wddmAllocation)->getResidencyData().lastFence);
                    wddm->evict(&wddmAllocation->handle, 1, sizeToTrim);
                }

                // Only fragments that were not used since the last periodic trim are evicted.
                for (uint32_t allocationId = 0; allocationId < wddmAllocation->fragmentsStorage.fragmentCount; allocationId++) {
                    if (wddmAllocation->fragmentsStorage.fragmentStorageData[allocationId].residency->lastFence <= lastPeriodicTrimFenceValue) {
                        DBG_LOG(ResidencyDebugEnable, "Residency:", __FUNCTION__, "Evict fragment: handle =", wddmAllocation->fragmentsStorage.fragmentStorageData[allocationId].osHandleStorage->handle, "lastFence =", wddmAllocation->fragmentsStorage.fragmentStorageData[allocationId].residency->lastFence);
                        fragmentEvictHandles[fragmentsToEvict++] = wddmAllocation->fragmentsStorage.fragmentStorageData[allocationId].osHandleStorage->handle;
                        wddmAllocation->fragmentsStorage.fragmentStorageData[allocationId].residency->resident = false;
                    }
                }

                if (fragmentsToEvict != 0) {
                    wddm->evict((D3DKMT_HANDLE *)fragmentEvictHandles, fragmentsToEvict, sizeToTrim);
                }

                wddmAllocation->getResidencyData().resident = false;
                removeFromTrimCandidateList(wddmAllocation);
            } else {
                // The head candidate was used since the last periodic trim: stop here.
                break;
            }
        }

        if (checkTrimCandidateListCompaction()) {
            compactTrimCandidateList();
        }

        releaseResidencyLock();
    }

    if (flags.TrimToBudget) {
        acquireResidencyLock();
        trimResidencyToBudget(bytes);
        releaseResidencyLock();
    }

    if (flags.PeriodicTrim || flags.RestartPeriodicTrim) {
        lastPeriodicTrimFenceValue = *wddm->getMonitoredFence().cpuAddress;
        DBG_LOG(ResidencyDebugEnable, "Residency:", __FUNCTION__, "updated lastPeriodicTrimFenceValue =", lastPeriodicTrimFenceValue);
    }
}
// Debug-only consistency check (active when ResidencyDebugEnable is set): the number of
// non-null entries in the trim candidate list must equal trimCandidatesCount.
void WddmMemoryManager::checkTrimCandidateCount() {
    if (!DebugManager.flags.ResidencyDebugEnable.get()) {
        return;
    }
    uint32_t nonNullEntries = 0;
    for (const auto &candidate : trimCandidateList) {
        if (candidate != nullptr) {
            nonNullEntries++;
        }
    }
    DEBUG_BREAK_IF(nonNullEntries != trimCandidatesCount);
}
// Compaction is worthwhile when at most half of the list slots hold a candidate.
bool WddmMemoryManager::checkTrimCandidateListCompaction() {
    const bool atMostHalfFull = 2 * trimCandidatesCount <= trimCandidateList.size();
    return atMostHalfFull;
}
// Evicts trim candidates, starting from the head of the list, until at least `bytes` bytes
// have been evicted, the list is exhausted, or the head candidate's last fence is newer
// than the last submitted fence.
//
// bytes - number of bytes that should be trimmed.
//
// Returns true when the requested amount was fully trimmed (also when bytes is 0).
// The function does not take the residency lock itself; the callers visible in this file
// hold it around the call.
bool WddmMemoryManager::trimResidencyToBudget(uint64_t bytes) {
    bool trimToBudgetDone = false;
    D3DKMT_HANDLE fragmentEvictHandles[max_fragments_count] = {0};
    uint64_t numberOfBytesToTrim = bytes;
    WddmAllocation *wddmAllocation = nullptr;

    trimToBudgetDone = (numberOfBytesToTrim == 0);

    while (!trimToBudgetDone) {
        uint64_t lastFence = 0;
        wddmAllocation = getTrimCandidateHead();

        if (wddmAllocation == nullptr) {
            break;
        }

        lastFence = wddmAllocation->getResidencyData().lastFence;

        if (lastFence <= wddm->getMonitoredFence().lastSubmittedFence) {
            uint32_t fragmentsToEvict = 0;
            uint64_t sizeEvicted = 0;
            uint64_t sizeToTrim = 0;

            // The work using this allocation was submitted but has not completed yet: wait for it.
            if (lastFence > *wddm->getMonitoredFence().cpuAddress) {
                wddm->waitFromCpu(lastFence);
            }

            if (wddmAllocation->fragmentsStorage.fragmentCount == 0) {
                wddm->evict(&wddmAllocation->handle, 1, sizeToTrim);
                sizeEvicted = wddmAllocation->getUnderlyingBufferSize();
            } else {
                // Collect the fragments whose last use has already been submitted.
                for (uint32_t allocationId = 0; allocationId < wddmAllocation->fragmentsStorage.fragmentCount; allocationId++) {
                    if (wddmAllocation->fragmentsStorage.fragmentStorageData[allocationId].residency->lastFence <= wddm->getMonitoredFence().lastSubmittedFence) {
                        fragmentEvictHandles[fragmentsToEvict++] = wddmAllocation->fragmentsStorage.fragmentStorageData[allocationId].osHandleStorage->handle;
                    }
                }

                if (fragmentsToEvict != 0) {
                    wddm->evict((D3DKMT_HANDLE *)fragmentEvictHandles, fragmentsToEvict, sizeToTrim);

                    // Mark the evicted fragments and account for their size.
                    for (uint32_t allocationId = 0; allocationId < wddmAllocation->fragmentsStorage.fragmentCount; allocationId++) {
                        if (wddmAllocation->fragmentsStorage.fragmentStorageData[allocationId].residency->lastFence <= wddm->getMonitoredFence().lastSubmittedFence) {
                            wddmAllocation->fragmentsStorage.fragmentStorageData[allocationId].residency->resident = false;
                            sizeEvicted += wddmAllocation->fragmentsStorage.fragmentStorageData[allocationId].fragmentSize;
                        }
                    }
                }
            }

            // Saturating subtraction: never let the remaining byte count wrap around.
            if (sizeEvicted >= numberOfBytesToTrim) {
                numberOfBytesToTrim = 0;
            } else {
                numberOfBytesToTrim -= sizeEvicted;
            }

            wddmAllocation->getResidencyData().resident = false;
            removeFromTrimCandidateList(wddmAllocation);
            trimToBudgetDone = (numberOfBytesToTrim == 0);
        } else {
            // The head candidate's fence is newer than the last submitted fence: stop trimming.
            trimToBudgetDone = true;
        }
    }

    // Compact only if something was actually trimmed.
    if (bytes > numberOfBytesToTrim && checkTrimCandidateListCompaction()) {
        compactTrimCandidateList();
    }

    return numberOfBytesToTrim == 0;
}
// Requests removal (Map = false) of the aux table entry for the given GPU VA, using the
// Gmm's resource info handle; returns the result of Wddm::updateAuxTable.
bool WddmMemoryManager::unmapAuxVA(Gmm *gmm, D3DGPU_VIRTUAL_ADDRESS &gpuVA) {
    GMM_DDI_UPDATEAUXTABLE auxTableUpdate = {};
    auxTableUpdate.Map = false;
    auxTableUpdate.DoNotWait = true;
    auxTableUpdate.BaseGpuVA = gpuVA;
    auxTableUpdate.BaseResInfo = gmm->gmmResourceInfo->peekHandle();
    return wddm->updateAuxTable(auxTableUpdate);
}
} // namespace OCLRT

View File

@@ -0,0 +1,119 @@
/*
* Copyright (c) 2017, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#pragma once
#include "runtime/helpers/aligned_memory.h"
#include "runtime/memory_manager/memory_manager.h"
#include "runtime/os_interface/windows/wddm_allocation.h"
#include "runtime/os_interface/windows/windows_wrapper.h"
#include <d3dkmthk.h>
#include <map>
#include <mutex>
#include <vector>
namespace OCLRT {
class Gmm;
class Wddm;
class WddmMemoryManager : public MemoryManager {
public:
using MemoryManager::allocateGraphicsMemory;
using MemoryManager::createGraphicsAllocationFromSharedHandle;
~WddmMemoryManager();
WddmMemoryManager(bool enable64kbPages, Wddm *wddm);
void freeGraphicsMemoryImpl(GraphicsAllocation *gfxAllocation) override;
GraphicsAllocation *allocateGraphicsMemory64kb(size_t size, size_t alignment, bool forcePin) override;
GraphicsAllocation *allocateGraphicsMemory(size_t size, size_t alignment, bool forcePin) override;
GraphicsAllocation *allocateGraphicsMemory(size_t size, const void *ptr) override;
GraphicsAllocation *allocate32BitGraphicsMemory(size_t size, void *ptr) override;
GraphicsAllocation *createGraphicsAllocationFromSharedHandle(osHandle handle, bool requireSpecificBitness, bool reuseBO) override;
GraphicsAllocation *createGraphicsAllocationFromNTHandle(void *handle) override;
GraphicsAllocation *allocateGraphicsMemoryForImage(ImageInfo &imgInfo, Gmm *gmm) override;
void *lockResource(GraphicsAllocation *graphicsAllocation) override;
void unlockResource(GraphicsAllocation *graphicsAllocation) override;
bool makeResidentResidencyAllocations(ResidencyContainer *allocationsForResidency);
void makeNonResidentEvictionAllocations();
bool populateOsHandles(OsHandleStorage &handleStorage) override;
void cleanOsHandles(OsHandleStorage &handleStorage) override;
GraphicsAllocation *createGraphicsAllocation(OsHandleStorage &handleStorage, size_t hostPtrSize, const void *hostPtr) override;
static const D3DGPU_VIRTUAL_ADDRESS minimumAddress = static_cast<D3DGPU_VIRTUAL_ADDRESS>(0x0);
static const D3DGPU_VIRTUAL_ADDRESS maximumAddress = static_cast<D3DGPU_VIRTUAL_ADDRESS>((sizeof(size_t) == 8) ? 0x7ffffffffff : (D3DGPU_VIRTUAL_ADDRESS)0xffffffff);
uint64_t getSystemSharedMemory() override;
uint64_t getMaxApplicationAddress() override;
static void APIENTRY trimCallback(_Inout_ D3DKMT_TRIMNOTIFICATION *trimNotification);
void acquireResidencyLock() {
bool previousLockValue = false;
while (!residencyLock.compare_exchange_weak(previousLockValue, true))
previousLockValue = false;
}
void releaseResidencyLock() {
residencyLock = false;
}
bool tryDeferDeletions(D3DKMT_HANDLE *handles, uint32_t allocationCount, uint64_t lastFenceValue, D3DKMT_HANDLE resourceHandle, void *cpuPtr, void *gpuPtr);
bool isMemoryBudgetExhausted() const override { return memoryBudgetExhausted; }
protected:
GraphicsAllocation *createAllocationFromHandle(osHandle handle, bool requireSpecificBitness, bool ntHandle);
WddmAllocation *getTrimCandidateHead() {
uint32_t i = 0;
size_t size = trimCandidateList.size();
if (size == 0) {
return nullptr;
}
while ((trimCandidateList[i] == nullptr) && (i < size))
i++;
return (WddmAllocation *)trimCandidateList[i];
}
void removeFromTrimCandidateList(GraphicsAllocation *allocation, bool compactList = false);
void addToTrimCandidateList(GraphicsAllocation *allocation);
void compactTrimCandidateList();
void trimResidency(D3DDDI_TRIMRESIDENCYSET_FLAGS flags, uint64_t bytes);
bool trimResidencyToBudget(uint64_t bytes);
static bool validateAllocation(WddmAllocation *alloc);
bool checkTrimCandidateListCompaction();
void checkTrimCandidateCount();
ResidencyContainer trimCandidateList;
std::mutex trimCandidateListMutex;
std::atomic<bool> residencyLock;
uint64_t lastPeriodicTrimFenceValue = 0;
uint32_t trimCandidatesCount = 0;
bool memoryBudgetExhausted = false;
bool unmapAuxVA(Gmm *gmm, D3DGPU_VIRTUAL_ADDRESS &gpuVA);
private:
Wddm *wddm;
};
} // namespace OCLRT

View File

@@ -0,0 +1,30 @@
/*
* Copyright (c) 2017, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#include "runtime/os_interface/windows/windows_inc.h"
#include "runtime/utilities/perf_profiler.h"
namespace Os {
// Definition of Os::fileSeparator: a one-character string holding a forward slash.
const char *fileSeparator{"/"};
} // namespace Os
namespace OCLRT {
// Definition of the profiler pointer. __declspec(thread) is the MSVC
// thread-local storage class, so every thread gets its own copy, which starts
// out as nullptr.
__declspec(thread) PerfProfiler *gPerfProfiler = nullptr;
}

View File

@@ -0,0 +1,40 @@
/*
* Copyright (c) 2017, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#pragma once
#if defined(_WIN32)
#define PATH_SEPARATOR '\\'
namespace OCLRT {
// Forward declaration; only a pointer to the type is used below.
class PerfProfiler;

// Monitored fence value.
using FlushStamp = unsigned long long;

// Thread-local (MSVC __declspec(thread)) profiler pointer, declared here only.
extern __declspec(thread) PerfProfiler *gPerfProfiler;
} // namespace OCLRT
// For now, this file must be kept free of OS-specific #includes.
// Only what is needed to address portability issues belongs here.
namespace Os {
// Name of the OS GDI DLL; only declared in this header.
extern const char *gdiDllName;
} // namespace Os
#endif // _WIN32

View File

@@ -0,0 +1,30 @@
/*
* Copyright (c) 2017, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#pragma once
#include <Windows.h>
// Make NTSTATUS expand to LONG when no macro of that name has been defined yet.
// NOTE(review): #ifndef only detects macros, not typedefs - confirm this
// cannot clash with an SDK header that declares NTSTATUS as a typedef.
#ifndef NTSTATUS
#define NTSTATUS LONG
#endif
// The Windows headers define min/max as macros, which conflicts with std::min/std::max, so undefine them.
#undef min
#undef max