mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-22 10:17:01 +08:00
Add support for GT-Pin Callbacks [3/n]
Change-Id: Iea4b49efc9a666fde310ece15a9c69686d22f627
This commit is contained in:
2
Jenkinsfile
vendored
2
Jenkinsfile
vendored
@@ -2,4 +2,4 @@
|
|||||||
neoDependenciesRev='735095-769'
|
neoDependenciesRev='735095-769'
|
||||||
strategy='EQUAL'
|
strategy='EQUAL'
|
||||||
allowedF=43
|
allowedF=43
|
||||||
allowedCD=341
|
allowedCD=340
|
||||||
|
|||||||
@@ -359,7 +359,6 @@ cl_int CL_API_CALL clReleaseContext(cl_context context) {
|
|||||||
Context *pContext = castToObject<Context>(context);
|
Context *pContext = castToObject<Context>(context);
|
||||||
if (pContext) {
|
if (pContext) {
|
||||||
pContext->release();
|
pContext->release();
|
||||||
gtpinNotifyContextDestroy(context);
|
|
||||||
return CL_SUCCESS;
|
return CL_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1294,7 +1293,6 @@ cl_kernel CL_API_CALL clCreateKernel(cl_program clProgram,
|
|||||||
Program *pProgram = nullptr;
|
Program *pProgram = nullptr;
|
||||||
cl_kernel kernel = nullptr;
|
cl_kernel kernel = nullptr;
|
||||||
cl_int retVal = CL_SUCCESS;
|
cl_int retVal = CL_SUCCESS;
|
||||||
|
|
||||||
DBG_LOG_INPUTS("clProgram", clProgram, "kernelName", kernelName);
|
DBG_LOG_INPUTS("clProgram", clProgram, "kernelName", kernelName);
|
||||||
|
|
||||||
do {
|
do {
|
||||||
@@ -1350,6 +1348,9 @@ cl_int CL_API_CALL clCreateKernelsInProgram(cl_program clProgram,
|
|||||||
program,
|
program,
|
||||||
*kernelInfo,
|
*kernelInfo,
|
||||||
nullptr);
|
nullptr);
|
||||||
|
if (kernels[ordinal] != nullptr) {
|
||||||
|
gtpinNotifyKernelCreate(kernels[ordinal]);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -2559,6 +2560,11 @@ cl_int CL_API_CALL clEnqueueNDRangeKernel(cl_command_queue commandQueue,
|
|||||||
return retVal;
|
return retVal;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
auto pKernel = castToObjectOrAbort<Kernel>(kernel);
|
||||||
|
TakeOwnershipWrapper<Kernel> kernelOwnership(*pKernel, gtpinIsGTPinInitialized());
|
||||||
|
|
||||||
|
gtpinNotifyKernelSubmit(kernel, pCommandQueue);
|
||||||
|
|
||||||
retVal = pCommandQueue->enqueueKernel(
|
retVal = pCommandQueue->enqueueKernel(
|
||||||
kernel,
|
kernel,
|
||||||
workDim,
|
workDim,
|
||||||
@@ -3767,6 +3773,9 @@ cl_kernel CL_API_CALL clCloneKernel(cl_kernel sourceKernel,
|
|||||||
if (errcodeRet) {
|
if (errcodeRet) {
|
||||||
*errcodeRet = retVal;
|
*errcodeRet = retVal;
|
||||||
}
|
}
|
||||||
|
if (pClonedKernel != nullptr) {
|
||||||
|
gtpinNotifyKernelCreate(pClonedKernel);
|
||||||
|
}
|
||||||
|
|
||||||
return pClonedKernel;
|
return pClonedKernel;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -28,6 +28,7 @@
|
|||||||
#include "runtime/device_queue/device_queue.h"
|
#include "runtime/device_queue/device_queue.h"
|
||||||
#include "runtime/event/event.h"
|
#include "runtime/event/event.h"
|
||||||
#include "runtime/event/event_builder.h"
|
#include "runtime/event/event_builder.h"
|
||||||
|
#include "runtime/gtpin/gtpin_notify.h"
|
||||||
#include "runtime/helpers/aligned_memory.h"
|
#include "runtime/helpers/aligned_memory.h"
|
||||||
#include "runtime/helpers/array_count.h"
|
#include "runtime/helpers/array_count.h"
|
||||||
#include "runtime/helpers/get_info.h"
|
#include "runtime/helpers/get_info.h"
|
||||||
|
|||||||
@@ -27,6 +27,7 @@
|
|||||||
#include "runtime/command_queue/dispatch_walker.h"
|
#include "runtime/command_queue/dispatch_walker.h"
|
||||||
#include "runtime/command_stream/command_stream_receiver.h"
|
#include "runtime/command_stream/command_stream_receiver.h"
|
||||||
#include "runtime/event/event_builder.h"
|
#include "runtime/event/event_builder.h"
|
||||||
|
#include "runtime/gtpin/gtpin_notify.h"
|
||||||
#include "runtime/helpers/kernel_commands.h"
|
#include "runtime/helpers/kernel_commands.h"
|
||||||
#include "runtime/helpers/dispatch_info_builder.h"
|
#include "runtime/helpers/dispatch_info_builder.h"
|
||||||
#include "runtime/mem_obj/buffer.h"
|
#include "runtime/mem_obj/buffer.h"
|
||||||
@@ -549,6 +550,8 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
|
|||||||
|
|
||||||
DEBUG_BREAK_IF(taskLevel >= Event::eventNotReady);
|
DEBUG_BREAK_IF(taskLevel >= Event::eventNotReady);
|
||||||
|
|
||||||
|
gtpinNotifyPreFlushTask(this);
|
||||||
|
|
||||||
CompletionStamp completionStamp = commandStreamReceiver.flushTask(
|
CompletionStamp completionStamp = commandStreamReceiver.flushTask(
|
||||||
commandStream,
|
commandStream,
|
||||||
commandStreamStart,
|
commandStreamStart,
|
||||||
|
|||||||
@@ -24,6 +24,7 @@
|
|||||||
#include "runtime/command_stream/command_stream_receiver.h"
|
#include "runtime/command_stream/command_stream_receiver.h"
|
||||||
#include "runtime/command_stream/preemption.h"
|
#include "runtime/command_stream/preemption.h"
|
||||||
#include "runtime/device/device.h"
|
#include "runtime/device/device.h"
|
||||||
|
#include "runtime/gtpin/gtpin_notify.h"
|
||||||
#include "runtime/memory_manager/memory_manager.h"
|
#include "runtime/memory_manager/memory_manager.h"
|
||||||
#include "runtime/helpers/cache_policy.h"
|
#include "runtime/helpers/cache_policy.h"
|
||||||
#include "runtime/os_interface/os_interface.h"
|
#include "runtime/os_interface/os_interface.h"
|
||||||
@@ -192,6 +193,7 @@ bool CommandStreamReceiver::waitForCompletionWithTimeout(bool enableTimeout, int
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (*getTagAddress() >= taskCountToWait) {
|
if (*getTagAddress() >= taskCountToWait) {
|
||||||
|
gtpinNotifyTaskCompletion(taskCountToWait);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
return false;
|
return false;
|
||||||
|
|||||||
@@ -23,6 +23,7 @@
|
|||||||
#include "runtime/command_stream/command_stream_receiver_hw.h"
|
#include "runtime/command_stream/command_stream_receiver_hw.h"
|
||||||
#include "runtime/command_stream/linear_stream.h"
|
#include "runtime/command_stream/linear_stream.h"
|
||||||
#include "runtime/device/device.h"
|
#include "runtime/device/device.h"
|
||||||
|
#include "runtime/gtpin/gtpin_notify.h"
|
||||||
#include "runtime/helpers/cache_policy.h"
|
#include "runtime/helpers/cache_policy.h"
|
||||||
#include "runtime/helpers/preamble.h"
|
#include "runtime/helpers/preamble.h"
|
||||||
#include "runtime/helpers/ptr_math.h"
|
#include "runtime/helpers/ptr_math.h"
|
||||||
@@ -377,6 +378,9 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
|
|||||||
engineType};
|
engineType};
|
||||||
|
|
||||||
this->taskLevel += levelClosed ? 1 : 0;
|
this->taskLevel += levelClosed ? 1 : 0;
|
||||||
|
|
||||||
|
gtpinNotifyFlushTask(completionStamp.taskCount);
|
||||||
|
|
||||||
return completionStamp;
|
return completionStamp;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -25,6 +25,7 @@
|
|||||||
#include "runtime/device/device.h"
|
#include "runtime/device/device.h"
|
||||||
#include "runtime/device_queue/device_queue.h"
|
#include "runtime/device_queue/device_queue.h"
|
||||||
#include "runtime/mem_obj/image.h"
|
#include "runtime/mem_obj/image.h"
|
||||||
|
#include "runtime/gtpin/gtpin_notify.h"
|
||||||
#include "runtime/helpers/get_info.h"
|
#include "runtime/helpers/get_info.h"
|
||||||
#include "runtime/helpers/ptr_math.h"
|
#include "runtime/helpers/ptr_math.h"
|
||||||
#include "runtime/platform/platform.h"
|
#include "runtime/platform/platform.h"
|
||||||
@@ -71,6 +72,7 @@ Context::~Context() {
|
|||||||
if (memoryManager && memoryManager->isAsyncDeleterEnabled()) {
|
if (memoryManager && memoryManager->isAsyncDeleterEnabled()) {
|
||||||
memoryManager->getDeferredDeleter()->removeClient();
|
memoryManager->getDeferredDeleter()->removeClient();
|
||||||
}
|
}
|
||||||
|
gtpinNotifyContextDestroy((cl_context)this);
|
||||||
}
|
}
|
||||||
|
|
||||||
DeviceQueue *Context::getDefaultDeviceQueue() {
|
DeviceQueue *Context::getDefaultDeviceQueue() {
|
||||||
|
|||||||
@@ -22,6 +22,7 @@
|
|||||||
|
|
||||||
#include "gtpin_ocl_interface.h"
|
#include "gtpin_ocl_interface.h"
|
||||||
#include "runtime/gtpin/gtpin_hw_helper.h"
|
#include "runtime/gtpin/gtpin_hw_helper.h"
|
||||||
|
#include "runtime/gtpin/gtpin_hw_helper.inl"
|
||||||
|
|
||||||
namespace OCLRT {
|
namespace OCLRT {
|
||||||
|
|
||||||
|
|||||||
@@ -22,6 +22,7 @@
|
|||||||
|
|
||||||
#include "gtpin_ocl_interface.h"
|
#include "gtpin_ocl_interface.h"
|
||||||
#include "runtime/gtpin/gtpin_hw_helper.h"
|
#include "runtime/gtpin/gtpin_hw_helper.h"
|
||||||
|
#include "runtime/gtpin/gtpin_hw_helper.inl"
|
||||||
|
|
||||||
namespace OCLRT {
|
namespace OCLRT {
|
||||||
|
|
||||||
|
|||||||
@@ -26,9 +26,11 @@ if(GTPIN_HEADERS_DIR)
|
|||||||
${CMAKE_CURRENT_SOURCE_DIR}/gtpin_helpers.h
|
${CMAKE_CURRENT_SOURCE_DIR}/gtpin_helpers.h
|
||||||
${CMAKE_CURRENT_SOURCE_DIR}/gtpin_hw_helper.cpp
|
${CMAKE_CURRENT_SOURCE_DIR}/gtpin_hw_helper.cpp
|
||||||
${CMAKE_CURRENT_SOURCE_DIR}/gtpin_hw_helper.h
|
${CMAKE_CURRENT_SOURCE_DIR}/gtpin_hw_helper.h
|
||||||
|
${CMAKE_CURRENT_SOURCE_DIR}/gtpin_hw_helper.inl
|
||||||
${CMAKE_CURRENT_SOURCE_DIR}/gtpin_init.cpp
|
${CMAKE_CURRENT_SOURCE_DIR}/gtpin_init.cpp
|
||||||
${CMAKE_CURRENT_SOURCE_DIR}/gtpin_init.h
|
${CMAKE_CURRENT_SOURCE_DIR}/gtpin_init.h
|
||||||
${CMAKE_CURRENT_SOURCE_DIR}/gtpin_notify.h
|
${CMAKE_CURRENT_SOURCE_DIR}/gtpin_notify.h
|
||||||
|
${CMAKE_CURRENT_SOURCE_DIR}/gtpin_defs.h
|
||||||
PARENT_SCOPE
|
PARENT_SCOPE
|
||||||
)
|
)
|
||||||
else()
|
else()
|
||||||
|
|||||||
@@ -32,4 +32,29 @@ void gtpinNotifyContextDestroy(cl_context context) {
|
|||||||
|
|
||||||
void gtpinNotifyKernelCreate(cl_kernel kernel) {
|
void gtpinNotifyKernelCreate(cl_kernel kernel) {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void gtpinNotifyKernelSubmit(cl_kernel kernel, void *pCmdQueue) {
|
||||||
|
}
|
||||||
|
|
||||||
|
void gtpinNotifyPreFlushTask(void *pCmdQueue) {
|
||||||
|
}
|
||||||
|
|
||||||
|
void gtpinNotifyFlushTask(uint32_t flushedTaskCount) {
|
||||||
|
}
|
||||||
|
|
||||||
|
void gtpinNotifyTaskCompletion(uint32_t completedTaskCount) {
|
||||||
|
}
|
||||||
|
|
||||||
|
void gtpinNotifyMakeResident(void *pKernel, void *pCommandStreamReceiver) {
|
||||||
|
}
|
||||||
|
|
||||||
|
void gtpinNotifyUpdateResidencyList(void *pKernel, void *pResidencyVector) {
|
||||||
|
}
|
||||||
|
|
||||||
|
void gtpinNotifyPlatformShutdown() {
|
||||||
|
}
|
||||||
|
|
||||||
|
bool gtpinIsGTPinInitialized() {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -23,20 +23,33 @@
|
|||||||
#include "config.h"
|
#include "config.h"
|
||||||
#include "gtpin_ocl_interface.h"
|
#include "gtpin_ocl_interface.h"
|
||||||
#include "CL/cl.h"
|
#include "CL/cl.h"
|
||||||
|
#include "runtime/command_queue/command_queue.h"
|
||||||
|
#include "runtime/command_stream/command_stream_receiver.h"
|
||||||
#include "runtime/context/context.h"
|
#include "runtime/context/context.h"
|
||||||
#include "runtime/device/device.h"
|
#include "runtime/device/device.h"
|
||||||
#include "runtime/device/device_info.h"
|
#include "runtime/device/device_info.h"
|
||||||
|
#include "runtime/gtpin/gtpin_defs.h"
|
||||||
#include "runtime/gtpin/gtpin_hw_helper.h"
|
#include "runtime/gtpin/gtpin_hw_helper.h"
|
||||||
#include "runtime/kernel/kernel.h"
|
#include "runtime/kernel/kernel.h"
|
||||||
|
#include "runtime/mem_obj/buffer.h"
|
||||||
|
#include "runtime/memory_manager/surface.h"
|
||||||
#include "runtime/platform/platform.h"
|
#include "runtime/platform/platform.h"
|
||||||
|
#include "runtime/utilities/spinlock.h"
|
||||||
|
#include <deque>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
using namespace gtpin;
|
using namespace gtpin;
|
||||||
|
|
||||||
namespace OCLRT {
|
namespace OCLRT {
|
||||||
|
|
||||||
extern bool isGTPinInitialized;
|
extern bool isGTPinInitialized;
|
||||||
extern gtpin::ocl::gtpin_events_t GTPinCallbacks;
|
extern gtpin::ocl::gtpin_events_t GTPinCallbacks;
|
||||||
|
|
||||||
igc_init_t *pIgcInfo = nullptr;
|
igc_init_t *pIgcInfo = nullptr;
|
||||||
|
std::atomic<int> sequenceCount(1);
|
||||||
|
CommandQueue *pCmdQueueForFlushTask = nullptr;
|
||||||
|
std::deque<gtpinkexec_t> kernelExecQueue;
|
||||||
|
std::atomic_flag kernelExecQueueLock = ATOMIC_FLAG_INIT;
|
||||||
|
|
||||||
void gtpinNotifyContextCreate(cl_context context) {
|
void gtpinNotifyContextCreate(cl_context context) {
|
||||||
if (isGTPinInitialized) {
|
if (isGTPinInitialized) {
|
||||||
@@ -47,7 +60,6 @@ void gtpinNotifyContextCreate(cl_context context) {
|
|||||||
GTPinHwHelper >pinHelper = GTPinHwHelper::get(genFamily);
|
GTPinHwHelper >pinHelper = GTPinHwHelper::get(genFamily);
|
||||||
gtpinPlatformInfo.gen_version = (gtpin::GTPIN_GEN_VERSION)gtpinHelper.getGenVersion();
|
gtpinPlatformInfo.gen_version = (gtpin::GTPIN_GEN_VERSION)gtpinHelper.getGenVersion();
|
||||||
gtpinPlatformInfo.device_id = static_cast<uint32_t>(pDevice->getHardwareInfo().pPlatform->usDeviceID);
|
gtpinPlatformInfo.device_id = static_cast<uint32_t>(pDevice->getHardwareInfo().pPlatform->usDeviceID);
|
||||||
|
|
||||||
(*GTPinCallbacks.onContextCreate)((context_handle_t)context, >pinPlatformInfo, &pIgcInfo);
|
(*GTPinCallbacks.onContextCreate)((context_handle_t)context, >pinPlatformInfo, &pIgcInfo);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -60,24 +72,178 @@ void gtpinNotifyContextDestroy(cl_context context) {
|
|||||||
|
|
||||||
void gtpinNotifyKernelCreate(cl_kernel kernel) {
|
void gtpinNotifyKernelCreate(cl_kernel kernel) {
|
||||||
if (isGTPinInitialized) {
|
if (isGTPinInitialized) {
|
||||||
auto pKernel = castToObject<Kernel>(kernel);
|
auto pKernel = castToObjectOrAbort<Kernel>(kernel);
|
||||||
|
size_t gtpinBTI = pKernel->getNumberOfBindingTableStates();
|
||||||
|
// Enlarge local copy of SSH by 1 SS
|
||||||
|
auto pPlatform = platform();
|
||||||
|
auto pDevice = pPlatform->getDevice(0);
|
||||||
|
GFXCORE_FAMILY genFamily = pDevice->getHardwareInfo().pPlatform->eRenderCoreFamily;
|
||||||
|
GTPinHwHelper >pinHelper = GTPinHwHelper::get(genFamily);
|
||||||
|
if (!gtpinHelper.addSurfaceState(pKernel)) {
|
||||||
|
// Kernel with no SSH or Kernel EM, not supported
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (pKernel->isKernelHeapSubstituted()) {
|
||||||
|
// ISA for this kernel was already substituted
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
// Notify GT-Pin that new kernel was created
|
||||||
Context *pContext = &(pKernel->getContext());
|
Context *pContext = &(pKernel->getContext());
|
||||||
cl_context context = (cl_context)pContext;
|
cl_context context = (cl_context)pContext;
|
||||||
const KernelInfo &kInfo = pKernel->getKernelInfo();
|
auto &kernelInfo = pKernel->getKernelInfo();
|
||||||
instrument_params_in_t paramsIn;
|
instrument_params_in_t paramsIn;
|
||||||
paramsIn.kernel_type = GTPIN_KERNEL_TYPE_CS;
|
paramsIn.kernel_type = GTPIN_KERNEL_TYPE_CS;
|
||||||
paramsIn.simd = (GTPIN_SIMD_WIDTH)kInfo.getMaxSimdSize();
|
paramsIn.simd = (GTPIN_SIMD_WIDTH)kernelInfo.getMaxSimdSize();
|
||||||
paramsIn.orig_kernel_binary = (uint8_t *)pKernel->getKernelHeap();
|
paramsIn.orig_kernel_binary = (uint8_t *)pKernel->getKernelHeap();
|
||||||
paramsIn.orig_kernel_size = static_cast<uint32_t>(pKernel->getKernelHeapSize());
|
paramsIn.orig_kernel_size = static_cast<uint32_t>(pKernel->getKernelHeapSize());
|
||||||
paramsIn.buffer_type = GTPIN_BUFFER_BINDFULL;
|
paramsIn.buffer_type = GTPIN_BUFFER_BINDFULL;
|
||||||
paramsIn.buffer_desc.BTI = kInfo.patchInfo.bindingTableState->Count;
|
paramsIn.buffer_desc.BTI = static_cast<uint32_t>(gtpinBTI);
|
||||||
paramsIn.igc_hash_id = kInfo.heapInfo.pKernelHeader->ShaderHashCode;
|
paramsIn.igc_hash_id = kernelInfo.heapInfo.pKernelHeader->ShaderHashCode;
|
||||||
paramsIn.kernel_name = (char *)kInfo.name.c_str();
|
paramsIn.kernel_name = (char *)kernelInfo.name.c_str();
|
||||||
paramsIn.igc_info = nullptr;
|
paramsIn.igc_info = nullptr;
|
||||||
instrument_params_out_t paramsOut = {0};
|
instrument_params_out_t paramsOut = {0};
|
||||||
(*GTPinCallbacks.onKernelCreate)((context_handle_t)(cl_context)context, ¶msIn, ¶msOut);
|
(*GTPinCallbacks.onKernelCreate)((context_handle_t)(cl_context)context, ¶msIn, ¶msOut);
|
||||||
|
// Substitute ISA of created kernel with instrumented code
|
||||||
pKernel->substituteKernelHeap(paramsOut.inst_kernel_binary, paramsOut.inst_kernel_size);
|
pKernel->substituteKernelHeap(paramsOut.inst_kernel_binary, paramsOut.inst_kernel_size);
|
||||||
pKernel->setKernelId(paramsOut.kernel_id);
|
pKernel->setKernelId(paramsOut.kernel_id);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void gtpinNotifyKernelSubmit(cl_kernel kernel, void *pCmdQueue) {
|
||||||
|
if (isGTPinInitialized) {
|
||||||
|
auto pKernel = castToObjectOrAbort<Kernel>(kernel);
|
||||||
|
if (pKernel->getSurfaceStateHeapSize() == 0) {
|
||||||
|
// Kernel with no SSH, not supported
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
Context *pContext = &(pKernel->getContext());
|
||||||
|
cl_context context = (cl_context)pContext;
|
||||||
|
uint64_t kernelId = pKernel->getKernelId();
|
||||||
|
command_buffer_handle_t commandBuffer = (command_buffer_handle_t)((uintptr_t)(sequenceCount++));
|
||||||
|
uint32_t kernelOffset = 0;
|
||||||
|
resource_handle_t resource = 0;
|
||||||
|
// Notify GT-Pin that abstract "command buffer" was created
|
||||||
|
(*GTPinCallbacks.onCommandBufferCreate)((context_handle_t)context, commandBuffer);
|
||||||
|
// Notify GT-Pin that kernel was submited for execution
|
||||||
|
(*GTPinCallbacks.onKernelSubmit)(commandBuffer, kernelId, &kernelOffset, &resource);
|
||||||
|
// Create new record in Kernel Execution Queue describing submited kernel
|
||||||
|
gtpinkexec_t kExec;
|
||||||
|
kExec.pKernel = pKernel;
|
||||||
|
kExec.gtpinResource = (cl_mem)resource;
|
||||||
|
kExec.commandBuffer = commandBuffer;
|
||||||
|
kExec.pCommandQueue = (CommandQueue *)pCmdQueue;
|
||||||
|
SpinLock lock;
|
||||||
|
lock.enter(kernelExecQueueLock);
|
||||||
|
kernelExecQueue.push_back(kExec);
|
||||||
|
lock.leave(kernelExecQueueLock);
|
||||||
|
// Patch SSH[gtpinBTI] with GT-Pin resource
|
||||||
|
auto pPlatform = platform();
|
||||||
|
auto pDevice = pPlatform->getDevice(0);
|
||||||
|
GFXCORE_FAMILY genFamily = pDevice->getHardwareInfo().pPlatform->eRenderCoreFamily;
|
||||||
|
GTPinHwHelper >pinHelper = GTPinHwHelper::get(genFamily);
|
||||||
|
size_t gtpinBTI = pKernel->getNumberOfBindingTableStates() - 1;
|
||||||
|
void *pSurfaceState = gtpinHelper.getSurfaceState(pKernel, gtpinBTI);
|
||||||
|
cl_mem buffer = (cl_mem)resource;
|
||||||
|
auto pBuffer = castToObjectOrAbort<Buffer>(buffer);
|
||||||
|
pBuffer->setArgStateful(const_cast<void *>(pSurfaceState));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void gtpinNotifyPreFlushTask(void *pCmdQueue) {
|
||||||
|
if (isGTPinInitialized) {
|
||||||
|
pCmdQueueForFlushTask = (CommandQueue *)pCmdQueue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void gtpinNotifyFlushTask(uint32_t flushedTaskCount) {
|
||||||
|
if (isGTPinInitialized) {
|
||||||
|
SpinLock lock;
|
||||||
|
lock.enter(kernelExecQueueLock);
|
||||||
|
size_t numElems = kernelExecQueue.size();
|
||||||
|
for (size_t n = 0; n < numElems; n++) {
|
||||||
|
if ((kernelExecQueue[n].pCommandQueue == pCmdQueueForFlushTask) && !kernelExecQueue[n].isTaskCountValid) {
|
||||||
|
// Update record in Kernel Execution Queue with kernel's TC
|
||||||
|
kernelExecQueue[n].isTaskCountValid = true;
|
||||||
|
kernelExecQueue[n].taskCount = flushedTaskCount;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
lock.leave(kernelExecQueueLock);
|
||||||
|
pCmdQueueForFlushTask = nullptr;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void gtpinNotifyTaskCompletion(uint32_t completedTaskCount) {
|
||||||
|
if (isGTPinInitialized) {
|
||||||
|
SpinLock lock;
|
||||||
|
lock.enter(kernelExecQueueLock);
|
||||||
|
size_t numElems = kernelExecQueue.size();
|
||||||
|
for (size_t n = 0; n < numElems;) {
|
||||||
|
if (kernelExecQueue[n].isTaskCountValid && (kernelExecQueue[n].taskCount <= completedTaskCount)) {
|
||||||
|
// Notify GT-Pin that execution of "command buffer" was completed
|
||||||
|
(*GTPinCallbacks.onCommandBufferComplete)(kernelExecQueue[n].commandBuffer);
|
||||||
|
// Remove kernel's record from Kernel Execution Queue
|
||||||
|
kernelExecQueue.erase(kernelExecQueue.begin() + n);
|
||||||
|
numElems--;
|
||||||
|
} else {
|
||||||
|
n++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
lock.leave(kernelExecQueueLock);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void gtpinNotifyMakeResident(void *pKernel, void *pCSR) {
|
||||||
|
if (isGTPinInitialized) {
|
||||||
|
SpinLock lock;
|
||||||
|
lock.enter(kernelExecQueueLock);
|
||||||
|
size_t numElems = kernelExecQueue.size();
|
||||||
|
for (size_t n = 0; n < numElems; n++) {
|
||||||
|
if ((kernelExecQueue[n].pKernel == pKernel) && !kernelExecQueue[n].isResourceResident) {
|
||||||
|
// It's time for kernel to make resident its GT-Pin resource
|
||||||
|
CommandStreamReceiver *pCommandStreamReceiver = reinterpret_cast<CommandStreamReceiver *>(pCSR);
|
||||||
|
cl_mem gtpinBuffer = kernelExecQueue[n].gtpinResource;
|
||||||
|
auto pBuffer = castToObjectOrAbort<Buffer>(gtpinBuffer);
|
||||||
|
GraphicsAllocation *pGfxAlloc = pBuffer->getGraphicsAllocation();
|
||||||
|
pCommandStreamReceiver->makeResident(*pGfxAlloc);
|
||||||
|
kernelExecQueue[n].isResourceResident = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
lock.leave(kernelExecQueueLock);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void gtpinNotifyUpdateResidencyList(void *pKernel, void *pResVec) {
|
||||||
|
if (isGTPinInitialized) {
|
||||||
|
SpinLock lock;
|
||||||
|
lock.enter(kernelExecQueueLock);
|
||||||
|
size_t numElems = kernelExecQueue.size();
|
||||||
|
for (size_t n = 0; n < numElems; n++) {
|
||||||
|
if ((kernelExecQueue[n].pKernel == pKernel) && !kernelExecQueue[n].isResourceResident) {
|
||||||
|
// It's time for kernel to update its residency list with its GT-Pin resource
|
||||||
|
std::vector<Surface *> *pResidencyVector = (std::vector<Surface *> *)pResVec;
|
||||||
|
cl_mem gtpinBuffer = kernelExecQueue[n].gtpinResource;
|
||||||
|
auto pBuffer = castToObjectOrAbort<Buffer>(gtpinBuffer);
|
||||||
|
GraphicsAllocation *pGfxAlloc = pBuffer->getGraphicsAllocation();
|
||||||
|
GeneralSurface *pSurface = new GeneralSurface(pGfxAlloc);
|
||||||
|
pResidencyVector->push_back(pSurface);
|
||||||
|
kernelExecQueue[n].isResourceResident = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
lock.leave(kernelExecQueueLock);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void gtpinNotifyPlatformShutdown() {
|
||||||
|
if (isGTPinInitialized) {
|
||||||
|
// Clear Kernel Execution Queue
|
||||||
|
kernelExecQueue.clear();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
bool gtpinIsGTPinInitialized() {
|
||||||
|
return isGTPinInitialized;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
52
runtime/gtpin/gtpin_defs.h
Normal file
52
runtime/gtpin/gtpin_defs.h
Normal file
@@ -0,0 +1,52 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2018, Intel Corporation
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||||
|
* copy of this software and associated documentation files (the "Software"),
|
||||||
|
* to deal in the Software without restriction, including without limitation
|
||||||
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||||
|
* and/or sell copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included
|
||||||
|
* in all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||||
|
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||||
|
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||||
|
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||||
|
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "config.h"
|
||||||
|
#include "gtpin_ocl_interface.h"
|
||||||
|
#include "CL/cl.h"
|
||||||
|
#include "runtime/command_queue/command_queue.h"
|
||||||
|
#include "runtime/kernel/kernel.h"
|
||||||
|
|
||||||
|
namespace OCLRT {
|
||||||
|
|
||||||
|
struct GTPinKernelExec {
|
||||||
|
Kernel *pKernel;
|
||||||
|
cl_mem gtpinResource;
|
||||||
|
CommandQueue *pCommandQueue;
|
||||||
|
gtpin::command_buffer_handle_t commandBuffer;
|
||||||
|
uint32_t taskCount;
|
||||||
|
bool isTaskCountValid;
|
||||||
|
bool isResourceResident;
|
||||||
|
|
||||||
|
GTPinKernelExec() {
|
||||||
|
pKernel = nullptr;
|
||||||
|
gtpinResource = nullptr;
|
||||||
|
pCommandQueue = nullptr;
|
||||||
|
commandBuffer = nullptr;
|
||||||
|
taskCount = 0;
|
||||||
|
isTaskCountValid = false;
|
||||||
|
isResourceResident = false;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
typedef struct GTPinKernelExec gtpinkexec_t;
|
||||||
|
|
||||||
|
} // OCLRT
|
||||||
@@ -24,10 +24,14 @@
|
|||||||
#include "runtime/gen_common/hw_cmds.h"
|
#include "runtime/gen_common/hw_cmds.h"
|
||||||
|
|
||||||
namespace OCLRT {
|
namespace OCLRT {
|
||||||
|
class Kernel;
|
||||||
|
|
||||||
class GTPinHwHelper {
|
class GTPinHwHelper {
|
||||||
public:
|
public:
|
||||||
static GTPinHwHelper &get(GFXCORE_FAMILY gfxCore);
|
static GTPinHwHelper &get(GFXCORE_FAMILY gfxCore);
|
||||||
virtual uint32_t getGenVersion() = 0;
|
virtual uint32_t getGenVersion() = 0;
|
||||||
|
virtual bool addSurfaceState(Kernel *pKernel) = 0;
|
||||||
|
virtual void *getSurfaceState(Kernel *pKernel, size_t bti) = 0;
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
GTPinHwHelper(){};
|
GTPinHwHelper(){};
|
||||||
@@ -41,6 +45,8 @@ class GTPinHwHelperHw : public GTPinHwHelper {
|
|||||||
return gtpinHwHelper;
|
return gtpinHwHelper;
|
||||||
}
|
}
|
||||||
uint32_t getGenVersion() override;
|
uint32_t getGenVersion() override;
|
||||||
|
bool addSurfaceState(Kernel *pKernel) override;
|
||||||
|
void *getSurfaceState(Kernel *pKernel, size_t bti) override;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
GTPinHwHelperHw(){};
|
GTPinHwHelperHw(){};
|
||||||
|
|||||||
74
runtime/gtpin/gtpin_hw_helper.inl
Normal file
74
runtime/gtpin/gtpin_hw_helper.inl
Normal file
@@ -0,0 +1,74 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2018, Intel Corporation
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||||
|
* copy of this software and associated documentation files (the "Software"),
|
||||||
|
* to deal in the Software without restriction, including without limitation
|
||||||
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||||
|
* and/or sell copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included
|
||||||
|
* in all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||||
|
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||||
|
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||||
|
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||||
|
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "hw_cmds.h"
|
||||||
|
#include "runtime/gtpin/gtpin_hw_helper.h"
|
||||||
|
#include "runtime/helpers/string.h"
|
||||||
|
#include "runtime/kernel/kernel.h"
|
||||||
|
|
||||||
|
namespace OCLRT {
|
||||||
|
|
||||||
|
template <typename GfxFamily>
|
||||||
|
bool GTPinHwHelperHw<GfxFamily>::addSurfaceState(Kernel *pKernel) {
|
||||||
|
using RENDER_SURFACE_STATE = typename GfxFamily::RENDER_SURFACE_STATE;
|
||||||
|
using BINDING_TABLE_STATE = typename GfxFamily::BINDING_TABLE_STATE;
|
||||||
|
|
||||||
|
size_t sshSize = pKernel->getSurfaceStateHeapSize();
|
||||||
|
if ((sshSize == 0) || pKernel->isParentKernel) {
|
||||||
|
// Kernels which do not use SSH or use Execution Model are not supported (yet)
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
size_t ssSize = sizeof(RENDER_SURFACE_STATE);
|
||||||
|
size_t btsSize = sizeof(BINDING_TABLE_STATE);
|
||||||
|
size_t sizeToEnlarge = ssSize + btsSize;
|
||||||
|
size_t currBTOffset = pKernel->getBindingTableOffset();
|
||||||
|
size_t currSurfaceStateSize = currBTOffset;
|
||||||
|
char *pSsh = reinterpret_cast<char *>(pKernel->getSurfaceStateHeap());
|
||||||
|
char *pNewSsh = new char[sshSize + sizeToEnlarge];
|
||||||
|
memcpy_s(pNewSsh, sshSize + sizeToEnlarge, pSsh, currSurfaceStateSize);
|
||||||
|
RENDER_SURFACE_STATE *pSS = reinterpret_cast<RENDER_SURFACE_STATE *>(pNewSsh + currSurfaceStateSize);
|
||||||
|
pSS->init();
|
||||||
|
size_t newSurfaceStateSize = currSurfaceStateSize + ssSize;
|
||||||
|
size_t currBTCount = pKernel->getNumberOfBindingTableStates();
|
||||||
|
memcpy_s(pNewSsh + newSurfaceStateSize, sshSize + sizeToEnlarge - newSurfaceStateSize, pSsh + currBTOffset, currBTCount * btsSize);
|
||||||
|
BINDING_TABLE_STATE *pNewBTS = reinterpret_cast<BINDING_TABLE_STATE *>(pNewSsh + newSurfaceStateSize + currBTCount * btsSize);
|
||||||
|
BINDING_TABLE_STATE bti;
|
||||||
|
bti.init();
|
||||||
|
bti.setSurfaceStatePointer((uint64_t)currBTOffset);
|
||||||
|
*pNewBTS = bti;
|
||||||
|
pKernel->resizeSurfaceStateHeap(pNewSsh, sshSize + sizeToEnlarge, currBTCount + 1, newSurfaceStateSize);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename GfxFamily>
|
||||||
|
void *GTPinHwHelperHw<GfxFamily>::getSurfaceState(Kernel *pKernel, size_t bti) {
|
||||||
|
using BINDING_TABLE_STATE = typename GfxFamily::BINDING_TABLE_STATE;
|
||||||
|
|
||||||
|
if ((nullptr == pKernel->getSurfaceStateHeap()) || (bti >= pKernel->getNumberOfBindingTableStates())) {
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
auto *pBts = reinterpret_cast<BINDING_TABLE_STATE *>(ptrOffset(pKernel->getSurfaceStateHeap(), (pKernel->getBindingTableOffset() + bti * sizeof(BINDING_TABLE_STATE))));
|
||||||
|
auto pSurfaceState = ptrOffset(pKernel->getSurfaceStateHeap(), pBts->getSurfaceStatePointer());
|
||||||
|
return pSurfaceState;
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace OCLRT
|
||||||
@@ -26,4 +26,12 @@ namespace OCLRT {
|
|||||||
void gtpinNotifyContextCreate(cl_context context);
|
void gtpinNotifyContextCreate(cl_context context);
|
||||||
void gtpinNotifyContextDestroy(cl_context context);
|
void gtpinNotifyContextDestroy(cl_context context);
|
||||||
void gtpinNotifyKernelCreate(cl_kernel kernel);
|
void gtpinNotifyKernelCreate(cl_kernel kernel);
|
||||||
|
void gtpinNotifyKernelSubmit(cl_kernel kernel, void *pCmdQueue);
|
||||||
|
void gtpinNotifyPreFlushTask(void *pCmdQueue);
|
||||||
|
void gtpinNotifyFlushTask(uint32_t flushedTaskCount);
|
||||||
|
void gtpinNotifyTaskCompletion(uint32_t completedTaskCount);
|
||||||
|
void gtpinNotifyMakeResident(void *pKernel, void *pCommandStreamReceiver);
|
||||||
|
void gtpinNotifyUpdateResidencyList(void *pKernel, void *pResidencyVector);
|
||||||
|
void gtpinNotifyPlatformShutdown();
|
||||||
|
bool gtpinIsGTPinInitialized();
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -107,6 +107,12 @@ class TakeOwnershipWrapper {
|
|||||||
: obj(obj) {
|
: obj(obj) {
|
||||||
this->locked = obj.takeOwnership(true);
|
this->locked = obj.takeOwnership(true);
|
||||||
}
|
}
|
||||||
|
TakeOwnershipWrapper(T &obj, bool lockImmediately)
|
||||||
|
: obj(obj) {
|
||||||
|
if (lockImmediately) {
|
||||||
|
this->locked = obj.takeOwnership(true);
|
||||||
|
}
|
||||||
|
}
|
||||||
~TakeOwnershipWrapper() {
|
~TakeOwnershipWrapper() {
|
||||||
if (locked) {
|
if (locked) {
|
||||||
obj.releaseOwnership();
|
obj.releaseOwnership();
|
||||||
|
|||||||
@@ -76,16 +76,20 @@ struct KernelCommandsHelper : public PerThreadDataHelper {
|
|||||||
const Kernel &kernel);
|
const Kernel &kernel);
|
||||||
|
|
||||||
static size_t pushBindingTableAndSurfaceStates(IndirectHeap &dstHeap, const KernelInfo &srcKernelInfo,
|
static size_t pushBindingTableAndSurfaceStates(IndirectHeap &dstHeap, const KernelInfo &srcKernelInfo,
|
||||||
const void *srcKernelSsh, size_t srcKernelSshSize);
|
const void *srcKernelSsh, size_t srcKernelSshSize,
|
||||||
|
size_t numberOfBindingTableStates, size_t offsetOfBindingTable);
|
||||||
|
|
||||||
static size_t pushBindingTableAndSurfaceStates(IndirectHeap &dstHeap, const KernelInfo &srcKernelInfo) {
|
static size_t pushBindingTableAndSurfaceStates(IndirectHeap &dstHeap, const KernelInfo &srcKernelInfo) {
|
||||||
return pushBindingTableAndSurfaceStates(dstHeap, srcKernelInfo, srcKernelInfo.heapInfo.pSsh,
|
return pushBindingTableAndSurfaceStates(dstHeap, srcKernelInfo, srcKernelInfo.heapInfo.pSsh,
|
||||||
srcKernelInfo.heapInfo.pKernelHeader->SurfaceStateHeapSize);
|
srcKernelInfo.heapInfo.pKernelHeader->SurfaceStateHeapSize,
|
||||||
|
(srcKernelInfo.patchInfo.bindingTableState != nullptr) ? srcKernelInfo.patchInfo.bindingTableState->Count : 0,
|
||||||
|
(srcKernelInfo.patchInfo.bindingTableState != nullptr) ? srcKernelInfo.patchInfo.bindingTableState->Offset : 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
static size_t pushBindingTableAndSurfaceStates(IndirectHeap &dstHeap, const Kernel &srcKernel) {
|
static size_t pushBindingTableAndSurfaceStates(IndirectHeap &dstHeap, const Kernel &srcKernel) {
|
||||||
return pushBindingTableAndSurfaceStates(dstHeap, srcKernel.getKernelInfo(),
|
return pushBindingTableAndSurfaceStates(dstHeap, srcKernel.getKernelInfo(),
|
||||||
srcKernel.getSurfaceStateHeap(), srcKernel.getSurfaceStateHeapSize());
|
srcKernel.getSurfaceStateHeap(), srcKernel.getSurfaceStateHeapSize(),
|
||||||
|
srcKernel.getNumberOfBindingTableStates(), srcKernel.getBindingTableOffset());
|
||||||
}
|
}
|
||||||
|
|
||||||
static size_t sendIndirectState(
|
static size_t sendIndirectState(
|
||||||
|
|||||||
@@ -265,7 +265,8 @@ size_t KernelCommandsHelper<GfxFamily>::sendCrossThreadData(
|
|||||||
// as required by the INTERFACE_DESCRIPTOR_DATA.
|
// as required by the INTERFACE_DESCRIPTOR_DATA.
|
||||||
template <typename GfxFamily>
|
template <typename GfxFamily>
|
||||||
size_t KernelCommandsHelper<GfxFamily>::pushBindingTableAndSurfaceStates(IndirectHeap &dstHeap, const KernelInfo &srcKernelInfo,
|
size_t KernelCommandsHelper<GfxFamily>::pushBindingTableAndSurfaceStates(IndirectHeap &dstHeap, const KernelInfo &srcKernelInfo,
|
||||||
const void *srcKernelSsh, size_t srcKernelSshSize) {
|
const void *srcKernelSsh, size_t srcKernelSshSize,
|
||||||
|
size_t numberOfBindingTableStates, size_t offsetOfBindingTable) {
|
||||||
using BINDING_TABLE_STATE = typename GfxFamily::BINDING_TABLE_STATE;
|
using BINDING_TABLE_STATE = typename GfxFamily::BINDING_TABLE_STATE;
|
||||||
using INTERFACE_DESCRIPTOR_DATA = typename GfxFamily::INTERFACE_DESCRIPTOR_DATA;
|
using INTERFACE_DESCRIPTOR_DATA = typename GfxFamily::INTERFACE_DESCRIPTOR_DATA;
|
||||||
using RENDER_SURFACE_STATE = typename GfxFamily::RENDER_SURFACE_STATE;
|
using RENDER_SURFACE_STATE = typename GfxFamily::RENDER_SURFACE_STATE;
|
||||||
@@ -274,9 +275,8 @@ size_t KernelCommandsHelper<GfxFamily>::pushBindingTableAndSurfaceStates(Indirec
|
|||||||
// according to compiler, kernel does not reference BTIs to stateful surfaces, so there's nothing to patch
|
// according to compiler, kernel does not reference BTIs to stateful surfaces, so there's nothing to patch
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
size_t sshSize = srcKernelInfo.heapInfo.pKernelHeader->SurfaceStateHeapSize;
|
size_t sshSize = srcKernelSshSize;
|
||||||
DEBUG_BREAK_IF(!((sshSize <= srcKernelSshSize) && (srcKernelSsh != nullptr)));
|
DEBUG_BREAK_IF(srcKernelSsh == nullptr);
|
||||||
uint32_t localBtiOffset = srcKernelInfo.patchInfo.bindingTableState->Offset;
|
|
||||||
|
|
||||||
auto srcSurfaceState = srcKernelSsh;
|
auto srcSurfaceState = srcKernelSsh;
|
||||||
// Align the heap and allocate space for new ssh data
|
// Align the heap and allocate space for new ssh data
|
||||||
@@ -289,21 +289,21 @@ size_t KernelCommandsHelper<GfxFamily>::pushBindingTableAndSurfaceStates(Indirec
|
|||||||
// nothing to patch, we're at the start of heap (which is assumed to be the surface state base address)
|
// nothing to patch, we're at the start of heap (which is assumed to be the surface state base address)
|
||||||
// we need to simply copy the ssh (including BTIs from compiler)
|
// we need to simply copy the ssh (including BTIs from compiler)
|
||||||
memcpy_s(dstSurfaceState, sshSize, srcSurfaceState, sshSize);
|
memcpy_s(dstSurfaceState, sshSize, srcSurfaceState, sshSize);
|
||||||
return localBtiOffset;
|
return offsetOfBindingTable;
|
||||||
}
|
}
|
||||||
|
|
||||||
// We can copy-over the surface states, but BTIs will need to be patched
|
// We can copy-over the surface states, but BTIs will need to be patched
|
||||||
memcpy_s(dstSurfaceState, sshSize, srcSurfaceState, localBtiOffset);
|
memcpy_s(dstSurfaceState, sshSize, srcSurfaceState, offsetOfBindingTable);
|
||||||
|
|
||||||
uint32_t surfaceStatesOffset = static_cast<uint32_t>(ptrDiff(dstSurfaceState, dstHeap.getBase()));
|
uint32_t surfaceStatesOffset = static_cast<uint32_t>(ptrDiff(dstSurfaceState, dstHeap.getBase()));
|
||||||
|
|
||||||
// march over BTIs and offset the pointers based on surface state base address
|
// march over BTIs and offset the pointers based on surface state base address
|
||||||
auto *dstBtiTableBase = reinterpret_cast<BINDING_TABLE_STATE *>(ptrOffset(dstSurfaceState, localBtiOffset));
|
auto *dstBtiTableBase = reinterpret_cast<BINDING_TABLE_STATE *>(ptrOffset(dstSurfaceState, offsetOfBindingTable));
|
||||||
DEBUG_BREAK_IF(reinterpret_cast<uintptr_t>(dstBtiTableBase) % INTERFACE_DESCRIPTOR_DATA::BINDINGTABLEPOINTER_ALIGN_SIZE != 0);
|
DEBUG_BREAK_IF(reinterpret_cast<uintptr_t>(dstBtiTableBase) % INTERFACE_DESCRIPTOR_DATA::BINDINGTABLEPOINTER_ALIGN_SIZE != 0);
|
||||||
auto *srcBtiTableBase = reinterpret_cast<const BINDING_TABLE_STATE *>(ptrOffset(srcSurfaceState, localBtiOffset));
|
auto *srcBtiTableBase = reinterpret_cast<const BINDING_TABLE_STATE *>(ptrOffset(srcSurfaceState, offsetOfBindingTable));
|
||||||
BINDING_TABLE_STATE bti;
|
BINDING_TABLE_STATE bti;
|
||||||
bti.init(); // init whole DWORD - i.e. not just the SurfaceStatePointer bits
|
bti.init(); // init whole DWORD - i.e. not just the SurfaceStatePointer bits
|
||||||
for (uint32_t i = 0, e = srcKernelInfo.patchInfo.bindingTableState->Count; i != e; ++i) {
|
for (uint32_t i = 0, e = (uint32_t)numberOfBindingTableStates; i != e; ++i) {
|
||||||
uint32_t localSurfaceStateOffset = srcBtiTableBase[i].getSurfaceStatePointer();
|
uint32_t localSurfaceStateOffset = srcBtiTableBase[i].getSurfaceStatePointer();
|
||||||
uint32_t offsetedSurfaceStateOffset = localSurfaceStateOffset + surfaceStatesOffset;
|
uint32_t offsetedSurfaceStateOffset = localSurfaceStateOffset + surfaceStatesOffset;
|
||||||
bti.setSurfaceStatePointer(offsetedSurfaceStateOffset); // patch just the SurfaceStatePointer bits
|
bti.setSurfaceStatePointer(offsetedSurfaceStateOffset); // patch just the SurfaceStatePointer bits
|
||||||
|
|||||||
@@ -26,6 +26,7 @@
|
|||||||
#include "runtime/command_queue/enqueue_common.h"
|
#include "runtime/command_queue/enqueue_common.h"
|
||||||
#include "runtime/device/device.h"
|
#include "runtime/device/device.h"
|
||||||
#include "runtime/device_queue/device_queue.h"
|
#include "runtime/device_queue/device_queue.h"
|
||||||
|
#include "runtime/gtpin/gtpin_notify.h"
|
||||||
#include "runtime/mem_obj/image.h"
|
#include "runtime/mem_obj/image.h"
|
||||||
#include "runtime/memory_manager/surface.h"
|
#include "runtime/memory_manager/surface.h"
|
||||||
#include "runtime/helpers/aligned_memory.h"
|
#include "runtime/helpers/aligned_memory.h"
|
||||||
@@ -76,6 +77,8 @@ CompletionStamp &CommandMapUnmap::submit(uint32_t taskLevel, bool terminated) {
|
|||||||
|
|
||||||
DEBUG_BREAK_IF(taskLevel >= Event::eventNotReady);
|
DEBUG_BREAK_IF(taskLevel >= Event::eventNotReady);
|
||||||
|
|
||||||
|
gtpinNotifyPreFlushTask(&cmdQ);
|
||||||
|
|
||||||
completionStamp = csr.flushTask(queueCommandStream,
|
completionStamp = csr.flushTask(queueCommandStream,
|
||||||
offset,
|
offset,
|
||||||
cmdQ.getIndirectHeap(IndirectHeap::DYNAMIC_STATE),
|
cmdQ.getIndirectHeap(IndirectHeap::DYNAMIC_STATE),
|
||||||
@@ -267,6 +270,8 @@ CompletionStamp &CommandComputeKernel::submit(uint32_t taskLevel, bool terminate
|
|||||||
|
|
||||||
DEBUG_BREAK_IF(taskLevel >= Event::eventNotReady);
|
DEBUG_BREAK_IF(taskLevel >= Event::eventNotReady);
|
||||||
|
|
||||||
|
gtpinNotifyPreFlushTask(&commandQueue);
|
||||||
|
|
||||||
completionStamp = commandStreamReceiver.flushTask(queueCommandStream,
|
completionStamp = commandStreamReceiver.flushTask(queueCommandStream,
|
||||||
offset,
|
offset,
|
||||||
*dsh,
|
*dsh,
|
||||||
@@ -275,12 +280,10 @@ CompletionStamp &CommandComputeKernel::submit(uint32_t taskLevel, bool terminate
|
|||||||
ssh,
|
ssh,
|
||||||
taskLevel,
|
taskLevel,
|
||||||
dispatchFlags);
|
dispatchFlags);
|
||||||
|
|
||||||
commandQueue.waitUntilComplete(completionStamp.taskCount, completionStamp.flushStamp);
|
|
||||||
|
|
||||||
for (auto &surface : surfaces) {
|
for (auto &surface : surfaces) {
|
||||||
surface->setCompletionStamp(completionStamp, nullptr, nullptr);
|
surface->setCompletionStamp(completionStamp, nullptr, nullptr);
|
||||||
}
|
}
|
||||||
|
commandQueue.waitUntilComplete(completionStamp.taskCount, completionStamp.flushStamp);
|
||||||
|
|
||||||
if (printfHandler) {
|
if (printfHandler) {
|
||||||
printfHandler.get()->printEnqueueOutput();
|
printfHandler.get()->printEnqueueOutput();
|
||||||
@@ -309,6 +312,8 @@ CompletionStamp &CommandMarker::submit(uint32_t taskLevel, bool terminated) {
|
|||||||
|
|
||||||
DEBUG_BREAK_IF(taskLevel >= Event::eventNotReady);
|
DEBUG_BREAK_IF(taskLevel >= Event::eventNotReady);
|
||||||
|
|
||||||
|
gtpinNotifyPreFlushTask(&cmdQ);
|
||||||
|
|
||||||
completionStamp = csr.flushTask(queueCommandStream,
|
completionStamp = csr.flushTask(queueCommandStream,
|
||||||
offset,
|
offset,
|
||||||
cmdQ.getIndirectHeap(IndirectHeap::DYNAMIC_STATE),
|
cmdQ.getIndirectHeap(IndirectHeap::DYNAMIC_STATE),
|
||||||
|
|||||||
@@ -30,6 +30,7 @@
|
|||||||
#include "runtime/helpers/surface_formats.h"
|
#include "runtime/helpers/surface_formats.h"
|
||||||
#include "runtime/device_queue/device_queue.h"
|
#include "runtime/device_queue/device_queue.h"
|
||||||
#include "runtime/execution_model/device_enqueue.h"
|
#include "runtime/execution_model/device_enqueue.h"
|
||||||
|
#include "runtime/gtpin/gtpin_notify.h"
|
||||||
#include "runtime/helpers/aligned_memory.h"
|
#include "runtime/helpers/aligned_memory.h"
|
||||||
#include "runtime/helpers/basic_math.h"
|
#include "runtime/helpers/basic_math.h"
|
||||||
#include "runtime/helpers/debug_helpers.h"
|
#include "runtime/helpers/debug_helpers.h"
|
||||||
@@ -94,6 +95,8 @@ Kernel::Kernel(Program *programArg, const KernelInfo &kernelInfoArg, const Devic
|
|||||||
context(nullptr),
|
context(nullptr),
|
||||||
device(deviceArg),
|
device(deviceArg),
|
||||||
kernelInfo(kernelInfoArg),
|
kernelInfo(kernelInfoArg),
|
||||||
|
numberOfBindingTableStates(0),
|
||||||
|
localBindingTableOffset(0),
|
||||||
pSshLocal(nullptr),
|
pSshLocal(nullptr),
|
||||||
sshLocalSize(0),
|
sshLocalSize(0),
|
||||||
crossThreadData(nullptr),
|
crossThreadData(nullptr),
|
||||||
@@ -245,6 +248,8 @@ cl_int Kernel::initialize() {
|
|||||||
// copy the ssh into our local copy
|
// copy the ssh into our local copy
|
||||||
memcpy_s(pSshLocal, sshLocalSize, heapInfo.pSsh, sshLocalSize);
|
memcpy_s(pSshLocal, sshLocalSize, heapInfo.pSsh, sshLocalSize);
|
||||||
}
|
}
|
||||||
|
numberOfBindingTableStates = (patchInfo.bindingTableState != nullptr) ? patchInfo.bindingTableState->Count : 0;
|
||||||
|
localBindingTableOffset = (patchInfo.bindingTableState != nullptr) ? patchInfo.bindingTableState->Offset : 0;
|
||||||
|
|
||||||
// patch crossthread data and ssh with inline surfaces, if necessary
|
// patch crossthread data and ssh with inline surfaces, if necessary
|
||||||
privateSurfaceSize = patchInfo.pAllocateStatelessPrivateSurface
|
privateSurfaceSize = patchInfo.pAllocateStatelessPrivateSurface
|
||||||
@@ -690,6 +695,11 @@ void Kernel::substituteKernelHeap(void *newKernelHeap, size_t newKernelHeapSize)
|
|||||||
*pKernelHeap = newKernelHeap;
|
*pKernelHeap = newKernelHeap;
|
||||||
SKernelBinaryHeaderCommon *pHeader = const_cast<SKernelBinaryHeaderCommon *>(pKernelInfo->heapInfo.pKernelHeader);
|
SKernelBinaryHeaderCommon *pHeader = const_cast<SKernelBinaryHeaderCommon *>(pKernelInfo->heapInfo.pKernelHeader);
|
||||||
pHeader->KernelHeapSize = static_cast<uint32_t>(newKernelHeapSize);
|
pHeader->KernelHeapSize = static_cast<uint32_t>(newKernelHeapSize);
|
||||||
|
pKernelInfo->isKernelHeapSubstituted = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool Kernel::isKernelHeapSubstituted() const {
|
||||||
|
return kernelInfo.isKernelHeapSubstituted;
|
||||||
}
|
}
|
||||||
|
|
||||||
uint64_t Kernel::getKernelId() const {
|
uint64_t Kernel::getKernelId() const {
|
||||||
@@ -721,16 +731,20 @@ const void *Kernel::getDynamicStateHeap() const {
|
|||||||
|
|
||||||
size_t Kernel::getSurfaceStateHeapSize() const {
|
size_t Kernel::getSurfaceStateHeapSize() const {
|
||||||
return kernelInfo.usesSsh
|
return kernelInfo.usesSsh
|
||||||
? kernelInfo.heapInfo.pKernelHeader->SurfaceStateHeapSize
|
? sshLocalSize
|
||||||
: 0;
|
: 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t Kernel::getNumberOfSurfaceStates() const {
|
size_t Kernel::getNumberOfBindingTableStates() const {
|
||||||
const auto &patchInfo = kernelInfo.patchInfo;
|
return numberOfBindingTableStates;
|
||||||
if (patchInfo.bindingTableState == nullptr) {
|
}
|
||||||
return 0;
|
|
||||||
}
|
void Kernel::resizeSurfaceStateHeap(void *pNewSsh, size_t newSshSize, size_t newBindingTableCount, size_t newBindingTableOffset) {
|
||||||
return patchInfo.bindingTableState->Count;
|
delete[] pSshLocal;
|
||||||
|
pSshLocal = reinterpret_cast<char *>(pNewSsh);
|
||||||
|
sshLocalSize = static_cast<uint32_t>(newSshSize);
|
||||||
|
numberOfBindingTableStates = newBindingTableCount;
|
||||||
|
localBindingTableOffset = newBindingTableOffset;
|
||||||
}
|
}
|
||||||
|
|
||||||
uint32_t Kernel::getScratchSizeValueToProgramMediaVfeState(int scratchSize) {
|
uint32_t Kernel::getScratchSizeValueToProgramMediaVfeState(int scratchSize) {
|
||||||
@@ -927,6 +941,8 @@ void Kernel::makeResident(CommandStreamReceiver &commandStreamReceiver) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
makeArgsResident(commandStreamReceiver);
|
makeArgsResident(commandStreamReceiver);
|
||||||
|
|
||||||
|
gtpinNotifyMakeResident(this, &commandStreamReceiver);
|
||||||
}
|
}
|
||||||
|
|
||||||
void Kernel::getResidency(std::vector<Surface *> &dst) {
|
void Kernel::getResidency(std::vector<Surface *> &dst) {
|
||||||
@@ -964,6 +980,8 @@ void Kernel::getResidency(std::vector<Surface *> &dst) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
gtpinNotifyUpdateResidencyList(this, &dst);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool Kernel::requiresCoherency() {
|
bool Kernel::requiresCoherency() {
|
||||||
|
|||||||
@@ -156,9 +156,15 @@ class Kernel : public BaseObject<_cl_kernel> {
|
|||||||
size_t getKernelHeapSize() const;
|
size_t getKernelHeapSize() const;
|
||||||
size_t getSurfaceStateHeapSize() const;
|
size_t getSurfaceStateHeapSize() const;
|
||||||
size_t getDynamicStateHeapSize() const;
|
size_t getDynamicStateHeapSize() const;
|
||||||
size_t getNumberOfSurfaceStates() const;
|
size_t getNumberOfBindingTableStates() const;
|
||||||
|
size_t getBindingTableOffset() const {
|
||||||
|
return localBindingTableOffset;
|
||||||
|
}
|
||||||
|
|
||||||
|
void resizeSurfaceStateHeap(void *pNewSsh, size_t newSshSize, size_t newBindingTableCount, size_t newBindingTableOffset);
|
||||||
|
|
||||||
void substituteKernelHeap(void *newKernelHeap, size_t newKernelHeapSize);
|
void substituteKernelHeap(void *newKernelHeap, size_t newKernelHeapSize);
|
||||||
|
bool isKernelHeapSubstituted() const;
|
||||||
uint64_t getKernelId() const;
|
uint64_t getKernelId() const;
|
||||||
void setKernelId(uint64_t newKernelId);
|
void setKernelId(uint64_t newKernelId);
|
||||||
|
|
||||||
@@ -434,6 +440,8 @@ class Kernel : public BaseObject<_cl_kernel> {
|
|||||||
std::vector<KernelArgHandler> kernelArgHandlers;
|
std::vector<KernelArgHandler> kernelArgHandlers;
|
||||||
std::vector<GraphicsAllocation *> kernelSvmGfxAllocations;
|
std::vector<GraphicsAllocation *> kernelSvmGfxAllocations;
|
||||||
|
|
||||||
|
size_t numberOfBindingTableStates;
|
||||||
|
size_t localBindingTableOffset;
|
||||||
char *pSshLocal;
|
char *pSshLocal;
|
||||||
uint32_t sshLocalSize;
|
uint32_t sshLocalSize;
|
||||||
|
|
||||||
|
|||||||
@@ -25,6 +25,7 @@
|
|||||||
#include "runtime/compiler_interface/compiler_interface.h"
|
#include "runtime/compiler_interface/compiler_interface.h"
|
||||||
#include "CL/cl_ext.h"
|
#include "CL/cl_ext.h"
|
||||||
#include "runtime/device/device.h"
|
#include "runtime/device/device.h"
|
||||||
|
#include "runtime/gtpin/gtpin_notify.h"
|
||||||
#include "runtime/helpers/debug_helpers.h"
|
#include "runtime/helpers/debug_helpers.h"
|
||||||
#include "runtime/helpers/get_info.h"
|
#include "runtime/helpers/get_info.h"
|
||||||
#include "runtime/helpers/options.h"
|
#include "runtime/helpers/options.h"
|
||||||
@@ -191,6 +192,8 @@ void Platform::shutdown() {
|
|||||||
|
|
||||||
DeviceFactory::releaseDevices();
|
DeviceFactory::releaseDevices();
|
||||||
std::string().swap(compilerExtensions);
|
std::string().swap(compilerExtensions);
|
||||||
|
|
||||||
|
gtpinNotifyPlatformShutdown();
|
||||||
}
|
}
|
||||||
|
|
||||||
Device *Platform::getDevice(size_t deviceOrdinal) {
|
Device *Platform::getDevice(size_t deviceOrdinal) {
|
||||||
|
|||||||
@@ -236,5 +236,6 @@ struct KernelInfo {
|
|||||||
uint32_t argumentsToPatchNum = 0;
|
uint32_t argumentsToPatchNum = 0;
|
||||||
uint32_t systemKernelOffset = 0;
|
uint32_t systemKernelOffset = 0;
|
||||||
uint64_t kernelId = 0;
|
uint64_t kernelId = 0;
|
||||||
|
bool isKernelHeapSubstituted = false;
|
||||||
};
|
};
|
||||||
} // namespace OCLRT
|
} // namespace OCLRT
|
||||||
|
|||||||
@@ -35,6 +35,7 @@ set(RUNTIME_SRCS_UTILITIES_BASE
|
|||||||
${CMAKE_CURRENT_SOURCE_DIR}/perf_profiler.cpp
|
${CMAKE_CURRENT_SOURCE_DIR}/perf_profiler.cpp
|
||||||
${CMAKE_CURRENT_SOURCE_DIR}/perf_profiler.h
|
${CMAKE_CURRENT_SOURCE_DIR}/perf_profiler.h
|
||||||
${CMAKE_CURRENT_SOURCE_DIR}/reference_tracked_object.h
|
${CMAKE_CURRENT_SOURCE_DIR}/reference_tracked_object.h
|
||||||
|
${CMAKE_CURRENT_SOURCE_DIR}/spinlock.h
|
||||||
${CMAKE_CURRENT_SOURCE_DIR}/stackvec.h
|
${CMAKE_CURRENT_SOURCE_DIR}/stackvec.h
|
||||||
${CMAKE_CURRENT_SOURCE_DIR}/tag_allocator.h
|
${CMAKE_CURRENT_SOURCE_DIR}/tag_allocator.h
|
||||||
${CMAKE_CURRENT_SOURCE_DIR}/tag_allocator_base.h
|
${CMAKE_CURRENT_SOURCE_DIR}/tag_allocator_base.h
|
||||||
|
|||||||
40
runtime/utilities/spinlock.h
Normal file
40
runtime/utilities/spinlock.h
Normal file
@@ -0,0 +1,40 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2018, Intel Corporation
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||||
|
* copy of this software and associated documentation files (the "Software"),
|
||||||
|
* to deal in the Software without restriction, including without limitation
|
||||||
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||||
|
* and/or sell copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included
|
||||||
|
* in all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||||
|
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||||
|
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||||
|
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||||
|
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <atomic>
|
||||||
|
|
||||||
|
namespace OCLRT {
|
||||||
|
|
||||||
|
class SpinLock {
|
||||||
|
public:
|
||||||
|
void enter(std::atomic_flag &spinLock) {
|
||||||
|
while (spinLock.test_and_set(std::memory_order_acquire)) {
|
||||||
|
};
|
||||||
|
}
|
||||||
|
void leave(std::atomic_flag &spinLock) {
|
||||||
|
spinLock.clear(std::memory_order_release);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
} // OCLRT
|
||||||
@@ -214,7 +214,7 @@ HWTEST_P(ParentKernelEnqueueTest, givenParentKernelWhenEnqueuedThenBlocksSurface
|
|||||||
|
|
||||||
Kernel *blockKernel = Kernel::create(pKernel->getProgram(), *pBlockInfo, nullptr);
|
Kernel *blockKernel = Kernel::create(pKernel->getProgram(), *pBlockInfo, nullptr);
|
||||||
blockSSH = alignUp(blockSSH, BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE);
|
blockSSH = alignUp(blockSSH, BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE);
|
||||||
if (blockKernel->getNumberOfSurfaceStates() > 0) {
|
if (blockKernel->getNumberOfBindingTableStates() > 0) {
|
||||||
ASSERT_NE(nullptr, pBlockInfo->patchInfo.bindingTableState);
|
ASSERT_NE(nullptr, pBlockInfo->patchInfo.bindingTableState);
|
||||||
auto dstBlockBti = ptrOffset(blockSSH, pBlockInfo->patchInfo.bindingTableState->Offset);
|
auto dstBlockBti = ptrOffset(blockSSH, pBlockInfo->patchInfo.bindingTableState->Offset);
|
||||||
EXPECT_EQ(0U, reinterpret_cast<uintptr_t>(dstBlockBti) % INTERFACE_DESCRIPTOR_DATA::BINDINGTABLEPOINTER_ALIGN_SIZE);
|
EXPECT_EQ(0U, reinterpret_cast<uintptr_t>(dstBlockBti) % INTERFACE_DESCRIPTOR_DATA::BINDINGTABLEPOINTER_ALIGN_SIZE);
|
||||||
@@ -222,7 +222,7 @@ HWTEST_P(ParentKernelEnqueueTest, givenParentKernelWhenEnqueuedThenBlocksSurface
|
|||||||
|
|
||||||
auto srcBlockBti = ptrOffset(pBlockInfo->heapInfo.pSsh, pBlockInfo->patchInfo.bindingTableState->Offset);
|
auto srcBlockBti = ptrOffset(pBlockInfo->heapInfo.pSsh, pBlockInfo->patchInfo.bindingTableState->Offset);
|
||||||
auto srcBindingTable = reinterpret_cast<BINDING_TABLE_STATE *>(srcBlockBti);
|
auto srcBindingTable = reinterpret_cast<BINDING_TABLE_STATE *>(srcBlockBti);
|
||||||
for (uint32_t i = 0; i < blockKernel->getNumberOfSurfaceStates(); ++i) {
|
for (uint32_t i = 0; i < blockKernel->getNumberOfBindingTableStates(); ++i) {
|
||||||
uint32_t dstSurfaceStatePointer = dstBindingTable[i].getSurfaceStatePointer();
|
uint32_t dstSurfaceStatePointer = dstBindingTable[i].getSurfaceStatePointer();
|
||||||
uint32_t srcSurfaceStatePointer = srcBindingTable[i].getSurfaceStatePointer();
|
uint32_t srcSurfaceStatePointer = srcBindingTable[i].getSurfaceStatePointer();
|
||||||
auto *dstSurfaceState = reinterpret_cast<RENDER_SURFACE_STATE *>(ptrOffset(ssh->getBase(), dstSurfaceStatePointer));
|
auto *dstSurfaceState = reinterpret_cast<RENDER_SURFACE_STATE *>(ptrOffset(ssh->getBase(), dstSurfaceStatePointer));
|
||||||
|
|||||||
@@ -188,7 +188,7 @@ HWTEST_P(ParentKernelDispatchTest, givenParentKernelWhenQueueIsBlockedThenSSHSiz
|
|||||||
|
|
||||||
size_t sshUsed = blockedCommandsData->ssh->getUsed();
|
size_t sshUsed = blockedCommandsData->ssh->getUsed();
|
||||||
|
|
||||||
size_t expectedSizeSSH = pKernel->getNumberOfSurfaceStates() * sizeof(RENDER_SURFACE_STATE) + pKernel->getKernelInfo().patchInfo.bindingTableState->Count * sizeof(BINDING_TABLE_STATE);
|
size_t expectedSizeSSH = pKernel->getNumberOfBindingTableStates() * sizeof(RENDER_SURFACE_STATE) + pKernel->getKernelInfo().patchInfo.bindingTableState->Count * sizeof(BINDING_TABLE_STATE);
|
||||||
|
|
||||||
if ((pKernel->requiresSshForBuffers()) || (pKernel->getKernelInfo().patchInfo.imageMemObjKernelArgs.size() > 0)) {
|
if ((pKernel->requiresSshForBuffers()) || (pKernel->getKernelInfo().patchInfo.imageMemObjKernelArgs.size() > 0)) {
|
||||||
EXPECT_EQ(expectedSizeSSH, sshUsed);
|
EXPECT_EQ(expectedSizeSSH, sshUsed);
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
@@ -324,6 +324,20 @@ TEST(BaseObjectTest, takeOwnership) {
|
|||||||
EXPECT_FALSE(buffer.hasOwnership());
|
EXPECT_FALSE(buffer.hasOwnership());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
TEST(BaseObjectTest, takeOwnershipWrapper) {
|
||||||
|
MockBuffer buffer;
|
||||||
|
{
|
||||||
|
TakeOwnershipWrapper<Buffer> bufferOwnership(buffer, false);
|
||||||
|
EXPECT_FALSE(buffer.hasOwnership());
|
||||||
|
}
|
||||||
|
{
|
||||||
|
TakeOwnershipWrapper<Buffer> bufferOwnership(buffer, true);
|
||||||
|
EXPECT_TRUE(buffer.hasOwnership());
|
||||||
|
bufferOwnership.unlock();
|
||||||
|
EXPECT_FALSE(buffer.hasOwnership());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
TYPED_TEST(BaseObjectTests, getCond) {
|
TYPED_TEST(BaseObjectTests, getCond) {
|
||||||
TypeParam *object = new TypeParam;
|
TypeParam *object = new TypeParam;
|
||||||
|
|
||||||
|
|||||||
@@ -486,7 +486,7 @@ HWTEST_F(KernelCommandsTest, usedBindingTableStatePointersForGlobalAndConstantAn
|
|||||||
auto &ssh = cmdQ.getIndirectHeap(IndirectHeap::SURFACE_STATE, 8192);
|
auto &ssh = cmdQ.getIndirectHeap(IndirectHeap::SURFACE_STATE, 8192);
|
||||||
|
|
||||||
// Initialize binding table state pointers with pattern
|
// Initialize binding table state pointers with pattern
|
||||||
EXPECT_EQ(numSurfaces, pKernel->getNumberOfSurfaceStates());
|
EXPECT_EQ(numSurfaces, pKernel->getNumberOfBindingTableStates());
|
||||||
|
|
||||||
const size_t localWorkSizes[3]{256, 1, 1};
|
const size_t localWorkSizes[3]{256, 1, 1};
|
||||||
|
|
||||||
@@ -566,7 +566,7 @@ HWTEST_F(KernelCommandsTest, setBindingTableStatesForKernelWithBuffersNotRequiri
|
|||||||
auto usedBefore = ssh.getUsed();
|
auto usedBefore = ssh.getUsed();
|
||||||
|
|
||||||
// Initialize binding table state pointers with pattern
|
// Initialize binding table state pointers with pattern
|
||||||
auto numSurfaceStates = pKernel->getNumberOfSurfaceStates();
|
auto numSurfaceStates = pKernel->getNumberOfBindingTableStates();
|
||||||
EXPECT_EQ(0u, numSurfaceStates);
|
EXPECT_EQ(0u, numSurfaceStates);
|
||||||
|
|
||||||
// set binding table states
|
// set binding table states
|
||||||
@@ -613,10 +613,13 @@ HWTEST_F(KernelCommandsTest, setBindingTableStatesForNoSurfaces) {
|
|||||||
auto &ssh = cmdQ.getIndirectHeap(IndirectHeap::SURFACE_STATE, 8192);
|
auto &ssh = cmdQ.getIndirectHeap(IndirectHeap::SURFACE_STATE, 8192);
|
||||||
|
|
||||||
// Initialize binding table state pointers with pattern
|
// Initialize binding table state pointers with pattern
|
||||||
auto numSurfaceStates = pKernel->getNumberOfSurfaceStates();
|
auto numSurfaceStates = pKernel->getNumberOfBindingTableStates();
|
||||||
EXPECT_EQ(0u, numSurfaceStates);
|
EXPECT_EQ(0u, numSurfaceStates);
|
||||||
|
|
||||||
auto dstBindingTablePointer = KernelCommandsHelper<FamilyType>::pushBindingTableAndSurfaceStates(ssh, *pKernel);
|
auto dstBindingTablePointer = KernelCommandsHelper<FamilyType>::pushBindingTableAndSurfaceStates(ssh, *pKernelInfo);
|
||||||
|
EXPECT_EQ(0u, dstBindingTablePointer);
|
||||||
|
|
||||||
|
dstBindingTablePointer = KernelCommandsHelper<FamilyType>::pushBindingTableAndSurfaceStates(ssh, *pKernel);
|
||||||
EXPECT_EQ(0u, dstBindingTablePointer);
|
EXPECT_EQ(0u, dstBindingTablePointer);
|
||||||
|
|
||||||
SPatchBindingTableState bindingTableState;
|
SPatchBindingTableState bindingTableState;
|
||||||
|
|||||||
@@ -31,6 +31,7 @@ set(IGDRCL_SRCS_tests_utilities
|
|||||||
"${CMAKE_CURRENT_SOURCE_DIR}/timer_util_tests.cpp"
|
"${CMAKE_CURRENT_SOURCE_DIR}/timer_util_tests.cpp"
|
||||||
"${CMAKE_CURRENT_SOURCE_DIR}/cpuinfo_tests.cpp"
|
"${CMAKE_CURRENT_SOURCE_DIR}/cpuinfo_tests.cpp"
|
||||||
"${CMAKE_CURRENT_SOURCE_DIR}/perf_profiler.cpp"
|
"${CMAKE_CURRENT_SOURCE_DIR}/perf_profiler.cpp"
|
||||||
|
"${CMAKE_CURRENT_SOURCE_DIR}/spinlock_tests.cpp"
|
||||||
"${CMAKE_CURRENT_SOURCE_DIR}/vec_tests.cpp"
|
"${CMAKE_CURRENT_SOURCE_DIR}/vec_tests.cpp"
|
||||||
PARENT_SCOPE
|
PARENT_SCOPE
|
||||||
)
|
)
|
||||||
|
|||||||
64
unit_tests/utilities/spinlock_tests.cpp
Normal file
64
unit_tests/utilities/spinlock_tests.cpp
Normal file
@@ -0,0 +1,64 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2017, Intel Corporation
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||||
|
* copy of this software and associated documentation files (the "Software"),
|
||||||
|
* to deal in the Software without restriction, including without limitation
|
||||||
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||||
|
* and/or sell copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included
|
||||||
|
* in all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||||
|
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||||
|
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||||
|
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||||
|
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "runtime/utilities/spinlock.h"
|
||||||
|
#include "gtest/gtest.h"
|
||||||
|
#include <thread>
|
||||||
|
|
||||||
|
using namespace OCLRT;
|
||||||
|
|
||||||
|
TEST(SpinLockTest, givenTwoThreadsThenVerifyThatTheySynchronizeWithSpinLock) {
|
||||||
|
std::atomic_flag syncLock = ATOMIC_FLAG_INIT;
|
||||||
|
std::atomic<bool> threadStarted(false);
|
||||||
|
std::atomic<bool> threadFinished(false);
|
||||||
|
SpinLock lock1;
|
||||||
|
int sharedCount = 0;
|
||||||
|
|
||||||
|
// Initially acquire spin lock so the worker thread will wait
|
||||||
|
lock1.enter(syncLock);
|
||||||
|
|
||||||
|
// Start worker thread
|
||||||
|
std::thread t([&]() {
|
||||||
|
threadStarted = true;
|
||||||
|
SpinLock lock2;
|
||||||
|
lock2.enter(syncLock);
|
||||||
|
sharedCount++;
|
||||||
|
EXPECT_EQ(2, sharedCount);
|
||||||
|
lock2.leave(syncLock);
|
||||||
|
threadFinished = true;
|
||||||
|
});
|
||||||
|
|
||||||
|
// Wait till worker thread is started
|
||||||
|
while (!threadStarted) {
|
||||||
|
};
|
||||||
|
sharedCount++;
|
||||||
|
EXPECT_EQ(1, sharedCount);
|
||||||
|
|
||||||
|
// Release spin lock thus allowing worker thread to proceed
|
||||||
|
lock1.leave(syncLock);
|
||||||
|
|
||||||
|
// Wait till worker thread finishes
|
||||||
|
while (!threadFinished) {
|
||||||
|
};
|
||||||
|
EXPECT_EQ(2, sharedCount);
|
||||||
|
t.join();
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user