Make sure that gtpin callbacks are not executed in enqueue path.

-This is to make sure those functions are not called when gtpin is not used
-This preserves CPU instruction cache pollution.
-Our enqueue path needs to be as thin as possible, even with this small change
there is visible gain in ULT execution time.

Change-Id: I44cc2144754cda95ca1fe058184cd8a151b8d35c
This commit is contained in:
Mrozek, Michal
2018-03-22 18:46:09 +01:00
committed by sys_ocldev
parent 93cb7be091
commit d7fe01454b
8 changed files with 30 additions and 16 deletions

View File

@@ -2585,8 +2585,9 @@ cl_int CL_API_CALL clEnqueueNDRangeKernel(cl_command_queue commandQueue,
auto pKernel = castToObjectOrAbort<Kernel>(kernel); auto pKernel = castToObjectOrAbort<Kernel>(kernel);
TakeOwnershipWrapper<Kernel> kernelOwnership(*pKernel, gtpinIsGTPinInitialized()); TakeOwnershipWrapper<Kernel> kernelOwnership(*pKernel, gtpinIsGTPinInitialized());
if (gtpinIsGTPinInitialized()) {
gtpinNotifyKernelSubmit(kernel, pCommandQueue); gtpinNotifyKernelSubmit(kernel, pCommandQueue);
}
retVal = pCommandQueue->enqueueKernel( retVal = pCommandQueue->enqueueKernel(
kernel, kernel,

View File

@@ -553,7 +553,9 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
DEBUG_BREAK_IF(taskLevel >= Event::eventNotReady); DEBUG_BREAK_IF(taskLevel >= Event::eventNotReady);
gtpinNotifyPreFlushTask(this); if (gtpinIsGTPinInitialized()) {
gtpinNotifyPreFlushTask(this);
}
CompletionStamp completionStamp = commandStreamReceiver.flushTask( CompletionStamp completionStamp = commandStreamReceiver.flushTask(
commandStream, commandStream,

View File

@@ -193,7 +193,9 @@ bool CommandStreamReceiver::waitForCompletionWithTimeout(bool enableTimeout, int
} }
} }
if (*getTagAddress() >= taskCountToWait) { if (*getTagAddress() >= taskCountToWait) {
gtpinNotifyTaskCompletion(taskCountToWait); if (gtpinIsGTPinInitialized()) {
gtpinNotifyTaskCompletion(taskCountToWait);
}
return true; return true;
} }
return false; return false;

View File

@@ -398,7 +398,9 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
this->taskLevel += levelClosed ? 1 : 0; this->taskLevel += levelClosed ? 1 : 0;
gtpinNotifyFlushTask(completionStamp.taskCount); if (gtpinIsGTPinInitialized()) {
gtpinNotifyFlushTask(completionStamp.taskCount);
}
return completionStamp; return completionStamp;
} }

View File

@@ -24,6 +24,7 @@
#include <cstdint> #include <cstdint>
namespace OCLRT { namespace OCLRT {
bool isGTPinInitialized = false;
void gtpinNotifyContextCreate(cl_context context) { void gtpinNotifyContextCreate(cl_context context) {
} }
@@ -54,8 +55,4 @@ void gtpinNotifyUpdateResidencyList(void *pKernel, void *pResidencyVector) {
void gtpinNotifyPlatformShutdown() { void gtpinNotifyPlatformShutdown() {
} }
bool gtpinIsGTPinInitialized() {
return false;
}
} }

View File

@@ -30,6 +30,7 @@
#include "runtime/device/device_info.h" #include "runtime/device/device_info.h"
#include "runtime/gtpin/gtpin_defs.h" #include "runtime/gtpin/gtpin_defs.h"
#include "runtime/gtpin/gtpin_hw_helper.h" #include "runtime/gtpin/gtpin_hw_helper.h"
#include "runtime/gtpin/gtpin_notify.h"
#include "runtime/kernel/kernel.h" #include "runtime/kernel/kernel.h"
#include "runtime/mem_obj/buffer.h" #include "runtime/mem_obj/buffer.h"
#include "runtime/memory_manager/surface.h" #include "runtime/memory_manager/surface.h"
@@ -42,7 +43,6 @@ using namespace gtpin;
namespace OCLRT { namespace OCLRT {
extern bool isGTPinInitialized;
extern gtpin::ocl::gtpin_events_t GTPinCallbacks; extern gtpin::ocl::gtpin_events_t GTPinCallbacks;
igc_init_t *pIgcInfo = nullptr; igc_init_t *pIgcInfo = nullptr;
@@ -242,8 +242,4 @@ void gtpinNotifyPlatformShutdown() {
kernelExecQueue.clear(); kernelExecQueue.clear();
} }
} }
bool gtpinIsGTPinInitialized() {
return isGTPinInitialized;
}
} }

View File

@@ -23,6 +23,8 @@
#pragma once #pragma once
namespace OCLRT { namespace OCLRT {
extern bool isGTPinInitialized;
void gtpinNotifyContextCreate(cl_context context); void gtpinNotifyContextCreate(cl_context context);
void gtpinNotifyContextDestroy(cl_context context); void gtpinNotifyContextDestroy(cl_context context);
void gtpinNotifyKernelCreate(cl_kernel kernel); void gtpinNotifyKernelCreate(cl_kernel kernel);
@@ -33,5 +35,5 @@ void gtpinNotifyTaskCompletion(uint32_t completedTaskCount);
void gtpinNotifyMakeResident(void *pKernel, void *pCommandStreamReceiver); void gtpinNotifyMakeResident(void *pKernel, void *pCommandStreamReceiver);
void gtpinNotifyUpdateResidencyList(void *pKernel, void *pResidencyVector); void gtpinNotifyUpdateResidencyList(void *pKernel, void *pResidencyVector);
void gtpinNotifyPlatformShutdown(); void gtpinNotifyPlatformShutdown();
bool gtpinIsGTPinInitialized(); inline bool gtpinIsGTPinInitialized() { return isGTPinInitialized; }
} }

View File

@@ -49,7 +49,6 @@ using namespace OCLRT;
using namespace gtpin; using namespace gtpin;
namespace OCLRT { namespace OCLRT {
extern bool isGTPinInitialized;
extern std::deque<gtpinkexec_t> kernelExecQueue; extern std::deque<gtpinkexec_t> kernelExecQueue;
} }
@@ -2073,4 +2072,17 @@ TEST_F(GTPinTests, checkWhetherGTPinHwHelperGetterWorksWell) {
EXPECT_NE(nullptr, pGTPinHelper); EXPECT_NE(nullptr, pGTPinHelper);
} }
TEST(GTPinOfflineTests, givenGtPinInDisabledStateWhenCallbacksFromEnqueuePathAreCalledThenNothingHappens) {
ASSERT_FALSE(gtpinIsGTPinInitialized());
auto dummyKernel = reinterpret_cast<cl_kernel>(0x1000);
auto dummyQueue = reinterpret_cast<void *>(0x1000);
uint32_t dummyCompletedTask = 0u;
//now call gtpin function with dummy data, this must not crash
gtpinNotifyKernelSubmit(dummyKernel, dummyQueue);
gtpinNotifyPreFlushTask(dummyQueue);
gtpinNotifyTaskCompletion(dummyCompletedTask);
gtpinNotifyFlushTask(dummyCompletedTask);
EXPECT_FALSE(gtpinIsGTPinInitialized());
}
} // namespace ULT } // namespace ULT