Globally enable priority hints extension.

Change-Id: I9f3b8d3cf1bedb41d9e0622ff514bf76b4518d8c
This commit is contained in:
Zdunowski, Piotr
2018-01-24 12:00:27 +01:00
committed by sys_ocldev
parent 5b0ebe25d5
commit 0b6b12ea57
17 changed files with 87 additions and 123 deletions

View File

@@ -765,12 +765,6 @@ if (UNIX)
)
endif (UNIX)
# cl_khr_priority_hints support
if(NOT MSVC)
target_compile_definitions(${NEO_STATIC_LIB_NAME} PUBLIC -DSUPPORT_PRIORITY_HINTS)
message(STATUS "Supporting priority hints")
endif()
target_compile_definitions(${NEO_STATIC_LIB_NAME} PUBLIC DEFAULT_PLATFORM=${DEFAULT_SUPPORTED_PLATFORM})
link_directories(${GMM_LIB_PATHS})

View File

@@ -3481,13 +3481,6 @@ cl_command_queue CL_API_CALL clCreateCommandQueueWithProperties(cl_context conte
}
}
if (getCmdQueueProperties<cl_queue_priority_khr>(properties, CL_QUEUE_PRIORITY_KHR)) {
if (!pDevice->getDeviceInfo().priorityHintsSupported) {
err.set(CL_INVALID_QUEUE_PROPERTIES);
return commandQueue;
}
}
auto maskedFlags = getCmdQueueProperties<cl_command_queue_properties>(properties) &
minimumCreateDeviceQueueFlags;

View File

@@ -65,12 +65,12 @@ CommandQueue::CommandQueue() : CommandQueue(nullptr, nullptr, 0) {
CommandQueue::CommandQueue(Context *context,
Device *deviceId,
const cl_queue_properties *properties) : low_priority(false),
taskCount(0),
const cl_queue_properties *properties) : taskCount(0),
taskLevel(0),
virtualEvent(nullptr),
context(context),
device(deviceId),
priority(QueuePriority::MEDIUM),
perfCountersEnabled(false),
perfCountersConfig(UINT32_MAX),
perfCountersUserRegistersNumber(0),
@@ -427,7 +427,7 @@ void CommandQueue::flushWaitList(
if (flushTask) {
DispatchFlags dispatchFlags;
dispatchFlags.GSBA32BitRequired = ndRangeKernel;
dispatchFlags.low_priority = low_priority;
dispatchFlags.lowPriority = priority == QueuePriority::LOW;
dispatchFlags.implicitFlush = true;
dispatchFlags.preemptionMode = PreemptionHelper::taskPreemptionMode(*device, nullptr);

View File

@@ -41,21 +41,22 @@ class IndirectHeap;
class Kernel;
class MemObj;
// Driver-internal priority level of a command queue.
// CommandQueueHw's constructor derives it from the CL_QUEUE_PRIORITY_KHR
// property; the dispatch code only distinguishes LOW from the rest when it
// fills DispatchFlags::lowPriority.
enum class QueuePriority {
// Chosen when the CL_QUEUE_PRIORITY_LOW_KHR bit is set.
LOW,
// Chosen for CL_QUEUE_PRIORITY_MED_KHR; also the value the CommandQueue
// constructor initializes the member to.
MEDIUM,
// Chosen when the CL_QUEUE_PRIORITY_HIGH_KHR bit is set.
HIGH
};
// Specialization that associates the API handle type _cl_command_queue with
// the CommandQueue class through the DerivedType alias.
template <>
struct OpenCLObjectMapper<_cl_command_queue> {
typedef class CommandQueue DerivedType;
};
////////////////////////////////////////////////////////////////////////////////
// CommandQueue - Core implementation
////////////////////////////////////////////////////////////////////////////////
class CommandQueue : public BaseObject<_cl_command_queue> {
public:
static const cl_ulong objectMagic = 0x1234567890987654LL;
enum { NUM_HEAPS = IndirectHeap::NUM_TYPES };
bool low_priority;
static CommandQueue *create(Context *context, Device *device,
const cl_queue_properties *properties,
cl_int &errcodeRet);
@@ -381,6 +382,10 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
return perfCountersUserRegistersNumber;
}
// Returns the priority stored in this queue's `priority` member.
QueuePriority getPriority() const {
return priority;
}
// taskCount of last task
uint32_t taskCount;
@@ -400,6 +405,8 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
cl_command_queue_properties commandQueueProperties;
QueuePriority priority;
bool perfCountersEnabled;
cl_uint perfCountersConfig;
uint32_t perfCountersUserRegistersNumber;

View File

@@ -43,9 +43,17 @@ class CommandQueueHw : public CommandQueue {
CommandQueueHw(Context *context,
Device *device,
const cl_queue_properties *properties) : BaseClass(context, device, properties) {
if (getCmdQueueProperties<cl_queue_priority_khr>(properties, CL_QUEUE_PRIORITY_KHR) & static_cast<cl_queue_priority_khr>(CL_QUEUE_PRIORITY_LOW_KHR)) {
low_priority = true;
auto clPriority = getCmdQueueProperties<cl_queue_priority_khr>(properties, CL_QUEUE_PRIORITY_KHR);
if (clPriority & static_cast<cl_queue_priority_khr>(CL_QUEUE_PRIORITY_LOW_KHR)) {
priority = QueuePriority::LOW;
} else if (clPriority & static_cast<cl_queue_priority_khr>(CL_QUEUE_PRIORITY_MED_KHR)) {
priority = QueuePriority::MEDIUM;
} else if (clPriority & static_cast<cl_queue_priority_khr>(CL_QUEUE_PRIORITY_HIGH_KHR)) {
priority = QueuePriority::HIGH;
}
if (getCmdQueueProperties<cl_queue_properties>(properties, CL_QUEUE_PROPERTIES) & static_cast<cl_queue_properties>(CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE)) {
device->getCommandStreamReceiver().overrideDispatchPolicy(CommandStreamReceiver::BatchedDispatch);
}

View File

@@ -534,7 +534,7 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
dispatchFlags.GSBA32BitRequired = commandType == CL_COMMAND_NDRANGE_KERNEL;
dispatchFlags.mediaSamplerRequired = mediaSamplerRequired;
dispatchFlags.requiresCoherency = requiresCoherency;
dispatchFlags.low_priority = low_priority;
dispatchFlags.lowPriority = priority == QueuePriority::LOW;
dispatchFlags.implicitFlush = implicitFlush;
dispatchFlags.flushStampReference = this->flushStamp->getStampReference();
dispatchFlags.preemptionMode = PreemptionHelper::taskPreemptionMode(*device, multiDispatchInfo);

View File

@@ -335,7 +335,7 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
size_t startOffset = submitCommandStreamFromCsr ? commandStreamStartCSR : commandStreamStartTask;
auto &streamToSubmit = submitCommandStreamFromCsr ? commandStreamCSR : commandStreamTask;
BatchBuffer batchBuffer{streamToSubmit.getGraphicsAllocation(), startOffset, dispatchFlags.requiresCoherency, dispatchFlags.low_priority, streamToSubmit.getUsed(), &streamToSubmit};
BatchBuffer batchBuffer{streamToSubmit.getGraphicsAllocation(), startOffset, dispatchFlags.requiresCoherency, dispatchFlags.lowPriority, streamToSubmit.getUsed(), &streamToSubmit};
EngineType engineType = device->getEngineType();
if (submitCSR | submitTask) {

View File

@@ -50,7 +50,7 @@ struct DispatchFlags {
bool GSBA32BitRequired = false;
bool mediaSamplerRequired = false;
bool requiresCoherency = false;
bool low_priority = false;
bool lowPriority = false;
bool implicitFlush = false;
bool outOfOrderExecutionAllowed = false;
FlushStampTrackingObj *flushStampReference = nullptr;

View File

@@ -44,16 +44,6 @@ static std::string driverVersion = NEO_DRIVER_VERSION;
const char *builtInKernels = ""; // the "always available" (extension-independent) builtin kernels
// Advertises cl_khr_priority_hints on this device when preemption is usable.
// The whole body is compiled only if SUPPORT_PRIORITY_HINTS is defined;
// otherwise the function does nothing.
void Device::checkPriorityHints() {
#ifdef SUPPORT_PRIORITY_HINTS
// Enabled when either the ForcePreemptionMode debug flag selects a mode above
// Disabled, or the device's preemptionMode is at least ThreadGroup.
if (static_cast<PreemptionMode>(DebugManager.flags.ForcePreemptionMode.get()) > PreemptionMode::Disabled ||
(preemptionMode >= PreemptionMode::ThreadGroup)) {
// Append the extension name (with trailing space) and record the capability.
deviceExtensions += "cl_khr_priority_hints ";
deviceInfo.priorityHintsSupported = true;
}
#endif
}
bool Device::getEnabled64kbPages() {
if (DebugManager.flags.Enable64kbpages.get() == -1) {
// assign value according to os and hw configuration
@@ -170,25 +160,25 @@ void Device::initializeCaps() {
deviceInfo.hostUnifiedMemory = CL_TRUE;
deviceInfo.deviceAvailable = CL_TRUE;
deviceInfo.compilerAvailable = CL_TRUE;
deviceInfo.preferredVectorWidthChar = 16;
deviceInfo.preferredVectorWidthShort = 8;
deviceInfo.preferredVectorWidthInt = 4;
deviceInfo.preferredVectorWidthLong = 1;
deviceInfo.preferredVectorWidthFloat = 1;
deviceInfo.preferredVectorWidthChar = 16;
deviceInfo.preferredVectorWidthShort = 8;
deviceInfo.preferredVectorWidthInt = 4;
deviceInfo.preferredVectorWidthLong = 1;
deviceInfo.preferredVectorWidthFloat = 1;
deviceInfo.preferredVectorWidthDouble = 1;
deviceInfo.preferredVectorWidthHalf = 8;
deviceInfo.nativeVectorWidthChar = 16;
deviceInfo.nativeVectorWidthShort = 8;
deviceInfo.nativeVectorWidthInt = 4;
deviceInfo.nativeVectorWidthLong = 1;
deviceInfo.nativeVectorWidthFloat = 1;
deviceInfo.nativeVectorWidthDouble = 1;
deviceInfo.nativeVectorWidthHalf = 8;
deviceInfo.maxReadImageArgs = 128;
deviceInfo.maxWriteImageArgs = 128;
deviceInfo.maxReadWriteImageArgs = 0;
deviceInfo.maxParameterSize = 1024;
deviceInfo.executionCapabilities = CL_EXEC_KERNEL;
deviceInfo.preferredVectorWidthHalf = 8;
deviceInfo.nativeVectorWidthChar = 16;
deviceInfo.nativeVectorWidthShort = 8;
deviceInfo.nativeVectorWidthInt = 4;
deviceInfo.nativeVectorWidthLong = 1;
deviceInfo.nativeVectorWidthFloat = 1;
deviceInfo.nativeVectorWidthDouble = 1;
deviceInfo.nativeVectorWidthHalf = 8;
deviceInfo.maxReadImageArgs = 128;
deviceInfo.maxWriteImageArgs = 128;
deviceInfo.maxReadWriteImageArgs = 0;
deviceInfo.maxParameterSize = 1024;
deviceInfo.executionCapabilities = CL_EXEC_KERNEL;
deviceInfo.addressBits = 64;
@@ -337,7 +327,6 @@ void Device::initializeCaps() {
hwInfo.capabilityTable.ftrSvm * hwInfo.capabilityTable.ftrSupportsCoherency *
(CL_DEVICE_SVM_FINE_GRAIN_BUFFER | CL_DEVICE_SVM_ATOMICS));
deviceInfo.preemptionSupported = false;
deviceInfo.priorityHintsSupported = false;
deviceInfo.maxGlobalVariableSize = 64 * 1024;
deviceInfo.globalVariablePreferredTotalSize = (size_t)deviceInfo.maxMemAllocSize;
@@ -352,9 +341,5 @@ void Device::initializeCaps() {
deviceInfo.internalDriverVersion = CL_DEVICE_DRIVER_VERSION_INTEL_NEO1;
deviceInfo.enabled64kbPages = getEnabled64kbPages();
#ifdef SUPPORT_PRIORITY_HINTS
checkPriorityHints();
#endif
}
} // namespace OCLRT

View File

@@ -131,7 +131,6 @@ struct DeviceInfo {
uint32_t computeUnitsUsedForScratch;
bool force32BitAddressess;
bool preemptionSupported;
bool priorityHintsSupported;
double platformHostTimerResolution;
size_t planarYuvMaxWidth;

View File

@@ -70,7 +70,7 @@ CompletionStamp &CommandMapUnmap::submit(uint32_t taskLevel, bool terminated) {
dispatchFlags.dcFlush = true;
dispatchFlags.useSLM = true;
dispatchFlags.guardCommandBufferWithPipeControl = true;
dispatchFlags.low_priority = cmdQ.low_priority;
dispatchFlags.lowPriority = cmdQ.getPriority() == QueuePriority::LOW;
dispatchFlags.preemptionMode = PreemptionHelper::taskPreemptionMode(cmdQ.getDevice(), nullptr);
DEBUG_BREAK_IF(taskLevel >= Event::eventNotReady);
@@ -250,7 +250,7 @@ CompletionStamp &CommandComputeKernel::submit(uint32_t taskLevel, bool terminate
dispatchFlags.guardCommandBufferWithPipeControl = true;
dispatchFlags.GSBA32BitRequired = NDRangeKernel;
dispatchFlags.requiresCoherency = requiresCoherency;
dispatchFlags.low_priority = commandQueue.low_priority;
dispatchFlags.lowPriority = commandQueue.getPriority() == QueuePriority::LOW;
dispatchFlags.preemptionMode = PreemptionHelper::taskPreemptionMode(commandQueue.getDevice(), kernel);
DEBUG_BREAK_IF(taskLevel >= Event::eventNotReady);
@@ -291,7 +291,7 @@ CompletionStamp &CommandMarker::submit(uint32_t taskLevel, bool terminated) {
DispatchFlags dispatchFlags;
dispatchFlags.blocking = blocking;
dispatchFlags.dcFlush = shouldFlushDC(clCommandType, nullptr);
dispatchFlags.low_priority = cmdQ.low_priority;
dispatchFlags.lowPriority = cmdQ.getPriority() == QueuePriority::LOW;
dispatchFlags.preemptionMode = PreemptionHelper::taskPreemptionMode(cmdQ.getDevice(), nullptr);
DEBUG_BREAK_IF(taskLevel >= Event::eventNotReady);

View File

@@ -184,7 +184,7 @@ inline void DrmCommandStreamReceiver<GfxFamily>::overrideMediaVFEStateDirty(bool
template <typename GfxFamily>
inline void DrmCommandStreamReceiver<GfxFamily>::programVFEState(LinearStream &csr, DispatchFlags &dispatchFlags) {
bool &currentContextDirtyFlag = dispatchFlags.low_priority ? mediaVfeStateLowPriorityDirty : mediaVfeStateDirty;
bool &currentContextDirtyFlag = dispatchFlags.lowPriority ? mediaVfeStateLowPriorityDirty : mediaVfeStateDirty;
if (currentContextDirtyFlag) {
PreambleHelper<GfxFamily>::programVFEState(&csr, hwInfo, requiredScratchSize, getScratchPatchAddress());

View File

@@ -43,7 +43,8 @@ const char *deviceExtensionsList = "cl_khr_3d_image_writes "
"cl_intel_accelerator "
"cl_intel_media_block_io "
"cl_intel_driver_diagnostics "
"cl_intel_device_side_avc_motion_estimation ";
"cl_intel_device_side_avc_motion_estimation "
"cl_khr_priority_hints ";
std::string getExtensionsList(const HardwareInfo &hwInfo) {
std::string allExtensionsList;

View File

@@ -85,8 +85,6 @@ TEST_P(clCreateCommandQueueWithPropertiesTests, returnsSuccessForValidValues) {
CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE);
bool queueOnDeviceUsed = false;
bool priorityHintsUsed = false;
bool priorityHintsNotSupported = false;
Device *pDevice;
cl_queue_properties *pProp = &properties[0];
if (commandQueueProperties) {
@@ -111,14 +109,13 @@ TEST_P(clCreateCommandQueueWithPropertiesTests, returnsSuccessForValidValues) {
*pProp++ = queueThrottle;
}
*pProp++ = 0;
pDevice = castToObject<Device>(devices[0]);
priorityHintsNotSupported = !pDevice->getDeviceInfo().priorityHintsSupported;
cmdQ = clCreateCommandQueueWithProperties(
pContext,
devices[0],
properties,
&retVal);
if ((queueOnDeviceUsed && priorityHintsUsed) || (priorityHintsNotSupported && priorityHintsUsed)) {
if (queueOnDeviceUsed && priorityHintsUsed) {
EXPECT_EQ(nullptr, cmdQ);
EXPECT_EQ(retVal, CL_INVALID_QUEUE_PROPERTIES);
return;
@@ -285,7 +282,6 @@ TEST_F(clCreateCommandQueueWithPropertiesApi, returnOutOfMemoryWhenNumberOfDevic
TEST_F(clCreateCommandQueueWithPropertiesApi, returnOutOfMemory) {
InjectedFunction method = [this](size_t failureIndex) {
cl_queue_properties ooq[] = {CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_ON_DEVICE | CL_QUEUE_ON_DEVICE_DEFAULT, 0, 0};
auto retVal = CL_INVALID_VALUE;
@@ -326,21 +322,7 @@ TEST_F(clCreateCommandQueueWithPropertiesApi, returnErrorOnDeviceWithMedPriority
EXPECT_EQ(retVal, CL_INVALID_QUEUE_PROPERTIES);
}
// Verifies that requesting CL_QUEUE_PRIORITY_KHR fails with
// CL_INVALID_QUEUE_PROPERTIES when the device reports no priority-hints support.
TEST_F(clCreateCommandQueueWithPropertiesApi, returnErrorOnQueueWithPriority) {
auto pDevice = pPlatform->getDevice(0);
// Cast away constness so the test can force the capability off.
DeviceInfo &devInfo = const_cast<DeviceInfo &>(pDevice->getDeviceInfo());
devInfo.priorityHintsSupported = false;
cl_int retVal = CL_SUCCESS;
// Zero-terminated property list requesting a low-priority queue.
cl_queue_properties ondevice[] = {CL_QUEUE_PRIORITY_KHR, CL_QUEUE_PRIORITY_LOW_KHR, 0};
auto cmdqd = clCreateCommandQueueWithProperties(pContext, devices[0], ondevice, &retVal);
EXPECT_EQ(nullptr, cmdqd);
EXPECT_EQ(retVal, CL_INVALID_QUEUE_PROPERTIES);
}
TEST_F(clCreateCommandQueueWithPropertiesApi, returnSuccessOnQueueWithPriority) {
auto pDevice = pPlatform->getDevice(0);
DeviceInfo &devInfo = const_cast<DeviceInfo &>(pDevice->getDeviceInfo());
devInfo.priorityHintsSupported = true;
cl_int retVal = CL_SUCCESS;
cl_queue_properties ondevice[] = {CL_QUEUE_PRIORITY_KHR, CL_QUEUE_PRIORITY_LOW_KHR, 0};
auto cmdqd = clCreateCommandQueueWithProperties(pContext, devices[0], ondevice, &retVal);
@@ -349,4 +331,32 @@ TEST_F(clCreateCommandQueueWithPropertiesApi, returnSuccessOnQueueWithPriority)
retVal = clReleaseCommandQueue(cmdqd);
EXPECT_EQ(retVal, CL_SUCCESS);
}
// Parameter table for the priority test below: each entry pairs a
// CL_QUEUE_PRIORITY_*_KHR property value with the QueuePriority the created
// queue is expected to report.
std::pair<uint32_t, QueuePriority> priorityParams[3]{
std::make_pair(CL_QUEUE_PRIORITY_LOW_KHR, QueuePriority::LOW),
std::make_pair(CL_QUEUE_PRIORITY_MED_KHR, QueuePriority::MEDIUM),
std::make_pair(CL_QUEUE_PRIORITY_HIGH_KHR, QueuePriority::HIGH)};
// Fixture that reuses clCreateCommandQueueWithPropertiesApi and is
// parameterized over (CL priority value, expected QueuePriority) pairs.
class clCreateCommandQueueWithPropertiesApiPriority : public clCreateCommandQueueWithPropertiesApi,
public ::testing::WithParamInterface<std::pair<uint32_t, QueuePriority>> {
};
// For each parameter pair: creates a queue with the given CL_QUEUE_PRIORITY_KHR
// value, expects creation to succeed, and checks that the queue's internal
// priority equals the expected QueuePriority.
// NOTE(review): the test name contains the typo "Proirity"; left as-is here.
TEST_P(clCreateCommandQueueWithPropertiesApiPriority, givenCreateQueueWithWhenPriorityPropertiesThenSetCorrectProirityInternally) {
cl_int retVal = CL_SUCCESS;
// Zero-terminated property list carrying the CL priority under test.
cl_queue_properties ondevice[] = {CL_QUEUE_PRIORITY_KHR, GetParam().first, 0};
auto cmdqd = clCreateCommandQueueWithProperties(pContext, devices[0], ondevice, &retVal);
EXPECT_NE(nullptr, cmdqd);
EXPECT_EQ(retVal, CL_SUCCESS);
// Convert the API handle to the driver object to inspect its priority.
auto commandQueue = castToObject<CommandQueue>(cmdqd);
EXPECT_EQ(commandQueue->getPriority(), GetParam().second);
retVal = clReleaseCommandQueue(cmdqd);
EXPECT_EQ(retVal, CL_SUCCESS);
}
// Instantiates the parameterized priority test once per entry of priorityParams.
INSTANTIATE_TEST_CASE_P(AllValidPriorities,
clCreateCommandQueueWithPropertiesApiPriority,
::testing::ValuesIn(priorityParams));
} // namespace ULT

View File

@@ -133,7 +133,7 @@ struct UltCommandStreamReceiverTest
DispatchFlags dispatchFlags;
dispatchFlags.blocking = block;
dispatchFlags.requiresCoherency = requiresCoherency;
dispatchFlags.low_priority = lowPriority;
dispatchFlags.lowPriority = lowPriority;
return commandStreamReceiver.flushTask(
commandStream,
@@ -2153,7 +2153,7 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInBatchingModeWhenFlushTas
auto usedAfterFirstFlushTask = commandStream.getUsed();
dispatchFlags.requiresCoherency = true;
dispatchFlags.low_priority = true;
dispatchFlags.lowPriority = true;
mockCsr->flushTask(commandStream,
commandStream.getUsed(),

View File

@@ -486,44 +486,11 @@ TEST(Device_GetCaps, givenOpenCL12DeviceCapsWhenAskedForCPUcopyFlagThenTrueIsRet
EXPECT_TRUE(caps.cpuCopyAllowed);
}
TEST(Device_GetCaps, deviceReportsPriorityHintsExtensionWhenSupportEnabled) {
DebugManagerStateRestore stateRestorer;
PreemptionMode forceMode = PreemptionMode::ThreadGroup;
DebugManager.flags.ForcePreemptionMode.set((int32_t)forceMode);
TEST(Device_GetCaps, deviceReportsPriorityHintsExtension) {
auto device = std::unique_ptr<Device>(DeviceHelper<>::create(platformDevices[0]));
const auto &caps = device->getDeviceInfo();
#ifdef SUPPORT_PRIORITY_HINTS
EXPECT_TRUE(caps.priorityHintsSupported);
EXPECT_THAT(caps.deviceExtensions, testing::HasSubstr(std::string("cl_khr_priority_hints")));
#else
EXPECT_FALSE(caps.priorityHintsSupported);
#endif
}
// With the ForcePreemptionMode debug flag set to Disabled, a newly created
// device is expected to report priorityHintsSupported == false.
TEST(Device_GetCaps, deviceDoesntReportsPriorityHintsExtensionWhenPreemptionDisabled) {
// presumably restores the debug flags when it goes out of scope — confirm against DebugManagerStateRestore
DebugManagerStateRestore stateRestorer;
PreemptionMode forceMode = PreemptionMode::Disabled;
DebugManager.flags.ForcePreemptionMode.set((int32_t)forceMode);
auto device = std::unique_ptr<Device>(DeviceHelper<>::create(platformDevices[0]));
const auto &caps = device->getDeviceInfo();
EXPECT_FALSE(caps.priorityHintsSupported);
}
// Creates a device from a copy of the platform's HardwareInfo whose
// defaultPreemptionMode is Disabled and expects priorityHintsSupported == false.
TEST(Device_GetCaps, deviceDoesntReportsPriorityHintsExtensionWhenDefaultPreemptionDisabled) {
DebugManagerStateRestore stateRestorer;
const HardwareInfo &hwRef = *platformDevices[0];
// Work on a local copy so the shared platform description is not modified.
HardwareInfo hwTest = hwRef;
auto devPreemption = hwTest.capabilityTable.defaultPreemptionMode;
hwTest.capabilityTable.defaultPreemptionMode = PreemptionMode::Disabled;
auto device = std::unique_ptr<Device>(DeviceHelper<>::create(&hwTest));
const auto &caps = device->getDeviceInfo();
EXPECT_FALSE(caps.priorityHintsSupported);
// Writes the saved mode back into the local copy.
hwTest.capabilityTable.defaultPreemptionMode = devPreemption;
}
TEST(Device_GetCaps, givenDeviceThatDoesntHaveFp64ThenExtensionIsNotReported) {

View File

@@ -522,7 +522,7 @@ struct DrmCsrVfeTests : ::testing::Test {
};
void flushTask(CommandStreamReceiver &csr, LinearStream &stream, bool lowPriority) {
dispatchFlags.low_priority = lowPriority;
dispatchFlags.lowPriority = lowPriority;
csr.flushTask(stream, 0, stream, stream, stream, stream, 0, dispatchFlags);
}