Enable throttle hints extension.

Change-Id: I996fce8dbc792d77dc85df143ba5c0aa1cad83e5
This commit is contained in:
Zdunowski, Piotr 2018-01-29 11:18:34 +01:00 committed by sys_ocldev
parent eb42a1eba1
commit 1cfe5344fc
24 changed files with 361 additions and 61 deletions

View File

@ -96,7 +96,6 @@ The driver has the following functional delta compared to previously released dr
### Generic extensions
* cl_khr_mipmap
* cl_khr_mipmap_writes
* cl_khr_throttle_hints
* cl_khr_fp64
### Preview extensions
* cl_intelx_video_enhancement

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2017, Intel Corporation
* Copyright (c) 2017 - 2018, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@ -3478,6 +3478,13 @@ cl_command_queue CL_API_CALL clCreateCommandQueueWithProperties(cl_context conte
}
}
if (getCmdQueueProperties<cl_command_queue_properties>(properties) & static_cast<cl_command_queue_properties>(CL_QUEUE_ON_DEVICE)) {
if (getCmdQueueProperties<cl_queue_throttle_khr>(properties, CL_QUEUE_THROTTLE_KHR)) {
err.set(CL_INVALID_QUEUE_PROPERTIES);
return commandQueue;
}
}
auto maskedFlags = getCmdQueueProperties<cl_command_queue_properties>(properties) &
minimumCreateDeviceQueueFlags;

View File

@ -71,6 +71,7 @@ CommandQueue::CommandQueue(Context *context,
context(context),
device(deviceId),
priority(QueuePriority::MEDIUM),
throttle(QueueThrottle::MEDIUM),
perfCountersEnabled(false),
perfCountersConfig(UINT32_MAX),
perfCountersUserRegistersNumber(0),
@ -430,6 +431,7 @@ void CommandQueue::flushWaitList(
DispatchFlags dispatchFlags;
dispatchFlags.GSBA32BitRequired = ndRangeKernel;
dispatchFlags.lowPriority = priority == QueuePriority::LOW;
dispatchFlags.throttle = throttle;
dispatchFlags.implicitFlush = true;
dispatchFlags.preemptionMode = PreemptionHelper::taskPreemptionMode(*device, nullptr);

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2017, Intel Corporation
* Copyright (c) 2017 - 2018, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@ -26,6 +26,7 @@
#include "runtime/helpers/base_object.h"
#include "runtime/helpers/completion_stamp.h"
#include "runtime/helpers/flush_stamp.h"
#include "runtime/helpers/properties_helper.h"
#include "runtime/event/user_event.h"
#include "runtime/os_interface/performance_counters.h"
#include <atomic>
@ -386,6 +387,10 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
return priority;
}
// Returns the throttle hint currently stored in this queue's `throttle` member.
QueueThrottle getThrottle() const {
return throttle;
}
// taskCount of last task
uint32_t taskCount;
@ -408,6 +413,7 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
cl_command_queue_properties commandQueueProperties;
QueuePriority priority;
QueueThrottle throttle;
bool perfCountersEnabled;
cl_uint perfCountersConfig;

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2017, Intel Corporation
* Copyright (c) 2017 - 2018, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@ -54,6 +54,16 @@ class CommandQueueHw : public CommandQueue {
priority = QueuePriority::HIGH;
}
auto clThrottle = getCmdQueueProperties<cl_queue_throttle_khr>(properties, CL_QUEUE_THROTTLE_KHR);
if (clThrottle & static_cast<cl_queue_throttle_khr>(CL_QUEUE_THROTTLE_LOW_KHR)) {
throttle = QueueThrottle::LOW;
} else if (clThrottle & static_cast<cl_queue_throttle_khr>(CL_QUEUE_THROTTLE_MED_KHR)) {
throttle = QueueThrottle::MEDIUM;
} else if (clThrottle & static_cast<cl_queue_throttle_khr>(CL_QUEUE_THROTTLE_HIGH_KHR)) {
throttle = QueueThrottle::HIGH;
}
if (getCmdQueueProperties<cl_queue_properties>(properties, CL_QUEUE_PROPERTIES) & static_cast<cl_queue_properties>(CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE)) {
device->getCommandStreamReceiver().overrideDispatchPolicy(CommandStreamReceiver::BatchedDispatch);
}

View File

@ -346,7 +346,7 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
size_t startOffset = submitCommandStreamFromCsr ? commandStreamStartCSR : commandStreamStartTask;
auto &streamToSubmit = submitCommandStreamFromCsr ? commandStreamCSR : commandStreamTask;
BatchBuffer batchBuffer{streamToSubmit.getGraphicsAllocation(), startOffset, dispatchFlags.requiresCoherency, dispatchFlags.lowPriority, streamToSubmit.getUsed(), &streamToSubmit};
BatchBuffer batchBuffer{streamToSubmit.getGraphicsAllocation(), startOffset, dispatchFlags.requiresCoherency, dispatchFlags.lowPriority, dispatchFlags.throttle, streamToSubmit.getUsed(), &streamToSubmit};
EngineType engineType = device->getEngineType();
if (submitCSR | submitTask) {

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2017, Intel Corporation
* Copyright (c) 2017 - 2018, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@ -23,6 +23,7 @@
#pragma once
#include "runtime/memory_manager/memory_constants.h"
#include "runtime/helpers/hw_info.h"
#include "runtime/helpers/properties_helper.h"
#include <limits>
namespace OCLRT {
@ -51,6 +52,7 @@ struct DispatchFlags {
bool mediaSamplerRequired = false;
bool requiresCoherency = false;
bool lowPriority = false;
QueueThrottle throttle = QueueThrottle::MEDIUM;
bool implicitFlush = false;
bool outOfOrderExecutionAllowed = false;
FlushStampTrackingObj *flushStampReference = nullptr;

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2017, Intel Corporation
* Copyright (c) 2017 - 2018, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@ -61,6 +61,10 @@ void OCLRT::SubmissionAggregator::aggregateCommandBuffers(ResourcePackage &resou
return;
}
if (primaryCommandBuffer->next->batchBuffer.throttle != primaryCommandBuffer->batchBuffer.throttle) {
return;
}
auto nextCommandBuffer = primaryCommandBuffer->next;
ResourcePackage newResources;
@ -102,7 +106,7 @@ void OCLRT::SubmissionAggregator::aggregateCommandBuffers(ResourcePackage &resou
}
}
OCLRT::BatchBuffer::BatchBuffer(GraphicsAllocation *commandBufferAllocation, size_t startOffset, bool requiresCoherency, bool lowPriority, size_t usedSize, LinearStream *stream) : commandBufferAllocation(commandBufferAllocation), startOffset(startOffset), requiresCoherency(requiresCoherency), low_priority(lowPriority), usedSize(usedSize), stream(stream) {
OCLRT::BatchBuffer::BatchBuffer(GraphicsAllocation *commandBufferAllocation, size_t startOffset, bool requiresCoherency, bool lowPriority, QueueThrottle throttle, size_t usedSize, LinearStream *stream) : commandBufferAllocation(commandBufferAllocation), startOffset(startOffset), requiresCoherency(requiresCoherency), low_priority(lowPriority), throttle(throttle), usedSize(usedSize), stream(stream) {
}
OCLRT::CommandBuffer::CommandBuffer() {

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2017, Intel Corporation
* Copyright (c) 2017 - 2018, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@ -24,15 +24,18 @@
#include "runtime/utilities/idlist.h"
#include "runtime/utilities/stackvec.h"
#include "runtime/command_stream/linear_stream.h"
#include "runtime/helpers/properties_helper.h"
#include <vector>
namespace OCLRT {
class Event;
class FlushStampTracker;
struct BatchBuffer {
BatchBuffer(GraphicsAllocation *commandBufferAllocation,
size_t startOffset,
bool requiresCoherency,
bool lowPriority,
QueueThrottle throttle,
size_t usedSize,
LinearStream *stream);
BatchBuffer() {}
@ -40,6 +43,7 @@ struct BatchBuffer {
size_t startOffset = 0u;
bool requiresCoherency = false;
bool low_priority = false;
QueueThrottle throttle = QueueThrottle::MEDIUM;
size_t usedSize = 0u;
//only used in drm csr in gem close worker active mode

View File

@ -60,6 +60,7 @@ set(RUNTIME_SRCS_HELPERS_BASE
${CMAKE_CURRENT_SOURCE_DIR}/pipeline_select_helper.h
${CMAKE_CURRENT_SOURCE_DIR}/preamble.h
${CMAKE_CURRENT_SOURCE_DIR}/preamble.inl
${CMAKE_CURRENT_SOURCE_DIR}/properties_helper.h
${CMAKE_CURRENT_SOURCE_DIR}/ptr_math.h
${CMAKE_CURRENT_SOURCE_DIR}/queue_helpers.h
${CMAKE_CURRENT_SOURCE_DIR}/sampler_helpers.h

View File

@ -0,0 +1,34 @@
/*
* Copyright (c) 2018, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#pragma once
#include "runtime/helpers/hw_helper.h"
namespace OCLRT {
// Throttle hint kept by a command queue and copied into DispatchFlags and
// BatchBuffer on submission. CommandQueueHw picks the value from the
// CL_QUEUE_THROTTLE_KHR queue property; CommandQueue, DispatchFlags and
// BatchBuffer all initialize their throttle field to MEDIUM.
enum class QueueThrottle {
LOW, // selected for CL_QUEUE_THROTTLE_LOW_KHR
MEDIUM, // selected for CL_QUEUE_THROTTLE_MED_KHR; also the initial value
HIGH // selected for CL_QUEUE_THROTTLE_HIGH_KHR
};
} // namespace OCLRT

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2017, Intel Corporation
* Copyright (c) 2017 - 2018, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@ -71,6 +71,7 @@ CompletionStamp &CommandMapUnmap::submit(uint32_t taskLevel, bool terminated) {
dispatchFlags.useSLM = true;
dispatchFlags.guardCommandBufferWithPipeControl = true;
dispatchFlags.lowPriority = cmdQ.getPriority() == QueuePriority::LOW;
dispatchFlags.throttle = cmdQ.getThrottle();
dispatchFlags.preemptionMode = PreemptionHelper::taskPreemptionMode(cmdQ.getDevice(), nullptr);
DEBUG_BREAK_IF(taskLevel >= Event::eventNotReady);
@ -251,6 +252,7 @@ CompletionStamp &CommandComputeKernel::submit(uint32_t taskLevel, bool terminate
dispatchFlags.GSBA32BitRequired = NDRangeKernel;
dispatchFlags.requiresCoherency = requiresCoherency;
dispatchFlags.lowPriority = commandQueue.getPriority() == QueuePriority::LOW;
dispatchFlags.throttle = commandQueue.getThrottle();
dispatchFlags.preemptionMode = PreemptionHelper::taskPreemptionMode(commandQueue.getDevice(), kernel);
DEBUG_BREAK_IF(taskLevel >= Event::eventNotReady);
@ -292,6 +294,7 @@ CompletionStamp &CommandMarker::submit(uint32_t taskLevel, bool terminated) {
dispatchFlags.blocking = blocking;
dispatchFlags.dcFlush = shouldFlushDC(clCommandType, nullptr);
dispatchFlags.lowPriority = cmdQ.getPriority() == QueuePriority::LOW;
dispatchFlags.throttle = cmdQ.getThrottle();
dispatchFlags.preemptionMode = PreemptionHelper::taskPreemptionMode(cmdQ.getDevice(), nullptr);
DEBUG_BREAK_IF(taskLevel >= Event::eventNotReady);

View File

@ -86,7 +86,6 @@ template <typename GfxFamily>
FlushStamp WddmCommandStreamReceiver<GfxFamily>::flush(BatchBuffer &batchBuffer,
EngineType engineType, ResidencyContainer *allocationsForResidency) {
void *commandStreamAddress = ptrOffset(batchBuffer.commandBufferAllocation->getUnderlyingBuffer(), batchBuffer.startOffset);
bool success = true;
if (this->dispatchMode == DispatchMode::ImmediateDispatch) {
makeResident(*batchBuffer.commandBufferAllocation);
@ -104,11 +103,24 @@ FlushStamp WddmCommandStreamReceiver<GfxFamily>::flush(BatchBuffer &batchBuffer,
pHeader->NeedsMidBatchPreEmptionSupport = 0u;
}
pHeader->RequiresCoherency = batchBuffer.requiresCoherency;
pHeader->UmdRequestedSliceState = 0;
pHeader->UmdRequestedSubsliceCount = 0;
pHeader->UmdRequestedEUCount = wddm->getGtSysInfo()->EUCount / wddm->getGtSysInfo()->SubSliceCount;
success = wddm->submit(commandStreamAddress, batchBuffer.usedSize - batchBuffer.startOffset, commandBufferHeader);
const uint32_t maxRequestedSubsliceCount = 7;
switch (batchBuffer.throttle) {
case QueueThrottle::LOW:
pHeader->UmdRequestedSubsliceCount = 1;
break;
case QueueThrottle::MEDIUM:
pHeader->UmdRequestedSubsliceCount = 0;
break;
case QueueThrottle::HIGH:
pHeader->UmdRequestedSubsliceCount = (wddm->getGtSysInfo()->SubSliceCount <= maxRequestedSubsliceCount) ? wddm->getGtSysInfo()->SubSliceCount : 0;
break;
}
wddm->submit(commandStreamAddress, batchBuffer.usedSize - batchBuffer.startOffset, commandBufferHeader);
return wddm->getMonitoredFence().lastSubmittedFence;
}

View File

@ -44,7 +44,8 @@ const char *deviceExtensionsList = "cl_khr_3d_image_writes "
"cl_intel_media_block_io "
"cl_intel_driver_diagnostics "
"cl_intel_device_side_avc_motion_estimation "
"cl_khr_priority_hints ";
"cl_khr_priority_hints "
"cl_khr_throttle_hints ";
std::string getExtensionsList(const HardwareInfo &hwInfo) {
std::string allExtensionsList;

View File

@ -85,6 +85,7 @@ TEST_P(clCreateCommandQueueWithPropertiesTests, returnsSuccessForValidValues) {
CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE);
bool queueOnDeviceUsed = false;
bool priorityHintsUsed = false;
bool throttleHintsUsed = false;
cl_queue_properties *pProp = &properties[0];
if (commandQueueProperties) {
@ -107,6 +108,7 @@ TEST_P(clCreateCommandQueueWithPropertiesTests, returnsSuccessForValidValues) {
if (queueThrottle) {
*pProp++ = CL_QUEUE_THROTTLE_KHR;
*pProp++ = queueThrottle;
throttleHintsUsed = true;
}
*pProp++ = 0;
@ -119,6 +121,10 @@ TEST_P(clCreateCommandQueueWithPropertiesTests, returnsSuccessForValidValues) {
EXPECT_EQ(nullptr, cmdQ);
EXPECT_EQ(retVal, CL_INVALID_QUEUE_PROPERTIES);
return;
} else if (queueOnDeviceUsed && throttleHintsUsed) {
EXPECT_EQ(nullptr, cmdQ);
EXPECT_EQ(retVal, CL_INVALID_QUEUE_PROPERTIES);
return;
} else {
EXPECT_EQ(CL_SUCCESS, retVal);
ASSERT_NE(nullptr, cmdQ);
@ -341,7 +347,7 @@ class clCreateCommandQueueWithPropertiesApiPriority : public clCreateCommandQueu
public ::testing::WithParamInterface<std::pair<uint32_t, QueuePriority>> {
};
TEST_P(clCreateCommandQueueWithPropertiesApiPriority, givenCreateQueueWithWhenPriorityPropertiesThenSetCorrectProirityInternally) {
TEST_P(clCreateCommandQueueWithPropertiesApiPriority, givenCreateQueueWithWhenPriorityPropertiesThenSetCorrectPriorityInternally) {
cl_int retVal = CL_SUCCESS;
cl_queue_properties ondevice[] = {CL_QUEUE_PRIORITY_KHR, GetParam().first, 0};
auto cmdqd = clCreateCommandQueueWithProperties(pContext, devices[0], ondevice, &retVal);
@ -359,4 +365,31 @@ INSTANTIATE_TEST_CASE_P(AllValidPriorities,
clCreateCommandQueueWithPropertiesApiPriority,
::testing::ValuesIn(priorityParams));
std::pair<uint32_t, QueueThrottle> throttleParams[3]{
std::make_pair(CL_QUEUE_THROTTLE_LOW_KHR, QueueThrottle::LOW),
std::make_pair(CL_QUEUE_THROTTLE_MED_KHR, QueueThrottle::MEDIUM),
std::make_pair(CL_QUEUE_THROTTLE_HIGH_KHR, QueueThrottle::HIGH)};
class clCreateCommandQueueWithPropertiesApiThrottle : public clCreateCommandQueueWithPropertiesApi,
public ::testing::WithParamInterface<std::pair<uint32_t, QueueThrottle>> {
};
// For every (CL throttle value, expected QueueThrottle) pair supplied by the
// test parameter, creates a queue with CL_QUEUE_THROTTLE_KHR set to the CL
// value and checks that the resulting CommandQueue reports the expected
// internal throttle.
TEST_P(clCreateCommandQueueWithPropertiesApiThrottle, givenCreateQueueWithWhenThrottlePropertiesThenSetCorrectThrottleInternally) {
    cl_int retVal = CL_SUCCESS;
    // Zero-terminated {name, value} property list.
    cl_queue_properties throttleProperties[] = {CL_QUEUE_THROTTLE_KHR, GetParam().first, 0};
    auto clQueue = clCreateCommandQueueWithProperties(pContext, devices[0], throttleProperties, &retVal);
    // Report the error code first, then stop: the handle is dereferenced below.
    EXPECT_EQ(retVal, CL_SUCCESS);
    ASSERT_NE(nullptr, clQueue);

    auto commandQueue = castToObject<CommandQueue>(clQueue);
    ASSERT_NE(nullptr, commandQueue);
    EXPECT_EQ(commandQueue->getThrottle(), GetParam().second);

    retVal = clReleaseCommandQueue(clQueue);
    EXPECT_EQ(retVal, CL_SUCCESS);
}
INSTANTIATE_TEST_CASE_P(AllValidThrottleValues,
clCreateCommandQueueWithPropertiesApiThrottle,
::testing::ValuesIn(throttleParams));
} // namespace ULT

View File

@ -68,7 +68,7 @@ struct AUBFixture : public AUBCommandStreamFixture,
CommandStreamReceiverHw<FamilyType>::addBatchBufferEnd(*pCS, nullptr);
CommandStreamReceiverHw<FamilyType>::alignToCacheLine(*pCS);
BatchBuffer batchBuffer{pCS->getGraphicsAllocation(), 0, false, false, pCS->getUsed(), pCS};
BatchBuffer batchBuffer{pCS->getGraphicsAllocation(), 0, false, false, QueueThrottle::MEDIUM, pCS->getUsed(), pCS};
ResidencyContainer allocationsForResidency;
pCommandStreamReceiver->flush(batchBuffer, engineType, &allocationsForResidency);
@ -82,10 +82,10 @@ typedef Test<AUBFixture> AUBcommandstreamTests;
HWTEST_F(AUBcommandstreamTests, testFlushTwice) {
CommandStreamReceiverHw<FamilyType>::addBatchBufferEnd(*pCS, nullptr);
CommandStreamReceiverHw<FamilyType>::alignToCacheLine(*pCS);
BatchBuffer batchBuffer{pCS->getGraphicsAllocation(), 0, false, false, pCS->getUsed(), pCS};
BatchBuffer batchBuffer{pCS->getGraphicsAllocation(), 0, false, false, QueueThrottle::MEDIUM, pCS->getUsed(), pCS};
ResidencyContainer allocationsForResidency;
pCommandStreamReceiver->flush(batchBuffer, EngineType::ENGINE_RCS, &allocationsForResidency);
BatchBuffer batchBuffer2{pCS->getGraphicsAllocation(), 0, false, false, pCS->getUsed(), pCS};
BatchBuffer batchBuffer2{pCS->getGraphicsAllocation(), 0, false, false, QueueThrottle::MEDIUM, pCS->getUsed(), pCS};
ResidencyContainer allocationsForResidency2;
pCommandStreamReceiver->flush(batchBuffer2, EngineType::ENGINE_RCS, &allocationsForResidency);
}

View File

@ -32,14 +32,14 @@ using OCLRT::AUBCommandStreamReceiver;
using OCLRT::AUBCommandStreamReceiverHw;
using OCLRT::BatchBuffer;
using OCLRT::CommandStreamReceiver;
using OCLRT::DebugManager;
using OCLRT::GraphicsAllocation;
using OCLRT::ResidencyContainer;
using OCLRT::HardwareInfo;
using OCLRT::LinearStream;
using OCLRT::MemoryManager;
using OCLRT::ObjectNotResident;
using OCLRT::platformDevices;
using OCLRT::DebugManager;
using OCLRT::ResidencyContainer;
typedef Test<DeviceFixture> AubCommandStreamReceiverTests;
@ -128,7 +128,7 @@ HWTEST_F(AubCommandStreamReceiverTests, flushShouldLeaveProperRingTailAlignment)
auto engineType = OCLRT::ENGINE_RCS;
auto ringTailAlignment = sizeof(uint64_t);
BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, false, false, cs.getUsed(), &cs};
BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, false, false, QueueThrottle::MEDIUM, cs.getUsed(), &cs};
// First flush typically includes a preamble and chain to command buffer
aubCsr->overrideDispatchPolicy(CommandStreamReceiver::DispatchMode::ImmediateDispatch);
@ -153,7 +153,7 @@ HWTEST_F(AubCommandStreamReceiverTests, flushShouldCallMakeResidentOnCommandBuff
ASSERT_NE(nullptr, commandBuffer);
LinearStream cs(commandBuffer);
BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, false, false, cs.getUsed(), &cs};
BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, false, false, QueueThrottle::MEDIUM, cs.getUsed(), &cs};
auto engineType = OCLRT::ENGINE_RCS;
EXPECT_EQ(ObjectNotResident, commandBuffer->residencyTaskCount);
@ -181,7 +181,7 @@ HWTEST_F(AubCommandStreamReceiverTests, flushShouldCallMakeResidentOnResidencyAl
ASSERT_NE(nullptr, commandBuffer);
LinearStream cs(commandBuffer);
BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, false, false, cs.getUsed(), &cs};
BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, false, false, QueueThrottle::MEDIUM, cs.getUsed(), &cs};
auto engineType = OCLRT::ENGINE_RCS;
ResidencyContainer allocationsForResidency = {gfxAllocation};
@ -291,7 +291,7 @@ HWTEST_F(AubCommandStreamReceiverTests, givenAubCommandStreamReceiverInNoneStand
ASSERT_NE(nullptr, commandBuffer);
LinearStream cs(commandBuffer);
BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, false, false, cs.getUsed(), &cs};
BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, false, false, QueueThrottle::MEDIUM, cs.getUsed(), &cs};
auto engineType = OCLRT::ENGINE_RCS;
EXPECT_EQ(ObjectNotResident, commandBuffer->residencyTaskCount);

View File

@ -3142,3 +3142,96 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrWhenTemporaryAndReusableAl
EXPECT_TRUE(memoryManager->allocationsForReuse.peekIsEmpty());
EXPECT_TRUE(memoryManager->graphicsAllocations.peekIsEmpty());
}
// With the CSR in batched dispatch mode and a mock submissions aggregator
// installed, flushTask() is called with DispatchFlags::throttle set to LOW and
// the first recorded command buffer is expected to carry the same throttle.
HWTEST_F(CommandStreamReceiverFlushTaskTests, givenDispatchFlagsWithThrottleSetToLowWhenFlushTaskIsCalledThenThrottleIsSetInBatchBuffer) {
    CommandQueueHw<FamilyType> commandQueue(nullptr, pDevice, 0);
    auto &commandStream = commandQueue.getCS(4096u);

    auto mockCsr = new MockCsrHw2<FamilyType>(*platformDevices[0]);
    pDevice->resetCommandStreamReceiver(mockCsr);
    mockCsr->overrideDispatchPolicy(CommandStreamReceiver::DispatchMode::BatchedDispatch);

    auto mockedSubmissionsAggregator = new mockSubmissionsAggregator();
    mockCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator);

    DispatchFlags dispatchFlags;
    dispatchFlags.throttle = QueueThrottle::LOW;

    mockCsr->flushTask(commandStream,
                       0,
                       dsh,
                       ih,
                       ioh,
                       ssh,
                       taskLevel,
                       dispatchFlags);

    auto &cmdBufferList = mockedSubmissionsAggregator->peekCommandBuffers();
    auto cmdBuffer = cmdBufferList.peekHead();
    // Fatal check: an empty list would otherwise be dereferenced below.
    ASSERT_NE(nullptr, cmdBuffer);
    EXPECT_EQ(cmdBuffer->batchBuffer.throttle, QueueThrottle::LOW);
}
// With the CSR in batched dispatch mode and a mock submissions aggregator
// installed, flushTask() is called with DispatchFlags::throttle set to MEDIUM
// and the first recorded command buffer is expected to carry the same throttle.
HWTEST_F(CommandStreamReceiverFlushTaskTests, givenDispatchFlagsWithThrottleSetToMediumWhenFlushTaskIsCalledThenThrottleIsSetInBatchBuffer) {
    CommandQueueHw<FamilyType> commandQueue(nullptr, pDevice, 0);
    auto &commandStream = commandQueue.getCS(4096u);

    auto mockCsr = new MockCsrHw2<FamilyType>(*platformDevices[0]);
    pDevice->resetCommandStreamReceiver(mockCsr);
    mockCsr->overrideDispatchPolicy(CommandStreamReceiver::DispatchMode::BatchedDispatch);

    auto mockedSubmissionsAggregator = new mockSubmissionsAggregator();
    mockCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator);

    DispatchFlags dispatchFlags;
    dispatchFlags.throttle = QueueThrottle::MEDIUM;

    mockCsr->flushTask(commandStream,
                       0,
                       dsh,
                       ih,
                       ioh,
                       ssh,
                       taskLevel,
                       dispatchFlags);

    auto &cmdBufferList = mockedSubmissionsAggregator->peekCommandBuffers();
    auto cmdBuffer = cmdBufferList.peekHead();
    // Fatal check: an empty list would otherwise be dereferenced below.
    ASSERT_NE(nullptr, cmdBuffer);
    EXPECT_EQ(cmdBuffer->batchBuffer.throttle, QueueThrottle::MEDIUM);
}
// With the CSR in batched dispatch mode and a mock submissions aggregator
// installed, flushTask() is called with DispatchFlags::throttle set to HIGH and
// the first recorded command buffer is expected to carry the same throttle.
HWTEST_F(CommandStreamReceiverFlushTaskTests, givenDispatchFlagsWithThrottleSetToHighWhenFlushTaskIsCalledThenThrottleIsSetInBatchBuffer) {
    CommandQueueHw<FamilyType> commandQueue(nullptr, pDevice, 0);
    auto &commandStream = commandQueue.getCS(4096u);

    auto mockCsr = new MockCsrHw2<FamilyType>(*platformDevices[0]);
    pDevice->resetCommandStreamReceiver(mockCsr);
    mockCsr->overrideDispatchPolicy(CommandStreamReceiver::DispatchMode::BatchedDispatch);

    auto mockedSubmissionsAggregator = new mockSubmissionsAggregator();
    mockCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator);

    DispatchFlags dispatchFlags;
    dispatchFlags.throttle = QueueThrottle::HIGH;

    mockCsr->flushTask(commandStream,
                       0,
                       dsh,
                       ih,
                       ioh,
                       ssh,
                       taskLevel,
                       dispatchFlags);

    auto &cmdBufferList = mockedSubmissionsAggregator->peekCommandBuffers();
    auto cmdBuffer = cmdBufferList.peekHead();
    // Fatal check: an empty list would otherwise be dereferenced below.
    ASSERT_NE(nullptr, cmdBuffer);
    EXPECT_EQ(cmdBuffer->batchBuffer.throttle, QueueThrottle::HIGH);
}

View File

@ -130,7 +130,7 @@ HWTEST_P(CommandStreamReceiverWithAubDumpTest, givenCommandStreamReceiverWithAub
ASSERT_NE(nullptr, commandBuffer);
LinearStream cs(commandBuffer);
BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, false, false, cs.getUsed(), &cs};
BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, false, false, QueueThrottle::MEDIUM, cs.getUsed(), &cs};
auto engineType = OCLRT::ENGINE_RCS;
ResidencyContainer allocationsForResidency;

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2017, Intel Corporation
* Copyright (c) 2017 - 2018, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@ -386,7 +386,7 @@ TEST(SubmissionsAggregator, givenTwoCommandBufferWhereSecondContainsTheFirstComm
EXPECT_EQ(12u, totalUsedSize);
}
TEST(SubmissionsAggregator, givenCommandBuffersRequiringDifferenctCoherencySettingWhenAggregateIsCalledThenTheyAreNotAgggregated) {
TEST(SubmissionsAggregator, givenCommandBuffersRequiringDifferentCoherencySettingWhenAggregateIsCalledThenTheyAreNotAgggregated) {
MockSubmissionAggregator submissionsAggregator;
CommandBuffer *cmdBuffer = new CommandBuffer;
@ -414,7 +414,35 @@ TEST(SubmissionsAggregator, givenCommandBuffersRequiringDifferenctCoherencySetti
EXPECT_EQ(1u, cmdBuffer->inspectionId);
}
TEST(SubmissionsAggregator, givenCommandBuffersRequiringDifferenctPrioritySettingWhenAggregateIsCalledThenTheyAreNotAgggregated) {
// Records two command buffers whose batch buffers differ in throttle (LOW vs
// MEDIUM) and expects aggregation to stop after the first one: a single
// resource in the package, a used size of 1, and differing inspection ids.
TEST(SubmissionsAggregator, givenCommandBuffersRequiringDifferentThrottleSettingWhenAggregateIsCalledThenTheyAreNotAgggregated) {
    MockSubmissionAggregator submissionsAggregator;
    auto *lowThrottleBuffer = new CommandBuffer;
    auto *mediumThrottleBuffer = new CommandBuffer;
    GraphicsAllocation alloc1(nullptr, 1);
    GraphicsAllocation alloc7(nullptr, 7);

    // First buffer: LOW throttle, one surface.
    lowThrottleBuffer->batchBuffer.throttle = QueueThrottle::LOW;
    lowThrottleBuffer->surfaces.push_back(&alloc1);

    // Second buffer: MEDIUM throttle, one surface.
    mediumThrottleBuffer->batchBuffer.throttle = QueueThrottle::MEDIUM;
    mediumThrottleBuffer->surfaces.push_back(&alloc7);

    // Recording order matters: the LOW buffer is the primary one.
    submissionsAggregator.recordCommandBuffer(lowThrottleBuffer);
    submissionsAggregator.recordCommandBuffer(mediumThrottleBuffer);

    ResourcePackage resourcePackage;
    size_t totalUsedSize = 0;
    size_t totalMemoryBudget = 200;
    submissionsAggregator.aggregateCommandBuffers(resourcePackage, totalUsedSize, totalMemoryBudget);

    EXPECT_EQ(1u, totalUsedSize);
    EXPECT_EQ(1u, resourcePackage.size());
    EXPECT_NE(lowThrottleBuffer->inspectionId, mediumThrottleBuffer->inspectionId);
    EXPECT_EQ(1u, lowThrottleBuffer->inspectionId);
}
TEST(SubmissionsAggregator, givenCommandBuffersRequiringDifferentPrioritySettingWhenAggregateIsCalledThenTheyAreNotAgggregated) {
MockSubmissionAggregator submissionsAggregator;
CommandBuffer *cmdBuffer = new CommandBuffer;

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2017, Intel Corporation
* Copyright (c) 2017 - 2018, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@ -86,7 +86,7 @@ TEST_F(Tbx_command_stream, DISABLED_flush) {
memset(buffer, 0, 4096);
LinearStream cs(buffer, 4096);
size_t startOffset = 0;
BatchBuffer batchBuffer{cs.getGraphicsAllocation(), startOffset, false, false, cs.getUsed(), &cs};
BatchBuffer batchBuffer{cs.getGraphicsAllocation(), startOffset, false, false, QueueThrottle::MEDIUM, cs.getUsed(), &cs};
pCommandStreamReceiver->flush(batchBuffer, EngineType::ENGINE_RCS, nullptr);
}
@ -98,7 +98,7 @@ HWTEST_F(Tbx_command_stream, DISABLED_flushUntilTailRCSLargerThanSizeRCS) {
TbxCommandStreamReceiverHw<FamilyType> *tbxCsr = (TbxCommandStreamReceiverHw<FamilyType> *)pCommandStreamReceiver;
auto &engineInfo = tbxCsr->engineInfoTable[EngineType::ENGINE_RCS];
BatchBuffer batchBuffer{cs.getGraphicsAllocation(), startOffset, false, false, cs.getUsed(), &cs};
BatchBuffer batchBuffer{cs.getGraphicsAllocation(), startOffset, false, false, QueueThrottle::MEDIUM, cs.getUsed(), &cs};
pCommandStreamReceiver->flush(batchBuffer, EngineType::ENGINE_RCS, nullptr);
auto size = engineInfo.sizeRCS;
engineInfo.sizeRCS = 64;

View File

@ -512,6 +512,13 @@ TEST(Device_GetCaps, deviceReportsPriorityHintsExtension) {
EXPECT_THAT(caps.deviceExtensions, testing::HasSubstr(std::string("cl_khr_priority_hints")));
}
// The device's extension string must advertise cl_khr_throttle_hints.
TEST(Device_GetCaps, deviceReportsThrottleHintsExtension) {
    std::unique_ptr<Device> device(DeviceHelper<>::create(platformDevices[0]));
    const auto &deviceInfo = device->getDeviceInfo();

    const std::string expectedExtension("cl_khr_throttle_hints");
    EXPECT_THAT(deviceInfo.deviceExtensions, testing::HasSubstr(expectedExtension));
}
TEST(Device_GetCaps, givenDeviceThatDoesntHaveFp64ThenExtensionIsNotReported) {
HardwareInfo nonFp64Device = *platformDevices[0];
nonFp64Device.capabilityTable.ftrSupportsFP64 = false;

View File

@ -203,7 +203,7 @@ TEST_F(DrmCommandStreamTest, Flush) {
csr->addBatchBufferEnd(cs, nullptr);
csr->alignToCacheLine(cs);
BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, false, false, cs.getUsed(), &cs};
BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, false, false, QueueThrottle::MEDIUM, cs.getUsed(), &cs};
auto flushStamp = csr->flush(batchBuffer, EngineType::ENGINE_RCS, nullptr);
EXPECT_EQ(boHandle, flushStamp);
EXPECT_EQ(cs.getBase(), nullptr);
@ -231,7 +231,7 @@ TEST_F(DrmCommandStreamTest, FlushWithLowPriorityContext) {
csr->addBatchBufferEnd(cs, nullptr);
csr->alignToCacheLine(cs);
BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, false, true, cs.getUsed(), &cs};
BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, false, true, QueueThrottle::MEDIUM, cs.getUsed(), &cs};
csr->flush(batchBuffer, EngineType::ENGINE_RCS, nullptr);
EXPECT_EQ(cs.getBase(), nullptr);
EXPECT_EQ(cs.getGraphicsAllocation(), nullptr);
@ -255,7 +255,7 @@ TEST_F(DrmCommandStreamTest, FlushInvalidAddress) {
csr->addBatchBufferEnd(cs, nullptr);
csr->alignToCacheLine(cs);
BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, false, false, cs.getUsed(), &cs};
BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, false, false, QueueThrottle::MEDIUM, cs.getUsed(), &cs};
csr->flush(batchBuffer, EngineType::ENGINE_RCS, nullptr);
delete[] commandBuffer;
}
@ -280,7 +280,7 @@ TEST_F(DrmCommandStreamTest, FlushMultipleTimes) {
LinearStream cs(commandBuffer);
csr->addBatchBufferEnd(cs, nullptr);
csr->alignToCacheLine(cs);
BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, false, false, cs.getUsed(), &cs};
BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, false, false, QueueThrottle::MEDIUM, cs.getUsed(), &cs};
csr->flush(batchBuffer, EngineType::ENGINE_RCS, nullptr);
EXPECT_EQ(0u, cs.getAvailableSpace());
}
@ -308,7 +308,7 @@ TEST_F(DrmCommandStreamTest, FlushNotEmptyBB) {
csr->addBatchBufferEnd(cs, nullptr);
csr->alignToCacheLine(cs);
BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, false, false, cs.getUsed(), &cs};
BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, false, false, QueueThrottle::MEDIUM, cs.getUsed(), &cs};
csr->flush(batchBuffer, EngineType::ENGINE_RCS, nullptr);
}
@ -333,7 +333,7 @@ TEST_F(DrmCommandStreamTest, FlushNotEmptyNotPaddedBB) {
csr->addBatchBufferEnd(cs, nullptr);
csr->alignToCacheLine(cs);
BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, false, false, cs.getUsed(), &cs};
BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, false, false, QueueThrottle::MEDIUM, cs.getUsed(), &cs};
csr->flush(batchBuffer, EngineType::ENGINE_RCS, nullptr);
}
@ -365,7 +365,7 @@ TEST_F(DrmCommandStreamTest, FlushNotAligned) {
csr->addBatchBufferEnd(cs, nullptr);
csr->alignToCacheLine(cs);
BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 4, false, false, cs.getUsed(), &cs};
BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 4, false, false, QueueThrottle::MEDIUM, cs.getUsed(), &cs};
csr->flush(batchBuffer, EngineType::ENGINE_RCS, nullptr);
}
@ -419,7 +419,7 @@ TEST_F(DrmCommandStreamTest, FlushCheckFlags) {
csr->makeResident(allocation2);
csr->addBatchBufferEnd(cs, nullptr);
csr->alignToCacheLine(cs);
BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, false, false, cs.getUsed(), &cs};
BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, false, false, QueueThrottle::MEDIUM, cs.getUsed(), &cs};
csr->flush(batchBuffer, EngineType::ENGINE_RCS, nullptr);
}
@ -453,7 +453,7 @@ TEST_F(DrmCommandStreamTest, CheckDrmFree) {
csr->makeResident(allocation);
csr->addBatchBufferEnd(cs, nullptr);
csr->alignToCacheLine(cs);
BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 4, false, false, cs.getUsed(), &cs};
BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 4, false, false, QueueThrottle::MEDIUM, cs.getUsed(), &cs};
csr->flush(batchBuffer, EngineType::ENGINE_RCS, nullptr);
}
@ -498,7 +498,7 @@ TEST_F(DrmCommandStreamTest, CheckDrmFreeCloseFailed) {
csr->makeResident(allocation);
csr->addBatchBufferEnd(cs, nullptr);
csr->alignToCacheLine(cs);
BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 4, false, false, cs.getUsed(), &cs};
BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 4, false, false, QueueThrottle::MEDIUM, cs.getUsed(), &cs};
csr->flush(batchBuffer, EngineType::ENGINE_RCS, nullptr);
}
@ -804,7 +804,7 @@ TEST_F(DrmCommandStreamGemWorkerTests, givenCommandStreamWhenItIsFlushedWithGemC
csr->alignToCacheLine(cs);
auto storedBase = cs.getBase();
auto storedGraphicsAllocation = cs.getGraphicsAllocation();
BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, false, false, cs.getUsed(), &cs};
BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, false, false, QueueThrottle::MEDIUM, cs.getUsed(), &cs};
csr->flush(batchBuffer, EngineType::ENGINE_RCS, nullptr);
EXPECT_EQ(cs.getBase(), storedBase);
EXPECT_EQ(cs.getGraphicsAllocation(), storedGraphicsAllocation);
@ -837,7 +837,7 @@ TEST_F(DrmCommandStreamGemWorkerTests, givenTaskThatRequiresLargeResourceCountWh
csr->addBatchBufferEnd(cs, nullptr);
csr->alignToCacheLine(cs);
BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, false, false, cs.getUsed(), &cs};
BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, false, false, QueueThrottle::MEDIUM, cs.getUsed(), &cs};
csr->flush(batchBuffer, EngineType::ENGINE_RCS, nullptr);
EXPECT_EQ(11u, this->mock->execBuffer.buffer_count);
@ -884,7 +884,7 @@ TEST_F(DrmCommandStreamGemWorkerTests, givenCommandStreamWithDuplicatesWhenItIsF
csr->alignToCacheLine(cs);
auto storedBase = cs.getBase();
auto storedGraphicsAllocation = cs.getGraphicsAllocation();
BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, false, false, cs.getUsed(), &cs};
BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, false, false, QueueThrottle::MEDIUM, cs.getUsed(), &cs};
csr->flush(batchBuffer, EngineType::ENGINE_RCS, nullptr);
EXPECT_EQ(cs.getBase(), storedBase);
EXPECT_EQ(cs.getGraphicsAllocation(), storedGraphicsAllocation);
@ -918,7 +918,7 @@ TEST_F(DrmCommandStreamBatchingTests, givenCSRWhenFlushIsCalledThenProperFlagsAr
csr->addBatchBufferEnd(cs, nullptr);
csr->alignToCacheLine(cs);
BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, false, false, cs.getUsed(), &cs};
BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, false, false, QueueThrottle::MEDIUM, cs.getUsed(), &cs};
csr->flush(batchBuffer, EngineType::ENGINE_RCS, nullptr);
EXPECT_EQ(3, this->mock->ioctl_cnt);
@ -1360,7 +1360,7 @@ TEST_F(DrmCommandStreamLeaksTest, Flush) {
csr->addBatchBufferEnd(cs, nullptr);
csr->alignToCacheLine(cs);
BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, false, false, cs.getUsed(), &cs};
BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, false, false, QueueThrottle::MEDIUM, cs.getUsed(), &cs};
csr->flush(batchBuffer, EngineType::ENGINE_RCS, nullptr);
EXPECT_EQ(cs.getBase(), nullptr);
EXPECT_EQ(cs.getGraphicsAllocation(), nullptr);
@ -1427,7 +1427,7 @@ TEST_F(DrmCommandStreamLeaksTest, ClearResidencyWhenFlushCalled) {
EXPECT_FALSE(isResident(allocation1->getBO()));
EXPECT_FALSE(isResident(allocation2->getBO()));
BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, false, false, cs.getUsed(), &cs};
BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, false, false, QueueThrottle::MEDIUM, cs.getUsed(), &cs};
csr->flush(batchBuffer, EngineType::ENGINE_RCS, nullptr);
EXPECT_EQ(cs.getBase(), nullptr);
EXPECT_EQ(cs.getGraphicsAllocation(), nullptr);
@ -1458,7 +1458,7 @@ TEST_F(DrmCommandStreamLeaksTest, FlushMultipleTimes) {
LinearStream cs(commandBuffer);
csr->addBatchBufferEnd(cs, nullptr);
csr->alignToCacheLine(cs);
BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, false, false, cs.getUsed(), &cs};
BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, false, false, QueueThrottle::MEDIUM, cs.getUsed(), &cs};
csr->flush(batchBuffer, EngineType::ENGINE_RCS, nullptr);
commandBuffer = mm->allocateGraphicsMemory(1024, 4096);
@ -1467,7 +1467,7 @@ TEST_F(DrmCommandStreamLeaksTest, FlushMultipleTimes) {
cs.replaceGraphicsAllocation(commandBuffer);
csr->addBatchBufferEnd(cs, nullptr);
csr->alignToCacheLine(cs);
BatchBuffer batchBuffer2{cs.getGraphicsAllocation(), 8, false, false, cs.getUsed(), &cs};
BatchBuffer batchBuffer2{cs.getGraphicsAllocation(), 8, false, false, QueueThrottle::MEDIUM, cs.getUsed(), &cs};
csr->flush(batchBuffer2, EngineType::ENGINE_RCS, nullptr);
auto allocation = mm->allocateGraphicsMemory(1024, 4096);
@ -1484,7 +1484,7 @@ TEST_F(DrmCommandStreamLeaksTest, FlushMultipleTimes) {
cs.replaceGraphicsAllocation(commandBuffer);
csr->addBatchBufferEnd(cs, nullptr);
csr->alignToCacheLine(cs);
BatchBuffer batchBuffer3{cs.getGraphicsAllocation(), 16, false, false, cs.getUsed(), &cs};
BatchBuffer batchBuffer3{cs.getGraphicsAllocation(), 16, false, false, QueueThrottle::MEDIUM, cs.getUsed(), &cs};
csr->flush(batchBuffer3, EngineType::ENGINE_RCS, nullptr);
csr->makeSurfacePackNonResident(nullptr);
mm->freeGraphicsMemory(allocation);
@ -1496,7 +1496,7 @@ TEST_F(DrmCommandStreamLeaksTest, FlushMultipleTimes) {
cs.replaceGraphicsAllocation(commandBuffer);
csr->addBatchBufferEnd(cs, nullptr);
csr->alignToCacheLine(cs);
BatchBuffer batchBuffer4{cs.getGraphicsAllocation(), 24, false, false, cs.getUsed(), &cs};
BatchBuffer batchBuffer4{cs.getGraphicsAllocation(), 24, false, false, QueueThrottle::MEDIUM, cs.getUsed(), &cs};
csr->flush(batchBuffer4, EngineType::ENGINE_RCS, nullptr);
}
@ -1511,7 +1511,7 @@ TEST_F(DrmCommandStreamLeaksTest, FlushNotEmptyBB) {
csr->addBatchBufferEnd(cs, nullptr);
csr->alignToCacheLine(cs);
BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, false, false, cs.getUsed(), &cs};
BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, false, false, QueueThrottle::MEDIUM, cs.getUsed(), &cs};
csr->flush(batchBuffer, EngineType::ENGINE_RCS, nullptr);
}
@ -1526,7 +1526,7 @@ TEST_F(DrmCommandStreamLeaksTest, FlushNotEmptyNotPaddedBB) {
csr->addBatchBufferEnd(cs, nullptr);
csr->alignToCacheLine(cs);
BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, false, false, cs.getUsed(), &cs};
BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, false, false, QueueThrottle::MEDIUM, cs.getUsed(), &cs};
csr->flush(batchBuffer, EngineType::ENGINE_RCS, nullptr);
}
@ -1541,7 +1541,7 @@ TEST_F(DrmCommandStreamLeaksTest, FlushNotAligned) {
csr->addBatchBufferEnd(cs, nullptr);
csr->alignToCacheLine(cs);
BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 4, false, false, cs.getUsed(), &cs};
BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 4, false, false, QueueThrottle::MEDIUM, cs.getUsed(), &cs};
csr->flush(batchBuffer, EngineType::ENGINE_RCS, nullptr);
}
@ -1559,7 +1559,7 @@ TEST_F(DrmCommandStreamLeaksTest, CheckDrmFree) {
csr->makeResident(*allocation);
csr->addBatchBufferEnd(cs, nullptr);
csr->alignToCacheLine(cs);
BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 4, false, false, cs.getUsed(), &cs};
BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 4, false, false, QueueThrottle::MEDIUM, cs.getUsed(), &cs};
csr->flush(batchBuffer, EngineType::ENGINE_RCS, nullptr);
csr->makeNonResident(*allocation);
mm->freeGraphicsMemory(allocation);

View File

@ -162,7 +162,7 @@ TEST_F(WddmCommandStreamTest, Flush) {
GraphicsAllocation *commandBuffer = memManager->allocateGraphicsMemory(4096, 4096);
ASSERT_NE(nullptr, commandBuffer);
LinearStream cs(commandBuffer);
BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, false, false, cs.getUsed(), &cs};
BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, false, false, QueueThrottle::MEDIUM, cs.getUsed(), &cs};
auto flushStamp = csr->flush(batchBuffer, EngineType::ENGINE_RCS, nullptr);
EXPECT_EQ(1u, wddm->submitResult.called);
@ -178,7 +178,7 @@ TEST_F(WddmCommandStreamTest, FlushWithOffset) {
ASSERT_NE(nullptr, commandBuffer);
LinearStream cs(commandBuffer);
BatchBuffer batchBuffer{cs.getGraphicsAllocation(), offset, false, false, cs.getUsed(), &cs};
BatchBuffer batchBuffer{cs.getGraphicsAllocation(), offset, false, false, QueueThrottle::MEDIUM, cs.getUsed(), &cs};
csr->flush(batchBuffer, EngineType::ENGINE_RCS, nullptr);
EXPECT_EQ(1u, wddm->submitResult.called);
EXPECT_TRUE(wddm->submitResult.success);
@ -192,7 +192,7 @@ TEST_F(WddmCommandStreamTest, givenWdmmWhenSubmitIsCalledThenCoherencyRequiredFl
ASSERT_NE(nullptr, commandBuffer);
LinearStream cs(commandBuffer);
BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, false, false, cs.getUsed(), &cs};
BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, false, false, QueueThrottle::MEDIUM, cs.getUsed(), &cs};
csr->flush(batchBuffer, EngineType::ENGINE_RCS, nullptr);
auto commandHeader = wddm->submitResult.commandHeaderSubmitted;
@ -210,7 +210,7 @@ TEST_F(WddmCommandStreamTest, givenWdmmWhenSubmitIsCalledAndPreemptionIsDisabled
ASSERT_NE(nullptr, commandBuffer);
LinearStream cs(commandBuffer);
BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, false, false, cs.getUsed(), &cs};
BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, false, false, QueueThrottle::MEDIUM, cs.getUsed(), &cs};
csr->flush(batchBuffer, EngineType::ENGINE_RCS, nullptr);
auto commandHeader = wddm->submitResult.commandHeaderSubmitted;
@ -228,7 +228,7 @@ TEST_F(WddmCommandStreamTest, givenWdmmWhenSubmitIsCalledAndPreemptionIsEnabledT
ASSERT_NE(nullptr, commandBuffer);
LinearStream cs(commandBuffer);
BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, false, false, cs.getUsed(), &cs};
BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, false, false, QueueThrottle::MEDIUM, cs.getUsed(), &cs};
csr->flush(batchBuffer, EngineType::ENGINE_RCS, nullptr);
auto commandHeader = wddm->submitResult.commandHeaderSubmitted;
@ -239,6 +239,60 @@ TEST_F(WddmCommandStreamTest, givenWdmmWhenSubmitIsCalledAndPreemptionIsEnabledT
memManager->freeGraphicsMemory(commandBuffer);
}
// Flushes a batch buffer built with QueueThrottle::LOW and verifies the
// Umd* request fields of the command buffer header stored in wddm->submitResult.
TEST_F(WddmCommandStreamTest, givenWdmmWhenSubmitIsCalledAndThrottleIsToLowThenSetHeaderFieldsProperly) {
    GraphicsAllocation *allocation = memManager->allocateGraphicsMemory(4096, 4096);
    ASSERT_NE(nullptr, allocation);
    LinearStream stream(allocation);

    BatchBuffer lowThrottleBatch{stream.getGraphicsAllocation(), 0, false, false, QueueThrottle::LOW, stream.getUsed(), &stream};
    csr->flush(lowThrottleBatch, EngineType::ENGINE_RCS, nullptr);

    // Inspect the header that was handed over during submission.
    auto *header = reinterpret_cast<COMMAND_BUFFER_HEADER *>(wddm->submitResult.commandHeaderSubmitted);

    // LOW throttle: slice state 0 and exactly one subslice requested.
    EXPECT_EQ(0, header->UmdRequestedSliceState);
    EXPECT_EQ(1, header->UmdRequestedSubsliceCount);

    // The requested EU count must equal the EU count divided by the subslice count.
    const auto expectedEuCount = wddm->getGtSysInfo()->EUCount / wddm->getGtSysInfo()->SubSliceCount;
    EXPECT_EQ(expectedEuCount, header->UmdRequestedEUCount);

    memManager->freeGraphicsMemory(allocation);
}
// Flushes a batch buffer built with QueueThrottle::MEDIUM and verifies the
// Umd* request fields of the command buffer header stored in wddm->submitResult.
TEST_F(WddmCommandStreamTest, givenWdmmWhenSubmitIsCalledAndThrottleIsToMediumThenSetHeaderFieldsProperly) {
    GraphicsAllocation *allocation = memManager->allocateGraphicsMemory(4096, 4096);
    ASSERT_NE(nullptr, allocation);
    LinearStream stream(allocation);

    BatchBuffer mediumThrottleBatch{stream.getGraphicsAllocation(), 0, false, false, QueueThrottle::MEDIUM, stream.getUsed(), &stream};
    csr->flush(mediumThrottleBatch, EngineType::ENGINE_RCS, nullptr);

    // Inspect the header that was handed over during submission.
    auto *header = reinterpret_cast<COMMAND_BUFFER_HEADER *>(wddm->submitResult.commandHeaderSubmitted);

    // MEDIUM throttle: slice state 0 and a subslice count of 0 are expected.
    EXPECT_EQ(0, header->UmdRequestedSliceState);
    EXPECT_EQ(0, header->UmdRequestedSubsliceCount);

    // The requested EU count must equal the EU count divided by the subslice count.
    const auto expectedEuCount = wddm->getGtSysInfo()->EUCount / wddm->getGtSysInfo()->SubSliceCount;
    EXPECT_EQ(expectedEuCount, header->UmdRequestedEUCount);

    memManager->freeGraphicsMemory(allocation);
}
// Flushes a batch buffer built with QueueThrottle::HIGH and verifies the
// Umd* request fields of the command buffer header stored in wddm->submitResult.
TEST_F(WddmCommandStreamTest, givenWdmmWhenSubmitIsCalledAndThrottleIsToHighThenSetHeaderFieldsProperly) {
    GraphicsAllocation *allocation = memManager->allocateGraphicsMemory(4096, 4096);
    ASSERT_NE(nullptr, allocation);
    LinearStream stream(allocation);

    BatchBuffer highThrottleBatch{stream.getGraphicsAllocation(), 0, false, false, QueueThrottle::HIGH, stream.getUsed(), &stream};
    csr->flush(highThrottleBatch, EngineType::ENGINE_RCS, nullptr);

    // Inspect the header that was handed over during submission.
    auto *header = reinterpret_cast<COMMAND_BUFFER_HEADER *>(wddm->submitResult.commandHeaderSubmitted);

    EXPECT_EQ(0, header->UmdRequestedSliceState);

    // HIGH throttle: the full subslice count is expected when it does not
    // exceed the limit of 7; otherwise 0 is expected.
    const uint32_t subsliceLimit = 7;
    auto expectedSubsliceCount = wddm->getGtSysInfo()->SubSliceCount;
    if (expectedSubsliceCount > subsliceLimit) {
        expectedSubsliceCount = 0;
    }
    EXPECT_EQ(expectedSubsliceCount, header->UmdRequestedSubsliceCount);

    // The requested EU count must equal the EU count divided by the subslice count.
    const auto expectedEuCount = wddm->getGtSysInfo()->EUCount / wddm->getGtSysInfo()->SubSliceCount;
    EXPECT_EQ(expectedEuCount, header->UmdRequestedEUCount);

    memManager->freeGraphicsMemory(allocation);
}
TEST_F(WddmCommandStreamTest, makeResident) {
WddmMemoryManager *wddmMM = reinterpret_cast<WddmMemoryManager *>(memManager);
@ -409,7 +463,7 @@ TEST_F(WddmCommandStreamMockGdiTest, FlushCallsWddmMakeResidentForResidencyAlloc
gdi.getMakeResidentArg().NumAllocations = 0;
BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, false, false, cs.getUsed(), &cs};
BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, false, false, QueueThrottle::MEDIUM, cs.getUsed(), &cs};
csr->flush(batchBuffer, EngineType::ENGINE_RCS, nullptr);
EXPECT_NE(0u, gdi.getMakeResidentArg().NumAllocations);