Add logic to change queue slice count

Change-Id: I4f5ccb4ecb290e1a05e3a312b9fa2a5d9c5c17f7
Signed-off-by: Katarzyna Cencelewska <katarzyna.cencelewska@intel.com>
This commit is contained in:
Katarzyna Cencelewska
2019-08-21 03:50:47 -07:00
committed by sys_ocldev
parent 7fa3865f0b
commit 260759268a
33 changed files with 599 additions and 119 deletions

View File

@@ -240,6 +240,7 @@ class CommandStreamReceiver {
uint32_t lastSentNumGrfRequired = GrfConfig::DefaultGrfNumber;
uint32_t requiredThreadArbitrationPolicy = ThreadArbitrationPolicy::RoundRobin;
uint32_t lastSentThreadArbitrationPolicy = ThreadArbitrationPolicy::NotPresent;
uint64_t lastSentSliceCount = QueueSliceCount::defaultSliceCount;
uint32_t requiredScratchSize = 0;
uint32_t requiredPrivateScratchSize = 0;

View File

@@ -447,7 +447,7 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
size_t startOffset = submitCommandStreamFromCsr ? commandStreamStartCSR : commandStreamStartTask;
auto &streamToSubmit = submitCommandStreamFromCsr ? commandStreamCSR : commandStreamTask;
BatchBuffer batchBuffer{streamToSubmit.getGraphicsAllocation(), startOffset, chainedBatchBufferStartOffset, chainedBatchBuffer, dispatchFlags.requiresCoherency, dispatchFlags.lowPriority, dispatchFlags.throttle, streamToSubmit.getUsed(), &streamToSubmit};
BatchBuffer batchBuffer{streamToSubmit.getGraphicsAllocation(), startOffset, chainedBatchBufferStartOffset, chainedBatchBuffer, dispatchFlags.requiresCoherency, dispatchFlags.lowPriority, dispatchFlags.throttle, dispatchFlags.sliceCount, streamToSubmit.getUsed(), &streamToSubmit};
if (submitCSR | submitTask) {
if (this->dispatchMode == DispatchMode::ImmediateDispatch) {
@@ -801,7 +801,7 @@ void CommandStreamReceiverHw<GfxFamily>::blitBuffer(const BlitProperties &blitPr
makeResident(*commandStream.getGraphicsAllocation());
makeResident(*tagAllocation);
BatchBuffer batchBuffer{commandStream.getGraphicsAllocation(), commandStreamStart, 0, nullptr, false, false, QueueThrottle::MEDIUM,
BatchBuffer batchBuffer{commandStream.getGraphicsAllocation(), commandStreamStart, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount,
commandStream.getUsed(), &commandStream};
flushStamp->setStamp(flush(batchBuffer, getResidencyAllocations()));

View File

@@ -31,6 +31,11 @@ constexpr auto csOverfetchSize = MemoryConstants::pageSize;
// Limits for timeout values.
namespace TimeoutControls {
// Upper bound for a timeout: the largest value representable as int64_t.
constexpr auto maxTimeout = std::numeric_limits<int64_t>::max();
} // namespace TimeoutControls
// Constants describing the slice count associated with a queue submission.
namespace QueueSliceCount {
// Initial value used for DispatchFlags::sliceCount, BatchBuffer::sliceCount
// and CommandStreamReceiver::lastSentSliceCount.
// NOTE(review): 0 presumably means "no explicit slice count requested" - confirm.
constexpr uint64_t defaultSliceCount = 0u;
} // namespace QueueSliceCount
namespace L3CachingSettings {
constexpr uint32_t l3CacheOn = 0u;
constexpr uint32_t l3CacheOff = 1u;
@@ -41,7 +46,7 @@ struct DispatchFlags {
DispatchFlags() = delete;
DispatchFlags(CsrDependencies csrDependencies, PipelineSelectArgs pipelineSelectArgs, FlushStampTrackingObj *flushStampReference,
QueueThrottle throttle, PreemptionMode preemptionMode, uint32_t numGrfRequired,
uint32_t l3CacheSettings, bool blocking, bool dcFlush,
uint32_t l3CacheSettings, uint64_t sliceCount, bool blocking, bool dcFlush,
bool useSLM, bool guardCommandBufferWithPipeControl, bool gsba32BitRequired,
bool requiresCoherency, bool lowPriority, bool implicitFlush,
bool outOfOrderExecutionAllowed, bool multiEngineQueue, bool epilogueRequired) : csrDependencies(csrDependencies),
@@ -51,6 +56,7 @@ struct DispatchFlags {
preemptionMode(preemptionMode),
numGrfRequired(numGrfRequired),
l3CacheSettings(l3CacheSettings),
sliceCount(sliceCount),
blocking(blocking),
dcFlush(dcFlush),
useSLM(useSLM),
@@ -69,6 +75,7 @@ struct DispatchFlags {
PreemptionMode preemptionMode = PreemptionMode::Disabled;
uint32_t numGrfRequired = GrfConfig::DefaultGrfNumber;
uint32_t l3CacheSettings = L3CachingSettings::l3CacheOn;
uint64_t sliceCount = QueueSliceCount::defaultSliceCount;
bool blocking = false;
bool dcFlush = false;
bool useSLM = false;

View File

@@ -53,6 +53,10 @@ void NEO::SubmissionAggregator::aggregateCommandBuffers(ResourcePackage &resourc
return;
}
if (primaryCommandBuffer->next->batchBuffer.sliceCount != primaryCommandBuffer->batchBuffer.sliceCount) {
return;
}
auto nextCommandBuffer = primaryCommandBuffer->next;
ResourcePackage newResources;
@@ -94,8 +98,16 @@ void NEO::SubmissionAggregator::aggregateCommandBuffers(ResourcePackage &resourc
}
}
// Builds a BatchBuffer from the given arguments, copying each one into the
// member of the same name (lowPriority is stored in low_priority).
// sliceCount is not part of this signature and is absent from the initializer
// list, so it keeps its in-class default (QueueSliceCount::defaultSliceCount).
// NOTE(review): this looks like the pre-change signature shown next to its
// replacement - confirm it is still meant to exist.
NEO::BatchBuffer::BatchBuffer(GraphicsAllocation *commandBufferAllocation,
                              size_t startOffset,
                              size_t chainedBatchBufferStartOffset,
                              GraphicsAllocation *chainedBatchBuffer,
                              bool requiresCoherency,
                              bool lowPriority,
                              QueueThrottle throttle,
                              size_t usedSize,
                              LinearStream *stream)
    : commandBufferAllocation(commandBufferAllocation),
      startOffset(startOffset),
      chainedBatchBufferStartOffset(chainedBatchBufferStartOffset),
      chainedBatchBuffer(chainedBatchBuffer),
      requiresCoherency(requiresCoherency),
      low_priority(lowPriority),
      throttle(throttle),
      usedSize(usedSize),
      stream(stream) {
}
// Builds a BatchBuffer from the given arguments, copying each one into the
// member of the same name (lowPriority is stored in low_priority).
// sliceCount is carried alongside throttle as part of the submission
// description.
NEO::BatchBuffer::BatchBuffer(GraphicsAllocation *commandBufferAllocation,
                              size_t startOffset,
                              size_t chainedBatchBufferStartOffset,
                              GraphicsAllocation *chainedBatchBuffer,
                              bool requiresCoherency,
                              bool lowPriority,
                              QueueThrottle throttle,
                              uint64_t sliceCount,
                              size_t usedSize,
                              LinearStream *stream)
    : commandBufferAllocation(commandBufferAllocation),
      startOffset(startOffset),
      chainedBatchBufferStartOffset(chainedBatchBufferStartOffset),
      chainedBatchBuffer(chainedBatchBuffer),
      requiresCoherency(requiresCoherency),
      low_priority(lowPriority),
      throttle(throttle),
      sliceCount(sliceCount),
      usedSize(usedSize),
      stream(stream) {
}
NEO::CommandBuffer::CommandBuffer(Device &device) : device(device) {
flushStamp.reset(new FlushStampTracker(false));

View File

@@ -9,6 +9,7 @@
#include "core/command_stream/linear_stream.h"
#include "core/utilities/idlist.h"
#include "core/utilities/stackvec.h"
#include "runtime/command_stream/csr_definitions.h"
#include "runtime/helpers/properties_helper.h"
#include "runtime/memory_manager/residency_container.h"
@@ -27,6 +28,7 @@ struct BatchBuffer {
bool requiresCoherency,
bool lowPriority,
QueueThrottle throttle,
uint64_t sliceCount,
size_t usedSize,
LinearStream *stream);
BatchBuffer() {}
@@ -37,6 +39,7 @@ struct BatchBuffer {
bool requiresCoherency = false;
bool low_priority = false;
QueueThrottle throttle = QueueThrottle::MEDIUM;
uint64_t sliceCount = QueueSliceCount::defaultSliceCount;
size_t usedSize = 0u;
//only used in drm csr in gem close worker active mode