mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-21 09:14:47 +08:00
Add method to submit kernel on single subdevice
Signed-off-by: Jobczyk, Lukasz <lukasz.jobczyk@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
00c92c8c14
commit
343fd602fa
@@ -146,6 +146,8 @@ class CommandStreamReceiverHw : public CommandStreamReceiver {
|
||||
|
||||
CsrSizeRequestFlags csrSizeRequestFlags = {};
|
||||
|
||||
bool wasSubmittedToSingleSubdevice = false;
|
||||
|
||||
std::unique_ptr<DirectSubmissionHw<GfxFamily, RenderDispatcher<GfxFamily>>> directSubmission;
|
||||
std::unique_ptr<DirectSubmissionHw<GfxFamily, BlitterDispatcher<GfxFamily>>> blitterDirectSubmission;
|
||||
};
|
||||
|
||||
@@ -197,7 +197,11 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
|
||||
void *currentPipeControlForNooping = nullptr;
|
||||
void *epiloguePipeControlLocation = nullptr;
|
||||
|
||||
if (DebugManager.flags.ForceCsrFlushing.get()) {
|
||||
bool csrFlush = this->wasSubmittedToSingleSubdevice != dispatchFlags.useSingleSubdevice;
|
||||
|
||||
csrFlush |= DebugManager.flags.ForceCsrFlushing.get();
|
||||
|
||||
if (csrFlush) {
|
||||
flushBatchedSubmissions();
|
||||
}
|
||||
|
||||
@@ -544,7 +548,7 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
|
||||
auto &streamToSubmit = submitCommandStreamFromCsr ? commandStreamCSR : commandStreamTask;
|
||||
BatchBuffer batchBuffer{streamToSubmit.getGraphicsAllocation(), startOffset, chainedBatchBufferStartOffset, chainedBatchBuffer,
|
||||
dispatchFlags.requiresCoherency, dispatchFlags.lowPriority, dispatchFlags.throttle, dispatchFlags.sliceCount,
|
||||
streamToSubmit.getUsed(), &streamToSubmit, bbEndLocation};
|
||||
streamToSubmit.getUsed(), &streamToSubmit, bbEndLocation, dispatchFlags.useSingleSubdevice};
|
||||
|
||||
if (submitCSR | submitTask) {
|
||||
if (this->dispatchMode == DispatchMode::ImmediateDispatch) {
|
||||
@@ -566,6 +570,8 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
|
||||
this->makeSurfacePackNonResident(this->getResidencyAllocations());
|
||||
}
|
||||
|
||||
this->wasSubmittedToSingleSubdevice = dispatchFlags.useSingleSubdevice;
|
||||
|
||||
//check if we are not over the budget, if we are do implicit flush
|
||||
if (getMemoryManager()->isMemoryBudgetExhausted()) {
|
||||
if (this->totalMemoryUsed >= device.getDeviceInfo().globalMemSize / 4) {
|
||||
@@ -1027,7 +1033,7 @@ uint32_t CommandStreamReceiverHw<GfxFamily>::blitBuffer(const BlitPropertiesCont
|
||||
}
|
||||
|
||||
BatchBuffer batchBuffer{commandStream.getGraphicsAllocation(), commandStreamStart, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount,
|
||||
commandStream.getUsed(), &commandStream, endingCmdPtr};
|
||||
commandStream.getUsed(), &commandStream, endingCmdPtr, false};
|
||||
|
||||
flush(batchBuffer, getResidencyAllocations());
|
||||
makeSurfacePackNonResident(getResidencyAllocations());
|
||||
|
||||
@@ -55,29 +55,31 @@ struct DispatchFlags {
|
||||
uint32_t l3CacheSettings, uint32_t threadArbitrationPolicy, uint32_t additionalKernelExecInfo, KernelExecutionType kernelExecutionType, uint64_t sliceCount, bool blocking, bool dcFlush,
|
||||
bool useSLM, bool guardCommandBufferWithPipeControl, bool gsba32BitRequired,
|
||||
bool requiresCoherency, bool lowPriority, bool implicitFlush,
|
||||
bool outOfOrderExecutionAllowed, bool epilogueRequired, bool usePerDSSbackedBuffer) : csrDependencies(csrDependencies),
|
||||
barrierTimestampPacketNodes(barrierTimestampPacketNodes),
|
||||
pipelineSelectArgs(pipelineSelectArgs),
|
||||
flushStampReference(flushStampReference),
|
||||
throttle(throttle),
|
||||
preemptionMode(preemptionMode),
|
||||
numGrfRequired(numGrfRequired),
|
||||
l3CacheSettings(l3CacheSettings),
|
||||
threadArbitrationPolicy(threadArbitrationPolicy),
|
||||
additionalKernelExecInfo(additionalKernelExecInfo),
|
||||
kernelExecutionType(kernelExecutionType),
|
||||
sliceCount(sliceCount),
|
||||
blocking(blocking),
|
||||
dcFlush(dcFlush),
|
||||
useSLM(useSLM),
|
||||
guardCommandBufferWithPipeControl(guardCommandBufferWithPipeControl),
|
||||
gsba32BitRequired(gsba32BitRequired),
|
||||
requiresCoherency(requiresCoherency),
|
||||
lowPriority(lowPriority),
|
||||
implicitFlush(implicitFlush),
|
||||
outOfOrderExecutionAllowed(outOfOrderExecutionAllowed),
|
||||
epilogueRequired(epilogueRequired),
|
||||
usePerDssBackedBuffer(usePerDSSbackedBuffer){};
|
||||
bool outOfOrderExecutionAllowed, bool epilogueRequired, bool usePerDSSbackedBuffer, bool useSingleSubdevice) : csrDependencies(csrDependencies),
|
||||
barrierTimestampPacketNodes(barrierTimestampPacketNodes),
|
||||
pipelineSelectArgs(pipelineSelectArgs),
|
||||
flushStampReference(flushStampReference),
|
||||
throttle(throttle),
|
||||
preemptionMode(preemptionMode),
|
||||
numGrfRequired(numGrfRequired),
|
||||
l3CacheSettings(l3CacheSettings),
|
||||
threadArbitrationPolicy(threadArbitrationPolicy),
|
||||
additionalKernelExecInfo(additionalKernelExecInfo),
|
||||
kernelExecutionType(kernelExecutionType),
|
||||
sliceCount(sliceCount),
|
||||
blocking(blocking),
|
||||
dcFlush(dcFlush),
|
||||
useSLM(useSLM),
|
||||
guardCommandBufferWithPipeControl(guardCommandBufferWithPipeControl),
|
||||
gsba32BitRequired(gsba32BitRequired),
|
||||
requiresCoherency(requiresCoherency),
|
||||
lowPriority(lowPriority),
|
||||
implicitFlush(implicitFlush),
|
||||
outOfOrderExecutionAllowed(outOfOrderExecutionAllowed),
|
||||
epilogueRequired(epilogueRequired),
|
||||
usePerDssBackedBuffer(usePerDSSbackedBuffer),
|
||||
useSingleSubdevice(useSingleSubdevice){};
|
||||
|
||||
CsrDependencies csrDependencies;
|
||||
TimestampPacketContainer *barrierTimestampPacketNodes = nullptr;
|
||||
PipelineSelectArgs pipelineSelectArgs;
|
||||
@@ -102,6 +104,7 @@ struct DispatchFlags {
|
||||
bool outOfOrderExecutionAllowed = false;
|
||||
bool epilogueRequired = false;
|
||||
bool usePerDssBackedBuffer = false;
|
||||
bool useSingleSubdevice = false;
|
||||
};
|
||||
|
||||
struct CsrSizeRequestFlags {
|
||||
|
||||
@@ -102,12 +102,12 @@ NEO::BatchBuffer::BatchBuffer(GraphicsAllocation *commandBufferAllocation, size_
|
||||
size_t chainedBatchBufferStartOffset, GraphicsAllocation *chainedBatchBuffer,
|
||||
bool requiresCoherency, bool lowPriority,
|
||||
QueueThrottle throttle, uint64_t sliceCount,
|
||||
size_t usedSize, LinearStream *stream, void *endCmdPtr)
|
||||
size_t usedSize, LinearStream *stream, void *endCmdPtr, bool useSingleSubdevice)
|
||||
: commandBufferAllocation(commandBufferAllocation), startOffset(startOffset),
|
||||
chainedBatchBufferStartOffset(chainedBatchBufferStartOffset), chainedBatchBuffer(chainedBatchBuffer),
|
||||
requiresCoherency(requiresCoherency), low_priority(lowPriority),
|
||||
throttle(throttle), sliceCount(sliceCount),
|
||||
usedSize(usedSize), stream(stream), endCmdPtr(endCmdPtr) {}
|
||||
usedSize(usedSize), stream(stream), endCmdPtr(endCmdPtr), useSingleSubdevice(useSingleSubdevice) {}
|
||||
|
||||
NEO::CommandBuffer::CommandBuffer(Device &device) : device(device) {
|
||||
flushStamp.reset(new FlushStampTracker(false));
|
||||
|
||||
@@ -30,7 +30,8 @@ struct BatchBuffer {
|
||||
uint64_t sliceCount,
|
||||
size_t usedSize,
|
||||
LinearStream *stream,
|
||||
void *endCmdPtr);
|
||||
void *endCmdPtr,
|
||||
bool useSingleSubdevice);
|
||||
BatchBuffer() {}
|
||||
GraphicsAllocation *commandBufferAllocation = nullptr;
|
||||
size_t startOffset = 0u;
|
||||
@@ -45,6 +46,8 @@ struct BatchBuffer {
|
||||
//only used in drm csr in gem close worker active mode
|
||||
LinearStream *stream = nullptr;
|
||||
void *endCmdPtr = nullptr;
|
||||
|
||||
bool useSingleSubdevice = false;
|
||||
};
|
||||
|
||||
struct CommandBuffer : public IDNode<CommandBuffer> {
|
||||
|
||||
Reference in New Issue
Block a user