mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-21 01:04:57 +08:00
feature: introduce states programming at driver init heapless ocl
Related-To: NEO-7824 Signed-off-by: Kamil Kopryk <kamil.kopryk@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
ea69b156d2
commit
168445784e
@@ -87,5 +87,11 @@ if(SUPPORT_DG2_AND_LATER)
|
||||
)
|
||||
endif()
|
||||
|
||||
# Only when SUPPORT_HEAPLESS is not set: append the heap-addressing .inl
# to the NEO_CORE_COMMAND_STREAM source list.
if(NOT SUPPORT_HEAPLESS)
|
||||
list(APPEND NEO_CORE_COMMAND_STREAM
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/command_stream_receiver_hw_heap_addressing.inl
|
||||
)
|
||||
endif()
|
||||
|
||||
set_property(GLOBAL PROPERTY NEO_CORE_COMMAND_STREAM ${NEO_CORE_COMMAND_STREAM})
|
||||
add_subdirectories()
|
||||
|
||||
@@ -96,6 +96,11 @@ class CommandStreamReceiver {
|
||||
virtual CompletionStamp flushTask(LinearStream &commandStreamTask, size_t commandStreamTaskStart,
|
||||
const IndirectHeap *dsh, const IndirectHeap *ioh, const IndirectHeap *ssh,
|
||||
TaskCountType taskLevel, DispatchFlags &dispatchFlags, Device &device) = 0;
|
||||
|
||||
// Pure-virtual flush entry point declared with the same parameter list and
// return type as flushTask.
// NOTE(review): the name suggests this is the heapless/stateless submission
// path - confirm against the implementing class.
virtual CompletionStamp flushTaskStateless(LinearStream &commandStreamTask, size_t commandStreamTaskStart,
|
||||
const IndirectHeap *dsh, const IndirectHeap *ioh, const IndirectHeap *ssh,
|
||||
TaskCountType taskLevel, DispatchFlags &dispatchFlags, Device &device) = 0;
|
||||
|
||||
virtual CompletionStamp flushBcsTask(LinearStream &commandStream, size_t commandStreamStart, const DispatchBcsFlags &dispatchBcsFlags, const HardwareInfo &hwInfo) = 0;
|
||||
virtual CompletionStamp flushImmediateTask(LinearStream &immediateCommandStream, size_t immediateCommandStreamStart,
|
||||
ImmediateDispatchFlags &dispatchFlags, Device &device) = 0;
|
||||
@@ -414,7 +419,7 @@ class CommandStreamReceiver {
|
||||
lastPreemptionMode = value;
|
||||
}
|
||||
|
||||
virtual SubmissionStatus initializeDeviceWithFirstSubmission() = 0;
|
||||
virtual SubmissionStatus initializeDeviceWithFirstSubmission(Device &device) = 0;
|
||||
|
||||
uint32_t getNumClients() const {
|
||||
return this->numClients.load();
|
||||
|
||||
@@ -59,6 +59,12 @@ class CommandStreamReceiverHw : public CommandStreamReceiver {
|
||||
const IndirectHeap *dsh, const IndirectHeap *ioh, const IndirectHeap *ssh,
|
||||
TaskCountType taskLevel, DispatchFlags &dispatchFlags, Device &device) override;
|
||||
|
||||
// Override of CommandStreamReceiver::flushTaskStateless.
// The first two parameters are named commandStream / commandStreamStart here,
// whereas the base-class declaration calls them commandStreamTask /
// commandStreamTaskStart; the types are identical.
CompletionStamp flushTaskStateless(LinearStream &commandStream, size_t commandStreamStart,
|
||||
const IndirectHeap *dsh, const IndirectHeap *ioh, const IndirectHeap *ssh,
|
||||
TaskCountType taskLevel, DispatchFlags &dispatchFlags, Device &device) override;
|
||||
|
||||
void addPipeControlFlushTaskIfNeeded(LinearStream &commandStreamCSR, TaskCountType taskLevel);
|
||||
|
||||
CompletionStamp flushBcsTask(LinearStream &commandStreamTask, size_t commandStreamTaskStart, const DispatchBcsFlags &dispatchBcsFlags, const HardwareInfo &hwInfo) override;
|
||||
|
||||
CompletionStamp flushImmediateTask(LinearStream &immediateCommandStream, size_t immediateCommandStreamStart,
|
||||
@@ -79,6 +85,10 @@ class CommandStreamReceiverHw : public CommandStreamReceiver {
|
||||
size_t getRequiredCmdStreamSizeAligned(const DispatchFlags &dispatchFlags, Device &device);
|
||||
size_t getRequiredCmdStreamSize(const DispatchBcsFlags &dispatchBcsFlags);
|
||||
size_t getRequiredCmdStreamSizeAligned(const DispatchBcsFlags &dispatchBcsFlags);
|
||||
|
||||
// Heapless size queries; both take the same (dispatchFlags, device) arguments
// as getRequiredCmdStreamSizeAligned(const DispatchFlags &, Device &).
// NOTE(review): presumably the "Aligned" variant rounds the plain size up -
// confirm against the implementation.
size_t getRequiredCmdStreamHeaplessSize(const DispatchFlags &dispatchFlags, Device &device);
|
||||
size_t getRequiredCmdStreamHeaplessSizeAligned(const DispatchFlags &dispatchFlags, Device &device);
|
||||
|
||||
size_t getRequiredCmdSizeForPreamble(Device &device) const;
|
||||
size_t getCmdSizeForPreemption(const DispatchFlags &dispatchFlags) const;
|
||||
size_t getCmdSizeForEpilogue(const DispatchFlags &dispatchFlags) const;
|
||||
@@ -164,7 +174,7 @@ class CommandStreamReceiverHw : public CommandStreamReceiver {
|
||||
return getCmdSizeForStallingNoPostSyncCommands();
|
||||
}
|
||||
void programStallingCommandsForBarrier(LinearStream &cmdStream, TimestampPacketContainer *barrierTimestampPacketNodes, const bool isDcFlushRequired) override;
|
||||
SubmissionStatus initializeDeviceWithFirstSubmission() override;
|
||||
SubmissionStatus initializeDeviceWithFirstSubmission(Device &device) override;
|
||||
|
||||
HeapDirtyState &getDshState() {
|
||||
return dshState;
|
||||
@@ -179,6 +189,12 @@ class CommandStreamReceiverHw : public CommandStreamReceiver {
|
||||
void dispatchRayTracingStateCommand(LinearStream &cmdStream, Device &device);
|
||||
uint64_t getScratchPatchAddress();
|
||||
|
||||
// Heapless programming entry points.
// programHeaplessProlog reports a SubmissionStatus; the remaining functions
// return nothing.
// NOTE(review): these are declared ahead of the `protected:` label that
// follows, so they are not in the protected section - confirm that this
// visibility is intended.
// NOTE(review): "ForflushTaskStateless" uses a lowercase 'f', unlike the
// camel-casing of the neighbouring names; renaming would touch every caller.
SubmissionStatus programHeaplessProlog(Device &device);
|
||||
void programHeaplessStateProlog(Device &device, LinearStream &commandStream);
|
||||
void programStateBaseAddressHeapless(Device &device, LinearStream &commandStream);
|
||||
void programComputeModeHeapless(Device &device, LinearStream &commandStream);
|
||||
void handleAllocationsResidencyForflushTaskStateless(const IndirectHeap *dsh, const IndirectHeap *ioh, const IndirectHeap *ssh);
|
||||
|
||||
protected:
|
||||
void programPreemption(LinearStream &csr, DispatchFlags &dispatchFlags);
|
||||
void programL3(LinearStream &csr, uint32_t &newL3Config);
|
||||
@@ -198,6 +214,8 @@ class CommandStreamReceiverHw : public CommandStreamReceiver {
|
||||
|
||||
void programEnginePrologue(LinearStream &csr);
|
||||
size_t getCmdSizeForPrologue() const;
|
||||
size_t getCmdSizeForHeaplessPrologue(Device &device) const;
|
||||
void handleAllocationsResidencyForHeaplessProlog(LinearStream &linearStream, Device &device);
|
||||
|
||||
void setClearSlmWorkAroundParameter(PipeControlArgs &args);
|
||||
void addPipeControlBeforeStateSip(LinearStream &commandStream, Device &device);
|
||||
@@ -328,6 +346,7 @@ class CommandStreamReceiverHw : public CommandStreamReceiver {
|
||||
|
||||
size_t cmdStreamStart = 0;
|
||||
uint32_t latestSentBcsWaValue = std::numeric_limits<uint32_t>::max();
|
||||
bool heaplessPrologueSent = false;
|
||||
};
|
||||
|
||||
} // namespace NEO
|
||||
|
||||
@@ -181,6 +181,30 @@ size_t CommandStreamReceiverHw<GfxFamily>::getCmdsSizeForHardwareContext() const
|
||||
return getCmdSizeForPrologue();
|
||||
}
|
||||
|
||||
// Appends the synchronization commands that precede a task to commandStreamCSR.
//   commandStreamCSR - stream the commands are written to.
//   taskLevel        - level of the incoming task, compared with this->taskLevel.
// Side effects: clears requiresInstructionCacheFlush after emitting the flush,
// and raises this->taskLevel to taskLevel when the incoming level is higher.
template <typename GfxFamily>
|
||||
void CommandStreamReceiverHw<GfxFamily>::addPipeControlFlushTaskIfNeeded(LinearStream &commandStreamCSR, TaskCountType taskLevel) {
|
||||
|
||||
// A pending instruction-cache flush request is emitted once, then cleared.
if (this->requiresInstructionCacheFlush) {
|
||||
MemorySynchronizationCommands<GfxFamily>::addInstructionCacheFlush(commandStreamCSR);
|
||||
this->requiresInstructionCacheFlush = false;
|
||||
}
|
||||
|
||||
// Add a Pipe Control if we have a dependency on a previous walker to avoid concurrency issues.
|
||||
if (taskLevel > this->taskLevel) {
|
||||
// The barrier itself is skipped when timestamp packet writes are enabled;
// the stored task level is still advanced below in either case.
const auto programPipeControl = !timestampPacketWriteEnabled;
|
||||
if (programPipeControl) {
|
||||
PipeControlArgs args;
|
||||
MemorySynchronizationCommands<GfxFamily>::addSingleBarrier(commandStreamCSR, args);
|
||||
}
|
||||
this->taskLevel = taskLevel;
|
||||
// Logs the value of peekTaskCount() under the label "this->taskCount".
DBG_LOG(LogTaskCounts, __FUNCTION__, "Line: ", __LINE__, "this->taskCount", peekTaskCount());
|
||||
}
|
||||
|
||||
// Debug flag: unconditionally calls forcePipeControl on the stream.
if (debugManager.flags.ForcePipeControlPriorToWalker.get()) {
|
||||
forcePipeControl(commandStreamCSR);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushBcsTask(LinearStream &commandStreamTask, size_t commandStreamTaskStart,
|
||||
const DispatchBcsFlags &dispatchBcsFlags, const HardwareInfo &hwInfo) {
|
||||
@@ -509,25 +533,7 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
|
||||
MemorySynchronizationCommands<GfxFamily>::addStateCacheFlush(commandStreamCSR, device.getRootDeviceEnvironment());
|
||||
}
|
||||
|
||||
if (requiresInstructionCacheFlush) {
|
||||
MemorySynchronizationCommands<GfxFamily>::addInstructionCacheFlush(commandStreamCSR);
|
||||
requiresInstructionCacheFlush = false;
|
||||
}
|
||||
|
||||
// Add a Pipe Control if we have a dependency on a previous walker to avoid concurrency issues.
|
||||
if (taskLevel > this->taskLevel) {
|
||||
const auto programPipeControl = !timestampPacketWriteEnabled;
|
||||
if (programPipeControl) {
|
||||
PipeControlArgs args;
|
||||
MemorySynchronizationCommands<GfxFamily>::addSingleBarrier(commandStreamCSR, args);
|
||||
}
|
||||
this->taskLevel = taskLevel;
|
||||
DBG_LOG(LogTaskCounts, __FUNCTION__, "Line: ", __LINE__, "this->taskCount", peekTaskCount());
|
||||
}
|
||||
|
||||
if (debugManager.flags.ForcePipeControlPriorToWalker.get()) {
|
||||
forcePipeControl(commandStreamCSR);
|
||||
}
|
||||
addPipeControlFlushTaskIfNeeded(commandStreamCSR, taskLevel);
|
||||
|
||||
this->makeResident(*tagAllocation);
|
||||
|
||||
@@ -1452,11 +1458,6 @@ size_t CommandStreamReceiverHw<GfxFamily>::getCmdSizeForComputeMode() {
|
||||
return EncodeComputeMode<GfxFamily>::getCmdSizeForComputeMode(this->peekRootDeviceEnvironment(), hasSharedHandles(), isRcs());
|
||||
}
|
||||
|
||||
// No-argument form: forwards to flushTagUpdate() and returns its status.
// NOTE(review): the enclosing hunk header (-1452,11 +1458,6) shrinks by five
// lines, so this appears to be the definition this commit removes in favour
// of the Device & overload - confirm against the full diff.
template <typename GfxFamily>
|
||||
SubmissionStatus CommandStreamReceiverHw<GfxFamily>::initializeDeviceWithFirstSubmission() {
|
||||
return flushTagUpdate();
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void CommandStreamReceiverHw<GfxFamily>::handleFrontEndStateTransition(const DispatchFlags &dispatchFlags) {
|
||||
if (streamProperties.frontEndState.disableOverdispatch.value != -1) {
|
||||
@@ -2302,5 +2303,4 @@ inline void CommandStreamReceiverHw<GfxFamily>::chainCsrWorkToTask(LinearStream
|
||||
this->makeResident(*chainedBatchBuffer);
|
||||
EncodeNoop<GfxFamily>::alignToCacheLine(commandStreamCSR);
|
||||
}
|
||||
|
||||
} // namespace NEO
|
||||
|
||||
@@ -0,0 +1,76 @@
|
||||
/*
|
||||
* Copyright (C) 2024 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/command_stream/command_stream_receiver_hw.h"
|
||||
|
||||
namespace NEO {
|
||||
|
||||
// Forwards to flushTagUpdate() and returns its SubmissionStatus.
// The `device` argument is not used by this implementation.
template <typename GfxFamily>
|
||||
SubmissionStatus CommandStreamReceiverHw<GfxFamily>::initializeDeviceWithFirstSubmission(Device &device) {
|
||||
return flushTagUpdate();
|
||||
}
|
||||
|
||||
// Stub implementation: unconditionally hits UNRECOVERABLE_IF(true) and uses
// none of its arguments.
// The value-initialized CompletionStamp is returned only to satisfy the
// signature (presumably UNRECOVERABLE_IF does not return - confirm against the
// macro definition).
template <typename GfxFamily>
|
||||
CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTaskStateless(
|
||||
LinearStream &commandStream, size_t commandStreamStart,
|
||||
const IndirectHeap *dsh, const IndirectHeap *ioh, const IndirectHeap *ssh,
|
||||
TaskCountType taskLevel, DispatchFlags &dispatchFlags, Device &device) {
|
||||
|
||||
UNRECOVERABLE_IF(true);
|
||||
return {};
|
||||
}
|
||||
|
||||
// Stub implementation: unconditionally hits UNRECOVERABLE_IF(true); `device`
// is unused.
// SubmissionStatus::unsupported is the fallback return value should the macro
// ever return (presumably it does not - confirm).
template <typename GfxFamily>
|
||||
SubmissionStatus CommandStreamReceiverHw<GfxFamily>::programHeaplessProlog(Device &device) {
|
||||
UNRECOVERABLE_IF(true);
|
||||
return SubmissionStatus::unsupported;
|
||||
}
|
||||
|
||||
// Stub implementation: unconditionally hits UNRECOVERABLE_IF(true); writes
// nothing to commandStream and does not use device.
template <typename GfxFamily>
|
||||
void CommandStreamReceiverHw<GfxFamily>::programStateBaseAddressHeapless(Device &device, LinearStream &commandStream) {
|
||||
UNRECOVERABLE_IF(true);
|
||||
}
|
||||
|
||||
// Stub implementation: unconditionally hits UNRECOVERABLE_IF(true); writes
// nothing to commandStream and does not use device.
template <typename GfxFamily>
|
||||
void CommandStreamReceiverHw<GfxFamily>::programComputeModeHeapless(Device &device, LinearStream &commandStream) {
|
||||
UNRECOVERABLE_IF(true);
|
||||
}
|
||||
|
||||
// Stub implementation: unconditionally hits UNRECOVERABLE_IF(true); writes
// nothing to commandStream and does not use device.
template <typename GfxFamily>
|
||||
void CommandStreamReceiverHw<GfxFamily>::programHeaplessStateProlog(Device &device, LinearStream &commandStream) {
|
||||
UNRECOVERABLE_IF(true);
|
||||
}
|
||||
|
||||
// Stub implementation: unconditionally hits UNRECOVERABLE_IF(true); `device`
// is unused.
// The trailing `return 0` only satisfies the size_t return type.
template <typename GfxFamily>
|
||||
size_t CommandStreamReceiverHw<GfxFamily>::getCmdSizeForHeaplessPrologue(Device &device) const {
|
||||
UNRECOVERABLE_IF(true);
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Stub implementation: unconditionally hits UNRECOVERABLE_IF(true); none of
// the dsh / ioh / ssh heap pointers is touched.
template <typename GfxFamily>
|
||||
void CommandStreamReceiverHw<GfxFamily>::handleAllocationsResidencyForflushTaskStateless(const IndirectHeap *dsh, const IndirectHeap *ioh, const IndirectHeap *ssh) {
|
||||
UNRECOVERABLE_IF(true);
|
||||
}
|
||||
|
||||
// Stub implementation: unconditionally hits UNRECOVERABLE_IF(true); neither
// linearStream nor device is used.
template <typename GfxFamily>
|
||||
void CommandStreamReceiverHw<GfxFamily>::handleAllocationsResidencyForHeaplessProlog(LinearStream &linearStream, Device &device) {
|
||||
UNRECOVERABLE_IF(true);
|
||||
}
|
||||
|
||||
// Stub implementation: unconditionally hits UNRECOVERABLE_IF(true); neither
// argument is used.
// The trailing `return 0u` only satisfies the size_t return type.
template <typename GfxFamily>
|
||||
inline size_t CommandStreamReceiverHw<GfxFamily>::getRequiredCmdStreamHeaplessSize(const DispatchFlags &dispatchFlags, Device &device) {
|
||||
UNRECOVERABLE_IF(true);
|
||||
return 0u;
|
||||
}
|
||||
|
||||
// Stub implementation: unconditionally hits UNRECOVERABLE_IF(true); neither
// argument is used.
// The trailing `return 0u` only satisfies the size_t return type.
template <typename GfxFamily>
|
||||
inline size_t CommandStreamReceiverHw<GfxFamily>::getRequiredCmdStreamHeaplessSizeAligned(const DispatchFlags &dispatchFlags, Device &device) {
|
||||
UNRECOVERABLE_IF(true);
|
||||
return 0u;
|
||||
}
|
||||
|
||||
} // namespace NEO
|
||||
Reference in New Issue
Block a user