mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-03 14:55:24 +08:00
feature: introduce states programming at driver init heapless ocl
Related-To: NEO-7824
Signed-off-by: Kamil Kopryk <kamil.kopryk@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
ea69b156d2
commit
168445784e
@@ -401,8 +401,8 @@ struct EncodeComputeMode {
|
||||
const PipelineSelectArgs &args, bool hasSharedHandles,
|
||||
const RootDeviceEnvironment &rootDeviceEnvironment, bool isRcs, bool dcFlush);
|
||||
static void programComputeModeCommand(LinearStream &csr, StateComputeModeProperties &properties, const RootDeviceEnvironment &rootDeviceEnvironment);
|
||||
|
||||
static void adjustPipelineSelect(CommandContainer &container, const NEO::KernelDescriptor &kernelDescriptor);
|
||||
static size_t getSizeForComputeMode();
|
||||
};
|
||||
|
||||
template <typename GfxFamily>
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2020-2023 Intel Corporation
|
||||
* Copyright (C) 2020-2024 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -33,6 +33,11 @@ size_t EncodeComputeMode<Family>::getCmdSizeForComputeMode(const RootDeviceEnvir
|
||||
return size;
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
inline size_t EncodeComputeMode<GfxFamily>::getSizeForComputeMode() {
|
||||
return sizeof(typename GfxFamily::STATE_COMPUTE_MODE);
|
||||
}
|
||||
|
||||
template <typename Family>
|
||||
inline void EncodeComputeMode<Family>::programComputeModeCommandWithSynchronization(
|
||||
LinearStream &csr, StateComputeModeProperties &properties, const PipelineSelectArgs &args,
|
||||
|
||||
@@ -87,5 +87,11 @@ if(SUPPORT_DG2_AND_LATER)
|
||||
)
|
||||
endif()
|
||||
|
||||
if(NOT SUPPORT_HEAPLESS)
|
||||
list(APPEND NEO_CORE_COMMAND_STREAM
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/command_stream_receiver_hw_heap_addressing.inl
|
||||
)
|
||||
endif()
|
||||
|
||||
set_property(GLOBAL PROPERTY NEO_CORE_COMMAND_STREAM ${NEO_CORE_COMMAND_STREAM})
|
||||
add_subdirectories()
|
||||
|
||||
@@ -96,6 +96,11 @@ class CommandStreamReceiver {
|
||||
virtual CompletionStamp flushTask(LinearStream &commandStreamTask, size_t commandStreamTaskStart,
|
||||
const IndirectHeap *dsh, const IndirectHeap *ioh, const IndirectHeap *ssh,
|
||||
TaskCountType taskLevel, DispatchFlags &dispatchFlags, Device &device) = 0;
|
||||
|
||||
virtual CompletionStamp flushTaskStateless(LinearStream &commandStreamTask, size_t commandStreamTaskStart,
|
||||
const IndirectHeap *dsh, const IndirectHeap *ioh, const IndirectHeap *ssh,
|
||||
TaskCountType taskLevel, DispatchFlags &dispatchFlags, Device &device) = 0;
|
||||
|
||||
virtual CompletionStamp flushBcsTask(LinearStream &commandStream, size_t commandStreamStart, const DispatchBcsFlags &dispatchBcsFlags, const HardwareInfo &hwInfo) = 0;
|
||||
virtual CompletionStamp flushImmediateTask(LinearStream &immediateCommandStream, size_t immediateCommandStreamStart,
|
||||
ImmediateDispatchFlags &dispatchFlags, Device &device) = 0;
|
||||
@@ -414,7 +419,7 @@ class CommandStreamReceiver {
|
||||
lastPreemptionMode = value;
|
||||
}
|
||||
|
||||
virtual SubmissionStatus initializeDeviceWithFirstSubmission() = 0;
|
||||
virtual SubmissionStatus initializeDeviceWithFirstSubmission(Device &device) = 0;
|
||||
|
||||
uint32_t getNumClients() const {
|
||||
return this->numClients.load();
|
||||
|
||||
@@ -59,6 +59,12 @@ class CommandStreamReceiverHw : public CommandStreamReceiver {
|
||||
const IndirectHeap *dsh, const IndirectHeap *ioh, const IndirectHeap *ssh,
|
||||
TaskCountType taskLevel, DispatchFlags &dispatchFlags, Device &device) override;
|
||||
|
||||
CompletionStamp flushTaskStateless(LinearStream &commandStream, size_t commandStreamStart,
|
||||
const IndirectHeap *dsh, const IndirectHeap *ioh, const IndirectHeap *ssh,
|
||||
TaskCountType taskLevel, DispatchFlags &dispatchFlags, Device &device) override;
|
||||
|
||||
void addPipeControlFlushTaskIfNeeded(LinearStream &commandStreamCSR, TaskCountType taskLevel);
|
||||
|
||||
CompletionStamp flushBcsTask(LinearStream &commandStreamTask, size_t commandStreamTaskStart, const DispatchBcsFlags &dispatchBcsFlags, const HardwareInfo &hwInfo) override;
|
||||
|
||||
CompletionStamp flushImmediateTask(LinearStream &immediateCommandStream, size_t immediateCommandStreamStart,
|
||||
@@ -79,6 +85,10 @@ class CommandStreamReceiverHw : public CommandStreamReceiver {
|
||||
size_t getRequiredCmdStreamSizeAligned(const DispatchFlags &dispatchFlags, Device &device);
|
||||
size_t getRequiredCmdStreamSize(const DispatchBcsFlags &dispatchBcsFlags);
|
||||
size_t getRequiredCmdStreamSizeAligned(const DispatchBcsFlags &dispatchBcsFlags);
|
||||
|
||||
size_t getRequiredCmdStreamHeaplessSize(const DispatchFlags &dispatchFlags, Device &device);
|
||||
size_t getRequiredCmdStreamHeaplessSizeAligned(const DispatchFlags &dispatchFlags, Device &device);
|
||||
|
||||
size_t getRequiredCmdSizeForPreamble(Device &device) const;
|
||||
size_t getCmdSizeForPreemption(const DispatchFlags &dispatchFlags) const;
|
||||
size_t getCmdSizeForEpilogue(const DispatchFlags &dispatchFlags) const;
|
||||
@@ -164,7 +174,7 @@ class CommandStreamReceiverHw : public CommandStreamReceiver {
|
||||
return getCmdSizeForStallingNoPostSyncCommands();
|
||||
}
|
||||
void programStallingCommandsForBarrier(LinearStream &cmdStream, TimestampPacketContainer *barrierTimestampPacketNodes, const bool isDcFlushRequired) override;
|
||||
SubmissionStatus initializeDeviceWithFirstSubmission() override;
|
||||
SubmissionStatus initializeDeviceWithFirstSubmission(Device &device) override;
|
||||
|
||||
HeapDirtyState &getDshState() {
|
||||
return dshState;
|
||||
@@ -179,6 +189,12 @@ class CommandStreamReceiverHw : public CommandStreamReceiver {
|
||||
void dispatchRayTracingStateCommand(LinearStream &cmdStream, Device &device);
|
||||
uint64_t getScratchPatchAddress();
|
||||
|
||||
SubmissionStatus programHeaplessProlog(Device &device);
|
||||
void programHeaplessStateProlog(Device &device, LinearStream &commandStream);
|
||||
void programStateBaseAddressHeapless(Device &device, LinearStream &commandStream);
|
||||
void programComputeModeHeapless(Device &device, LinearStream &commandStream);
|
||||
void handleAllocationsResidencyForflushTaskStateless(const IndirectHeap *dsh, const IndirectHeap *ioh, const IndirectHeap *ssh);
|
||||
|
||||
protected:
|
||||
void programPreemption(LinearStream &csr, DispatchFlags &dispatchFlags);
|
||||
void programL3(LinearStream &csr, uint32_t &newL3Config);
|
||||
@@ -198,6 +214,8 @@ class CommandStreamReceiverHw : public CommandStreamReceiver {
|
||||
|
||||
void programEnginePrologue(LinearStream &csr);
|
||||
size_t getCmdSizeForPrologue() const;
|
||||
size_t getCmdSizeForHeaplessPrologue(Device &device) const;
|
||||
void handleAllocationsResidencyForHeaplessProlog(LinearStream &linearStream, Device &device);
|
||||
|
||||
void setClearSlmWorkAroundParameter(PipeControlArgs &args);
|
||||
void addPipeControlBeforeStateSip(LinearStream &commandStream, Device &device);
|
||||
@@ -328,6 +346,7 @@ class CommandStreamReceiverHw : public CommandStreamReceiver {
|
||||
|
||||
size_t cmdStreamStart = 0;
|
||||
uint32_t latestSentBcsWaValue = std::numeric_limits<uint32_t>::max();
|
||||
bool heaplessPrologueSent = false;
|
||||
};
|
||||
|
||||
} // namespace NEO
|
||||
|
||||
@@ -181,6 +181,30 @@ size_t CommandStreamReceiverHw<GfxFamily>::getCmdsSizeForHardwareContext() const
|
||||
return getCmdSizeForPrologue();
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void CommandStreamReceiverHw<GfxFamily>::addPipeControlFlushTaskIfNeeded(LinearStream &commandStreamCSR, TaskCountType taskLevel) {
|
||||
|
||||
if (this->requiresInstructionCacheFlush) {
|
||||
MemorySynchronizationCommands<GfxFamily>::addInstructionCacheFlush(commandStreamCSR);
|
||||
this->requiresInstructionCacheFlush = false;
|
||||
}
|
||||
|
||||
// Add a Pipe Control if we have a dependency on a previous walker to avoid concurrency issues.
|
||||
if (taskLevel > this->taskLevel) {
|
||||
const auto programPipeControl = !timestampPacketWriteEnabled;
|
||||
if (programPipeControl) {
|
||||
PipeControlArgs args;
|
||||
MemorySynchronizationCommands<GfxFamily>::addSingleBarrier(commandStreamCSR, args);
|
||||
}
|
||||
this->taskLevel = taskLevel;
|
||||
DBG_LOG(LogTaskCounts, __FUNCTION__, "Line: ", __LINE__, "this->taskCount", peekTaskCount());
|
||||
}
|
||||
|
||||
if (debugManager.flags.ForcePipeControlPriorToWalker.get()) {
|
||||
forcePipeControl(commandStreamCSR);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushBcsTask(LinearStream &commandStreamTask, size_t commandStreamTaskStart,
|
||||
const DispatchBcsFlags &dispatchBcsFlags, const HardwareInfo &hwInfo) {
|
||||
@@ -509,25 +533,7 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
|
||||
MemorySynchronizationCommands<GfxFamily>::addStateCacheFlush(commandStreamCSR, device.getRootDeviceEnvironment());
|
||||
}
|
||||
|
||||
if (requiresInstructionCacheFlush) {
|
||||
MemorySynchronizationCommands<GfxFamily>::addInstructionCacheFlush(commandStreamCSR);
|
||||
requiresInstructionCacheFlush = false;
|
||||
}
|
||||
|
||||
// Add a Pipe Control if we have a dependency on a previous walker to avoid concurrency issues.
|
||||
if (taskLevel > this->taskLevel) {
|
||||
const auto programPipeControl = !timestampPacketWriteEnabled;
|
||||
if (programPipeControl) {
|
||||
PipeControlArgs args;
|
||||
MemorySynchronizationCommands<GfxFamily>::addSingleBarrier(commandStreamCSR, args);
|
||||
}
|
||||
this->taskLevel = taskLevel;
|
||||
DBG_LOG(LogTaskCounts, __FUNCTION__, "Line: ", __LINE__, "this->taskCount", peekTaskCount());
|
||||
}
|
||||
|
||||
if (debugManager.flags.ForcePipeControlPriorToWalker.get()) {
|
||||
forcePipeControl(commandStreamCSR);
|
||||
}
|
||||
addPipeControlFlushTaskIfNeeded(commandStreamCSR, taskLevel);
|
||||
|
||||
this->makeResident(*tagAllocation);
|
||||
|
||||
@@ -1452,11 +1458,6 @@ size_t CommandStreamReceiverHw<GfxFamily>::getCmdSizeForComputeMode() {
|
||||
return EncodeComputeMode<GfxFamily>::getCmdSizeForComputeMode(this->peekRootDeviceEnvironment(), hasSharedHandles(), isRcs());
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
SubmissionStatus CommandStreamReceiverHw<GfxFamily>::initializeDeviceWithFirstSubmission() {
|
||||
return flushTagUpdate();
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void CommandStreamReceiverHw<GfxFamily>::handleFrontEndStateTransition(const DispatchFlags &dispatchFlags) {
|
||||
if (streamProperties.frontEndState.disableOverdispatch.value != -1) {
|
||||
@@ -2302,5 +2303,4 @@ inline void CommandStreamReceiverHw<GfxFamily>::chainCsrWorkToTask(LinearStream
|
||||
this->makeResident(*chainedBatchBuffer);
|
||||
EncodeNoop<GfxFamily>::alignToCacheLine(commandStreamCSR);
|
||||
}
|
||||
|
||||
} // namespace NEO
|
||||
|
||||
@@ -0,0 +1,76 @@
|
||||
/*
|
||||
* Copyright (C) 2024 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/command_stream/command_stream_receiver_hw.h"
|
||||
|
||||
namespace NEO {
|
||||
|
||||
template <typename GfxFamily>
|
||||
SubmissionStatus CommandStreamReceiverHw<GfxFamily>::initializeDeviceWithFirstSubmission(Device &device) {
|
||||
return flushTagUpdate();
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTaskStateless(
|
||||
LinearStream &commandStream, size_t commandStreamStart,
|
||||
const IndirectHeap *dsh, const IndirectHeap *ioh, const IndirectHeap *ssh,
|
||||
TaskCountType taskLevel, DispatchFlags &dispatchFlags, Device &device) {
|
||||
|
||||
UNRECOVERABLE_IF(true);
|
||||
return {};
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
SubmissionStatus CommandStreamReceiverHw<GfxFamily>::programHeaplessProlog(Device &device) {
|
||||
UNRECOVERABLE_IF(true);
|
||||
return SubmissionStatus::unsupported;
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void CommandStreamReceiverHw<GfxFamily>::programStateBaseAddressHeapless(Device &device, LinearStream &commandStream) {
|
||||
UNRECOVERABLE_IF(true);
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void CommandStreamReceiverHw<GfxFamily>::programComputeModeHeapless(Device &device, LinearStream &commandStream) {
|
||||
UNRECOVERABLE_IF(true);
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void CommandStreamReceiverHw<GfxFamily>::programHeaplessStateProlog(Device &device, LinearStream &commandStream) {
|
||||
UNRECOVERABLE_IF(true);
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
size_t CommandStreamReceiverHw<GfxFamily>::getCmdSizeForHeaplessPrologue(Device &device) const {
|
||||
UNRECOVERABLE_IF(true);
|
||||
return 0;
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void CommandStreamReceiverHw<GfxFamily>::handleAllocationsResidencyForflushTaskStateless(const IndirectHeap *dsh, const IndirectHeap *ioh, const IndirectHeap *ssh) {
|
||||
UNRECOVERABLE_IF(true);
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void CommandStreamReceiverHw<GfxFamily>::handleAllocationsResidencyForHeaplessProlog(LinearStream &linearStream, Device &device) {
|
||||
UNRECOVERABLE_IF(true);
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
inline size_t CommandStreamReceiverHw<GfxFamily>::getRequiredCmdStreamHeaplessSize(const DispatchFlags &dispatchFlags, Device &device) {
|
||||
UNRECOVERABLE_IF(true);
|
||||
return 0u;
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
inline size_t CommandStreamReceiverHw<GfxFamily>::getRequiredCmdStreamHeaplessSizeAligned(const DispatchFlags &dispatchFlags, Device &device) {
|
||||
UNRECOVERABLE_IF(true);
|
||||
return 0u;
|
||||
}
|
||||
|
||||
} // namespace NEO
|
||||
@@ -18,6 +18,7 @@
|
||||
#include "shared/source/execution_environment/root_device_environment.h"
|
||||
#include "shared/source/gmm_helper/gmm_helper.h"
|
||||
#include "shared/source/helpers/api_specific_config.h"
|
||||
#include "shared/source/helpers/compiler_product_helper.h"
|
||||
#include "shared/source/helpers/gfx_core_helper.h"
|
||||
#include "shared/source/helpers/ray_tracing_helper.h"
|
||||
#include "shared/source/memory_manager/allocation_properties.h"
|
||||
@@ -395,7 +396,9 @@ bool Device::createEngine(uint32_t deviceCsrIndex, EngineTypeUsage engineTypeUsa
|
||||
const auto defaultEngineType = engineInstanced ? this->engineInstancedType : getChosenEngineType(hwInfo);
|
||||
const bool isDefaultEngine = defaultEngineType == engineType && engineUsage == EngineUsage::regular;
|
||||
const bool createAsEngineInstanced = engineInstanced && EngineHelpers::isCcs(engineType);
|
||||
const bool isPrimaryEngine = gfxCoreHelper.areSecondaryContextsSupported() && EngineHelpers::isCcs(engineType) && engineUsage == EngineUsage::regular;
|
||||
|
||||
const bool isPrimaryEngine = EngineHelpers::isCcs(engineType) && engineUsage == EngineUsage::regular;
|
||||
const bool useContextGroup = isPrimaryEngine && gfxCoreHelper.areSecondaryContextsSupported();
|
||||
|
||||
UNRECOVERABLE_IF(EngineHelpers::isBcs(engineType) && !hwInfo.capabilityTable.blitterOperationsSupported);
|
||||
|
||||
@@ -416,7 +419,7 @@ bool Device::createEngine(uint32_t deviceCsrIndex, EngineTypeUsage engineTypeUsa
|
||||
EngineDescriptor engineDescriptor(engineTypeUsage, getDeviceBitfield(), preemptionMode, false, createAsEngineInstanced);
|
||||
|
||||
auto osContext = executionEnvironment->memoryManager->createAndRegisterOsContext(commandStreamReceiver.get(), engineDescriptor);
|
||||
osContext->setContextGroup(isPrimaryEngine);
|
||||
osContext->setContextGroup(useContextGroup);
|
||||
|
||||
commandStreamReceiver->setupContext(*osContext);
|
||||
|
||||
@@ -447,7 +450,7 @@ bool Device::createEngine(uint32_t deviceCsrIndex, EngineTypeUsage engineTypeUsa
|
||||
|
||||
if (osContext->isDebuggableContext() ||
|
||||
this->isInitDeviceWithFirstSubmissionSupported(commandStreamReceiver->getType())) {
|
||||
if (SubmissionStatus::success != commandStreamReceiver->initializeDeviceWithFirstSubmission()) {
|
||||
if (SubmissionStatus::success != commandStreamReceiver->initializeDeviceWithFirstSubmission(*this)) {
|
||||
return false;
|
||||
}
|
||||
firstSubmissionDone = true;
|
||||
@@ -455,8 +458,12 @@ bool Device::createEngine(uint32_t deviceCsrIndex, EngineTypeUsage engineTypeUsa
|
||||
}
|
||||
}
|
||||
|
||||
if (isPrimaryEngine && !firstSubmissionDone) {
|
||||
commandStreamReceiver->initializeDeviceWithFirstSubmission();
|
||||
auto &compilerProductHelper = this->getCompilerProductHelper();
|
||||
bool isHeaplessStateInit = isPrimaryEngine && compilerProductHelper.isHeaplessStateInitEnabled();
|
||||
bool initializeDevice = (useContextGroup || isHeaplessStateInit) && !firstSubmissionDone;
|
||||
|
||||
if (initializeDevice) {
|
||||
commandStreamReceiver->initializeDeviceWithFirstSubmission(*this);
|
||||
}
|
||||
|
||||
if (EngineHelpers::isBcs(engineType) && (defaultBcsEngineIndex == std::numeric_limits<uint32_t>::max()) && (engineUsage == EngineUsage::regular)) {
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2018-2023 Intel Corporation
|
||||
* Copyright (C) 2018-2024 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -93,6 +93,10 @@ void ExecutionEnvironment::calculateMaxOsContextCount() {
|
||||
auto ccsCount = hwInfo->gtSystemInfo.CCSInfo.NumberOfCCSEnabled;
|
||||
bool hasRootCsr = subDevicesCount > 1;
|
||||
|
||||
if (debugManager.flags.ContextGroupSize.get() >= 1) {
|
||||
MemoryManager::maxOsContextCount += debugManager.flags.ContextGroupSize.get();
|
||||
}
|
||||
|
||||
MemoryManager::maxOsContextCount += osContextCount * subDevicesCount + hasRootCsr;
|
||||
|
||||
if (ccsCount > 1 && debugManager.flags.EngineInstancedSubDevices.get()) {
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2023 Intel Corporation
|
||||
* Copyright (C) 2019-2024 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -7,6 +7,7 @@
|
||||
|
||||
#include "shared/source/command_container/command_encoder.h"
|
||||
#include "shared/source/command_stream/command_stream_receiver_hw_bdw_and_later.inl"
|
||||
#include "shared/source/command_stream/command_stream_receiver_hw_heap_addressing.inl"
|
||||
#include "shared/source/command_stream/device_command_stream.h"
|
||||
#include "shared/source/debug_settings/debug_settings_manager.h"
|
||||
#include "shared/source/gen11/hw_cmds_base.h"
|
||||
|
||||
@@ -11,6 +11,7 @@
|
||||
using Family = NEO::Gen12LpFamily;
|
||||
|
||||
#include "shared/source/command_stream/command_stream_receiver_hw_bdw_and_later.inl"
|
||||
#include "shared/source/command_stream/command_stream_receiver_hw_heap_addressing.inl"
|
||||
#include "shared/source/command_stream/device_command_stream.h"
|
||||
#include "shared/source/gmm_helper/gmm.h"
|
||||
#include "shared/source/helpers/blit_commands_helper_bdw_and_later.inl"
|
||||
|
||||
@@ -1,11 +1,12 @@
|
||||
/*
|
||||
* Copyright (C) 2018-2022 Intel Corporation
|
||||
* Copyright (C) 2018-2024 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/command_stream/command_stream_receiver_hw_bdw_and_later.inl"
|
||||
#include "shared/source/command_stream/command_stream_receiver_hw_heap_addressing.inl"
|
||||
#include "shared/source/command_stream/device_command_stream.h"
|
||||
#include "shared/source/gen8/hw_cmds_base.h"
|
||||
#include "shared/source/helpers/blit_commands_helper_bdw_and_later.inl"
|
||||
|
||||
@@ -1,11 +1,12 @@
|
||||
/*
|
||||
* Copyright (C) 2018-2022 Intel Corporation
|
||||
* Copyright (C) 2018-2024 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/command_stream/command_stream_receiver_hw_bdw_and_later.inl"
|
||||
#include "shared/source/command_stream/command_stream_receiver_hw_heap_addressing.inl"
|
||||
#include "shared/source/command_stream/device_command_stream.h"
|
||||
#include "shared/source/gen9/hw_cmds_base.h"
|
||||
#include "shared/source/helpers/blit_commands_helper_bdw_and_later.inl"
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2021-2023 Intel Corporation
|
||||
* Copyright (C) 2021-2024 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -77,6 +77,7 @@ class CompilerProductHelper {
|
||||
virtual StackVec<OclCVersion, 5> getDeviceOpenCLCVersions(const HardwareInfo &hwInfo, OclCVersion max) const = 0;
|
||||
virtual void adjustHwInfoForIgc(HardwareInfo &hwInfo) const = 0;
|
||||
virtual bool isHeaplessModeEnabled() const = 0;
|
||||
virtual bool isHeaplessStateInitEnabled() const = 0;
|
||||
|
||||
virtual ~CompilerProductHelper() = default;
|
||||
uint32_t getHwIpVersion(const HardwareInfo &hwInfo) const;
|
||||
@@ -119,6 +120,7 @@ class CompilerProductHelperHw : public CompilerProductHelper {
|
||||
StackVec<OclCVersion, 5> getDeviceOpenCLCVersions(const HardwareInfo &hwInfo, OclCVersion max) const override;
|
||||
void adjustHwInfoForIgc(HardwareInfo &hwInfo) const override;
|
||||
bool isHeaplessModeEnabled() const override;
|
||||
bool isHeaplessStateInitEnabled() const override;
|
||||
|
||||
~CompilerProductHelperHw() override = default;
|
||||
|
||||
|
||||
@@ -232,6 +232,11 @@ bool CompilerProductHelperHw<gfxProduct>::isHeaplessModeEnabled() const {
|
||||
return false;
|
||||
}
|
||||
|
||||
template <PRODUCT_FAMILY gfxProduct>
|
||||
bool CompilerProductHelperHw<gfxProduct>::isHeaplessStateInitEnabled() const {
|
||||
return false;
|
||||
}
|
||||
|
||||
template <PRODUCT_FAMILY gfxProduct>
|
||||
uint32_t CompilerProductHelperHw<gfxProduct>::matchRevisionIdWithProductConfig(HardwareIpVersion ipVersion, uint32_t revisionID) const {
|
||||
return ipVersion.value;
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2018-2023 Intel Corporation
|
||||
* Copyright (C) 2018-2024 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -79,5 +79,6 @@ struct StateBaseAddressHelper {
|
||||
static uint32_t getMaxBindlessSurfaceStates();
|
||||
|
||||
static void programHeaplessStateBaseAddress(STATE_BASE_ADDRESS &sba);
|
||||
static size_t getSbaCmdSize();
|
||||
};
|
||||
} // namespace NEO
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2023 Intel Corporation
|
||||
* Copyright (C) 2019-2024 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -146,4 +146,9 @@ void StateBaseAddressHelper<GfxFamily>::programBindingTableBaseAddress(LinearStr
|
||||
StateBaseAddressHelper<GfxFamily>::programBindingTableBaseAddress(commandStream, ssh.getHeapGpuBase(), ssh.getHeapSizeInPages(), gmmHelper);
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
inline size_t StateBaseAddressHelper<GfxFamily>::getSbaCmdSize() {
|
||||
return sizeof(typename GfxFamily::STATE_BASE_ADDRESS);
|
||||
}
|
||||
|
||||
} // namespace NEO
|
||||
|
||||
@@ -13,6 +13,7 @@
|
||||
using Family = NEO::XeHpcCoreFamily;
|
||||
|
||||
#include "shared/source/command_stream/command_stream_receiver_hw_dg2_and_later.inl"
|
||||
#include "shared/source/command_stream/command_stream_receiver_hw_heap_addressing.inl"
|
||||
#include "shared/source/command_stream/command_stream_receiver_hw_xehp_and_later.inl"
|
||||
#include "shared/source/gmm_helper/gmm.h"
|
||||
#include "shared/source/helpers/blit_commands_helper_xehp_and_later.inl"
|
||||
|
||||
@@ -12,6 +12,7 @@
|
||||
using Family = NEO::XeHpgCoreFamily;
|
||||
|
||||
#include "shared/source/command_stream/command_stream_receiver_hw_dg2_and_later.inl"
|
||||
#include "shared/source/command_stream/command_stream_receiver_hw_heap_addressing.inl"
|
||||
#include "shared/source/command_stream/command_stream_receiver_hw_xehp_and_later.inl"
|
||||
#include "shared/source/helpers/blit_commands_helper_xehp_and_later.inl"
|
||||
#include "shared/source/helpers/populate_factory.h"
|
||||
|
||||
@@ -55,9 +55,11 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw<GfxFamily>, publ
|
||||
using BaseClass::dcFlushSupport;
|
||||
using BaseClass::directSubmission;
|
||||
using BaseClass::dshState;
|
||||
using BaseClass::getCmdSizeForHeaplessPrologue;
|
||||
using BaseClass::getCmdSizeForPrologue;
|
||||
using BaseClass::getScratchPatchAddress;
|
||||
using BaseClass::getScratchSpaceController;
|
||||
using BaseClass::handleAllocationsResidencyForHeaplessProlog;
|
||||
using BaseClass::handleFrontEndStateTransition;
|
||||
using BaseClass::handlePipelineSelectStateTransition;
|
||||
using BaseClass::handleStateBaseAddressStateTransition;
|
||||
@@ -68,6 +70,7 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw<GfxFamily>, publ
|
||||
using BaseClass::isDirectSubmissionEnabled;
|
||||
using BaseClass::isPerDssBackedBufferSent;
|
||||
using BaseClass::makeResident;
|
||||
using BaseClass::pageTableManagerInitialized;
|
||||
using BaseClass::perDssBackedBuffer;
|
||||
using BaseClass::postInitFlagsSetup;
|
||||
using BaseClass::programActivePartitionConfig;
|
||||
@@ -84,6 +87,7 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw<GfxFamily>, publ
|
||||
using BaseClass::sshState;
|
||||
using BaseClass::staticWorkPartitioningEnabled;
|
||||
using BaseClass::streamProperties;
|
||||
|
||||
using BaseClass::wasSubmittedToSingleSubdevice;
|
||||
using BaseClass::CommandStreamReceiver::activePartitions;
|
||||
using BaseClass::CommandStreamReceiver::activePartitionsConfig;
|
||||
@@ -97,6 +101,7 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw<GfxFamily>, publ
|
||||
using BaseClass::CommandStreamReceiver::commandStream;
|
||||
using BaseClass::CommandStreamReceiver::debugConfirmationFunction;
|
||||
using BaseClass::CommandStreamReceiver::debugPauseStateAddress;
|
||||
using BaseClass::CommandStreamReceiver::debugSurface;
|
||||
using BaseClass::CommandStreamReceiver::deviceBitfield;
|
||||
using BaseClass::CommandStreamReceiver::dispatchMode;
|
||||
using BaseClass::CommandStreamReceiver::doubleSbaWa;
|
||||
@@ -134,6 +139,7 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw<GfxFamily>, publ
|
||||
using BaseClass::CommandStreamReceiver::ownershipMutex;
|
||||
using BaseClass::CommandStreamReceiver::perfCounterAllocator;
|
||||
using BaseClass::CommandStreamReceiver::pipelineSupportFlags;
|
||||
using BaseClass::CommandStreamReceiver::preemptionAllocation;
|
||||
using BaseClass::CommandStreamReceiver::profilingTimeStampAllocator;
|
||||
using BaseClass::CommandStreamReceiver::requestedPreallocationsAmount;
|
||||
using BaseClass::CommandStreamReceiver::requiredScratchSlot0Size;
|
||||
@@ -145,6 +151,7 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw<GfxFamily>, publ
|
||||
using BaseClass::CommandStreamReceiver::stateComputeModeDirty;
|
||||
using BaseClass::CommandStreamReceiver::submissionAggregator;
|
||||
using BaseClass::CommandStreamReceiver::tagAddress;
|
||||
using BaseClass::CommandStreamReceiver::tagAllocation;
|
||||
using BaseClass::CommandStreamReceiver::taskCount;
|
||||
using BaseClass::CommandStreamReceiver::taskLevel;
|
||||
using BaseClass::CommandStreamReceiver::timestampPacketAllocator;
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2021-2023 Intel Corporation
|
||||
* Copyright (C) 2021-2024 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -28,6 +28,20 @@ CompletionStamp MockCommandStreamReceiver::flushTask(
|
||||
return stamp;
|
||||
}
|
||||
|
||||
// Mock stateless flush: nothing is submitted. It bumps the task count and reports a
// completion stamp built from the new task count, the caller's task level and the
// current flush stamp. The streams, heaps, dispatch flags and device are ignored.
CompletionStamp MockCommandStreamReceiver::flushTaskStateless(
    LinearStream &commandStream, size_t commandStreamStart,
    const IndirectHeap *dsh, const IndirectHeap *ioh, const IndirectHeap *ssh,
    TaskCountType taskLevel, DispatchFlags &dispatchFlags, Device &device) {
    ++taskCount;
    return CompletionStamp{taskCount, taskLevel, flushStamp->peekStamp()};
}
|
||||
|
||||
CompletionStamp MockCommandStreamReceiver::flushBcsTask(LinearStream &commandStreamTask, size_t commandStreamTaskStart,
|
||||
const DispatchBcsFlags &dispatchBcsFlags, const HardwareInfo &hwInfo) {
|
||||
++taskCount;
|
||||
|
||||
@@ -119,6 +119,16 @@ class MockCommandStreamReceiver : public CommandStreamReceiver {
|
||||
DispatchFlags &dispatchFlags,
|
||||
Device &device) override;
|
||||
|
||||
CompletionStamp flushTaskStateless(
|
||||
LinearStream &commandStream,
|
||||
size_t commandStreamStart,
|
||||
const IndirectHeap *dsh,
|
||||
const IndirectHeap *ioh,
|
||||
const IndirectHeap *ssh,
|
||||
TaskCountType taskLevel,
|
||||
DispatchFlags &dispatchFlags,
|
||||
Device &device) override;
|
||||
|
||||
CompletionStamp flushImmediateTask(
|
||||
LinearStream &immediateCommandStream,
|
||||
size_t immediateCommandStreamStart,
|
||||
@@ -207,7 +217,7 @@ class MockCommandStreamReceiver : public CommandStreamReceiver {
|
||||
}
|
||||
return isLocked;
|
||||
}
|
||||
SubmissionStatus initializeDeviceWithFirstSubmission() override { return SubmissionStatus::success; }
|
||||
SubmissionStatus initializeDeviceWithFirstSubmission(Device &device) override { return SubmissionStatus::success; }
|
||||
|
||||
static constexpr size_t tagSize = 256;
|
||||
static volatile TagAddressType mockTagAddress[tagSize];
|
||||
|
||||
@@ -3028,10 +3028,10 @@ HWTEST_F(CommandStreamReceiverHwTest, givenOutOfMemoryFailureOnFlushWhenInitiali
|
||||
|
||||
commandStreamReceiver.flushReturnValue = SubmissionStatus::outOfMemory;
|
||||
|
||||
EXPECT_EQ(SubmissionStatus::outOfMemory, commandStreamReceiver.initializeDeviceWithFirstSubmission());
|
||||
EXPECT_EQ(SubmissionStatus::outOfMemory, commandStreamReceiver.initializeDeviceWithFirstSubmission(*pDevice));
|
||||
|
||||
commandStreamReceiver.flushReturnValue = SubmissionStatus::outOfHostMemory;
|
||||
EXPECT_EQ(SubmissionStatus::outOfHostMemory, commandStreamReceiver.initializeDeviceWithFirstSubmission());
|
||||
EXPECT_EQ(SubmissionStatus::outOfHostMemory, commandStreamReceiver.initializeDeviceWithFirstSubmission(*pDevice));
|
||||
}
|
||||
|
||||
HWTEST_F(CommandStreamReceiverHwTest, whenFlushTagUpdateThenSetStallingCmdsFlag) {
|
||||
@@ -5022,3 +5022,53 @@ HWTEST2_F(CommandStreamReceiverHwTest, givenImplicitScalingEnabledWhenProgrammin
|
||||
|
||||
EXPECT_EQ(estimatedCmdSize, offset);
|
||||
}
|
||||
|
||||
// With ForcePipeControlPriorToWalker set, addPipeControlFlushTaskIfNeeded must emit a
// PIPE_CONTROL with command streamer stall enabled, even for task level 0.
HWTEST_F(CommandStreamReceiverHwTest, givenForcePipeControlPriorToWalkerWhenAddPipeControlFlushTaskIfNeededThenStallingPcIsProgrammed) {
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;

    // Restores the debug flags when the test ends.
    DebugManagerStateRestore dbgRestorer;
    debugManager.flags.ForcePipeControlPriorToWalker.set(1);

    auto &ultCsr = pDevice->getUltCommandStreamReceiver<FamilyType>();
    ultCsr.addPipeControlFlushTaskIfNeeded(commandStream, 0);

    GenCmdList parsedCommands;
    CmdParse<FamilyType>::parseCommandBuffer(parsedCommands, commandStream.getCpuBase(), commandStream.getUsed());

    auto pipeControlIt = find<PIPE_CONTROL *>(parsedCommands.begin(), parsedCommands.end());
    ASSERT_NE(parsedCommands.end(), pipeControlIt);

    auto pipeControl = genCmdCast<PIPE_CONTROL *>(*pipeControlIt);
    EXPECT_TRUE(pipeControl->getCommandStreamerStallEnable());
}
|
||||
|
||||
// A CSR bound to a regular BCS context must report success from its first submission.
HWTEST_F(CommandStreamReceiverTest, givenBcsCsrWhenInitializeDeviceWithFirstSubmissionIsCalledThenSuccessIsReturned) {
    const auto bcsDescriptor = EngineDescriptorHelper::getDefaultDescriptor({aub_stream::EngineType::ENGINE_BCS, EngineUsage::regular});
    MockOsContext bcsOsContext(0, bcsDescriptor);

    MockCsrHw<FamilyType> bcsCsr(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
    bcsCsr.setupContext(bcsOsContext);
    bcsCsr.initializeTagAllocation();

    const auto submissionStatus = bcsCsr.initializeDeviceWithFirstSubmission(*pDevice);
    EXPECT_EQ(SubmissionStatus::success, submissionStatus);
}
|
||||
|
||||
using CommandStreamReceiverHwHeaplessTest = Test<DeviceFixture>;
|
||||
|
||||
// Every heapless-only entry point of the CSR under test is expected to throw.
HWTEST_F(CommandStreamReceiverHwHeaplessTest, whenHeaplessCommandStreamReceiverFunctionsAreCalledThenExceptionIsThrown) {
    auto csr = std::make_unique<UltCommandStreamReceiver<FamilyType>>(*pDevice->executionEnvironment, rootDeviceIndex, pDevice->getDeviceBitfield());

    // Empty stream: the calls are expected to throw before touching it.
    LinearStream commandStream(0, 0);
    auto &dispatchFlags = csr->recordedDispatchFlags;

    EXPECT_ANY_THROW(csr->flushTaskStateless(commandStream, 0, nullptr, nullptr, nullptr, 0, dispatchFlags, *pDevice));
    EXPECT_ANY_THROW(csr->programHeaplessProlog(*pDevice));
    EXPECT_ANY_THROW(csr->programStateBaseAddressHeapless(*pDevice, commandStream));
    EXPECT_ANY_THROW(csr->programComputeModeHeapless(*pDevice, commandStream));
    EXPECT_ANY_THROW(csr->getCmdSizeForHeaplessPrologue(*pDevice));
    EXPECT_ANY_THROW(csr->handleAllocationsResidencyForHeaplessProlog(commandStream, *pDevice));
    EXPECT_ANY_THROW(csr->programHeaplessStateProlog(*pDevice, commandStream));
    EXPECT_ANY_THROW(csr->handleAllocationsResidencyForflushTaskStateless(nullptr, nullptr, nullptr));
    EXPECT_ANY_THROW(csr->getRequiredCmdStreamHeaplessSize(dispatchFlags, *pDevice));
    EXPECT_ANY_THROW(csr->getRequiredCmdStreamHeaplessSizeAligned(dispatchFlags, *pDevice));
}
|
||||
|
||||
@@ -1097,8 +1097,6 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, DeviceTests, givenCCSEngineAndContextGroupSizeEnabl
|
||||
|
||||
UltDeviceFactory deviceFactory{1, 0, executionEnvironment};
|
||||
|
||||
MemoryManager::maxOsContextCount++;
|
||||
|
||||
deviceFactory.rootDevices[0]->createEngine(0, {aub_stream::EngineType::ENGINE_CCS, EngineUsage::regular});
|
||||
|
||||
auto defaultEngine = deviceFactory.rootDevices[0]->getDefaultEngine();
|
||||
|
||||
Reference in New Issue
Block a user