feature: introduce states programming at driver init heapless ocl

Related-To: NEO-7824
Signed-off-by: Kamil Kopryk <kamil.kopryk@intel.com>
This commit is contained in:
Kamil Kopryk
2024-03-07 16:50:57 +00:00
committed by Compute-Runtime-Automation
parent ea69b156d2
commit 168445784e
32 changed files with 383 additions and 96 deletions

View File

@@ -401,8 +401,8 @@ struct EncodeComputeMode {
const PipelineSelectArgs &args, bool hasSharedHandles,
const RootDeviceEnvironment &rootDeviceEnvironment, bool isRcs, bool dcFlush);
static void programComputeModeCommand(LinearStream &csr, StateComputeModeProperties &properties, const RootDeviceEnvironment &rootDeviceEnvironment);
static void adjustPipelineSelect(CommandContainer &container, const NEO::KernelDescriptor &kernelDescriptor);
static size_t getSizeForComputeMode();
};
template <typename GfxFamily>

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2020-2023 Intel Corporation
* Copyright (C) 2020-2024 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -33,6 +33,11 @@ size_t EncodeComputeMode<Family>::getCmdSizeForComputeMode(const RootDeviceEnvir
return size;
}
// Returns the size, in bytes, of a single STATE_COMPUTE_MODE command for the
// given hardware family.
template <typename Family>
inline size_t EncodeComputeMode<Family>::getSizeForComputeMode() {
    using STATE_COMPUTE_MODE = typename Family::STATE_COMPUTE_MODE;
    return sizeof(STATE_COMPUTE_MODE);
}
template <typename Family>
inline void EncodeComputeMode<Family>::programComputeModeCommandWithSynchronization(
LinearStream &csr, StateComputeModeProperties &properties, const PipelineSelectArgs &args,

View File

@@ -87,5 +87,11 @@ if(SUPPORT_DG2_AND_LATER)
)
endif()
if(NOT SUPPORT_HEAPLESS)
list(APPEND NEO_CORE_COMMAND_STREAM
${CMAKE_CURRENT_SOURCE_DIR}/command_stream_receiver_hw_heap_addressing.inl
)
endif()
set_property(GLOBAL PROPERTY NEO_CORE_COMMAND_STREAM ${NEO_CORE_COMMAND_STREAM})
add_subdirectories()

View File

@@ -96,6 +96,11 @@ class CommandStreamReceiver {
virtual CompletionStamp flushTask(LinearStream &commandStreamTask, size_t commandStreamTaskStart,
const IndirectHeap *dsh, const IndirectHeap *ioh, const IndirectHeap *ssh,
TaskCountType taskLevel, DispatchFlags &dispatchFlags, Device &device) = 0;
virtual CompletionStamp flushTaskStateless(LinearStream &commandStreamTask, size_t commandStreamTaskStart,
const IndirectHeap *dsh, const IndirectHeap *ioh, const IndirectHeap *ssh,
TaskCountType taskLevel, DispatchFlags &dispatchFlags, Device &device) = 0;
virtual CompletionStamp flushBcsTask(LinearStream &commandStream, size_t commandStreamStart, const DispatchBcsFlags &dispatchBcsFlags, const HardwareInfo &hwInfo) = 0;
virtual CompletionStamp flushImmediateTask(LinearStream &immediateCommandStream, size_t immediateCommandStreamStart,
ImmediateDispatchFlags &dispatchFlags, Device &device) = 0;
@@ -414,7 +419,7 @@ class CommandStreamReceiver {
lastPreemptionMode = value;
}
virtual SubmissionStatus initializeDeviceWithFirstSubmission() = 0;
virtual SubmissionStatus initializeDeviceWithFirstSubmission(Device &device) = 0;
uint32_t getNumClients() const {
return this->numClients.load();

View File

@@ -59,6 +59,12 @@ class CommandStreamReceiverHw : public CommandStreamReceiver {
const IndirectHeap *dsh, const IndirectHeap *ioh, const IndirectHeap *ssh,
TaskCountType taskLevel, DispatchFlags &dispatchFlags, Device &device) override;
CompletionStamp flushTaskStateless(LinearStream &commandStream, size_t commandStreamStart,
const IndirectHeap *dsh, const IndirectHeap *ioh, const IndirectHeap *ssh,
TaskCountType taskLevel, DispatchFlags &dispatchFlags, Device &device) override;
void addPipeControlFlushTaskIfNeeded(LinearStream &commandStreamCSR, TaskCountType taskLevel);
CompletionStamp flushBcsTask(LinearStream &commandStreamTask, size_t commandStreamTaskStart, const DispatchBcsFlags &dispatchBcsFlags, const HardwareInfo &hwInfo) override;
CompletionStamp flushImmediateTask(LinearStream &immediateCommandStream, size_t immediateCommandStreamStart,
@@ -79,6 +85,10 @@ class CommandStreamReceiverHw : public CommandStreamReceiver {
size_t getRequiredCmdStreamSizeAligned(const DispatchFlags &dispatchFlags, Device &device);
size_t getRequiredCmdStreamSize(const DispatchBcsFlags &dispatchBcsFlags);
size_t getRequiredCmdStreamSizeAligned(const DispatchBcsFlags &dispatchBcsFlags);
size_t getRequiredCmdStreamHeaplessSize(const DispatchFlags &dispatchFlags, Device &device);
size_t getRequiredCmdStreamHeaplessSizeAligned(const DispatchFlags &dispatchFlags, Device &device);
size_t getRequiredCmdSizeForPreamble(Device &device) const;
size_t getCmdSizeForPreemption(const DispatchFlags &dispatchFlags) const;
size_t getCmdSizeForEpilogue(const DispatchFlags &dispatchFlags) const;
@@ -164,7 +174,7 @@ class CommandStreamReceiverHw : public CommandStreamReceiver {
return getCmdSizeForStallingNoPostSyncCommands();
}
void programStallingCommandsForBarrier(LinearStream &cmdStream, TimestampPacketContainer *barrierTimestampPacketNodes, const bool isDcFlushRequired) override;
SubmissionStatus initializeDeviceWithFirstSubmission() override;
SubmissionStatus initializeDeviceWithFirstSubmission(Device &device) override;
HeapDirtyState &getDshState() {
return dshState;
@@ -179,6 +189,12 @@ class CommandStreamReceiverHw : public CommandStreamReceiver {
void dispatchRayTracingStateCommand(LinearStream &cmdStream, Device &device);
uint64_t getScratchPatchAddress();
SubmissionStatus programHeaplessProlog(Device &device);
void programHeaplessStateProlog(Device &device, LinearStream &commandStream);
void programStateBaseAddressHeapless(Device &device, LinearStream &commandStream);
void programComputeModeHeapless(Device &device, LinearStream &commandStream);
void handleAllocationsResidencyForflushTaskStateless(const IndirectHeap *dsh, const IndirectHeap *ioh, const IndirectHeap *ssh);
protected:
void programPreemption(LinearStream &csr, DispatchFlags &dispatchFlags);
void programL3(LinearStream &csr, uint32_t &newL3Config);
@@ -198,6 +214,8 @@ class CommandStreamReceiverHw : public CommandStreamReceiver {
void programEnginePrologue(LinearStream &csr);
size_t getCmdSizeForPrologue() const;
size_t getCmdSizeForHeaplessPrologue(Device &device) const;
void handleAllocationsResidencyForHeaplessProlog(LinearStream &linearStream, Device &device);
void setClearSlmWorkAroundParameter(PipeControlArgs &args);
void addPipeControlBeforeStateSip(LinearStream &commandStream, Device &device);
@@ -328,6 +346,7 @@ class CommandStreamReceiverHw : public CommandStreamReceiver {
size_t cmdStreamStart = 0;
uint32_t latestSentBcsWaValue = std::numeric_limits<uint32_t>::max();
bool heaplessPrologueSent = false;
};
} // namespace NEO

View File

@@ -181,6 +181,30 @@ size_t CommandStreamReceiverHw<GfxFamily>::getCmdsSizeForHardwareContext() const
return getCmdSizeForPrologue();
}
// Emits the synchronization commands that may be needed in front of a flushed task.
//
// Three independent steps are evaluated, in this order:
//  1. a pending instruction cache flush request is programmed and then cleared,
//  2. when the incoming task level is greater than the task level stored in this
//     CSR, a single barrier is programmed (unless timestamp packet writes are
//     enabled) and the stored task level is advanced to the incoming one,
//  3. when the ForcePipeControlPriorToWalker debug flag is set, forcePipeControl()
//     is invoked on the stream.
//
// commandStreamCSR - stream that receives the programmed commands.
// taskLevel        - task level of the work being flushed.
template <typename GfxFamily>
void CommandStreamReceiverHw<GfxFamily>::addPipeControlFlushTaskIfNeeded(LinearStream &commandStreamCSR, TaskCountType taskLevel) {
    if (this->requiresInstructionCacheFlush) {
        MemorySynchronizationCommands<GfxFamily>::addInstructionCacheFlush(commandStreamCSR);
        this->requiresInstructionCacheFlush = false;
    }

    // A dependency on a previous walker needs a Pipe Control to avoid concurrency issues.
    const bool dependsOnPreviousWalker = taskLevel > this->taskLevel;
    if (dependsOnPreviousWalker) {
        if (!timestampPacketWriteEnabled) {
            PipeControlArgs barrierArgs;
            MemorySynchronizationCommands<GfxFamily>::addSingleBarrier(commandStreamCSR, barrierArgs);
        }

        // The stored task level is advanced whether or not a barrier was emitted.
        this->taskLevel = taskLevel;
        DBG_LOG(LogTaskCounts, __FUNCTION__, "Line: ", __LINE__, "this->taskCount", peekTaskCount());
    }

    const bool forcePipeControlRequested = debugManager.flags.ForcePipeControlPriorToWalker.get();
    if (forcePipeControlRequested) {
        forcePipeControl(commandStreamCSR);
    }
}
template <typename GfxFamily>
CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushBcsTask(LinearStream &commandStreamTask, size_t commandStreamTaskStart,
const DispatchBcsFlags &dispatchBcsFlags, const HardwareInfo &hwInfo) {
@@ -509,25 +533,7 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
MemorySynchronizationCommands<GfxFamily>::addStateCacheFlush(commandStreamCSR, device.getRootDeviceEnvironment());
}
if (requiresInstructionCacheFlush) {
MemorySynchronizationCommands<GfxFamily>::addInstructionCacheFlush(commandStreamCSR);
requiresInstructionCacheFlush = false;
}
// Add a Pipe Control if we have a dependency on a previous walker to avoid concurrency issues.
if (taskLevel > this->taskLevel) {
const auto programPipeControl = !timestampPacketWriteEnabled;
if (programPipeControl) {
PipeControlArgs args;
MemorySynchronizationCommands<GfxFamily>::addSingleBarrier(commandStreamCSR, args);
}
this->taskLevel = taskLevel;
DBG_LOG(LogTaskCounts, __FUNCTION__, "Line: ", __LINE__, "this->taskCount", peekTaskCount());
}
if (debugManager.flags.ForcePipeControlPriorToWalker.get()) {
forcePipeControl(commandStreamCSR);
}
addPipeControlFlushTaskIfNeeded(commandStreamCSR, taskLevel);
this->makeResident(*tagAllocation);
@@ -1452,11 +1458,6 @@ size_t CommandStreamReceiverHw<GfxFamily>::getCmdSizeForComputeMode() {
return EncodeComputeMode<GfxFamily>::getCmdSizeForComputeMode(this->peekRootDeviceEnvironment(), hasSharedHandles(), isRcs());
}
template <typename GfxFamily>
SubmissionStatus CommandStreamReceiverHw<GfxFamily>::initializeDeviceWithFirstSubmission() {
return flushTagUpdate();
}
template <typename GfxFamily>
void CommandStreamReceiverHw<GfxFamily>::handleFrontEndStateTransition(const DispatchFlags &dispatchFlags) {
if (streamProperties.frontEndState.disableOverdispatch.value != -1) {
@@ -2302,5 +2303,4 @@ inline void CommandStreamReceiverHw<GfxFamily>::chainCsrWorkToTask(LinearStream
this->makeResident(*chainedBatchBuffer);
EncodeNoop<GfxFamily>::alignToCacheLine(commandStreamCSR);
}
} // namespace NEO

View File

@@ -0,0 +1,76 @@
/*
* Copyright (C) 2024 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/command_stream/command_stream_receiver_hw.h"
namespace NEO {
// Performs the first submission for this CSR by flushing a tag update and
// reporting its status. The `device` argument is not used by this definition.
template <typename GfxFamily>
SubmissionStatus CommandStreamReceiverHw<GfxFamily>::initializeDeviceWithFirstSubmission(Device &device) {
    const SubmissionStatus tagUpdateStatus = flushTagUpdate();
    return tagUpdateStatus;
}
// Placeholder definition of the stateless flush path: no argument is read and
// the UNRECOVERABLE_IF(true) check is hit unconditionally.
template <typename GfxFamily>
CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTaskStateless(
LinearStream &commandStream, size_t commandStreamStart,
const IndirectHeap *dsh, const IndirectHeap *ioh, const IndirectHeap *ssh,
TaskCountType taskLevel, DispatchFlags &dispatchFlags, Device &device) {
UNRECOVERABLE_IF(true);
// NOTE(review): presumably only present to satisfy the return type — confirm
// against the UNRECOVERABLE_IF definition.
return {};
}
// Placeholder definition: `device` is not used and the UNRECOVERABLE_IF(true)
// check is hit unconditionally.
template <typename GfxFamily>
SubmissionStatus CommandStreamReceiverHw<GfxFamily>::programHeaplessProlog(Device &device) {
UNRECOVERABLE_IF(true);
// NOTE(review): presumably only present to satisfy the return type — confirm.
return SubmissionStatus::unsupported;
}
// Placeholder definition: nothing is programmed into `commandStream`; the
// UNRECOVERABLE_IF(true) check is hit unconditionally.
template <typename GfxFamily>
void CommandStreamReceiverHw<GfxFamily>::programStateBaseAddressHeapless(Device &device, LinearStream &commandStream) {
UNRECOVERABLE_IF(true);
}
// Placeholder definition: nothing is programmed into `commandStream`; the
// UNRECOVERABLE_IF(true) check is hit unconditionally.
template <typename GfxFamily>
void CommandStreamReceiverHw<GfxFamily>::programComputeModeHeapless(Device &device, LinearStream &commandStream) {
UNRECOVERABLE_IF(true);
}
// Placeholder definition: neither argument is used; the UNRECOVERABLE_IF(true)
// check is hit unconditionally.
template <typename GfxFamily>
void CommandStreamReceiverHw<GfxFamily>::programHeaplessStateProlog(Device &device, LinearStream &commandStream) {
UNRECOVERABLE_IF(true);
}
// Placeholder definition: no size is computed; the UNRECOVERABLE_IF(true) check
// is hit unconditionally.
template <typename GfxFamily>
size_t CommandStreamReceiverHw<GfxFamily>::getCmdSizeForHeaplessPrologue(Device &device) const {
UNRECOVERABLE_IF(true);
// NOTE(review): presumably only present to satisfy the return type — confirm.
return 0;
}
// Placeholder definition: the heap pointers are not touched; the
// UNRECOVERABLE_IF(true) check is hit unconditionally.
template <typename GfxFamily>
void CommandStreamReceiverHw<GfxFamily>::handleAllocationsResidencyForflushTaskStateless(const IndirectHeap *dsh, const IndirectHeap *ioh, const IndirectHeap *ssh) {
UNRECOVERABLE_IF(true);
}
// Placeholder definition: neither argument is used; the UNRECOVERABLE_IF(true)
// check is hit unconditionally.
template <typename GfxFamily>
void CommandStreamReceiverHw<GfxFamily>::handleAllocationsResidencyForHeaplessProlog(LinearStream &linearStream, Device &device) {
UNRECOVERABLE_IF(true);
}
// Placeholder definition: no size is computed from `dispatchFlags` or `device`;
// the UNRECOVERABLE_IF(true) check is hit unconditionally.
template <typename GfxFamily>
inline size_t CommandStreamReceiverHw<GfxFamily>::getRequiredCmdStreamHeaplessSize(const DispatchFlags &dispatchFlags, Device &device) {
UNRECOVERABLE_IF(true);
// NOTE(review): presumably only present to satisfy the return type — confirm.
return 0u;
}
// Placeholder definition: no size is computed or aligned; the
// UNRECOVERABLE_IF(true) check is hit unconditionally.
template <typename GfxFamily>
inline size_t CommandStreamReceiverHw<GfxFamily>::getRequiredCmdStreamHeaplessSizeAligned(const DispatchFlags &dispatchFlags, Device &device) {
UNRECOVERABLE_IF(true);
// NOTE(review): presumably only present to satisfy the return type — confirm.
return 0u;
}
} // namespace NEO

View File

@@ -18,6 +18,7 @@
#include "shared/source/execution_environment/root_device_environment.h"
#include "shared/source/gmm_helper/gmm_helper.h"
#include "shared/source/helpers/api_specific_config.h"
#include "shared/source/helpers/compiler_product_helper.h"
#include "shared/source/helpers/gfx_core_helper.h"
#include "shared/source/helpers/ray_tracing_helper.h"
#include "shared/source/memory_manager/allocation_properties.h"
@@ -395,7 +396,9 @@ bool Device::createEngine(uint32_t deviceCsrIndex, EngineTypeUsage engineTypeUsa
const auto defaultEngineType = engineInstanced ? this->engineInstancedType : getChosenEngineType(hwInfo);
const bool isDefaultEngine = defaultEngineType == engineType && engineUsage == EngineUsage::regular;
const bool createAsEngineInstanced = engineInstanced && EngineHelpers::isCcs(engineType);
const bool isPrimaryEngine = gfxCoreHelper.areSecondaryContextsSupported() && EngineHelpers::isCcs(engineType) && engineUsage == EngineUsage::regular;
const bool isPrimaryEngine = EngineHelpers::isCcs(engineType) && engineUsage == EngineUsage::regular;
const bool useContextGroup = isPrimaryEngine && gfxCoreHelper.areSecondaryContextsSupported();
UNRECOVERABLE_IF(EngineHelpers::isBcs(engineType) && !hwInfo.capabilityTable.blitterOperationsSupported);
@@ -416,7 +419,7 @@ bool Device::createEngine(uint32_t deviceCsrIndex, EngineTypeUsage engineTypeUsa
EngineDescriptor engineDescriptor(engineTypeUsage, getDeviceBitfield(), preemptionMode, false, createAsEngineInstanced);
auto osContext = executionEnvironment->memoryManager->createAndRegisterOsContext(commandStreamReceiver.get(), engineDescriptor);
osContext->setContextGroup(isPrimaryEngine);
osContext->setContextGroup(useContextGroup);
commandStreamReceiver->setupContext(*osContext);
@@ -447,7 +450,7 @@ bool Device::createEngine(uint32_t deviceCsrIndex, EngineTypeUsage engineTypeUsa
if (osContext->isDebuggableContext() ||
this->isInitDeviceWithFirstSubmissionSupported(commandStreamReceiver->getType())) {
if (SubmissionStatus::success != commandStreamReceiver->initializeDeviceWithFirstSubmission()) {
if (SubmissionStatus::success != commandStreamReceiver->initializeDeviceWithFirstSubmission(*this)) {
return false;
}
firstSubmissionDone = true;
@@ -455,8 +458,12 @@ bool Device::createEngine(uint32_t deviceCsrIndex, EngineTypeUsage engineTypeUsa
}
}
if (isPrimaryEngine && !firstSubmissionDone) {
commandStreamReceiver->initializeDeviceWithFirstSubmission();
auto &compilerProductHelper = this->getCompilerProductHelper();
bool isHeaplessStateInit = isPrimaryEngine && compilerProductHelper.isHeaplessStateInitEnabled();
bool initializeDevice = (useContextGroup || isHeaplessStateInit) && !firstSubmissionDone;
if (initializeDevice) {
commandStreamReceiver->initializeDeviceWithFirstSubmission(*this);
}
if (EngineHelpers::isBcs(engineType) && (defaultBcsEngineIndex == std::numeric_limits<uint32_t>::max()) && (engineUsage == EngineUsage::regular)) {

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2018-2023 Intel Corporation
* Copyright (C) 2018-2024 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -93,6 +93,10 @@ void ExecutionEnvironment::calculateMaxOsContextCount() {
auto ccsCount = hwInfo->gtSystemInfo.CCSInfo.NumberOfCCSEnabled;
bool hasRootCsr = subDevicesCount > 1;
if (debugManager.flags.ContextGroupSize.get() >= 1) {
MemoryManager::maxOsContextCount += debugManager.flags.ContextGroupSize.get();
}
MemoryManager::maxOsContextCount += osContextCount * subDevicesCount + hasRootCsr;
if (ccsCount > 1 && debugManager.flags.EngineInstancedSubDevices.get()) {

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2019-2023 Intel Corporation
* Copyright (C) 2019-2024 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -7,6 +7,7 @@
#include "shared/source/command_container/command_encoder.h"
#include "shared/source/command_stream/command_stream_receiver_hw_bdw_and_later.inl"
#include "shared/source/command_stream/command_stream_receiver_hw_heap_addressing.inl"
#include "shared/source/command_stream/device_command_stream.h"
#include "shared/source/debug_settings/debug_settings_manager.h"
#include "shared/source/gen11/hw_cmds_base.h"

View File

@@ -11,6 +11,7 @@
using Family = NEO::Gen12LpFamily;
#include "shared/source/command_stream/command_stream_receiver_hw_bdw_and_later.inl"
#include "shared/source/command_stream/command_stream_receiver_hw_heap_addressing.inl"
#include "shared/source/command_stream/device_command_stream.h"
#include "shared/source/gmm_helper/gmm.h"
#include "shared/source/helpers/blit_commands_helper_bdw_and_later.inl"

View File

@@ -1,11 +1,12 @@
/*
* Copyright (C) 2018-2022 Intel Corporation
* Copyright (C) 2018-2024 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/command_stream/command_stream_receiver_hw_bdw_and_later.inl"
#include "shared/source/command_stream/command_stream_receiver_hw_heap_addressing.inl"
#include "shared/source/command_stream/device_command_stream.h"
#include "shared/source/gen8/hw_cmds_base.h"
#include "shared/source/helpers/blit_commands_helper_bdw_and_later.inl"

View File

@@ -1,11 +1,12 @@
/*
* Copyright (C) 2018-2022 Intel Corporation
* Copyright (C) 2018-2024 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/command_stream/command_stream_receiver_hw_bdw_and_later.inl"
#include "shared/source/command_stream/command_stream_receiver_hw_heap_addressing.inl"
#include "shared/source/command_stream/device_command_stream.h"
#include "shared/source/gen9/hw_cmds_base.h"
#include "shared/source/helpers/blit_commands_helper_bdw_and_later.inl"

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2021-2023 Intel Corporation
* Copyright (C) 2021-2024 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -77,6 +77,7 @@ class CompilerProductHelper {
virtual StackVec<OclCVersion, 5> getDeviceOpenCLCVersions(const HardwareInfo &hwInfo, OclCVersion max) const = 0;
virtual void adjustHwInfoForIgc(HardwareInfo &hwInfo) const = 0;
virtual bool isHeaplessModeEnabled() const = 0;
virtual bool isHeaplessStateInitEnabled() const = 0;
virtual ~CompilerProductHelper() = default;
uint32_t getHwIpVersion(const HardwareInfo &hwInfo) const;
@@ -119,6 +120,7 @@ class CompilerProductHelperHw : public CompilerProductHelper {
StackVec<OclCVersion, 5> getDeviceOpenCLCVersions(const HardwareInfo &hwInfo, OclCVersion max) const override;
void adjustHwInfoForIgc(HardwareInfo &hwInfo) const override;
bool isHeaplessModeEnabled() const override;
bool isHeaplessStateInitEnabled() const override;
~CompilerProductHelperHw() override = default;

View File

@@ -232,6 +232,11 @@ bool CompilerProductHelperHw<gfxProduct>::isHeaplessModeEnabled() const {
return false;
}
// Generic (non-specialized) answer for every product: heapless state
// initialization is reported as disabled.
template <PRODUCT_FAMILY gfxProduct>
bool CompilerProductHelperHw<gfxProduct>::isHeaplessStateInitEnabled() const {
    constexpr bool heaplessStateInitEnabled = false;
    return heaplessStateInitEnabled;
}
template <PRODUCT_FAMILY gfxProduct>
uint32_t CompilerProductHelperHw<gfxProduct>::matchRevisionIdWithProductConfig(HardwareIpVersion ipVersion, uint32_t revisionID) const {
return ipVersion.value;

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2018-2023 Intel Corporation
* Copyright (C) 2018-2024 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -79,5 +79,6 @@ struct StateBaseAddressHelper {
static uint32_t getMaxBindlessSurfaceStates();
static void programHeaplessStateBaseAddress(STATE_BASE_ADDRESS &sba);
static size_t getSbaCmdSize();
};
} // namespace NEO

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2019-2023 Intel Corporation
* Copyright (C) 2019-2024 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -146,4 +146,9 @@ void StateBaseAddressHelper<GfxFamily>::programBindingTableBaseAddress(LinearStr
StateBaseAddressHelper<GfxFamily>::programBindingTableBaseAddress(commandStream, ssh.getHeapGpuBase(), ssh.getHeapSizeInPages(), gmmHelper);
}
// Returns the size, in bytes, of a single STATE_BASE_ADDRESS command for the
// given hardware family.
template <typename GfxFamily>
inline size_t StateBaseAddressHelper<GfxFamily>::getSbaCmdSize() {
    using STATE_BASE_ADDRESS = typename GfxFamily::STATE_BASE_ADDRESS;
    return sizeof(STATE_BASE_ADDRESS);
}
} // namespace NEO

View File

@@ -13,6 +13,7 @@
using Family = NEO::XeHpcCoreFamily;
#include "shared/source/command_stream/command_stream_receiver_hw_dg2_and_later.inl"
#include "shared/source/command_stream/command_stream_receiver_hw_heap_addressing.inl"
#include "shared/source/command_stream/command_stream_receiver_hw_xehp_and_later.inl"
#include "shared/source/gmm_helper/gmm.h"
#include "shared/source/helpers/blit_commands_helper_xehp_and_later.inl"

View File

@@ -12,6 +12,7 @@
using Family = NEO::XeHpgCoreFamily;
#include "shared/source/command_stream/command_stream_receiver_hw_dg2_and_later.inl"
#include "shared/source/command_stream/command_stream_receiver_hw_heap_addressing.inl"
#include "shared/source/command_stream/command_stream_receiver_hw_xehp_and_later.inl"
#include "shared/source/helpers/blit_commands_helper_xehp_and_later.inl"
#include "shared/source/helpers/populate_factory.h"

View File

@@ -55,9 +55,11 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw<GfxFamily>, publ
using BaseClass::dcFlushSupport;
using BaseClass::directSubmission;
using BaseClass::dshState;
using BaseClass::getCmdSizeForHeaplessPrologue;
using BaseClass::getCmdSizeForPrologue;
using BaseClass::getScratchPatchAddress;
using BaseClass::getScratchSpaceController;
using BaseClass::handleAllocationsResidencyForHeaplessProlog;
using BaseClass::handleFrontEndStateTransition;
using BaseClass::handlePipelineSelectStateTransition;
using BaseClass::handleStateBaseAddressStateTransition;
@@ -68,6 +70,7 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw<GfxFamily>, publ
using BaseClass::isDirectSubmissionEnabled;
using BaseClass::isPerDssBackedBufferSent;
using BaseClass::makeResident;
using BaseClass::pageTableManagerInitialized;
using BaseClass::perDssBackedBuffer;
using BaseClass::postInitFlagsSetup;
using BaseClass::programActivePartitionConfig;
@@ -84,6 +87,7 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw<GfxFamily>, publ
using BaseClass::sshState;
using BaseClass::staticWorkPartitioningEnabled;
using BaseClass::streamProperties;
using BaseClass::wasSubmittedToSingleSubdevice;
using BaseClass::CommandStreamReceiver::activePartitions;
using BaseClass::CommandStreamReceiver::activePartitionsConfig;
@@ -97,6 +101,7 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw<GfxFamily>, publ
using BaseClass::CommandStreamReceiver::commandStream;
using BaseClass::CommandStreamReceiver::debugConfirmationFunction;
using BaseClass::CommandStreamReceiver::debugPauseStateAddress;
using BaseClass::CommandStreamReceiver::debugSurface;
using BaseClass::CommandStreamReceiver::deviceBitfield;
using BaseClass::CommandStreamReceiver::dispatchMode;
using BaseClass::CommandStreamReceiver::doubleSbaWa;
@@ -134,6 +139,7 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw<GfxFamily>, publ
using BaseClass::CommandStreamReceiver::ownershipMutex;
using BaseClass::CommandStreamReceiver::perfCounterAllocator;
using BaseClass::CommandStreamReceiver::pipelineSupportFlags;
using BaseClass::CommandStreamReceiver::preemptionAllocation;
using BaseClass::CommandStreamReceiver::profilingTimeStampAllocator;
using BaseClass::CommandStreamReceiver::requestedPreallocationsAmount;
using BaseClass::CommandStreamReceiver::requiredScratchSlot0Size;
@@ -145,6 +151,7 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw<GfxFamily>, publ
using BaseClass::CommandStreamReceiver::stateComputeModeDirty;
using BaseClass::CommandStreamReceiver::submissionAggregator;
using BaseClass::CommandStreamReceiver::tagAddress;
using BaseClass::CommandStreamReceiver::tagAllocation;
using BaseClass::CommandStreamReceiver::taskCount;
using BaseClass::CommandStreamReceiver::taskLevel;
using BaseClass::CommandStreamReceiver::timestampPacketAllocator;

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2021-2023 Intel Corporation
* Copyright (C) 2021-2024 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -28,6 +28,20 @@ CompletionStamp MockCommandStreamReceiver::flushTask(
return stamp;
}
// Mock of the stateless flush: nothing is submitted. The task count is bumped
// and a stamp built from the new task count, the caller-provided task level and
// the current flush stamp is returned. Stream, heaps, flags and device are ignored.
CompletionStamp MockCommandStreamReceiver::flushTaskStateless(
    LinearStream &commandStream,
    size_t commandStreamStart,
    const IndirectHeap *dsh,
    const IndirectHeap *ioh,
    const IndirectHeap *ssh,
    TaskCountType taskLevel,
    DispatchFlags &dispatchFlags,
    Device &device) {
    ++taskCount;
    return {taskCount, taskLevel, flushStamp->peekStamp()};
}
CompletionStamp MockCommandStreamReceiver::flushBcsTask(LinearStream &commandStreamTask, size_t commandStreamTaskStart,
const DispatchBcsFlags &dispatchBcsFlags, const HardwareInfo &hwInfo) {
++taskCount;

View File

@@ -119,6 +119,16 @@ class MockCommandStreamReceiver : public CommandStreamReceiver {
DispatchFlags &dispatchFlags,
Device &device) override;
CompletionStamp flushTaskStateless(
LinearStream &commandStream,
size_t commandStreamStart,
const IndirectHeap *dsh,
const IndirectHeap *ioh,
const IndirectHeap *ssh,
TaskCountType taskLevel,
DispatchFlags &dispatchFlags,
Device &device) override;
CompletionStamp flushImmediateTask(
LinearStream &immediateCommandStream,
size_t immediateCommandStreamStart,
@@ -207,7 +217,7 @@ class MockCommandStreamReceiver : public CommandStreamReceiver {
}
return isLocked;
}
SubmissionStatus initializeDeviceWithFirstSubmission() override { return SubmissionStatus::success; }
SubmissionStatus initializeDeviceWithFirstSubmission(Device &device) override { return SubmissionStatus::success; }
static constexpr size_t tagSize = 256;
static volatile TagAddressType mockTagAddress[tagSize];

View File

@@ -3028,10 +3028,10 @@ HWTEST_F(CommandStreamReceiverHwTest, givenOutOfMemoryFailureOnFlushWhenInitiali
commandStreamReceiver.flushReturnValue = SubmissionStatus::outOfMemory;
EXPECT_EQ(SubmissionStatus::outOfMemory, commandStreamReceiver.initializeDeviceWithFirstSubmission());
EXPECT_EQ(SubmissionStatus::outOfMemory, commandStreamReceiver.initializeDeviceWithFirstSubmission(*pDevice));
commandStreamReceiver.flushReturnValue = SubmissionStatus::outOfHostMemory;
EXPECT_EQ(SubmissionStatus::outOfHostMemory, commandStreamReceiver.initializeDeviceWithFirstSubmission());
EXPECT_EQ(SubmissionStatus::outOfHostMemory, commandStreamReceiver.initializeDeviceWithFirstSubmission(*pDevice));
}
HWTEST_F(CommandStreamReceiverHwTest, whenFlushTagUpdateThenSetStallingCmdsFlag) {
@@ -5022,3 +5022,53 @@ HWTEST2_F(CommandStreamReceiverHwTest, givenImplicitScalingEnabledWhenProgrammin
EXPECT_EQ(estimatedCmdSize, offset);
}
// With ForcePipeControlPriorToWalker set, addPipeControlFlushTaskIfNeeded must
// leave a PIPE_CONTROL with command streamer stall enabled in the stream.
HWTEST_F(CommandStreamReceiverHwTest, givenForcePipeControlPriorToWalkerWhenAddPipeControlFlushTaskIfNeededThenStallingPcIsProgrammed) {
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;

    DebugManagerStateRestore dbgRestorer;
    debugManager.flags.ForcePipeControlPriorToWalker.set(1);

    auto &ultCsr = pDevice->getUltCommandStreamReceiver<FamilyType>();
    ultCsr.addPipeControlFlushTaskIfNeeded(commandStream, 0);

    // Parse everything written so far and look for the first PIPE_CONTROL.
    GenCmdList parsedCommands;
    CmdParse<FamilyType>::parseCommandBuffer(parsedCommands,
                                             commandStream.getCpuBase(),
                                             commandStream.getUsed());

    auto pipeControlIt = find<PIPE_CONTROL *>(parsedCommands.begin(), parsedCommands.end());
    ASSERT_NE(parsedCommands.end(), pipeControlIt);

    auto pipeControl = genCmdCast<PIPE_CONTROL *>(*pipeControlIt);
    EXPECT_TRUE(pipeControl->getCommandStreamerStallEnable());
}
// A CSR bound to a regular BCS engine context must report success from
// initializeDeviceWithFirstSubmission.
HWTEST_F(CommandStreamReceiverTest, givenBcsCsrWhenInitializeDeviceWithFirstSubmissionIsCalledThenSuccessIsReturned) {
    MockOsContext bcsOsContext(0, EngineDescriptorHelper::getDefaultDescriptor({aub_stream::EngineType::ENGINE_BCS, EngineUsage::regular}));

    MockCsrHw<FamilyType> bcsCsr(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
    bcsCsr.setupContext(bcsOsContext);
    bcsCsr.initializeTagAllocation();

    const auto submissionStatus = bcsCsr.initializeDeviceWithFirstSubmission(*pDevice);
    EXPECT_EQ(SubmissionStatus::success, submissionStatus);
}
using CommandStreamReceiverHwHeaplessTest = Test<DeviceFixture>;

// Every heapless entry point of the CSR is expected to throw when called here.
HWTEST_F(CommandStreamReceiverHwHeaplessTest, whenHeaplessCommandStreamReceiverFunctionsAreCalledThenExceptionIsThrown) {
    auto csr = std::make_unique<UltCommandStreamReceiver<FamilyType>>(*pDevice->executionEnvironment, rootDeviceIndex, pDevice->getDeviceBitfield());

    // Stream without backing storage; the calls below are expected to throw.
    LinearStream emptyStream(0, 0);

    // Flush and prolog programming.
    EXPECT_ANY_THROW(csr->flushTaskStateless(emptyStream, 0, nullptr, nullptr, nullptr, 0, csr->recordedDispatchFlags, *pDevice));
    EXPECT_ANY_THROW(csr->programHeaplessProlog(*pDevice));
    EXPECT_ANY_THROW(csr->programStateBaseAddressHeapless(*pDevice, emptyStream));
    EXPECT_ANY_THROW(csr->programComputeModeHeapless(*pDevice, emptyStream));
    EXPECT_ANY_THROW(csr->getCmdSizeForHeaplessPrologue(*pDevice));
    EXPECT_ANY_THROW(csr->handleAllocationsResidencyForHeaplessProlog(emptyStream, *pDevice));
    EXPECT_ANY_THROW(csr->programHeaplessStateProlog(*pDevice, emptyStream));
    EXPECT_ANY_THROW(csr->handleAllocationsResidencyForflushTaskStateless(nullptr, nullptr, nullptr));

    // Size estimation helpers.
    EXPECT_ANY_THROW(csr->getRequiredCmdStreamHeaplessSize(csr->recordedDispatchFlags, *pDevice));
    EXPECT_ANY_THROW(csr->getRequiredCmdStreamHeaplessSizeAligned(csr->recordedDispatchFlags, *pDevice));
}

View File

@@ -1097,8 +1097,6 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, DeviceTests, givenCCSEngineAndContextGroupSizeEnabl
UltDeviceFactory deviceFactory{1, 0, executionEnvironment};
MemoryManager::maxOsContextCount++;
deviceFactory.rootDevices[0]->createEngine(0, {aub_stream::EngineType::ENGINE_CCS, EngineUsage::regular});
auto defaultEngine = deviceFactory.rootDevices[0]->getDefaultEngine();