mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-03 06:49:52 +08:00
Add concurrent kernel execution type
Related-To: NEO-4940
Signed-off-by: Sebastian Luzynski <sebastian.jozef.luzynski@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
85ce7a5866
commit
556b6cd457
@@ -287,6 +287,7 @@ class CommandStreamReceiver {
|
||||
uint32_t requiredScratchSize = 0;
|
||||
uint32_t requiredPrivateScratchSize = 0;
|
||||
uint32_t lastAdditionalKernelExecInfo = AdditionalKernelExecInfo::NotSet;
|
||||
KernelExecutionType lastKernelExecutionType = KernelExecutionType::Default;
|
||||
|
||||
const uint32_t rootDeviceIndex;
|
||||
const DeviceBitfield deviceBitfield;
|
||||
|
||||
@@ -311,6 +311,10 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
|
||||
setMediaVFEStateDirty(true);
|
||||
}
|
||||
|
||||
if (dispatchFlags.kernelExecutionType != KernelExecutionType::NotApplicable && lastKernelExecutionType != dispatchFlags.kernelExecutionType) {
|
||||
setMediaVFEStateDirty(true);
|
||||
}
|
||||
|
||||
auto &commandStreamCSR = this->getCS(getRequiredCmdStreamSizeAligned(dispatchFlags, device));
|
||||
auto commandStreamStartCSR = commandStreamCSR.getUsed();
|
||||
|
||||
@@ -872,7 +876,10 @@ inline void CommandStreamReceiverHw<GfxFamily>::programVFEState(LinearStream &cs
|
||||
if (dispatchFlags.additionalKernelExecInfo != AdditionalKernelExecInfo::NotApplicable) {
|
||||
lastAdditionalKernelExecInfo = dispatchFlags.additionalKernelExecInfo;
|
||||
}
|
||||
auto commandOffset = PreambleHelper<GfxFamily>::programVFEState(&csr, peekHwInfo(), requiredScratchSize, getScratchPatchAddress(), maxFrontEndThreads, getOsContext().getEngineType(), lastAdditionalKernelExecInfo);
|
||||
if (dispatchFlags.kernelExecutionType != KernelExecutionType::NotApplicable) {
|
||||
lastKernelExecutionType = dispatchFlags.kernelExecutionType;
|
||||
}
|
||||
auto commandOffset = PreambleHelper<GfxFamily>::programVFEState(&csr, peekHwInfo(), requiredScratchSize, getScratchPatchAddress(), maxFrontEndThreads, getOsContext().getEngineType(), lastAdditionalKernelExecInfo, lastKernelExecutionType);
|
||||
if (DebugManager.flags.AddPatchInfoCommentsForAUBDump.get()) {
|
||||
flatBatchBufferHelper->collectScratchSpacePatchInfo(getScratchPatchAddress(), commandOffset, csr);
|
||||
}
|
||||
|
||||
@@ -14,6 +14,8 @@
|
||||
#include "shared/source/helpers/pipeline_select_args.h"
|
||||
#include "shared/source/kernel/grf_config.h"
|
||||
|
||||
#include "opencl/source/kernel/kernel_execution_type.h"
|
||||
|
||||
#include "csr_properties_flags.h"
|
||||
|
||||
#include <limits>
|
||||
@@ -50,7 +52,7 @@ struct DispatchFlags {
|
||||
DispatchFlags() = delete;
|
||||
DispatchFlags(CsrDependencies csrDependencies, TimestampPacketContainer *barrierTimestampPacketNodes, PipelineSelectArgs pipelineSelectArgs,
|
||||
FlushStampTrackingObj *flushStampReference, QueueThrottle throttle, PreemptionMode preemptionMode, uint32_t numGrfRequired,
|
||||
uint32_t l3CacheSettings, uint32_t threadArbitrationPolicy, uint32_t additionalKernelExecInfo, uint64_t sliceCount, bool blocking, bool dcFlush,
|
||||
uint32_t l3CacheSettings, uint32_t threadArbitrationPolicy, uint32_t additionalKernelExecInfo, KernelExecutionType kernelExecutionType, uint64_t sliceCount, bool blocking, bool dcFlush,
|
||||
bool useSLM, bool guardCommandBufferWithPipeControl, bool gsba32BitRequired,
|
||||
bool requiresCoherency, bool lowPriority, bool implicitFlush,
|
||||
bool outOfOrderExecutionAllowed, bool epilogueRequired, bool usePerDSSbackedBuffer) : csrDependencies(csrDependencies),
|
||||
@@ -63,6 +65,7 @@ struct DispatchFlags {
|
||||
l3CacheSettings(l3CacheSettings),
|
||||
threadArbitrationPolicy(threadArbitrationPolicy),
|
||||
additionalKernelExecInfo(additionalKernelExecInfo),
|
||||
kernelExecutionType(kernelExecutionType),
|
||||
sliceCount(sliceCount),
|
||||
blocking(blocking),
|
||||
dcFlush(dcFlush),
|
||||
@@ -85,6 +88,7 @@ struct DispatchFlags {
|
||||
uint32_t l3CacheSettings = L3CachingSettings::l3CacheOn;
|
||||
uint32_t threadArbitrationPolicy = ThreadArbitrationPolicy::NotPresent;
|
||||
uint32_t additionalKernelExecInfo = AdditionalKernelExecInfo::NotApplicable;
|
||||
KernelExecutionType kernelExecutionType = KernelExecutionType::NotApplicable;
|
||||
uint64_t sliceCount = QueueSliceCount::defaultSliceCount;
|
||||
uint64_t engineHints = 0;
|
||||
bool blocking = false;
|
||||
|
||||
@@ -8,6 +8,8 @@
|
||||
#pragma once
|
||||
#include "shared/source/helpers/pipeline_select_helper.h"
|
||||
|
||||
#include "opencl/source/kernel/kernel_execution_type.h"
|
||||
|
||||
#include "engine_node.h"
|
||||
#include "igfxfmid.h"
|
||||
|
||||
@@ -42,7 +44,8 @@ struct PreambleHelper {
|
||||
uint64_t scratchAddress,
|
||||
uint32_t maxFrontEndThreads,
|
||||
aub_stream::EngineType engineType,
|
||||
uint32_t additionalKernelExecInfo);
|
||||
uint32_t additionalKernelExecInfo,
|
||||
KernelExecutionType kernelExecutionType);
|
||||
static void programAdditionalFieldsInVfeState(VFE_STATE_TYPE *mediaVfeState, const HardwareInfo &hwInfo);
|
||||
static void programPreamble(LinearStream *pCommandStream, Device &device, uint32_t l3Config,
|
||||
uint32_t requiredThreadArbitrationPolicy, GraphicsAllocation *preemptionCsr, GraphicsAllocation *perDssBackedBuffer);
|
||||
|
||||
@@ -9,6 +9,8 @@
|
||||
#include "shared/source/helpers/hw_helper.h"
|
||||
#include "shared/source/helpers/preamble_base.inl"
|
||||
|
||||
#include "opencl/source/kernel/kernel_execution_type.h"
|
||||
|
||||
namespace NEO {
|
||||
|
||||
template <typename GfxFamily>
|
||||
@@ -31,7 +33,8 @@ uint64_t PreambleHelper<GfxFamily>::programVFEState(LinearStream *pCommandStream
|
||||
uint64_t scratchAddress,
|
||||
uint32_t maxFrontEndThreads,
|
||||
aub_stream::EngineType engineType,
|
||||
uint32_t additionalExecInfo) {
|
||||
uint32_t additionalExecInfo,
|
||||
KernelExecutionType kernelExecutionType) {
|
||||
using MEDIA_VFE_STATE = typename GfxFamily::MEDIA_VFE_STATE;
|
||||
|
||||
addPipeControlBeforeVfeCmd(pCommandStream, &hwInfo, engineType);
|
||||
|
||||
@@ -58,7 +58,9 @@ GEN11TEST_F(Gen11PreambleVfeState, GivenWaOffWhenProgrammingVfeStateThenProgramm
|
||||
typedef typename ICLFamily::PIPE_CONTROL PIPE_CONTROL;
|
||||
testWaTable->waSendMIFLUSHBeforeVFE = 0;
|
||||
LinearStream &cs = linearStream;
|
||||
PreambleHelper<ICLFamily>::programVFEState(&linearStream, pPlatform->getClDevice(0)->getHardwareInfo(), 0u, 0, 168u, aub_stream::EngineType::ENGINE_RCS, AdditionalKernelExecInfo::NotApplicable);
|
||||
PreambleHelper<ICLFamily>::programVFEState(&linearStream, pPlatform->getClDevice(0)->getHardwareInfo(), 0u, 0, 168u,
|
||||
aub_stream::EngineType::ENGINE_RCS, AdditionalKernelExecInfo::NotApplicable,
|
||||
KernelExecutionType::NotApplicable);
|
||||
|
||||
parseCommands<ICLFamily>(cs);
|
||||
|
||||
@@ -76,7 +78,9 @@ GEN11TEST_F(Gen11PreambleVfeState, GivenWaOnWhenProgrammingVfeStateThenProgrammi
|
||||
typedef typename ICLFamily::PIPE_CONTROL PIPE_CONTROL;
|
||||
testWaTable->waSendMIFLUSHBeforeVFE = 1;
|
||||
LinearStream &cs = linearStream;
|
||||
PreambleHelper<ICLFamily>::programVFEState(&linearStream, pPlatform->getClDevice(0)->getHardwareInfo(), 0u, 0, 168u, aub_stream::EngineType::ENGINE_RCS, AdditionalKernelExecInfo::NotApplicable);
|
||||
PreambleHelper<ICLFamily>::programVFEState(&linearStream, pPlatform->getClDevice(0)->getHardwareInfo(), 0u, 0, 168u,
|
||||
aub_stream::EngineType::ENGINE_RCS, AdditionalKernelExecInfo::NotApplicable,
|
||||
KernelExecutionType::NotApplicable);
|
||||
|
||||
parseCommands<ICLFamily>(cs);
|
||||
|
||||
|
||||
@@ -62,7 +62,9 @@ HWTEST2_F(Gen12LpPreambleVfeState, GivenWaOffWhenProgrammingVfeStateThenProgramm
|
||||
typedef typename FamilyType::PIPE_CONTROL PIPE_CONTROL;
|
||||
testWaTable->waSendMIFLUSHBeforeVFE = 0;
|
||||
LinearStream &cs = linearStream;
|
||||
PreambleHelper<FamilyType>::programVFEState(&linearStream, pPlatform->getClDevice(0)->getHardwareInfo(), 0u, 0, 672u, aub_stream::EngineType::ENGINE_RCS, AdditionalKernelExecInfo::NotApplicable);
|
||||
PreambleHelper<FamilyType>::programVFEState(&linearStream, pPlatform->getClDevice(0)->getHardwareInfo(), 0u, 0, 672u,
|
||||
aub_stream::EngineType::ENGINE_RCS, AdditionalKernelExecInfo::NotApplicable,
|
||||
KernelExecutionType::NotApplicable);
|
||||
|
||||
parseCommands<FamilyType>(cs);
|
||||
|
||||
@@ -82,7 +84,9 @@ HWTEST2_F(Gen12LpPreambleVfeState, givenCcsEngineWhenWaIsSetThenAppropriatePipeC
|
||||
testWaTable->waSendMIFLUSHBeforeVFE = 1;
|
||||
LinearStream &cs = linearStream;
|
||||
|
||||
PreambleHelper<FamilyType>::programVFEState(&linearStream, pPlatform->getClDevice(0)->getHardwareInfo(), 0u, 0, 672u, aub_stream::EngineType::ENGINE_CCS, AdditionalKernelExecInfo::NotApplicable);
|
||||
PreambleHelper<FamilyType>::programVFEState(&linearStream, pPlatform->getClDevice(0)->getHardwareInfo(), 0u, 0, 672u,
|
||||
aub_stream::EngineType::ENGINE_CCS, AdditionalKernelExecInfo::NotApplicable,
|
||||
KernelExecutionType::NotApplicable);
|
||||
|
||||
parseCommands<FamilyType>(cs);
|
||||
|
||||
@@ -101,7 +105,9 @@ HWTEST2_F(Gen12LpPreambleVfeState, givenRcsEngineWhenWaIsSetThenAppropriatePipeC
|
||||
testWaTable->waSendMIFLUSHBeforeVFE = 1;
|
||||
LinearStream &cs = linearStream;
|
||||
|
||||
PreambleHelper<FamilyType>::programVFEState(&linearStream, pPlatform->getClDevice(0)->getHardwareInfo(), 0u, 0, 672u, aub_stream::EngineType::ENGINE_RCS, AdditionalKernelExecInfo::NotApplicable);
|
||||
PreambleHelper<FamilyType>::programVFEState(&linearStream, pPlatform->getClDevice(0)->getHardwareInfo(), 0u, 0, 672u,
|
||||
aub_stream::EngineType::ENGINE_RCS, AdditionalKernelExecInfo::NotApplicable,
|
||||
KernelExecutionType::NotApplicable);
|
||||
|
||||
parseCommands<FamilyType>(cs);
|
||||
|
||||
|
||||
@@ -95,7 +95,8 @@ BDWTEST_F(PreambleVfeState, WhenProgrammingVfeStateThenProgrammingIsCorrect) {
|
||||
typedef BDWFamily::PIPE_CONTROL PIPE_CONTROL;
|
||||
|
||||
LinearStream &cs = linearStream;
|
||||
PreambleHelper<BDWFamily>::programVFEState(&linearStream, *defaultHwInfo, 0u, 0, 168u, aub_stream::EngineType::ENGINE_RCS, AdditionalKernelExecInfo::NotApplicable);
|
||||
PreambleHelper<BDWFamily>::programVFEState(&linearStream, *defaultHwInfo, 0u, 0, 168u, aub_stream::EngineType::ENGINE_RCS,
|
||||
AdditionalKernelExecInfo::NotApplicable, KernelExecutionType::NotApplicable);
|
||||
|
||||
parseCommands<BDWFamily>(cs);
|
||||
|
||||
|
||||
@@ -107,7 +107,9 @@ GEN9TEST_F(PreambleVfeState, GivenWaOffWhenProgrammingVfeStateThenProgrammingIsC
|
||||
typedef typename FamilyType::PIPE_CONTROL PIPE_CONTROL;
|
||||
testWaTable->waSendMIFLUSHBeforeVFE = 0;
|
||||
LinearStream &cs = linearStream;
|
||||
PreambleHelper<FamilyType>::programVFEState(&linearStream, pPlatform->getClDevice(0)->getHardwareInfo(), 0u, 0, 168u, aub_stream::EngineType::ENGINE_RCS, AdditionalKernelExecInfo::NotApplicable);
|
||||
PreambleHelper<FamilyType>::programVFEState(&linearStream, pPlatform->getClDevice(0)->getHardwareInfo(), 0u, 0, 168u,
|
||||
aub_stream::EngineType::ENGINE_RCS, AdditionalKernelExecInfo::NotApplicable,
|
||||
KernelExecutionType::NotApplicable);
|
||||
|
||||
parseCommands<FamilyType>(cs);
|
||||
|
||||
@@ -125,7 +127,9 @@ GEN9TEST_F(PreambleVfeState, GivenWaOnWhenProgrammingVfeStateThenProgrammingIsCo
|
||||
typedef typename FamilyType::PIPE_CONTROL PIPE_CONTROL;
|
||||
testWaTable->waSendMIFLUSHBeforeVFE = 1;
|
||||
LinearStream &cs = linearStream;
|
||||
PreambleHelper<FamilyType>::programVFEState(&linearStream, pPlatform->getClDevice(0)->getHardwareInfo(), 0u, 0, 168u, aub_stream::EngineType::ENGINE_RCS, AdditionalKernelExecInfo::NotApplicable);
|
||||
PreambleHelper<FamilyType>::programVFEState(&linearStream, pPlatform->getClDevice(0)->getHardwareInfo(), 0u, 0, 168u,
|
||||
aub_stream::EngineType::ENGINE_RCS, AdditionalKernelExecInfo::NotApplicable,
|
||||
KernelExecutionType::NotApplicable);
|
||||
|
||||
parseCommands<FamilyType>(cs);
|
||||
|
||||
|
||||
@@ -25,6 +25,7 @@ struct DispatchFlagsHelper {
|
||||
L3CachingSettings::l3CacheOn, //l3CacheSettings
|
||||
ThreadArbitrationPolicy::NotPresent, //threadArbitrationPolicy
|
||||
AdditionalKernelExecInfo::NotApplicable, //additionalKernelExecInfo
|
||||
KernelExecutionType::NotApplicable, //kernelExecutionType
|
||||
QueueSliceCount::defaultSliceCount, //sliceCount
|
||||
false, //blocking
|
||||
false, //dcFlush
|
||||
|
||||
@@ -222,7 +222,9 @@ HWCMDTEST_F(IGFX_GEN8_CORE, PreambleTest, givenPreambleHelperWhenMediaVfeStateIs
|
||||
FlatBatchBufferHelperHw<FamilyType> helper(*mockDevice->getExecutionEnvironment());
|
||||
uint64_t addressToPatch = 0xC0DEC0DE;
|
||||
|
||||
auto offset = PreambleHelper<FamilyType>::programVFEState(&preambleStream, mockDevice->getHardwareInfo(), 1024u, addressToPatch, 10u, aub_stream::EngineType::ENGINE_RCS, AdditionalKernelExecInfo::NotApplicable);
|
||||
auto offset = PreambleHelper<FamilyType>::programVFEState(&preambleStream, mockDevice->getHardwareInfo(), 1024u, addressToPatch,
|
||||
10u, aub_stream::EngineType::ENGINE_RCS, AdditionalKernelExecInfo::NotApplicable,
|
||||
KernelExecutionType::NotApplicable);
|
||||
EXPECT_NE(0u, offset);
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user