Add concurrent kernel execution type

Related-To: NEO-4940
Signed-off-by: Sebastian Luzynski <sebastian.jozef.luzynski@intel.com>
This commit is contained in:
Sebastian Luzynski
2020-11-17 11:42:29 +01:00
committed by Compute-Runtime-Automation
parent 85ce7a5866
commit 556b6cd457
19 changed files with 124 additions and 17 deletions

View File

@@ -287,6 +287,7 @@ class CommandStreamReceiver {
uint32_t requiredScratchSize = 0;
uint32_t requiredPrivateScratchSize = 0;
uint32_t lastAdditionalKernelExecInfo = AdditionalKernelExecInfo::NotSet;
KernelExecutionType lastKernelExecutionType = KernelExecutionType::Default;
const uint32_t rootDeviceIndex;
const DeviceBitfield deviceBitfield;

View File

@@ -311,6 +311,10 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
setMediaVFEStateDirty(true);
}
if (dispatchFlags.kernelExecutionType != KernelExecutionType::NotApplicable && lastKernelExecutionType != dispatchFlags.kernelExecutionType) {
setMediaVFEStateDirty(true);
}
auto &commandStreamCSR = this->getCS(getRequiredCmdStreamSizeAligned(dispatchFlags, device));
auto commandStreamStartCSR = commandStreamCSR.getUsed();
@@ -872,7 +876,10 @@ inline void CommandStreamReceiverHw<GfxFamily>::programVFEState(LinearStream &cs
if (dispatchFlags.additionalKernelExecInfo != AdditionalKernelExecInfo::NotApplicable) {
lastAdditionalKernelExecInfo = dispatchFlags.additionalKernelExecInfo;
}
auto commandOffset = PreambleHelper<GfxFamily>::programVFEState(&csr, peekHwInfo(), requiredScratchSize, getScratchPatchAddress(), maxFrontEndThreads, getOsContext().getEngineType(), lastAdditionalKernelExecInfo);
if (dispatchFlags.kernelExecutionType != KernelExecutionType::NotApplicable) {
lastKernelExecutionType = dispatchFlags.kernelExecutionType;
}
auto commandOffset = PreambleHelper<GfxFamily>::programVFEState(&csr, peekHwInfo(), requiredScratchSize, getScratchPatchAddress(), maxFrontEndThreads, getOsContext().getEngineType(), lastAdditionalKernelExecInfo, lastKernelExecutionType);
if (DebugManager.flags.AddPatchInfoCommentsForAUBDump.get()) {
flatBatchBufferHelper->collectScratchSpacePatchInfo(getScratchPatchAddress(), commandOffset, csr);
}

View File

@@ -14,6 +14,8 @@
#include "shared/source/helpers/pipeline_select_args.h"
#include "shared/source/kernel/grf_config.h"
#include "opencl/source/kernel/kernel_execution_type.h"
#include "csr_properties_flags.h"
#include <limits>
@@ -50,7 +52,7 @@ struct DispatchFlags {
DispatchFlags() = delete;
DispatchFlags(CsrDependencies csrDependencies, TimestampPacketContainer *barrierTimestampPacketNodes, PipelineSelectArgs pipelineSelectArgs,
FlushStampTrackingObj *flushStampReference, QueueThrottle throttle, PreemptionMode preemptionMode, uint32_t numGrfRequired,
uint32_t l3CacheSettings, uint32_t threadArbitrationPolicy, uint32_t additionalKernelExecInfo, uint64_t sliceCount, bool blocking, bool dcFlush,
uint32_t l3CacheSettings, uint32_t threadArbitrationPolicy, uint32_t additionalKernelExecInfo, KernelExecutionType kernelExecutionType, uint64_t sliceCount, bool blocking, bool dcFlush,
bool useSLM, bool guardCommandBufferWithPipeControl, bool gsba32BitRequired,
bool requiresCoherency, bool lowPriority, bool implicitFlush,
bool outOfOrderExecutionAllowed, bool epilogueRequired, bool usePerDSSbackedBuffer) : csrDependencies(csrDependencies),
@@ -63,6 +65,7 @@ struct DispatchFlags {
l3CacheSettings(l3CacheSettings),
threadArbitrationPolicy(threadArbitrationPolicy),
additionalKernelExecInfo(additionalKernelExecInfo),
kernelExecutionType(kernelExecutionType),
sliceCount(sliceCount),
blocking(blocking),
dcFlush(dcFlush),
@@ -85,6 +88,7 @@ struct DispatchFlags {
uint32_t l3CacheSettings = L3CachingSettings::l3CacheOn;
uint32_t threadArbitrationPolicy = ThreadArbitrationPolicy::NotPresent;
uint32_t additionalKernelExecInfo = AdditionalKernelExecInfo::NotApplicable;
KernelExecutionType kernelExecutionType = KernelExecutionType::NotApplicable;
uint64_t sliceCount = QueueSliceCount::defaultSliceCount;
uint64_t engineHints = 0;
bool blocking = false;

View File

@@ -8,6 +8,8 @@
#pragma once
#include "shared/source/helpers/pipeline_select_helper.h"
#include "opencl/source/kernel/kernel_execution_type.h"
#include "engine_node.h"
#include "igfxfmid.h"
@@ -42,7 +44,8 @@ struct PreambleHelper {
uint64_t scratchAddress,
uint32_t maxFrontEndThreads,
aub_stream::EngineType engineType,
uint32_t additionalKernelExecInfo);
uint32_t additionalKernelExecInfo,
KernelExecutionType kernelExecutionType);
static void programAdditionalFieldsInVfeState(VFE_STATE_TYPE *mediaVfeState, const HardwareInfo &hwInfo);
static void programPreamble(LinearStream *pCommandStream, Device &device, uint32_t l3Config,
uint32_t requiredThreadArbitrationPolicy, GraphicsAllocation *preemptionCsr, GraphicsAllocation *perDssBackedBuffer);

View File

@@ -9,6 +9,8 @@
#include "shared/source/helpers/hw_helper.h"
#include "shared/source/helpers/preamble_base.inl"
#include "opencl/source/kernel/kernel_execution_type.h"
namespace NEO {
template <typename GfxFamily>
@@ -31,7 +33,8 @@ uint64_t PreambleHelper<GfxFamily>::programVFEState(LinearStream *pCommandStream
uint64_t scratchAddress,
uint32_t maxFrontEndThreads,
aub_stream::EngineType engineType,
uint32_t additionalExecInfo) {
uint32_t additionalExecInfo,
KernelExecutionType kernelExecutionType) {
using MEDIA_VFE_STATE = typename GfxFamily::MEDIA_VFE_STATE;
addPipeControlBeforeVfeCmd(pCommandStream, &hwInfo, engineType);

View File

@@ -58,7 +58,9 @@ GEN11TEST_F(Gen11PreambleVfeState, GivenWaOffWhenProgrammingVfeStateThenProgramm
typedef typename ICLFamily::PIPE_CONTROL PIPE_CONTROL;
testWaTable->waSendMIFLUSHBeforeVFE = 0;
LinearStream &cs = linearStream;
PreambleHelper<ICLFamily>::programVFEState(&linearStream, pPlatform->getClDevice(0)->getHardwareInfo(), 0u, 0, 168u, aub_stream::EngineType::ENGINE_RCS, AdditionalKernelExecInfo::NotApplicable);
PreambleHelper<ICLFamily>::programVFEState(&linearStream, pPlatform->getClDevice(0)->getHardwareInfo(), 0u, 0, 168u,
aub_stream::EngineType::ENGINE_RCS, AdditionalKernelExecInfo::NotApplicable,
KernelExecutionType::NotApplicable);
parseCommands<ICLFamily>(cs);
@@ -76,7 +78,9 @@ GEN11TEST_F(Gen11PreambleVfeState, GivenWaOnWhenProgrammingVfeStateThenProgrammi
typedef typename ICLFamily::PIPE_CONTROL PIPE_CONTROL;
testWaTable->waSendMIFLUSHBeforeVFE = 1;
LinearStream &cs = linearStream;
PreambleHelper<ICLFamily>::programVFEState(&linearStream, pPlatform->getClDevice(0)->getHardwareInfo(), 0u, 0, 168u, aub_stream::EngineType::ENGINE_RCS, AdditionalKernelExecInfo::NotApplicable);
PreambleHelper<ICLFamily>::programVFEState(&linearStream, pPlatform->getClDevice(0)->getHardwareInfo(), 0u, 0, 168u,
aub_stream::EngineType::ENGINE_RCS, AdditionalKernelExecInfo::NotApplicable,
KernelExecutionType::NotApplicable);
parseCommands<ICLFamily>(cs);

View File

@@ -62,7 +62,9 @@ HWTEST2_F(Gen12LpPreambleVfeState, GivenWaOffWhenProgrammingVfeStateThenProgramm
typedef typename FamilyType::PIPE_CONTROL PIPE_CONTROL;
testWaTable->waSendMIFLUSHBeforeVFE = 0;
LinearStream &cs = linearStream;
PreambleHelper<FamilyType>::programVFEState(&linearStream, pPlatform->getClDevice(0)->getHardwareInfo(), 0u, 0, 672u, aub_stream::EngineType::ENGINE_RCS, AdditionalKernelExecInfo::NotApplicable);
PreambleHelper<FamilyType>::programVFEState(&linearStream, pPlatform->getClDevice(0)->getHardwareInfo(), 0u, 0, 672u,
aub_stream::EngineType::ENGINE_RCS, AdditionalKernelExecInfo::NotApplicable,
KernelExecutionType::NotApplicable);
parseCommands<FamilyType>(cs);
@@ -82,7 +84,9 @@ HWTEST2_F(Gen12LpPreambleVfeState, givenCcsEngineWhenWaIsSetThenAppropriatePipeC
testWaTable->waSendMIFLUSHBeforeVFE = 1;
LinearStream &cs = linearStream;
PreambleHelper<FamilyType>::programVFEState(&linearStream, pPlatform->getClDevice(0)->getHardwareInfo(), 0u, 0, 672u, aub_stream::EngineType::ENGINE_CCS, AdditionalKernelExecInfo::NotApplicable);
PreambleHelper<FamilyType>::programVFEState(&linearStream, pPlatform->getClDevice(0)->getHardwareInfo(), 0u, 0, 672u,
aub_stream::EngineType::ENGINE_CCS, AdditionalKernelExecInfo::NotApplicable,
KernelExecutionType::NotApplicable);
parseCommands<FamilyType>(cs);
@@ -101,7 +105,9 @@ HWTEST2_F(Gen12LpPreambleVfeState, givenRcsEngineWhenWaIsSetThenAppropriatePipeC
testWaTable->waSendMIFLUSHBeforeVFE = 1;
LinearStream &cs = linearStream;
PreambleHelper<FamilyType>::programVFEState(&linearStream, pPlatform->getClDevice(0)->getHardwareInfo(), 0u, 0, 672u, aub_stream::EngineType::ENGINE_RCS, AdditionalKernelExecInfo::NotApplicable);
PreambleHelper<FamilyType>::programVFEState(&linearStream, pPlatform->getClDevice(0)->getHardwareInfo(), 0u, 0, 672u,
aub_stream::EngineType::ENGINE_RCS, AdditionalKernelExecInfo::NotApplicable,
KernelExecutionType::NotApplicable);
parseCommands<FamilyType>(cs);

View File

@@ -95,7 +95,8 @@ BDWTEST_F(PreambleVfeState, WhenProgrammingVfeStateThenProgrammingIsCorrect) {
typedef BDWFamily::PIPE_CONTROL PIPE_CONTROL;
LinearStream &cs = linearStream;
PreambleHelper<BDWFamily>::programVFEState(&linearStream, *defaultHwInfo, 0u, 0, 168u, aub_stream::EngineType::ENGINE_RCS, AdditionalKernelExecInfo::NotApplicable);
PreambleHelper<BDWFamily>::programVFEState(&linearStream, *defaultHwInfo, 0u, 0, 168u, aub_stream::EngineType::ENGINE_RCS,
AdditionalKernelExecInfo::NotApplicable, KernelExecutionType::NotApplicable);
parseCommands<BDWFamily>(cs);

View File

@@ -107,7 +107,9 @@ GEN9TEST_F(PreambleVfeState, GivenWaOffWhenProgrammingVfeStateThenProgrammingIsC
typedef typename FamilyType::PIPE_CONTROL PIPE_CONTROL;
testWaTable->waSendMIFLUSHBeforeVFE = 0;
LinearStream &cs = linearStream;
PreambleHelper<FamilyType>::programVFEState(&linearStream, pPlatform->getClDevice(0)->getHardwareInfo(), 0u, 0, 168u, aub_stream::EngineType::ENGINE_RCS, AdditionalKernelExecInfo::NotApplicable);
PreambleHelper<FamilyType>::programVFEState(&linearStream, pPlatform->getClDevice(0)->getHardwareInfo(), 0u, 0, 168u,
aub_stream::EngineType::ENGINE_RCS, AdditionalKernelExecInfo::NotApplicable,
KernelExecutionType::NotApplicable);
parseCommands<FamilyType>(cs);
@@ -125,7 +127,9 @@ GEN9TEST_F(PreambleVfeState, GivenWaOnWhenProgrammingVfeStateThenProgrammingIsCo
typedef typename FamilyType::PIPE_CONTROL PIPE_CONTROL;
testWaTable->waSendMIFLUSHBeforeVFE = 1;
LinearStream &cs = linearStream;
PreambleHelper<FamilyType>::programVFEState(&linearStream, pPlatform->getClDevice(0)->getHardwareInfo(), 0u, 0, 168u, aub_stream::EngineType::ENGINE_RCS, AdditionalKernelExecInfo::NotApplicable);
PreambleHelper<FamilyType>::programVFEState(&linearStream, pPlatform->getClDevice(0)->getHardwareInfo(), 0u, 0, 168u,
aub_stream::EngineType::ENGINE_RCS, AdditionalKernelExecInfo::NotApplicable,
KernelExecutionType::NotApplicable);
parseCommands<FamilyType>(cs);

View File

@@ -25,6 +25,7 @@ struct DispatchFlagsHelper {
L3CachingSettings::l3CacheOn, //l3CacheSettings
ThreadArbitrationPolicy::NotPresent, //threadArbitrationPolicy
AdditionalKernelExecInfo::NotApplicable, //additionalKernelExecInfo
KernelExecutionType::NotApplicable, //kernelExecutionType
QueueSliceCount::defaultSliceCount, //sliceCount
false, //blocking
false, //dcFlush

View File

@@ -222,7 +222,9 @@ HWCMDTEST_F(IGFX_GEN8_CORE, PreambleTest, givenPreambleHelperWhenMediaVfeStateIs
FlatBatchBufferHelperHw<FamilyType> helper(*mockDevice->getExecutionEnvironment());
uint64_t addressToPatch = 0xC0DEC0DE;
auto offset = PreambleHelper<FamilyType>::programVFEState(&preambleStream, mockDevice->getHardwareInfo(), 1024u, addressToPatch, 10u, aub_stream::EngineType::ENGINE_RCS, AdditionalKernelExecInfo::NotApplicable);
auto offset = PreambleHelper<FamilyType>::programVFEState(&preambleStream, mockDevice->getHardwareInfo(), 1024u, addressToPatch,
10u, aub_stream::EngineType::ENGINE_RCS, AdditionalKernelExecInfo::NotApplicable,
KernelExecutionType::NotApplicable);
EXPECT_NE(0u, offset);
}