feature: introduce states programming at driver init heapless ocl

Related-To: NEO-7824
Signed-off-by: Kamil Kopryk <kamil.kopryk@intel.com>
This commit is contained in:
Kamil Kopryk
2024-03-07 16:50:57 +00:00
committed by Compute-Runtime-Automation
parent ea69b156d2
commit 168445784e
32 changed files with 383 additions and 96 deletions

View File

@@ -55,9 +55,11 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw<GfxFamily>, publ
using BaseClass::dcFlushSupport;
using BaseClass::directSubmission;
using BaseClass::dshState;
using BaseClass::getCmdSizeForHeaplessPrologue;
using BaseClass::getCmdSizeForPrologue;
using BaseClass::getScratchPatchAddress;
using BaseClass::getScratchSpaceController;
using BaseClass::handleAllocationsResidencyForHeaplessProlog;
using BaseClass::handleFrontEndStateTransition;
using BaseClass::handlePipelineSelectStateTransition;
using BaseClass::handleStateBaseAddressStateTransition;
@@ -68,6 +70,7 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw<GfxFamily>, publ
using BaseClass::isDirectSubmissionEnabled;
using BaseClass::isPerDssBackedBufferSent;
using BaseClass::makeResident;
using BaseClass::pageTableManagerInitialized;
using BaseClass::perDssBackedBuffer;
using BaseClass::postInitFlagsSetup;
using BaseClass::programActivePartitionConfig;
@@ -84,6 +87,7 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw<GfxFamily>, publ
using BaseClass::sshState;
using BaseClass::staticWorkPartitioningEnabled;
using BaseClass::streamProperties;
using BaseClass::wasSubmittedToSingleSubdevice;
using BaseClass::CommandStreamReceiver::activePartitions;
using BaseClass::CommandStreamReceiver::activePartitionsConfig;
@@ -97,6 +101,7 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw<GfxFamily>, publ
using BaseClass::CommandStreamReceiver::commandStream;
using BaseClass::CommandStreamReceiver::debugConfirmationFunction;
using BaseClass::CommandStreamReceiver::debugPauseStateAddress;
using BaseClass::CommandStreamReceiver::debugSurface;
using BaseClass::CommandStreamReceiver::deviceBitfield;
using BaseClass::CommandStreamReceiver::dispatchMode;
using BaseClass::CommandStreamReceiver::doubleSbaWa;
@@ -134,6 +139,7 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw<GfxFamily>, publ
using BaseClass::CommandStreamReceiver::ownershipMutex;
using BaseClass::CommandStreamReceiver::perfCounterAllocator;
using BaseClass::CommandStreamReceiver::pipelineSupportFlags;
using BaseClass::CommandStreamReceiver::preemptionAllocation;
using BaseClass::CommandStreamReceiver::profilingTimeStampAllocator;
using BaseClass::CommandStreamReceiver::requestedPreallocationsAmount;
using BaseClass::CommandStreamReceiver::requiredScratchSlot0Size;
@@ -145,6 +151,7 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw<GfxFamily>, publ
using BaseClass::CommandStreamReceiver::stateComputeModeDirty;
using BaseClass::CommandStreamReceiver::submissionAggregator;
using BaseClass::CommandStreamReceiver::tagAddress;
using BaseClass::CommandStreamReceiver::tagAllocation;
using BaseClass::CommandStreamReceiver::taskCount;
using BaseClass::CommandStreamReceiver::taskLevel;
using BaseClass::CommandStreamReceiver::timestampPacketAllocator;

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2021-2023 Intel Corporation
* Copyright (C) 2021-2024 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -28,6 +28,20 @@ CompletionStamp MockCommandStreamReceiver::flushTask(
return stamp;
}
// Mock implementation of the stateless flush: the stream, heaps, dispatch flags and device
// arguments are not used. It only advances taskCount and reports a completion stamp built
// from the new task count, the caller-supplied task level and the current flush stamp.
CompletionStamp MockCommandStreamReceiver::flushTaskStateless(
    LinearStream &commandStream,
    size_t commandStreamStart,
    const IndirectHeap *dsh,
    const IndirectHeap *ioh,
    const IndirectHeap *ssh,
    TaskCountType taskLevel,
    DispatchFlags &dispatchFlags,
    Device &device) {
    ++taskCount;
    return CompletionStamp{taskCount, taskLevel, flushStamp->peekStamp()};
}
CompletionStamp MockCommandStreamReceiver::flushBcsTask(LinearStream &commandStreamTask, size_t commandStreamTaskStart,
const DispatchBcsFlags &dispatchBcsFlags, const HardwareInfo &hwInfo) {
++taskCount;

View File

@@ -119,6 +119,16 @@ class MockCommandStreamReceiver : public CommandStreamReceiver {
DispatchFlags &dispatchFlags,
Device &device) override;
// Override of the stateless flush entry point for the mock CSR.
// Takes the same argument list as the flushTask override declared above it
// (command stream + start offset, DSH/IOH/SSH heaps, task level, dispatch flags, device)
// and returns the CompletionStamp describing the submission.
CompletionStamp flushTaskStateless(
LinearStream &commandStream,
size_t commandStreamStart,
const IndirectHeap *dsh,
const IndirectHeap *ioh,
const IndirectHeap *ssh,
TaskCountType taskLevel,
DispatchFlags &dispatchFlags,
Device &device) override;
CompletionStamp flushImmediateTask(
LinearStream &immediateCommandStream,
size_t immediateCommandStreamStart,
@@ -207,7 +217,7 @@ class MockCommandStreamReceiver : public CommandStreamReceiver {
}
return isLocked;
}
SubmissionStatus initializeDeviceWithFirstSubmission() override { return SubmissionStatus::success; }
// Mock override: unconditionally reports success; the device argument is not used.
SubmissionStatus initializeDeviceWithFirstSubmission(Device &device) override { return SubmissionStatus::success; }
static constexpr size_t tagSize = 256;
static volatile TagAddressType mockTagAddress[tagSize];

View File

@@ -3028,10 +3028,10 @@ HWTEST_F(CommandStreamReceiverHwTest, givenOutOfMemoryFailureOnFlushWhenInitiali
commandStreamReceiver.flushReturnValue = SubmissionStatus::outOfMemory;
EXPECT_EQ(SubmissionStatus::outOfMemory, commandStreamReceiver.initializeDeviceWithFirstSubmission());
EXPECT_EQ(SubmissionStatus::outOfMemory, commandStreamReceiver.initializeDeviceWithFirstSubmission(*pDevice));
commandStreamReceiver.flushReturnValue = SubmissionStatus::outOfHostMemory;
EXPECT_EQ(SubmissionStatus::outOfHostMemory, commandStreamReceiver.initializeDeviceWithFirstSubmission());
EXPECT_EQ(SubmissionStatus::outOfHostMemory, commandStreamReceiver.initializeDeviceWithFirstSubmission(*pDevice));
}
HWTEST_F(CommandStreamReceiverHwTest, whenFlushTagUpdateThenSetStallingCmdsFlag) {
@@ -5022,3 +5022,53 @@ HWTEST2_F(CommandStreamReceiverHwTest, givenImplicitScalingEnabledWhenProgrammin
EXPECT_EQ(estimatedCmdSize, offset);
}
// With the ForcePipeControlPriorToWalker debug flag set to 1, addPipeControlFlushTaskIfNeeded
// must emit a PIPE_CONTROL whose command-streamer-stall bit is enabled.
HWTEST_F(CommandStreamReceiverHwTest, givenForcePipeControlPriorToWalkerWhenAddPipeControlFlushTaskIfNeededThenStallingPcIsProgrammed) {
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;

    // Restores the debug flags when the test body exits.
    DebugManagerStateRestore flagsRestore;
    debugManager.flags.ForcePipeControlPriorToWalker.set(1);

    auto &ultCsr = pDevice->getUltCommandStreamReceiver<FamilyType>();
    ultCsr.addPipeControlFlushTaskIfNeeded(commandStream, 0);

    // Parse everything that was written to the stream and look for the first PIPE_CONTROL.
    GenCmdList parsedCmds;
    CmdParse<FamilyType>::parseCommandBuffer(parsedCmds, commandStream.getCpuBase(), commandStream.getUsed());

    auto pipeControlIt = find<PIPE_CONTROL *>(parsedCmds.begin(), parsedCmds.end());
    ASSERT_NE(parsedCmds.end(), pipeControlIt);

    auto pipeControl = genCmdCast<PIPE_CONTROL *>(*pipeControlIt);
    EXPECT_TRUE(pipeControl->getCommandStreamerStallEnable());
}
// A CSR whose OS context is a regular BCS engine must report success from
// initializeDeviceWithFirstSubmission.
HWTEST_F(CommandStreamReceiverTest, givenBcsCsrWhenInitializeDeviceWithFirstSubmissionIsCalledThenSuccessIsReturned) {
    const auto bcsDescriptor = EngineDescriptorHelper::getDefaultDescriptor({aub_stream::EngineType::ENGINE_BCS, EngineUsage::regular});
    MockOsContext bcsOsContext(0, bcsDescriptor);

    MockCsrHw<FamilyType> bcsCsr(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
    bcsCsr.setupContext(bcsOsContext);
    bcsCsr.initializeTagAllocation();

    const auto status = bcsCsr.initializeDeviceWithFirstSubmission(*pDevice);
    EXPECT_EQ(SubmissionStatus::success, status);
}
// Fixture alias for the heapless CSR tests; it is the plain device fixture.
using CommandStreamReceiverHwHeaplessTest = Test<DeviceFixture>;
// Every heapless-specific entry point called below on UltCommandStreamReceiver is expected to throw.
HWTEST_F(CommandStreamReceiverHwHeaplessTest, whenHeaplessCommandStreamReceiverFunctionsAreCalledThenExceptionIsThrown) {
    auto ultCsr = std::make_unique<UltCommandStreamReceiver<FamilyType>>(*pDevice->executionEnvironment, rootDeviceIndex, pDevice->getDeviceBitfield());
    LinearStream stream(0, 0);
    auto &dispatchFlags = ultCsr->recordedDispatchFlags;

    EXPECT_ANY_THROW(ultCsr->flushTaskStateless(stream, 0, nullptr, nullptr, nullptr, 0, dispatchFlags, *pDevice));
    EXPECT_ANY_THROW(ultCsr->programHeaplessProlog(*pDevice));
    EXPECT_ANY_THROW(ultCsr->programStateBaseAddressHeapless(*pDevice, stream));
    EXPECT_ANY_THROW(ultCsr->programComputeModeHeapless(*pDevice, stream));
    EXPECT_ANY_THROW(ultCsr->getCmdSizeForHeaplessPrologue(*pDevice));
    EXPECT_ANY_THROW(ultCsr->handleAllocationsResidencyForHeaplessProlog(stream, *pDevice));
    EXPECT_ANY_THROW(ultCsr->programHeaplessStateProlog(*pDevice, stream));
    EXPECT_ANY_THROW(ultCsr->handleAllocationsResidencyForflushTaskStateless(nullptr, nullptr, nullptr));
    EXPECT_ANY_THROW(ultCsr->getRequiredCmdStreamHeaplessSize(dispatchFlags, *pDevice));
    EXPECT_ANY_THROW(ultCsr->getRequiredCmdStreamHeaplessSizeAligned(dispatchFlags, *pDevice));
}

View File

@@ -1097,8 +1097,6 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, DeviceTests, givenCCSEngineAndContextGroupSizeEnabl
UltDeviceFactory deviceFactory{1, 0, executionEnvironment};
MemoryManager::maxOsContextCount++;
deviceFactory.rootDevices[0]->createEngine(0, {aub_stream::EngineType::ENGINE_CCS, EngineUsage::regular});
auto defaultEngine = deviceFactory.rootDevices[0]->getDefaultEngine();