performance: add one-time ray tracing context initialization to the immediate flush task

Related-To: NEO-7808

Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
Zbigniew Zdanowicz
2023-06-20 14:57:40 +00:00
committed by Compute-Runtime-Automation
parent a61b39c47b
commit c37dbc4cf0
3 changed files with 62 additions and 9 deletions

View File

@@ -249,10 +249,10 @@ class CommandStreamReceiverHw : public CommandStreamReceiver {
inline void handleImmediateFlushStateBaseAddressState(ImmediateDispatchFlags &dispatchFlags, ImmediateFlushData &flushData, Device &device);
inline void dispatchImmediateFlushStateBaseAddressCommand(ImmediateFlushData &flushData, LinearStream &csrStream, Device &device);
inline void handleImmediateFlushOneTimeContextInitState(ImmediateDispatchFlags &dispatchFlags, ImmediateFlushData &flushData);
inline void dispatchImmediateFlushOneTimeContextInitCommand(ImmediateFlushData &flushData, LinearStream &csrStream);
inline void handleImmediateFlushOneTimeContextInitState(ImmediateDispatchFlags &dispatchFlags, ImmediateFlushData &flushData, Device &device);
inline void dispatchImmediateFlushOneTimeContextInitCommand(ImmediateFlushData &flushData, LinearStream &csrStream, Device &device);
inline void handleImmediateFlushAllocationsResidency();
inline void handleImmediateFlushAllocationsResidency(Device &device);
HeapDirtyState dshState;
HeapDirtyState iohState;

View File

@@ -303,7 +303,7 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushImmediateTask(
handleImmediateFlushFrontEndState(dispatchFlags, flushData);
handleImmediateFlushStateComputeModeState(dispatchFlags, flushData);
handleImmediateFlushStateBaseAddressState(dispatchFlags, flushData, device);
handleImmediateFlushOneTimeContextInitState(dispatchFlags, flushData);
handleImmediateFlushOneTimeContextInitState(dispatchFlags, flushData, device);
auto &csrCommandStream = getCS(flushData.estimatedSize);
@@ -311,9 +311,9 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushImmediateTask(
dispatchImmediateFlushFrontEndCommand(scratchAddress, flushData, device, csrCommandStream);
dispatchImmediateFlushStateComputeModeCommand(flushData, csrCommandStream);
dispatchImmediateFlushStateBaseAddressCommand(flushData, csrCommandStream, device);
dispatchImmediateFlushOneTimeContextInitCommand(flushData, csrCommandStream);
dispatchImmediateFlushOneTimeContextInitCommand(flushData, csrCommandStream, device);
handleImmediateFlushAllocationsResidency();
handleImmediateFlushAllocationsResidency(device);
CompletionStamp completionStamp = {
this->taskCount,
@@ -2002,7 +2002,7 @@ void CommandStreamReceiverHw<GfxFamily>::dispatchImmediateFlushStateBaseAddressC
}
template <typename GfxFamily>
void CommandStreamReceiverHw<GfxFamily>::handleImmediateFlushOneTimeContextInitState(ImmediateDispatchFlags &dispatchFlags, ImmediateFlushData &flushData) {
void CommandStreamReceiverHw<GfxFamily>::handleImmediateFlushOneTimeContextInitState(ImmediateDispatchFlags &dispatchFlags, ImmediateFlushData &flushData, Device &device) {
size_t size = 0;
size = getCmdSizeForPrologue();
@@ -2013,21 +2013,30 @@ void CommandStreamReceiverHw<GfxFamily>::handleImmediateFlushOneTimeContextInitS
flushData.contextOneTimeInit = true;
flushData.estimatedSize += this->getCmdSizeForActivePartitionConfig();
}
if (this->isRayTracingStateProgramingNeeded(device)) {
flushData.contextOneTimeInit = true;
flushData.estimatedSize += this->getCmdSizeForPerDssBackedBuffer(peekHwInfo());
}
}
// Dispatches the one-time context initialization commands into csrStream.
// Nothing is emitted unless flushData.contextOneTimeInit is set. When it is set, the engine
// prologue is programmed first, then the active partition config (only if
// isProgramActivePartitionConfigRequired() reports it) and finally the ray tracing state
// command (only if isRayTracingStateProgramingNeeded(device) reports it).
// NOTE(review): this text is a diff rendering - the two signature lines below look like the
// pre-change and post-change forms of the same definition; presumably only the overload
// taking Device & exists after the commit - confirm against the actual file.
template <typename GfxFamily>
void CommandStreamReceiverHw<GfxFamily>::dispatchImmediateFlushOneTimeContextInitCommand(ImmediateFlushData &flushData, LinearStream &csrStream) {
void CommandStreamReceiverHw<GfxFamily>::dispatchImmediateFlushOneTimeContextInitCommand(ImmediateFlushData &flushData, LinearStream &csrStream, Device &device) {
// contextOneTimeInit is raised by handleImmediateFlushOneTimeContextInitState.
if (flushData.contextOneTimeInit) {
programEnginePrologue(csrStream);
if (this->isProgramActivePartitionConfigRequired()) {
this->programActivePartitionConfig(csrStream);
}
// Same predicate the handle* step uses when it adds getCmdSizeForPerDssBackedBuffer() to the estimated size.
if (this->isRayTracingStateProgramingNeeded(device)) {
this->dispatchRayTracingStateCommand(csrStream, device);
}
}
}
template <typename GfxFamily>
void CommandStreamReceiverHw<GfxFamily>::handleImmediateFlushAllocationsResidency() {
void CommandStreamReceiverHw<GfxFamily>::handleImmediateFlushAllocationsResidency(Device &device) {
if (globalFenceAllocation) {
makeResident(*globalFenceAllocation);
}
@@ -2035,6 +2044,10 @@ void CommandStreamReceiverHw<GfxFamily>::handleImmediateFlushAllocationsResidenc
if (workPartitionAllocation) {
makeResident(*workPartitionAllocation);
}
if (device.getRTMemoryBackedBuffer()) {
makeResident(*device.getRTMemoryBackedBuffer());
}
}
} // namespace NEO

View File

@@ -3741,3 +3741,43 @@ HWTEST2_F(CommandStreamReceiverHwTest,
EXPECT_TRUE(commandStreamReceiver.isMadeResident(commandStreamReceiver.getWorkPartitionAllocation()));
}
// Checks the ray tracing part of the immediate flush task across three consecutive flushes:
//   1. before ray tracing is initialized on the device - no _3DSTATE_BTD in the CSR stream,
//   2. right after initializeRayTracing()            - _3DSTATE_BTD present, RT buffer resident,
//   3. one more flush                                - no new _3DSTATE_BTD, RT buffer still resident.
HWTEST2_F(CommandStreamReceiverHwTest,
          givenImmediateFlushTaskWhenRayTracingAllocationCreatedThenOneTimeRayTracingCommandDispatched,
          IsAtLeastXeHpgCore) {
    using BtdStateCmd = typename FamilyType::_3DSTATE_BTD;

    auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
    csr.storeMakeResidentAllocations = true;

    // Flush #1: ray tracing has not been initialized yet.
    csr.flushImmediateTask(commandStream, commandStream.getUsed(), immediateFlushTaskFlags, *pDevice);

    HardwareParse csrParser;
    csrParser.parseCommands<FamilyType>(csr.commandStream, 0);
    auto btdCmd = csrParser.getCommand<BtdStateCmd>();
    EXPECT_EQ(nullptr, btdCmd);

    pDevice->initializeRayTracing(8);

    // Flush #2: parse only what this flush appended to the CSR stream.
    size_t parseOffset = csr.commandStream.getUsed();
    csr.flushImmediateTask(commandStream, commandStream.getUsed(), immediateFlushTaskFlags, *pDevice);

    csrParser.tearDown();
    csrParser.parseCommands<FamilyType>(csr.commandStream, parseOffset);
    btdCmd = csrParser.getCommand<BtdStateCmd>();
    ASSERT_NE(nullptr, btdCmd);

    EXPECT_TRUE(csr.isMadeResident(pDevice->getRTMemoryBackedBuffer()));

    // Flush #3: the BTD state must not be dispatched again.
    parseOffset = csr.commandStream.getUsed();
    csr.flushImmediateTask(commandStream, commandStream.getUsed(), immediateFlushTaskFlags, *pDevice);

    csrParser.tearDown();
    csrParser.parseCommands<FamilyType>(csr.commandStream, parseOffset);
    btdCmd = csrParser.getCommand<BtdStateCmd>();
    EXPECT_EQ(nullptr, btdCmd);

    EXPECT_TRUE(csr.isMadeResident(pDevice->getRTMemoryBackedBuffer()));
}