Introduce a concept of epilogue.

Change-Id: Ie04607625ed0aca940bef2fa04890f4232a1517b
Signed-off-by: Mrozek, Michal <michal.mrozek@intel.com>
This commit is contained in:
Mrozek, Michal 2019-08-07 19:33:40 +02:00 committed by sys_ocldev
parent 54d23b925d
commit 753e64bc6d
4 changed files with 78 additions and 0 deletions

View File

@ -47,6 +47,7 @@ class CommandStreamReceiverHw : public CommandStreamReceiver {
size_t getRequiredCmdStreamSizeAligned(const DispatchFlags &dispatchFlags, Device &device);
size_t getRequiredCmdSizeForPreamble(Device &device) const;
size_t getCmdSizeForPreemption(const DispatchFlags &dispatchFlags) const;
size_t getCmdSizeForEpilogue(const DispatchFlags &dispatchFlags) const;
size_t getCmdSizeForL3Config() const;
size_t getCmdSizeForPipelineSelect() const;
size_t getCmdSizeForComputeMode();
@ -77,6 +78,7 @@ class CommandStreamReceiverHw : public CommandStreamReceiver {
void programL3(LinearStream &csr, DispatchFlags &dispatchFlags, uint32_t &newL3Config);
void programPreamble(LinearStream &csr, Device &device, DispatchFlags &dispatchFlags, uint32_t &newL3Config);
void programPipelineSelect(LinearStream &csr, DispatchFlags &dispatchFlags);
void programEpilogue(LinearStream &csr, void **batchBufferEndLocation, DispatchFlags &dispatchFlags);
void programMediaSampler(LinearStream &csr, DispatchFlags &dispatchFlags);
void programStateSip(LinearStream &cmdStream, Device &device);
void programVFEState(LinearStream &csr, DispatchFlags &dispatchFlags, uint32_t maxFrontEndThreads);

View File

@ -415,7 +415,11 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
this->makeResident(*commandStreamAllocation);
this->alignToCacheLine(commandStreamCSR);
submitCommandStreamFromCsr = true;
} else if (dispatchFlags.epilogueRequired) {
this->makeResident(*commandStreamCSR.getGraphicsAllocation());
}
this->programEpilogue(commandStreamCSR, &bbEndLocation, dispatchFlags);
} else if (submitCSR) {
this->addBatchBufferEnd(commandStreamCSR, &bbEndLocation);
this->emitNoop(commandStreamCSR, bbEndPaddingSize);
@ -589,6 +593,7 @@ size_t CommandStreamReceiverHw<GfxFamily>::getRequiredCmdStreamSize(const Dispat
size += getCmdSizeForMediaSampler(dispatchFlags.mediaSamplerRequired);
size += getCmdSizeForPipelineSelect();
size += getCmdSizeForPreemption(dispatchFlags);
size += getCmdSizeForEpilogue(dispatchFlags);
if (device.getWaTable()->waSamplerCacheFlushBetweenRedescribedSurfaceReads) {
if (this->samplerCacheFlushRequired != SamplerCacheFlushState::samplerCacheFlushNotRequired) {
@ -779,4 +784,24 @@ void CommandStreamReceiverHw<GfxFamily>::blitBuffer(const BlitProperties &blitPr
}
}
// Programs the epilogue into the CSR stream when dispatchFlags.epilogueRequired is set;
// does nothing otherwise.
//
// csr                    - stream that receives the epilogue commands.
// batchBufferEndLocation - in: points at the location that gets overwritten with an
//                          MI_BATCH_BUFFER_START; it is then handed to addBatchBufferEnd
//                          (presumably updated there to the new end command - confirm).
// dispatchFlags          - only epilogueRequired is read here.
template <typename GfxFamily>
inline void CommandStreamReceiverHw<GfxFamily>::programEpilogue(LinearStream &csr, void **batchBufferEndLocation, DispatchFlags &dispatchFlags) {
    if (!dispatchFlags.epilogueRequired) {
        return;
    }

    // GPU address of the current write position in the CSR stream: allocation base
    // plus the distance between the CPU write pointer and the CPU base.
    auto writeOffset = ptrDiff(csr.getSpace(0u), csr.getCpuBase());
    auto epilogueGpuAddress = ptrOffset(csr.getGraphicsAllocation()->getGpuAddress(), writeOffset);

    // Place a batch buffer start at the previously recorded end location so that it
    // targets the epilogue address computed above.
    auto *jumpLocation = reinterpret_cast<typename GfxFamily::MI_BATCH_BUFFER_START *>(*batchBufferEndLocation);
    addBatchBufferStart(jumpLocation, epilogueGpuAddress, false);

    // Terminate the epilogue and pad the stream to a cache line boundary.
    this->addBatchBufferEnd(csr, batchBufferEndLocation);
    this->alignToCacheLine(csr);
}
// Returns the number of bytes the epilogue occupies in the CSR stream:
// sizeof(MI_BATCH_BUFFER_END) rounded up to a cache line when
// dispatchFlags.epilogueRequired is set, zero otherwise.
template <typename GfxFamily>
inline size_t CommandStreamReceiverHw<GfxFamily>::getCmdSizeForEpilogue(const DispatchFlags &dispatchFlags) const {
    if (!dispatchFlags.epilogueRequired) {
        return 0u;
    }

    auto endCommandSize = sizeof(typename GfxFamily::MI_BATCH_BUFFER_END);
    return alignUp(endCommandSize, MemoryConstants::cacheLineSize);
}
} // namespace NEO

View File

@ -49,6 +49,7 @@ struct DispatchFlags {
bool outOfOrderExecutionAllowed = false;
bool specialPipelineSelectMode = false;
bool multiEngineQueue = false;
bool epilogueRequired = false;
};
struct CsrSizeRequestFlags {

View File

@ -16,6 +16,7 @@
#include "unit_tests/mocks/mock_command_queue.h"
#include "unit_tests/mocks/mock_context.h"
#include "unit_tests/mocks/mock_csr.h"
#include "unit_tests/mocks/mock_device.h"
#include "unit_tests/mocks/mock_event.h"
#include "unit_tests/mocks/mock_kernel.h"
#include "unit_tests/mocks/mock_program.h"
@ -1413,6 +1414,55 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, givenDispatchFlagsWithThrottleSetT
EXPECT_EQ(cmdBuffer->batchBuffer.throttle, QueueThrottle::HIGH);
}
// Verifies that with epilogueRequired set, a directly submitted task ends with an
// MI_BATCH_BUFFER_START pointing at the CSR stream, that the CSR stream holds the
// MI_BATCH_BUFFER_END at its base, and that the CSR stream allocation is made resident.
HWTEST_F(CommandStreamReceiverFlushTaskTests, givenEpilogueRequiredFlagWhenTaskIsSubmittedDirectlyThenItPointsBackToCsr) {
    configureCSRtoNonDirtyState<FamilyType>();
    auto &commandStreamReceiver = this->pDevice->getUltCommandStreamReceiver<FamilyType>();

    // Without the flag no epilogue space is requested.
    DispatchFlags dispatchFlags;
    EXPECT_EQ(0u, commandStreamReceiver.getCmdSizeForEpilogue(dispatchFlags));

    // With the flag exactly one cache line is requested.
    dispatchFlags.epilogueRequired = true;
    dispatchFlags.preemptionMode = PreemptionHelper::getDefaultPreemptionMode(pDevice->getHardwareInfo());
    EXPECT_EQ(MemoryConstants::cacheLineSize, commandStreamReceiver.getCmdSizeForEpilogue(dispatchFlags));

    // Pre-fill one zeroed cache line in the task stream before flushing.
    auto data = commandStream.getSpace(MemoryConstants::cacheLineSize);
    memset(data, 0, MemoryConstants::cacheLineSize);
    commandStreamReceiver.storeMakeResidentAllocations = true;
    commandStreamReceiver.flushTask(commandStream,
                                    0,
                                    dsh,
                                    ioh,
                                    ssh,
                                    taskLevel,
                                    dispatchFlags,
                                    *pDevice);
    auto &commandStreamReceiverStream = commandStreamReceiver.getCS(0u);

    EXPECT_EQ(MemoryConstants::cacheLineSize * 2, commandStream.getUsed());
    EXPECT_EQ(MemoryConstants::cacheLineSize, commandStreamReceiverStream.getUsed());

    // The task stream must not contain a batch buffer end, only a jump to the CSR stream.
    parseCommands<FamilyType>(commandStream, 0);
    auto itBBend = find<typename FamilyType::MI_BATCH_BUFFER_END *>(cmdList.begin(), cmdList.end());
    EXPECT_EQ(itBBend, cmdList.end());

    auto itBatchBufferStart = find<typename FamilyType::MI_BATCH_BUFFER_START *>(cmdList.begin(), cmdList.end());
    // Fatal assertion: the iterator is dereferenced right below.
    ASSERT_NE(itBatchBufferStart, cmdList.end());

    auto batchBufferStart = genCmdCast<typename FamilyType::MI_BATCH_BUFFER_START *>(*itBatchBufferStart);
    EXPECT_EQ(batchBufferStart->getBatchBufferStartAddressGraphicsaddress472(), commandStreamReceiverStream.getGraphicsAllocation()->getGpuAddress());

    // The CSR stream must start with the batch buffer end.
    parseCommands<FamilyType>(commandStreamReceiverStream, 0);

    itBBend = find<typename FamilyType::MI_BATCH_BUFFER_END *>(cmdList.begin(), cmdList.end());
    // Fatal assertion: avoid dereferencing end() when no batch buffer end was emitted.
    ASSERT_NE(itBBend, cmdList.end());
    void *bbEndAddress = *itBBend;

    EXPECT_EQ(commandStreamReceiverStream.getCpuBase(), bbEndAddress);

    EXPECT_TRUE(commandStreamReceiver.isMadeResident(commandStreamReceiverStream.getGraphicsAllocation()));
}
template <typename GfxFamily>
class UltCommandStreamReceiverForDispatchFlags : public UltCommandStreamReceiver<GfxFamily> {