[perf] add state compute mode dirty flag to allow selective properties update
- full properties update is a time-intensive task and must be done only once - selective update can be done after the initial update - dirty flag allows distinguishing whether the initial update has been done Related-To: NEO-5055 Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
parent
d93f00e075
commit
24c8f089ed
|
@ -653,9 +653,9 @@ size_t CommandQueueHw<gfxCoreFamily>::estimateLinearStreamSizeComplementary(
|
||||||
linearStreamSizeEstimate += estimatePipelineSelectCmdSize();
|
linearStreamSizeEstimate += estimatePipelineSelectCmdSize();
|
||||||
|
|
||||||
if (this->stateComputeModeTracking || this->pipelineSelectStateTracking || frontEndTrackingEnabled() || this->stateBaseAddressTracking) {
|
if (this->stateComputeModeTracking || this->pipelineSelectStateTracking || frontEndTrackingEnabled() || this->stateBaseAddressTracking) {
|
||||||
auto streamPropertiesCopy = csr->getStreamProperties();
|
auto streamPropertiesCopy = this->csr->getStreamProperties();
|
||||||
bool frontEndStateDirtyCopy = ctx.frontEndStateDirty;
|
bool frontEndStateDirtyCopy = ctx.frontEndStateDirty;
|
||||||
bool gpgpuEnabledCopy = csr->getPreambleSetFlag();
|
bool gpgpuEnabledCopy = this->csr->getPreambleSetFlag();
|
||||||
bool baseAdresStateDirtyCopy = ctx.gsbaStateDirty;
|
bool baseAdresStateDirtyCopy = ctx.gsbaStateDirty;
|
||||||
for (uint32_t i = 0; i < numCommandLists; i++) {
|
for (uint32_t i = 0; i < numCommandLists; i++) {
|
||||||
auto cmdList = CommandList::fromHandle(phCommandLists[i]);
|
auto cmdList = CommandList::fromHandle(phCommandLists[i]);
|
||||||
|
@ -1194,11 +1194,10 @@ size_t CommandQueueHw<gfxCoreFamily>::estimateScmCmdSizeForMultipleCommandLists(
|
||||||
size_t estimatedSize = 0;
|
size_t estimatedSize = 0;
|
||||||
|
|
||||||
bool isRcs = this->getCsr()->isRcs();
|
bool isRcs = this->getCsr()->isRcs();
|
||||||
size_t singleScmCmdSize = NEO::EncodeComputeMode<GfxFamily>::getCmdSizeForComputeMode(device->getNEODevice()->getRootDeviceEnvironment(), false, isRcs);
|
|
||||||
|
|
||||||
csrStateCopy.stateComputeMode.setProperties(cmdListRequired.stateComputeMode);
|
csrStateCopy.stateComputeMode.setProperties(cmdListRequired.stateComputeMode);
|
||||||
if (csrStateCopy.stateComputeMode.isDirty()) {
|
if (csrStateCopy.stateComputeMode.isDirty()) {
|
||||||
estimatedSize += singleScmCmdSize;
|
estimatedSize = NEO::EncodeComputeMode<GfxFamily>::getCmdSizeForComputeMode(device->getNEODevice()->getRootDeviceEnvironment(), false, isRcs);
|
||||||
}
|
}
|
||||||
csrStateCopy.stateComputeMode.setProperties(cmdListFinal.stateComputeMode);
|
csrStateCopy.stateComputeMode.setProperties(cmdListFinal.stateComputeMode);
|
||||||
|
|
||||||
|
@ -1227,6 +1226,7 @@ void CommandQueueHw<gfxCoreFamily>::programRequiredStateComputeModeForCommandLis
|
||||||
bool isRcs = this->getCsr()->isRcs();
|
bool isRcs = this->getCsr()->isRcs();
|
||||||
NEO::EncodeComputeMode<GfxFamily>::programComputeModeCommandWithSynchronization(commandStream, csrState.stateComputeMode, pipelineSelectArgs,
|
NEO::EncodeComputeMode<GfxFamily>::programComputeModeCommandWithSynchronization(commandStream, csrState.stateComputeMode, pipelineSelectArgs,
|
||||||
false, device->getNEODevice()->getRootDeviceEnvironment(), isRcs, this->getCsr()->getDcFlushSupport(), nullptr);
|
false, device->getNEODevice()->getRootDeviceEnvironment(), isRcs, this->getCsr()->getDcFlushSupport(), nullptr);
|
||||||
|
this->csr->setStateComputeModeDirty(false);
|
||||||
}
|
}
|
||||||
csrState.stateComputeMode.setProperties(cmdListFinal.stateComputeMode);
|
csrState.stateComputeMode.setProperties(cmdListFinal.stateComputeMode);
|
||||||
}
|
}
|
||||||
|
|
|
@ -593,6 +593,7 @@ void CmdListThreadArbitrationFixture::testBody() {
|
||||||
|
|
||||||
auto &commandListStream = *commandList->commandContainer.getCommandStream();
|
auto &commandListStream = *commandList->commandContainer.getCommandStream();
|
||||||
auto &cmdQueueStream = commandQueue->commandStream;
|
auto &cmdQueueStream = commandQueue->commandStream;
|
||||||
|
auto queueCsr = commandQueue->getCsr();
|
||||||
|
|
||||||
GenCmdList cmdList;
|
GenCmdList cmdList;
|
||||||
std::vector<GenCmdList::iterator> stateComputeModeList;
|
std::vector<GenCmdList::iterator> stateComputeModeList;
|
||||||
|
@ -622,11 +623,15 @@ void CmdListThreadArbitrationFixture::testBody() {
|
||||||
stateComputeModeList.clear();
|
stateComputeModeList.clear();
|
||||||
commandList->close();
|
commandList->close();
|
||||||
|
|
||||||
|
EXPECT_TRUE(queueCsr->getStateComputeModeDirty());
|
||||||
|
|
||||||
sizeBefore = cmdQueueStream.getUsed();
|
sizeBefore = cmdQueueStream.getUsed();
|
||||||
result = commandQueue->executeCommandLists(1, &commandListHandle, nullptr, false);
|
result = commandQueue->executeCommandLists(1, &commandListHandle, nullptr, false);
|
||||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||||
sizeAfter = cmdQueueStream.getUsed();
|
sizeAfter = cmdQueueStream.getUsed();
|
||||||
|
|
||||||
|
EXPECT_FALSE(queueCsr->getStateComputeModeDirty());
|
||||||
|
|
||||||
EXPECT_EQ(NEO::ThreadArbitrationPolicy::AgeBased, csrState.stateComputeMode.threadArbitrationPolicy.value);
|
EXPECT_EQ(NEO::ThreadArbitrationPolicy::AgeBased, csrState.stateComputeMode.threadArbitrationPolicy.value);
|
||||||
|
|
||||||
currentBuffer = ptrOffset(cmdQueueStream.getCpuBase(), sizeBefore);
|
currentBuffer = ptrOffset(cmdQueueStream.getCpuBase(), sizeBefore);
|
||||||
|
@ -850,6 +855,7 @@ void CmdListLargeGrfFixture::testBody() {
|
||||||
auto &cmdlistRequiredState = commandList->getRequiredStreamState();
|
auto &cmdlistRequiredState = commandList->getRequiredStreamState();
|
||||||
auto &cmdListFinalState = commandList->getFinalStreamState();
|
auto &cmdListFinalState = commandList->getFinalStreamState();
|
||||||
auto &csrState = commandQueue->csr->getStreamProperties();
|
auto &csrState = commandQueue->csr->getStreamProperties();
|
||||||
|
auto queueCsr = commandQueue->getCsr();
|
||||||
|
|
||||||
auto commandListHandle = commandList->toHandle();
|
auto commandListHandle = commandList->toHandle();
|
||||||
|
|
||||||
|
@ -884,11 +890,15 @@ void CmdListLargeGrfFixture::testBody() {
|
||||||
stateComputeModeList.clear();
|
stateComputeModeList.clear();
|
||||||
commandList->close();
|
commandList->close();
|
||||||
|
|
||||||
|
EXPECT_TRUE(queueCsr->getStateComputeModeDirty());
|
||||||
|
|
||||||
sizeBefore = cmdQueueStream.getUsed();
|
sizeBefore = cmdQueueStream.getUsed();
|
||||||
result = commandQueue->executeCommandLists(1, &commandListHandle, nullptr, false);
|
result = commandQueue->executeCommandLists(1, &commandListHandle, nullptr, false);
|
||||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||||
sizeAfter = cmdQueueStream.getUsed();
|
sizeAfter = cmdQueueStream.getUsed();
|
||||||
|
|
||||||
|
EXPECT_FALSE(queueCsr->getStateComputeModeDirty());
|
||||||
|
|
||||||
EXPECT_EQ(0, csrState.stateComputeMode.largeGrfMode.value);
|
EXPECT_EQ(0, csrState.stateComputeMode.largeGrfMode.value);
|
||||||
|
|
||||||
currentBuffer = ptrOffset(cmdQueueStream.getCpuBase(), sizeBefore);
|
currentBuffer = ptrOffset(cmdQueueStream.getCpuBase(), sizeBefore);
|
||||||
|
|
|
@ -143,6 +143,7 @@ struct UltCommandStreamReceiverTest
|
||||||
commandStreamReceiver.isStateSipSent = true;
|
commandStreamReceiver.isStateSipSent = true;
|
||||||
commandStreamReceiver.lastPreemptionMode = pDevice->getPreemptionMode();
|
commandStreamReceiver.lastPreemptionMode = pDevice->getPreemptionMode();
|
||||||
commandStreamReceiver.setMediaVFEStateDirty(false);
|
commandStreamReceiver.setMediaVFEStateDirty(false);
|
||||||
|
commandStreamReceiver.stateComputeModeDirty = false;
|
||||||
auto gmmHelper = pDevice->getGmmHelper();
|
auto gmmHelper = pDevice->getGmmHelper();
|
||||||
auto &gfxCoreHelper = pDevice->getGfxCoreHelper();
|
auto &gfxCoreHelper = pDevice->getGfxCoreHelper();
|
||||||
auto mocsIndex = gfxCoreHelper.getMocsIndex(*gmmHelper, true, isL1CacheEnabled);
|
auto mocsIndex = gfxCoreHelper.getMocsIndex(*gmmHelper, true, isL1CacheEnabled);
|
||||||
|
|
|
@ -534,6 +534,7 @@ void CommandStreamReceiver::initProgrammingFlags() {
|
||||||
bindingTableBaseAddressRequired = true;
|
bindingTableBaseAddressRequired = true;
|
||||||
mediaVfeStateDirty = true;
|
mediaVfeStateDirty = true;
|
||||||
lastVmeSubslicesConfig = false;
|
lastVmeSubslicesConfig = false;
|
||||||
|
stateComputeModeDirty = true;
|
||||||
|
|
||||||
lastSentL3Config = 0;
|
lastSentL3Config = 0;
|
||||||
lastMediaSamplerConfig = -1;
|
lastMediaSamplerConfig = -1;
|
||||||
|
|
|
@ -159,10 +159,13 @@ class CommandStreamReceiver {
|
||||||
void overrideDispatchPolicy(DispatchMode overrideValue) { this->dispatchMode = overrideValue; }
|
void overrideDispatchPolicy(DispatchMode overrideValue) { this->dispatchMode = overrideValue; }
|
||||||
|
|
||||||
void setMediaVFEStateDirty(bool dirty) { mediaVfeStateDirty = dirty; }
|
void setMediaVFEStateDirty(bool dirty) { mediaVfeStateDirty = dirty; }
|
||||||
bool getMediaVFEStateDirty() { return mediaVfeStateDirty; }
|
bool getMediaVFEStateDirty() const { return mediaVfeStateDirty; }
|
||||||
|
|
||||||
void setGSBAStateDirty(bool dirty) { GSBAStateDirty = dirty; }
|
void setGSBAStateDirty(bool dirty) { GSBAStateDirty = dirty; }
|
||||||
bool getGSBAStateDirty() { return GSBAStateDirty; }
|
bool getGSBAStateDirty() const { return GSBAStateDirty; }
|
||||||
|
|
||||||
|
void setStateComputeModeDirty(bool dirty) { stateComputeModeDirty = dirty; }
|
||||||
|
bool getStateComputeModeDirty() const { return stateComputeModeDirty; }
|
||||||
|
|
||||||
void setRequiredScratchSizes(uint32_t newRequiredScratchSize, uint32_t newRequiredPrivateScratchSize);
|
void setRequiredScratchSizes(uint32_t newRequiredScratchSize, uint32_t newRequiredPrivateScratchSize);
|
||||||
GraphicsAllocation *getScratchAllocation();
|
GraphicsAllocation *getScratchAllocation();
|
||||||
|
@ -507,6 +510,7 @@ class CommandStreamReceiver {
|
||||||
bool bindingTableBaseAddressRequired = false;
|
bool bindingTableBaseAddressRequired = false;
|
||||||
bool heapStorageRequiresRecyclingTag = false;
|
bool heapStorageRequiresRecyclingTag = false;
|
||||||
bool mediaVfeStateDirty = true;
|
bool mediaVfeStateDirty = true;
|
||||||
|
bool stateComputeModeDirty = true;
|
||||||
bool lastVmeSubslicesConfig = false;
|
bool lastVmeSubslicesConfig = false;
|
||||||
bool timestampPacketWriteEnabled = false;
|
bool timestampPacketWriteEnabled = false;
|
||||||
bool staticWorkPartitioningEnabled = false;
|
bool staticWorkPartitioningEnabled = false;
|
||||||
|
|
|
@ -804,6 +804,7 @@ void CommandStreamReceiverHw<GfxFamily>::programComputeMode(LinearStream &stream
|
||||||
EncodeComputeMode<GfxFamily>::programComputeModeCommandWithSynchronization(
|
EncodeComputeMode<GfxFamily>::programComputeModeCommandWithSynchronization(
|
||||||
stream, this->streamProperties.stateComputeMode, dispatchFlags.pipelineSelectArgs,
|
stream, this->streamProperties.stateComputeMode, dispatchFlags.pipelineSelectArgs,
|
||||||
hasSharedHandles(), this->peekRootDeviceEnvironment(), isRcs(), this->dcFlushSupport, logicalStateHelper.get());
|
hasSharedHandles(), this->peekRootDeviceEnvironment(), isRcs(), this->dcFlushSupport, logicalStateHelper.get());
|
||||||
|
this->setStateComputeModeDirty(false);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -124,6 +124,7 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw<GfxFamily>, publ
|
||||||
using BaseClass::CommandStreamReceiver::samplerCacheFlushRequired;
|
using BaseClass::CommandStreamReceiver::samplerCacheFlushRequired;
|
||||||
using BaseClass::CommandStreamReceiver::sbaSupportFlags;
|
using BaseClass::CommandStreamReceiver::sbaSupportFlags;
|
||||||
using BaseClass::CommandStreamReceiver::scratchSpaceController;
|
using BaseClass::CommandStreamReceiver::scratchSpaceController;
|
||||||
|
using BaseClass::CommandStreamReceiver::stateComputeModeDirty;
|
||||||
using BaseClass::CommandStreamReceiver::submissionAggregator;
|
using BaseClass::CommandStreamReceiver::submissionAggregator;
|
||||||
using BaseClass::CommandStreamReceiver::tagAddress;
|
using BaseClass::CommandStreamReceiver::tagAddress;
|
||||||
using BaseClass::CommandStreamReceiver::taskCount;
|
using BaseClass::CommandStreamReceiver::taskCount;
|
||||||
|
|
|
@ -198,6 +198,7 @@ HWTEST_F(CommandStreamReceiverTest, WhenCreatingCsrThenFlagsAreSetCorrectly) {
|
||||||
EXPECT_FALSE(csr.isPreambleSent);
|
EXPECT_FALSE(csr.isPreambleSent);
|
||||||
EXPECT_FALSE(csr.GSBAFor32BitProgrammed);
|
EXPECT_FALSE(csr.GSBAFor32BitProgrammed);
|
||||||
EXPECT_TRUE(csr.mediaVfeStateDirty);
|
EXPECT_TRUE(csr.mediaVfeStateDirty);
|
||||||
|
EXPECT_TRUE(csr.stateComputeModeDirty);
|
||||||
EXPECT_FALSE(csr.lastVmeSubslicesConfig);
|
EXPECT_FALSE(csr.lastVmeSubslicesConfig);
|
||||||
EXPECT_EQ(0u, csr.lastSentL3Config);
|
EXPECT_EQ(0u, csr.lastSentL3Config);
|
||||||
EXPECT_EQ(-1, csr.lastMediaSamplerConfig);
|
EXPECT_EQ(-1, csr.lastMediaSamplerConfig);
|
||||||
|
@ -2999,3 +3000,28 @@ HWTEST2_F(CommandStreamReceiverHwTest,
|
||||||
ASSERT_NE(nullptr, hwParserCsr.cmdStateBaseAddress);
|
ASSERT_NE(nullptr, hwParserCsr.cmdStateBaseAddress);
|
||||||
EXPECT_EQ(nullptr, hwParserCsr.cmdBindingTableBaseAddress);
|
EXPECT_EQ(nullptr, hwParserCsr.cmdBindingTableBaseAddress);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
HWTEST2_F(CommandStreamReceiverHwTest,
|
||||||
|
givenStateComputeModeDirtyWhenFlushingFirstTimeThenCleanDirtyFlagToDispatchStateComputeMode,
|
||||||
|
IsAtLeastXeHpCore) {
|
||||||
|
using STATE_COMPUTE_MODE = typename FamilyType::STATE_COMPUTE_MODE;
|
||||||
|
|
||||||
|
auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
|
||||||
|
EXPECT_TRUE(commandStreamReceiver.getStateComputeModeDirty());
|
||||||
|
|
||||||
|
commandStreamReceiver.flushTask(commandStream,
|
||||||
|
0,
|
||||||
|
&dsh,
|
||||||
|
&ioh,
|
||||||
|
&ssh,
|
||||||
|
taskLevel,
|
||||||
|
flushTaskFlags,
|
||||||
|
*pDevice);
|
||||||
|
EXPECT_FALSE(commandStreamReceiver.getStateComputeModeDirty());
|
||||||
|
|
||||||
|
HardwareParse hwParserCsr;
|
||||||
|
hwParserCsr.parseCommands<FamilyType>(commandStreamReceiver.commandStream, 0);
|
||||||
|
hwParserCsr.findHardwareCommands<FamilyType>();
|
||||||
|
auto scmCmd = hwParserCsr.getCommand<STATE_COMPUTE_MODE>();
|
||||||
|
EXPECT_NE(nullptr, scmCmd);
|
||||||
|
}
|
||||||
|
|
Loading…
Reference in New Issue