performance(ocl): flag to not dcFlush on no event

If no outEvent is passed to waitForBarrier, do not dcFlush there;
perform the dcFlush on the next synchronize call instead.

Related-To: NEO-8147

Signed-off-by: Dominik Dabek <dominik.dabek@intel.com>
This commit is contained in:
Dominik Dabek
2023-07-13 09:26:41 +00:00
committed by Compute-Runtime-Automation
parent faa8907344
commit 622a3ed89c
17 changed files with 262 additions and 50 deletions

View File

@@ -601,7 +601,7 @@ TEST(CommandStreamReceiverSimpleTest, givenCsrWithoutTagAllocationWhenGetTagAllo
EXPECT_EQ(nullptr, csr.getTagAllocation());
}
TEST(CommandStreamReceiverSimpleTest, givenCsrWhenSubmitiingBatchBufferThenTaskCountIsIncrementedAndLatestsValuesSetCorrectly) {
TEST(CommandStreamReceiverSimpleTest, givenCsrWhenSubmitingBatchBufferThenTaskCountIsIncrementedAndLatestsValuesSetCorrectly) {
MockExecutionEnvironment executionEnvironment;
executionEnvironment.prepareRootDeviceEnvironments(1);
executionEnvironment.initializeMemoryManager();
@@ -4209,3 +4209,96 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverHwTest, givenScratchSpaceSurfa
EXPECT_EQ(scratchController->privateScratchSizeBytes, alignedSizeForPrivateScratch * scratchController->computeUnitsUsedForScratch);
EXPECT_EQ(scratchController->privateScratchSizeBytes, scratchController->getPrivateScratchSpaceAllocation()->getUnderlyingBufferSize());
}
// With dcFlushSupport enabled on the CSR and a DC flush requested by the caller,
// the inspected PIPE_CONTROL programmed by programStallingPostSyncCommandsForBarrier
// must have DC flush enabled.
HWTEST_F(CommandStreamReceiverHwTest, givenDcFlushRequiredWhenProgramStallingPostSyncCommandsForBarrierCalledThenDcFlushSet) {
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;

    auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
    csr.dcFlushSupport = true;
    // The test is skipped when multi-tile operation is enabled.
    if (csr.isMultiTileOperationEnabled()) {
        GTEST_SKIP();
    }

    // Program the barrier commands into a page-sized, stack-backed stream.
    char streamStorage[MemoryConstants::pageSize];
    LinearStream stream(streamStorage, MemoryConstants::pageSize);
    TagNodeBase *timestampNode = csr.getTimestampPacketAllocator()->getTag();
    constexpr bool requestDcFlush = true;
    csr.programStallingPostSyncCommandsForBarrier(stream, *timestampNode, requestDcFlush);

    GenCmdList parsedCommands;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(parsedCommands, stream.getCpuBase(), stream.getUsed()));

    auto pipeControls = findAll<PIPE_CONTROL *>(parsedCommands.begin(), parsedCommands.end());
    ASSERT_GE(pipeControls.size(), 1u);

    // When the barrier workaround is required, the second PIPE_CONTROL is the one
    // inspected (presumably the first belongs to the workaround - TODO confirm).
    auto inspectedIndex = 0u;
    if (MemorySynchronizationCommands<FamilyType>::isBarrierWaRequired(pDevice->getRootDeviceEnvironment())) {
        ASSERT_GE(pipeControls.size(), 2u);
        inspectedIndex = 1u;
    }

    auto pipeControlCmd = genCmdCast<PIPE_CONTROL *>(*pipeControls[inspectedIndex]);
    ASSERT_NE(nullptr, pipeControlCmd);
    EXPECT_TRUE(pipeControlCmd->getDcFlushEnable());
}
// With dcFlushSupport disabled on the CSR, a DC flush requested by the caller must
// not result in DC flush being enabled in the inspected PIPE_CONTROL programmed by
// programStallingPostSyncCommandsForBarrier.
HWTEST_F(CommandStreamReceiverHwTest, givenDcFlushRequiredButNoDcFlushSupportWhenProgramStallingPostSyncCommandsForBarrierCalledThenDcFlushNotSet) {
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;

    auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
    csr.dcFlushSupport = false;
    // The test is skipped when multi-tile operation is enabled.
    if (csr.isMultiTileOperationEnabled()) {
        GTEST_SKIP();
    }

    // Program the barrier commands into a page-sized, stack-backed stream.
    char streamStorage[MemoryConstants::pageSize];
    LinearStream stream(streamStorage, MemoryConstants::pageSize);
    TagNodeBase *timestampNode = csr.getTimestampPacketAllocator()->getTag();
    constexpr bool requestDcFlush = true;
    csr.programStallingPostSyncCommandsForBarrier(stream, *timestampNode, requestDcFlush);

    GenCmdList parsedCommands;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(parsedCommands, stream.getCpuBase(), stream.getUsed()));

    auto pipeControls = findAll<PIPE_CONTROL *>(parsedCommands.begin(), parsedCommands.end());
    ASSERT_GE(pipeControls.size(), 1u);

    // When the barrier workaround is required, the second PIPE_CONTROL is the one
    // inspected (presumably the first belongs to the workaround - TODO confirm).
    auto inspectedIndex = 0u;
    if (MemorySynchronizationCommands<FamilyType>::isBarrierWaRequired(pDevice->getRootDeviceEnvironment())) {
        ASSERT_GE(pipeControls.size(), 2u);
        inspectedIndex = 1u;
    }

    auto pipeControlCmd = genCmdCast<PIPE_CONTROL *>(*pipeControls[inspectedIndex]);
    ASSERT_NE(nullptr, pipeControlCmd);
    EXPECT_FALSE(pipeControlCmd->getDcFlushEnable());
}
// With dcFlushSupport enabled on the CSR but no DC flush requested by the caller,
// the inspected PIPE_CONTROL programmed by programStallingPostSyncCommandsForBarrier
// must not have DC flush enabled.
HWTEST_F(CommandStreamReceiverHwTest, givenDcFlushRequiredFalseWhenProgramStallingPostSyncCommandsForBarrierCalledThenDcFlushNotSet) {
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;

    auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
    csr.dcFlushSupport = true;
    // The test is skipped when multi-tile operation is enabled.
    if (csr.isMultiTileOperationEnabled()) {
        GTEST_SKIP();
    }

    // Program the barrier commands into a page-sized, stack-backed stream.
    char streamStorage[MemoryConstants::pageSize];
    LinearStream stream(streamStorage, MemoryConstants::pageSize);
    TagNodeBase *timestampNode = csr.getTimestampPacketAllocator()->getTag();
    constexpr bool requestDcFlush = false;
    csr.programStallingPostSyncCommandsForBarrier(stream, *timestampNode, requestDcFlush);

    GenCmdList parsedCommands;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(parsedCommands, stream.getCpuBase(), stream.getUsed()));

    auto pipeControls = findAll<PIPE_CONTROL *>(parsedCommands.begin(), parsedCommands.end());
    ASSERT_GE(pipeControls.size(), 1u);

    // When the barrier workaround is required, the second PIPE_CONTROL is the one
    // inspected (presumably the first belongs to the workaround - TODO confirm).
    auto inspectedIndex = 0u;
    if (MemorySynchronizationCommands<FamilyType>::isBarrierWaRequired(pDevice->getRootDeviceEnvironment())) {
        ASSERT_GE(pipeControls.size(), 2u);
        inspectedIndex = 1u;
    }

    auto pipeControlCmd = genCmdCast<PIPE_CONTROL *>(*pipeControls[inspectedIndex]);
    ASSERT_NE(nullptr, pipeControlCmd);
    EXPECT_FALSE(pipeControlCmd->getDcFlushEnable());
}

View File

@@ -95,6 +95,6 @@ struct ComputeModeRequirements : public ::testing::Test {
CommandStreamReceiver *csr = nullptr;
std::unique_ptr<MockDevice> device;
DispatchFlags flags{{}, nullptr, {}, nullptr, QueueThrottle::MEDIUM, PreemptionMode::Disabled, GrfConfig::DefaultGrfNumber, L3CachingSettings::l3CacheOn, ThreadArbitrationPolicy::NotPresent, AdditionalKernelExecInfo::NotApplicable, KernelExecutionType::NotApplicable, MemoryCompressionState::NotApplicable, QueueSliceCount::defaultSliceCount, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false};
DispatchFlags flags{{}, nullptr, {}, nullptr, QueueThrottle::MEDIUM, PreemptionMode::Disabled, GrfConfig::DefaultGrfNumber, L3CachingSettings::l3CacheOn, ThreadArbitrationPolicy::NotPresent, AdditionalKernelExecInfo::NotApplicable, KernelExecutionType::NotApplicable, MemoryCompressionState::NotApplicable, QueueSliceCount::defaultSliceCount, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false};
GraphicsAllocation *alloc = nullptr;
};