fix: Invalidate constant cache when ULLS enabled

Related-To: NEO-8067

Signed-off-by: Lukasz Jobczyk <lukasz.jobczyk@intel.com>
This commit is contained in:
Lukasz Jobczyk 2023-06-21 11:40:18 +00:00 committed by Compute-Runtime-Automation
parent 9d96adc558
commit a61b39c47b
10 changed files with 101 additions and 0 deletions

View File

@ -3141,6 +3141,15 @@ void CommandListCoreFamily<gfxCoreFamily>::dispatchPostSyncCommands(const CmdLis
pipeControlArgs.dcFlushEnable = getDcFlushRequired(signalScope);
pipeControlArgs.workloadPartitionOffset = eventOperations.workPartitionOperation;
const auto &productHelper = this->device->getNEODevice()->getRootDeviceEnvironment().template getHelper<NEO::ProductHelper>();
if (productHelper.isDirectSubmissionConstantCacheInvalidationNeeded(this->device->getHwInfo())) {
if (this->cmdListType == CommandListType::TYPE_IMMEDIATE) {
pipeControlArgs.constantCacheInvalidationEnable = this->csr->isDirectSubmissionEnabled();
} else {
pipeControlArgs.constantCacheInvalidationEnable = productHelper.isDirectSubmissionSupported(this->device->getHwInfo());
}
}
NEO::MemorySynchronizationCommands<GfxFamily>::addBarrierWithPostSyncOperation(
*commandContainer.getCommandStream(),
NEO::PostSyncMode::ImmediateData,

View File

@ -198,6 +198,7 @@ struct MockCommandListImmediate : public CommandListCoreFamilyImmediate<gfxCoreF
using BaseClass::isFlushTaskSubmissionEnabled;
using BaseClass::isSyncModeQueue;
using BaseClass::isTbxMode;
using BaseClass::partitionCount;
using BaseClass::pipeControlMultiKernelEventSync;
using BaseClass::requiredStreamState;
using CommandList::kernelWithAssertAppended;
@ -567,6 +568,7 @@ class MockCommandListImmediateHw : public WhiteBox<::L0::CommandListCoreFamilyIm
public:
using BaseClass = WhiteBox<::L0::CommandListCoreFamilyImmediate<gfxCoreFamily>>;
MockCommandListImmediateHw() : BaseClass() {}
using BaseClass::appendSignalEventPostWalker;
using BaseClass::applyMemoryRangesBarrier;
using BaseClass::cmdListType;
using BaseClass::copyThroughLockedPtrEnabled;

View File

@ -343,6 +343,69 @@ HWTEST2_F(CommandListAppendUsedPacketSignalEvent,
EXPECT_TRUE(cmd->getCommandStreamerStallEnable());
EXPECT_EQ(gpuAddress, NEO::UnitTestHelper<FamilyType>::getPipeControlPostSyncAddress(*cmd));
EXPECT_EQ(MemorySynchronizationCommands<FamilyType>::getDcFlushEnable(true, device->getNEODevice()->getRootDeviceEnvironment()), cmd->getDcFlushEnable());
auto &productHelper = device->getNEODevice()->getRootDeviceEnvironment().getHelper<NEO::ProductHelper>();
EXPECT_EQ(productHelper.isDirectSubmissionConstantCacheInvalidationNeeded(device->getHwInfo()) && productHelper.isDirectSubmissionSupported(device->getHwInfo()), cmd->getConstantCacheInvalidationEnable());
EXPECT_TRUE(cmd->getWorkloadPartitionIdOffsetEnable());
postSyncFound++;
gpuAddress += event->getSinglePacketSize();
}
}
EXPECT_EQ(1u, postSyncFound);
}
HWTEST2_F(CommandListAppendUsedPacketSignalEvent,
givenMultiTileImmediateCommandListWhenAppendingScopeEventSignalAfterWalkerThenExpectPartitionedPipeControl, IsAtLeastXeHpCore) {
using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
using POST_SYNC_OPERATION = typename PIPE_CONTROL::POST_SYNC_OPERATION;
using MI_BATCH_BUFFER_END = typename FamilyType::MI_BATCH_BUFFER_END;
auto commandList = std::make_unique<::L0::ult::MockCommandListImmediateHw<gfxCoreFamily>>();
ASSERT_NE(nullptr, commandList);
ze_result_t returnValue = commandList->initialize(device, NEO::EngineGroupType::Compute, 0u);
EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);
commandList->csr = device->getNEODevice()->getDefaultEngine().commandStreamReceiver;
commandList->cmdListType = CommandList::CommandListType::TYPE_IMMEDIATE;
auto cmdStream = commandList->getCmdContainer().getCommandStream();
size_t useSize = cmdStream->getAvailableSpace();
useSize -= sizeof(MI_BATCH_BUFFER_END);
cmdStream->getSpace(useSize);
constexpr uint32_t packets = 2u;
event->setEventTimestampFlag(false);
event->signalScope = ZE_EVENT_SCOPE_FLAG_HOST;
commandList->partitionCount = packets;
commandList->appendSignalEventPostWalker(event.get());
EXPECT_EQ(packets, event->getPacketsInUse());
auto gpuAddress = event->getCompletionFieldGpuAddress(device);
size_t expectedSize = NEO::MemorySynchronizationCommands<GfxFamily>::getSizeForBarrierWithPostSyncOperation(device->getNEODevice()->getRootDeviceEnvironment(), false);
size_t usedSize = cmdStream->getUsed();
EXPECT_EQ(expectedSize, usedSize);
GenCmdList cmdList;
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
cmdList,
cmdStream->getCpuBase(),
usedSize));
auto pipeControlList = findAll<PIPE_CONTROL *>(cmdList.begin(), cmdList.end());
ASSERT_NE(0u, pipeControlList.size());
uint32_t postSyncFound = 0;
for (auto &it : pipeControlList) {
auto cmd = genCmdCast<PIPE_CONTROL *>(*it);
if (cmd->getPostSyncOperation() == POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA) {
EXPECT_EQ(Event::STATE_SIGNALED, cmd->getImmediateData());
EXPECT_TRUE(cmd->getCommandStreamerStallEnable());
EXPECT_EQ(gpuAddress, NEO::UnitTestHelper<FamilyType>::getPipeControlPostSyncAddress(*cmd));
EXPECT_EQ(MemorySynchronizationCommands<FamilyType>::getDcFlushEnable(true, device->getNEODevice()->getRootDeviceEnvironment()), cmd->getDcFlushEnable());
auto &productHelper = device->getNEODevice()->getRootDeviceEnvironment().getHelper<NEO::ProductHelper>();
EXPECT_EQ(productHelper.isDirectSubmissionConstantCacheInvalidationNeeded(device->getHwInfo()) && commandList->csr->isDirectSubmissionEnabled(), cmd->getConstantCacheInvalidationEnable());
EXPECT_TRUE(cmd->getWorkloadPartitionIdOffsetEnable());
postSyncFound++;
gpuAddress += event->getSinglePacketSize();

View File

@ -103,6 +103,7 @@ class ProductHelper {
virtual bool isAllocationSizeAdjustmentRequired(const HardwareInfo &hwInfo) const = 0;
virtual bool isNewResidencyModelSupported() const = 0;
virtual bool isDirectSubmissionSupported(const HardwareInfo &hwInfo) const = 0;
// Reports whether this product needs constant cache invalidation in connection with direct submission.
// CommandListCoreFamily::dispatchPostSyncCommands consults this before setting
// constantCacheInvalidationEnable on the pipe control arguments of the post-sync barrier.
virtual bool isDirectSubmissionConstantCacheInvalidationNeeded(const HardwareInfo &hwInfo) const = 0;
virtual std::pair<bool, bool> isPipeControlPriorToNonPipelinedStateCommandsWARequired(const HardwareInfo &hwInfo, bool isRcs, const ReleaseHelper *releaseHelper) const = 0;
virtual bool heapInLocalMem(const HardwareInfo &hwInfo) const = 0;
virtual void setCapabilityCoherencyFlag(const HardwareInfo &hwInfo, bool &coherencyFlag) = 0;

View File

@ -322,6 +322,11 @@ bool ProductHelperHw<gfxProduct>::isDirectSubmissionSupported(const HardwareInfo
return false;
}
// Generic implementation: returns false, so constant cache invalidation is not requested
// unless a product supplies its own specialization of this method.
// hwInfo is not inspected here.
template <PRODUCT_FAMILY gfxProduct>
bool ProductHelperHw<gfxProduct>::isDirectSubmissionConstantCacheInvalidationNeeded(const HardwareInfo &hwInfo) const {
return false;
}
template <PRODUCT_FAMILY gfxProduct>
bool ProductHelperHw<gfxProduct>::isForceEmuInt32DivRemSPWARequired(const HardwareInfo &hwInfo) const {
return false;

View File

@ -56,6 +56,7 @@ class ProductHelperHw : public ProductHelper {
bool isAllocationSizeAdjustmentRequired(const HardwareInfo &hwInfo) const override;
bool isNewResidencyModelSupported() const override;
bool isDirectSubmissionSupported(const HardwareInfo &hwInfo) const override;
bool isDirectSubmissionConstantCacheInvalidationNeeded(const HardwareInfo &hwInfo) const override;
std::pair<bool, bool> isPipeControlPriorToNonPipelinedStateCommandsWARequired(const HardwareInfo &hwInfo, bool isRcs, const ReleaseHelper *releaseHelper) const override;
bool heapInLocalMem(const HardwareInfo &hwInfo) const override;
void setCapabilityCoherencyFlag(const HardwareInfo &hwInfo, bool &coherencyFlag) override;

View File

@ -41,6 +41,11 @@ uint32_t ProductHelperHw<gfxProduct>::getSteppingFromHwRevId(const HardwareInfo
return CommonConstants::invalidStepping;
}
// Explicit specialization for the product selected by `gfxProduct` (defined outside this excerpt):
// unconditionally reports that constant cache invalidation is needed; hwInfo is not inspected.
// NOTE(review): the MTL product helper test added in the same change expects true,
// so this is presumably the MTL specialization — confirm against the enclosing file.
template <>
bool ProductHelperHw<gfxProduct>::isDirectSubmissionConstantCacheInvalidationNeeded(const HardwareInfo &hwInfo) const {
return true;
}
template <>
std::pair<bool, bool> ProductHelperHw<gfxProduct>::isPipeControlPriorToNonPipelinedStateCommandsWARequired(const HardwareInfo &hwInfo, bool isRcs, const ReleaseHelper *releaseHelper) const {
auto isBasicWARequired = true;

View File

@ -165,6 +165,11 @@ bool ProductHelperHw<IGFX_UNKNOWN>::isDirectSubmissionSupported(const HardwareIn
return false;
}
// IGFX_UNKNOWN specialization: never requests constant cache invalidation; hwInfo is not inspected.
template <>
bool ProductHelperHw<IGFX_UNKNOWN>::isDirectSubmissionConstantCacheInvalidationNeeded(const HardwareInfo &hwInfo) const {
return false;
}
template <>
LocalMemoryAccessMode ProductHelperHw<IGFX_UNKNOWN>::getDefaultLocalMemoryAccessMode(const HardwareInfo &hwInfo) const {
return LocalMemoryAccessMode::Default;

View File

@ -173,6 +173,11 @@ PVCTEST_F(PvcProductHelper, givenPvcProductHelperWhenCheckDirectSubmissionSuppor
EXPECT_TRUE(productHelper->isDirectSubmissionSupported(hwInfo));
}
// Checks that the PVC product helper reports no need for constant cache invalidation
// when queried with the default hardware info.
PVCTEST_F(PvcProductHelper, givenPvcProductHelperWhenCheckDirectSubmissionConstantCacheInvalidationNeededThenFalseIsReturned) {
    // The query takes a const reference, so bind to the default info instead of copying it.
    const auto &hwInfo = *defaultHwInfo;
    EXPECT_FALSE(productHelper->isDirectSubmissionConstantCacheInvalidationNeeded(hwInfo));
}
PVCTEST_F(PvcProductHelper, givenPvcProductHelperWhenCheckCopyEngineSelectorEnabledThenFalseIsReturned) {
auto hwInfo = *defaultHwInfo;
EXPECT_FALSE(productHelper->isCopyEngineSelectorEnabled(hwInfo));

View File

@ -67,6 +67,11 @@ MTLTEST_F(MtlProductHelper, whenGettingAubstreamProductFamilyThenProperEnumValue
EXPECT_EQ(aub_stream::ProductFamily::Mtl, productHelper->getAubStreamProductFamily());
}
// Checks that the MTL product helper reports that constant cache invalidation is needed
// when queried with the default hardware info.
MTLTEST_F(MtlProductHelper, givenMtlProductHelperWhenCheckDirectSubmissionConstantCacheInvalidationNeededThenTrueIsReturned) {
    // The query takes a const reference, so bind to the default info instead of copying it.
    const auto &hwInfo = *defaultHwInfo;
    EXPECT_TRUE(productHelper->isDirectSubmissionConstantCacheInvalidationNeeded(hwInfo));
}
// Checks that isInitBuiltinAsyncSupported returns false for the default hardware info.
MTLTEST_F(MtlProductHelper, givenMtlProductHelperWhenIsInitBuiltinAsyncSupportedThenReturnFalse) {
EXPECT_FALSE(productHelper->isInitBuiltinAsyncSupported(*defaultHwInfo));
}