Add flag to control prefetcher disabling behaviour

Certain platforms might not require the prefetcher to
be disabled in direct submission. This change adds a
per-product query and a debug flag to control that behaviour.

Signed-off-by: Rafal Maziejuk <rafal.maziejuk@intel.com>
Related-To: NEO-7218
This commit is contained in:
Rafal Maziejuk
2022-08-10 11:52:06 +00:00
committed by Compute-Runtime-Automation
parent bfc0919999
commit 5e58104f5a
16 changed files with 144 additions and 16 deletions

View File

@@ -313,8 +313,9 @@ DECLARE_DEBUG_VARIABLE(int32_t, EnableRingSwitchTagUpdateWa, -1, "-1: default, 0
DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionReadBackCommandBuffer, -1, "-1: default - disabled, 0 - disable, 1 - enable. If enabled, read first dword of cmd buffer after handling residency.")
DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionReadBackRingBuffer, -1, "-1: default - disabled, 0 - disable, 1 - enable. If enabled, read first dword of ring buffer after handling residency.")
DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionInsertExtraMiMemFenceCommands, -1, "-1: default, 0 - disable, 1 - enable. If enabled, add extra MI_MEM_FENCE instructions with acquire bit set")
DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionInsertSfenceInstructionPriorToSubmission, -1, "-1: default, 0 - disable, 1 - Instert _mm_sfence before unlocking semaphore only, 2 - insert before and after semaphore")
DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionInsertSfenceInstructionPriorToSubmission, -1, "-1: default, 0 - disable, 1 - Insert _mm_sfence before unlocking semaphore only, 2 - insert before and after semaphore")
DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionMaxRingBuffers, -1, "-1: default, >0: max ring buffer count, During switch ring buffer, if there is no available ring, wait for completion instead of allocating new one if DirectSubmissionMaxRingBuffers is reached")
DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionDisablePrefetcher, -1, "-1: default, 0 - disable, 1 - enable. If enabled, disable prefetcher is being dispatched")
DECLARE_DEBUG_VARIABLE(bool, DirectSubmissionPrintBuffers, false, "Print address of submitted command buffers")
/*FEATURE FLAGS*/

View File

@@ -202,5 +202,6 @@ class DirectSubmissionHw {
bool miMemFenceRequired = false;
bool systemMemoryFenceAddressSet = false;
bool completionFenceSupported = false;
bool isDisablePrefetcherRequired = false;
};
} // namespace NEO

View File

@@ -65,6 +65,11 @@ DirectSubmissionHw<GfxFamily, Dispatcher>::DirectSubmissionHw(const DirectSubmis
disableCpuCacheFlush = disableCacheFlushKey == 1 ? true : false;
}
isDisablePrefetcherRequired = hwInfoConfig->isPrefetcherDisablingInDirectSubmissionRequired();
if (DebugManager.flags.DirectSubmissionDisablePrefetcher.get() != -1) {
isDisablePrefetcherRequired = !!DebugManager.flags.DirectSubmissionDisablePrefetcher.get();
}
UNRECOVERABLE_IF(!CpuInfo::getInstance().isFeatureSupported(CpuInfo::featureClflush) && !disableCpuCacheFlush);
createDiagnostic();
@@ -298,7 +303,10 @@ template <typename GfxFamily, typename Dispatcher>
inline size_t DirectSubmissionHw<GfxFamily, Dispatcher>::getSizeSemaphoreSection() {
size_t semaphoreSize = EncodeSempahore<GfxFamily>::getSizeMiSemaphoreWait();
semaphoreSize += getSizePrefetchMitigation();
semaphoreSize += 2 * getSizeDisablePrefetcher();
if (isDisablePrefetcherRequired) {
semaphoreSize += 2 * getSizeDisablePrefetcher();
}
if (miMemFenceRequired) {
semaphoreSize += MemorySynchronizationCommands<GfxFamily>::getSizeForSingleAdditionalSynchronizationForDirectSubmission(*hwInfo);

View File

@@ -13,13 +13,15 @@ template <typename GfxFamily, typename Dispatcher>
inline void DirectSubmissionHw<GfxFamily, Dispatcher>::dispatchDisablePrefetcher(bool disable) {
using MI_ARB_CHECK = typename GfxFamily::MI_ARB_CHECK;
MI_ARB_CHECK arbCheck = GfxFamily::cmdInitArbCheck;
arbCheck.setPreParserDisable(disable);
if (isDisablePrefetcherRequired) {
MI_ARB_CHECK arbCheck = GfxFamily::cmdInitArbCheck;
arbCheck.setPreParserDisable(disable);
EncodeMiArbCheck<GfxFamily>::adjust(arbCheck);
EncodeMiArbCheck<GfxFamily>::adjust(arbCheck);
MI_ARB_CHECK *arbCheckSpace = ringCommandStream.getSpaceForCmd<MI_ARB_CHECK>();
*arbCheckSpace = arbCheck;
MI_ARB_CHECK *arbCheckSpace = ringCommandStream.getSpaceForCmd<MI_ARB_CHECK>();
*arbCheckSpace = arbCheck;
}
}
} // namespace NEO
} // namespace NEO

View File

@@ -13,13 +13,15 @@ template <typename GfxFamily, typename Dispatcher>
inline void DirectSubmissionHw<GfxFamily, Dispatcher>::dispatchDisablePrefetcher(bool disable) {
using MI_ARB_CHECK = typename GfxFamily::MI_ARB_CHECK;
MI_ARB_CHECK arbCheck = GfxFamily::cmdInitArbCheck;
arbCheck.setPreFetchDisable(disable);
if (isDisablePrefetcherRequired) {
MI_ARB_CHECK arbCheck = GfxFamily::cmdInitArbCheck;
arbCheck.setPreFetchDisable(disable);
EncodeMiArbCheck<GfxFamily>::adjust(arbCheck);
EncodeMiArbCheck<GfxFamily>::adjust(arbCheck);
MI_ARB_CHECK *arbCheckSpace = ringCommandStream.getSpaceForCmd<MI_ARB_CHECK>();
*arbCheckSpace = arbCheck;
MI_ARB_CHECK *arbCheckSpace = ringCommandStream.getSpaceForCmd<MI_ARB_CHECK>();
*arbCheckSpace = arbCheck;
}
}
} // namespace NEO
} // namespace NEO

View File

@@ -133,6 +133,7 @@ class HwInfoConfig {
virtual uint32_t getL1CachePolicy() const = 0;
virtual bool isEvictionWhenNecessaryFlagSupported() const = 0;
virtual void adjustNumberOfCcs(HardwareInfo &hwInfo) const = 0;
virtual bool isPrefetcherDisablingInDirectSubmissionRequired() const = 0;
MOCKABLE_VIRTUAL ~HwInfoConfig() = default;
@@ -236,6 +237,7 @@ class HwInfoConfigHw : public HwInfoConfig {
uint32_t getL1CachePolicy() const override;
bool isEvictionWhenNecessaryFlagSupported() const override;
void adjustNumberOfCcs(HardwareInfo &hwInfo) const override;
bool isPrefetcherDisablingInDirectSubmissionRequired() const override;
protected:
HwInfoConfigHw() = default;

View File

@@ -482,4 +482,9 @@ uint32_t HwInfoConfigHw<gfxProduct>::getL1CachePolicy() const {
// Generic implementation: the body is empty, so hwInfo is left unmodified.
template <PRODUCT_FAMILY gfxProduct>
void HwInfoConfigHw<gfxProduct>::adjustNumberOfCcs(HardwareInfo &hwInfo) const {}
// Generic (non-specialized) answer to whether direct submission has to
// disable the prefetcher: always true. Products that do not need it are
// expected to provide their own specialization returning false.
template <PRODUCT_FAMILY gfxProduct>
bool HwInfoConfigHw<gfxProduct>::isPrefetcherDisablingInDirectSubmissionRequired() const {
return true;
}
} // namespace NEO

View File

@@ -53,3 +53,8 @@ void HwInfoConfigHw<gfxProduct>::getKernelExtendedProperties(uint32_t *fp16, uin
*fp32 = (FP_ATOMIC_EXT_FLAG_GLOBAL_LOAD_STORE | FP_ATOMIC_EXT_FLAG_GLOBAL_ADD | FP_ATOMIC_EXT_FLAG_GLOBAL_MIN_MAX);
*fp64 = (FP_ATOMIC_EXT_FLAG_GLOBAL_LOAD_STORE | FP_ATOMIC_EXT_FLAG_GLOBAL_ADD | FP_ATOMIC_EXT_FLAG_GLOBAL_MIN_MAX);
}
// Product-specific override of the generic implementation: for this product
// disabling the prefetcher in direct submission is reported as not required.
// NOTE(review): gfxProduct is defined outside this hunk - confirm which product this file targets.
template <>
bool HwInfoConfigHw<gfxProduct>::isPrefetcherDisablingInDirectSubmissionRequired() const {
return false;
}

View File

@@ -406,6 +406,11 @@ uint32_t L1CachePolicyHelper<IGFX_UNKNOWN>::getDefaultL1CachePolicy() {
return 0u;
}
// Explicit specialization for IGFX_UNKNOWN: reports that disabling the
// prefetcher in direct submission is required.
template <>
bool HwInfoConfigHw<IGFX_UNKNOWN>::isPrefetcherDisablingInDirectSubmissionRequired() const {
return true;
}
} // namespace NEO
#include "shared/source/os_interface/hw_info_config.inl"

View File

@@ -446,4 +446,5 @@ ExperimentalEnableL0DebuggerForOpenCL = 0
DebuggerDisableSingleAddressSbaTracking = 0
ForceImagesSupport = -1
ForceCsrLockInBcsEnqueueOnlyForGpgpuSubmission = -1
ExperimentalEnableTileAttach = 0
ExperimentalEnableTileAttach = 0
DirectSubmissionDisablePrefetcher = -1

View File

@@ -16,4 +16,8 @@ if(TESTS_XE_HP_CORE)
target_sources(neo_shared_tests PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/direct_submission_tests_xe_hp_core.cpp)
endif()
# Add the Xe HPC core direct submission tests to neo_shared_tests only when
# TESTS_XE_HPC_CORE is set.
if(TESTS_XE_HPC_CORE)
target_sources(neo_shared_tests PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/direct_submission_tests_xe_hpc_core.cpp)
endif()
add_subdirectories()

View File

@@ -926,7 +926,9 @@ HWTEST_F(DirectSubmissionTest,
EXPECT_EQ(expectedStoreAddress, storeDataCmdAtPosition->getAddress());
cmdBufferPosition += sizeof(MI_STORE_DATA_IMM);
cmdBufferPosition += directSubmission.getSizeDisablePrefetcher();
if (HwInfoConfig::get(defaultHwInfo->platform.eProductFamily)->isPrefetcherDisablingInDirectSubmissionRequired()) {
cmdBufferPosition += directSubmission.getSizeDisablePrefetcher();
}
MI_SEMAPHORE_WAIT *semaphoreWaitCmdAtPosition = genCmdCast<MI_SEMAPHORE_WAIT *>(cmdBufferPosition);
ASSERT_NE(nullptr, semaphoreWaitCmdAtPosition);
EXPECT_EQ(COMPARE_OPERATION::COMPARE_OPERATION_SAD_GREATER_THAN_OR_EQUAL_SDD,
@@ -1103,3 +1105,24 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, DirectSubmissionTest, givenDebugFlagSetWhenDispatch
EXPECT_EQ(0u, arbCheck->getPreParserDisable());
}
// With DirectSubmissionDisablePrefetcher forced to 0, dispatchDisablePrefetcher()
// must not place any MI_ARB_CHECK in the ring command stream.
HWCMDTEST_F(IGFX_XE_HP_CORE, DirectSubmissionTest, givenDisablePrefetcherDebugFlagDisabledWhenDispatchingPrefetcherThenSetCorrectValue) {
    using MI_ARB_CHECK = typename FamilyType::MI_ARB_CHECK;
    using Dispatcher = BlitterDispatcher<FamilyType>;

    // The flag has to be set before the object is constructed.
    DebugManagerStateRestore stateRestore;
    DebugManager.flags.DirectSubmissionDisablePrefetcher.set(0);

    MockDirectSubmissionHw<FamilyType, Dispatcher> directSubmission(*pDevice->getDefaultEngine().commandStreamReceiver);
    EXPECT_TRUE(directSubmission.allocateResources());

    directSubmission.dispatchDisablePrefetcher(true);

    HardwareParse parser;
    parser.parseCommands<FamilyType>(directSubmission.ringCommandStream, 0);
    parser.findHardwareCommands<FamilyType>();

    // Nothing should have been emitted.
    auto *arbCheck = parser.getCommand<MI_ARB_CHECK>();
    EXPECT_EQ(nullptr, arbCheck);
}

View File

@@ -0,0 +1,57 @@
/*
* Copyright (C) 2022 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/direct_submission/dispatchers/blitter_dispatcher.h"
#include "shared/source/xe_hp_core/hw_cmds.h"
#include "shared/test/common/cmd_parse/hw_parse.h"
#include "shared/test/common/helpers/debug_manager_state_restore.h"
#include "shared/test/common/mocks/mock_direct_submission_hw.h"
#include "shared/test/common/test_macros/hw_test.h"
#include "shared/test/unit_test/fixtures/direct_submission_fixture.h"
using DirectSubmissionTestXeHpcCore = Test<DirectSubmissionFixture>;
// Without any debug flag override, dispatchDisablePrefetcher() on Xe HPC core
// is expected to leave the ring command stream without an MI_ARB_CHECK.
XE_HPC_CORETEST_F(DirectSubmissionTestXeHpcCore, givenXeHpcCoreWhenDispatchDisablePrefetcherIsCalledThenPrefetcherIsNotDisabled) {
    using MI_ARB_CHECK = typename FamilyType::MI_ARB_CHECK;
    using Dispatcher = BlitterDispatcher<FamilyType>;

    MockDirectSubmissionHw<FamilyType, Dispatcher> directSubmission(*pDevice->getDefaultEngine().commandStreamReceiver);

    // The reported command size is still that of a single MI_ARB_CHECK.
    EXPECT_EQ(sizeof(MI_ARB_CHECK), directSubmission.getSizeDisablePrefetcher());

    EXPECT_TRUE(directSubmission.allocateResources());

    directSubmission.dispatchDisablePrefetcher(true);

    HardwareParse parser;
    parser.parseCommands<FamilyType>(directSubmission.ringCommandStream, 0);
    parser.findHardwareCommands<FamilyType>();

    auto *arbCheck = parser.getCommand<MI_ARB_CHECK>();
    EXPECT_EQ(nullptr, arbCheck);
}
// Forcing DirectSubmissionDisablePrefetcher to 1 must make
// dispatchDisablePrefetcher() emit an MI_ARB_CHECK on Xe HPC core.
XE_HPC_CORETEST_F(DirectSubmissionTestXeHpcCore, givenXeHpcCoreAndDisablePrefetcherDebugFlagEnabledWhenDispatchDisablePrefetcherIsCalledThenPrefetcherIsDisabled) {
    using MI_ARB_CHECK = typename FamilyType::MI_ARB_CHECK;
    using Dispatcher = BlitterDispatcher<FamilyType>;

    // The flag has to be set before the object is constructed.
    DebugManagerStateRestore stateRestore;
    DebugManager.flags.DirectSubmissionDisablePrefetcher.set(1);

    MockDirectSubmissionHw<FamilyType, Dispatcher> directSubmission(*pDevice->getDefaultEngine().commandStreamReceiver);
    EXPECT_TRUE(directSubmission.allocateResources());

    directSubmission.dispatchDisablePrefetcher(true);

    HardwareParse parser;
    parser.parseCommands<FamilyType>(directSubmission.ringCommandStream, 0);
    parser.findHardwareCommands<FamilyType>();

    auto *arbCheck = parser.getCommand<MI_ARB_CHECK>();
    ASSERT_NE(nullptr, arbCheck);
}

View File

@@ -161,3 +161,8 @@ HWTEST2_F(HwInfoConfigTest, givenPlatformWithUnsupportedL1CachePoliciesWhenGetL1
EXPECT_EQ(0u, hwInfoConfig->getL1CachePolicy());
}
// The HwInfoConfig of the default product must report that prefetcher
// disabling in direct submission is required.
HWTEST_F(HwInfoConfigTest, givenHwInfoConfigWhenIsPrefetcherDisablingInDirectSubmissionRequiredThenTrueIsReturned) {
    const auto *productConfig = HwInfoConfig::get(defaultHwInfo->platform.eProductFamily);
    EXPECT_TRUE(productConfig->isPrefetcherDisablingInDirectSubmissionRequired());
}

View File

@@ -37,3 +37,5 @@ HWTEST_EXCLUDE_PRODUCT(HwInfoConfigTest, whenCallingGetDeviceMemoryNameThenDdrIs
HWTEST_EXCLUDE_PRODUCT(HwInfoConfigTest, givenHardwareInfoWhenCallingIsMaxThreadsForWorkgroupWARequiredThenFalseIsReturned, IGFX_XE_HPC_CORE);
HWTEST_EXCLUDE_PRODUCT(HwInfoConfigTest, givenHwInfoConfigWhenAskedIfPipeControlPriorToNonPipelinedStateCommandsWARequiredThenFalseIsReturned, IGFX_XE_HPC_CORE);
HWTEST_EXCLUDE_PRODUCT(HwInfoConfigTest, givenHwInfoConfigWhenAskedIfTile64With3DSurfaceOnBCSIsSupportedThenTrueIsReturned, IGFX_XE_HPC_CORE);
HWTEST_EXCLUDE_PRODUCT(HwInfoConfigTest, givenHwInfoConfigWhenIsPrefetcherDisablingInDirectSubmissionRequiredThenTrueIsReturned, IGFX_XE_HPC_CORE);
HWTEST_EXCLUDE_PRODUCT(DirectSubmissionTest, givenDebugFlagSetWhenDispatchingPrefetcherThenSetCorrectValue, IGFX_XE_HPC_CORE);

View File

@@ -69,3 +69,8 @@ PVCTEST_F(PVCHwInfoConfig, givenHwInfoConfigWhenGettingEvictWhenNecessaryFlagSup
const auto &hwInfoConfig = *HwInfoConfig::get(hwInfo.platform.eProductFamily);
EXPECT_TRUE(hwInfoConfig.isEvictionWhenNecessaryFlagSupported());
}
// PVC must report that prefetcher disabling in direct submission is not required.
PVCTEST_F(PVCHwInfoConfig, givenPVCHwInfoConfigWhenIsPrefetcherDisablingInDirectSubmissionRequiredThenFalseIsReturned) {
    const auto *productConfig = HwInfoConfig::get(productFamily);
    EXPECT_FALSE(productConfig->isPrefetcherDisablingInDirectSubmissionRequired());
}