Set L1 policy globally

Related-To: NEO-7003

Add a function that controls the L1 cache policy for both
stateless and surface state caching.


Signed-off-by: Szymon Morek <szymon.morek@intel.com>
This commit is contained in:
Szymon Morek
2022-06-22 13:13:37 +00:00
committed by Compute-Runtime-Automation
parent 7aa053199d
commit 5236b34629
19 changed files with 170 additions and 33 deletions

View File

@@ -14,9 +14,13 @@ namespace NEO {
// Programs the L1 cache policy field of a render surface state.
// The policy reported by HwInfoConfig::getL1CachePolicy() for the product family in hwInfo is
// written first; when the OverrideL1CacheControlInSurfaceState debug flag is set (!= -1), its
// value replaces that policy.
template <>
void EncodeSurfaceState<Family>::encodeExtraCacheSettings(R_SURFACE_STATE *surfaceState, const HardwareInfo &hwInfo) {
    using L1_CACHE_POLICY = typename R_SURFACE_STATE::L1_CACHE_POLICY;

    auto hwInfoConfig = HwInfoConfig::get(hwInfo.platform.eProductFamily);
    auto cachePolicy = static_cast<L1_CACHE_POLICY>(hwInfoConfig->getL1CachePolicy());
    surfaceState->setL1CachePolicyL1CacheControl(cachePolicy);

    // Read the flag once so the check and the programmed value always agree.
    const auto overrideFromFlag = DebugManager.flags.OverrideL1CacheControlInSurfaceState.get();
    if (overrideFromFlag != -1) {
        surfaceState->setL1CachePolicyL1CacheControl(static_cast<L1_CACHE_POLICY>(overrideFromFlag));
    }
}

View File

@@ -207,6 +207,8 @@ DECLARE_DEBUG_VARIABLE(int32_t, ForceWddmLowPriorityContextValue, -1, "Force sch
DECLARE_DEBUG_VARIABLE(int32_t, FailBuildProgramWithStatefulAccess, -1, "-1: default, 0: disable, 1: enable, Fail build program/module creation whenever stateful access is discovered (except built in kernels).")
DECLARE_DEBUG_VARIABLE(bool, DisableScratchPages, false, "Disable scratch pages during VM creations")
DECLARE_DEBUG_VARIABLE(std::string, OverrideDeviceName, std::string("unk"), "Device name to override")
DECLARE_DEBUG_VARIABLE(int32_t, OverrideL1CachePolicyInSurfaceStateAndStateless, -1, "-1: default, >=0 : following policy will be programmed in render surface state (for regular buffers) and stateless L1 caching")
/*LOGGING FLAGS*/
DECLARE_DEBUG_VARIABLE(int32_t, PrintDriverDiagnostics, -1, "prints driver diagnostics messages to standard output, value corresponds to hint level")
DECLARE_DEBUG_VARIABLE(bool, PrintOsContextInitializations, false, "print initialized OsContexts to standard output")

View File

@@ -16,6 +16,7 @@ class GmmHelper;
class IndirectHeap;
class LinearStream;
struct DispatchFlags;
struct HardwareInfo;
template <typename GfxFamily>
struct StateBaseAddressHelper {
@@ -54,7 +55,7 @@ struct StateBaseAddressHelper {
bool useGlobalAtomics,
bool areMultipleSubDevicesInContext);
static void appendExtraCacheSettings(STATE_BASE_ADDRESS *stateBaseAddress);
static void appendExtraCacheSettings(STATE_BASE_ADDRESS *stateBaseAddress, const HardwareInfo *hwInfo);
static void programBindingTableBaseAddress(LinearStream &commandStream, const IndirectHeap &ssh, GmmHelper *gmmHelper);

View File

@@ -12,6 +12,7 @@
#include "shared/source/helpers/constants.h"
#include "shared/source/helpers/state_base_address.h"
#include "shared/source/indirect_heap/indirect_heap.h"
#include "shared/source/os_interface/hw_info_config.h"
#include "hw_cmds.h"
@@ -102,7 +103,4 @@ void StateBaseAddressHelper<GfxFamily>::programStateBaseAddress(
isMultiOsContextCapable, memoryCompressionState, overrideBindlessSurfaceStateBase, useGlobalAtomics, areMultipleSubDevicesInContext);
}
// Generic definition with an empty body: nothing is written to stateBaseAddress.
// NOTE(review): this overload takes only stateBaseAddress, while the declaration updated in this
// change also takes a HardwareInfo pointer - presumably the superseded pre-change form; confirm.
template <typename GfxFamily>
void StateBaseAddressHelper<GfxFamily>::appendExtraCacheSettings(STATE_BASE_ADDRESS *stateBaseAddress) {}
} // namespace NEO

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2019-2021 Intel Corporation
* Copyright (C) 2019-2022 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -28,4 +28,7 @@ void StateBaseAddressHelper<GfxFamily>::appendIohParameters(typename GfxFamily::
}
}
// Generic definition with an empty body: neither stateBaseAddress nor hwInfo is used, so no
// extra cache settings are programmed by this version.
template <typename GfxFamily>
void StateBaseAddressHelper<GfxFamily>::appendExtraCacheSettings(STATE_BASE_ADDRESS *stateBaseAddress, const HardwareInfo *hwInfo) {}
} // namespace NEO

View File

@@ -0,0 +1,17 @@
/*
* Copyright (C) 2022 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
// Programs the L1 cache policy field of the STATE_BASE_ADDRESS command.
// The value returned by HwInfoConfig::getL1CachePolicy() for the product family in hwInfo is
// written first; the ForceStatelessL1CachingPolicy debug flag replaces it whenever the flag
// holds anything other than -1.
template <typename GfxFamily>
void StateBaseAddressHelper<GfxFamily>::appendExtraCacheSettings(STATE_BASE_ADDRESS *stateBaseAddress, const HardwareInfo *hwInfo) {
    using L1CachePolicy = typename STATE_BASE_ADDRESS::L1_CACHE_POLICY;

    const auto productPolicy = HwInfoConfig::get(hwInfo->platform.eProductFamily)->getL1CachePolicy();
    stateBaseAddress->setL1CachePolicyL1CacheControl(static_cast<L1CachePolicy>(productPolicy));

    const auto forcedPolicy = DebugManager.flags.ForceStatelessL1CachingPolicy.get();
    if (forcedPolicy == -1) {
        return;
    }
    stateBaseAddress->setL1CachePolicyL1CacheControl(static_cast<L1CachePolicy>(forcedPolicy));
}

View File

@@ -86,7 +86,7 @@ void StateBaseAddressHelper<GfxFamily>::appendStateBaseAddressParameters(
stateBaseAddress->setStatelessDataPortAccessMemoryObjectControlState(gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CONST));
}
appendExtraCacheSettings(stateBaseAddress);
appendExtraCacheSettings(stateBaseAddress, gmmHelper->getHardwareInfo());
}
template <typename GfxFamily>

View File

@@ -119,6 +119,8 @@ class HwInfoConfig {
virtual bool isCpuCopyNecessary(const void *ptr, MemoryManager *memoryManager) const = 0;
virtual bool isAdjustWalkOrderAvailable(const HardwareInfo &hwInfo) const = 0;
virtual bool isAssignEngineRoundRobinSupported() const = 0;
virtual uint32_t getDefaultL1CachePolicy() const = 0;
virtual uint32_t getL1CachePolicy() const = 0;
MOCKABLE_VIRTUAL ~HwInfoConfig() = default;
@@ -216,6 +218,8 @@ class HwInfoConfigHw : public HwInfoConfig {
bool isCpuCopyNecessary(const void *ptr, MemoryManager *memoryManager) const override;
bool isAdjustWalkOrderAvailable(const HardwareInfo &hwInfo) const override;
bool isAssignEngineRoundRobinSupported() const override;
uint32_t getDefaultL1CachePolicy() const override;
uint32_t getL1CachePolicy() const override;
protected:
HwInfoConfigHw() = default;

View File

@@ -426,4 +426,18 @@ bool HwInfoConfigHw<gfxProduct>::isCpuCopyNecessary(const void *ptr, MemoryManag
template <PRODUCT_FAMILY gfxProduct>
bool HwInfoConfigHw<gfxProduct>::isAdjustWalkOrderAvailable(const HardwareInfo &hwInfo) const { return false; }
// Generic fallback used by products that provide no specialization: reports policy value 0.
template <PRODUCT_FAMILY gfxProduct>
uint32_t HwInfoConfigHw<gfxProduct>::getDefaultL1CachePolicy() const {
    constexpr uint32_t fallbackPolicy = 0u;
    return fallbackPolicy;
}
// Returns the L1 cache policy to program: the value of the
// OverrideL1CachePolicyInSurfaceStateAndStateless debug flag when it is set (!= -1),
// otherwise the product default from getDefaultL1CachePolicy().
template <PRODUCT_FAMILY gfxProduct>
uint32_t HwInfoConfigHw<gfxProduct>::getL1CachePolicy() const {
    const auto overridePolicy = DebugManager.flags.OverrideL1CachePolicyInSurfaceStateAndStateless.get();
    return (overridePolicy != -1) ? static_cast<uint32_t>(overridePolicy) : getDefaultL1CachePolicy();
}
} // namespace NEO

View File

@@ -10,7 +10,7 @@
namespace NEO {
// XeHpFamily specialization: intentionally empty, so no extra cache settings are written to
// stateBaseAddress and hwInfo is not consulted.
template <>
void StateBaseAddressHelper<XeHpFamily>::appendExtraCacheSettings(STATE_BASE_ADDRESS *stateBaseAddress, const HardwareInfo *hwInfo) {
}
template struct StateBaseAddressHelper<XeHpFamily>;

View File

@@ -53,3 +53,8 @@ void HwInfoConfigHw<gfxProduct>::getKernelExtendedProperties(uint32_t *fp16, uin
*fp32 = (FP_ATOMIC_EXT_FLAG_GLOBAL_LOAD_STORE | FP_ATOMIC_EXT_FLAG_GLOBAL_ADD | FP_ATOMIC_EXT_FLAG_GLOBAL_MIN_MAX);
*fp64 = (FP_ATOMIC_EXT_FLAG_GLOBAL_LOAD_STORE | FP_ATOMIC_EXT_FLAG_GLOBAL_ADD | FP_ATOMIC_EXT_FLAG_GLOBAL_MIN_MAX);
}
// Product default: the WBP value of XE_HPC_COREFamily's STATE_BASE_ADDRESS L1 cache policy.
template <>
uint32_t HwInfoConfigHw<gfxProduct>::getDefaultL1CachePolicy() const {
    using STATE_BASE_ADDRESS = XE_HPC_COREFamily::STATE_BASE_ADDRESS;
    return static_cast<uint32_t>(STATE_BASE_ADDRESS::L1_CACHE_POLICY_WBP);
}

View File

@@ -8,14 +8,6 @@
#include "shared/source/helpers/state_base_address_xehp_and_later.inl"
namespace NEO {
// Sets the L1 cache policy of STATE_BASE_ADDRESS to WBP, then lets the
// ForceStatelessL1CachingPolicy debug flag (any value other than -1) replace it.
// NOTE(review): takes only stateBaseAddress, unlike the two-argument definitions elsewhere in
// this change - presumably the superseded pre-change specialization; confirm.
template <>
void StateBaseAddressHelper<XE_HPC_COREFamily>::appendExtraCacheSettings(STATE_BASE_ADDRESS *stateBaseAddress) {
stateBaseAddress->setL1CachePolicyL1CacheControl(STATE_BASE_ADDRESS::L1_CACHE_POLICY_WBP);
if (DebugManager.flags.ForceStatelessL1CachingPolicy.get() != -1) {
stateBaseAddress->setL1CachePolicyL1CacheControl(static_cast<typename STATE_BASE_ADDRESS::L1_CACHE_POLICY>(DebugManager.flags.ForceStatelessL1CachingPolicy.get()));
}
}
#include "shared/source/helpers/state_base_address_xe_hpg_core_and_later.inl"
template struct StateBaseAddressHelper<XE_HPC_COREFamily>;
} // namespace NEO

View File

@@ -205,3 +205,8 @@ bool HwInfoConfigHw<gfxProduct>::isStorageInfoAdjustmentRequired() const {
return false;
}
}
// Product default: the WBP value of XE_HPG_COREFamily's STATE_BASE_ADDRESS L1 cache policy.
template <>
uint32_t HwInfoConfigHw<gfxProduct>::getDefaultL1CachePolicy() const {
    using STATE_BASE_ADDRESS = XE_HPG_COREFamily::STATE_BASE_ADDRESS;
    return static_cast<uint32_t>(STATE_BASE_ADDRESS::L1_CACHE_POLICY_WBP);
}

View File

@@ -8,15 +8,6 @@
#include "shared/source/helpers/state_base_address_xehp_and_later.inl"
namespace NEO {
// Sets the L1 cache policy of STATE_BASE_ADDRESS to WBP, then lets the
// ForceStatelessL1CachingPolicy debug flag (any value other than -1) replace it.
// NOTE(review): takes only stateBaseAddress, unlike the two-argument definitions elsewhere in
// this change - presumably the superseded pre-change specialization; confirm.
template <>
void StateBaseAddressHelper<XE_HPG_COREFamily>::appendExtraCacheSettings(STATE_BASE_ADDRESS *stateBaseAddress) {
stateBaseAddress->setL1CachePolicyL1CacheControl(STATE_BASE_ADDRESS::L1_CACHE_POLICY_WBP);
if (DebugManager.flags.ForceStatelessL1CachingPolicy.get() != -1) {
stateBaseAddress->setL1CachePolicyL1CacheControl(static_cast<typename STATE_BASE_ADDRESS::L1_CACHE_POLICY>(DebugManager.flags.ForceStatelessL1CachingPolicy.get()));
}
}
#include "shared/source/helpers/state_base_address_xe_hpg_core_and_later.inl"
template struct StateBaseAddressHelper<XE_HPG_COREFamily>;
} // namespace NEO

View File

@@ -7,6 +7,8 @@
#include "shared/test/common/helpers/state_base_address_tests.h"
#include "shared/source/command_container/command_encoder.h"
using IsBetweenSklAndTgllp = IsWithinProducts<IGFX_SKYLAKE, IGFX_TIGERLAKE_LP>;
HWTEST2_F(SBATest, WhenAppendStateBaseAddressParametersIsCalledThenSBACmdHasBindingSurfaceStateProgrammed, IsBetweenSklAndTgllp) {
@@ -240,10 +242,76 @@ HWTEST2_F(SBATest, givenStateBaseAddressAndDebugFlagSetWhenAppendExtraCacheSetti
auto stateBaseAddress = FamilyType::cmdInitStateBaseAddress;
auto expectedStateBaseAddress = FamilyType::cmdInitStateBaseAddress;
StateBaseAddressHelper<FamilyType>::appendExtraCacheSettings(&stateBaseAddress);
StateBaseAddressHelper<FamilyType>::appendExtraCacheSettings(&stateBaseAddress, &hardwareInfo);
EXPECT_EQ(0, memcmp(&stateBaseAddress, &expectedStateBaseAddress, sizeof(STATE_BASE_ADDRESS)));
DebugManager.flags.ForceStatelessL1CachingPolicy.set(2);
StateBaseAddressHelper<FamilyType>::appendExtraCacheSettings(&stateBaseAddress);
StateBaseAddressHelper<FamilyType>::appendExtraCacheSettings(&stateBaseAddress, &hardwareInfo);
EXPECT_EQ(0, memcmp(&stateBaseAddress, &expectedStateBaseAddress, sizeof(STATE_BASE_ADDRESS)));
}
// For each value of the OverrideL1CachePolicyInSurfaceStateAndStateless debug flag, appends the
// STATE_BASE_ADDRESS parameters and checks that the command carries the matching L1 cache policy.
HWTEST2_F(SBATest, givenDebugFlagSetWhenAppendingSbaThenProgramCorrectL1CachePolicy, IsAtLeastXeHpgCore) {
    using SbaL1CachePolicy = typename FamilyType::STATE_BASE_ADDRESS::L1_CACHE_POLICY;
    struct PolicyCase {
        uint32_t flagValue;
        SbaL1CachePolicy expectedPolicy;
    };
    const PolicyCase policyCases[] = {
        {0, FamilyType::STATE_BASE_ADDRESS::L1_CACHE_POLICY_WBP},
        {2, FamilyType::STATE_BASE_ADDRESS::L1_CACHE_POLICY_WB},
        {3, FamilyType::STATE_BASE_ADDRESS::L1_CACHE_POLICY_WT},
        {4, FamilyType::STATE_BASE_ADDRESS::L1_CACHE_POLICY_WS}};

    // Backing allocation for the indirect heap handed to the helper.
    auto memManager = pDevice->getExecutionEnvironment()->memoryManager.get();
    AllocationProperties heapProperties(pDevice->getRootDeviceIndex(), 1, AllocationType::BUFFER, pDevice->getDeviceBitfield());
    auto heapAllocation = memManager->allocateGraphicsMemoryWithProperties(heapProperties);
    IndirectHeap heap(heapAllocation, 1);

    auto sba = FamilyType::cmdInitStateBaseAddress;
    for (const auto &policyCase : policyCases) {
        DebugManager.flags.OverrideL1CachePolicyInSurfaceStateAndStateless.set(policyCase.flagValue);
        StateBaseAddressHelper<FamilyType>::appendStateBaseAddressParameters(&sba, &heap, true, 0,
                                                                             pDevice->getRootDeviceEnvironment().getGmmHelper(), false,
                                                                             MemoryCompressionState::NotApplicable, true, false, 1u);
        EXPECT_EQ(policyCase.expectedPolicy, sba.getL1CachePolicyL1CacheControl());
    }

    memManager->freeGraphicsMemory(heapAllocation);
}
// For each value of the OverrideL1CachePolicyInSurfaceStateAndStateless debug flag, encodes a
// buffer surface state and checks that it carries the matching L1 cache policy.
HWTEST2_F(SBATest, givenDebugFlagSetWhenAppendingRssThenProgramCorrectL1CachePolicy, IsAtLeastXeHpgCore) {
    using RssL1CachePolicy = typename FamilyType::RENDER_SURFACE_STATE::L1_CACHE_POLICY;
    struct PolicyCase {
        uint32_t flagValue;
        RssL1CachePolicy expectedPolicy;
    };
    const PolicyCase policyCases[] = {
        {0, FamilyType::RENDER_SURFACE_STATE::L1_CACHE_POLICY_WBP},
        {2, FamilyType::RENDER_SURFACE_STATE::L1_CACHE_POLICY_WB},
        {3, FamilyType::RENDER_SURFACE_STATE::L1_CACHE_POLICY_WT},
        {4, FamilyType::RENDER_SURFACE_STATE::L1_CACHE_POLICY_WS}};

    // One page-sized buffer allocation is the surface being encoded.
    auto memManager = pDevice->getExecutionEnvironment()->memoryManager.get();
    const size_t bufferSize = MemoryConstants::pageSize;
    AllocationProperties bufferProperties(pDevice->getRootDeviceIndex(), bufferSize, AllocationType::BUFFER, pDevice->getDeviceBitfield());
    auto bufferAllocation = memManager->allocateGraphicsMemoryWithProperties(bufferProperties);

    auto surfaceState = FamilyType::cmdInitRenderSurfaceState;
    MultiGraphicsAllocation multiAllocation(pDevice->getRootDeviceIndex());
    multiAllocation.addAllocation(bufferAllocation);

    EncodeSurfaceStateArgs encodeArgs;
    encodeArgs.outMemory = &surfaceState;
    encodeArgs.allocation = bufferAllocation;
    encodeArgs.graphicsAddress = bufferAllocation->getGpuAddress();
    encodeArgs.size = bufferAllocation->getUnderlyingBufferSize();
    encodeArgs.mocs = 0;
    encodeArgs.gmmHelper = pDevice->getGmmHelper();
    encodeArgs.numAvailableDevices = pDevice->getNumGenericSubDevices();
    encodeArgs.areMultipleSubDevicesInContext = true;

    for (const auto &policyCase : policyCases) {
        DebugManager.flags.OverrideL1CachePolicyInSurfaceStateAndStateless.set(policyCase.flagValue);
        EncodeSurfaceState<FamilyType>::encodeBuffer(encodeArgs);
        EXPECT_EQ(policyCase.expectedPolicy, surfaceState.getL1CachePolicyL1CacheControl());
    }

    memManager->freeGraphicsMemory(bufferAllocation);
}

View File

@@ -430,3 +430,4 @@ ForceUncachedGmmUsageType = 0
OverrideDeviceName = unk
EnablePrivateBO = 0
ExperimentalEnableDeviceAllocationCache = 0
OverrideL1CachePolicyInSurfaceStateAndStateless = -1

View File

@@ -75,4 +75,4 @@ DG2TEST_F(TestDg2HwInfoConfig, givenDG2CompilerHwInfoConfigWhengetCachingPolicyO
auto compilerHwInfoConfig = CompilerHwInfoConfig::get(hwInfo.platform.eProductFamily);
const char *expectedStr = "-cl-store-cache-default=7 -cl-load-cache-default=4";
EXPECT_EQ(0, memcmp(compilerHwInfoConfig->getCachingPolicyOptions(), expectedStr, strlen(expectedStr)));
}
}

View File

@@ -109,4 +109,36 @@ HWTEST_F(HwInfoConfigTest, givenHwInfoConfigWhenIsAdjustWalkOrderAvailableCallTh
// The compiler HW info config of the default product is expected to report no caching policy options.
HWTEST_F(HwInfoConfigTest, givenCompilerHwInfoConfigWhengetCachingPolicyOptionsThenReturnNullptr) {
    const auto productFamily = defaultHwInfo->platform.eProductFamily;
    auto compilerConfig = CompilerHwInfoConfig::get(productFamily);
    EXPECT_EQ(compilerConfig->getCachingPolicyOptions(), nullptr);
}
// Checks that getL1CachePolicy() returns the policy selected through the
// OverrideL1CachePolicyInSurfaceStateAndStateless debug flag for each tested flag value.
HWTEST2_F(HwInfoConfigTest, givenHwInfoConfigAndDebugFlagWhenGetL1CachePolicyThenReturnCorrectPolicy, IsAtLeastXeHpgCore) {
    DebugManagerStateRestore restorer;

    using SbaL1CachePolicy = typename FamilyType::STATE_BASE_ADDRESS::L1_CACHE_POLICY;
    struct FlagExpectation {
        int32_t flagValue;
        SbaL1CachePolicy expectedPolicy;
    };
    const FlagExpectation expectations[] = {
        {0, FamilyType::STATE_BASE_ADDRESS::L1_CACHE_POLICY_WBP},
        {2, FamilyType::STATE_BASE_ADDRESS::L1_CACHE_POLICY_WB},
        {3, FamilyType::STATE_BASE_ADDRESS::L1_CACHE_POLICY_WT},
        {4, FamilyType::STATE_BASE_ADDRESS::L1_CACHE_POLICY_WS}};

    auto hwInfo = *defaultHwInfo;
    auto productConfig = HwInfoConfig::get(hwInfo.platform.eProductFamily);
    for (const auto &expectation : expectations) {
        DebugManager.flags.OverrideL1CachePolicyInSurfaceStateAndStateless.set(expectation.flagValue);
        EXPECT_EQ(expectation.expectedPolicy, productConfig->getL1CachePolicy());
    }
}
// On XeHpgCore and later, getL1CachePolicy() is expected to return the WBP policy.
HWTEST2_F(HwInfoConfigTest, givenHwInfoConfigWhenGetL1CachePolicyThenReturnWriteByPass, IsAtLeastXeHpgCore) {
    const auto &hwInfo = *defaultHwInfo;
    auto productConfig = HwInfoConfig::get(hwInfo.platform.eProductFamily);
    const auto programmedPolicy = productConfig->getL1CachePolicy();
    EXPECT_EQ(FamilyType::STATE_BASE_ADDRESS::L1_CACHE_POLICY_WBP, programmedPolicy);
}
// Up to and including XeHpCore, getL1CachePolicy() is expected to return 0.
HWTEST2_F(HwInfoConfigTest, givenPlatformWithUnsupportedL1CachePoliciesWhenGetL1CachePolicyThenReturnZero, IsAtMostXeHpCore) {
    const auto &hwInfo = *defaultHwInfo;
    auto productConfig = HwInfoConfig::get(hwInfo.platform.eProductFamily);
    const auto programmedPolicy = productConfig->getL1CachePolicy();
    EXPECT_EQ(0u, programmedPolicy);
}