Change DG2 L1 cache policy to WB

With the compiler LSC WAs in place, this gives better performance.

If the debugger is active, the policy is not changed, i.e.
it remains WBP.

Related-To: NEO-7003

Signed-off-by: Dominik Dabek <dominik.dabek@intel.com>
This commit is contained in:
Dominik Dabek
2022-08-23 11:48:18 +00:00
committed by Compute-Runtime-Automation
parent 2d949a9f3d
commit a820e73dd7
48 changed files with 447 additions and 128 deletions

View File

@@ -283,7 +283,7 @@ struct EncodeSurfaceState {
static void encodeBuffer(EncodeSurfaceStateArgs &args);
static void encodeExtraBufferParams(EncodeSurfaceStateArgs &args);
static void encodeImplicitScalingParams(const EncodeSurfaceStateArgs &args);
static void encodeExtraCacheSettings(R_SURFACE_STATE *surfaceState, const HardwareInfo &hwInfo);
static void encodeExtraCacheSettings(R_SURFACE_STATE *surfaceState, const EncodeSurfaceStateArgs &args);
static void appendBufferSurfaceState(EncodeSurfaceStateArgs &args);
static constexpr uintptr_t getSurfaceBaseAddressAlignmentMask() {

View File

@@ -467,7 +467,7 @@ size_t EncodeSurfaceState<Family>::pushBindingTableAndSurfaceStates(IndirectHeap
}
template <typename Family>
inline void EncodeSurfaceState<Family>::encodeExtraCacheSettings(R_SURFACE_STATE *surfaceState, const HardwareInfo &hwInfo) {}
inline void EncodeSurfaceState<Family>::encodeExtraCacheSettings(R_SURFACE_STATE *surfaceState, const EncodeSurfaceStateArgs &args) {}
template <typename Family>
void EncodeSurfaceState<Family>::setImageAuxParamsForCCS(R_SURFACE_STATE *surfaceState, Gmm *gmm) {

View File

@@ -371,6 +371,7 @@ void EncodeStateBaseAddress<Family>::encode(EncodeStateBaseAddressArgs<Family> &
auto dsh = args.container->isHeapDirty(HeapType::DYNAMIC_STATE) ? args.container->getIndirectHeap(HeapType::DYNAMIC_STATE) : nullptr;
auto ioh = args.container->isHeapDirty(HeapType::INDIRECT_OBJECT) ? args.container->getIndirectHeap(HeapType::INDIRECT_OBJECT) : nullptr;
auto ssh = args.container->isHeapDirty(HeapType::SURFACE_STATE) ? args.container->getIndirectHeap(HeapType::SURFACE_STATE) : nullptr;
auto isDebuggerActive = device.isDebuggerActive() || device.getDebugger() != nullptr;
StateBaseAddressHelperArgs<Family> stateBaseAddressHelperArgs = {
0, // generalStateBase
@@ -391,7 +392,8 @@ void EncodeStateBaseAddress<Family>::encode(EncodeStateBaseAddressArgs<Family> &
false, // isMultiOsContextCapable
args.useGlobalAtomics, // useGlobalAtomics
false, // areMultipleSubDevicesInContext
false // overrideSurfaceStateBaseAddress
false, // overrideSurfaceStateBaseAddress
isDebuggerActive // isDebuggerActive
};
StateBaseAddressHelper<Family>::programStateBaseAddress(stateBaseAddressHelperArgs);
@@ -457,7 +459,7 @@ inline void EncodeWA<GfxFamily>::adjustCompressionFormatForPlanarImage(uint32_t
template <typename GfxFamily>
inline void EncodeSurfaceState<GfxFamily>::encodeExtraBufferParams(EncodeSurfaceStateArgs &args) {
auto surfaceState = reinterpret_cast<R_SURFACE_STATE *>(args.outMemory);
encodeExtraCacheSettings(surfaceState, *args.gmmHelper->getHardwareInfo());
encodeExtraCacheSettings(surfaceState, args);
}
template <typename GfxFamily>

View File

@@ -13,16 +13,16 @@
namespace NEO {
template <>
void EncodeSurfaceState<Family>::encodeExtraCacheSettings(R_SURFACE_STATE *surfaceState, const HardwareInfo &hwInfo) {
void EncodeSurfaceState<Family>::encodeExtraCacheSettings(R_SURFACE_STATE *surfaceState, const EncodeSurfaceStateArgs &args) {
using L1_CACHE_POLICY = typename R_SURFACE_STATE::L1_CACHE_POLICY;
const auto &hwInfo = *args.gmmHelper->getHardwareInfo();
auto hwInfoConfig = HwInfoConfig::get(hwInfo.platform.eProductFamily);
auto cachePolicy = static_cast<L1_CACHE_POLICY>(hwInfoConfig->getL1CachePolicy());
surfaceState->setL1CachePolicyL1CacheControl(cachePolicy);
auto cachePolicy = static_cast<L1_CACHE_POLICY>(hwInfoConfig->getL1CachePolicy(args.isDebuggerActive));
if (DebugManager.flags.OverrideL1CacheControlInSurfaceState.get() != -1 &&
DebugManager.flags.ForceAllResourcesUncached.get() == false) {
surfaceState->setL1CachePolicyL1CacheControl(static_cast<L1_CACHE_POLICY>(DebugManager.flags.OverrideL1CacheControlInSurfaceState.get()));
cachePolicy = static_cast<L1_CACHE_POLICY>(DebugManager.flags.OverrideL1CacheControlInSurfaceState.get());
}
surfaceState->setL1CachePolicyL1CacheControl(cachePolicy);
}
template <typename GfxFamily>

View File

@@ -473,11 +473,13 @@ void EncodeStateBaseAddress<Family>::setSbaAddressesForDebugger(NEO::Debugger::S
template <typename Family>
void EncodeStateBaseAddress<Family>::encode(EncodeStateBaseAddressArgs<Family> &args) {
auto gmmHelper = args.container->getDevice()->getRootDeviceEnvironment().getGmmHelper();
auto &device = *args.container->getDevice();
auto gmmHelper = device.getRootDeviceEnvironment().getGmmHelper();
auto dsh = args.container->isHeapDirty(HeapType::DYNAMIC_STATE) ? args.container->getIndirectHeap(HeapType::DYNAMIC_STATE) : nullptr;
auto ioh = args.container->isHeapDirty(HeapType::INDIRECT_OBJECT) ? args.container->getIndirectHeap(HeapType::INDIRECT_OBJECT) : nullptr;
auto ssh = args.container->isHeapDirty(HeapType::SURFACE_STATE) ? args.container->getIndirectHeap(HeapType::SURFACE_STATE) : nullptr;
auto isDebuggerActive = device.isDebuggerActive() || device.getDebugger() != nullptr;
StateBaseAddressHelperArgs<Family> stateBaseAddressHelperArgs = {
0, // generalStateBase
@@ -498,7 +500,8 @@ void EncodeStateBaseAddress<Family>::encode(EncodeStateBaseAddressArgs<Family> &
args.multiOsContextCapable, // isMultiOsContextCapable
args.useGlobalAtomics, // useGlobalAtomics
false, // areMultipleSubDevicesInContext
false // overrideSurfaceStateBaseAddress
false, // overrideSurfaceStateBaseAddress
isDebuggerActive // isDebuggerActive
};
StateBaseAddressHelper<Family>::programStateBaseAddress(stateBaseAddressHelperArgs);
@@ -506,7 +509,7 @@ void EncodeStateBaseAddress<Family>::encode(EncodeStateBaseAddressArgs<Family> &
auto cmdSpace = StateBaseAddressHelper<Family>::getSpaceForSbaCmd(*args.container->getCommandStream());
*cmdSpace = args.sbaCmd;
auto &hwInfo = args.container->getDevice()->getHardwareInfo();
auto &hwInfo = device.getHardwareInfo();
auto &hwInfoConfig = *HwInfoConfig::get(hwInfo.platform.eProductFamily);
if (hwInfoConfig.isAdditionalStateBaseAddressWARequired(hwInfo)) {
cmdSpace = StateBaseAddressHelper<Family>::getSpaceForSbaCmd(*args.container->getCommandStream());
@@ -630,7 +633,7 @@ void EncodeSurfaceState<Family>::encodeExtraBufferParams(EncodeSurfaceStateArgs
surfaceState->setMemoryObjectControlState(args.gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CONST));
}
encodeExtraCacheSettings(surfaceState, *args.gmmHelper->getHardwareInfo());
encodeExtraCacheSettings(surfaceState, args);
encodeImplicitScalingParams(args);

View File

@@ -31,6 +31,7 @@ struct EncodeSurfaceStateArgsBase {
bool useGlobalAtomics = false;
bool areMultipleSubDevicesInContext = false;
bool implicitScaling = false;
bool isDebuggerActive = false;
protected:
EncodeSurfaceStateArgsBase() = default;

View File

@@ -428,7 +428,8 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
isMultiOsContextCapable(), // isMultiOsContextCapable
dispatchFlags.useGlobalAtomics, // useGlobalAtomics
dispatchFlags.areMultipleSubDevicesInContext, // areMultipleSubDevicesInContext
false // overrideSurfaceStateBaseAddress
false, // overrideSurfaceStateBaseAddress
debuggingEnabled || device.isDebuggerActive() // isDebuggerActive
};
StateBaseAddressHelper<GfxFamily>::programStateBaseAddress(args);

View File

@@ -21,13 +21,13 @@ bool isL3Capable(const GraphicsAllocation &graphicsAllocation);
template <PRODUCT_FAMILY gfxProduct>
struct L1CachePolicyHelper {
static const char *getCachingPolicyOptions();
static const char *getCachingPolicyOptions(bool isDebuggerActive);
static uint32_t getDefaultL1CachePolicy();
static uint32_t getDefaultL1CachePolicy(bool isDebuggerActive);
static uint32_t getUncachedL1CachePolicy();
static uint32_t getL1CachePolicy();
static uint32_t getL1CachePolicy(bool isDebuggerActive);
};
} // namespace NEO

View File

@@ -12,14 +12,14 @@
namespace NEO {
template <PRODUCT_FAMILY gfxProduct>
uint32_t L1CachePolicyHelper<gfxProduct>::getL1CachePolicy() {
uint32_t L1CachePolicyHelper<gfxProduct>::getL1CachePolicy(bool isDebuggerActive) {
if (DebugManager.flags.ForceAllResourcesUncached.get()) {
return L1CachePolicyHelper<gfxProduct>::getUncachedL1CachePolicy();
}
if (DebugManager.flags.OverrideL1CachePolicyInSurfaceStateAndStateless.get() != -1) {
return DebugManager.flags.OverrideL1CachePolicyInSurfaceStateAndStateless.get();
}
return L1CachePolicyHelper<gfxProduct>::getDefaultL1CachePolicy();
return L1CachePolicyHelper<gfxProduct>::getDefaultL1CachePolicy(isDebuggerActive);
}
} // namespace NEO

View File

@@ -11,12 +11,12 @@
namespace NEO {
template <PRODUCT_FAMILY gfxProduct>
const char *L1CachePolicyHelper<gfxProduct>::getCachingPolicyOptions() {
const char *L1CachePolicyHelper<gfxProduct>::getCachingPolicyOptions(bool isDebuggerActive) {
return nullptr;
}
template <PRODUCT_FAMILY gfxProduct>
uint32_t L1CachePolicyHelper<gfxProduct>::getDefaultL1CachePolicy() {
uint32_t L1CachePolicyHelper<gfxProduct>::getDefaultL1CachePolicy(bool isDebuggerActive) {
return 0u;
}

View File

@@ -12,14 +12,14 @@
namespace NEO {
template <PRODUCT_FAMILY gfxProduct>
const char *L1CachePolicyHelper<gfxProduct>::getCachingPolicyOptions() {
const char *L1CachePolicyHelper<gfxProduct>::getCachingPolicyOptions(bool isDebuggerActive) {
using GfxFamily = typename HwMapper<gfxProduct>::GfxFamily;
static constexpr const char *writeBackCachingPolicy = "-cl-store-cache-default=7 -cl-load-cache-default=4";
static constexpr const char *writeByPassCachingPolicy = "-cl-store-cache-default=2 -cl-load-cache-default=4";
static constexpr const char *uncachedCachingPolicy = "-cl-store-cache-default=1 -cl-load-cache-default=1";
switch (L1CachePolicyHelper<gfxProduct>::getL1CachePolicy()) {
switch (L1CachePolicyHelper<gfxProduct>::getL1CachePolicy(isDebuggerActive)) {
case GfxFamily::STATE_BASE_ADDRESS::L1_CACHE_POLICY_WBP:
return writeByPassCachingPolicy;
case GfxFamily::STATE_BASE_ADDRESS::L1_CACHE_POLICY_WB:
@@ -32,7 +32,7 @@ const char *L1CachePolicyHelper<gfxProduct>::getCachingPolicyOptions() {
}
template <PRODUCT_FAMILY gfxProduct>
uint32_t L1CachePolicyHelper<gfxProduct>::getDefaultL1CachePolicy() {
uint32_t L1CachePolicyHelper<gfxProduct>::getDefaultL1CachePolicy(bool isDebuggerActive) {
using GfxFamily = typename HwMapper<gfxProduct>::GfxFamily;
return GfxFamily::STATE_BASE_ADDRESS::L1_CACHE_POLICY_WBP;
}

View File

@@ -30,7 +30,7 @@ class CompilerHwInfoConfig {
virtual bool isForceToStatelessRequired() const = 0;
virtual void adjustHwInfoForIgc(HardwareInfo &hwInfo) const = 0;
virtual void setProductConfigForHwInfo(HardwareInfo &hwInfo, AheadOfTimeConfig config) const = 0;
virtual const char *getCachingPolicyOptions() const = 0;
virtual const char *getCachingPolicyOptions(bool isDebuggerActive) const = 0;
};
template <PRODUCT_FAMILY gfxProduct>
@@ -47,7 +47,7 @@ class CompilerHwInfoConfigHw : public CompilerHwInfoConfig {
bool isForceToStatelessRequired() const override;
void adjustHwInfoForIgc(HardwareInfo &hwInfo) const override;
void setProductConfigForHwInfo(HardwareInfo &hwInfo, AheadOfTimeConfig config) const override;
const char *getCachingPolicyOptions() const override;
const char *getCachingPolicyOptions(bool isDebuggerActive) const override;
protected:
CompilerHwInfoConfigHw() = default;

View File

@@ -26,8 +26,8 @@ void CompilerHwInfoConfigHw<gfxProduct>::adjustHwInfoForIgc(HardwareInfo &hwInfo
}
template <PRODUCT_FAMILY gfxProduct>
const char *CompilerHwInfoConfigHw<gfxProduct>::getCachingPolicyOptions() const {
return L1CachePolicyHelper<gfxProduct>::getCachingPolicyOptions();
const char *CompilerHwInfoConfigHw<gfxProduct>::getCachingPolicyOptions(bool isDebuggerActive) const {
return L1CachePolicyHelper<gfxProduct>::getCachingPolicyOptions(isDebuggerActive);
};
} // namespace NEO

View File

@@ -45,6 +45,7 @@ struct StateBaseAddressHelperArgs {
bool useGlobalAtomics = false;
bool areMultipleSubDevicesInContext = false;
bool overrideSurfaceStateBaseAddress = false;
bool isDebuggerActive = false;
};
template <typename GfxFamily>
@@ -60,7 +61,7 @@ struct StateBaseAddressHelper {
static void appendStateBaseAddressParameters(StateBaseAddressHelperArgs<GfxFamily> &args,
bool overrideBindlessSurfaceStateBase);
static void appendExtraCacheSettings(STATE_BASE_ADDRESS *stateBaseAddress, const HardwareInfo *hwInfo);
static void appendExtraCacheSettings(StateBaseAddressHelperArgs<GfxFamily> &args);
static void programBindingTableBaseAddress(LinearStream &commandStream, const IndirectHeap &ssh, GmmHelper *gmmHelper);

View File

@@ -29,6 +29,6 @@ void StateBaseAddressHelper<GfxFamily>::appendIohParameters(StateBaseAddressHelp
}
template <typename GfxFamily>
void StateBaseAddressHelper<GfxFamily>::appendExtraCacheSettings(STATE_BASE_ADDRESS *stateBaseAddress, const HardwareInfo *hwInfo) {}
void StateBaseAddressHelper<GfxFamily>::appendExtraCacheSettings(StateBaseAddressHelperArgs<GfxFamily> &args) {}
} // namespace NEO

View File

@@ -6,13 +6,13 @@
*/
template <typename GfxFamily>
void StateBaseAddressHelper<GfxFamily>::appendExtraCacheSettings(STATE_BASE_ADDRESS *stateBaseAddress, const HardwareInfo *hwInfo) {
auto hwInfoConfig = HwInfoConfig::get(hwInfo->platform.eProductFamily);
auto cachePolicy = hwInfoConfig->getL1CachePolicy();
stateBaseAddress->setL1CachePolicyL1CacheControl(static_cast<typename STATE_BASE_ADDRESS::L1_CACHE_POLICY>(cachePolicy));
void StateBaseAddressHelper<GfxFamily>::appendExtraCacheSettings(StateBaseAddressHelperArgs<GfxFamily> &args) {
auto hwInfoConfig = HwInfoConfig::get(args.gmmHelper->getHardwareInfo()->platform.eProductFamily);
auto cachePolicy = hwInfoConfig->getL1CachePolicy(args.isDebuggerActive);
args.stateBaseAddressCmd->setL1CachePolicyL1CacheControl(static_cast<typename STATE_BASE_ADDRESS::L1_CACHE_POLICY>(cachePolicy));
if (DebugManager.flags.ForceStatelessL1CachingPolicy.get() != -1 &&
DebugManager.flags.ForceAllResourcesUncached.get() == false) {
stateBaseAddress->setL1CachePolicyL1CacheControl(static_cast<typename STATE_BASE_ADDRESS::L1_CACHE_POLICY>(DebugManager.flags.ForceStatelessL1CachingPolicy.get()));
args.stateBaseAddressCmd->setL1CachePolicyL1CacheControl(static_cast<typename STATE_BASE_ADDRESS::L1_CACHE_POLICY>(DebugManager.flags.ForceStatelessL1CachingPolicy.get()));
}
}

View File

@@ -78,7 +78,7 @@ void StateBaseAddressHelper<GfxFamily>::appendStateBaseAddressParameters(
args.stateBaseAddressCmd->setStatelessDataPortAccessMemoryObjectControlState(args.gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CONST));
}
appendExtraCacheSettings(args.stateBaseAddressCmd, args.gmmHelper->getHardwareInfo());
appendExtraCacheSettings(args);
}
template <typename GfxFamily>

View File

@@ -132,7 +132,7 @@ class HwInfoConfig {
virtual bool isCpuCopyNecessary(const void *ptr, MemoryManager *memoryManager) const = 0;
virtual bool isAdjustWalkOrderAvailable(const HardwareInfo &hwInfo) const = 0;
virtual bool isAssignEngineRoundRobinSupported() const = 0;
virtual uint32_t getL1CachePolicy() const = 0;
virtual uint32_t getL1CachePolicy(bool isDebuggerActive) const = 0;
virtual bool isEvictionIfNecessaryFlagSupported() const = 0;
virtual void adjustNumberOfCcs(HardwareInfo &hwInfo) const = 0;
virtual bool isPrefetcherDisablingInDirectSubmissionRequired() const = 0;
@@ -258,7 +258,7 @@ class HwInfoConfigHw : public HwInfoConfig {
bool isCpuCopyNecessary(const void *ptr, MemoryManager *memoryManager) const override;
bool isAdjustWalkOrderAvailable(const HardwareInfo &hwInfo) const override;
bool isAssignEngineRoundRobinSupported() const override;
uint32_t getL1CachePolicy() const override;
uint32_t getL1CachePolicy(bool isDebuggerActive) const override;
bool isEvictionIfNecessaryFlagSupported() const override;
void adjustNumberOfCcs(HardwareInfo &hwInfo) const override;
bool isPrefetcherDisablingInDirectSubmissionRequired() const override;

View File

@@ -482,8 +482,8 @@ template <PRODUCT_FAMILY gfxProduct>
bool HwInfoConfigHw<gfxProduct>::isAdjustWalkOrderAvailable(const HardwareInfo &hwInfo) const { return false; }
template <PRODUCT_FAMILY gfxProduct>
uint32_t HwInfoConfigHw<gfxProduct>::getL1CachePolicy() const {
return L1CachePolicyHelper<gfxProduct>::getL1CachePolicy();
uint32_t HwInfoConfigHw<gfxProduct>::getL1CachePolicy(bool isDebuggerActive) const {
return L1CachePolicyHelper<gfxProduct>::getL1CachePolicy(isDebuggerActive);
}
template <PRODUCT_FAMILY gfxProduct>

View File

@@ -26,7 +26,7 @@ void EncodeDispatchKernel<Family>::adjustTimestampPacket(WALKER_TYPE &walkerCmd,
}
template <>
inline void EncodeSurfaceState<Family>::encodeExtraCacheSettings(R_SURFACE_STATE *surfaceState, const HardwareInfo &hwInfo) {
inline void EncodeSurfaceState<Family>::encodeExtraCacheSettings(R_SURFACE_STATE *surfaceState, const EncodeSurfaceStateArgs &args) {
}
template <>

View File

@@ -11,7 +11,7 @@
namespace NEO {
template <>
void StateBaseAddressHelper<XeHpFamily>::appendExtraCacheSettings(STATE_BASE_ADDRESS *stateBaseAddress, const HardwareInfo *hwInfo) {
void StateBaseAddressHelper<XeHpFamily>::appendExtraCacheSettings(StateBaseAddressHelperArgs<XeHpFamily> &args) {
}
template struct StateBaseAddressHelper<XeHpFamily>;

View File

@@ -18,9 +18,12 @@ namespace NEO {
#ifdef SUPPORT_DG2
template <>
uint32_t L1CachePolicyHelper<IGFX_DG2>::getDefaultL1CachePolicy() {
uint32_t L1CachePolicyHelper<IGFX_DG2>::getDefaultL1CachePolicy(bool isDebuggerActive) {
using GfxFamily = HwMapper<IGFX_DG2>::GfxFamily;
return GfxFamily::STATE_BASE_ADDRESS::L1_CACHE_POLICY_WBP;
if (isDebuggerActive) {
return GfxFamily::STATE_BASE_ADDRESS::L1_CACHE_POLICY_WBP;
}
return GfxFamily::STATE_BASE_ADDRESS::L1_CACHE_POLICY_WB;
}
template struct L1CachePolicyHelper<IGFX_DG2>;