performance: set timestamps as non-coherent since xe2

Related-To: NEO-14360

Signed-off-by: Szymon Morek <szymon.morek@intel.com>
This commit is contained in:
Szymon Morek 2025-05-20 14:41:24 +00:00 committed by Compute-Runtime-Automation
parent ed7482751b
commit 3ba25459e8
9 changed files with 46 additions and 20 deletions

View File

@ -383,11 +383,11 @@ DECLARE_DEBUG_VARIABLE(bool, DisableStatelessToStatefulOptimization, false, "Dis
DECLARE_DEBUG_VARIABLE(bool, DisableConcurrentBlockExecution, false, "disables concurrent block kernel execution")
DECLARE_DEBUG_VARIABLE(bool, UseNoRingFlushesKmdMode, true, "Windows only, passes flag to KMD that informs KMD to not emit any ring buffer flushes.")
DECLARE_DEBUG_VARIABLE(bool, DisableZeroCopyForUseHostPtr, false, "When active all buffer allocations created with CL_MEM_USE_HOST_PTR flag will not share memory with CPU.")
DECLARE_DEBUG_VARIABLE(bool, ForceNonCoherentModeForTimestamps, false, "When active timestamp buffers are allocated in non coherent memory.")
DECLARE_DEBUG_VARIABLE(bool, SetAssumeNotInUse, true, "Set AssumeNotInUse flag in d3d destroy allocation.")
DECLARE_DEBUG_VARIABLE(bool, MitigateHostVisibleSignal, false, "Reset host visible signal in CB events, flush L3 when synchronize")
DECLARE_DEBUG_VARIABLE(bool, ForceZeroCopyForUseHostPtr, false, "When active all buffer allocations created with CL_MEM_USE_HOST_PTR flag will use share memory with CPU.")
DECLARE_DEBUG_VARIABLE(bool, DummyPageBackingEnabled, false, "When true, pass page backing flag to KMD to recover from page faults. Windows only.");
DECLARE_DEBUG_VARIABLE(int32_t, ForceNonCoherentModeForTimestamps, -1, "Allocate timestamp buffers in non coherent memory. -1: default (platform-specific), 0: disabled, 1: enabled")
DECLARE_DEBUG_VARIABLE(int32_t, EnableReusingGpuTimestamps, -1, "Reuse GPU timestamp for next device time requests. -1: os-specific, 0: disable, 1: enable")
DECLARE_DEBUG_VARIABLE(int32_t, AllowZeroCopyWithoutCoherency, -1, "Use cacheline flush instead of memory copy for map/unmap mem object")
DECLARE_DEBUG_VARIABLE(int32_t, EnableHostPtrTracking, -1, "Enable host ptr tracking: -1 - default platform setting, 0 - disabled, 1 - enabled")

View File

@ -99,13 +99,10 @@ GMM_RESOURCE_USAGE_TYPE_ENUM CacheSettingsHelper::getDefaultUsageTypeWithCaching
return GMM_RESOURCE_USAGE_OCL_SYSTEM_MEMORY_BUFFER;
case AllocationType::gpuTimestampDeviceBuffer:
case AllocationType::timestampPacketTagBuffer:
if (debugManager.flags.ForceNonCoherentModeForTimestamps.get()) {
if (productHelper.isNonCoherentTimestampsModeEnabled()) {
return GMM_RESOURCE_USAGE_OCL_BUFFER;
}
if (productHelper.isDcFlushAllowed()) {
return getDefaultUsageTypeWithCachingDisabled(allocationType, productHelper);
}
return GMM_RESOURCE_USAGE_OCL_BUFFER;
return getDefaultUsageTypeWithCachingDisabled(allocationType, productHelper);
default:
return GMM_RESOURCE_USAGE_OCL_BUFFER;
}
@ -118,12 +115,6 @@ GMM_RESOURCE_USAGE_TYPE_ENUM CacheSettingsHelper::getDefaultUsageTypeWithCaching
case AllocationType::internalHeap:
case AllocationType::linearStream:
return GMM_RESOURCE_USAGE_OCL_SYSTEM_MEMORY_BUFFER_CACHELINE_MISALIGNED;
case AllocationType::timestampPacketTagBuffer:
case AllocationType::gpuTimestampDeviceBuffer:
if (debugManager.flags.ForceNonCoherentModeForTimestamps.get()) {
return GMM_RESOURCE_USAGE_OCL_BUFFER;
}
[[fallthrough]];
default:
return productHelper.isNewCoherencyModelSupported() ? GMM_RESOURCE_USAGE_OCL_BUFFER_CSR_UC : GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED;
}

View File

@ -269,6 +269,8 @@ class ProductHelper {
virtual bool isCompressionForbidden(const HardwareInfo &hwInfo) const = 0;
virtual bool isExposingSubdevicesAllowed() const = 0;
virtual bool useAdditionalBlitProperties() const = 0;
// Reports whether timestamp allocations (gpuTimestampDeviceBuffer, timestampPacketTagBuffer)
// should get GMM_RESOURCE_USAGE_OCL_BUFFER instead of the caching-disabled usage type.
// The ForceNonCoherentModeForTimestamps debug flag (when not -1) overrides the product default
// in the templated implementations.
virtual bool isNonCoherentTimestampsModeEnabled() const = 0;
virtual bool getStorageInfoLocalOnlyFlag(LocalMemAllocationMode usmDeviceAllocationMode, bool defaultValue) const = 0;
virtual ~ProductHelper() = default;

View File

@ -50,4 +50,12 @@ bool ProductHelperHw<gfxProduct>::isResourceUncachedForCS(AllocationType allocat
return false;
}
template <PRODUCT_FAMILY gfxProduct>
bool ProductHelperHw<gfxProduct>::isNonCoherentTimestampsModeEnabled() const {
    // -1 means the debug flag is not overriding anything; fall back to the
    // product default, which enables the mode only when DC flush is not allowed.
    const auto forcedMode = debugManager.flags.ForceNonCoherentModeForTimestamps.get();
    if (forcedMode == -1) {
        return !this->isDcFlushAllowed();
    }
    // Any other value is an explicit override: zero disables, non-zero enables.
    return forcedMode != 0;
}
} // namespace NEO

View File

@ -206,6 +206,7 @@ class ProductHelperHw : public ProductHelper {
bool isCompressionForbidden(const HardwareInfo &hwInfo) const override;
bool isExposingSubdevicesAllowed() const override;
bool useAdditionalBlitProperties() const override;
bool isNonCoherentTimestampsModeEnabled() const override;
bool getStorageInfoLocalOnlyFlag(LocalMemAllocationMode usmDeviceAllocationMode, bool defaultValue) const override;
~ProductHelperHw() override = default;

View File

@ -53,4 +53,12 @@ bool ProductHelperHw<gfxProduct>::isResourceUncachedForCS(AllocationType allocat
return GraphicsAllocation::isAccessedFromCommandStreamer(allocationType);
}
template <PRODUCT_FAMILY gfxProduct>
bool ProductHelperHw<gfxProduct>::isNonCoherentTimestampsModeEnabled() const {
    // Non-coherent timestamps are enabled by default in this implementation;
    // the debug flag takes precedence whenever it is set to something other than -1.
    const auto flagValue = debugManager.flags.ForceNonCoherentModeForTimestamps.get();
    const bool flagIsSet = (flagValue != -1);
    return flagIsSet ? (flagValue != 0) : true;
}
} // namespace NEO

View File

@ -483,6 +483,11 @@ bool ProductHelperHw<IGFX_UNKNOWN>::isResourceUncachedForCS(AllocationType alloc
return false;
}
// IGFX_UNKNOWN specialization: non-coherent timestamp mode is always reported as disabled.
// Unlike the templated implementations, this one does not consult the
// ForceNonCoherentModeForTimestamps debug flag.
template <>
bool ProductHelperHw<IGFX_UNKNOWN>::isNonCoherentTimestampsModeEnabled() const {
return false;
}
} // namespace NEO
#include "shared/source/os_interface/product_helper.inl"

View File

@ -621,7 +621,7 @@ ForceComputeWalkerPostSyncFlushWithWrite = -1
DeferStateInitSubmissionToFirstRegularUsage = -1
WaitForPagingFenceInController = -1
DirectSubmissionPrintSemaphoreUsage = -1
ForceNonCoherentModeForTimestamps = 0
ForceNonCoherentModeForTimestamps = -1
SetAssumeNotInUse = 1
ExperimentalUSMAllocationReuseVersion = -1
ForceNonWalkerSplitMemoryCopy = -1

View File

@ -707,8 +707,8 @@ TEST(GmmTest, givenAllocationTypeWhenGettingUsageTypeThenReturnCorrectValue) {
break;
case AllocationType::gpuTimestampDeviceBuffer:
case AllocationType::timestampPacketTagBuffer:
expectedUsage = (forceUncached || productHelper.isDcFlushAllowed()) ? uncachedGmmUsageType
: GMM_RESOURCE_USAGE_OCL_BUFFER;
expectedUsage = (forceUncached || !productHelper.isNonCoherentTimestampsModeEnabled()) ? uncachedGmmUsageType
: GMM_RESOURCE_USAGE_OCL_BUFFER;
break;
case AllocationType::bufferHostMemory:
case AllocationType::externalHostPtr:
@ -754,8 +754,8 @@ TEST(GmmTest, givenAllocationTypeWhenGettingUsageTypeThenReturnCorrectValue) {
break;
case AllocationType::gpuTimestampDeviceBuffer:
case AllocationType::timestampPacketTagBuffer:
expectedUsage = (forceUncached || productHelper.isDcFlushAllowed()) ? uncachedGmmUsageType
: GMM_RESOURCE_USAGE_OCL_BUFFER;
expectedUsage = (forceUncached || !productHelper.isNonCoherentTimestampsModeEnabled()) ? uncachedGmmUsageType
: GMM_RESOURCE_USAGE_OCL_BUFFER;
break;
case AllocationType::bufferHostMemory:
case AllocationType::externalHostPtr:
@ -884,14 +884,25 @@ TEST(GmmTest, givenAllocationTypeAndMitigatedDcFlushWhenGettingUsageTypeThenRetu
}
}
TEST(GmmTest, givenDebugFlagWhenTimestampAllocationsAreQueriedThenBufferPolicyIsReturned) {
TEST(GmmTest, whenTimestampAllocationsAreQueriedThenCorrectBufferPolicyIsReturned) {
DebugManagerStateRestore restorer;
debugManager.flags.ForceNonCoherentModeForTimestamps.set(1);
MockExecutionEnvironment mockExecutionEnvironment{};
const auto &productHelper = mockExecutionEnvironment.rootDeviceEnvironments[0]->getHelper<ProductHelper>();
auto expectedUsage = GMM_RESOURCE_USAGE_OCL_BUFFER;
auto uncachedType = productHelper.isNewCoherencyModelSupported() ? GMM_RESOURCE_USAGE_OCL_BUFFER_CSR_UC : GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED;
auto expectedUsage = uncachedType;
if (productHelper.isNonCoherentTimestampsModeEnabled()) {
expectedUsage = GMM_RESOURCE_USAGE_OCL_BUFFER;
}
EXPECT_EQ(expectedUsage, CacheSettingsHelper::getGmmUsageType(AllocationType::gpuTimestampDeviceBuffer, false, productHelper, defaultHwInfo.get()));
EXPECT_EQ(expectedUsage, CacheSettingsHelper::getGmmUsageType(AllocationType::timestampPacketTagBuffer, false, productHelper, defaultHwInfo.get()));
debugManager.flags.ForceNonCoherentModeForTimestamps.set(0);
EXPECT_EQ(uncachedType, CacheSettingsHelper::getGmmUsageType(AllocationType::gpuTimestampDeviceBuffer, false, productHelper, defaultHwInfo.get()));
EXPECT_EQ(uncachedType, CacheSettingsHelper::getGmmUsageType(AllocationType::timestampPacketTagBuffer, false, productHelper, defaultHwInfo.get()));
debugManager.flags.ForceNonCoherentModeForTimestamps.set(1);
EXPECT_EQ(GMM_RESOURCE_USAGE_OCL_BUFFER, CacheSettingsHelper::getGmmUsageType(AllocationType::gpuTimestampDeviceBuffer, false, productHelper, defaultHwInfo.get()));
EXPECT_EQ(GMM_RESOURCE_USAGE_OCL_BUFFER, CacheSettingsHelper::getGmmUsageType(AllocationType::timestampPacketTagBuffer, false, productHelper, defaultHwInfo.get()));
}
TEST(GmmTest, givenForceAllResourcesUncachedFlagSetWhenGettingUsageTypeThenReturnUncached) {