performance: set timestamps as non-coherent since xe2

Related-To: NEO-14360

Signed-off-by: Szymon Morek <szymon.morek@intel.com>
This commit is contained in:
Szymon Morek 2025-05-20 14:41:24 +00:00 committed by Compute-Runtime-Automation
parent ed7482751b
commit 3ba25459e8
9 changed files with 46 additions and 20 deletions

View File

@ -383,11 +383,11 @@ DECLARE_DEBUG_VARIABLE(bool, DisableStatelessToStatefulOptimization, false, "Dis
DECLARE_DEBUG_VARIABLE(bool, DisableConcurrentBlockExecution, false, "disables concurrent block kernel execution") DECLARE_DEBUG_VARIABLE(bool, DisableConcurrentBlockExecution, false, "disables concurrent block kernel execution")
DECLARE_DEBUG_VARIABLE(bool, UseNoRingFlushesKmdMode, true, "Windows only, passes flag to KMD that informs KMD to not emit any ring buffer flushes.") DECLARE_DEBUG_VARIABLE(bool, UseNoRingFlushesKmdMode, true, "Windows only, passes flag to KMD that informs KMD to not emit any ring buffer flushes.")
DECLARE_DEBUG_VARIABLE(bool, DisableZeroCopyForUseHostPtr, false, "When active all buffer allocations created with CL_MEM_USE_HOST_PTR flag will not share memory with CPU.") DECLARE_DEBUG_VARIABLE(bool, DisableZeroCopyForUseHostPtr, false, "When active all buffer allocations created with CL_MEM_USE_HOST_PTR flag will not share memory with CPU.")
DECLARE_DEBUG_VARIABLE(bool, ForceNonCoherentModeForTimestamps, false, "When active timestamp buffers are allocated in non coherent memory.")
DECLARE_DEBUG_VARIABLE(bool, SetAssumeNotInUse, true, "Set AssumeNotInUse flag in d3d destroy allocation.") DECLARE_DEBUG_VARIABLE(bool, SetAssumeNotInUse, true, "Set AssumeNotInUse flag in d3d destroy allocation.")
DECLARE_DEBUG_VARIABLE(bool, MitigateHostVisibleSignal, false, "Reset host visible signal in CB events, flush L3 when synchronize") DECLARE_DEBUG_VARIABLE(bool, MitigateHostVisibleSignal, false, "Reset host visible signal in CB events, flush L3 when synchronize")
DECLARE_DEBUG_VARIABLE(bool, ForceZeroCopyForUseHostPtr, false, "When active all buffer allocations created with CL_MEM_USE_HOST_PTR flag will use share memory with CPU.") DECLARE_DEBUG_VARIABLE(bool, ForceZeroCopyForUseHostPtr, false, "When active all buffer allocations created with CL_MEM_USE_HOST_PTR flag will use share memory with CPU.")
DECLARE_DEBUG_VARIABLE(bool, DummyPageBackingEnabled, false, "When true, pass page backing flag to KMD to recover from page faults. Windows only."); DECLARE_DEBUG_VARIABLE(bool, DummyPageBackingEnabled, false, "When true, pass page backing flag to KMD to recover from page faults. Windows only.");
// Tri-state override consulted by ProductHelperHw::isNonCoherentTimestampsModeEnabled(); -1 defers to the product default.
DECLARE_DEBUG_VARIABLE(int32_t, ForceNonCoherentModeForTimestamps, -1, "Allocate timestamp buffers in non coherent memory. -1: default platform setting, 0: disabled, 1: enabled")
DECLARE_DEBUG_VARIABLE(int32_t, EnableReusingGpuTimestamps, -1, "Reuse GPU timestamp for next device time requests. -1: os-specific, 0: disable, 1: enable") DECLARE_DEBUG_VARIABLE(int32_t, EnableReusingGpuTimestamps, -1, "Reuse GPU timestamp for next device time requests. -1: os-specific, 0: disable, 1: enable")
DECLARE_DEBUG_VARIABLE(int32_t, AllowZeroCopyWithoutCoherency, -1, "Use cacheline flush instead of memory copy for map/unmap mem object") DECLARE_DEBUG_VARIABLE(int32_t, AllowZeroCopyWithoutCoherency, -1, "Use cacheline flush instead of memory copy for map/unmap mem object")
DECLARE_DEBUG_VARIABLE(int32_t, EnableHostPtrTracking, -1, "Enable host ptr tracking: -1 - default platform setting, 0 - disabled, 1 - enabled") DECLARE_DEBUG_VARIABLE(int32_t, EnableHostPtrTracking, -1, "Enable host ptr tracking: -1 - default platform setting, 0 - disabled, 1 - enabled")

View File

@ -99,13 +99,10 @@ GMM_RESOURCE_USAGE_TYPE_ENUM CacheSettingsHelper::getDefaultUsageTypeWithCaching
return GMM_RESOURCE_USAGE_OCL_SYSTEM_MEMORY_BUFFER; return GMM_RESOURCE_USAGE_OCL_SYSTEM_MEMORY_BUFFER;
case AllocationType::gpuTimestampDeviceBuffer: case AllocationType::gpuTimestampDeviceBuffer:
case AllocationType::timestampPacketTagBuffer: case AllocationType::timestampPacketTagBuffer:
if (debugManager.flags.ForceNonCoherentModeForTimestamps.get()) { if (productHelper.isNonCoherentTimestampsModeEnabled()) {
return GMM_RESOURCE_USAGE_OCL_BUFFER; return GMM_RESOURCE_USAGE_OCL_BUFFER;
} }
if (productHelper.isDcFlushAllowed()) {
return getDefaultUsageTypeWithCachingDisabled(allocationType, productHelper); return getDefaultUsageTypeWithCachingDisabled(allocationType, productHelper);
}
return GMM_RESOURCE_USAGE_OCL_BUFFER;
default: default:
return GMM_RESOURCE_USAGE_OCL_BUFFER; return GMM_RESOURCE_USAGE_OCL_BUFFER;
} }
@ -118,12 +115,6 @@ GMM_RESOURCE_USAGE_TYPE_ENUM CacheSettingsHelper::getDefaultUsageTypeWithCaching
case AllocationType::internalHeap: case AllocationType::internalHeap:
case AllocationType::linearStream: case AllocationType::linearStream:
return GMM_RESOURCE_USAGE_OCL_SYSTEM_MEMORY_BUFFER_CACHELINE_MISALIGNED; return GMM_RESOURCE_USAGE_OCL_SYSTEM_MEMORY_BUFFER_CACHELINE_MISALIGNED;
case AllocationType::timestampPacketTagBuffer:
case AllocationType::gpuTimestampDeviceBuffer:
if (debugManager.flags.ForceNonCoherentModeForTimestamps.get()) {
return GMM_RESOURCE_USAGE_OCL_BUFFER;
}
[[fallthrough]];
default: default:
return productHelper.isNewCoherencyModelSupported() ? GMM_RESOURCE_USAGE_OCL_BUFFER_CSR_UC : GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED; return productHelper.isNewCoherencyModelSupported() ? GMM_RESOURCE_USAGE_OCL_BUFFER_CSR_UC : GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED;
} }

View File

@ -269,6 +269,8 @@ class ProductHelper {
virtual bool isCompressionForbidden(const HardwareInfo &hwInfo) const = 0; virtual bool isCompressionForbidden(const HardwareInfo &hwInfo) const = 0;
virtual bool isExposingSubdevicesAllowed() const = 0; virtual bool isExposingSubdevicesAllowed() const = 0;
virtual bool useAdditionalBlitProperties() const = 0; virtual bool useAdditionalBlitProperties() const = 0;
virtual bool isNonCoherentTimestampsModeEnabled() const = 0;
virtual bool getStorageInfoLocalOnlyFlag(LocalMemAllocationMode usmDeviceAllocationMode, bool defaultValue) const = 0; virtual bool getStorageInfoLocalOnlyFlag(LocalMemAllocationMode usmDeviceAllocationMode, bool defaultValue) const = 0;
virtual ~ProductHelper() = default; virtual ~ProductHelper() = default;

View File

@ -50,4 +50,12 @@ bool ProductHelperHw<gfxProduct>::isResourceUncachedForCS(AllocationType allocat
return false; return false;
} }
// Reports whether timestamp allocations should use the non-coherent mode.
// The ForceNonCoherentModeForTimestamps debug flag wins whenever it is set to
// anything other than -1; otherwise the mode is enabled exactly when DC flush
// is not allowed for this product.
template <PRODUCT_FAMILY gfxProduct>
bool ProductHelperHw<gfxProduct>::isNonCoherentTimestampsModeEnabled() const {
    const auto forcedMode = debugManager.flags.ForceNonCoherentModeForTimestamps.get();
    if (forcedMode == -1) {
        // No override requested: derive the answer from the DC flush policy.
        return !this->isDcFlushAllowed();
    }
    return forcedMode != 0;
}
} // namespace NEO } // namespace NEO

View File

@ -206,6 +206,7 @@ class ProductHelperHw : public ProductHelper {
bool isCompressionForbidden(const HardwareInfo &hwInfo) const override; bool isCompressionForbidden(const HardwareInfo &hwInfo) const override;
bool isExposingSubdevicesAllowed() const override; bool isExposingSubdevicesAllowed() const override;
bool useAdditionalBlitProperties() const override; bool useAdditionalBlitProperties() const override;
bool isNonCoherentTimestampsModeEnabled() const override;
bool getStorageInfoLocalOnlyFlag(LocalMemAllocationMode usmDeviceAllocationMode, bool defaultValue) const override; bool getStorageInfoLocalOnlyFlag(LocalMemAllocationMode usmDeviceAllocationMode, bool defaultValue) const override;
~ProductHelperHw() override = default; ~ProductHelperHw() override = default;

View File

@ -53,4 +53,12 @@ bool ProductHelperHw<gfxProduct>::isResourceUncachedForCS(AllocationType allocat
return GraphicsAllocation::isAccessedFromCommandStreamer(allocationType); return GraphicsAllocation::isAccessedFromCommandStreamer(allocationType);
} }
// Reports whether timestamp allocations should use the non-coherent mode.
// The ForceNonCoherentModeForTimestamps debug flag wins whenever it is set to
// anything other than -1; without an override this variant enables the mode.
template <PRODUCT_FAMILY gfxProduct>
bool ProductHelperHw<gfxProduct>::isNonCoherentTimestampsModeEnabled() const {
    const auto forcedMode = debugManager.flags.ForceNonCoherentModeForTimestamps.get();
    const bool overrideRequested = (forcedMode != -1);
    return overrideRequested ? (forcedMode != 0) : true;
}
} // namespace NEO } // namespace NEO

View File

@ -483,6 +483,11 @@ bool ProductHelperHw<IGFX_UNKNOWN>::isResourceUncachedForCS(AllocationType alloc
return false; return false;
} }
// Specialization for IGFX_UNKNOWN: always reports the non-coherent timestamps
// mode as disabled. Unlike the generic implementations shown in this change,
// it does not read the ForceNonCoherentModeForTimestamps debug flag.
template <>
bool ProductHelperHw<IGFX_UNKNOWN>::isNonCoherentTimestampsModeEnabled() const {
return false;
}
} // namespace NEO } // namespace NEO
#include "shared/source/os_interface/product_helper.inl" #include "shared/source/os_interface/product_helper.inl"

View File

@ -621,7 +621,7 @@ ForceComputeWalkerPostSyncFlushWithWrite = -1
DeferStateInitSubmissionToFirstRegularUsage = -1 DeferStateInitSubmissionToFirstRegularUsage = -1
WaitForPagingFenceInController = -1 WaitForPagingFenceInController = -1
DirectSubmissionPrintSemaphoreUsage = -1 DirectSubmissionPrintSemaphoreUsage = -1
ForceNonCoherentModeForTimestamps = 0 ForceNonCoherentModeForTimestamps = -1
SetAssumeNotInUse = 1 SetAssumeNotInUse = 1
ExperimentalUSMAllocationReuseVersion = -1 ExperimentalUSMAllocationReuseVersion = -1
ForceNonWalkerSplitMemoryCopy = -1 ForceNonWalkerSplitMemoryCopy = -1

View File

@ -707,7 +707,7 @@ TEST(GmmTest, givenAllocationTypeWhenGettingUsageTypeThenReturnCorrectValue) {
break; break;
case AllocationType::gpuTimestampDeviceBuffer: case AllocationType::gpuTimestampDeviceBuffer:
case AllocationType::timestampPacketTagBuffer: case AllocationType::timestampPacketTagBuffer:
expectedUsage = (forceUncached || productHelper.isDcFlushAllowed()) ? uncachedGmmUsageType expectedUsage = (forceUncached || !productHelper.isNonCoherentTimestampsModeEnabled()) ? uncachedGmmUsageType
: GMM_RESOURCE_USAGE_OCL_BUFFER; : GMM_RESOURCE_USAGE_OCL_BUFFER;
break; break;
case AllocationType::bufferHostMemory: case AllocationType::bufferHostMemory:
@ -754,7 +754,7 @@ TEST(GmmTest, givenAllocationTypeWhenGettingUsageTypeThenReturnCorrectValue) {
break; break;
case AllocationType::gpuTimestampDeviceBuffer: case AllocationType::gpuTimestampDeviceBuffer:
case AllocationType::timestampPacketTagBuffer: case AllocationType::timestampPacketTagBuffer:
expectedUsage = (forceUncached || productHelper.isDcFlushAllowed()) ? uncachedGmmUsageType expectedUsage = (forceUncached || !productHelper.isNonCoherentTimestampsModeEnabled()) ? uncachedGmmUsageType
: GMM_RESOURCE_USAGE_OCL_BUFFER; : GMM_RESOURCE_USAGE_OCL_BUFFER;
break; break;
case AllocationType::bufferHostMemory: case AllocationType::bufferHostMemory:
@ -884,14 +884,25 @@ TEST(GmmTest, givenAllocationTypeAndMitigatedDcFlushWhenGettingUsageTypeThenRetu
} }
} }
TEST(GmmTest, givenDebugFlagWhenTimestampAllocationsAreQueriedThenBufferPolicyIsReturned) { TEST(GmmTest, whenTimestampAllocationsAreQueriedThenCorrectBufferPolicyIsReturned) {
DebugManagerStateRestore restorer; DebugManagerStateRestore restorer;
debugManager.flags.ForceNonCoherentModeForTimestamps.set(1);
MockExecutionEnvironment mockExecutionEnvironment{}; MockExecutionEnvironment mockExecutionEnvironment{};
const auto &productHelper = mockExecutionEnvironment.rootDeviceEnvironments[0]->getHelper<ProductHelper>(); const auto &productHelper = mockExecutionEnvironment.rootDeviceEnvironments[0]->getHelper<ProductHelper>();
auto expectedUsage = GMM_RESOURCE_USAGE_OCL_BUFFER; auto uncachedType = productHelper.isNewCoherencyModelSupported() ? GMM_RESOURCE_USAGE_OCL_BUFFER_CSR_UC : GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED;
auto expectedUsage = uncachedType;
if (productHelper.isNonCoherentTimestampsModeEnabled()) {
expectedUsage = GMM_RESOURCE_USAGE_OCL_BUFFER;
}
EXPECT_EQ(expectedUsage, CacheSettingsHelper::getGmmUsageType(AllocationType::gpuTimestampDeviceBuffer, false, productHelper, defaultHwInfo.get())); EXPECT_EQ(expectedUsage, CacheSettingsHelper::getGmmUsageType(AllocationType::gpuTimestampDeviceBuffer, false, productHelper, defaultHwInfo.get()));
EXPECT_EQ(expectedUsage, CacheSettingsHelper::getGmmUsageType(AllocationType::timestampPacketTagBuffer, false, productHelper, defaultHwInfo.get())); EXPECT_EQ(expectedUsage, CacheSettingsHelper::getGmmUsageType(AllocationType::timestampPacketTagBuffer, false, productHelper, defaultHwInfo.get()));
debugManager.flags.ForceNonCoherentModeForTimestamps.set(0);
EXPECT_EQ(uncachedType, CacheSettingsHelper::getGmmUsageType(AllocationType::gpuTimestampDeviceBuffer, false, productHelper, defaultHwInfo.get()));
EXPECT_EQ(uncachedType, CacheSettingsHelper::getGmmUsageType(AllocationType::timestampPacketTagBuffer, false, productHelper, defaultHwInfo.get()));
debugManager.flags.ForceNonCoherentModeForTimestamps.set(1);
EXPECT_EQ(GMM_RESOURCE_USAGE_OCL_BUFFER, CacheSettingsHelper::getGmmUsageType(AllocationType::gpuTimestampDeviceBuffer, false, productHelper, defaultHwInfo.get()));
EXPECT_EQ(GMM_RESOURCE_USAGE_OCL_BUFFER, CacheSettingsHelper::getGmmUsageType(AllocationType::timestampPacketTagBuffer, false, productHelper, defaultHwInfo.get()));
} }
TEST(GmmTest, givenForceAllResourcesUncachedFlagSetWhenGettingUsageTypeThenReturnUncached) { TEST(GmmTest, givenForceAllResourcesUncachedFlagSetWhenGettingUsageTypeThenReturnUncached) {