mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-29 09:03:14 +08:00
refactor: add setupTimestampPacketFlushL3 function
Signed-off-by: Kamil Kopryk <kamil.kopryk@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
c1d184fade
commit
73795ced64
@@ -1334,8 +1334,14 @@ bool CommandQueue::isWaitForTimestampsEnabled() const {
|
||||
auto &productHelper = getDevice().getProductHelper();
|
||||
|
||||
auto enabled = CommandQueue::isTimestampWaitEnabled();
|
||||
enabled &= productHelper.isTimestampWaitSupportedForQueues(false);
|
||||
enabled &= !productHelper.isDcFlushAllowed();
|
||||
enabled &= productHelper.isTimestampWaitSupportedForQueues(this->heaplessModeEnabled);
|
||||
|
||||
if (productHelper.isL3FlushAfterPostSyncRequired(this->heaplessModeEnabled)) {
|
||||
enabled &= true;
|
||||
} else {
|
||||
enabled &= !productHelper.isDcFlushAllowed();
|
||||
}
|
||||
|
||||
enabled &= !getDevice().getRootDeviceEnvironment().isWddmOnLinux();
|
||||
enabled &= !this->isOOQEnabled(); // TSP for OOQ dispatch is optional. We need to wait for task count.
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2018-2024 Intel Corporation
|
||||
* Copyright (C) 2018-2025 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -69,6 +69,13 @@ class GpgpuWalkerHelper {
|
||||
TagNodeBase *timestampPacketNode,
|
||||
const RootDeviceEnvironment &rootDeviceEnvironment);
|
||||
|
||||
// Static helper templated on the walker command type. Takes the walker command, the
// product helper and two flags: flush-L3-after-post-sync for host USM and for external
// allocations.
// NOTE(review): the flush semantics are not visible in this declaration - confirm against
// the per-family definition.
template <typename WalkerType>
|
||||
static void setupTimestampPacketFlushL3(
|
||||
WalkerType *walkerCmd,
|
||||
const ProductHelper &productHelper,
|
||||
bool flushL3AfterPostSyncForHostUsm,
|
||||
bool flushL3AfterPostSyncForExternalAllocation);
|
||||
|
||||
static void adjustMiStoreRegMemMode(MI_STORE_REG_MEM<GfxFamily> *storeCmd);
|
||||
|
||||
private:
|
||||
|
||||
@@ -160,4 +160,11 @@ size_t EnqueueOperation<GfxFamily>::getSizeRequiredCSNonKernel(bool reserveProfi
|
||||
return size;
|
||||
}
|
||||
|
||||
// Generic definition of setupTimestampPacketFlushL3. The body is empty, so walkerCmd is
// left unmodified and productHelper and both flush flags are unused here.
// NOTE(review): judging by the name and parameters this looks like a placeholder for
// per-family L3-flush-after-post-sync programming - confirm where a non-empty
// implementation is expected to live.
template <typename GfxFamily>
|
||||
template <typename WalkerType>
|
||||
void GpgpuWalkerHelper<GfxFamily>::setupTimestampPacketFlushL3(WalkerType *walkerCmd,
|
||||
const ProductHelper &productHelper,
|
||||
bool flushL3AfterPostSyncForHostUsm,
|
||||
bool flushL3AfterPostSyncForExternalAllocation) {
|
||||
}
|
||||
} // namespace NEO
|
||||
|
||||
@@ -84,19 +84,35 @@ inline void HardwareInterface<GfxFamily>::programWalker(
|
||||
auto &device = commandQueue.getDevice();
|
||||
auto &rootDeviceEnvironment = device.getRootDeviceEnvironment();
|
||||
|
||||
bool kernelSystemAllocation = false;
|
||||
if (kernel.isBuiltIn) {
|
||||
kernelSystemAllocation = kernel.getDestinationAllocationInSystemMemory();
|
||||
} else {
|
||||
kernelSystemAllocation = kernel.isAnyKernelArgumentUsingSystemMemory();
|
||||
}
|
||||
|
||||
TagNodeBase *timestampPacketNode = nullptr;
|
||||
if (walkerArgs.currentTimestampPacketNodes && (walkerArgs.currentTimestampPacketNodes->peekNodes().size() > walkerArgs.currentDispatchIndex)) {
|
||||
timestampPacketNode = walkerArgs.currentTimestampPacketNodes->peekNodes()[walkerArgs.currentDispatchIndex];
|
||||
}
|
||||
|
||||
constexpr bool heaplessModeEnabled = GfxFamily::template isHeaplessMode<WalkerType>();
|
||||
|
||||
if (timestampPacketNode) {
|
||||
|
||||
GpgpuWalkerHelper<GfxFamily>::template setupTimestampPacket<WalkerType>(&commandStream, &walkerCmd, timestampPacketNode, rootDeviceEnvironment);
|
||||
|
||||
if constexpr (heaplessModeEnabled) {
|
||||
auto &productHelper = rootDeviceEnvironment.getHelper<ProductHelper>();
|
||||
bool flushL3AfterPostSyncForHostUsm = kernelSystemAllocation;
|
||||
bool flushL3AfterPostSyncForExternalAllocation = kernel.isUsingSharedObjArgs();
|
||||
|
||||
GpgpuWalkerHelper<GfxFamily>::template setupTimestampPacketFlushL3<WalkerType>(&walkerCmd, productHelper, flushL3AfterPostSyncForHostUsm, flushL3AfterPostSyncForExternalAllocation);
|
||||
}
|
||||
}
|
||||
|
||||
auto isCcsUsed = EngineHelpers::isCcs(commandQueue.getGpgpuEngine().osContext->getEngineType());
|
||||
|
||||
constexpr bool heaplessModeEnabled = GfxFamily::template isHeaplessMode<WalkerType>();
|
||||
|
||||
if constexpr (heaplessModeEnabled == false) {
|
||||
if (auto kernelAllocation = kernelInfo.getGraphicsAllocation()) {
|
||||
EncodeMemoryPrefetch<GfxFamily>::programMemoryPrefetch(commandStream, *kernelAllocation, kernelInfo.heapInfo.kernelHeapSize, 0, rootDeviceEnvironment);
|
||||
@@ -135,13 +151,6 @@ inline void HardwareInterface<GfxFamily>::programWalker(
|
||||
scratchAddress,
|
||||
device);
|
||||
|
||||
bool kernelSystemAllocation = false;
|
||||
if (kernel.isBuiltIn) {
|
||||
kernelSystemAllocation = kernel.getDestinationAllocationInSystemMemory();
|
||||
} else {
|
||||
kernelSystemAllocation = kernel.isAnyKernelArgumentUsingSystemMemory();
|
||||
}
|
||||
|
||||
EncodeWalkerArgs encodeWalkerArgs{
|
||||
.kernelExecutionType = kernel.getExecutionType(),
|
||||
.requiredDispatchWalkOrder = kernelAttributes.dispatchWalkOrder,
|
||||
|
||||
@@ -297,6 +297,7 @@ template void HardwareInterface<Family>::dispatchKernelCommands<Family::DefaultW
|
||||
template Family::DefaultWalkerType *HardwareInterface<Family>::allocateWalkerSpace<Family::DefaultWalkerType>(LinearStream &commandStream, const Kernel &kernel);
|
||||
|
||||
template class GpgpuWalkerHelper<Family>;
|
||||
template void GpgpuWalkerHelper<Family>::setupTimestampPacketFlushL3<Family::DefaultWalkerType>(Family::DefaultWalkerType *walkerCmd, const ProductHelper &productHelper, bool flushL3AfterPostSyncForHostUsm, bool flushL3AfterPostSyncForExternalAllocation);
|
||||
template void GpgpuWalkerHelper<Family>::setupTimestampPacket<Family::DefaultWalkerType>(LinearStream *cmdStream, Family::DefaultWalkerType *walkerCmd, TagNodeBase *timestampPacketNode, const RootDeviceEnvironment &rootDeviceEnvironment);
|
||||
template size_t GpgpuWalkerHelper<Family>::setGpgpuWalkerThreadData<Family::DefaultWalkerType>(Family::DefaultWalkerType *walkerCmd, const KernelDescriptor &kernelDescriptor, const size_t startWorkGroups[3],
|
||||
const size_t numWorkGroups[3], const size_t localWorkSizesIn[3], uint32_t simd, uint32_t workDim, bool localIdsGenerationByRuntime, bool inlineDataProgrammingRequired, uint32_t requiredWorkGroupOrder);
|
||||
|
||||
@@ -89,4 +89,8 @@ bool ApiSpecificConfig::isGlobalStatelessEnabled(const RootDeviceEnvironment &ro
|
||||
return false;
|
||||
}
|
||||
|
||||
// Reports whether updating the tag from wait is enabled for heapless mode.
// This implementation returns true unconditionally.
// NOTE(review): presumably other API-specific variants of ApiSpecificConfig may answer
// differently - verify before relying on this value in shared code.
bool ApiSpecificConfig::isUpdateTagFromWaitEnabledForHeapless() {
|
||||
return true;
|
||||
}
|
||||
|
||||
} // namespace NEO
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2024 Intel Corporation
|
||||
* Copyright (C) 2024-2025 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -15,6 +15,8 @@ namespace NEO {
|
||||
using Family = Xe2HpgCoreFamily;
|
||||
|
||||
template class GpgpuWalkerHelper<Family>;
|
||||
|
||||
template void GpgpuWalkerHelper<Family>::setupTimestampPacketFlushL3<Family::DefaultWalkerType>(Family::DefaultWalkerType *walkerCmd, const ProductHelper &productHelper, bool flushL3AfterPostSyncForHostUsm, bool flushL3AfterPostSyncForExternalAllocation);
|
||||
template void GpgpuWalkerHelper<Family>::setupTimestampPacket<Family::DefaultWalkerType>(LinearStream *cmdStream, Family::DefaultWalkerType *walkerCmd, TagNodeBase *timestampPacketNode, const RootDeviceEnvironment &rootDeviceEnvironment);
|
||||
template size_t GpgpuWalkerHelper<Family>::setGpgpuWalkerThreadData<Family::DefaultWalkerType>(Family::DefaultWalkerType *walkerCmd, const KernelDescriptor &kernelDescriptor, const size_t startWorkGroups[3],
|
||||
const size_t numWorkGroups[3], const size_t localWorkSizesIn[3], uint32_t simd, uint32_t workDim, bool localIdsGenerationByRuntime, bool inlineDataProgrammingRequired, uint32_t requiredWorkGroupOrder);
|
||||
|
||||
@@ -16,6 +16,8 @@ namespace NEO {
|
||||
using Family = Xe3CoreFamily;
|
||||
|
||||
template class GpgpuWalkerHelper<Family>;
|
||||
|
||||
template void GpgpuWalkerHelper<Family>::setupTimestampPacketFlushL3<Family::DefaultWalkerType>(Family::DefaultWalkerType *walkerCmd, const ProductHelper &productHelper, bool flushL3AfterPostSyncForHostUsm, bool flushL3AfterPostSyncForExternalAllocation);
|
||||
template void GpgpuWalkerHelper<Family>::setupTimestampPacket<Family::DefaultWalkerType>(LinearStream *cmdStream, Family::DefaultWalkerType *walkerCmd, TagNodeBase *timestampPacketNode, const RootDeviceEnvironment &rootDeviceEnvironment);
|
||||
template size_t GpgpuWalkerHelper<Family>::setGpgpuWalkerThreadData<Family::DefaultWalkerType>(Family::DefaultWalkerType *walkerCmd, const KernelDescriptor &kernelDescriptor, const size_t startWorkGroups[3],
|
||||
const size_t numWorkGroups[3], const size_t localWorkSizesIn[3], uint32_t simd, uint32_t workDim, bool localIdsGenerationByRuntime, bool inlineDataProgrammingRequired, uint32_t requiredWorkGroupOrder);
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2021-2024 Intel Corporation
|
||||
* Copyright (C) 2021-2025 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -23,6 +23,8 @@ void GpgpuWalkerHelper<Family>::setSystolicModeEnable(Family::COMPUTE_WALKER *wa
|
||||
}
|
||||
|
||||
template class GpgpuWalkerHelper<Family>;
|
||||
|
||||
template void GpgpuWalkerHelper<Family>::setupTimestampPacketFlushL3<Family::DefaultWalkerType>(Family::DefaultWalkerType *walkerCmd, const ProductHelper &productHelper, bool flushL3AfterPostSyncForHostUsm, bool flushL3AfterPostSyncForExternalAllocation);
|
||||
template void GpgpuWalkerHelper<Family>::setupTimestampPacket<Family::DefaultWalkerType>(LinearStream *cmdStream, Family::DefaultWalkerType *walkerCmd, TagNodeBase *timestampPacketNode, const RootDeviceEnvironment &rootDeviceEnvironment);
|
||||
template size_t GpgpuWalkerHelper<Family>::setGpgpuWalkerThreadData<Family::DefaultWalkerType>(Family::DefaultWalkerType *walkerCmd, const KernelDescriptor &kernelDescriptor, const size_t startWorkGroups[3],
|
||||
const size_t numWorkGroups[3], const size_t localWorkSizesIn[3], uint32_t simd, uint32_t workDim, bool localIdsGenerationByRuntime, bool inlineDataProgrammingRequired, uint32_t requiredWorkGroupOrder);
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2021-2024 Intel Corporation
|
||||
* Copyright (C) 2021-2025 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -23,6 +23,8 @@ void GpgpuWalkerHelper<Family>::setSystolicModeEnable(Family::COMPUTE_WALKER *wa
|
||||
}
|
||||
|
||||
template class GpgpuWalkerHelper<Family>;
|
||||
|
||||
template void GpgpuWalkerHelper<Family>::setupTimestampPacketFlushL3<Family::DefaultWalkerType>(Family::DefaultWalkerType *walkerCmd, const ProductHelper &productHelper, bool flushL3AfterPostSyncForHostUsm, bool flushL3AfterPostSyncForExternalAllocation);
|
||||
template void GpgpuWalkerHelper<Family>::setupTimestampPacket<Family::DefaultWalkerType>(LinearStream *cmdStream, Family::DefaultWalkerType *walkerCmd, TagNodeBase *timestampPacketNode, const RootDeviceEnvironment &rootDeviceEnvironment);
|
||||
template size_t GpgpuWalkerHelper<Family>::setGpgpuWalkerThreadData<Family::DefaultWalkerType>(Family::DefaultWalkerType *walkerCmd, const KernelDescriptor &kernelDescriptor, const size_t startWorkGroups[3],
|
||||
const size_t numWorkGroups[3], const size_t localWorkSizesIn[3], uint32_t simd, uint32_t workDim, bool localIdsGenerationByRuntime, bool inlineDataProgrammingRequired, uint32_t requiredWorkGroupOrder);
|
||||
|
||||
@@ -170,7 +170,18 @@ TEST(CommandQueue, givenEnableTimestampWaitWhenCheckIsTimestampWaitEnabledThenRe
|
||||
{
|
||||
debugManager.flags.EnableTimestampWaitForQueues.set(-1);
|
||||
const auto &productHelper = mockDevice->getProductHelper();
|
||||
EXPECT_EQ(cmdQ.isWaitForTimestampsEnabled(), productHelper.isTimestampWaitSupportedForQueues(false) && !productHelper.isDcFlushAllowed());
|
||||
const auto &compilerProductHelper = mockDevice->getCompilerProductHelper();
|
||||
bool heaplessEnabled = compilerProductHelper.isHeaplessModeEnabled();
|
||||
|
||||
auto enabled = productHelper.isTimestampWaitSupportedForQueues(heaplessEnabled);
|
||||
|
||||
if (productHelper.isL3FlushAfterPostSyncRequired(heaplessEnabled)) {
|
||||
enabled &= true;
|
||||
} else {
|
||||
enabled &= !productHelper.isDcFlushAllowed();
|
||||
}
|
||||
|
||||
EXPECT_EQ(enabled, cmdQ.isWaitForTimestampsEnabled());
|
||||
}
|
||||
|
||||
{
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2018-2024 Intel Corporation
|
||||
* Copyright (C) 2018-2025 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -2015,7 +2015,7 @@ TEST(EventTimestampTest, givenEnableTimestampWaitWhenCheckIsTimestampWaitEnabled
|
||||
{
|
||||
debugManager.flags.EnableTimestampWaitForEvents.set(-1);
|
||||
const auto &productHelper = mockDevice->getRootDeviceEnvironment().getHelper<ProductHelper>();
|
||||
EXPECT_EQ(event.isWaitForTimestampsEnabled(), productHelper.isTimestampWaitSupportedForEvents());
|
||||
EXPECT_EQ(event.isWaitForTimestampsEnabled(), productHelper.isTimestampWaitSupportedForEvents() && cmdQ.isTimestampWaitEnabled());
|
||||
}
|
||||
|
||||
{
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2021-2024 Intel Corporation
|
||||
* Copyright (C) 2021-2025 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -80,4 +80,8 @@ TEST(ApiSpecificConfigOclTests, WhenCheckingIfCompilerCacheIsEnabledByDefaultThe
|
||||
EXPECT_EQ(1u, ApiSpecificConfig::compilerCacheDefaultEnabled());
|
||||
}
|
||||
|
||||
// Verifies that ApiSpecificConfig::isUpdateTagFromWaitEnabledForHeapless() returns true
// in the configuration these tests are built against.
TEST(ApiSpecificConfigOclTests, WhenCheckingIsUpdateTagFromWaitEnabledForHeaplessThenTrueIsReturned) {
|
||||
EXPECT_TRUE(ApiSpecificConfig::isUpdateTagFromWaitEnabledForHeapless());
|
||||
}
|
||||
|
||||
} // namespace NEO
|
||||
|
||||
Reference in New Issue
Block a user