refactor: unify prefetch encode methods

Related-To: NEO-14703

Signed-off-by: Bartosz Dunajski <bartosz.dunajski@intel.com>
This commit is contained in:
Bartosz Dunajski
2025-06-02 13:55:17 +00:00
committed by Compute-Runtime-Automation
parent 76af9fbb31
commit a015188166
6 changed files with 51 additions and 147 deletions

View File

@@ -7,6 +7,7 @@
#include "shared/source/command_container/command_encoder.h"
#include "shared/source/command_stream/linear_stream.h"
#include "shared/source/gmm_helper/cache_settings_helper.h"
#include "shared/source/memory_manager/graphics_allocation.h"
#include "shared/source/utilities/lookup_array.h"
@@ -56,4 +57,51 @@ void EncodeDispatchKernel<Family>::programBarrierEnable(InterfaceDescriptorType
interfaceDescriptor.setNumberOfBarriers(numBarriers);
}
// Writes STATE_PREFETCH commands into commandStream for `size` bytes of
// graphicsAllocation, starting `offset` bytes past its GPU address.
//
// Returns without writing anything when ProductHelper::allowMemoryPrefetch()
// reports false for the current hardware. Otherwise the range is split into
// chunks of at most MemoryConstants::pageSize64k bytes; each chunk is rounded
// up to whole cache lines and described by exactly one command.
template <typename Family>
void EncodeMemoryPrefetch<Family>::programMemoryPrefetch(LinearStream &commandStream, const GraphicsAllocation &graphicsAllocation, uint32_t size, size_t offset, const RootDeviceEnvironment &rootDeviceEnvironment) {
    using STATE_PREFETCH = typename Family::STATE_PREFETCH;

    auto &hwInfo = *rootDeviceEnvironment.getHardwareInfo();
    auto &productHelper = rootDeviceEnvironment.getHelper<ProductHelper>();

    if (!productHelper.allowMemoryPrefetch(hwInfo)) {
        return;
    }

    // Everything below is identical for every chunk, so resolve it once.
    const auto allocationType = graphicsAllocation.getAllocationType();
    auto usage = CacheSettingsHelper::getGmmUsageType(allocationType, false, productHelper, &hwInfo);
    const uint32_t mocs = rootDeviceEnvironment.getGmmHelper()->getMOCS(usage);
    const bool isIsaAllocation = GraphicsAllocation::isIsaAllocationType(allocationType);
    const bool forceParserStall = debugManager.flags.ForceCsStallForStatePrefetch.get() == 1;
    const uint32_t maxChunkBytes = static_cast<uint32_t>(MemoryConstants::pageSize64k);

    uint64_t gpuVa = graphicsAllocation.getGpuAddress() + offset;

    while (size > 0) {
        // Clamp to the chunk limit *before* rounding up to a cache line. Rounding
        // the raw size first cannot be represented in uint32_t when size is within
        // one cache line of UINT32_MAX (it wraps to 0 with an add-and-mask
        // alignUp), which yields a zero-length chunk and a loop that never advances.
        const uint32_t chunkBytes = alignUp(std::min(size, maxChunkBytes), MemoryConstants::cacheLineSize);
        const uint32_t cacheLineCount = chunkBytes / MemoryConstants::cacheLineSize;

        auto statePrefetch = commandStream.getSpaceForCmd<STATE_PREFETCH>();
        STATE_PREFETCH cmd = Family::cmdInitStatePrefetch;

        cmd.setAddress(gpuVa);
        cmd.setPrefetchSize(cacheLineCount);
        cmd.setMemoryObjectControlState(mocs);
        cmd.setKernelInstructionPrefetch(isIsaAllocation);

        if (forceParserStall) {
            cmd.setParserStall(true);
        }

        *statePrefetch = cmd;

        // The last chunk may have been rounded up past the remaining byte count;
        // stop here instead of letting the unsigned subtraction below wrap.
        if (chunkBytes > size) {
            break;
        }

        gpuVa += chunkBytes;
        size -= chunkBytes;
    }
}
} // namespace NEO

View File

@@ -7178,10 +7178,10 @@ struct STATE_PREFETCH {
}
// Programs the prefetch length. `value` is a count that starts at 1; the
// underlying PrefetchSize field receives value - 1.
// NOTE(review): value == 0 wraps around in the unsigned subtraction before it
// is stored - callers presumably always pass at least 1; confirm.
inline void setPrefetchSize(const uint32_t value) {
    TheStructure.Common.PrefetchSize = value - 1; // patched
}
// Returns the prefetch length as a count that starts at 1, i.e. the stored
// PrefetchSize field plus one. This is the inverse of setPrefetchSize(), which
// stores value - 1.
inline uint32_t getPrefetchSize() const {
    return TheStructure.Common.PrefetchSize + 1; // patched
}
inline void setKernelInstructionPrefetch(const bool value) {

View File

@@ -16,7 +16,6 @@
#include "shared/source/command_container/command_encoder_xe_hpc_core_and_later.inl"
#include "shared/source/command_container/command_encoder_xehp_and_later.inl"
#include "shared/source/command_stream/stream_properties.h"
#include "shared/source/gmm_helper/cache_settings_helper.h"
#include "shared/source/helpers/constants.h"
#include "shared/source/kernel/grf_config.h"
#include "shared/source/release_helper/release_helper.h"
@@ -86,52 +85,6 @@ void EncodeComputeMode<Family>::programComputeModeCommand(LinearStream &csr, Sta
*buffer = stateComputeMode;
}
// Family-specific specialization: writes STATE_PREFETCH commands into
// commandStream for `size` bytes of graphicsAllocation, starting `offset`
// bytes past its GPU address.
//
// Returns without writing anything when ProductHelper::allowMemoryPrefetch()
// reports false for the current hardware. Otherwise the range is split into
// chunks of at most MemoryConstants::pageSize64k bytes; each chunk is rounded
// up to whole cache lines and described by exactly one command.
template <>
void EncodeMemoryPrefetch<Family>::programMemoryPrefetch(LinearStream &commandStream, const GraphicsAllocation &graphicsAllocation, uint32_t size, size_t offset, const RootDeviceEnvironment &rootDeviceEnvironment) {
    using STATE_PREFETCH = typename Family::STATE_PREFETCH;

    auto &hwInfo = *rootDeviceEnvironment.getHardwareInfo();
    auto &productHelper = rootDeviceEnvironment.getHelper<ProductHelper>();

    if (!productHelper.allowMemoryPrefetch(hwInfo)) {
        return;
    }

    // Everything below is identical for every chunk, so resolve it once.
    const auto allocationType = graphicsAllocation.getAllocationType();
    auto usage = CacheSettingsHelper::getGmmUsageType(allocationType, false, productHelper, &hwInfo);
    const uint32_t mocs = rootDeviceEnvironment.getGmmHelper()->getMOCS(usage);
    const bool isIsaAllocation = GraphicsAllocation::isIsaAllocationType(allocationType);
    const bool forceParserStall = debugManager.flags.ForceCsStallForStatePrefetch.get() == 1;
    const uint32_t maxChunkBytes = static_cast<uint32_t>(MemoryConstants::pageSize64k);

    uint64_t gpuVa = graphicsAllocation.getGpuAddress() + offset;

    while (size > 0) {
        // Clamp to the chunk limit *before* rounding up to a cache line. Rounding
        // the raw size first cannot be represented in uint32_t when size is within
        // one cache line of UINT32_MAX (it wraps to 0 with an add-and-mask
        // alignUp), which yields a zero-length chunk and a loop that never advances.
        const uint32_t chunkBytes = alignUp(std::min(size, maxChunkBytes), MemoryConstants::cacheLineSize);
        const uint32_t cacheLineCount = chunkBytes / MemoryConstants::cacheLineSize;

        auto statePrefetch = commandStream.getSpaceForCmd<STATE_PREFETCH>();
        STATE_PREFETCH cmd = Family::cmdInitStatePrefetch;

        cmd.setAddress(gpuVa);
        cmd.setPrefetchSize(cacheLineCount);
        cmd.setMemoryObjectControlState(mocs);
        cmd.setKernelInstructionPrefetch(isIsaAllocation);

        if (forceParserStall) {
            cmd.setParserStall(true);
        }

        *statePrefetch = cmd;

        // The last chunk may have been rounded up past the remaining byte count;
        // stop here instead of letting the unsigned subtraction below wrap.
        if (chunkBytes > size) {
            break;
        }

        gpuVa += chunkBytes;
        size -= chunkBytes;
    }
}
template <>
void EncodeSurfaceState<Family>::setAuxParamsForMCSCCS(R_SURFACE_STATE *surfaceState, const ReleaseHelper *releaseHelper) {
if (releaseHelper && releaseHelper->isAuxSurfaceModeOverrideRequired())

View File

@@ -15,7 +15,6 @@
#include "shared/source/command_container/command_encoder_xe_hpc_core_and_later.inl"
#include "shared/source/command_container/command_encoder_xehp_and_later.inl"
#include "shared/source/command_stream/stream_properties.h"
#include "shared/source/gmm_helper/cache_settings_helper.h"
#include "shared/source/helpers/cache_policy.h"
#include "shared/source/helpers/constants.h"
#include "shared/source/kernel/grf_config.h"
@@ -72,53 +71,6 @@ void EncodeComputeMode<Family>::programComputeModeCommand(LinearStream &csr, Sta
*buffer = stateComputeMode;
}
// Family-specific specialization: writes STATE_PREFETCH commands into
// commandStream for `size` bytes of graphicsAllocation, starting `offset`
// bytes past its GPU address.
//
// Returns without writing anything when ProductHelper::allowMemoryPrefetch()
// reports false for the current hardware. Otherwise the range is split into
// chunks of at most MemoryConstants::pageSize64k bytes; each chunk is rounded
// up to whole cache lines and described by exactly one command.
template <>
void EncodeMemoryPrefetch<Family>::programMemoryPrefetch(LinearStream &commandStream, const GraphicsAllocation &graphicsAllocation, uint32_t size, size_t offset, const RootDeviceEnvironment &rootDeviceEnvironment) {
    using STATE_PREFETCH = typename Family::STATE_PREFETCH;

    auto &hwInfo = *rootDeviceEnvironment.getHardwareInfo();
    auto &productHelper = rootDeviceEnvironment.getHelper<ProductHelper>();

    if (!productHelper.allowMemoryPrefetch(hwInfo)) {
        return;
    }

    // Everything below is identical for every chunk, so resolve it once.
    const auto allocationType = graphicsAllocation.getAllocationType();
    auto usage = CacheSettingsHelper::getGmmUsageType(allocationType, false, productHelper, &hwInfo);
    const uint32_t mocs = rootDeviceEnvironment.getGmmHelper()->getMOCS(usage);
    const bool isIsaAllocation = GraphicsAllocation::isIsaAllocationType(allocationType);
    const bool forceParserStall = debugManager.flags.ForceCsStallForStatePrefetch.get() == 1;
    const uint32_t maxChunkBytes = static_cast<uint32_t>(MemoryConstants::pageSize64k);

    uint64_t gpuVa = graphicsAllocation.getGpuAddress() + offset;

    while (size > 0) {
        // Clamp to the chunk limit *before* rounding up to a cache line. Rounding
        // the raw size first cannot be represented in uint32_t when size is within
        // one cache line of UINT32_MAX (it wraps to 0 with an add-and-mask
        // alignUp), which yields a zero-length chunk and a loop that never advances.
        const uint32_t chunkBytes = alignUp(std::min(size, maxChunkBytes), MemoryConstants::cacheLineSize);
        const uint32_t cacheLineCount = chunkBytes / MemoryConstants::cacheLineSize;

        auto statePrefetch = commandStream.getSpaceForCmd<STATE_PREFETCH>();
        STATE_PREFETCH cmd = Family::cmdInitStatePrefetch;

        cmd.setAddress(gpuVa);
        cmd.setPrefetchSize(cacheLineCount);
        cmd.setMemoryObjectControlState(mocs);
        cmd.setKernelInstructionPrefetch(isIsaAllocation);

        if (forceParserStall) {
            cmd.setParserStall(true);
        }

        *statePrefetch = cmd;

        // The last chunk may have been rounded up past the remaining byte count;
        // stop here instead of letting the unsigned subtraction below wrap.
        if (chunkBytes > size) {
            break;
        }

        gpuVa += chunkBytes;
        size -= chunkBytes;
    }
}
template <>
void EncodeSurfaceState<Family>::setAuxParamsForMCSCCS(R_SURFACE_STATE *surfaceState, const ReleaseHelper *releaseHelper) {
surfaceState->setAuxiliarySurfaceMode(AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_MCS);

View File

@@ -16,7 +16,6 @@
#include "shared/source/command_container/command_encoder_xe_hpg_core_and_xe_hpc.inl"
#include "shared/source/command_container/command_encoder_xehp_and_later.inl"
#include "shared/source/command_stream/stream_properties.h"
#include "shared/source/gmm_helper/cache_settings_helper.h"
#include "shared/source/helpers/constants.h"
#include "shared/source/kernel/grf_config.h"
#include "shared/source/release_helper/release_helper.h"
@@ -74,54 +73,6 @@ void EncodeComputeMode<Family>::programComputeModeCommand(LinearStream &csr, Sta
*buffer = stateComputeMode;
}
// Family-specific specialization: writes STATE_PREFETCH commands into
// commandStream for `size` bytes of graphicsAllocation, starting `offset`
// bytes past its GPU address.
//
// Returns without writing anything when ProductHelper::allowMemoryPrefetch()
// reports false for the current hardware. Otherwise the range is split into
// chunks of at most MemoryConstants::pageSize64k bytes; each chunk is rounded
// up to whole cache lines and described by exactly one command. This variant
// passes the cache-line count to setPrefetchSize() in zero-based form.
template <>
void EncodeMemoryPrefetch<Family>::programMemoryPrefetch(LinearStream &commandStream, const GraphicsAllocation &graphicsAllocation, uint32_t size, size_t offset, const RootDeviceEnvironment &rootDeviceEnvironment) {
    using STATE_PREFETCH = typename Family::STATE_PREFETCH;

    auto &hwInfo = *rootDeviceEnvironment.getHardwareInfo();
    auto &productHelper = rootDeviceEnvironment.getHelper<ProductHelper>();

    if (!productHelper.allowMemoryPrefetch(hwInfo)) {
        return;
    }

    // Everything below is identical for every chunk, so resolve it once.
    const auto allocationType = graphicsAllocation.getAllocationType();
    auto usage = CacheSettingsHelper::getGmmUsageType(allocationType, false, productHelper, &hwInfo);
    const uint32_t mocs = rootDeviceEnvironment.getGmmHelper()->getMOCS(usage);
    const bool isIsaAllocation = GraphicsAllocation::isIsaAllocationType(allocationType);
    const bool forceParserStall = debugManager.flags.ForceCsStallForStatePrefetch.get() == 1;
    const uint32_t maxChunkBytes = static_cast<uint32_t>(MemoryConstants::pageSize64k);

    uint64_t gpuVa = graphicsAllocation.getGpuAddress() + offset;

    while (size > 0) {
        // Clamp to the chunk limit *before* rounding up to a cache line. Rounding
        // the raw size first cannot be represented in uint32_t when size is within
        // one cache line of UINT32_MAX (it wraps to 0 with an add-and-mask
        // alignUp); that produced a zero-length chunk, made the "- 1" below
        // underflow, and left the loop unable to advance.
        const uint32_t chunkBytes = alignUp(std::min(size, maxChunkBytes), MemoryConstants::cacheLineSize);

        // zero based cacheline count (0 == 1 cacheline)
        const uint32_t zeroBasedCacheLines = (chunkBytes / MemoryConstants::cacheLineSize) - 1;

        auto statePrefetch = commandStream.getSpaceForCmd<STATE_PREFETCH>();
        STATE_PREFETCH cmd = Family::cmdInitStatePrefetch;

        cmd.setAddress(gpuVa);
        cmd.setPrefetchSize(zeroBasedCacheLines);
        cmd.setMemoryObjectControlState(mocs);
        cmd.setKernelInstructionPrefetch(isIsaAllocation);

        if (forceParserStall) {
            cmd.setParserStall(true);
        }

        *statePrefetch = cmd;

        // The last chunk may have been rounded up past the remaining byte count;
        // stop here instead of letting the unsigned subtraction below wrap.
        if (chunkBytes > size) {
            break;
        }

        gpuVa += chunkBytes;
        size -= chunkBytes;
    }
}
template <>
size_t EncodeMemoryPrefetch<Family>::getSizeForMemoryPrefetch(size_t size, const RootDeviceEnvironment &rootDeviceEnvironment) {
if (debugManager.flags.EnableMemoryPrefetch.get() == 0) {

View File

@@ -118,7 +118,7 @@ XE_HPC_CORETEST_F(CommandEncodeXeHpcCoreTest, givenDebugVariableSetwhenProgramin
EXPECT_EQ(sizeof(STATE_PREFETCH) * expectedCmdsCount, linearStream.getUsed());
for (uint32_t i = 0; i < expectedCmdsCount; i++) {
uint32_t programmedSize = (statePrefetchCmd[i].getPrefetchSize() + 1) * MemoryConstants::cacheLineSize;
uint32_t programmedSize = statePrefetchCmd[i].getPrefetchSize() * MemoryConstants::cacheLineSize;
EXPECT_EQ(statePrefetchCmd[i].getAddress(), gpuVa + (i * MemoryConstants::pageSize64k));
EXPECT_FALSE(statePrefetchCmd[i].getKernelInstructionPrefetch());