diff --git a/shared/source/command_container/command_encoder_xe_hpc_core_and_later.inl b/shared/source/command_container/command_encoder_xe_hpc_core_and_later.inl index 1a62c3334d..dd3da591cf 100644 --- a/shared/source/command_container/command_encoder_xe_hpc_core_and_later.inl +++ b/shared/source/command_container/command_encoder_xe_hpc_core_and_later.inl @@ -7,6 +7,7 @@ #include "shared/source/command_container/command_encoder.h" #include "shared/source/command_stream/linear_stream.h" +#include "shared/source/gmm_helper/cache_settings_helper.h" #include "shared/source/memory_manager/graphics_allocation.h" #include "shared/source/utilities/lookup_array.h" @@ -56,4 +57,51 @@ void EncodeDispatchKernel::programBarrierEnable(InterfaceDescriptorType interfaceDescriptor.setNumberOfBarriers(numBarriers); } +template +void EncodeMemoryPrefetch::programMemoryPrefetch(LinearStream &commandStream, const GraphicsAllocation &graphicsAllocation, uint32_t size, size_t offset, const RootDeviceEnvironment &rootDeviceEnvironment) { + using STATE_PREFETCH = typename Family::STATE_PREFETCH; + + auto &hwInfo = *rootDeviceEnvironment.getHardwareInfo(); + auto &productHelper = rootDeviceEnvironment.getHelper(); + + bool prefetch = productHelper.allowMemoryPrefetch(hwInfo); + + if (!prefetch) { + return; + } + + auto usage = CacheSettingsHelper::getGmmUsageType(graphicsAllocation.getAllocationType(), false, productHelper, &hwInfo); + uint32_t mocs = rootDeviceEnvironment.getGmmHelper()->getMOCS(usage); + + uint64_t gpuVa = graphicsAllocation.getGpuAddress() + offset; + + while (size > 0) { + uint32_t sizeInBytesToPrefetch = std::min(alignUp(size, MemoryConstants::cacheLineSize), + static_cast(MemoryConstants::pageSize64k)); + + uint32_t prefetchSize = sizeInBytesToPrefetch / MemoryConstants::cacheLineSize; + + auto statePrefetch = commandStream.getSpaceForCmd(); + STATE_PREFETCH cmd = Family::cmdInitStatePrefetch; + + cmd.setAddress(gpuVa); + cmd.setPrefetchSize(prefetchSize); + cmd.setMemoryObjectControlState(mocs); + cmd.setKernelInstructionPrefetch(GraphicsAllocation::isIsaAllocationType(graphicsAllocation.getAllocationType())); + + if (debugManager.flags.ForceCsStallForStatePrefetch.get() == 1) { + cmd.setParserStall(true); + } + + *statePrefetch = cmd; + + if (sizeInBytesToPrefetch > size) { + break; + } + + gpuVa += sizeInBytesToPrefetch; + size -= sizeInBytesToPrefetch; + } +} + } // namespace NEO \ No newline at end of file diff --git a/shared/source/generated/xe_hpc_core/hw_cmds_generated_xe_hpc_core.inl b/shared/source/generated/xe_hpc_core/hw_cmds_generated_xe_hpc_core.inl index 1c055823e7..c54d3bc6c0 100644 --- a/shared/source/generated/xe_hpc_core/hw_cmds_generated_xe_hpc_core.inl +++ b/shared/source/generated/xe_hpc_core/hw_cmds_generated_xe_hpc_core.inl @@ -7178,10 +7178,10 @@ struct STATE_PREFETCH { } inline void setPrefetchSize(const uint32_t value) { - TheStructure.Common.PrefetchSize = value; + TheStructure.Common.PrefetchSize = value - 1; // patched } inline uint32_t getPrefetchSize() const { - return TheStructure.Common.PrefetchSize; + return TheStructure.Common.PrefetchSize + 1; // patched } inline void setKernelInstructionPrefetch(const bool value) { diff --git a/shared/source/xe2_hpg_core/command_encoder_xe2_hpg_core.cpp b/shared/source/xe2_hpg_core/command_encoder_xe2_hpg_core.cpp index 1bc761e516..15698028ce 100644 --- a/shared/source/xe2_hpg_core/command_encoder_xe2_hpg_core.cpp +++ b/shared/source/xe2_hpg_core/command_encoder_xe2_hpg_core.cpp @@ -16,7 +16,6 @@ #include "shared/source/command_container/command_encoder_xe_hpc_core_and_later.inl" #include "shared/source/command_container/command_encoder_xehp_and_later.inl" #include "shared/source/command_stream/stream_properties.h" -#include "shared/source/gmm_helper/cache_settings_helper.h" #include "shared/source/helpers/constants.h" #include "shared/source/kernel/grf_config.h" #include "shared/source/release_helper/release_helper.h" @@ -86,52 +85,6 @@ void EncodeComputeMode::programComputeModeCommand(LinearStream &csr, Sta *buffer = stateComputeMode; } -template <> -void EncodeMemoryPrefetch::programMemoryPrefetch(LinearStream &commandStream, const GraphicsAllocation &graphicsAllocation, uint32_t size, size_t offset, const RootDeviceEnvironment &rootDeviceEnvironment) { - using STATE_PREFETCH = typename Family::STATE_PREFETCH; - - auto &hwInfo = *rootDeviceEnvironment.getHardwareInfo(); - auto &productHelper = rootDeviceEnvironment.getHelper(); - bool prefetch = productHelper.allowMemoryPrefetch(hwInfo); - - if (!prefetch) { - return; - } - - auto usage = CacheSettingsHelper::getGmmUsageType(graphicsAllocation.getAllocationType(), false, productHelper, &hwInfo); - uint32_t mocs = rootDeviceEnvironment.getGmmHelper()->getMOCS(usage); - - uint64_t gpuVa = graphicsAllocation.getGpuAddress() + offset; - - while (size > 0) { - uint32_t sizeInBytesToPrefetch = std::min(alignUp(size, MemoryConstants::cacheLineSize), - static_cast(MemoryConstants::pageSize64k)); - - uint32_t prefetchSize = sizeInBytesToPrefetch / MemoryConstants::cacheLineSize; - - auto statePrefetch = commandStream.getSpaceForCmd(); - STATE_PREFETCH cmd = Family::cmdInitStatePrefetch; - - cmd.setAddress(gpuVa); - cmd.setPrefetchSize(prefetchSize); - cmd.setMemoryObjectControlState(mocs); - cmd.setKernelInstructionPrefetch(GraphicsAllocation::isIsaAllocationType(graphicsAllocation.getAllocationType())); - - if (debugManager.flags.ForceCsStallForStatePrefetch.get() == 1) { - cmd.setParserStall(true); - } - - *statePrefetch = cmd; - - if (sizeInBytesToPrefetch > size) { - break; - } - - gpuVa += sizeInBytesToPrefetch; - size -= sizeInBytesToPrefetch; - } -} - template <> void EncodeSurfaceState::setAuxParamsForMCSCCS(R_SURFACE_STATE *surfaceState, const ReleaseHelper *releaseHelper) { if (releaseHelper && releaseHelper->isAuxSurfaceModeOverrideRequired()) diff --git a/shared/source/xe3_core/command_encoder_xe3_core.cpp b/shared/source/xe3_core/command_encoder_xe3_core.cpp index d8d1f3a1a5..3ede5261fe 100644 --- a/shared/source/xe3_core/command_encoder_xe3_core.cpp +++ b/shared/source/xe3_core/command_encoder_xe3_core.cpp @@ -15,7 +15,6 @@ #include "shared/source/command_container/command_encoder_xe_hpc_core_and_later.inl" #include "shared/source/command_container/command_encoder_xehp_and_later.inl" #include "shared/source/command_stream/stream_properties.h" -#include "shared/source/gmm_helper/cache_settings_helper.h" #include "shared/source/helpers/cache_policy.h" #include "shared/source/helpers/constants.h" #include "shared/source/kernel/grf_config.h" @@ -72,53 +71,6 @@ void EncodeComputeMode::programComputeModeCommand(LinearStream &csr, Sta *buffer = stateComputeMode; } -template <> -void EncodeMemoryPrefetch::programMemoryPrefetch(LinearStream &commandStream, const GraphicsAllocation &graphicsAllocation, uint32_t size, size_t offset, const RootDeviceEnvironment &rootDeviceEnvironment) { - using STATE_PREFETCH = typename Family::STATE_PREFETCH; - - auto &hwInfo = *rootDeviceEnvironment.getHardwareInfo(); - auto &productHelper = rootDeviceEnvironment.getHelper(); - - bool prefetch = productHelper.allowMemoryPrefetch(hwInfo); - - if (!prefetch) { - return; - } - - auto usage = CacheSettingsHelper::getGmmUsageType(graphicsAllocation.getAllocationType(), false, productHelper, &hwInfo); - uint32_t mocs = rootDeviceEnvironment.getGmmHelper()->getMOCS(usage); - - uint64_t gpuVa = graphicsAllocation.getGpuAddress() + offset; - - while (size > 0) { - uint32_t sizeInBytesToPrefetch = std::min(alignUp(size, MemoryConstants::cacheLineSize), - static_cast(MemoryConstants::pageSize64k)); - - uint32_t prefetchSize = sizeInBytesToPrefetch / MemoryConstants::cacheLineSize; - - auto statePrefetch = commandStream.getSpaceForCmd(); - STATE_PREFETCH cmd = Family::cmdInitStatePrefetch; - - cmd.setAddress(gpuVa); - cmd.setPrefetchSize(prefetchSize); - cmd.setMemoryObjectControlState(mocs); - cmd.setKernelInstructionPrefetch(GraphicsAllocation::isIsaAllocationType(graphicsAllocation.getAllocationType())); - - if (debugManager.flags.ForceCsStallForStatePrefetch.get() == 1) { - cmd.setParserStall(true); - } - - *statePrefetch = cmd; - - if (sizeInBytesToPrefetch > size) { - break; - } - - gpuVa += sizeInBytesToPrefetch; - size -= sizeInBytesToPrefetch; - } -} - template <> void EncodeSurfaceState::setAuxParamsForMCSCCS(R_SURFACE_STATE *surfaceState, const ReleaseHelper *releaseHelper) { surfaceState->setAuxiliarySurfaceMode(AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_MCS); diff --git a/shared/source/xe_hpc_core/command_encoder_xe_hpc_core.cpp b/shared/source/xe_hpc_core/command_encoder_xe_hpc_core.cpp index dc6ee7ab1f..84c4001f6a 100644 --- a/shared/source/xe_hpc_core/command_encoder_xe_hpc_core.cpp +++ b/shared/source/xe_hpc_core/command_encoder_xe_hpc_core.cpp @@ -16,7 +16,6 @@ #include "shared/source/command_container/command_encoder_xe_hpg_core_and_xe_hpc.inl" #include "shared/source/command_container/command_encoder_xehp_and_later.inl" #include "shared/source/command_stream/stream_properties.h" -#include "shared/source/gmm_helper/cache_settings_helper.h" #include "shared/source/helpers/constants.h" #include "shared/source/kernel/grf_config.h" #include "shared/source/release_helper/release_helper.h" @@ -74,54 +73,6 @@ void EncodeComputeMode::programComputeModeCommand(LinearStream &csr, Sta *buffer = stateComputeMode; } -template <> -void EncodeMemoryPrefetch::programMemoryPrefetch(LinearStream &commandStream, const GraphicsAllocation &graphicsAllocation, uint32_t size, size_t offset, const RootDeviceEnvironment &rootDeviceEnvironment) { - using STATE_PREFETCH = typename Family::STATE_PREFETCH; - - auto &hwInfo = *rootDeviceEnvironment.getHardwareInfo(); - auto &productHelper = rootDeviceEnvironment.getHelper(); - - bool prefetch = productHelper.allowMemoryPrefetch(hwInfo); - - if (!prefetch) { - return; - } - - auto usage = CacheSettingsHelper::getGmmUsageType(graphicsAllocation.getAllocationType(), false, productHelper, &hwInfo); - uint32_t mocs = rootDeviceEnvironment.getGmmHelper()->getMOCS(usage); - - uint64_t gpuVa = graphicsAllocation.getGpuAddress() + offset; - - while (size > 0) { - uint32_t sizeInBytesToPrefetch = std::min(alignUp(size, MemoryConstants::cacheLineSize), - static_cast(MemoryConstants::pageSize64k)); - - // zero based cacheline count (0 == 1 cacheline) - uint32_t prefetchSize = (sizeInBytesToPrefetch / MemoryConstants::cacheLineSize) - 1; - - auto statePrefetch = commandStream.getSpaceForCmd(); - STATE_PREFETCH cmd = Family::cmdInitStatePrefetch; - - cmd.setAddress(gpuVa); - cmd.setPrefetchSize(prefetchSize); - cmd.setMemoryObjectControlState(mocs); - cmd.setKernelInstructionPrefetch(GraphicsAllocation::isIsaAllocationType(graphicsAllocation.getAllocationType())); - - if (debugManager.flags.ForceCsStallForStatePrefetch.get() == 1) { - cmd.setParserStall(true); - } - - *statePrefetch = cmd; - - if (sizeInBytesToPrefetch > size) { - break; - } - - gpuVa += sizeInBytesToPrefetch; - size -= sizeInBytesToPrefetch; - } -} - template <> size_t EncodeMemoryPrefetch::getSizeForMemoryPrefetch(size_t size, const RootDeviceEnvironment &rootDeviceEnvironment) { if (debugManager.flags.EnableMemoryPrefetch.get() == 0) { diff --git a/shared/test/unit_test/xe_hpc_core/test_encode_xe_hpc_core.cpp b/shared/test/unit_test/xe_hpc_core/test_encode_xe_hpc_core.cpp index 211537b889..6dce58f632 100644 --- a/shared/test/unit_test/xe_hpc_core/test_encode_xe_hpc_core.cpp +++ b/shared/test/unit_test/xe_hpc_core/test_encode_xe_hpc_core.cpp @@ -118,7 +118,7 @@ XE_HPC_CORETEST_F(CommandEncodeXeHpcCoreTest, givenDebugVariableSetwhenProgramin EXPECT_EQ(sizeof(STATE_PREFETCH) * expectedCmdsCount, linearStream.getUsed()); for (uint32_t i = 0; i < expectedCmdsCount; i++) { - uint32_t programmedSize = (statePrefetchCmd[i].getPrefetchSize() + 1) * MemoryConstants::cacheLineSize; + uint32_t programmedSize = statePrefetchCmd[i].getPrefetchSize() * MemoryConstants::cacheLineSize; EXPECT_EQ(statePrefetchCmd[i].getAddress(), gpuVa + (i * MemoryConstants::pageSize64k)); EXPECT_FALSE(statePrefetchCmd[i].getKernelInstructionPrefetch());